In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers
from tensorflow.keras.losses import Huber
import joblib

# Load dataset
df = pd.read_csv("final_dataset_with_pitch.csv")

#  ENHANCED FEATURE ENGINEERING

# Sort and compute cumulative metrics
df = df.sort_values(by=['match_id', 'inning', 'over', 'ball'])

# Basic cumulative features
df['cumulative_runs'] = df.groupby(['match_id', 'inning'])['total_runs'].cumsum()
df['cumulative_wickets'] = df.groupby(['match_id', 'inning'])['is_wicket'].cumsum()

# Enhanced features
df['balls_bowled'] = df.groupby(['match_id', 'inning']).cumcount() + 1
df['run_rate'] = df['cumulative_runs'] / (df['over'] + 0.1)  # Avoid division by zero
df['strike_rate'] = df.groupby(['match_id', 'inning', 'batter'])['total_runs'].cumsum() / \
                   df.groupby(['match_id', 'inning', 'batter']).cumcount().add(1)

# Player performance metrics (rolling averages)
for player_col in ['batter', 'bowler']:
    df[f'{player_col}_avg'] = df.groupby(player_col)['total_runs'].transform(
        lambda x: x.expanding().mean().shift(1)
    )

# Phase with more granularity
def get_phase(over):
    if over <= 6:
        return 'Powerplay'
    elif over <= 10:
        return 'Middle1'
    elif over <= 15:
        return 'Middle2'
    else:
        return 'Death'
df['phase'] = df['over'].apply(get_phase)

# Target engineering
final_scores = df.groupby(['match_id', 'inning'])['total_runs'].sum().reset_index()
final_scores.columns = ['match_id', 'inning', 'final_score']
df = pd.merge(df, final_scores, on=['match_id', 'inning'])

# Next over runs with better handling of innings breaks
df['next_over_runs'] = df.groupby(['match_id', 'inning'])['total_runs'].rolling(
    window=6, min_periods=1
).sum().shift(-6).reset_index(level=[0,1], drop=True)

# DATA CLEANING
# Handle missing values
df.fillna({
    'batter_avg': df['total_runs'].mean(),
    'bowler_avg': df['total_runs'].mean(),
    'next_over_runs': 0
}, inplace=True)

# Remove outliers (top/bottom 1%)
for col in ['total_runs', 'cumulative_runs', 'next_over_runs']:
    q1 = df[col].quantile(0.01)
    q99 = df[col].quantile(0.99)
    df = df[(df[col] >= q1) & (df[col] <= q99)]


#  FEATURE SELECTION

features = [
    'venue', 'batting_team', 'bowling_team', 'batter', 'bowler',
    'over', 'cumulative_runs', 'cumulative_wickets', 'phase', 'pitch_type',
    'run_rate', 'strike_rate', 'batter_avg', 'bowler_avg'
]

X = df[features]
y_score = df['final_score']
y_next_over = df['next_over_runs']

#  IMPROVED PREPROCESSING

categorical_features = ['venue', 'batting_team', 'bowling_team', 'batter', 'bowler', 'phase', 'pitch_type']
numeric_features = ['over', 'cumulative_runs', 'cumulative_wickets', 'run_rate', 'strike_rate', 'batter_avg', 'bowler_avg']

preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_features),
    ('num', StandardScaler(), numeric_features)
])

#  TRAIN-TEST SPLIT WITH STRATIFICATION

# Stratify by phase to ensure balanced representation
X_train, X_test, y_score_train, y_score_test, y_next_train, y_next_test = train_test_split(
    X, y_score, y_next_over, test_size=0.2, random_state=42, stratify=df['phase']
)

# Fit preprocessor
preprocessor.fit(X_train)
X_train_proc = preprocessor.transform(X_train)
X_test_proc = preprocessor.transform(X_test)

#  ENHANCED MODEL ARCHITECTURE

def build_enhanced_model(input_shape, output_name):
    model = models.Sequential([
        layers.Input(shape=(input_shape,), name='input_layer'),
        layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        layers.Dense(64, activation='relu'),
        layers.Dense(1, name=output_name)
    ])
    
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=Huber(),  # Correct way to use Huber loss
        metrics=['mae', 'mse']
    )
    return model

#  TRAINING WITH CALLBACKS
early_stopping = callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True
)

reduce_lr = callbacks.ReduceLROnPlateau(
    factor=0.5,
    patience=5
)

# Train final score model
score_model = build_enhanced_model(X_train_proc.shape[1], 'score_output')
score_history = score_model.fit(
    X_train_proc, y_score_train,
    epochs=150,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)

# Train next over model
over_model = build_enhanced_model(X_train_proc.shape[1], 'over_output')
over_history = over_model.fit(
    X_train_proc, y_next_train,
    epochs=100,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)

#  EVALUATION METRICs

def evaluate_model(model, X_test, y_test, model_name):
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print(f"\n🔍 {model_name} Evaluation:")
    print(f"MAE: {mae:.2f}")
    print(f"R² Score: {r2:.2f}")
    
    return mae, r2

score_mae, score_r2 = evaluate_model(score_model, X_test_proc, y_score_test, "Final Score Model")
over_mae, over_r2 = evaluate_model(over_model, X_test_proc, y_next_test, "Next Over Runs Model")

score_model.save("enhanced_score_model.keras")
over_model.save("enhanced_next_over_model.keras")
joblib.dump(preprocessor, "enhanced_preprocessor.pkl")

print("\n Models saved successfully!")

Epoch 1/150
[1m2559/2559[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 19ms/step - loss: 46.1750 - mae: 46.2743 - mse: 4606.9243 - val_loss: 14.9771 - val_mae: 14.8200 - val_mse: 418.8941 - learning_rate: 0.0010
Epoch 2/150
[1m2559/2559[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 20ms/step - loss: 18.3699 - mae: 18.1374 - mse: 569.2372 - val_loss: 12.7692 - val_mae: 12.3555 - val_mse: 308.3745 - learning_rate: 0.0010
Epoch 3/150
[1m2559/2559[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 20ms/step - loss: 15.8519 - mae: 15.3976 - mse: 423.0517 - val_loss: 12.0678 - val_mae: 11.5035 - val_mse: 266.2339 - learning_rate: 0.0010
Epoch 4/150
[1m2559/2559[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 18ms/step - loss: 14.8204 - mae: 14.2320 - mse: 367.5749 - val_loss: 10.8832 - val_mae: 10.2155 - val_mse: 223.1289 - learning_rate: 0.0010
Epoch 5/150
[1m2559/2559[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 20ms/step - loss: 14.1948 - mae: 1

In [1]:
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
import ipywidgets as widgets
from IPython.display import display, clear_output

# Load dataset
df = pd.read_csv("final_dataset_with_pitch.csv")

# Extract unique options
venues = sorted(df['venue'].dropna().unique())
teams = sorted(df['batting_team'].dropna().unique())
pitch_types = sorted(df['pitch_type'].dropna().unique())

# Initialize widgets with empty options for batter and bowler
venue = widgets.Dropdown(options=venues, description='Venue:')
batting_team = widgets.Dropdown(options=teams, description='Batting:')
bowling_team = widgets.Dropdown(options=teams, description='Bowling:')
batter = widgets.Dropdown(options=[], description='Batter:')
bowler = widgets.Dropdown(options=[], description='Bowler:')
over = widgets.BoundedFloatText(value=5.0, min=0.1, max=20.0, step=0.1, description='Over:')
cumulative_runs = widgets.IntText(value=30, description='Runs:')
cumulative_wickets = widgets.BoundedIntText(value=2, min=0, max=10, description='Wickets:')
pitch_type = widgets.Dropdown(options=pitch_types, description='Pitch:')
predict_button = widgets.Button(description="Predict")
output = widgets.Output()

# Load models and preprocessor
score_model = tf.keras.models.load_model("score_model.keras")
over_model = tf.keras.models.load_model("next_over_model.keras")
preprocessor = joblib.load("preprocessor.pkl")

# Update batter dropdown based on selected batting team
def update_batters(change):
    selected_team = change['new']
    team_batters = sorted(df[df['batting_team'] == selected_team]['batter'].dropna().unique())
    batter.options = team_batters

# Update bowler dropdown based on selected bowling team
def update_bowlers(change):
    selected_team = change['new']
    team_bowlers = sorted(df[df['bowling_team'] == selected_team]['bowler'].dropna().unique())
    bowler.options = team_bowlers

# Attach observers
batting_team.observe(update_batters, names='value')
bowling_team.observe(update_bowlers, names='value')

# Initial trigger to populate batter/bowler on app load
update_batters({'new': batting_team.value})
update_bowlers({'new': bowling_team.value})

# Prediction function
def predict(b):
    with output:
        clear_output()

        try:
            over_val = over.value
            run_rate = cumulative_runs.value / over_val if over_val > 0 else 0.0

            # Determine phase
            if over_val <= 6:
                phase = 'Powerplay'
            elif over_val <= 15:
                phase = 'Middle'
            else:
                phase = 'Death'

            # Prepare input
            input_df = pd.DataFrame([{
                'venue': venue.value,
                'batting_team': batting_team.value,
                'bowling_team': bowling_team.value,
                'batter': batter.value,
                'bowler': bowler.value,
                'over': over_val,
                'cumulative_runs': cumulative_runs.value,
                'cumulative_wickets': cumulative_wickets.value,
                'run_rate': run_rate,
                'phase': phase,
                'pitch_type': pitch_type.value
            }])

            # Transform and predict
            X = preprocessor.transform(input_df)
            score_pred = score_model.predict(X, verbose=0)[0][0]
            over_pred = over_model.predict(X, verbose=0)[0][0]

            print(f"🏏 Predicted Final Score: {score_pred:.2f}")
            print(f"🔁 Predicted Runs in Next Over: {over_pred:.2f}")
        except Exception as e:
            print(f"❌ Error: {e}")

# Bind button
predict_button.on_click(predict)

# Display widgets
display(
    venue, batting_team, bowling_team,
    batter, bowler, over, cumulative_runs,
    cumulative_wickets, pitch_type,
    predict_button, output
)


  saveable.load_own_variables(weights_store.get(inner_path))


Dropdown(description='Venue:', options=('Arun Jaitley Stadium', 'Arun Jaitley Stadium, Delhi', 'Barabati Stadi…

Dropdown(description='Batting:', options=('Chennai Super Kings', 'Deccan Chargers', 'Delhi Capitals', 'Delhi D…

Dropdown(description='Bowling:', options=('Chennai Super Kings', 'Deccan Chargers', 'Delhi Capitals', 'Delhi D…

Dropdown(description='Batter:', options=('A Flintoff', 'A Mukund', 'A Nehra', 'AM Rahane', 'AS Rajpoot', 'AT R…

Dropdown(description='Bowler:', options=('A Flintoff', 'A Nehra', 'AF Milne', 'AS Rajpoot', 'Akash Singh', 'B …

BoundedFloatText(value=5.0, description='Over:', max=20.0, min=0.1, step=0.1)

IntText(value=30, description='Runs:')

BoundedIntText(value=2, description='Wickets:', max=10)

Dropdown(description='Pitch:', options=('Balanced', 'Batting-friendly', 'Sluggish', 'Spin-friendly', 'Unknown'…

Button(description='Predict', style=ButtonStyle())

Output()

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers, constraints
from tensorflow.keras.losses import Huber
import joblib

# Load dataset
df = pd.read_csv("final_dataset_with_pitch.csv")

# Enhanced Feature Engineering
df = df.sort_values(by=['match_id', 'inning', 'over', 'ball'])
df['cumulative_runs'] = df.groupby(['match_id', 'inning'])['total_runs'].cumsum()
df['cumulative_wickets'] = df.groupby(['match_id', 'inning'])['is_wicket'].cumsum()
df['balls_bowled'] = df.groupby(['match_id', 'inning']).cumcount() + 1
df['run_rate'] = df['cumulative_runs'] / (df['over'] + 0.1)
df['strike_rate'] = df.groupby(['match_id', 'inning', 'batter'])['total_runs'].cumsum() / \
                    df.groupby(['match_id', 'inning', 'batter']).cumcount().add(1)

# Player performance metrics
for player_col in ['batter', 'bowler']:
    df[f'{player_col}_avg'] = df.groupby(player_col)['total_runs'].transform(
        lambda x: x.expanding().mean().shift(1)
    )

# Partnership features
df['partnership_runs'] = df.groupby(['match_id', 'inning', 'batter', 'non_striker'])['total_runs'].cumsum()
df['partnership_balls'] = df.groupby(['match_id', 'inning', 'batter', 'non_striker']).cumcount() + 1

# Bowler fatigue metrics
df['bowler_balls_bowled'] = df.groupby(['match_id', 'inning', 'bowler']).cumcount() + 1
df['bowler_recent_economy'] = df.groupby(['match_id', 'inning', 'bowler'])['total_runs'].rolling(12, min_periods=1).mean().reset_index(level=[0,1,2], drop=True)

# Phase
def get_phase(over):
    if over <= 6: return 'Powerplay'
    elif over <= 10: return 'Middle1'
    elif over <= 15: return 'Middle2'
    else: return 'Death'
df['phase'] = df['over'].apply(get_phase)
df['is_death_over'] = (df['over'] >= 16).astype(int)

# Target engineering
final_scores = df.groupby(['match_id', 'inning'])['total_runs'].sum().reset_index()
final_scores.columns = ['match_id', 'inning', 'final_score']
df = pd.merge(df, final_scores, on=['match_id', 'inning'])

def adjust_final_score(row):
    reduction_factors = [0, 0.05, 0.10, 0.20, 0.35, 0.50, 0.65, 0.80, 0.90, 0.95, 1.0]
    return row['final_score'] * (1 - reduction_factors[min(row['cumulative_wickets'], 10)])
df['adjusted_final_score'] = df.apply(adjust_final_score, axis=1)

# Next over runs
MAX_RUNS_PER_OVER = 36
def calculate_next_over_runs(group):
    group = group.sort_values('ball')
    group['next_over_runs'] = np.clip(group['total_runs'].rolling(6).sum().shift(-6), 0, MAX_RUNS_PER_OVER)
    return group
df = df.groupby(['match_id', 'inning', 'over']).apply(calculate_next_over_runs)

# Additional match state features
df['wickets_remaining'] = 10 - df['cumulative_wickets']
df['resources_remaining'] = (20 - df['over']) * (df['wickets_remaining'] / 10)
df['aggression_factor'] = df['run_rate'] * (1 + (df['is_death_over'] * 0.5)) / (df['cumulative_wickets'] + 1)

# Data Cleaning
df.fillna({
    'batter_avg': df['total_runs'].mean(),
    'bowler_avg': df['total_runs'].mean(),
    'next_over_runs': 0,
    'bowler_recent_economy': df['total_runs'].mean()
}, inplace=True)

# Remove outliers
for col in ['total_runs', 'cumulative_runs', 'next_over_runs']:
    q1 = df[col].quantile(0.01)
    q99 = df[col].quantile(0.99)
    df = df[(df[col] >= q1) & (df[col] <= q99)]

# Feature Selection
features = [
    'venue', 'batting_team', 'bowling_team', 'batter', 'bowler',
    'over', 'cumulative_runs', 'cumulative_wickets', 'phase', 'pitch_type',
    'run_rate', 'strike_rate', 'batter_avg', 'bowler_avg',
    'partnership_runs', 'partnership_balls', 'bowler_balls_bowled',
    'bowler_recent_economy', 'wickets_remaining', 'resources_remaining',
    'aggression_factor', 'is_death_over'
]

X = df[features]
y_score = df['adjusted_final_score']
y_next_over = df['next_over_runs']

# Preprocessing
categorical_features = ['venue', 'batting_team', 'bowling_team', 'batter', 'bowler', 'phase', 'pitch_type']
numeric_features = [f for f in features if f not in categorical_features]

preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_features),
    ('num', StandardScaler(), numeric_features)
])

# Split
X_train, X_test, y_score_train, y_score_test, y_next_train, y_next_test = train_test_split(
    X, y_score, y_next_over, test_size=0.2, random_state=42, stratify=df['phase']
)

preprocessor.fit(X_train)
X_train_proc = preprocessor.transform(X_train)
X_test_proc = preprocessor.transform(X_test)

# Model
class OverConstraint(constraints.Constraint):
    def __call__(self, w):
        return tf.clip_by_value(w, 0, MAX_RUNS_PER_OVER)

def build_dual_output_model(input_shape):
    inputs = tf.keras.Input(shape=(input_shape,))
    x = layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001))(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)

    score_head = layers.Dense(64, activation='relu')(x)
    score_head = layers.Dense(1, name='score_output')(score_head)

    over_head = layers.Dense(64, activation='relu')(x)
    over_head = layers.Dense(1, activation='relu', name='over_output', 
                             kernel_constraint=OverConstraint())(over_head)

    model = tf.keras.Model(inputs=inputs, outputs=[score_head, over_head])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss={'score_output': Huber(), 'over_output': Huber()},
        metrics={'score_output': ['mae', 'mse'], 'over_output': ['mae', 'mse']}
    )
    return model

# Training
early_stopping = callbacks.EarlyStopping(patience=10, restore_best_weights=True)
reduce_lr = callbacks.ReduceLROnPlateau(factor=0.5, patience=5)

model = build_dual_output_model(X_train_proc.shape[1])
model.fit(
    X_train_proc, {'score_output': y_score_train, 'over_output': y_next_train},
    epochs=150,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)

# Post-process
def postprocess_predictions(predictions, current_over, wickets_lost):
    final_score, next_over = predictions
    if wickets_lost >= 8:
        final_score *= 0.7
    elif wickets_lost >= 6:
        final_score *= 0.85
    next_over = min(next_over, 25 if current_over >= 18 else 18)
    return final_score, next_over

# Evaluation
def evaluate_model(model, X_test_proc, y_score_test, y_next_test, X_test_raw):
    score_pred, over_pred = model.predict(X_test_proc)
    processed_preds = [
        postprocess_predictions(
            (score_pred[i], over_pred[i]),
            X_test_raw.iloc[i]['over'],
            X_test_raw.iloc[i]['cumulative_wickets']
        ) for i in range(len(X_test_raw))
    ]
    score_pred_adj = np.array([x[0] for x in processed_preds])
    over_pred_adj = np.array([x[1] for x in processed_preds])
    score_mae = mean_absolute_error(y_score_test, score_pred_adj)
    score_r2 = r2_score(y_score_test, score_pred_adj)
    over_mae = mean_absolute_error(y_next_test, over_pred_adj)
    over_r2 = r2_score(y_next_test, over_pred_adj)
    print("Evaluation Results:")
    print(f"Final Score Prediction -> MAE: {score_mae:.2f} | R²: {score_r2:.2f}")
    print(f"Next Over Run Prediction -> MAE: {over_mae:.2f} | R²: {over_r2:.2f}")
    return score_mae, score_r2, over_mae, over_r2

evaluate_model(model, X_test_proc, y_score_test, y_next_test, X_test_raw=X_test)

# Save
model.save("enhanced_dual_model.keras")
joblib.dump(preprocessor, "enhanced_preprocessor.pkl")

# Prediction wrapper
def predict_match_state(input_features):
    processed = preprocessor.transform(input_features)
    score_pred, over_pred = model.predict(processed)
    current_over = input_features['over'].values[0]
    wickets_lost = input_features['cumulative_wickets'].values[0]
    final_score, next_over = postprocess_predictions(
        (score_pred[0][0], over_pred[0][0]), current_over, wickets_lost
    )
    return {
        'predicted_final_score': round(float(final_score), 2),
        'predicted_next_over': round(float(next_over), 2)
    }


  df = df.groupby(['match_id', 'inning', 'over']).apply(calculate_next_over_runs)


Epoch 1/150
[1m2557/2557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 18ms/step - loss: 35.1081 - over_output_loss: 0.2196 - over_output_mae: 0.3005 - over_output_mse: 1.3249 - score_output_loss: 34.4903 - score_output_mae: 34.9853 - score_output_mse: 2935.3337 - val_loss: 13.3632 - val_over_output_loss: 0.1474 - val_over_output_mae: 0.1609 - val_over_output_mse: 1.1440 - val_score_output_loss: 12.6123 - val_score_output_mae: 13.0996 - val_score_output_mse: 363.5161 - learning_rate: 0.0010
Epoch 2/150
[1m2557/2557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 18ms/step - loss: 14.4456 - over_output_loss: 0.1509 - over_output_mae: 0.1658 - over_output_mse: 1.1782 - score_output_loss: 13.6348 - score_output_mae: 14.1257 - score_output_mse: 391.8369 - val_loss: 11.4672 - val_over_output_loss: 0.1474 - val_over_output_mae: 0.1611 - val_over_output_mse: 1.1439 - val_score_output_loss: 10.5384 - val_score_output_mae: 11.0200 - val_score_output_mse: 275.5423 - learning_ra

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras import layers, models, callbacks, regularizers, constraints
from tensorflow.keras.losses import Huber
import tensorflow as tf
import joblib

# Load dataset
df = pd.read_csv("final_dataset_with_pitch.csv")
df = df.sort_values(by=['match_id', 'inning', 'over', 'ball'])

# Feature Engineering
df['cumulative_runs'] = df.groupby(['match_id', 'inning'])['total_runs'].cumsum()
df['cumulative_wickets'] = df.groupby(['match_id', 'inning'])['is_wicket'].cumsum()
df['balls_bowled'] = df.groupby(['match_id', 'inning']).cumcount() + 1
df['run_rate'] = df['cumulative_runs'] / (df['over'] + 0.1)
df['strike_rate'] = df.groupby(['match_id', 'inning', 'batter'])['total_runs'].cumsum() / \
                    df.groupby(['match_id', 'inning', 'batter']).cumcount().add(1)

# Player stats
for col in ['batter', 'bowler']:
    df[f'{col}_avg'] = df.groupby(col)['total_runs'].transform(lambda x: x.expanding().mean().shift(1))

# Partnership & bowler economy
df['partnership_runs'] = df.groupby(['match_id', 'inning', 'batter', 'non_striker'])['total_runs'].cumsum()
df['partnership_balls'] = df.groupby(['match_id', 'inning', 'batter', 'non_striker']).cumcount() + 1
df['bowler_balls_bowled'] = df.groupby(['match_id', 'inning', 'bowler']).cumcount() + 1
df['bowler_recent_economy'] = df.groupby(['match_id', 'inning', 'bowler'])['total_runs']\
                                .rolling(12, min_periods=1).mean().reset_index(level=[0,1,2], drop=True)

# Phase encoding
df['phase'] = df['over'].apply(lambda x: 'Powerplay' if x <= 6 else 'Middle1' if x <= 10 else 'Middle2' if x <= 15 else 'Death')
df['is_death_over'] = (df['over'] >= 16).astype(int)

# Target variable engineering
final_scores = df.groupby(['match_id', 'inning'])['total_runs'].sum().reset_index()
final_scores.columns = ['match_id', 'inning', 'final_score']
df = pd.merge(df, final_scores, on=['match_id', 'inning'])

# Adjust final score based on wickets
reduction_factors = [0, 0.05, 0.10, 0.20, 0.35, 0.50, 0.65, 0.80, 0.90, 0.95, 1.0]
df['adjusted_final_score'] = df.apply(
    lambda row: row['final_score'] * (1 - reduction_factors[min(row['cumulative_wickets'], 10)]), axis=1
)

# Predicting next over runs using 6-ball rolling window
def calculate_next_over_runs(group):
    group = group.sort_values('ball')
    group['next_over_runs'] = group['total_runs'].rolling(6).sum().shift(-6)
    return group

df = df.groupby(['match_id', 'inning', 'over'], group_keys=False).apply(calculate_next_over_runs)

# Match state features
df['wickets_remaining'] = 10 - df['cumulative_wickets']
df['resources_remaining'] = (20 - df['over']) * (df['wickets_remaining'] / 10)
df['aggression_factor'] = df['run_rate'] * (1 + 0.5 * df['is_death_over']) / (df['cumulative_wickets'] + 1)

# Handle missing values
df.fillna({
    'batter_avg': df['total_runs'].mean(),
    'bowler_avg': df['total_runs'].mean(),
    'next_over_runs': 0,
    'bowler_recent_economy': df['total_runs'].mean()
}, inplace=True)

# Remove outliers
for col in ['total_runs', 'cumulative_runs', 'next_over_runs']:
    q1, q99 = df[col].quantile([0.01, 0.99])
    df = df[(df[col] >= q1) & (df[col] <= q99)]

# Features
features = [
    'venue', 'batting_team', 'bowling_team', 'batter', 'bowler', 'over', 'cumulative_runs',
    'cumulative_wickets', 'phase', 'pitch_type', 'run_rate', 'strike_rate', 'batter_avg',
    'bowler_avg', 'partnership_runs', 'partnership_balls', 'bowler_balls_bowled',
    'bowler_recent_economy', 'wickets_remaining', 'resources_remaining',
    'aggression_factor', 'is_death_over'
]

X = df[features]
y_score = df['adjusted_final_score']
y_next = df['next_over_runs']

# Preprocessing
categorical = ['venue', 'batting_team', 'bowling_team', 'batter', 'bowler', 'phase', 'pitch_type']
numerical = [f for f in features if f not in categorical]

preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical),
    ('num', StandardScaler(), numerical)
])

# Train/test split
X_train, X_test, y_score_train, y_score_test, y_next_train, y_next_test = train_test_split(
    X, y_score, y_next, test_size=0.2, stratify=df['phase'], random_state=42
)

preprocessor.fit(X_train)
X_train_proc = preprocessor.transform(X_train)
X_test_proc = preprocessor.transform(X_test)

# Custom constraint
MAX_RUNS_PER_OVER = 36
class OverConstraint(constraints.Constraint):
    def __call__(self, w): return tf.clip_by_value(w, 0, MAX_RUNS_PER_OVER)

# Model builder
def build_dual_output_model(input_shape):
    inputs = tf.keras.Input(shape=(input_shape,))
    x = layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001))(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)

    score_head = layers.Dense(64, activation='relu')(x)
    score_head = layers.Dense(1, name='score_output')(score_head)

    over_head = layers.Dense(64, activation='relu')(x)
    over_head = layers.Dense(1, activation='relu', name='over_output', kernel_constraint=OverConstraint())(over_head)

    model = models.Model(inputs=inputs, outputs=[score_head, over_head])
    model.compile(
        optimizer='adam',
        loss={'score_output': Huber(), 'over_output': Huber()},
        metrics={'score_output': ['mae', 'mse'], 'over_output': ['mae', 'mse']}
    )
    return model

# Train model
model = build_dual_output_model(X_train_proc.shape[1])
model.fit(
    X_train_proc,
    {'score_output': y_score_train, 'over_output': y_next_train},
    validation_split=0.2,
    epochs=300,
    batch_size=32,
    callbacks=[
        callbacks.EarlyStopping(patience=10, restore_best_weights=True),
        callbacks.ReduceLROnPlateau(factor=0.5, patience=5)
    ],
    verbose=1
)

# Postprocessing
def postprocess_predictions(preds, over, wickets):
    score, next_over = preds
    score = score * 0.85 if wickets >= 6 else score * 0.7 if wickets >= 8 else score
    next_over = min(next_over, 25 if over >= 18 else 18)
    return score, next_over

# Evaluation
def evaluate_model(model, X_test_proc, y_score_test, y_next_test, X_test_raw):
    score_preds, over_preds = model.predict(X_test_proc, verbose=0)
    post_preds = [postprocess_predictions((score_preds[i], over_preds[i]), 
                                          X_test_raw.iloc[i]['over'], 
                                          X_test_raw.iloc[i]['cumulative_wickets']) 
                  for i in range(len(X_test_raw))]

    score_final = np.array([x[0] for x in post_preds])
    over_final = np.array([x[1] for x in post_preds])

    print(f"Final Score -> MAE: {mean_absolute_error(y_score_test, score_final):.2f}, R²: {r2_score(y_score_test, score_final):.2f}")
    print(f"Next Over  -> MAE: {mean_absolute_error(y_next_test, over_final):.2f}, R²: {r2_score(y_next_test, over_final):.2f}")

evaluate_model(model, X_test_proc, y_score_test, y_next_test, X_test)

# Save model & preprocessor
model.save("enhanced_dual_model.keras")
joblib.dump(preprocessor, "enhanced_preprocessor.pkl")

# Wrapper for inference
def predict_match_state(input_df):
    proc = preprocessor.transform(input_df)
    score_pred, over_pred = model.predict(proc, verbose=0)
    current_over = input_df['over'].values[0]
    wickets = input_df['cumulative_wickets'].values[0]
    final, over = postprocess_predictions((score_pred[0][0], over_pred[0][0]), current_over, wickets)
    return {'predicted_final_score': round(final, 2), 'predicted_next_over': round(over, 2)}


  df = df.groupby(['match_id', 'inning', 'over'], group_keys=False).apply(calculate_next_over_runs)


Epoch 1/300
[1m5114/5114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 4ms/step - loss: 29.9049 - over_output_loss: 0.1887 - over_output_mae: 0.2502 - over_output_mse: 1.2192 - score_output_loss: 29.2445 - score_output_mae: 29.7391 - score_output_mse: 2173.0256 - val_loss: 13.8003 - val_over_output_loss: 0.1476 - val_over_output_mae: 0.1611 - val_over_output_mse: 1.1443 - val_score_output_loss: 12.8684 - val_score_output_mae: 13.3567 - val_score_output_mse: 374.4094 - learning_rate: 0.0010
Epoch 2/300
[1m5114/5114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 4ms/step - loss: 15.8301 - over_output_loss: 0.1573 - over_output_mae: 0.1708 - over_output_mse: 1.2437 - score_output_loss: 14.8264 - score_output_mae: 15.3183 - score_output_mse: 448.2397 - val_loss: 12.5726 - val_over_output_loss: 0.1475 - val_over_output_mae: 0.1606 - val_over_output_mse: 1.1440 - val_score_output_loss: 11.4475 - val_score_output_mae: 11.9313 - val_score_output_mse: 322.8271 - learning_rate

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers, constraints
from tensorflow.keras.losses import Huber
import joblib

# Load dataset
df = pd.read_csv("final_dataset_with_pitch.csv")

## ENHANCED FEATURE ENGINEERING
df = df.sort_values(by=['match_id', 'inning', 'over', 'ball'])

# Basic cumulative features
df['cumulative_runs'] = df.groupby(['match_id', 'inning'])['total_runs'].cumsum()
df['cumulative_wickets'] = df.groupby(['match_id', 'inning'])['is_wicket'].cumsum()

# Enhanced features with temporal context
df['run_rate'] = df['cumulative_runs'] / (df['over'] + 0.1)
df['strike_rate'] = df.groupby(['match_id', 'inning', 'batter'])['total_runs'].cumsum() / \
                   df.groupby(['match_id', 'inning', 'batter']).cumcount().add(1)

# Player performance metrics with recent form
for player_col in ['batter', 'bowler']:
    df[f'{player_col}_avg'] = df.groupby(player_col)['total_runs'].transform(
        lambda x: x.expanding().mean().shift(1))
    df[f'{player_col}_last5'] = df.groupby(player_col)['total_runs'].transform(
        lambda x: x.rolling(30).mean())

# Partnership dynamics
df['partnership_runs'] = df.groupby(['match_id', 'inning', 'batter', 'non_striker'])['total_runs'].cumsum()
df['partnership_balls'] = df.groupby(['match_id', 'inning', 'batter', 'non_striker']).cumcount() + 1
df['partnership_momentum'] = df['partnership_runs'] / df['partnership_balls']

# Bowler fatigue and recent performance
df['bowler_balls_bowled'] = df.groupby(['match_id', 'inning', 'bowler']).cumcount() + 1
df['bowler_recent_economy'] = df.groupby(['match_id', 'inning', 'bowler'])['total_runs'].rolling(12, min_periods=1).mean().reset_index(level=[0,1,2], drop=True)

# Match phase and pressure indicators
def get_phase(over):
    if over <= 6: return 'Powerplay'
    elif over <= 10: return 'Middle1'
    elif over <= 15: return 'Middle2'
    return 'Death'
df['phase'] = df['over'].apply(get_phase)
df['is_death_over'] = (df['over'] >= 16).astype(int)
df['runs_last_5_overs'] = df.groupby(['match_id', 'inning'])['total_runs'].rolling(30, min_periods=1).sum().reset_index(level=[0,1], drop=True)
df['wickets_last_5_overs'] = df.groupby(['match_id', 'inning'])['is_wicket'].rolling(30, min_periods=1).sum().reset_index(level=[0,1], drop=True)

# Target engineering - improved next over calculation
def calculate_next_over_runs(group):
    group = group.sort_values('ball')
    # Use actual over boundaries instead of rolling window
    over_runs = group.groupby('over')['total_runs'].sum()
    next_over_runs = over_runs.shift(-1)
    group['next_over_runs'] = group['over'].map(next_over_runs)
    return group

df = df.groupby(['match_id', 'inning'], group_keys=False).apply(calculate_next_over_runs)

# Final score target
final_scores = df.groupby(['match_id', 'inning'])['total_runs'].sum().reset_index()
final_scores.columns = ['match_id', 'inning', 'final_score']
df = pd.merge(df, final_scores, on=['match_id', 'inning'])

# Match state features
df['wickets_remaining'] = 10 - df['cumulative_wickets']
df['resources_remaining'] = (20 - df['over']) * (df['wickets_remaining'] / 10)
df['required_run_rate'] = df['final_score'] / (20 - df['over'])
df['run_rate_delta'] = df['run_rate'] - df['required_run_rate']

# DATA CLEANING
df.fillna({
    'batter_avg': df['total_runs'].mean(),
    'bowler_avg': df['total_runs'].mean(),
    'next_over_runs': 0,
    'bowler_recent_economy': df['total_runs'].mean(),
    'batter_last5': df['total_runs'].mean(),
    'bowler_last5': df['total_runs'].mean()
}, inplace=True)

# Remove outliers more carefully
for col in ['total_runs', 'cumulative_runs', 'next_over_runs']:
    q1 = df[col].quantile(0.01)
    q99 = df[col].quantile(0.99)
    df = df[(df[col] >= q1) & (df[col] <= q99)]

# FEATURE SELECTION
features = [
    'venue', 'batting_team', 'bowling_team', 'batter', 'bowler',
    'over', 'cumulative_runs', 'cumulative_wickets', 'phase', 'pitch_type',
    'run_rate', 'strike_rate', 'batter_avg', 'bowler_avg',
    'partnership_runs', 'partnership_balls', 'bowler_balls_bowled',
    'bowler_recent_economy', 'wickets_remaining', 'resources_remaining',
    'is_death_over', 'runs_last_5_overs', 'wickets_last_5_overs',
    'required_run_rate', 'run_rate_delta', 'partnership_momentum',
    'batter_last5', 'bowler_last5'
]

X = df[features]
y_score = df['final_score']
y_next_over = df['next_over_runs']

# PREPROCESSING
categorical_features = ['venue', 'batting_team', 'bowling_team', 'batter', 'bowler', 'phase', 'pitch_type']
numeric_features = [f for f in features if f not in categorical_features]

preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_features),
    ('num', StandardScaler(), numeric_features)
])

# TRAIN-TEST SPLIT
X_train, X_test, y_score_train, y_score_test, y_next_train, y_next_test = train_test_split(
    X, y_score, y_next_over, test_size=0.2, random_state=42, stratify=df['phase']
)

# Fit preprocessor
preprocessor.fit(X_train)
X_train_proc = preprocessor.transform(X_train)
X_test_proc = preprocessor.transform(X_test)

## ENHANCED MODEL ARCHITECTURE
class OverConstraint(constraints.Constraint):
    def __call__(self, w):
        return tf.clip_by_value(w, 0, 36)  # Max 36 runs in an over

def build_next_over_model(input_shape):
    inputs = tf.keras.Input(shape=(input_shape,))
    
    # Feature processing
    x = layers.Dense(512, activation='swish', kernel_regularizer=regularizers.l1_l2(l1=0.01, l2=0.01))(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    
    # Temporal processing
    x = layers.Reshape((1, -1))(x)  # Add temporal dimension
    x = layers.LSTM(128, return_sequences=True)(x)
    x = layers.LSTM(64)(x)
    
    # Final layers
    x = layers.Dense(64, activation='swish')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(1, activation='relu', kernel_constraint=OverConstraint())(x)
    
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
        loss=Huber(),
        metrics=['mae', 'mse']
    )
    return model

def build_final_score_model(input_shape):
    inputs = tf.keras.Input(shape=(input_shape,))
    
    x = layers.Dense(512, activation='swish', kernel_regularizer=regularizers.l1_l2(l1=0.01, l2=0.01))(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    
    x = layers.Dense(256, activation='swish')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    
    x = layers.Dense(128, activation='swish')(x)
    outputs = layers.Dense(1)(x)
    
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
        loss=Huber(),
        metrics=['mae', 'mse']
    )
    return model

## TRAINING WITH ENHANCED CALLBACKS
class DynamicWeightAdjuster(callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # Reduce learning rate when validation MAE plateaus
        if logs.get('val_mae') > logs.get('mae') * 1.1:
            lr = tf.keras.backend.get_value(self.model.optimizer.lr)
            tf.keras.backend.set_value(self.model.optimizer.lr, lr * 0.9)

early_stopping = callbacks.EarlyStopping(
    patience=15,
    monitor='val_mae',
    restore_best_weights=True
)

reduce_lr = callbacks.ReduceLROnPlateau(
    factor=0.5,
    patience=7,
    min_lr=1e-6
)

# Train next over model with sample weighting
over_train_mean = y_next_train.mean()
sample_weights = np.where(y_next_train > over_train_mean, 1.5, 1.0)

over_model = build_next_over_model(X_train_proc.shape[1])
over_history = over_model.fit(
    X_train_proc, y_next_train,
    sample_weight=sample_weights,
    epochs=300,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr, DynamicWeightAdjuster()],
    verbose=1
)

# Train final score model
score_model = build_final_score_model(X_train_proc.shape[1])
score_history = score_model.fit(
    X_train_proc, y_score_train,
    epochs=200,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)

## POST-PROCESSING AND EVALUATION
def postprocess_predictions(preds, over, wickets, run_rate, req_rate):
    # Score adjustments
    if wickets >= 8:
        preds *= 0.65  # More aggressive reduction for tailenders
    elif wickets >= 6:
        preds *= 0.8
    
    # Context-based adjustments
    aggression = run_rate / max(req_rate, 1)
    preds = min(preds, 25 if over >= 18 else 18) * aggression
    
    return max(preds, 0)  # Ensure non-negative

def evaluate_model(model, X_test, y_test, model_name, X_test_raw=None):
    y_pred = model.predict(X_test)
    
    if model_name == "Next Over Runs Model" and X_test_raw is not None:
        # Apply post-processing for next over predictions
        y_pred_adj = [
            postprocess_predictions(
                y_pred[i][0],
                X_test_raw.iloc[i]['over'],
                X_test_raw.iloc[i]['cumulative_wickets'],
                X_test_raw.iloc[i]['run_rate'],
                X_test_raw.iloc[i]['required_run_rate']
            )
            for i in range(len(X_test))
        ]
        y_pred = np.array(y_pred_adj).reshape(-1, 1)
    
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print(f"\n🔍 {model_name} Evaluation:")
    print(f"MAE: {mae:.2f}")
    print(f"R² Score: {r2:.2f}")
    
    return mae, r2

score_mae, score_r2 = evaluate_model(score_model, X_test_proc, y_score_test, "Final Score Model")
over_mae, over_r2 = evaluate_model(over_model, X_test_proc, y_next_test, "Next Over Runs Model", X_test)

# SAVE MODELS
score_model.save("enhanced_score_model.keras")
over_model.save("enhanced_next_over_model.keras")
joblib.dump(preprocessor, "enhanced_preprocessor.pkl")

print("\nModels saved successfully!")

  df = df.groupby(['match_id', 'inning'], group_keys=False).apply(calculate_next_over_runs)


Epoch 1/300
[1m2563/2563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 10ms/step - loss: 20.3028 - mae: 3.8923 - mse: 24.9413 - val_loss: 4.3472 - val_mae: 3.3946 - val_mse: 18.1086 - learning_rate: 5.0000e-04
Epoch 2/300
[1m2563/2563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 10ms/step - loss: 4.4906 - mae: 3.4516 - mse: 18.8699 - val_loss: 4.4639 - val_mae: 3.3924 - val_mse: 18.2565 - learning_rate: 5.0000e-04
Epoch 3/300
[1m2563/2563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 10ms/step - loss: 4.4631 - mae: 3.4523 - mse: 18.8948 - val_loss: 4.2996 - val_mae: 3.4759 - val_mse: 18.5633 - learning_rate: 5.0000e-04
Epoch 4/300
[1m2563/2563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 10ms/step - loss: 4.4118 - mae: 3.4268 - mse: 18.6335 - val_loss: 4.2511 - val_mae: 3.3211 - val_mse: 17.5781 - learning_rate: 5.0000e-04
Epoch 5/300
[1m2563/2563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 10ms/step - loss: 4.3674 - mae: 3.4223 - m

In [7]:
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
import ipywidgets as widgets
from IPython.display import display, clear_output

# Load dataset
df = pd.read_csv("final_dataset_with_pitch.csv")

# Extract unique options
venues = sorted(df['venue'].dropna().unique())
teams = sorted(df['batting_team'].dropna().unique())
pitch_types = sorted(df['pitch_type'].dropna().unique())

# Initialize widgets with empty options for batter and bowler
venue = widgets.Dropdown(options=venues, description='Venue:')
batting_team = widgets.Dropdown(options=teams, description='Batting:')
bowling_team = widgets.Dropdown(options=teams, description='Bowling:')
batter = widgets.Dropdown(options=[], description='Batter:')
bowler = widgets.Dropdown(options=[], description='Bowler:')
over = widgets.BoundedFloatText(value=5.0, min=0.1, max=20.0, step=0.1, description='Over:')
cumulative_runs = widgets.IntText(value=30, description='Runs:')
cumulative_wickets = widgets.BoundedIntText(value=2, min=0, max=10, description='Wickets:')
pitch_type = widgets.Dropdown(options=pitch_types, description='Pitch:')
predict_button = widgets.Button(description="Predict")
output = widgets.Output()

# Load models and preprocessor
score_model = tf.keras.models.load_model("score_model.keras")
over_model = tf.keras.models.load_model("next_over_model.keras")
preprocessor = joblib.load("preprocessor.pkl")

# Update batter dropdown based on selected batting team
def update_batters(change):
    selected_team = change['new']
    team_batters = sorted(df[df['batting_team'] == selected_team]['batter'].dropna().unique())
    batter.options = team_batters

# Update bowler dropdown based on selected bowling team
def update_bowlers(change):
    selected_team = change['new']
    team_bowlers = sorted(df[df['bowling_team'] == selected_team]['bowler'].dropna().unique())
    bowler.options = team_bowlers

# Attach observers
batting_team.observe(update_batters, names='value')
bowling_team.observe(update_bowlers, names='value')

# Initial trigger to populate batter/bowler on app load
update_batters({'new': batting_team.value})
update_bowlers({'new': bowling_team.value})

# Prediction function
def predict(b):
    with output:
        clear_output()

        try:
            over_val = over.value
            run_rate = cumulative_runs.value / over_val if over_val > 0 else 0.0

            # Determine phase
            if over_val <= 6:
                phase = 'Powerplay'
            elif over_val <= 15:
                phase = 'Middle'
            else:
                phase = 'Death'

            # Prepare input
            input_df = pd.DataFrame([{
                'venue': venue.value,
                'batting_team': batting_team.value,
                'bowling_team': bowling_team.value,
                'batter': batter.value,
                'bowler': bowler.value,
                'over': over_val,
                'cumulative_runs': cumulative_runs.value,
                'cumulative_wickets': cumulative_wickets.value,
                'run_rate': run_rate,
                'phase': phase,
                'pitch_type': pitch_type.value
            }])

            # Transform and predict
            X = preprocessor.transform(input_df)
            score_pred = score_model.predict(X, verbose=0)[0][0]
            over_pred = over_model.predict(X, verbose=0)[0][0]

            print(f"🏏 Predicted Final Score: {score_pred:.2f}")
            print(f"🔁 Predicted Runs in Next Over: {over_pred:.2f}")
        except Exception as e:
            print(f"❌ Error: {e}")

# Bind button
predict_button.on_click(predict)

# Display widgets
display(
    venue, batting_team, bowling_team,
    batter, bowler, over, cumulative_runs,
    cumulative_wickets, pitch_type,
    predict_button, output
)


  saveable.load_own_variables(weights_store.get(inner_path))


Dropdown(description='Venue:', options=('Arun Jaitley Stadium', 'Arun Jaitley Stadium, Delhi', 'Barabati Stadi…

Dropdown(description='Batting:', options=('Chennai Super Kings', 'Deccan Chargers', 'Delhi Capitals', 'Delhi D…

Dropdown(description='Bowling:', options=('Chennai Super Kings', 'Deccan Chargers', 'Delhi Capitals', 'Delhi D…

Dropdown(description='Batter:', options=('A Flintoff', 'A Mukund', 'A Nehra', 'AM Rahane', 'AS Rajpoot', 'AT R…

Dropdown(description='Bowler:', options=('A Flintoff', 'A Nehra', 'AF Milne', 'AS Rajpoot', 'Akash Singh', 'B …

BoundedFloatText(value=5.0, description='Over:', max=20.0, min=0.1, step=0.1)

IntText(value=30, description='Runs:')

BoundedIntText(value=2, description='Wickets:', max=10)

Dropdown(description='Pitch:', options=('Balanced', 'Batting-friendly', 'Sluggish', 'Spin-friendly', 'Unknown'…

Button(description='Predict', style=ButtonStyle())

Output()