# 🏏 IPL Score Prediction - Model Training

This notebook trains and evaluates deep learning models for IPL score prediction.

**Author:** IPL Score Prediction Team  
**Date:** 2024

## 1. Setup and Imports

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os
import sys
import joblib

# TensorFlow/Keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Add src to path
sys.path.append('../src')

# Import custom modules
from data_preprocessing import IPLDataLoader, IPLDataCleaner, DataSplitter
from feature_engineering import FeatureEngineer
from model_architectures import (
    BaselineModels, DNNModel, LSTMModel, GRUModel, 
    TransformerModel, HybridModel, ModelFactory
)
from train import TrainingConfig, ModelTrainer, TrainingVisualizer

# Settings
warnings.filterwarnings('ignore')
plt.style.use('seaborn-v0_8-whitegrid')
%matplotlib inline

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")
print("\n‚úÖ Libraries imported successfully!")

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


ImportError: cannot import name 'MultiHeadAttention' from 'tensorflow.keras.layers' (unknown location)

## 2. Load and Prepare Data

In [None]:
# Set up the project's loader (reading from ../data/) and cleaner
loader = IPLDataLoader(data_path='../data/')
cleaner = IPLDataCleaner()

# Load the ball-by-ball records and pass them straight through the cleaner
ball_df = cleaner.clean_data(loader.load_ball_by_ball_data())

# Report the number of rows that remain after cleaning
print(f"‚úÖ Data loaded: {len(ball_df):,} records")

In [None]:
# Apply feature engineering
# Fit the project's FeatureEngineer on the cleaned frame and transform it in
# the same call. The fitted object is reused by later cells (feature column
# lookup, artifact saving).
# NOTE(review): the fit runs on the full dataset, before the train/test split.
# If FeatureEngineer learns statistics from the data, confirm this does not
# leak test information into training.
feature_engineer = FeatureEngineer()
df_features = feature_engineer.fit_transform(ball_df)

# The count printed here is the number of columns in the engineered frame
print(f"‚úÖ Feature engineering complete: {len(df_features.columns)} features")

In [None]:
# Ask the fitted FeatureEngineer which columns are model inputs
feature_cols = feature_engineer.get_feature_columns()

# Design matrix: a copy of those columns with missing entries replaced by 0
X = df_features[feature_cols].copy().fillna(0)

# Target vector: a copy of the 'total_runs' column
y = df_features['total_runs'].copy()

print(f"\nüìä Dataset Shape:")
print(f"   X: {X.shape}")
print(f"   y: {y.shape}")

## 3. Train-Test Split

In [None]:
# Split proportions and seed, named so they can be tuned in one place
TEST_FRACTION = 0.2   # share of all rows held out as the final test set
VAL_FRACTION = 0.15   # share of the remaining rows held out for validation
SPLIT_SEED = 42

# NOTE(review): both splits are random at row level. If each row is a single
# delivery, rows from the same innings can land on both sides of a split.
# Confirm whether a match-level split is needed (DataSplitter is imported but
# unused and may be meant for this; verify).

# Split data: hold out the test set first
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

# Further split training into train and validation
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=VAL_FRACTION, random_state=SPLIT_SEED
)

# Sanity check: the three parts must partition the dataset exactly
assert len(X_train) + len(X_val) + len(X_test) == len(X), \
    "train/val/test sizes do not add up to the dataset size"

print(f"\nüìä Data Split:")
print(f"   Training: {len(X_train):,} samples")
print(f"   Validation: {len(X_val):,} samples")
print(f"   Test: {len(X_test):,} samples")

In [None]:
# Learn the scaling statistics from the training split only
scaler = StandardScaler()
scaler.fit(X_train)

# Apply the same fitted transform to all three splits
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

# Persist the fitted scaler under ../models (directory is created if missing)
os.makedirs('../models', exist_ok=True)
joblib.dump(scaler, '../models/scaler.pkl')

print("‚úÖ Data normalized and scaler saved!")

## 4. Baseline Models

In [None]:
# Fetch the project's baseline estimators, keyed by display name
baseline = BaselineModels()
models = baseline.get_models()


def _regression_metrics(y_true, y_hat):
    """Return (MAE, RMSE, R2) for the given targets and predictions."""
    return (
        mean_absolute_error(y_true, y_hat),
        np.sqrt(mean_squared_error(y_true, y_hat)),
        r2_score(y_true, y_hat),
    )


# Fit each baseline on the scaled training split and score it on the test split
baseline_results = {}

for name, model in models.items():
    print(f"\nüîÑ Training {name}...")

    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    mae, rmse, r2 = _regression_metrics(y_test, y_pred)
    baseline_results[name] = dict(MAE=mae, RMSE=rmse, R2=r2)

    print(f"   MAE: {mae:.2f}, RMSE: {rmse:.2f}, R¬≤: {r2:.4f}")

In [None]:
# Tabulate baseline metrics: one row per model, values rounded to 4 decimals
baseline_df = pd.DataFrame(baseline_results).transpose().round(4)
print("\nüìä Baseline Model Results:")
baseline_df

## 5. Deep Neural Network (DNN)

In [None]:
# Hyperparameters read by the deep-learning cells below
config = TrainingConfig(
    epochs=100,
    batch_size=64,
    learning_rate=0.001,
    early_stopping_patience=15,
    reduce_lr_patience=5,
)

# Stop when validation loss stops improving and restore the best weights
dnn_early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=config.early_stopping_patience,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate on a validation-loss plateau, down to 1e-6
dnn_lr_reducer = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=config.reduce_lr_patience,
    min_lr=1e-6,
    verbose=1,
)

# Keep only the checkpoint with the lowest validation loss on disk
dnn_checkpoint = ModelCheckpoint(
    '../models/dnn_best.keras',
    monitor='val_loss',
    save_best_only=True,
    verbose=0,
)

callbacks = [dnn_early_stopping, dnn_lr_reducer, dnn_checkpoint]

print("‚úÖ Training configuration set!")

In [None]:
# Input width equals the number of columns in the scaled training matrix
input_dim = X_train_scaled.shape[1]

# Assemble the network through the project's DNNModel builder
dnn_builder = DNNModel(
    input_dim=input_dim,
    hidden_units=[256, 128, 64, 32],
    dropout_rate=0.3,
    l2_reg=0.001,
)
dnn_model = dnn_builder.build()

# Adam with the configured learning rate; MSE loss, MAE tracked as a metric
dnn_optimizer = keras.optimizers.Adam(learning_rate=config.learning_rate)
dnn_model.compile(optimizer=dnn_optimizer, loss='mse', metrics=['mae'])

# Print the layer-by-layer summary
print("\nüìä DNN Model Architecture:")
dnn_model.summary()

In [None]:
# Fit the DNN on the scaled training split, validating on the validation split
print("\nüîÑ Training DNN Model...")

dnn_fit_options = dict(
    validation_data=(X_val_scaled, y_val),
    epochs=config.epochs,
    batch_size=config.batch_size,
    callbacks=callbacks,
    verbose=1,
)

dnn_history = dnn_model.fit(X_train_scaled, y_train, **dnn_fit_options)

In [None]:
# Plot the DNN training curves: loss on the left panel, MAE on the right
visualizer = TrainingVisualizer()

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# One entry per panel:
# (train key, validation key, train label, validation label, y label, title)
dnn_panels = [
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
     'Loss (MSE)', 'DNN Training History - Loss'),
    ('mae', 'val_mae', 'Training MAE', 'Validation MAE',
     'MAE', 'DNN Training History - MAE'),
]

for ax, (train_key, val_key, train_label, val_label, y_label, title) in zip(axes, dnn_panels):
    ax.plot(dnn_history.history[train_key], label=train_label)
    ax.plot(dnn_history.history[val_key], label=val_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Predict on the scaled test split and flatten the output to 1-D
dnn_pred = dnn_model.predict(X_test_scaled, verbose=0).flatten()

# Test-set error metrics for the DNN
dnn_mae = mean_absolute_error(y_test, dnn_pred)
dnn_mse = mean_squared_error(y_test, dnn_pred)
dnn_rmse = np.sqrt(dnn_mse)
dnn_r2 = r2_score(y_test, dnn_pred)

print(
    f"\nüìä DNN Model Results:",
    f"   MAE: {dnn_mae:.2f}",
    f"   RMSE: {dnn_rmse:.2f}",
    f"   R¬≤: {dnn_r2:.4f}",
    sep="\n",
)

## 6. LSTM Model

In [None]:
# Recurrent layers expect 3-D input: (samples, timesteps, features).
# Each row becomes a sequence of length 1 by inserting a singleton time axis.
X_train_lstm, X_val_lstm, X_test_lstm = (
    scaled.reshape((scaled.shape[0], 1, scaled.shape[1]))
    for scaled in (X_train_scaled, X_val_scaled, X_test_scaled)
)

print(f"LSTM input shape: {X_train_lstm.shape}")

In [None]:
# Assemble the LSTM through the project's builder: one timestep per sample,
# two recurrent layers, dropout, attention switched on
lstm_builder = LSTMModel(
    input_shape=(1, input_dim),
    lstm_units=[128, 64],
    dropout_rate=0.3,
    use_attention=True,
)
lstm_model = lstm_builder.build()

# Adam with the configured learning rate; MSE loss, MAE tracked as a metric
lstm_optimizer = keras.optimizers.Adam(learning_rate=config.learning_rate)
lstm_model.compile(optimizer=lstm_optimizer, loss='mse', metrics=['mae'])

# Print the layer-by-layer summary
print("\nüìä LSTM Model Architecture:")
lstm_model.summary()

In [None]:
# Callbacks for the LSTM run; the checkpoint goes to its own file
lstm_early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=config.early_stopping_patience,
    restore_best_weights=True,
    verbose=1,
)
lstm_lr_reducer = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=config.reduce_lr_patience,
    min_lr=1e-6,
    verbose=1,
)
lstm_checkpoint = ModelCheckpoint(
    '../models/lstm_best.keras',
    monitor='val_loss',
    save_best_only=True,
    verbose=0,
)
lstm_callbacks = [lstm_early_stopping, lstm_lr_reducer, lstm_checkpoint]

# Fit on the 3-D training tensor, validating on the 3-D validation tensor
print("\nüîÑ Training LSTM Model...")

lstm_fit_options = dict(
    validation_data=(X_val_lstm, y_val),
    epochs=config.epochs,
    batch_size=config.batch_size,
    callbacks=lstm_callbacks,
    verbose=1,
)

lstm_history = lstm_model.fit(X_train_lstm, y_train, **lstm_fit_options)

In [None]:
# Plot the LSTM training curves: loss on the left panel, MAE on the right
def _draw_history_panel(ax, train_series, val_series, train_label, val_label, y_label, title):
    """Draw one training-vs-validation curve pair on the given axes."""
    ax.plot(train_series, label=train_label)
    ax.plot(val_series, label=val_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)


fig, axes = plt.subplots(1, 2, figsize=(14, 5))

_draw_history_panel(
    axes[0],
    lstm_history.history['loss'],
    lstm_history.history['val_loss'],
    'Training Loss',
    'Validation Loss',
    'Loss (MSE)',
    'LSTM Training History - Loss',
)

_draw_history_panel(
    axes[1],
    lstm_history.history['mae'],
    lstm_history.history['val_mae'],
    'Training MAE',
    'Validation MAE',
    'MAE',
    'LSTM Training History - MAE',
)

plt.tight_layout()
plt.show()

In [None]:
# Predict on the 3-D test tensor and flatten the output to 1-D
lstm_pred = lstm_model.predict(X_test_lstm, verbose=0).flatten()

# Test-set error metrics for the LSTM
lstm_mae = mean_absolute_error(y_test, lstm_pred)
lstm_mse = mean_squared_error(y_test, lstm_pred)
lstm_rmse = np.sqrt(lstm_mse)
lstm_r2 = r2_score(y_test, lstm_pred)

print(
    f"\nüìä LSTM Model Results:",
    f"   MAE: {lstm_mae:.2f}",
    f"   RMSE: {lstm_rmse:.2f}",
    f"   R¬≤: {lstm_r2:.4f}",
    sep="\n",
)

## 7. GRU Model

In [None]:
# Assemble the GRU through the project's builder: one timestep per sample,
# two recurrent layers, dropout
gru_builder = GRUModel(
    input_shape=(1, input_dim),
    gru_units=[128, 64],
    dropout_rate=0.3,
)
gru_model = gru_builder.build()

# Adam with the configured learning rate; MSE loss, MAE tracked as a metric
gru_optimizer = keras.optimizers.Adam(learning_rate=config.learning_rate)
gru_model.compile(optimizer=gru_optimizer, loss='mse', metrics=['mae'])

print("\nüìä GRU Model Architecture:")
gru_model.summary()

In [None]:
# Callbacks for the GRU run; the checkpoint goes to its own file
gru_early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=config.early_stopping_patience,
    restore_best_weights=True,
    verbose=1,
)
gru_lr_reducer = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=config.reduce_lr_patience,
    min_lr=1e-6,
    verbose=1,
)
gru_checkpoint = ModelCheckpoint(
    '../models/gru_best.keras',
    monitor='val_loss',
    save_best_only=True,
    verbose=0,
)
gru_callbacks = [gru_early_stopping, gru_lr_reducer, gru_checkpoint]

# The GRU consumes the same 3-D tensors that were prepared for the LSTM
print("\nüîÑ Training GRU Model...")

gru_fit_options = dict(
    validation_data=(X_val_lstm, y_val),
    epochs=config.epochs,
    batch_size=config.batch_size,
    callbacks=gru_callbacks,
    verbose=1,
)

gru_history = gru_model.fit(X_train_lstm, y_train, **gru_fit_options)

In [None]:
# Predict on the 3-D test tensor and flatten the output to 1-D
gru_pred = gru_model.predict(X_test_lstm, verbose=0).flatten()

# Test-set error metrics for the GRU
gru_mae = mean_absolute_error(y_test, gru_pred)
gru_mse = mean_squared_error(y_test, gru_pred)
gru_rmse = np.sqrt(gru_mse)
gru_r2 = r2_score(y_test, gru_pred)

print(
    f"\nüìä GRU Model Results:",
    f"   MAE: {gru_mae:.2f}",
    f"   RMSE: {gru_rmse:.2f}",
    f"   R¬≤: {gru_r2:.4f}",
    sep="\n",
)

## 8. Model Comparison

In [None]:
# Test-set metrics of the three deep-learning models, keyed by model name
dl_results = {
    'DNN': dict(MAE=dnn_mae, RMSE=dnn_rmse, R2=dnn_r2),
    'LSTM': dict(MAE=lstm_mae, RMSE=lstm_rmse, R2=lstm_r2),
    'GRU': dict(MAE=gru_mae, RMSE=gru_rmse, R2=gru_r2),
}

# Baselines first, then the deep-learning entries (which win on a name clash)
all_results = dict(baseline_results)
all_results.update(dl_results)

# One row per model, rounded to 4 decimals, lowest MAE first
results_df = pd.DataFrame(all_results).T.round(4).sort_values('MAE')

print("\nüìä All Model Results (Sorted by MAE):")
results_df

In [None]:
# Three side-by-side bar charts comparing every model on MAE, RMSE and R^2
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# One entry per panel: (results column, bar colour, y label, title)
comparison_panels = [
    ('MAE', 'steelblue', 'MAE', 'Model Comparison - MAE (Lower is Better)'),
    ('RMSE', 'coral', 'RMSE', 'Model Comparison - RMSE (Lower is Better)'),
    ('R2', 'green', 'R¬≤', 'Model Comparison - R¬≤ (Higher is Better)'),
]

for ax, (metric, bar_color, y_label, title) in zip(axes, comparison_panels):
    results_df[metric].plot(kind='bar', ax=ax, color=bar_color, edgecolor='black')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 9. Prediction Analysis

In [None]:
# The model with the lowest test MAE in the comparison table is the "best" one
best_model_name = results_df['MAE'].idxmin()
print(f"\nüèÜ Best Model: {best_model_name}")

# Deep-learning predictions were already computed in the evaluation cells;
# a baseline winner is re-run on the scaled test split instead.
dl_predictions = {'DNN': dnn_pred, 'LSTM': lstm_pred, 'GRU': gru_pred}

if best_model_name in dl_predictions:
    best_pred = dl_predictions[best_model_name]
else:
    best_pred = models[best_model_name].predict(X_test_scaled)

In [None]:
# Left: predicted vs actual scatter. Right: histogram of residuals.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
scatter_ax, residual_ax = axes

# Scatter of test targets against the best model's predictions, with the
# y = x reference line spanning the observed target range
score_span = [y_test.min(), y_test.max()]
scatter_ax.scatter(y_test, best_pred, alpha=0.5, edgecolors='black', linewidth=0.5)
scatter_ax.plot(score_span, score_span, 'r--', linewidth=2, label='Perfect Prediction')
scatter_ax.set_xlabel('Actual Score')
scatter_ax.set_ylabel('Predicted Score')
scatter_ax.set_title(f'{best_model_name}: Actual vs Predicted')
scatter_ax.legend()
scatter_ax.grid(True, alpha=0.3)

# Residuals are actual minus predicted; the red dashed line marks zero error
residuals = y_test - best_pred
residual_ax.hist(residuals, bins=50, edgecolor='black', alpha=0.7)
residual_ax.axvline(0, color='red', linestyle='--', linewidth=2)
residual_ax.set_xlabel('Residual (Actual - Predicted)')
residual_ax.set_ylabel('Frequency')
residual_ax.set_title('Residual Distribution')
residual_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# MAE of the best model within half-open target bands [low, high)
score_ranges = [(0, 50), (50, 100), (100, 150), (150, 200), (200, 300)]

range_analysis = []
for low, high in score_ranges:
    mask = (y_test >= low) & (y_test < high)
    n_in_range = mask.sum()

    # Bands without any test sample are left out of the table
    if n_in_range <= 0:
        continue

    range_analysis.append({
        'Range': f'{low}-{high}',
        'Samples': n_in_range,
        'MAE': mean_absolute_error(y_test[mask], best_pred[mask]),
    })

range_df = pd.DataFrame(range_analysis)
print("\nüìä Error Analysis by Score Range:")
range_df

## 10. Save Best Model

In [None]:
# Persist the winning model: Keras models via .save(), anything else via joblib
keras_models = {'DNN': dnn_model, 'LSTM': lstm_model, 'GRU': gru_model}

if best_model_name in keras_models:
    keras_models[best_model_name].save('../models/best_model.keras')
    model_type = 'keras'
else:
    joblib.dump(models[best_model_name], '../models/best_model.pkl')
    model_type = 'sklearn'

# The fitted feature engineer is stored alongside the model
joblib.dump(feature_engineer, '../models/feature_engineer.pkl')

# Metadata: model name, model type, its (rounded) test metrics as plain
# floats, and the feature column list
best_metrics = {
    metric: float(results_df.loc[best_model_name, metric])
    for metric in ('MAE', 'RMSE', 'R2')
}
model_info = {
    'name': best_model_name,
    'type': model_type,
    'metrics': best_metrics,
    'feature_columns': feature_cols,
}

joblib.dump(model_info, '../models/model_info.pkl')

print(
    f"\n‚úÖ Model artifacts saved!",
    f"   - Best model: models/best_model.{'keras' if model_type == 'keras' else 'pkl'}",
    f"   - Feature engineer: models/feature_engineer.pkl",
    f"   - Scaler: models/scaler.pkl",
    f"   - Model info: models/model_info.pkl",
    sep="\n",
)

## 11. Training Summary

In [None]:
# Final text summary: dataset sizes, number of models trained, the best
# model's test metrics and its MAE gain over the linear-regression baseline.
print("="*60)
print("üìä MODEL TRAINING SUMMARY")
print("="*60)

print(f"\nüìÅ Dataset:")
print(f"   - Training samples: {len(X_train):,}")
print(f"   - Validation samples: {len(X_val):,}")
print(f"   - Test samples: {len(X_test):,}")
print(f"   - Features: {input_dim}")

print(f"\nü§ñ Models Trained:")
print(f"   Baseline Models: {len(baseline_results)}")
print(f"   Deep Learning Models: {len(dl_results)}")

print(f"\nüèÜ Best Model: {best_model_name}")
print(f"   MAE: {results_df.loc[best_model_name, 'MAE']:.2f} runs")
print(f"   RMSE: {results_df.loc[best_model_name, 'RMSE']:.2f} runs")
print(f"   R¬≤: {results_df.loc[best_model_name, 'R2']:.4f}")

print(f"\nüìà Improvement over Linear Regression:")
# The baseline names come from BaselineModels, so look the reference entry up
# defensively: a missing key or a zero MAE must not abort the summary.
lr_mae = baseline_results.get('Linear Regression', {}).get('MAE')
best_mae = results_df.loc[best_model_name, 'MAE']
if lr_mae:
    # Relative MAE reduction, as a percentage of the baseline MAE
    improvement = ((lr_mae - best_mae) / lr_mae) * 100
    print(f"   MAE improvement: {improvement:.1f}%")
else:
    print("   MAE improvement: n/a (no usable 'Linear Regression' baseline)")

print("\n" + "="*60)
print("‚úÖ Model training complete! Ready for deployment.")
print("="*60)