In [41]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import pickle

# Load the match-level and delivery-level datasets.
matches = pd.read_csv("iplDatasets/matches.csv")
deliveries = pd.read_csv("iplDatasets/deliveries.csv")

# Basic preprocessing: rows with a missing 'winner' have no label to encode,
# so drop them before fitting the encoders.
matches = matches.dropna(subset=['winner'])

# Encode team1, team2, and winner as integer labels.
le_team = LabelEncoder()
le_winner = LabelEncoder()

# Fit the team encoder on the union of both team columns. Fitting on team1
# alone makes transform() raise ValueError for any team that only ever
# appears as team2. When every team2 value also occurs in team1 the learned
# classes (and therefore the codes) are the same as before.
le_team.fit(pd.concat([matches['team1'], matches['team2']], ignore_index=True))
matches['team1_enc'] = le_team.transform(matches['team1'])
matches['team2_enc'] = le_team.transform(matches['team2'])

# The winner gets its own encoder so its label space is independent of the
# team feature encoding.
matches['winner_enc'] = le_winner.fit_transform(matches['winner'])

# Per-team mean of 'total_runs' over every row of deliveries.csv, keyed by
# the batting side.
# NOTE(review): the mean is taken over delivery rows, so if each row is a
# single ball this is runs-per-delivery rather than a per-match score --
# confirm which one is intended.
per_team_mean = deliveries.groupby('batting_team')['total_runs'].mean()
team_avg_scores = per_team_mean.rename_axis('team').reset_index(name='avg_score')

# Attach the average once per side of the fixture. The lookup frame is renamed
# up front so the merge key and the new column already carry their final
# names; a left merge keeps every match row, leaving NaN where a team has no
# entry in the lookup.
for side, avg_col in (('team1', 'team1_avg'), ('team2', 'team2_avg')):
    lookup = team_avg_scores.rename(columns={'team': side, 'avg_score': avg_col})
    matches = matches.merge(lookup, on=side, how='left')

# Final training data: the two encoded team ids plus each side's average
# runs feature; the target is the encoded winner.
X = matches[['team1_enc', 'team2_enc', 'team1_avg', 'team2_avg']]
y = matches['winner_enc']

# Train model. A fixed random_state makes the forest (and therefore the
# pickled artifact) reproducible from run to run; without it every execution
# of this cell produces a different model from the same data.
model = RandomForestClassifier(random_state=42)
model.fit(X, y)

# Persist the trained model and the (team, winner) encoder pair as separate
# pickle files, written in this order. The encoders are stored together as a
# tuple, so a loader has to unpack them in the same (le_team, le_winner) order.
artifacts = (
    ('ipl_model.pkl', model),
    ('encoders.pkl', (le_team, le_winner)),
)
for target_path, payload in artifacts:
    with open(target_path, 'wb') as f:
        pickle.dump(payload, f)
