In [34]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import pickle as pkl

In [35]:
data_backup = pd.read_csv('../csvs/deliveries.csv')
df = data_backup.copy()
df.columns

Index(['match_id', 'season', 'start_date', 'venue', 'innings', 'ball',
       'batting_team', 'bowling_team', 'striker', 'non_striker', 'bowler',
       'runs_off_bat', 'extras', 'wides', 'noballs', 'byes', 'legbyes',
       'penalty', 'wicket_type', 'player_dismissed', 'other_wicket_type',
       'other_player_dismissed'],
      dtype='object')

In [36]:
avg = pd.read_csv('../csvs/batting_avg_cleaned.csv')
avg.head()

Unnamed: 0,Player,matches,innings,not_out,runs,highest_score,avg,strike_rate,100s,50s,0s
0,SA Abbott,1.0,0,0.0,0,0,0.0,0.0,0,0,0
1,Abdullah Shafique,8.0,8,0.0,336,113,42.0,93.33,1,3,1
2,CN Ackermann,9.0,9,0.0,216,69,24.0,82.12,0,1,1
3,Agha Salman,3.0,1,0.0,51,51,51.0,113.33,0,1,0
4,MM Ali,6.0,6,0.0,95,42,15.83,74.8,0,0,0


In [37]:
bol_avg = pd.read_csv('../csvs/bowling_avg_cleaned.csv')
bol_avg.head()

Unnamed: 0,player,matches,Inns,balls,maiden,runs,wickets,avg,economy
0,SA Abbott,1.0,1,60.0,0,61,2,30.5,6.1
1,Abdullah Shafique,8.0,0,0.0,0,0,0,0.0,0.0
2,CN Ackermann,9.0,9,284.0,2,234,5,46.8,4.94
3,Agha Salman,3.0,2,30.0,0,46,0,0.0,9.2
4,MM Ali,6.0,6,256.0,0,248,5,49.6,5.81


In [38]:
bol_avg.rename(columns={'player':'player_name'}, inplace=True)
bol_avg.rename(columns={'avg':'bowling_avg'}, inplace=True)
bol_avg.rename(columns={'runs':'bowling_runs'}, inplace=True)
bol_avg.rename(columns={'Inns':'bowling_inns'}, inplace=True)
bol_avg.drop(["matches"], axis=1, inplace=True)

In [39]:
bol_avg.head()

Unnamed: 0,player_name,bowling_inns,balls,maiden,bowling_runs,wickets,bowling_avg,economy
0,SA Abbott,1,60.0,0,61,2,30.5,6.1
1,Abdullah Shafique,0,0.0,0,0,0,0.0,0.0
2,CN Ackermann,9,284.0,2,234,5,46.8,4.94
3,Agha Salman,2,30.0,0,46,0,0.0,9.2
4,MM Ali,6,256.0,0,248,5,49.6,5.81


In [40]:
total_runs_by_batsman = df.groupby(['striker','batting_team', 'bowling_team', 'venue'])['runs_off_bat'].sum().reset_index()
total_runs_by_batsman.columns = ['Batsman','batting_team', 'bowling_team', 'Venue', 'Total Runs']

total_wickets_by_bowler = df[df['wicket_type'].notna()].groupby(['bowler','bowling_team', 'batting_team', 'venue']).size().reset_index(name='Total Wickets')

In [41]:
total_runs_by_batsman.head()

Unnamed: 0,Batsman,batting_team,bowling_team,Venue,Total Runs
0,A Dutt,Netherlands,Australia,"Arun Jaitley Stadium, Delhi",1
1,A Dutt,Netherlands,Bangladesh,"Eden Gardens, Kolkata",9
2,A Dutt,Netherlands,New Zealand,"Rajiv Gandhi International Stadium, Uppal, Hyd...",11
3,A Dutt,Netherlands,Pakistan,"Rajiv Gandhi International Stadium, Uppal, Hyd...",1
4,A Dutt,Netherlands,South Africa,"Himachal Pradesh Cricket Association Stadium, ...",23


In [42]:
total_wickets_by_bowler.head()

Unnamed: 0,bowler,bowling_team,batting_team,venue,Total Wickets
0,A Dutt,Netherlands,Australia,"Arun Jaitley Stadium, Delhi",1
1,A Dutt,Netherlands,Bangladesh,"Eden Gardens, Kolkata",1
2,A Dutt,Netherlands,New Zealand,"Rajiv Gandhi International Stadium, Uppal, Hyd...",2
3,A Dutt,Netherlands,Pakistan,"Rajiv Gandhi International Stadium, Uppal, Hyd...",1
4,A Dutt,Netherlands,Sri Lanka,Bharat Ratna Shri Atal Bihari Vajpayee Ekana C...,3


In [43]:
player_df = pd.merge(total_runs_by_batsman, total_wickets_by_bowler,
                     left_on=['Batsman', 'batting_team', 'bowling_team', 'Venue'],
                     right_on=['bowler', 'bowling_team', 'batting_team', 'venue'],
                     how='outer')

player_df = player_df.drop(['bowler', 'bowling_team_y', 'batting_team_y', 'venue'], axis=1)

player_df.columns = ['player_name', 'team', 'opponent_team', 'venue', 'total_runs', 'total_wickets']

player_df[['total_runs', 'total_wickets']] = player_df[['total_runs', 'total_wickets']].fillna(0)

In [44]:
player_df['total_runs'] = player_df['total_runs'].astype(int)
player_df['total_wickets'] = player_df['total_wickets'].astype(int)
player_df.head()

Unnamed: 0,player_name,team,opponent_team,venue,total_runs,total_wickets
0,A Dutt,Netherlands,Australia,"Arun Jaitley Stadium, Delhi",1,1
1,A Dutt,Netherlands,Bangladesh,"Eden Gardens, Kolkata",9,1
2,A Dutt,Netherlands,New Zealand,"Rajiv Gandhi International Stadium, Uppal, Hyd...",11,2
3,A Dutt,Netherlands,Pakistan,"Rajiv Gandhi International Stadium, Uppal, Hyd...",1,1
4,A Dutt,Netherlands,South Africa,"Himachal Pradesh Cricket Association Stadium, ...",23,0


In [45]:
avg.head()
avg = avg.rename(columns={'Player': 'player_name'})
player_df = pd.merge(player_df, avg, on='player_name', how='left')
player_df.head()

Unnamed: 0,player_name,team,opponent_team,venue,total_runs,total_wickets,matches,innings,not_out,runs,highest_score,avg,strike_rate,100s,50s,0s
0,A Dutt,Netherlands,Australia,"Arun Jaitley Stadium, Delhi",1,1,9.0,9.0,3.0,70.0,23*,11.66,80.45,0.0,0.0,0.0
1,A Dutt,Netherlands,Bangladesh,"Eden Gardens, Kolkata",9,1,9.0,9.0,3.0,70.0,23*,11.66,80.45,0.0,0.0,0.0
2,A Dutt,Netherlands,New Zealand,"Rajiv Gandhi International Stadium, Uppal, Hyd...",11,2,9.0,9.0,3.0,70.0,23*,11.66,80.45,0.0,0.0,0.0
3,A Dutt,Netherlands,Pakistan,"Rajiv Gandhi International Stadium, Uppal, Hyd...",1,1,9.0,9.0,3.0,70.0,23*,11.66,80.45,0.0,0.0,0.0
4,A Dutt,Netherlands,South Africa,"Himachal Pradesh Cricket Association Stadium, ...",23,0,9.0,9.0,3.0,70.0,23*,11.66,80.45,0.0,0.0,0.0


In [46]:
player_df.isna().sum()
player_df = player_df.dropna()
player_df.isna().sum()

player_name      0
team             0
opponent_team    0
venue            0
total_runs       0
total_wickets    0
matches          0
innings          0
not_out          0
runs             0
highest_score    0
avg              0
strike_rate      0
100s             0
50s              0
0s               0
dtype: int64

In [47]:
player_df['player_name'].nunique()

138

In [48]:
player_df.head()

Unnamed: 0,player_name,team,opponent_team,venue,total_runs,total_wickets,matches,innings,not_out,runs,highest_score,avg,strike_rate,100s,50s,0s
0,A Dutt,Netherlands,Australia,"Arun Jaitley Stadium, Delhi",1,1,9.0,9.0,3.0,70.0,23*,11.66,80.45,0.0,0.0,0.0
1,A Dutt,Netherlands,Bangladesh,"Eden Gardens, Kolkata",9,1,9.0,9.0,3.0,70.0,23*,11.66,80.45,0.0,0.0,0.0
2,A Dutt,Netherlands,New Zealand,"Rajiv Gandhi International Stadium, Uppal, Hyd...",11,2,9.0,9.0,3.0,70.0,23*,11.66,80.45,0.0,0.0,0.0
3,A Dutt,Netherlands,Pakistan,"Rajiv Gandhi International Stadium, Uppal, Hyd...",1,1,9.0,9.0,3.0,70.0,23*,11.66,80.45,0.0,0.0,0.0
4,A Dutt,Netherlands,South Africa,"Himachal Pradesh Cricket Association Stadium, ...",23,0,9.0,9.0,3.0,70.0,23*,11.66,80.45,0.0,0.0,0.0


In [49]:
player_df['highest_score'] = player_df['highest_score'].str.replace('*', '').astype(int)

  player_df['highest_score'] = player_df['highest_score'].str.replace('*', '').astype(int)


In [50]:
player_df['innings'] = player_df['innings'].astype(int)
player_df['highest_score'] = player_df['highest_score'].astype(int)
player_df['100s'] = player_df['100s'].astype(int)
player_df['50s'] = player_df['50s'].astype(int)
player_df['0s'] = player_df['0s'].astype(int)

player_df.drop(['matches'], axis=1, inplace=True)

player_df.head()

Unnamed: 0,player_name,team,opponent_team,venue,total_runs,total_wickets,innings,not_out,runs,highest_score,avg,strike_rate,100s,50s,0s
0,A Dutt,Netherlands,Australia,"Arun Jaitley Stadium, Delhi",1,1,9,3.0,70.0,23,11.66,80.45,0,0,0
1,A Dutt,Netherlands,Bangladesh,"Eden Gardens, Kolkata",9,1,9,3.0,70.0,23,11.66,80.45,0,0,0
2,A Dutt,Netherlands,New Zealand,"Rajiv Gandhi International Stadium, Uppal, Hyd...",11,2,9,3.0,70.0,23,11.66,80.45,0,0,0
3,A Dutt,Netherlands,Pakistan,"Rajiv Gandhi International Stadium, Uppal, Hyd...",1,1,9,3.0,70.0,23,11.66,80.45,0,0,0
4,A Dutt,Netherlands,South Africa,"Himachal Pradesh Cricket Association Stadium, ...",23,0,9,3.0,70.0,23,11.66,80.45,0,0,0


In [51]:
player_df.rename(columns={'innings':'batting_innings'}, inplace=True)
player_df.rename(columns={'runs':'batting_runs'}, inplace=True)
player_df.rename(columns={'avg':'batting_avg'}, inplace=True)

In [52]:
player_df = pd.merge(player_df, bol_avg, on='player_name', how='left')
player_df.head()

Unnamed: 0,player_name,team,opponent_team,venue,total_runs,total_wickets,batting_innings,not_out,batting_runs,highest_score,...,100s,50s,0s,bowling_inns,balls,maiden,bowling_runs,wickets,bowling_avg,economy
0,A Dutt,Netherlands,Australia,"Arun Jaitley Stadium, Delhi",1,1,9,3.0,70.0,23,...,0,0,0,9,465.0,6,426,10,42.6,5.49
1,A Dutt,Netherlands,Bangladesh,"Eden Gardens, Kolkata",9,1,9,3.0,70.0,23,...,0,0,0,9,465.0,6,426,10,42.6,5.49
2,A Dutt,Netherlands,New Zealand,"Rajiv Gandhi International Stadium, Uppal, Hyd...",11,2,9,3.0,70.0,23,...,0,0,0,9,465.0,6,426,10,42.6,5.49
3,A Dutt,Netherlands,Pakistan,"Rajiv Gandhi International Stadium, Uppal, Hyd...",1,1,9,3.0,70.0,23,...,0,0,0,9,465.0,6,426,10,42.6,5.49
4,A Dutt,Netherlands,South Africa,"Himachal Pradesh Cricket Association Stadium, ...",23,0,9,3.0,70.0,23,...,0,0,0,9,465.0,6,426,10,42.6,5.49


In [53]:
player_df.columns

Index(['player_name', 'team', 'opponent_team', 'venue', 'total_runs',
       'total_wickets', 'batting_innings', 'not_out', 'batting_runs',
       'highest_score', 'batting_avg', 'strike_rate', '100s', '50s', '0s',
       'bowling_inns', 'balls', 'maiden', 'bowling_runs', 'wickets',
       'bowling_avg', 'economy'],
      dtype='object')

In [54]:
player_df.rename(columns={'total_wickets':'match_wickets'}, inplace=True)
player_df.rename(columns={'total_runs':'match_runs'}, inplace=True)
player_df.rename(columns={'batting_runs':'total_runs'}, inplace=True)
player_df.rename(columns={'wickets':'total_wickets'}, inplace=True)

In [55]:
player_df.drop(['100s', '50s','0s','not_out','maiden','balls','bowling_inns','batting_innings'], axis=1, inplace=True)

In [56]:
player_df.head()

Unnamed: 0,player_name,team,opponent_team,venue,match_runs,match_wickets,total_runs,highest_score,batting_avg,strike_rate,bowling_runs,total_wickets,bowling_avg,economy
0,A Dutt,Netherlands,Australia,"Arun Jaitley Stadium, Delhi",1,1,70.0,23,11.66,80.45,426,10,42.6,5.49
1,A Dutt,Netherlands,Bangladesh,"Eden Gardens, Kolkata",9,1,70.0,23,11.66,80.45,426,10,42.6,5.49
2,A Dutt,Netherlands,New Zealand,"Rajiv Gandhi International Stadium, Uppal, Hyd...",11,2,70.0,23,11.66,80.45,426,10,42.6,5.49
3,A Dutt,Netherlands,Pakistan,"Rajiv Gandhi International Stadium, Uppal, Hyd...",1,1,70.0,23,11.66,80.45,426,10,42.6,5.49
4,A Dutt,Netherlands,South Africa,"Himachal Pradesh Cricket Association Stadium, ...",23,0,70.0,23,11.66,80.45,426,10,42.6,5.49


In [57]:
player_df['total_runs'] = player_df['total_runs'].astype(int)

In [58]:
player_df.to_csv('../csvs/player_details.csv', index=False)

In [59]:
top_run_scorer = player_df.groupby(['player_name'])['match_runs'].sum().reset_index()
top_run_scorer = top_run_scorer.sort_values(by='match_runs', ascending=False)
print(top_run_scorer)

          player_name  match_runs
98          Q de Kock         545
100        R Ravindra         415
22          DA Warner         413
103         RG Sharma         398
4          AK Markram         362
..                ...         ...
31   Fazalhaq Farooqi           2
62        LH Ferguson           1
97       PVD Chameera           1
5            AT Carey           0
133         Usama Mir           0

[138 rows x 2 columns]


In [60]:
top_wicket_taker = player_df.groupby(['player_name'])['match_wickets'].sum().reset_index()
top_wicket_taker = top_wicket_taker.sort_values(by='match_wickets', ascending=False)
print(top_wicket_taker)

             player_name  match_wickets
65              M Jansen             13
119  Shaheen Shah Afridi             12
1                A Zampa             12
11          BFW de Leede             11
75            MJ Santner             11
..                   ...            ...
71         MADI Hemantha              0
14               C Green              0
66         M Labuschagne              0
15         C Karunaratne              0
137             WA Young              0

[138 rows x 2 columns]


In [61]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()

mapping = {}
categorical_columns = ['team','venue','opponent_team','player_name']
for column in categorical_columns:
    player_df[column] = le.fit_transform(player_df[column])
    mapping[column] = dict(zip(le.classes_, le.transform(le.classes_)))

In [62]:
mapping


{'team': {'Afghanistan': 0,
  'Australia': 1,
  'Bangladesh': 2,
  'England': 3,
  'India': 4,
  'Netherlands': 5,
  'New Zealand': 6,
  'Pakistan': 7,
  'South Africa': 8,
  'Sri Lanka': 9},
 'venue': {'Arun Jaitley Stadium, Delhi': 0,
  'Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium, Lucknow': 1,
  'Eden Gardens, Kolkata': 2,
  'Himachal Pradesh Cricket Association Stadium, Dharamsala': 3,
  'M Chinnaswamy Stadium, Bengaluru': 4,
  'MA Chidambaram Stadium, Chepauk, Chennai': 5,
  'Maharashtra Cricket Association Stadium, Pune': 6,
  'Narendra Modi Stadium, Ahmedabad': 7,
  'Rajiv Gandhi International Stadium, Uppal, Hyderabad': 8,
  'Wankhede Stadium, Mumbai': 9},
 'opponent_team': {'Afghanistan': 0,
  'Australia': 1,
  'Bangladesh': 2,
  'England': 3,
  'India': 4,
  'Netherlands': 5,
  'New Zealand': 6,
  'Pakistan': 7,
  'South Africa': 8,
  'Sri Lanka': 9},
 'player_name': {'A Dutt': 0,
  'A Zampa': 1,
  'AAP Atkinson': 2,
  'AD Mathews': 3,
  'AK Markram': 4,
  'A

In [63]:
for key, value in mapping['player_name'].items():
    if value == 0:
        print(key)

A Dutt


In [64]:
player_df.head()

Unnamed: 0,player_name,team,opponent_team,venue,match_runs,match_wickets,total_runs,highest_score,batting_avg,strike_rate,bowling_runs,total_wickets,bowling_avg,economy
0,0,5,1,0,1,1,70,23,11.66,80.45,426,10,42.6,5.49
1,0,5,2,2,9,1,70,23,11.66,80.45,426,10,42.6,5.49
2,0,5,6,8,11,2,70,23,11.66,80.45,426,10,42.6,5.49
3,0,5,7,8,1,1,70,23,11.66,80.45,426,10,42.6,5.49
4,0,5,8,3,23,0,70,23,11.66,80.45,426,10,42.6,5.49


In [65]:
player_df.columns

Index(['player_name', 'team', 'opponent_team', 'venue', 'match_runs',
       'match_wickets', 'total_runs', 'highest_score', 'batting_avg',
       'strike_rate', 'bowling_runs', 'total_wickets', 'bowling_avg',
       'economy'],
      dtype='object')

In [66]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X = player_df.drop(['match_runs','match_wickets'], axis=1)
y = player_df[['match_runs','match_wickets']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=42)

scaler = StandardScaler()

X_train[['total_runs','highest_score','batting_avg','bowling_avg','strike_rate','economy','bowling_runs','total_wickets']] = scaler.fit_transform(X_train[['total_runs','highest_score','batting_avg','bowling_avg','strike_rate','economy','bowling_runs','total_wickets']])
X_test[['total_runs','highest_score','batting_avg','bowling_avg','strike_rate','economy','bowling_runs','total_wickets']] = scaler.transform(X_test[['total_runs','highest_score','batting_avg','bowling_avg','strike_rate','economy','bowling_runs','total_wickets']])

joblib.dump(scaler, '../pickles/run_wicket_scaler.pkl')

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((451, 12), (451, 2), (113, 12), (113, 2))

In [67]:
X_train.head()  

Unnamed: 0,player_name,team,opponent_team,venue,total_runs,highest_score,batting_avg,strike_rate,bowling_runs,total_wickets,bowling_avg,economy
331,80,6,5,8,-0.807535,-0.641722,-0.567619,1.3798,-0.83486,-0.700993,-0.746939,-1.077127
117,26,9,8,0,-0.4281,-0.753199,-0.593443,-0.618953,0.184286,-0.700993,-0.746939,0.999257
211,49,6,1,3,-0.964542,-0.218109,-0.12862,1.344938,0.084715,-0.331662,2.13585,2.150814
0,0,5,1,0,-0.886038,-0.998448,-1.024179,-0.437587,1.660292,1.145665,0.817479,0.56071
444,108,0,3,0,-0.657069,-0.730903,-0.541796,0.149256,1.43772,1.330331,0.548296,0.259396


In [68]:
import joblib
joblib.dump(scaler, '../pickles/player_scaler.pkl')

['../pickles/player_scaler.pkl']

In [69]:
X_train.head()

Unnamed: 0,player_name,team,opponent_team,venue,total_runs,highest_score,batting_avg,strike_rate,bowling_runs,total_wickets,bowling_avg,economy
331,80,6,5,8,-0.807535,-0.641722,-0.567619,1.3798,-0.83486,-0.700993,-0.746939,-1.077127
117,26,9,8,0,-0.4281,-0.753199,-0.593443,-0.618953,0.184286,-0.700993,-0.746939,0.999257
211,49,6,1,3,-0.964542,-0.218109,-0.12862,1.344938,0.084715,-0.331662,2.13585,2.150814
0,0,5,1,0,-0.886038,-0.998448,-1.024179,-0.437587,1.660292,1.145665,0.817479,0.56071
444,108,0,3,0,-0.657069,-0.730903,-0.541796,0.149256,1.43772,1.330331,0.548296,0.259396


In [70]:
y_train.head()

Unnamed: 0,match_runs,match_wickets
331,5,0
117,11,0
211,58,1
0,1,1
444,23,3


In [71]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler


model = keras.Sequential([
    layers.Input(shape=(X_train.shape[1],)),  # Input layer
    layers.Dense(64, activation='selu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(2, activation='linear'),
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

early_stopping = EarlyStopping(monitor='val_loss', patience=10)

model.fit(X_train, y_train, epochs=500, batch_size=32, verbose=1, callbacks=[early_stopping], validation_split=0.2)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500


<keras.src.callbacks.History at 0x1948203ab10>

In [72]:
train_loss, train_mae = model.evaluate(X_train, y_train, verbose=1)
print(f"Training MAE: {train_mae}")
test_loss, test_mae = model.evaluate(X_test, y_test, verbose=1)
print(f"Test MAE: {test_mae}")



Training MAE: 9.818737030029297
Test MAE: 9.038482666015625


In [73]:
import pickle
pickle.dump(model, open('../pickles/player_model.pkl','wb'))

In [74]:
from sklearn.metrics import mean_absolute_error

y_pred = model.predict(X_test)
np.round(y_pred)

y_pred[y_pred < 0] = 0

print("MAE for runs: ", mean_absolute_error(y_test['match_runs'], y_pred[:,0]))
print("MAE for wickets: ", mean_absolute_error(y_test['match_wickets'], y_pred[:,1]))

MAE for runs:  17.263873423095298
MAE for wickets:  0.7832824595074738


In [75]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import  mean_absolute_error

dt = DecisionTreeRegressor(random_state=42)

params = {'max_depth': [3, 5, 7, 9, 11, 13, 15],
            'min_samples_split': [2, 4, 6, 8, 10, 12, 14, 16],
            'min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8]}
gs = GridSearchCV(estimator=dt, param_grid=params, scoring='neg_mean_squared_error', cv=5, n_jobs=-1)

gs.fit(X_train, y_train)

print(gs.best_params_)

train_mae = mean_absolute_error(y_train, gs.predict(X_train))
test_mae = mean_absolute_error(y_test, gs.predict(X_test))

print(f"Training MAE: {train_mae}")
print(f"Test MAE: {test_mae}")

{'max_depth': 3, 'min_samples_leaf': 6, 'min_samples_split': 2}
Training MAE: 9.502135751784689
Test MAE: 9.218338218140856


In [76]:
from sklearn.metrics import mean_absolute_error

y_pred = gs.predict(X_test)
y_pred = np.round(y_pred)
y_pred[y_pred < 0] = 0

print("MAE for runs: ", mean_absolute_error(y_test['match_runs'], y_pred[:,0]))
print("MAE for wickets: ", mean_absolute_error(y_test['match_wickets'], y_pred[:,1]))

MAE for runs:  17.902654867256636
MAE for wickets:  0.5663716814159292


In [77]:
pkl.dump(model, open('../pickles/run_wicket_predictor', 'wb'))