In [9]:
import numpy as np
import pandas as pd
import math 
import seaborn as sns
import matplotlib.pyplot as plt
from pprint import pprint
from scipy.optimize import minimize
pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', 10)

In [2]:
data = pd.read_csv('cricket.csv')
data.head()

Unnamed: 0,Match,Date,Innings,Over,Runs,Total.Runs,Innings.Total.Runs,Runs.Remaining,Total.Out,Innings.Total.Out,Outs.Remaining,Wickets.in.Hand,Run.Rate,Innings.Run.Rate,Run.Rate.Required,Initial.Run.Rate.Required,Target.Score,Day-night,At.Bat,Fielding,Home.Team,Away.Team,Stadium,Country,Total.Overs,Winning.Team,Toss.Winner,at.bat.wins,at.bat.won.toss,at.bat.at.home,at.bat.bat.first,chose_bat_1st,chose_bat_2nd,forced_bat_1st,forced_bat_2nd,new.game,Error.In.Data,common.support
0,65193,14/05/1999,1,1,4,4,204,200,0,10,10,10,4.0,4.08,-1.0,-1.0,-1,0,Sri Lanka,England,England,Sri Lanka,Lord's,England,50,England,England,0,0,0,1,0,0,1,0,1,0,1
1,65193,14/05/1999,1,2,2,6,204,198,0,10,10,10,3.0,4.08,-1.0,-1.0,-1,0,Sri Lanka,England,England,Sri Lanka,Lord's,England,50,England,England,0,0,0,1,0,0,1,0,0,0,1
2,65193,14/05/1999,1,3,3,9,204,195,0,10,10,10,3.0,4.08,-1.0,-1.0,-1,0,Sri Lanka,England,England,Sri Lanka,Lord's,England,50,England,England,0,0,0,1,0,0,1,0,0,0,1
3,65193,14/05/1999,1,4,2,20,204,184,0,10,10,10,5.0,4.08,-1.0,-1.0,-1,0,Sri Lanka,England,England,Sri Lanka,Lord's,England,50,England,England,0,0,0,1,0,0,1,0,0,0,1
4,65193,14/05/1999,1,5,6,37,204,167,0,10,10,10,7.4,4.08,-1.0,-1.0,-1,0,Sri Lanka,England,England,Sri Lanka,Lord's,England,50,England,England,0,0,0,1,0,0,1,0,0,0,1


In [57]:
# data cleaning
data_first = data[data.Innings == 1].copy()

df1 = data_first[['Match', 'Over', 'Total.Runs', 'Innings.Total.Runs', 'Runs.Remaining', 'Wickets.in.Hand']].copy()
df1['Total.Overs'] = 50
df1['Overs.Remaining'] = df1['Total.Overs'] - df1['Over']
df1 = df1[['Match', 'Overs.Remaining', 'Runs.Remaining', 'Wickets.in.Hand']]

df2 = data_first[['Match', 'Over', 'Total.Runs', 'Innings.Total.Runs', 'Runs.Remaining', 'Wickets.in.Hand']].copy()
df2['Total.Overs'] = 50
df2 = df2.groupby('Match').first().reset_index()
df2['Wickets.in.Hand'] = 10
df2 = df2[['Match', 'Total.Overs', 'Innings.Total.Runs', 'Wickets.in.Hand']]
df2.columns = ['Match', 'Overs.Remaining', 'Runs.Remaining', 'Wickets.in.Hand']

df = pd.concat([df2, df1], axis = 0)
df.sort_values(['Match', 'Overs.Remaining'], ascending = [True, False], inplace = True)
df = df[['Overs.Remaining', 'Wickets.in.Hand', 'Runs.Remaining']]
df.columns = ['u', 'w', 'y']

X = df.copy()

In [58]:
# Compute Z0
Z = dict()

def computeZ0(w: int, data_first: pd.core.frame.DataFrame):
    if w == 10:
        return data_first.groupby('Match').first()['Innings.Total.Runs'].mean()
    else:
        return data_first[data_first['Wickets.in.Hand'] <= w].groupby('Match').first()['Runs.Remaining'].mean()
    
for w in range(10, -1, -1):
    Z[w] = computeZ0(w, data_first)
    print('Z0({}) = {}'.format(w, computeZ0(w, data_first)))

Z0(10) = 237.4926212227688
Z0(9) = 202.024595924104
Z0(8) = 165.59774964838255
Z0(7) = 129.3248407643312
Z0(6) = 95.128223495702
Z0(5) = 66.10911808669657
Z0(4) = 43.54865085854456
Z0(3) = 27.70970782280867
Z0(2) = 15.194379391100703
Z0(1) = 6.712732919254658
Z0(0) = 0.002976190476190476


In [64]:
# Question 1
def fn(b):
    global X
    t = X.copy()
    losses = t.apply(lambda row: (row['y'] - Z[row['w']] * (1 - np.exp(-b * row['u']))) ** 2, axis = 1)
    loss = losses.sum() / len(t)
#     print(loss)
    return loss

B = dict()
X_backup = X.copy()
for w in range(10, -1, -1):
    X = df[df.w == w].copy()
    result = minimize(fn, 0, method = 'L-BFGS-B')
    B[w] = result.x[0]
    print('b[{}] = {}'.format(w, B[w]))


b[10] = 0.07104365371507507
b[9] = 0.07607912916603857
b[8] = 0.09708601254569339
b[7] = 0.12513408370263018
b[6] = 0.18561537581855722
b[5] = 0.2669622248098945
b[4] = 0.4216774118121888
b[3] = 0.6766743404497069
b[2] = 1.0403517297635705
b[1] = 3.361093113851824
b[0] = 1.0


In [50]:
B

{10: 0.07104365371507507,
 9: 0.07607912916603857,
 8: 0.09708601254569339,
 7: 0.12513408370263018,
 6: 0.18561537581855722,
 5: 0.2669622248098945,
 4: 0.4216774118121888,
 3: 0.6766743404497069,
 2: 1.0403517297635705,
 1: 3.361093113851824,
 0: 1.0}

In [56]:
# compute MSE for Question 1
losses = df.apply(lambda row: (row['y'] - Z[row['w']] * (1 - np.exp(-B[row['w']] * row['u']))) ** 2, axis = 1)
loss = losses.sum() / len(df)
print(loss)

1820.484946916961


In [65]:
# Question 2
X = df.copy()

def fn(L):
    global X
    t = X.copy()
    losses = t.apply(lambda row: (row['y'] - Z[row['w']] * (1 - np.exp(-L * row['u'] / Z[row['w']]))) ** 2, axis = 1)
    loss = losses.sum() / len(t)
#     print(loss)
    return loss

res = minimize(fn, 0.35, method = 'L-BFGS-B')
L = res.x[0]
print('L = ', L)

L =  16.269229895957405


In [19]:
# compute MSE for Question 2
losses = X.apply(lambda row: (row['y'] - Z[row['w']] * (1 - np.exp(-optL * row['u'] / Z[row['w']]))) ** 2, axis = 1)
loss = losses.sum() / len(X)
print(loss)

[1822.4729018]


In [63]:
for w in range(10, 0, -1):
    print('b[{}] = {}, L/Z0[{}] = {}'.format(w, B[w], w, L/Z[w]))

b[10] = 0.07104365371507507, L/Z0[10] = 0.0014737299971593599
b[9] = 0.07607912916603857, L/Z0[9] = 0.001732462319248924
b[8] = 0.09708601254569339, L/Z0[8] = 0.002113555291319857
b[7] = 0.12513408370263018, L/Z0[7] = 0.0027063632781717884
b[6] = 0.18561537581855722, L/Z0[6] = 0.003679244572624794
b[5] = 0.2669622248098945, L/Z0[5] = 0.005294277251452732
b[4] = 0.4216774118121888, L/Z0[4] = 0.008036988358993616
b[3] = 0.6766743404497069, L/Z0[3] = 0.01263095238095238
b[2] = 1.0403517297635705, L/Z0[2] = 0.023034833538840935
b[1] = 3.361093113851824, L/Z0[1] = 0.05213971778857275
