In [212]:
import pandas as pd
import numpy as np
import math
import os
from datetime import datetime, date
from scipy import stats

In [22]:
# The notebook is expected to run from a sub-folder of the project; the
# project root is the parent of the current working directory.
cwd = os.getcwd()

# os.path.dirname understands the platform's path separator, unlike a manual
# split on '/', which yields an empty root for Windows-style paths.
root_path = os.path.dirname(cwd)

In [23]:
# Locations of the two extracted CSV files under <root>/extraction.
mi_path = f'{root_path}/extraction/mi_ipl.csv'
bb_path = f'{root_path}/extraction/bb_ipl.csv'

In [24]:
# Read both extracts: `mi` supplies match_id/season, `bb` supplies the rows
# that are later merged and aggregated per bowler.
mi, bb = (pd.read_csv(csv_path) for csv_path in (mi_path, bb_path))

In [25]:
# Two-column lookup used to attach a season to every row of `bb`.
season_lookup = mi.loc[:, ['match_id', 'season']]

In [285]:
# Attach the season to each `bb` row; rows whose match_id has no entry in
# the lookup are dropped by the inner join.
data = pd.merge(bb, season_lookup, on='match_id', how='inner')

In [286]:
# Scratch check of the current calendar year; the value is only displayed,
# it is not assigned to any variable.
date.today().year

2021

In [287]:
# Per-row boolean flags derived from the runs and over columns.
# A comparison already yields a boolean Series, so no np.where wrapper is needed.
data['four'] = data['runs'] == 4
data['six'] = data['runs'] == 6

# `axis` must be passed by keyword: the positional form `.any(1)` is
# deprecated and rejected by pandas 2.x.
data['boundary'] = data[['four', 'six']].any(axis=1)

# NOTE(review): the displayed rows show `over` starting at 0, so `< 7` marks
# overs 0-6 (seven overs) as `pp` -- confirm that seven is the intended span.
data['pp'] = data['over'] < 7

In [288]:
# Preview the first rows of the merged frame, including the derived
# four / six / boundary / pp flag columns.
data.head()

Unnamed: 0,match_id,batter,Non_striker,bowler,extra_type,runs,extras,total,wicket_type,player_out,innings,over,ball,bowling_wicket,season,four,six,boundary,pp
0,598067,RV Uthappa,AJ Finch,IK Pathan,,0,0,0,,,1,0,1,False,2013,False,False,False,True
1,598067,RV Uthappa,AJ Finch,IK Pathan,,2,0,2,,,1,0,2,False,2013,False,False,False,True
2,598067,RV Uthappa,AJ Finch,IK Pathan,,4,0,6,,,1,0,3,False,2013,True,False,True,True
3,598067,RV Uthappa,AJ Finch,IK Pathan,,0,0,6,,,1,0,4,False,2013,False,False,False,True
4,598067,RV Uthappa,AJ Finch,IK Pathan,,1,0,7,,,1,0,5,False,2013,False,False,False,True


In [289]:
# Aggregation applied to every (bowler, season, pp) group: rows are counted
# through the 'ball' column, everything else is summed.
agg_spec = {
    'ball': 'count',
    'runs': 'sum',
    'extras': 'sum',
    'bowling_wicket': 'sum',
    'four': 'sum',
    'six': 'sum',
    'boundary': 'sum',
}

grouped = (
    data.groupby(['bowler', 'season', 'pp'], as_index=False)
    .agg(agg_spec)
    # Fold extras into the runs total, then discard the separate column.
    .assign(runs=lambda frame: frame['runs'] + frame['extras'])
    .drop(columns=['extras'])
    .rename(columns={'ball': 'balls', 'bowling_wicket': 'wickets'})
)

In [290]:
# Per-ball rates: runs and wickets divided by the number of balls in the group.
balls_bowled = grouped['balls']
grouped['runs_rate'] = grouped['runs'] / balls_bowled
grouped['wickets_rate'] = grouped['wickets'] / balls_bowled

In [291]:
# Display the per (bowler, season, pp) aggregates together with the
# runs_rate and wickets_rate columns.
grouped

Unnamed: 0,bowler,season,pp,balls,runs,wickets,four,six,boundary,runs_rate,wickets_rate
0,A Ashish Reddy,2012,False,157,208,11,11,9,20,1.324841,0.070064
1,A Ashish Reddy,2012,True,12,30,0,3,3,6,2.500000,0.000000
2,A Ashish Reddy,2013,False,41,71,3,7,3,10,1.731707,0.073171
3,A Ashish Reddy,2015,False,37,51,3,1,3,4,1.378378,0.081081
4,A Ashish Reddy,2016,False,23,40,1,4,2,6,1.739130,0.043478
...,...,...,...,...,...,...,...,...,...,...,...
2644,Z Khan,2015,True,113,92,6,12,0,12,0.814159,0.053097
2645,Z Khan,2016,False,100,154,5,13,5,18,1.540000,0.050000
2646,Z Khan,2016,True,171,187,5,26,2,28,1.093567,0.029240
2647,Z Khan,2017,False,104,168,4,13,7,20,1.615385,0.038462


In [384]:
# Smallest and largest runs_rate / wickets_rate within each (season, pp) group.
rate_bounds = (
    grouped.groupby(['season', 'pp'])[['runs_rate', 'wickets_rate']]
    .agg(['min', 'max'])
)

# Nested lookup: minmax[(season, pp)][(column, 'min' | 'max')] -> value.
minmax = rate_bounds.to_dict(orient='index')

In [385]:
def get_scores(row, minmax):
    """Min-max scale a row's runs_rate and wickets_rate within its (season, pp) group.

    Parameters
    ----------
    row : pandas.Series
        Must provide 'season', 'pp', 'runs_rate' and 'wickets_rate'.
    minmax : dict
        Lookup of the form ``minmax[(season, pp)][(column, 'min' | 'max')]``.

    Returns
    -------
    pandas.Series
        A copy of ``row`` with 'runs_rate_scaled' and 'wickets_rate_scaled'
        added. The runs scale is inverted (lowest rate -> 1) while the
        wickets scale is direct (highest rate -> 1).

    Notes
    -----
    When a group has no spread (max == min) the position inside the range is
    undefined; the neutral midpoint 0.5 is used instead of dividing by zero.
    """
    # Work on a copy so the Series handed in by DataFrame.apply is not mutated.
    scored = row.copy()
    bounds = minmax[row['season'], row['pp']]

    for col in ['runs_rate', 'wickets_rate']:
        minv = bounds[col, 'min']
        maxv = bounds[col, 'max']
        span = maxv - minv

        if span == 0:
            position = 0.5
        else:
            position = (row[col] - minv) / span

        # Conceding fewer runs is better, so only the runs scale is flipped.
        scored[col + '_scaled'] = 1 - position if col == 'runs_rate' else position

    return scored

In [386]:
# Second display of `grouped`; the rendered rows match the earlier display
# of the same frame.
grouped

Unnamed: 0,bowler,season,pp,balls,runs,wickets,four,six,boundary,runs_rate,wickets_rate
0,A Ashish Reddy,2012,False,157,208,11,11,9,20,1.324841,0.070064
1,A Ashish Reddy,2012,True,12,30,0,3,3,6,2.500000,0.000000
2,A Ashish Reddy,2013,False,41,71,3,7,3,10,1.731707,0.073171
3,A Ashish Reddy,2015,False,37,51,3,1,3,4,1.378378,0.081081
4,A Ashish Reddy,2016,False,23,40,1,4,2,6,1.739130,0.043478
...,...,...,...,...,...,...,...,...,...,...,...
2644,Z Khan,2015,True,113,92,6,12,0,12,0.814159,0.053097
2645,Z Khan,2016,False,100,154,5,13,5,18,1.540000,0.050000
2646,Z Khan,2016,True,171,187,5,26,2,28,1.093567,0.029240
2647,Z Khan,2017,False,104,168,4,13,7,20,1.615385,0.038462


In [417]:
# Row by row, add the *_scaled columns computed against each row's own
# (season, pp) bounds.
scores = grouped.apply(get_scores, axis=1, args=(minmax,))

In [418]:
# Distinct seasons present in the scores, in ascending order.
seasons = sorted(scores['season'].unique().tolist())

In [419]:
def fibonacci(n):
    """Return the first ``n`` Fibonacci numbers (1, 1, 2, 3, ...) scaled to (0, 1].

    Every term is divided by the largest term, so the last element is 1.0.

    Parameters
    ----------
    n : int
        Number of terms requested.

    Returns
    -------
    list of float
        The scaled sequence; an empty list when ``n`` is zero or negative.
    """
    # Without this guard the seeded first term would be returned even though
    # no terms were requested.
    if n <= 0:
        return []

    seq = [1]
    prev, curr = 0, 1

    while len(seq) < n:
        prev, curr = curr, prev + curr
        seq.append(curr)

    # The sequence never decreases, so its last term is also its maximum.
    maxv = seq[-1]

    return [term / maxv for term in seq]

In [420]:
# Pair each season, oldest first, with its scaled Fibonacci weight so that
# the latest season carries weight 1.0.
season_weights = fibonacci(len(seasons))
recency = {season: weight for season, weight in zip(seasons, season_weights)}

In [421]:
# Look up the recency weight of every row's season.
scores['recency'] = scores['season'].map(lambda season: recency[season])

In [422]:
# Rows flagged `pp` are weighted 1.2x; all other rows keep a weight of 1.
PP_WEIGHT, NON_PP_WEIGHT = 1.2, 1
scores['pp_multiplier'] = np.where(scores['pp'], PP_WEIGHT, NON_PP_WEIGHT)

In [423]:
# Each component score is pp_multiplier * scaled rate * recency, multiplied
# in that order. No extra weighting is applied to the wickets component.
for metric in ('runs', 'wickets'):
    scores[f'{metric}_score'] = (
        scores['pp_multiplier'] * scores[f'{metric}_rate_scaled'] * scores['recency']
    )

# Overall score of a row is the sum of its two components.
scores['score'] = scores['runs_score'] + scores['wickets_score']

In [430]:
# Total score per bowler plus the first and last season in which they appear.
bowler_totals = scores.groupby('bowler').agg({'score': 'sum', 'season': ['min', 'max']})

# Keep only bowlers whose most recent season is 2020, best total first.
last_seen_2020 = bowler_totals[('season', 'max')] == 2020
rankings = bowler_totals[last_seen_2020].sort_values(('score', 'sum'), ascending=False)

In [431]:
# Rank 1 goes to the highest summed score.
rankings['rank'] = rankings[('score', 'sum')].rank(ascending=False)