## Predicting Score using ML

In [None]:
import math
import pandas as pd
import numpy as np

import warnings
# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings('ignore')

# Show complete frames when inspecting data: no row or column truncation
# and no wrapping of wide frames across several lines.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
# None is the supported "no limit" value for the column width; the old -1
# sentinel is deprecated and rejected by recent pandas releases.
pd.set_option('display.max_colwidth', None)

import matplotlib.pyplot as plt

In [None]:
# Read the match data and keep the raw frame untouched; all later
# transformations are applied to the working copy `df`.
match_ipl = pd.read_csv(filepath_or_buffer="ipl_csv2/all_matches.csv")
df = match_ipl.copy(deep=True)

# Quick look at the first row to confirm the load worked.
df.head(n=1)

## Columns

In [None]:
# List the column labels of the working frame.
df.columns

In [None]:
# df.sort_values(['match_id','innings'],ascending=[True,True])

## Split over and ball into separate columns

In [None]:
# Turn `ball` into a digit string by removing the dot
# (presumably the value is <over>.<delivery> — confirm against the data).
# regex=False makes '.' a literal dot regardless of the pandas version's
# default; as a regex, '.' would match every character.
df['over'] = df['ball'].astype(str).str.replace('.', '', regex=False)

# All characters except the last form the over number; the last character
# is the delivery number within the over. Both stay strings.
df['overno'] = df['over'].str[:-1]
df['ballno'] = df['over'].str[-1:]

In [None]:
# Runs for the delivery: runs off the bat plus extras.
df['total_runs'] = df['runs_off_bat'].add(df['extras'])

df.head(n=2)

In [None]:
# Flag each row: 0 when it records a wide or a no-ball, 1 otherwise.
# Missing values compare as False, so rows without either extra get 1.
# (The earlier trial assignments to this column were always overwritten by
# this expression, so only the final definition is kept.)
df['isvalidball'] = np.where((df['wides'] >= 1) | (df['noballs'] >= 1), 0, 1)

In [None]:
# Running count of flagged (valid) deliveries within each match innings.
# The grouped cumsum returns a Series on df's own index, so it assigns
# cleanly; groupby.apply with a lambda can prepend the group keys to the
# index on newer pandas, which breaks the column assignment.
df['cum_ball'] = df.groupby(['match_id', 'innings'])['isvalidball'].cumsum()

# Valid deliveries still to come, out of a fixed 120 per innings.
df['ball_left'] = 120 - df['cum_ball']

## Select columns

In [None]:
# Keep only the columns used from here on.
df = df.loc[
    :,
    [
        'match_id',
        'innings',
        'venue',
        'batting_team',
        'bowling_team',
        'ball',
        'overno',
        'ballno',
        'total_runs',
        'player_dismissed',
        'cum_ball',
        'ball_left',
    ],
]

df.head(n=2)

In [None]:
# Replace every missing value in the frame with 0.
df = df.replace(to_replace=np.nan, value=0)

df.head(n=2)

## Total runs in the innings

In [None]:
# Preview: the innings total repeated on every row of that innings.
# The string 'sum' selects the grouped sum explicitly; passing the builtin
# `sum` is deprecated in recent pandas.
df.groupby(['match_id', 'innings'])['total_runs'].transform('sum')

In [None]:
# Total runs of the whole innings, broadcast to each of its rows.
# The string 'sum' selects the grouped sum explicitly; passing the builtin
# `sum` is deprecated in recent pandas.
df['inning_total'] = df.groupby(['match_id', 'innings'])['total_runs'].transform('sum')

df.head(2)

## Cumulative runs

In [None]:
# Running total of runs within each match innings, up to and including
# the current row. The grouped cumsum keeps df's index, so the result
# assigns directly; groupby.apply with a lambda can prepend the group keys
# to the index on newer pandas, which breaks the column assignment.
df['cum_sum'] = df.groupby(['match_id', 'innings'])['total_runs'].cumsum()

df.head(2)

In [None]:
# Runs added after the current row: innings total minus the running total.
df['remaining_runs'] = df['inning_total'].sub(df['cum_sum'])

df.head(n=4)

## Runs in previous 30 balls

In [None]:
# Rolling sum of runs over the last 30 rows of each match innings. The
# window includes the current row and is shorter at the start of an
# innings (min_periods=1).
runs_window = (
    df.groupby(['match_id', 'innings'])['total_runs']
    .rolling(window=30, min_periods=1)
    .sum()
)

# Flattened copy of the rolling result, kept under its original name.
tmp = runs_window.reset_index()

# The grouped result is ordered by group key, which need not match df's row
# order, so assign by index rather than by position: dropping the group-key
# levels leaves df's original index for alignment.
# NOTE(review): assumes df's index is unique — confirm if the index changes.
df['prev_30_balls_runs'] = runs_window.droplevel(['match_id', 'innings'])

df.head(2)

## Wickets in previous 30 balls

In [None]:
# Turn `player_dismissed` into an indicator: 0 where the value is 0
# (missing values were replaced by 0 earlier), 1 for anything else.
df['player_dismissed'] = np.where(df['player_dismissed'] == 0, 0, 1)

# Rolling count of dismissals over the last 30 rows of each match innings
# (window includes the current row; shorter at the start of an innings).
wkts_window = (
    df.groupby(['match_id', 'innings'])['player_dismissed']
    .rolling(window=30, min_periods=1)
    .sum()
)

# Flattened copy of the rolling result, kept under its original name.
tmp_wkts = wkts_window.reset_index()

# Assign by index, not by position: the grouped result is ordered by group
# key, which need not match df's row order.
# NOTE(review): assumes df's index is unique — confirm if the index changes.
df['player_dismissed_in_last_30balls'] = wkts_window.droplevel(['match_id', 'innings'])

df.head(2)

## Cumulative wickets

In [None]:
# Running count of dismissals within each match innings. The grouped
# cumsum keeps df's index, so the result assigns directly; groupby.apply
# with a lambda can prepend the group keys to the index on newer pandas,
# which breaks the column assignment.
df['cum_wkts'] = df.groupby(['match_id', 'innings'])['player_dismissed'].cumsum()

# Wickets in hand, out of a fixed 10 per innings.
df['wkts_left'] = 10 - df['cum_wkts']

df.head(2)

## Dot balls in previous 30 balls

In [None]:
# Start with a per-row indicator: 1 when no runs were added on the row.
df['dot_in_prev_30balls'] = np.where(df['total_runs'] == 0, 1, 0)

# Rolling count of those rows over the last 30 rows of each match innings
# (window includes the current row; shorter at the start of an innings).
dot_window = (
    df.groupby(['match_id', 'innings'])['dot_in_prev_30balls']
    .rolling(window=30, min_periods=1)
    .sum()
)

# Flattened copy of the rolling result, kept under its original name.
tmp_dot = dot_window.reset_index()

# Overwrite the indicator with the rolling count. Assign by index, not by
# position: the grouped result is ordered by group key, which need not
# match df's row order.
# NOTE(review): assumes df's index is unique — confirm if the index changes.
df['dot_in_prev_30balls'] = dot_window.droplevel(['match_id', 'innings'])

df.head(2)

## Boundaries in previous 30 balls

In [None]:
# Start with a per-row indicator: 1 when more than 3 runs were added.
# NOTE(review): total_runs includes extras, so a row with 4+ runs made up
# of extras is also counted — confirm this is intended.
df['boundaries_prev_30balls'] = np.where(df['total_runs'] > 3, 1, 0)

# Rolling count of those rows over the last 30 rows of each match innings
# (window includes the current row; shorter at the start of an innings).
boundary_window = (
    df.groupby(['match_id', 'innings'])['boundaries_prev_30balls']
    .rolling(window=30, min_periods=1)
    .sum()
)

# Flattened copy of the rolling result, kept under its original name.
tmp_bound = boundary_window.reset_index()

# Overwrite the indicator with the rolling count. Assign by index, not by
# position: the grouped result is ordered by group key, which need not
# match df's row order.
# NOTE(review): assumes df's index is unique — confirm if the index changes.
df['boundaries_prev_30balls'] = boundary_window.droplevel(['match_id', 'innings'])

df.head(2)

In [None]:
# Cast the four rolling-window feature columns to int.
convert_dict = dict.fromkeys(
    [
        'prev_30_balls_runs',
        'player_dismissed_in_last_30balls',
        'dot_in_prev_30balls',
        'boundaries_prev_30balls',
    ],
    int,
)
df = df.astype(dtype=convert_dict)

df.head(n=5)

In [None]:
# Write the engineered features to disk without the row index.
# index=False is the documented way to omit it (None only worked because
# it is falsy).
df.to_csv('IPL_DATA_FEATURES.csv', index=False)