In [408]:
import pandas as pd
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.io as pio
import math
# Set Plotly's default renderer to 'notebook' so figures are shown inside Jupyter Notebook.
pio.renderers.default='notebook'

In [409]:
# Make "plotly_dark" the default Plotly template for figures created after this cell.
pio.templates.default = "plotly_dark"

In [410]:
# Load the two source tables from the ./full directory.
# The scorecard table supplies the columns used by the analyses in this notebook
# ('match-id', 'innings', 'wicket-method', 'balls-played', 'fall-of-wicket-overs').
df_scorecard = pd.read_csv('./full/odi_scorecard.csv')
df_info = pd.read_csv('./full/odi_info.csv')

### Hypothesis

#### 1

* H(0): The number of batsmen dismissed bowled is equal to the number of batsmen dismissed lbw
* H(A): The number of batsmen dismissed bowled is not equal to the number of batsmen dismissed lbw

##### Data

In [411]:
# Keep only the dismissals recorded as 'bowled' or 'lbw', together with their match id.
# .copy() makes df_first an independent frame rather than a slice of df_scorecard,
# so adding columns to it afterwards does not raise pandas' SettingWithCopyWarning.
is_bowled_or_lbw = df_scorecard['wicket-method'].isin(['bowled', 'lbw'])
df_first = df_scorecard.loc[is_bowled_or_lbw, ['match-id', 'wicket-method']].copy()

In [412]:
# 0/1 indicator columns per dismissal: a 'bowled' row gets bowled=1 and lbw=0,
# every other row gets lbw=1 and bowled=0.
was_bowled = df_first['wicket-method'] == 'bowled'
df_first['lbw'] = (~was_bowled).astype('int64')
df_first['bowled'] = was_bowled.astype('int64')

In [413]:
# Total lbw and bowled dismissals per match.
# Only the two indicator columns are aggregated; summing the whole frame would
# also try to aggregate the text column 'wicket-method', which is never used.
df_first = df_first.groupby('match-id', as_index=False)[['lbw', 'bowled']].sum()

In [414]:
# Keep only the two per-match count columns.
df_first = df_first.loc[:, ['lbw', 'bowled']]

In [415]:
# Show the per-match counts of lbw and bowled dismissals (one row per match).
df_first

Unnamed: 0,lbw,bowled
0,0,2
1,2,3
2,0,5
3,1,2
4,3,6
5,1,6
6,0,3
7,1,2
8,0,3
9,2,2


#### 2

* H(0): The number of wickets that fall in the first 70% of the first innings is equal to the number of wickets that fall in the last 30% of the first innings
* H(A): The number of wickets that fall in the first 70% of the first innings is not equal to the number of wickets that fall in the last 30% of the first innings

##### Data

In [416]:
# Build `temp`: one row per first-innings wicket, annotated with the length of
# that innings and the over that marks the end of its first 70%.
first_innings = df_scorecard[df_scorecard['innings'] == 1]

# Innings length per match, in overs: total balls played divided by 6, rounded.
# Only 'balls-played' is summed, so text columns are never aggregated.
innings_length = first_innings.groupby('match-id', as_index=False)[['balls-played']].sum()
innings_length['total-overs'] = (innings_length['balls-played'] / 6).round()
# Last over that still belongs to the first 70% of the innings.
innings_length['first-seventy'] = (innings_length['total-overs'] * 0.7).round()
innings_length = innings_length[['match-id', 'total-overs', 'first-seventy']]

# Scorecard rows with a positive fall-of-wicket value.
first_innings_wickets = first_innings[first_innings['fall-of-wicket-overs'] > 0.0]
temp = innings_length.merge(first_innings_wickets, on=['match-id'])

# Turn the fall-of-wicket value into a 1-based over number by truncating the
# fractional part and adding one (e.g. 12.3 -> 13).
temp['fall-of-wicket-overs'] = temp['fall-of-wicket-overs'].astype('int64') + 1

In [417]:
# Flag every first-innings wicket by the part of the innings it fell in
# (up to and including the 'first-seventy' over, or after it), then count the
# flags per match and drop the match identifier.
over_of_wicket = temp['fall-of-wicket-overs']
cutoff_over = temp['first-seventy']
has_over = over_of_wicket > 0

df_second = pd.DataFrame({
    'match-id': temp['match-id'],
    'first-seventy-wickets': ((over_of_wicket <= cutoff_over) & has_over).astype('int64'),
    'last-thirty-wickets': ((over_of_wicket > cutoff_over) & has_over).astype('int64'),
})
df_second = (
    df_second
    .groupby('match-id', as_index=False)
    .sum()
    .drop(columns='match-id')
)

In [418]:
# Show the per-match first-innings wicket counts: first 70% of overs vs. the remainder.
df_second

Unnamed: 0,first-seventy-wickets,last-thirty-wickets
0,3,4
1,5,5
2,4,5
3,6,2
4,7,2
5,4,5
6,2,6
7,4,3
8,4,3
9,2,5


#### 3

* H(0): A wicket is equally likely to result from the brilliance of the bowler as from a mistake by the batsman
* H(A): A wicket is not equally likely to result from the brilliance of the bowler as from a mistake by the batsman

##### Data

In [419]:
# Keep the scorecard rows whose 'wicket-method' is anything other than the string '0'
# (presumably '0' marks a batsman who was not dismissed -- confirm against the data).
# .copy() makes df_third an independent frame rather than a slice of df_scorecard,
# so adding columns to it afterwards does not raise pandas' SettingWithCopyWarning.
has_wicket_method = df_scorecard['wicket-method'] != '0'
df_third = df_scorecard.loc[has_wicket_method].copy()

In [420]:
# Dismissal types counted as the batsman's mistake.
batsman_wic = [
    'run out',
    'hit wicket',
    'obstructing the field',
    'retired out',
    'stumped',
]
# Dismissal types counted as the bowler's brilliance.
bowler_wic = [
    'caught',
    'bowled',
    'lbw',
    'caught and bowled',
]

In [421]:
# 0/1 indicator columns per dismissal: does the wicket method belong to the
# batsman list or to the bowler list? Methods in neither list get 0 in both.
df_third['wic_batsman'] = df_third['wicket-method'].isin(batsman_wic).astype('int64')
df_third['wic_bowler'] = df_third['wicket-method'].isin(bowler_wic).astype('int64')

In [422]:
# Count batsman-attributed and bowler-attributed wickets per match, then add
# their total as 'wickets'.
wickets_per_match = (
    df_third[['match-id', 'wic_batsman', 'wic_bowler']]
    .groupby('match-id', as_index=False)
    .sum()
)
wickets_per_match['wickets'] = (
    wickets_per_match['wic_batsman'] + wickets_per_match['wic_bowler']
)
df_third = wickets_per_match

In [423]:
# Convert the per-match counts into shares of that match's classified wickets,
# rounded to 3 decimals, and keep only the two share columns.
# The shares are built as new float columns instead of being written through
# `.loc[:, col] = ...`, which would set float values in place into the existing
# integer count columns (an incompatible-dtype assignment newer pandas deprecates).
classified_wickets = df_third['wickets']
df_third = pd.DataFrame({
    'wic_batsman': (df_third['wic_batsman'] / classified_wickets).round(3),
    'wic_bowler': (df_third['wic_bowler'] / classified_wickets).round(3),
})

In [424]:
# Show the per-match share of wickets attributed to the batsman vs. the bowler.
df_third

Unnamed: 0,wic_batsman,wic_bowler
0,0.062,0.938
1,0.077,0.923
2,0.118,0.882
3,0.000,1.000
4,0.062,0.938
5,0.056,0.944
6,0.333,0.667
7,0.333,0.667
8,0.235,0.765
9,0.000,1.000


In [425]:
# Repeatedly draw random samples of matches from df_third and record, for each
# draw, the mean batsman share and the mean bowler share.
N_DRAWS = 1000      # number of samples to draw
SAMPLE_SIZE = 100   # rows per sample
SAMPLE_SEED = 42    # base seed, so a fresh run reproduces the same draws

df_sample_mean_bats=[]
df_sample_mean_bowler=[]
for i in range(N_DRAWS):
    # A fixed but distinct seed per iteration: reproducible, yet each draw differs.
    df_sample=df_third.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED + i)
    mean_v=df_sample.mean(axis=0)
    # Look the means up by column label: integer keys on a label-indexed Series
    # only work through a deprecated positional fallback.
    df_sample_mean_bats.append(mean_v['wic_batsman'])
    df_sample_mean_bowler.append(mean_v['wic_bowler'])