In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Lift pandas' display truncation: show every row and every column of a rendered frame.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [None]:
# Read the ODI innings data set from the working directory and preview two rows.
df = pd.read_csv(filepath_or_buffer='ODI_data.csv')
df.iloc[:2]

In [None]:
# Inspect the distinct raw values of the score column before cleaning it.
df.loc[:, 'Innings Runs Scored Num'].unique()

In [None]:
# Keep only rows whose score is not the '-' placeholder string.
df = df.loc[df['Innings Runs Scored Num'].ne('-')]

In [None]:
# Drop rows with a missing score. The explicit .copy() detaches the filtered
# result from the frame it was sliced from: the following cells assign new and
# converted columns onto `df`, and doing that on a filtered slice can raise
# pandas' SettingWithCopyWarning.
df = df.dropna(subset=['Innings Runs Scored Num']).copy()

In [None]:
# Convert the innings date column to pandas datetimes so the .dt accessor can be used on it.
df['Innings Date'] = df['Innings Date'].pipe(pd.to_datetime)

In [None]:
# Store the calendar year of every innings; the period filters below key on it.
df['Year'] = df.loc[:, 'Innings Date'].dt.year

In [None]:
# Preview the first two rows after adding the Year column.
df.iloc[:2]

In [None]:
# Show the dtype of every column; the next cell casts four of them to int.
df.dtypes

In [None]:
# Cast the score, balls-faced and the two flag columns to integers in one pass
# so they can be summed numerically.
df = df.astype({
    'Innings Runs Scored Num': int,
    'Innings Balls Faced': int,
    'Innings Not Out Flag': int,
    'Innings Batted Flag': int,
})

In [None]:
# All innings (every player) inside each comparison window, bounds inclusive.
sachin_df = df[df.Year.between(1994, 2004)]
kohli_df = df[df.Year.between(2009, 2019)]

In [None]:
# Preview the first two rows of the 1994-2004 frame.
sachin_df.iloc[:2]

In [None]:
# SR Tendulkar's own innings for 1994-2004 (bounds inclusive).
sdf = df[(df.Year>=1994) & (df.Year<=2004) & (df['Innings Player']=='SR Tendulkar')]
# reset_index returns a NEW frame; the result has to be assigned back,
# otherwise the call is a no-op and the old row labels are kept.
sdf = sdf.reset_index(drop=True)
sdf.head(2)

In [None]:
# Total runs in the Tendulkar frame.
sdf.loc[:, 'Innings Runs Scored Num'].sum()

In [None]:
# V Kohli's own innings for 2009-2019 (bounds inclusive).
kdf = df[(df.Year>=2009) & (df.Year<=2019) & (df['Innings Player']=='V Kohli')]
# reset_index returns a NEW frame; the result has to be assigned back,
# otherwise the call is a no-op and the old row labels are kept.
kdf = kdf.reset_index(drop=True)
kdf.head(2)

In [None]:
# Total runs in the Kohli frame.
kdf.loc[:, 'Innings Runs Scored Num'].sum()

In [None]:
# Runs per Innings  --> Total Runs / Total Innings Played
# SR                --> (Total Runs / Total Balls) * 100
# 100's             --> Sum of the "100's" column
# 50's              --> Sum of the "50's" column
# Team Contribution --> (Runs scored by the player / Runs scored by the team) * 100

In [None]:
# Runs per innings (RPI): total runs over the summed batted flag, rounded to 2 d.p.
sachin_rpi = (sdf.loc[:, 'Innings Runs Scored Num'].sum() / sdf.loc[:, 'Innings Batted Flag'].sum()).round(2)
kohli_rpi = (kdf.loc[:, 'Innings Runs Scored Num'].sum() / kdf.loc[:, 'Innings Batted Flag'].sum()).round(2)

sachin_rpi, kohli_rpi

In [None]:
# Strike rate (SR): runs per ball faced, times 100, rounded to 2 d.p.
sachin_sr = (sdf.loc[:, 'Innings Runs Scored Num'].sum() / sdf.loc[:, 'Innings Balls Faced'].sum() * 100).round(2)
kohli_sr = (kdf.loc[:, 'Innings Runs Scored Num'].sum() / kdf.loc[:, 'Innings Balls Faced'].sum() * 100).round(2)

sachin_sr, kohli_sr

In [None]:
# Sum of the "100's" column for each player.
sachin_100 = sdf.loc[:, "100's"].sum()
kohli_100 = kdf.loc[:, "100's"].sum()

In [None]:
# Sum of the "50's" column for each player.
sachin_50 = sdf.loc[:, "50's"].sum()
kohli_50 = kdf.loc[:, "50's"].sum()

In [None]:
# Team contribution --> (runs scored by the player / runs scored by the team) * 100
# These frames hold every innings with Country == 'India' in each window.

India_df_sachin = df[df.Year.between(1994, 2004) & df['Country'].eq('India')]
India_df_kohli = df[df.Year.between(2009, 2019) & df['Country'].eq('India')]

In [None]:
# Runs by all India rows, 1994-2004.
India_df_sachin.loc[:, 'Innings Runs Scored Num'].sum()

In [None]:
# Runs by all India rows, 2009-2019.
India_df_kohli.loc[:, 'Innings Runs Scored Num'].sum()

In [None]:
# Each player's share of India's runs in his window, as a percentage rounded to 2 d.p.
sachin_contribution = (
    sdf.loc[:, 'Innings Runs Scored Num'].sum() / India_df_sachin.loc[:, 'Innings Runs Scored Num'].sum() * 100
).round(2)
kohli_contribution = (
    kdf.loc[:, 'Innings Runs Scored Num'].sum() / India_df_kohli.loc[:, 'Innings Runs Scored Num'].sum() * 100
).round(2)

sachin_contribution, kohli_contribution

In [None]:
# Ten highest run totals per player (all countries) in the 1994-2004 window.
(
    sachin_df
    .groupby(['Innings Player'])['Innings Runs Scored Num']
    .sum()
    .sort_values(ascending=False)
    .iloc[:10]
)

In [None]:
# Vertical bar chart of the ten highest run totals, 1994-2004.
(
    sachin_df
    .groupby(['Innings Player'])['Innings Runs Scored Num']
    .sum()
    .sort_values(ascending=False)
    .iloc[:10]
    .plot.bar()
)
plt.show()

In [None]:
# Ten highest run totals per player (all countries) in the 2009-2019 window.
(
    kohli_df
    .groupby(['Innings Player'])['Innings Runs Scored Num']
    .sum()
    .sort_values(ascending=False)
    .iloc[:10]
)

In [None]:
# Horizontal bar chart of the ten highest run totals, 2009-2019.
(
    kohli_df
    .groupby(['Innings Player'])['Innings Runs Scored Num']
    .sum()
    .sort_values(ascending=False)
    .iloc[:10]
    .plot.barh()
)
plt.show()

In [None]:
# Tendulkar's runs per calendar year.
sdf.groupby('Year')['Innings Runs Scored Num'].sum().plot.bar()
plt.show()

In [None]:
# Kohli's runs per calendar year.
kdf.groupby('Year')['Innings Runs Scored Num'].sum().plot.bar()
plt.show()

In [None]:
# Grouped bar chart: the five headline metrics, one bar per player per metric.
y = [0, 20, 40, 60, 80, 100]
y_min, y_max = min(y), max(y)   # fixed y-axis limits

parameters = [
    'Runs per Innings',
    'Strike Rate',
    "No.of 100's",
    "No.of 50's",
    'Contribution to team score',
]
sachin_data = [sachin_rpi, sachin_sr, sachin_100, sachin_50, sachin_contribution]
kohli_data = [kohli_rpi, kohli_sr, kohli_100, kohli_50, kohli_contribution]

# One x slot per metric; each player's bar is shifted 0.2 to either side of it.
xpos = np.arange(len(parameters))

fig, ax = plt.subplots(figsize=(11.5, 6))
rects1 = ax.bar(xpos - 0.2, sachin_data, width=0.35, label='Sachin')
ax.set_ylim(y_min, y_max)
rects2 = ax.bar(xpos + 0.2, kohli_data, width=0.35, label='Kohli')

ax.set_title('Sachin vs Kohli Analysis', fontsize=25, color='green')
ax.set_xticks(xpos)
ax.set_xticklabels(parameters, fontsize=12)

ax.legend((rects1[0], rects2[0]), ('Sachin', 'Kohli'))


def autolabel(rects, formats=('%0.2f', '%0.2f', '%d', '%d', '%0.2f%%'), axes=None):
    """Write each bar's value just above the bar.

    The label format is chosen by the bar's POSITION in ``rects``, not by
    comparing its height against the metric values: a value-based check
    mislabels a bar whenever two metrics happen to be equal (for example a
    runs-per-innings figure equal to the number of centuries).

    Parameters
    ----------
    rects : iterable
        Bar objects exposing ``get_x()``, ``get_width()`` and ``get_height()``.
    formats : sequence of str, optional
        One ``%``-style format per bar position. The default matches the
        metric order used by the comparison chart: runs per innings, strike
        rate, 100's, 50's, team contribution (percentage). Bars beyond the
        end of ``formats`` are labelled with ``'%0.2f'``.
    axes : object with a ``text`` method, optional
        Axes to draw on. When omitted, the notebook-level ``ax`` is used.
    """
    target_axes = ax if axes is None else axes
    for position, rect in enumerate(rects):
        height = rect.get_height()
        label_format = formats[position] if position < len(formats) else '%0.2f'
        # Centre the label on the bar and lift it 1.8% above the bar's top.
        target_axes.text(rect.get_x() + rect.get_width() / 2., 1.018 * height,
                         label_format % height, ha='center')

# Label both bar groups with their values, then render the figure.
autolabel(rects1)
autolabel(rects2)
plt.show()

# Normalization: each player's numbers relative to the other players of the same period

In [None]:
# Period baselines: every innings in each window except the featured player's own.

non_sachin_df = sachin_df[sachin_df['Innings Player'].ne('SR Tendulkar')]
non_sachin_df.head(n=5)  # not the last expression of the cell, so only the preview below is rendered

non_kohli_df = kohli_df[kohli_df['Innings Player'].ne('V Kohli')]
non_kohli_df.head(n=5)

In [None]:
# Relative scoring rate: Sachin's RPI divided by the RPI of everyone else in his window.

Total_players_rpi = non_sachin_df.loc[:, 'Innings Runs Scored Num'].sum() / non_sachin_df.loc[:, 'Innings Batted Flag'].sum()
sachin_scoring_rate = sachin_rpi / Total_players_rpi
sachin_scoring_rate

In [None]:
# Relative scoring rate: Kohli's RPI divided by the RPI of everyone else in his window.
# Note: Total_players_rpi is rebound here and now holds the 2009-2019 baseline.

Total_players_rpi = non_kohli_df.loc[:, 'Innings Runs Scored Num'].sum() / non_kohli_df.loc[:, 'Innings Batted Flag'].sum()
kohli_scoring_rate = kohli_rpi / Total_players_rpi
kohli_scoring_rate

In [None]:
# Relative strike rate: Sachin's SR divided by the SR of everyone else in his window.

Total_players_sr = non_sachin_df.loc[:, 'Innings Runs Scored Num'].sum() / non_sachin_df.loc[:, 'Innings Balls Faced'].sum() * 100
sachin_scoring_sr = sachin_sr / Total_players_sr
sachin_scoring_sr

In [None]:
# Relative strike rate: Kohli's SR divided by the SR of everyone else in his window.
# Note: Total_players_sr is rebound here and now holds the 2009-2019 baseline.

Total_players_sr = non_kohli_df.loc[:, 'Innings Runs Scored Num'].sum() / non_kohli_df.loc[:, 'Innings Balls Faced'].sum() * 100
kohli_scoring_sr = kohli_sr / Total_players_sr
kohli_scoring_sr

In [None]:
# Innings batted per century (summed batted flag / summed "100's") for each player.

sachin_scoring_100 = sdf.loc[:, 'Innings Batted Flag'].sum() / sdf.loc[:, "100's"].sum()
kohli_scoring_100 = kdf.loc[:, 'Innings Batted Flag'].sum() / kdf.loc[:, "100's"].sum()

sachin_scoring_100, kohli_scoring_100

In [None]:
# Sachin's innings-per-century relative to the same figure for everyone else in his window.

sachin_scoring_100 = sdf.loc[:, 'Innings Batted Flag'].sum() / sdf.loc[:, "100's"].sum()
non_sachin_scoring_100 = non_sachin_df.loc[:, 'Innings Batted Flag'].sum() / non_sachin_df.loc[:, "100's"].sum()

sachin_scoring_100_rate = sachin_scoring_100 / non_sachin_scoring_100
sachin_scoring_100_rate

In [None]:
# Kohli's innings-per-century relative to the same figure for everyone else in his window.

kohli_scoring_100 = kdf.loc[:, 'Innings Batted Flag'].sum() / kdf.loc[:, "100's"].sum()
non_kohli_scoring_100 = non_kohli_df.loc[:, 'Innings Batted Flag'].sum() / non_kohli_df.loc[:, "100's"].sum()

kohli_scoring_100_rate = kohli_scoring_100 / non_kohli_scoring_100
kohli_scoring_100_rate

In [None]:
# Innings batted per fifty (summed batted flag / summed "50's") for each player.

sachin_scoring_50 = sdf.loc[:, 'Innings Batted Flag'].sum() / sdf.loc[:, "50's"].sum()
kohli_scoring_50 = kdf.loc[:, 'Innings Batted Flag'].sum() / kdf.loc[:, "50's"].sum()

sachin_scoring_50, kohli_scoring_50

In [None]:
# Sachin's innings-per-fifty relative to the same figure for everyone else in his window.

sachin_scoring_50 = sdf.loc[:, 'Innings Batted Flag'].sum() / sdf.loc[:, "50's"].sum()
non_sachin_scoring_50 = non_sachin_df.loc[:, 'Innings Batted Flag'].sum() / non_sachin_df.loc[:, "50's"].sum()

sachin_scoring_50_rate = sachin_scoring_50 / non_sachin_scoring_50
sachin_scoring_50_rate

In [None]:
# Kohli's innings-per-fifty relative to the same figure for everyone else in his window.

kohli_scoring_50 = kdf.loc[:, 'Innings Batted Flag'].sum() / kdf.loc[:, "50's"].sum()
non_kohli_scoring_50 = non_kohli_df.loc[:, 'Innings Batted Flag'].sum() / non_kohli_df.loc[:, "50's"].sum()

kohli_scoring_50_rate = kohli_scoring_50 / non_kohli_scoring_50
kohli_scoring_50_rate

In [None]:
# Share of India's runs as a percentage, this time without rounding.
# This rebinds the sachin_contribution / kohli_contribution names set earlier.
sachin_contribution = sdf.loc[:, 'Innings Runs Scored Num'].sum() / India_df_sachin.loc[:, 'Innings Runs Scored Num'].sum() * 100
kohli_contribution = kdf.loc[:, 'Innings Runs Scored Num'].sum() / India_df_kohli.loc[:, 'Innings Runs Scored Num'].sum() * 100

sachin_contribution, kohli_contribution