## Load the standard libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

## Load the data

In [2]:
# Read the match records into a DataFrame; the path is relative to the
# notebook's working directory.
# NOTE(review): previews later in this notebook show '-' in the 'mins' column
# for some rows, so that column presumably loads as strings — confirm before
# using it numerically.
data = pd.read_csv('Sachin_ODI.csv')
data.head()

Unnamed: 0,runs,NotOut,mins,bf,fours,sixes,sr,Inns,Opp,Ground,Date,Winner,Won,century
0,13,0,30,15,3,0,86.66,1,New Zealand,Napier,1995-02-16,New Zealand,False,False
1,37,0,75,51,3,1,72.54,2,South Africa,Hamilton,1995-02-18,South Africa,False,False
2,47,0,65,40,7,0,117.5,2,Australia,Dunedin,1995-02-22,India,True,False
3,48,0,37,30,9,1,160.0,2,Bangladesh,Sharjah,1995-04-05,India,True,False
4,4,0,13,9,1,0,44.44,2,Pakistan,Sharjah,1995-04-07,Pakistan,False,False


## Universal Sample Space

- The sample space S is the set of all ODI matches played by Sachin in this dataset; its size is n(S) = 360

In [4]:
# (rows, columns) of the full dataset; the row count is the size of the sample space.
data.shape

(360, 14)

In [12]:
# Number of rows = n(S), the denominator used for every probability below.
len(data)

360

## Event A : India winning the match

In [3]:
# Count the outcomes of event A: True = India won, False = India did not win.
# The two counts together add up to the size of the sample space.
data['Won'].value_counts()

True     184
False    176
Name: Won, dtype: int64

## Probability of India Winning the match

#### Basic Approach

In [5]:
# P(A) by hand: 184 wins (True count from value_counts above) out of 360 matches.
184/360

0.5111111111111111

#### Pandas Approach

In [8]:
# Event A as a DataFrame: only the rows where India won the match.
df_won = data.loc[data['Won'] == True]
df_won.head()

Unnamed: 0,runs,NotOut,mins,bf,fours,sixes,sr,Inns,Opp,Ground,Date,Winner,Won,century
2,47,0,65,40,7,0,117.5,2,Australia,Dunedin,1995-02-22,India,True,False
3,48,0,37,30,9,1,160.0,2,Bangladesh,Sharjah,1995-04-05,India,True,False
5,112,1,137,107,15,1,104.67,2,Sri Lanka,Sharjah,1995-04-09,India,True,True
6,41,0,51,41,5,0,100.0,2,Sri Lanka,Sharjah,1995-04-14,India,True,False
8,39,0,79,51,4,0,76.47,2,New Zealand,Amritsar,1995-11-18,India,True,False


In [9]:
# (rows, columns) of the event-A subset; columns are unchanged, rows are the wins.
df_won.shape

(184, 14)

### Number of elements present in the event A

In [11]:
# n(A): number of matches in which India won.
len(df_won)

184

In [10]:
# P(A) = n(A) / n(S): rows in the winning subset over rows in the full dataset.
prob_win = df_won.shape[0] / data.shape[0]
prob_win

0.5111111111111111

### Probability of losing

#### Basic Approach

In [13]:
# P(not A) by hand: 176 non-wins (False count from value_counts above) out of 360 matches.
176/360

0.4888888888888889

#### Pandas Approach

In [15]:
# Complement rule: P(not A) = 1 - P(A).
# The result can differ from 176/360 in the last printed digit because of
# floating-point rounding in the subtraction; the two values are mathematically equal.
prob_lose = 1 - prob_win
prob_lose

0.48888888888888893

In [16]:
# Same complement rule, with P(A) computed inline from the row counts.
prob_lose = 1 - df_won.shape[0] / data.shape[0]
prob_lose

0.48888888888888893

## Event B : Sachin Scoring a century

In [17]:
# Count the outcomes of event B: True = Sachin scored a century in that match.
data['century'].value_counts()

False    314
True      46
Name: century, dtype: int64

### Probability of Sachin scoring a century

#### Basic approach

In [19]:
# P(B) by hand: 46 centuries (True count from value_counts above) out of 360 matches.
46/360

0.12777777777777777

#### Pandas Approach

In [22]:
# Event B as a DataFrame: only the rows where Sachin scored a century.
df_century = data.loc[data['century'] == True]
df_century.head()

Unnamed: 0,runs,NotOut,mins,bf,fours,sixes,sr,Inns,Opp,Ground,Date,Winner,Won,century
5,112,1,137,107,15,1,104.67,2,Sri Lanka,Sharjah,1995-04-09,India,True,True
12,127,1,175,138,15,1,92.02,2,Kenya,Cuttack,1996-02-18,India,True,True
15,137,0,198,137,8,5,100.0,1,Sri Lanka,Delhi,1996-03-02,Sri Lanka,False,True
20,100,0,-,111,9,1,90.09,1,Pakistan,Singapore,1996-04-05,Pakistan,False,True
23,118,0,-,140,8,2,84.28,1,Pakistan,Sharjah,1996-04-15,India,True,True


In [23]:
# (rows, columns) of the event-B subset; rows are the century matches.
df_century.shape

(46, 14)

### Number of elements present in the event B

In [24]:
# n(B): number of matches in which Sachin scored a century.
len(df_century)

46

In [26]:
# P(B) = n(B) / n(S): rows in the century subset over rows in the full dataset.
prob_century = df_century.shape[0] / data.shape[0]
prob_century

0.12777777777777777

## Probability of India Winning and Sachin Scoring century

In [29]:
# Event A ∩ B: rows where India won AND Sachin scored a century.
df_iwsc = data.loc[
    (data['Won'] == True) & (data['century'] == True)
]
df_iwsc.head()

Unnamed: 0,runs,NotOut,mins,bf,fours,sixes,sr,Inns,Opp,Ground,Date,Winner,Won,century
5,112,1,137,107,15,1,104.67,2,Sri Lanka,Sharjah,1995-04-09,India,True,True
12,127,1,175,138,15,1,92.02,2,Kenya,Cuttack,1996-02-18,India,True,True
23,118,0,-,140,8,2,84.28,1,Pakistan,Sharjah,1996-04-15,India,True,True
41,114,0,157,126,14,0,90.47,1,South Africa,Mumbai,1996-12-14,India,True,True
47,104,0,121,97,8,1,107.21,2,Zimbabwe,Benoni,1997-02-09,India,True,True


In [31]:
# (rows, columns) of the intersection subset (won and century).
df_iwsc.shape

(30, 14)

### Number of elements present in the event A ∩ B (India winning and Sachin scoring a century)

In [32]:
# n(A ∩ B): matches that India won in which Sachin also scored a century.
len(df_iwsc)

30

In [34]:
# P(A ∩ B) = n(A ∩ B) / n(S).
prob_iwsc = df_iwsc.shape[0] / data.shape[0]
prob_iwsc

0.08333333333333333

### Second approach: counting all four outcomes with a contingency table

In [39]:
# Contingency table of century (rows) vs. Won (columns); margins=True appends
# the row/column totals under the label "All".
pd.crosstab(data['century'], data['Won'], margins=True)

Won,False,True,All
century,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,160,154,314
True,16,30,46
All,176,184,360


In [40]:
# Same contingency table without the totals: just the four joint counts.
pd.crosstab(data['century'], data['Won'])

Won,False,True
century,Unnamed: 1_level_1,Unnamed: 2_level_1
False,160,154
True,16,30


## Probability of India Winning or Sachin Scoring Century

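- $P(A \cup B) = P(A) + P(B) - P(A \cap B)$ (the overlap $A \cap B$ is subtracted once because it is counted in both $P(A)$ and $P(B)$)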

In [41]:
# P(A or B) by hand from the crosstab above: add every cell where at least one
# event happened, i.e. century & lost (16) + century & won (30) + no century & won (154),
# and divide by the 360 matches in the sample space.
(16 + 30 + 154) / 360

0.5555555555555556

In [42]:
# Inclusion–exclusion: P(A or B) = P(A) + P(B) - P(A and B).
# The last printed digit can differ from the direct count (200/360) because of
# floating-point rounding when adding and subtracting the three fractions.
prob_win + prob_century - prob_iwsc

0.5555555555555555

In [44]:
# Event A ∪ B: rows where Sachin scored a century OR India won (or both).
df_or = data.loc[
    (data['century'] == True) | (data['Won'] == True)
]
df_or.head()

Unnamed: 0,runs,NotOut,mins,bf,fours,sixes,sr,Inns,Opp,Ground,Date,Winner,Won,century
2,47,0,65,40,7,0,117.5,2,Australia,Dunedin,1995-02-22,India,True,False
3,48,0,37,30,9,1,160.0,2,Bangladesh,Sharjah,1995-04-05,India,True,False
5,112,1,137,107,15,1,104.67,2,Sri Lanka,Sharjah,1995-04-09,India,True,True
6,41,0,51,41,5,0,100.0,2,Sri Lanka,Sharjah,1995-04-14,India,True,False
8,39,0,79,51,4,0,76.47,2,New Zealand,Amritsar,1995-11-18,India,True,False


### Number of elements present in the event A ∪ B (India winning or Sachin scoring a century)

In [45]:
# n(A ∪ B): matches in which at least one of the two events happened.
len(df_or)

200

In [46]:
# P(A ∪ B) = n(A ∪ B) / n(S), computed directly from the filtered rows.
df_or.shape[0] / data.shape[0]

0.5555555555555556