In [22]:
import numpy as np
import pandas as pd

In [23]:
# Load the play-tennis dataset from the working directory.
csv_path = 'play_tennis.csv'
df = pd.read_csv(csv_path)

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,day,outlook,temp,humidity,wind,play
0,D1,Sunny,Hot,High,Weak,No
1,D2,Sunny,Hot,High,Strong,No
2,D3,Overcast,Hot,High,Weak,Yes
3,D4,Rain,Mild,High,Weak,Yes
4,D5,Rain,Cool,Normal,Weak,Yes


### Preprocessing :

In [24]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

(14, 6)

In [25]:
# Print column names, non-null counts and dtypes to check for missing values
# and confirm how each column was parsed.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   day       14 non-null     object
 1   outlook   14 non-null     object
 2   temp      14 non-null     object
 3   humidity  14 non-null     object
 4   wind      14 non-null     object
 5   play      14 non-null     object
dtypes: object(6)
memory usage: 804.0+ bytes


In [26]:
# Count missing entries in each column (isna is the same check as isnull).
df.isna().sum()

day         0
outlook     0
temp        0
humidity    0
wind        0
play        0
dtype: int64

In [27]:
# Remove the `day` label column (D1, D2, ...) from the working frame.
# Reassigning instead of using inplace=True, combined with errors='ignore',
# keeps this cell idempotent: re-running it once `day` is already gone is a
# no-op rather than a KeyError.
df = df.drop(columns=['day'], errors='ignore')

In [28]:
# Preview the first five rows after removing the `day` column.
df.head(n=5)

Unnamed: 0,outlook,temp,humidity,wind,play
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


In [29]:
# Re-check the (rows, columns) tuple now that the `day` column has been dropped.
df.shape

(14, 5)

In [30]:
# Number of rows (observations) in the frame.
len(df)

14

In [31]:
# Frequency of each class label in the target column.
play_column = df['play']
play_column.value_counts()

play
Yes    9
No     5
Name: count, dtype: int64

In [32]:
# Contingency table: outlook values (rows) against the play label (columns).
pd.crosstab(index=df['outlook'], columns=df['play'])

play,No,Yes
outlook,Unnamed: 1_level_1,Unnamed: 2_level_1
Overcast,0,4
Rain,2,3
Sunny,3,2


In [33]:
# Contingency table: humidity values (rows) against the play label (columns).
pd.crosstab(index=df['humidity'], columns=df['play'])

play,No,Yes
humidity,Unnamed: 1_level_1,Unnamed: 2_level_1
High,4,3
Normal,1,6


In [34]:
# Contingency table: temperature values (rows) against the play label (columns).
pd.crosstab(index=df['temp'], columns=df['play'])

play,No,Yes
temp,Unnamed: 1_level_1,Unnamed: 2_level_1
Cool,1,3
Hot,2,2
Mild,2,4


In [35]:
# Contingency table: wind values (rows) against the play label (columns).
pd.crosstab(index=df['wind'], columns=df['play'])

play,No,Yes
wind,Unnamed: 1_level_1,Unnamed: 2_level_1
Strong,3,3
Weak,2,6


### Probability Calculation :

In [36]:
# Class priors: the share of rows carrying each label of `play`.
P_Yes = df['play'].eq('Yes').sum() / len(df)   # 9/14
P_No = df['play'].eq('No').sum() / len(df)     # 5/14

print(f"P(Yes) = {P_Yes :.3f}, P(No) = {P_No :.3f}")

P(Yes) = 0.643, P(No) = 0.357


In [37]:
# Class sizes: the denominators shared by every conditional probability.
yes_count = int((df['play'] == 'Yes').sum())
no_count = int((df['play'] == 'No').sum())

# Outlook likelihoods, P(outlook = value | play = class).
# Split the outlook column by class once, then count each value within a class.
outlook_given_yes = df.loc[df['play'] == 'Yes', 'outlook']
outlook_given_no = df.loc[df['play'] == 'No', 'outlook']

P_Overcast_Yes = int((outlook_given_yes == 'Overcast').sum()) / yes_count   # 4/9
P_Rain_Yes     = int((outlook_given_yes == 'Rain').sum()) / yes_count       # 3/9
P_Sunny_Yes    = int((outlook_given_yes == 'Sunny').sum()) / yes_count      # 2/9

P_Overcast_No  = int((outlook_given_no == 'Overcast').sum()) / no_count     # 0/5
P_Rain_No      = int((outlook_given_no == 'Rain').sum()) / no_count         # 2/5
P_Sunny_No     = int((outlook_given_no == 'Sunny').sum()) / no_count        # 3/5


In [38]:
# Temperature likelihoods, P(temp = value | play = class).
# Split the temp column by class once, then count each value within a class.
temp_given_yes = df.loc[df['play'] == 'Yes', 'temp']
temp_given_no = df.loc[df['play'] == 'No', 'temp']

P_Cool_Yes = int((temp_given_yes == 'Cool').sum()) / yes_count   # 3/9
P_Hot_Yes  = int((temp_given_yes == 'Hot').sum()) / yes_count    # 2/9
P_Mild_Yes = int((temp_given_yes == 'Mild').sum()) / yes_count   # 4/9

P_Cool_No  = int((temp_given_no == 'Cool').sum()) / no_count     # 1/5
P_Hot_No   = int((temp_given_no == 'Hot').sum()) / no_count      # 2/5
P_Mild_No  = int((temp_given_no == 'Mild').sum()) / no_count     # 2/5


In [39]:
# Humidity likelihoods, P(humidity = value | play = class).
# The fractions in the trailing comments follow the humidity/play crosstab
# (High: 4 No, 3 Yes; Normal: 1 No, 6 Yes); the earlier No-class comments
# (3/5 and 2/5) did not match what the code computes.
humidity_given_yes = df.loc[df['play'] == 'Yes', 'humidity']
humidity_given_no = df.loc[df['play'] == 'No', 'humidity']

P_High_Yes   = int((humidity_given_yes == 'High').sum()) / yes_count     # 3/9
P_Normal_Yes = int((humidity_given_yes == 'Normal').sum()) / yes_count   # 6/9

P_High_No    = int((humidity_given_no == 'High').sum()) / no_count       # 4/5
P_Normal_No  = int((humidity_given_no == 'Normal').sum()) / no_count     # 1/5

In [40]:
# Wind likelihoods, P(wind = value | play = class).
# The fractions in the trailing comments follow the wind/play crosstab
# (Strong: 3 No, 3 Yes; Weak: 2 No, 6 Yes); the earlier comments
# (5/9, 4/9, 2/5, 3/5) did not match what the code computes.
wind_given_yes = df.loc[df['play'] == 'Yes', 'wind']
wind_given_no = df.loc[df['play'] == 'No', 'wind']

P_Strong_Yes = int((wind_given_yes == 'Strong').sum()) / yes_count   # 3/9
P_Weak_Yes   = int((wind_given_yes == 'Weak').sum()) / yes_count     # 6/9

P_Strong_No  = int((wind_given_no == 'Strong').sum()) / no_count     # 3/5
P_Weak_No    = int((wind_given_no == 'Weak').sum()) / no_count       # 2/5


### Question 1: Should we play (Yes/No) under the following conditions?
- outlook = Overcast
- temp = Cool
- humidity = Normal
- wind = Weak

In [41]:
# Day to classify. The dict is only used to label the printout; the
# probabilities matching these values are picked by name below.
test_case = dict(
    outlook='Overcast',
    temp='Cool',
    humidity='Normal',
    wind='Weak',
)

# Naive Bayes score per class: the class prior times the product of the
# per-feature likelihoods. The evidence term is never divided out, so these
# values are proportional to the posteriors rather than normalised
# probabilities; the comparison below is unaffected by that.
likelihood_yes = P_Overcast_Yes * P_Cool_Yes * P_Normal_Yes * P_Weak_Yes
likelihood_no = P_Overcast_No * P_Cool_No * P_Normal_No * P_Weak_No
posterior_yes = P_Yes * likelihood_yes
posterior_no = P_No * likelihood_no

print(f"P(Yes | {test_case}) = {posterior_yes:.6f}")
print(f"P(No  | {test_case}) = {posterior_no:.6f}")

# Predict the class with the strictly larger score; a tie falls to "No".
verdict = (
    "üëâPrediction: Yes (Play)‚úÖ"
    if posterior_yes > posterior_no
    else "üëâPrediction: No (Don't Play)‚ùå"
)
print(verdict)

P(Yes | {'outlook': 'Overcast', 'temp': 'Cool', 'humidity': 'Normal', 'wind': 'Weak'}) = 0.042328
P(No  | {'outlook': 'Overcast', 'temp': 'Cool', 'humidity': 'Normal', 'wind': 'Weak'}) = 0.000000
üëâPrediction: Yes (Play)‚úÖ


### Question 2: Should we play (Yes/No) under the following conditions?

- outlook = Sunny
- temp = Hot
- humidity = High
- wind = Weak

In [None]:
# Day to classify. The dict is only used to label the printout; the
# probabilities matching these values are picked by name below.
test_case = dict(
    outlook='Sunny',
    temp='Hot',
    humidity='High',
    wind='Weak',
)

# Naive Bayes score per class: the class prior times the product of the
# per-feature likelihoods. The evidence term is never divided out, so these
# values are proportional to the posteriors rather than normalised
# probabilities; the comparison below is unaffected by that.
likelihood_yes = P_Sunny_Yes * P_Hot_Yes * P_High_Yes * P_Weak_Yes
likelihood_no = P_Sunny_No * P_Hot_No * P_High_No * P_Weak_No
posterior_yes = P_Yes * likelihood_yes
posterior_no = P_No * likelihood_no

# Report both class scores for the query day.
print(f"P(Yes | {test_case}) = {posterior_yes:.6f}")
print(f"P(No  | {test_case}) = {posterior_no:.6f}")

# Predict the class with the strictly larger score; a tie falls to "No".
verdict = (
    "üëâPrediction: Yes (Play)‚úÖ"
    if posterior_yes > posterior_no
    else "üëâPrediction: No (Don't Play)‚ùå"
)
print(verdict)

P(Yes | {'outlook': 'Sunny', 'temp': 'Hot', 'humidity': 'High', 'wind': 'Weak'}) = 0.007055
P(No  | {'outlook': 'Sunny', 'temp': 'Hot', 'humidity': 'High', 'wind': 'Weak'}) = 0.027429
üëâPrediction: No (Don't Play)‚ùå
