In [5]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import CategoricalNB, GaussianNB
from sklearn.preprocessing import LabelEncoder

In [7]:
# Toy weather dataset: four categorical weather attributes per row followed by
# a Yes/No `Play` label. Each line of the table below is one observation; the
# values contain no spaces, so splitting on whitespace recovers the fields.
column_names = ["Outlook", "Temperature", "Humidity", "Wind", "Play"]

raw_table = """
Sunny     Hot   High    Weak    No
Sunny     Hot   High    Strong  No
Overcast  Hot   High    Weak    Yes
Rain      Mild  High    Weak    Yes
Rain      Cool  Normal  Weak    Yes
Rain      Cool  Normal  Strong  No
Overcast  Cool  Normal  Strong  Yes
Sunny     Mild  High    Weak    No
Sunny     Cool  Normal  Weak    Yes
Rain      Mild  Normal  Weak    Yes
Sunny     Mild  Normal  Strong  Yes
Overcast  Mild  High    Strong  Yes
Overcast  Hot   Normal  Weak    Yes
Rain      Mild  High    Strong  No
"""

# `ds` stays a list of row lists, one inner list per observation.
ds = [line.split() for line in raw_table.strip().splitlines()]

df = pd.DataFrame(data=ds, columns=column_names)
df

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [8]:
# One LabelEncoder per column (features and the `Play` label alike), kept in a
# dict keyed by column name so the fitted encoders remain available afterwards.
label_encoders = {column: LabelEncoder() for column in df.columns}

# NOTE: each column of `df` is overwritten in place with its integer codes.
# Re-running this cell therefore re-fits the encoders on the integer codes
# rather than on the original strings.
for column, encoder in label_encoders.items():
    df[column] = encoder.fit_transform(df[column])

df

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play
0,2,1,0,1,0
1,2,1,0,0,0
2,0,1,0,1,1
3,1,2,0,1,1
4,1,0,1,1,1
5,1,0,1,0,0
6,0,0,1,0,1
7,2,2,0,1,0
8,2,0,1,1,1
9,1,2,1,1,1


In [9]:
# Split the frame into the label series (`y`) and the remaining predictor
# columns (`X`).
target_column = "Play"
y = df[target_column]
X = df.drop(columns=target_column)

In [10]:
# Hold out 33% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
split_parts = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [11]:
# The predictors are label-encoded categories, not continuous measurements.
# GaussianNB would fit a normal distribution to those arbitrary integer codes
# (implying an order and distance between e.g. Overcast=0 and Sunny=2), which
# also disagrees with the per-category likelihood tables computed later in this
# notebook. CategoricalNB estimates one probability per category instead.
#
# min_categories is given per feature from the codes present in either split,
# so a category that happens to be missing from the small training split does
# not cause an out-of-range lookup when it appears in the test split.
n_categories = pd.concat([X_train, X_test]).max().to_numpy() + 1

# The variable name `gnb` is kept so any other cell that refers to the fitted
# model keeps working.
gnb = CategoricalNB(min_categories=n_categories)
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

In [12]:
# Score the held-out predictions: overall hit rate plus the per-class
# breakdown of correct and incorrect predictions.
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(
    f"Accuracy: {accuracy}",
    f"Confusion Matrix:\n{conf_matrix}",
    sep="\n",
)

Accuracy: 0.6
Confusion Matrix:
[[1 1]
 [1 2]]


In [13]:
# For every `Play` class, count the non-null entries in each remaining column.
# The list form of the aggregation keeps a "count" level under each column
# name in the result's column index.
rows_by_label = df.groupby("Play")
frequency_table = rows_by_label.agg(["count"])

print("Frequency Table:", frequency_table, sep="\n")

Frequency Table:
     Outlook Temperature Humidity  Wind
       count       count    count count
Play                                   
0          5           5        5     5
1          9           9        9     9


In [14]:
# Per-class relative frequency of every value of every feature, i.e. an
# estimate of P(feature = value | Play).
#
# The feature columns are selected by name *before* apply. Dropping "the last
# column" positionally inside the applied function only works while the
# grouping column `Play` is still part of each group frame; pandas has
# deprecated passing grouping columns to groupby.apply, and once they are
# excluded a positional `iloc[:, :-1]` would silently drop `Wind` instead.
feature_columns = [column for column in df.columns if column != "Play"]

likelihood_table = df.groupby("Play")[feature_columns].apply(
    # value_counts(normalize=True) divides each count by the number of
    # non-null entries, matching value_counts() / count().
    lambda group: group.apply(lambda values: values.value_counts(normalize=True))
)

# NaN in the printed table means that value was never observed for that
# feature within the class (or is not a code of that feature at all).
print("\nLikelihood Table:")
print(likelihood_table)


Likelihood Table:
         Outlook  Temperature  Humidity      Wind
Play                                             
0    0       NaN     0.200000  0.800000  0.600000
     1  0.400000     0.400000  0.200000  0.400000
     2  0.600000     0.400000       NaN       NaN
1    0  0.444444     0.333333  0.333333  0.333333
     1  0.333333     0.222222  0.666667  0.666667
     2  0.222222     0.444444       NaN       NaN


In [16]:

# Number of rows per `Play` class; computed once and reused as the denominator
# for every feature below.
class_counts = df.groupby("Play").size()

print("Frequency Table:")
print(class_counts)

print("\nLikelihood Table:")
# Every column except the last one is treated as a feature.
feature_names = df.columns[:-1]
for column in feature_names:
    # Rows per (class, feature value) pair, divided by the class size.
    joint_counts = df.groupby(["Play", column]).size()
    print(f"\nLikelihood of {column}:")
    print(joint_counts / class_counts)

Frequency Table:
Play
0    5
1    9
dtype: int64

Likelihood Table:

Likelihood of Outlook:
Play  Outlook
0     1          0.400000
      2          0.600000
1     0          0.444444
      1          0.333333
      2          0.222222
dtype: float64

Likelihood of Temperature:
Play  Temperature
0     0              0.200000
      1              0.400000
      2              0.400000
1     0              0.333333
      1              0.222222
      2              0.444444
dtype: float64

Likelihood of Humidity:
Play  Humidity
0     0           0.800000
      1           0.200000
1     0           0.333333
      1           0.666667
dtype: float64

Likelihood of Wind:
Play  Wind
0     0       0.600000
      1       0.400000
1     0       0.333333
      1       0.666667
dtype: float64
