In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
data = pd.DataFrame({
    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Rainy', 'Sunny', 'Overcast', 'Overcast', 'Rainy'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild'],
    'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High'],
    'Wind': ['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Strong'],
    'Play': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No']
})
data

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rainy,Mild,High,Weak,Yes
4,Rainy,Cool,Normal,Weak,Yes
5,Rainy,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rainy,Mild,Normal,Weak,Yes


In [3]:
# Frequency table of Outlook Column.
Outlook_play_freq = pd.crosstab(index=data['Outlook'], columns=data['Play'], margins=True, normalize='index')
Outlook_play_freq

Play,No,Yes
Outlook,Unnamed: 1_level_1,Unnamed: 2_level_1
Overcast,0.0,1.0
Rainy,0.4,0.6
Sunny,0.6,0.4
All,0.357143,0.642857


In [4]:
# Frequency table of Temperature Column.
Temperature_play_freq = pd.crosstab(index=data['Temperature'], columns=data['Play'], margins=True, normalize='index')
Temperature_play_freq

Play,No,Yes
Temperature,Unnamed: 1_level_1,Unnamed: 2_level_1
Cool,0.25,0.75
Hot,0.5,0.5
Mild,0.333333,0.666667
All,0.357143,0.642857


In [5]:
# Frequency table of Humidity Column.
Humidity_play_freq = pd.crosstab(index=data['Humidity'], columns=data['Play'], margins=True, normalize='index')
Humidity_play_freq

Play,No,Yes
Humidity,Unnamed: 1_level_1,Unnamed: 2_level_1
High,0.571429,0.428571
Normal,0.142857,0.857143
All,0.357143,0.642857


In [6]:
# Frequency table of Wind Column.
Wind_play_freq = pd.crosstab(index=data['Wind'], columns=data['Play'], margins=True, normalize='index')
Wind_play_freq

Play,No,Yes
Wind,Unnamed: 1_level_1,Unnamed: 2_level_1
Strong,0.5,0.5
Weak,0.25,0.75
All,0.357143,0.642857


In [7]:
#Generate the Likelihood table
likelihood_table = data.groupby(['Play', 'Outlook', 'Temperature', 'Humidity', 'Wind']).size() / len(data)
likelihood_table

Play  Outlook   Temperature  Humidity  Wind  
No    Rainy     Cool         Normal    Strong    0.071429
                Mild         High      Strong    0.071429
      Sunny     Hot          High      Strong    0.071429
                                       Weak      0.071429
                Mild         High      Weak      0.071429
Yes   Overcast  Cool         Normal    Strong    0.071429
                Hot          High      Weak      0.071429
                             Normal    Weak      0.071429
                Mild         High      Strong    0.071429
      Rainy     Cool         Normal    Weak      0.071429
                Mild         High      Weak      0.071429
                             Normal    Weak      0.071429
      Sunny     Cool         Normal    Weak      0.071429
                Mild         Normal    Strong    0.071429
dtype: float64

In [8]:
# Encode categorical variables
from sklearn.preprocessing import LabelEncoder
Le = LabelEncoder()
Le

LabelEncoder()

In [9]:
col = ['Outlook','Temperature','Humidity','Wind','Play']
data['Temperature'] = Le.fit_transform(data['Temperature'])
data['Humidity'] = Le.fit_transform(data['Humidity'])
data['Wind'] = Le.fit_transform(data['Wind'])
data['Play'] = Le.fit_transform(data['Play'])
data.head()

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play
0,Sunny,1,0,1,0
1,Sunny,1,0,0,0
2,Overcast,1,0,1,1
3,Rainy,2,0,1,1
4,Rainy,0,1,1,1


In [10]:
df = pd.get_dummies(data, columns=['Outlook'])
df.head()

Unnamed: 0,Temperature,Humidity,Wind,Play,Outlook_Overcast,Outlook_Rainy,Outlook_Sunny
0,1,0,1,0,0,0,1
1,1,0,0,0,0,0,1
2,1,0,1,1,1,0,0
3,2,0,1,1,0,1,0
4,0,1,1,1,0,1,0


In [11]:
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X = df.drop('Play', axis=1)
y = df['Play']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
from sklearn.naive_bayes import GaussianNB
Gnb = GaussianNB()
Gnb.fit(X_train, y_train)


GaussianNB()

In [13]:
y_pred = Gnb.predict(X_test)

In [14]:
# Evaluate the model
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6666666666666666
