In [1]:
import pandas as pd
import numpy as np
import math

In [5]:
# Load the weather dataset; the path is relative to the notebook's working
# directory. Later cells treat the last column as the class label and every
# other column as a feature.
wthr_df = pd.read_csv('weather_forecast.csv')
# Bare last expression so the notebook renders the frame as the cell output.
wthr_df

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [11]:
# Report the dataset dimensions (row count, then column count).
print("Rows:", len(wthr_df.index), "Columns:", len(wthr_df.columns))

Rows: 14 Columns: 5


In [15]:
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line after the summary.
wthr_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Outlook      14 non-null     object
 1   Temperature  14 non-null     object
 2   Humidity     14 non-null     object
 3   Windy        14 non-null     object
 4   Play         14 non-null     object
dtypes: object(5)
memory usage: 692.0+ bytes
None


In [17]:
print("\nDistinct values for each feature:")
# Every column except the last one (the class label) is a feature; show its
# distinct values followed by how many there are.
for name, values in wthr_df.iloc[:, :-1].items():
    print(f"{name}: {values.unique()} ({values.nunique()})")


Distinct values for each feature:
Outlook: ['Sunny' 'Overcast' 'Rain'] (3)
Temperature: ['Hot' 'Mild' 'Cool'] (3)
Humidity: ['High' 'Normal'] (2)
Windy: ['Weak' 'Strong'] (2)


In [19]:
print("\nClass label:")
# The class label is the last column; show how many rows fall in each class.
print(wthr_df.iloc[:, -1].value_counts())


Class label:
Play
Yes    9
No     5
Name: count, dtype: int64


In [57]:
def get_count(data, condition):
    """Tally how many items of ``data`` fall under each key.

    Parameters
    ----------
    data : iterable
        Items to classify.
    condition : callable
        Maps one item to a hashable key.

    Returns
    -------
    dict
        ``{key: occurrences}``, with keys in first-seen order.
    """
    tally = {}
    for bucket in map(condition, data):
        if bucket in tally:
            tally[bucket] += 1
        else:
            tally[bucket] = 1
    return tally

def NaiveBayes(df):
    """Build a categorical Naive Bayes classifier from a labelled DataFrame.

    Every column except the last is treated as a categorical feature and the
    last column as the class label. The class priors are computed and printed
    as soon as this function is called; the likelihoods are computed (and
    printed) when the returned ``fit`` closure is invoked.

    Parameters
    ----------
    df : pandas.DataFrame
        Training data with the feature columns first and the class label in
        the last column.

    Returns
    -------
    callable
        ``fit``, a zero-argument function that estimates
        P(feature = value | label) for every observed feature value and
        returns ``predict``. ``predict(x_qs)`` takes feature values in column
        order and returns the label with the highest log-posterior.

    Notes
    -----
    Probabilities are plain relative frequencies (no smoothing). At prediction
    time a likelihood that is zero (value never seen with that label) or
    missing (value never seen at all) is replaced by a tiny floor so that its
    logarithm is defined.
    """
    # Floor applied before taking log(): math.log(0) raises ValueError, and a
    # zero likelihood is a legitimate outcome of frequency counting.
    min_prob = 1e-10

    N = df.shape[0]
    f_names = df.columns[:-1]
    l_name = df.columns[-1]
    labels = df[l_name].value_counts().to_dict()
    cl_p = {label: count / N for label, count in labels.items()}

    print("\nClass-label Prior probabilities:")
    for label, count in labels.items():
        print(f"P({label}) = {count}/{N}")

    def fit():
        """Estimate and print the per-feature likelihoods; return ``predict``."""
        model = {}
        print("\nFeature-wise Conditional Probabilities (Likelihood):")
        for f_name in f_names:
            print(f"\nFeature: {f_name}")
            # Joint counts keyed by (feature value, label) tuples. Tuples keep
            # distinct pairs distinct (string concatenation can collide, e.g.
            # "a"+"bc" vs "ab"+"c") and also work for non-string features.
            pair_counts = (
                df.groupby([f_name, l_name], sort=False, observed=True)
                .size()
                .to_dict()
            )

            for feature in df[f_name].unique():
                for label, count in labels.items():
                    aib = pair_counts.get((feature, label), 0)
                    prob = aib / count if count != 0 else 0
                    model[(f_name, feature, label)] = prob
                    print(f"P({f_name} = {feature} | {l_name} = {label}) = {round(prob, 4)}")

        def predict(x_qs):
            """Return the most probable label for one query point.

            ``x_qs`` holds one value per feature, in the same order as the
            feature columns of the training frame.
            """
            sigma_cl = {}
            for label, prior in cl_p.items():
                # Sum logs instead of multiplying probabilities.
                sigma = math.log(prior)
                for idx, x_q in enumerate(x_qs):
                    cp = model.get((f_names[idx], x_q, label), 0.0)
                    # Unseen values and zero likelihoods both get the floor.
                    sigma += math.log(max(cp, min_prob))
                sigma_cl[label] = sigma

            return max(sigma_cl, key=sigma_cl.get)

        return predict

    return fit

# NaiveBayes(...) prints the class priors and returns the `fit` closure.
fit = NaiveBayes(wthr_df)
# Calling fit() prints the per-feature likelihoods and returns `predict`.
predict = fit()



Class-label Prior probabilities:
P(Yes) = 9/14
P(No) = 5/14

Feature-wise Conditional Probabilities (Likelihood):

Feature: Outlook
P(Outlook = Sunny | Play = Yes) = 0.2222
P(Outlook = Sunny | Play = No) = 0.6
P(Outlook = Overcast | Play = Yes) = 0.4444
P(Outlook = Overcast | Play = No) = 0.0
P(Outlook = Rain | Play = Yes) = 0.3333
P(Outlook = Rain | Play = No) = 0.4

Feature: Temperature
P(Temperature = Hot | Play = Yes) = 0.2222
P(Temperature = Hot | Play = No) = 0.4
P(Temperature = Mild | Play = Yes) = 0.4444
P(Temperature = Mild | Play = No) = 0.4
P(Temperature = Cool | Play = Yes) = 0.3333
P(Temperature = Cool | Play = No) = 0.2

Feature: Humidity
P(Humidity = High | Play = Yes) = 0.3333
P(Humidity = High | Play = No) = 0.8
P(Humidity = Normal | Play = Yes) = 0.6667
P(Humidity = Normal | Play = No) = 0.2

Feature: Windy
P(Windy = Weak | Play = Yes) = 0.6667
P(Windy = Weak | Play = No) = 0.4
P(Windy = Strong | Play = Yes) = 0.3333
P(Windy = Strong | Play = No) = 0.6


In [59]:
# Section banner printed ahead of the manual prediction checks.
print("\nTest Model")


Test Model


In [61]:
# Keep the query in one place so the echoed point and the classified point
# cannot drift apart; the printed text is unchanged.
query_point = ['Sunny', 'Cool', 'High', 'Strong']
print("Query point:", query_point)
print("Output:", predict(query_point))

Query point: ['Sunny', 'Cool', 'High', 'Strong']
Output: No


In [63]:
# Keep the query in one place so the echoed point and the classified point
# cannot drift apart; the printed text is unchanged.
# 'Low' is a Humidity value that the distinct-values listing does not contain.
query_point = ['Sunny', 'Cool', 'Low', 'Strong']
print("Query point:", query_point)
print("Output:", predict(query_point))

Query point: ['Sunny', 'Cool', 'Low', 'Strong']
Output: No


In [65]:
# Keep the query in one place so the echoed point and the classified point
# cannot drift apart; the printed text is unchanged.
# 'Low' is a Humidity value that the distinct-values listing does not contain.
query_point = ['Sunny', 'Cool', 'Low', 'Weak']
print("Query point:", query_point)
print("Output:", predict(query_point))

Query point: ['Sunny', 'Cool', 'Low', 'Weak']
Output: Yes
