## **Step 0: Data processing and Feature selection**

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
data="weather.csv"
df = pd.read_csv(data)
df.head()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain


In [2]:
unique_lables = df['weather'].unique()
print(unique_lables)

['drizzle' 'rain' 'sun' 'snow' 'fog']


In [3]:
df=df.replace("drizzle",0)
df=df.replace("rain",1)
df=df.replace("sun",2)
df=df.replace("snow",3)
df=df.replace("fog",4)
features=["precipitation","temp_max","temp_min","wind"]  #date is not included as a feature for obvious reasons
X = df.loc[0:len(df), features].values
Y = df.loc[0:len(df), "weather"].values
X_train, X_test, Y_train, Y_test = train_test_split (X, Y, test_size=0.15, random_state=42)

## **Steps 1: Compute the prior probabilities**

In [4]:
class_counts = np.bincount(Y_train) # counts the no of ocuurancces of each lable
priors = class_counts / len(Y_train)
No_of_features=len(features)

## **Step 2: Compute the conditional probabilities**

In [5]:
conditional_probs = {}
for feature in range(0,No_of_features):
    for label in [0 ,1 ,2 ,3 ,4]:
        feature_given_label = X_train[Y_train == label, feature]
        conditional_probs[(feature, label)] = len(feature_given_label) / class_counts[label]

## **Step 3: Apply Bayes' theorem**

In [6]:
def predict_class(input_data):
    posteriors = []
    for label in [0 ,1, 2, 3, 4]:
        likelihood = 1.0
        for feature in range(0,No_of_features):
            likelihood *= conditional_probs[(feature, label)] ** input_data[feature]
        posterior = priors[label] * likelihood
        posteriors.append(posterior)
    return np.unique(Y_train)[np.argmax(posteriors)]

## **Step 4: Evaluate the performance of the model**

In [7]:
correct = 0
for i in range(len(X_test)):
    input_data = X_test[i]
    true_label = Y_test[i]
    predicted_label = predict_class(input_data)
    if true_label == predicted_label:
        correct += 1
accuracy = correct / len(X_test)
print(f"Accuracy: {accuracy}")

Accuracy: 0.42272727272727273
