In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt
import seaborn as sns

# Load the badminton dataset from the Kaggle input directory and preview it.
df = pd.read_csv("/kaggle/input/play-badminton/badminton_dataset.csv")
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()


In [None]:
# Replace each categorical column (the four features and the target) with its
# integer category code; the columns of `df` are overwritten in place.
categorical_cols = ['Outlook', 'Temperature', 'Humidity', 'Wind', 'Play_Badminton']
for col in categorical_cols:
    df[col] = df[col].astype('category').cat.codes

# Target is the encoded 'Play_Badminton' column; every other column is a feature.
y = df['Play_Badminton']
X = df.drop(columns=['Play_Badminton'])

print(X.head(), y.head(), sep='\n')

# Hold out 30% of the rows for testing; the seed keeps the split reproducible.
split_parts = train_test_split(X, y, random_state=42, test_size=0.3)
X_train, X_test, y_train, y_test = split_parts


In [None]:
class NaiveBayesScratch:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Each feature is modelled, per class, as an independent normal
    distribution. Prediction picks the class with the largest log-posterior
    (log prior plus the sum of per-feature log-densities).

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest feature variance of the training data that is
        added to every per-class variance. This keeps the variance strictly
        positive when a feature is constant within a class, which would
        otherwise produce ``log(0)`` / division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray of the sorted unique labels.
    mean, var, priors : dicts keyed by class label holding the per-feature
        mean array, the smoothed per-feature variance array, and the class
        prior probability.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class feature means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features), numeric.
        y : array-like of shape (n_samples,), class labels.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` have different lengths.
        """
        # Work on plain arrays so class masks are positional and both
        # DataFrames and ndarrays are accepted.
        X_arr = np.asarray(X, dtype=float)
        y_arr = np.asarray(y).ravel()
        if X_arr.shape[0] == 0:
            raise ValueError("Cannot fit on an empty training set.")
        if X_arr.shape[0] != y_arr.shape[0]:
            raise ValueError("X and y must contain the same number of samples.")

        self.classes = np.unique(y_arr)
        self.mean = {}
        self.var = {}
        self.priors = {}

        # Variance floor proportional to the data scale. If every feature is
        # constant overall, fall back to the raw smoothing value so the
        # variance is still positive.
        epsilon = self.var_smoothing * X_arr.var(axis=0).max()
        if not epsilon > 0:
            epsilon = self.var_smoothing

        for c in self.classes:
            X_c = X_arr[y_arr == c]
            self.mean[c] = X_c.mean(axis=0)
            # Population variance (ddof=0) is defined even for a class with a
            # single sample, where the sample variance (ddof=1) would be NaN.
            self.var[c] = X_c.var(axis=0) + epsilon
            self.priors[c] = X_c.shape[0] / X_arr.shape[0]
        return self

    def _log_gaussian_pdf(self, class_idx, x):
        """Per-feature log-density of ``x`` under the class's normal model."""
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        # Computed directly in log space so far-away points give a large
        # negative number instead of underflowing to log(0) = -inf.
        return -0.5 * (np.log(2.0 * np.pi * var) + (x - mean) ** 2 / var)

    def gaussian_pdf(self, class_idx, x):
        """Per-feature normal density of ``x`` for class ``class_idx``.

        ``x`` may be a single sample (n_features,) or a batch
        (n_samples, n_features); the result has the same shape.
        """
        return np.exp(self._log_gaussian_pdf(class_idx, np.asarray(x, dtype=float)))

    def predict(self, X):
        """Predict the most probable class for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features), numeric.

        Returns
        -------
        ndarray of shape (n_samples,) with labels taken from ``self.classes``.
        Ties are resolved in favour of the first class in ``self.classes``.
        """
        X_arr = np.asarray(X, dtype=float)
        # One column of joint log-probabilities per class, all rows at once.
        joint_log_prob = np.column_stack([
            np.log(self.priors[c]) + self._log_gaussian_pdf(c, X_arr).sum(axis=1)
            for c in self.classes
        ])
        return self.classes[np.argmax(joint_log_prob, axis=1)]


In [None]:
# Build the from-scratch classifier and the scikit-learn baseline, then train
# both on the same training split.
nb_scratch = NaiveBayesScratch()
nb_sklearn = GaussianNB()
nb_scratch.fit(X_train, y_train)
nb_sklearn.fit(X_train, y_train)

# Predict the held-out rows with each model.
y_pred_scratch = nb_scratch.predict(X_test)
y_pred_sklearn = nb_sklearn.predict(X_test)
def evaluate_model(y_true, y_pred, title=None):
    """Print binary classification metrics and plot the confusion matrix.

    Labels are expected to be encoded as 0 (negative) and 1 (positive).

    Parameters
    ----------
    y_true : array-like of true labels.
    y_pred : array-like of predicted labels, same length as ``y_true``.
    title : str, optional
        Title drawn above the heatmap; omitted when ``None``.

    Returns
    -------
    None. Results are printed and the heatmap is shown.
    """
    # Pin the label order so the matrix is always 2x2. Without it, a split in
    # which only one class appears yields a 1x1 matrix and the TP/FP/FN
    # look-ups raise IndexError.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    # Row = actual, column = predicted, so the flattened order is TN, FP, FN, TP.
    TN, FP, FN, TP = cm.ravel()

    print("Confusion Matrix:\n", cm)
    print(f"TP={TP}, TN={TN}, FP={FP}, FN={FN}")
    print("Accuracy :", accuracy_score(y_true, y_pred))
    # zero_division=0 reports 0.0 without a warning when the positive class is
    # never predicted (precision) or never present (recall).
    print("Precision:", precision_score(y_true, y_pred, zero_division=0))
    print("Recall   :", recall_score(y_true, y_pred, zero_division=0))

    # Plot confusion matrix on its own Axes so repeated calls never share state.
    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    if title is not None:
        ax.set_title(title)
    plt.show()

# Report both models against the same held-out labels, scratch version first.
reports = [
    ("Scratch Naive Bayes Performance:", y_pred_scratch),
    ("Sklearn Naive Bayes Performance:", y_pred_sklearn),
]
for heading, predictions in reports:
    print(heading)
    evaluate_model(y_test, predictions)
