In [1]:
import pandas as pd 
import numpy as np

In [2]:
# Directory that holds the competition CSV files. It is kept in one place so
# the notebook can be pointed at another location by editing a single line.
DATA_DIR = '/Users/lfarias/Downloads/Kaggle-IFT3395/data'

# Load the train and test data
train = pd.read_csv(DATA_DIR + '/train.csv')
test = pd.read_csv(DATA_DIR + '/test.csv')

# Split the training frame: every column except the last one is a feature,
# the last column holds the labels.
X_train = train.iloc[:, :-1]
y_train = train.iloc[:, -1]

# The test frame has no label column, so all of its columns are used as features.
# NOTE(review): the 'SNo' column of `test` is used further down as the
# submission id; if it is only a row identifier it is being fed to the model
# as a feature here -- confirm this is intended.
X_test = test.values

In [3]:
class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier.

    Every feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated from the training
    data. A sample is assigned to the class with the largest log-posterior
    ``log P(c) + sum_j log N(x_j | mean_cj, var_cj)``.

    Parameters
    ----------
    var_smoothing : float, default 1e-9
        Fraction of the largest overall feature variance that is added to
        every per-class variance. This keeps the variances strictly positive
        so that a feature which is constant inside a class does not produce
        a division by zero (and NaN posteriors) at prediction time.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric training features.
        y : array-like of shape (n_samples,)
            Class label of each training sample.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional or contains no samples.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit GaussianNaiveBayes on an empty training set")

        self._classes = np.unique(y)
        n_classes = len(self._classes)
        self._mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self._var = np.zeros((n_classes, n_features), dtype=np.float64)
        self._priors = np.zeros(n_classes, dtype=np.float64)

        # Additive variance floor, scaled to the data. If every feature is
        # constant over the whole data set, fall back to the raw smoothing
        # value so the floor is still strictly positive.
        largest_var = X.var(axis=0).max() if n_features else 0.0
        epsilon = self.var_smoothing * (largest_var if largest_var > 0 else 1.0)

        for i, c in enumerate(self._classes):
            X_for_class_c = X[y == c]
            self._mean[i, :] = X_for_class_c.mean(axis=0)
            self._var[i, :] = X_for_class_c.var(axis=0) + epsilon
            self._priors[i] = X_for_class_c.shape[0] / float(n_samples)

    def _calculate_likelihood(self, class_idx, x):
        """Return the per-feature Gaussian density of ``x`` under class ``class_idx``.

        Kept for callers that want the raw densities. Classification uses
        ``_calculate_log_likelihood`` instead, because these densities
        underflow to 0 for samples far away from the class mean.
        """
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        num = np.exp(- (x - mean) ** 2 / (2 * var))
        denom = np.sqrt(2 * np.pi * var)
        return num / denom

    def _calculate_log_likelihood(self, class_idx, x):
        """Return the per-feature Gaussian log-density of ``x`` under class ``class_idx``.

        The logarithm is evaluated analytically rather than as
        ``log(exp(...))`` so that it stays finite for samples far from the mean.
        """
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        return -0.5 * (np.log(2.0 * np.pi * var) + (x - mean) ** 2 / var)

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric samples to classify.

        Returns
        -------
        numpy.ndarray
            One predicted label per row of ``X``.
        """
        # Convert first: iterating a pandas DataFrame directly would yield
        # column labels instead of rows.
        X = np.asarray(X, dtype=np.float64)
        y_pred = [self._classify_sample(x) for x in X]
        return np.array(y_pred)

    def _classify_sample(self, x):
        """Return the label of the class with the highest log-posterior for ``x``."""
        posteriors = []

        for i, c in enumerate(self._classes):
            prior = np.log(self._priors[i])
            posterior = np.sum(self._calculate_log_likelihood(i, x))
            posteriors.append(prior + posterior)

        return self._classes[np.argmax(posteriors)]

In [4]:
# Train the classifier on the NumPy arrays underlying the pandas training objects
gnb = GaussianNaiveBayes()
gnb.fit(X=X_train.values, y=y_train.values)

In [5]:
# Classify every row of the test matrix with the fitted model
predicted_labels = gnb.predict(X=X_test)

In [None]:
# Build the submission table: the 'SNo' column taken from the test frame,
# followed by a 'Label' column holding the predictions
results = test[['SNo']].assign(Label=predicted_labels)

# Write the table to disk as CSV, leaving out the DataFrame index
results.to_csv('gnb.csv', index=False)