In [4]:
import csv
import numpy as np

def load_data(filename):
    """Load a labelled data set from a CSV file.

    The first row is treated as a header and skipped. In every following
    row, all columns except the last are instance attributes and the last
    column is the integer class label. Blank lines are ignored.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A tuple ``(X, y)``. ``X`` is a float64 array built from the attribute
        columns (one row per instance) and ``y`` is an array of the labels.
        Both arrays are empty when the file contains no data rows.

    Raises:
        ValueError: If a label cannot be converted to ``int`` or an
            attribute cannot be converted to ``float``.
    """
    X = []
    y = []
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with open(filename, 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row; tolerate an empty file
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line; indexing
                # row[-1] on it would raise IndexError.
                continue
            X.append(row[:-1])  # Instance attributes
            y.append(int(row[-1]))  # Instance label
    return np.array(X, dtype=np.float64), np.array(y)

def fit_and_predict_naive_bayes(X_train, y_train, X_test):
    """Fit a categorical Naive Bayes model and classify the test samples.

    Every distinct value of a feature column is treated as its own category.
    Conditional probabilities use add-one (Laplace) smoothing::

        P(value | class) = (count(value, class) + 1)
                           / (count(class) + number of distinct training values)

    A test value that never occurs in the training column gets the same
    formula with a count of zero, i.e. ``1 / denominator``.

    Args:
        X_train: 2-D array-like of training attributes, one row per instance.
        y_train: 1-D array-like of class labels, aligned with ``X_train``.
        X_test: Iterable of samples to classify, each a sequence of attribute
            values in the same column order as ``X_train``.

    Returns:
        A tuple ``(predictions, probabilities)``. ``predictions`` is a list
        holding the highest-scoring class label for each test sample.
        ``probabilities`` is a list with one dict per test sample mapping
        each class label to ``prior * product of conditional probabilities``.
        These scores are not normalised, so they do not sum to one.
    """
    # Accept plain Python sequences as well as arrays.
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    num_samples, num_features = X_train.shape
    classes = np.unique(y_train)

    # The distinct values of a column do not depend on the class, so they are
    # computed once instead of once per class (and once per test lookup).
    columns = [X_train[:, feature_idx] for feature_idx in range(num_features)]
    distinct_values = [np.unique(column) for column in columns]

    class_probs = {}    # Prior probability of each class
    feature_probs = {}  # Per class: one {value: P(value | class)} dict per feature
    unseen_probs = {}   # Per class: smoothed fallback probability per feature

    for c in classes:
        in_class = y_train == c
        class_count = np.sum(in_class)
        class_probs[c] = class_count / num_samples

        feature_probs[c] = []
        unseen_probs[c] = []
        for column, values in zip(columns, distinct_values):
            total_count = class_count + len(values)
            feature_probs[c].append({
                val: (np.sum((column == val) & in_class) + 1) / total_count
                for val in values
            })
            unseen_probs[c].append(1 / total_count)

    # Predict for test data
    predictions = []
    probabilities = []
    for sample in X_test:
        probs = {}
        for c, class_prob in class_probs.items():
            score = class_prob
            for feature_idx, feature_val in enumerate(sample):
                # Fall back to the smoothed zero-count probability for values
                # that were never seen in this training column.
                score *= feature_probs[c][feature_idx].get(
                    feature_val, unseen_probs[c][feature_idx]
                )
            probs[c] = score
        # On ties max() keeps the first key, i.e. the smallest class label,
        # because classes were inserted in np.unique's sorted order.
        predictions.append(max(probs, key=probs.get))
        probabilities.append(probs)

    return predictions, probabilities

# Example usage:
if __name__ == "__main__":
    # Demo run: train on the instances stored in 'naive.csv' and classify two
    # hand-written test instances.
    filename = 'naive.csv'
    # X holds the attribute columns as floats, y the integer labels taken
    # from the last CSV column.
    X, y = load_data(filename)
    # Two test instances with two attribute values each; assumes naive.csv
    # also has exactly two attribute columns -- TODO confirm against the file.
    test_data = np.array([[40, 85], [35, 80]])
    predictions, probabilities = fit_and_predict_naive_bayes(X, y, test_data)
    # predictions: one class label per test instance.
    # probabilities: one {class label: score} dict per test instance.
    print("Predictions:", predictions)
    print("Probabilities:", probabilities)

  


Predictions: [0, 1]
Probabilities: [{0: 0.031746031746031744, 1: 0.004223227752639518}, {0: 0.004232804232804232, 1: 0.012669683257918552}]
