In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [6]:
def preprocess_data(data):
    """Min-max scale ``data`` into the range [0, 1].

    The minimum and maximum are taken with ``data.min()`` / ``data.max()``,
    so the scaling granularity follows the container: a pandas DataFrame is
    scaled per column, while a NumPy array or pandas Series is scaled using
    its single overall minimum and maximum.

    Parameters
    ----------
    data : pandas.DataFrame, pandas.Series or numpy.ndarray
        Numeric data to normalise.

    Returns
    -------
    Same container type as ``data``
        ``(data - min) / (max - min)``. Where ``max == min`` (constant
        column / constant array) the result is 0 instead of NaN.
    """
    lower = data.min()
    span = data.max() - lower

    # A constant column has zero range; dividing by it would yield NaN (0/0).
    # Using 1 as the divisor maps every value of such a column to 0.
    if isinstance(span, pd.Series):
        span = span.mask(span == 0, 1)
    elif np.ndim(span) == 0:
        span = 1 if span == 0 else span
    else:
        span = np.where(span == 0, 1, span)

    return (data - lower) / span


In [7]:
def train_test_split(x, y, test_size=0.2, random_state=42):
    """Randomly split features and labels into training and testing sets.

    Parameters
    ----------
    x : numpy.ndarray, pandas.DataFrame/Series, list or tuple
        Samples; rows are selected positionally.
    y : numpy.ndarray, pandas.Series/DataFrame, list or tuple
        Labels; must have the same length as ``x``.
    test_size : float, default 0.2
        Fraction of samples (between 0 and 1) placed in the test set. The
        test set holds ``int(len(x) * test_size)`` samples.
    random_state : int or None, default 42
        Seed for the shuffle. The default reproduces the permutation that
        ``np.random.seed(42)`` followed by ``np.random.permutation`` gives.
        ``None`` draws a fresh, non-reproducible seed.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in length or ``test_size`` is outside [0, 1].
    """
    n_samples = len(x)
    if len(y) != n_samples:
        raise ValueError(
            f"x and y must have the same length, got {n_samples} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size}")

    # A private generator keeps the split reproducible without reseeding
    # NumPy's global RNG, which would silently affect later random calls.
    rng = np.random.RandomState(random_state)
    shuffled_indices = rng.permutation(n_samples)

    test_set_size = int(n_samples * test_size)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]

    x_train = _take_rows(x, train_indices)
    x_test = _take_rows(x, test_indices)
    y_train = _take_rows(y, train_indices)
    y_test = _take_rows(y, test_indices)
    return x_train, x_test, y_train, y_test


def _take_rows(data, indices):
    """Select rows of ``data`` by integer position for arrays, pandas objects and sequences."""
    if hasattr(data, "iloc"):
        # pandas: plain [] would treat the integers as column/index labels.
        return data.iloc[indices]
    if isinstance(data, (list, tuple)):
        return np.asarray(data)[indices]
    return data[indices]


In [8]:

def calculate_prior_probabilities(y_train):
    """Estimate class priors as relative label frequencies.

    Parameters
    ----------
    y_train : array-like
        Training labels.

    Returns
    -------
    dict
        Maps each distinct label found in ``y_train`` to
        ``count(label) / len(y_train)``.
    """
    labels, counts = np.unique(y_train, return_counts=True)
    n_total = len(y_train)
    # One entry per distinct label: its share of the training samples.
    return {lbl: cnt / n_total for lbl, cnt in zip(labels, counts)}

In [None]:
def calculate_likelihoods(x_train, y_train):
    """Compute per-class, per-feature mean and standard deviation.

    Parameters
    ----------
    x_train : array-like
        Training samples, one row per sample. NumPy arrays and pandas
        DataFrames are both accepted (converted with ``np.asarray``).
    y_train : array-like
        Training labels, one per row of ``x_train``. A column vector of
        shape ``(n, 1)`` is flattened.

    Returns
    -------
    dict
        ``{label: {'mean': ndarray, 'std': ndarray}}`` where both arrays are
        computed along axis 0 of the rows belonging to ``label``. ``std`` is
        the population standard deviation (``np.std`` default, ``ddof=0``).

    Raises
    ------
    ValueError
        If ``x_train`` and ``y_train`` contain a different number of samples.
    """
    features = np.asarray(x_train)
    # Flatten so a (n, 1) label column yields a 1-D row mask rather than a
    # pair of (row, col) indices that would pick single elements.
    labels = np.asarray(y_train).ravel()
    if len(features) != len(labels):
        raise ValueError(
            f"x_train and y_train must have the same number of samples, "
            f"got {len(features)} and {len(labels)}"
        )

    likelihoods = {}
    for label in np.unique(labels):
        label_data = features[labels == label]
        likelihoods[label] = {
            'mean': np.mean(label_data, axis=0),
            'std': np.std(label_data, axis=0)
        }
    return likelihoods

In [9]:
def predict_naive_bayes(x_test, prior_probabilities, likelihoods, min_std=1e-9):
    """Predict class labels with Gaussian Naive Bayes.

    For every class the score ``log(prior) + sum_j log N(x_j | mean_j, std_j)``
    is computed and the class with the highest score is chosen. Working with
    log-probabilities avoids the underflow to 0 that a product of many small
    densities suffers from. Ties go to the class that appears first in
    ``likelihoods``.

    Parameters
    ----------
    x_test : array-like
        Samples to classify, one row per sample (NumPy array or pandas
        DataFrame). A 1-D input is treated as one value per sample.
    prior_probabilities : dict
        ``{label: prior}``; must contain every label in ``likelihoods``.
    likelihoods : dict
        ``{label: {'mean': array, 'std': array}}`` with per-feature
        Gaussian parameters.
    min_std : float, default 1e-9
        Lower bound applied to every standard deviation so that a
        zero-variance feature does not cause a division by zero.

    Returns
    -------
    numpy.ndarray
        Predicted label for each sample, in input order.
    """
    samples = np.asarray(x_test, dtype=float)
    if samples.ndim == 1:
        samples = samples.reshape(-1, 1)

    labels = list(likelihoods)
    log_posteriors = []
    for label in labels:
        stats = likelihoods[label]
        mean = np.asarray(stats['mean'], dtype=float)
        std = np.maximum(np.asarray(stats['std'], dtype=float), min_std)

        # Log of the Gaussian density, summed over (assumed independent) features.
        z = (samples - mean) / std
        log_likelihood = np.sum(
            -0.5 * z ** 2 - np.log(std) - 0.5 * np.log(2 * np.pi),
            axis=1,
        )

        # A zero prior legitimately maps to -inf; silence the log(0) warning.
        with np.errstate(divide='ignore'):
            log_prior = np.log(prior_probabilities[label])
        log_posteriors.append(log_prior + log_likelihood)

    # Rows are classes, columns are samples; argmax returns the first maximum.
    best = np.argmax(np.stack(log_posteriors), axis=0)
    return np.asarray(labels)[best]
   
