In [None]:
# Question 2

In [1]:
import csv
import numpy as np

# Load the dataset: read every data row of the CSV into a list, keeping the
# header so the label column can be located by name.
data_list = []
# newline='' lets the csv module do its own newline handling, as the csv
# documentation recommends for files passed to csv.reader.
with open('data.csv', 'r', newline='') as file:
    reader = csv.reader(file)
    header = next(reader)  # First row holds the column names
    header_index = header.index('target')  # Position of the 'target' column

    # csv.reader yields an empty list for a blank line (e.g. a trailing empty
    # line); such rows would make the data ragged and break the float
    # conversion below, so they are skipped.
    data_list.extend(row for row in reader if row)

# Convert the rows (lists of strings) to a NumPy array of float64 values
data = np.array(data_list, dtype=np.float64)

# Calculate prior
def calculate_prior(df, target, target_index=None):
    """Estimate the prior probability P(y) of every class label.

    Parameters
    ----------
    df : np.ndarray
        2-D array whose rows are samples; one column holds the class labels.
    target : str
        Name of the label column. Kept for backward compatibility; the column
        is located by index, not by this name.
    target_index : int, optional
        Index of the label column in ``df``. When omitted, the module-level
        ``header_index`` is used, which is what existing callers rely on.

    Returns
    -------
    list of float
        Relative frequency of each class, ordered by ascending label value.
        Empty when ``df`` has no rows.
    """
    if target_index is None:
        target_index = header_index

    # np.unique returns the labels already sorted, with the occurrence count
    # of each one, so no per-class boolean mask is needed.
    _, class_counts = np.unique(df[:, target_index], return_counts=True)
    total_samples = len(df)

    return (class_counts / total_samples).tolist()

# Class priors estimated on the complete dataset, shown as a quick sanity check
priors = calculate_prior(df=data, target='target')
print("Prior Probabilities:", priors)

def calculate_likelihood_gaussian(df, feature, feat_value, header_index, label, min_std=1e-9):
    """Gaussian class-conditional density p(x | y) of a single feature.

    The mean and (population, ddof=0) standard deviation of column ``feature``
    are estimated from the rows of ``df`` whose label equals ``label``, and
    the normal density with those parameters is evaluated at ``feat_value``.

    Parameters
    ----------
    df : np.ndarray
        2-D array of samples, including the label column.
    feature : int
        Column index of the feature in ``df``.
    feat_value : float or np.ndarray
        Value(s) at which to evaluate the density; arrays are handled
        element-wise by broadcasting.
    header_index : int
        Column index of the class labels in ``df``.
    label : float
        Class whose rows are used to estimate the distribution.
    min_std : float, optional
        Lower bound applied to the estimated standard deviation.

    Returns
    -------
    float or np.ndarray
        Density value(s), same shape as ``feat_value``.
    """
    df_class = df[df[:, header_index] == label]
    feature_values = df_class[:, feature]
    mean, std = feature_values.mean(), feature_values.std()

    # A feature that is constant within the class has std == 0, which makes
    # the formula below evaluate to NaN (inf * 0 or inf * NaN). Flooring the
    # deviation keeps the density finite: very large at the mean, ~0 elsewhere.
    if std < min_std:
        std = min_std

    p_x_given_y = (1 / (np.sqrt(2 * np.pi) * std)) * np.exp(-((feat_value - mean) ** 2 / (2 * std ** 2)))
    return p_x_given_y

def naive_bayes_gaussian(df, X, Y):
    """Predict a class label for every row of ``X`` with Gaussian Naive Bayes.

    Parameters
    ----------
    df : np.ndarray
        Training samples, including the label column at the module-level
        ``header_index``.
    X : np.ndarray
        2-D array of samples to classify. Column ``i`` of ``X`` is scored
        against the statistics of column ``i`` of ``df``, so the feature
        columns must sit at the same positions in both arrays.
    Y : str
        Name of the label column; forwarded to ``calculate_prior``.

    Returns
    -------
    np.ndarray
        Predicted label for each row of ``X``.
    """
    if len(X) == 0:
        return np.array([])

    labels = sorted(np.unique(df[:, header_index]))

    # The priors depend only on the training data, so compute them once
    # instead of once per test sample.
    prior = np.asarray(calculate_prior(df, Y), dtype=np.float64)

    # Work in log space: summing log-densities avoids the underflow that a
    # product of many small densities can hit (which would zero out every
    # class and make the argmax meaningless). A density of exactly 0 becomes
    # -inf, which is the intended score, so the log(0) warning is silenced.
    with np.errstate(divide='ignore'):
        log_posterior = np.tile(np.log(prior), (X.shape[0], 1))
        for j, label in enumerate(labels):
            for i in range(X.shape[1]):
                # Passing the whole column evaluates all samples at once, so
                # the class statistics are estimated once per
                # (feature, class) pair rather than once per sample.
                density = calculate_likelihood_gaussian(df, i, X[:, i], header_index, label)
                log_posterior[:, j] += np.log(density)

    best_class = np.argmax(log_posterior, axis=1)
    return np.asarray(labels)[best_class]

from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for evaluation; the fixed random_state keeps the
# split the same on every run
train, test = train_test_split(data, test_size=0.1, random_state=41)

# The slices below, and the way naive_bayes_gaussian matches column i of X
# with column i of the training array, are only valid when the label is the
# last column. Fail fast instead of silently scoring against the wrong columns.
assert header_index == data.shape[1] - 1, "'target' must be the last column of data.csv"

# Separate the held-out features (all but the last column) from their labels
X_test = test[:, :-1]
Y_test = test[:, -1]

# Fit on the training rows and predict the held-out rows
Y_pred = naive_bayes_gaussian(train, X_test, 'target')

from sklearn.metrics import confusion_matrix, accuracy_score

# Score the predictions against the held-out labels: overall accuracy and the
# confusion matrix of true versus predicted classes
accuracy = accuracy_score(Y_test, Y_pred)
confusion = confusion_matrix(Y_test, Y_pred)

# Report both metrics, with the matrix on the lines after its caption
print("Confusion Matrix:", confusion, sep="\n")
print("Accuracy:", accuracy)


Prior Probabilities: [0.45544554455445546, 0.5445544554455446]
Confusion Matrix:
[[11  2]
 [ 0 18]]
Accuracy: 0.9354838709677419
