In [95]:
import csv
import math
import numpy as np

In [96]:
# Locations of the two wine dataset splits (training and held-out test CSVs).
train_path = 'wine_train.csv'
test_path = 'wine_test.csv'

In [97]:
def data_loader(path):
	"""Read a headerless numeric CSV and split every row into features and label.

	Each cell is parsed as a float. The last column of a row is taken as the
	class label (converted to ``int``); the remaining columns are the features.
	Completely blank lines are skipped.

	Args:
		path: Path of the CSV file to read.

	Returns:
		A ``(feature_info, label_info)`` tuple: a list with one list of float
		features per sample, and the list of matching integer labels.

	Raises:
		ValueError: If a cell cannot be parsed as a float.
	"""
	feature_info = list()
	label_info = list()

	# The context manager closes the file handle even when parsing fails;
	# newline='' is what the csv module asks for when it is given a file object.
	with open(path, 'rt', newline='') as csv_file:
		for row in csv.reader(csv_file):
			# csv.reader yields [] for blank lines (e.g. a trailing newline at
			# the end of the file); they hold no sample, so skip them instead
			# of failing on pop() from an empty list.
			if not row:
				continue
			sample = list(map(float, row))
			label_info.append(int(sample.pop()))
			feature_info.append(sample)

	return feature_info, label_info

In [98]:
def get_GaussianNBC(train_samples, train_labels, numOf_classes=3):
	"""Estimate the per-class Gaussian parameters of a naive Bayes classifier.

	Samples are grouped by label, then the mean and the standard deviation
	(``np.std`` with its default ``ddof=0``) of every feature are computed
	within each group.

	Args:
		train_samples: Sequence of feature vectors, one per training sample.
		train_labels: Class label of each sample, aligned by position.
			Samples whose label is not in ``0 .. numOf_classes - 1`` are
			ignored.
		numOf_classes: Number of classes to model. Defaults to 3, the value
			that was previously hard-coded.

	Returns:
		``(means_by_classes, stdev_by_classes)``: two lists indexed as
		``[class][feature]``. A class without any training sample gets an
		empty list in both.
	"""
	# One bucket per class. Looking the label up in a dict keeps the original
	# ``label == C`` matching semantics (e.g. a label of 1.0 still lands in
	# class 1) while removing the need for one named list per class.
	samples_by_classes = {C: [] for C in range(numOf_classes)}

	for k, sample in enumerate(train_samples):
		label = train_labels[k]
		if label in samples_by_classes:
			samples_by_classes[label].append(sample)

	means_by_classes = []
	stdev_by_classes = []

	for C in range(numOf_classes):
		means = []
		stdevs = []
		# zip(*rows) transposes the sample rows into per-feature columns.
		for features in zip(*samples_by_classes[C]):
			means.append(np.mean(features))
			stdevs.append(np.std(features))
		means_by_classes.append(means)
		stdev_by_classes.append(stdevs)

	return means_by_classes, stdev_by_classes

In [99]:
def Gaussian_PDF(x, mean, stdev):
	"""Evaluate the normal density with the given mean and stdev at ``x``.

	A ``stdev`` of zero is handled as a special case: the result is 1.0 when
	``x`` equals ``mean`` and 0.0 otherwise.
	"""
	if stdev != 0.0:
		variance = stdev**2
		numerator = np.exp(-(x - mean)**2 / (2 * variance))
		denominator = np.sqrt(2 * np.pi * variance)
		return numerator / denominator

	# Zero spread: all the mass sits exactly on the mean.
	return 1.0 if x == mean else 0.0

In [100]:
def predict(means, stdevs, test_samples):
	"""Assign each test sample to the class with the highest Gaussian likelihood.

	For every class the per-feature densities returned by ``Gaussian_PDF`` are
	combined under the naive independence assumption. The combination is done
	as a sum of log-densities rather than a raw product, so the score does not
	underflow to 0.0 for every class when there are many features or very
	small densities. No class prior is applied.

	Args:
		means: Per-class feature means, indexed ``[class][feature]``.
		stdevs: Per-class feature standard deviations, same layout as ``means``.
		test_samples: Sequence of feature vectors to classify.

	Returns:
		A list with one predicted class index per test sample. Ties go to the
		lowest class index; if no class has a positive likelihood the sample is
		assigned to class 0.

	Raises:
		ValueError: If there are samples to classify but ``means`` is empty.
	"""
	# The number of classes and features comes from the fitted parameters
	# instead of being fixed to 3 classes x 13 features.
	numOf_classes = len(means)
	if numOf_classes == 0 and len(test_samples) > 0:
		raise ValueError('means must describe at least one class')

	pred_classes = []
	for sample in test_samples:
		# Reset the winner for every sample so that a row whose scores are all
		# unusable (e.g. NaN features) can never silently inherit the label
		# chosen for the previous sample.
		pred_Label = 0
		bestLogProb = -math.inf

		for C in range(numOf_classes):
			log_prob = 0.0
			for j in range(len(means[C])):
				density = Gaussian_PDF(sample[j], means[C][j], stdevs[C][j])
				# A zero (or NaN) density rules the class out; log() of it is
				# undefined, so record -inf explicitly and stop early.
				if not density > 0:
					log_prob = -math.inf
					break
				log_prob += math.log(density)

			# Strict '>' keeps the first class on ties.
			if log_prob > bestLogProb:
				bestLogProb = log_prob
				pred_Label = C
		pred_classes.append(pred_Label)
	return pred_classes



In [101]:
def get_Acc(pred_classes_of_testset, gt_of_testset):
	"""Return the percentage (0-100) of predictions equal to the ground truth.

	The two sequences are compared element-wise with ``np.equal``.
	"""
	matches = np.equal(pred_classes_of_testset, gt_of_testset)
	# Count the positions where prediction and ground truth agree.
	hits = sum(1 for is_match in matches if is_match)
	return hits / len(matches) * 100

In [102]:
# Read both CSV splits; data_loader returns a (features, labels) pair per file.
train_samples, train_labels = data_loader(train_path)
test_samples, test_labels = data_loader(test_path)

In [103]:
# Fit step: per-class, per-feature means and standard deviations of the training split.
means_by_classes, stdev_by_classes = get_GaussianNBC(train_samples, train_labels)

In [104]:
# Classify every test sample with the fitted per-class Gaussian parameters.
pred_classes = predict(means_by_classes, stdev_by_classes, test_samples)

In [105]:
# Percentage of test predictions that match the ground-truth labels.
acc = get_Acc(pred_classes, test_labels)
print(f'Acc: {acc}')

Acc: 93.25842696629213
