In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pprint import pprint
from tqdm import tqdm

In [None]:
def data_loader_wine(path):
  """Read a CSV file and split it into a feature matrix and a label vector.

  Every column except the last one is treated as a feature; the last
  column holds the label.

  Args:
    path: Anything `pd.read_csv` accepts (a file path or file-like object).

  Returns:
    A `(feature, label)` tuple: a 2-D array with all columns but the last,
    and a 1-D array with the values of the last column.
  """
  frame = pd.read_csv(path)
  feature_columns = frame.iloc[:, :-1]
  label_column = frame.iloc[:, -1:]

  # The label slice is an (n, 1) block; flatten it to shape (n,).
  return np.array(feature_columns), np.array(label_column).ravel()

In [None]:
def data_loader(path):
  """Read a CSV file that has a 'label' column and split it into arrays.

  Features are every column after the first one; labels are read from the
  column named 'label'.
  NOTE(review): this presumably expects 'label' to be the first column,
  otherwise it would also appear among the features -- confirm against the
  data files.

  Args:
    path: Anything `pd.read_csv` accepts (a file path or file-like object).

  Returns:
    A `(feature, label)` tuple: a 2-D array of the columns after the first,
    and a 1-D array of the 'label' column.
  """
  table = pd.read_csv(path)

  label = np.array(table['label'])
  feature = np.array(table.iloc[:, 1:])

  return feature, label

In [None]:
def get_GaussianNBC(train_samples, train_labels, num_classes=10):
  """Estimate per-class, per-feature Gaussian parameters for naive Bayes.

  Args:
    train_samples: 2-D array-like of shape (n_samples, n_features).
    train_labels: Integer class label of each sample, in
      [0, num_classes). Labels beyond the number of samples are ignored.
    num_classes: Number of classes to estimate parameters for.

  Returns:
    A `(means_by_classes, stds_by_classes)` tuple of two lists with
    `num_classes` entries each. Entry C is the list of per-feature means
    (respectively population standard deviations, ddof=0) of the samples
    labelled C, or an empty list when no sample carries label C.

  Raises:
    IndexError: If a label lies outside [0, num_classes). Negative labels
      are rejected explicitly because list-style negative indexing would
      otherwise file them under the wrong class without any error.
  """
  samples = np.asarray(train_samples)
  # Only the first n_samples labels are relevant.
  labels = np.asarray(train_labels)[:samples.shape[0]]

  if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
    raise IndexError('train_labels must lie in [0, num_classes)')

  means_by_classes = []
  stds_by_classes = []

  for C in range(num_classes):
    # Boolean mask selects every row of class C in one vectorized step.
    members = samples[labels == C]

    if members.shape[0] == 0:
      # No training data for this class: keep the "no statistics" marker.
      means_by_classes.append([])
      stds_by_classes.append([])
      continue

    means_by_classes.append(list(members.mean(axis=0)))
    stds_by_classes.append(list(members.std(axis=0)))

  return means_by_classes, stds_by_classes

In [None]:
def predict(means, stds, test_samples, num_classes=10):
  """Predict the most likely class of every test sample.

  For each sample and class, the per-feature densities returned by
  `Gaussian_PDF` (defined elsewhere in this notebook) are combined in log
  space: the score of a class is the sum of the logs of its densities.
  Multiplying the raw densities underflows to 0 once there are many
  features, which makes every class tie; summing logs keeps the classes
  distinguishable while preserving the same ordering.

  No class-prior term is applied, and NaN densities are skipped.

  Args:
    means: Per-class sequences of per-feature means (indexable by class).
    stds: Per-class sequences of per-feature standard deviations.
    test_samples: Array whose first axis enumerates the samples.
    num_classes: Number of classes to score.

  Returns:
    A list with one predicted class index (int) per sample. Ties go to the
    lowest class index.
  """
  pred_classes = []

  for i in range(test_samples.shape[0]):
    x = test_samples[i]
    log_likelihoods = np.empty(num_classes)

    for C in range(num_classes):
      densities = Gaussian_PDF(x, means[C], stds[C])
      # log(0) = -inf is the intended score for an impossible feature value,
      # so the divide/invalid warnings it triggers are silenced here.
      with np.errstate(divide='ignore', invalid='ignore'):
        log_likelihoods[C] = np.nansum(np.log(densities))

    # An undefined score (e.g. +inf and -inf terms cancelling) must never win.
    log_likelihoods[np.isnan(log_likelihoods)] = -np.inf

    pred_classes.append(int(np.argmax(log_likelihoods)))

  return pred_classes

In [None]:
def Gaussian_PDF(x, mean, std):
  """Elementwise Gaussian density of `x` under N(mean, std**2).

  Where `std` equals 0 the density is replaced by a point mass: 1.0 if
  `x == mean`, else 0.0.

  Note: that replacement is selected elementwise only when `std == 0.0`
  itself is elementwise (NumPy array or scalar `std`). For a plain Python
  list the comparison is a single False, so zero-std positions come back as
  NaN; callers that aggregate with NaN-skipping reductions effectively
  ignore those features.

  Args:
    x: Value(s) at which to evaluate the density.
    mean: Mean(s), broadcastable against `x`.
    std: Standard deviation(s), broadcastable against `x`.

  Returns:
    NumPy array of densities with the broadcast shape of the inputs.
  """
  # np.where evaluates both branches eagerly, so the general formula is also
  # computed where std == 0. Those divisions by zero are expected and their
  # results are discarded by the final selection, so silence the warnings.
  with np.errstate(divide='ignore', invalid='ignore'):
    variance = np.power(std, 2)
    density = 1 / np.sqrt(2 * np.pi * variance) * np.exp(-np.power(x - mean, 2) / (2 * variance))

  point_mass = np.where(x == mean, 1.0, 0.0)
  return np.where(std == 0.0, point_mass, density)

In [None]:
def get_Acc(pred, label):
  """Return the accuracy, in percent, of `pred` against `label`.

  Args:
    pred: Sequence of predicted classes.
    label: Sequence of reference classes, comparable elementwise to `pred`.

  Returns:
    The share of positions where both agree, scaled to the range 0-100.

  Raises:
    ZeroDivisionError: If the inputs are empty.
  """
  matches = np.equal(pred, label)
  n_correct = sum(1 for hit in matches if hit)
  return n_correct / len(matches) * 100

In [None]:
def min_max_norm(input):
  """Rescale `input` linearly so its minimum maps to 0 and its maximum to 1.

  For a NumPy array a single minimum and maximum over the whole array are
  used (not one pair per column).

  A zero range (all values identical) would divide by zero; in that case
  the range is treated as 1, so the result is 0 instead of NaN.

  Args:
    input: Object exposing `.min()` / `.max()` and arithmetic, e.g. a NumPy
      array.

  Returns:
    The rescaled values, with the same shape as `input`.
  """
  lowest = input.min()
  highest = input.max()
  span = highest - lowest

  # Replace a zero range with 1 so constant inputs normalize to 0.
  safe_span = np.where(span == 0, 1, span)

  return (input - lowest) / safe_span

In [None]:
# Fashion-MNIST train/test CSV files (located under /content/drive/MyDrive).
train_path = '/content/drive/MyDrive/기계학습데이터/fashion-mnist_train.csv'
test_path = '/content/drive/MyDrive/기계학습데이터/fashion-mnist_test.csv'

# data_loader returns (features, labels): labels come from the 'label'
# column, features are every column after the first.
train_data, train_label = data_loader(train_path)
test_data, test_label = data_loader(test_path)

In [None]:
# Show the training sample at index 1 as a 28x28 grayscale image.
plt.imshow(train_data[1,].reshape((28, 28)), cmap='gray')

In [None]:
# Scale both splits with the TRAINING set's min/max. Normalizing the test
# set with its own statistics would put the two splits on different scales
# whenever their value ranges differ.
train_min, train_max = train_data.min(), train_data.max()
train_data = min_max_norm(train_data)
test_data = (test_data - train_min) / (train_max - train_min)

# Fit per-class Gaussian parameters on the training split, then classify
# every test sample.
means, stds = get_GaussianNBC(train_data, train_label)
pred_classes = predict(means, stds, test_data)

In [None]:
# Accuracy (in percent) of the predictions against the test labels.
acc = get_Acc(pred_classes, test_label)
print('ACC:', acc)

In [None]:
# Wine train/test CSV files (located under /content/drive/MyDrive).
train_set_path = '/content/drive/MyDrive/기계학습데이터/wine_train.csv'
test_set_path = '/content/drive/MyDrive/기계학습데이터/wine_test.csv'

# data_loader_wine returns (features, labels) with the last CSV column as
# the label.
train_samples, train_labels = data_loader_wine(train_set_path)
test_samples, test_labels = data_loader_wine(test_set_path)

# The classifier is fit and evaluated on the raw wine features. No
# normalized copies are written to train_data / test_data here: they would
# not be used by the model and would overwrite the Fashion-MNIST arrays of
# the same name, breaking a re-run of the earlier cells.
means_by_classes, stds_by_classes = get_GaussianNBC(train_samples, train_labels, 3)
pred_classes = predict(means_by_classes, stds_by_classes, test_samples, 3)

acc = get_Acc(pred_classes, test_labels)
print('Acc:', acc)