## Naive Bayes Classifier

In [594]:
import numpy as np

In [595]:
def separate_classes(X, Y):
  """Group the rows of X by class label.

  The label of row i is the argmax of Y[i], i.e. the position of the largest
  entry in that row of Y.

  Returns a dict mapping each label to the list of X rows carrying it; keys
  appear in the order in which the labels are first encountered.
  """
  grouped = {}
  for row_idx in range(X.shape[0]):
    label = np.argmax(Y[row_idx], axis=0)
    # setdefault creates the bucket the first time a label shows up.
    grouped.setdefault(label, []).append(X[row_idx])
  return grouped

In [596]:
def classes_info(X, samples):
  """Summarise every class by its prior and per-feature statistics.

  X maps a class label to the collection of feature rows of that class and
  samples is the total number of rows over all classes.

  Returns a dict mapping each label to a dict with
    'prob': share of the rows belonging to the class (len / samples),
    'mean': column-wise mean of the class rows,
    'var':  column-wise sample variance of the class rows (ddof=1).
  """
  return {
    label: {
      'prob': len(rows) / samples,
      'mean': np.mean(rows, axis=0),
      'var': np.var(rows, ddof=1, axis=0),
    }
    for label, rows in X.items()
  }

In [597]:
def fit(X, Y):
  """Estimate the per-class prior, mean and variance from training data.

  The rows of X are grouped by the class encoded in Y (separate_classes) and
  each group is summarised by classes_info, using X.shape[0] as the total
  number of samples. The resulting per-class dict is returned unchanged.
  """
  total_rows = X.shape[0]
  return classes_info(separate_classes(X, Y), total_rows)

In [598]:
def gaussian_distribution(X, mean, var):
  """Evaluate the normal density with the given mean and variance at X.

  The computation is element-wise, with mean and var broadcast against X.
  Every NaN in the result (a zero variance produces one, for instance) is
  replaced by 1.0, the neutral element of a product.
  """
  squared_dev = (X - mean) ** 2
  density = np.exp(-(squared_dev / (2 * var))) / np.sqrt(2 * np.pi * var)
  # copy=False: density is a fresh local array, so patch it in place.
  return np.nan_to_num(density, copy=False, nan=1.0)

In [599]:
def predict(X_train, Y_train, X_test):
  """Compute unnormalised Gaussian naive Bayes posteriors for X_test.

  The model is fitted on (X_train, Y_train) with fit(); the number of classes
  is the number of columns of Y_train. For every class the per-feature
  Gaussian likelihoods of each test row are multiplied together and scaled
  by the class prior.

  Returns an array with one row per test sample and one column per class
  holding prior * likelihood (rows are not normalised to sum to 1).

  A class whose column in Y_train never wins the argmax has no training rows
  and therefore no statistics; its prior is 0, so its score is 0 for every
  test row instead of raising KeyError.

  NOTE: the likelihoods are multiplied directly, so with many features the
  product can underflow to 0.
  """
  classes = Y_train.shape[1]
  test_size = X_test.shape[0]
  info = fit(X_train, Y_train)

  posteriors_numerator = []
  for i in range(classes):
    if i not in info:
      # Class absent from the training data: prior 0 -> numerator 0.
      posteriors_numerator.append(np.zeros(test_size))
      continue
    stats = info[i]
    gd = gaussian_distribution(X_test, stats['mean'], stats['var'])
    # Naive independence assumption: multiply the per-feature likelihoods.
    posteriors_numerator.append(np.prod(gd, axis=1) * stats['prob'])

  # Stacked as (classes, test rows); transpose to (test rows, classes).
  return np.array(posteriors_numerator).T

In [600]:
def normalize(posteriors_numerator):
  """Scale every row of posteriors_numerator so that it sums to 1.

  Each entry is divided by the sum of its row (axis 1); keepdims keeps the
  row sums as a column so the division broadcasts row by row.
  """
  row_totals = np.sum(posteriors_numerator, axis=1, keepdims=True)
  return np.true_divide(posteriors_numerator, row_totals)

In [601]:
def accuracy(Y_real, Y_predict):
  """Return the fraction of predictions that equal the true labels.

  Length-1 axes are dropped from both inputs before comparing, so a label
  column of shape (n, 1) and a prediction vector of shape (n,) are compared
  element by element. Without this, NumPy would broadcast the two into an
  (n, n) mask and the result would be the share of matching *pairs*, not the
  accuracy.

  Raises:
    ValueError: if the two inputs still have different shapes after the
      length-1 axes are removed.
  """
  y_true = np.squeeze(np.asarray(Y_real))
  y_pred = np.squeeze(np.asarray(Y_predict))
  if y_true.shape != y_pred.shape:
    raise ValueError(
        "Y_real and Y_predict must hold the same number of labels; "
        "got shapes {} and {}".format(np.shape(Y_real), np.shape(Y_predict)))
  mask = np.equal(y_true, y_pred)
  return np.count_nonzero(mask) / np.size(mask)

## Train on the male/female example from the slides

In [602]:
# Eight training samples, one per row, with three numeric features each.
X_train = np.array([
    [6,    180, 12],
    [5.92, 190, 11],
    [5.58, 170, 12],
    [5.92, 165, 10],
    [5,    100, 6],
    [5.5,  150, 8],
    [5.42, 130, 7],
    [5.75, 150, 9],
])

# one-hot representation: male, female
# The first four samples are labelled male, the last four female.
Y_train = np.array([[1.0, 0.0]] * 4 + [[0.0, 1.0]] * 4)

# Single sample to classify.
X_test = np.array([[6, 130, 8]])

def onehot_to_class(result):
  """Translate a class index into its label.

  0 maps to "male" and 1 to "female"; any other value gives an empty string.
  """
  if result == 0:
    return "male"
  if result == 1:
    return "female"
  return ""

# Show the three array shapes, one per line.
print(X_train.shape, Y_train.shape, X_test.shape, sep="\n")

(8, 3)
(8, 2)
(1, 3)


In [603]:
# Unnormalised class scores first, then each row rescaled to sum to 1.
posteriors_numerator = predict(X_train, Y_train, X_test)
posteriors = normalize(posteriors_numerator)

# Each report is a title line, the array, and a blank separator line.
print("Posteriors Numerator", posteriors_numerator, "", sep="\n")
print("Posteriors", posteriors, "", sep="\n")

# Most probable class per test row, shown as its text label.
predicted_class = posteriors.argmax(axis=1)
vfunc = np.vectorize(onehot_to_class)
print(vfunc(predicted_class))

Posteriors Numerator
[[6.19707184e-09 5.37790918e-04]]

Posteriors
[[1.15230663e-05 9.99988477e-01]]

['female']


## Q2

In [604]:
# Ten training samples, one per row, with three 0/1-valued features each.
X_train = np.array([
    [0.0, 0.0, 1.0],
    [0.0, 1.0, 1.0],
    [0.0, 1.0, 1.0],
    [1.0, 1.0, 0.0],
    [0.0, 1.0, 0.0],
    [0.0, 1.0, 1.0],
    [1.0, 0.0, 0.0],
    [1.0, 1.0, 0.0],
    [1.0, 0.0, 1.0],
    [1.0, 0.0, 0.0],
])

# one-hot representation: spam, not spam
# The first six samples are labelled spam, the last four not spam.
Y_train = np.array([[1.0, 0.0]] * 6 + [[0.0, 1.0]] * 4)

# Two samples to classify.
X_test = np.array([
    [1.0, 1.0, 0.0],
    [1.0, 1.0, 1.0],
])

def onehot_to_class(result):
  """Translate a class index into its label.

  0 maps to "spam" and 1 to "not spam"; any other value gives an empty
  string.
  """
  if result == 0:
    return "spam"
  if result == 1:
    return "not spam"
  return ""

# Show the three array shapes, one per line.
print(X_train.shape, Y_train.shape, X_test.shape, sep="\n")

(10, 3)
(10, 2)
(2, 3)


In [605]:
# Unnormalised class scores first, then each row rescaled to sum to 1.
posteriors_numerator = predict(X_train, Y_train, X_test)
posteriors = normalize(posteriors_numerator)

# Each report is a title line, the array, and a blank separator line.
print("Posteriors Numerator", posteriors_numerator, "", sep="\n")
print("Posteriors", posteriors, "", sep="\n")

# Most probable class per test row, shown as its text label.
predicted_class = posteriors.argmax(axis=1)
vfunc = np.vectorize(onehot_to_class)
print(vfunc(predicted_class))

Posteriors Numerator
[[0.02203763 0.07295785]
 [0.04117171 0.02683969]]

Posteriors
[[0.2319861  0.7680139 ]
 [0.60536483 0.39463517]]

['not spam' 'spam']


  


## Q4

In [606]:
from sklearn import datasets
iris = datasets.load_iris()

X = iris.data
Y_temp = iris.target

# One-hot encode the integer targets: compare every target (as a column)
# with the row of class indices 0..max, then turn the booleans into 0.0/1.0.
class_ids = np.arange(Y_temp.max() + 1)
Y = (Y_temp[:, None] == class_ids).astype(float)

print(X.shape, Y.shape, Y_temp, sep="\n")

(150, 4)
(150, 3)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [607]:
# X is passed as both the training and the test set, so these predictions
# are for the very samples the model was fitted on.
posteriors_numerator = predict(X, Y, X)
posteriors = normalize(posteriors_numerator)

# Column index of the largest posterior in each row is the predicted class.
predicted_class = posteriors.argmax(axis=1)
print(predicted_class)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1
 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 1 2 2 2 2
 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [608]:
# Compare the integer targets with the predicted class indices and report
# the matching share as a percentage.
acc = accuracy(Y_real=Y_temp, Y_predict=predicted_class)
print("Accuracy: {}%".format(acc * 100))

Accuracy: 96.0%


## Q3

In [609]:
from keras.datasets import cifar10
# Unpack the two pairs returned by load_data() into training and test
# arrays (x_* for the inputs, y_* for the labels).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [610]:
# Data Type
print(x_train.dtype)
print(y_train.dtype)
print(x_test.dtype)
print(y_test.dtype)

uint8
uint8
uint8
uint8


In [611]:
# Rank
print(x_train.ndim)
print(y_train.ndim)
print(x_test.ndim)
print(y_test.ndim)

4
2
4
2


In [612]:
# Shape
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(50000, 32, 32, 3)
(50000, 1)
(10000, 32, 32, 3)
(10000, 1)
