<a href="https://colab.research.google.com/github/YuxingW/deep_learning/blob/main/hw4/part_1_mnist_classifier_with_numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

###Three dimention data

####MSE function

In [2]:
class MSE:
  def __call__(self, y_pred, y_true):
    self.y_pred = y_pred
    self.y_true = y_true
    return ((y_pred - y_true) ** 2).mean()

  def backward(self):
    n = self.y_true.shape[0]
    self.gradient = 2. * (self.y_pred - self.y_true) / n
    #print('MSE backward', self.y_pred.shape, self.y_true.shape, self.gradient.shape)
    return self.gradient

#### Fit fuction

In [3]:
from typing import Callable

def fit(x: np.ndarray, y: np.ndarray, model: Callable, loss: Callable, lr: float, num_epochs: int):
  for epoch in range(num_epochs):
    y_pred = model(x)
    loss_value = loss(y_pred, y)
    if epoch % 200 == 0:
      print(f'Epoch {epoch}, loss {loss_value}')
    gradient_from_loss = loss.backward()
    model.backward(gradient_from_loss)
    model.update(lr)

In [4]:
batch_size = 100

def fit_by_batch(x: np.ndarray, y: np.ndarray, x_test: np.ndarray, y_test: np.ndarray, model: Callable, loss: Callable, lr: float, num_epochs: int):
  for epoch in range(num_epochs):
    correct_cnt = 0
    for i in range(int(len(x) / batch_size)):
      batch_start, batch_end = ((i * batch_size), ((i+1)*batch_size))
      x_batch = x[batch_start: batch_end]
      y_batch = y[batch_start: batch_end]
      y_pred_batch = model(x_batch)
      loss_value = loss(y_pred_batch, y_batch)
      gradient_from_loss = loss.backward()
      model.backward(gradient_from_loss)
      model.update(lr)
      for k in range(batch_size):
        correct_cnt += int(np.argmax(y_pred_batch[k:k+1]) == np.argmax(y_batch[k:k+1]))

    if epoch % 100 == 0:
      test_correct_cnt = 0
      y_pred_test = model(x_test)
      for k in range(len(y_test)):
        test_correct_cnt += int(np.argmax(y_pred_test[k:k+1]) == np.argmax(y_test[k:k+1]))
      print(f'Epoch {epoch},loss {loss_value}, correct_rate {correct_cnt/float(len(y))}, test_correct_rate {test_correct_cnt/float(len(y_test))}')




#### Plot tsne function

In [5]:
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def plot_comparison(y_true, y_pred):
  #tsne = TSNE(n_components=2, perplexity=50, learning_rate=100, random_state=120)
  tsne = TSNE(n_components=2, random_state=0)
  x_2d = tsne.fit_transform(x)
  yt_2d = tsne.fit_transform(y_true)
  yp_2d = tsne.fit_transform(y_pred)

  plt.figure(figsize=(6, 5))
  plt.scatter(yt_2d[:, 0], yt_2d[:, 1], c='b', label='y_true')
  plt.scatter(yp_2d[:, 1], yp_2d[:, 1], c='y', label='y_pred')
  plt.legend()
  plt.title('TSNE Y_True Y_Pred Comparison')
  plt.xlabel('t_SNE1')
  plt.xlabel('t_SNE2')
  plt.show()

def plot_distribution(x, y_true, y_pred):
  tsne = TSNE(n_components=1, random_state=0)
  x_2d = tsne.fit_transform(x)
  yt_2d = tsne.fit_transform(y_true)
  yp_2d = tsne.fit_transform(y_pred)

  plt.figure(figsize=(6, 5))
  plt.scatter(x_2d[:, 0], yt_2d[:, 0], c='b', label='y_true')
  plt.scatter(x_2d[:, 0], yp_2d[:, 0], c='y', label='y_pred')
  plt.legend()
  plt.title('TSNE Y_True Y_Pred Distribution with X')
  plt.show()

def plot_3d(x, y, y_pred=None):
  import matplotlib.pyplot as plt
  from mpl_toolkits.mplot3d import Axes3D
  fig = plt.figure()
  ax = fig.add_subplot(111, projection='3d')
  ax.scatter(x[:, 0], x[:, 1], y, label='y_true')
  if y_pred is not None:
    ax.scatter(x[:, 0], x[:, 1], y_pred, label='y_pred')
  plt.legend()

####Linear model

In [6]:
class Linear:
  def __init__(self, input_dim: int, num_hidden: int = 1):
    self.weights = np.random.randn(input_dim, num_hidden) * np.sqrt(2. / input_dim)
    self.bias = np.zeros(num_hidden)
  
  def __call__(self, x):
    self.x = x
    output = x @ self.weights + self.bias
    return output

  def backward(self, gradient):
    self.weights_gradient = self.x.T @ gradient
    self.bias_gradient = gradient.sum(axis=0)
    self.x_gradient = gradient @ self.weights.T
    return self.x_gradient

  def update(self, lr):
    self.weights = self.weights - lr * self.weights_gradient
    self.bias = self.bias - lr * self.bias_gradient

#### Non-Linear model generation


3 layers neural network using relu nonlinearity

In [7]:
class Relu:
    def __call__(self, input_, dropout_rate=0):
        if dropout_rate > 0:
          dropout_mask = np.random.binomial(1, 1-dropout_rate, size=input_.shape)
          input_ *= dropout_mask * 2
        self.input_ = input_
        self.output = np.clip(self.input_, 0, None)
        return self.output
    
    def backward(self, output_gradient):
      # import pdb; pdb.set_trace()  # By the way, this is how you can debug
      self.input_gradient = (self.input_ > 0) * output_gradient
      return self.input_gradient

class Model:
  def __init__(self, input_dim, num_hidden, ouput_dim):
    self.linear1 = Linear(input_dim, num_hidden)
    self.relu1 = Relu()
    self.relu2 = Relu()
    self.linear2 = Linear(num_hidden, ouput_dim)
  
  def __call__(self, x):
    l1 = self.linear1(x)
    r1 = self.relu1(l1, dropout_rate=0.05)
    r2 = self.relu2(r1, dropout_rate=0.1)
    l2 = self.linear2(r2)
    return l2
  
  def backward(self, output_gradient):
    linear2_gradient = self.linear2.backward(output_gradient)
    relu2_gradient = self.relu2.backward(linear2_gradient)
    relu1_gradient = self.relu1.backward(relu2_gradient)
    linear1_gradient = self.linear1.backward(relu1_gradient)
    # print('Model backward', linear2_gradient.shape, relu_gradient.shape, linear1_gradient.shape)
    # import pdb; pdb.set_trace()
    return linear1_gradient

  def update(self, lr):
    self.linear2.update(lr)
    self.linear1.update(lr)

####Three dimention non-linear input generation

In [8]:
import sys, numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:5000].reshape(5000,28*28) / 255, y_train[0:5000])

one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1
print(f'images: {images.shape}, labels: {labels.shape}, test_images: {test_images.shape}, test_labels: {test_labels.shape}')

x, y_true = images, labels
x_test, y_test = test_images, test_labels

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
images: (5000, 784), labels: (5000, 10), test_images: (10000, 784), test_labels: (10000, 10)


In [9]:
# dropout_mask = np.random.binomial(1, 0.8, size=x1.shape)
# dropout_mask

####Initialize nonlinear and loss for three dimention data

3-d input and 2-d output with 2 hidden layers, 10 neurons for hidden layers

In [10]:
loss = MSE()
nonlinear = Model(784, 200, 10)
x = images
y_true = labels

y_pred = nonlinear(x)
#print(x.shape, weights_true.shape, y_true.shape, y_pred.shape)
print(loss(y_pred, y_true))

4.233879957891376


####Train three dimention data

In [11]:
#fit(x, y_true, model=nonlinear, loss=loss, lr=0.1, num_epochs=1000)

#y_pred = nonlinear(x)

In [12]:
fit_by_batch(x, y_true, x_test, y_test, model=nonlinear, loss=loss, lr=0.01, num_epochs=800)

y_pred = nonlinear(x)

Epoch 0,loss 0.11877092606454248, correct_rate 0.2142, test_correct_rate 0.3161
Epoch 100,loss 0.022309809245400877, correct_rate 0.9094, test_correct_rate 0.8599
Epoch 200,loss 0.017173383427096248, correct_rate 0.932, test_correct_rate 0.8845
Epoch 300,loss 0.017312383414515863, correct_rate 0.9464, test_correct_rate 0.8914
Epoch 400,loss 0.012322031553421387, correct_rate 0.9568, test_correct_rate 0.8956
Epoch 500,loss 0.013985983831919981, correct_rate 0.9652, test_correct_rate 0.8991
Epoch 600,loss 0.012877621897729936, correct_rate 0.9702, test_correct_rate 0.8995
Epoch 700,loss 0.010357643807912329, correct_rate 0.9718, test_correct_rate 0.9042


####Plot the output and the real data using tsne

In [13]:
#plot_distribution(x, y_true, y_pred)

In [14]:
#plot_comparison(y_true, y_pred)

In [15]:
#plot_3d(x, y_true[:, 0], y_pred=y_pred[:, 0])