In [235]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [236]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

data = pd.read_csv('/content/drive/MyDrive/Neural Net/train.csv')

In [237]:
data.isna().any().any()

False

In [238]:
data.shape

(42000, 785)

In [239]:
data = data.dropna()

In [240]:
data = np.array(data)
m, n = data.shape
np.random.shuffle(data)

In [241]:
m, n

(42000, 785)

In [242]:
 # shuffle before splitting into dev and training sets

data_dev = data[0:1000].T
print(data_dev.shape)
Y_dev = data_dev[0].astype(np.int32)
X_dev = data_dev[1:n]
X_dev = X_dev / 255.

(785, 1000)


In [243]:
data_train = data[1000:m].T
print(data_train.shape)
Y_train = data_train[0].astype(np.int32)
X_train = data_train[1:n]
X_train = X_train / 255.
_,m_train = X_train.shape

(785, 41000)


In [244]:
data_train[0].shape

(41000,)

In [245]:
Y_train.shape, Y_dev.shape

((41000,), (1000,))

In [246]:
# Y_train = Y_train.astype(np.int32)

In [281]:
# W1_new, b1_new, W2_new, b2_new = grad_desc(X_train, Y_train, 0.10, 10)

Iteration:  0
[5 7 4 ... 7 4 0] [2 1 5 ... 7 8 7]
0.11824390243902438


In [296]:
def relu(Z):
  return np.maximum(Z, 0)
def softmax(Z):
  # e_x = np.exp(Z - np.max(Z))
  A = np.exp(Z) / sum(np.exp(Z))
  # print("Z ", Z.shape)
  # print("ez", np.exp(Z).shape, sum(np.exp(Z)).shape )
  # print("is nan")
  return A

In [297]:
def relu_der(Z):
  return Z > 0

In [298]:
def forward_pass(W1, b1, W2, b2, X):
  Z1 = W1.dot(X) + b1
  A1 = relu(Z1)
  Z2 = W2.dot(A1) + b2
  A2 = softmax(Z2)
  return A1, A2, Z1, Z2

In [299]:
def update(W1, W2, b1, b2, dW1, dW2, db1, db2, alpha):
  W1 = W1 - alpha*dW1
  b1 = b1 - alpha*db1
  W2 = W2 - alpha*dW2
  b2 = b2 - alpha*db2
  return W1, W2, b1, b2

In [300]:
def init_params():
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2

In [301]:
def one_hot(Y):
  one_hot_Y = np.zeros((Y.size, int(Y.max() + 1)))
  # type(Y)
  one_hot_Y[np.arange(Y.size), Y] = 1
  one_hot_Y = one_hot_Y.T
  return one_hot_Y

In [302]:
Y_train

array([2, 1, 5, ..., 7, 8, 7], dtype=int32)

In [303]:
one_hot(Y_train).shape

(10, 41000)

In [304]:
def back_pass(A1, A2, W1, W2, Z1, Z2, m, Y, X):
  one_hot_Y = one_hot(Y)
  dZ2 = A2 - one_hot_Y
  dW2 = 1 / m * dZ2.dot(A1.T)
  db2 = 1 / m * np.sum(dZ2)
  dZ1 = W2.T.dot(dZ2) * relu_der(Z1)
  dW1 = 1 / m * dZ1.dot(X.T)
  db1 = 1 / m * np.sum(dZ1)
  return dW1, db1, dW2, db2

In [305]:
def preds(A2):
  return np.argmax(A2, axis = 0)
def get_accuracy(predictions, Y):
  print(predictions, Y)
  return np.sum(predictions == Y) / Y.size

In [306]:
def grad_desc(X, Y, alpha, iterations, m = m_train):
  W1, b1, W2, b2 = init_params()
  for i in range(iterations):
    A1, A2, Z1, Z2 = forward_pass(W1, b1, W2, b2, X)
    dW1, db1, dW2, db2 = back_pass(A1, A2, W1, W2, Z1, Z2, m, Y, X)
    W1, W2, b1, b2 = update(W1, W2, b1, b2, dW1, dW2, db1, db2, alpha)
    if i % 10 == 0:
      print("Iteration: ", i)
      predictions = preds(A2)
      print(get_accuracy(predictions, Y))
  return W1, b1, W2, b2

In [307]:
X_train.shape

(784, 41000)

In [308]:
Y_train.shape

(41000,)

In [None]:
W1, b1, W2, b2 = grad_desc(X_train, Y_train, 0.10, 500)

Iteration:  0
[5 7 5 ... 5 7 7] [2 1 5 ... 7 8 7]
0.1363658536585366
Iteration:  10
[2 5 5 ... 8 8 7] [2 1 5 ... 7 8 7]
0.21578048780487805
Iteration:  20
[2 2 3 ... 7 3 7] [2 1 5 ... 7 8 7]
0.2531219512195122
Iteration:  30
[2 2 2 ... 7 3 7] [2 1 5 ... 7 8 7]
0.2945609756097561
Iteration:  40
[2 1 2 ... 7 3 7] [2 1 5 ... 7 8 7]
0.33490243902439026
Iteration:  50
[2 1 2 ... 7 3 7] [2 1 5 ... 7 8 7]
0.3732439024390244
Iteration:  60
[2 1 2 ... 7 3 7] [2 1 5 ... 7 8 7]
0.4113414634146341
Iteration:  70
[2 1 2 ... 7 3 7] [2 1 5 ... 7 8 7]
0.4502926829268293
Iteration:  80
[2 1 2 ... 7 8 7] [2 1 5 ... 7 8 7]
0.4848292682926829
Iteration:  90
[2 1 2 ... 7 8 7] [2 1 5 ... 7 8 7]
0.5135853658536585
Iteration:  100
[2 1 2 ... 7 8 7] [2 1 5 ... 7 8 7]
0.5397560975609756
Iteration:  110
[2 1 2 ... 7 8 7] [2 1 5 ... 7 8 7]
0.5646585365853658
Iteration:  120
[2 1 2 ... 7 8 7] [2 1 5 ... 7 8 7]
0.5858048780487805
Iteration:  130
[2 1 2 ... 7 8 7] [2 1 5 ... 7 8 7]
0.6071219512195122
Iteration:  140