In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

In [2]:
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
print('x_train: ', x_train.shape)

x_train:  (50000, 32, 32, 3)


In [3]:
K = len(np.unique(y_train)) # Classes
Ntr = x_train.shape[0]
Nte = x_test.shape[0]
Din = 3072 # CIFAR10

In [4]:
#x_train, x_test = x_train , x_test 
mean_image = np.mean(x_train, axis=0)
x_train = x_train - mean_image
x_test = x_test - mean_image

In [5]:
x_train = np.reshape(x_train,(Ntr,Din))
x_test = np.reshape(x_test,(Nte,Din))
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

In [6]:
y_train = tf.keras.utils.to_categorical(y_train, num_classes=K)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=K)

In [7]:
std=1e-5
H = 200
w1 = std*np.random.randn(Din, H)
b1 = np.zeros(H)
w2 = std*np.random.randn(H, K)
b2 = np.zeros(K)

In [8]:
iterations = 300
alpha = 0.5e-2
lamda = 5e-6
decay = 0.999

In [9]:
for i in range(1,iterations+1):
    h = 1/(1+np.exp(-(x_train.dot(w1)+b1)))
    y_hat = h.dot(w2) + b2
    cost = (1/Ntr) * np.square(y_hat-y_train).sum() + lamda*(np.sum(w2*w2) + np.sum(w1*w1))
    if i %20 == 0 or i==1:
        print('iteration', i, cost)
    
    dy_hat = (1/Ntr)*2*(y_hat-y_train)
    dw2 = h.T.dot(dy_hat) + lamda*w2
    db2 = dy_hat.sum(axis=0)
    dh = dy_hat.dot(w2.T)
    dw1 = x_train.T.dot(dh*h*(1-h)) + lamda*w1
    db1 = (dh*h*(1-h)).sum(axis=0)
    w1 -= alpha*dw1
    w2 -= alpha*dw2
    b1 -= alpha*db1
    b2 -= alpha*db2
    alpha = alpha * decay

iteration 1 0.9999999434941892
iteration 20 0.8627723385143642
iteration 40 0.8394134507013334
iteration 60 0.8278999979100715
iteration 80 0.8186861605095255
iteration 100 0.8098279482342781
iteration 120 0.8048366672977931
iteration 140 0.7954185879905953
iteration 160 0.7885879425121799
iteration 180 0.784561175825965
iteration 200 0.778490057196885
iteration 220 0.7750637506175638
iteration 240 0.7743316288942749
iteration 260 0.7686677498778419
iteration 280 0.7647785005310737
iteration 300 0.7639305981851796


In [10]:
h1 = 1/(1+np.exp(-(x_train.dot(w1)+b1)))
pred1 = h1.dot(w2) + b2

In [11]:
train_pred = np.argmax(pred1, axis=1)
y_t = np.argmax(y_train,axis=1)

In [15]:
print('Train accuracy and loss',np.mean(y_t == train_pred),(1/Ntr) * np.square(y_hat-y_train).sum() + lamda*(np.sum(w2*w2) + np.sum(w1*w1)))

Train accuracy and loss 0.43268 0.76393059952796


In [17]:
h1 = 1/(1+np.exp(-(x_test.dot(w1)+b1)))
pred1 = h1.dot(w2) + b2
test_pred = np.argmax(pred1, axis=1)
y_t = np.argmax(y_test,axis=1)
test_acc = np.mean(y_t == test_pred)
test_loss = (1/Nte) * np.square(pred1-y_test).sum() + lamda*(np.sum(w2*w2) + np.sum(w1*w1))

In [18]:
print('Test accuracy and loss',test_acc,test_loss)

Test accuracy and loss 0.4182 0.770682761124909
