In [20]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

In [21]:
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
print('x_train: ', x_train.shape)

x_train:  (50000, 32, 32, 3)


In [22]:
K = len(np.unique(y_train)) # Classes
Ntr = x_train.shape[0]
Nte = x_test.shape[0]
Din = 3072 # CIFAR10

In [23]:
# Normalize pixel values
x_train, x_test = x_train / 2, x_test / 2
mean_image = np.mean(x_train, axis=0)
x_train = x_train - mean_image
x_test = x_test - mean_image

In [24]:
x_train = np.reshape(x_train,(Ntr,Din))
x_test = np.reshape(x_test,(Nte,Din))
x_train = x_train.astype('float64')
x_test = x_test.astype('float64')

In [25]:
y_train = tf.keras.utils.to_categorical(y_train, num_classes=K)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=K)

In [26]:
std=1e-5
H = 200
w1 = std*np.random.randn(Din, H)
b1 = np.zeros(H)
w2 = std*np.random.randn(H, K)
b2 = np.zeros(K)
batch_size = 500

In [27]:
iterations = round(Ntr/batch_size) * 300
alpha = 0.5e-2
lamda = 5e-6
decay = 0.999

In [28]:
for i in range(1,iterations+1):
    batch_indices = np.random.choice(Ntr, batch_size)
    x = x_train[batch_indices]
    y = y_train[batch_indices]
    h = 1/(1+np.exp(-(x.dot(w1)+b1)))
    y_hat = h.dot(w2) + b2
    cost = (1/batch_size) * np.square(y_hat-y).sum() + lamda*(np.sum(w2*w2) + np.sum(w1*w1))
    if i % 500 == 0 or i==1:
        print('iteration', i, cost)
    
    dy_hat = (1/batch_size)*2*(y_hat-y)
    dw2 = h.T.dot(dy_hat) + lamda*w2
    db2 = dy_hat.sum(axis=0)
    dh = dy_hat.dot(w2.T)
    dw1 = x.T.dot(dh*h*(1-h)) + lamda*w1
    db1 = (dh*h*(1-h)).sum(axis=0)
    w1 -= 100*alpha*dw1
    w2 -= alpha*dw2
    b1 -= 100*alpha*db1
    b2 -= alpha*db2
    alpha = alpha * decay

iteration 1 0.9999546879273157
iteration 500 0.7672233113960608
iteration 1000 0.7410665765419735
iteration 1500 0.709267161170735
iteration 2000 0.6941710152984616
iteration 2500 0.6937131257779925
iteration 3000 0.6791701971289281
iteration 3500 0.669240697722151
iteration 4000 0.6753508275625142
iteration 4500 0.6707090606278688
iteration 5000 0.666072027846312
iteration 5500 0.6457191210253823
iteration 6000 0.6660135200560865
iteration 6500 0.6468608236049463
iteration 7000 0.6639376033304991
iteration 7500 0.664229904400331
iteration 8000 0.6485613149109044
iteration 8500 0.6609993548847835
iteration 9000 0.6690041563425004
iteration 9500 0.6624495316852832
iteration 10000 0.6726005049748591
iteration 10500 0.6677834211418737
iteration 11000 0.6457173833316879


KeyboardInterrupt: 

In [29]:
h1 = 1/(1+np.exp(-(x_test.dot(w1)+b1)))
pred1 = h1.dot(w2) + b2
test_pred = np.argmax(pred1, axis=1)
y_t = np.argmax(y_test,axis=1)
test_acc = np.mean(y_t == test_pred)
test_loss = (1/Nte) * np.square(pred1-y_test).sum() + lamda*(np.sum(w2*w2) + np.sum(w1*w1))

In [30]:
print('Test accuracy and loss',test_acc,test_loss)

Test accuracy and loss 0.4562 0.7434132360502146


In [33]:
h1 = 1/(1+np.exp(-(x_train.dot(w1)+b1)))
pred1 = h1.dot(w2) + b2
test_pred = np.argmax(pred1, axis=1)
y_t = np.argmax(y_train,axis=1)
train_acc = np.mean(y_t == test_pred)
train_loss = (1/Ntr) * np.square(pred1-y_train).sum() + lamda*(np.sum(w2*w2) + np.sum(w1*w1))

In [34]:
print('Train accuracy and loss',train_acc,train_loss)

Train accuracy and loss 0.57414 0.6594692266985144


In [10]:
h1 = 1/(1+np.exp(-(x_train.dot(w1)+b1)))
pred1 = h1.dot(w2) + b2

  """Entry point for launching an IPython kernel.


In [11]:
train_pred = np.argmax(pred1, axis=1)
y_t = np.argmax(y_train,axis=1)

In [12]:
np.mean(y_t == train_pred)

0.5182

In [169]:
np.argmax(pred1,axis=1)

array([0, 3, 3, ..., 3, 7, 1], dtype=int64)

In [168]:
np.argmax(y_train, axis=1)

array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0.], dtype=float32)

In [57]:
dw2[0]

array([-2.55919508e-07,  1.85312726e-08,  8.01437203e-08,  1.08191779e-07,
        1.63330586e-07,  1.45486839e-07,  2.24864997e-07, -1.37344344e-07,
       -1.82360499e-07, -1.64057089e-07])

In [41]:
x_train.shape,w1.shape

((50000, 3072), (3072, 200))

In [141]:
h1.shape

(50000, 200)