# Pre-Processing


In [0]:
import tensorflow as tf
import numpy as np

In [0]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [47]:
print('X_train shape : ', X_train.shape)
print('y_train shape : ', X_train.shape)
print('X_train shape : ', X_train.shape)
print('y_train shape : ', X_train.shape)

X_train shape :  (50000, 32, 32, 3)
y_train shape :  (50000, 32, 32, 3)
X_train shape :  (50000, 32, 32, 3)
y_train shape :  (50000, 32, 32, 3)


In [0]:
X_val = X_train[-1000:,:]
y_val = y_train[-1000:]

X_train = X_train[:-1000,:]
y_train = y_train[:-1000]

In [49]:
print('X_train shape : ', X_train.shape)
print('y_train shape : ', X_train.shape)
print('X_val shape   : ', X_val.shape)
print('y_val shape   : ', X_val.shape)

X_train shape :  (49000, 32, 32, 3)
y_train shape :  (49000, 32, 32, 3)
X_val shape   :  (1000, 32, 32, 3)
y_val shape   :  (1000, 32, 32, 3)


In [0]:
X_train = X_train.astype('float32')
X_val = X_val.astype('float32')
X_test = X_test.astype('float32')

mean_image = np.mean(X_train, axis = 0)

X_train -= mean_image
X_val -= mean_image
X_test -= mean_image

In [51]:
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1]*X_train.shape[2]*X_train.shape[3]))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1]*X_val.shape[2]*X_val.shape[3]))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1]*X_test.shape[2]*X_test.shape[3]))

print('X_train.shape : ', X_train.shape)
print('X_val.shape : ', X_val.shape)
print('X_test.shape : ', X_test.shape)

X_train.shape :  (49000, 3072)
X_val.shape :  (1000, 3072)
X_test.shape :  (10000, 3072)


In [54]:
y_train = y_train.ravel()
y_val = y_val.ravel()
y_test = y_test.ravel()

print('y_train.shape : ',y_train.shape)
print('y_val.shape   : ',y_val.shape)
print('y_test.shape  : ',y_test.shape)

y_train.shape :  (49000,)
y_val.shape   :  (1000,)
y_test.shape  :  (10000,)


#defining function

In [0]:
def affine_forward(x, W, b):
  return x.dot(W) + b

def affine_backward(dout, x, W, b):
  dW = x.T.dot(dout)
  db = np.sum(dout, axis = 0, keepdims = True)
  dx = dout.dot(W.T)
  return dW, db, dx

def relu_forward(x):
  out = np.maximum(0,x)
  return out

def relu_backward(dout, x):
  dr_prime = np.where((x<0),0,1)
  dout = dout*dr_prime
  return dout

def softmax(x):
  x -= np.max(x)
  x_exp = np.exp(x)
  x_sum = np.sum(x_exp, axis = 1, keepdims = True)
  score = x_exp/x_sum
  return score

def softmax_loss(score,y):
  num_examples = score.shape[0]
  
  number_list = range(num_examples)
  correct_logprobs = -np.log(score[number_list,y])
  loss = np.sum(correct_logprobs)/num_examples
  
  dscores = score 
  dscores[range(num_examples),y] -= 1
  dscores /= num_examples
  return loss,dscores

In [0]:
def predict_two_layer_relu(x, W, b):
  y_pred = np.zeros(x.shape[0])
  
  layer1 = affine_forward(x, W[0], b[0])
  act1= relu_forward(layer1)
  
  layer2 = affine_forward(act1, W[1], b[1])
  y_pred = np.argmax(layer2, axis=-1 )
  
  return y_pred

In [0]:
def train_two_layer_relu(X, y, X_val, y_val, hidden_size, W=None, b=None, learning_rate=1e-4, 
                         lr_decay = 0.9, reg = 0.5, num_iters = 100, batch_size = 200, verbose = True):
  num_train, dim = X.shape
  iterations_per_epoch = max(num_train/batch_size, 1)
  num_classes = np.max(y) +1
  
  if W is None:
    W0 = 1e-4 * np.random.randn(dim, hidden_size)
    W1 = 1e-4 * np.random.randn(hidden_size, num_classes)
    W = [W0, W1]
    
  if b is None:
    b0 = np.zeros((1, hidden_size))
    b1 = np.zeros((1, num_classes))
    b = [b0,b1]
    
  loss_history = []
  train_acc_history = []
  val_acc_history = []
  
  for it in range(num_iters):
    X_batch = None
    y_batch = None
    
    train_rows = np.arange(num_train)
    idxs = np.random.choice(train_rows, batch_size, replace = False)  
    
    X_batch = X[idxs]
    y_batch = y[idxs]
    
    layer1 = affine_forward(X_batch, W[0], b[0])
    act1 = relu_forward(layer1)
    
    layer2 = affine_forward(act1, W[1], b[1])
    
    softmax_score = softmax(layer2)
    loss, dout = softmax_loss(softmax_score, y_batch)
    
    loss_history.append(loss)
    
    dW1, db1, dact1 = affine_backward(dout, act1, W[1], b[1])
    dlayer1 = relu_backward(dact1, act1)
    
    dW0, db0, dact0 = affine_backward(dlayer1, X_batch, W[0], b[1])
    
    W[0] = W[0]- dW0*learning_rate
    b[0] = b[0] - db0*learning_rate
    W[1] = W[1] - dW1*learning_rate
    b[1] = b[1] - db1*learning_rate
    
    if verbose and it % 100 == 0:
      print('Iterations', it, '/', num_iters, ':loss =', loss)
      
    if it % iterations_per_epoch == 0:
      train_acc = (predict_two_layer_relu(X_batch, W, b) == y_batch).mean()
      val_acc = (predict_two_layer_relu(X_val,W,b) == y_val).mean()
      train_acc_history.append(train_acc)
      val_acc_history.append(val_acc)
      
      learning_rate *= lr_decay
  return loss_history, W, b, train_acc_history, val_acc_history

# LETS DO IT


In [95]:
loss, W_relu, b_relu, train_acc, val_acc = train_two_layer_relu(X_train, y_train, X_val, y_val, hidden_size = 1000, num_iters=2000,learning_rate=0.00007,)

print('Training Accuracy : ', train_acc[-1]*100,'%')
print('Validation Accuracy : ', val_acc[-1]*100,'%')

Iterations 0 / 2000 :loss = 2.3025510283228745
Iterations 100 / 2000 :loss = 2.2954409103442788
Iterations 200 / 2000 :loss = 2.248156100875269
Iterations 300 / 2000 :loss = 2.1819157950704096
Iterations 400 / 2000 :loss = 2.09111278605981
Iterations 500 / 2000 :loss = 2.1005790572463847
Iterations 600 / 2000 :loss = 2.0181231708890253
Iterations 700 / 2000 :loss = 2.016968000548069
Iterations 800 / 2000 :loss = 1.9855378206271115
Iterations 900 / 2000 :loss = 2.107074726951833
Iterations 1000 / 2000 :loss = 1.9686461501514492
Iterations 1100 / 2000 :loss = 1.935286932017583
Iterations 1200 / 2000 :loss = 1.9329819283571472
Iterations 1300 / 2000 :loss = 1.9533240126923466
Iterations 1400 / 2000 :loss = 1.9769873163982308
Iterations 1500 / 2000 :loss = 1.8996830011646302
Iterations 1600 / 2000 :loss = 1.8595131903580528
Iterations 1700 / 2000 :loss = 1.9013100105509886
Iterations 1800 / 2000 :loss = 1.8811238147778224
Iterations 1900 / 2000 :loss = 1.8422126926849935
Training Accuracy 