<a href="https://colab.research.google.com/github/YasirHabib/tensorflow/blob/master/fashion_mnist_rmsprop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

In [2]:
# Colab-only step: mount Google Drive at /content/gdrive so the CSV files
# read in the next cell are reachable through the local filesystem.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# Load the Fashion-MNIST train and test CSV files from the mounted Drive.
# Later cells treat column 0 as the class label and the remaining columns
# as the input features.
df_train = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/fashion-mnist_train.csv')
df_test = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/fashion-mnist_test.csv')

In [0]:
# Pull the raw arrays out of the DataFrames.
data_train, data_test = df_train.values, df_test.values

# Column 0 is the label; every remaining column is a feature.
Ytrain, Xtrain = data_train[:, 0], data_train[:, 1:]
Ytest, Xtest = data_test[:, 0], data_test[:, 1:]

In [0]:
# Network / data dimensions used by the cells below.
M = 128                                  # hidden layer width
K = len({label for label in Ytrain})     # number of distinct training labels
N, D = Xtrain.shape                      # rows and columns of the training inputs

In [0]:
def y2indicator(Y, K=None):
  """One-hot encode a 1-D sequence of integer class labels.

  Args:
    Y: 1-D array-like of integer labels (values are used as column indices).
    K: optional number of columns of the result. When omitted it is
       ``max(Y) + 1`` so that every label has a column even if some class
       between 0 and ``max(Y)`` never occurs in ``Y`` (counting distinct
       labels would make the matrix too narrow in that case). For labels
       that cover 0..K-1 completely this equals the number of distinct
       labels.

  Returns:
    Float array of shape ``(len(Y), K)`` with a 1 at ``[i, Y[i]]`` and 0
    elsewhere. An empty ``Y`` with no ``K`` yields shape ``(0, 0)``.
  """
  # Cast so the labels can be used as indices even if they arrive as floats.
  labels = np.asarray(Y).astype(np.int64)
  N = labels.shape[0]
  if K is None:
    K = int(labels.max()) + 1 if N > 0 else 0

  ind = np.zeros((N, K))
  # One vectorised assignment instead of a Python loop over rows.
  ind[np.arange(N), labels] = 1

  return ind

In [0]:
# One-hot encode the label vectors of both splits.
Ytrain_ind, Ytest_ind = y2indicator(Ytrain), y2indicator(Ytest)

In [0]:
# Weights: Gaussian draws scaled by 1/sqrt(fan-in). W1 is drawn before W2.
W1 = np.random.randn(D, M) / np.sqrt(D)
W2 = np.random.randn(M, K) / np.sqrt(M)

# Biases start at zero.
b1, b2 = np.zeros(M), np.zeros(K)

In [0]:
def gradW2(Z, pY, T):
  """Gradient w.r.t. the output weights: Z^T (pY - T)."""
  output_error = pY - T
  return np.dot(Z.T, output_error)

def gradb2(pY, T):
  """Gradient w.r.t. the output bias: (pY - T) summed over rows."""
  output_error = pY - T
  return np.sum(output_error, axis=0)

def gradW1(X, W2, Z, pY, T):
  """Gradient w.r.t. the hidden weights for a ReLU hidden layer."""
  relu_mask = Z > 0                          # ReLU passes gradient only where Z > 0
  back_signal = np.dot(pY - T, W2.T)         # output error pushed back through W2
  hidden_delta = back_signal * relu_mask
  return np.dot(X.T, hidden_delta)

def gradb1(W2, Z, pY, T):
  """Gradient w.r.t. the hidden bias for a ReLU hidden layer."""
  relu_mask = Z > 0                          # ReLU passes gradient only where Z > 0
  back_signal = np.dot(pY - T, W2.T)         # output error pushed back through W2
  hidden_delta = back_signal * relu_mask
  return np.sum(hidden_delta, axis=0)

In [0]:
def forward(X, W1, b1, W2, b2):
  """Forward pass: ReLU hidden layer followed by a softmax output.

  Args:
    X: 2-D input batch, one row per sample.
    W1, b1: hidden layer weights and bias.
    W2, b2: output layer weights and bias.

  Returns:
    (Z, pY): the hidden activations after ReLU and the row-wise softmax
    probabilities.
  """
  Z = np.maximum(X.dot(W1) + b1, 0)		# relu

  A = Z.dot(W2) + b2
  # Softmax is invariant to subtracting a per-row constant. Shifting by the
  # row maximum keeps every exponent <= 0, so np.exp cannot overflow to inf
  # (which would otherwise turn the probabilities into NaN).
  A = A - A.max(axis=1, keepdims=True)
  expA = np.exp(A)
  pY = expA / expA.sum(axis=1, keepdims=True)

  return Z, pY

In [0]:
def cost(pY, Y):
  """Total cross-entropy between predicted probabilities and targets.

  Args:
    pY: predicted probabilities.
    Y: target indicator matrix, broadcastable against ``pY``.

  Returns:
    ``-(Y * log(pY))`` summed over the entries where ``Y`` is non-zero.
    Entries with a zero target are skipped rather than multiplied, because
    ``0 * log(0)`` evaluates to NaN and would poison the whole sum whenever
    a non-target probability underflows to 0. A zero probability on an
    actual target still yields ``inf``.
  """
  pY, Y = np.broadcast_arrays(np.asarray(pY, dtype=float), np.asarray(Y))
  active = Y != 0
  return -(Y[active] * np.log(pY[active])).sum()

In [0]:
def error_rate(pY, Y):
  """Fraction of rows whose arg-max prediction differs from the label in Y."""
  predicted = np.argmax(pY, axis=1)
  mismatches = Y != predicted
  return np.mean(mismatches)

In [0]:
# --- Hyperparameters ---------------------------------------------------------
lr = 0.00001          # learning rate
reg = 0.001           # L2 penalty coefficient added to every gradient
decay_rate = 0.999    # RMSprop decay of the squared-gradient running average
eps = 1e-10           # keeps the RMSprop denominator away from zero

# RMSprop caches start as the scalar 1; the first update broadcasts each one
# to the shape of its parameter.
cache_W2 = 1
cache_b2 = 1
cache_W1 = 1
cache_b1 = 1

training_epochs = 10
batch_sz = 500
n_batches = N // batch_sz     # a trailing partial batch (if any) is dropped
eval_period = 1               # evaluate on the test set every `eval_period` batches
costs = []


def _rmsprop_step(param, grad, cache, lr, decay_rate, eps):
  """Apply one RMSprop update to `param` IN PLACE and return the new cache."""
  cache = decay_rate*cache + (1 - decay_rate)*grad*grad
  param -= lr * grad / (np.sqrt(cache) + eps)
  return cache


t0 = datetime.now()

for epoch in range(training_epochs):
  for j in range(n_batches):
    start = j*batch_sz
    Xbatch = Xtrain[start:start + batch_sz, :]
    Ybatch = Ytrain_ind[start:start + batch_sz, :]

    Z, pY = forward(Xbatch, W1, b1, W2, b2)

    # Compute every gradient BEFORE touching any parameter. The updates below
    # modify W2 in place, and the hidden-layer gradients must be
    # back-propagated through the same W2 that produced Z and pY.
    gW2 = gradW2(Z, pY, Ybatch) + reg*W2
    gb2 = gradb2(pY, Ybatch) + reg*b2
    gW1 = gradW1(Xbatch, W2, Z, pY, Ybatch) + reg*W1
    gb1 = gradb1(W2, Z, pY, Ybatch) + reg*b1

    cache_W2 = _rmsprop_step(W2, gW2, cache_W2, lr, decay_rate, eps)
    cache_b2 = _rmsprop_step(b2, gb2, cache_b2, lr, decay_rate, eps)
    cache_W1 = _rmsprop_step(W1, gW1, cache_W1, lr, decay_rate, eps)
    cache_b1 = _rmsprop_step(b1, gb1, cache_b1, lr, decay_rate, eps)

    if j % eval_period == 0:
      _, pY_test = forward(Xtest, W1, b1, W2, b2)
      c = cost(pY_test, Ytest_ind)
      costs.append(c)
      e = error_rate(pY_test, Ytest)
      print("Epoch", (epoch + 1), ": cost =", c, "error rate =", e)

# Stop the clock before plotting so the figure is not counted as training time.
elapsed = datetime.now() - t0

plt.plot(costs, label="cost")
plt.xlabel("evaluation step")
plt.ylabel("test cost")
plt.legend()
plt.show()

print("Elapsed time for", training_epochs, "epochs:", elapsed)