In [None]:
import numpy as np
import pandas as pd
import math
import random
from sklearn.metrics import accuracy_score

In [None]:
# Load the labelled dataset; the 'label' column holds the class index and
# every other column is treated as a numeric feature.
data = pd.read_csv('Apparel/apparel-trainval.csv')

labels = data['label']
features = data.drop(columns='label')

# Column-wise standardisation statistics (kept at module level so the same
# transform can be applied to other data later).
mean = features.mean()
std = features.std()

# A constant column has std == 0, and (x - mean) / 0 would fill it with NaN,
# which then propagates through every matrix product in the network.
# Dividing such columns by 1 leaves them as all zeros instead.
data = (features - mean) / std.replace(0, 1)
data.insert(0, 'label', labels, allow_duplicates=False)

# Reproducible 90/10 split: sample 90% of the rows, keep the rest for testing.
train = data.sample(frac=0.9, random_state=200)
test = data.drop(train.index)

# Ground-truth labels as (n_samples, 1) column vectors.
train_act = train['label'].to_numpy().reshape(-1, 1)
test_act = test['label'].to_numpy().reshape(-1, 1)

# Feature matrices, transposed to (n_features, n_samples) so each column is
# one sample.
train = train.drop(columns='label').to_numpy().T
test = test.drop(columns='label').to_numpy().T

# One-hot encode the training labels: row k of the identity matrix is the
# encoding of class k. Transposed to (10, n_samples) to match `train`.
y_train_encoded = np.eye(10, dtype=int)[train_act[:, 0]].T

# Filled by batch(): one entry per mini-batch.
batched_train_input = []
batched_train_output = []

def batch(train, batchsize, no_of_batches):
    """Split the training data into consecutive column-wise mini-batches.

    The batches are stored in the module-level lists ``batched_train_input``
    and ``batched_train_output``. Both lists are emptied in place first, so
    calling this function again (e.g. re-running the notebook cell) replaces
    the previous batches instead of appending duplicates after them.

    Parameters
    ----------
    train : 2-D array
        Input matrix; it is sliced along axis 1, one column range per batch.
    batchsize : int
        Number of columns per batch. The last batch may be narrower if the
        column count is not a multiple of ``batchsize``.
    no_of_batches : int
        Number of batches to create.

    Notes
    -----
    The matching targets are sliced from the module-level ``y_train_encoded``
    using the same column ranges.
    """
    # Clear in place so any existing references to these lists stay valid.
    batched_train_input.clear()
    batched_train_output.clear()

    for i in range(no_of_batches):
        cols = slice(i * batchsize, (i + 1) * batchsize)
        batched_train_input.append(train[:, cols])
        batched_train_output.append(y_train_encoded[:, cols])

In [None]:
# Mini-batch configuration: the batch count is rounded up so that a final,
# possibly narrower, batch picks up the leftover columns of `train`.
batchsize = 32
no_of_batches = math.ceil(train.shape[1] / batchsize)

batch(train, batchsize=batchsize, no_of_batches=no_of_batches)

In [None]:
# Network state, keyed by layer index (1 = first layer after the input).
# W/B are filled by init(); Z/A are filled by forward().
W, B, Z, A = {}, {}, {}, {}

# Gradients with the same keys, filled by backpropagation().
dW, dB, dZ, dA = {}, {}, {}, {}

# Activation name per layer; index 0 is a placeholder for the input layer
# and is never looked up.
activation = ["input", "relu", "relu", "softmax"]

# NOTE(review): no function in this notebook reads `layers`; init() does not
# consult it and builds 64-unit hidden layers, not the 60 listed here.
layers = [784, 60, 60, 10]

In [None]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

In [None]:
def tanh(x):
    """Element-wise hyperbolic tangent (thin wrapper around ``np.tanh``)."""
    activated = np.tanh(x)
    return activated

In [None]:
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    return np.maximum(0, x)

In [None]:
def softmax(x):
    """Softmax along axis 0, so each column of a 2-D input sums to 1.

    The per-column maximum is subtracted before exponentiating. This leaves
    the result mathematically unchanged (the factor cancels in the ratio)
    but keeps ``np.exp`` from overflowing to ``inf`` on large inputs, which
    previously produced ``inf / inf = NaN`` outputs.

    Parameters
    ----------
    x : array
        Scores; normalisation is over axis 0.

    Returns
    -------
    array
        Same shape as ``x``, non-negative, summing to 1 along axis 0.
    """
    shifted = x - np.max(x, axis=0, keepdims=True)
    t = np.exp(shifted)
    return t / np.sum(t, axis=0, keepdims=True)

In [None]:
def sigmoid_derivative(x):
    """Derivative of the logistic function at ``x``: ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    s = sigmoid(x)
    return s * (1 - s)

In [None]:
def relu_derivative(x):
    """Derivative of ReLU: 1 where ``x > 0``, 0 elsewhere.

    Returns a new array with the same shape and dtype as ``x``. The input is
    left untouched; the previous implementation overwrote it in place, which
    destroyed the caller's pre-activation values (e.g. the cached ``Z[l]``).

    Parameters
    ----------
    x : array-like
        Pre-activation values.

    Returns
    -------
    ndarray
        Array of 0/1 values in the dtype of ``x``.
    """
    x = np.asarray(x)
    return (x > 0).astype(x.dtype)

In [None]:
def tanh_derivative(x):
    """Derivative of tanh at ``x``: ``1 - tanh(x) ** 2``.

    Uses the ``**`` operator rather than ``np.pow``: that alias only exists
    from NumPy 2.0 onwards and raises ``AttributeError`` on older releases.
    """
    t = np.tanh(x)
    return 1 - t ** 2

In [None]:
def init(layer_sizes=(784, 64, 64, 10)):
    """(Re)initialise the module-level weight and bias dictionaries ``W`` and ``B``.

    Any existing entries are removed first, then for every layer
    ``l = 1 .. len(layer_sizes) - 1``:

    * ``W[l]`` is a ``(layer_sizes[l], layer_sizes[l-1])`` matrix drawn from a
      standard normal and scaled by ``sqrt(2 / (fan_out + fan_in))``.
    * ``B[l]`` is a ``(layer_sizes[l], 1)`` column of small positive values
      drawn uniformly between 0.001 and 0.01.

    Parameters
    ----------
    layer_sizes : sequence of int, optional
        Unit counts per layer, input layer first. The default reproduces the
        sizes this function has always used. The module-level ``layers`` list
        is not consulted. forward()/predict() iterate over
        ``len(activation)`` layers, so the two lengths are expected to match.

    Notes
    -----
    The former trailing ``np.reshape(B[l], layers[l], 1)`` call was removed.
    Its third positional argument is ``order`` (so ``1`` is rejected by
    current NumPy) and its result was discarded; ``B[l]`` already has the
    intended column shape.
    """
    # Clear in place so other references to the same dicts see the reset.
    W.clear()
    B.clear()

    for l in range(1, len(layer_sizes)):
        fan_out, fan_in = layer_sizes[l], layer_sizes[l - 1]

        W[l] = np.random.randn(fan_out, fan_in) * np.sqrt(2 / (fan_out + fan_in))

        # random.uniform accepts its bounds in either order; the argument
        # order is kept as it was so seeded runs draw the same values.
        B[l] = np.array([[random.uniform(0.01, 0.001)] for _ in range(fan_out)])

In [None]:
def forward(X):
    """Run one forward pass, caching every layer's values in ``Z`` and ``A``.

    For each layer ``l = 1 .. len(activation) - 1`` this computes
    ``Z[l] = W[l] . A[l-1] + B[l]`` and ``A[l] = g_l(Z[l])``, where ``g_l`` is
    chosen by the name in ``activation[l]``. ``activation[0]`` is a
    placeholder for the input layer and is never looked up.

    Parameters
    ----------
    X : array
        Network input; stored as ``A[0]``. Presumably
        ``(n_features, n_samples)``, given that it is multiplied as
        ``np.dot(W[1], X)``.

    Raises
    ------
    ValueError
        If ``activation[l]`` is not one of ``"sigmoid"``, ``"tanh"``,
        ``"relu"`` or ``"softmax"``. Previously an unrecognised name
        (including ``"tanh"``) silently left ``A[l]`` unset or stale.
    """
    A[0] = X

    for l in range(1, len(activation)):
        # A[0] is X, so the first layer needs no special case.
        Z[l] = np.dot(W[l], A[l - 1]) + B[l]

        kind = activation[l]
        if kind == "sigmoid":
            A[l] = sigmoid(Z[l])
        elif kind == "tanh":
            A[l] = tanh(Z[l])
        elif kind == "relu":
            A[l] = relu(Z[l])
        elif kind == "softmax":
            A[l] = softmax(Z[l])
        else:
            raise ValueError(
                "unsupported activation {!r} for layer {}".format(kind, l)
            )

In [None]:
def backpropagation(y, m, lr=0.01):
    """Compute gradients for the last forward pass and apply one update step.

    Reads the cached ``A`` and ``Z`` from forward(), fills the module-level
    ``dZ``, ``dW``, ``dB`` and ``dA`` dictionaries, then updates ``W`` and
    ``B`` in a plain gradient-descent step.

    Parameters
    ----------
    y : array
        Targets with the same shape as the output activations ``A[L]``.
    m : int
        Number of samples in the batch; gradients are scaled by ``1 / m``.
    lr : float, optional
        Learning rate (default 0.01, the previously hard-coded value).

    Raises
    ------
    ValueError
        If a hidden layer uses an activation other than ``"sigmoid"``,
        ``"tanh"`` or ``"relu"``.

    Notes
    -----
    The output-layer error is always ``A[L] - y``; this presumably assumes a
    softmax output trained with cross-entropy - confirm if the last entry of
    ``activation`` is changed.
    """
    L = len(activation) - 1

    # Calculate gradients, from the output layer back to the first layer.
    for l in range(L, 0, -1):
        if l == L:
            dZ[l] = A[l] - y
        else:
            kind = activation[l]
            if kind == "sigmoid":
                dZ[l] = np.multiply(dA[l], sigmoid_derivative(Z[l]))
            elif kind == "tanh":
                dZ[l] = np.multiply(dA[l], tanh_derivative(Z[l]))
            elif kind == "relu":
                # Pass a copy so the cached Z[l] survives even if the helper
                # writes into its argument.
                dZ[l] = np.multiply(dA[l], relu_derivative(Z[l].copy()))
            else:
                raise ValueError(
                    "unsupported activation {!r} for layer {}".format(kind, l)
                )

        dW[l] = (1 / m) * np.dot(dZ[l], A[l - 1].T)
        dB[l] = (1 / m) * np.sum(dZ[l], axis=1, keepdims=True)

        if l != 1:
            dA[l - 1] = np.dot(W[l].T, dZ[l])

    # Update parameters of every layer, including the output layer L.
    # (The loop used to stop at L - 1, so the last layer was never trained.)
    for l in range(1, L + 1):
        W[l] = W[l] - lr * dW[l]
        B[l] = B[l] - lr * dB[l]

In [None]:
def predict(data):
    """Return the predicted class index for every column of ``data``.

    Runs the same layer computation as forward() with the current ``W`` and
    ``B``, but keeps the intermediate values local, so the ``Z``/``A`` caches
    used for training are not modified.

    Parameters
    ----------
    data : 2-D array
        Inputs with one sample per column.

    Returns
    -------
    ndarray
        Shape ``(data.shape[1], 1)``; each entry is the argmax over axis 0 of
        the final layer's activations.

    Raises
    ------
    ValueError
        If ``activation[l]`` is not one of ``"sigmoid"``, ``"tanh"``,
        ``"relu"`` or ``"softmax"``. Previously an unrecognised name left the
        layer output undefined.
    """
    current = data

    for l in range(1, len(activation)):
        z = np.dot(W[l], current) + B[l]

        kind = activation[l]
        if kind == "sigmoid":
            current = sigmoid(z)
        elif kind == "tanh":
            current = tanh(z)
        elif kind == "relu":
            current = relu(z)
        elif kind == "softmax":
            current = softmax(z)
        else:
            raise ValueError(
                "unsupported activation {!r} for layer {}".format(kind, l)
            )

    pred = current.argmax(axis=0)

    # Column vector, one row per sample.
    return pred.reshape(data.shape[1], 1)

In [None]:
def train(batch_ip, batch_op, no_of_batches, batchSize, iterations):
    """Train the network with mini-batch gradient descent.

    Calls init() once, then for each epoch runs forward() and
    backpropagation() on every batch, prints the epoch number and prints the
    accuracy on the module-level ``test`` / ``test_act`` data via predict()
    and stats().

    Parameters
    ----------
    batch_ip : sequence of arrays
        Input batches, one array per batch.
    batch_op : sequence of 2-D arrays
        Target batches aligned with ``batch_ip``, one sample per column.
    no_of_batches : int
        Number of leading batches to use from the two sequences.
    batchSize : int
        Nominal batch size. Kept for backward compatibility; the gradient is
        now scaled by each batch's actual column count, because the final
        batch can be narrower than ``batchSize``.
    iterations : int
        Number of epochs.

    Notes
    -----
    NOTE(review): this function shares its name with the ``train`` feature
    array built in the data-preparation cell, so defining it rebinds that
    name; re-running earlier cells afterwards will not see the array.
    """
    init()
    for epoch in range(iterations):
        for j in range(no_of_batches):
            batch_width = batch_op[j].shape[1]
            if batch_width == 0:
                # Nothing to learn from an empty batch (and 1/m would fail).
                continue

            forward(batch_ip[j])

            backpropagation(batch_op[j], batch_width)
        print('Epoch: ', epoch)

        pred = predict(test)

        stats(test_act, pred)

In [None]:
def stats(actual, predicted):
    """Print the accuracy of ``predicted`` against ``actual``."""
    score = accuracy_score(actual, predicted)
    print(score)

In [None]:
# Train for 100 epochs on the mini-batches prepared by batch().
train(
    batch_ip=batched_train_input,
    batch_op=batched_train_output,
    no_of_batches=no_of_batches,
    batchSize=batchsize,
    iterations=100,
)

In [None]:
# Final accuracy of the trained network on the held-out split.
pred = predict(data=test)

stats(actual=test_act, predicted=pred)