In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
%matplotlib inline

# Loading Data

In [None]:
# Read the training CSV and keep every column except 'Unnamed: 0'
# (list.remove raises ValueError if that column is not present).
data = pd.read_csv('AHDBase_TrainingSet.csv')
data_cols = data.columns.tolist()
data_cols.remove('Unnamed: 0')
train_data = data.loc[:, data_cols]

In [None]:
# Preview the first rows of the training frame.
train_data.head()

In [None]:
# Read the testing CSV the same way as the training one: every column except
# 'Unnamed: 0' is kept. Note that `data` / `data_cols` are rebound here.
data = pd.read_csv('AHDBase_TestingSet.csv')
data_cols = data.columns.tolist()
data_cols.remove('Unnamed: 0')
test_data = data.loc[:, data_cols]

In [None]:
# Preview the first rows of the testing frame.
test_data.head()

In [None]:
import cv2
import matplotlib.pyplot as plt

%matplotlib inline

# `output` names the target column; every other column is treated as a feature.
# Both names are reused by later cells.
output = 'label'
features = train_data.columns.tolist()
features.remove(output)

# Take the training row at position 6578 and split it into label and features.
image_with_label = train_data.iloc[6578]
label, image = image_with_label[output], image_with_label[features].values

# Arrange the feature values as an 8x8 grid for display.
image = image.reshape(8, 8)

print('The label is:', label)
plt.imshow(image, cmap='gray')

In [None]:
# Number of rows in the training and testing frames.
print(len(train_data))
print(len(test_data))

In [None]:
# Convert the training labels into one-hot encoded rows. The fitted binarizer
# is kept because later cells reuse it for the test labels and for decoding
# predictions.
binarizer = LabelBinarizer()
binarizer.fit(train_data[output])
one_hot_encoded_train_labels = binarizer.transform(train_data[output])

# Spot check: raw label of one sample next to its encoded row.
test_value = 20
print(train_data[output][test_value])
print(one_hot_encoded_train_labels[test_value])

In [None]:
# Feature matrices as plain numpy arrays (one row per sample at this point).
X_train, y_train = train_data[features].to_numpy(), one_hot_encoded_train_labels

# Test labels are encoded with the binarizer fitted on the training labels.
X_test = test_data[features].to_numpy()
y_test = binarizer.transform(test_data[output])

In [None]:
# Shapes of the training inputs and encoded labels at this point.
print(X_train.shape)
print(y_train.shape)

In [None]:
# The network code below treats each column as one example and each row as one
# feature: with m samples (images) and n features (pixels) it consumes arrays
# of shape n x m, so inputs and encoded labels are both transposed here.
# Re-running this cell transposes them back, so run it exactly once.
X_train, y_train = X_train.T, y_train.T

In [None]:
# Shapes of the training inputs and encoded labels after the transpose.
print(X_train.shape)
print(y_train.shape)

# Helper Functions

In [None]:
def sigmoid(Z):
    """Element-wise logistic function ``1 / (1 + exp(-Z))``.

    The value is computed as ``exp(-log(1 + exp(-Z)))`` via ``np.logaddexp``.
    This is mathematically the same quantity, but it never evaluates ``exp``
    on a large positive argument, so strongly negative inputs no longer
    trigger an overflow warning (or a FloatingPointError when overflow
    errors are enabled through ``np.errstate``).

    Parameters
    ----------
    Z : scalar or numpy array
        Pre-activation values.

    Returns
    -------
    Same shape as ``Z``, with every value in the closed interval [0, 1].
    """
    # logaddexp(0, -Z) == log(exp(0) + exp(-Z)) == log(1 + exp(-Z))
    return np.exp(-np.logaddexp(0, -Z))

def sigmoid_prime(A):
    """Derivative of the logistic function, expressed through its output.

    ``A`` is the activation itself (the callers in this notebook pass
    ``sigmoid(Z)``, not ``Z``); the result is ``A * (1 - A)`` element-wise.
    """
    complement = 1 - A
    return A * complement

In [None]:
def get_mini_batches(X, y, batch_size):
    """Yield consecutive column slices of ``X`` and ``y``.

    Both arrays are cut along axis 1 into chunks of ``batch_size`` columns, in
    order. When the number of columns is not a multiple of ``batch_size`` the
    final chunk is smaller; when there are fewer columns than ``batch_size``
    a single chunk holding all of them is produced. No chunk is ever empty.

    Parameters
    ----------
    X, y : 2-D arrays with the same number of columns (one column per sample).
    batch_size : int
        Maximum number of columns per chunk; must be positive.

    Yields
    ------
    (mini_batch_X, mini_batch_y) : slices of ``X`` and ``y`` over the same
    column range.

    Raises
    ------
    AssertionError
        If ``X`` and ``y`` have a different number of columns.
    ValueError
        If ``batch_size`` is not positive.
    """
    assert X.shape[1] == y.shape[1], 'Size Mismatch'
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')

    m = X.shape[1]
    # A strided range covers the full batches and the shorter tail uniformly;
    # slicing past the last column simply truncates, so no special case (and
    # no reliance on a loop variable that may never have been assigned).
    for start in range(0, m, batch_size):
        end = start + batch_size
        yield X[:, start:end], y[:, start:end]

# Neural Network Implementation

## Stochastic Gradient Descent (SGD)

### Initialize Weights Randomly

In [None]:
# Every parameter is drawn uniformly from [-1, 1). Each weight matrix has
# shape (units_out, units_in) and is then divided by its number of output
# units; the bias column vectors are left unscaled.
W1 = (2*np.random.random((32, 64)) - 1) / 32
b1 = 2*np.random.random((32, 1)) - 1

W2 = (2*np.random.random((16, 32)) - 1) / 16
b2 = 2*np.random.random((16, 1)) - 1

W3 = (2*np.random.random((10, 16)) - 1) / 10
b3 = 2*np.random.random((10, 1)) - 1

# Layer-by-layer shape summary.
print(W1.shape, b1.shape)
print(W2.shape, b2.shape)
print(W3.shape, b3.shape)

In [None]:
# Stochastic gradient descent: the parameters are updated once per training
# column, for 1000 passes over the reshuffled training set.
alpha = 0.1  # step size applied to every gradient below
costs = []   # mean per-sample cost, appended every 10th epoch

tic = time.time()
for epoch in range(1000):
    total_cost = 0
    
    ### Shuffling ###
    # One permutation is applied to inputs and labels together so the columns
    # stay paired. This rebinds the notebook-level X_train / y_train.
    indices = np.arange(X_train.shape[1])
    np.random.shuffle(indices)

    X_train = X_train[:, indices]
    y_train = y_train[:, indices]
    
    #################
    
    for i in range(X_train.shape[1]):
        # Column i of the inputs and labels, reshaped to column vectors.
        y_true = y_train[:, i]
        A0 = X_train[:, i]
        A0 = A0.reshape(A0.shape[0], 1)
        y_true = y_true.reshape(y_true.shape[0], 1)
        
        # feedforward
        Z1 = W1.dot(A0) + b1
        A1 = sigmoid(Z1)

        Z2 = W2.dot(A1) + b2
        A2 = sigmoid(Z2)

        Z3 = W3.dot(A2) + b3
        A3 = sigmoid(Z3)

        y_pred = A3

        # y_true is already 2-D after the reshape above.
        y_true = np.atleast_2d(y_true)
        # Half the squared error, summed over the output units.
        cost = 0.5 * np.sum((y_true - y_pred) ** 2)
        total_cost += cost

        # backpropagate error
        # sigmoid_prime is given activations, so each delta is the cost
        # gradient with respect to that layer's pre-activation Z.
        delta3 = (y_pred - y_true) * sigmoid_prime(y_pred)
        delta2 = W3.T.dot(delta3) * sigmoid_prime(A2)
        delta1 = W2.T.dot(delta2) * sigmoid_prime(A1)

        dW1 = delta1.dot(A0.T)
        dW2 = delta2.dot(A1.T)
        dW3 = delta3.dot(A2.T)

        # Bias gradients: sum over the sample axis (a single column here).
        db1 = delta1.sum(axis=1, keepdims=True)
        db2 = delta2.sum(axis=1, keepdims=True)
        db3 = delta3.sum(axis=1, keepdims=True)


        # Sanity checks: every gradient must match its parameter's shape.
        assert W1.shape == dW1.shape, 'Size Mismatch'
        assert W2.shape == dW2.shape, 'Size Mismatch'
        assert W3.shape == dW3.shape, 'Size Mismatch'
        assert b1.shape == db1.shape, 'Size Mismatch'
        assert b2.shape == db2.shape, 'Size Mismatch'
        assert b3.shape == db3.shape, 'Size Mismatch'

        # update weights
        W1 = W1 - alpha*dW1
        W2 = W2 - alpha*dW2
        W3 = W3 - alpha*dW3

        b1 = b1 - alpha*db1
        b2 = b2 - alpha*db2
        b3 = b3 - alpha*db3
    
    # Every 10th epoch: report and record the mean per-sample cost.
    if epoch % 10 == 0:
        print(epoch, total_cost / X_train.shape[1])
        costs.append(total_cost / X_train.shape[1])
toc = time.time()

In [None]:
# Wall-clock duration of the training loop above, converted from seconds to hours.
(toc - tic) / 3600

In [None]:
# Test inputs and encoded test labels in the column-per-sample layout the
# network code expects.
A0, y_true = X_test.T, y_test.T

In [None]:
# feedforward: push all test columns through the three sigmoid layers at once
Z1 = np.dot(W1, A0) + b1
A1 = sigmoid(Z1)

Z2 = np.dot(W2, A1) + b2
A2 = sigmoid(Z2)

Z3 = np.dot(W3, A2) + b3
A3 = sigmoid(Z3)

# Output-layer activations are the network's predictions.
y_pred = A3

In [None]:
# Predictions and encoded test labels should have matching shapes.
print(y_pred.shape)
print(y_true.shape)

In [None]:
# Decode the network outputs (one row per sample after the transpose) back to
# class labels with the fitted binarizer, then score them against the true
# test labels.
my_labels_pred = binarizer.inverse_transform(y_pred.T)
print(classification_report(test_data[output], my_labels_pred))

In [None]:
# Cost curve for the per-sample run; `costs` holds one value per 10 epochs.
plt.plot(costs)

## Batch Gradient Descent

### Initialize Weights Randomly

In [None]:
# Fresh parameters for the batch run. Every value is drawn uniformly from
# [-1, 1); each weight matrix has shape (units_out, units_in) and is divided
# by its number of output units, while the bias vectors are left unscaled.
W1 = (2*np.random.random((32, 64)) - 1) / 32
b1 = 2*np.random.random((32, 1)) - 1

W2 = (2*np.random.random((16, 32)) - 1) / 16
b2 = 2*np.random.random((16, 1)) - 1

W3 = (2*np.random.random((10, 16)) - 1) / 10
b3 = 2*np.random.random((10, 1)) - 1

# Layer-by-layer shape summary.
print(W1.shape, b1.shape)
print(W2.shape, b2.shape)
print(W3.shape, b3.shape)

In [None]:
# Batch gradient descent: one parameter update per epoch, computed from all
# training columns at once, for 20000 epochs.
m = X_train.shape[1]  # number of training samples (columns)
alpha = 0.9           # step size applied to every gradient below
costs = []            # cost of every epoch
tic = time.time()
for epoch in range(20000):
    
    ### Shuffling ###
    # Inputs and labels are permuted together; this rebinds the notebook-level
    # X_train / y_train.
    # NOTE(review): every column enters each update, so the permutation only
    # changes the order in which columns are summed - confirm it is needed.
    indices = np.arange(X_train.shape[1])
    np.random.shuffle(indices)

    X_train = X_train[:, indices]
    y_train = y_train[:, indices]
    
    #################
    
    A0 = X_train
    y_true = y_train
    
    # feedforward
    Z1 = W1.dot(A0) + b1
    A1 = sigmoid(Z1)

    Z2 = W2.dot(A1) + b2
    A2 = sigmoid(Z2)

    Z3 = W3.dot(A2) + b3
    A3 = sigmoid(Z3)

    y_pred = A3

    y_true = np.atleast_2d(y_true)
    # Half the squared error, summed over outputs and samples, averaged over m.
    cost = 0.5 * np.sum((y_true - y_pred) ** 2) / m
    
    # backpropagate error
    # sigmoid_prime is given activations, so each delta is the cost gradient
    # with respect to that layer's pre-activation Z (one column per sample).
    delta3 = -(y_true - y_pred) * sigmoid_prime(y_pred)
    delta2 = W3.T.dot(delta3) * sigmoid_prime(A2)
    delta1 = W2.T.dot(delta2) * sigmoid_prime(A1)

    # The dot products sum the per-sample contributions; dividing by m
    # averages them, matching the 1/m in the cost.
    dW1 = delta1.dot(A0.T) / m
    dW2 = delta2.dot(A1.T) / m
    dW3 = delta3.dot(A2.T) / m

    db1 = delta1.sum(axis=1, keepdims=True) / m
    db2 = delta2.sum(axis=1, keepdims=True) / m
    db3 = delta3.sum(axis=1, keepdims=True) / m


    # Sanity checks: every gradient must match its parameter's shape.
    assert W1.shape == dW1.shape, 'Size Mismatch'
    assert W2.shape == dW2.shape, 'Size Mismatch'
    assert W3.shape == dW3.shape, 'Size Mismatch'
    assert b1.shape == db1.shape, 'Size Mismatch'
    assert b2.shape == db2.shape, 'Size Mismatch'
    assert b3.shape == db3.shape, 'Size Mismatch'

    # update weights
    W1 = W1 - alpha*dW1
    W2 = W2 - alpha*dW2
    W3 = W3 - alpha*dW3

    b1 = b1 - alpha*db1
    b2 = b2 - alpha*db2
    b3 = b3 - alpha*db3

    costs.append(cost)
    
    # Progress report every 100 epochs.
    if epoch % 100 == 0:
        print(epoch, cost)
toc = time.time()

In [None]:
# Wall-clock duration of the training loop above, converted from seconds to hours.
(toc - tic) / 3600

In [None]:
# Test inputs and encoded test labels in the column-per-sample layout the
# network code expects.
A0, y_true = X_test.T, y_test.T

In [None]:
# feedforward: push all test columns through the three sigmoid layers at once
Z1 = np.dot(W1, A0) + b1
A1 = sigmoid(Z1)

Z2 = np.dot(W2, A1) + b2
A2 = sigmoid(Z2)

Z3 = np.dot(W3, A2) + b3
A3 = sigmoid(Z3)

# Output-layer activations are the network's predictions.
y_pred = A3

In [None]:
# Predictions and encoded test labels should have matching shapes.
print(y_pred.shape)
print(y_true.shape)

In [None]:
# Decode the network outputs (one row per sample after the transpose) back to
# class labels with the fitted binarizer, then score them against the true
# test labels.
my_labels_pred = binarizer.inverse_transform(y_pred.T)
print(classification_report(test_data[output], my_labels_pred))

In [None]:
# Cost curve for the batch run; `costs` holds one value per epoch.
plt.plot(costs)

## Mini-Batch Gradient Descent

### Initialize Weights Randomly

In [None]:
# Fresh parameters for the mini-batch run. Every value is drawn uniformly from
# [-1, 1); each weight matrix has shape (units_out, units_in) and is divided
# by its number of output units, while the bias vectors are left unscaled.
W1 = (2*np.random.random((32, 64)) - 1) / 32
b1 = 2*np.random.random((32, 1)) - 1

W2 = (2*np.random.random((16, 32)) - 1) / 16
b2 = 2*np.random.random((16, 1)) - 1

W3 = (2*np.random.random((10, 16)) - 1) / 10
b3 = 2*np.random.random((10, 1)) - 1

# Layer-by-layer shape summary.
print(W1.shape, b1.shape)
print(W2.shape, b2.shape)
print(W3.shape, b3.shape)

In [None]:
# Mini-batch gradient descent: the training set is reshuffled every epoch and
# the parameters are updated once per mini-batch, for 1000 epochs.
alpha = 0.9       # step size applied to every gradient below
costs = []        # cost of every mini-batch, across all epochs
batch_size = 128  # columns per mini-batch (the last one may be smaller)

tic = time.time()
for epoch in range(1000):
    total_cost = 0
    
    ### Shuffling ###
    # One permutation is applied to inputs and labels together so the columns
    # stay paired. This rebinds the notebook-level X_train / y_train.
    indices = np.arange(X_train.shape[1])
    np.random.shuffle(indices)

    X_train = X_train[:, indices]
    y_train = y_train[:, indices]
    
    #################
    j = 0  # number of mini-batches processed in this epoch
    for mini_bacth_X, mini_bacth_y in get_mini_batches(X_train, y_train, batch_size):
        # m is taken from the batch itself because the final batch can be
        # shorter than batch_size.
        m = mini_bacth_y.shape[1]
        y_true = mini_bacth_y
        A0 = mini_bacth_X
        
        # feedforward
        Z1 = W1.dot(A0) + b1
        A1 = sigmoid(Z1)

        Z2 = W2.dot(A1) + b2
        A2 = sigmoid(Z2)

        Z3 = W3.dot(A2) + b3
        A3 = sigmoid(Z3)

        y_pred = A3

        y_true = np.atleast_2d(y_true)
        # Half the squared error, summed over outputs and samples, averaged
        # over the batch.
        cost = 0.5 * np.sum((y_true - y_pred) ** 2) / m
        total_cost += cost

        # backpropagate error
        # sigmoid_prime is given activations, so each delta is the cost
        # gradient with respect to that layer's pre-activation Z.
        delta3 = (y_pred - y_true) * sigmoid_prime(y_pred)
        delta2 = W3.T.dot(delta3) * sigmoid_prime(A2)
        delta1 = W2.T.dot(delta2) * sigmoid_prime(A1)

        # The dot products sum the per-sample contributions; dividing by m
        # averages them over the batch.
        dW1 = delta1.dot(A0.T) / m
        dW2 = delta2.dot(A1.T) / m
        dW3 = delta3.dot(A2.T) / m

        db1 = delta1.sum(axis=1, keepdims=True) / m
        db2 = delta2.sum(axis=1, keepdims=True) / m
        db3 = delta3.sum(axis=1, keepdims=True) / m


        # Sanity checks: every gradient must match its parameter's shape.
        assert W1.shape == dW1.shape, 'Size Mismatch'
        assert W2.shape == dW2.shape, 'Size Mismatch'
        assert W3.shape == dW3.shape, 'Size Mismatch'
        assert b1.shape == db1.shape, 'Size Mismatch'
        assert b2.shape == db2.shape, 'Size Mismatch'
        assert b3.shape == db3.shape, 'Size Mismatch'

        # update weights
        W1 = W1 - alpha*dW1
        W2 = W2 - alpha*dW2
        W3 = W3 - alpha*dW3

        b1 = b1 - alpha*db1
        b2 = b2 - alpha*db2
        b3 = b3 - alpha*db3
        
        j += 1
        costs.append(cost)
    
    # Every 10th epoch: report the mean of this epoch's per-batch costs.
    if epoch % 10 == 0:
        print(epoch, total_cost / j)
        
toc = time.time()

In [None]:
# Wall-clock duration of the training loop above, converted from seconds to hours.
(toc - tic) / 3600

In [None]:
# Test inputs and encoded test labels in the column-per-sample layout the
# network code expects.
A0, y_true = X_test.T, y_test.T

In [None]:
# feedforward: push all test columns through the three sigmoid layers at once
Z1 = np.dot(W1, A0) + b1
A1 = sigmoid(Z1)

Z2 = np.dot(W2, A1) + b2
A2 = sigmoid(Z2)

Z3 = np.dot(W3, A2) + b3
A3 = sigmoid(Z3)

# Output-layer activations are the network's predictions.
y_pred = A3

In [None]:
# Predictions and encoded test labels should have matching shapes.
print(y_pred.shape)
print(y_true.shape)

In [None]:
# Decode the network outputs (one row per sample after the transpose) back to
# class labels with the fitted binarizer, then score them against the true
# test labels.
my_labels_pred = binarizer.inverse_transform(y_pred.T)
print(classification_report(test_data[output], my_labels_pred))

In [None]:
# Cost curve for the mini-batch run; `costs` holds one value per mini-batch.
plt.plot(costs)