In [1]:
from sklearn.preprocessing import Normalizer
import pandas
import numpy
import os

url = "data/abalone.data"
dataframe = pandas.read_csv(url, header=-1)

dumm = pandas.get_dummies(dataframe, prefix=None, prefix_sep='_', dummy_na=False, columns=[0,8], sparse=False, drop_first=False)

y = dumm.as_matrix(columns=dumm.columns[-28:])
# y = dumm.as_matrix(columns=dumm.columns[-3:])
X = dumm.as_matrix(columns=dumm.columns[:-28])
#  = dumm.as_matrix(columns=)
y.T.shape

(28, 4177)

In [2]:
%matplotlib inline
import math
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
from sklearn.datasets.samples_generator import make_moons
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss


# # DATA FOR CLASSIFICATION
# np.random.seed(0)
# # X, y = make_blobs(n_samples=400, centers=2, n_features=2, cluster_std=0.1,random_state=0)
# X, y = make_moons(n_samples=400, shuffle=True, noise=None,random_state=0)
# plt.scatter(X[:,0], X[:,1], s=40, c=y, cmap=plt.cm.Spectral)

# reshaped_y = np.array(y).reshape(len(y),1)

# enc = OneHotEncoder()
# enc.fit(reshaped_y)
# encoded_y = enc.transform(reshaped_y).toarray()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
print(y_train.shape)

(2798, 28)


In [3]:
# FUNCTIONS FOR FORWARD PASS
def ReLU(x):
    return x*(x > 0)


def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)


def differentiate_ReLU(x):
    x[x <= 0] = 0
    x[x > 0] = 1
    return x

def differentiate_tanh(a_1):
    return 1 - np.tanh(a_1)**2

In [4]:
class NeuralNetwork:
    def __init__(self, neurons, x, y):
        np.random.seed(4)
        
        self.W_1 = np.random.rand(neurons,x.shape[1])
        self.W_15 = np.random.rand(neurons,neurons)
        self.W_2 = np.random.rand(neurons,neurons)
        self.W_out = np.random.rand(y.shape[1],neurons)
        self.W_ResNet = np.random.rand(y.shape[1],x.shape[1])
        
        self.b_1 = np.random.rand(neurons,1)
        self.b_15 = np.random.rand(neurons,1)
        self.b_2 = np.random.rand(neurons,1)
        self.b_out = np.random.rand(y.shape[1],1)
        
        
    def train(self, epochs, batch_size, X_train, X_test, y_train, y_test):
        for iteration in range(epochs):
            for i in range(len(X_train)//batch_size):
                hs = feedforward(nn, X_train[i:i+batch_size], y_train[i:i+batch_size])
                gradient = get_gradient(nn, hs)
                update_neural_network(nn, gradient)
                
            if (iteration%(epochs//1000)==0):
                hs = feedforward(nn, X_test, y_test)
                print(log_loss(y_test, hs['z_out'].T))
                
    
        if(len(X_train)%batch_size != 0):
            k = len(X_train)%batch_size
        
            hs = feedforward(nn, X_train[k:], y_train[k:])
            gradient = get_gradient(nn, hs)
            update_neural_network(nn, gradient)
            

def feedforward(network, x, y):
        forward_path = dict()
        
        forward_path['x'] = x.T
        forward_path['y'] = y.T
        
        forward_path['a_1'] = np.add(network.W_1.dot(forward_path['x']), network.b_1)
        forward_path['z_1'] = np.tanh(forward_path['a_1'])
        forward_path['a_15'] = np.add(network.W_15.dot(forward_path['z_1']), network.b_15)
        forward_path['z_15'] = np.tanh(forward_path['a_15'])
        forward_path['a_2'] = np.add(network.W_2.dot(forward_path['z_15']), network.b_2)
        forward_path['z_2'] = ReLU(forward_path['a_2'])
        forward_path['a_out_1'] = network.W_out.dot(forward_path['z_2'])
        forward_path['a_out_2'] = network.W_ResNet.dot(forward_path['x'])
        forward_path['a_out'] = np.add(np.add(forward_path['a_out_1'], forward_path['a_out_2']), network.b_out)
        forward_path['z_out'] = softmax(forward_path['a_out'])
        
        return forward_path
        
        
def get_gradient(network, hidden_state):
        gradient = dict()
        
        derivative_of_tanh = differentiate_tanh(hidden_state['a_1'])
        derivative_of_ReLU = differentiate_ReLU(hidden_state['a_2'])
 
        # application of the chain rule to find derivative of the loss function with respect to weights2 and weights1
        delta_out = hidden_state['y'] - hidden_state['z_out'] 

        
        first_mul = network.W_out.T.dot(delta_out)
        delta_2 = np.multiply(derivative_of_ReLU, first_mul)
        delta_15 = np.multiply(derivative_of_tanh, network.W_2.T.dot(delta_2))
        delta_1 = np.multiply(derivative_of_tanh, network.W_15.T.dot(delta_15))
            
        dW_ResNet = delta_out.dot(hidden_state['x'].T)
        dW_out = delta_out.dot(hidden_state['z_2'].T)
        dW_2 = delta_2.dot(hidden_state['z_15'].T)
        dW_15 = delta_15.dot(hidden_state['z_1'].T)
        dW_1 = delta_1.dot(hidden_state['x'].T)
        
        gradient['W_1'] = dW_1
        gradient['W_15'] = dW_15
        gradient['W_2'] = dW_2
        gradient['W_out'] = dW_out
        gradient['W_ResNet'] = dW_ResNet
        
        gradient['b_1'] = np.expand_dims(np.mean(delta_1, axis=1), axis=1)
        gradient['b_15'] = np.expand_dims(np.mean(delta_15, axis=1), axis=1)
        gradient['b_2'] = np.expand_dims(np.mean(delta_2, axis=1), axis=1)
        gradient['b_out'] = np.expand_dims(np.mean(delta_out, axis=1), axis=1)
        
        return gradient
    
def update_neural_network(network, gradient, learning_rate = 0.025):
    network.W_1 += learning_rate * gradient['W_1']
    network.W_15 += learning_rate * gradient['W_15']
    network.W_2 += learning_rate * gradient['W_2']
    network.W_out += learning_rate * gradient['W_out']
    network.W_ResNet += learning_rate * gradient['W_ResNet']
        
    network.b_1 += learning_rate * gradient['b_1']
    network.b_2 += learning_rate * gradient['b_2']
    network.b_out += learning_rate * gradient['b_out']

In [None]:
nn = NeuralNetwork(64, X_train, y_train)
nn.train(epochs = 20000,batch_size = 1,X_train = X_train, X_test = X_test, y_train = y_train, y_test = y_test)

2.309464543564578
2.1116807569601845
2.074749386289508
2.0547941079751357
2.0416605673630865
2.032169568010843
2.0249223798328133
2.0191842759283682
2.0145231607960916
2.010664252994006
2.0074225281637825
2.004667790609966
2.002305129937996
2.00026331338281
1.9984875631465755
1.9969348884991982
1.995570969241333
1.9943680134319113
1.9933032448862924
1.9923578086344809
1.991515960833953
1.990764457063473
1.9900920822605057
1.989489284046446
1.9889478830414389
1.9884608415278817
1.9880220770056618
1.9876263107259768
1.9872689437699553
1.9869459550126045
1.9866538166088537
1.9863894236030406
1.9861500349921228
1.9859332241307712
1.9857368367978472
1.9855589555800164
1.9853978694920253
1.9852520479611686
1.9851201184681735
1.9850008472676128
1.9848931227153088
1.984795940813593
1.9847083926521818
1.9846296534762324
