# In [None]:
from random import seed
import pandas as pd
import numpy as np


def split_dataset(dataset, train_perc=0.8,test_perc = 0.2):
    """Shuffle ``dataset`` along its first axis and split it into train/test parts.

    The shuffle is done in place with ``np.random.shuffle``, so the caller's
    array is reordered and the split is reproducible after ``np.random.seed``.

    Parameters
    ----------
    dataset : array whose first axis indexes the samples.
    train_perc : fraction of samples (0..1) placed in the training part.
    test_perc : fraction of samples (0..1) placed in the test part. When
        ``train_perc + test_perc`` reaches 1 the test part is simply every
        sample not used for training, so no sample is lost to rounding.

    Returns
    -------
    (train, test) : slices of the shuffled ``dataset``.

    Raises
    ------
    ValueError : if either fraction lies outside the range [0, 1].
    """
    if not 0.0 <= train_perc <= 1.0:
        raise ValueError('train_perc must be between 0 and 1, got %r' % (train_perc,))
    if not 0.0 <= test_perc <= 1.0:
        raise ValueError('test_perc must be between 0 and 1, got %r' % (test_perc,))

    np.random.shuffle(dataset)
    data_len = len(dataset)
    # first index of the shuffled data that no longer belongs to training
    train_index = int(data_len * train_perc)

    remaining = 1.0 - train_perc
    if test_perc >= remaining or np.isclose(test_perc, remaining):
        # The fractions cover the whole dataset: hand over all leftover rows.
        test_end = data_len
    else:
        # Only part of the leftover rows was requested for testing.
        test_end = train_index + int(data_len * test_perc)

    # Slicing only the first axis keeps 2-D behaviour and also accepts 1-D data.
    train = dataset[:train_index]
    test = dataset[train_index:test_end]
    return (train, test)

def sigmoid(activation):
    """Return the logistic function ``1 / (1 + exp(-activation))`` element-wise.

    The value is computed from ``exp(-|activation|)``, which never exceeds 1,
    so large negative activations no longer overflow ``np.exp`` (the naive
    formula emits a RuntimeWarning there).

    Parameters
    ----------
    activation : scalar or array-like of pre-activation values.

    Returns
    -------
    Values in [0, 1] with the same shape as ``activation`` (a NumPy scalar
    for scalar input).
    """
    activation = np.asarray(activation)
    decay = np.exp(-np.abs(activation))  # always in (0, 1], cannot overflow
    result = np.where(activation >= 0,
                      1.0 / (1.0 + decay),      # identical to the naive form
                      decay / (1.0 + decay))    # same value, rewritten for x < 0
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves real arrays unchanged.
    return result[()]
    
def compute_loss(prediction, actual):
    """Return half the sum of squared errors between targets and predictions.

    ``actual`` is transposed before the subtraction, so it is expected in the
    transposed layout of ``prediction``.
    """
    residual = actual.T - prediction
    squared = residual * residual
    return 0.5 * np.sum(squared)

def back_prop(train_X,W1,W2,layer1_output,layer2_output,actual_output,
              learning_rate=None):
    """Compute the weight updates for one batch gradient-descent step.

    Both layers use sigmoid units and the error is the squared difference
    between ``actual_output`` and ``layer2_output``. The returned deltas are
    meant to be *added* to the weights.

    Parameters
    ----------
    train_X : (n_samples, n_input) input batch.
    W1 : hidden-layer weights (not used by the computation; kept for
        interface compatibility).
    W2 : (n_output, n_hidden) output-layer weights.
    layer1_output : (n_hidden, n_samples) hidden activations.
    layer2_output : (n_output, n_samples) output activations.
    actual_output : (n_samples, n_output) target values.
    learning_rate : step size; when ``None`` the module-level ``lr`` is used.

    Returns
    -------
    (deltaW1, deltaW2) : updates with the shapes of ``W1`` and ``W2``.
    """
    if learning_rate is None:
        learning_rate = lr  # fall back to the module-level setting

    # Average over the batch actually passed in rather than a global count,
    # so the scaling stays correct for any batch size.
    n_samples = train_X.shape[0]

    # error at the output units
    difference = actual_output.T - layer2_output
    # s * (1 - s) is the derivative of the sigmoid at activation value s
    delta_output = layer2_output*(1-layer2_output)*difference
    # propagate the output error back through W2 to the hidden units
    delta_hidden = layer1_output*(1-layer1_output)*W2.T.dot(delta_output)
    deltaW2 = learning_rate*(delta_output.dot(layer1_output.T)/n_samples)
    deltaW1 = learning_rate*(delta_hidden.dot(train_X)/n_samples)

    return (deltaW1,deltaW2)
    
def train_network(train_X, train_y):
    """Train a network with one sigmoid hidden layer by batch gradient descent.

    The layer sizes and the number of epochs come from the module-level
    settings ``n_hidden``, ``num_classes`` and ``n_epoch``. Weights are drawn
    from ``np.random.random``, so seed NumPy beforehand for reproducible runs.

    Parameters
    ----------
    train_X : (n_samples, n_input) array of input features.
    train_y : (n_samples, num_classes) array of target outputs.

    Returns
    -------
    (W1, W2) : trained hidden-layer and output-layer weight matrices.
    """
    n_input = train_X.shape[1]  # the number of columns in the training data
    W1 = np.random.random((n_hidden, n_input))
    W2 = np.random.random((num_classes, n_hidden))
    for epoch in range(n_epoch):
        # forward pass over the whole training set
        layer1_output = sigmoid(W1.dot(train_X.T))
        layer2_output = sigmoid(W2.dot(layer1_output))

        # backward pass; the returned deltas are added to the weights
        (deltaW1, deltaW2) = back_prop(train_X, W1, W2,
                                       layer1_output, layer2_output, train_y)
        W2 = W2 + deltaW2
        W1 = W1 + deltaW1

        # report progress every 1000 epochs; the loss is that of the forward
        # pass above, i.e. before this epoch's weight update
        if epoch % 1000 == 0:
            loss = compute_loss(layer2_output, train_y)
            print('Loss in {0}th epoch is {1}'.format(epoch, loss))

    return (W1, W2)

def evaluate(test_X,test_y,params):
    """Return the fraction of test samples the network classifies correctly.

    ``params`` is the ``(W1, W2)`` weight pair. The predicted class of a
    sample is the index of its largest output unit.
    """
    hidden_weights, output_weights = params
    hidden = sigmoid(hidden_weights.dot(test_X.T))
    scores = sigmoid(output_weights.dot(hidden))

    # one column per sample, so the arg-max runs down the rows
    predicted = scores.argmax(axis=0)
    n_correct = np.sum(predicted == test_y)
    return n_correct / len(test_y)

def convert_to_OH(data,num_classes):
    """Return one-hot rows for the integer class labels in ``data``.

    The result has one row per label and ``num_classes`` columns; row ``i``
    is all zeros except for a 1 in column ``data[i]``.
    """
    n_samples = len(data)
    encoded = np.zeros((n_samples, num_classes))
    # pair every row index with its label and switch that cell on
    row_ids = np.arange(n_samples)
    encoded[row_ids, data] = 1
    return encoded


# Fix the NumPy seed so the shuffle and the weight initialisation are repeatable.
np.random.seed(0)
# load and prepare data
# (alternative dataset: 'seeds_dataset.csv')
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
filename = '/home/ishaan/Downloads/diabetes.csv'
df = pd.read_csv(filename,dtype=np.float64)
dataset = np.array(df)

# Min-max normalise every field except the last column (the class label).
min_data = dataset.min(axis = 0)
max_data = dataset.max(axis = 0)
feature_range = max_data[0:-1] - min_data[0:-1]
# A constant column has zero range; dividing by it would fill the column with
# NaN. Dividing by 1 instead leaves such a column at 0 after the min is removed.
feature_range[feature_range == 0] = 1.0
dataset[:,0:-1] = (dataset[:,0:-1] - min_data[0:-1])/feature_range

(train, test) = split_dataset(dataset)
print(train[:5,:-1])
n_train = len(train)
n_test = len(test)

# hyper-parameters read as module-level settings by the training functions
lr = 0.8
n_epoch =15000

# determine the number of classes from the distinct labels in the last column
num_classes = len(np.unique(dataset[:,-1]))
# the labels are used directly as one-hot column indices
train_one_hot = convert_to_OH(train[:,-1].astype(int),num_classes)

n_hidden = 15

# train on the training part, then report accuracy on the held-out part
params = train_network(train[:,:-1],train_one_hot)
accuracy = evaluate(test[:,:-1],test[:,-1],params)*100
print('Mean Accuracy: %.3f%%' % accuracy)