In [72]:
import pandas as pd
import numpy as np

In [73]:
#DATA PREPROCESSING
def process_data(dataset):
    m = np.shape(dataset)[0]
    x = np.shape(dataset)[1]
    
    X = dataset.values[: , 1:x-1]
    y = dataset.values[: , x-1:x]
    
    n_y = np.unique(y).shape[0]
    n_x = np.shape(X)[1]
    
    Y = np.zeros((m, n_y))
    
    for i in range(np.shape(Y)[0]):
        if y[i]=='Iris-setosa':
            Y[i] = np.array([1, 0, 0])
        elif y[i]=='Iris-versicolor':
            Y[i] = np.array([0, 1, 0])
        elif y[i]=='Iris-virginica':
            Y[i] = np.array([0, 0, 1])
            
    X = X.astype('float64')
    Y = Y.astype('float64')
    
    X = X.T 
    Y = Y.T
    
    return X, Y, m, n_x, n_y

In [227]:
def find_hyperparams(n_x, n_y):
    layer_dims = [n_x, 20, n_y]
    learning_rate = 0.01
    iteration = 20000
    
    return layer_dims, learning_rate, iteration

In [228]:
#ACTIVATION FUNCTION

def sigmoid(z):
    x = 1.0/(1.0 + np.exp(-z))
    return x

def relu(z):
    x = np.maximum(0, z)
    return x

In [229]:
#INITIALIZE WEIGHTS
def initialize_weights(layer_dims):
    L = np.shape(layer_dims)[0]
    weights = {}
    np.random.seed(3)
    for l in range(L-1):
        W = np.random.randn(layer_dims[l+1], layer_dims[l]) * 0.01
        b = np.zeros((layer_dims[l+1], 1))
        weights['W' + str(l+1)] = W
        weights['b' + str(l+1)] = b
    
    return weights

In [230]:
#FORWARD PROPAGATION
def forward_propagation(X, layer_dims, weights):
    L = np.shape(layer_dims)[0]
    
    cache = {}
    A_prev = X
    cache['A0'] = X
        
    for l in range(L-1):
        W = weights['W' + str(l+1)]
        b = weights['b' + str(l+1)]
        
        Z = np.dot(W, A_prev) + b
        A = np.zeros(np.shape(Z))
        
        if(l == L-2):
            A = sigmoid(Z)
        else:
            A = relu(Z)
        
        cache['Z' + str(l+1)] = Z
        cache['A' + str(l+1)] = A
        
        A_prev = A
    
    return cache

In [231]:
#COST FUNCTION

def cost(AL, Y, m):
    x = (-1/m) * np.sum( (Y) * np.log(AL) + (1-Y) * np.log(1-AL) )
    return x

In [232]:
#BACKPROPAGATION
def backward_propagation(Y, cache, weights, layer_dims, m):
    L = np.shape(layer_dims)[0]
    
    A = cache['A' + str(L-1)]
    dA = - (np.divide(Y, A) - np.divide(1 - Y, 1 - A)) 
    
    gradients = {}
    
    for l in  reversed(range(L-1)):
        W = weights['W' + str(l+1)]
        A_prev = cache['A' + str(l)]
        Z = cache['Z' + str(l+1)]
        dZ = np.zeros(np.shape(Z))
        if l == L-2:
            dZ = dA * sigmoid(Z) * (1 - sigmoid(Z))
        else:
            dZ = dA * 1 * (Z >= 0)
            #dZ = dA * sigmoid(Z) * (1 - sigmoid(Z))
        
        dW = (1/m) * np.dot(dZ, A_prev.T)
        db = (1/m) * np.sum(dZ, axis = 1, keepdims = True)
        
        dA = np.dot(W.T, dZ)
        
        gradients['dW' + str(l+1)] = dW
        gradients['db' + str(l+1)] = db

    return gradients

In [233]:
#UPDATE WEIGHTS

def update_weights(weights, gradients, learning_rate, layer_dims):
    L = np.shape(layer_dims)[0]
    
    #print('weights: ')
    #print(weights['W' + str(L-1)])
    
    #print('gradients: ')
    #print(gradients['dW' + str(L-1)])
    
    for l in range(L-1):
        W = weights['W' + str(l+1)];
        dW = gradients['dW' + str(l+1)]
        b = weights['b' + str(l+1)]
        db = gradients['db' + str(l+1)]
        W = W - learning_rate * dW
        b = b = learning_rate * db
        
        weights['W' + str(l+1)] = W
        weights['b' + str(l+1)] = b
    return weights
    #print('weights: ')
    #print(weights['W' + str(L-1)])

In [234]:
#NEURAL NET

def train_neural_network():
    iris_train = pd.read_csv('Iris_Train_2.csv', sep=',', header=0)
    iris_test = pd.read_csv('Iris_Test_2.csv', sep=',', header=0)
    
    X, Y, m, n_x, n_y = process_data(iris_train)
    layer_dims, learning_rate, iteration = find_hyperparams(n_x, n_y)
    L = np.shape(layer_dims)[0]
    
    weights = initialize_weights(layer_dims)
    for i in range(iteration):
        cache = forward_propagation(X, layer_dims, weights)
        
        if i%1000 == 0:
            AL = cache['A' + str(L-1)]
            print(cost(AL, Y, m))
            print(np.shape(AL))
            
        gradients = backward_propagation(Y, cache, weights, layer_dims, m)
        
        if i%1000 == 0:
            dA = gradients['dW' + str(L-1)]
            print(np.shape(AL))
            
        weights = update_weights(weights, gradients , learning_rate, layer_dims)
        
    #find accuracy training
    X, Y, m, n_x, n_y = process_data(iris_train)
    cache = forward_propagation(X, layer_dims, weights)
    
    AL = cache['A' + str(L-1)];
    output = AL
    
    for i in range(m):
        for j in range(n_y):
            output[j,i] = 1 * (output[j,i] == max(output[:, i]))
    
    loss = np.absolute(output - Y)
    loss = np.sum(loss, axis=0)
    success = np.count_nonzero(loss==0)
    
    accuracy = (success/m) * 100
    print('training accuracy = ' + str(accuracy) + '%')
    
    #find accuracy test
    X, Y, m, n_x, n_y = process_data(iris_test)
    cache = forward_propagation(X, layer_dims, weights)
    
    AL = cache['A' + str(L-1)];
    output = AL
    
    for i in range(m):
        for j in range(n_y):
            output[j,i] = 1 * (output[j,i] == max(output[:, i]))
    
    loss = np.absolute(output - Y)
    loss = np.sum(loss, axis=0)
    success = np.count_nonzero(loss==0)
    
    accuracy = (success/m) * 100
    print('test accuracy = ' + str(accuracy) + '%')
    

In [235]:
train_neural_network()

2.079174904981336
(3, 120)
(3, 120)
0.8463524661069551
(3, 120)
(3, 120)
0.6809029126272595
(3, 120)
(3, 120)
0.4208868354031794
(3, 120)
(3, 120)
0.23550210510008818
(3, 120)
(3, 120)
0.1690323260134799
(3, 120)
(3, 120)
0.13920107012056263
(3, 120)
(3, 120)
0.12232585408512703
(3, 120)
(3, 120)
0.11136363008731334
(3, 120)
(3, 120)
0.10361149580592495
(3, 120)
(3, 120)
0.09781597257805694
(3, 120)
(3, 120)
0.09329542056688525
(3, 120)
(3, 120)
0.08965505129290978
(3, 120)
(3, 120)
0.08664912589971464
(3, 120)
(3, 120)
0.08411664068899238
(3, 120)
(3, 120)
0.0819471878891387
(3, 120)
(3, 120)
0.08006421854803795
(3, 120)
(3, 120)
0.07841185159350994
(3, 120)
(3, 120)
0.07694258156513639
(3, 120)
(3, 120)
0.07562487744485952
(3, 120)
(3, 120)
training accuracy = 98.33333333333333%
test accuracy = 93.33333333333333%
