CT475 December 2017/2018 Assignment 3 - Neural Net Owl type prediction
--------------


In [1]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from scipy import ndimage
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
import time
from functools import *
import seaborn as sns
%matplotlib inline


Reading data file into dataframe
-------------

owls15.csv is required to be located in same dir as notebook.

In [2]:
df = pd.read_csv('./owls15.csv' , header = None)
df.head(5)

Unnamed: 0,0,1,2,3,4
0,3.0,5.0,1.6,0.2,LongEaredOwl
1,3.2,4.7,1.6,0.2,LongEaredOwl
2,3.4,4.6,1.4,0.3,LongEaredOwl
3,3.6,5.0,1.4,0.2,LongEaredOwl
4,4.1,5.2,1.5,0.1,LongEaredOwl


Preprocessing
---------------

Set columns headers of dataframe

In [3]:
df.columns = ['body-length', 'wing-length', 'body-width', 'wing-width', 'type']
df.head(2)

Unnamed: 0,body-length,wing-length,body-width,wing-width,type
0,3.0,5.0,1.6,0.2,LongEaredOwl
1,3.2,4.7,1.6,0.2,LongEaredOwl


Categorically encode type to 1 or 2 or 3

In [4]:
typeCategory = pd.Series(df.type.astype('category').cat.codes.as_matrix()).to_frame('Type_Cat')
df['typeCategory'] = typeCategory
df[['typeCategory']] = df[['typeCategory']].astype(str) 
df.head(2)

Unnamed: 0,body-length,wing-length,body-width,wing-width,type,typeCategory
0,3.0,5.0,1.6,0.2,LongEaredOwl,1
1,3.2,4.7,1.6,0.2,LongEaredOwl,1


Normalize the feature columns so that it is range between current value - minimum for column / column max - column min

In [5]:
dfNorm = df[['body-length', 'wing-length', 'body-width', 'wing-width']].apply(lambda x: (x - x.min()) / (x.max() - x.min()))
dfNorm.head(2)


Unnamed: 0,body-length,wing-length,body-width,wing-width
0,0.454545,0.205882,0.101695,0.041667
1,0.545455,0.117647,0.101695,0.041667


One hot encode type
-------------

In [6]:
p = df['type']
mlb = MultiLabelBinarizer()
oneHotType = mlb.fit_transform(p.as_matrix())

In [7]:
p = df['typeCategory']
mlb = MultiLabelBinarizer()
oneHotTypeCat = mlb.fit_transform(p.as_matrix())

Append one hot encoded columns of string type and ctegorical type to dataframe

In [8]:
oneHotTypeAsStr = list(map(lambda x  : ''.join(map(str, x)) , oneHotType))
oneHotTypeCatAsStr = list(map(lambda x  : ''.join(map(str, x)) , oneHotTypeCat))
df['oneHotEncodedType'] = oneHotTypeAsStr
df['oneHotTypeCat'] = oneHotTypeCatAsStr
df.head(2)

Unnamed: 0,body-length,wing-length,body-width,wing-width,type,typeCategory,oneHotEncodedType,oneHotTypeCat
0,3.0,5.0,1.6,0.2,LongEaredOwl,1,11101111111110,10
1,3.2,4.7,1.6,0.2,LongEaredOwl,1,11101111111110,10


Define prediction data
-----------

Although I one hot encode type and category I just used the one hot encoding of category as prediction target to train model.

In [9]:
train_set_y = np.asarray(list(map(lambda x :   list(map(int, list(x))), df['oneHotTypeCat'].as_matrix())))
train_set_y.shape

(135, 3)

Split training and test data into 2/3 and 1/3 training and test sets respectively.
------------

As per doc http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html train_test_split splits randomly by default "Split arrays or matrices into random train and test subsets"

In [10]:
X_train, X_test, Y_train, Y_test = train_test_split(dfNorm, train_set_y, test_size=0.33)


Define activation function
----------

For this assignment using Sigmoid activation function.

In [11]:
def sigmoid(z):
    s = 1 / (1 + np.exp(-z))  
    return s

Forward / Backward propagation
---------------

The gradients dictionary contains the partial derivative values for the weights and bias vectors.

In [12]:
def propagate(Y, X, numberTrainingExamples, w , b):

    A = sigmoid(np.dot(w , X) + b)

    cost = - (1 / numberTrainingExamples)*(Y * np.log(A) + (1-Y) * (1-np.log(A))).sum(axis=0)
    cost = cost.mean();
    
    dw =  ( 1 / numberTrainingExamples ) * np.dot((A - Y) , X.T / numberTrainingExamples)
    db =  (A - Y).mean(axis=1, keepdims=True)

    gradients = {"dw": dw,
             "db": db}

    return gradients, cost

Gradient Descent
--------------

The first iteration of this loop is forward propagation as computing an initial value for the weights and bias. Subsequent iterations compute the weights and bias by back propagating the current weights and bias by re-invoking propagate function.

In [13]:
def optimize(w , b, Y , X, num_iterations, learning_rate):

    for i in range(num_iterations):

        gradients, cost = propagate(Y, X, len(X), w , b)

        dw = gradients["dw"]
        db = gradients["db"]

        w = w - learning_rate * dw
        b = b - learning_rate * db

        if i % 20000 == 0:
            print(cost)
            
        if i == 49999:
            print('Final cost ' , cost)

    return w , b

Setup model parameters
------------

In [14]:
lrate = 0.0001

Train model on training set
-------------

reshaped_x will match number of features of X (input) attributes.

reshaped_y will match number of features of Y (output) attributes.

In [15]:
def model(X_train, Y_train, num_iterations, learning_rate):

    X_train = X_train.T
    Y_train = Y_train.T
    
    reshaped_y = Y_train.shape[0]
    reshaped_x = X_train.shape[0]

    w=np.zeros((reshaped_y, reshaped_x))
    b=np.zeros((reshaped_y, 1))
    learnedWeightValues , learnedBiasValues = optimize(w , b, Y_train , X_train , num_iterations, learning_rate)
    
    Y_prediction_train = sigmoid(np.dot(learnedWeightValues , X_train) + learnedBiasValues)

    Y_prediction_train_val = np.asarray((list(map(lambda x : list(map(lambda x2 : int(round((x2))) , x)) , Y_prediction_train))))

    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train_val - Y_train)) * 100))
    
    return learnedWeightValues , learnedBiasValues

Invoke model function to learn weights and bias values.
-------------------

In [16]:
learnedWeightValues , learnedBiasValues = model(X_train, Y_train, num_iterations = 50000, learning_rate = lrate)


-0.67328679514
-0.945569018899
-1.0702349213
Final cost  -1.11880949918
train accuracy: 86.29629629629629 %


Classify a single example
--------------

In [17]:
def toClassify(newCase) : 
    newCasePrediction = sigmoid(np.dot(learnedWeightValues , newCase) + learnedBiasValues.T).flatten()
    arr = []
    for a in newCasePrediction : 
        arr.append(int(round(a)))

    return np.asarray(arr)

Evaluate model on test data
---------------

In [18]:
numberSamples = len(X_test)

correctPredictionCount = 0;
for i in range(numberSamples) : 
    
    newCase = X_test.as_matrix()[i]

    if(np.array_equal(Y_test[i] , toClassify(newCase)) == True):
        correctPredictionCount = correctPredictionCount + 1
        
print('Evaluation set accuracy : ' , correctPredictionCount / numberSamples * 100 , '%')

Evaluation set accuracy :  62.22222222222222 %


Results
----------

10 repeated invocations of model result in following results.

In [19]:
ndf = []

ndf.append(('1','.0001','50000','84.4%','57.8%', '-1.108'))
ndf.append(('2','.0001','50000','85.2%','57.8%', '-1.142'))
ndf.append(('3','.0001','50000','86.3%','64.4%', '-1.120'))
ndf.append(('4','.0001','50000','85.9%','68.8%', '-1.121'))
ndf.append(('5','.0001','50000','76.7%','42.2%', '-1.113'))
ndf.append(('6','.0001','50000','82.59%','57.8%', '-1.11'))
ndf.append(('7','.0001','50000','85.92%','68.8%', '-1.115'))
ndf.append(('8','.0001','50000','84.07%','71.1%', '-1.11'))
ndf.append(('9','.0001','50000','84.81%','60.0%', '-1.107'))
ndf.append(('10','.0001','50000','85.56%','62.2%', '-1.126'))
toCC = pd.DataFrame(ndf)

toCC.columns = ['ID' , 'LearningRate'  , 'NumberIterations' , 'TrainingSetAccuracy', 'TestSetAccuracy', 'CostValue']

toCC

Unnamed: 0,ID,LearningRate,NumberIterations,TrainingSetAccuracy,TestSetAccuracy,CostValue
0,1,0.0001,50000,84.4%,57.8%,-1.108
1,2,0.0001,50000,85.2%,57.8%,-1.142
2,3,0.0001,50000,86.3%,64.4%,-1.12
3,4,0.0001,50000,85.9%,68.8%,-1.121
4,5,0.0001,50000,76.7%,42.2%,-1.113
5,6,0.0001,50000,82.59%,57.8%,-1.11
6,7,0.0001,50000,85.92%,68.8%,-1.115
7,8,0.0001,50000,84.07%,71.1%,-1.11
8,9,0.0001,50000,84.81%,60.0%,-1.107
9,10,0.0001,50000,85.56%,62.2%,-1.126


In [20]:
trainAsFloat = list(map(lambda x : float(x.replace('%' , '')) , toCC['TrainingSetAccuracy'] ))
print('Average training set accruacy' , sum(trainAsFloat)/len(trainAsFloat))

Average training set accruacy 84.14499999999998


In [22]:
testAsFloat = list(map(lambda x : float(x.replace('%' , '')) , toCC['TestSetAccuracy'] ))
print('Average test set accruacy' , sum(testAsFloat)/len(testAsFloat))

Average test set accruacy 61.09000000000001


------------------------------

Assignment End
------------