In [67]:
#Jimi Michael, B455 Project 1
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import KFold 

In [68]:
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data',          
    names = ( 'Class', 'Alcohol', 'Malic Acid', 'Ash', 'Alcalinity of Ash', 'Magnesium', 'Total Phenols', 'Flavanoids', 
             'Nonflavanoid Phenols', 'Proanthocyanins', 'Color Intensity', 'Hue', 'OD280/OD315 of Diluted Wines', 'Proline'))

addition = pd.get_dummies(data['Class'])
data = pd.concat([data,addition],axis= 1)
x = data.drop([1, 2, 3,'Class'], axis = 1)
y = data[[1, 2, 3]].values

#softmax activation
def softmax(z):
    return np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)
#loss
def loss_derivative(y, yPred):
    return (yPred - y)

def tanh_derivative(x):
    return (1 - np.power(x, 2))


In [69]:
# MLP initialization 
def myModel(input, hidden, output):
    # in
    w1 = 2 *np.random.randn(input, hidden) - 1
    b1 = np.zeros((1, hidden))
    # hid
    w2 = 2 * np.random.randn(hidden, hidden) - 1
    b2 = np.zeros((1, hidden))
    #out
    w3 = 2 * np.random.rand(hidden, output) - 1
    b3 = np.zeros((1,output))
    

    return {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2, 'w3': w3,'b3': b3}

In [70]:
# Method to handle forward propogation
def forwardProp(model, activ):
  
  # Get mapped model data
  w1 = model['w1'] 
  b1 = model['b1'] 
  w2 = model['w2'] 
  b2 = model['b2'] 
  w3 = model['w3'] 
  b3 = model['b3'] 
  
  # First step tanh
  firstLine = activ.dot(w1) + b1
  firstAct = np.tanh(firstLine)
  
  # Second step softmax
  secLine = firstAct.dot(w2) + b2
  secAct = softmax(secLine)
  
  # Third step softmax
  thirdLine = secAct.dot(w3) + b3
  thirdAct = softmax(thirdLine)
  
  # Return map of forwardProp
  return {'activ': activ, 'linOne': firstLine, 'firstAct': firstAct, 'secLine': secLine, 'secAct': secAct,
                        'thirdLine': thirdLine, 'thirdAct' : thirdAct}

In [71]:
def backwardPropogation(model, forProp, y):
    #Data from model
    w1 = model['w1'] 
    b1 = model['b1'] 
    w2 = model['w2'] 
    b2 = model['b2'] 
    w3 = model['w3'] 
    b3 = model['b3'] 
    
    # Data from forProp
    activ = forProp['activ']
    firstAct = forProp['firstAct']
    secAct = forProp['secAct']
    thirdAct = forProp['thirdAct']
    m = y.shape[0]

    # Loss calulations, goes backwords
    thirdLine = loss_derivative(y, thirdAct)
    lossWeight3 = (secAct.T).dot(thirdLine) * (1/m)
    lossBias3 = np.sum(thirdLine, axis=0) * (1/m)
    
    secLine = np.multiply(thirdLine.dot(w3.T), tanh_derivative(secAct))
    lossWeight2 = np.dot(firstAct.T, secLine)* (1/m)
    lossBias2 = np.sum(secLine, axis=0)* (1/m)
    
    firstLine = np.multiply(secLine.dot(w2.T), tanh_derivative(firstAct))
    lossWeight1 = np.dot(activ.T, firstLine)* (1/m)
    lossBias1 = np.sum(firstLine, axis=0)* (1/m)
    
    #Weights after backProp
    return {'lw3': lossWeight3, 'lb3': lossBias3, 'lw2': lossWeight2, 'lb2': lossBias2, 'lw1': lossWeight1,'lb1': lossBias1}

In [72]:
def updateWeightBias(model, backProp, learnRate):
    # Load original weights and biases
    weightOne, biasOne, weightTwo, biasTwo, weightThree, biasThree = model['w1'], model['b1'], model['w2'], model['b2'], model['w3'], model['b3']
    
    # Update weights and biases
    weightOne -= backProp['lw1'] * learnRate
    biasOne -=  backProp['lb1'] * learnRate
    weightTwo -= backProp['lw2'] * learnRate
    biasTwo -= backProp['lb2'] * learnRate
    weightThree -= backProp['lw3'] * learnRate
    biasThree -= backProp['lb3'] * learnRate
    
    # Return updated values
   
    return {'w1': weightOne, 'b1': biasOne, 'w2': weightTwo, 'b2': biasTwo, 'w3': weightThree,'b3': biasThree}


In [73]:
#accuracy calculation
def accuracy(testing, actual):
  incorrect = 0
  for i in range(len(testing)):
    if((testing[i] != actual[i]).any()):
      incorrect = incorrect + 1
  return 1 - (incorrect/len(testing))

In [74]:
#model prediction
def predict(model, x):
    # Do forward pass
    fp = forwardProp(model, x)
    yPred = np.argmax(fp['thirdAct'], axis=1)
    return yPred

In [75]:
#model training
def train(model, x , y, learnRate, count):
    
    for i in range(0, count):
        fp = forwardProp(model, x) # Forward propagation
        bp = backwardPropogation(model, fp, y) # Back propagation
        model = updateWeightBias(model, bp, learnRate) # Update weights and bias
    yPred = predict(model, x)
    yActual = y.argmax(axis=1)
    
    print('Accuracy: ', accuracy(yPred, yActual))
    return model

In [76]:
# gets random guess
def randomGuess(real):
  output = []
  
  for i in range(0,len(real)):
    
    x = np.random.randint(3)+1
    if x == 1:
      output.append([1, 0, 0])
    elif x == 2:
      output.append([0, 1, 0])
    else:
      output.append([0, 0, 1])
  output = np.array(output)
  
  print('Random Guess Accuracy:', accuracy(output, real) )

In [77]:
# Initialization of KFold Algorithm
kfold = KFold(n_splits = 5, random_state = None, shuffle = True)

for trainIndex, testIndex in kfold.split(data):
  print("TRAIN:", trainIndex, "TEST:", testIndex) # Prints which data rows are in the training and testing sets
  XTrain, XTest = x.iloc[trainIndex], x.iloc[testIndex] 
  YTrain, YTest = y[trainIndex], y[testIndex]
  
  # Batch normalizes the data using a standard scalar 
  scaler = StandardScaler()
  scaler.fit(XTrain)
  XTrain = scaler.transform(XTrain)
  XTest = scaler.transform(XTest)
  
  # Implementation of Model
  model = myModel(13, 5, 3) # Input layer = 13; Hidden layer = 7; Output layer = 3 (Tested out various hidden layer values)
  model = train(model, XTrain, YTrain, 0.5, 5000)
  randomGuess(YTest)


TRAIN: [  0   1   4   5   6   7   8   9  10  11  12  14  15  16  18  19  20  21
  22  24  26  27  29  30  32  33  34  35  36  37  38  39  40  42  43  44
  45  46  47  48  49  50  51  52  53  54  55  56  57  60  61  62  63  65
  66  67  68  69  70  71  72  74  75  76  77  78  80  83  85  86  87  88
  89  92  93  94  95  96  97  98 100 101 102 103 104 105 106 107 109 110
 112 114 115 117 118 119 120 121 122 123 124 125 126 127 133 134 136 137
 138 140 142 143 144 145 146 147 149 150 151 152 153 154 155 156 158 159
 160 161 162 163 164 165 166 167 168 169 171 172 174 175 176 177] TEST: [  2   3  13  17  23  25  28  31  41  58  59  64  73  79  81  82  84  90
  91  99 108 111 113 116 128 129 130 131 132 135 139 141 148 157 170 173]
Accuracy:  1.0
Random Guess Accuracy: 0.38888888888888884
TRAIN: [  0   1   2   3   5   7  10  13  14  15  16  17  20  21  22  23  25  26
  27  28  29  31  32  33  34  35  37  38  39  41  42  43  44  47  48  49
  50  51  52  54  55  57  58  59  61  62  63  64  65