## Explanation of Neural Network Steps

https://iamtrask.github.io/2015/07/12/basic-python-network/

In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
import numpy as np

# sigmoid activation and its slope
def nonlin(x,deriv=False):
    """Evaluate the logistic sigmoid, or its slope, element-wise.

    Parameters
    ----------
    x : scalar or numpy array
        With ``deriv`` left at False this is the raw input to the sigmoid.
        With ``deriv=True`` the function returns ``x*(1-x)``, which is the
        sigmoid's slope only if ``x`` is already a sigmoid *output*.
    deriv : bool, default False
        Selects the slope expression when it compares equal to ``True``.

    Returns
    -------
    Same shape as ``x``: ``1/(1+exp(-x))`` or ``x*(1-x)``.
    """
    # loose equality is kept on purpose so callers passing 1 behave as before
    wants_slope = (deriv == True)  # noqa: E712
    if not wants_slope:
        return 1/(1+np.exp(-x))
    return x*(1-x)
    
# training inputs: one row per sample, three binary features each
X = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])

# training targets, stored as a (4, 1) column so they line up row-by-row with X
y = np.array([[0],
              [1],
              [1],
              [0]])

# fix the global numpy seed so the random weights below
# (and therefore the whole training run) are repeatable
np.random.seed(1)

# np.random.random draws from [0, 1); scaling by 2 and shifting by -1
# gives weights in [-1, 1), i.e. centred on 0
# input -> hidden weights (3 inputs, 4 hidden units)
syn0 = np.random.random((3, 4)) * 2 - 1
# hidden -> output weights (4 hidden units, 1 output)
syn1 = np.random.random((4, 1)) * 2 - 1

for j in range(100000):

    # forward pass: inputs -> hidden layer -> output layer
    l0 = X
    l1 = nonlin(l0.dot(syn0))
    l2 = nonlin(l1.dot(syn1))

    # signed gap between the targets and the current predictions
    l2_error = y - l2

    # report the mean absolute error once every 10,000 iterations
    if j % 10000 == 0:
        print("Error:" + str(np.mean(np.abs(l2_error))))

    # Scale the output error by the sigmoid slope evaluated at l2.
    # The slope l2*(1-l2) peaks at 0.5 and shrinks towards 0 and 1,
    # so predictions the network is already confident about move less.
    l2_delta = l2_error * nonlin(l2, deriv=True)

    # back-propagation: push each output delta back through syn1 so every
    # hidden unit is charged in proportion to its weight on the output.
    # This must use syn1 *before* it is updated below.
    l1_error = np.dot(l2_delta, syn1.T)

    # same slope scaling, now for the hidden layer
    l1_delta = l1_error * nonlin(l1, deriv=True)

    # in-place weight updates: hidden -> output first, then input -> hidden
    syn1 += np.dot(l1.T, l2_delta)
    syn0 += np.dot(l0.T, l1_delta)

# After the loop, l2 holds the network's predictions for X
# (l1 is only the hidden-layer activation); display l2 to inspect them.

Error:0.49641003190272537
Error:0.008584525653247159
Error:0.0057894598625078085
Error:0.004629176776769985
Error:0.003958765280273649
Error:0.0035101225678616766
Error:0.003183502385874825
Error:0.0029323063422830717
Error:0.0027315064182105086
Error:0.0025663172400400263


## Neural Network Example using sklearn.neural_network

https://www.springboard.com/blog/beginners-guide-neural-network-in-python-scikit-learn-0-18/

In [3]:
import pandas as pd
import numpy as np

# Load the wine table; the column labels are supplied explicitly via `names`.
# NOTE(review): "Alchol" and "Falvanoids" look like misspellings, but they are
# kept as-is because later cells and stored outputs refer to these labels.
wine = pd.read_csv(
    './data/wine_data.csv',
    names=[
        "Cultivator",
        "Alchol",
        "Malic_Acid",
        "Ash",
        "Alcalinity_of_Ash",
        "Magnesium",
        "Total_phenols",
        "Falvanoids",
        "Nonflavanoid_phenols",
        "Proanthocyanins",
        "Color_intensity",
        "Hue",
        "OD280",
        "Proline",
    ],
)

In [4]:
# summary statistics, flipped so each column of `wine` becomes one row
wine.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Cultivator,178.0,1.938202,0.775035,1.0,1.0,2.0,3.0,3.0
Alchol,178.0,13.000618,0.811827,11.03,12.3625,13.05,13.6775,14.83
Malic_Acid,178.0,2.336348,1.117146,0.74,1.6025,1.865,3.0825,5.8
Ash,178.0,2.366517,0.274344,1.36,2.21,2.36,2.5575,3.23
Alcalinity_of_Ash,178.0,19.494944,3.339564,10.6,17.2,19.5,21.5,30.0
Magnesium,178.0,99.741573,14.282484,70.0,88.0,98.0,107.0,162.0
Total_phenols,178.0,2.295112,0.625851,0.98,1.7425,2.355,2.8,3.88
Falvanoids,178.0,2.02927,0.998859,0.34,1.205,2.135,2.875,5.08
Nonflavanoid_phenols,178.0,0.361854,0.124453,0.13,0.27,0.34,0.4375,0.66
Proanthocyanins,178.0,1.590899,0.572359,0.41,1.25,1.555,1.95,3.58


In [5]:
# target: the 'Cultivator' column; features: every other column
y = wine['Cultivator']
X = wine.drop(columns='Cultivator')

In [6]:
from sklearn.model_selection import train_test_split

# Fix the shuffle seed so the split, and every metric computed from it, is
# identical on each Restart & Run All. Without random_state the rows held out
# for testing change on every run and the reported scores cannot be reproduced.
# No test_size is given, so scikit-learn's default hold-out fraction applies.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=20)

In [7]:
## scaling the features is recommended to help neural networks converge ##
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to both splits.
# Note: X_train / X_test are rebound to the scaled arrays from here on.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
from sklearn.neural_network import MLPClassifier

# three hidden layers of 13 units each, capped at 500 training iterations;
# random_state is fixed so the fit is repeatable
mlp = MLPClassifier(
    hidden_layer_sizes=(13,) * 3,
    max_iter=500,
    random_state=20,
)

# left as the last expression so the cell displays the fitted estimator
mlp.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(13, 13, 13), learning_rate='constant',
              learning_rate_init=0.001, max_iter=500, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=20, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [9]:
# predicted class labels for the held-out (scaled) test rows
y_pred = mlp.predict(X_test)

In [10]:
from sklearn.metrics import classification_report, confusion_matrix

# rows are the true classes, columns the predicted classes
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[18,  0,  0],
       [ 2, 11,  0],
       [ 0,  1, 13]], dtype=int64)

In [11]:
# print() is needed here: the report is a pre-formatted multi-line string
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           1       0.90      1.00      0.95        18
           2       0.92      0.85      0.88        13
           3       1.00      0.93      0.96        14

    accuracy                           0.93        45
   macro avg       0.94      0.92      0.93        45
weighted avg       0.94      0.93      0.93        45



## Converting the previous example to use a pipeline

In [12]:
## all numerical cols so no need for label or one-hot encoding ##
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline

# Earlier in the notebook X_train / X_test were rebound to already-standardized
# arrays. Feeding those into a pipeline that starts with a StandardScaler makes
# the scaler step a near no-op, so the pipeline never sees raw data.
# Re-split the raw features here (fixed seed for repeatability) so the
# pipeline owns the scaling end to end.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=20)

scaler = StandardScaler()
# one hidden unit per input feature, in each of the three hidden layers
num_col = X_train.shape[1]
mlp = MLPClassifier(hidden_layer_sizes=(num_col,num_col,num_col), max_iter=500, random_state=20)

# NOTE(review): the final step is labelled 'regressor' although it holds a
# classifier; the label is kept so existing lookups by step name keep working.
pipe = Pipeline(steps=[('scaler', scaler),
                       ('regressor', mlp)
                      ])

# fit() learns the scaling statistics from the training rows and trains the
# network on the scaled result; predict() reuses that fitted scaler on X_test
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

In [13]:
from sklearn.metrics import classification_report, confusion_matrix

# rows are the true classes, columns the pipeline's predicted classes
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[18,  0,  0],
       [ 2, 11,  0],
       [ 0,  1, 13]], dtype=int64)

In [14]:
# print() is needed here: the report is a pre-formatted multi-line string
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           1       0.90      1.00      0.95        18
           2       0.92      0.85      0.88        13
           3       1.00      0.93      0.96        14

    accuracy                           0.93        45
   macro avg       0.94      0.92      0.93        45
weighted avg       0.94      0.93      0.93        45

