# CMPSC 445 - M6 Assignment

### Importing Libraries

In [1]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score

### Load Iris Dataset

In [2]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pd.read_csv(url, names=names)

X = dataset.values[:,0:4].astype(float)
Y = dataset.values[:,4].astype(str)

### Train a Neural Network

#### Partition Training and Testing Data
Training size = 40% of Iris dataset instances

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.40)

#### Setup Model

- Multilayer Neural Network Model
- Learning Rate: 0.001
- 3 hidden neurons in 1 layer

In [4]:
hidden_nuerons = 1
alpha = 0.01

clf = MLPClassifier(
    solver = 'sgd', 
    activation = 'logistic',                 
    learning_rate_init = alpha, 
    learning_rate = 'constant', 
    max_iter = 1000, 
    verbose = True,
    hidden_layer_sizes = (hidden_nuerons,)
)

#### Cross-Validation

In [5]:
# 5-fold cross validation
cv_results = cross_val_score(clf, X_train, y_train, cv=5)
msg = "Average Accuracy on cross-validation: {cv_results.mean():.4f} ({cv_results.std():.4f})"

Iteration 1, loss = 1.33108626
Iteration 2, loss = 1.32470140
Iteration 3, loss = 1.31581477
Iteration 4, loss = 1.30489604
Iteration 5, loss = 1.29240300
Iteration 6, loss = 1.27877051
Iteration 7, loss = 1.26440181
Iteration 8, loss = 1.24966224
Iteration 9, loss = 1.23487474
Iteration 10, loss = 1.22031725
Iteration 11, loss = 1.20622170
Iteration 12, loss = 1.19277442
Iteration 13, loss = 1.18011802
Iteration 14, loss = 1.16835416
Iteration 15, loss = 1.15754736
Iteration 16, loss = 1.14772932
Iteration 17, loss = 1.13890361
Iteration 18, loss = 1.13105056
Iteration 19, loss = 1.12413192
Iteration 20, loss = 1.11809543
Iteration 21, loss = 1.11287880
Iteration 22, loss = 1.10841338
Iteration 23, loss = 1.10462724
Iteration 24, loss = 1.10144767
Iteration 25, loss = 1.09880326
Iteration 26, loss = 1.09662542
Iteration 27, loss = 1.09484952
Iteration 28, loss = 1.09341569
Iteration 29, loss = 1.09226921
Iteration 30, loss = 1.09136077
Iteration 31, loss = 1.09064645
Iteration 32, los



Note: There were many instances where the optimization did not converge. But the most recent value stored in `msg`.

<table>
    <thead>
        <tr>
            <th rowspan="2">Hidden Neurons</th>
            <th colspan="3">Learning Rates</th>
        </tr>
        <tr>
            <th>0.001</th>
            <th>0.01</th>
            <th>0.1</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <th>1</th>
            <td>0.4500 (0.1862)</td>
            <td>0.6578 (0.2389)</td>
            <td>0.7178 (0.2284)</td>
        </tr>
        <tr>
            <th>3</th>
            <td>0.6333 (0.1922)</td>
            <td>0.9122 (0.1220)</td>
            <td>0.9722 (0.0322)</td>
        </tr>
        <tr>
            <th>5</th>
            <td>0.6922 (0.1919)</td>
            <td>0.9822 (0.0252)</td>
            <td>0.9789 (0.0267)</td>
        </tr>
    </tbody>
</table>

Neural Network Configuration Selected: 
- Hidden Nuerons: 5
- Learning Rate: 0.01

In [6]:
hidden_nuerons = 5
learning_rate = 0.01

clf_fin = MLPClassifier(
    solver = 'sgd', 
    activation = 'logistic',                 
    learning_rate_init = learning_rate, 
    learning_rate = 'constant', 
    max_iter = 1000, 
    verbose = False,
    hidden_layer_sizes = (hidden_nuerons,)
)

clf_fin.fit(X_train, y_train)



### Predictions

Testing on entire test dataset `y_test`

Hidden Layer Size: 5
<br>
Learning Rate: 0.01

Reported Accuracy: 98.333%

In [7]:
# predictions with the selected neural network classifier with respective hidden layers and learning rate
results = clf_fin.predict(X_test)
acc_score = accuracy_score(y_test, results)
print(f"Accuracy Score: {acc_score:.3f}")
print(f"Accuracy Percentage: {acc_score*100:.3f}%")

Accuracy Score: 0.983
Accuracy Percentage: 98.333%


### Manual Calculations

Neural Network Questions

1. Output of the hidden node H, when feed-forwarding with sigmoid function (activation function)

    $ H_{out} = g(y) = \sigma(y) = \frac{1}{1+e^{-y}} $
    
    $ H_{in} = (\sum_{i=1}^n w_i x_i) + b$
    
    We know: $b=0$ and $[X_1, X_2] = [0.1, 0.2]$

    $ \rarr 0.1(1) + 0.2(2) + 0 = 0.5 \rarr H_{in} = 0.5 $
    
    $ \rarr g(0.5) = \sigma(0.5) = \frac{1}{1+e^{-0.5}} = 0.62246 $

    $ H_{out} = 0.6225 $
    
<br>

2. $\Delta w_{o1}$ using backpropagation

    We know: $[O_1, O_2] = [0, 1]$

    $ \frac{\partial E_{O_1}}{\partial w_{o1}} = (\frac{\partial E_{O_1}}{\partial out_{O_1}})(\frac{\partial out_{O_1}}{\partial O_1})(\frac{\partial O_1}{\partial w_{o1}})$ 

    $ E_{O_1} = \frac{1}{2} (target_{O_1} - out_{O_1})^2 \rarr \frac{\partial E_{O_1}}{\partial out_{O_1}} = -(target_{O_1} - out_{O_1}) $

    $ out_{O_1} = \sigma (O_1) \rarr \frac{\partial}{\partial O_1} \sigma (O_1) = \sigma (O_1) (1 - \sigma (O_1)) $

    $ O_1 = H_{out}w_{o1} + b \rarr \frac{\partial O_1}{\partial w_{o1}} = H_{out} $

    $ \rarr \Delta w_{o1} = [(-(target_{O_1} - out_{O_1}))(\sigma (O_1) (1 - \sigma (O_1)))(H_{out})] $

    $ \rarr (-(0 - 0.7764))(0.7764(1 - 0.7764))(0.6225) = 0.0839 $

    $ \Delta w_{o1} = 0.0839 $