# Artificial Neural Network Build

Lets build an artificial neural network

## Imports

In [45]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import numpy as np

## Getting data

In [33]:
data_path = '../data/dataset_cleaned.csv'
data = pd.read_csv('../data/dataset_cleaned.csv')

data.head()

Unnamed: 0,current_1,current_2,current_3,current_4,current_5,current_6,current_7,current_8,current_9,current_10,...,volt_992,volt_993,volt_994,volt_995,volt_996,volt_997,volt_998,volt_999,volt_1000,state
0,0.113723,0.11491,0.115306,0.111678,0.109026,0.10683,0.108055,0.083534,0.112588,0.114087,...,0.520293,0.474169,0.461303,0.391197,0.400257,0.363076,0.355066,0.362743,0.402392,0
1,0.117689,0.120581,0.122868,0.120302,0.117596,0.113794,0.111915,0.083626,0.109412,0.108518,...,0.429259,0.429447,0.463391,0.458402,0.541778,0.516046,0.481548,0.448816,0.448808,0
2,0.139864,0.140511,0.139569,0.13304,0.126874,0.120892,0.118347,0.088898,0.118342,0.120895,...,0.451568,0.417285,0.385515,0.310819,0.305093,0.268921,0.261212,0.269722,0.311225,0
3,0.107354,0.114177,0.119796,0.119918,0.119248,0.117398,0.117625,0.089471,0.119081,0.12028,...,0.576316,0.535075,0.490151,0.397969,0.404842,0.376783,0.389748,0.427383,0.488952,0
4,0.096558,0.099099,0.100371,0.096762,0.092431,0.087296,0.085177,0.064075,0.085037,0.086279,...,0.451605,0.384601,0.346154,0.288832,0.309612,0.307554,0.33367,0.375025,0.45724,0


In [34]:
X, y = data.iloc[:, :-1], data.iloc[:, -1]

X

Unnamed: 0,current_1,current_2,current_3,current_4,current_5,current_6,current_7,current_8,current_9,current_10,...,volt_991,volt_992,volt_993,volt_994,volt_995,volt_996,volt_997,volt_998,volt_999,volt_1000
0,0.113723,0.114910,0.115306,0.111678,0.109026,0.106830,0.108055,0.083534,0.112588,0.114087,...,0.503166,0.520293,0.474169,0.461303,0.391197,0.400257,0.363076,0.355066,0.362743,0.402392
1,0.117689,0.120581,0.122868,0.120302,0.117596,0.113794,0.111915,0.083626,0.109412,0.108518,...,0.360316,0.429259,0.429447,0.463391,0.458402,0.541778,0.516046,0.481548,0.448816,0.448808
2,0.139864,0.140511,0.139569,0.133040,0.126874,0.120892,0.118347,0.088898,0.118342,0.120895,...,0.383321,0.451568,0.417285,0.385515,0.310819,0.305093,0.268921,0.261212,0.269722,0.311225
3,0.107354,0.114177,0.119796,0.119918,0.119248,0.117398,0.117625,0.089471,0.119081,0.120280,...,0.473232,0.576316,0.535075,0.490151,0.397969,0.404842,0.376783,0.389748,0.427383,0.488952
4,0.096558,0.099099,0.100371,0.096762,0.092431,0.087296,0.085177,0.064075,0.085037,0.086279,...,0.419041,0.451605,0.384601,0.346154,0.288832,0.309612,0.307554,0.333670,0.375025,0.457240
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
474,0.124239,0.128397,0.133158,0.133722,0.135076,0.136934,0.141820,0.111245,0.152041,0.156040,...,0.402826,0.401559,0.334530,0.313763,0.283868,0.328180,0.334456,0.350127,0.369325,0.409624
475,0.132562,0.139950,0.162313,0.178809,0.189051,0.192312,0.193388,0.145813,0.192398,0.191942,...,0.293938,0.348157,0.355811,0.387668,0.370035,0.406990,0.365573,0.325957,0.286269,0.275697
476,0.133588,0.136924,0.139389,0.136779,0.134744,0.132323,0.132286,0.100588,0.134064,0.135090,...,0.219395,0.257372,0.261120,0.292297,0.316605,0.454289,0.578198,0.706980,0.767582,0.779411
477,0.178377,0.181980,0.183664,0.177688,0.171664,0.165579,0.162911,0.122788,0.163429,0.165052,...,0.594754,0.593017,0.508948,0.476743,0.397328,0.395472,0.341749,0.319220,0.321717,0.360049


To get the best training and testing datasets, we're going to balance the amount of each state.
We're going to use:

Training:
120 from state 0
85 from state 1
85 from state 2

In [35]:
df_0 = data.query('state == 0')
df_1 = data.query('state == 1')
df_2 = data.query('state == 2')

print('State 0 shape:', df_0.shape)
print('State 1 shape:', df_1.shape)
print('State 2 shape:', df_2.shape)

State 0 shape: (288, 2001)
State 1 shape: (97, 2001)
State 2 shape: (94, 2001)


Get X_train and X_test

In [36]:
state0_train_size = 125
state1_train_size = 85
state2_train_size = 85

# state 0
X_train_0 = df_0.iloc[:state0_train_size, :]
X_test_0 = df_0.iloc[state0_train_size:, :]

y_train_0 = df_0.iloc[:state0_train_size, -1]
y_test_0 = df_0.iloc[state0_train_size:, -1]

# state 1
X_train_1 = df_1.iloc[:state1_train_size, :]
X_test_1 = df_1.iloc[state1_train_size:, :]

y_train_1 = df_1.iloc[:state1_train_size, -1]
y_test_1 = df_1.iloc[state1_train_size:, -1]

# state 2
X_train_2 = df_2.iloc[:state2_train_size, :]
X_test_2 = df_2.iloc[state2_train_size:, :]

y_train_2 = df_2.iloc[:state2_train_size, -1]
y_test_2 = df_2.iloc[state2_train_size:, -1]

# Concatenate
X_train = pd.concat([X_train_0, X_train_1, X_train_2])
X_test = pd.concat([X_test_0, X_test_1, X_test_2])

y_train = pd.concat([y_train_0, y_train_1, y_train_2])
y_test = pd.concat([y_test_0, y_test_1, y_test_2])

print("X_test shape", X_test.shape)
print("y_test shape", y_test.shape)
print("==========")
print("X_train shape", X_train.shape)
print("y_train shape", y_train.shape)

y_test_2


X_test shape (184, 2001)
y_test shape (184,)
X_train shape (295, 2001)
y_train shape (295,)


470    2
471    2
472    2
473    2
474    2
475    2
476    2
477    2
478    2
Name: state, dtype: int64

In [37]:
X_train

Unnamed: 0,current_1,current_2,current_3,current_4,current_5,current_6,current_7,current_8,current_9,current_10,...,volt_992,volt_993,volt_994,volt_995,volt_996,volt_997,volt_998,volt_999,volt_1000,state
0,0.113723,0.114910,0.115306,0.111678,0.109026,0.106830,0.108055,0.083534,0.112588,0.114087,...,0.520293,0.474169,0.461303,0.391197,0.400257,0.363076,0.355066,0.362743,0.402392,0
1,0.117689,0.120581,0.122868,0.120302,0.117596,0.113794,0.111915,0.083626,0.109412,0.108518,...,0.429259,0.429447,0.463391,0.458402,0.541778,0.516046,0.481548,0.448816,0.448808,0
2,0.139864,0.140511,0.139569,0.133040,0.126874,0.120892,0.118347,0.088898,0.118342,0.120895,...,0.451568,0.417285,0.385515,0.310819,0.305093,0.268921,0.261212,0.269722,0.311225,0
3,0.107354,0.114177,0.119796,0.119918,0.119248,0.117398,0.117625,0.089471,0.119081,0.120280,...,0.576316,0.535075,0.490151,0.397969,0.404842,0.376783,0.389748,0.427383,0.488952,0
4,0.096558,0.099099,0.100371,0.096762,0.092431,0.087296,0.085177,0.064075,0.085037,0.086279,...,0.451605,0.384601,0.346154,0.288832,0.309612,0.307554,0.333670,0.375025,0.457240,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
465,0.133607,0.140374,0.162511,0.179296,0.190524,0.195357,0.197967,0.149996,0.198151,0.197248,...,0.414806,0.344570,0.324000,0.277997,0.283586,0.255947,0.261546,0.298697,0.373311,2
466,0.153008,0.129034,0.105669,0.086824,0.077247,0.073202,0.074900,0.059375,0.081577,0.083750,...,0.468752,0.428719,0.393985,0.310018,0.306981,0.354964,0.509488,0.641568,0.720227,2
467,0.196252,0.199004,0.200592,0.194777,0.189199,0.183442,0.180796,0.135883,0.179939,0.180812,...,0.278794,0.247908,0.267732,0.281760,0.358121,0.385017,0.422856,0.453432,0.485666,2
468,0.155995,0.146491,0.150410,0.154224,0.158480,0.160376,0.162237,0.123180,0.162876,0.162329,...,0.826455,0.758820,0.679297,0.538194,0.541097,0.494096,0.472102,0.442293,0.424193,2


In [38]:
X_test

Unnamed: 0,current_1,current_2,current_3,current_4,current_5,current_6,current_7,current_8,current_9,current_10,...,volt_992,volt_993,volt_994,volt_995,volt_996,volt_997,volt_998,volt_999,volt_1000,state
125,0.135156,0.137213,0.139215,0.136466,0.134174,0.131659,0.131619,0.099899,0.132574,0.132612,...,0.387503,0.327172,0.323500,0.303850,0.342913,0.331569,0.336510,0.352387,0.408987,0
126,0.148558,0.148847,0.148529,0.143156,0.138576,0.134240,0.133326,0.101322,0.135604,0.138036,...,0.330421,0.319064,0.355853,0.359015,0.426080,0.425091,0.431954,0.441583,0.503423,0
127,0.157101,0.157637,0.157479,0.151913,0.147316,0.143198,0.142666,0.109170,0.147817,0.151760,...,0.464551,0.438801,0.435553,0.403385,0.503639,0.560944,0.597051,0.588676,0.584142,0
128,0.162670,0.166232,0.169525,0.166400,0.163003,0.159161,0.158285,0.120150,0.160619,0.162559,...,0.561504,0.464973,0.418508,0.334826,0.323198,0.287915,0.300011,0.349643,0.438636,0
129,0.170505,0.171533,0.170885,0.163864,0.158025,0.153099,0.152133,0.115760,0.155063,0.157738,...,0.333874,0.324341,0.321492,0.289926,0.360985,0.487692,0.687529,0.806554,0.830210,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
474,0.124239,0.128397,0.133158,0.133722,0.135076,0.136934,0.141820,0.111245,0.152041,0.156040,...,0.401559,0.334530,0.313763,0.283868,0.328180,0.334456,0.350127,0.369325,0.409624,2
475,0.132562,0.139950,0.162313,0.178809,0.189051,0.192312,0.193388,0.145813,0.192398,0.191942,...,0.348157,0.355811,0.387668,0.370035,0.406990,0.365573,0.325957,0.286269,0.275697,2
476,0.133588,0.136924,0.139389,0.136779,0.134744,0.132323,0.132286,0.100588,0.134064,0.135090,...,0.257372,0.261120,0.292297,0.316605,0.454289,0.578198,0.706980,0.767582,0.779411,2
477,0.178377,0.181980,0.183664,0.177688,0.171664,0.165579,0.162911,0.122788,0.163429,0.165052,...,0.593017,0.508948,0.476743,0.397328,0.395472,0.341749,0.319220,0.321717,0.360049,2


## Build model

### Useful imports

In [39]:
from sklearn.metrics import r2_score

Making the ANN

In [51]:
ann = MLPClassifier(
    hidden_layer_sizes=(150, 300),
    max_iter=5000,
    tol=0.0000001,
    learning_rate_init=0.1,
    solver='sgd',
    activation='logistic',
    learning_rate='constant',
    verbose=True,
)

Train the model

In [52]:
ann.fit(X_train, y_train)

Iteration 1, loss = 1.39733463
Iteration 2, loss = 3.52701206
Iteration 3, loss = 3.65905312
Iteration 4, loss = 2.08584370
Iteration 5, loss = 2.25162915
Iteration 6, loss = 1.06614263
Iteration 7, loss = 1.01585489
Iteration 8, loss = 1.00063760
Iteration 9, loss = 0.98132729
Iteration 10, loss = 0.95038843
Iteration 11, loss = 0.90842704
Iteration 12, loss = 0.84820822
Iteration 13, loss = 0.76683844
Iteration 14, loss = 0.67649313
Iteration 15, loss = 0.59101849
Iteration 16, loss = 0.52882159
Iteration 17, loss = 0.49112606
Iteration 18, loss = 0.47113357
Iteration 19, loss = 0.45681071
Iteration 20, loss = 0.44304190
Iteration 21, loss = 0.43462230
Iteration 22, loss = 0.41581097
Iteration 23, loss = 0.39961879
Iteration 24, loss = 0.38621955
Iteration 25, loss = 0.37067082
Iteration 26, loss = 0.35234828
Iteration 27, loss = 0.33950849
Iteration 28, loss = 0.31617207
Iteration 29, loss = 0.30195450
Iteration 30, loss = 0.28213123
Iteration 31, loss = 0.25874807
Iteration 32, los

Get the score

In [53]:
score = ann.score(X_test, y_test)

print('%.2f' %(score*100) + '%')

100.00%


Predict the probability for each state

In [67]:
print("Probability of a random sample of state 0:", np.round(ann.predict_proba(X_test_0[:1]), 4) )
print("Probability of a random sample of state 1:", np.round(ann.predict_proba(X_test_1[:1]), 4) )
print("Probability of a random sample of state 2:", np.round(ann.predict_proba(X_test_2[:1]), 4) )

Probability of a random sample of state 0: [[9.999e-01 0.000e+00 1.000e-04]]
Probability of a random sample of state 1: [[0.000e+00 9.998e-01 2.000e-04]]
Probability of a random sample of state 2: [[0.000e+00 1.000e-04 9.999e-01]]


## TO DO: CHECK OVERFITTING AND UNDERFITTING