# Artificial Neural Network Build

Lets build an artificial neural network

## Imports

In [68]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import numpy as np

## Getting data

In [69]:
data_path = '../data/dataset_cleaned.csv'
data = pd.read_csv('../data/dataset_cleaned.csv')

data.head()

Unnamed: 0,current_201,current_202,current_203,current_204,current_205,current_206,current_207,current_208,current_209,current_210,...,volt_792,volt_793,volt_794,volt_795,volt_796,volt_797,volt_798,volt_799,volt_800,state
0,0.380513,0.381062,0.37739,0.37243,0.368167,0.366427,0.368739,0.375499,0.38354,0.388614,...,0.406982,0.367278,0.364093,0.402194,0.448523,0.450469,0.356803,0.302953,0.274347,0
1,0.366451,0.36594,0.362012,0.358414,0.358198,0.362319,0.368449,0.374194,0.378019,0.379024,...,0.273656,0.261276,0.271836,0.3086,0.356592,0.389055,0.355631,0.36909,0.410403,0
2,0.424592,0.427085,0.423493,0.416858,0.40944,0.402997,0.397895,0.394615,0.392775,0.39158,...,0.317711,0.297014,0.288137,0.302547,0.330311,0.342483,0.289035,0.271342,0.278585,0
3,0.386662,0.391439,0.390609,0.387729,0.384666,0.381944,0.379041,0.376427,0.374441,0.372554,...,0.265779,0.276489,0.310451,0.382017,0.485955,0.568265,0.505633,0.450684,0.407461,0
4,0.329006,0.32929,0.3245,0.318141,0.31276,0.310042,0.309129,0.310233,0.315202,0.324002,...,0.415693,0.38111,0.37605,0.407861,0.460601,0.481486,0.389891,0.325563,0.281171,0


In [70]:
X, y = data.iloc[:, :-1], data.iloc[:, -1]

X

Unnamed: 0,current_201,current_202,current_203,current_204,current_205,current_206,current_207,current_208,current_209,current_210,...,volt_791,volt_792,volt_793,volt_794,volt_795,volt_796,volt_797,volt_798,volt_799,volt_800
0,0.380513,0.381062,0.377390,0.372430,0.368167,0.366427,0.368739,0.375499,0.383540,0.388614,...,0.475171,0.406982,0.367278,0.364093,0.402194,0.448523,0.450469,0.356803,0.302953,0.274347
1,0.366451,0.365940,0.362012,0.358414,0.358198,0.362319,0.368449,0.374194,0.378019,0.379024,...,0.313650,0.273656,0.261276,0.271836,0.308600,0.356592,0.389055,0.355631,0.369090,0.410403
2,0.424592,0.427085,0.423493,0.416858,0.409440,0.402997,0.397895,0.394615,0.392775,0.391580,...,0.346655,0.317711,0.297014,0.288137,0.302547,0.330311,0.342483,0.289035,0.271342,0.278585
3,0.386662,0.391439,0.390609,0.387729,0.384666,0.381944,0.379041,0.376427,0.374441,0.372554,...,0.277664,0.265779,0.276489,0.310451,0.382017,0.485955,0.568265,0.505633,0.450684,0.407461
4,0.329006,0.329290,0.324500,0.318141,0.312760,0.310042,0.309129,0.310233,0.315202,0.324002,...,0.474643,0.415693,0.381110,0.376050,0.407861,0.460601,0.481486,0.389891,0.325563,0.281171
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
474,0.258145,0.259928,0.257084,0.253027,0.250381,0.251049,0.255665,0.263717,0.272843,0.280393,...,0.118411,0.085348,0.067686,0.062863,0.078773,0.102914,0.117693,0.097859,0.085690,0.079646
475,0.406074,0.420149,0.429162,0.434866,0.439312,0.443994,0.448384,0.452546,0.455894,0.456507,...,0.267515,0.243962,0.250543,0.279990,0.333953,0.393716,0.419058,0.355549,0.327488,0.312347
476,0.534424,0.534146,0.528990,0.521568,0.514759,0.509897,0.505541,0.501624,0.498767,0.496695,...,0.300854,0.269906,0.262787,0.261464,0.268023,0.270470,0.250558,0.187487,0.160850,0.156544
477,0.328971,0.334802,0.334699,0.332600,0.331332,0.331850,0.332882,0.333405,0.333061,0.331583,...,0.149968,0.143442,0.139477,0.127960,0.122262,0.118875,0.110584,0.083422,0.073359,0.071337


To get the best training and testing datasets, we're going to balance the amount of each state.
We're going to use:

Training:
120 from state 0
85 from state 1
85 from state 2

In [71]:
df_0 = data.query('state == 0')
df_1 = data.query('state == 1')
df_2 = data.query('state == 2')

print('State 0 shape:', df_0.shape)
print('State 1 shape:', df_1.shape)
print('State 2 shape:', df_2.shape)

State 0 shape: (288, 1201)
State 1 shape: (97, 1201)
State 2 shape: (94, 1201)


Get X_train and X_test

In [72]:
state0_train_size = 125
state1_train_size = 85
state2_train_size = 85

# state 0
X_train_0 = df_0.iloc[:state0_train_size, :]
X_test_0 = df_0.iloc[state0_train_size:, :]

y_train_0 = df_0.iloc[:state0_train_size, -1]
y_test_0 = df_0.iloc[state0_train_size:, -1]

# state 1
X_train_1 = df_1.iloc[:state1_train_size, :]
X_test_1 = df_1.iloc[state1_train_size:, :]

y_train_1 = df_1.iloc[:state1_train_size, -1]
y_test_1 = df_1.iloc[state1_train_size:, -1]

# state 2
X_train_2 = df_2.iloc[:state2_train_size, :]
X_test_2 = df_2.iloc[state2_train_size:, :]

y_train_2 = df_2.iloc[:state2_train_size, -1]
y_test_2 = df_2.iloc[state2_train_size:, -1]

# Concatenate
X_train = pd.concat([X_train_0, X_train_1, X_train_2])
X_test = pd.concat([X_test_0, X_test_1, X_test_2])

y_train = pd.concat([y_train_0, y_train_1, y_train_2])
y_test = pd.concat([y_test_0, y_test_1, y_test_2])

print("X_test shape", X_test.shape)
print("y_test shape", y_test.shape)
print("==========")
print("X_train shape", X_train.shape)
print("y_train shape", y_train.shape)

y_test_2


X_test shape (184, 1201)
y_test shape (184,)
X_train shape (295, 1201)
y_train shape (295,)


470    2
471    2
472    2
473    2
474    2
475    2
476    2
477    2
478    2
Name: state, dtype: int64

In [73]:
X_train

Unnamed: 0,current_201,current_202,current_203,current_204,current_205,current_206,current_207,current_208,current_209,current_210,...,volt_792,volt_793,volt_794,volt_795,volt_796,volt_797,volt_798,volt_799,volt_800,state
0,0.380513,0.381062,0.377390,0.372430,0.368167,0.366427,0.368739,0.375499,0.383540,0.388614,...,0.406982,0.367278,0.364093,0.402194,0.448523,0.450469,0.356803,0.302953,0.274347,0
1,0.366451,0.365940,0.362012,0.358414,0.358198,0.362319,0.368449,0.374194,0.378019,0.379024,...,0.273656,0.261276,0.271836,0.308600,0.356592,0.389055,0.355631,0.369090,0.410403,0
2,0.424592,0.427085,0.423493,0.416858,0.409440,0.402997,0.397895,0.394615,0.392775,0.391580,...,0.317711,0.297014,0.288137,0.302547,0.330311,0.342483,0.289035,0.271342,0.278585,0
3,0.386662,0.391439,0.390609,0.387729,0.384666,0.381944,0.379041,0.376427,0.374441,0.372554,...,0.265779,0.276489,0.310451,0.382017,0.485955,0.568265,0.505633,0.450684,0.407461,0
4,0.329006,0.329290,0.324500,0.318141,0.312760,0.310042,0.309129,0.310233,0.315202,0.324002,...,0.415693,0.381110,0.376050,0.407861,0.460601,0.481486,0.389891,0.325563,0.281171,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
465,0.451445,0.460761,0.462873,0.461091,0.458344,0.456697,0.455928,0.456818,0.458857,0.459010,...,0.156490,0.160885,0.174962,0.209262,0.245508,0.258957,0.223890,0.225651,0.280271,2
466,0.426060,0.432033,0.433913,0.436276,0.443433,0.455274,0.467274,0.477160,0.484212,0.487805,...,0.430326,0.492045,0.491282,0.453160,0.393129,0.308887,0.198685,0.149238,0.125311,2
467,0.398307,0.399916,0.395201,0.388281,0.381907,0.378268,0.377173,0.378181,0.381354,0.385613,...,0.166870,0.171276,0.188868,0.235896,0.324066,0.461164,0.508763,0.510587,0.458503,2
468,0.247707,0.254103,0.257707,0.261819,0.266734,0.271767,0.275819,0.278780,0.280862,0.281570,...,0.162887,0.151263,0.158412,0.186152,0.202066,0.179934,0.118501,0.085939,0.069742,2


In [74]:
X_test

Unnamed: 0,current_201,current_202,current_203,current_204,current_205,current_206,current_207,current_208,current_209,current_210,...,volt_792,volt_793,volt_794,volt_795,volt_796,volt_797,volt_798,volt_799,volt_800,state
125,0.418187,0.423908,0.422500,0.417340,0.411293,0.406404,0.403098,0.401524,0.401184,0.401270,...,0.342177,0.314036,0.318342,0.356268,0.401804,0.407339,0.316755,0.256458,0.221252,0
126,0.408609,0.407883,0.403046,0.397236,0.392782,0.390869,0.391814,0.397709,0.408277,0.419479,...,0.293242,0.277765,0.266636,0.272530,0.298734,0.329949,0.307095,0.308849,0.322219,0
127,0.476182,0.475892,0.472652,0.469600,0.468198,0.468617,0.469917,0.472918,0.477690,0.481527,...,0.212009,0.232290,0.283721,0.357671,0.427205,0.455655,0.389299,0.366152,0.376777,0
128,0.466639,0.472543,0.476635,0.480315,0.483274,0.485292,0.486290,0.487700,0.489611,0.490211,...,0.387404,0.347294,0.338249,0.361233,0.389561,0.377679,0.282076,0.228494,0.216340,0
129,0.508091,0.505642,0.497404,0.488580,0.483247,0.482150,0.482717,0.484029,0.485705,0.486974,...,0.308216,0.310868,0.323062,0.364767,0.443387,0.525665,0.484873,0.442928,0.400778,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
474,0.258145,0.259928,0.257084,0.253027,0.250381,0.251049,0.255665,0.263717,0.272843,0.280393,...,0.085348,0.067686,0.062863,0.078773,0.102914,0.117693,0.097859,0.085690,0.079646,2
475,0.406074,0.420149,0.429162,0.434866,0.439312,0.443994,0.448384,0.452546,0.455894,0.456507,...,0.243962,0.250543,0.279990,0.333953,0.393716,0.419058,0.355549,0.327488,0.312347,2
476,0.534424,0.534146,0.528990,0.521568,0.514759,0.509897,0.505541,0.501624,0.498767,0.496695,...,0.269906,0.262787,0.261464,0.268023,0.270470,0.250558,0.187487,0.160850,0.156544,2
477,0.328971,0.334802,0.334699,0.332600,0.331332,0.331850,0.332882,0.333405,0.333061,0.331583,...,0.143442,0.139477,0.127960,0.122262,0.118875,0.110584,0.083422,0.073359,0.071337,2


## Build model

### Useful imports

In [75]:
from sklearn.metrics import r2_score

Making the ANN

In [76]:
ann = MLPClassifier(
    hidden_layer_sizes=(150, 300),
    max_iter=5000,
    tol=0.0000001,
    learning_rate_init=0.1,
    solver='sgd',
    activation='logistic',
    learning_rate='constant',
    verbose=True,
)

Train the model

In [77]:
ann.fit(X_train, y_train)

Iteration 1, loss = 1.77344564
Iteration 2, loss = 2.05283666
Iteration 3, loss = 2.16850577
Iteration 4, loss = 1.69162706
Iteration 5, loss = 1.29527100
Iteration 6, loss = 1.25374521
Iteration 7, loss = 1.26423777
Iteration 8, loss = 1.17367975
Iteration 9, loss = 0.95742449
Iteration 10, loss = 0.88044489
Iteration 11, loss = 0.80926682
Iteration 12, loss = 0.73003083
Iteration 13, loss = 0.61391619
Iteration 14, loss = 0.62014005
Iteration 15, loss = 0.50579628
Iteration 16, loss = 0.49363062
Iteration 17, loss = 0.45604802
Iteration 18, loss = 0.44416979
Iteration 19, loss = 0.46307935
Iteration 20, loss = 0.46634297
Iteration 21, loss = 0.40799159
Iteration 22, loss = 0.40008823
Iteration 23, loss = 0.38491065
Iteration 24, loss = 0.37596469
Iteration 25, loss = 0.36276278
Iteration 26, loss = 0.35867585
Iteration 27, loss = 0.33888888
Iteration 28, loss = 0.34122054
Iteration 29, loss = 0.33445727
Iteration 30, loss = 0.31421643
Iteration 31, loss = 0.27657865
Iteration 32, los

Get the score

In [78]:
score = ann.score(X_test, y_test)

print('%.2f' %(score*100) + '%')

100.00%


Predict the probability for each state

In [79]:
print("Probability of a random sample of state 0:", np.round(ann.predict_proba(X_test_0[:1]), 4) )
print("Probability of a random sample of state 1:", np.round(ann.predict_proba(X_test_1[:1]), 4) )
print("Probability of a random sample of state 2:", np.round(ann.predict_proba(X_test_2[:1]), 4) )

Probability of a random sample of state 0: [[9.999e-01 0.000e+00 1.000e-04]]
Probability of a random sample of state 1: [[0.000e+00 9.999e-01 1.000e-04]]
Probability of a random sample of state 2: [[0.000e+00 1.000e-04 9.999e-01]]


## TO DO: CHECK OVERFITTING AND UNDERFITTING