# Neural Networks

This is a simple 1 hidden layer example. Application of more complex network can be found in other files.

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV 
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

%cd "G:/Archive"

G:\Archive


## Data Prep

In [2]:
#load data
data = pd.read_csv("data/wine quality red.csv", header = 0)

#aggregate into binary classification problem
data["quality"] = pd.Categorical(np.where(data["quality"] > 5, "above_avg", "below_avg"))

#split data
X = data.iloc[:, :11]
y = data.iloc[:, 11]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1234)

## Searching Optimal Parameters
Standardize data first

In [3]:
#use same scaling to train and test sets
scale1 = StandardScaler().fit(X_train)
X_train_scale1 = scale1.transform(X_train)
X_test_scale1 = scale1.transform(X_test)

In [13]:
#tuning parameters: hidden_layer_sizes, alpha, learning_rate_init
#use sigmoid as activation function since there is only 1 hidden layer
#use lbfgs with regularization as solver
#10-fold cv with accuracy as target metrics
grid1 = {"hidden_layer_sizes": range(1, 11),
         "alpha": 1 / np.power(10, range(4))}                              
model1 = GridSearchCV(MLPClassifier(activation = "logistic", solver = "lbfgs", max_iter = 10000), 
                      param_grid = grid1, cv = 10, scoring = "accuracy")
model1.fit(X_train_scale1, y_train)

#show tuning results
print("Best score: ", model1.best_score_)
print("Corresponding parms: ", model1.best_params_)
print("Model description: ", model1.best_estimator_)

#predict on test set
pred1 = model1.predict(X_test_scale1)

#show prediction performance
print("Accuracy: {0:.4f}".format(metrics.accuracy_score(y_test, pred1)))
print("Recall: {0:.4f}".format(metrics.recall_score(y_test, pred1, pos_label = "above_avg", average = "binary")))
print("Precision: {0:.4f}".format(metrics.precision_score(y_test, pred1, pos_label = "above_avg", average = "binary")))
print("F1 score: {0:.4f}".format(metrics.f1_score(y_test, pred1, pos_label = "above_avg", average = "binary")))

Best score:  0.7560593011811024
Corresponding parms:  {'alpha': 1.0, 'hidden_layer_sizes': 10}
Model description:  MLPClassifier(activation='logistic', alpha=1.0, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=10, learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=10000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='lbfgs',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)
Accuracy: 0.7812
Recall: 0.7791
Precision: 0.8072
F1 score: 0.7929


## Visualization