# DAP 5

In [5]:
!pip install sklearn
!pip install pandas

Collecting pandas
  Downloading pandas-1.0.3-cp36-cp36m-macosx_10_9_x86_64.whl (10.2 MB)
[K     |████████████████████████████████| 10.2 MB 2.0 MB/s eta 0:00:01
Collecting pytz>=2017.2
  Downloading pytz-2020.1-py2.py3-none-any.whl (510 kB)
[K     |████████████████████████████████| 510 kB 2.6 MB/s eta 0:00:01
Installing collected packages: pytz, pandas
Successfully installed pandas-1.0.3 pytz-2020.1


In [17]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5), "Python 3.5 or newer is required"

# Scikit-Learn ≥0.20 is required.
# Compare (major, minor) as integers: compared as strings, "0.9" >= "0.20" is True
# and an outdated install would slip through the check.
import re
import sklearn
_sklearn_version = tuple(
    int(part) for part in re.match(r"(\d+)\.(\d+)", sklearn.__version__).groups()
)
assert _sklearn_version >= (0, 20), "scikit-learn 0.20 or newer is required"

# Common imports
import os

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras

# Install PlaidML as the backend for the standalone `keras` package imported below.
# NOTE(review): presumably this must run before any `from keras ...` import — confirm.
# NOTE(review): the `tensorflow.keras` module imported above is a separate package and
# is presumably unaffected by this call — confirm which of the two is intended.
import plaidml.keras
plaidml.keras.install_backend()

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm
# Default font sizes for axis labels and tick labels on every figure.
mpl.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

In [18]:
# Load the processed 2018 financial data; the first CSV column is used as the index.
df = pd.read_csv("../DAP2/processeddata/2018_Financial_Data.csv", index_col=0)

# NOTE: despite the name, `target_cols` lists the input *feature* columns;
# the label being predicted is the `Class` column.
target_cols = ['priceCashFlowRatio',
               'priceEarningsRatio',
               'priceEarningsToGrowthRatio',
               'priceBookValueRatio',
               'currentRatio',
               'quickRatio',
               'payoutRatio']
X = df[target_cols]
y = df['Class']

# Hold out a test split (seeded so the split is identical on every run).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Min-max scale the features. The scaler is fitted on the training split only and
# then applied to the test split, so no test-set statistics leak into training.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 10-fold cross-validation splitter. With shuffle=True an unseeded splitter yields
# different folds on every run, so seed it like the train/test split above.
kf = KFold(n_splits=10, shuffle=True, random_state=0)

## Neural Network Construction

In [33]:
# Sweep network depth (1-3 hidden layers) against width (10/20/50 units per layer)
# and report train/test accuracy for every combination.
hidden_layers = range(1, 4)
hidden_layer_sizes = [10, 20, 50]

for num_layers in hidden_layers:
    for hidden_layer_size in hidden_layer_sizes:
        # Every hidden layer gets the same width, e.g. [20, 20] for two layers of 20.
        size = [hidden_layer_size] * num_layers

        nnclf = MLPClassifier(
            hidden_layer_sizes=size,
            solver='lbfgs',
            alpha=0.01,
            max_iter=10000,
            random_state=0,
        )
        nnclf.fit(X_train_scaled, y_train)

        train_acc = nnclf.score(X_train_scaled, y_train)
        test_acc = nnclf.score(X_test_scaled, y_test)

        print("Hidden Layers: ", num_layers, "with size ", hidden_layer_size)
        print('\tAccuracy of NN classifier on training set: {:.2f}'.format(train_acc))
        print('\tAccuracy of NN classifier on test set: {:.2f}'.format(test_acc))

Hidden Layers:  1 with size  10
	Accuracy of NN classifier on training set: 0.77
	Accuracy of NN classifier on test set: 0.77
Hidden Layers:  1 with size  20
	Accuracy of NN classifier on training set: 0.79
	Accuracy of NN classifier on test set: 0.77
Hidden Layers:  1 with size  50
	Accuracy of NN classifier on training set: 0.81
	Accuracy of NN classifier on test set: 0.75
Hidden Layers:  2 with size  10
	Accuracy of NN classifier on training set: 0.79
	Accuracy of NN classifier on test set: 0.76
Hidden Layers:  2 with size  20
	Accuracy of NN classifier on training set: 0.87
	Accuracy of NN classifier on test set: 0.71
Hidden Layers:  2 with size  50
	Accuracy of NN classifier on training set: 0.98
	Accuracy of NN classifier on test set: 0.66
Hidden Layers:  3 with size  10
	Accuracy of NN classifier on training set: 0.79
	Accuracy of NN classifier on test set: 0.73
Hidden Layers:  3 with size  20
	Accuracy of NN classifier on training set: 0.91
	Accuracy of NN classifier on test se

In [None]:
# hidden_layers = range(1,5)
# hidden_layer_sizes = [10, 20, 50, 100]

# for num_layers in hidden_layers:
#     for hidden_layer_size in hidden_layer_sizes:
#         model = keras.models.Sequential()
#         model.add(Dense(hidden_layer_size))
        

In [38]:
# Find optimal alpha: refit a single 20-unit hidden layer for each candidate
# regularisation strength and report train/test accuracy.
alphas = [0.001, 0.01, 0.1, 1.0, 2.0, 3.0, 5.0]

for this_alpha in alphas:
    nnclf = MLPClassifier(
        hidden_layer_sizes=[20],
        solver='lbfgs',
        alpha=this_alpha,
        max_iter=10000,
        random_state=0,
    )
    nnclf.fit(X_train_scaled, y_train)

    train_acc = nnclf.score(X_train_scaled, y_train)
    test_acc = nnclf.score(X_test_scaled, y_test)

    print("Alpha: ", this_alpha)
    print('\tAccuracy of NN classifier on training set: {:.2f}'.format(train_acc))
    print('\tAccuracy of NN classifier on test set: {:.2f}'.format(test_acc))

Alpha:  0.001
	Accuracy of NN classifier on training set: 0.79
	Accuracy of NN classifier on test set: 0.75
Alpha:  0.01
	Accuracy of NN classifier on training set: 0.79
	Accuracy of NN classifier on test set: 0.77
Alpha:  0.1
	Accuracy of NN classifier on training set: 0.80
	Accuracy of NN classifier on test set: 0.77
Alpha:  1.0
	Accuracy of NN classifier on training set: 0.77
	Accuracy of NN classifier on test set: 0.77
Alpha:  2.0
	Accuracy of NN classifier on training set: 0.76
	Accuracy of NN classifier on test set: 0.76
Alpha:  3.0
	Accuracy of NN classifier on training set: 0.75
	Accuracy of NN classifier on test set: 0.77
Alpha:  5.0
	Accuracy of NN classifier on training set: 0.75
	Accuracy of NN classifier on test set: 0.77


In [43]:
# Compare hidden-layer activation functions with the other settings held fixed
# (one 20-unit hidden layer, alpha=0.1).
for this_activation in ['identity', 'logistic', 'tanh', 'relu']:
    nnclf = MLPClassifier(
        hidden_layer_sizes=[20],
        solver='lbfgs',
        activation=this_activation,
        alpha=0.1,
        max_iter=10000,
        random_state=0,
    )
    nnclf.fit(X_train_scaled, y_train)

    train_acc = nnclf.score(X_train_scaled, y_train)
    test_acc = nnclf.score(X_test_scaled, y_test)

    print("Activation function ", this_activation)
    print('\tAccuracy of NN classifier on training set: {:.2f}'.format(train_acc))
    print('\tAccuracy of NN classifier on test set: {:.2f}'.format(test_acc))

Activation function  identity
	Accuracy of NN classifier on training set: 0.75
	Accuracy of NN classifier on test set: 0.77
Activation function  logistic
	Accuracy of NN classifier on training set: 0.78
	Accuracy of NN classifier on test set: 0.77
Activation function  tanh
	Accuracy of NN classifier on training set: 0.83
	Accuracy of NN classifier on test set: 0.76
Activation function  relu
	Accuracy of NN classifier on training set: 0.80
	Accuracy of NN classifier on test set: 0.77


## Analysis of Optimal Parameters

In [None]:
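# Remaining hyperparameters to examine: batch_size and learning_rate.
# NOTE: scikit-learn's MLPClassifier does not use either of them when solver='lbfgs'
# (the solver used throughout this notebook); they apply only to the stochastic
# solvers ('sgd', and for batch_size / learning_rate_init also 'adam').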

In [44]:
# Refit one model with a single 20-unit hidden layer, ReLU activation and alpha=0.1,
# then report its accuracy on the training and test splits.
nnclf = MLPClassifier(
    hidden_layer_sizes=[20],
    solver='lbfgs',
    activation='relu',
    alpha=0.1,
    max_iter=10000,
    random_state=0,
)
nnclf.fit(X_train_scaled, y_train)

train_acc = nnclf.score(X_train_scaled, y_train)
test_acc = nnclf.score(X_test_scaled, y_test)

print('\tAccuracy of NN classifier on training set: {:.2f}'.format(train_acc))
print('\tAccuracy of NN classifier on test set: {:.2f}'.format(test_acc))

	Accuracy of NN classifier on training set: 0.80
	Accuracy of NN classifier on test set: 0.77


## Summary