# MANE 6399 - Data Science

## Week Fifteen

## MLP Classifier

# Jupyter Example 1

## MLP Classifier

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
# read excel file into dataframe
df1 = pd.read_excel(open('wpbc.data.xlsx','rb'))
df1=df1.dropna(axis=0,how='any')       # remove rows with missing values
df1.drop('id', axis=1, inplace=True)   # drop column with patient ids
# create endogenous and exogenous variables
X = np.array(df1.iloc[:, 1:14])
y = np.array(df1['recurrence'])
# split and transform data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
X_train.shape
# Transform training data
scaler=StandardScaler()
scaler.fit(X_train)
X_train_transformed=scaler.transform(X_train)
X_test_transformed=scaler.transform(X_test)
#fit model
mlp1=MLPClassifier(hidden_layer_sizes=(13,5,5))
mlp1.fit(X_train_transformed,y_train)
y_train_pred = mlp1.predict(X_train_transformed)
y_test_pred = mlp1.predict(X_test_transformed)
# produce results
print("Confusion matrix for training set")
print(confusion_matrix(y_train,y_train_pred))
print("The accuracy for the training set is %f"%mlp1.score(X_train_transformed,y_train))
print("Confusion matrix for test set")
print(confusion_matrix(y_test,y_test_pred))
print("The accuracy for the test set is %f"%mlp1.score(X_test_transformed,y_test))


Confusion matrix for training set
[[105   1]
 [ 24  15]]
The accuracy for the training set is 0.827586
Confusion matrix for test set
[[39  3]
 [ 6  1]]
The accuracy for the test set is 0.816327




# Jupyter Example 2

## SVM Classifier with Linear Kernel

In [4]:
from sklearn import svm
clf1=svm.SVC(kernel='linear')
clf1.fit(X_train,y_train)
#
y_train_pred = clf1.predict(X_train_transformed)
y_test_pred = clf1.predict(X_test_transformed)
# produce results
print("Confusion matrix for training set")
print(confusion_matrix(y_train,y_train_pred))
print("The accuracy for the training set is %f"%clf1.score(X_train_transformed,y_train))
print("Confusion matrix for test set")
print(confusion_matrix(y_test,y_test_pred))
print("The accuracy for the test set is %f"%clf1.score(X_test_transformed,y_test))


Confusion matrix for training set
[[  0 106]
 [  0  39]]
The accuracy for the training set is 0.268966
Confusion matrix for test set
[[ 0 42]
 [ 0  7]]
The accuracy for the test set is 0.142857


# Jupyter Example 3

## Grid Search for Example 2

In [5]:
from sklearn.model_selection import GridSearchCV
Cs=[.001,.01,.1,1,10,100]
param_grid={'C': Cs}
grid_search=GridSearchCV(svm.SVC(kernel='linear'), param_grid, cv=5,verbose=10,n_jobs=-1)
grid_search.fit(X,y)
print(grid_search.best_params_)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done  27 out of  30 | elapsed:  1.4min remaining:    8.9s


{'C': 0.001}


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:  1.7min finished


In [6]:
# implement best model
clf2=svm.SVC(kernel='linear',C=0.001)
clf2.fit(X_train,y_train)
#
y_train_pred = clf2.predict(X_train_transformed)
y_test_pred = clf2.predict(X_test_transformed)
# produce results
print("Confusion matrix for training set")
print(confusion_matrix(y_train,y_train_pred))
print("The accuracy for the training set is %f"%clf2.score(X_train_transformed,y_train))
print("Confusion matrix for test set")
print(confusion_matrix(y_test,y_test_pred))
print("The accuracy for the test set is %f"%clf2.score(X_test_transformed,y_test))


Confusion matrix for training set
[[  0 106]
 [  0  39]]
The accuracy for the training set is 0.268966
Confusion matrix for test set
[[ 0 42]
 [ 0  7]]
The accuracy for the test set is 0.142857


# Jupyter Example 4

# More Sophisticated Grid Search

In [7]:
Cs=[.001,.01,.1,1,10,100]
gammas=[.0001, .001, .01,.1,1.]
param_grid={'kernel':('linear','rbf'), 'C': Cs, 'gamma': gammas}
grid_search2=GridSearchCV(svm.SVC(),param_grid,cv=5,n_jobs=-1)
grid_search2.fit(X,y)
print(grid_search2.best_params_)

{'C': 0.001, 'gamma': 0.0001, 'kernel': 'linear'}


In [8]:
# implement best model
clf3=svm.SVC(kernel='linear',C=0.001,gamma=.0001)
clf3.fit(X_train,y_train)
#
y_train_pred = clf3.predict(X_train_transformed)
y_test_pred = clf3.predict(X_test_transformed)
# produce results
print("Confusion matrix for training set")
print(confusion_matrix(y_train,y_train_pred))
print("The accuracy for the training set is %f"%clf3.score(X_train_transformed,y_train))
print("Confusion matrix for test set")
print(confusion_matrix(y_test,y_test_pred))
print("The accuracy for the test set is %f"%clf3.score(X_test_transformed,y_test))


Confusion matrix for training set
[[  0 106]
 [  0  39]]
The accuracy for the training set is 0.268966
Confusion matrix for test set
[[ 0 42]
 [ 0  7]]
The accuracy for the test set is 0.142857
