### MANE 4333

## Week Twelve

## MLP Classifier

In [None]:
# Example 1 - MLPClassifier

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPClassifier
# Read the Excel file into a dataframe. Passing the path lets pandas open and
# close the file itself; the previous open(..., 'rb') handle was never closed.
df1 = pd.read_excel('wpbc.data.xlsx')
df1 = df1.dropna(axis=0, how='any')    # remove rows with missing values
df1 = df1.drop('id', axis=1)           # drop column with patient ids
print(df1)
# Create the feature matrix (columns 1..13 by position) and the target vector.
# NOTE(review): the positional slice assumes 'recurrence' is not among
# columns 1..13 once 'id' is dropped -- confirm against the spreadsheet layout.
X = np.array(df1.iloc[:, 1:14])
print(X)
y = np.array(df1['recurrence'])
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1114)
# Fit the scaler on the training rows only, then apply the same transform to
# both splits so no information from the test rows reaches the scaler.
scaler = MinMaxScaler()
X_train_transformed = scaler.fit_transform(X_train)
X_test_transformed = scaler.transform(X_test)
# Fit the network (hidden layers of 13, 5 and 5 units). The weight
# initialisation is seeded so reruns of this cell give the same results.
mlp1 = MLPClassifier(hidden_layer_sizes=(13, 5, 5), random_state=1114)
mlp1.fit(X_train_transformed, y_train)
y_train_pred = mlp1.predict(X_train_transformed)
y_test_pred = mlp1.predict(X_test_transformed)
# Report confusion matrix and accuracy for each split.
print("Confusion matrix for training set")
print(confusion_matrix(y_train, y_train_pred))
print("The accuracy for the training set is %f" % mlp1.score(X_train_transformed, y_train))
print("Confusion matrix for test set")
print(confusion_matrix(y_test, y_test_pred))
print("The accuracy for the test set is %f" % mlp1.score(X_test_transformed, y_test))


In [None]:
# Example 2 - MLPClassifier with RandomizedSearchCV

import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from scipy.stats import uniform
from sklearn.metrics import confusion_matrix

# Read the Excel file into a dataframe. Passing the path lets pandas open and
# close the file itself; the previous open(..., 'rb') handle was never closed.
df1 = pd.read_excel('wpbc.data.xlsx')
df1 = df1.dropna(axis=0, how='any')    # remove rows with missing values
df1 = df1.drop('id', axis=1)           # drop column with patient ids
# Create the feature matrix (columns 1..13 by position) and the target vector.
# NOTE(review): the positional slice assumes 'recurrence' is not among
# columns 1..13 once 'id' is dropped -- confirm against the spreadsheet layout.
X = np.array(df1.iloc[:, 1:14])
y = np.array(df1['recurrence'])

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1114)

# Define the pipeline with MinMaxScaler and MLPClassifier. Because the scaler
# lives inside the pipeline it is refitted on each cross-validation training
# fold. The MLP is seeded so repeated searches give the same scores.
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('mlp', MLPClassifier(max_iter=100, random_state=1114))
])

# Define parameter space for RandomizedSearchCV. The 'mlp__' prefix routes
# each setting to the pipeline step named 'mlp'.
param_dist = {
    'mlp__hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 100)],
    'mlp__activation': ['tanh', 'relu'],
    'mlp__solver': ['sgd', 'adam'],
    'mlp__alpha': uniform(0.0001, 0.1),   # scipy uniform(loc, scale): samples from [loc, loc + scale]
    'mlp__learning_rate': ['constant', 'adaptive'],
}

# Set up RandomizedSearchCV: 20 sampled configurations, 5-fold cross-validation
random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    n_iter=20,
    cv=5,
    verbose=2,
    random_state=1114
)

# Fit the model
random_search.fit(X_train, y_train)

# Print the best parameters and the best score
print("Best parameters found: ", random_search.best_params_)
print("Best cross-validation score: ", random_search.best_score_)

# The best pipeline found by the search (scaler + MLP); it takes raw,
# unscaled features as input.
best_model = random_search.best_estimator_

# Generate predictions for both training and test sets
y_train_pred = best_model.predict(X_train)
y_test_pred = best_model.predict(X_test)

# Compute and display confusion matrix for training set
train_cm = confusion_matrix(y_train, y_train_pred)
print("Confusion matrix for training set")
print(train_cm)
print("The accuracy for the training set is %f" % best_model.score(X_train, y_train))

# Compute and display confusion matrix for test set
# (label previously read "set set")
test_cm = confusion_matrix(y_test, y_test_pred)
print("Confusion matrix for test set")
print(test_cm)
print("The accuracy for the test set is %f" % best_model.score(X_test, y_test))


In [None]:
# Example 3 - MLPClassifier with GridSearchCV

import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from scipy.stats import uniform
from sklearn.metrics import confusion_matrix

# Read the Excel file into a dataframe. Passing the path lets pandas open and
# close the file itself; the previous open(..., 'rb') handle was never closed.
df1 = pd.read_excel('wpbc.data.xlsx')
df1 = df1.dropna(axis=0, how='any')    # remove rows with missing values
df1 = df1.drop('id', axis=1)           # drop column with patient ids
# Create the feature matrix (columns 1..13 by position) and the target vector.
# NOTE(review): the positional slice assumes 'recurrence' is not among
# columns 1..13 once 'id' is dropped -- confirm against the spreadsheet layout.
X = np.array(df1.iloc[:, 1:14])
y = np.array(df1['recurrence'])

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1114)

# Define the pipeline with MinMaxScaler and MLPClassifier. Because the scaler
# lives inside the pipeline it is refitted on each cross-validation training
# fold. The MLP is seeded so repeated searches give the same scores.
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('mlp', MLPClassifier(max_iter=100, random_state=1114))
])

# Define parameter grid for GridSearchCV (4 * 2 * 2 * 4 * 2 = 128 combinations).
# The 'mlp__' prefix routes each setting to the pipeline step named 'mlp'.
param_grid = {
    'mlp__hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 100)],
    'mlp__activation': ['tanh', 'relu'],
    'mlp__solver': ['sgd', 'adam'],
    'mlp__alpha': [0.0001, 0.001, 0.01, 0.1],
    'mlp__learning_rate': ['constant', 'adaptive'],
}

# Set up GridSearchCV with 5-fold cross-validation
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
    verbose=2
)

# Fit the model
grid_search.fit(X_train, y_train)

# Print the best parameters and the best score
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

# The best pipeline found by the search (scaler + MLP); it takes raw,
# unscaled features as input.
best_model = grid_search.best_estimator_

# Generate predictions for both training and test sets
y_train_pred = best_model.predict(X_train)
y_test_pred = best_model.predict(X_test)

# Compute and display confusion matrix for training set
train_cm = confusion_matrix(y_train, y_train_pred)
print("Confusion matrix for training set")
print(train_cm)
print("The accuracy for the training set is %f" % best_model.score(X_train, y_train))

# Compute and display confusion matrix for test set
# (label previously read "set set")
test_cm = confusion_matrix(y_test, y_test_pred)
print("Confusion matrix for test set")
print(test_cm)
print("The accuracy for the test set is %f" % best_model.score(X_test, y_test))


In [None]:
# Example 4 - Linear SVM 

from sklearn.svm import LinearSVC

# Define the LinearSVC model
linear_svc = LinearSVC(C=1.0, max_iter=1000, random_state=42)

# Fit the model on the MinMax-scaled training data. The model was previously
# fitted on the unscaled X_train but evaluated on the scaled arrays, so the
# features seen at prediction time did not match those seen during fitting.
# X_train_transformed / X_test_transformed come from the Example 1 cell; the
# later cells recreate the split with the same test_size and random_state.
linear_svc.fit(X_train_transformed, y_train)

# Predict on the same scaled representation the model was fitted on
y_train_pred = linear_svc.predict(X_train_transformed)
y_test_pred = linear_svc.predict(X_test_transformed)
# produce results
print("Confusion matrix for training set")
print(confusion_matrix(y_train, y_train_pred))
print("The accuracy for the training set is %f" % linear_svc.score(X_train_transformed, y_train))
print("Confusion matrix for test set")
print(confusion_matrix(y_test, y_test_pred))
print("The accuracy for the test set is %f" % linear_svc.score(X_test_transformed, y_test))


In [None]:
# Example 5 - SVM with RBF Kernel

from sklearn.svm import SVC

# Define the SVM model with an RBF kernel
svm_rbf = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)

# Fit the model on the MinMax-scaled training data. The model was previously
# fitted on the unscaled X_train but evaluated on the scaled arrays, so the
# features seen at prediction time did not match those seen during fitting.
# X_train_transformed / X_test_transformed come from the Example 1 cell; the
# later cells recreate the split with the same test_size and random_state.
svm_rbf.fit(X_train_transformed, y_train)

# Predict on the same scaled representation the model was fitted on
y_train_pred = svm_rbf.predict(X_train_transformed)
y_test_pred = svm_rbf.predict(X_test_transformed)
# produce results
print("Confusion matrix for training set")
print(confusion_matrix(y_train, y_train_pred))
print("The accuracy for the training set is %f" % svm_rbf.score(X_train_transformed, y_train))
print("Confusion matrix for test set")
print(confusion_matrix(y_test, y_test_pred))
print("The accuracy for the test set is %f" % svm_rbf.score(X_test_transformed, y_test))


In [None]:
# Example 6 - SVM Classifier - third order polynomial

from sklearn.svm import SVC

# Define the SVM model with a third-order polynomial kernel
svm_poly = SVC(kernel='poly', degree=3, C=1.0, gamma='scale', random_state=42)

# Fit the model on the MinMax-scaled training data. The model was previously
# fitted on the unscaled X_train but evaluated on the scaled arrays, so the
# features seen at prediction time did not match those seen during fitting.
# X_train_transformed / X_test_transformed come from the Example 1 cell; the
# later cells recreate the split with the same test_size and random_state.
svm_poly.fit(X_train_transformed, y_train)

# Predict on the same scaled representation the model was fitted on
y_train_pred = svm_poly.predict(X_train_transformed)
y_test_pred = svm_poly.predict(X_test_transformed)
# produce results
print("Confusion matrix for training set")
print(confusion_matrix(y_train, y_train_pred))
print("The accuracy for the training set is %f" % svm_poly.score(X_train_transformed, y_train))
print("Confusion matrix for test set")
print(confusion_matrix(y_test, y_test_pred))
print("The accuracy for the test set is %f" % svm_poly.score(X_test_transformed, y_test))


In [None]:
# Example 7 - SVM Classifier - RandomizedSearchCV polynomial

from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.stats import uniform

# Define the SVM model (polynomial kernel; degree is tuned by the search below)
svm_poly = SVC(kernel='poly', random_state=42)

# Define the parameter distributions for RandomizedSearchCV
param_dist = {
    'C': uniform(0.1, 10.0),            # Regularization parameter; scipy uniform(loc, scale) samples from [0.1, 10.1]
    'degree': [2, 3],                   # Second- and third-order polynomials
    'gamma': ['scale', 'auto'],         # Kernel coefficient
    'coef0': uniform(0, 1),             # Independent term in polynomial kernel
}

# Set up RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=svm_poly,
    param_distributions=param_dist,
    n_iter=20,                          # Number of random combinations to try
    cv=5,                               # 5-fold cross-validation
    verbose=2,
    random_state=42
)

# Fit the model with randomized search. The search and the evaluation below
# both use the unscaled X_train / X_test, so they are consistent with each other.
random_search.fit(X_train, y_train)

# Print the best parameters and the best score
print("Best parameters found: ", random_search.best_params_)
print("Best cross-validation score: ", random_search.best_score_)

# Test the best model on the test set
best_model = random_search.best_estimator_

# Generate predictions for both training and test sets
y_train_pred = best_model.predict(X_train)
y_test_pred = best_model.predict(X_test)

# Compute and display confusion matrix for training set
train_cm = confusion_matrix(y_train, y_train_pred)
print("Confusion matrix for training set")
print(train_cm)
print("The accuracy for the training set is %f" % best_model.score(X_train, y_train))

# Compute and display confusion matrix for test set
# (label previously read "set set")
test_cm = confusion_matrix(y_test, y_test_pred)
print("Confusion matrix for test set")
print(test_cm)
print("The accuracy for the test set is %f" % best_model.score(X_test, y_test))


In [None]:
from sklearn.model_selection import GridSearchCV
# Import SVC directly: the cell previously called svm.SVC, but no visible cell
# imports the `svm` module, so it raised NameError.
from sklearn.svm import SVC

# Candidate values for the regularization parameter C of a linear-kernel SVC
Cs = [.001, .01, .1, 1, 10, 100]
param_grid = {'C': Cs}
grid_search = GridSearchCV(SVC(kernel='linear'), param_grid, cv=5, verbose=10, n_jobs=-1)
# Search on the scaled training split only. Fitting on the full X, y let the
# held-out test rows influence the choice of C that is later evaluated on them.
# X_train_transformed comes from the Example 1 cell.
grid_search.fit(X_train_transformed, y_train)
print(grid_search.best_params_)

In [None]:
# implement best model
# Import SVC directly: the cell previously called svm.SVC, but no visible cell
# imports the `svm` module, so it raised NameError.
from sklearn.svm import SVC

# C is hard-coded; NOTE(review): keep it in sync with the best_params_ printed
# by the preceding grid-search cell.
clf2 = SVC(kernel='linear', C=0.001)
# Fit on the scaled training data so fitting and prediction use the same
# feature representation (the model was previously fitted on unscaled X_train).
clf2.fit(X_train_transformed, y_train)

y_train_pred = clf2.predict(X_train_transformed)
y_test_pred = clf2.predict(X_test_transformed)
# produce results
print("Confusion matrix for training set")
print(confusion_matrix(y_train, y_train_pred))
print("The accuracy for the training set is %f" % clf2.score(X_train_transformed, y_train))
print("Confusion matrix for test set")
print(confusion_matrix(y_test, y_test_pred))
print("The accuracy for the test set is %f" % clf2.score(X_test_transformed, y_test))


# Jupyter Example 4

# More Sophisticated Grid Search

In [None]:
# Import SVC directly: the cell previously called svm.SVC, but no visible cell
# imports the `svm` module, so it raised NameError.
from sklearn.svm import SVC

# Grid over kernel type, regularization strength C and kernel coefficient gamma
Cs = [.001, .01, .1, 1, 10, 100]
gammas = [.0001, .001, .01, .1, 1.]
param_grid = {'kernel': ('linear', 'rbf'), 'C': Cs, 'gamma': gammas}
grid_search2 = GridSearchCV(SVC(), param_grid, cv=5, n_jobs=-1)
# Search on the scaled training split only. Fitting on the full X, y let the
# held-out test rows influence the hyperparameters that are later evaluated
# on them. X_train_transformed comes from the Example 1 cell.
grid_search2.fit(X_train_transformed, y_train)
print(grid_search2.best_params_)

In [None]:
# implement best model
# Import SVC directly: the cell previously called svm.SVC, but no visible cell
# imports the `svm` module, so it raised NameError.
from sklearn.svm import SVC

# Hyperparameters are hard-coded; NOTE(review): keep them in sync with the
# best_params_ printed by the preceding grid-search cell. gamma is passed
# through as selected by the search, although scikit-learn documents it as a
# coefficient for the 'rbf', 'poly' and 'sigmoid' kernels only.
clf3 = SVC(kernel='linear', C=0.001, gamma=.0001)
# Fit on the scaled training data so fitting and prediction use the same
# feature representation (the model was previously fitted on unscaled X_train).
clf3.fit(X_train_transformed, y_train)

y_train_pred = clf3.predict(X_train_transformed)
y_test_pred = clf3.predict(X_test_transformed)
# produce results
print("Confusion matrix for training set")
print(confusion_matrix(y_train, y_train_pred))
print("The accuracy for the training set is %f" % clf3.score(X_train_transformed, y_train))
print("Confusion matrix for test set")
print(confusion_matrix(y_test, y_test_pred))
print("The accuracy for the test set is %f" % clf3.score(X_test_transformed, y_test))
