In [None]:
%pip install ../../mlrose/
import mlrose_hiive as mlrose
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time
from ucimlrepo import fetch_ucirepo 
from collections import Counter
from sklearn import preprocessing
from sklearn.model_selection import learning_curve
from sklearn.model_selection import LearningCurveDisplay, ShuffleSplit
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import validation_curve

In [None]:
# Single seed shared by the split, SMOTE, and every model in this notebook.
RANDOM_STATE = 101

In [None]:
# Fetch UCI repository dataset id=17. Its target column is 'Diagnosis' with
# labels 'M'/'B' (malignant/benign), so this is breast-cancer diagnosis data,
# not wine quality. The `wq_df` name is kept only so other cells keep working.
wq_df = fetch_ucirepo(id=17)

# Features (as a pandas dataframe)
X = wq_df.data.features

# Encode the target as 0/1: 0 for benign and 1 for malignant.
# .assign() builds a new frame rather than writing into the frame owned by the
# fetched dataset object; the original item assignment mutated that object and
# can trigger pandas' SettingWithCopyWarning.
y = wq_df.data.targets.assign(
    Diagnosis=lambda targets: targets['Diagnosis'].map({'M': 1, 'B': 0})
)

In [None]:
# Hold out 20% of the rows as a test set, stratified on the target so both
# splits keep the same class ratio. The test set is not to be used until the
# very end.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=.2,
    random_state=RANDOM_STATE,
    stratify=y,
)

### Pre-Processing

In [None]:
# Min-max scale the features. The scaler is fitted on the training split only
# and the same fitted transform is then applied to the test split.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Oversample the training split with SMOTE (seeded); the test split is left
# untouched.
smote = SMOTE(random_state=RANDOM_STATE)
resampled = smote.fit_resample(X_train, y_train)
X_train, y_train = resampled

### Original Back Propagation

In [None]:
# Neural Network - Back Propagation - reimplemented with mlrose
# (gradient-descent weight optimisation, three hidden layers of 100 nodes).
nn_model1 = mlrose.NeuralNetwork(
    hidden_nodes=(100, 100, 100),
    activation='relu',
    algorithm='gradient_descent',
    max_iters=2000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,
    early_stopping=True,
    max_attempts=10,
    random_state=RANDOM_STATE,
    curve=True,
)

# Wall-clock time of the fit on the (resampled) training split.
start_time = time.time()
nn_model1.fit(X_train, y_train)
end_time = time.time()
bp_time = end_time - start_time

# Report the fitted weights, the final loss, and the run time.
print('Fitted Weights: ', nn_model1.fitted_weights)
print('Loss: ', nn_model1.loss)
print('Run Time: ', bp_time)

#### Prediction

In [None]:
# Accuracy of the fitted network on the training split ...
y_train_pred = nn_model1.predict(X_train)
y_train_accuracy = accuracy_score(y_train, y_train_pred)
print('Training accuracy: ', y_train_accuracy)

# ... and on the held-out test split.
y_test_pred = nn_model1.predict(X_test)
y_test_accuracy = accuracy_score(y_test, y_test_pred)
print('Test accuracy: ', y_test_accuracy)

#### Fitness Curve

In [None]:
# Keep the fitness curve recorded during the back-propagation fit
# (available because the model was built with curve=True) and print it.
# The predicted-probabilities printout is intentionally left disabled:
# print('Predicted Probabilities: ', nn_model1.predicted_probs)
fitness_curve_bp = nn_model1.fitness_curve
print('Fitness Curve: ', fitness_curve_bp)

In [None]:
# Fitness against iteration index for the back-propagation run.
iterations = list(range(len(fitness_curve_bp)))

fig, ax = plt.subplots()
ax.plot(iterations, fitness_curve_bp, label='fitness')
ax.set_title('Original Back Propagation', fontsize=10)
ax.set_xlabel('Iterations', fontsize=8)
ax.set_ylabel('Fitness Score', fontsize=8)
ax.legend(loc='best', fontsize=8)
ax.grid()
plt.show()

#### Learning Curve

In [None]:
# Learning curve for the gradient-descent (back-propagation) network:
# 5-fold cross-validation on the training split, scored with macro F1.
# A fresh, unfitted estimator is passed in for learning_curve to fit.
nn_model1 = mlrose.NeuralNetwork(
    hidden_nodes=(100, 100, 100),
    activation='relu',
    algorithm='gradient_descent',
    max_iters=2000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,
    early_stopping=True,
    max_attempts=10,
    random_state=RANDOM_STATE,
    curve=True,
)

# learning_curve only uses random_state when shuffle=True; without it the seed
# was silently ignored and training-set prefixes were taken in stored row order.
train_sizes, train_scores, validation_scores = learning_curve(
    estimator=nn_model1,
    X=X_train,
    y=y_train,
    cv=5,
    scoring='f1_macro',
    shuffle=True,
    random_state=RANDOM_STATE,
)

In [None]:
# Raw learning-curve output: training sizes, then the per-fold train and
# validation scores, one array after another.
print(train_sizes, train_scores, validation_scores, sep='\n')

In [None]:
# Average the per-fold scores at each training-set size.
train_mean = np.mean(train_scores, axis=1)
validation_mean = np.mean(validation_scores, axis=1)

# Train vs. validation macro-F1 as the training set grows.
fig, ax = plt.subplots()
ax.plot(train_sizes, train_mean, label="train")
ax.plot(train_sizes, validation_mean, label="validation")
ax.set_title("Back Propagation Learning Curve")
ax.set_xlabel("Training Set Size")
ax.set_ylabel("f1 score")
ax.legend(loc="best")
ax.grid()

plt.show()

### Random Hill Climbing

In [None]:
# Grid searched by the runner: fixed network settings plus a sweep over the
# number of random-hill-climb restarts.
grid_search_parameters = {
    # nn params
    'max_iters': [2000],
    'learning_rate': [0.001],
    'activation': [mlrose.relu],
    # rhc params
    'restarts': [1, 10, 25, 50, 100],
}

# Grid-search runner for a random-hill-climb-trained network.
nnr = mlrose.NNGSRunner(
    x_train=X_train,
    y_train=y_train,
    x_test=X_test,
    y_test=y_test,
    experiment_name='nn_rhc_test',
    seed=RANDOM_STATE,
    algorithm=mlrose.algorithms.rhc.random_hill_climb,
    grid_search_parameters=grid_search_parameters,
    iteration_list=[1, 10, 50, 100, 250, 500, 1000, 2000],
    hidden_layer_sizes=[(100, 100, 100)],
    bias=True,
    early_stopping=True,
    clip_max=1e+10,
    max_attempts=10,
    generate_curves=True,
)

# The last element of the returned tuple is the GridSearchCV instance.
run_stats_df, curves_df, cv_results_df, grid_search_cv = nnr.run()

In [None]:
# Display the first frame returned by nnr.run() for the RHC grid search.
run_stats_df

In [None]:
# Display the second frame returned by nnr.run() for the RHC grid search.
curves_df

In [None]:
# Display the third frame returned by nnr.run() for the RHC grid search.
cv_results_df

In [None]:
# Display the GridSearchCV instance returned by nnr.run() for the RHC search.
grid_search_cv

In [None]:
# Display the parameters of the GridSearchCV instance from the RHC search.
grid_search_cv.get_params()

#### Learning Curve

In [None]:
# Learning curve for the random-hill-climb network (restarts=100):
# 5-fold cross-validation on the training split, scored with macro F1.
nn_model1 = mlrose.NeuralNetwork(
    hidden_nodes=(100, 100, 100),
    activation='relu',
    algorithm='random_hill_climb',
    max_iters=2000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,
    early_stopping=True,
    max_attempts=10,
    random_state=RANDOM_STATE,
    curve=True,
    restarts=100,
)

# learning_curve only uses random_state when shuffle=True; without it the seed
# was silently ignored and training-set prefixes were taken in stored row order.
train_sizes, train_scores, validation_scores = learning_curve(
    estimator=nn_model1,
    X=X_train,
    y=y_train,
    cv=5,
    scoring='f1_macro',
    shuffle=True,
    random_state=RANDOM_STATE,
)

In [None]:
# Average the per-fold scores at each training-set size.
train_mean = np.mean(train_scores, axis=1)
validation_mean = np.mean(validation_scores, axis=1)

# Train vs. validation macro-F1 as the training set grows.
fig, ax = plt.subplots()
ax.plot(train_sizes, train_mean, label="train")
ax.plot(train_sizes, validation_mean, label="validation")
ax.set_title("Random Hill Climb Learning Curve")
ax.set_xlabel("Training Set Size")
ax.set_ylabel("f1 score")
ax.legend(loc="best")
ax.grid()

plt.show()

In [None]:
# Neural Network - Random Hill Climbing - Reimplemented with mlrose
nn_model1 = mlrose.NeuralNetwork(
    hidden_nodes=(100, 100, 100),
    activation='relu',
    algorithm='random_hill_climb',
    max_iters=2000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,
    early_stopping=True,
    max_attempts=10,
    random_state=RANDOM_STATE,
    curve=True,
    restarts=100,
)

# Time the fit. The elapsed time is stored in rhc_time rather than bp_time so
# the back-propagation timing measured earlier is not overwritten.
start_time = time.time()
nn_model1.fit(X_train, y_train)
end_time = time.time()
rhc_time = end_time - start_time

# Fitted Weights
print('Fitted Weights: ', nn_model1.fitted_weights)

# Loss
print('Loss: ', nn_model1.loss)

# Run time
print('Run Time: ', rhc_time)

#### Prediction

In [None]:
# Accuracy of the fitted RHC network on the training split ...
y_train_pred = nn_model1.predict(X_train)
y_train_accuracy = accuracy_score(y_train, y_train_pred)
print('Training accuracy: ', y_train_accuracy)

# ... and on the held-out test split.
y_test_pred = nn_model1.predict(X_test)
y_test_accuracy = accuracy_score(y_test, y_test_pred)
print('Test accuracy: ', y_test_accuracy)

#### Fitness Curve

In [None]:
# Probabilities stored on the model by its most recent predict() call.
print('Predicted Probabilities: ', nn_model1.predicted_probs)

# Fitness curve recorded during the RHC fit (model built with curve=True).
# NOTE(review): this reuses the name fitness_curve_bp from the
# back-propagation section, so the earlier curve is overwritten here.
fitness_curve_bp = nn_model1.fitness_curve
print('Fitness Curve: ', fitness_curve_bp)

In [None]:
# Fitness against iteration index for the random-hill-climb run.
# NOTE(review): depending on the mlrose version the curve may have more than
# one column, in which case several lines share the 'fitness' label — confirm.
iterations = list(range(len(fitness_curve_bp)))

fig, ax = plt.subplots()
ax.plot(iterations, fitness_curve_bp, label='fitness')
ax.set_title('Random Hill Climbing', fontsize=10)
ax.set_xlabel('Iterations', fontsize=8)
ax.set_ylabel('Fitness Score', fontsize=8)
ax.legend(loc='best', fontsize=8)
ax.grid()
plt.show()

#### Validation Curves

In [None]:
# Validation curve over the number of RHC restarts (5-fold CV, macro F1).
parameter_range = [1, 10, 25, 50, 100]

nn_model1 = mlrose.NeuralNetwork(
    hidden_nodes=(100, 100, 100),
    activation='relu',
    algorithm='random_hill_climb',
    max_iters=2000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,
    early_stopping=True,
    max_attempts=10,
    random_state=RANDOM_STATE,
    curve=True,
)

train_score, test_score = validation_curve(
    estimator=nn_model1,
    X=X_train,
    y=y_train,
    param_name="restarts",
    param_range=parameter_range,
    cv=5,
    scoring="f1_macro",
)

In [None]:
# Per-fold training scores from the restarts validation curve.
print(train_score)

In [None]:
# Per-fold validation scores from the restarts validation curve.
print(test_score)

In [None]:
# Mean and spread of the per-fold scores for each restarts value
# (the standard deviations are computed but not drawn).
mean_train_score = train_score.mean(axis=1)
std_train_score = train_score.std(axis=1)
mean_test_score = test_score.mean(axis=1)
std_test_score = test_score.std(axis=1)

# Same grid that was passed to validation_curve.
parameter_range = [1, 10, 25, 50, 100]

# Mean macro-F1 for training and validation against the restarts value.
fig, ax = plt.subplots()
ax.plot(parameter_range, mean_train_score, label="training", color='b')
ax.plot(parameter_range, mean_test_score, label="validation", color='g')
ax.set_title("Restarts Validation Curve")
ax.set_xlabel("Restarts")
ax.set_ylabel("f1 score")
fig.tight_layout()
ax.legend(loc='best')
ax.grid()
plt.show()

### Simulated Annealing

In [None]:
# Grid searched by the runner: fixed network settings only.
grid_search_parameters = {
    # nn params
    'max_iters': [2000],
    'learning_rate': [0.001],
    'activation': [mlrose.relu],
}

# Grid-search runner for a simulated-annealing-trained network.
nnr = mlrose.NNGSRunner(
    x_train=X_train,
    y_train=y_train,
    x_test=X_test,
    y_test=y_test,
    experiment_name='nn_sa_test',
    seed=RANDOM_STATE,
    algorithm=mlrose.algorithms.sa.simulated_annealing,
    grid_search_parameters=grid_search_parameters,
    iteration_list=[1, 10, 50, 100, 250, 500, 1000, 2000],
    hidden_layer_sizes=[(100, 100, 100)],
    bias=True,
    early_stopping=True,
    clip_max=1e+10,
    max_attempts=10,
    generate_curves=True,
)

# The last element of the returned tuple is the GridSearchCV instance.
run_stats_df, curves_df, cv_results_df, grid_search_cv = nnr.run()

In [None]:
# Display the first frame returned by nnr.run() for the SA grid search.
run_stats_df

In [None]:
# Display the second frame returned by nnr.run() for the SA grid search.
curves_df

In [None]:
# Display the third frame returned by nnr.run() for the SA grid search.
cv_results_df

In [None]:
# Display the GridSearchCV instance returned by nnr.run() for the SA search.
grid_search_cv

In [None]:
# Learning curve for the simulated-annealing network, using a geometric
# temperature schedule that starts at 0.5: 5-fold cross-validation on the
# training split, scored with macro F1.
schedule = mlrose.GeomDecay(init_temp=.5)
nn_model1 = mlrose.NeuralNetwork(
    hidden_nodes=(100, 100, 100),
    activation='relu',
    algorithm='simulated_annealing',
    max_iters=2000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,
    early_stopping=True,
    max_attempts=10,
    random_state=RANDOM_STATE,
    curve=True,
    schedule=schedule,
)

# learning_curve only uses random_state when shuffle=True; without it the seed
# was silently ignored and training-set prefixes were taken in stored row order.
train_sizes, train_scores, validation_scores = learning_curve(
    estimator=nn_model1,
    X=X_train,
    y=y_train,
    cv=5,
    scoring='f1_macro',
    shuffle=True,
    random_state=RANDOM_STATE,
)

In [None]:
# Average the per-fold scores at each training-set size.
train_mean = train_scores.mean(axis=1)
validation_mean = validation_scores.mean(axis=1)

# Train vs. validation macro-F1 as the training set grows.
plt.plot(train_sizes, train_mean, label="train")
plt.plot(train_sizes, validation_mean, label="validation")

plt.title("Simulated Annealing Learning Curve")
# The x values are train_sizes, so the axis is training-set size (the label
# previously said "Iteration", unlike the other learning-curve plots).
plt.xlabel("Training Set Size")
plt.ylabel("f1 score")
plt.legend(loc="best")
plt.grid()

plt.show()

In [None]:
# Validation curve over the GeomDecay schedule's `decay` (5-fold CV, macro F1).
# NOTE(review): mlrose's GeomDecay documents decay as a value in (0, 1]; the
# values swept here are all >= 1 and look like an init_temp sweep — confirm.
# NOTE(review): the nested "schedule__decay" name relies on the schedule
# object supporting set_params — verify against the installed mlrose.
parameter_range = [1, 10, 25, 50]
schedule = mlrose.GeomDecay()

nn_model1 = mlrose.NeuralNetwork(
    hidden_nodes=(100, 100, 100),
    activation='relu',
    algorithm='simulated_annealing',
    max_iters=2000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,
    early_stopping=True,
    max_attempts=10,
    random_state=RANDOM_STATE,
    curve=True,
    schedule=schedule,
)

train_score, test_score = validation_curve(
    estimator=nn_model1,
    X=X_train,
    y=y_train,
    param_name="schedule__decay",
    param_range=parameter_range,
    cv=5,
    scoring="f1_macro",
)

In [None]:
# Mean and spread of the per-fold scores for each decay value
# (the standard deviations are computed but not drawn).
mean_train_score = train_score.mean(axis=1)
std_train_score = train_score.std(axis=1)
mean_test_score = test_score.mean(axis=1)
std_test_score = test_score.std(axis=1)

# Same grid that was passed to validation_curve.
parameter_range = [1, 10, 25, 50]

# Mean macro-F1 for training and validation against the decay value.
fig, ax = plt.subplots()
ax.plot(parameter_range, mean_train_score, label="training", color='b')
ax.plot(parameter_range, mean_test_score, label="validation", color='g')
ax.set_title("Decay Validation Curve")
ax.set_xlabel("Decay")
ax.set_ylabel("f1 score")
fig.tight_layout()
ax.legend(loc='best')
ax.grid()
plt.show()

### Genetic Algorithm

In [None]:
# Grid searched by the runner: fixed network settings only.
grid_search_parameters = {
    # nn params
    'max_iters': [2000],
    'learning_rate': [0.001],
    'activation': [mlrose.relu],
}

# Grid-search runner for a genetic-algorithm-trained network.
nnr = mlrose.NNGSRunner(
    x_train=X_train,
    y_train=y_train,
    x_test=X_test,
    y_test=y_test,
    experiment_name='nn_ga_test',
    seed=RANDOM_STATE,
    algorithm=mlrose.algorithms.ga.genetic_alg,
    grid_search_parameters=grid_search_parameters,
    iteration_list=[1, 10, 50, 100, 250, 500, 1000, 2000],
    hidden_layer_sizes=[(100, 100, 100)],
    bias=True,
    early_stopping=True,
    clip_max=1e+10,
    max_attempts=10,
    generate_curves=True,
)

# The last element of the returned tuple is the GridSearchCV instance.
run_stats_df, curves_df, cv_results_df, grid_search_cv = nnr.run()

In [None]:
# Display the first frame returned by nnr.run() for the GA grid search.
run_stats_df

In [None]:
# Display the second frame returned by nnr.run() for the GA grid search.
curves_df

In [None]:
# Display the third frame returned by nnr.run() for the GA grid search.
cv_results_df

In [None]:
# Display the GridSearchCV instance returned by nnr.run() for the GA search.
grid_search_cv

In [None]:
# Learning curve for the genetic-algorithm network (pop_size=500,
# mutation_prob=.25): 5-fold cross-validation on the training split, scored
# with macro F1.
nn_model1 = mlrose.NeuralNetwork(
    hidden_nodes=(100, 100, 100),
    activation='relu',
    algorithm='genetic_alg',
    max_iters=2000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,
    early_stopping=True,
    max_attempts=10,
    random_state=RANDOM_STATE,
    curve=True,
    pop_size=500,
    mutation_prob=.25,
)

# learning_curve only uses random_state when shuffle=True; without it the seed
# was silently ignored and training-set prefixes were taken in stored row order.
train_sizes, train_scores, validation_scores = learning_curve(
    estimator=nn_model1,
    X=X_train,
    y=y_train,
    cv=5,
    scoring='f1_macro',
    shuffle=True,
    random_state=RANDOM_STATE,
)

In [None]:
# Average the per-fold scores at each training-set size.
train_mean = train_scores.mean(axis=1)
validation_mean = validation_scores.mean(axis=1)

# Train vs. validation macro-F1 as the training set grows.
plt.plot(train_sizes, train_mean, label="train")
plt.plot(train_sizes, validation_mean, label="validation")

plt.title("Genetic Algorithm Learning Curve")
# The x values are train_sizes, so the axis is training-set size (the label
# previously said "Iteration", unlike the other learning-curve plots).
plt.xlabel("Training Set Size")
plt.ylabel("f1 score")
plt.legend(loc="best")
plt.grid()

plt.show()

In [None]:
# Neural Network - Genetic Algorithm
nn_model1 = mlrose.NeuralNetwork(
    hidden_nodes=(100, 100, 100),
    activation='relu',
    algorithm='genetic_alg',
    max_iters=2000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,
    early_stopping=True,
    max_attempts=10,
    random_state=RANDOM_STATE,
    curve=True,
    pop_size=500,
    mutation_prob=.25,
)

# Time the fit. The elapsed time is stored in ga_time rather than bp_time so
# timings measured for the other algorithms are not overwritten.
start_time = time.time()
nn_model1.fit(X_train, y_train)
end_time = time.time()
ga_time = end_time - start_time

# Fitted Weights
print('Fitted Weights: ', nn_model1.fitted_weights)

# Loss
print('Loss: ', nn_model1.loss)

# Run time
print('Run Time: ', ga_time)

In [None]:
# Accuracy of the fitted GA network on the training split ...
y_train_pred = nn_model1.predict(X_train)
y_train_accuracy = accuracy_score(y_train, y_train_pred)
print('Training accuracy: ', y_train_accuracy)

# ... and on the held-out test split.
y_test_pred = nn_model1.predict(X_test)
y_test_accuracy = accuracy_score(y_test, y_test_pred)
print('Test accuracy: ', y_test_accuracy)

In [None]:
# Keep the fitness curve recorded during the GA fit (model built with
# curve=True) and print it.
# The predicted-probabilities printout is intentionally left disabled:
# print('Predicted Probabilities: ', nn_model1.predicted_probs)
fitness_curve = nn_model1.fitness_curve
print('Fitness Curve: ', fitness_curve)

In [None]:
# Fitness against iteration index for the genetic-algorithm run.
iterations = list(range(len(fitness_curve)))

fig, ax = plt.subplots()
ax.plot(iterations, fitness_curve, label='fitness')
ax.set_title('Genetic Algorithm', fontsize=10)
ax.set_xlabel('Iterations', fontsize=8)
ax.set_ylabel('Fitness Score', fontsize=8)
ax.legend(loc='best', fontsize=8)
ax.grid()
plt.show()

#### Validation Curves

In [None]:
# Validation curve over the GA population size (5-fold CV, macro F1).
parameter_range = [10, 50, 100, 150, 200, 500, 1000]

nn_model1 = mlrose.NeuralNetwork(
    hidden_nodes=(100, 100, 100),
    activation='relu',
    algorithm='genetic_alg',
    max_iters=2000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,
    early_stopping=True,
    max_attempts=10,
    random_state=RANDOM_STATE,
    curve=True,
)

train_score, test_score = validation_curve(
    estimator=nn_model1,
    X=X_train,
    y=y_train,
    param_name="pop_size",
    param_range=parameter_range,
    cv=5,
    scoring="f1_macro",
)

In [None]:
# Mean and spread of the per-fold scores for each population size
# (the standard deviations are computed but not drawn).
mean_train_score = train_score.mean(axis=1)
std_train_score = train_score.std(axis=1)
mean_test_score = test_score.mean(axis=1)
std_test_score = test_score.std(axis=1)

# Same grid that was passed to validation_curve.
parameter_range = [10, 50, 100, 150, 200, 500, 1000]

# Mean macro-F1 for training and validation against the population size.
fig, ax = plt.subplots()
ax.plot(parameter_range, mean_train_score, label="training", color='b')
ax.plot(parameter_range, mean_test_score, label="validation", color='g')
ax.set_title("Pop Size Validation Curve")
ax.set_xlabel("Pop Size")
ax.set_ylabel("f1 score")
fig.tight_layout()
ax.legend(loc='best')
ax.grid()
plt.show()

In [None]:
# Validation curve over the GA mutation probability (5-fold CV, macro F1).
parameter_range = [.1, .25, .5, .75, .99]

nn_model1 = mlrose.NeuralNetwork(
    hidden_nodes=(100, 100, 100),
    activation='relu',
    algorithm='genetic_alg',
    max_iters=2000,
    bias=True,
    is_classifier=True,
    learning_rate=0.001,
    early_stopping=True,
    max_attempts=10,
    random_state=RANDOM_STATE,
    curve=True,
)

train_score, test_score = validation_curve(
    estimator=nn_model1,
    X=X_train,
    y=y_train,
    param_name="mutation_prob",
    param_range=parameter_range,
    cv=5,
    scoring="f1_macro",
)

In [None]:
# Mean and spread of the per-fold scores for each mutation probability
# (the standard deviations are computed but not drawn).
mean_train_score = train_score.mean(axis=1)
std_train_score = train_score.std(axis=1)
mean_test_score = test_score.mean(axis=1)
std_test_score = test_score.std(axis=1)

# Same grid that was passed to validation_curve.
parameter_range = [.1, .25, .5, .75, .99]

# Mean macro-F1 for training and validation against the mutation probability.
fig, ax = plt.subplots()
ax.plot(parameter_range, mean_train_score, label="training", color='b')
ax.plot(parameter_range, mean_test_score, label="validation", color='g')
ax.set_title("Mutation Prob Validation Curve")
ax.set_xlabel("Mutation Prob")
ax.set_ylabel("f1 score")
fig.tight_layout()
ax.legend(loc='best')
ax.grid()
plt.show()