In [None]:
import time

import matplotlib.pyplot as plt
import mlrose
import numpy as np
import pandas as pd
import scipy
import sklearn
from sklearn import model_selection
from sklearn.metrics import accuracy_score
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix, log_loss
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold, cross_val_score
from sklearn.model_selection import learning_curve
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder


In [None]:
# Load the raw dataset from the notebook's working directory. The path is written
# without the Windows-only ".\" prefix so it also resolves on POSIX systems.
mushrooms = pd.read_excel("mushroom.xlsx")

def split_validation(df,onehot):
    """Split a frame into a trailing hold-out set and the remaining rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to split. Rows are taken in their existing order (no shuffling).
    onehot : bool
        When truthy, ``pd.get_dummies(df, drop_first=True)`` is applied first.

    Returns
    -------
    (validation, train_test) : tuple of pandas.DataFrame
        ``validation`` is the last ``int(n_rows * .1)`` rows and ``train_test``
        is everything before them.
    """
    if onehot:
        df = pd.get_dummies(df, drop_first=True)
    n_rows = df.shape[0]
    n_validation = int(n_rows * .1)
    # Slice from an explicit positive position: with fewer than 10 rows the
    # hold-out size is 0, and negative slicing (df[-0:] / df.iloc[:-0]) would
    # return the whole frame as validation and an empty train/test set.
    split_at = n_rows - n_validation
    validation = df.iloc[split_at:]  # hold last 10% for final validation
    train_test = df.iloc[:split_at]  # first 90%
    return validation, train_test

def get_features(data,target):
    """Return the column names of ``data`` as a list, leaving out ``target``."""
    is_feature = data.columns != target
    feature_names = data.columns[is_feature]
    return feature_names.tolist()

def x_y(data, target):
    """Separate ``data`` into a feature frame and the ``target`` column.

    Returns ``(x, y)`` where ``x`` holds every column except ``target`` and
    ``y`` is ``data[target]``.
    """
    labels = data[target]
    predictors = data.drop(columns=[target])
    return predictors, labels

def get_train_size(estimator, x,y, scoring):
    """Run a 5-fold learning curve and pick the best training fraction.

    Parameters
    ----------
    estimator : estimator object
        Model passed to ``learning_curve``. (Previously this argument was
        ignored in favour of a hard-coded ``DecisionTreeClassifier()``.)
    x, y : array-like
        Features and labels handed to ``learning_curve``.
    scoring : str or callable
        Scoring specification forwarded to ``learning_curve``.

    Returns
    -------
    tuple
        ``(train_sizes, train_size, train_scores, validation_scores,
        train_scores_mean, validation_scores_mean)`` where ``train_size`` is
        the fraction in ``train_sizes`` with the highest mean validation score.
    """
    train_sizes = [.1, .2, .3, .4, .5, .6, .7, .8]
    _, train_scores, validation_scores = learning_curve(
        estimator=estimator, X=x, y=y,
        train_sizes=train_sizes, cv=5, scoring=scoring)
    # Average over the cross-validation folds (axis 1) for each training size.
    train_scores_mean = train_scores.mean(axis=1)
    validation_scores_mean = validation_scores.mean(axis=1)
    print('Mean training scores\n\n', pd.Series(train_scores_mean, index = train_sizes))
    print('\nMean validation scores\n\n',pd.Series(validation_scores_mean, index = train_sizes))
    # Series.idxmax() yields a scalar label; calling float() on the one-element
    # Series returned by DataFrame.idxmax() is deprecated in recent pandas.
    train_size = float(pd.Series(validation_scores_mean, index = train_sizes).idxmax())
    print('\nTraining Size: {}'.format(train_size))
    return train_sizes, train_size, train_scores, validation_scores, train_scores_mean, validation_scores_mean

def validation_score(validation, target, model):
    """Score a fitted model on the hold-out set and plot its confusion matrix.

    Parameters
    ----------
    validation : pandas.DataFrame
        Hold-out rows, including the ``target`` column.
    target : str
        Name of the label column in ``validation``.
    model : fitted estimator
        Must provide ``score``, ``predict`` and a ``classes_`` attribute.

    Prints the accuracy as a percentage and shows the confusion-matrix figure;
    nothing is returned.
    """
    x_val, y_val = x_y(validation, target)
    result = model.score(x_val, y_val)
    print("Accuracy: %.2f%%" % (result*100.0))
    predictions = model.predict(x_val)
    cm = confusion_matrix(y_val, predictions, labels = model.classes_)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                                   display_labels=model.classes_)
    disp.plot(include_values=True, cmap='Blues', ax=None, xticks_rotation='horizontal')
    plt.tick_params(axis=u'both', which=u'both',length=0)
    # Switch the grid off explicitly. The old ``b=None`` keyword is gone from
    # newer matplotlib releases and only toggled the grid, so it could also
    # turn the grid on depending on the active style.
    plt.grid(False)
    plt.show()

In [None]:
def learning_curve_ann(alg, learning_rate, X_train, y_train, X_test, y_test):
    """Fit one mlrose neural network per learning rate and collect its metrics.

    Every network uses the same fixed configuration (one hidden layer of 2
    nodes, relu activation, 1000 max iterations, early stopping, 100 max
    attempts, ``random_state=3``, ``clip_max=2``); only the learning rate
    varies.

    Parameters
    ----------
    alg : str
        Value passed as ``algorithm`` to ``mlrose.NeuralNetwork``.
    learning_rate : iterable of float
        Learning rates to try, in order.
    X_train, y_train : array-like
        Data each network is fitted on.
    X_test, y_test : array-like
        Data each network is evaluated on.

    Returns
    -------
    (train_loss, train_acc, val_loss, val_acc) : tuple of list
        One entry per learning rate, in the order given.
    """
    train_loss = []
    train_acc = []
    val_loss = []
    val_acc = []
    for lr in learning_rate:
        ann_model = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                               algorithm = alg, max_iters = 1000,
                               is_classifier = True, learning_rate = lr,
                               early_stopping = True, max_attempts = 100,
                               random_state = 3, clip_max = 2)
        ann_model.fit(X_train, y_train)

        y_train_pred = ann_model.predict(X_train)
        train_acc.append(accuracy_score(y_train, y_train_pred))
        # ``loss`` is the loss of the fitted weights on the data given to fit().
        train_loss.append(ann_model.loss)

        # Score this model's own test predictions. The original assigned them to
        # a misspelled name and then scored a notebook-global ``y_test_pred``
        # left over from a different model.
        y_test_pred = ann_model.predict(X_test)
        val_acc.append(accuracy_score(y_test, y_test_pred))
        # predict() stores the class probabilities of its latest call, so a real
        # held-out loss can be computed instead of repeating the training loss.
        val_loss.append(log_loss(y_test, ann_model.predicted_probs))
    return train_loss, train_acc, val_loss, val_acc

In [None]:
# Choose the dataset to work with and the name of its label column.
df = mushrooms
target = 'class_p'  # presumably the dummy column created by one-hot encoding - confirm

# One-hot encode, hold out the trailing rows for final validation, then split
# the remaining rows into features and labels.
validation, train_test = split_validation(df, onehot=True)
x, y = x_y(train_test, target)
features = get_features(train_test, target)

In [None]:
ann_model = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                               algorithm = 'random_hill_climb', max_iters = 1000,
                               is_classifier = True, learning_rate = .01,
                               max_attempts = 100,
                               random_state = 3)

In [None]:
# need curve for picking percentage for train/test split
X_train, X_test, y_train, y_test = model_selection.train_test_split(x, y, train_size=0.9, random_state=98)

In [None]:
start = time.time()
ann_model.fit(X_train, y_train)
end = time.time()
print('Elapsed Time: {} s'.format(end-start))

In [None]:
# ann_model.fitted_weights

In [None]:
y_train_pred = ann_model.predict(X_train)
y_train_acc = accuracy_score(y_train, y_train_pred)
print('Training accuracy: ', y_train_acc)
y_test_pred = ann_model.predict(X_test)
y_test_acc = accuracy_score(y_test, y_test_pred)
print('Testing accuracy: ', y_test_acc)

In [None]:
learning_rates = [.00001, .0001, .001, .01, .1]
train_loss, train_acc, val_loss, val_acc = learning_curve_ann('random_hill_climb', learning_rates, X_train, y_train, X_test, y_test)

In [None]:
print('Training accuracy\n\n', pd.Series(train_acc, index = learning_rates))
print('\nValidation accuracy\n\n',pd.Series(val_acc, index = learning_rates))
print('\nTraining loss\n\n', pd.Series(train_loss, index = learning_rates))
print('\nValidation loss\n\n',pd.Series(val_loss, index = learning_rates))

In [None]:
rhc_model = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                               algorithm = 'random_hill_climb', max_iters = 10000,
                               is_classifier = True, learning_rate = .1,
                               early_stopping = True, max_attempts = 100,
                               random_state = 27, curve = True, clip_max = 2)

In [None]:
x_val, y_val = x_y(validation, target)

In [None]:
start = time.time()
rhc_model.fit(x, y)
end = time.time()
print('Elapsed Time: {} s'.format(end-start))

In [None]:
y_train_pred = rhc_model.predict(x)
y_train_acc = accuracy_score(y, y_train_pred)
print('Training accuracy: ', y_train_acc)
y_val_pred = rhc_model.predict(x_val)
y_val_acc = accuracy_score(y_val, y_val_pred)
print('Validation accuracy: ', y_val_acc)
print('Validation loss: ', rhc_model.loss)

In [None]:
fitness_curve = rhc_model.fitness_curve

In [None]:
fitness_curve[-1]

In [None]:
plt.style.use('seaborn')

plt.figure()
plt.plot(fitness_curve)
plt.xlabel('iteration')
plt.ylabel('fitness')
plt.title('Fitness Curve for Randomized Optimization with Random Hill Climb: Neural Network', fontsize = 18, y = 1.03)

In [None]:
ann_model = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                               algorithm = 'simulated_annealing', max_iters = 1000,
                               is_classifier = True, learning_rate = .01,
                               max_attempts = 100,
                               random_state = 3)

In [None]:
# need curve for picking percentage for train/test split
X_train, X_test, y_train, y_test = model_selection.train_test_split(x, y, train_size=0.9, random_state=98)

In [None]:
start = time.time()
ann_model.fit(X_train, y_train)
end = time.time()
print('Elapsed Time: {} s'.format(end-start))

In [None]:
y_train_pred = ann_model.predict(X_train)
y_train_acc = accuracy_score(y_train, y_train_pred)
print('Training accuracy: ', y_train_acc)
y_test_pred = ann_model.predict(X_test)
y_test_acc = accuracy_score(y_test, y_test_pred)
print('Testing accuracy: ', y_test_acc)

In [None]:
learning_rates = [.00001, .0001, .001, .01, .1]
train_loss, train_acc, val_loss, val_acc = learning_curve_ann('simulated_annealing', learning_rates, X_train, y_train, X_test, y_test)

In [None]:
print('Training accuracy\n\n', pd.Series(train_acc, index = learning_rates))
print('\nValidation accuracy\n\n',pd.Series(val_acc, index = learning_rates))
print('\nTraining loss\n\n', pd.Series(train_loss, index = learning_rates))
print('\nValidation loss\n\n',pd.Series(val_loss, index = learning_rates))

In [None]:
sa_model = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                               algorithm = 'simulated_annealing', max_iters = 1000,
                               is_classifier = True, learning_rate = .1,
                               early_stopping = True, max_attempts = 100,
                               random_state = 98, curve = True)

In [None]:
x_val, y_val = x_y(validation, target)

In [None]:
start = time.time()
sa_model.fit(x, y)
end = time.time()
print('Elapsed Time: {} s'.format(end-start))

In [None]:
y_train_pred = sa_model.predict(x)
y_train_acc = accuracy_score(y, y_train_pred)
print('Training accuracy: ', y_train_acc)
y_val_pred = sa_model.predict(x_val)
y_val_acc = accuracy_score(y_val, y_val_pred)
print('Validation accuracy: ', y_val_acc)
print('Validation loss: ', sa_model.loss)

In [None]:
fitness_curve = sa_model.fitness_curve

In [None]:
fitness_curve[-1]

In [None]:
plt.style.use('seaborn')

plt.figure()
plt.plot(fitness_curve)
plt.xlabel('iteration')
plt.ylabel('fitness')
plt.title('Fitness Curve for Randomized Optimization with Simulated Annealing: Neural Network', fontsize = 18, y = 1.03)

In [None]:
ann_model = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                               algorithm = 'genetic_alg', pop_size = 2000,
                                max_iters = 50, is_classifier = True, learning_rate = .1,
                               early_stopping = True, max_attempts = 100,
                               random_state = 3, clip_max = 2)

In [None]:
# need curve for picking percentage for train/test split
X_train, X_test, y_train, y_test = model_selection.train_test_split(x, y, train_size=0.9, random_state=98)

In [None]:
start = time.time()
ann_model.fit(X_train, y_train)
end = time.time()
print('Elapsed Time: {} s'.format(end-start))

In [None]:
y_train_pred = ann_model.predict(X_train)
y_train_acc = accuracy_score(y_train, y_train_pred)
print('Training accuracy: ', y_train_acc)
y_test_pred = ann_model.predict(X_test)
y_test_acc = accuracy_score(y_test, y_test_pred)
print('Testing accuracy: ', y_test_acc)

In [None]:
learning_rates = [.00001, .0001, .001, .01, .1]
train_loss, train_acc, val_loss, val_acc = learning_curve_ann('genetic_alg', learning_rates, X_train, y_train, X_test, y_test)

In [None]:
print('Training accuracy\n\n', pd.Series(train_acc, index = learning_rates))
print('\nValidation accuracy\n\n',pd.Series(val_acc, index = learning_rates))
print('\nTraining loss\n\n', pd.Series(train_loss, index = learning_rates))
print('\nValidation loss\n\n',pd.Series(val_loss, index = learning_rates))

In [None]:
ga_model = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                               algorithm = 'genetic_alg', max_iters = 100,
                               is_classifier = True, learning_rate = .1,
                               early_stopping = True, max_attempts = 100,
                               clip_max = 2, random_state = 3, curve = True)

In [None]:
x_val, y_val = x_y(validation, target)

In [None]:
start = time.time()
ga_model.fit(X_train, y_train)
end = time.time()
print('Elapsed Time: {} s'.format(end-start))

In [None]:
y_train_pred = ga_model.predict(x)
y_train_acc = accuracy_score(y, y_train_pred)
print('Training accuracy: ', y_train_acc)
y_val_pred = ga_model.predict(X_test)
y_val_acc = accuracy_score(y_test, y_val_pred)
print('Validation accuracy: ', y_val_acc)
print('Validation loss: ', ga_model.loss)

In [None]:
fitness_curve = ga_model.fitness_curve

In [None]:
fitness_curve[-1]

In [None]:
plt.style.use('seaborn')

plt.figure()
plt.plot(fitness_curve)
plt.xlabel('iteration')
plt.ylabel('fitness')
plt.title('Fitness Curve for Randomized Optimization with Genetic Algorithm: Neural Network', fontsize = 18, y = 1.03)

In [None]:
baseline_model = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                               algorithm = 'gradient_descent', max_iters = 1000,
                               is_classifier = True, learning_rate = .1,
                               early_stopping = True, max_attempts = 100,
                               random_state = 3)

In [None]:
# need curve for picking percentage for train/test split
X_train, X_test, y_train, y_test = model_selection.train_test_split(x, y, train_size=0.9, random_state=27)

In [None]:
start = time.time()
baseline_model.fit(X_train, y_train)
end = time.time()
print('Elapsed Time: {} s'.format(end-start))

In [None]:
y_train_pred = baseline_model.predict(X_train)
y_train_acc = accuracy_score(y_train, y_train_pred)
print('Training accuracy: ', y_train_acc)
y_test_pred = baseline_model.predict(X_test)
y_test_acc = accuracy_score(y_test, y_test_pred)
print('Testing accuracy: ', y_test_acc)

In [None]:
baseline_model = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                               algorithm = 'gradient_descent', max_iters = 1000,
                               is_classifier = True, learning_rate = .1,
                               early_stopping = False, max_attempts = 100,
                               random_state = 32, curve = True, clip_max=2)

In [None]:
x_val, y_val = x_y(validation, target)

In [None]:
start = time.time()
baseline_model.fit(x, y)
end = time.time()
print('Elapsed Time: {} s'.format(end-start))

In [None]:
y_train_pred = baseline_model.predict(x)
y_train_acc = accuracy_score(y, y_train_pred)
print('Training accuracy: ', y_train_acc)
y_val_pred = baseline_model.predict(x_val)
y_val_acc = accuracy_score(y_val, y_val_pred)
print('Validation accuracy: ', y_val_acc)
print('Validation loss: ', baseline_model.loss)

In [None]:
fitness_curve = baseline_model.fitness_curve

In [None]:
fitness_curve[-1]

In [None]:
plt.style.use('seaborn')

plt.figure()
plt.plot(fitness_curve)
plt.xlabel('iteration')
plt.ylabel('fitness')
plt.title('Fitness Curve for Randomized Optimization with Gradient Descent: Neural Network', fontsize = 18, y = 1.03)

In [None]:
learning_rates = [.00001, .0001, .001, .01, .1]
train_loss, train_acc, val_loss, val_acc = learning_curve_ann('simulated_annealing', learning_rates, X_train, y_train, X_test, y_test)

In [None]:
print('Training accuracy\n\n', pd.Series(train_acc, index = learning_rates))
print('\nValidation accuracy\n\n',pd.Series(val_acc, index = learning_rates))
print('\nTraining loss\n\n', pd.Series(train_loss, index = learning_rates))
print('\nValidation loss\n\n',pd.Series(val_loss, index = learning_rates))