In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import mlrose_hiive as mlrh

%matplotlib inline
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names, so use whichever whitegrid style this
# installation provides (falling back to the default style if neither exists).
_whitegrid_style = next(
    (name for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
     if name in plt.style.available),
    'default',
)
plt.style.use(_whitegrid_style)

The text dataset (depression_dataset_reddit_cleaned) is taken from Kaggle (https://www.kaggle.com/datasets/infamouscoder/depression-reddit-cleaned) and contains texts used to classify mental health.

### Load Data

In [None]:
# Read the dry bean workbook into a DataFrame.
df_bean = pd.read_excel('Dry_Bean_Dataset.xlsx')

# Report the table dimensions.
print("Data has", df_bean.shape[0], "rows and", df_bean.shape[1], "columns.")

# Warn if any cell in the table is missing.
if df_bean.isna().any().any():
    print("Warning: Missing Data")

# Distinct values of the target column.
label = df_bean["Class"].unique()
df_bean.head()

#### Data Preparation

## Gradient Descent vs Optimization method

In this part, gradient descent is compared with randomized hill climbing, simulated annealing, and a genetic algorithm.

### Define Helper functions
Let's define some helper functions that will be used across all of the models. We define a function that plots the learning curve of a classification model. Additionally, we define functions to output final model scores using an untouched test dataset.

In [None]:
from sklearn.model_selection import cross_validate
from sklearn.metrics import precision_score, accuracy_score, ConfusionMatrixDisplay, roc_auc_score, recall_score, classification_report
import timeit

# Use a single 12-point size for tick labels, axes titles and the base font.
plt.rcParams.update({
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'axes.titlesize': 12,
    'font.size': 12,
})

def plot_learning_curve(clf, X, y, title="Insert Title", random_state=None):
    """Plot learning, timing and fitness curves for a classifier.

    The classifier is fitted once on all of ``X``/``y`` (the ``fitness_curve``
    of that fit is plotted last) and is then cross-validated (5 folds,
    ``f1_weighted`` scoring) on random subsets holding 10 % to 100 % of the
    samples, in five evenly spaced steps.

    Parameters
    ----------
    clf : estimator
        Object whose ``fit(X, y)`` returns something exposing
        ``fitness_curve`` and that ``cross_validate`` can evaluate.
    X : array
        Feature matrix; indexed as ``X[idx, :]``.
    y : array
        Targets; indexed as ``y[idx]``.
    title : str
        Text inserted into the titles of the three figures.
    random_state : int or None, optional
        Seed for drawing the subsets. ``None`` (default) uses NumPy's global
        random state, so ``np.random.seed`` still controls the draw.

    Returns
    -------
    None
        The function only produces figures.
    """
    n = len(y)
    train_sizes = (np.linspace(0.1, 1.0, 5) * n).astype('int')
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    fit_clf = clf.fit(X, y)

    train_mean = []  # mean training score (weighted F1) per subset size
    cv_mean = []     # mean validation score (weighted F1) per subset size
    fit_mean = []    # mean fit time per subset size
    pred_mean = []   # mean scoring time per subset size

    for size in train_sizes:
        # Draw WITHOUT replacement: sampling with replacement duplicates rows,
        # and a duplicated row can land in both the training and the
        # validation fold, which inflates the cross-validation score.
        idx = rng.choice(X.shape[0], size=size, replace=False)
        X_subset = X[idx, :]
        y_subset = y[idx]
        scores = cross_validate(clf, X_subset, y_subset, cv=5, scoring='f1_weighted', n_jobs=-1, return_train_score=True)

        train_mean.append(np.mean(scores['train_score']))
        cv_mean.append(np.mean(scores['test_score']))
        fit_mean.append(np.mean(scores['fit_time']))
        pred_mean.append(np.mean(scores['score_time']))

    train_mean = np.array(train_mean)
    cv_mean = np.array(cv_mean)
    fit_mean = np.array(fit_mean)
    pred_mean = np.array(pred_mean)

    plot_LC(train_sizes, train_mean, cv_mean, title)
    plot_times(train_sizes, fit_mean, pred_mean, title)
    plot_fitness_iteration(fit_clf.fitness_curve, "{} Fitness Curve".format(title))



def plot_LC(train_sizes, train_mean, cv_mean, title, score_label="Model F1 Score (weighted)"):
    """Plot training and cross-validation scores against training-set size.

    Parameters
    ----------
    train_sizes : sequence
        X-axis values: number of training examples per point.
    train_mean : sequence
        Mean training score for each entry of ``train_sizes``.
    cv_mean : sequence
        Mean cross-validation score for each entry of ``train_sizes``.
    title : str
        Text appended to "Learning Curve: " in the figure title.
    score_label : str, optional
        Y-axis label. The default names the weighted F1 score, which is the
        metric ``plot_learning_curve`` passes in; the label previously said
        "Accuracy" although accuracy was never plotted.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    plt.figure()
    plt.title("Learning Curve: " + title)
    plt.xlabel("Training Examples")
    plt.ylabel(score_label)
    plt.plot(train_sizes, train_mean, 'o-', color="b", label="Training Score")
    plt.plot(train_sizes, cv_mean, 'o-', color="r", label="Cross-Validation Score")
    plt.legend(loc="best")
    plt.show()


def plot_times(train_sizes, fit_mean, pred_mean, title):
    """Plot fit time and prediction time against training-set size.

    Parameters
    ----------
    train_sizes : sequence
        X-axis values: number of training examples per point.
    fit_mean : sequence
        Training time for each entry of ``train_sizes`` (blue line).
    pred_mean : sequence
        Prediction time for each entry of ``train_sizes`` (red line).
    title : str
        Text appended to "Modeling Time: " in the figure title.
    """
    # (values, line colour, legend entry) for each curve, in drawing order.
    timing_series = (
        (fit_mean, "b", "Training Time (s)"),
        (pred_mean, "r", "Prediction Time (s)"),
    )

    plt.figure()
    for values, colour, caption in timing_series:
        plt.plot(train_sizes, values, 'o-', color=colour, label=caption)

    plt.title("Modeling Time: " + title)
    plt.xlabel("Training Examples")
    plt.ylabel("Training Time (s)")
    plt.legend(loc="best")
    plt.show()

def plot_fitness_iteration(curve, title, max_fitness=None):
    """Plot a fitness curve against iteration number.

    Parameters
    ----------
    curve : sequence
        Fitness values; element ``i`` is drawn at x position ``i``.
    title : str
        Figure title.
    max_fitness : number or None, optional
        When given, a horizontal reference line is drawn at this value.
        ``None`` (default) draws no reference line.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    plt.figure()
    plt.xlabel('Iteration')
    plt.ylabel('Fitness')
    plt.title(title)
    length = len(curve)
    iterations = range(length)
    plt.plot(iterations, curve, label='Fitness', lw=2)

    # Compare against None instead of relying on truthiness, so a legitimate
    # maximum of 0 still gets its reference line.
    if max_fitness is not None:
        plt.plot(iterations, [max_fitness] * length, label="Max Fitness", lw=1, color="darkorange")

    plt.legend(loc="best")
    plt.show()


def final_classifier_evaluation(clf, X_train, X_test, y_train, y_test):
    """Fit ``clf``, time it, and print accuracy plus classification reports.

    The classifier is fitted on the training data (timed), asked to predict
    the training data (untimed) and the test data (timed). Accuracy on the
    test data and a classification report for each split are printed.

    Parameters
    ----------
    clf : estimator
        Object providing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train
        Data the classifier is fitted on.
    X_test, y_test
        Data used for the timed prediction and the accuracy figure.

    Returns
    -------
    None
        All results are written with ``print``.
    """
    clock = timeit.default_timer
    divider = "*****************************************************"

    # Time the fit.
    fit_started = clock()
    clf.fit(X_train, y_train)
    training_time = clock() - fit_started

    # Training-set predictions feed the report only, so they are not timed.
    train_predictions = clf.predict(X_train)

    # Time the test-set prediction.
    predict_started = clock()
    test_predictions = clf.predict(X_test)
    pred_time = clock() - predict_started

    accuracy = accuracy_score(y_test, test_predictions)

    print("Model Evaluation Metrics Using Test Dataset")
    print(divider)
    print(f"Model Training Time (s):   {training_time:.5f}")
    print(f"Model Prediction Time (s): {pred_time:.5f}\n")
    print(f"Accuracy:  {accuracy:.2f}")
    print("Training Classification report", classification_report(y_train, train_predictions))
    print(divider)
    print("Testing Classification report", classification_report(y_test, test_predictions))

### Gradient Descent

In [None]:
# turn categorical class type into numerical values
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Separate the feature columns from the target column.
X = df_bean.drop(['Class'], axis=1)
y = df_bean.Class

# Data Split (stratified on the class label)
X_train, X_test, y_train, y_test = train_test_split(np.array(X), y, test_size = 0.2, random_state = 100, stratify=y)

# Standardize data: the scaler is fitted on the training split only and the
# same transformation is then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# One-hot encode both splits against ONE shared, sorted list of class names.
# Encoding each split independently would make the column order and count
# depend on which classes happen to be present in that split.
class_names = np.sort(y.dropna().unique())
y_train = pd.get_dummies(pd.Categorical(y_train.values.ravel(), categories=class_names)).values
y_test = pd.get_dummies(pd.Categorical(y_test.values.ravel(), categories=class_names)).values

In [None]:
# Hyper-parameter grid for gradient descent; only learning_rate_init has more
# than one candidate value.
# NOTE(review): the grid key is 'hidden_layers_sizes' while the runner keyword
# below is 'hidden_layer_sizes' - confirm both spellings are intended.
grid_search = dict(
    max_iters=[5000],
    learning_rate_init=[0.01, 0.1, 0.5],
    hidden_layers_sizes=[3],
    activation=[mlrh.neural.activation.relu],
    is_classifier=[True],
)

runner = mlrh.NNGSRunner(
    # data splits
    x_train=X_train, y_train=y_train,
    x_test=X_test, y_test=y_test,
    # experiment output
    experiment_name="full_grid_search",
    output_directory="nn_gradient_descent/",
    generate_curves=True,
    # search configuration
    algorithm=mlrh.algorithms.gradient_descent,
    grid_search_parameters=grid_search,
    iteration_list=[5000],
    hidden_layer_sizes=[3],
    max_attempts=100,
    early_stopping=True,
    clip_max=1,
    bias=True,
    # run control
    seed=100,
    n_jobs=-1,
)
run_stats, curves, cv_results, best_est = runner.run()

In [None]:
# Show the parameter combination stored on the estimator returned by the
# gradient-descent grid-search run.
best_est.best_params_

In [None]:
# Gradient-descent network used for the final evaluation.
# NOTE(review): the settings are hard-coded here rather than read from
# best_est.best_params_ - confirm they match the grid-search result.
nn_model = mlrh.NeuralNetwork(
    # network
    hidden_nodes=[3],
    activation='relu',
    bias=True,
    is_classifier=True,
    # optimiser
    algorithm='gradient_descent',
    learning_rate=0.1,
    clip_max=1.0,
    # stopping
    max_iters=5000,
    max_attempts=100,
    early_stopping=True,
    # bookkeeping
    curve=True,
    random_state=100,
)

The final section for the neural network plots the loss curve.

In [None]:
#plot_learning_curve(nn_model, X_train, y_train, title="Gradient Descent")

In [None]:
# Fit the gradient-descent network on the training split and print its timing,
# test accuracy and the classification reports for both splits.
final_classifier_evaluation(nn_model, X_train, X_test, y_train, y_test)

### Simulated Annealing


In [None]:
# Hyper-parameter grid for simulated annealing: three learning rates crossed
# with nine schedules (Geom/Arith/Exp decay, each built with 5, 10 and 15).
# NOTE(review): the grid key is 'hidden_layers_sizes' while the runner keyword
# below is 'hidden_layer_sizes' - confirm both spellings are intended.
grid_search = dict(
    max_iters=[5000],
    learning_rate_init=[0.01, 0.1, 0.5],
    hidden_layers_sizes=[3],
    activation=[mlrh.neural.activation.relu],
    is_classifier=[True],
    schedule=[
        decay(start)
        for decay in (mlrh.GeomDecay, mlrh.ArithDecay, mlrh.ExpDecay)
        for start in (5, 10, 15)
    ],
)

runner = mlrh.NNGSRunner(
    # data splits
    x_train=X_train, y_train=y_train,
    x_test=X_test, y_test=y_test,
    # experiment output
    experiment_name="full_grid_search",
    output_directory="nn_simulated_annealing/",
    generate_curves=True,
    # search configuration
    algorithm=mlrh.algorithms.sa.simulated_annealing,
    grid_search_parameters=grid_search,
    iteration_list=[5000],
    hidden_layer_sizes=[3],
    max_attempts=100,
    early_stopping=True,
    clip_max=1,
    bias=True,
    # run control
    seed=100,
    n_jobs=-1,
)
run_stats, curves, cv_results, best_est = runner.run()

In [None]:
# Show the parameter combination stored on the estimator returned by the
# simulated-annealing grid-search run.
best_est.best_params_

In [None]:
# Simulated-annealing network used for the final evaluation.
# NOTE(review): the settings are hard-coded here rather than read from
# best_est.best_params_ - confirm they match the grid-search result.
nn_model = mlrh.NeuralNetwork(
    # network
    hidden_nodes=[3],
    activation='relu',
    bias=True,
    is_classifier=True,
    # optimiser
    algorithm='simulated_annealing',
    schedule=mlrh.GeomDecay(5),
    learning_rate=0.1,
    clip_max=1.0,
    # stopping
    max_iters=5000,
    max_attempts=100,
    early_stopping=True,
    # bookkeeping
    curve=True,
    random_state=100,
)

In [None]:
#plot_learning_curve(nn_model, X_train, y_train, title="Simulated Annealing")

In [None]:
# Fit the simulated-annealing network on the training split and print its
# timing, test accuracy and the classification reports for both splits.
final_classifier_evaluation(nn_model, X_train, X_test, y_train, y_test)

### Randomized Hill Climbing


In [None]:
# Hyper-parameter grid for randomized hill climbing: three learning rates
# crossed with three restart counts.
# NOTE(review): the grid key is 'hidden_layers_sizes' while the runner keyword
# below is 'hidden_layer_sizes' - confirm both spellings are intended.
grid_search = dict(
    max_iters=[5000],
    learning_rate_init=[0.01, 0.1, 0.5],
    hidden_layers_sizes=[3],
    activation=[mlrh.neural.activation.relu],
    is_classifier=[True],
    restarts=[5, 10, 20],
)

runner = mlrh.NNGSRunner(
    # data splits
    x_train=X_train, y_train=y_train,
    x_test=X_test, y_test=y_test,
    # experiment output
    experiment_name="full_grid_search",
    output_directory="nn_randomized_hill_climbing/",
    generate_curves=True,
    # search configuration
    algorithm=mlrh.algorithms.random_hill_climb,
    grid_search_parameters=grid_search,
    iteration_list=[5000],
    hidden_layer_sizes=[3],
    max_attempts=100,
    early_stopping=True,
    clip_max=1,
    bias=True,
    # run control
    seed=100,
    n_jobs=-1,
)
run_stats, curves, cv_results, best_est = runner.run()

In [None]:
# Show the parameter combination stored on the estimator returned by the
# randomized-hill-climbing grid-search run.
best_est.best_params_

In [None]:
# Randomized-hill-climbing network used for the final evaluation.
# NOTE(review): the settings are hard-coded here rather than read from
# best_est.best_params_ - confirm they match the grid-search result.
nn_model = mlrh.NeuralNetwork(
    # network
    hidden_nodes=[3],
    activation='relu',
    bias=True,
    is_classifier=True,
    # optimiser
    algorithm='random_hill_climb',
    restarts=20,
    learning_rate=0.1,
    clip_max=1.0,
    # stopping
    max_iters=5000,
    max_attempts=100,
    early_stopping=True,
    # bookkeeping
    curve=True,
    random_state=100,
)



In [None]:
#plot_learning_curve(nn_model, X_train, y_train, title="Randomized Hill Climbing")

In [None]:
# Fit the randomized-hill-climbing network on the training split and print its
# timing, test accuracy and the classification reports for both splits.
final_classifier_evaluation(nn_model, X_train, X_test, y_train, y_test)

###  Genetic Algorithm

In [None]:
# Hyper-parameter grid for the genetic algorithm: three learning rates crossed
# with three mutation probabilities and three population sizes.
# NOTE(review): the grid key is 'hidden_layers_sizes' while the runner keyword
# below is 'hidden_layer_sizes' - confirm both spellings are intended.
grid_search = dict(
    max_iters=[5000],
    learning_rate_init=[0.01, 0.1, 0.5],
    hidden_layers_sizes=[3],
    activation=[mlrh.neural.activation.relu],
    is_classifier=[True],
    mutation_prob=[0.1, 0.5, 0.7],
    pop_size=[500, 750, 1000],
)

runner = mlrh.NNGSRunner(
    # data splits
    x_train=X_train, y_train=y_train,
    x_test=X_test, y_test=y_test,
    # experiment output
    experiment_name="full_grid_search",
    output_directory="nn_genetic_algorithm/",
    generate_curves=True,
    # search configuration
    algorithm=mlrh.algorithms.genetic_alg,
    grid_search_parameters=grid_search,
    iteration_list=[5000],
    hidden_layer_sizes=[3],
    max_attempts=100,
    early_stopping=True,
    clip_max=1,
    bias=True,
    # run control
    seed=100,
    n_jobs=-1,
)
run_stats, curves, cv_results, best_est = runner.run()

In [None]:
# Show the parameter combination stored on the estimator returned by the
# genetic-algorithm grid-search run.
best_est.best_params_

In [None]:
# Genetic-algorithm network used for the final evaluation.
# NOTE(review): the settings are hard-coded here rather than read from
# best_est.best_params_ - confirm they match the grid-search result.
nn_model = mlrh.NeuralNetwork(
    # network
    hidden_nodes=[3],
    activation='relu',
    bias=True,
    is_classifier=True,
    # optimiser
    algorithm='genetic_alg',
    mutation_prob=0.1,
    pop_size=1000,
    learning_rate=0.01,
    clip_max=1.0,
    # stopping
    max_iters=5000,
    max_attempts=100,
    early_stopping=True,
    # bookkeeping
    curve=True,
    random_state=100,
)



In [None]:
#plot_learning_curve(nn_model, X_train, y_train, title="Genetic Algorithm")

In [None]:
# Fit the genetic-algorithm network on the training split and print its
# timing, test accuracy and the classification reports for both splits.
final_classifier_evaluation(nn_model, X_train, X_test, y_train, y_test)