In [12]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
%matplotlib notebook
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV
from sklearn.metrics import f1_score, accuracy_score

import warnings


def warn(*_args, **_kwargs):
    """No-op replacement for ``warnings.warn``: accepts any arguments and discards them."""
    return None


# Swap the stdlib entry point for the no-op above, so every later
# ``warnings.warn(...)`` call in this process does nothing.
warnings.warn = warn


# Read the numeric credit table; the last column is the target, the rest are features.
credit = np.genfromtxt('./dataset/german.data-numeric.txt')
X = credit[:, :-1]
y = credit[:, -1:].squeeze()
print(X.shape, y.shape)

# Relabel class 2 as 0 so the target is binary (0/1).
y[y == 2] = 0

# Hold out 20% of the rows for testing, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=111,
    stratify=y,
)

# Min-max scale the features; the scaler is fitted on the training split only
# and then reused unchanged for the test split.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# The target stays a 1-D label vector; it is not one-hot encoded.

print(y_test)


(1000, 24) (1000,)
[1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 1. 1. 0. 0. 1. 1. 1. 0.
 0. 1. 0. 0. 1. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1.
 1. 0. 1. 1. 1. 1. 0. 1. 0. 1. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 1. 1. 1.
 0. 0. 1. 0. 1. 1. 0. 0. 0. 1. 1. 1. 0. 1. 0. 1. 1. 1. 0. 1. 0. 1. 0. 1.
 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 1.
 1. 1. 1. 0. 1. 1. 1. 0. 0. 1. 0. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 0. 1. 0.
 0. 1. 1. 1. 1. 1. 1. 1.]


In [None]:
import mlrose
from datetime import datetime

# Reseed NumPy's global generator so this cell starts from a fixed random state.
np.random.seed(seed=3)
# Iteration budgets for the sweep: 100, 200, ..., 1900.
max_iters = range(100, 2000, 100)

def get_scores(max_iters, algorithm='gradient_descent', n_repeats=5):
    """Train the network at several iteration budgets and return averaged metrics.

    For every value in ``max_iters`` an ``mlrose.NeuralNetwork`` is fitted
    ``n_repeats`` times on the module-level ``X_train_scaled`` / ``y_train``
    and scored with binary F1 on both the training data and the module-level
    ``X_test_scaled`` / ``y_test``.

    Parameters
    ----------
    max_iters : iterable of int
        Each value is passed as ``max_iters`` to the network (one sweep point).
    algorithm : str, optional
        Passed through as the network's ``algorithm`` argument.
        Defaults to ``'gradient_descent'``.
    n_repeats : int, optional
        Number of fits averaged per sweep point. Defaults to 5.

    Returns
    -------
    tuple of (list, list, list)
        ``(train_scores, test_scores, train_times)``: mean training F1, mean
        testing F1 and mean wall-clock fit time in milliseconds, each with one
        entry per value of ``max_iters``.

    Raises
    ------
    ValueError
        If ``n_repeats`` is smaller than 1 (there would be nothing to average).
    """
    if n_repeats < 1:
        raise ValueError("n_repeats must be at least 1")

    train_scores = []
    test_scores = []
    train_times = []
    for max_iter in max_iters:
        run_train_scores = []
        run_test_scores = []
        run_train_times = []
        for _ in range(n_repeats):
            clf = mlrose.NeuralNetwork(
                hidden_nodes=[90],
                activation='tanh',
                algorithm=algorithm,
                max_iters=max_iter,
                bias=True,
                is_classifier=True,
                learning_rate=0.001,
                early_stopping=True,
                clip_max=5,
                max_attempts=10,
            )

            # Time only the fit call.
            start_time = datetime.now()
            clf.fit(X_train_scaled, y_train)
            elapsed = datetime.now() - start_time

            y_pred_train = clf.predict(X_train_scaled)
            y_pred_test = clf.predict(X_test_scaled)

            run_train_scores.append(f1_score(y_train, y_pred_train, average='binary'))
            run_test_scores.append(f1_score(y_test, y_pred_test, average='binary'))
            # total_seconds() covers the whole duration; the .microseconds
            # attribute alone drops the whole-second part of the interval.
            run_train_times.append(elapsed.total_seconds() * 1000)

        train_scores.append(np.mean(run_train_scores))
        test_scores.append(np.mean(run_test_scores))
        train_times.append(np.mean(run_train_times))

    # The caller unpacks three sequences, so they must be returned explicitly.
    return train_scores, test_scores, train_times

# Run the sweep for the iteration budgets in `max_iters`.
gd_train_scores, gd_test_scores, gd_train_times = get_scores(max_iters)

# Each chart gets its own figure so the second one is not drawn on top of the
# first (with the interactive notebook backend the current figure stays active
# until a new one is created).
fig, ax = plt.subplots()
ax.plot(max_iters, gd_train_scores, label="Training Score")
ax.plot(max_iters, gd_test_scores, label="Testing Score")
ax.set_xlabel("Max Iterations")
ax.set_ylabel("F1 Score")
ax.legend(loc="best")
plt.show()

fig, ax = plt.subplots()
# The line needs a label, otherwise legend() has no entries to show.
ax.plot(max_iters, gd_train_times, label="Training Time")
ax.set_xlabel("Max Iterations")
ax.set_ylabel("Training Time")
ax.legend(loc="best")
plt.show()

In [23]:
import mlrose

# Random-hill-climbing sweep: for each iteration budget, fit the network five
# times and average the binary F1 on the training and testing splits.
np.random.seed(3)
max_iters = range(100, 1000, 100)
rhc_train_scores = []
rhc_test_scores = []
# Kept so the name stays defined; no timings are recorded in this cell.
rhc_train_times = []

for max_iter in max_iters:
    _rhc_train_scores = []
    _rhc_test_scores = []
    for _ in range(5):
        clf = mlrose.NeuralNetwork(
            hidden_nodes=[90],
            activation='tanh',
            algorithm='random_hill_climb',
            max_iters=max_iter,
            bias=True,
            is_classifier=True,
            learning_rate=0.001,
            early_stopping=True,
            clip_max=5,
            max_attempts=10,
        )

        clf.fit(X_train_scaled, y_train)

        y_pred_train = clf.predict(X_train_scaled)
        y_pred_test = clf.predict(X_test_scaled)

        _rhc_train_scores.append(f1_score(y_train, y_pred_train, average='binary'))
        _rhc_test_scores.append(f1_score(y_test, y_pred_test, average='binary'))

    # Average this cell's own per-run lists, not names defined elsewhere.
    rhc_train_scores.append(np.mean(_rhc_train_scores))
    rhc_test_scores.append(np.mean(_rhc_test_scores))

# Plot the results computed above on a fresh figure.
fig, ax = plt.subplots()
ax.plot(max_iters, rhc_train_scores, label="Training Score")
ax.plot(max_iters, rhc_test_scores, label="Testing Score")
ax.set_xlabel("Max Iterations")
ax.set_ylabel("F1 Score")
ax.legend(loc="best")
plt.show()
    

In [None]:
import mlrose

# Simulated-annealing sweep: for each iteration budget, fit the network five
# times and average the binary F1 on the training and testing splits.
np.random.seed(3)
max_iters = range(20, 300, 20)
sa_train_scores = []
sa_test_scores = []

for max_iter in max_iters:
    _sa_train_scores = []
    _sa_test_scores = []
    for _ in range(5):
        clf = mlrose.NeuralNetwork(
            hidden_nodes=[90],
            activation='tanh',
            algorithm='simulated_annealing',
            max_iters=max_iter,
            bias=True,
            is_classifier=True,
            learning_rate=0.001,
            early_stopping=True,
            clip_max=5,
            max_attempts=10,
        )

        clf.fit(X_train_scaled, y_train)

        y_pred_train = clf.predict(X_train_scaled)
        y_pred_test = clf.predict(X_test_scaled)

        _sa_train_scores.append(f1_score(y_train, y_pred_train, average='binary'))
        _sa_test_scores.append(f1_score(y_test, y_pred_test, average='binary'))

    # Store the means in the lists created at the top of this cell, so each
    # run starts empty and ends with one entry per value of max_iters.
    sa_train_scores.append(np.mean(_sa_train_scores))
    sa_test_scores.append(np.mean(_sa_test_scores))

# Plot the results computed above on a fresh figure.
fig, ax = plt.subplots()
ax.plot(max_iters, sa_train_scores, label="Training Score")
ax.plot(max_iters, sa_test_scores, label="Testing Score")
ax.set_xlabel("Max Iterations")
ax.set_ylabel("F1 Score")
ax.legend(loc="best")
plt.show()