In [None]:
import csv
import os
from textwrap import wrap
from timeit import default_timer as timer

import matplotlib.pyplot as plt
import mlrose
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

In [None]:
# Read the header-less CSV, one-hot encode any non-numeric columns and cast
# everything to float in a single chain.
# NOTE(review): get_dummies appends the columns it creates at the END of the
# frame, so if the file contains categorical columns the last column may no
# longer be the label -- confirm against the dataset.
df = (
    pd.read_csv('audit_dataset.csv', header=None)
    .pipe(pd.get_dummies)
    .astype(float)
)

# Every column except the last is used as a feature; the last is the target.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=30,
)

In [None]:
# Rescale every feature to the [0, 1] range. Previously the scaler was
# created but never applied, so the "*_scaled" variables held raw data.
# The scaler is fitted on the training split only and then reused on the
# test split so that no test-set statistics leak into training.
scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# One-hot encode the class labels. The encoder is fitted on the training
# labels and reused for the test labels so both share the same column order.
one_hot = OneHotEncoder(categories='auto')

# .toarray() yields a plain ndarray; .todense() would return the deprecated
# np.matrix type, which recent scikit-learn metric functions reject.
y_train_hot = one_hot.fit_transform(y_train.values.reshape(-1, 1)).toarray()
y_test_hot = one_hot.transform(y_test.values.reshape(-1, 1)).toarray()

# Testing Number of Restarts

In [None]:
# Sweep the number of random restarts (0, 5, ..., 50) with the iteration
# budget held at 1000, recording train/test error (1 - accuracy) for each.
np.random.seed(4)  # seed NumPy's global RNG for repeatability
restartss = list(range(0, 51, 5))
testError = []
trainError = []
for r in restartss:
    nn_model1 = mlrose.NeuralNetwork(hidden_nodes=[10, 15, 10], activation='tanh',
                                     algorithm='random_hill_climb',
                                     max_iters=1000, bias=True, is_classifier=True, restarts=r,
                                     learning_rate=.0001, early_stopping=True)
    nn_model1.fit(X_train_scaled, y_train_hot)

    y_train_pred = nn_model1.predict(X_train_scaled)
    train_error = 1 - accuracy_score(y_train_hot, y_train_pred)
    trainError.append(train_error)

    y_test_pred = nn_model1.predict(X_test_scaled)
    test_error = 1 - accuracy_score(y_test_hot, y_test_pred)
    testError.append(test_error)

    # Report this step's scalar errors (the whole trainError list used to be
    # printed here by mistake).
    print(r, train_error, test_error)


In [None]:
# Plot train/test error against the number of restarts and save the figure.
plt.figure()
title = "Random Hill Climb : Error x Number of Restarts - 1000 Max Iterations"
plt.title('\n'.join(wrap(title, 60)))  # wrap long titles at 60 characters
plt.plot(restartss, testError, '-', label='Test Error')
plt.plot(restartss, trainError, '-', label='Train Error')
plt.legend()
plt.xlabel('Restarts')
plt.ylabel('Error')
# Use a file name specific to this experiment; it used to be 'RHC_MaxIter.png',
# which the max-iterations plot also writes to, so this figure got overwritten.
filename = 'RHC_Restarts.png'
os.makedirs("RHC", exist_ok=True)  # savefig does not create missing directories
plt.savefig("RHC/" + filename)

# Testing Number of Iterations

In [None]:
# Sweep the iteration budget (100, 150, ..., 950) with restarts held at 40,
# recording train/test error (1 - accuracy) for each setting.
np.random.seed(4)
# NOTE: despite its name, `restartss` holds max_iters values in this cell;
# the name is kept because the plotting cell that follows reads it.
restartss = [*range(100, 1000, 50)]
trainError, testError = [], []
for r in restartss:
    nn_model1 = mlrose.NeuralNetwork(
        hidden_nodes=[10, 15, 10],
        activation='tanh',
        algorithm='random_hill_climb',
        max_iters=r,
        bias=True,
        is_classifier=True,
        restarts=40,
        learning_rate=.0001,
        early_stopping=True,
    )
    nn_model1.fit(X_train_scaled, y_train_hot)

    # Training-split error.
    y_train_pred = nn_model1.predict(X_train_scaled)
    train_error = 1 - accuracy_score(y_train_hot, y_train_pred)
    trainError.append(train_error)

    # Held-out-split error.
    y_test_pred = nn_model1.predict(X_test_scaled)
    test_error = 1 - accuracy_score(y_test_hot, y_test_pred)
    testError.append(test_error)

    print(r, test_error)



In [None]:
# Show the raw test errors, then plot train/test error against the
# max-iterations setting and save the figure.
print(testError)
plt.figure()
title = "Random Hill Climb : Error x Max Iterations - Number of Restarts 40"
plt.title('\n'.join(wrap(title, 60)))  # wrap long titles at 60 characters
# `restartss` holds the max_iters values produced by the sweep cell.
plt.plot(restartss, testError, '-', label='Test Error')
plt.plot(restartss, trainError, '-', label='Train Error')
plt.legend()
plt.xlabel('Max Iterations')
plt.ylabel('Error')
filename = 'RHC_MaxIter.png'
os.makedirs("RHC", exist_ok=True)  # savefig does not create missing directories
plt.savefig("RHC/" + filename)

# Runtime 

In [None]:
# Time ten identical runs (1000 max iterations, 40 restarts). Each timed
# window covers model construction, fitting, both predictions and scoring.
np.random.seed(4)
# NOTE: `time` is a plain list of elapsed durations (it shadows the stdlib
# module name); the name is kept because the averaging cell reads it.
time = []
trainError, testError = [], []
print("start")
for i in range(10):
    start = timer()

    nn_model1 = mlrose.NeuralNetwork(
        hidden_nodes=[10, 15, 10],
        activation='tanh',
        algorithm='random_hill_climb',
        max_iters=1000,
        bias=True,
        is_classifier=True,
        restarts=40,
        learning_rate=.0001,
        early_stopping=True,
    )
    nn_model1.fit(X_train_scaled, y_train_hot)

    y_train_pred = nn_model1.predict(X_train_scaled)
    train_error = 1 - accuracy_score(y_train_hot, y_train_pred)
    trainError.append(train_error)

    y_test_pred = nn_model1.predict(X_test_scaled)
    test_error = 1 - accuracy_score(y_test_hot, y_test_pred)
    testError.append(test_error)

    end = timer()
    time.append(end - start)
    print(i, time[-1])  # the entry just appended is run i's duration
print(time)



In [None]:
# Arithmetic mean of the per-run durations collected in `time`.
mean_runtime = sum(time) / len(time)
print(mean_runtime)