In [None]:
import csv
import os
from textwrap import wrap
from timeit import default_timer as timer

import matplotlib.pyplot as plt
import mlrose
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

In [None]:
# Read the audit table (the file has no header row), dummy-encode any
# non-numeric columns, and cast everything to float in one chained step.
df = pd.get_dummies(
    pd.read_csv('audit_dataset.csv', header=None)
).astype(float)

# Every column except the last is a feature; the last column is the target.
# NOTE(review): get_dummies appends encoded columns after the untouched ones,
# so if the file contains non-numeric columns the last column here may no
# longer be the original label column - confirm against the dataset.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=30,
)

In [None]:
# Rescale every feature with MinMaxScaler (default range [0, 1]).
# The scaler is fitted on the training split only and then reused on the
# test split, so no test-set statistics leak into preprocessing.
# Previously the scaler was created but never applied, leaving the
# "*_scaled" names pointing at the raw, unscaled data.
scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# One-hot encode the labels. The encoder learns its categories from the
# training labels and is then reused, unchanged, on the test labels.
one_hot = OneHotEncoder(categories='auto')

# Reshape each 1-D label vector into a single-column 2-D array for the encoder.
train_label_col = y_train.values.reshape(-1, 1)
test_label_col = y_test.values.reshape(-1, 1)

# todense() turns the encoder's sparse result into a dense matrix.
y_train_hot = one_hot.fit_transform(train_label_col).todense()
y_test_hot = one_hot.transform(test_label_col).todense()

# Testing Mutation Probability 

In [None]:
# Sweep the genetic algorithm's mutation probability while every other
# hyper-parameter stays fixed, recording train/test error for each setting.
np.random.seed(3)  # seed NumPy's global RNG once, before the whole sweep
mut_pro = [.1, .2, .3, .4, .5, .6, .7, .8, .9]
testError = []
trainError = []

# Settings shared by every model trained in this sweep.
fixed_settings = dict(
    activation='tanh',
    algorithm='genetic_alg',
    max_iters=1000,
    bias=True,
    is_classifier=True,
    learning_rate=.001,
    early_stopping=True,
    pop_size=200,
)

for m in mut_pro:
    nn_model1 = mlrose.NeuralNetwork(hidden_nodes=[10, 15, 10],
                                     mutation_prob=m,
                                     **fixed_settings)
    nn_model1.fit(X_train_scaled, y_train_hot)

    # Predict on both splits, then score: error = 1 - accuracy.
    y_train_pred = nn_model1.predict(X_train_scaled)
    y_test_pred = nn_model1.predict(X_test_scaled)
    train_error = 1 - accuracy_score(y_train_hot, y_train_pred)
    test_error = 1 - accuracy_score(y_test_hot, y_test_pred)

    trainError.append(train_error)
    testError.append(test_error)
    print(m, train_error, test_error)


In [None]:
# Plot train/test error against mutation probability and save the figure
# under GA/.
plt.figure()
title = "Genetic Algorithm : Error x Mutation Prob - Population 200"
plt.title('\n'.join(wrap(title, 60)))  # wrap long titles at 60 characters
plt.plot(mut_pro, testError, '-', label='Test Error')
plt.plot(mut_pro, trainError, '-', label='Train Error')
plt.legend()
plt.xlabel('Mutation Probability')
plt.ylabel('Error')
filename = 'mut1.png'
# savefig does not create missing directories, so make sure GA/ exists.
os.makedirs("GA", exist_ok=True)
plt.savefig("GA/" + filename)

# Testing Population Size 

In [None]:
# Sweep the genetic algorithm's population size while every other
# hyper-parameter stays fixed, recording train/test error for each size.
np.random.seed(3)  # seed NumPy's global RNG once, before the whole sweep
population_sizes = [5, 10, 15, 20, 25, 50, 75, 100, 200, 300, 400, 500, 1000]
testError = []
trainError = []

# Settings shared by every model trained in this sweep.
fixed_settings = dict(
    activation='tanh',
    algorithm='genetic_alg',
    max_iters=1000,
    bias=True,
    is_classifier=True,
    learning_rate=.0001,
    early_stopping=True,
    mutation_prob=.1,
)

for p in population_sizes:
    nn_model1 = mlrose.NeuralNetwork(hidden_nodes=[10, 15, 10],
                                     pop_size=p,
                                     **fixed_settings)
    nn_model1.fit(X_train_scaled, y_train_hot)

    # Predict on both splits, then score: error = 1 - accuracy.
    y_train_pred = nn_model1.predict(X_train_scaled)
    y_test_pred = nn_model1.predict(X_test_scaled)
    train_error = 1 - accuracy_score(y_train_hot, y_train_pred)
    test_error = 1 - accuracy_score(y_test_hot, y_test_pred)

    trainError.append(train_error)
    testError.append(test_error)
    print(p, train_error, test_error)

In [None]:
# Plot train/test error against population size and save the figure
# under GA/.
plt.figure()
title = "Genetic Algorithm : Error x Population Size - Mutation Prob = .1"
plt.title('\n'.join(wrap(title, 60)))  # wrap long titles at 60 characters
plt.plot(population_sizes, testError, '-', label='Test Error')
plt.plot(population_sizes, trainError, '-', label='Train Error')
plt.legend()
plt.xlabel('Population size')
plt.ylabel('Error')
filename = 'pop5.png'
# savefig does not create missing directories, so make sure GA/ exists.
os.makedirs("GA", exist_ok=True)
plt.savefig("GA/" + filename)

# Testing Number of Max Iterations 

In [None]:
# Sweep the iteration cap (100, 150, ..., 950) while every other
# hyper-parameter stays fixed, recording train/test error for each cap.
np.random.seed(3)  # seed NumPy's global RNG once, before the whole sweep
numberofIter = list(range(100, 1000, 50))
testError = []
trainError = []

# Settings shared by every model trained in this sweep.
fixed_settings = dict(
    activation='tanh',
    algorithm='genetic_alg',
    bias=True,
    is_classifier=True,
    learning_rate=.0001,
    early_stopping=True,
    pop_size=200,
    mutation_prob=.1,
)

for n in numberofIter:
    nn_model1 = mlrose.NeuralNetwork(hidden_nodes=[10, 15, 10],
                                     max_iters=n,
                                     **fixed_settings)
    nn_model1.fit(X_train_scaled, y_train_hot)

    # Predict on both splits, then score: error = 1 - accuracy.
    y_train_pred = nn_model1.predict(X_train_scaled)
    y_test_pred = nn_model1.predict(X_test_scaled)
    train_error = 1 - accuracy_score(y_train_hot, y_train_pred)
    test_error = 1 - accuracy_score(y_test_hot, y_test_pred)

    trainError.append(train_error)
    testError.append(test_error)
    print(n, train_error, test_error)



In [None]:
# Plot train/test error against the iteration cap and save the figure
# under GA/.
# The error lists are plotted exactly as measured by the sweep: the earlier
# hand-written overrides of individual entries were removed so the figure
# reflects the actual experiment. The title now states population 200,
# matching the pop_size used by the max-iterations sweep.
plt.figure()
title = "Genetic Algorithm : Error x Max Iterations - Population 200, Mutation Prob = .1"
plt.title('\n'.join(wrap(title, 60)))  # wrap long titles at 60 characters
plt.plot(numberofIter, testError, '-', label='Test Error')
plt.plot(numberofIter, trainError, '-', label='Train Error')
plt.legend()
plt.xlabel('Max Iterations')
plt.ylabel('Error')
filename = 'iter5.png'
# savefig does not create missing directories, so make sure GA/ exists.
os.makedirs("GA", exist_ok=True)
plt.savefig("GA/" + filename)

# Runtime 

In [None]:
# Time ten repeated train-and-evaluate runs of the same configuration.
# Each timed span covers model construction, fitting, both predictions and
# both accuracy computations.
time = []        # elapsed time of each run, read again by the averaging cell
testError = []
trainError = []
print("start")

# Configuration used for every timed run.
fixed_settings = dict(
    activation='tanh',
    algorithm='genetic_alg',
    max_iters=1000,
    bias=True,
    is_classifier=True,
    learning_rate=.0001,
    early_stopping=True,
    pop_size=1000,
    mutation_prob=.1,
)

for i in range(10):
    start = timer()

    nn_model1 = mlrose.NeuralNetwork(hidden_nodes=[10, 15, 10], **fixed_settings)
    nn_model1.fit(X_train_scaled, y_train_hot)

    # Predict on both splits, then score: error = 1 - accuracy.
    y_train_pred = nn_model1.predict(X_train_scaled)
    y_test_pred = nn_model1.predict(X_test_scaled)
    train_error = 1 - accuracy_score(y_train_hot, y_train_pred)
    test_error = 1 - accuracy_score(y_test_hot, y_test_pred)
    trainError.append(train_error)
    testError.append(test_error)

    end = timer()
    time.append(end - start)
    print(i, time[-1])

print(time)



In [None]:
# Average elapsed time per run over all entries collected in `time`.
total_runtime = sum(time)
run_count = len(time)
print(total_runtime / run_count)