In [None]:
import sklearn as sk
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn import tree
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.neural_network import MLPClassifier

Load Data Sets
--

In [None]:
# Load data set #1 from cleaned_processed.cleveland.data.txt (comma-separated, numeric).
# The path is a raw string so the Windows-style backslashes are never parsed as
# escape sequences, and the file is closed as soon as it has been read.
with open(r".\Private-Data\Seng 474\cleaned_processed.cleveland.data.txt") as f:
    data = np.loadtxt(f, delimiter=",")
# Every column except the last is a feature; the last column is the class label.
X1 = data[:, 0:-1]
Y1 = data[:, -1]

In [None]:
# Load data set #2 from Breast_Cancer.csv (comma-separated with one header row).
# The path is a raw string so the Windows-style backslashes are never parsed as
# escape sequences, and the file is closed as soon as it has been read.
with open(r".\Private-Data\Seng 474\Breast_Cancer.csv") as f:
    # The first line holds the column names; drop the trailing newline from the last one.
    features = f.readline().split(",")
    features[-1] = features[-1].strip("\n")
    # The remaining lines are numeric rows.
    data = np.loadtxt(f, delimiter=",")
# The first column is the class label; every other column is a feature.
X2 = data[:, 1:]
Y2 = data[:, 0]

Misc functions
--

In [None]:
def stats(pred, test):
    """Print evaluation metrics for a set of predictions and return the accuracy.

    pred: predicted labels.
    test: true labels the predictions are compared against.

    Prints the mean absolute error, the accuracy and the classification
    report, then returns the accuracy score.
    """
    print("\n-----------------------------------Metrics-----------------------------------")
    meanAbsoluteError = metrics.mean_absolute_error(test, pred)
    print('Mean Absolute Error:', meanAbsoluteError)
    accuracy = accuracy_score(test, pred)
    print('           Accuracy:', accuracy)
    print("\n-----------------------------------Report------------------------------------")
    report = classification_report(test, pred)
    print(report)
    return accuracy

Build Decision Trees
--
Change the following parameters:<br> 
    * Number of features
    * Tree depth
    * Split criterion
    * Validation size
    * Number of trees
    * Pruning rule

In [None]:
def executeDecisionTree(X, Y, testSize = 0.2, c = "gini", maxDepth = None, minSamplesSplit = 2, maxFeatures = None, minImpurityDecrease = 0.0, rand = 0):
    """Train a decision tree on a train/test split and report its accuracy.

    X, Y: features and class labels.
    testSize: passed to train_test_split as test_size.
    c: split criterion (criterion), e.g. "gini" or "entropy".
    maxDepth: maximum depth of the tree (max_depth).
    minSamplesSplit: minimum number of samples required to split an internal node (min_samples_split).
    maxFeatures: number of features considered when looking for the best split (max_features).
    minImpurityDecrease: a node is split only if the split decreases the impurity by at least this value (min_impurity_decrease).
    rand: random_state used for both the estimator and the train/test split.

    Prints the estimator parameters, the metrics for the training and test
    predictions (via stats) and a text export of the fitted tree, and calls
    tree.plot_tree on the fitted tree.

    Returns [training accuracy, test accuracy].
    """
    model = tree.DecisionTreeClassifier(
        criterion = c,
        max_depth = maxDepth,
        min_samples_split = minSamplesSplit,
        max_features = maxFeatures,
        random_state = rand,
        min_impurity_decrease = minImpurityDecrease,
    )
    print("Parameters:")
    print(model.get_params())

    xTrain, xTest, yTrain, yTest = train_test_split(X, Y, test_size = testSize, random_state = rand)
    fitted = model.fit(xTrain, yTrain)

    # Training metrics are reported first, then the held-out metrics.
    trainAccuracy = stats(fitted.predict(xTrain), yTrain)
    testAccuracy = stats(fitted.predict(xTest), yTest)

    tree.plot_tree(fitted)
    print(tree.export_text(fitted))
    return [trainAccuracy, testAccuracy]

Build Random Forest
--
Change the following parameters:
    * Number of features
    * Tree depth
    * Split criterion
    * Validation size
    * Number of trees

In [None]:
def executeRandomForest(X, Y, testSize = 0.2, rand = 0, nEst = 100, c = "gini",  maxDepth = None, minSamplesSplit = 2, maxFeatures = "auto", minImpurityDecrease = 0.0):
    """Train a random forest on a train/test split and report its accuracy.

    X, Y: features and class labels.
    testSize: passed to train_test_split as test_size.
    rand: random_state used for both the estimator and the train/test split.
    nEst: number of trees in the forest (n_estimators).
    c: split criterion (criterion), e.g. "gini" or "entropy".
    maxDepth: maximum depth of each tree (max_depth).
    minSamplesSplit: minimum number of samples required to split an internal node (min_samples_split).
    maxFeatures: number of features considered when looking for the best split
        (max_features). The legacy value "auto" is still accepted and is
        translated to "sqrt", its meaning for RandomForestClassifier.
    minImpurityDecrease: a node is split only if the split decreases the impurity by at least this value (min_impurity_decrease).

    Prints the estimator parameters and the metrics for the training and test
    predictions (via stats).

    Returns [training accuracy, test accuracy].
    """
    # "auto" was deprecated in scikit-learn 1.1 and is rejected from 1.3 on.
    # For a forest classifier it always meant "sqrt", so translate it here and
    # keep the default argument (and every existing caller) working.
    if isinstance(maxFeatures, str) and maxFeatures == "auto":
        maxFeatures = "sqrt"
    classifier = RandomForestClassifier(n_estimators = nEst, criterion = c, max_depth = maxDepth, min_samples_split = minSamplesSplit, max_features = maxFeatures, random_state = rand, min_impurity_decrease = minImpurityDecrease)
    result = []
    print("Parameters:")
    print(classifier.get_params())
    xTrain, xTest, yTrain, yTest = train_test_split(X, Y, test_size = testSize, random_state = rand)
    clf = classifier.fit(xTrain, yTrain)
    # Training metrics are reported first, then the held-out metrics.
    result.append(stats(clf.predict(xTrain), yTrain))
    result.append(stats(clf.predict(xTest), yTest))
    return result

Build Artificial Neural Network
--
Change the following parameters:<br>
    * Learning rate
    * Number of hidden layers
    * Number of training iterations

In [None]:
def executeNeuralNetwork(X, Y, testSize = 0.2, rand = 0, layers = (100,), solverType = 'adam', a = 0.0001, learningRate = 'constant', learningRateVal = 0.001, iterations = 200, earlyStopping = False, validationSet = 0.1, act = 'relu'):
    """Train an MLP classifier on a train/test split and report its accuracy.

    X, Y: features and class labels.
    testSize: passed to train_test_split as test_size.
    rand: random_state used for both the estimator and the train/test split.
    layers: hidden_layer_sizes; the ith element is the number of neurons in the ith hidden layer.
    solverType: solver, e.g. 'lbfgs' (quasi-Newton), 'sgd' (stochastic gradient descent) or 'adam'.
    a: alpha, the L2 penalty (regularization term).
    learningRate: learning_rate schedule for weight updates; only used when the solver is 'sgd'.
    learningRateVal: learning_rate_init, the initial step size; only used with 'sgd' or 'adam'.
    iterations: max_iter, the maximum number of iterations (epochs for 'sgd').
    earlyStopping: early_stopping; stop training when the validation score stops improving
        (only effective with 'sgd' or 'adam').
    validationSet: validation_fraction, the share of the training data set aside for early
        stopping; only used when earlyStopping is True.
    act: activation function of the hidden layers.

    Prints the estimator parameters and the metrics for the training and test
    predictions (via stats).

    Returns [training accuracy, test accuracy].
    """
    settings = dict(
        hidden_layer_sizes = layers,
        solver = solverType,
        alpha = a,
        learning_rate = learningRate,
        learning_rate_init = learningRateVal,
        max_iter = iterations,
        random_state = rand,
        early_stopping = earlyStopping,
        validation_fraction = validationSet,
        activation = act,
    )
    network = MLPClassifier(**settings)
    print("Parameters:")
    print(network.get_params())

    xTrain, xTest, yTrain, yTest = train_test_split(X, Y, test_size = testSize, random_state = rand)
    fitted = network.fit(xTrain, yTrain)

    # Training metrics are reported first, then the held-out metrics.
    trainAccuracy = stats(fitted.predict(xTrain), yTrain)
    testAccuracy = stats(fitted.predict(xTest), yTest)
    return [trainAccuracy, testAccuracy]

Find the most efficient parameters
--
**Find the most efficient parameters.**

In [None]:
def findBestParams(X, Y, pIndex):
    """Grid-search the hyper-parameters of one model family and print the best result.

    X, Y: features and class labels handed to GridSearchCV.fit.
    pIndex: selects the model family:
        0 -> tree.DecisionTreeClassifier
        1 -> RandomForestClassifier
        2 -> MLPClassifier
      Any other value prints "Incorrect index" and returns without searching.

    Prints best_params_ and best_score_ of the fitted search. Returns None.
    """
    if pIndex == 0:
        # max_features cannot exceed the number of feature columns, so the sweep
        # is bounded by the column count of X, not by its number of rows.
        featureCount = np.shape(X)[1]
        # min_samples_split starts at 2: an integer value of 1 is not a valid setting.
        parameters = {'criterion': ['gini', 'entropy'], 'max_depth': np.arange(start=1, stop=10, step=1), 'max_features': np.arange(start=1, stop=featureCount + 1, step=2), 'min_samples_split': np.arange(start=2, stop=100, step=5), 'min_impurity_decrease': np.arange(0.0, 0.51, 0.05)}
        Classifier = tree.DecisionTreeClassifier()
    elif pIndex == 1:
        # 'auto' is left out of max_features: for a forest classifier it is the same
        # setting as 'sqrt', and it is rejected by scikit-learn 1.3 and later.
        # min_samples_split starts at 2 for the same reason as above.
        parameters = {'n_estimators': np.arange(start=1, stop=500, step=100),'criterion': ['gini', 'entropy'], 'max_depth': np.arange(start=1, stop=10, step=1), 'max_features': ['sqrt', 'log2', None], 'min_samples_split': np.arange(start=2, stop=10, step=1), 'min_impurity_decrease': np.arange(start=0.0, stop=1.0, step=0.1)}
        Classifier = RandomForestClassifier()
    elif pIndex == 2:
        parameters = {'solver': ['lbfgs','sgd'], 'max_iter': np.arange(start=1, stop=800, step=5),'hidden_layer_sizes': np.arange(10, 200, 10), 'learning_rate': ['constant', 'invscaling', 'adaptive']}
        Classifier = MLPClassifier()
    else:
        print("Incorrect index")
        return
    # n_jobs=-1 asks GridSearchCV to use every available processor.
    clf = GridSearchCV(Classifier, parameters, n_jobs=-1)
    clf.fit(X,Y)
    print(clf.best_params_)
    print(clf.best_score_)

Plot Graph
--

In [None]:
def plotGraph(a, fa, b, fb, name, xvalrange = 100, xAxisTitle = 'Test Size (%)', title = "Test Size", digits = 5):
    """Plot training and test accuracy curves and save the figure as a PNG.

    a, fa: x and y values of the training curve.
    b, fb: x and y values of the test curve.
    name: output file name without extension; the figure is saved as name + '.png'.
    xvalrange: upper limit of the x axis (the lower limit is 0).
    xAxisTitle: label of the x axis.
    title: prefix of the figure title (title + ' vs. Accuracy').
    digits: number of leading characters of the maximum test accuracy shown in the annotation.

    The highest test accuracy (first occurrence) is marked with an arrow.
    The y axis is fixed to 0..100.
    """
    plt.clf()
    plt.plot(a, fa, linewidth=3.0, label='Training Data')
    plt.plot(b, fb, linewidth=3.0, label='Test Data')
    # Locate the first maximum of the test curve by position, so this works for any
    # indexable sequence and not only for lists.
    xpos = max(range(len(fb)), key=lambda position: fb[position])
    ymax = fb[xpos]
    # Take the matching x value from this function's own test-curve argument
    # rather than from a notebook-level variable.
    xmax = b[xpos]
    # The label is placed 30 accuracy points below the marked maximum.
    plt.annotate('Maximum \n(' + str(xmax) + ', ' + str(ymax)[0:digits] + ')', xy=(xmax, ymax), xytext=(xmax, ymax -30), arrowprops=dict(facecolor='black', shrink=0.05))
    plt.xlabel(xAxisTitle)
    plt.ylabel('Accuracy (%)')
    plt.xlim(0, xvalrange)
    plt.ylim(0, 100)
    plt.title(title + ' vs. Accuracy')
    plt.legend(loc = 'lower right')
    plt.savefig(name  + '.png')

Main program -- Decision Trees
--
**Contains experiments and parameter changes.**

In [None]:
# Experiment 1: decision tree with default parameters.
# Sweeps the test-set share from 10% to 90% in steps of 5 and plots training
# vs. test accuracy for each data set.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for testPercent in range(10, 91, 5):
        trainAcc, testAcc = executeDecisionTree(inputs, targets, testSize = testPercent/100)
        x.append(testPercent)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName)

In [None]:
# Experiment 2: decision tree using entropy instead of gini as the split criterion.
# Sweeps the test-set share from 10% to 90% in steps of 5 and plots training
# vs. test accuracy for each data set.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for testPercent in range(10, 91, 5):
        trainAcc, testAcc = executeDecisionTree(inputs, targets, testSize = testPercent/100, c = 'entropy')
        x.append(testPercent)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName)

In [None]:
# Experiment 3: decision tree with a maximum depth of 1 to 8 (gini, 20% test split).
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for depth in range(1, 9, 1):
        trainAcc, testAcc = executeDecisionTree(inputs, targets, testSize = 0.2, maxDepth = depth)
        x.append(depth)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 9, xAxisTitle = "Tree Depth", title = "Tree Depth")

In [None]:
# Experiment 4: decision tree with a maximum depth of 1 to 8, using entropy
# instead of gini as the split criterion (20% test split).
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for depth in range(1, 9, 1):
        trainAcc, testAcc = executeDecisionTree(inputs, targets, testSize = 0.2, c = 'entropy', maxDepth = depth)
        x.append(depth)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 9, xAxisTitle = "Tree Depth", title = "Tree Depth")

In [None]:
# Experiment 5: decision tree with varying minimum split samples (gini, 20% test split).
# The minimum is swept over powers of two: 2..2^7 for data set 1 and 2..2^9 for data set 2.
for inputs, targets, figureName, topExponent in ((X1, Y1, "DataSet1", 7), (X2, Y2, "DataSet2", 9)):
    x, fa, fb = [], [], []
    for exponent in range(1, topExponent + 1, 1):
        splitSamples = 2 ** exponent
        trainAcc, testAcc = executeDecisionTree(inputs, targets, testSize = 0.2, minSamplesSplit = splitSamples)
        x.append(splitSamples)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 2 ** topExponent, xAxisTitle = "Minimum Split Samples", title = "Minimum Split Samples", digits = 2)

In [None]:
# Experiment 6: decision tree with varying minimum split samples, using entropy
# instead of gini as the split criterion (20% test split).
# The minimum is swept over powers of two: 2..2^7 for data set 1 and 2..2^9 for data set 2.
for inputs, targets, figureName, topExponent in ((X1, Y1, "DataSet1", 7), (X2, Y2, "DataSet2", 9)):
    x, fa, fb = [], [], []
    for exponent in range(1, topExponent + 1, 1):
        splitSamples = 2 ** exponent
        trainAcc, testAcc = executeDecisionTree(inputs, targets, testSize = 0.2, c = 'entropy', minSamplesSplit = splitSamples)
        x.append(splitSamples)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 2 ** topExponent, xAxisTitle = "Minimum Split Samples", title = "Minimum Split Samples", digits = 2)

In [None]:
# Experiment 7: decision tree with varying random seed (0 to 30), using entropy
# as the split criterion (20% test split).
# The seed drives both the train/test split and the estimator.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for seed in range(0, 31, 1):
        trainAcc, testAcc = executeDecisionTree(inputs, targets, testSize = 0.2, c = 'entropy', rand = seed)
        x.append(seed)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 30, xAxisTitle = "Randomization", title = "Randomization", digits = 2)

In [None]:
# Experiment 8: decision tree with varying random seed (0 to 30), using the
# default gini split criterion (20% test split).
# The seed drives both the train/test split and the estimator.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for seed in range(0, 31, 1):
        trainAcc, testAcc = executeDecisionTree(inputs, targets, testSize = 0.2, rand = seed)
        x.append(seed)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 30, xAxisTitle = "Randomization", title = "Randomization", digits = 2)

In [None]:
# Experiment 9: decision tree with varying max features (gini, 20% test split).
# The sweep runs from 2 up to 13 for data set 1 and up to 30 for data set 2.
for inputs, targets, figureName, sweepLimit in ((X1, Y1, "DataSet1", 13), (X2, Y2, "DataSet2", 30)):
    x, fa, fb = [], [], []
    for featureLimit in range(2, sweepLimit + 1, 1):
        trainAcc, testAcc = executeDecisionTree(inputs, targets, testSize = 0.2, maxFeatures = featureLimit)
        x.append(featureLimit)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = sweepLimit, xAxisTitle = "Max Features", title = "Max Feautures", digits = 2)

In [None]:
# Experiment 10: decision tree with varying max features, using entropy instead
# of gini as the split criterion (20% test split).
# The sweep runs from 2 up to 13 for data set 1 and up to 30 for data set 2.
for inputs, targets, figureName, sweepLimit in ((X1, Y1, "DataSet1", 13), (X2, Y2, "DataSet2", 30)):
    x, fa, fb = [], [], []
    for featureLimit in range(2, sweepLimit + 1, 1):
        trainAcc, testAcc = executeDecisionTree(inputs, targets, testSize = 0.2, c = 'entropy', maxFeatures = featureLimit)
        x.append(featureLimit)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = sweepLimit, xAxisTitle = "Max Features", title = "Max Feautures", digits = 2)

In [None]:
# Experiment 11: decision tree with varying minimum impurity decrease
# (0.0 to 0.5 in steps of 0.05, gini, 20% test split).
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for impurityDecrease in np.arange(0.0, 0.51, 0.05):
        trainAcc, testAcc = executeDecisionTree(inputs, targets, testSize = 0.2, minImpurityDecrease = impurityDecrease)
        x.append(impurityDecrease)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 0.5, xAxisTitle = "Impurity Decrease", title = "Impurity Decrease", digits = 2)

In [None]:
# Experiment 12: decision tree with varying minimum impurity decrease
# (0.0 to 0.5 in steps of 0.05), using entropy instead of gini as the split
# criterion (20% test split).
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for impurityDecrease in np.arange(0.0, 0.51, 0.05):
        trainAcc, testAcc = executeDecisionTree(inputs, targets, testSize = 0.2, c = 'entropy', minImpurityDecrease = impurityDecrease)
        x.append(impurityDecrease)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 0.5, xAxisTitle = "Impurity Decrease", title = "Impurity Decrease", digits = 2)

Main program -- Random Forests
--
**Contains experiments and parameter changes.**

In [None]:
# Experiment 13: random forest with default parameters.
# Sweeps the test-set share from 10% to 90% in steps of 5 and plots training
# vs. test accuracy for each data set.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for testPercent in range(10, 91, 5):
        trainAcc, testAcc = executeRandomForest(inputs, targets, testSize = testPercent/100)
        x.append(testPercent)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName)

In [None]:
# Experiment 14: random forest using entropy instead of gini as the split criterion.
# Sweeps the test-set share from 10% to 90% in steps of 5 and plots training
# vs. test accuracy for each data set.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for testPercent in range(10, 91, 5):
        trainAcc, testAcc = executeRandomForest(inputs, targets, testSize = testPercent/100, c = 'entropy')
        x.append(testPercent)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName)

In [None]:
# Experiment 15: random forest with 1 to 1001 estimators in steps of 50
# (gini, default test split).
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for treeCount in range(1, 1002, 50):
        trainAcc, testAcc = executeRandomForest(inputs, targets, nEst = treeCount)
        x.append(treeCount)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 1000, xAxisTitle = "Number of Estimators", title = "Number of Estimators", digits = 2)

In [None]:
# Experiment 16: random forest with 1 to 1001 estimators in steps of 50, using
# entropy instead of gini as the split criterion (default test split).
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for treeCount in range(1, 1002, 50):
        trainAcc, testAcc = executeRandomForest(inputs, targets, nEst = treeCount, c = 'entropy')
        x.append(treeCount)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 1000, xAxisTitle = "Number of Estimators", title = "Number of Estimators", digits = 2)

In [None]:
# Experiment 17: random forest with a maximum tree depth of 1 to 8 (gini, 20% test split).
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for depth in range(1, 9, 1):
        trainAcc, testAcc = executeRandomForest(inputs, targets, testSize = 0.2, maxDepth = depth)
        x.append(depth)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 9, xAxisTitle = "Tree Depth", title = "Tree Depth")

In [None]:
# Experiment 18: random forest with a maximum tree depth of 1 to 8, using
# entropy instead of gini as the split criterion (20% test split).
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for depth in range(1, 9, 1):
        trainAcc, testAcc = executeRandomForest(inputs, targets, testSize = 0.2, c = 'entropy', maxDepth = depth)
        x.append(depth)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 9, xAxisTitle = "Tree Depth", title = "Tree Depth")

Main program -- Neural Networks
--
**Contains experiments and parameter changes.**

In [None]:
# Experiment 19: neural network with default parameters.
# Sweeps the test-set share from 10% to 90% in steps of 5 and plots training
# vs. test accuracy for each data set.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for testPercent in range(10, 91, 5):
        trainAcc, testAcc = executeNeuralNetwork(inputs, targets, testSize = testPercent/100)
        x.append(testPercent)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName)

In [None]:
# Experiment 20: neural network with 1 to 29 hidden layers of 100 neurons each.
# The network is built with exactly as many hidden layers as the value plotted
# on the x axis, so the first point is a one-layer network.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for layerCount in range(1, 30, 1):
        trainAcc, testAcc = executeNeuralNetwork(inputs, targets, layers = (100,) * layerCount)
        x.append(layerCount)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 30, xAxisTitle = "Number of Layers", title = "Number of Layers")

In [None]:
# Experiment 21: neural network with 100 to 1500 maximum iterations in steps
# of 50, using sgd as the solver.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for iterationLimit in range(100, 1501, 50):
        trainAcc, testAcc = executeNeuralNetwork(inputs, targets, solverType = 'sgd', iterations = iterationLimit)
        x.append(iterationLimit)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 1500, xAxisTitle = "Number of Iterations", title = "Number of Iterations")

In [None]:
# Experiment 22: neural network with 100 to 1500 maximum iterations in steps
# of 50, using the default adam solver.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for iterationLimit in range(100, 1501, 50):
        trainAcc, testAcc = executeNeuralNetwork(inputs, targets, iterations = iterationLimit)
        x.append(iterationLimit)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 1500, xAxisTitle = "Number of Iterations", title = "Number of Iterations")

In [None]:
# Experiment 23: neural network with varying alpha (L2 penalty).
# Alpha is 1000.0^(-k) for k = 0..4; the exponent k is what gets plotted on the x axis.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for exponent in range(0, 5, 1):
        trainAcc, testAcc = executeNeuralNetwork(inputs, targets, a = 1000.0 ** -exponent)
        x.append(exponent)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 4, xAxisTitle = "Alpha (1000.0^(-x))", title = "Alpha")

In [None]:
# Experiment 24: neural network with two hidden layers of equal width and the
# 'logistic' activation function. The width is swept from 10 to 300 in steps of 10.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for width in range(10, 301, 10):
        trainAcc, testAcc = executeNeuralNetwork(inputs, targets, layers = (width, width), act = 'logistic')
        x.append(width)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 300, xAxisTitle = "Number of Hidden Neurons per Layer", title = "Number of Hidden Neurons")

In [None]:
# Experiment 25: neural network with varying initial learning rate.
# The rate is 1000.0^(-k) for k = 0..4; the exponent k is what gets plotted on the x axis.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for exponent in range(0, 5, 1):
        trainAcc, testAcc = executeNeuralNetwork(inputs, targets, learningRateVal = 1000.0 ** -exponent)
        x.append(exponent)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 4, xAxisTitle = "Learning Rate (1000.0^(-x))", title = "Learning Rate")

In [None]:
# Experiment 26: neural network trained with sgd and the 'adaptive' learning
# rate schedule. Two hidden layers of equal width; the width is swept from 10
# to 300 in steps of 10.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for width in range(10, 301, 10):
        trainAcc, testAcc = executeNeuralNetwork(inputs, targets, learningRate = 'adaptive', layers = (width, width), solverType = 'sgd')
        x.append(width)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 300, xAxisTitle = "Number of Hidden Neurons per Layer", title = "Number of Hidden Neurons")

In [None]:
# Experiment 27: neural network trained with sgd and the 'invscaling' learning
# rate schedule. Two hidden layers of equal width; the width is swept from 10
# to 300 in steps of 10.
for inputs, targets, figureName in ((X1, Y1, "DataSet1"), (X2, Y2, "DataSet2")):
    x, fa, fb = [], [], []
    for width in range(10, 301, 10):
        trainAcc, testAcc = executeNeuralNetwork(inputs, targets, learningRate = 'invscaling', layers = (width, width), solverType = 'sgd')
        x.append(width)
        fa.append(trainAcc * 100)
        fb.append(testAcc * 100)
    plotGraph(x, fa, x, fb, figureName, xvalrange = 300, xAxisTitle = "Number of Hidden Neurons per Layer", title = "Number of Hidden Neurons")

Optimization Portion
--
**Optimize all the model parameters to achieve the best accuracy**

In [None]:
# Run the grid search for every model family (index 0, 1, 2 as understood by
# findBestParams) on both data sets, announcing each search before it starts.
for modelIndex, modelName in enumerate(("decision tree", "random forest", "neural network")):
    for datasetNumber, (inputs, targets) in enumerate(((X1, Y1), (X2, Y2)), start=1):
        print("Best " + modelName + " parameters and score, Dataset " + str(datasetNumber))
        findBestParams(inputs, targets, modelIndex)