In [None]:
import sklearn as sk
from matplotlib import pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.neural_network import MLPClassifier

In [None]:
import numpy as np
import pandas as pd
import random
def apply(dataRow):
    """Return the first eleven fields of a row, randomly swapping its two column groups.

    With probability 1/2 the row is returned unchanged (fields 0-10). Otherwise
    fields 6-9 are moved ahead of fields 2-5 and the last field is replaced by
    the string '0'.

    NOTE(review): presumably fields 2-5 and 6-9 describe the two players and
    field 10 is the match label -- confirm against the CSV schema.

    dataRow: a sliceable sequence with at least eleven fields.
    Returns a new list of eleven fields.
    """
    swapGroups = random.choice([True, False])
    if not swapGroups:
        return list(dataRow[0:11])

    leading = list(dataRow[0:2])
    firstGroup = list(dataRow[2:6])
    secondGroup = list(dataRow[6:10])
    # The swapped row is labelled with the string '0' (not the integer 0).
    return leading + secondGroup + firstGroup + ['0']

def addStats(dataRow, playerData):
    """Replace the two key fields of a row (positions 2 and 6) with looked-up values.

    dataRow: a sliceable sequence; fields 2 and 6 are converted with float()
        and used as keys into playerData.
    playerData: mapping from float key to an iterable of values.

    Returns a new list: fields 0-1, the values for key 2, fields 3-5, the
    values for key 6, then every field from position 7 onwards.
    Raises KeyError when either key is missing from playerData.
    """
    firstStats = playerData[float(dataRow[2])]
    secondStats = playerData[float(dataRow[6])]
    return [
        *dataRow[0:2],
        *firstStats,
        *dataRow[3:6],
        *secondStats,
        *dataRow[7:],
    ]

In [None]:
# Load data
# Seed the stdlib RNG first: apply() uses random.choice to decide which rows are
# swapped, so without a seed the labels differ on every kernel restart.
random.seed(0)
fGameData = pd.read_csv("ATP_data_cleaned.csv", delimiter=',')
gameData = np.array([list(row) for row in fGameData.values])
# Separate features and target columns
gameData = np.array([apply(item) for item in gameData])
X = gameData[:, 0:10]
Y = gameData[:, 10]
# Add in player data
fGameData = pd.read_csv("ATP_player_data.csv", delimiter=',')
playerData = np.array([list(row) for row in fGameData.values])
# Map the key in column 1 (as a float) to the remaining columns of that row.
playerData = dict((float(row[1]), list(row[2:])) for row in playerData)
# dtype='float64' already yields a float array, so no further cast is needed.
# (The former `X.astype(np.float)` fails on NumPy >= 1.24, where the np.float
# alias no longer exists.)
X = np.array([addStats(item, playerData) for item in X], dtype='float64')

Misc functions
--

In [None]:
def stats(pred, test):
    """Print a classification report and the accuracy, then return the accuracy.

    pred: predicted labels.
    test: reference labels (passed to the metrics as the first argument).
    """
    banner = "\n-----------------------------------Report------------------------------------"
    print(banner)
    print(classification_report(test, pred))
    accuracy = accuracy_score(test, pred)
    print('Accuracy: ', accuracy)
    print("\n")
    return accuracy

In [None]:
def findBestParams(X, Y, pIndex):
    """Grid-search MLPClassifier hyper-parameters and print the best result.

    X, Y: features and labels handed to GridSearchCV.fit.
    pIndex: accepted but not used by this function.

    The grid covers 2 solvers x 160 max_iter values (1, 6, ..., 796) x
    19 hidden layer sizes (10, 20, ..., 190) x 3 learning-rate schedules,
    i.e. 18,240 combinations, evaluated on all available cores (n_jobs=-1).
    Prints best_params_ and best_score_; returns nothing.
    """
    searchSpace = {
        'solver': ['lbfgs', 'sgd'],
        'max_iter': np.arange(start=1, stop=800, step=5),
        'hidden_layer_sizes': np.arange(10, 200, 10),
        'learning_rate': ['constant', 'invscaling', 'adaptive'],
    }
    search = GridSearchCV(MLPClassifier(), searchSpace, n_jobs=-1)
    search.fit(X, Y)
    print(search.best_params_)
    print(search.best_score_)

Plot Graph
--

In [None]:
def plotGraph(a, fa, b, fb, name, xvalrange = 100, xAxisTitle = 'Test Size (%)', title = "Test Size", digits = 5):
    """Plot training and test error curves on one figure and save it as a PNG.

    a, fa: x and y values of the 'Training Data' curve.
    b, fb: x and y values of the 'Test Data' curve.
    name: output file name without extension; the figure is saved to name + '.png'.
    xvalrange: upper bound of the x-axis (the lower bound is 0).
    xAxisTitle: label of the x-axis.
    title: prefix of the figure title, which becomes title + ' vs. Error'.
    digits: accepted for compatibility; not used by this function.
    """
    plt.clf()
    plt.plot(a, fa, linewidth=3.0, label='Training Data')
    plt.plot(b, fb, linewidth=3.0, label='Test Data')
    # The y-limit has to cover both curves; taking only max(fb) would clip the
    # training curve whenever it rises above the test curve. default=0 keeps
    # an empty series from raising.
    ymax = max(max(fa, default=0), max(fb, default=0))
    plt.xlabel(xAxisTitle)
    plt.ylabel('Error (%)')
    plt.xlim(0, xvalrange)
    plt.ylim(0, ymax + 10)
    plt.title(title + ' vs. Error')
    plt.legend(loc = 'lower right')
    plt.savefig(name  + '.png')

Build Artificial Neural Network
--

In [None]:
def executeNeuralNetwork(
    X,
    Y,
    testSize = 0.2,
    rand = 0,
    layers = (100,),
    solverType = 'adam',
    a = 0.0001,
    learningRate = 'constant',
    learningRateVal = 0.001,
    iterations = 200,
    earlyStopping = False,
    validationSet = 0.1,
    act = 'relu',
):
    """Train an MLPClassifier on a train/test split and report both error rates.

    X, Y: features and labels; split with train_test_split(test_size=testSize,
        random_state=rand).
    The remaining arguments are forwarded to MLPClassifier:
      layers -> hidden_layer_sizes: the ith element is the number of neurons
          in the ith hidden layer.
      solverType -> solver: 'lbfgs' is a quasi-Newton optimizer, 'sgd' is
          stochastic gradient descent.
      a -> alpha: L2 penalty (regularization term).
      learningRate -> learning_rate: schedule for weight updates; only used
          when solver='sgd'.
      learningRateVal -> learning_rate_init: initial step size; only used
          when solver='sgd' or 'adam'.
      iterations -> max_iter: maximum number of iterations (for sgd, the
          number of epochs rather than gradient steps).
      rand -> random_state: seeds weight and bias initialization.
      earlyStopping -> early_stopping: stop training when the validation
          score stops improving; only effective for 'sgd' or 'adam'.
      validationSet -> validation_fraction: share of the training data held
          out for early stopping; only used if early_stopping is True.
      act -> activation.

    Prints the classifier parameters and, via stats(), a report for the
    training and the test predictions.
    Returns [[trainError, testError], fittedClassifier], where each error is
    1 - accuracy.
    """
    networkOptions = dict(
        hidden_layer_sizes = layers,
        solver = solverType,
        alpha = a,
        learning_rate = learningRate,
        learning_rate_init = learningRateVal,
        max_iter = iterations,
        random_state = rand,
        early_stopping = earlyStopping,
        validation_fraction = validationSet,
        activation = act,
    )
    classifier = MLPClassifier(**networkOptions)

    print("Parameters:")
    print(classifier.get_params())

    xTrain, xTest, yTrain, yTest = train_test_split(
        X, Y, test_size = testSize, random_state = rand
    )
    clf = classifier.fit(xTrain, yTrain)

    # Error on the data the model was fitted on, then on the held-out data.
    trainError = 1 - stats(clf.predict(xTrain), yTrain)
    testError = 1 - stats(clf.predict(xTest), yTest)
    return [[trainError, testError], clf]

Main program -- Neural Networks
--

In [None]:
 # SGD solver; every other setting keeps its default (including 'relu' activation).
 executeNeuralNetwork(
     X,
     Y,
     solverType='sgd',
 )

In [None]:
 # All defaults: 'adam' solver with 'relu' activation.
 executeNeuralNetwork(
     X,
     Y,
 )

In [None]:
 # SGD solver with 'tanh' activation.
 executeNeuralNetwork(
     X,
     Y,
     solverType='sgd',
     act='tanh',
 )

In [None]:
 # Default 'adam' solver with 'tanh' activation.
 executeNeuralNetwork(
     X,
     Y,
     act='tanh',
 )

In [None]:
 # SGD solver with 'logistic' activation.
 executeNeuralNetwork(
     X,
     Y,
     solverType='sgd',
     act='logistic',
 )

In [None]:
 # Default 'adam' solver with 'logistic' activation.
 executeNeuralNetwork(
     X,
     Y,
     act='logistic',
 )

In [None]:
# Sweep the iteration cap (max_iter) from 200 to 3800 in steps of 200 and plot
# the training and test error (in percent) for each setting.
# MLPClassifier reference:
# https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html
x = []
fa = []
fb = []
for i in range(200, 4000, 200):
    Error = executeNeuralNetwork(X, Y, iterations = i)
    trainError, testError = Error[0]
    x.append(i)
    fa.append(trainError * 100)
    fb.append(testError * 100)

# Saved under its own file name: the other sweeps in this notebook save to
# "Group Project.png", which would overwrite this figure.
plotGraph(x, fa, x, fb, "Group Project - Iterations", xvalrange = 4000, xAxisTitle = "Number of Iterations", title = "Number of Iterations")

In [None]:
# Sweep the network depth and plot the training and test error (in percent).
# For x-value i the network has i - 1 hidden layers of 100 units each, so
# i = 1 trains a network with no hidden layer.
# NOTE(review): if the x-axis is meant to count hidden layers, the layer tuple
# should be (100,) * i instead -- confirm the intended meaning.
# MLPClassifier reference:
# https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html
x = []
fa = []
fb = []
for i in range(1, 5):
    Error = executeNeuralNetwork(X, Y, layers = (100,) * (i - 1), act = 'logistic')
    trainError, testError = Error[0]
    x.append(i)
    fa.append(trainError * 100)
    fb.append(testError * 100)

# Saved under its own file name: the other sweeps in this notebook save to
# "Group Project.png", which would overwrite this figure.
plotGraph(x, fa, x, fb, "Group Project - Layers", xvalrange = 4, xAxisTitle = "Number of Layers", title = "Number of Layers")

In [None]:
# Sweep the hidden layer width over 1, 11, ..., 201 and plot the training and
# test error (in percent). Each run uses two hidden layers of the same width.
# Note that the last width (201) lies just past the x-axis limit of 200.
# MLPClassifier reference:
# https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html
x, fa, fb = [], [], []
for i in range(1, 202, 10):
    Error = executeNeuralNetwork(X, Y, layers = (i, i), act = 'logistic')
    trainError, testError = Error[0]
    x.append(i)
    fa.append(trainError * 100)
    fb.append(testError * 100)

plotGraph(
    x, fa, x, fb, "Group Project",
    xvalrange = 200,
    xAxisTitle = "Size of Hidden Layer",
    title = "Size of Hidden Layer",
)

In [None]:
# Two hidden layers of 10 units, SGD solver, 'logistic' activation, with early stopping.
executeNeuralNetwork(
    X,
    Y,
    layers=(10, 10),
    solverType='sgd',
    act='logistic',
    earlyStopping=True,
)

In [None]:
# Two hidden layers of 10 units, 'logistic' activation, with early stopping and
# the 'invscaling' learning-rate schedule.
# NOTE(review): the solver is left at its default ('adam'), and the notes in
# executeNeuralNetwork say learning_rate is only used when solver='sgd', so
# 'invscaling' appears to have no effect here -- confirm whether
# solverType='sgd' was intended.
executeNeuralNetwork(
    X,
    Y,
    layers=(10, 10),
    learningRate='invscaling',
    act='logistic',
    earlyStopping=True,
)