In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from sklearn.model_selection import train_test_split
from itertools import permutations

data = pd.read_csv('C:\\Users\\Owner\\Napa\\results_model_data.csv')

In [2]:
def result_assign(win_margin):
    # This function converts the win_margin column into a binary win/loss result
    if win_margin>0:
        return 1
    else:
        return 0

In [3]:
def sigmoid(z):
    # Computes the sigmoid function for logistic regression
    return 1 / (1 + np.exp(-z))

In [4]:
def sigmoid_gradient(z):
    # Computes the gradient of the sigmoid function, to be used in backpropagation
    return np.multiply(sigmoid(z), (1 - sigmoid(z)))

In [5]:
def forward_propagate(X, theta1, theta2):
    # Calculate the hypothesis using input values of theta for each stage of the network
    m = X.shape[0]
    # Insert bias unit for input layer
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)   
    z2 = a1 * theta1.T
    # Insert bias unit for hidden layer
    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)
    z3 = a2 * theta2.T
    h = sigmoid(z3)
    
    return a1, z2, a2, z3, h

In [6]:
def backward_prop(params, input_layer_size, hidden_layer_size, num_labels, X, y):

    # Reshape the parameter array back into the respective matrices
    theta1 = np.matrix(np.reshape(params[:hidden_layer_size * (input_layer_size + 1)], (hidden_layer_size, (input_layer_size + 1))))
    theta2 = np.matrix(np.reshape(params[hidden_layer_size * (input_layer_size + 1):], (num_labels, (hidden_layer_size + 1))))
    
    # Forward propagate through the network
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    
    # Initialize
    J = 0
    delta1 = np.zeros(theta1.shape)
    delta2 = np.zeros(theta2.shape)
    
    # Compute cost
    first = np.multiply(-y, np.log(h))
    second = np.multiply((1 - y), np.log(1 - h))
    J = np.sum(first - second) / m
    
    # Backpropagate to get gradients   
    d3 = h - y
    d2 = np.multiply((d3*theta2[:,1:hidden_layer_size+1]), sigmoid_gradient(z2))  
    delta1 = (np.matmul(a1.T, d2)).T / m
    delta2 = (np.matmul(d3.T, a2)) / m
    
    # Reshape gradient matrices into a single array
    grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))
    
    return J, grad

In [9]:
def get_race(p1, p2):
    # This function calculates the respective number of  games that player 1 and player 2 are required to win, 
    # in order to win a particular match according to NAPA rules, given their respective skill levels, p1 and p2.
    # Full explanation at www.napaleagues.com/naparaces
    
    rc1 = [2,3,3,3,4,4,4,5,4,5,6,5,5,6,5,6,7,6,7,8,6,6,7,6,7,8,7,8,9,8,9,10]
    rc2 = [2,2,3,2,2,4,3,3,2,2,2,5,4,4,3,3,3,2,2,2,6,5,5,4,4,4,3,3,3,2,2,2]

    if (p1 < 40) & ((p1 - p2) < 20):
        r1 = rc1[0]
        r2 = rc2[0]
    elif (p1 < 40) & ((p1 - p2) > 19):
        r1 = rc1[1]
        r2 = rc2[1]
    elif (p1 > 39) & (p1 < 50) & ((p1 - p2) < 11):
        r1 = rc1[2]
        r2 = rc2[2]
    elif (p1 > 39) & (p1 < 50) & ((p1 - p2) > 10) & ((p1 - p2) < 27):
        r1 = rc1[3]
        r2 = rc2[3]
    elif (p1 > 39) & (p1 < 50) & ((p1 - p2) > 26):
        r1 = rc1[4]
        r2 = rc2[4]
    elif (p1 > 49) & (p1 < 70) & ((p1 - p2) < 7):
        r1 = rc1[5]
        r2 = rc2[5]
    elif (p1 > 49) & (p1 < 70) & ((p1 - p2) > 6) & ((p1 - p2) < 19):
        r1 = rc1[6]
        r2 = rc2[6]
    elif (p1 > 49) & (p1 < 70) & ((p1 - p2) > 18) & ((p1 - p2) < 30):
        r1 = rc1[7]
        r2 = rc2[7]
    elif (p1 > 49) & (p1 < 70) & ((p1 - p2) > 29) & ((p1 - p2) < 40):
        r1 = rc1[8]
        r2 = rc2[8]
    elif (p1 > 49) & (p1 < 70) & ((p1 - p2) > 39) & ((p1 - p2) < 49):
        r1 = rc1[9]
        r2 = rc2[9]
    elif (p1 > 49) & (p1 < 70) & ((p1 - p2) > 48):
        r1 = rc1[10]
        r2 = rc2[10]
    elif (p1 > 69) & (p1 < 90) & ((p1 - p2) < 6):
        r1 = rc1[11]
        r2 = rc2[11]
    elif (p1 > 69) & (p1 < 90) & ((p1 - p2) > 5) & ((p1 - p2) < 15):
        r1 = rc1[12]
        r2 = rc2[12]
    elif (p1 > 69) & (p1 < 90) & ((p1 - p2) > 14) & ((p1 - p2) < 22):
        r1 = rc1[13]
        r2 = rc2[13]
    elif (p1 > 69) & (p1 < 90) & ((p1 - p2) > 21) & ((p1 - p2) < 29):
        r1 = rc1[14]
        r2 = rc2[14]
    elif (p1 > 69) & (p1 < 90) & ((p1 - p2) > 28) & ((p1 - p2) < 37):
        r1 = rc1[15]
        r2 = rc2[15]
    elif (p1 > 69) & (p1 < 90) & ((p1 - p2) > 36) & ((p1 - p2) < 47):
        r1 = rc1[16]
        r2 = rc2[16]
    elif (p1 > 69) & (p1 < 90) & ((p1 - p2) > 46) & ((p1 - p2) < 57):
        r1 = rc1[17]
        r2 = rc2[17]
    elif (p1 > 69) & (p1 < 90) & ((p1 - p2) > 56) & ((p1 - p2) < 63):
        r1 = rc1[18]
        r2 = rc2[18]
    elif (p1 > 69) & (p1 < 90) & ((p1 - p2) > 62):
        r1 = rc1[19]
        r2 = rc2[19]
    elif (p1 > 89) & ((p1 - p2) < 5):
        r1 = rc1[20]
        r2 = rc2[20]
    elif (p1 > 89) & ((p1 - p2) > 4) & ((p1 - p2) < 12):
        r1 = rc1[21]
        r2 = rc2[21]
    elif (p1 > 89) & ((p1 - p2) > 11) & ((p1 - p2) < 18):
        r1 = rc1[22]
        r2 = rc2[22]
    elif (p1 > 89) & ((p1 - p2) > 17) & ((p1 - p2) < 23):
        r1 = rc1[23]
        r2 = rc2[23]
    elif (p1 > 89) & ((p1 - p2) > 22) & ((p1 - p2) < 29):
        r1 = rc1[24]
        r2 = rc2[24]
    elif (p1 > 89) & ((p1 - p2) > 28) & ((p1 - p2) < 36):
        r1 = rc1[25]
        r2 = rc2[25]
    elif (p1 > 89) & ((p1 - p2) > 35) & ((p1 - p2) < 43):
        r1 = rc1[26]
        r2 = rc2[26]
    elif (p1 > 89) & ((p1 - p2) > 42) & ((p1 - p2) < 49):
        r1 = rc1[27]
        r2 = rc2[27]
    elif (p1 > 89) & ((p1 - p2) > 48) & ((p1 - p2) < 59):
        r1 = rc1[28]
        r2 = rc2[28]
    elif (p1 > 89) & ((p1 - p2) > 58) & ((p1 - p2) < 69):
        r1 = rc1[29]
        r2 = rc2[29]
    elif (p1 > 89) & ((p1 - p2) > 68) & ((p1 - p2) < 75):
        r1 = rc1[30]
        r2 = rc2[30]
    elif (p1 > 89) & ((p1 - p2) > 74):
        r1 = rc1[31]
        r2 = rc2[31]
    else:
        r1 = 0
        r2 = 0
    return r1, r2

In [7]:
# Add a new binary column to the data, which has value 1 where the result is positive, and 0 if negative
data['Result'] = data.apply(lambda x: result_assign(x['Win Margin']),axis=1)
# Select only quantitive paramaters to be used in the model
model_data = data[['Race Margin', 'Win % Margin', 'Skill Margin', 'Game Margin', 'AvgPPM Margin', 'Result']]

# Set X (training data) and y (target variable)
cols = model_data.shape[1]
X = model_data.iloc[:,0:cols-1]
y = model_data.iloc[:,cols-1:cols]
y0 = y
# Split the data into training and validation sets with 80/20 ratio
train_X, val_X, train_y, val_y = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state = 0)

# Convert to numpy matrices
m = X.shape[0]
X_train = np.matrix(train_X)
y_train = np.matrix(train_y)
X_val = np.matrix(val_X)
y_val = np.matrix(val_y)

# Define architecture of neural network
input_layer_size  = cols-1;  # Each match has 5 features
hidden_layer_size = 50;      # 50 hidden units
num_labels = 1;              # Win/Loss parameter

# Randomly initialize the input parameter array, with values normalized by length
epsilon_1 = np.sqrt(6./(hidden_layer_size + input_layer_size))
epsilon_2 = np.sqrt(6./(hidden_layer_size + num_labels))
param1 = np.random.random(size=hidden_layer_size * (input_layer_size + 1))*2*epsilon_1 - epsilon_1
param2 = np.random.random(size=num_labels * (hidden_layer_size + 1))*2*epsilon_2 - epsilon_2
params = np.concatenate((param1,param2))

In [8]:
# Minimize the backpropagation cost function
fmin = minimize(fun=backward_prop, x0=params, args=(input_layer_size, hidden_layer_size, num_labels, X_train, y_train), 
                method='TNC', jac=True, options={'maxiter': 250})

# Retrieve the corresponding theta parameters and reshape to matrices
theta1 = np.matrix(np.reshape(fmin.x[:hidden_layer_size * (input_layer_size + 1)], (hidden_layer_size, (input_layer_size + 1))))
theta2 = np.matrix(np.reshape(fmin.x[hidden_layer_size * (input_layer_size + 1):], (num_labels, (hidden_layer_size + 1))))

# Calculate predictions based on the model
a1_t, z2_t, a2_t, z3_t, h_t = forward_propagate(X_train, theta1, theta2)
a1_v, z2_v, a2_v, z3_v, h_v = forward_propagate(X_val, theta1, theta2)
y_pred_train = [1 if i>=0.5 else 0 for i in h_t]
y_pred_val = [1 if i>=0.5 else 0 for i in h_v]

# Compare predictions to actual data
correct_train = [1 if a == b else 0 for (a, b) in zip(y_pred_train, y_train)]
correct_val = [1 if a == b else 0 for (a, b) in zip(y_pred_val, y_val)]
accuracy_train = (sum(map(int, correct_train)) / float(len(correct_train)))
accuracy_val = (sum(map(int, correct_val)) / float(len(correct_val)))
print 'Train accuracy = {0}%'.format(accuracy_train * 100)
print 'Validation accuracy = {0}%'.format(accuracy_val * 100)

  This is separate from the ipykernel package so we can avoid doing imports until


Train accuracy = 63.1289308176%
Validation accuracy = 63.5369871219%


In [10]:
# Enter Player details in order: Name, Win %, Skill, Num Games, AvgPPM
# e.g. playerA1 = [Name, 57.2, 88, 23, 8.76]

A1 = ['Brugamyer',63,109,63,10.86]
A2 = ['Jones',45,53,51,8.56]
A3 = ['Andres',41,21,44,8.73]
A4 = ['Czysz',47,36,79,9.25]
A5 = ['Myers',68,108,62,11.6]

B1 = ['Erdmann',75,110,28,12.89]
B2 = ['Carillo',63,61,16,11.44]
B3 = ['Bilberry',11,0,9,2.78]
B4 = ['Warrington',25,41,12,6.58]
B5 = ['Miller',55,90,60,9.35]

team_A = [A1,A2,A3,A4,A5]
team_B = [B1,B2,B3,B4,B5]
all_players = team_A + team_B

# Generate a list of all possible permutations for matchups between players on each team
pm = list(permutations(range(0,len(team_A))))

# Initialize an array to hold the overall probability of team A winning the match for each permutation
tot_probs = np.zeros(len(pm))

# Initialize list to hold the probabilities of each player from team A winning their matchup for a given permutation
probs = []

for i in range(0,len(pm)):
    
    # For each permutation, remove player names and create a match matrix in the same format as the 
    # training/validation data
    
    A=[x[1:] for x in team_A]
    B = [team_B[x][1:] for x in pm[i]]
    
    for j,dm in enumerate(A):
        
        # Calculate the required races for each matchup based on the skill levels of the competing players
        
        A[j].insert(0,get_race(A[j][1],B[j][1])[0])
        B[j].insert(0,get_race(A[j][1],B[j][1])[1])
        
    matchup = np.matrix(A) - np.matrix(B)
    
    # Fit the match matrix with the neural network model and record winning probabilities for team A
    a1_m, z2_m, a2_m, z3_m, h_m = forward_propagate(matchup, theta1, theta2)
    probs.append(h_m)
    tot_probs[i] = h_m.sum()
    
# Find the permutation which yields the highest total winning probability 
best_idx = np.argmax(np.array(tot_probs))
best_B = [team_B[x][0] for x in pm[best_idx]]
best_probs = probs[best_idx]

a_names = [team_A[i][0] for i in range(0,len(team_A))]
best_matchup = pd.DataFrame({'Player A':a_names,'Player B': best_B})
best_matchup['% Win Prob'] = np.array(best_probs*100).round(1)

print('BEST PLAYER MATCHUP FOR TEAM A')
print('------------------------------------')
print(best_matchup)
print('------------------------------------')
print('Match win probability for team A: {0}%'.format(round((tot_probs[best_idx]*100)/len(team_A) , 1)))
print('Average match win probability for team A: {0}%'.format(round((sum(tot_probs)*100)/(len(tot_probs)*len(team_A)),1)))

BEST PLAYER MATCHUP FOR TEAM A
------------------------------------
    Player A    Player B  % Win Prob
0  Brugamyer     Carillo        56.2
1      Jones     Erdmann        19.4
2     Andres    Bilberry        79.6
3      Czysz  Warrington        80.0
4      Myers      Miller        55.2
------------------------------------
Match win probability for team A: 58.1%
Average match win probability for team A: 54.1%
