In [None]:
# import torch 
from torch import nn
import torch
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from scipy.stats import skew
from scipy.stats import kurtosis
from prettytable import PrettyTable
import os
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

# Raise pandas' display limits so wide/long frames are shown without truncation.
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)


# Pick the compute device: the CPU when CUDA is unavailable, otherwise the
# first CUDA GPU (whose name and compute capability are printed).
if not torch.cuda.is_available():
    device = torch.device("cpu")

else:
    device = torch.device("cuda:0")
    print("Cuda Device Available")
    print("Name of the Cuda Device: ", torch.cuda.get_device_name())
    print("GPU Computational Capablity: ", torch.cuda.get_device_capability())
    
def changeCourtToValue(court):
    """Translate a court-surface name into its integer code.

    'Hard' -> 1, 'Clay' -> 2, 'Grass' -> 3, 'Carpet' -> 4.
    Any value that equals none of these names yields None.
    """
    surface_codes = (('Hard', 1), ('Clay', 2), ('Grass', 3), ('Carpet', 4))
    for surface_name, code in surface_codes:
        if court == surface_name:
            return code
    # No surface matched.
    return None
    
def changeHandToValue(hand):
    """Translate a playing-hand letter into its numeric code.

    'R' -> 1, 'U' -> 0, 'L' -> -1.
    Any value that equals none of these letters yields None.
    """
    hand_codes = (('R', 1), ('U', 0), ('L', -1))
    for letter, code in hand_codes:
        if hand == letter:
            return code
    # No letter matched.
    return None

    
    
def BuildDataset(data_dir="data", years=range(2015, 2023)):
    """Load the yearly ATP match files and build a Player1-vs-Player2 table.

    Each input row describes a match as winner/loser. The result relabels the
    two sides as P1/P2 and swaps them on every other row, so the label column
    ``Player1Wins`` alternates 0, 1, 0, 1, ... instead of being constantly 1.

    Args:
        data_dir: directory holding the ``atp_matches_<year>.csv`` files.
        years: iterable of years to load (default 2015 through 2022).

    Returns:
        pandas.DataFrame with a fresh 0..n-1 index and the columns
        ``surface, P1Hand, P1Height, P1Age, P1Rank, P1RankPoint, P2Hand,
        P2Height, P2Age, P2Rank, P2RankPoint, Player1Wins``.

    Raises:
        ValueError: if ``years`` is empty (nothing to concatenate).
        FileNotFoundError: if one of the yearly files is missing.
    """
    # Read every season first and concatenate once instead of growing the
    # frame inside the loop. join="inner" keeps only the columns that every
    # yearly file has.
    frames = [
        pd.read_csv(os.path.join(data_dir, "atp_matches_" + str(year) + ".csv"))
        for year in years
    ]
    df = pd.concat(frames, join="inner", ignore_index=True)

    # .copy() makes the column subset an independent frame, so the fills below
    # never write into a slice of the concatenated data.
    df = df[['surface', 'winner_hand', 'winner_ht', 'winner_age', 'winner_rank',
             'winner_rank_points', 'loser_hand', 'loser_ht', 'loser_age',
             'loser_rank', 'loser_rank_points']].copy()

    # Missing height / age -> column mean.
    for column in ['winner_ht', 'winner_age', 'loser_ht', 'loser_age']:
        df[column] = df[column].fillna(df[column].mean())

    # Missing rank -> the largest rank number present in that column.
    for column in ['winner_rank', 'loser_rank']:
        df[column] = df[column].fillna(df[column].max())

    # Missing ranking points -> 1.
    for column in ['winner_rank_points', 'loser_rank_points']:
        df[column] = df[column].fillna(1)

    # Missing hand -> right-handed.
    for column in ['winner_hand', 'loser_hand']:
        df[column] = df[column].fillna('R')

    # NOTE(review): 'surface' is not imputed, and the two converters return
    # None for values they do not recognise, so nulls can survive this step.
    df['surface'] = df['surface'].apply(changeCourtToValue)
    df['winner_hand'] = df['winner_hand'].apply(changeHandToValue)
    df['loser_hand'] = df['loser_hand'].apply(changeHandToValue)
    df['Player1Wins'] = 1

    df = df.rename(columns={
        'winner_hand': 'P1Hand', 'winner_ht': 'P1Height', 'winner_age': 'P1Age',
        'winner_rank': 'P1Rank', 'winner_rank_points': 'P1RankPoint',
        'loser_hand': 'P2Hand', 'loser_ht': 'P2Height', 'loser_age': 'P2Age',
        'loser_rank': 'P2Rank', 'loser_rank_points': 'P2RankPoint',
    })

    # Swap the two players on every even-positioned row. The mask is derived
    # from the actual row count (not a hard-coded total), so the whole table
    # is covered whatever its size, and the swap is done column-wise rather
    # than one cell at a time.
    swap_mask = np.arange(len(df)) % 2 == 0
    for attribute in ('Hand', 'Height', 'Age', 'Rank', 'RankPoint'):
        p1_col, p2_col = 'P1' + attribute, 'P2' + attribute
        p1_values, p2_values = df[p1_col].to_numpy(), df[p2_col].to_numpy()
        df[p1_col], df[p2_col] = (np.where(swap_mask, p2_values, p1_values),
                                  np.where(swap_mask, p1_values, p2_values))
    # On swapped rows Player 1 is the original loser.
    df.loc[swap_mask, 'Player1Wins'] = 0
    return df

def swapPlayers(df, index):
    """Exchange the Player-1 and Player-2 values on one row, in place.

    For the row labelled ``index`` the Hand, Height, Age, Rank and RankPoint
    values of P1 and P2 trade places and ``Player1Wins`` is set to 0.
    The same (mutated) frame is returned.
    """
    for attribute in ('Hand', 'Height', 'Age', 'Rank', 'RankPoint'):
        first_col = 'P1' + attribute
        second_col = 'P2' + attribute
        # Read both sides before writing either one.
        second_value = df.loc[index, second_col]
        first_value = df.loc[index, first_col]
        df.loc[index, first_col] = second_value
        df.loc[index, second_col] = first_value
    df.loc[index, 'Player1Wins'] = 0
    return df

class NeuralNet(nn.Module):
    """Feed-forward binary classifier with layer widths 11-32-64-32-16-4-1.

    The six linear layers are exposed as ``ff1`` ... ``ff6``. The forward pass
    normalises the input with ``F.normalize`` (library defaults), applies ReLU
    after each of the first five layers and a sigmoid after the last one, so
    every sample produces a single value in (0, 1).
    """

    def __init__(self):
        super().__init__()
        layer_widths = (11, 32, 64, 32, 16, 4, 1)
        # Register ff1..ff6 in order, one linear layer per adjacent width pair.
        for position, (fan_in, fan_out) in enumerate(zip(layer_widths, layer_widths[1:]), start=1):
            setattr(self, 'ff' + str(position), nn.Linear(fan_in, fan_out))

    def forward(self, x):
        """Return the sigmoid output of the network for a batch ``x``."""
        activation = F.normalize(x)
        for hidden_layer in (self.ff1, self.ff2, self.ff3, self.ff4, self.ff5):
            activation = F.relu(hidden_layer(activation))
        return torch.sigmoid(self.ff6(activation))
    
def Train(model, X_train, y_train, epochs, learningRate, batchSize):
    
    criterion = nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learningRate)
    
    dataset = torch.utils.data.TensorDataset(X_train, y_train)
    dataLoader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batchSize, shuffle=True)
    
    for epoch in range(epochs):
        for i, (data, label) in enumerate(dataLoader):
         #   data = data
            label = label.type(torch.LongTensor)

            output = model(data)
            output = output.squeeze(1)
            output = output.float()
            label = label.float()
            #print(output)
            #print(label)
            output = torch.nan_to_num(output, nan=0.5)
            loss = criterion(output, label)
            #print(loss)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            #Need Validation
            
        print("Epoch: " + str(epoch) + " Loss: " + str(loss.item()))

    print('Training Complete')
    
def Test(model, X_test, y_test):
    """Return the summed absolute prediction error of ``model`` on a test set.

    The model output is flattened to one value per sample before it is
    compared with the labels. Subtracting an ``(N,)`` label vector from an
    ``(N, 1)`` output would broadcast to an ``(N, N)`` matrix, which is not a
    per-sample error.

    Args:
        model: callable / ``nn.Module`` producing one prediction per sample.
        X_test: feature tensor; the first dimension indexes samples.
        y_test: tensor with one target per sample.

    Returns:
        numpy scalar: ``sum(|prediction_i - label_i|)`` over all test samples.

    Raises:
        ValueError: if the number of predictions and labels differ.
    """
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        output = model(X_test)

    prediction = output.reshape(-1)
    label = y_test.reshape(-1).to(device=prediction.device, dtype=prediction.dtype)
    if prediction.shape != label.shape:
        raise ValueError(
            "model produced " + str(prediction.shape[0]) + " predictions for "
            + str(label.shape[0]) + " labels"
        )

    error = (prediction - label).cpu().detach().numpy()
    return np.sum(np.abs(error))
            

def TestDNN():
    """Build the dataset, train a fresh NeuralNet and print its test error.

    The table from BuildDataset is split at random into 67% train / 33% test.
    Every column except the last is used as a feature and the last column as
    the label. Training runs for 10 epochs with learning rate 0.0001 and
    batch size 10.
    """
    df = BuildDataset()
    M = NeuralNet()

    def as_float_tensors(frame):
        # Features are all columns but the last; the label is the last column.
        features = torch.tensor(np.array(frame.iloc[:, 0:-1])).float()
        targets = torch.tensor(np.array(frame.iloc[:, -1])).float()
        return features, targets

    train, test = train_test_split(df, test_size=0.33)
    X_train, y_train = as_float_tensors(train)
    X_test, y_test = as_float_tensors(test)

    Train(M, X_train, y_train, 10, 0.0001, 10)
    print("Sum Error: " + str(Test(M, X_test, y_test)))
    
# Cell entry point: run the dataset build, training and evaluation defined in TestDNN.
TestDNN()

Cuda Device Available
Name of the Cuda Device:  NVIDIA GeForce RTX 3060 Laptop GPU
GPU Computational Capablity:  (8, 6)
Epoch: 0 Loss: 0.6963943839073181
Epoch: 1 Loss: 0.6900873184204102
Epoch: 2 Loss: 0.6901467442512512
Epoch: 3 Loss: 0.6961531639099121
Epoch: 4 Loss: 0.6902897357940674
Epoch: 5 Loss: 0.69318026304245
Epoch: 6 Loss: 0.6986598372459412


In [8]:
#df.isna().sum()

surface        0
P1Hand         0
P1Height       0
P1Age          0
P1Rank         0
P1RankPoint    0
P2Hand         0
P2Height       0
P2Age          0
P2Rank         0
P2RankPoint    0
Player1Wins    0
dtype: int64