In [41]:
import torch 
from torch import nn
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from scipy.stats import skew
from scipy.stats import kurtosis
from prettytable import PrettyTable
import os

# Raise pandas' display limits so wide/long frames are printed in full.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000


# Pick the compute device: the first CUDA GPU when torch reports one,
# otherwise the CPU.  GPU details are printed only on the CUDA path.
if not torch.cuda.is_available():
    device = torch.device("cpu")
else:
    device = torch.device("cuda:0")
    print("Cuda Device Available")
    print("Name of the Cuda Device: ", torch.cuda.get_device_name())
    print("GPU Computational Capablity: ", torch.cuda.get_device_capability())
    
def changeCourtToValue(court):
    """Translate a court-surface label into its integer code.

    'Hard' -> 1, 'Clay' -> 2, 'Grass' -> 3, 'Carpet' -> 4.
    Any value that matches none of these labels yields ``None``.
    """
    surface_codes = (('Hard', 1), ('Clay', 2), ('Grass', 3), ('Carpet', 4))
    # Compare in the listed order and stop at the first match.
    for label, code in surface_codes:
        if court == label:
            return code
    return None
    
def changeHandToValue(hand):
    """Translate a playing-hand label into a signed integer code.

    'R' -> 1, 'U' -> 0, 'L' -> -1.
    Any value that matches none of these labels yields ``None``.
    """
    hand_codes = (('R', 1), ('U', 0), ('L', -1))
    # First matching label wins; fall back to None when nothing matches.
    return next((code for label, code in hand_codes if hand == label), None)

    
    
def BuildDataset(data_dir="data", first_year=2015, last_year=2022):
    """Load yearly ATP match CSVs and return a cleaned, numeric feature frame.

    One file per year, named ``atp_matches_<year>.csv``, is read from
    ``data_dir`` for every year in ``first_year..last_year`` (inclusive).
    The frames are stacked keeping only the columns common to all of them,
    reduced to the surface / hand / height / age / rank / rank-points
    columns, and missing values are filled in.

    Args:
        data_dir: Directory containing the yearly CSV files.
        first_year: First season to load (inclusive).
        last_year: Last season to load (inclusive).

    Returns:
        A ``pandas.DataFrame`` with a fresh 0..n-1 index in which:
          * heights and ages have NaNs replaced by the column mean,
          * ranks have NaNs replaced by the column maximum,
          * rank points have NaNs replaced by 1,
          * ``surface`` is encoded by ``changeCourtToValue`` and the two
            hand columns by ``changeHandToValue``.

    Raises:
        ValueError: If ``first_year`` is greater than ``last_year``.
    """
    if first_year > last_year:
        raise ValueError(
            "first_year (%s) must not exceed last_year (%s)" % (first_year, last_year)
        )

    # Read every season first and concatenate once; concatenating inside the
    # loop recopies all previously accumulated rows on each iteration.
    yearly_frames = [
        pd.read_csv(os.path.join(data_dir, "atp_matches_" + str(year) + ".csv"))
        for year in range(first_year, last_year + 1)
    ]
    df = pd.concat(yearly_frames, join="inner", ignore_index=True)

    feature_columns = [
        'surface',
        'winner_hand', 'winner_ht', 'winner_age', 'winner_rank', 'winner_rank_points',
        'loser_hand', 'loser_ht', 'loser_age', 'loser_rank', 'loser_rank_points',
    ]
    # Explicit copy so the column assignments below act on an independent
    # frame rather than on a selection of the concatenated one.
    df = df[feature_columns].copy()

    # Heights and ages: impute with the column average.
    for column in ('winner_ht', 'winner_age', 'loser_ht', 'loser_age'):
        df[column] = df[column].fillna(df[column].mean())

    # Ranks: a missing rank is filled with the largest rank value observed.
    for column in ('winner_rank', 'loser_rank'):
        df[column] = df[column].fillna(df[column].max())

    # Rank points: a missing value is filled with 1.
    for column in ('winner_rank_points', 'loser_rank_points'):
        df[column] = df[column].fillna(1)

    # Encode the categorical columns as numbers.
    df['surface'] = df['surface'].apply(changeCourtToValue)
    df['winner_hand'] = df['winner_hand'].apply(changeHandToValue)
    df['loser_hand'] = df['loser_hand'].apply(changeHandToValue)

    return df

def main():
    """Build the dataset and print its last rows."""
    print(BuildDataset().tail())
    
# The main() entry point is left disabled; the dataset is built directly below.
#main()
# Build the cleaned feature frame once at cell level and keep it as `df`.
df = BuildDataset()
# Bare expression: shows the frame when it is the last statement of a notebook cell.
df


Cuda Device Available
Name of the Cuda Device:  NVIDIA GeForce RTX 3060 Laptop GPU
GPU Computational Capablity:  (8, 6)


Unnamed: 0,surface,winner_hand,winner_ht,winner_age,winner_rank,winner_rank_points,loser_hand,loser_ht,loser_age,loser_rank,loser_rank_points
0,1,1.0,183.00000,25.560575,153.0,328.0,1.0,186.930038,23.791923,220.0,221.0
1,1,-1.0,185.00000,33.453799,73.0,689.0,1.0,180.000000,22.384668,123.0,440.0
2,1,1.0,183.00000,22.956879,125.0,430.0,1.0,183.000000,30.023272,21.0,1730.0
3,1,1.0,188.00000,27.895962,31.0,1195.0,1.0,185.000000,27.457906,72.0,691.0
4,1,-1.0,190.00000,25.486653,34.0,1094.0,-1.0,183.000000,33.623546,110.0,505.0
...,...,...,...,...,...,...,...,...,...,...,...
19617,2,1.0,187.60774,20.895277,1103.0,9.0,1.0,186.930038,20.257358,1130.0,8.0
19618,2,0.0,187.60774,21.590691,808.0,23.0,1.0,186.930038,28.098563,1390.0,4.0
19619,1,1.0,187.60774,23.498973,1059.0,10.0,0.0,186.930038,33.092402,1881.0,1.0
19620,1,1.0,187.60774,17.730322,2101.0,1.0,1.0,186.930038,21.702943,2147.0,1.0
