In [12]:

import gc
import os
import random   

import numpy as np
import pandas as pd

from tqdm import tqdm


### Load Data

In [13]:
## https://github.com/JeffSackmann/tennis_wta

DATA_PATH = 'D://Medium'
os.listdir(DATA_PATH)


['ncaam_sample_data.csv',
 'wta_matches_2018.csv',
 'wta_matches_2019.csv',
 'wta_matches_2020.csv',
 'wta_matches_2021.csv',
 'wta_matches_2022.csv',
 'wta_matches_2023.csv',
 'wta_matches_qual_itf_2018.csv',
 'wta_matches_qual_itf_2019.csv',
 'wta_matches_qual_itf_2020.csv',
 'wta_matches_qual_itf_2021.csv',
 'wta_matches_qual_itf_2022.csv',
 'wta_matches_qual_itf_2023.csv']

In [14]:

qual_itf_data = pd.concat([
    pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_qual_itf_2018.csv')),
    pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_qual_itf_2019.csv')),
    pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_qual_itf_2020.csv')),
    pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_qual_itf_2021.csv')),
    pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_qual_itf_2022.csv')),
    pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_qual_itf_2023.csv'))
], axis=0).reset_index(drop=True)


  pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_qual_itf_2019.csv')),
  pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_qual_itf_2021.csv')),
  pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_qual_itf_2022.csv')),
  pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_qual_itf_2023.csv'))


In [15]:
tour_level = pd.concat([
    pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_2018.csv')),
    pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_2019.csv')),
    pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_2020.csv')),
    pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_2021.csv')),
    pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_2022.csv')),
    pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_2023.csv'))
], axis=0).reset_index(drop=True)

wta_sample_data = pd.concat([qual_itf_data, tour_level], axis=0).reset_index(drop=True)
wta_sample_data['tourney_date'] = pd.to_datetime(wta_sample_data['tourney_date'].apply(lambda x: f"{str(x)[4:-2]}-{str(x)[-2:]}-{str(x)[:4]}"))
wta_sample_data = wta_sample_data.sort_values(by=['tourney_id','match_num']).reset_index(drop=True)
wta_sample_data = wta_sample_data.drop_duplicates(subset=['tourney_id','match_num','winner_id']).reset_index(drop=True)


In [16]:

players = set(wta_sample_data['winner_id'].values)
losers = set(wta_sample_data['loser_id'].values)
players = players.union(losers)


### Run Classic Elo

In [18]:

player_names = {}
win_names = wta_sample_data[['winner_id','winner_name']].copy().set_index('winner_id').to_dict()['winner_name']
lose_names = wta_sample_data[['loser_id','loser_name']].copy().set_index('loser_id').to_dict()['loser_name']
player_names.update(win_names)
player_names.update(lose_names)


In [19]:

class StatefulSystem:
    def __init__(self):
        self.history = []  # to store history of predictions and results

    def predict_1v1(self, player1, player2, **kwargs):
        raise NotImplementedError  # This method should be implemented in child classes

    def update_1v1(self, player1, player2, result, **kwargs):
        raise NotImplementedError  # This method should be implemented in child classes
        
class PlayerNode():
    def __init__(self, rating):
        self.rating=rating
        
class EloNode(PlayerNode):
    def __init__(self,_id, name, rating=1500):
        super().__init__(rating)
        self._id = _id
        self.name = name
        self.rating = rating
        self.rank = 200
        
class EloSystem(StatefulSystem):
    def __init__(self, k_factor, meta_functions=None):
        super().__init__()
        self.history = []
        self.k_factor = k_factor
        ## for edge info like home, days off
        self.meta_functions = meta_functions

    def predict_1v1(self, player1, player2, **kwargs):
        # Meta information can be accessed as dictionary items, e.g., kwargs['is_home'], kwargs['days_off']
        rd = player1.rating - player2.rating
        if self.meta_functions is not None:
            ## add all adjustments for meta information
            for meta_key, meta_function in self.meta_functions.items():
                rd += meta_function(kwargs[meta_key])
        prediction = 1/(1+10**(-rd/400))
        return prediction
    def update_1v1(self, prediction, result):
        points_exchanged = self.k_factor*(result-prediction)
        return points_exchanged
    
    def play_match(self, p1, p2, result, **kwargs):
        prediction = self.predict_1v1(p1, p2, **kwargs)
        ratings_delta = self.update_1v1(prediction, result)
        self.history.append([p1._id, p1.name, p2._id, p2.name, p1.rating, p2.rating, prediction, result, ratings_delta])
        p1.rating+=ratings_delta
        p2.rating-=ratings_delta
        return p1, p2
    
    def get_history(self):
        return pd.DataFrame(self.history, columns=['p1_id','p1_name','p2_id','p2_name','p1_rating','p2_rating','prediction','result','ratings_delta'])

elo_sys = EloSystem(k_factor=50)
player_ratings = {_id:EloNode(_id, name, 1500) for _id, name in player_names.items()}
for index, row in tqdm(wta_sample_data.iterrows(), total=len(wta_sample_data)):
    ## randomize who is p1 and who is p2
    match_participants = [row['winner_id'], row['loser_id']]
    random.shuffle(match_participants)
    p1_id, p2_id = match_participants
    p1_node = player_ratings[p1_id]
    p2_node = player_ratings[p2_id]
    result = 1 if p1_node._id == row['winner_id'] else 0
    p1_node, p2_node = elo_sys.play_match(p1_node, p2_node, result)
    
    ## just tracking world ranking
    if result == 1:
        p1_node.rank = row['winner_rank']
        p2_node.rank = row['loser_rank']
    else:
        p1_node.rank = row['loser_rank']
        p2_node.rank = row['winner_rank']
    player_ratings[p1_id] = p1_node
    player_ratings[p2_id] = p2_node
    
    
hist = elo_sys.get_history()
hist
    

100%|███████████████████████████████████████████████████████████████████████| 145608/145608 [00:03<00:00, 37159.22it/s]


Unnamed: 0,p1_id,p1_name,p2_id,p2_name,p1_rating,p2_rating,prediction,result,ratings_delta
0,202702,Tereza Smitkova,211222,Jamie Loeb,1500.000000,1500.000000,0.500000,1,25.000000
1,204430,Antonia Lottner,211685,Rebecca Sramkova,1500.000000,1500.000000,0.500000,0,-25.000000
2,212044,Katharina Gerlach,213889,Greet Minnen,1500.000000,1500.000000,0.500000,0,-25.000000
3,206349,Mayo Hibi,201697,Kristyna Pliskova,1500.000000,1500.000000,0.500000,0,-25.000000
4,213779,Raluka Serban,202446,Eugenie Bouchard,1500.000000,1500.000000,0.500000,0,-25.000000
...,...,...,...,...,...,...,...,...,...
145603,222866,Katherine Hui,221374,Danielle Willson,1721.003373,1596.003660,0.672509,1,16.374536
145604,263685,Alessia Cau,215840,Haley Giavara,1576.349293,1755.683189,0.262633,0,-13.131642
145605,259853,Eryn Cayetano,203288,Jia Jing Lu,1721.567422,1743.831527,0.468003,0,-23.400158
145606,222866,Katherine Hui,215840,Haley Giavara,1737.377909,1768.814832,0.454882,0,-22.744085


In [20]:
from sklearn.metrics import log_loss
#50: 0.5812750355074674, -0.750812
#60: 0.5780273230899833, -0.748345
grade_cutoff = int(0.2*len(hist))
log_loss(hist[-grade_cutoff:]['result'].values, hist[-grade_cutoff:]['prediction'].values)

0.5812750355074674

In [21]:
rtg_df = pd.DataFrame([[k,v.name, v.rating, v.rank] for k,v in player_ratings.items()], columns=['id','name','rating','world_rank'])
rtg_df[['rating','world_rank']].corr()

Unnamed: 0,rating,world_rank
rating,1.0,-0.750812
world_rank,-0.750812,1.0


In [230]:

rtg_df.sort_values(by='rating', ascending=False).head(20)


Unnamed: 0,id,name,rating,world_rank
255,206219,Oceane Dodin,11.66798,77.0
1651,213976,Diana Demidova,10.832479,821.0
495,213666,Maddison Inglis,10.377305,182.0
1115,213710,Cristina Bucsa,10.037376,161.0
11,214096,Karolina Muchova,9.406801,149.0
260,214839,Jodie Burrage,9.382569,216.0
507,206069,Marianna Zakarlyuk,8.690384,797.0
53,202663,Magda Linette,8.423919,58.0
247,201597,Kurumi Nara,8.334119,237.0
722,215910,Panna Udvardy,8.305833,97.0


In [278]:

import torch.nn as nn

# One neuron to learn scaling and a sigmoid activation function.
class EloPredictNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1, 1, bias=False)  # No bias so that ratings delta of 0 = 50% win prob

    def forward(self, rd):
        return torch.sigmoid(self.linear(rd))
    
class EloUpdateNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1, 1, bias=False)  # No bias so that matches are symmetrical (no advantage to being player A)
        
    def forward(self, pred_error):
        return self.linear(pred_error)
    
class NetworkNode(PlayerNode):
    def __init__(self,_id, name, rating=0):
        super().__init__(rating)
        self._id = _id
        self.name = name
        self.rating = rating
        self.last_rating = rating
        self.last_error = 0.5
        self.rank = 200 



### Let's try learning the weights

In [279]:

## split into train/test
percent_30 = int(0.3*len(wta_sample_data))
train = wta_sample_data.copy()[:-percent_30]
test = wta_sample_data.copy()[-percent_30:]
 
## split into train/test/val
# percent_20 = int(0.2*len(wta_sample_data))
# train = wta_sample_data.copy()[:-2*percent_20]
# test = wta_sample_data.copy()[-2*percent_20:-1*percent_20]
# val = wta_sample_data.copy()[-1*percent_20:]
# print(len(train), len(test), len(val))


In [280]:
import torch
from copy import copy
from torch.optim import SGD

predict_model = EloPredictNN()
predict_optimizer = SGD(predict_model.parameters(), lr=0.0005)

update_model = EloUpdateNN()
update_optimizer = SGD(update_model.parameters(), lr=0.0005)

batch_size = 32 ## in effect it will be 2x size
num_batches = len(train)//batch_size
num_epochs = 3

for j in range(num_epochs):
    network_player_ratings = {_id:NetworkNode(_id, name, np.random.random()-0.5) for _id, name in player_names.items()}
    epoch_loss = []
    for i in tqdm(range(num_batches-1), total=num_batches-1):
        predict_optimizer.zero_grad()
        update_optimizer.zero_grad()
        train_data = train[i*batch_size:(i+1)*batch_size].copy()

        ## randomize who is p1 and who is p2
        match_participants_1 = train_data[['winner_id','loser_id','winner_rank','loser_rank']]
        rank_dict = match_participants_1.copy().set_index('winner_id').to_dict()['winner_rank']
        rank_2_dict = match_participants_1.copy().set_index('loser_id').to_dict()['loser_rank']
        rank_dict.update(rank_2_dict)

        match_participants_2 = match_participants_1.copy()[['loser_id','winner_id','loser_rank','winner_rank']]
        match_participants = pd.concat([match_participants_1, match_participants_2], axis=0).reset_index(drop=True).values
        input_order = list(range(batch_size*2))
        random.shuffle(input_order)
        results = torch.cat([torch.ones(batch_size), torch.zeros(batch_size)]).view(-1,1)
        
        p1_nodes = [network_player_ratings[match_participants[k][0]] for k in input_order]
        p2_nodes = [network_player_ratings[match_participants[k][1]] for k in input_order]
        
        ## only update one model at a time
        update_or_predict = np.random.random()
        
        if update_or_predict>0.5:
            ## PREDICT MODEL (easy part)
            ## have to make it symmetrical (p2_rating-p1_rating and p1_rating-p2_rating)
            predict_X = torch.Tensor([p2_nodes[k].rating-p1_nodes[k].rating for k in range(len(p1_nodes))]).view(-1,1)
            predict_y = torch.Tensor([results[k] for k in input_order]).view(-1,1)

    #         print("Predict X:", predict_X)
            predictions = predict_model(predict_X)
    #         print("Predictions: ", predictions)
            predict_loss = nn.BCELoss()(predictions, predict_y)
            predict_loss.backward()

            # Update the model's parameters
            ## for the first few iterations, let ratings stabilize
            if (j>0)&(i < 25):
                continue
            else:
                predict_optimizer.step()
            # Zero the gradients since PyTorch accumulates them
            predict_optimizer.zero_grad()


            ## UPDATE MODEL (slightly harder)
            ## use previous game's results to calculate loss of the update model
        
        
        else:
            ## directionality is important here, last error is simply result - prediction (is negative if the player loses)
            p1_update_X = torch.Tensor([p1_node.last_error for p1_node in p1_nodes]).view(-1,1)
            p2_update_X = torch.Tensor([p2_node.last_error for p2_node in p2_nodes]).view(-1,1)
#             update_X = torch.cat([
#                 torch.Tensor([p1_node.last_error for p1_node in p1_nodes]).view(-1,1),
#                 torch.Tensor([p2_node.last_error for p2_node in p2_nodes]).view(-1,1)
#             ], axis=1)
#             update_predictions = update_model(update_X)
            p1_update_predictions = update_model(p1_update_X)
            p2_update_predictions = update_model(p2_update_X)

            ## now time to have baseline predict model (no update) and predict model with updates. Compare to create loss
            ## create baseline
            baseline_X = torch.Tensor([p2_nodes[k].last_rating-p1_nodes[k].last_rating for k in range(len(p1_nodes))]).view(-1,1)
    #         baseline_X = torch.cat([
    #             torch.Tensor([p2_nodes[k].last_rating-p1_nodes[k].last_rating for k in range(len(p1_nodes))]),
    #             torch.Tensor([p1_nodes[k].last_rating-p2_nodes[k].last_rating for k in range(len(p1_nodes))])
    #         ]).view(-1,1)
#             update_model_X = torch.Tensor([p2_nodes[k].last_rating+update_predictions[k,1] - p1_nodes[k].last_rating+update_predictions[k, 0] for k in range(len(p1_nodes))]).view(-1,1)
            update_model_X = torch.cat([
                torch.Tensor([p2_nodes[k].last_rating+p2_update_predictions[k] - p1_nodes[k].last_rating+p1_update_predictions[k] for k in range(len(p1_nodes))])
#                 torch.Tensor([p1_nodes[k].last_rating+p1_update_predictions[k] - p2_nodes[k].last_rating+p2_update_predictions[k] for k in range(len(p1_nodes))])
            ]).view(-1,1)
#             print(update_model_X.shape)
            baseline_predictions = predict_model(baseline_X)
            update_model_predictions = predict_model(update_model_X)
            baseline_BCE = nn.BCELoss()(baseline_predictions, predict_y)
            update_model_BCE = nn.BCELoss()(update_model_predictions, predict_y)
            ## lower is better, has to be negative if improving ratings
            update_loss = update_model_BCE-baseline_BCE
            update_loss.backward()
            if (j>0)&(i < 25):
                continue
            else:
                update_optimizer.step()
            update_optimizer.zero_grad()
        
        ## now just update the nodes with the predictions
        ## in Elo this step is k*(result-predictions)
        ## we are finding k
        pred_error = predict_y - predictions
#         print(pred_error.shape)
#         pred_error = torch.cat([
#             pred_error.view(-1,1),
#             torch.zeros(pred_error.shape)
#         ], axis=1)
        epoch_loss.extend(list(pred_error[:,0].detach().numpy()))
#         if i%250==0:
#             print(np.mean(epoch_loss))
        updates = update_model(pred_error)
        new_ratings = [p1_nodes[k].rating+updates[k] for k in range(len(p1_nodes))]
#         new_ratings = [p1_nodes[k].rating+updates[k, 0] for k in range(len(p1_nodes))]
        for l, player_node in enumerate(p1_nodes):
            player_node.last_rating = copy(player_node.rating)
            player_node.last_error = pred_error[l, 0]
            player_node.rating = float(new_ratings[l])
            player_node.rank = rank_dict[player_node._id]
            network_player_ratings[player_node._id] = copy(player_node)

# rds = np.array([2,1,0,-1,-2]).astype('float32').reshape(-1,1)
# predictions = predict_model(torch.from_numpy(rds))
# results = torch.Tensor([1, 1, 0, 0, 0]).view(-1,1)
# loss = nn.BCELoss()(predictions, results.float())
# loss.backward()
# optimizer.step()
# optimizer.zero_grad()


100%|█████████████████████████████████████████████████████████████████████████████| 3184/3184 [00:10<00:00, 307.53it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 3184/3184 [00:10<00:00, 310.70it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 3184/3184 [00:10<00:00, 311.83it/s]


In [281]:

predict_model(torch.Tensor([2.2, 1.1, 0, -1.1, -2.1]).view(-1,1))


tensor([[0.2265],
        [0.3512],
        [0.5000],
        [0.6488],
        [0.7635]], grad_fn=<SigmoidBackward0>)

In [282]:
# update_model(torch.Tensor([[-0.75,0], [-0.25,0], [0,0], [0.25,0], [0.75,0]]))
update_model(torch.Tensor([-0.75, -0.25, 0, 0.25, 0.75]).view(-1,1))


tensor([[-0.2199],
        [-0.0733],
        [ 0.0000],
        [ 0.0733],
        [ 0.2199]], grad_fn=<MmBackward0>)

In [291]:
rtg_df = pd.DataFrame([[k,v.name, v.rating, v.rank] for k,v in network_player_ratings.items()], columns=['id','name','rating','world_rank'])

rtg_df.loc[rtg_df['name'].str.contains('Swiatek')]

Unnamed: 0,id,name,rating,world_rank
389,216347,Iga Swiatek,"[[tensor(7.6317, grad_fn=<UnbindBackward0>)]]",200


In [284]:
rtg_df.corr()

  rtg_df.corr()


Unnamed: 0,id,rating,world_rank
id,1.0,0.009289,-0.353625
rating,0.009289,1.0,-0.033472
world_rank,-0.353625,-0.033472,1.0


In [285]:

predict_model.eval()
update_model.eval()


EloUpdateNN(
  (linear): Linear(in_features=1, out_features=1, bias=False)
)

In [286]:


elo_sys = EloSystem(k_factor=50)

player_ratings = {_id:EloNode(_id, name, 1500) for _id, name in player_names.items()}
network_player_ratings = {_id:NetworkNode(_id, name, 0) for _id, name in player_names.items()}

nn_history = []
for index, row in tqdm(wta_sample_data.iterrows(), total=len(wta_sample_data)):
    ## randomize who is p1 and who is p2
    match_participants = [row['winner_id'], row['loser_id']]
    random.shuffle(match_participants)
    p1_id, p2_id = match_participants
    p1_node = player_ratings[p1_id]
    p2_node = player_ratings[p2_id]
    result = 1 if p1_node._id == row['winner_id'] else 0
    p1_node, p2_node = elo_sys.play_match(p1_node, p2_node, result)
    
    p1_nn_node = network_player_ratings[p1_id]
    p2_nn_node = network_player_ratings[p2_id]
    
    rtg_diff = p2_nn_node.rating-p1_nn_node.rating
    nn_prediction = predict_model(torch.Tensor([[rtg_diff]]))
    error = result - nn_prediction
    rtg_update = update_model(torch.Tensor([[error]]))
    nn_history.append([p1_nn_node._id, p1_nn_node.name, p2_nn_node._id, p2_nn_node.name, float(p1_nn_node.rating), float(p2_nn_node.rating), float(nn_prediction), result, float(rtg_update)])
    p1_nn_node.rating += rtg_update
    p2_nn_node.rating -= rtg_update
    
    ## just tracking world ranking
    if result == 1:
        p1_node.rank = row['winner_rank']
        p2_node.rank = row['loser_rank']
    else:
        p1_node.rank = row['loser_rank']
        p2_node.rank = row['winner_rank']
    player_ratings[p1_id] = p1_node
    player_ratings[p2_id] = p2_node
    
    
hist = elo_sys.get_history()
nn_hist = pd.DataFrame(nn_history, columns=['p1_id','p1_name','p2_id','p2_name','p1_rating','p2_rating','prediction','result','ratings_delta'])


100%|███████████████████████████████████████████████████████████████████████| 145608/145608 [00:13<00:00, 10734.90it/s]


In [289]:

from sklearn.metrics import log_loss
grade_cutoff = int(0.2*len(hist))
print(len(hist),len(nn_hist))
reg_log_loss = log_loss(hist[-grade_cutoff:]['result'].values, hist[-grade_cutoff:]['prediction'].values)
nn_log_loss = log_loss(nn_hist[-grade_cutoff:]['result'].values, nn_hist[-grade_cutoff:]['prediction'].values)
print("reg log loss: ", )
print("nn log loss: ", )



145608 145608
reg log loss:  0.5812750355074674
nn log loss:  0.5979153307765209


In [296]:

rtg_df = pd.DataFrame([[k,v.name, float(v.rating), v.rank] for k,v in network_player_ratings.items()], columns=['id','name','rating','world_rank'])
rtg_df.loc[rtg_df['name'].str.contains('Swiatek')]
rtg_df.sort_values(by='rating', ascending=False)


Unnamed: 0,id,name,rating,world_rank
389,216347,Iga Swiatek,7.631668,200


In [298]:

nn_hist


Unnamed: 0,p1_id,p1_name,p2_id,p2_name,p1_rating,p2_rating,prediction,result,ratings_delta
0,211222,Jamie Loeb,202702,Tereza Smitkova,0.000000,0.000000,0.500000,0,-0.146601
1,204430,Antonia Lottner,211685,Rebecca Sramkova,0.000000,0.000000,0.500000,0,-0.146601
2,213889,Greet Minnen,212044,Katharina Gerlach,0.000000,0.000000,0.500000,1,0.146601
3,201697,Kristyna Pliskova,206349,Mayo Hibi,0.000000,0.000000,0.500000,1,0.146601
4,213779,Raluka Serban,202446,Eugenie Bouchard,0.000000,0.000000,0.500000,0,-0.146601
...,...,...,...,...,...,...,...,...,...
145603,222866,Katherine Hui,221374,Danielle Willson,1.253033,0.487803,0.605183,1,0.115761
145604,263685,Alessia Cau,215840,Haley Giavara,0.474552,1.443871,0.367951,0,-0.107884
145605,203288,Jia Jing Lu,259853,Eryn Cayetano,1.543297,1.586995,0.493903,1,0.148388
145606,222866,Katherine Hui,215840,Haley Giavara,1.368794,1.551755,0.474493,0,-0.139122


In [85]:


for name, param in predict_model.named_parameters():
    print(name, param)



linear.weight Parameter containing:
tensor([[-0.4483]], requires_grad=True)


In [86]:

for name, param in update_model.named_parameters():
    print(name, param)


linear.weight Parameter containing:
tensor([[-0.0406]], requires_grad=True)


In [59]:

predict_model(torch.from_numpy(rds))


tensor([[0.8331],
        [0.6908],
        [0.5000],
        [0.3092],
        [0.1669]], grad_fn=<SigmoidBackward0>)

In [62]:
for name, param in predict_model.named_parameters():
    print(name, param)

linear.weight Parameter containing:
tensor([[0.8038]], requires_grad=True)


### Testing PredictNN

In [26]:


predict_model = EloPredictNN()
predict_optimizer = SGD(predict_model.parameters(), lr=0.01)

for epoch in range(5):
    for index, row in 







In [None]:

update_model = EloUpdateNN()
predict_optimizer = SGD(predict_model.parameters(), lr=0.01)
update_optimizer = SGD(update_model.parameters(), lr=0.01)
bce_loss = nn.BCELoss()

In [27]:

class NetworkNode(PlayerNode):
    def __init__(self,_id, name, rating=0):
        super().__init__(rating)
        self._id = _id
        self.name = name
        self.rating = rating
        self.rank = 200 
        
        
network_player_ratings = {_id:NetworkNode(_id, name, np.random.random()-0.5) for _id, name in player_names.items()}


In [None]:

class PredictNN():
    def __init__(self):
        super().__init__()
        
        self.match_pred_net = nn.Sequential(
            nn.Linear(2, 1),
            nn.Sigmoid()
        )
        
    def forward(self, player1_rating, player2_rating, match_result):
        
        # Make match prediction
        _input = torch.cat([player1_rating, player2_rating], dim=1)
        pred = self.match_pred_net(_input)
        
        # Compute prediction loss
        loss = binary_cross_entropy(pred, match_result)
        return loss
    




In [None]:

import torch
import torch.nn as nn

class NetworkModel(nn.Module):

    def __init__(self):
        super().__init__()
        
        # Neural network layers        
        self.rating_update_net = nn.Sequential(
            nn.Linear(2, 2),
            nn.ReLU(),
            nn.Linear(2, 1)
        )
        
        self.match_pred_net = nn.Sequential(
            nn.Linear(2, 2),
            nn.ReLU(), 
            nn.Sigmoid()
        )
        
    def forward(self, player1_rating, player2_rating, match_result):
        
        # Make match prediction
        _input = torch.cat([player1_rating, player2_rating], dim=1)
        pred = self.match_pred_net(_input)
        
        # Compute prediction loss
        loss = binary_cross_entropy(pred, match_result)
        
        # Update player ratings based on match result
        rating_delta1 = self.rating_update_net(torch.cat([player1_rating, player2_rating, match_result]))
        rating_delta2 = self.rating_update_net(torch.cat([player2_rating, player1_rating, 1-match_result]))
        
        player1_rating += rating_delta1
        player2_rating += rating_delta2
        
        return loss, player1_rating, player2_rating


In [None]:



elo_sys = EloSystem(k_factor=50)
player_ratings = {_id:EloNode(_id, name, 1500) for _id, name in player_names.items()}
for index, row in tqdm(wta_sample_data.iterrows(), total=len(wta_sample_data)):
    ## randomize who is p1 and who is p2
    match_participants = [row['winner_id'], row['loser_id']]
    random.shuffle(match_participants)
    p1_id, p2_id = match_participants
    p1_node = player_ratings[p1_id]
    p2_node = player_ratings[p2_id]
    result = 1 if p1_node._id == row['winner_id'] else 0
    p1_node, p2_node = elo_sys.play_match(p1_node, p2_node, result)
    
    ## just tracking world ranking
    if result == 1:
        p1_node.rank = row['winner_rank']
        p2_node.rank = row['loser_rank']
    else:
        p1_node.rank = row['loser_rank']
        p2_node.rank = row['winner_rank']
    player_ratings[p1_id] = p1_node
    player_ratings[p2_id] = p2_node
    
    
hist = elo_sys.get_history()





In [221]:

import torch
import torch.nn as nn

class SportsRatingModel(nn.Module):

    def __init__(self):
        super().__init__()
        
        # Neural network layers        
        self.rating_update_net = nn.Sequential(
            nn.Linear(2, 2),
            nn.ReLU(),
            nn.Linear(2, 1)
        )
        
        self.match_pred_net = nn.Sequential(
            nn.Linear(2, 2),
            nn.ReLU(), 
            nn.Sigmoid()
        )
        
    def forward(self, player1_rating, player2_rating, match_result):
        
        # Make match prediction
        _input = torch.cat([player1_rating, player2_rating], dim=1)
        pred = self.match_pred_net(_input)
        
        # Compute prediction loss
        loss = binary_cross_entropy(pred, match_result)
        
        # Update player ratings based on match result
        rating_delta1 = self.rating_update_net(torch.cat([player1_rating, player2_rating, match_result]))
        rating_delta2 = self.rating_update_net(torch.cat([player2_rating, player1_rating, 1-match_result]))
        
        player1_rating += rating_delta1
        player2_rating += rating_delta2
        
        return loss, player1_rating, player2_rating
        
        


In [194]:

hist.loc[(hist['p1_id']==216347)|(hist['p2_id']==216347)]


Unnamed: 0,p1_id,p1_name,p2_id,p2_name,p1_rating,p2_rating,prediction,result,ratings_delta
4592,214006,Petia Arshinkova,216347,Iga Swiatek,1500.000000,1500.000000,0.500000,0,-20.000000
7080,205918,Ulrikke Eikeri,216347,Iga Swiatek,1562.186393,1520.000000,0.560414,0,-22.416580
7110,211148,Jasmine Paolini,216347,Iga Swiatek,1504.340272,1542.416580,0.445422,0,-17.816885
7124,201621,Mona Barthel,216347,Iga Swiatek,1591.766455,1560.233464,0.545255,0,-21.810217
7130,203325,Martina Di Giuseppe,216347,Iga Swiatek,1531.333983,1582.043681,0.427537,1,22.898532
...,...,...,...,...,...,...,...,...,...
128101,206252,Barbora Krejcikova,216347,Iga Swiatek,2172.616897,2395.643522,0.216897,1,31.324139
128483,201709,Yulia Putintseva,216347,Iga Swiatek,1955.894912,2364.319383,0.086980,0,-3.479198
128485,202505,Belinda Bencic,216347,Iga Swiatek,2210.403200,2367.798581,0.287811,0,-11.512429
128495,216347,Iga Swiatek,203354,Martina Trevisan,2379.311010,1916.453314,0.934894,1,2.604224


In [17]:
test['score']

0           2-6 6-4 6-3
1               6-1 6-4
2        6-3 6-7(3) 7-5
3               6-1 6-2
4           6-4 3-6 6-3
              ...      
29418           6-4 6-2
29419        7-6(3) 6-3
29420        7-6(6) 6-4
29421    6-7(6) 7-5 6-3
29422           6-1 6-2
Name: score, Length: 29423, dtype: object

In [12]:
test.loc[test['tourney_id']=='2019-0300']

Unnamed: 0,tourney_id,tourney_name,surface,draw_size,tourney_level,tourney_date,match_num,winner_id,winner_seed,winner_entry,...,l_1stIn,l_1stWon,l_2ndWon,l_SvGms,l_bpSaved,l_bpFaced,winner_rank,winner_rank_points,loser_rank,loser_rank_points
27800,2019-0300,Luxembourg,Hard,32,I,20191014,227,215219,5.0,,...,48.0,22.0,8.0,8.0,5.0,10.0,131.0,491.0,,
27801,2019-0300,Luxembourg,Hard,32,I,20191014,228,216146,,,...,33.0,20.0,10.0,9.0,3.0,6.0,162.0,373.0,258.0,226.0
27802,2019-0300,Luxembourg,Hard,32,I,20191014,229,204246,,,...,36.0,25.0,8.0,9.0,3.0,7.0,227.0,264.0,1007.0,11.0
27803,2019-0300,Luxembourg,Hard,32,I,20191014,230,201527,4.0,,...,28.0,17.0,13.0,8.0,6.0,10.0,127.0,511.0,275.0,206.0
27804,2019-0300,Luxembourg,Hard,32,I,20191014,231,203564,7.0,,...,25.0,13.0,6.0,8.0,3.0,9.0,150.0,396.0,506.0,70.0
27805,2019-0300,Luxembourg,Hard,32,I,20191014,232,201565,,,...,38.0,19.0,7.0,9.0,6.0,10.0,353.0,134.0,224.0,265.0
27806,2019-0300,Luxembourg,Hard,32,I,20191014,233,211316,,,...,31.0,17.0,5.0,8.0,5.0,9.0,184.0,330.0,174.0,346.0
27807,2019-0300,Luxembourg,Hard,32,I,20191014,234,204434,3.0,,...,33.0,17.0,6.0,7.0,3.0,7.0,110.0,570.0,232.0,254.0
27808,2019-0300,Luxembourg,Hard,32,I,20191014,235,202457,8.0,,...,51.0,33.0,14.0,12.0,6.0,11.0,157.0,382.0,208.0,283.0
27809,2019-0300,Luxembourg,Hard,32,I,20191014,236,211817,,,...,66.0,29.0,12.0,13.0,5.0,14.0,177.0,343.0,277.0,205.0


In [13]:
list(test)

['tourney_id',
 'tourney_name',
 'surface',
 'draw_size',
 'tourney_level',
 'tourney_date',
 'match_num',
 'winner_id',
 'winner_seed',
 'winner_entry',
 'winner_name',
 'winner_hand',
 'winner_ht',
 'winner_ioc',
 'winner_age',
 'loser_id',
 'loser_seed',
 'loser_entry',
 'loser_name',
 'loser_hand',
 'loser_ht',
 'loser_ioc',
 'loser_age',
 'score',
 'best_of',
 'round',
 'minutes',
 'w_ace',
 'w_df',
 'w_svpt',
 'w_1stIn',
 'w_1stWon',
 'w_2ndWon',
 'w_SvGms',
 'w_bpSaved',
 'w_bpFaced',
 'l_ace',
 'l_df',
 'l_svpt',
 'l_1stIn',
 'l_1stWon',
 'l_2ndWon',
 'l_SvGms',
 'l_bpSaved',
 'l_bpFaced',
 'winner_rank',
 'winner_rank_points',
 'loser_rank',
 'loser_rank_points']

In [16]:
list(reg)

['tourney_id',
 'tourney_name',
 'surface',
 'draw_size',
 'tourney_level',
 'tourney_date',
 'match_num',
 'winner_id',
 'winner_seed',
 'winner_entry',
 'winner_name',
 'winner_hand',
 'winner_ht',
 'winner_ioc',
 'winner_age',
 'loser_id',
 'loser_seed',
 'loser_entry',
 'loser_name',
 'loser_hand',
 'loser_ht',
 'loser_ioc',
 'loser_age',
 'score',
 'best_of',
 'round',
 'minutes',
 'w_ace',
 'w_df',
 'w_svpt',
 'w_1stIn',
 'w_1stWon',
 'w_2ndWon',
 'w_SvGms',
 'w_bpSaved',
 'w_bpFaced',
 'l_ace',
 'l_df',
 'l_svpt',
 'l_1stIn',
 'l_1stWon',
 'l_2ndWon',
 'l_SvGms',
 'l_bpSaved',
 'l_bpFaced',
 'winner_rank',
 'winner_rank_points',
 'loser_rank',
 'loser_rank_points']

In [15]:

reg = []

reg = pd.read_csv(os.path.join(DATA_PATH, 'wta_matches_2019.csv'))
