In [1]:
import os.path as osp

import torch
from sklearn.metrics import roc_auc_score

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.utils import negative_sampling

# Select the compute device by priority: a CUDA GPU first, then the Apple MPS
# backend (only probed when this torch build exposes it), and the CPU otherwise.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
    else 'cpu'
)

import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from datetime import datetime

# Load the two input tables, in this order: the player list, then the match table.
# Both paths are relative to the notebook's working directory.
df_players, df_matchs = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./../data_scrapped/atp_players.csv",
        "./../data_formatted/training_dgl_dataset.csv",
    )
)

In [2]:
# The first 10 years are used as the base for the initial graph:
# keep only the matches played strictly before 2011-01-01.
s = "20110101"
s_datetime = datetime.strptime(s, '%Y%m%d')

# Vectorised and idempotent parsing: applying datetime.strptime row by row fails
# with a TypeError when this cell is re-run (the column is then already parsed),
# whereas pd.to_datetime accepts both the raw strings and an already-parsed column.
df_matchs["match_date"] = pd.to_datetime(df_matchs["match_date"], format='%Y-%m-%d')

df_matchs_filtered = df_matchs[df_matchs["match_date"] < s_datetime]

In [3]:
# Build the edge list of the first graph.
#
# Expected layout of the data:
#   tensor1[k] -> tensor2[k]
#   the player whose df_players index is stored in tensor1[k] played (as player1)
#   against the player whose df_players index is stored at the same position in tensor2.
# (The original notes also mention a `y` holding the match result; it is not built here.)

tensor1 = []
tensor2 = []

# player_id -> first df_players index carrying that id.
first_index_by_player_id = {}
for player_index, player_id in zip(df_players.index, df_players.player_id):
    first_index_by_player_id.setdefault(player_id, player_index)

# player1_id -> ids of the opponents met, in the row order of df_matchs_filtered.
# Grouping once replaces a full scan of the match table for every player.
opponents_by_player1 = {}
for player1_id, player2_id in zip(df_matchs_filtered.player1_id,
                                  df_matchs_filtered.player2_id):
    opponents_by_player1.setdefault(player1_id, []).append(player2_id)

# Walk the players in df_players order so the edges come out in the same order
# as a per-player scan of the match table would produce.
for index, player_id in zip(df_players.index, df_players.player_id):
    for opponent_id in opponents_by_player1.get(player_id, ()):
        if opponent_id not in first_index_by_player_id:
            # Fail loudly rather than silently dropping the edge.
            raise IndexError(f"player2_id {opponent_id!r} not found in df_players")
        tensor1.append(index)
        tensor2.append(first_index_by_player_id[opponent_id])

In [14]:
# Plan described by the original notes: for each match after 2010, count the paths
# from player A to player B and vice versa and add them as features, then add the
# match to the graph and move on to the next match.
# This cell only builds the starting graph: one node per player index present in the
# edge lists and one directed edge tensor1[k] -> tensor2[k] per entry.

import networkx as nx
G = nx.DiGraph()
G.add_nodes_from(set(tensor1 + tensor2))
G.add_edges_from(zip(tensor1, tensor2))

In [15]:
def all_simple_paths(adjlist, start, end, path=()):
    """Enumerate every simple (cycle-free) path from ``start`` to ``end``.

    Parameters
    ----------
    adjlist : mapping or sequence
        Adjacency structure where ``adjlist[node]`` is an iterable of the
        successors of ``node``. It can be a dict keyed by node, or a list/tuple
        indexed by integer node ids.
    start : hashable
        Node the search starts from.
    end : hashable
        Node every returned path ends on.
    path : tuple, optional
        Nodes already visited before reaching ``start``. Used by the recursion;
        callers normally leave it empty.

    Returns
    -------
    list of tuple
        One tuple of nodes per path, ``start`` first and ``end`` last, in
        depth-first order. Empty when ``end`` cannot be reached.
    """
    path = path + (start,)
    if start == end:
        return [path]

    # In a mapping, a node without an entry simply has no outgoing edge.
    successors = adjlist.get(start, ()) if hasattr(adjlist, "get") else adjlist[start]

    paths = []
    for child in successors:
        # Skipping nodes already on the current path keeps every path simple.
        if child not in path:
            # Pass the adjacency structure through unchanged: wrapping it in
            # tuple() turns a dict into a tuple of its keys, which breaks the
            # lookup at the next recursion level.
            paths.extend(all_simple_paths(adjlist, child, end, path))
    return paths

In [39]:
# https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.simple_paths.all_simple_paths.html#networkx.algorithms.simple_paths.all_simple_paths

# The first 10 years were used as the base for the initial graph,
# so this time the later matches are used: strictly between 2011-01-01 and 2012-01-01.
s = "20110101"
s_datetime = datetime.strptime(s, '%Y%m%d')
e = "20120101"
e_datetime = datetime.strptime(e, '%Y%m%d')

# .copy() makes the selection an independent frame, so adding feature columns to it
# afterwards does not write into a slice of df_matchs (SettingWithCopyWarning).
df_matchs_filtered = df_matchs[
    (df_matchs.match_date > s_datetime) & (df_matchs.match_date < e_datetime)
].copy()

max_path_edges = 4     # arbitrary depth cutoff passed to networkx
n_length_buckets = 5   # buckets _1 .. _5, indexed by the number of nodes on the path


def _count_paths_by_node_count(graph, source, target, cutoff, n_buckets):
    """Count the simple paths source -> target, bucketed by how many nodes they visit.

    Returns a list ``counts`` of length ``n_buckets`` in which ``counts[k - 1]`` is
    the number of paths made of exactly ``k`` nodes.
    """
    counts = [0] * n_buckets
    for node_path in nx.all_simple_paths(graph, source, target, cutoff=cutoff):
        n_nodes = len(node_path)
        if 1 <= n_nodes <= n_buckets:
            counts[n_nodes - 1] += 1
    return counts


# player_id -> df_players index, built once instead of scanning df_players for every
# match. With duplicated ids the last row wins, which is what the per-row scan selected.
index_by_player_id = dict(zip(df_players.player_id, df_players.index))

# One list per bucket and per direction, aligned with the rows of df_matchs_filtered.
winner_columns = [[] for _ in range(n_length_buckets)]
loser_columns = [[] for _ in range(n_length_buckets)]
missing_counts = [np.nan] * n_length_buckets

# NOTE(review): G is not modified inside this loop, so every match is scored against
# the same graph; the matches are not added to it as they are processed.
# NOTE(review): per the networkx documentation a path is a list of nodes, so bucket _1
# (a single node) is presumably always 0 for two distinct players — confirm the
# intended meaning of the _1 .. _5 columns.
for player1_id, player2_id in zip(df_matchs_filtered.player1_id,
                                  df_matchs_filtered.player2_id):
    player1 = index_by_player_id.get(player1_id)
    player2 = index_by_player_id.get(player2_id)

    # A player unknown to df_players or absent from the graph yields NaN features,
    # instead of silently reusing the indices found for the previous match.
    both_in_graph = (
        player1 is not None
        and player2 is not None
        and G.has_node(player1)
        and G.has_node(player2)
    )
    if both_in_graph:
        winner_counts = _count_paths_by_node_count(
            G, player1, player2, max_path_edges, n_length_buckets)
        loser_counts = _count_paths_by_node_count(
            G, player2, player1, max_path_edges, n_length_buckets)
    else:
        winner_counts = missing_counts
        loser_counts = missing_counts

    for column, value in zip(winner_columns, winner_counts):
        column.append(value)
    for column, value in zip(loser_columns, loser_counts):
        column.append(value)

# Expose the results under the names used by the following cells.
dfwinner_1, dfwinner_2, dfwinner_3, dfwinner_4, dfwinner_5 = winner_columns
dfloser_1, dfloser_2, dfloser_3, dfloser_4, dfloser_5 = loser_columns




In [40]:
# Attach the per-match path counts to the filtered match table.
# Each list holds one value per row of df_matchs_filtered, in row order.
path_count_columns = {
    "dfwinner_1": dfwinner_1,
    "dfwinner_2": dfwinner_2,
    "dfwinner_3": dfwinner_3,
    "dfwinner_4": dfwinner_4,
    "dfwinner_5": dfwinner_5,
    "dfloser_1": dfloser_1,
    "dfloser_2": dfloser_2,
    "dfloser_3": dfloser_3,
    "dfloser_4": dfloser_4,
    "dfloser_5": dfloser_5,
}
for column_name, column_values in path_count_columns.items():
    df_matchs_filtered[column_name] = column_values


In [42]:
# Persist the filtered matches (with whatever columns the frame currently has) as CSV,
# without writing the DataFrame index as a column.
df_matchs_filtered.to_csv("./../data_formatted/training_dgl2_dataset.csv", index= False)

In [43]:
# Number of rows (matches) in the filtered frame.
len(df_matchs_filtered)

2354

In [60]:
# Count the rows where the weighted sum of the dfwinner_* path counts is strictly
# greater than the weighted sum of the dfloser_* path counts.
# Rows with NaN counts compare as False and are therefore not counted.
# NOTE(review): the "_2" weight differs between the two sides (1500 vs 5000) —
# confirm this asymmetry is intended.
weighted_winner_paths = (
    df_matchs_filtered["dfwinner_1"] * 10000
    + df_matchs_filtered["dfwinner_2"] * 1500
    + df_matchs_filtered["dfwinner_3"] * 100
    + df_matchs_filtered["dfwinner_4"]
    + df_matchs_filtered["dfwinner_5"]
)
weighted_loser_paths = (
    df_matchs_filtered["dfloser_1"] * 10000
    + df_matchs_filtered["dfloser_2"] * 5000
    + df_matchs_filtered["dfloser_3"] * 100
    + df_matchs_filtered["dfloser_4"]
    + df_matchs_filtered["dfloser_5"]
)
int((weighted_winner_paths > weighted_loser_paths).sum())

1503

In [49]:
# Ratio of two hard-coded counts.
# NOTE(review): 2354 equals the row count displayed for df_matchs_filtered, but 1521
# is not the 1503 shown by the weighted comparison cell — presumably a value from an
# earlier run. Recompute from the data instead of hard-coding; TODO confirm.
print(1521/2354)

0.6461342395921835


In [47]:
# Number of rows where dfwinner_2 is strictly smaller than dfloser_2
# (rows with NaN in either column compare as False and are not counted).
len(df_matchs_filtered[df_matchs_filtered["dfwinner_2"] < df_matchs_filtered["dfloser_2"]])

242

In [26]:
        # NOTE(review): leftover fragment of a loop body. It is indented with no
        # enclosing statement in this cell, so it cannot run on its own, and lasp1,
        # winner_have_more and winner_have_less are never assigned in the visible
        # notebook. Restore the surrounding loop or delete this cell.
        lasp2 = len(list(asp2))
        print(player1, player2, lasp1, lasp2)
        # Tally whether the player1 -> player2 direction has strictly more paths.
        if (lasp1 > lasp2):
            winner_have_more+=1
        else:
            winner_have_less+=1
        print("current count : ", winner_have_more, winner_have_less)

86

In [19]:
# Select the matches played strictly between 2011-01-01 and 2012-01-01.
# NOTE(review): this rebinds df_matchs_filtered to a fresh selection of df_matchs, so
# columns that were added to a previous df_matchs_filtered are not carried over.
s = "20110101"
e = "20120101"
s_datetime = datetime.strptime(s, '%Y%m%d')
e_datetime = datetime.strptime(e, '%Y%m%d')

df_matchs_filtered = df_matchs[
    (df_matchs.match_date > s_datetime) & (df_matchs.match_date < e_datetime)
]

In [None]:
    
    # NOTE(review): leftover fragment of a loop body. It is indented with no enclosing
    # statement in this cell, and asp1, asp2, winner_have_more and winner_have_less are
    # not defined here. Restore the surrounding loop or delete this cell.
    if (len(list(asp1)) > len(list(asp2))) :
        winner_have_more +=1
    else:
        winner_have_less+=1
    