In [4]:
import os.path as osp

import torch
from sklearn.metrics import roc_auc_score

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.utils import negative_sampling

if torch.cuda.is_available():
    device = torch.device('cuda')
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import torch.nn as nn
import torch.nn.functional as F
import numpy as np


#https://colab.research.google.com/drive/1N3LvAO0AXV4kBPbTMX866OwJM9YS6Ji2?usp=sharing#scrollTo=fl5W1gg5Jhzz

df_players = pd.read_csv("./../data_scrapped/atp_players.csv")
df_matchs = pd.read_csv("./../data_formatted/training_torch2_dataset.csv")

In [6]:
#On construit le graph

# lecture des données attendues
# tensor1[1] -> tensor2[1] 
# le player index en tensor1 a joué contre le player au meme index en tensor 2
# y stocke le résultat du match


tensor1=[]
tensor2=[]

for index,row in df_players.iterrows():
    winmatchs = df_matchs[df_matchs.player1_id == row.player_id]
    #print(len(winmatchs))
    if len(winmatchs) > 0:
        #print(row.player_id)
        for index2, row2 in winmatchs.iterrows():
                tensor1.append(index)
                tensor2.append(df_players.loc[df_players.player_id == row2.player2_id].index[0])

In [7]:
import numpy
import random

labels = numpy.ones(len(tensor1))

indexes = random.sample(range(0, len(tensor1)-1), ((len(tensor1)-1)//2))

for index in indexes:
    temp = tensor1[index]
    tensor1[index] = tensor2[index]
    tensor2[index] = temp
    labels[index] = 0

In [18]:
import torch_geometric.transforms as T
from torch_geometric.data import Data

# Create the heterogeneous graph data object:
#data = Data()

# Add the user nodes:
in_node_features = torch.tensor(list(df_players[["birth_year","weight_kg","height_cm"]].values),dtype=torch.float)  # [num_users, num_features_users]
in_node_features = torch.nan_to_num(in_node_features, nan=0.0)
#x = torch.masked_select(x, ~torch.isnan(x))
#x = torch.ones(df_players.shape[0])
in_edge_features = torch.tensor(list(df_matchs[["player1_games_won","player1_sets_won", "player2_games_won","player2_sets_won"]].values),dtype=torch.float)
in_edge_features = torch.nan_to_num(in_edge_features, nan=0.0)

edge_index = torch.stack([torch.tensor(tensor1), torch.tensor(tensor2)], dim=0)
#edge_attr = torch.Tensor(list(df_matchs[["player1_atprank","player1_oddsB365","player2_atprank","player2_oddsB365"]].values))
#y = torch.Tensor(list(df_matchs[["winner_player1"]].values))

labels = torch.tensor(labels, dtype=torch.long)

data = Data(x=in_node_features, edge_index=edge_index, edge_attr = in_edge_features, edge_label=labels) 
#data = T.ToUndirected()(data)
# Add the movie nodes:
#data['movie'].x = movie_features  # [num_movies, num_features_movies]

data

  labels = torch.tensor(labels, dtype=torch.long)


Data(x=[3446, 3], edge_index=[2, 51981], edge_attr=[51981, 4], edge_label=[51981])

In [25]:
in_edge_features.isnan().unique()

tensor([False])

In [19]:
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.05,
    num_test=0.1,
    is_undirected=False,
    add_negative_train_samples=False,
    neg_sampling_ratio=0
)(data)
train_data, val_data, test_data

(Data(x=[3446, 3], edge_index=[2, 44184], edge_attr=[44184, 4], edge_label=[44184], edge_label_index=[2, 44184]),
 Data(x=[3446, 3], edge_index=[2, 44184], edge_attr=[44184, 4], edge_label=[2599], edge_label_index=[2, 2599]),
 Data(x=[3446, 3], edge_index=[2, 46783], edge_attr=[46783, 4], edge_label=[5198], edge_label_index=[2, 5198]))

In [27]:
labels

tensor([1, 0, 1,  ..., 0, 1, 1])

In [13]:
print(train_data.edge_label.unique())
print(test_data.edge_label.unique())

tensor([0, 1])
tensor([0, 1])


In [11]:
train_data.edge_index

tensor([[ 878,  231, 3062,  ..., 2998, 2290,  979],
        [3014,  327, 2179,  ...,  878, 1982,  969]])

In [38]:
from torch_geometric.nn import NNConv,MLP
from torch.nn import Linear, ReLU, Sequential

if torch.cuda.is_available():
    device = torch.device('cuda')
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

class Net(torch.nn.Module):
    def __init__(self, in_node_features, in_edge_features, hidden_channels, out_channels):
        super().__init__()
        nn1 = Sequential(
            Linear(2, 25),
            ReLU(),
            Linear(25, 4 * 32),
        )

        nn2 = Sequential(
            Linear(2, 25),
            ReLU(),
            Linear(25, 32 * 64),
        )
        # GraphSAGE layers
        self.conv1 = NNConv(3, 32, nn1, aggr='mean')
        self.conv2 = NNConv(32, 64, nn2, aggr='mean')


        # Output layer for binary classification
        self.mlp = MLP(in_channels=6, hidden_channels=hidden_channels, out_channels=1, num_layers=3)

    def encode(self, x, edge_attr, edge_index):
        # Embed node and edge features
        x = F.relu(self.node_embedding(x))
        edge_attr = F.relu(self.edge_embedding(edge_attr))

        # Combine node and edge embeddings
        x = torch.cat((x, edge_attr), dim=-1)

        # Apply GraphSAGE layers
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)

    def decode(self, z, edge_label_index):
        # Pairwise combination of node embeddings for edge prediction
        return self.mlp(torch.cat((z[edge_label_index[0]], z[edge_label_index[1]]), dim=1))
    #(z[edge_label_index[0]] * z[edge_label_index[1]]).sum(dim=-1)

    #def decode_all(self, z):
    #    prob_adj = z @ z.t()
    #    return (prob_adj > 0).nonzero(as_tuple=False).t()


model = Net(in_node_features = 3, in_edge_features = 4, hidden_channels= 32, out_channels=64).to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
criterion = torch.nn.BCEWithLogitsLoss()

In [36]:

def train():
    model.train()
    optimizer.zero_grad()
    z = model.encode(train_data.x, train_data.edge_attr, train_data.edge_index)
    out = model.decode(z, train_data.edge_label_index).view(-1).sigmoid()
    loss = criterion(out.squeeze(), train_data.edge_label.float())
    loss.backward()
    optimizer.step()
    return loss


@torch.no_grad()
def test(data):
    model.eval()
    z = model.encode(data.x, data.edge_attr, data.edge_index)
    out = model.decode(z, data.edge_label_index).view(-1).sigmoid()
    print(out)
    return roc_auc_score(data.edge_label.float(), out.squeeze())


best_val_auc = final_test_auc = 0
for epoch in range(1, 101):
    loss = train()
    val_auc = test(val_data)
    test_auc = test(test_data)
    if val_auc > best_val_auc:
        best_val_auc = val_auc
        final_test_auc = test_auc
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_auc:.4f}, '
          f'Test: {test_auc:.4f}')


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 3446 but got size 44184 for tensor number 1 in the list.

In [140]:
# Specify the edge you want to predict (e.g., edge from node 0 to node 1)
node_0 = 3
node_1 = 664

print(df_players.iloc[node_0])
#print(df_players.iloc[node_0]["first_name","last_name"])

# Predict the direction of the edge
with torch.no_grad():
    model.eval()
    edge_label_index =  torch.tensor([[node_0, node_1], [node_1, node_0]], dtype=torch.long).t()
    print(edge_label_index)
    z = model.encode(data.x, data.edge_index)
    prediction = model.decode(z, edge_label_index).view(-1).sigmoid()
    print(prediction)

player_id                l836
first_name               Nick
last_name             Lindahl
country_code              AUS
birthdate          1988.07.31
birth_year             1988.0
birth_month               7.0
birth_day                31.0
turned_pro             2006.0
weight_kg                77.0
weight_lbs              170.0
height_cm               183.0
height_in                72.0
birthplace      Malmo, Sweden
Name: 3, dtype: object
tensor([[  3, 664],
        [664,   3]])
tensor([0.6593, 0.7405])
