In [1]:
import os.path as osp

import torch
from sklearn.metrics import roc_auc_score

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.utils import negative_sampling

if torch.cuda.is_available():
    device = torch.device('cuda')
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

df_players = pd.read_csv("./../data_scrapped/atp_players.csv")
df_matchs = pd.read_csv("./../data_formatted/training_dgl_dataset.csv")

In [3]:
#On construit le graph

# lecture des données attendues
# tensor1[1] -> tensor2[1] 
# le player index en tensor1 a gagné contre le player au meme index en tensor 2

tensor1=[]
tensor2=[]

for index,row in df_players.iterrows():
    winmatchs = df_matchs[df_matchs.player1_id == row.player_id]
    #print(len(winmatchs))
    if len(winmatchs) > 0:
        #print(row.player_id)
        for index2, row2 in winmatchs.iterrows():
                tensor1.append(index)
                tensor2.append(df_players.loc[df_players.player_id == row2.player2_id].index[0])

In [4]:
import numpy
import random

labels = numpy.ones(len(tensor1))

indexes = random.sample(range(0, len(tensor1)-1), ((len(tensor1)-1)//2))

for index in indexes:
    temp = tensor1[index]
    tensor1[index] = tensor2[index]
    tensor2[index] = temp
    labels[index] = 0

In [25]:
import torch_geometric.transforms as T
from torch_geometric.data import Data

# Create the heterogeneous graph data object:
#data = Data()

# Add the user nodes:
x = torch.tensor(list(df_players[["birth_year","weight_kg","height_cm"]].values))  # [num_users, num_features_users]
x = torch.nan_to_num(x, nan=0.0)
#x = torch.masked_select(x, ~torch.isnan(x))
#x = torch.ones(df_players.shape[0])

edge_index = torch.stack([torch.tensor(tensor1), torch.tensor(tensor2)], dim=0)
#edge_attr = torch.Tensor(list(df_matchs[["player1_atprank","player1_oddsB365","player2_atprank","player2_oddsB365"]].values))
labels = torch.tensor(labels, dtype=torch.long)

data = Data(x=x, edge_index=edge_index,edge_label=labels) 
#data = T.ToUndirected()(data)
# Add the movie nodes:
#data['movie'].x = movie_features  # [num_movies, num_features_movies]

data

  labels = torch.tensor(labels, dtype=torch.long)


Data(x=[3446, 3], edge_index=[2, 51981], edge_label=[51981])

In [26]:
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.05,
    num_test=0.1,
    is_undirected=False,
    add_negative_train_samples=False
)(data)
train_data, val_data, test_data

(Data(x=[3446, 3], edge_index=[2, 44184], edge_label=[44184], edge_label_index=[2, 44184]),
 Data(x=[3446, 3], edge_index=[2, 44184], edge_label=[5198], edge_label_index=[2, 5198]),
 Data(x=[3446, 3], edge_index=[2, 46783], edge_label=[10396], edge_label_index=[2, 10396]))

In [27]:
train_data.edge_label.unique()

tensor([0, 1])

In [29]:
train_data.x[train_data.edge_index[0]]


tensor([[1977.,   80.,  183.],
        [1976.,   82.,  180.],
        [1981.,   85.,  185.],
        ...,
        [1968.,   71.,  183.],
        [1981.,   84.,  188.],
        [1985.,  108.,  208.]])

In [30]:
train_data.x[train_data.edge_index[1]]

tensor([[1982.,   79.,  180.],
        [1976.,   82.,  193.],
        [1982.,   79.,  180.],
        ...,
        [1978.,   70.,  175.],
        [1984.,   87.,  193.],
        [1996.,   83.,  198.]])

In [28]:
from torch_geometric.nn import SAGEConv

if torch.cuda.is_available():
    device = torch.device('cuda')
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

class EdgeDirectionPredictionModel(nn.Module):
    def __init__(self, in_channels, hidden_channels):
        super(EdgeDirectionPredictionModel, self).__init__()
        # GraphSAGE layers
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        # Output layer for binary classification
        self.fc = nn.Linear(hidden_channels*2, 1) # Adjust output size for edge direction prediction

    def forward(self, x, edge_index,edge_label_index):
        # Apply GraphSAGE layers
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        edge_pairs = torch.cat((x[edge_index[0]], x[edge_index[1]]), dim=1)
        # Global mean pooling
        #x = torch.mean(x, dim=0)
        # Fully connected layer for binary classification (edge direction)
        #x = (x[edge_label_index[0]] * x[edge_label_index[1]]).sum(dim=-1)
        x = self.fc(edge_pairs)
        return x

model = EdgeDirectionPredictionModel(in_channels=data.num_features, hidden_channels=64).to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
criterion = torch.nn.BCEWithLogitsLoss()

In [29]:
from sklearn.metrics import accuracy_score

# Training loop
def train(data):
    optimizer.zero_grad()
    # Predict edge directions for positive examples
    logits = model(data.x, data.edge_index, data.edge_label_index)
    # Create target labels (1 for positive edges, 0 for negative edges)
    #labels = torch.cat([torch.ones(data.edge_index.size(1)), torch.zeros(data.edge_index.size(1))], dim=0)
    # Calculate the binary classification loss
    loss = criterion(logits.squeeze(), data.y)
    # Backpropagation
    loss.backward()
    optimizer.step()
    return loss

@torch.no_grad()
def test(data):
    model.eval()
    out = model(data.x, data.edge_index, data.edge_label_index)
    #z = model.encode(data.x, data.edge_index)
    #out = model.decode(z, data.edge_label_index).view(-1).sigmoid()
    return roc_auc_score(data.y.cpu().numpy(), out.cpu().numpy())

# Evaluate the model (e.g., on a validation set)
# You can use metrics like accuracy, precision, recall, etc.

best_val_auc = final_test_auc = 0
for epoch in range(1, 101):
    loss = train(train_data)
    val_auc = test(val_data)
    test_auc = test(test_data)
    if val_auc > best_val_auc:
        best_val_auc = val_auc
        final_test_auc = test_auc
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_auc:.4f}, '
          f'Test: {test_auc:.4f}')  

# Predict edge directions for test data (you can replace this with your own test data)
#test_logits = model(test_data.x, test_data.edge_index)
#test_pred_labels = (test_logits.squeeze() > 0).float()

# Calculate accuracy on the test data
#test_labels = torch.cat([torch.ones(data.edge_index.size(1)), torch.zeros(data.edge_index.size(1))], dim=0)
#test_accuracy = accuracy_score(test_labels.cpu().numpy(), test_pred_labels.cpu().numpy())
#print(f'Test Accuracy: {test_accuracy * 100:.2f}%')



RuntimeError: mat1 and mat2 must have the same dtype

In [47]:
# Specify the edge you want to predict (e.g., edge from node 0 to node 1)
node_0 = 1568
node_1 = 3415

# Predict the direction of the edge
with torch.no_grad():
    model.eval()
    edge_label_index =  torch.tensor([[node_0, node_1], [node_1, node_0]], dtype=torch.long).t()
    print(edge_label_index)
    prediction = model(train_data.x, train_data.edge_index, edge_label_index)
    print(prediction)

tensor([[1568, 3415],
        [3415, 1568]])
tensor([[-0.0066],
        [ 0.0617],
        [ 0.0516],
        ...,
        [-0.0007],
        [-0.0801],
        [ 0.4008]])


In [48]:
prediction

tensor([[-0.0066],
        [ 0.0617],
        [ 0.0516],
        ...,
        [-0.0007],
        [-0.0801],
        [ 0.4008]])