In [1]:
import networkx as nx
import pandas as pd
from torch_geometric.utils import from_networkx


In [2]:
#Load GraphMissingEdges
G = nx.read_gml("../predio-de-links-utfpr-2024-1\GraphMissingEdges.gml")

In [3]:
#Load Categories
categories_map = pd.read_csv('../predio-de-links-utfpr-2024-1\categories.csv')

In [4]:
# Load the edges to evaluate
edges_to_evaluate = pd.read_csv('../predio-de-links-utfpr-2024-1\edgesToEvaluate.csv')

In [5]:
# Número de nós e arestas
num_nodes = len(G.nodes)
num_edges = len(G.edges)

# Densidade da rede
density = nx.density(G)

# Grau médio dos nós
avg_degree = sum(dict(G.degree()).values()) / num_nodes

print("Number of nodes:", num_nodes)
print("Number of edges:", num_edges)
print("Network density:", density)
print("Average node degree:", avg_degree)

Number of nodes: 4575
Number of edges: 18991
Network density: 0.0018150582646987846
Average node degree: 8.302076502732241


In [6]:
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
import torch 

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Convert the graph to a PyTorch Geometric Data object
data = from_networkx(G)

print(data)
data.train_mask = data.val_mask = data.test_mask = None

transform = T.Compose([
    T.NormalizeFeatures(),
    T.ToDevice(device),
    T.RandomLinkSplit(num_val=0.10, num_test=0.10, neg_sampling_ratio = 1.0,
                  is_undirected=True, add_negative_train_samples=False)
])


train_data, val_data, test_data = transform(data)
train_data = train_data.to(device)
val_data=val_data.to(device)
test_data = test_data.to(device)


print(train_data)
print('-----')
print(test_data)
print()
print(train_data)
print(val_data)

Data(edge_index=[2, 37575], longitude=[4575], latitude=[4575], categories=[4575], stars=[4575], name=[4575], reviewCount=[4575], weight=[37575], num_nodes=4575)
Data(edge_index=[2, 30386], longitude=[4575], latitude=[4575], categories=[4575], stars=[4575], name=[4575], reviewCount=[4575], weight=[30386], num_nodes=4575, edge_label=[15193], edge_label_index=[2, 15193])
-----
Data(edge_index=[2, 34184], longitude=[4575], latitude=[4575], categories=[4575], stars=[4575], name=[4575], reviewCount=[4575], weight=[34184], num_nodes=4575, edge_label=[3798], edge_label_index=[2, 3798])

Data(edge_index=[2, 30386], longitude=[4575], latitude=[4575], categories=[4575], stars=[4575], name=[4575], reviewCount=[4575], weight=[30386], num_nodes=4575, edge_label=[15193], edge_label_index=[2, 15193])
Data(edge_index=[2, 30386], longitude=[4575], latitude=[4575], categories=[4575], stars=[4575], name=[4575], reviewCount=[4575], weight=[30386], num_nodes=4575, edge_label=[3798], edge_label_index=[2, 379

In [10]:
import numpy as np
# Extract the 'reviewCount' and 'stars' attributes from the nodes
review_counts = np.array([G.nodes[node]['reviewCount'] for node in G.nodes])
stars = np.array([G.nodes[node]['stars'] for node in G.nodes])

# Convert the lists to tensors and stack them to create the node features
node_features = torch.tensor([review_counts.astype(np.float), stars.astype(np.float)], dtype=torch.float).t()

# Convert the graph to a PyTorch Geometric Data object
data = from_networkx(G)

# Assign the node features to the data object
data.x = node_features


AttributeError: module 'numpy' has no attribute 'float'.
`np.float` was a deprecated alias for the builtin `float`. To avoid this error in existing code, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

In [22]:
import torch
from torch_geometric.nn import GCNConv
from torch_geometric.utils import negative_sampling
from sklearn.metrics import f1_score
import torch.nn.functional as F

class LinkPredictor(torch.nn.Module):
    def __init__(self, num_features, hidden_channels):
        super(LinkPredictor, self).__init__()
        self.conv1 = GCNConv(num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.classifier = torch.nn.Linear(2*hidden_channels, 1)

    def forward(self, x, edge_index):
        x1 = torch.relu(self.conv1(x, edge_index))
        x2 = self.conv2(x1, edge_index)
        # Concatenate the embeddings of the source and target nodes
        edge_embeddings = torch.cat([x2[edge_index[0]], x2[edge_index[1]]], dim=-1)
        return self.classifier(edge_embeddings)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LinkPredictor(num_features=train_data.num_node_features, hidden_channels=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

def train():
    model.train()
    optimizer.zero_grad()
    # Use all nodes for training 
    out = model(train_data.x, train_data.edge_index)
    # We apply the sigmoid function to the output to get probabilities
    out_probs = torch.sigmoid(out)
    # We use binary cross entropy loss
    loss = F.binary_cross_entropy(out_probs.view(-1), train_data.y.float())
    loss.backward()
    optimizer.step()
    return loss.item()

def test(data):
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        out_probs = torch.sigmoid(out)
        # We apply a threshold of 0.5 to determine whether an edge exists
        out_preds = (out_probs > 0.5).float().view(-1)
        f1 = f1_score(data.y.cpu(), out_preds.cpu())
        return f1

for epoch in range(1, 101):
    loss = train()
    val_f1 = test(val_data)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val F1: {val_f1:.4f}')

test_f1 = test(test_data)
print(f'Test F1: {test_f1:.4f}')


AttributeError: 'NoneType' object has no attribute 'size'