<a href="https://colab.research.google.com/github/raniaabidi/HTGNNs/blob/main/HTGNN_Amazon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so files under /content/drive/MyDrive
# can be read with ordinary file paths. `google.colab` is provided by the
# Colab runtime, so this cell is expected to run there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
# Set LC_ALL in this process's environment; child processes started afterwards
# (e.g. shell commands launched from the notebook) inherit it.
# NOTE(review): presumably a workaround for shell commands failing under a
# non-UTF-8 locale in Colab -- confirm it is still required.
os.environ['LC_ALL'] = 'en_US.UTF-8'

In [None]:
import pandas as pd
import numpy as np
import networkx as nx
from sklearn.model_selection import train_test_split

# Read the raw ratings, derive a datetime column from the epoch-seconds
# 'Timestamp' column, and order the rows chronologically.
ratings = (
    pd.read_csv('/content/drive/MyDrive/ratings_Beauty.csv')
    .assign(timestamp=lambda frame: pd.to_datetime(frame['Timestamp'], unit='s'))
    .sort_values(by='timestamp')
)

# Chronological hold-out: with shuffle=False the last 20% of the sorted rows
# (i.e. the most recent ratings) form the test set.
train_data, test_data = train_test_split(ratings, shuffle=False, test_size=0.2)

# Create graph structures with numeric timestamps
def create_graph(data):
    """Build a directed user -> product graph from a ratings frame.

    Args:
        data: DataFrame with 'UserId', 'ProductId' and a datetime-like
            'timestamp' column (each value must support ``.timestamp()``).

    Returns:
        ``nx.DiGraph`` with one edge per (UserId, ProductId) pair. Each edge
        stores a float ``timestamp`` attribute (seconds since the epoch). When
        a pair occurs more than once, the row encountered last wins.
    """
    G = nx.DiGraph()
    # Iterate the three needed columns directly instead of DataFrame.iterrows(),
    # which materialises a full Series object for every row and is very slow
    # on frames with millions of rows.
    G.add_edges_from(
        (user, product, {'timestamp': ts.timestamp()})
        for user, product, ts in zip(data['UserId'], data['ProductId'], data['timestamp'])
    )
    return G

# One user -> product graph per chronological split (train first, then test).
train_graph, test_graph = map(create_graph, (train_data, test_data))

In [None]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl.metadata (64 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.5.3


In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader

# Convert NetworkX graph to PyTorch Geometric Data object
def convert_to_pyg_data(graph, num_features=8, seed=None):
    """Convert a timestamped NetworkX graph into a PyTorch Geometric ``Data`` object.

    Nodes are numbered in ``graph.nodes()`` iteration order. Node features and
    node labels are synthetic: features are standard-normal noise and labels
    are uniformly random 0/1 values, so neither depends on the input graph.

    Args:
        graph: NetworkX graph whose edges each carry a numeric ``'timestamp'``
            attribute.
        num_features: Width of the random feature vector generated per node.
        seed: Optional integer. When given, features and labels are drawn from
            a dedicated ``torch.Generator`` seeded with it, so repeated calls
            return identical tensors. When ``None`` (default) the global torch
            RNG is used, as before.

    Returns:
        ``Data`` with ``x`` of shape (num_nodes, num_features), ``edge_index``
        of shape (2, num_edges) and dtype long, ``edge_time`` of shape
        (num_edges,) and dtype float32, and ``y`` of shape (num_nodes,).
    """
    nodes = list(graph.nodes())
    node_mapping = {node: i for i, node in enumerate(nodes)}
    edges = list(graph.edges)

    # The explicit dtype and reshape keep edge_index a (2, E) long tensor even
    # when the graph has no edges; torch.tensor([]) alone would be a float
    # tensor of shape (0,).
    edge_index = (
        torch.tensor([[node_mapping[u], node_mapping[v]] for u, v in edges], dtype=torch.long)
        .reshape(len(edges), 2)
        .t()
        .contiguous()
    )
    # NOTE(review): float32 has a 24-bit significand, so epoch-second values
    # above 2**24 are rounded when stored here -- confirm that this resolution
    # is acceptable for downstream time features.
    edge_time = torch.tensor([graph[u][v]['timestamp'] for u, v in edges], dtype=torch.float)

    generator = None if seed is None else torch.Generator().manual_seed(seed)

    # Create a feature matrix with a fixed number of (random) features per node
    x = torch.randn(len(nodes), num_features, generator=generator)

    # Random labels for the nodes (binary classification: 0 or 1)
    y = torch.randint(0, 2, (len(nodes),), generator=generator)

    return Data(x=x, edge_index=edge_index, edge_time=edge_time, y=y)

# Convert each split's graph into a PyG Data object. The random features and
# labels come from torch's global RNG, so the call order (train, then test)
# determines which random values each split receives.
train_data_pyg = convert_to_pyg_data(train_graph)
test_data_pyg = convert_to_pyg_data(test_graph)

# Each loader wraps a one-element list, so iterating it yields a single batch
# that contains the whole graph.
train_loader = DataLoader([train_data_pyg], batch_size=1, shuffle=True)
test_loader = DataLoader([test_data_pyg], batch_size=1, shuffle=False)

class HTGNN(torch.nn.Module):
    """Two-layer GCN with per-node time-embedding features between the layers.

    The first GCN layer produces hidden node features. Every edge's time value
    is mapped to a learned embedding, and the embeddings of all edges leaving
    a node (``edge_index[0]``) are summed -- not averaged -- into a per-node
    vector. That vector is concatenated with the hidden features and passed
    through the second GCN layer.

    Args:
        in_channels: Number of input features per node.
        out_channels: Number of output values per node.
        hidden_channels: Width of the first GCN layer and of each time
            embedding (default 8, the previously hard-coded value).
        num_time_buckets: Number of rows in the time-embedding table
            (default 365, the previously hard-coded value).
        debug: When True, print the shape of every intermediate tensor on each
            forward pass. Off by default so training output is not flooded.
    """

    def __init__(self, in_channels, out_channels, hidden_channels=8,
                 num_time_buckets=365, debug=False):
        super().__init__()
        self.num_time_buckets = num_time_buckets
        self.debug = debug
        # Sub-modules are created in the original order (conv1, conv2,
        # time_embedding) so parameter initialisation order is unchanged.
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(2 * hidden_channels, out_channels)
        self.time_embedding = torch.nn.Embedding(num_time_buckets, hidden_channels)

    def _debug_shape(self, label, tensor):
        """Print ``'<label> shape: <shape>'`` when debug output is enabled."""
        if self.debug:
            print(f'{label} shape: {tensor.shape}')

    def forward(self, x, edge_index, edge_time):
        """Compute per-node outputs.

        Args:
            x: Node feature matrix, one row per node.
            edge_index: Long tensor of shape (2, num_edges); row 0 holds the
                node each edge's time embedding is accumulated into.
            edge_time: One time value per edge.

        Returns:
            Tensor with one row per node and ``out_channels`` columns.
        """
        self._debug_shape('Input x', x)
        self._debug_shape('Edge index', edge_index)
        self._debug_shape('Edge time', edge_time)

        x = F.relu(self.conv1(x, edge_index))
        self._debug_shape('x after conv1', x)

        # Embedding for the edge times.
        # NOTE(review): the raw time value is reduced modulo the table size.
        # If edge_time holds epoch seconds this is "seconds % 365", not a
        # day-of-year index -- confirm the intended unit / bucketing.
        time_ids = edge_time.long().reshape(-1) % self.num_time_buckets
        time_embeds = self.time_embedding(time_ids)
        self._debug_shape('time_embeds', time_embeds)

        # Sum the edge time embeddings per source node in one vectorised call.
        # A Python loop doing `node_time_embeds[i] += ...` per edge records one
        # autograd operation per edge, which is impractically slow for
        # millions of edges.
        node_time_embeds = torch.zeros_like(x).index_add_(0, edge_index[0], time_embeds)
        self._debug_shape('node_time_embeds', node_time_embeds)

        x = torch.cat([x, node_time_embeds], dim=1)
        self._debug_shape('x after concatenation', x)

        x = self.conv2(x, edge_index)
        self._debug_shape('x after conv2', x)

        return x

# Initialize the model, loss function, and optimizer.
# The input width is taken from the training graph's feature matrix; the two
# outputs per node are scored against the integer node labels with
# cross-entropy.
model = HTGNN(in_channels=train_data_pyg.num_node_features, out_channels=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()




In [None]:
# Training function
def train(model, loader, optimizer, loss_fn):
    """Run one optimisation pass over ``loader``.

    For every batch the gradients are reset, the model is evaluated on the
    batch's ``x``, ``edge_index`` and ``edge_time``, the loss against ``y`` is
    back-propagated and the optimizer takes one step.

    Returns:
        The sum of the per-batch loss values divided by ``len(loader)``.
    """
    model.train()
    batch_losses = []
    for batch in loader:
        optimizer.zero_grad()
        logits = model(batch.x, batch.edge_index, batch.edge_time)
        batch_loss = loss_fn(logits, batch.y)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(loader)

# Evaluation function
def evaluate(model, loader):
    """Return the fraction of nodes whose arg-max prediction equals the label.

    The denominator is the total number of labelled nodes seen across all
    batches. Dividing by ``len(loader.dataset)`` (the number of graphs) would
    return a raw count of correct nodes whenever a graph has more than one
    node.

    Args:
        model: Module called as ``model(x, edge_index, edge_time)`` that
            returns one row of class scores per node.
        loader: Iterable of batches exposing ``x``, ``edge_index``,
            ``edge_time`` and ``y``.

    Returns:
        Accuracy in [0, 1]; 0.0 when the loader yields no nodes.
    """
    model.eval()
    correct = 0
    total = 0
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for data in loader:
            out = model(data.x, data.edge_index, data.edge_time)
            pred = out.argmax(dim=1)
            correct += (pred == data.y).sum().item()
            total += data.y.numel()
    return correct / total if total else 0.0

# Training loop: 100 epochs. Each epoch runs one optimisation pass over
# train_loader, then scores the model on test_loader and prints both numbers.
for epoch in range(100):
    train_loss = train(model, train_loader, optimizer, loss_fn)
    test_acc = evaluate(model, test_loader)
    print(f'Epoch {epoch}, Loss: {train_loss}, Test Accuracy: {test_acc}')

Input x shape: torch.Size([1214002, 8])
Edge index shape: torch.Size([2, 1618456])
Edge time shape: torch.Size([1618456])
x after conv1 shape: torch.Size([1214002, 8])
time_embeds shape: torch.Size([1618456, 8])
node_time_embeds shape: torch.Size([1214002, 8])
x after concatenation shape: torch.Size([1214002, 16])
x after conv2 shape: torch.Size([1214002, 2])


In [None]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.preprocessing import label_binarize

# Function to calculate MRR
def mrr_score(y_true, y_pred):
    """Mean reciprocal rank of the relevant items in one ranked list.

    Items are ranked by descending ``y_pred``; every item whose ``y_true``
    equals 1 contributes ``1 / rank`` (ranks start at 1) and the contributions
    are averaged.

    Args:
        y_true: 1-D array-like of labels; entries equal to 1 are relevant.
        y_pred: 1-D array-like of scores, same length as ``y_true``.

    Returns:
        The mean reciprocal rank as a float, or 0.0 when no item is relevant
        (instead of nan plus a RuntimeWarning from averaging an empty array).
    """
    # Accept plain lists as well as arrays: fancy indexing below needs ndarrays.
    y_true = np.asarray(y_true)
    order = np.argsort(np.asarray(y_pred))[::-1]
    ranks = np.flatnonzero(y_true[order] == 1) + 1
    if ranks.size == 0:
        return 0.0
    return float(np.mean(1.0 / ranks))

# Function to calculate NDCG
def ndcg_score(y_true, y_pred, k=10):
    """Normalised discounted cumulative gain at rank ``k`` for one ranked list.

    DCG sums ``(2**label - 1) / log2(position + 1)`` over the ``k`` items with
    the highest ``y_pred``. IDCG applies the same formula to the ``k`` largest
    labels of the *whole* list, i.e. the best achievable ordering. Normalising
    by the ideal ordering of only the retrieved top-k would hide relevant
    items that were ranked below position ``k``.

    Args:
        y_true: 1-D array-like of relevance labels.
        y_pred: 1-D array-like of scores, same length as ``y_true``.
        k: Cut-off rank; clipped to the number of items so short lists work.

    Returns:
        NDCG@k as a float, or 0.0 when the ideal DCG is zero (no relevant
        items) or there is nothing to rank.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Lists shorter than k would otherwise fail to broadcast against discounts.
    k = min(k, y_true.size)
    if k <= 0:
        return 0.0

    order = np.argsort(y_pred)[::-1]
    discounts = np.log2(np.arange(2, k + 2))

    gains = 2.0 ** y_true[order[:k]] - 1
    dcg = np.sum(gains / discounts)

    # Ideal ranking: the k largest labels of the full list, best first.
    ideal_gains = 2.0 ** np.sort(y_true)[::-1][:k] - 1
    idcg = np.sum(ideal_gains / discounts)

    return float(dcg / idcg) if idcg > 0 else 0.0

# Evaluation function with metrics
def evaluate_with_metrics(model, loader):
    """Compute classification and ranking metrics over every batch in ``loader``.

    Classification metrics (accuracy, macro precision / recall / F1) use the
    arg-max class. The ranking metrics (``mrr_score`` and ``ndcg_score``,
    defined in this notebook) are fed the predicted probability of class 1:
    ranking by hard 0/1 predictions would leave the order inside each group
    to arbitrary tie-breaking.

    Args:
        model: Module called as ``model(x, edge_index, edge_time)`` returning
            one row of class scores per node, with at least two columns.
        loader: Iterable of batches exposing ``x``, ``edge_index``,
            ``edge_time`` and ``y``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, mrr, ndcg)``.
    """
    model.eval()
    all_preds = []
    all_scores = []
    all_labels = []
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for data in loader:
            out = model(data.x, data.edge_index, data.edge_time)
            all_preds.append(out.argmax(dim=1).cpu().numpy())
            # Continuous relevance score for ranking: P(class == 1).
            all_scores.append(out.softmax(dim=1)[:, 1].cpu().numpy())
            all_labels.append(data.y.detach().cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_scores = np.concatenate(all_scores)
    all_labels = np.concatenate(all_labels)

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    mrr = mrr_score(all_labels, all_scores)
    ndcg = ndcg_score(all_labels, all_scores)

    return accuracy, precision, recall, f1, mrr, ndcg

# Function to calculate just the accuracy
def calculate_accuracy(model, loader):
    """Return the share of nodes whose arg-max prediction matches the label.

    The model is put in eval mode and applied to each batch's ``x``,
    ``edge_index`` and ``edge_time``; hits and node counts are accumulated
    over all batches before dividing.
    """
    model.eval()
    n_hits, n_seen = 0, 0
    for batch in loader:
        predicted = model(batch.x, batch.edge_index, batch.edge_time).argmax(dim=1)
        n_hits += predicted.eq(batch.y).sum().item()
        n_seen += batch.y.size(0)
    return n_hits / n_seen

# Compute all six metrics on the test loader. Only NDCG, precision, recall and
# F1 are printed here; accuracy and MRR are returned but not shown.
accuracy, precision, recall, f1, mrr, ndcg = evaluate_with_metrics(model, test_loader)
print(f' NDCG: {ndcg}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}')

# `accuracy` is recomputed (and overwritten) via calculate_accuracy before
# being reported.
accuracy = calculate_accuracy(model, test_loader)
print(f'Final Accuracy: {accuracy}')

In [None]:
import numpy as np
from scipy.stats import norm

# Two-sided z-test of the test accuracy against chance level, using the normal
# approximation to the binomial distribution.
random_accuracy = 0.5

# The sample size is the number of classified nodes. len(test_loader.dataset)
# counts graphs (a single one here), which would inflate the standard error.
n = sum(int(graph.y.numel()) for graph in test_loader.dataset)

# Standard error of a proportion under the null hypothesis.
se = np.sqrt(random_accuracy * (1 - random_accuracy) / n)

z_score = (accuracy - random_accuracy) / se

# Two-sided tail probability of the standard normal distribution.
p_value = 2 * norm.sf(np.abs(z_score))

print(f"P-value: {p_value}")