<a href="https://colab.research.google.com/github/raniaabidi/HTGNNs/blob/main/TGN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade torch-geometric
!pip install --upgrade torch-scatter torch-sparse torch-cluster torch-spline-conv

Collecting torch-geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl.metadata (64 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-geometric
Successfully installed torch-geometric-2.5.3
Collecting torch-scatter
  Downloading torch_scatter-2.1.2.tar.gz (108 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.0/108.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting torch-sparse
  Downloading torch_sparse-0.6.18.tar.gz (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
os.environ['LC_ALL'] = 'en_US.UTF-8'

import pandas as pd
import numpy as np
import networkx as nx
from sklearn.model_selection import train_test_split

# Load the dataset
ratings = pd.read_csv('/content/drive/MyDrive/movielens/ratings_small.csv')

# Convert timestamp to datetime
ratings['timestamp'] = pd.to_datetime(ratings['timestamp'], unit='s')

# Sort by timestamp
ratings = ratings.sort_values(by='timestamp')

# Split the data into train and test sets
train_data, test_data = train_test_split(ratings, test_size=0.2, shuffle=False)

# Create graph structures with numeric timestamps
def create_graph(data):
    G = nx.DiGraph()
    for _, row in data.iterrows():
        G.add_edge(row['userId'], row['movieId'], timestamp=row['timestamp'].timestamp())
    return G

train_graph = create_graph(train_data)
test_graph = create_graph(test_data)

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader

# Convert NetworkX graph to PyTorch Geometric Data object
def convert_to_pyg_data(graph, num_features=8):
    nodes = list(graph.nodes())
    node_mapping = {node: i for i, node in enumerate(nodes)}
    edge_index = torch.tensor([[node_mapping[u], node_mapping[v]] for u, v in graph.edges]).t().contiguous()
    edge_time = torch.tensor([graph[u][v]['timestamp'] for u, v in graph.edges], dtype=torch.float)

    # Create a feature matrix with fixed number of features per node
    x = torch.randn(len(nodes), num_features)

    # Random labels for the nodes (binary classification: 0 or 1)
    y = torch.randint(0, 2, (len(nodes),))

    data = Data(x=x, edge_index=edge_index, edge_time=edge_time, y=y)
    return data

train_data_pyg = convert_to_pyg_data(train_graph)
test_data_pyg = convert_to_pyg_data(test_graph)

train_loader = DataLoader([train_data_pyg], batch_size=1, shuffle=True)
test_loader = DataLoader([test_data_pyg], batch_size=1, shuffle=False)

class TGN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(TGN, self).__init__()
        self.conv1 = GCNConv(in_channels, 16)
        self.conv2 = GCNConv(16, out_channels)
        self.time_embedding = torch.nn.Embedding(365, in_channels)  # Match in_channels to embedding size

    def forward(self, x, edge_index, edge_time):
        # Generate time embeddings for edges
        time_embeds = self.time_embedding((edge_time.long() % 365))

        # Incorporate time embeddings into node features via message passing on the edges
        x = self.conv1(x, edge_index)  # First GCN layer
        x = F.relu(x)

        # Apply time embeddings to the edges during the second GCN layer
        x = self.conv2(x, edge_index)

        return x

# Initialize the model, loss function, and optimizer
model = TGN(in_channels=train_data_pyg.num_node_features, out_channels=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

# Training function
def train(model, loader, optimizer, loss_fn):
    model.train()
    total_loss = 0
    for data in loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_time)
        loss = loss_fn(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Evaluation function
def evaluate(model, loader):
    model.eval()
    correct = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

# Training loop
for epoch in range(100):
    train_loss = train(model, train_loader, optimizer, loss_fn)
    test_acc = evaluate(model, test_loader)
    print(f'Epoch {epoch}, Loss: {train_loss}, Test Accuracy: {test_acc}')

import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Function to calculate MRR
def mrr_score(y_true, y_pred):
    order = np.argsort(y_pred)[::-1]
    ranks = np.where(y_true[order] == 1)[0] + 1
    return np.mean(1.0 / ranks)

# Function to calculate NDCG
def ndcg_score(y_true, y_pred, k=10):
    order = np.argsort(y_pred)[::-1]
    y_true = np.take(y_true, order[:k])

    gains = 2 ** y_true - 1
    discounts = np.log2(np.arange(2, k + 2))
    dcg = np.sum(gains / discounts)

    ideal_gains = 2 ** np.sort(y_true)[::-1] - 1
    idcg = np.sum(ideal_gains / discounts)

    return dcg / idcg if idcg > 0 else 0.0

# Evaluation function with metrics
def evaluate_with_metrics(model, loader):
    model.eval()
    all_preds = []
    all_labels = []
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        all_preds.append(pred.detach().cpu().numpy())
        all_labels.append(data.y.detach().cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    mrr = mrr_score(all_labels, all_preds)
    ndcg = ndcg_score(all_labels, all_preds)

    return accuracy, precision, recall, f1, mrr, ndcg

# Function to calculate just the accuracy
def calculate_accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
        total += data.y.size(0)
    accuracy = correct / total
    return accuracy

accuracy, precision, recall, f1, mrr, ndcg = evaluate_with_metrics(model, test_loader)
print(f'NDCG: {ndcg}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}')

accuracy = calculate_accuracy(model, test_loader)
print(f'Final Accuracy: {accuracy}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Epoch 0, Loss: 0.709091305732727, Test Accuracy: 2460.0
Epoch 1, Loss: 0.7049285173416138, Test Accuracy: 2457.0
Epoch 2, Loss: 0.7018870711326599, Test Accuracy: 2447.0
Epoch 3, Loss: 0.6991264820098877, Test Accuracy: 2458.0
Epoch 4, Loss: 0.6973441243171692, Test Accuracy: 2445.0
Epoch 5, Loss: 0.6963655352592468, Test Accuracy: 2461.0
Epoch 6, Loss: 0.6956021785736084, Test Accuracy: 2465.0
Epoch 7, Loss: 0.6950414776802063, Test Accuracy: 2478.0
Epoch 8, Loss: 0.6948276162147522, Test Accuracy: 2464.0
Epoch 9, Loss: 0.6948055624961853, Test Accuracy: 2478.0
Epoch 10, Loss: 0.694718062877655, Test Accuracy: 2489.0
Epoch 11, Loss: 0.6945255398750305, Test Accuracy: 2483.0
Epoch 12, Loss: 0.694344699382782, Test Accuracy: 2500.0
Epoch 13, Loss: 0.6942039132118225, Test Accuracy: 2492.0
Epoch 14, Loss: 0.6940293908119202, Test Accuracy: 2478.0
Epoch 15, Loss: 0.6937764883041382, Test Accuracy: 2473.0
Epoch 16, Loss: 0.6935113072395325, Test Accuracy: 2454.0
Epoch 17, Loss: 0.693293988

In [None]:
from google.colab import drive
import os
import pandas as pd
import numpy as np
import networkx as nx
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Mount Google Drive
drive.mount('/content/drive')
os.environ['LC_ALL'] = 'en_US.UTF-8'

# Load the user_activity dataset
ratings = pd.read_csv('/content/drive/MyDrive/user_activity.csv')

# Convert timestamp to datetime
ratings['timestamp'] = pd.to_datetime(ratings['create_timestamp'])

# Sort by timestamp
ratings = ratings.sort_values(by='timestamp')

# Split the data into train and test sets
train_data, test_data = train_test_split(ratings, test_size=0.2, shuffle=False)

# Create graph structures with numeric timestamps
def create_graph(data):
    G = nx.DiGraph()
    for _, row in data.iterrows():
        # Add edges between users and items with the timestamp
        G.add_edge(row['user_id'], row['item_id'], timestamp=row['timestamp'].timestamp())
    return G

# Convert graphs to PyTorch Geometric Data objects
def convert_to_pyg_data(graph, num_features=8):
    nodes = list(graph.nodes())
    node_mapping = {node: i for i, node in enumerate(nodes)}
    edge_index = torch.tensor([[node_mapping[u], node_mapping[v]] for u, v in graph.edges]).t().contiguous()
    edge_time = torch.tensor([graph[u][v]['timestamp'] for u, v in graph.edges], dtype=torch.float)

    # Create a feature matrix with a fixed number of features per node
    x = torch.randn(len(nodes), num_features)

    # Random binary labels for the nodes (binary classification: 0 or 1)
    y = torch.randint(0, 2, (len(nodes),))

    data = Data(x=x, edge_index=edge_index, edge_time=edge_time, y=y)
    return data

# Create graphs for train and test data
train_graph = create_graph(train_data)
test_graph = create_graph(test_data)

# Convert graphs to PyTorch Geometric Data objects
train_data_pyg = convert_to_pyg_data(train_graph)
test_data_pyg = convert_to_pyg_data(test_graph)

# Create DataLoader for training and testing
train_loader = DataLoader([train_data_pyg], batch_size=1, shuffle=True)
test_loader = DataLoader([test_data_pyg], batch_size=1, shuffle=False)

# Define the TGN model
class TGN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(TGN, self).__init__()
        self.conv1 = GCNConv(in_channels, 16)
        self.conv2 = GCNConv(16, out_channels)
        self.time_embedding = torch.nn.Embedding(365, in_channels)  # Match in_channels to embedding size

    def forward(self, x, edge_index, edge_time):
        # Generate time embeddings for edges
        time_embeds = self.time_embedding((edge_time.long() % 365))

        # Incorporate time embeddings into node features via message passing on the edges
        x = self.conv1(x, edge_index)  # First GCN layer
        x = F.relu(x)

        # Apply time embeddings to the edges during the second GCN layer
        x = self.conv2(x, edge_index)

        return x

# Initialize the model, loss function, and optimizer
model = TGN(in_channels=train_data_pyg.num_node_features, out_channels=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

# Training function
def train(model, loader, optimizer, loss_fn):
    model.train()
    total_loss = 0
    for data in loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_time)
        loss = loss_fn(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Evaluation function with metrics
def evaluate_with_metrics(model, loader):
    model.eval()
    all_preds = []
    all_labels = []
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        all_preds.append(pred.detach().cpu().numpy())
        all_labels.append(data.y.detach().cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    mrr = mrr_score(all_labels, all_preds)
    ndcg = ndcg_score(all_labels, all_preds)

    return accuracy, precision, recall, f1, mrr, ndcg

# Training loop
for epoch in range(100):
    train_loss = train(model, train_loader, optimizer, loss_fn)
    accuracy, precision, recall, f1, mrr, ndcg = evaluate_with_metrics(model, test_loader)
    print(f'Epoch {epoch}, Loss: {train_loss}, Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}, MRR: {mrr}, NDCG: {ndcg}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Epoch 0, Loss: 0.7880251407623291, Accuracy: 0.49209785667893485, Precision: 0.4917059294949475, Recall: 0.49230076692355773, F1-Score: 0.4829313789000453, MRR: 0.002145967016169201, NDCG: 0.8587959739536171
Epoch 1, Loss: 0.7706388831138611, Accuracy: 0.4951288157609872, Precision: 0.4950327255726975, Recall: 0.49530679754575463, F1-Score: 0.48815907518494017, MRR: 0.0023783348624203233, NDCG: 0.9530418785472459
Epoch 2, Loss: 0.7506056427955627, Accuracy: 0.49209785667893485, Precision: 0.492088606394656, Recall: 0.4921787145996703, F1-Score: 0.49068824228787666, MRR: 0.0020651912372923217, NDCG: 0.8110722760144377
Epoch 3, Loss: 0.736721396446228, Accuracy: 0.4938298332972505, Precision: 0.4938474660942693, Recall: 0.4938995398758629, F1-Score: 0.4927913888560119, MRR: 0.0017324511893052786, NDCG: 0.6325004241086446
Epoch 4, Loss: 0.7267962694168091, Accuracy: 0.49556180991556614, Precision: 0.4954727333716695, Recall: 0.4957489794288401, F1-Score: 0.4878443900713783, MRR: 0.0018758

In [None]:
# Import necessary libraries
from google.colab import drive
drive.mount('/content/drive')

import os
import pandas as pd
import numpy as np
import networkx as nx
from sklearn.model_selection import train_test_split
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader

# Load the dataset
ratings = pd.read_csv('/content/drive/MyDrive/ratings_Beauty.csv')

# Convert timestamp to datetime
ratings['timestamp'] = pd.to_datetime(ratings['Timestamp'], unit='s')

# Sort by timestamp
ratings = ratings.sort_values(by='timestamp')

# Split the data into train and test sets
train_data, test_data = train_test_split(ratings, test_size=0.2, shuffle=False)

# Create graph structures with numeric timestamps
def create_graph(data):
    G = nx.DiGraph()
    for _, row in data.iterrows():
        G.add_edge(row['UserId'], row['ProductId'], timestamp=row['timestamp'].timestamp())
    return G

train_graph = create_graph(train_data)
test_graph = create_graph(test_data)

# Convert NetworkX graph to PyTorch Geometric Data object
def convert_to_pyg_data(graph, num_features=8):
    nodes = list(graph.nodes())
    node_mapping = {node: i for i, node in enumerate(nodes)}
    edge_index = torch.tensor([[node_mapping[u], node_mapping[v]] for u, v in graph.edges]).t().contiguous()
    edge_time = torch.tensor([graph[u][v]['timestamp'] for u, v in graph.edges], dtype=torch.float)

    # Create a feature matrix with fixed number of features per node
    x = torch.randn(len(nodes), num_features)

    # Random labels for the nodes (binary classification: 0 or 1)
    y = torch.randint(0, 2, (len(nodes),))

    data = Data(x=x, edge_index=edge_index, edge_time=edge_time, y=y)
    return data

train_data_pyg = convert_to_pyg_data(train_graph)
test_data_pyg = convert_to_pyg_data(test_graph)

train_loader = DataLoader([train_data_pyg], batch_size=1, shuffle=True)
test_loader = DataLoader([test_data_pyg], batch_size=1, shuffle=False)

# Define the TGN model
class TGN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(TGN, self).__init__()
        self.conv1 = GCNConv(in_channels, 16)
        self.conv2 = GCNConv(16, out_channels)
        self.time_embedding = torch.nn.Embedding(365, in_channels)  # Match in_channels to embedding size

    def forward(self, x, edge_index, edge_time):
        # Generate time embeddings for edges
        time_embeds = self.time_embedding((edge_time.long() % 365))

        # Incorporate time embeddings into node features via message passing on the edges
        x = self.conv1(x, edge_index)  # First GCN layer
        x = F.relu(x)

        # Apply time embeddings to the edges during the second GCN layer
        x = self.conv2(x, edge_index)

        return x

# Initialize the model, loss function, and optimizer
model = TGN(in_channels=train_data_pyg.num_node_features, out_channels=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

# Training function
def train(model, loader, optimizer, loss_fn):
    model.train()
    total_loss = 0
    for data in loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_time)
        loss = loss_fn(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Evaluation function
def evaluate(model, loader):
    model.eval()
    correct = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

# Training loop
for epoch in range(100):
    train_loss = train(model, train_loader, optimizer, loss_fn)
    test_acc = evaluate(model, test_loader)
    print(f'Epoch {epoch}, Loss: {train_loss}, Test Accuracy: {test_acc}')

# Evaluation metrics
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Function to calculate MRR
def mrr_score(y_true, y_pred):
    order = np.argsort(y_pred)[::-1]
    ranks = np.where(y_true[order] == 1)[0] + 1
    return np.mean(1.0 / ranks)

# Function to calculate NDCG
def ndcg_score(y_true, y_pred, k=10):
    order = np.argsort(y_pred)[::-1]
    y_true = np.take(y_true, order[:k])

    gains = 2 ** y_true - 1
    discounts = np.log2(np.arange(2, k + 2))
    dcg = np.sum(gains / discounts)

    ideal_gains = 2 ** np.sort(y_true)[::-1] - 1
    idcg = np.sum(ideal_gains / discounts)

    return dcg / idcg if idcg > 0 else 0.0

# Evaluation function with metrics
def evaluate_with_metrics(model, loader):
    model.eval()
    all_preds = []
    all_labels = []
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        all_preds.append(pred.detach().cpu().numpy())
        all_labels.append(data.y.detach().cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    mrr = mrr_score(all_labels, all_preds)
    ndcg = ndcg_score(all_labels, all_preds)

    return accuracy, precision, recall, f1, mrr, ndcg

accuracy, precision, recall, f1, mrr, ndcg = evaluate_with_metrics(model, test_loader)
print(f'NDCG: {ndcg}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}')

# Function to calculate just the accuracy
def calculate_accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
        total += data.y.size(0)
    accuracy = correct / total
    return accuracy

accuracy = calculate_accuracy(model, test_loader)
print(f'Final Accuracy: {accuracy}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Epoch 0, Loss: 0.7864665985107422, Test Accuracy: 190131.0
Epoch 1, Loss: 0.767753005027771, Test Accuracy: 189903.0
Epoch 2, Loss: 0.7524439692497253, Test Accuracy: 189763.0
Epoch 3, Loss: 0.7401058077812195, Test Accuracy: 189801.0
Epoch 4, Loss: 0.7302836775779724, Test Accuracy: 189864.0
Epoch 5, Loss: 0.7226814031600952, Test Accuracy: 189817.0
Epoch 6, Loss: 0.7170183062553406, Test Accuracy: 189810.0
Epoch 7, Loss: 0.7129460573196411, Test Accuracy: 189965.0
Epoch 8, Loss: 0.7101252675056458, Test Accuracy: 190014.0
Epoch 9, Loss: 0.7082409858703613, Test Accuracy: 190017.0
Epoch 10, Loss: 0.706953227519989, Test Accuracy: 189999.0
Epoch 11, Loss: 0.7059647440910339, Test Accuracy: 190280.0
Epoch 12, Loss: 0.7050787210464478, Test Accuracy: 190218.0
Epoch 13, Loss: 0.7041748762130737, Test Accuracy: 190249.0
Epoch 14, Loss: 0.703193187713623, Test Accuracy: 190303.0
Epoch 15, Loss: 0.7021345496177673, Test Accuracy: 190248.0
Epoch 16, Loss: 0.7010471820831299, Test Accuracy: 19

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
os.environ['LC_ALL'] = 'en_US.UTF-8'

import pandas as pd
import numpy as np
import networkx as nx
from sklearn.model_selection import train_test_split

# Load the dataset
ratings = pd.read_csv('/content/drive/MyDrive/movielens/ratings_small.csv')

# Convert timestamp to datetime and extract the year
ratings['timestamp'] = pd.to_datetime(ratings['timestamp'], unit='s')
ratings['year'] = ratings['timestamp'].dt.year

# Sort by year
ratings = ratings.sort_values(by='year')

# Split the data into train and test sets
train_data, test_data = train_test_split(ratings, test_size=0.2, shuffle=False)

# Create graph structures with years instead of numeric timestamps
def create_graph(data):
    G = nx.DiGraph()
    for _, row in data.iterrows():
        G.add_edge(row['userId'], row['movieId'], timestamp=row['year'])  # Use year instead of timestamp
    return G

train_graph = create_graph(train_data)
test_graph = create_graph(test_data)

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader

# Convert NetworkX graph to PyTorch Geometric Data object
def convert_to_pyg_data(graph, num_features=8):
    nodes = list(graph.nodes())
    node_mapping = {node: i for i, node in enumerate(nodes)}
    edge_index = torch.tensor([[node_mapping[u], node_mapping[v]] for u, v in graph.edges]).t().contiguous()
    edge_time = torch.tensor([graph[u][v]['timestamp'] for u, v in graph.edges], dtype=torch.float)  # year instead of timestamp

    # Create a feature matrix with fixed number of features per node
    x = torch.randn(len(nodes), num_features)

    # Random labels for the nodes (binary classification: 0 or 1)
    y = torch.randint(0, 2, (len(nodes),))

    data = Data(x=x, edge_index=edge_index, edge_time=edge_time, y=y)
    return data

train_data_pyg = convert_to_pyg_data(train_graph)
test_data_pyg = convert_to_pyg_data(test_graph)

train_loader = DataLoader([train_data_pyg], batch_size=1, shuffle=True)
test_loader = DataLoader([test_data_pyg], batch_size=1, shuffle=False)

class TGN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(TGN, self).__init__()
        self.conv1 = GCNConv(in_channels, 16)
        self.conv2 = GCNConv(16, out_channels)
        self.time_embedding = torch.nn.Embedding(365, in_channels)  # Match in_channels to embedding size

    def forward(self, x, edge_index, edge_time):
        # Generate time embeddings for edges
        time_embeds = self.time_embedding((edge_time.long() % 365))  # Modulo by 365 for yearly cycles

        # Incorporate time embeddings into node features via message passing on the edges
        x = self.conv1(x, edge_index)  # First GCN layer
        x = F.relu(x)

        # Apply time embeddings to the edges during the second GCN layer
        x = self.conv2(x, edge_index)

        return x

# Initialize the model, loss function, and optimizer
model = TGN(in_channels=train_data_pyg.num_node_features, out_channels=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

# Training function
def train(model, loader, optimizer, loss_fn):
    model.train()
    total_loss = 0
    for data in loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_time)
        loss = loss_fn(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Evaluation function
def evaluate(model, loader):
    model.eval()
    correct = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

# Training loop
for epoch in range(100):
    train_loss = train(model, train_loader, optimizer, loss_fn)
    test_acc = evaluate(model, test_loader)
    print(f'Epoch {epoch}, Loss: {train_loss}, Test Accuracy: {test_acc}')

# Additional metrics for evaluation
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Function to calculate MRR
def mrr_score(y_true, y_pred):
    order = np.argsort(y_pred)[::-1]
    ranks = np.where(y_true[order] == 1)[0] + 1
    return np.mean(1.0 / ranks)

# Function to calculate NDCG
def ndcg_score(y_true, y_pred, k=10):
    order = np.argsort(y_pred)[::-1]
    y_true = np.take(y_true, order[:k])

    gains = 2 ** y_true - 1
    discounts = np.log2(np.arange(2, k + 2))
    dcg = np.sum(gains / discounts)

    ideal_gains = 2 ** np.sort(y_true)[::-1] - 1
    idcg = np.sum(ideal_gains / discounts)

    return dcg / idcg if idcg > 0 else 0.0

# Evaluation function with additional metrics
def evaluate_with_metrics(model, loader):
    model.eval()
    all_preds = []
    all_labels = []
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        all_preds.append(pred.detach().cpu().numpy())
        all_labels.append(data.y.detach().cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    mrr = mrr_score(all_labels, all_preds)
    ndcg = ndcg_score(all_labels, all_preds)

    return accuracy, precision, recall, f1, mrr, ndcg

# Final evaluation
accuracy, precision, recall, f1, mrr, ndcg = evaluate_with_metrics(model, test_loader)
print(f'NDCG: {ndcg}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}')

# Final accuracy calculation
def calculate_accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
        total += data.y.size(0)
    accuracy = correct / total
    return accuracy

accuracy = calculate_accuracy(model, test_loader)
print(f'Final Accuracy: {accuracy}')

Mounted at /content/drive




Epoch 0, Loss: 0.7729452848434448, Test Accuracy: 2443.0
Epoch 1, Loss: 0.7366364002227783, Test Accuracy: 2447.0
Epoch 2, Loss: 0.713729977607727, Test Accuracy: 2448.0
Epoch 3, Loss: 0.7045156955718994, Test Accuracy: 2476.0
Epoch 4, Loss: 0.705981969833374, Test Accuracy: 2443.0
Epoch 5, Loss: 0.7114231586456299, Test Accuracy: 2440.0
Epoch 6, Loss: 0.714838445186615, Test Accuracy: 2452.0
Epoch 7, Loss: 0.7142521142959595, Test Accuracy: 2455.0
Epoch 8, Loss: 0.7105098962783813, Test Accuracy: 2455.0
Epoch 9, Loss: 0.7053887844085693, Test Accuracy: 2422.0
Epoch 10, Loss: 0.7005578875541687, Test Accuracy: 2430.0
Epoch 11, Loss: 0.6971607804298401, Test Accuracy: 2446.0
Epoch 12, Loss: 0.6956380009651184, Test Accuracy: 2428.0
Epoch 13, Loss: 0.6957695484161377, Test Accuracy: 2449.0
Epoch 14, Loss: 0.6968568563461304, Test Accuracy: 2442.0
Epoch 15, Loss: 0.6980480551719666, Test Accuracy: 2439.0
Epoch 16, Loss: 0.6986843347549438, Test Accuracy: 2426.0
Epoch 17, Loss: 0.698495090

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
os.environ['LC_ALL'] = 'en_US.UTF-8'

import pandas as pd
import numpy as np
import networkx as nx
from sklearn.model_selection import train_test_split

# Load the dataset
ratings = pd.read_csv('/content/drive/MyDrive/movielens/ratings_small.csv')

# Convert timestamp to datetime and extract the month
ratings['timestamp'] = pd.to_datetime(ratings['timestamp'], unit='s')
ratings['month'] = ratings['timestamp'].dt.month

# Sort by month
ratings = ratings.sort_values(by='month')

# Split the data into train and test sets
train_data, test_data = train_test_split(ratings, test_size=0.2, shuffle=False)

# Create graph structures with months instead of numeric timestamps
def create_graph(data):
    G = nx.DiGraph()
    for _, row in data.iterrows():
        G.add_edge(row['userId'], row['movieId'], timestamp=row['month'])  # Use month instead of timestamp
    return G

train_graph = create_graph(train_data)
test_graph = create_graph(test_data)

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader

# Convert NetworkX graph to PyTorch Geometric Data object
def convert_to_pyg_data(graph, num_features=8):
    nodes = list(graph.nodes())
    node_mapping = {node: i for i, node in enumerate(nodes)}
    edge_index = torch.tensor([[node_mapping[u], node_mapping[v]] for u, v in graph.edges]).t().contiguous()
    edge_time = torch.tensor([graph[u][v]['timestamp'] for u, v in graph.edges], dtype=torch.float)  # Use month instead of timestamp

    # Create a feature matrix with a fixed number of features per node
    x = torch.randn(len(nodes), num_features)

    # Random labels for the nodes (binary classification: 0 or 1)
    y = torch.randint(0, 2, (len(nodes),))

    data = Data(x=x, edge_index=edge_index, edge_time=edge_time, y=y)
    return data

train_data_pyg = convert_to_pyg_data(train_graph)
test_data_pyg = convert_to_pyg_data(test_graph)

train_loader = DataLoader([train_data_pyg], batch_size=1, shuffle=True)
test_loader = DataLoader([test_data_pyg], batch_size=1, shuffle=False)

class TGN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(TGN, self).__init__()
        self.conv1 = GCNConv(in_channels, 16)
        self.conv2 = GCNConv(16, out_channels)
        self.time_embedding = torch.nn.Embedding(12, in_channels)  # Use 12 months instead of days

    def forward(self, x, edge_index, edge_time):
        # Generate time embeddings for edges
        time_embeds = self.time_embedding(edge_time.long() % 12)  # Modulo by 12 for months

        # Incorporate time embeddings into node features via message passing on the edges
        x = self.conv1(x, edge_index)  # First GCN layer
        x = F.relu(x)

        # Apply time embeddings to the edges during the second GCN layer
        x = self.conv2(x, edge_index)

        return x

# Initialize the model, loss function, and optimizer
model = TGN(in_channels=train_data_pyg.num_node_features, out_channels=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

# Training function
def train(model, loader, optimizer, loss_fn):
    model.train()
    total_loss = 0
    for data in loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_time)
        loss = loss_fn(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Evaluation function
def evaluate(model, loader):
    model.eval()
    correct = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

# Training loop
for epoch in range(100):
    train_loss = train(model, train_loader, optimizer, loss_fn)
    test_acc = evaluate(model, test_loader)
    print(f'Epoch {epoch}, Loss: {train_loss}, Test Accuracy: {test_acc}')

# Additional metrics for evaluation
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Function to calculate MRR
def mrr_score(y_true, y_pred):
    order = np.argsort(y_pred)[::-1]
    ranks = np.where(y_true[order] == 1)[0] + 1
    return np.mean(1.0 / ranks)

# Function to calculate NDCG
def ndcg_score(y_true, y_pred, k=10):
    order = np.argsort(y_pred)[::-1]
    y_true = np.take(y_true, order[:k])

    gains = 2 ** y_true - 1
    discounts = np.log2(np.arange(2, k + 2))
    dcg = np.sum(gains / discounts)

    ideal_gains = 2 ** np.sort(y_true)[::-1] - 1
    idcg = np.sum(ideal_gains / discounts)

    return dcg / idcg if idcg > 0 else 0.0

# Evaluation function with additional metrics
def evaluate_with_metrics(model, loader):
    model.eval()
    all_preds = []
    all_labels = []
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        all_preds.append(pred.detach().cpu().numpy())
        all_labels.append(data.y.detach().cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    mrr = mrr_score(all_labels, all_preds)
    ndcg = ndcg_score(all_labels, all_preds)

    return accuracy, precision, recall, f1, mrr, ndcg

# Final evaluation
accuracy, precision, recall, f1, mrr, ndcg = evaluate_with_metrics(model, test_loader)
print(f'NDCG: {ndcg}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}')

# Final accuracy calculation
def calculate_accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
        total += data.y.size(0)
    accuracy = correct / total
    return accuracy

accuracy = calculate_accuracy(model, test_loader)
print(f'Final Accuracy: {accuracy}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Epoch 0, Loss: 0.9147045612335205, Test Accuracy: 2392.0
Epoch 1, Loss: 0.8631607890129089, Test Accuracy: 2394.0
Epoch 2, Loss: 0.8186883926391602, Test Accuracy: 2398.0
Epoch 3, Loss: 0.7814255356788635, Test Accuracy: 2417.0
Epoch 4, Loss: 0.7515698671340942, Test Accuracy: 2406.0
Epoch 5, Loss: 0.7293709516525269, Test Accuracy: 2414.0
Epoch 6, Loss: 0.7148250937461853, Test Accuracy: 2438.0
Epoch 7, Loss: 0.707529604434967, Test Accuracy: 2405.0
Epoch 8, Loss: 0.7062758803367615, Test Accuracy: 2407.0
Epoch 9, Loss: 0.7090039253234863, Test Accuracy: 2397.0
Epoch 10, Loss: 0.71335768699646, Test Accuracy: 2375.0
Epoch 11, Loss: 0.7173429131507874, Test Accuracy: 2382.0
Epoch 12, Loss: 0.7197471261024475, Test Accuracy: 2390.0
Epoch 13, Loss: 0.7201870679855347, Test Accuracy: 2385.0
Epoch 14, Loss: 0.7188341021537781, Test Accuracy: 2378.0
Epoch 15, Loss: 0.7161431908607483, Test Accuracy: 2370.0
Epoch 16, Loss: 0.7126142382621765, Test Accuracy: 2376.0
Epoch 17, Loss: 0.708746850

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
os.environ['LC_ALL'] = 'en_US.UTF-8'

import pandas as pd
import numpy as np
import networkx as nx
from sklearn.model_selection import train_test_split

# Load the dataset
ratings = pd.read_csv('/content/drive/MyDrive/movielens/ratings_small.csv')

# Convert timestamp to datetime and extract the hour
ratings['timestamp'] = pd.to_datetime(ratings['timestamp'], unit='s')
ratings['hour'] = ratings['timestamp'].dt.hour

# Sort by hour
ratings = ratings.sort_values(by='hour')

# Split the data into train and test sets
train_data, test_data = train_test_split(ratings, test_size=0.2, shuffle=False)

# Create graph structures with hours instead of numeric timestamps
def create_graph(data):
    G = nx.DiGraph()
    for _, row in data.iterrows():
        G.add_edge(row['userId'], row['movieId'], timestamp=row['hour'])  # Use hour instead of timestamp
    return G

train_graph = create_graph(train_data)
test_graph = create_graph(test_data)

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader

# Convert NetworkX graph to PyTorch Geometric Data object
def convert_to_pyg_data(graph, num_features=8):
    nodes = list(graph.nodes())
    node_mapping = {node: i for i, node in enumerate(nodes)}
    edge_index = torch.tensor([[node_mapping[u], node_mapping[v]] for u, v in graph.edges]).t().contiguous()
    edge_time = torch.tensor([graph[u][v]['timestamp'] for u, v in graph.edges], dtype=torch.float)  # Use hour instead of timestamp

    # Create a feature matrix with a fixed number of features per node
    x = torch.randn(len(nodes), num_features)

    # Random labels for the nodes (binary classification: 0 or 1)
    y = torch.randint(0, 2, (len(nodes),))

    data = Data(x=x, edge_index=edge_index, edge_time=edge_time, y=y)
    return data

train_data_pyg = convert_to_pyg_data(train_graph)
test_data_pyg = convert_to_pyg_data(test_graph)

train_loader = DataLoader([train_data_pyg], batch_size=1, shuffle=True)
test_loader = DataLoader([test_data_pyg], batch_size=1, shuffle=False)

class TGN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(TGN, self).__init__()
        self.conv1 = GCNConv(in_channels, 16)
        self.conv2 = GCNConv(16, out_channels)
        self.time_embedding = torch.nn.Embedding(24, in_channels)  # Use 24 hours instead of months

    def forward(self, x, edge_index, edge_time):
        # Generate time embeddings for edges
        time_embeds = self.time_embedding(edge_time.long() % 24)  # Modulo by 24 for hours

        # Incorporate time embeddings into node features via message passing on the edges
        x = self.conv1(x, edge_index)  # First GCN layer
        x = F.relu(x)

        # Apply time embeddings to the edges during the second GCN layer
        x = self.conv2(x, edge_index)

        return x

# Initialize the model, loss function, and optimizer
model = TGN(in_channels=train_data_pyg.num_node_features, out_channels=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

# Training function
def train(model, loader, optimizer, loss_fn):
    model.train()
    total_loss = 0
    for data in loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_time)
        loss = loss_fn(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Evaluation function
def evaluate(model, loader):
    model.eval()
    correct = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

# Training loop
for epoch in range(100):
    train_loss = train(model, train_loader, optimizer, loss_fn)
    test_acc = evaluate(model, test_loader)
    print(f'Epoch {epoch}, Loss: {train_loss}, Test Accuracy: {test_acc}')

# Additional metrics for evaluation
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Function to calculate MRR
def mrr_score(y_true, y_pred):
    order = np.argsort(y_pred)[::-1]
    ranks = np.where(y_true[order] == 1)[0] + 1
    return np.mean(1.0 / ranks)

# Function to calculate NDCG
def ndcg_score(y_true, y_pred, k=10):
    order = np.argsort(y_pred)[::-1]
    y_true = np.take(y_true, order[:k])

    gains = 2 ** y_true - 1
    discounts = np.log2(np.arange(2, k + 2))
    dcg = np.sum(gains / discounts)

    ideal_gains = 2 ** np.sort(y_true)[::-1] - 1
    idcg = np.sum(ideal_gains / discounts)

    return dcg / idcg if idcg > 0 else 0.0

# Evaluation function with additional metrics
def evaluate_with_metrics(model, loader):
    model.eval()
    all_preds = []
    all_labels = []
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        all_preds.append(pred.detach().cpu().numpy())
        all_labels.append(data.y.detach().cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    mrr = mrr_score(all_labels, all_preds)
    ndcg = ndcg_score(all_labels, all_preds)

    return accuracy, precision, recall, f1, mrr, ndcg

# Final evaluation
accuracy, precision, recall, f1, mrr, ndcg = evaluate_with_metrics(model, test_loader)
print(f'NDCG: {ndcg}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}')

# Final accuracy calculation
def calculate_accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
        total += data.y.size(0)
    accuracy = correct / total
    return accuracy

accuracy = calculate_accuracy(model, test_loader)
print(f'Final Accuracy: {accuracy}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Epoch 0, Loss: 0.7035441398620605, Test Accuracy: 2456.0
Epoch 1, Loss: 0.7022198438644409, Test Accuracy: 2456.0
Epoch 2, Loss: 0.700287401676178, Test Accuracy: 2445.0
Epoch 3, Loss: 0.6982542276382446, Test Accuracy: 2427.0
Epoch 4, Loss: 0.6974003911018372, Test Accuracy: 2424.0
Epoch 5, Loss: 0.6969702839851379, Test Accuracy: 2427.0
Epoch 6, Loss: 0.6961584091186523, Test Accuracy: 2407.0
Epoch 7, Loss: 0.6952204704284668, Test Accuracy: 2464.0
Epoch 8, Loss: 0.6945876479148865, Test Accuracy: 2461.0
Epoch 9, Loss: 0.6942571401596069, Test Accuracy: 2438.0
Epoch 10, Loss: 0.6939384341239929, Test Accuracy: 2444.0
Epoch 11, Loss: 0.6935242414474487, Test Accuracy: 2443.0
Epoch 12, Loss: 0.6931489109992981, Test Accuracy: 2420.0
Epoch 13, Loss: 0.6929462552070618, Test Accuracy: 2422.0
Epoch 14, Loss: 0.6928694844245911, Test Accuracy: 2439.0
Epoch 15, Loss: 0.6927764415740967, Test Accuracy: 2438.0
Epoch 16, Loss: 0.6925996541976929, Test Accuracy: 2443.0
Epoch 17, Loss: 0.6923976

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
os.environ['LC_ALL'] = 'en_US.UTF-8'

import pandas as pd
import numpy as np
import networkx as nx
from sklearn.model_selection import train_test_split

# Load the dataset
ratings = pd.read_csv('/content/drive/MyDrive/movielens/ratings_small.csv')

# Convert timestamp to datetime and extract the minute
ratings['timestamp'] = pd.to_datetime(ratings['timestamp'], unit='s')
ratings['minute'] = ratings['timestamp'].dt.minute

# Sort by minute
ratings = ratings.sort_values(by='minute')

# Split the data into train and test sets
train_data, test_data = train_test_split(ratings, test_size=0.2, shuffle=False)

# Create graph structures with minutes instead of numeric timestamps
def create_graph(data):
    G = nx.DiGraph()
    for _, row in data.iterrows():
        G.add_edge(row['userId'], row['movieId'], timestamp=row['minute'])  # Use minute instead of timestamp
    return G

train_graph = create_graph(train_data)
test_graph = create_graph(test_data)

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader

# Convert NetworkX graph to PyTorch Geometric Data object
def convert_to_pyg_data(graph, num_features=8):
    nodes = list(graph.nodes())
    node_mapping = {node: i for i, node in enumerate(nodes)}
    edge_index = torch.tensor([[node_mapping[u], node_mapping[v]] for u, v in graph.edges]).t().contiguous()
    edge_time = torch.tensor([graph[u][v]['timestamp'] for u, v in graph.edges], dtype=torch.float)  # Use minute instead of timestamp

    # Create a feature matrix with a fixed number of features per node
    x = torch.randn(len(nodes), num_features)

    # Random labels for the nodes (binary classification: 0 or 1)
    y = torch.randint(0, 2, (len(nodes),))

    data = Data(x=x, edge_index=edge_index, edge_time=edge_time, y=y)
    return data

train_data_pyg = convert_to_pyg_data(train_graph)
test_data_pyg = convert_to_pyg_data(test_graph)

train_loader = DataLoader([train_data_pyg], batch_size=1, shuffle=True)
test_loader = DataLoader([test_data_pyg], batch_size=1, shuffle=False)

class TGN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(TGN, self).__init__()
        self.conv1 = GCNConv(in_channels, 16)
        self.conv2 = GCNConv(16, out_channels)
        self.time_embedding = torch.nn.Embedding(60, in_channels)  # Use 60 minutes instead of hours

    def forward(self, x, edge_index, edge_time):
        # Generate time embeddings for edges
        time_embeds = self.time_embedding(edge_time.long() % 60)  # Modulo by 60 for minutes

        # Incorporate time embeddings into node features via message passing on the edges
        x = self.conv1(x, edge_index)  # First GCN layer
        x = F.relu(x)

        # Apply time embeddings to the edges during the second GCN layer
        x = self.conv2(x, edge_index)

        return x

# Initialize the model, loss function, and optimizer
model = TGN(in_channels=train_data_pyg.num_node_features, out_channels=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

# Training function
def train(model, loader, optimizer, loss_fn):
    model.train()
    total_loss = 0
    for data in loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_time)
        loss = loss_fn(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Evaluation function
def evaluate(model, loader):
    model.eval()
    correct = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

# Training loop
for epoch in range(100):
    train_loss = train(model, train_loader, optimizer, loss_fn)
    test_acc = evaluate(model, test_loader)
    print(f'Epoch {epoch}, Loss: {train_loss}, Test Accuracy: {test_acc}')

# Additional metrics for evaluation
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Function to calculate MRR
def mrr_score(y_true, y_pred):
    order = np.argsort(y_pred)[::-1]
    ranks = np.where(y_true[order] == 1)[0] + 1
    return np.mean(1.0 / ranks)

# Function to calculate NDCG
def ndcg_score(y_true, y_pred, k=10):
    order = np.argsort(y_pred)[::-1]
    y_true = np.take(y_true, order[:k])

    gains = 2 ** y_true - 1
    discounts = np.log2(np.arange(2, k + 2))
    dcg = np.sum(gains / discounts)

    ideal_gains = 2 ** np.sort(y_true)[::-1] - 1
    idcg = np.sum(ideal_gains / discounts)

    return dcg / idcg if idcg > 0 else 0.0

# Evaluation function with additional metrics
def evaluate_with_metrics(model, loader):
    model.eval()
    all_preds = []
    all_labels = []
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        all_preds.append(pred.detach().cpu().numpy())
        all_labels.append(data.y.detach().cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    mrr = mrr_score(all_labels, all_preds)
    ndcg = ndcg_score(all_labels, all_preds)

    return accuracy, precision, recall, f1, mrr, ndcg

# Final evaluation
accuracy, precision, recall, f1, mrr, ndcg = evaluate_with_metrics(model, test_loader)
print(f'NDCG: {ndcg}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}')

# Final accuracy calculation
def calculate_accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
        total += data.y.size(0)
    accuracy = correct / total
    return accuracy

accuracy = calculate_accuracy(model, test_loader)
print(f'Final Accuracy: {accuracy}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Epoch 0, Loss: 0.7439812421798706, Test Accuracy: 2557.0
Epoch 1, Loss: 0.7253974676132202, Test Accuracy: 2495.0
Epoch 2, Loss: 0.7182537913322449, Test Accuracy: 2489.0
Epoch 3, Loss: 0.7160478830337524, Test Accuracy: 2494.0
Epoch 4, Loss: 0.7134988307952881, Test Accuracy: 2502.0
Epoch 5, Loss: 0.7097679376602173, Test Accuracy: 2493.0
Epoch 6, Loss: 0.705613911151886, Test Accuracy: 2489.0
Epoch 7, Loss: 0.702096700668335, Test Accuracy: 2490.0
Epoch 8, Loss: 0.6998295783996582, Test Accuracy: 2482.0
Epoch 9, Loss: 0.698858380317688, Test Accuracy: 2475.0
Epoch 10, Loss: 0.6987568736076355, Test Accuracy: 2498.0
Epoch 11, Loss: 0.6988451480865479, Test Accuracy: 2500.0
Epoch 12, Loss: 0.6986638307571411, Test Accuracy: 2499.0
Epoch 13, Loss: 0.6981428861618042, Test Accuracy: 2486.0
Epoch 14, Loss: 0.6974668502807617, Test Accuracy: 2478.0
Epoch 15, Loss: 0.6968567967414856, Test Accuracy: 2485.0
Epoch 16, Loss: 0.6964206695556641, Test Accuracy: 2471.0
Epoch 17, Loss: 0.696114778

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
os.environ['LC_ALL'] = 'en_US.UTF-8'

import pandas as pd
import numpy as np
import networkx as nx
from sklearn.model_selection import train_test_split

# Load the dataset
ratings = pd.read_csv('/content/drive/MyDrive/movielens/ratings_small.csv')

# Convert timestamp to datetime and extract the month
ratings['timestamp'] = pd.to_datetime(ratings['timestamp'], unit='s')
ratings['month'] = ratings['timestamp'].dt.month

# Sort by month
ratings = ratings.sort_values(by='month')

# Split the data into train and test sets
train_data, test_data = train_test_split(ratings, test_size=0.2, shuffle=False)

# Create graph structures with month instead of full timestamp
def create_graph(data):
    G = nx.DiGraph()
    for _, row in data.iterrows():
        G.add_edge(row['userId'], row['movieId'], month=row['month'])
    return G

train_graph = create_graph(train_data)
test_graph = create_graph(test_data)

!pip install torch_geometric

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader

# Convert NetworkX graph to PyTorch Geometric Data object
def convert_to_pyg_data(graph, num_features=8):
    nodes = list(graph.nodes())
    node_mapping = {node: i for i, node in enumerate(nodes)}
    edge_index = torch.tensor([[node_mapping[u], node_mapping[v]] for u, v in graph.edges]).t().contiguous()
    edge_month = torch.tensor([graph[u][v]['month'] for u, v in graph.edges], dtype=torch.float)

    # Create a feature matrix with a fixed number of features per node
    x = torch.randn(len(nodes), num_features)

    # Random labels for the nodes (binary classification: 0 or 1)
    y = torch.randint(0, 2, (len(nodes),))

    data = Data(x=x, edge_index=edge_index, edge_time=edge_month, y=y)
    return data

train_data_pyg = convert_to_pyg_data(train_graph)
test_data_pyg = convert_to_pyg_data(test_graph)

train_loader = DataLoader([train_data_pyg], batch_size=1, shuffle=True)
test_loader = DataLoader([test_data_pyg], batch_size=1, shuffle=False)

class HTGNN(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(HTGNN, self).__init__()
        self.conv1 = GCNConv(in_channels, 8)
        self.conv2 = GCNConv(8 + 8, out_channels)
        self.time_embedding = torch.nn.Embedding(12, 8)  # Embedding for 12 months

    def forward(self, x, edge_index, edge_time):
        print(f'Input x shape: {x.shape}')
        print(f'Edge index shape: {edge_index.shape}')
        print(f'Edge time shape: {edge_time.shape}')

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        print(f'x after conv1 shape: {x.shape}')

        # Embedding for the edge times (months)
        time_embeds = self.time_embedding((edge_time.long() % 12).view(-1, 1)).view(-1, 8)
        print(f'time_embeds shape: {time_embeds.shape}')

        # Average the edge time embeddings per node
        node_time_embeds = torch.zeros_like(x)
        for i in range(edge_index.size(1)):
            node_time_embeds[edge_index[0, i]] += time_embeds[i]
        print(f'node_time_embeds shape: {node_time_embeds.shape}')

        x = torch.cat([x, node_time_embeds], dim=1)
        print(f'x after concatenation shape: {x.shape}')

        x = self.conv2(x, edge_index)
        print(f'x after conv2 shape: {x.shape}')

        return x

# Initialize the model, loss function, and optimizer
model = HTGNN(in_channels=train_data_pyg.num_node_features, out_channels=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

# Training function
def train(model, loader, optimizer, loss_fn):
    model.train()
    total_loss = 0
    for data in loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_time)
        loss = loss_fn(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Evaluation function
def evaluate(model, loader):
    model.eval()
    correct = 0
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

# Training loop
for epoch in range(100):
    train_loss = train(model, train_loader, optimizer, loss_fn)
    test_acc = evaluate(model, test_loader)
    print(f'Epoch {epoch}, Loss: {train_loss}, Test Accuracy: {test_acc}')

import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Function to calculate MRR
def mrr_score(y_true, y_pred):
    order = np.argsort(y_pred)[::-1]
    ranks = np.where(y_true[order] == 1)[0] + 1
    return np.mean(1.0 / ranks)

# Function to calculate NDCG
def ndcg_score(y_true, y_pred, k=10):
    order = np.argsort(y_pred)[::-1]
    y_true = np.take(y_true, order[:k])

    gains = 2 ** y_true - 1
    discounts = np.log2(np.arange(2, k + 2))
    dcg = np.sum(gains / discounts)

    ideal_gains = 2 ** np.sort(y_true)[::-1] - 1
    idcg = np.sum(ideal_gains / discounts)

    return dcg / idcg if idcg > 0 else 0.0

# Evaluation function with metrics
def evaluate_with_metrics(model, loader):
    model.eval()
    all_preds = []
    all_labels = []
    for data in loader:
        out = model(data.x, data.edge_index, data.edge_time)
        pred = out.argmax(dim=1)
        all_preds.append(pred.detach().cpu().numpy())
        all_labels.append(data.y.detach().cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    mrr = mrr_score(all_labels, all_preds)
    ndcg = ndcg_score(all_labels, all_preds)

    return accuracy, precision, recall, f1, mrr, ndcg

# Training loop
for epoch in range(100):
    train(model, train_loader, optimizer, loss_fn)
    accuracy, precision, recall, f1, mrr, ndcg = evaluate_with_metrics(model, test_loader)
    print(f'Epoch {epoch}, Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-Score: {f1}, MRR: {mrr}, NDCG: {ndcg}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Input x shape: torch.Size([8601, 8])
Edge index shape: torch.Size([2, 80003])
Edge time shape: torch.Size([80003])
x after conv1 shape: torch.Size([8601, 8])
time_embeds shape: torch.Size([80003, 8])
node_time_embeds shape: torch.Size([8601, 8])
x after concatenation shape: torch.Size([8601, 16])
x after conv2 shape: torch.Size([8601, 2])
Input x shape: torch.Size([4814, 8])
Edge index shape: torch.Size([2, 20001])
Edge time shape: torch.Size([20001])
x after conv1 shape: torch.Size([4814, 8])
time_embeds shape: torch.Size([20001, 8])
node_time_embeds shape: torch.Size([4814, 8])
x after concatenation shape: torch.Size([4814, 16])
x after conv2 shape: torch.Size([4814, 2])
Epoch 0, Loss: 65.18610382080078, Test Accuracy: 2416.0
Input x shape: torch.Size([8601, 8])
Edge index shape: torch.Size([2, 80003])
Edge time shape: torch.Size([80003])
x after conv1 shape: torch.Size([8601, 8])
time_embeds shape: torch.Size([80003, 8])
node_time_embeds shape: torch.Size([8601, 8])
x after concaten

KeyboardInterrupt: 