In [3]:
!pip install torch torchvision torchaudio
!pip install torch-geometric



In [7]:
import pandas as pd
import torch
from torch_geometric.data import Data
import numpy as np

# Load the datasets
data_path = "/home/smruthi-bhat/Desktop/movielens/"  # Set the path to your dataset
u_data = pd.read_csv(data_path + 'u.data', sep='\t', header=None, names=['user_id', 'item_id', 'rating', 'timestamp'])
u_item = pd.read_csv(data_path + 'u.item', sep='|', encoding='latin-1', header=None, usecols=[0, 1], names=['item_id', 'title'])
u_user = pd.read_csv(data_path + 'u.user', sep='|', header=None, names=['user_id', 'age', 'gender', 'occupation', 'zip'])

# Normalize user and item IDs to be zero-indexed
u_data['user_id'] = u_data['user_id'] - 1
u_data['item_id'] = u_data['item_id'] - 1

# Create node features (user features could be demographic, item features could be genres)
num_users = u_user.shape[0]
num_items = u_item.shape[0]

# Create user features: age, gender (one-hot encoding), occupation (one-hot encoding)
age_feature = pd.get_dummies(u_user['age'], prefix='age')
gender_feature = pd.get_dummies(u_user['gender'], prefix='gender')
occupation_feature = pd.get_dummies(u_user['occupation'], prefix='occupation')
user_features = pd.concat([age_feature, gender_feature, occupation_feature], axis=1).values

# Create item features: Here we assume you want 84 features for items as well
# This is a placeholder; you should ideally use meaningful item features (like genres)
item_features = np.zeros((num_items, user_features.shape[1]))  # Match the number of user features

# Combine user features and item features into one tensor
combined_features = np.vstack([user_features, item_features])

# Create edge index and edge attributes
edge_index = torch.tensor(u_data[['user_id', 'item_id']].values.T, dtype=torch.long)
edge_attr = torch.tensor(u_data['rating'].values, dtype=torch.float).unsqueeze(1)

# Create the PyTorch Geometric Data object
data = Data(x=torch.tensor(combined_features, dtype=torch.float),
            edge_index=edge_index,
            edge_attr=edge_attr)

print(data)

Data(x=[2625, 84], edge_index=[2, 100000], edge_attr=[100000, 1])


In [8]:
import torch.nn as nn
import torch_geometric.nn as pyg_nn

class TemporalGNN(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(TemporalGNN, self).__init__()
        self.conv1 = pyg_nn.GCNConv(in_channels, hidden_channels)
        self.conv2 = pyg_nn.GCNConv(hidden_channels, out_channels)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x, edge_index, edge_attr):
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.dropout(x)
        x = self.conv2(x, edge_index)
        return x

In [12]:
import torch.optim as optim

# Hyperparameters
embedding_dim = 64
hidden_dim = 32
output_dim = 1
num_epochs = 100
learning_rate = 0.01

# Model, loss, optimizer
model = TemporalGNN(in_channels=user_features.shape[1], hidden_channels=hidden_dim, out_channels=output_dim)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

# Move model to device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
data = data.to(device)

# Training Loop
for epoch in range(1,num_epochs+1):
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    out = model(data.x, data.edge_index, data.edge_attr)
    
    # We use the edge attributes (ratings) as target values for the loss
    target = data.edge_attr
    loss = criterion(out[data.edge_index[0]], target)
    
    # Backward pass and optimization
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

Epoch 10, Loss: 5.2033
Epoch 20, Loss: 4.3107
Epoch 30, Loss: 3.6029
Epoch 40, Loss: 3.6110
Epoch 50, Loss: 3.5099
Epoch 60, Loss: 3.4240
Epoch 70, Loss: 3.2995
Epoch 80, Loss: 3.2455
Epoch 90, Loss: 3.2465
Epoch 100, Loss: 3.1406


In [17]:
def infer(model, data, u_item):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Disable gradient calculation
        # Forward pass for inference
        predictions = model(data.x, data.edge_index, data.edge_attr)
        
        # Extract the predicted ratings based on the edge index
        predicted_ratings = predictions[data.edge_index[0]].cpu().numpy()  # Convert to NumPy array for easier manipulation

    # Prepare a DataFrame for better interpretation
    user_ids = data.edge_index[0].cpu().numpy()  # User indices
    item_ids = data.edge_index[1].cpu().numpy()  # Item indices

    # Flatten the arrays to ensure they are 1-dimensional
    user_ids = user_ids.flatten()
    item_ids = item_ids.flatten()
    predicted_ratings = predicted_ratings.flatten()

    # Create a DataFrame with user-item pairs and their predicted ratings
    results = pd.DataFrame({
        'user_id': user_ids,
        'item_id': item_ids,
        'predicted_rating': predicted_ratings
    })

    # Merge with item titles for better readability
    results = results.merge(u_item, on='item_id', how='left')

    # Optional: Interpret predictions into categories
    results['prediction_category'] = pd.cut(results['predicted_rating'], 
                                             bins=[-1, 2, 4, 5], 
                                             labels=['Dislike', 'Neutral', 'Like'])

    return results

In [18]:
# Perform inference
predicted_results = infer(model, data, u_item)

# Example: Print the first 10 results
print(predicted_results[['user_id', 'title', 'predicted_rating', 'prediction_category']].head(10))

   user_id                                   title  predicted_rating  \
0      195        Last of the Mohicans, The (1992)          4.589980   
1      185                         In & Out (1997)          4.649298   
2       21                       Houseguest (1994)          5.013661   
3      243                        Star Wars (1977)          2.365091   
4      165             Deconstructing Harry (1997)          2.539805   
5      297        James and the Giant Peach (1996)          4.057338   
6      114                            Mimic (1997)          1.543644   
7      252             Vanya on 42nd Street (1994)          1.906262   
8      304  Star Trek V: The Final Frontier (1989)          2.897184   
9        5                         Ref, The (1994)          1.866246   

  prediction_category  
0                Like  
1                Like  
2                 NaN  
3             Neutral  
4             Neutral  
5                Like  
6             Dislike  
7             D