In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import scipy.sparse as sp
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import ndcg_score
import numpy as np
import os
from utils import csr2torch, recall_at_k, ndcg_at_k
import math 
import random
from models import *

random.seed(2022)
np.random.seed(2022)


In [2]:
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
current_directory = os.getcwd()


# Define KAN-based Autoencoder
class KANAutoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dims, grid_size=5, spline_order=3):
        super(KANAutoencoder, self).__init__()
        self.encoder = KAN([input_dim] + hidden_dims, grid_size, spline_order)
        self.decoder = KAN(hidden_dims[::-1] + [input_dim], grid_size, spline_order)

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

    def regularization_loss(self, regularize_activation=1.0, regularize_entropy=1.0):
        return self.encoder.regularization_loss(regularize_activation, regularize_entropy) + \
               self.decoder.regularization_loss(regularize_activation, regularize_entropy)



# Training parameters
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
epochs = 10
batch_size = 256
learning_rate = 0.001
hidden_dims = [128, 64, 32]
verbose = 1

# Load dataset
dataset = "ml-1m"
path_tr = f"{current_directory}/dataset/{dataset}_train.npz"
path_ts = f"{current_directory}/dataset/{dataset}_test.npz"
R_tr = csr2torch(sp.load_npz(path_tr)).to(device)
R_ts = csr2torch(sp.load_npz(path_ts)).to(device)

n_users = R_tr.shape[0]
n_items = R_tr.shape[1]
if verbose:
    print(f"number of users: {n_users}")
    print(f"number of items: {n_items}")

# Create DataLoader
train_dataset = TensorDataset(R_tr)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize model, optimizer, and loss function
model = KANAutoencoder(n_items, hidden_dims).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

# Training loop
for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    for batch in train_loader:
        inputs = batch[0].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, inputs) + model.regularization_loss()
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * inputs.size(0)

    train_loss /= len(train_loader.dataset)
    if verbose:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {train_loss:.4f}")





  torch.LongTensor([coo.row, coo.col]),


number of users: 5949
number of items: 2810
Epoch 1/10, Loss: 1793.2622
Epoch 2/10, Loss: 318.6188
Epoch 3/10, Loss: 171.7812
Epoch 4/10, Loss: 151.0540
Epoch 5/10, Loss: 144.9136
Epoch 6/10, Loss: 141.7007
Epoch 7/10, Loss: 139.8040
Epoch 8/10, Loss: 138.9353
Epoch 9/10, Loss: 138.7164
Epoch 10/10, Loss: 138.9347


In [3]:
# Evaluation metrics
def recall_at_k(predictions, targets, k):
    top_k_preds = torch.topk(predictions, k=k, dim=1).indices
    hits = torch.sum(torch.gather(targets, 1, top_k_preds), dim=1)
    return torch.mean(hits.float() / torch.clamp(torch.sum(targets, dim=1).float(), max=k))

def ndcg_at_k(predictions, targets, k):
    top_k_preds = torch.topk(predictions, k=k, dim=1).indices
    gains = torch.gather(targets, 1, top_k_preds)
    discounts = torch.log2(torch.arange(2, k + 2, device=targets.device).float())
    dcg = torch.sum(gains / discounts, dim=1)
    idcg = torch.sum(torch.sort(targets, descending=True, dim=1).values[:, :k] / discounts, dim=1)
    return torch.mean(dcg / idcg)

# Evaluate on test set using CPU (to prevent GPU O.O.M)
def evaluate_model(model, test_data, batch_size=64):
    model.eval()
    model.to('cpu')
    test_data = test_data.to('cpu')

    num_samples = test_data.size(0)
    all_predictions = []
    
    with torch.no_grad():
        for i in range(0, num_samples, batch_size):
            batch_data = train_data[i:i + batch_size]
            batch_predictions = model(batch_data)
            all_predictions.append(batch_predictions)

    all_predictions = torch.cat(all_predictions, dim=0)
    recall = recall_at_k(all_predictions, test_data, k=20).item()
    ndcg = ndcg_at_k(all_predictions, test_data, k=20).item()

    return recall, ndcg

# Using the evaluate_model function
recall, ndcg = evaluate_model(model, R_ts, batch_size=64)

if verbose:
    print(f"Recall@20: {recall:.4f}")
    print(f"NDCG@20: {ndcg:.4f}")


Recall@10: 0.2013
NDCG@10: 0.1315


In [1]:
def print_cuda_memory_usage():
    allocated_memory = torch.cuda.memory_allocated() / 1024**2
    reserved_memory = torch.cuda.memory_reserved() / 1024**2
    print(f"CUDA memory allocated: {allocated_memory:.2f} MB")
    print(f"CUDA memory reserved: {reserved_memory:.2f} MB")
print_cuda_memory_usage()

NameError: name 'torch' is not defined