In [43]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

In [44]:
# Prefer the GPU whenever CUDA is usable; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device  # bare expression so the notebook echoes the chosen device

device(type='cuda')

In [45]:
# Raw inputs: one table of users, one of (user, place, rating) rows, and the
# place catalogue (only the four columns used later are read from disk).
user_df = pd.read_csv("./data/user.csv")
rating_df = pd.read_csv("./data/tourism_rating.csv")
place_df = pd.read_csv(
    "./data/tourism_with_id.csv",
    usecols=["Place_Id", "Place_Name", "City", "Rating"],
)

In [46]:
# Switch every table to snake_case column names so the shared keys
# (user_id, place_id) are spelled the same way in all three frames.
user_df = user_df.rename(
    columns={
        "User_Id": "user_id",
        "Location": "location",
        "Age": "age",
    }
)
rating_df = rating_df.rename(
    columns={
        "User_Id": "user_id",
        "Place_Id": "place_id",
        "Place_Ratings": "place_rating",
    }
)
place_df = place_df.rename(
    columns={
        "Place_Id": "place_id",
        "Place_Name": "place_name",
        "City": "city",
        "Rating": "rating",
    }
)

In [47]:
# print(user_df.describe(), "\n")
# print(user_df.isnull().sum(), "\n")
# print(user_df[user_df.duplicated(keep=False)], "\n")

# print(rating_df.describe(), "\n")
# print(rating_df.isnull().sum(), "\n")
# print(rating_df[rating_df.duplicated(keep=False)], "\n")
# A (user, place) pair may appear several times; keep only the last row of
# each pair so every pair contributes exactly one rating.
rating_df = rating_df.drop_duplicates(
    subset=["user_id", "place_id"],
    keep="last",
)

# print(place_df.describe(), "\n")
# print(place_df.isnull().sum(), "\n")
# print(place_df[place_df.duplicated(keep=False)], "\n")


In [48]:
# Converting rating to interaction bool. (Explicit to Implicit)
# A rating of 3 or more becomes a positive interaction (1), anything lower a 0.
rating_df["implicit_label"] = (rating_df["place_rating"] >= 3).astype(int)

# Embedding rows are addressed with `id - 1`, so each table needs as many rows
# as the LARGEST id, not the number of distinct ids: with nunique() any gap in
# the id sequence would push the highest index past the end of the table.
# (For contiguous ids 1..N both definitions give the same N.)
user_len = int(rating_df["user_id"].max())
place_len = int(max(place_df["place_id"].max(), rating_df["place_id"].max()))
# rating_df
place_len

437

In [49]:
#Dataset Class

class RatingDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding (user index, place index, implicit label).

    The ``user_id`` / ``place_id`` columns are assumed to be 1-based; they are
    shifted by one so the resulting tensors hold 0-based indices.
    """

    def __init__(self, ratings=None):
        """Build the index and label tensors from a ratings frame.

        Args:
            ratings: DataFrame with ``user_id``, ``place_id`` and
                ``implicit_label`` columns. When omitted, the notebook-level
                ``rating_df`` is used, so ``RatingDataset()`` behaves as before.

        Raises:
            ValueError: if any ``user_id`` or ``place_id`` is smaller than 1
                (it would turn into a negative index after the shift).
        """
        if ratings is None:
            # Backward-compatible default: fall back to the global frame.
            ratings = rating_df

        user_ids = ratings["user_id"].to_numpy()
        place_ids = ratings["place_id"].to_numpy()
        if len(user_ids) and (user_ids.min() < 1 or place_ids.min() < 1):
            raise ValueError(
                "user_id and place_id must be >= 1; they are shifted to 0-based indices"
            )

        self.users = torch.tensor(user_ids - 1, dtype=torch.long)
        self.places = torch.tensor(place_ids - 1, dtype=torch.long)
        self.labels = torch.tensor(ratings["implicit_label"].to_numpy(), dtype=torch.float)

    def __len__(self):
        """Number of rating rows held by the dataset."""
        return len(self.users)

    def __getitem__(self, idx):
        """Return the ``(user_index, place_index, label)`` triple at ``idx``."""
        return self.users[idx], self.places[idx], self.labels[idx]
    
# Build the dataset once; with no argument RatingDataset reads the
# notebook-level rating_df, so that frame must already hold implicit_label.
dataset = RatingDataset()

In [50]:
class NeuMF(nn.Module):
    """Two-branch (GMF + MLP) scorer for (user, item) index pairs.

    * GMF branch: element-wise product of a user and an item embedding.
    * MLP branch: the concatenated user/item embeddings go through
      Linear(2*d, 64) -> LeakyReLU -> Linear(64, 32) -> LeakyReLU -> Linear(32, 16).

    Both branch outputs are concatenated and projected to a single raw logit
    (no sigmoid is applied inside the model).
    """

    def __init__(self, num_users, num_items, embedding_dim=16):
        """Create the embedding tables and dense layers.

        Args:
            num_users: number of rows in each user embedding table.
            num_items: number of rows in each item embedding table.
            embedding_dim: width of every embedding vector.
        """
        super().__init__()
        mlp_out_dim = 16  # width of the MLP branch output

        # Separate embedding tables per branch (GMF first, then MLP).
        self.gmf_user = nn.Embedding(num_embeddings=num_users, embedding_dim=embedding_dim)
        self.gmf_item = nn.Embedding(num_embeddings=num_items, embedding_dim=embedding_dim)
        self.mlp_user = nn.Embedding(num_embeddings=num_users, embedding_dim=embedding_dim)
        self.mlp_item = nn.Embedding(num_embeddings=num_items, embedding_dim=embedding_dim)

        # Dense stack of the MLP branch.
        self.linear_1 = nn.Linear(in_features=2 * embedding_dim, out_features=64)
        self.relu_1 = nn.LeakyReLU()
        self.linear_2 = nn.Linear(in_features=64, out_features=32)
        self.relu_2 = nn.LeakyReLU()
        self.linear_3 = nn.Linear(in_features=32, out_features=mlp_out_dim)

        # Final projection over [GMF output ; MLP output].
        self.output_layer = nn.Linear(in_features=embedding_dim + mlp_out_dim, out_features=1)

    def forward(self, user_ids, item_ids):
        """Return one raw logit per (user, item) pair.

        Args:
            user_ids: integer tensor of user indices.
            item_ids: integer tensor of item indices, aligned with ``user_ids``.

        Returns:
            Tensor whose last dimension has size 1, holding unnormalised scores.
        """
        # GMF branch: multiply the two embeddings element by element.
        gmf_vec = self.gmf_user(user_ids) * self.gmf_item(item_ids)

        # MLP branch: concatenate the embeddings, then run the dense stack.
        hidden = torch.cat((self.mlp_user(user_ids), self.mlp_item(item_ids)), dim=-1)
        hidden = self.relu_1(self.linear_1(hidden))
        hidden = self.relu_2(self.linear_2(hidden))
        mlp_vec = self.linear_3(hidden)

        # Fuse both branches and project to a single score.
        return self.output_layer(torch.cat((gmf_vec, mlp_vec), dim=-1))
    
# Instantiate the network with the table sizes computed earlier and move its
# parameters to the selected device in one step.
model = NeuMF(user_len, place_len).to(device)

In [51]:
# class NeuralCF(nn.Module):
#     def __init__(self, num_users, num_items, embedding_dim=32):
#         super().__init__()
#         self.user_embedding = nn.Embedding(num_users, embedding_dim)
#         self.item_embedding = nn.Embedding(num_items, embedding_dim)

#         # MLP layers (simple 2-layer example)
#         self.fc1 = nn.Linear(embedding_dim * 2, 64)
#         self.relu1 = nn.ReLU()
#         self.fc2 = nn.Linear(64, 32)
#         self.relu2 = nn.ReLU()
#         self.output = nn.Linear(32, 1)  # output raw logits for BCEWithLogitsLoss

#     def forward(self, user_ids, item_ids):
#         user_vec = self.user_embedding(user_ids)
#         item_vec = self.item_embedding(item_ids)
#         x = torch.cat([user_vec, item_vec], dim=-1)  # concat user and item embeddings
#         x = self.relu1(self.fc1(x))
#         x = self.relu2(self.fc2(x))
#         logits = self.output(x)
#         return logits  # raw score, use sigmoid or BCEWithLogitsLoss
    
# model = NeuralCF(num_users=user_len, num_items=place_len, embedding_dim=32).to(device)

In [52]:
# The model returns raw logits, so the sigmoid is folded into the loss.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.001)

epochs = 100

# 80/20 hold-out split. The split is seeded so that re-running this cell
# always yields the same train/test membership; with an unseeded split a
# re-run would reshuffle the rows and an already-trained model would then be
# evaluated on samples it has seen during training.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Training batches are reshuffled every epoch (with a seeded generator so the
# order is repeatable); the evaluation order is fixed.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

In [53]:
def precision_at_k(y_scores, y_true, k):
    """Fraction of the ``k`` highest-scored items whose true label is positive.

    Args:
        y_scores: sequence or array of predicted scores (higher ranks first).
            Multi-dimensional input is flattened.
        y_true: sequence or array of binary (0/1) labels aligned with
            ``y_scores``. Multi-dimensional input is flattened.
        k: cut-off rank; must be positive.

    Returns:
        float: number of positives among the top-``k`` items divided by ``k``.
        When fewer than ``k`` items are given, the missing ranks count as
        misses; empty input yields ``0.0``.

    Raises:
        ValueError: if ``k`` is not positive or the inputs differ in length.
    """
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Flatten so that column vectors such as (n, 1) are ranked as n items.
    scores = np.asarray(y_scores).ravel()
    labels = np.asarray(y_true).ravel()
    if scores.shape != labels.shape:
        raise ValueError(
            f"y_scores and y_true must have the same length, got {scores.size} and {labels.size}"
        )
    if scores.size == 0:
        return 0.0

    # Sort scores and true labels by descending score
    top_k_indices = np.argsort(scores)[::-1][:k]  # highest scores first

    # Count relevant items (assumes y_true are 0 or 1)
    return float(labels[top_k_indices].sum() / k)

In [54]:
import numpy as np

# NOTE(review): this re-defines precision_at_k from the previous cell; the
# later definition is the one the training loop uses, so keep a single copy.
def precision_at_k(y_scores, y_true, k):
    """Fraction of the ``k`` highest-scored items whose true label is positive.

    Args:
        y_scores: sequence or array of predicted scores (higher ranks first).
            Multi-dimensional input is flattened.
        y_true: sequence or array of binary (0/1) labels aligned with
            ``y_scores``. Multi-dimensional input is flattened.
        k: cut-off rank; must be positive.

    Returns:
        float: number of positives among the top-``k`` items divided by ``k``.
        When fewer than ``k`` items are given, the missing ranks count as
        misses; empty input yields ``0.0``.

    Raises:
        ValueError: if ``k`` is not positive or the inputs differ in length.
    """
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Flatten so that column vectors such as (n, 1) are ranked as n items.
    scores = np.asarray(y_scores).ravel()
    labels = np.asarray(y_true).ravel()
    if scores.shape != labels.shape:
        raise ValueError(
            f"y_scores and y_true must have the same length, got {scores.size} and {labels.size}"
        )
    if scores.size == 0:
        return 0.0

    # Sort scores and true labels by descending score
    top_k_indices = np.argsort(scores)[::-1][:k]  # highest scores first

    # Count relevant items (assumes y_true are 0 or 1)
    return float(labels[top_k_indices].sum() / k)


for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    train_correct, test_correct = 0, 0
    # Losses are accumulated as per-sample sums (batch mean * batch size) so a
    # smaller final batch is not over-weighted in the epoch average.
    train_loss_total, test_loss_total = 0.0, 0.0

    for x1, x2, y in train_loader:
        x1, x2, y = x1.to(device), x2.to(device), y.to(device)
        y = y.unsqueeze(1)  # add a trailing dim so labels match the logits' shape
        y_logits = model(x1, x2)
        y_preds = (y_logits > 0)  # logit > 0 is the same as sigmoid(logit) > 0.5

        train_correct += (y_preds == y).sum().item()
        train_loss = loss_fn(y_logits, y)
        train_loss_total += train_loss.item() * y.size(0)

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    # ---- evaluation pass on the held-out split ----
    model.eval()
    all_scores = []
    all_labels = []

    with torch.inference_mode():
        for x1, x2, y in test_loader:
            x1, x2, y = x1.to(device), x2.to(device), y.to(device)
            y = y.unsqueeze(1)
            y_logits = model(x1, x2)
            y_preds = (y_logits > 0)

            test_correct += (y_preds == y).sum().item()
            test_loss = loss_fn(y_logits, y)
            test_loss_total += test_loss.item() * y.size(0)

            # Collect scores and labels for P@K (one flat array per batch)
            all_scores.append(torch.sigmoid(y_logits).flatten().cpu().numpy())
            all_labels.append(y.flatten().cpu().numpy())

    # Compute Precision@10
    # NOTE(review): the ranking pools every test interaction across all users,
    # i.e. this is the precision of the 10 highest-scored test rows overall,
    # not a per-user top-10 - confirm this is the intended metric.
    p_at_10 = precision_at_k(np.concatenate(all_scores), np.concatenate(all_labels), k=10)

    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss_total/train_size:.4f}, "
          f"Train Acc: {train_correct/train_size:.2f}, Test Loss: {test_loss_total/test_size:.4f}, "
          f"Test Acc: {test_correct/test_size:.2f}, P@10: {p_at_10:.4f}")


Epoch 1/100, Train Loss: 0.7036, Train Acc: 0.51, Test Loss: 0.6885, Test Acc: 0.56, P@10: 0.7000
Epoch 2/100, Train Loss: 0.6869, Train Acc: 0.56, Test Loss: 0.6717, Test Acc: 0.60, P@10: 0.7000
Epoch 3/100, Train Loss: 0.6776, Train Acc: 0.60, Test Loss: 0.6659, Test Acc: 0.63, P@10: 0.7000
Epoch 4/100, Train Loss: 0.6746, Train Acc: 0.61, Test Loss: 0.6643, Test Acc: 0.63, P@10: 0.8000
Epoch 5/100, Train Loss: 0.6727, Train Acc: 0.61, Test Loss: 0.6633, Test Acc: 0.63, P@10: 0.8000
Epoch 6/100, Train Loss: 0.6711, Train Acc: 0.61, Test Loss: 0.6626, Test Acc: 0.63, P@10: 0.8000
Epoch 7/100, Train Loss: 0.6698, Train Acc: 0.62, Test Loss: 0.6619, Test Acc: 0.63, P@10: 0.8000
Epoch 8/100, Train Loss: 0.6685, Train Acc: 0.62, Test Loss: 0.6610, Test Acc: 0.63, P@10: 0.8000
Epoch 9/100, Train Loss: 0.6673, Train Acc: 0.62, Test Loss: 0.6605, Test Acc: 0.63, P@10: 0.8000
Epoch 10/100, Train Loss: 0.6664, Train Acc: 0.62, Test Loss: 0.6600, Test Acc: 0.63, P@10: 0.8000
Epoch 11/100, Train

In [56]:
# Write the trained network to disk.
# NOTE(review): the module object itself is serialised here (not just its
# state_dict), so loading the file presumably needs the NeuMF class to be
# importable under the same name - confirm against the consumer of model.pth.
torch.save(obj=model, f='model.pth')

In [55]:
# del model
# del train_loader
# torch.cuda.empty_cache()