In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Check that MPS is available
if not torch.backends.mps.is_available():
    if not torch.backends.mps.is_built():
        print(
            "MPS not available because the current PyTorch install was not "
            "built with MPS enabled."
        )
    else:
        print(
            "MPS not available because the current MacOS version is not 12.3+ "
            "and/or you do not have an MPS-enabled device on this machine."
        )
    mps_device = None
else:
    mps_device = torch.device("mps")

if mps_device is not None:
    device = mps_device
    print("Using MPS")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
else:
    device = torch.device("cpu")
    print("Using CPU")

device

MPS not available because the current PyTorch install was not built with MPS enabled.
Using GPU: NVIDIA GeForce RTX 4080 Laptop GPU


device(type='cuda')

In [2]:
import sys
import cornac
import tensorflow as tf

print(f"System version: {sys.version}")
print(f"Cornac version: {cornac.__version__}")
print(f"Tensorflow version: {tf.__version__}")

SEED = 20240516
VERBOSE = True

System version: 3.11.9 | packaged by Anaconda, Inc. | (main, Apr 19 2024, 16:40:41) [MSC v.1916 64 bit (AMD64)]
Cornac version: 2.1
Tensorflow version: 2.16.1


In [3]:
from cornac.models import BPR
from cornac.eval_methods import RatioSplit
import glob
import pickle

param_grid = {
    "num_factors": [100, 200, 500, 1000],
    "learning_rate": [0.001, 0.01, 0.1],
    "regularization": [0.001, 0.01, 0.1, 1.0],
    "max_iter": [100, 500, 1000, 2000],
}

models = []
for K in param_grid["num_factors"]:
    for lr in param_grid["learning_rate"]:
        for reg in param_grid["regularization"]:
            for max_iter in param_grid["max_iter"]:
                bpr = BPR(
                    k=K,
                    max_iter=max_iter,
                    learning_rate=lr,
                    lambda_reg=reg,
                    verbose=VERBOSE,
                    seed=SEED,
                    name=f"BPR(K={K}, lr={lr}, reg={reg}, max_iter={max_iter})",
                )

                models.append(bpr)

for model in models:
    print(model.name)
    # Find .pkl files in the specified folder
    pkl_files = glob.glob(f"./bpr_grid_search/{model.name}/*.pkl")

    # Load the first .pkl file found
    if pkl_files:
        print(f"Loading {pkl_files[0]}")
        model.load(pkl_files[0])
    else:
        print("No .pkl files found in the specified directory.")

BPR(K=100, lr=0.001, reg=0.001, max_iter=100)
Loading ./bpr_grid_search/BPR(K=100, lr=0.001, reg=0.001, max_iter=100)\2024-05-16_06-39-41-146474.pkl
BPR(K=100, lr=0.001, reg=0.001, max_iter=500)
Loading ./bpr_grid_search/BPR(K=100, lr=0.001, reg=0.001, max_iter=500)\2024-05-16_06-40-28-465299.pkl
BPR(K=100, lr=0.001, reg=0.001, max_iter=1000)
Loading ./bpr_grid_search/BPR(K=100, lr=0.001, reg=0.001, max_iter=1000)\2024-05-16_06-41-46-099404.pkl
BPR(K=100, lr=0.001, reg=0.001, max_iter=2000)
Loading ./bpr_grid_search/BPR(K=100, lr=0.001, reg=0.001, max_iter=2000)\2024-05-16_06-44-04-298936.pkl
BPR(K=100, lr=0.001, reg=0.01, max_iter=100)
Loading ./bpr_grid_search/BPR(K=100, lr=0.001, reg=0.01, max_iter=100)\2024-05-16_06-44-27-046091.pkl
BPR(K=100, lr=0.001, reg=0.01, max_iter=500)
Loading ./bpr_grid_search/BPR(K=100, lr=0.001, reg=0.01, max_iter=500)\2024-05-16_06-45-17-825833.pkl
BPR(K=100, lr=0.001, reg=0.01, max_iter=1000)
Loading ./bpr_grid_search/BPR(K=100, lr=0.001, reg=0.01, max

In [6]:
%%time
from tqdm.notebook import tqdm

import zipfile

def generate_submission(bpr):
    print(f"Generating recommendations using {bpr.name}...")
    df = pd.read_csv("./cs608_ip_train_v3.csv")
    
    # Sort the DataFrame by 'user_id' in ascending order
    df_sorted = df.sort_values(by='user_id')

    # Extract unique user IDs sorted in ascending order
    user_ids = df_sorted['user_id'].unique()

    # Generate recommendations for all users
    top_k_recommendations = []
    for user_id in tqdm(user_ids):
        recs = bpr.recommend(user_id=user_id, k=50)
        top_k_recommendations.append(recs)

    with open("submission.txt", "w") as file:
        for user_recommendations in top_k_recommendations:
            file.write(" ".join(map(str, user_recommendations)) + "\n")

    # zip the submission file
    with zipfile.ZipFile('submission.zip', 'w') as file:
        file.write('submission.txt')

CPU times: total: 0 ns
Wall time: 0 ns


In [7]:
generate_submission(models[0])

Generating recommendations using BPR(K=100, lr=0.001, reg=0.001, max_iter=100)...


  0%|          | 0/21124 [00:00<?, ?it/s]

AttributeError: 'NoneType' object has no attribute 'get'