In [1]:
import sys
import cornac
import tensorflow as tf

# Report the interpreter and library versions this notebook was executed with.
for component, version in (
    ("System", sys.version),
    ("Cornac", cornac.__version__),
    ("Tensorflow", tf.__version__),
):
    print(f"{component} version: {version}")

# Notebook-wide settings: random seed and verbosity flag.
SEED = 20240514
VERBOSE = True

System version: 3.11.9 | packaged by Anaconda, Inc. | (main, Apr 19 2024, 16:40:41) [MSC v.1916 64 bit (AMD64)]
Cornac version: 2.1
Tensorflow version: 2.16.1


In [6]:
import cornac
import pandas as pd
from cornac.eval_methods import RatioSplit
from cornac.models import BPR
from cornac.metrics import Precision, Recall

# Load the training and probe interactions and pool them into a single list
# of rows; the pooled list is what gets re-split below.
# NOTE(review): DataFrame.values upcasts all columns to one common dtype, so
# if the files mix integer ids with float ratings the ids become floats --
# confirm this is acceptable for any code that later prints or looks up ids.
df = pd.read_csv("./cs608_ip_train_v3.csv")
df1 = pd.read_csv("./cs608_ip_probe_v3.csv")
data_train = df.values.tolist()
data_val = df1.values.tolist()
data = data_train + data_val

# Evaluation settings, declared before the split so they are actually passed
# to it instead of being defined afterwards and never used.
rating_threshold = 1.0
exclude_unknowns = True

# Split the data into training, validation and testing sets
rs = RatioSplit(
    data=data,
    test_size=0.1,
    val_size=0.1,
    rating_threshold=rating_threshold,
    exclude_unknowns=exclude_unknowns,
    seed=SEED,
    verbose=VERBOSE,
)

# Instantiate Recall@50 for evaluation
rec50 = cornac.metrics.Recall(50)

# Instantiate a matrix factorization model (e.g., BPR)
bpr = BPR(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, seed=SEED, verbose=VERBOSE)
---
Training data:
Number of users = 21124
Number of items = 29665
Number of ratings = 225055
Max rating = 5.0
Min rating = 1.0
Global mean = 4.3
---
Test data:
Number of users = 21124
Number of items = 29665
Number of ratings = 27175
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 21124
Number of items = 29665
Number of ratings = 27216
---
Total users = 21124
Total items = 29665


In [7]:
# Row counts, in order: pooled data, training rows, probe rows.
tuple(map(len, (data, data_train, data_val)))

(281321, 188656, 92665)

In [8]:
from cornac.hyperopt import Discrete, Continuous
from cornac.hyperopt import GridSearch, RandomSearch

# Candidate values that both searches explore for the discrete parameters.
K_VALUES = [5, 10, 50, 100, 500]
LAMBDA_REG_VALUES = [0.001, 0.01, 0.1, 1.0]

# Grid Search: every combination of the listed values is a candidate.
grid_space = [
    Discrete(name="k", values=list(K_VALUES)),
    Discrete(name="learning_rate", values=[0.001, 0.05, 0.01, 0.1]),
    Discrete(name="lambda_reg", values=list(LAMBDA_REG_VALUES)),
]
gs_bpr = GridSearch(model=bpr, space=grid_space, metric=rec50, eval_method=rs)

# Random Search: learning_rate is drawn from a continuous range, while the
# other two parameters come from the same discrete candidates as the grid.
random_space = [
    Discrete(name="k", values=list(K_VALUES)),
    Continuous(name="learning_rate", low=0.001, high=0.01),
    Discrete(name="lambda_reg", values=list(LAMBDA_REG_VALUES)),
]
# `n_trails` (sic) is the keyword this call is made with; the recorded run
# below evaluates 20 sampled settings, so do not "correct" the spelling.
rs_bpr = RandomSearch(
    model=bpr,
    space=random_space,
    metric=rec50,
    eval_method=rs,
    n_trails=20,
)

In [9]:
# Define the experiment: both searches are evaluated on the same split with
# the same metric.
experiment = cornac.Experiment(
    eval_method=rs,
    models=[gs_bpr, rs_bpr],
    metrics=[rec50],
    user_based=False,
)
experiment.run()

# Obtain the best params (grid search first, then random search)
for search in (gs_bpr, rs_bpr):
    print(search.best_params)


[GridSearch_BPR] Training started!
Evaluating: {'k': 5, 'lambda_reg': 0.001, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.001, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.001, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.001, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.01, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.01, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.01, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.01, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.1, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.1, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.1, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.1, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 1.0, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 1.0, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 1.0, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 1.0, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.001, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.001, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.001, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.001, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.01, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.01, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.01, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.01, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.1, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.1, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.1, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.1, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 1.0, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 1.0, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 1.0, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 1.0, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.001, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.001, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.001, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.001, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.01, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.01, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.01, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.01, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.1, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.1, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.1, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.1, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 1.0, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 1.0, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 1.0, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 1.0, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.001, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.001, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.001, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.001, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.01, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.01, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.01, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.01, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.1, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.1, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.1, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.1, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 1.0, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 1.0, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 1.0, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 1.0, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.001, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.001, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.001, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.001, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.01, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.01, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.01, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.01, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.1, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.1, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.1, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.1, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 1.0, 'learning_rate': 0.001}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 1.0, 'learning_rate': 0.01}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 1.0, 'learning_rate': 0.05}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 1.0, 'learning_rate': 0.1}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Best parameter settings: {'k': 500, 'lambda_reg': 0.01, 'learning_rate': 0.1}
Recall@50 = 0.1351

[GridSearch_BPR] Evaluation started!


Ranking:   0%|          | 0/14807 [00:00<?, ?it/s]

Ranking:   0%|          | 0/14844 [00:00<?, ?it/s]


[RandomSearch_BPR] Training started!
Evaluating: {'k': 500, 'lambda_reg': 0.01, 'learning_rate': 0.002795766842251988}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.001, 'learning_rate': 0.009196626872796716}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.1, 'learning_rate': 0.002224287933552601}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 1.0, 'learning_rate': 0.004666053796944102}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 1.0, 'learning_rate': 0.009220401551961823}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 0.001, 'learning_rate': 0.008040369713008658}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.001, 'learning_rate': 0.004867175750223332}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 500, 'lambda_reg': 1.0, 'learning_rate': 0.0060286295783985346}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.01, 'learning_rate': 0.005079923810254551}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.01, 'learning_rate': 0.002840251764938269}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 10, 'lambda_reg': 0.01, 'learning_rate': 0.0022199985482972383}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.1, 'learning_rate': 0.00819383327770316}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.001, 'learning_rate': 0.009661051243044737}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.01, 'learning_rate': 0.00864719980981898}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 0.1, 'learning_rate': 0.005670567627610911}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.01, 'learning_rate': 0.0072855380272273075}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 100, 'lambda_reg': 0.1, 'learning_rate': 0.0012519895242566355}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 5, 'lambda_reg': 1.0, 'learning_rate': 0.0039654731534250216}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.001, 'learning_rate': 0.008258200381350113}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Evaluating: {'k': 50, 'lambda_reg': 0.001, 'learning_rate': 0.005250954116051634}


  0%|          | 0/200 [00:00<?, ?it/s]

Optimization finished!
Best parameter settings: {'k': 5, 'lambda_reg': 0.01, 'learning_rate': 0.00864719980981898}
Recall@50 = 0.0613

[RandomSearch_BPR] Evaluation started!


Ranking:   0%|          | 0/14807 [00:00<?, ?it/s]

Ranking:   0%|          | 0/14844 [00:00<?, ?it/s]


VALIDATION:
...
                 | Recall@50 | Time (s)
---------------- + --------- + --------
GridSearch_BPR   |    0.1351 |  30.4894
RandomSearch_BPR |    0.0613 |  14.6196

TEST:
...
                 | Recall@50 | Train (s) | Test (s)
---------------- + --------- + --------- + --------
GridSearch_BPR   |    0.1381 | 4298.9573 |  28.7952
RandomSearch_BPR |    0.0617 |  730.8316 |  15.1821

{'k': 500, 'lambda_reg': 0.01, 'learning_rate': 0.1}
{'k': 5, 'lambda_reg': 0.01, 'learning_rate': 0.00864719980981898}


In [13]:
# Display the model object kept by the grid search as its best candidate.
gs_bpr.best_model

<cornac.models.bpr.recom_bpr.BPR at 0x1ecf92a3210>

In [14]:
from tqdm.notebook import tqdm

import zipfile


def generate_submission(
    bpr,
    train_path="./cs608_ip_train_v3.csv",
    k=50,
    txt_path="submission.txt",
    zip_path="submission.zip",
):
    """Write top-k recommendations for every user in a CSV and zip the file.

    One line is written per distinct ``user_id`` found in ``train_path``, in
    ascending ``user_id`` order. Each line holds the values returned by
    ``bpr.recommend`` for that user, converted with ``str`` and separated by
    single spaces. The text file is then added to a zip archive.

    Parameters
    ----------
    bpr
        Model object exposing a ``name`` attribute and a
        ``recommend(user_id=..., k=...)`` method.
    train_path : str
        CSV file containing a ``user_id`` column.
    k : int
        Number of recommendations requested per user.
    txt_path : str
        Destination of the plain-text submission.
    zip_path : str
        Destination of the zip archive wrapping ``txt_path``.

    Returns
    -------
    None
        The function only prints progress and writes the two files.
    """
    # Local import: only needed to derive the archive member name below.
    import os

    print(f"Generating recommendations using {bpr.name}...")

    # Only the user ids are needed, so read that single column and sort it
    # directly rather than loading and sorting the whole frame.
    user_ids = (
        pd.read_csv(train_path, usecols=["user_id"])["user_id"]
        .sort_values()
        .unique()
    )

    # Collect everything first so a failure while recommending does not leave
    # a half-written submission file behind.
    top_k_recommendations = [
        bpr.recommend(user_id=user_id, k=k) for user_id in tqdm(user_ids)
    ]

    with open(txt_path, "w") as file:
        file.writelines(
            " ".join(map(str, user_recommendations)) + "\n"
            for user_recommendations in top_k_recommendations
        )

    # zip the submission file; store it under its bare file name so the
    # archive layout does not depend on the directory part of txt_path
    with zipfile.ZipFile(zip_path, "w") as archive:
        archive.write(txt_path, arcname=os.path.basename(txt_path))

In [15]:
# Write submission.txt and submission.zip using the grid search's best model.
generate_submission(gs_bpr.best_model)

Generating recommendations using BPR...


  0%|          | 0/21124 [00:00<?, ?it/s]