In [5]:
!pip install -q cornac==2.1 tensorflow

In [6]:
import sys
import cornac
import tensorflow as tf

# Notebook-wide settings.
SEED = 20240514  # single seed handed to both the BPR model and the RatioSplit
VERBOSE = True   # forwarded to Cornac so it prints split statistics and progress

# Record the interpreter and library versions next to the results so the run
# can be reproduced later.
print(f"System version: {sys.version}")
print(f"Cornac version: {cornac.__version__}")
print(f"Tensorflow version: {tf.__version__}")

System version: 3.11.8 (main, Feb 26 2024, 15:36:12) [Clang 14.0.6 ]
Cornac version: 2.1
Tensorflow version: 2.16.1


In [8]:
# pandas was used below (`pd.read_csv`) without being imported in any visible
# cell, which raises NameError on a fresh kernel (Restart & Run All).
import pandas as pd

from cornac.models import BPR
from cornac.eval_methods import RatioSplit

# Value passed to BPR as `k`; also embedded in the model's display name.
K = 500

# BPR model under evaluation. SEED and VERBOSE come from the setup cell.
bpr = BPR(
    k=K,
    max_iter=1000,
    learning_rate=0.01,
    lambda_reg=0.008899,
    verbose=VERBOSE,
    seed=SEED,
    name=f"BPR(K={K})",
)

# Ranking metrics, all computed at a cut-off of 50.
eval_metrics = [
    cornac.metrics.Recall(k=50),
    cornac.metrics.NDCG(k=50),
    cornac.metrics.NCRR(k=50),
]

# Load the interaction data. `df` (train) is reused by the submission cell.
# NOTE(review): `df1` / `data_val` (probe set) are loaded but not used anywhere
# in the visible cells -- confirm whether they were meant to be the test set.
df = pd.read_csv("./cs608_ip_train_v3.csv")
df1 = pd.read_csv("./cs608_ip_probe_v3.csv")

# Cornac takes a plain list of rows.
# NOTE(review): presumably each row is (user, item, rating, ...) -- verify
# against the CSV header. `.values` upcasts mixed int/float columns to float.
data_list = df.values.tolist()
data_val = df1.values.tolist()

# Hold out 10% of the training interactions as the test split.
rs = RatioSplit(data_list, test_size=0.1, seed=SEED, verbose=VERBOSE)

# Fit `bpr` on the training split and report the metrics on the held-out split.
# The fitted `bpr` object is reused afterwards to generate recommendations.
cornac.Experiment(eval_method=rs, models=[bpr], metrics=eval_metrics).run()

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 21124
Number of items = 30016
Number of ratings = 169790
Max rating = 5.0
Min rating = 1.0
Global mean = 4.3
---
Test data:
Number of users = 21124
Number of items = 30016
Number of ratings = 17612
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 21124
Total items = 30016

[BPR(K=500)] Training started!


  0%|          | 0/1000 [00:00<?, ?it/s]

Optimization finished!

[BPR(K=500)] Evaluation started!


Ranking:   0%|          | 0/11672 [00:00<?, ?it/s]


TEST:
...
           | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
---------- + ------- + ------- + --------- + --------- + --------
BPR(K=500) |  0.0189 |  0.0387 |    0.1139 |   93.0736 |  16.3079



In [11]:
%%time
from tqdm.notebook import tqdm

import zipfile

# Sort the DataFrame by 'user_id' in ascending order
df_sorted = df.sort_values(by='user_id')

# Extract unique user IDs sorted in ascending order
user_ids = df_sorted['user_id'].unique()

# Generate recommendations for all users
top_k_recommendations = []
for user_id in tqdm(user_ids):
    recs = bpr.recommend(user_id=user_id, k=50)
    top_k_recommendations.append(recs)

with open("submission.txt", "w") as file:
    for user_recommendations in top_k_recommendations:
        file.write(" ".join(map(str, user_recommendations)) + "\n")

# zip the submission file
with zipfile.ZipFile('submission.zip', 'w') as file:
    file.write('submission.txt')

  0%|          | 0/21124 [00:00<?, ?it/s]

CPU times: user 55.1 s, sys: 361 ms, total: 55.5 s
Wall time: 55.6 s
