In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os
from loguru import logger

import pandas as pd
import numpy as np
from pydantic import BaseModel
import plotly.express as px
from dotenv import load_dotenv
import mlflow

load_dotenv()

sys.path.insert(0, '..')

from src.viz import blueq_colors

# Controller

In [3]:
class Args(BaseModel):
    testing: bool = False
    log_to_mlflow: bool = True
    experiment_name: str = "FSDS RecSys - L5 - Reco Algo"
    run_name: str = '064-cf-i2i'
    notebook_persist_dp: str = None
    random_seed: int = 41

    user_col: str = 'user_id'
    item_col: str = 'parent_asin'
    rating_col: str = 'rating'
    timestamp_col: str = 'timestamp'
    
    top_K: int = 100
    top_k: int = 10

    batch_size: int = 128

    def init(self):
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")
        
        if not os.environ.get("MLFLOW_TRACKING_URI"):
            logger.warning(
                f"Environment variable MLFLOW_TRACKING_URI is not set. Setting self.log_to_mlflow to false."
            )
            self.log_to_mlflow = False

        if self.log_to_mlflow:
            logger.info(
                f"Setting up MLflow experiment {self.experiment_name} - run {self.run_name}..."
            )
            import mlflow

            mlflow.set_experiment(self.experiment_name)
            mlflow.start_run(run_name=self.run_name)

        return self
    
args = Args().init()

print(args.model_dump_json(indent=2))

[32m2024-09-21 16:00:55.783[0m | [1mINFO    [0m | [36m__main__[0m:[36minit[0m:[36m29[0m - [1mSetting up MLflow experiment FSDS RecSys - L5 - Reco Algo - run 064-cf-i2i...[0m


{
  "testing": false,
  "log_to_mlflow": true,
  "experiment_name": "FSDS RecSys - L5 - Reco Algo",
  "run_name": "064-cf-i2i",
  "notebook_persist_dp": "/home/dvquys/frostmourne/reco-algo/notebooks/data/064-cf-i2i",
  "random_seed": 41,
  "user_col": "user_id",
  "item_col": "parent_asin",
  "rating_col": "rating",
  "timestamp_col": "timestamp",
  "top_K": 100,
  "top_k": 10,
  "batch_size": 128
}


# Implement

In [4]:
from src.train_utils import train, MetricLogCallback
from src.model import Item2ItemCollaborativeFiltering
from src.math_utils import sigmoid

In [5]:
def init_model():
    model = Item2ItemCollaborativeFiltering(n_users, n_items)
    return model

# Test implementation

In [6]:
# Mock data
user_indices = [0, 0, 1, 1, 2, 2, 2]
item_indices = [0, 1, 1, 2, 3, 1, 2]
ratings = [1, 4, 4, 5, 3, 2, 4]
n_users = len(set(user_indices))
n_items = len(set(item_indices))

val_user_indices = [0, 1, 2]
val_item_indices = [2, 1, 2]
val_ratings = [2, 4, 5]

print("Mock User IDs:", user_indices)
print("Mock Item IDs:", item_indices)
print("Ratings:", ratings)

model = init_model()

users = [0, 1, 2]
items = [2, 2, 0]
predictions = model.predict(users, items)
print(predictions)

Mock User IDs: [0, 0, 1, 1, 2, 2, 2]
Mock Item IDs: [0, 1, 1, 2, 3, 1, 2]
Ratings: [1, 4, 4, 5, 3, 2, 4]
[0.95257413 0.95257413 0.95257413]


In [7]:
model.fit(user_indices, item_indices, ratings)
predictions = model.predict(users, items)
print(predictions)

[0.98201379 0.98201379 0.88079708]


In [8]:
model.user_item_matrix.T

array([[1., 0., 0.],
       [4., 4., 2.],
       [0., 5., 4.],
       [0., 0., 3.]])

In [9]:
model.item_similarity

array([[0.        , 0.66666667, 0.        , 0.        ],
       [0.66666667, 0.        , 0.72881089, 0.33333333],
       [0.        , 0.72881089, 0.        , 0.62469505],
       [0.        , 0.33333333, 0.62469505, 0.        ]])

In [10]:
item = 3
user = 1

# Compute prediction using weighted average of ratings from similar items
sim_scores = model.item_similarity[item]
print(f"{sim_scores=}")

sim_scores=array([0.        , 0.33333333, 0.62469505, 0.        ])


In [11]:
# Only consider items that have been rated by the current user
item_ratings = model.user_item_matrix[user, :]
print(f"Ratings of current user for all items:\n{item_ratings=}")
sim_scores = sim_scores[item_ratings != 0]
print(f"Cosine similarity score of target item towards all other items where current user has rated:\n{sim_scores}")
item_ratings = item_ratings[item_ratings != 0]

Ratings of current user for all items:
item_ratings=array([0., 4., 5., 0.])
Cosine similarity score of target item towards all other items where current user has rated:
[0.33333333 0.62469505]


In [12]:
# Weighted average of ratings
print(f"Weighted average: {np.dot(sim_scores, item_ratings)}")
print(f"Normalization factor: {np.sum(sim_scores)}")
print(f"Predicted rating: {np.dot(sim_scores, item_ratings) / np.sum(sim_scores)}")
print(f"Predicted rating - sigmoid: {sigmoid(np.dot(sim_scores, item_ratings) / np.sum(sim_scores))}")

Weighted average: 4.456808571105455
Normalization factor: 0.9580283808877577
Predicted rating: 4.652063195638892
Predicted rating - sigmoid: 0.9905482923878774


In [13]:
recommendations = model.recommend(
    val_user_indices,
    k=2,
    progress_bar_type='tqdm_notebook'
)

Generating Recommendations:   0%|          | 0/3 [00:00<?, ?it/s]

In [14]:
%debug

ERROR:root:No traceback has been produced, nothing to debug.


In [15]:
recommendations

{'user_indice': [0, 0, 1, 1, 2],
 'recommendation': [2, 3, 3, 0, 0],
 'score': [4.0, 4.0, 4.652063195638892, 4.0, 2.0]}

# Prep data

In [16]:
from src.id_mapper import IDMapper
from src.train_utils import map_indice

In [17]:
train_df = pd.read_parquet("../data/train_features_neg_df.parquet")
val_df = pd.read_parquet("../data/val_features_neg_df.parquet")
idm = IDMapper().load("../data/idm.json")
# val_timestamp = 1628643414042  # https://amazon-reviews-2023.github.io/data_processing/5core.html
assert (val_df[args.timestamp_col].min() - train_df[args.timestamp_col].max()) > 0
val_timestamp = train_df[args.timestamp_col].max() + 1
print(f"{val_timestamp=}")

val_timestamp=np.int64(1628641464793)


In [18]:
user_ids = train_df[args.user_col].values
item_ids = train_df[args.item_col].values
unique_user_ids = list(set(user_ids))
unique_item_ids = list(set(item_ids))
n_users = len(unique_user_ids)
n_items = len(unique_item_ids)

logger.info(f"{len(unique_user_ids)=:,.0f}, {len(unique_item_ids)=:,.0f}")

[32m2024-09-21 16:00:59.800[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mlen(unique_user_ids)=20,366, len(unique_item_ids)=4,696[0m


In [19]:
train_df = train_df.pipe(map_indice, idm, args.user_col, args.item_col)
val_df = val_df.pipe(map_indice, idm, args.user_col, args.item_col)

user_indices = [idm.get_user_index(user_id) for user_id in user_ids]
item_indices = [idm.get_item_index(item_id) for item_id in item_ids]
ratings = train_df[args.rating_col].values.tolist()

val_user_indices = [idm.get_user_index(user_id) for user_id in val_df[args.user_col]]
val_item_indices = [idm.get_item_index(item_id) for item_id in val_df[args.item_col]]
val_ratings = val_df[args.rating_col].values.tolist()

# Train

In [20]:
model = init_model()

#### Predict before train

In [21]:
val_df.sample(10)

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,main_category,title,description,categories,price,item_sequence
1473,AER6PIIILPSUBCD3UVUSETU4AQCQ,B07624RBWB,5.0,1629979293632,11766,3293,Video Games,Nintendo Switch Pro Controller,[],"[Video Games, Nintendo Switch, Accessories, Co...",69.0,"[-1, -1, -1, -1, 2021, 1295, 937, 1482, 1721, ..."
93,AGTDLL74EJGAECTWX73F4OKVXNRQ,B07SVJVQCL,5.0,1640978526844,1190,2636,Video Games,Sniper Ghost Warrior Contracts PS4 - PlayStati...,[Experience pure sniper gameplay across the ha...,"[Video Games, PlayStation 4, Games]",14.94,"[-1, -1, -1, -1, -1, 2654, 1138, 3230, 468, 4221]"
795,AGDESIG7HU5BZN6OSN6XITL7HXYA,B0C37RBK2R,5.0,1645049757763,8300,3514,Video Games,Xbox Series S,"[Introducing the Xbox Series S, the smallest, ...",[],279.0,"[-1, -1, -1, -1, 3074, 972, 1271, 2717, 1548, ..."
1185,AHHZ5TKBS6CDRS4EE32RIB5ZOIYA,B01I59OFDU,1.0,1655114142340,18130,4342,Video Games,Resident Evil 4 - Xbox One Standard Edition,[Special agent Leon S Kennedy is sent on a mis...,"[Video Games, Xbox One, Games]",24.95,"[2116, 3834, 2506, 3487, 1822, 3506, 3808, 198..."
1039,AEXN3VFNZS7CKHX2NHDHLYDBZZIQ,B07D36WX84,5.0,1647660882477,6987,1709,All Electronics,FastSnail Controller Charger Compatible with N...,[],"[Video Games, Legacy Systems, Nintendo Systems...",19.99,"[2691, 303, 3974, 3175, 4212, 4035, 2828, 1861..."
886,AF3UJKRZK4OEWSVCF3CCJIP2UB5A,B0BKRXQ5GL,4.0,1637988043468,14362,4094,Computers,Logitech G Logitech G935 Over Ear Wireless Hea...,[Logitech G935 Wireless DTS:X 7.1 Surround Sou...,"[Video Games, PC, Accessories, Headsets]",153.98,"[4165, 3334, 4362, 536, 1776, 1538, 911, 2955,..."
290,AE3PFNNGGDBXSFN36KHY3MJZ2AFQ,B01N10NIBP,0.0,1640529855026,19244,735,Video Games,Nintendo amiibo-Zelda: Breath of the Wild,"[Zelda appears here carrying a Sheikah Slate, ...","[Video Games, Legacy Systems, Nintendo Systems...",,"[-1, -1, -1, -1, 2021, 2173, 448, 3803, 3365, ..."
1573,AGK5CJRXH75MMVKZVWP7LMVJ65WA,B0BTM9LJM4,5.0,1641927814038,12086,1638,Video Games,HORI Nintendo Switch Split Pad Pro (Pokemon Le...,[Become a true Pokemon trainer legend with the...,"[Video Games, Nintendo Switch, Accessories, Co...",59.99,"[-1, -1, -1, 1177, 381, 438, 3563, 2079, 475, ..."
1372,AFFCNEJLNMAUSBFDTVL7TDS76HHA,B01L1Y0RZQ,0.0,1635546961171,19191,1548,Video Games,Xbox One S 500GB Console - Minecraft Bundle [D...,[The Xbox One S Minecraft Favorites Bundle (50...,"[Video Games, Xbox One, Consoles]",214.99,"[-1, -1, -1, -1, -1, 3490, 3364, 4212, 2433, 4..."
174,AFGV3FHSYVQCVZNZ3QU5C4ZOVHTQ,B000050FBJ,0.0,1632730498499,13245,1084,Video Games,Super Mario All Stars,"[Product description, Revisit the magic and fu...","[Video Games, Legacy Systems, Nintendo Systems...",53.49,"[-1, -1, -1, 231, 950, 455, 4185, 2747, 2875, ..."


In [22]:
user_id = val_df.sample(10)[args.user_col].values[0]
test_df = val_df.loc[lambda df: df[args.user_col].eq(user_id)]
test_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,main_category,title,description,categories,price,item_sequence
456,AEFSE2LWF5FTU6GC6HEX7IVHRJZA,B01LRLJV28,0.0,1654460239266,6114,3797,Video Games,PlayStation 4 Slim 500GB Console - Uncharted 4...,[The new slim PlayStation 4 opens the door to ...,"[Video Games, PlayStation 4, Consoles]",272.95,"[2351, 104, 53, 452, 2050, 1442, 1243, 2819, 4..."
863,AEFSE2LWF5FTU6GC6HEX7IVHRJZA,B01LXC1QL0,4.0,1654460239266,6114,3250,Computers,"Razer DeathAdder Elite Gaming Mouse: 16,000 DP...",[Equipped with the new eSports-grade optical s...,"[Video Games, PC, Accessories, Gaming Mice]",43.66,"[2351, 104, 53, 452, 2050, 1442, 1243, 2819, 4..."


In [23]:
item_id = test_df.loc[lambda df: df[args.rating_col].gt(0)][args.item_col].values[0]
logger.info(f"Test predicting before training with {args.user_col} = {user_id} and {args.item_col} = {item_id}")
user_indice = idm.get_user_index(user_id)
item_indice = idm.get_item_index(item_id)

model.predict([user_indice], [item_indice])

[32m2024-09-21 16:01:00.219[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mTest predicting before training with user_id = AEFSE2LWF5FTU6GC6HEX7IVHRJZA and parent_asin = B01LXC1QL0[0m


array([0.95257413])

#### Training loop

In [24]:
model.fit(user_indices, item_indices, ratings)

# Predict

In [25]:
logger.info(f"Test predicting before training with {args.user_col} = {user_id} and {args.item_col} = {item_id}")
model.predict([user_indice], [item_indice])

[32m2024-09-21 16:01:02.112[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mTest predicting before training with user_id = AEFSE2LWF5FTU6GC6HEX7IVHRJZA and parent_asin = B01LXC1QL0[0m


array([0.98925171])

# Evaluate

## Ranking metrics

In [26]:
from src.eval import create_label_df, create_rec_df, merge_recs_with_target
from src.eval import log_ranking_metrics

In [27]:
train_df.loc[lambda df: df['user_indice'].eq(9)]

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,main_category,title,description,categories,price,item_sequence
43086,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B007CSF3GO,5.0,1405798893000,9,653,Video Games,The Last Story - Nintendo Wii,"[Product Description, Embroiled in seemingly e...","[Video Games, Legacy Systems, Nintendo Systems...",,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
86293,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B00BQVXVYY,5.0,1423463206000,9,440,Video Games,LEGO: Marvel - PC,[Lego Marvel Super Heroes sees Lego Nick Fury ...,"[Video Games, PC, Games]",,"[-1.0, -1.0, -1.0, -1.0, 2213.0, 1086.0, 653.0..."
94839,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B000050FBJ,5.0,1405799408000,9,1084,Video Games,Super Mario All Stars,"[Product description, Revisit the magic and fu...","[Video Games, Legacy Systems, Nintendo Systems...",53.49,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 221..."
110686,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B00002SVFQ,5.0,1405799470000,9,1875,Video Games,F-Zero,"[Product description, The future of racing is ...","[Video Games, Legacy Systems, Nintendo Systems...",44.11,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 2213.0, 1..."
111562,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B00328P0GG,5.0,1363878077000,9,2213,Video Games,Skate 3 - Xbox 360,"[Product Description, The award winning SKATE ...","[Video Games, Legacy Systems, Xbox Systems, Xb...",14.2,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
115913,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B00503E8S2,0.0,1405798893000,9,2038,Video Games,Call of Duty: Modern Warfare 3 - Xbox 360,"[Product Description, Modern Warfare is back. ...","[Video Games, Legacy Systems, Xbox Systems, Xb...",40.99,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
116082,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B000FQBPDU,5.0,1423463188000,9,1179,Video Games,Metroid Prime 3: Corruption,[You ARE Samus with Wii control! By moving aro...,"[Video Games, Legacy Systems, Nintendo Systems...",49.99,"[-1.0, -1.0, -1.0, -1.0, -1.0, 2213.0, 1086.0,..."
119706,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B0053BCP40,5.0,1389114070000,9,1086,Video Games,Luigi's Mansion: Dark Moon,"[Help Luigi overcome ghastly ghosts, mind-melt...","[Video Games, Legacy Systems, Nintendo Systems...",31.53,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
137174,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B001ELJFGO,0.0,1363878077000,9,2246,Video Games,Assassin's Creed: Director's Cut Edition [Down...,"[Assassin's Creed, redefines the action genre....","[Video Games, PC, Games]",,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
143830,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B00DJRLDMU,0.0,1389114070000,9,1483,Video Games,Tom Clancy’s The Division Underground - Xbox O...,"[In Expansion I, Underground, the factions of ...","[Video Games, Xbox One, Downloadable Content]",,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."


In [28]:
recommendations = model.recommend(
    val_user_indices,
    k=args.top_K,
    progress_bar_type='tqdm_notebook'
)

Generating Recommendations:   0%|          | 0/1898 [00:00<?, ?it/s]

In [29]:
recommendations_df = pd.DataFrame(recommendations).pipe(create_rec_df, idm)
recommendations_df

Unnamed: 0,user_indice,recommendation,score,rec_ranking,user_id,parent_asin
0,2377,1067,4.0,1.0,AEFWYBITAJIQEAGJMGBBZQPD246Q,B01GWHPDEW
1,2377,1682,4.0,2.0,AEFWYBITAJIQEAGJMGBBZQPD246Q,B000FJEV06
2,2377,2225,4.0,3.0,AEFWYBITAJIQEAGJMGBBZQPD246Q,B002I0J5JW
3,2377,2222,4.0,4.0,AEFWYBITAJIQEAGJMGBBZQPD246Q,B001COTC3E
4,2377,2218,4.0,5.0,AEFWYBITAJIQEAGJMGBBZQPD246Q,B001G605ZC
...,...,...,...,...,...,...
189795,19050,2729,5.0,196.0,AHAKU6TTWIHJPZIODW7MGC52M2DA,B01J3MKLHC
189796,19050,2732,5.0,197.0,AHAKU6TTWIHJPZIODW7MGC52M2DA,B01CHU4IY4
189797,19050,2733,5.0,198.0,AHAKU6TTWIHJPZIODW7MGC52M2DA,B078941YVV
189798,19050,4597,5.0,199.0,AHAKU6TTWIHJPZIODW7MGC52M2DA,B018VAEXD0


In [30]:
label_df = create_label_df(val_df)
label_df

Unnamed: 0,user_id,parent_asin,rating,rating_rank
1711,AEOY2365QPPEVDTOXL6N7ZA4NSAA,B00PDRZG9U,5.0,1.0
425,AFGHX4VLP6P5XORLDJX3LZKUAAZA,B00Z9TJBUW,5.0,1.0
189,AFCH2PDOFM2S3622QFV6PHCHGMCA,B00KSQHX1K,5.0,1.0
1297,AEURBISVS35ALE7YQLR5L4K7AHCA,B07QQ8N7LL,1.0,1.0
320,AEMA3SW3WPNLEH3IACW23K2ZSUFA,B09JDLC31H,4.0,1.0
...,...,...,...,...
663,AFB6FYPPCN33UMUU5536IHXNOHCQ,B00BGA9WK2,0.0,18.0
453,AESD4RLWUKM6JTD6SNNWYLHLLQQA,B00Z9TJHEC,0.0,18.0
582,AG4RCXKPTC6QRORJLUSBY4SO2IAA,B001G7PSGW,0.0,18.0
1374,AFB6FYPPCN33UMUU5536IHXNOHCQ,B01K1OO5PU,0.0,19.0


In [31]:
eval_df = merge_recs_with_target(recommendations_df, label_df, k=args.top_K)
eval_df

Unnamed: 0,user_indice,recommendation,score,rec_ranking,user_id,parent_asin,rating,rating_rank
137,2711.0,572.0,5.0,1,AE2AZ2MNROPF33U6SS53VI22OXJA,B072MR3V1J,0,
138,2711.0,572.0,5.0,2,AE2AZ2MNROPF33U6SS53VI22OXJA,B072MR3V1J,0,
37,2711.0,42.0,5.0,3,AE2AZ2MNROPF33U6SS53VI22OXJA,B001EYUWWC,0,
8,2711.0,2438.0,5.0,4,AE2AZ2MNROPF33U6SS53VI22OXJA,B0001AO01Y,0,
185,2711.0,2464.0,5.0,5,AE2AZ2MNROPF33U6SS53VI22OXJA,B08GFGNH3D,0,
...,...,...,...,...,...,...,...,...
191573,15813.0,1280.0,5.0,196,AHZNHP6OKXRZV2UJMYDPLWCKFKEA,B00T8F5VKW,0,
191615,15813.0,1289.0,5.0,197,AHZNHP6OKXRZV2UJMYDPLWCKFKEA,B074MMGR8F,0,
191518,15813.0,2839.0,5.0,198,AHZNHP6OKXRZV2UJMYDPLWCKFKEA,B006QRNKOO,0,
191601,15813.0,2814.0,5.0,199,AHZNHP6OKXRZV2UJMYDPLWCKFKEA,B01HC0LG0S,0,


In [32]:
ranking_report = log_ranking_metrics(args, eval_df)

  return (1 + beta_sqr) * precision_arr * recall_arr / (beta_sqr * precision_arr + recall_arr)


## Classification metrics

In [33]:
from evidently.metric_preset import ClassificationPreset
from src.eval import log_classification_metrics

In [34]:
val_user_indices = val_df['user_indice'].values
val_item_indices = val_df['item_indice'].values

In [35]:
classifications = model.predict(val_user_indices, val_item_indices)

In [36]:
eval_classification_df = val_df.assign(
    classification_proba=classifications,
    label=lambda df: df['rating'].gt(0).astype(int)
)
eval_classification_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,main_category,title,description,categories,price,item_sequence,classification_proba,label
0,AEFWYBITAJIQEAGJMGBBZQPD246Q,B001EYUS7G,0.0,1650810855155,2377,2080,Video Games,Far Cry 2: Fortune's Edition | PC Code - Ubiso...,"[Product Description, Includes Game + Fortune'...","[Video Games, Legacy Systems, PlayStation Syst...",,"[-1, -1, -1, -1, 2044, 1400, 4253, 3448, 3402,...",0.982014,0
1,AEXN3VFNZS7CKHX2NHDHLYDBZZIQ,B002CZ38KA,0.0,1633099443693,6987,2376,Video Games,Heavy Rain - Greatest Hits,"[Product Description, Experience a gripping ps...","[Video Games, Legacy Systems, PlayStation Syst...",7.66,"[-1, 3431, 2128, 1144, 2691, 303, 3974, 3175, ...",0.987807,0
2,AGCYZBKXV6Q5BGHWJB7J7D2HRWSA,B09R21G9DL,0.0,1640957371979,7520,4611,Computers,"Cipon Gamecube Controller, Wired Controller Ga...",[],"[Video Games, Legacy Systems, Nintendo Systems...",17.99,"[-1, -1, -1, -1, 1103, 2459, 750, 673, 2850, 3...",0.952574,0
3,AEWCUX5UKUYPDZJIOB6XMLCBJ3KA,B0BLFYF8K2,4.0,1630263342566,9303,4165,Computers,"Logitech G600 MMO Gaming Mouse, RGB Backlit, 2...","[With 20 buttons, the Logitech G600 MMO Gaming...","[Video Games, PC, Accessories, Gaming Mice]",37.99,"[1829, 1711, 3115, 1930, 1657, 4651, 1579, 250...",0.983170,1
4,AFFPVZ3JNCTQIKAK4XK37E2ENWWA,B00HVBPRUO,4.0,1655428133046,6775,2216,Video Games,Gold Wireless Stereo Headset - PlayStation 4,[A Headset for Gamers: Experience everything f...,"[Video Games, PlayStation 4, Accessories, Head...",,"[-1, -1, 4399, 3877, 1233, 3713, 2050, 3803, 2...",0.992967,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1893,AFUWPAK6VCGEL2OVIL2YGZNFQJZQ,B08N6NCR3Q,4.0,1642699950266,3144,4617,Video Games,Thrustmaster T 16000M SPACE SIM DUO STICK (PC),[The THRUSTMASTER T.16000M FCS Space Sim Duo c...,"[Video Games, PC, Accessories, Controllers, Fl...",119.51,"[-1, -1, -1, -1, 3648, 3017, 4093, 3173, 4263,...",0.993307,1
1894,AEPOQDJZJCF5APANNFRSABUNU4IA,B07G3KB7RT,0.0,1643422574208,10070,200,Video Games,Satisfye – ZenGrip Pro Gen 3 OLED Elite Bundle...,[],"[Video Games, Nintendo Switch, Accessories, Ha...",89.99,"[3808, 1356, 638, 3934, 495, 4213, 2717, 1721,...",0.993307,0
1895,AFH63KLSVQQYRNFS7NLQGD3GSP3A,B094YHB1QK,5.0,1652564728981,13283,3456,Video Games,PlayStation DualSense Wireless Controller – Ga...,[Plot a course for astronomical adventures on ...,"[Video Games, PlayStation 5, Accessories, Cont...",74.99,"[-1, 1999, 1652, 2454, 2557, 1334, 129, 2409, ...",0.993307,1
1896,AFPPTJOEUPVXA5C63SNRGID3EQNA,B0BVVTQ5JP,4.0,1635968491390,15033,3058,Computers,Logitech G502 HERO High Performance Wired Gami...,[Logitech updated its iconic G502 gaming mouse...,"[Video Games, PC, Accessories, Gaming Mice]",45.87,"[-1, -1, -1, -1, -1, 2884, 1953, 1724, 3591, 1...",0.973120,1


In [37]:
classification_report = log_classification_metrics(args, eval_classification_df, target_col='label', prediction_col='classification_proba')


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



# Clean up

In [38]:
all_params = [args]

if args.log_to_mlflow:
    for params in all_params:
        params_dict = params.dict()
        params_ = {f"{params.__repr_name__()}.{k}": v for k, v in params_dict.items()}
        mlflow.log_params(params_)

    mlflow.end_run()

2024/09/21 16:01:10 INFO mlflow.tracking._tracking_service.client: 🏃 View run 064-cf-i2i at: http://localhost:5003/#/experiments/1/runs/ee6639204d5740388009790bb5d5ed42.
2024/09/21 16:01:10 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5003/#/experiments/1.


# Appendix

## Model returning same score for every user-item in top 100

In [39]:
tmp = model.predict([10] * n_items, np.arange(n_items))

In [40]:
pd.Series(tmp).value_counts()

0.952574    3234
0.993307    1462
Name: count, dtype: int64

In [41]:
model.forward(9, 4691, debug=False)

TypeError: Item2ItemCollaborativeFiltering.forward() got an unexpected keyword argument 'debug'