In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os
from loguru import logger

import pandas as pd
import numpy as np
from pydantic import BaseModel
import plotly.express as px
from dotenv import load_dotenv
import mlflow

load_dotenv()

sys.path.insert(0, '..')

from src.viz import blueq_colors

# Controller

In [3]:
class Args(BaseModel):
    testing: bool = False
    log_to_mlflow: bool = True
    experiment_name: str = "FSDS RecSys - L5 - Reco Algo"
    run_name: str = '063-cf-u2u'
    notebook_persist_dp: str = None
    random_seed: int = 41

    user_col: str = 'user_id'
    item_col: str = 'parent_asin'
    rating_col: str = 'rating'
    timestamp_col: str = 'timestamp'
    
    top_K: int = 100
    top_k: int = 10

    batch_size: int = 128

    def init(self):
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")
        
        if not os.environ.get("MLFLOW_TRACKING_URI"):
            logger.warning(
                f"Environment variable MLFLOW_TRACKING_URI is not set. Setting self.log_to_mlflow to false."
            )
            self.log_to_mlflow = False

        if self.log_to_mlflow:
            logger.info(
                f"Setting up MLflow experiment {self.experiment_name} - run {self.run_name}..."
            )
            import mlflow

            mlflow.set_experiment(self.experiment_name)
            mlflow.start_run(run_name=self.run_name)

        return self
    
args = Args().init()

print(args.model_dump_json(indent=2))

[32m2024-09-21 15:54:38.686[0m | [1mINFO    [0m | [36m__main__[0m:[36minit[0m:[36m29[0m - [1mSetting up MLflow experiment FSDS RecSys - L5 - Reco Algo - run 063-cf-u2u...[0m


{
  "testing": false,
  "log_to_mlflow": true,
  "experiment_name": "FSDS RecSys - L5 - Reco Algo",
  "run_name": "063-cf-u2u",
  "notebook_persist_dp": "/home/dvquys/frostmourne/reco-algo/notebooks/data/063-cf-u2u",
  "random_seed": 41,
  "user_col": "user_id",
  "item_col": "parent_asin",
  "rating_col": "rating",
  "timestamp_col": "timestamp",
  "top_K": 100,
  "top_k": 10,
  "batch_size": 128
}


# Implement

In [4]:
from src.train_utils import train, MetricLogCallback
from src.model import User2UserCollaborativeFiltering
from src.math_utils import sigmoid

In [None]:
def init_model():
    model = User2UserCollaborativeFiltering(n_users, n_items)
    return model

# Test implementation

In [5]:
# Mock data
user_indices = [0, 0, 1, 1, 2, 2, 2]
item_indices = [0, 1, 1, 2, 3, 1, 2]
ratings = [1, 4, 4, 5, 3, 2, 4]
n_users = len(set(user_indices))
n_items = len(set(item_indices))

val_user_indices = [0, 1, 2]
val_item_indices = [2, 1, 2]
val_ratings = [2, 4, 5]

print("Mock User IDs:", user_indices)
print("Mock Item IDs:", item_indices)
print("Ratings:", ratings)

model = init_model()

users = [0, 1, 2]
items = [2, 2, 0]
predictions = model.predict(users, items)
print(predictions)

Mock User IDs: [0, 0, 1, 1, 2, 2, 2]
Mock Item IDs: [0, 1, 1, 2, 3, 1, 2]
Ratings: [1, 4, 4, 5, 3, 2, 4]
[0.5 0.5 0.5]


In [6]:
model.fit(user_indices, item_indices, ratings)
predictions = model.predict(users, items)
print(predictions)

[0.99031217 0.98201379 0.73105858]


In [7]:
model.user_item_matrix

array([[1., 4., 0., 0.],
       [0., 4., 5., 0.],
       [0., 2., 4., 3.]])

In [8]:
model.user_similarity

array([[0.        , 0.60604322, 0.36030188],
       [0.60604322, 0.        , 0.81202071],
       [0.36030188, 0.81202071, 0.        ]])

In [9]:
user = 0
item = 2

# Compute prediction using weighted average of ratings from similar users
sim_scores = model.user_similarity[user]
print(f"{sim_scores=}")

sim_scores=array([0.        , 0.60604322, 0.36030188])


In [10]:
# Only consider users who have rated the item
user_ratings = model.user_item_matrix[:, item]
print(f"{user_ratings=}")
sim_scores = sim_scores[user_ratings != 0]
print(f"{sim_scores=}")
user_ratings = user_ratings[user_ratings != 0]
print(f"{user_ratings=}")

user_ratings=array([0., 5., 4.])
sim_scores=array([0.60604322, 0.36030188])
user_ratings=array([5., 4.])


In [11]:
# Weighted average of ratings
print(f"Weighted average: {np.dot(sim_scores, user_ratings)}")
print(f"Normalization factor: {np.sum(sim_scores)}")
print(f"Predicted rating: {np.dot(sim_scores, user_ratings) / np.sum(sim_scores)}")
print(f"Predicted rating - sigmoid: {sigmoid(np.dot(sim_scores, user_ratings) / np.sum(sim_scores))}")

Weighted average: 4.471423593469625
Normalization factor: 0.9663450945516922
Predicted rating: 4.627149885356445
Predicted rating - sigmoid: 0.9903121712214553


In [12]:
recommendations = model.recommend(
    val_user_indices,
    k=2,
    progress_bar_type='tqdm_notebook'
)

Generating recommendations:   0%|          | 0/3 [00:00<?, ?it/s]

In [13]:
recommendations

{'user_indice': [np.int64(0),
  np.int64(0),
  np.int64(1),
  np.int64(1),
  np.int64(2),
  np.int64(2)],
 'recommendation': [np.int64(2),
  np.int64(1),
  np.int64(2),
  np.int64(3),
  np.int64(2),
  np.int64(1)],
 'score': [np.float64(0.9903121712214553),
  np.float64(0.9628273118576526),
  np.float64(0.9820137900379085),
  np.float64(0.9525741268224334),
  np.float64(0.9933071490757153),
  np.float64(0.9820137900379085)]}

# Prep data

In [14]:
from src.id_mapper import IDMapper
from src.train_utils import map_indice

In [15]:
train_df = pd.read_parquet("../data/train_features_neg_df.parquet")
val_df = pd.read_parquet("../data/val_features_neg_df.parquet")
idm = IDMapper().load("../data/idm.json")
# val_timestamp = 1628643414042  # https://amazon-reviews-2023.github.io/data_processing/5core.html
assert (val_df[args.timestamp_col].min() - train_df[args.timestamp_col].max()) > 0
val_timestamp = train_df[args.timestamp_col].max() + 1
print(f"{val_timestamp=}")

val_timestamp=np.int64(1628641464793)


In [16]:
user_ids = train_df[args.user_col].values
item_ids = train_df[args.item_col].values
unique_user_ids = list(set(user_ids))
unique_item_ids = list(set(item_ids))
n_users = len(unique_user_ids)
n_items = len(unique_item_ids)

logger.info(f"{len(unique_user_ids)=:,.0f}, {len(unique_item_ids)=:,.0f}")

[32m2024-09-21 15:13:24.778[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mlen(unique_user_ids)=20,366, len(unique_item_ids)=4,696[0m


In [17]:
train_df = train_df.pipe(map_indice, idm, args.user_col, args.item_col)
val_df = val_df.pipe(map_indice, idm, args.user_col, args.item_col)

user_indices = [idm.get_user_index(user_id) for user_id in user_ids]
item_indices = [idm.get_item_index(item_id) for item_id in item_ids]
ratings = train_df[args.rating_col].values.tolist()

val_user_indices = [idm.get_user_index(user_id) for user_id in val_df[args.user_col]]
val_item_indices = [idm.get_item_index(item_id) for item_id in val_df[args.item_col]]
val_ratings = val_df[args.rating_col].values.tolist()

# Train

In [18]:
model = init_model()

#### Predict before train

In [19]:
val_df.sample(10)

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,main_category,title,description,categories,price,item_sequence
398,AEXLFLCDXVF6CJ6JVGZADT2Z3R2Q,B00HVBPRUO,0.0,1645994417200,4224,2216,Video Games,Gold Wireless Stereo Headset - PlayStation 4,[A Headset for Gamers: Experience everything f...,"[Video Games, PlayStation 4, Accessories, Head...",,"[-1, -1, -1, -1, 2310, 3219, 929, 666, 4213, 3..."
623,AGMJWWTZ6HMM2FBRDLFW2CWMV5DQ,B073SC6V1D,5.0,1630809866903,8957,1825,Computers,"havit Gaming Keyboard and Mouse Combo, Backlit...",[],"[Video Games, PC, Accessories, Gaming Keyboards]",,"[-1, -1, -1, -1, -1, 4221, 830, 2741, 357, 4350]"
17,AFNNVSVXIU446JKQR6Z6BWGYZXIA,B004IK89G0,0.0,1649256510391,17882,2983,Video Games,Michael Jackson: The Experience,"[Product Description, Michael Jackson The Expe...","[Video Games, Video Games - Xbox 360 Kinect, X...",38.99,"[-1, -1, -1, -1, -1, 4412, 3088, 3116, 2340, 198]"
855,AFJ6L63XP5GOYSTCRFXFXNG2FBBA,B0050SYZ2G,0.0,1655765736515,13837,2978,Video Games,Kinect Sports Season Two,"[Product Description, Kinect Sports: Season Tw...","[Video Games, Legacy Systems, Xbox Systems, Xb...",15.74,"[4351, 1575, 1650, 1909, 4214, 628, 2138, 2383..."
1295,AESEOKCWWKUG7YPP43J2CRWAXQIA,B09GM4283G,5.0,1646962674203,3803,309,Video Games,PlayStation PULSE 3D Wireless Headset – Midnig...,[Ignite your gaming nights with the ultra-slee...,"[Video Games, PlayStation 5, Accessories, Gami...",99.0,"[-1, -1, -1, -1, 1265, 2592, 1891, 2286, 3638,..."
493,AGD2KE77JSUWQKD5CGYVGCQYJPHQ,B00EI4V3FU,0.0,1639362284720,7202,3027,Video Games,Turtle Beach Ear Force P4C PlayStation 4 Gamin...,"[Experience the high-quality audio, crystal cl...","[Video Games, PlayStation 4, Accessories, Head...",34.99,"[3139, 173, 1700, 102, 383, 3713, 10, 2700, 21..."
573,AHCFQRRL6QMRATADDGFZ632FJ4ZA,B001FRTHBU,0.0,1652353244484,15368,2922,Video Games,Mercenaries 2: World in Flames - PlayStation 2,"[Product description, Mercenaries 2 features t...","[Video Games, Legacy Systems, PlayStation Syst...",24.89,"[1008, 3487, 1621, 3754, 2384, 1868, 3861, 180..."
1831,AHALYOX63S6CACZTHN7R24RVEX2A,B018VAEXD0,0.0,1650077930493,2218,4597,Video Games,MyLifeUNIT Hand Grip Handle Stand for Nintendo...,[Features :>Made of high strength ABS material...,[],13.99,"[354, 2961, 2791, 4656, 2023, 2700, 2256, 11, ..."
781,AERJEAEB43FYERJRLL5LSQJLXR5Q,B0BL65X86R,5.0,1634753869442,6213,32,Video Games,$25 PlayStation Store Gift Card [Digital Code],[Redeem against anything on PlayStation Store....,"[Video Games, Online Game Services, PlayStatio...",25.0,"[-1, -1, -1, 4158, 1989, 3457, 3579, 357, 538,..."
54,AF5VSNTEEL5AERSW7AKTJSJCAVNQ,B087NNZZM8,5.0,1629095203643,11123,3989,Video Games,Animal Crossing: New Horizons (Physical) & Hap...,[],"[Video Games, Nintendo Switch, Games]",76.87,"[3902, 4487, 3498, 3199, 931, 1396, 1034, 1597..."


In [20]:
user_id = val_df.sample(10)[args.user_col].values[0]
test_df = val_df.loc[lambda df: df[args.user_col].eq(user_id)]
test_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,main_category,title,description,categories,price,item_sequence
92,AE7VCHG4GNB23Z3LWF4WULXGCHWQ,B09V5R5LSZ,0.0,1642401425078,9532,1070,Video Games,Halo 5: Guardians 9 Gold REQ Packs – Xbox One ...,[Gear up for Halo 5: Guardians multiplayer mod...,"[Video Games, Xbox One, Downloadable Content]",19.99,"[4617, 4656, 2186, 2906, 366, 632, 3925, 3526,..."
471,AE7VCHG4GNB23Z3LWF4WULXGCHWQ,B07WJ6WP3G,1.0,1642401425078,9532,562,Computers,Redragon S101 Wired RGB Backlit Gaming Keyboar...,[],"[Video Games, PC, Accessories, Gaming Keyboards]",54.99,"[4617, 4656, 2186, 2906, 366, 632, 3925, 3526,..."
668,AE7VCHG4GNB23Z3LWF4WULXGCHWQ,B07895QZBF,0.0,1642403373617,9532,1158,Video Games,Horizon Zero Dawn: Complete Edition - PlayStat...,[Horizon Zero Dawn is an exhilarating action r...,"[Video Games, PlayStation 4, Games]",27.99,"[4656, 2186, 2906, 366, 632, 3925, 3526, 2955,..."
1046,AE7VCHG4GNB23Z3LWF4WULXGCHWQ,B0BMGHMP23,4.0,1642403373617,9532,4619,Computers,Logitech G502 Lightspeed Wireless Gaming Mouse...,[G502 is the best gaming mouse from Logitech G...,"[Video Games, PC, Accessories, Gaming Mice]",87.95,"[4656, 2186, 2906, 366, 632, 3925, 3526, 2955,..."


In [21]:
item_id = test_df.loc[lambda df: df[args.rating_col].gt(0)][args.item_col].values[0]
logger.info(f"Test predicting before training with {args.user_col} = {user_id} and {args.item_col} = {item_id}")
user_indice = idm.get_user_index(user_id)
item_indice = idm.get_item_index(item_id)

model.predict([user_indice], [item_indice])

[32m2024-09-21 15:13:25.189[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mTest predicting before training with user_id = AE7VCHG4GNB23Z3LWF4WULXGCHWQ and parent_asin = B07WJ6WP3G[0m


array([0.5])

#### Training loop

In [22]:
model.fit(user_indices, item_indices, ratings)

# Predict

In [23]:
logger.info(f"Test predicting before training with {args.user_col} = {user_id} and {args.item_col} = {item_id}")
model.predict([user_indice], [item_indice])

[32m2024-09-21 15:13:33.692[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mTest predicting before training with user_id = AE7VCHG4GNB23Z3LWF4WULXGCHWQ and parent_asin = B07WJ6WP3G[0m


array([0.5])

# Evaluate

## Ranking metrics

In [24]:
from src.eval import create_label_df, create_rec_df, merge_recs_with_target
from src.eval import log_ranking_metrics

In [36]:
train_df.loc[lambda df: df['user_indice'].eq(9)]

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,main_category,title,description,categories,price,item_sequence
43086,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B007CSF3GO,5.0,1405798893000,9,653,Video Games,The Last Story - Nintendo Wii,"[Product Description, Embroiled in seemingly e...","[Video Games, Legacy Systems, Nintendo Systems...",,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
86293,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B00BQVXVYY,5.0,1423463206000,9,440,Video Games,LEGO: Marvel - PC,[Lego Marvel Super Heroes sees Lego Nick Fury ...,"[Video Games, PC, Games]",,"[-1.0, -1.0, -1.0, -1.0, 2213.0, 1086.0, 653.0..."
94839,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B000050FBJ,5.0,1405799408000,9,1084,Video Games,Super Mario All Stars,"[Product description, Revisit the magic and fu...","[Video Games, Legacy Systems, Nintendo Systems...",53.49,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 221..."
110686,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B00002SVFQ,5.0,1405799470000,9,1875,Video Games,F-Zero,"[Product description, The future of racing is ...","[Video Games, Legacy Systems, Nintendo Systems...",44.11,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 2213.0, 1..."
111562,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B00328P0GG,5.0,1363878077000,9,2213,Video Games,Skate 3 - Xbox 360,"[Product Description, The award winning SKATE ...","[Video Games, Legacy Systems, Xbox Systems, Xb...",14.2,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
115913,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B00503E8S2,0.0,1405798893000,9,2038,Video Games,Call of Duty: Modern Warfare 3 - Xbox 360,"[Product Description, Modern Warfare is back. ...","[Video Games, Legacy Systems, Xbox Systems, Xb...",40.99,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
116082,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B000FQBPDU,5.0,1423463188000,9,1179,Video Games,Metroid Prime 3: Corruption,[You ARE Samus with Wii control! By moving aro...,"[Video Games, Legacy Systems, Nintendo Systems...",49.99,"[-1.0, -1.0, -1.0, -1.0, -1.0, 2213.0, 1086.0,..."
119706,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B0053BCP40,5.0,1389114070000,9,1086,Video Games,Luigi's Mansion: Dark Moon,"[Help Luigi overcome ghastly ghosts, mind-melt...","[Video Games, Legacy Systems, Nintendo Systems...",31.53,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
137174,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B001ELJFGO,0.0,1363878077000,9,2246,Video Games,Assassin's Creed: Director's Cut Edition [Down...,"[Assassin's Creed, redefines the action genre....","[Video Games, PC, Games]",,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
143830,AHDQY5RS2JL3JQ7LLCNWLG6R5MPA,B00DJRLDMU,0.0,1389114070000,9,1483,Video Games,Tom Clancy’s The Division Underground - Xbox O...,"[In Expansion I, Underground, the factions of ...","[Video Games, Xbox One, Downloadable Content]",,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."


In [25]:
recommendations = model.recommend(
    val_user_indices,
    k=args.top_K,
    progress_bar_type='tqdm_notebook'
)

Generating recommendations:   0%|          | 0/1898 [00:00<?, ?it/s]

In [27]:
recommendations_df = pd.DataFrame(recommendations).pipe(create_rec_df, idm)
recommendations_df

Unnamed: 0,user_indice,recommendation,score,rec_ranking,user_id,parent_asin
0,2377,4691,0.993307,1.0,AEFWYBITAJIQEAGJMGBBZQPD246Q,B07GRP33YM
1,2377,3,0.993307,2.0,AEFWYBITAJIQEAGJMGBBZQPD246Q,B000VIUNZI
2,2377,4687,0.993307,3.0,AEFWYBITAJIQEAGJMGBBZQPD246Q,B002I094AC
3,2377,4685,0.993307,4.0,AEFWYBITAJIQEAGJMGBBZQPD246Q,B00C1ZBFTW
4,2377,26,0.993307,5.0,AEFWYBITAJIQEAGJMGBBZQPD246Q,B0C2CYCX75
...,...,...,...,...,...,...
189795,19050,4579,0.993307,196.0,AHAKU6TTWIHJPZIODW7MGC52M2DA,B0030GBSUC
189796,19050,4573,0.993307,197.0,AHAKU6TTWIHJPZIODW7MGC52M2DA,B07G5RKF3W
189797,19050,4571,0.993307,198.0,AHAKU6TTWIHJPZIODW7MGC52M2DA,B001EYUWWW
189798,19050,4568,0.993307,199.0,AHAKU6TTWIHJPZIODW7MGC52M2DA,B0088TN7HI


In [48]:
label_df = create_label_df(val_df)
label_df

Unnamed: 0,user_id,parent_asin,rating,rating_rank
1711,AEOY2365QPPEVDTOXL6N7ZA4NSAA,B00PDRZG9U,5.0,1.0
425,AFGHX4VLP6P5XORLDJX3LZKUAAZA,B00Z9TJBUW,5.0,1.0
189,AFCH2PDOFM2S3622QFV6PHCHGMCA,B00KSQHX1K,5.0,1.0
1297,AEURBISVS35ALE7YQLR5L4K7AHCA,B07QQ8N7LL,1.0,1.0
320,AEMA3SW3WPNLEH3IACW23K2ZSUFA,B09JDLC31H,4.0,1.0
...,...,...,...,...
663,AFB6FYPPCN33UMUU5536IHXNOHCQ,B00BGA9WK2,0.0,18.0
453,AESD4RLWUKM6JTD6SNNWYLHLLQQA,B00Z9TJHEC,0.0,18.0
582,AG4RCXKPTC6QRORJLUSBY4SO2IAA,B001G7PSGW,0.0,18.0
1374,AFB6FYPPCN33UMUU5536IHXNOHCQ,B01K1OO5PU,0.0,19.0


In [49]:
eval_df = merge_recs_with_target(recommendations_df, label_df, k=args.top_K)
eval_df

Unnamed: 0,user_indice,recommendation,score,rec_ranking,user_id,parent_asin,rating,rating_rank
177,2711.0,4691.0,0.993307,1,AE2AZ2MNROPF33U6SS53VI22OXJA,B07GRP33YM,0,
115,2711.0,0.0,0.993307,2,AE2AZ2MNROPF33U6SS53VI22OXJA,B00BCEK2LU,0,
93,2711.0,4695.0,0.993307,3,AE2AZ2MNROPF33U6SS53VI22OXJA,B004Q8L46G,0,
10,2711.0,1.0,0.993307,4,AE2AZ2MNROPF33U6SS53VI22OXJA,B00004SWL9,0,
87,2711.0,4688.0,0.993307,5,AE2AZ2MNROPF33U6SS53VI22OXJA,B003O6ED7S,0,
...,...,...,...,...,...,...,...,...
191592,15813.0,4567.0,0.993307,196,AHZNHP6OKXRZV2UJMYDPLWCKFKEA,B01MTJA6EV,0,
191541,15813.0,181.0,0.993307,197,AHZNHP6OKXRZV2UJMYDPLWCKFKEA,B00BGAA0SU,0,
191580,15813.0,180.0,0.993307,198,AHZNHP6OKXRZV2UJMYDPLWCKFKEA,B01CR058OS,0,
191630,15813.0,2998.0,0.993307,199,AHZNHP6OKXRZV2UJMYDPLWCKFKEA,B07WVFDZXC,0,


In [50]:
ranking_report = log_ranking_metrics(args, eval_df)

  return (1 + beta_sqr) * precision_arr * recall_arr / (beta_sqr * precision_arr + recall_arr)


## Classification metrics

In [51]:
from evidently.metric_preset import ClassificationPreset
from src.eval import log_classification_metrics

In [52]:
val_user_indices = val_df['user_indice'].values
val_item_indices = val_df['item_indice'].values

In [53]:
classifications = model.predict(val_user_indices, val_item_indices)

In [54]:
eval_classification_df = val_df.assign(
    classification_proba=classifications,
    label=lambda df: df['rating'].gt(0).astype(int)
)
eval_classification_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,main_category,title,description,categories,price,item_sequence,classification_proba,label
0,AEFWYBITAJIQEAGJMGBBZQPD246Q,B001EYUS7G,0.0,1650810855155,2377,2080,Video Games,Far Cry 2: Fortune's Edition | PC Code - Ubiso...,"[Product Description, Includes Game + Fortune'...","[Video Games, Legacy Systems, PlayStation Syst...",,"[-1, -1, -1, -1, 2044, 1400, 4253, 3448, 3402,...",0.905138,0
1,AEXN3VFNZS7CKHX2NHDHLYDBZZIQ,B002CZ38KA,0.0,1633099443693,6987,2376,Video Games,Heavy Rain - Greatest Hits,"[Product Description, Experience a gripping ps...","[Video Games, Legacy Systems, PlayStation Syst...",7.66,"[-1, 3431, 2128, 1144, 2691, 303, 3974, 3175, ...",0.993307,0
2,AGCYZBKXV6Q5BGHWJB7J7D2HRWSA,B09R21G9DL,0.0,1640957371979,7520,4611,Computers,"Cipon Gamecube Controller, Wired Controller Ga...",[],"[Video Games, Legacy Systems, Nintendo Systems...",17.99,"[-1, -1, -1, -1, 1103, 2459, 750, 673, 2850, 3...",0.500000,0
3,AEWCUX5UKUYPDZJIOB6XMLCBJ3KA,B0BLFYF8K2,4.0,1630263342566,9303,4165,Computers,"Logitech G600 MMO Gaming Mouse, RGB Backlit, 2...","[With 20 buttons, the Logitech G600 MMO Gaming...","[Video Games, PC, Accessories, Gaming Mice]",37.99,"[1829, 1711, 3115, 1930, 1657, 4651, 1579, 250...",0.981448,1
4,AFFPVZ3JNCTQIKAK4XK37E2ENWWA,B00HVBPRUO,4.0,1655428133046,6775,2216,Video Games,Gold Wireless Stereo Headset - PlayStation 4,[A Headset for Gamers: Experience everything f...,"[Video Games, PlayStation 4, Accessories, Head...",,"[-1, -1, 4399, 3877, 1233, 3713, 2050, 3803, 2...",0.989572,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1893,AFUWPAK6VCGEL2OVIL2YGZNFQJZQ,B08N6NCR3Q,4.0,1642699950266,3144,4617,Video Games,Thrustmaster T 16000M SPACE SIM DUO STICK (PC),[The THRUSTMASTER T.16000M FCS Space Sim Duo c...,"[Video Games, PC, Accessories, Controllers, Fl...",119.51,"[-1, -1, -1, -1, 3648, 3017, 4093, 3173, 4263,...",0.991472,1
1894,AEPOQDJZJCF5APANNFRSABUNU4IA,B07G3KB7RT,0.0,1643422574208,10070,200,Video Games,Satisfye – ZenGrip Pro Gen 3 OLED Elite Bundle...,[],"[Video Games, Nintendo Switch, Accessories, Ha...",89.99,"[3808, 1356, 638, 3934, 495, 4213, 2717, 1721,...",0.993307,0
1895,AFH63KLSVQQYRNFS7NLQGD3GSP3A,B094YHB1QK,5.0,1652564728981,13283,3456,Video Games,PlayStation DualSense Wireless Controller – Ga...,[Plot a course for astronomical adventures on ...,"[Video Games, PlayStation 5, Accessories, Cont...",74.99,"[-1, 1999, 1652, 2454, 2557, 1334, 129, 2409, ...",0.731059,1
1896,AFPPTJOEUPVXA5C63SNRGID3EQNA,B0BVVTQ5JP,4.0,1635968491390,15033,3058,Computers,Logitech G502 HERO High Performance Wired Gami...,[Logitech updated its iconic G502 gaming mouse...,"[Video Games, PC, Accessories, Gaming Mice]",45.87,"[-1, -1, -1, -1, -1, 2884, 1953, 1724, 3591, 1...",0.980681,1


In [55]:
classification_report = log_classification_metrics(args, eval_classification_df, target_col='label', prediction_col='classification_proba')


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



# Clean up

In [56]:
all_params = [args]

if args.log_to_mlflow:
    for params in all_params:
        params_dict = params.dict()
        params_ = {f"{params.__repr_name__()}.{k}": v for k, v in params_dict.items()}
        mlflow.log_params(params_)

    mlflow.end_run()

2024/09/21 15:49:28 INFO mlflow.tracking._tracking_service.client: 🏃 View run 063-cf-u2u at: http://localhost:5003/#/experiments/1/runs/42b8d39fdb874b2f8da995c90f0b5198.
2024/09/21 15:49:28 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5003/#/experiments/1.


# Appendix

## Model returning same score for every user-item in top 100

In [46]:
tmp = model.predict([10] * n_items, np.arange(n_items))

In [47]:
pd.Series(tmp).value_counts()

0.500000    3234
0.993307     692
0.982014     164
0.952574      79
0.731059      78
            ... 
0.968799       1
0.988753       1
0.989024       1
0.988581       1
0.977353       1
Name: count, Length: 402, dtype: int64

In [39]:
model.forward(9, 4691, debug=False)

> [0;32m/home/dvquys/frostmourne/reco-algo/src/model/cf_u2u.py[0m(38)[0;36mforward[0;34m()[0m
[0;32m     36 [0;31m            [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     37 [0;31m[0;34m[0m[0m
[0m[0;32m---> 38 [0;31m        [0;32mif[0m [0mlen[0m[0;34m([0m[0msim_scores[0m[0;34m)[0m [0;34m==[0m [0;36m0[0m [0;32mor[0m [0msim_scores[0m[0;34m.[0m[0msum[0m[0;34m([0m[0;34m)[0m [0;34m==[0m [0;36m0[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     39 [0;31m            [0;32mreturn[0m [0;36m0[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     40 [0;31m[0;34m[0m[0m
[0m


ipdb>  sim_scores


array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.06295911, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        ])


ipdb>  user_ratings


array([5., 5., 4., 5., 5., 5., 5., 1., 5., 5., 5., 5., 5., 5., 1., 5., 5.,
       5., 5., 2., 1., 4., 5., 3., 4., 1., 5., 5., 5.])


ipdb>  exit
