In [2]:
import pandas as pd
import numpy as np
from lightfm import LightFM
from lightfm.data import Dataset
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

In [3]:
def load_data(dataset_file='dataset_purchases.csv'):
    """Read the purchases CSV and add the derived time columns.

    Parameters
    ----------
    dataset_file : str or path-like, default 'dataset_purchases.csv'
        CSV file to read. It must contain at least the ``timestamp`` and
        ``usd`` columns; any other columns are passed through unchanged.

    Returns
    -------
    pandas.DataFrame
        The loaded frame where ``timestamp`` is parsed to datetime, ``usd``
        is rounded to two decimals, and two columns are added: ``hour``
        (hour component of ``timestamp``) and ``week`` (ISO calendar week).

    Raises
    ------
    KeyError
        If ``timestamp`` or ``usd`` is missing from the file.
    """
    df = pd.read_csv(dataset_file)

    # Fail early with a message naming the file instead of a bare column KeyError.
    missing = [col for col in ('timestamp', 'usd') if col not in df.columns]
    if missing:
        raise KeyError(f"{dataset_file!s} is missing required column(s): {missing}")

    df['timestamp'] = pd.to_datetime(df['timestamp'])
    # Rounding keeps float noise in prices from leaking into downstream
    # string keys built from ``usd``.
    df['usd'] = df['usd'].round(2)
    df['hour'] = df['timestamp'].dt.hour
    df['week'] = df['timestamp'].dt.isocalendar().week
    return df

In [4]:
def prepare_data(df):
    """Build the LightFM ``Dataset`` and interaction matrix for ``df``.

    Each row of ``df`` becomes one interaction between a "user at a given
    hour" and a "price/coins bundle".

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the ``usd``, ``coins``, ``user_id`` and ``hour`` columns.
        NOTE: two key columns are added to ``df`` in place:
        ``item_id`` (``"<usd>_<coins>"``) and ``user_hour``
        (``"<user_id>_<hour>"``).

    Returns
    -------
    tuple
        ``(dataset, interactions)`` where ``dataset`` is the fitted
        ``lightfm.data.Dataset`` holding the id mappings and ``interactions``
        is the interaction matrix returned by ``Dataset.build_interactions``.
        The weights matrix is discarded because every weight is 1.0.
    """
    # Create item_id from price-coins combination
    df['item_id'] = df['usd'].astype(str) + '_' + df['coins'].astype(str)

    # Create user-hour combinations
    df['user_hour'] = df['user_id'].astype(str) + '_' + df['hour'].astype(str)

    # Register every distinct user-hour key and item key with LightFM.
    dataset = Dataset()
    dataset.fit(
        users=df['user_hour'].unique(),
        items=df['item_id'].unique()
    )

    # Zip the two key columns directly: same (user, item, weight) tuples as a
    # row-wise iteration, without constructing a Series object per row.
    interactions, _weights = dataset.build_interactions(
        (user_hour, item_id, 1.0)
        for user_hour, item_id in zip(df['user_hour'], df['item_id'])
    )
    return dataset, interactions
   

In [5]:
def train_test_stratified(df, test_size=0.2, random_state=42):
    """Split ``df`` into train and test frames by user.

    The unique ``user_id`` values are split, then all rows of each user go to
    one side, so no user appears in both frames. Despite the function name,
    no stratification is applied: ``train_test_split`` is called without a
    ``stratify`` argument.

    NOTE(review): because users in the test frame never occur in the train
    frame, a model fitted only on the train frame has no mapping for them.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``user_id`` column.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split`` (share or count of users held out).
    random_state : int, default 42
        Seed forwarded to ``train_test_split`` for a reproducible split.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_df, test_df)`` as independent copies, so that adding columns
        to them later does not trigger chained-assignment warnings.
    """
    # Split users first
    users = df['user_id'].unique()
    train_users, test_users = train_test_split(
        users, test_size=test_size, random_state=random_state
    )

    # Explicit copies: downstream code assigns new columns on these frames.
    train_df = df[df['user_id'].isin(train_users)].copy()
    test_df = df[df['user_id'].isin(test_users)].copy()

    return train_df, test_df

In [6]:
def train_model(train_interactions, epochs=30, no_components=30,
                learning_rate=0.05, loss='warp', random_state=42,
                verbose=True):
    """Fit a LightFM model on the given interaction matrix.

    Parameters
    ----------
    train_interactions : sparse matrix
        Interaction matrix passed straight to ``LightFM.fit``.
    epochs : int, default 30
        Number of training epochs.
    no_components : int, default 30
        Dimensionality of the latent embeddings.
    learning_rate : float, default 0.05
        Initial learning rate handed to ``LightFM``.
    loss : str, default 'warp'
        Loss name handed to ``LightFM``.
    random_state : int, default 42
        Seed handed to ``LightFM``.
    verbose : bool, default True
        Whether ``LightFM.fit`` reports progress.

    Returns
    -------
    LightFM
        The fitted model.
    """
    model = LightFM(
        learning_rate=learning_rate,
        loss=loss,
        no_components=no_components,
        random_state=random_state,
    )

    model.fit(train_interactions, epochs=epochs, verbose=verbose)
    return model

In [7]:
def get_recommendations(model, dataset, user_hour, n_items=6):
    """Return the best-scoring item ids for one ``user_hour`` key.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict(user_index, item_indices)`` and
        returning one score per item index.
    dataset : object
        Fitted dataset whose ``mapping()`` result holds the user-id mapping
        at position 0 and the item-id mapping at position 2.
    user_hour : str
        External user key in the ``"<user_id>_<hour>"`` format.
    n_items : int, default 6
        Maximum number of items to return.

    Returns
    -------
    list
        Up to ``n_items`` original item ids, ordered from highest to lowest
        score. Empty when ``user_hour`` is unknown to ``dataset`` or when
        ``n_items`` is not positive.
    """
    if n_items <= 0:
        return []

    mappings = dataset.mapping()
    user_id_map = mappings[0]
    item_id_map = mappings[2]

    # Map user_hour to internal id
    user_idx = user_id_map.get(user_hour)
    if user_idx is None:
        return []  # New user-hour combination

    # Score the whole catalogue. The catalogue size gets its own name so it
    # does not clobber the caller's ``n_items`` limit.
    total_items = len(item_id_map)
    scores = model.predict(user_idx, np.arange(total_items))

    # Get top items (descending score), truncated to the requested count
    top_items = np.argsort(-scores)[:n_items]

    # Map back to original item_ids
    reverse_mapping = {idx: item_id for item_id, idx in item_id_map.items()}
    return [reverse_mapping[i] for i in top_items]

In [8]:
# Load the purchase log, then hold out a share of the users for testing.
df = load_data()
train_df, test_df = train_test_stratified(df)

In [9]:
# Fit the id mappings and build the interaction matrix from the training rows only.
dataset, train_interactions = prepare_data(train_df)

In [10]:
# Train the LightFM model on the training interactions.
model = train_model(train_interactions)

: 

In [None]:
# Get recommendations for a user at specific hour
# The lookup key must use the same "<user_id>_<hour>" format that
# prepare_data builds; get_recommendations returns [] for a key that was not
# part of the data the dataset was fitted on.
user_id = "12345"
hour = 14
user_hour = f"{user_id}_{hour}"
recommendations = get_recommendations(model, dataset, user_hour)