In [2]:
from surprise import Dataset, Reader, SVD
from sklearn.model_selection import train_test_split
from utils import load_data

In [42]:
def prepare_data_unique_users(df):
    """Build a Surprise trainset plus train/test frames from purchase rows.

    Items are keyed by the ``usd``/``coins`` pair (``item_id``) and "users" by
    the ``user_id``/``hour`` pair (``user_hour``). The derived columns are
    added to a copy, so the caller's ``df`` is left unchanged.

    Args:
        df: DataFrame with the columns ``user_id``, ``hour``, ``usd`` and
            ``coins``.

    Returns:
        A ``(trainset, train_df, test_df)`` tuple: the Surprise trainset built
        from the ``user_hour``/``item_id``/``rating`` columns of the train
        split, and the 80/20 split (seed 42, stratified by ``user_id``) of the
        enriched frame.
    """
    # `assign` returns a new frame, so the input is not modified.
    enriched = df.assign(
        item_id=df['usd'].astype(str) + '_' + df['coins'].astype(str),
        user_hour=df['user_id'].astype(str) + '_' + df['hour'].astype(str),
    )

    # NOTE(review): `user_hour` already embeds `user_id`, so each
    # (user_hour, item_id) group holds a single user_id and the normalised
    # rating comes out as 1.0 on every row (barring missing ids). Confirm
    # whether the distinct-user count was meant per (hour, item_id) instead.
    unique_users = (
        enriched.groupby(['user_hour', 'item_id'])['user_id']
        .nunique()
        .reset_index(name='unique_users')
    )
    unique_users['rating'] = unique_users['unique_users'] / unique_users['unique_users'].max()
    enriched = enriched.merge(
        unique_users[['user_hour', 'item_id', 'rating']],
        on=['user_hour', 'item_id'],
    )

    # Stratifying by user_id keeps each user's rows in both splits.
    train_df, test_df = train_test_split(
        enriched,
        test_size=0.2,
        random_state=42,
        stratify=enriched['user_id'],
    )

    reader = Reader(rating_scale=(0, 1))
    trainset = Dataset.load_from_df(
        train_df[['user_hour', 'item_id', 'rating']],
        reader,
    ).build_full_trainset()

    return trainset, train_df, test_df

In [18]:
def train_model(trainset, n_factors=20, n_epochs=30, lr_all=0.1, random_state=None):
    """Fit a Surprise ``SVD`` model on ``trainset`` and return it.

    Args:
        trainset: Surprise trainset to fit on.
        n_factors: Number of latent factors.
        n_epochs: Number of SGD epochs.
        lr_all: Learning rate applied to all parameters.
        random_state: Seed forwarded to ``SVD``. The default ``None`` keeps
            the previous unseeded behaviour; pass an int for repeatable fits.

    Returns:
        The fitted ``SVD`` instance.
    """
    model = SVD(
        n_factors=n_factors,
        n_epochs=n_epochs,
        lr_all=lr_all,
        random_state=random_state,
    )

    model.fit(trainset)
    return model

In [35]:
def create_hourly_recommendations_table(df, hours=range(24)):
    """Rank items for each hour by purchases per distinct buyer.

    For every requested hour the rows of ``df`` with that ``hour`` are grouped
    by item (the ``usd``/``coins`` pair) and scored as
    ``total_purchases / unique_users``, normalised by the best score of that
    hour. The caller's ``df`` is not modified.

    Args:
        df: DataFrame with the columns ``user_id``, ``hour``, ``usd`` and
            ``coins``; each row counts as one purchase.
        hours: Hours to build a table for. Defaults to ``0..23``.

    Returns:
        Dict mapping each hour to a DataFrame sorted by ``normalized_score``
        (descending) with the columns ``item_id``, ``total_purchases``,
        ``unique_users``, ``purchases_per_user``, ``normalized_score``,
        ``usd`` and ``coins`` (the last two as strings). Hours without any
        purchase map to an empty DataFrame with the same columns.
    """
    output_columns = [
        'item_id', 'total_purchases', 'unique_users',
        'purchases_per_user', 'normalized_score', 'usd', 'coins',
    ]

    usd_text = df['usd'].astype(str)
    coins_text = df['coins'].astype(str)
    # Private working frame: derived columns never touch the caller's df.
    purchases = df[['hour', 'user_id']].assign(
        item_id=usd_text + '_' + coins_text,
        usd=usd_text,
        coins=coins_text,
    )

    # One aggregation for all hours. usd/coins are carried through the
    # groupby instead of being re-parsed from item_id, which fails on hours
    # with no rows.
    stats = (
        purchases.groupby(['hour', 'item_id'], observed=True)
        .agg(
            total_purchases=('user_id', 'count'),
            unique_users=('user_id', 'nunique'),
            usd=('usd', 'first'),
            coins=('coins', 'first'),
        )
        .reset_index()
    )
    stats['purchases_per_user'] = stats['total_purchases'] / stats['unique_users']

    hourly_recommendations = {}
    for hour in hours:
        hour_stats = (
            stats[stats['hour'] == hour]
            .drop(columns='hour')
            .reset_index(drop=True)
        )
        # Normalise against the best item of this hour only.
        hour_stats = hour_stats.assign(
            normalized_score=hour_stats['purchases_per_user'] / hour_stats['purchases_per_user'].max()
        )
        hourly_recommendations[hour] = (
            hour_stats.loc[:, output_columns]
            .sort_values('normalized_score', ascending=False)
        )

    return hourly_recommendations

def get_top_items_for_hour(hourly_recommendations, target_hour, top_n=6):
    """Return the first ``top_n`` rows of the table stored for ``target_hour``.

    Args:
        hourly_recommendations: Dict of per-hour tables, as returned by
            ``create_hourly_recommendations_table`` (already sorted by score).
        target_hour: Key of the table to read; a missing key raises ``KeyError``.
        top_n: Number of leading rows to keep.

    Returns:
        DataFrame restricted to the item, price, score and volume columns.
    """
    display_columns = [
        'item_id', 'usd', 'coins',
        'normalized_score', 'total_purchases', 'unique_users',
    ]
    hour_table = hourly_recommendations[target_hour]
    return hour_table[display_columns].head(top_n)

In [10]:
def get_recommendations(model, train_df, user_id, hour, n_items=6):
    """Return the ``n_items`` item ids with the highest predicted rating.

    Every distinct ``item_id`` in ``train_df`` is scored with
    ``model.predict`` for the ``"<user_id>_<hour>"`` key, and the ids are
    returned best first. Items with equal estimates keep the order in which
    they first appear in ``train_df``.
    """
    user_hour = f"{user_id}_{hour}"

    scored = []
    for candidate in train_df['item_id'].unique():
        scored.append(model.predict(user_hour, candidate))

    # list.sort is stable, so ties stay in first-seen order.
    scored.sort(key=lambda prediction: prediction.est, reverse=True)
    return [prediction.iid for prediction in scored[:n_items]]

In [43]:
# Load the dataset through the project's `utils.load_data` helper, then derive
# the Surprise trainset and the train/test DataFrames from it.
df = load_data()
trainset, train_df, test_df = prepare_data_unique_users(df)

In [19]:
# Fit the SVD model on the training split.
model = train_model(trainset)

In [36]:
# Build the per-hour item tables that serve as the baseline in the evaluation cell.
# NOTE(review): the tables are built from the full `df`, which also contains the
# rows that end up in `test_df`, so the baseline is later scored on purchases it
# has already counted. Consider building it from `train_df` instead.
hourly_recommendations = create_hourly_recommendations_table(df)

In [39]:
# Hit rate @6 on held-out purchases: how often the purchased item is among the
# six items suggested by the SVD model versus by the hourly baseline.
hit_rate_model = 0      # hit counter for the SVD model (turned into a rate when printed)
hit_rate_baseline = 0   # hit counter for the hourly baseline
sample_size = 1000
# Seeded so the same rows are drawn on every run, and capped so the cell does
# not raise when test_df holds fewer than sample_size rows.
eval_rows = test_df.sample(min(sample_size, len(test_df)), random_state=42)
for _, row in eval_rows.iterrows():
    user_id = row['user_id']
    hour = row['hour']
    actual_item = row['item_id']
    predictions = get_recommendations(model, train_df, user_id, hour, 6)
    baseline_result = get_top_items_for_hour(hourly_recommendations, hour)
    if actual_item in predictions:
        hit_rate_model += 1
    if actual_item in baseline_result['item_id'].tolist():
        hit_rate_baseline += 1
# Divide by the number of rows actually evaluated, not the requested size.
print(hit_rate_model / len(eval_rows))
print(hit_rate_baseline / len(eval_rows))

0.284
0.159
