In [1]:
import datetime
import pandas as pd
import numpy as np
# from lightfm import LightFM
from sklearn import preprocessing
from scipy.sparse import coo_matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt

In [8]:
# Load the raw event log (one row per visitor/item interaction) from the local archive folder.
data_events = pd.read_csv(filepath_or_buffer='archive/events.csv')

In [7]:
# Build the working event table: visitor, item, event type and the calendar date of the event.
#
# * `timestamp` is in milliseconds since the epoch, so it is converted in one vectorised
#   call instead of a per-row Python loop.
# * The conversion is done in UTC, so the derived dates do not depend on the timezone of
#   the machine running the notebook.
# * Rows are ordered by the full timestamp with a stable sort. This also orders them by
#   date, and it keeps the intra-day order chronological and reproducible, which matters
#   because the train/test split further down is positional.
events = (
    data_events
    .sort_values(by='timestamp', kind='stable')
    .assign(date=lambda frame: pd.to_datetime(frame['timestamp'], unit='ms').dt.date)
    .loc[:, ['visitorid', 'itemid', 'event', 'date']]
    .reset_index(drop=True)
)
events.head()

Unnamed: 0,visitorid,itemid,event,date
0,733947,113019,view,2015-05-03
1,182906,230971,view,2015-05-03
2,1019913,347670,view,2015-05-03
3,380074,206981,view,2015-05-03
4,190672,259357,transaction,2015-05-03


In [8]:
# Restrict the analysis to events whose date lies in the inclusive window below.
start_date = '2015-5-3'
end_date = '2015-5-18'


def fd(x):
    """Parse a 'year-month-day' string into a ``datetime.date``."""
    return datetime.datetime.strptime(x, '%Y-%m-%d').date()


window_start = fd(start_date)
window_end = fd(end_date)
on_or_after_start = events['date'] >= window_start
on_or_before_end = events['date'] <= window_end
events = events[on_or_after_start & on_or_before_end]

In [9]:
# Positional 80/20 split: the first 80% of rows form the training set, the rest the test set.
split_point = int(np.round(len(events) * 0.8))
events_train = events.iloc[:split_point]
events_test = events.iloc[split_point:]

# Keep only test rows whose visitor AND item both occur in the training set, because the
# id encoders fitted on the training split are later applied to the test split.
visitor_seen_in_train = events_test['visitorid'].isin(events_train['visitorid'])
item_seen_in_train = events_test['itemid'].isin(events_train['itemid'])
events_test = events_test[visitor_seen_in_train & item_seen_in_train]


In [10]:
# Encode raw visitor/item ids as contiguous integer indices (0..n-1) for the interaction matrix.
id_cols = ['visitorid', 'itemid']
trans_cat_train = dict()
trans_cat_test = dict()
# Fitted encoder per id column. Keeping them (instead of overwriting a single variable on
# every iteration) makes it possible to translate encoded indices back to raw ids later,
# e.g. id_encoders['itemid'].inverse_transform(...).
id_encoders = dict()

for k in id_cols:
    cate_enc = preprocessing.LabelEncoder()
    # Fit on the training ids only; the test split reuses the same mapping.
    trans_cat_train[k] = cate_enc.fit_transform(events_train[k].values)
    trans_cat_test[k] = cate_enc.transform(events_test[k].values)
    id_encoders[k] = cate_enc

In [11]:
# Generating the rating for each user-item interaction pair.
#
# The rating is an explicit, strictly positive interaction strength per event type.
# A LabelEncoder is not suitable here: it numbers the event names alphabetically, so the
# codes carry no notion of engagement (a plain 'view' would score above a 'transaction'),
# and one event type would receive the value 0, which is not a positive interaction in
# the sparse matrix handed to the model.
# NOTE(review): assumes the event vocabulary is exactly view / addtocart / transaction;
# any other value raises below instead of being silently mis-scored.
EVENT_WEIGHTS = {'view': 1, 'addtocart': 2, 'transaction': 3}


def _event_strengths(event_names):
    """Translate a Series of event names into an int64 array of interaction strengths.

    Raises:
        ValueError: if an event name has no entry in ``EVENT_WEIGHTS``.
    """
    strengths = event_names.map(EVENT_WEIGHTS)
    unmapped = event_names[strengths.isna()].unique()
    if len(unmapped) > 0:
        raise ValueError(f"No interaction weight defined for event types: {list(unmapped)}")
    return strengths.to_numpy(dtype=np.int64)


ratings = {
    'train': _event_strengths(events_train.event),
    'test': _event_strengths(events_test.event),
}

In [12]:
# Interaction-matrix dimensions: number of distinct encoded visitors and items in training.
n_users = np.unique(trans_cat_train['visitorid']).size
n_items = np.unique(trans_cat_train['itemid']).size

In [13]:
# Sparse visitor x item rating matrices for both splits. They share one shape so that
# row/column indices mean the same visitor/item in train and test.
matrix_shape = (n_users, n_items)
rate_matrix = {
    split_name: coo_matrix(
        (ratings[split_name], (encoded_ids['visitorid'], encoded_ids['itemid'])),
        shape=matrix_shape,
    )
    for split_name, encoded_ids in (('train', trans_cat_train), ('test', trans_cat_test))
}

In [None]:
# The LightFM import in the imports cell is commented out, so the class has to be
# imported here; without it this cell fails with a NameError on a fresh kernel.
from lightfm import LightFM

# Train a 5-component factorisation model with the WARP ranking loss on the training matrix.
model = LightFM(no_components=5, loss='warp')
model.fit(rate_matrix['train'], epochs=100, num_threads=8)

In [1]:
# Getting recommendations from the model

df = events[['itemid']]
# Lookup table from the model's item index to the raw itemid.
# The item encoder was fitted on the training itemids and numbers them in sorted order,
# so position i of the sorted unique training itemids is the raw id of encoded item i.
# (Indexing the per-event itemid column with encoded item indices, as before, returned
# ids unrelated to the ranked items.)
items = np.unique(events_train['itemid'].to_numpy())
def sample_recommendation(model, data, user_ids, item_labels=None, top_n=3):
    """Print the highest-scoring items for each requested user.

    Args:
        model: fitted model exposing ``predict(user_id, item_ids)`` that returns one
            score per item id.
        data: object with a ``shape`` of ``(n_users, n_items)``; only the item count
            is used, to enumerate the candidate items.
        user_ids: iterable of encoded user indices to produce recommendations for.
        item_labels: optional array mapping an item index to the label to print.
            When omitted, the module-level ``items`` array is used.
        top_n: how many items to print per user (default 3).

    Returns:
        None. One array of labels is printed per user, best item first.
    """
    _, n_items = data.shape
    labels = items if item_labels is None else np.asarray(item_labels)

    # Every item is a candidate; the index array is the same for all users.
    candidate_items = np.arange(n_items)

    for user_id in user_ids:
        # Score every candidate item for this user.
        scores = model.predict(user_id, candidate_items)

        # Negate so that argsort orders from highest to lowest score.
        best_first = np.argsort(-scores)[:top_n]

        print(labels[best_first])

# Show the recommendations for the visitor with encoded index 1.
sample_recommendation(
    model,
    rate_matrix['train'],
    user_ids=[1],
)

NameError: name 'events' is not defined

In [None]:

# Evaluate the trained model on the held-out test interactions.
# Imported here because the notebook's imports cell does not import anything from lightfm.
from lightfm.evaluation import auc_score, precision_at_k

# Step 1: Generate predictions using the trained model on the test set
predictions = model.predict(trans_cat_test['visitorid'], trans_cat_test['itemid'])

# Step 2: Compare the predicted ratings with the actual ratings in the test set
actual_ratings = ratings['test']

# Step 3: Calculate RMSE and MAE
# NOTE(review): the model is trained with a ranking loss, so its scores are only
# meaningful relative to each other and are not on the rating scale. Treat RMSE/MAE
# as a rough diagnostic only and rely on the ranking metrics below.
rmse = sqrt(mean_squared_error(actual_ratings, predictions))
mae = mean_absolute_error(actual_ratings, predictions)

print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")

# Step 4: Ranking metrics, averaged over the users that have test interactions.
# The training matrix is deliberately not passed: a visitor/item pair can occur in both
# splits here, and the overlap check would then reject the call.
TOP_K = 10
test_precision = precision_at_k(model, rate_matrix['test'], k=TOP_K).mean()
test_auc = auc_score(model, rate_matrix['test']).mean()

print(f"Precision@{TOP_K}: {test_precision}")
print(f"AUC: {test_auc}")
