# Single-Machine Implementation - LightFM

In [1]:
import pandas as pd
import numpy as np
import time
from scipy.sparse import coo_matrix, csr_matrix
from lightfm.data import Dataset
from lightfm import LightFM
from lightfm.evaluation import auc_score, precision_at_k





In [2]:
train = pd.read_csv("/Users/colleenjin/Desktop/test20/train.csv")
train.rename(columns = {'user':'user_id', 'item':'recording_id', 'rating':'count'}, inplace = True)
train.head()

Unnamed: 0,user_id,recording_id,count
0,2481,8959,1
1,16160,13284,1
2,1660,51104,1
3,13986,67268,1
4,15138,69516,2


In [3]:
val = pd.read_csv("/Users/colleenjin/Desktop/test20/val.csv")
val.rename(columns = {'user':'user_id', 'item':'recording_id', 'rating':'count'}, inplace = True)
val.shape

(21039354, 3)

In [4]:
test = pd.read_csv("/Users/colleenjin/Desktop/test20/test.csv")
test.rename(columns = {'user':'user_id', 'item':'recording_id', 'rating':'count'}, inplace = True)
test.head()

Unnamed: 0,user_id,recording_id,count
0,131,84953,1
1,9602,118940,1
2,8176,157395,1
3,714,181692,1
4,5922,190806,13


In [5]:
sampled_val= val.sample(frac=0.0005)
sampled_test = test.sample(frac=0.0005)

total = pd.concat([train, sampled_val, sampled_test]).drop_duplicates()
unique_users = total['user_id'].unique()
unique_items = total['recording_id'].unique()

In [6]:
data = Dataset()
data.fit(users = unique_users, items = unique_items)


## Build interactions

In [7]:
# data (iterable of (user_id, item_id) or (user_id, item_id, weight))
interactions_train, weights_train = data.build_interactions([(train['user_id'][i], 
                                                              train['recording_id'][i], 
                                                              train['count'][i]) for i in range(train.shape[0])])

In [8]:
interactions_val, weights_val = data.build_interactions([(sampled_val.iloc[i]['user_id'],
                                                          sampled_val.iloc[i]['recording_id'],
                                                          sampled_val.iloc[i]['count']) for i in range(sampled_val.shape[0])])

In [9]:
interactions_test, weights_test = data.build_interactions([(sampled_test.iloc[i]['user_id'],
                                                          sampled_test.iloc[i]['recording_id'],
                                                          sampled_test.iloc[i]['count']) for i in range(sampled_test.shape[0])])

In [10]:
sampled_val.shape

(10520, 3)

### Hyperparameters to search

In [11]:
# param_grid = {
#         # 'max_iter': [5, 10, 20],
#         # 'learning_schedule': ['adagrad', 'adadelta'],
#         'no_components': [10, 20, 30],
#         'loss': ['warp', 'bpr'],
#         'user_alpha': [0.1, 0.01, 0.001],
#         # 'item_alpha': [0.1, 0.01, 0.001]
#     }

In [12]:
# best_precision = 0
# best_params = None
# time_cost = 0
# for rank in param_grid['rank']:
#     # for learning_schedule in param_grid['learning_schedule']:  # initial learning rate for the adagrad learning schedule
#     for no_components in param_grid['no_components']:  # the dimensionality of the feature latent embeddings
#         for loss in param_grid['loss']:
#             for user_alpha in param_grid['user_alpha']:  # L2 penalty on user features
#                 # for item_alpha in param_grid['item_alpha']:  # L2 penalty on item features
#                 print(f"start training {loss} model")
#                 start = time.time()
#                 print("initiating LightFM...")
#                 model = LightFM(no_components=no_components,
#                                 loss=loss,
#                                 user_alpha=user_alpha)
#                 print("fitting LightFM...")
#                 model = model.fit(interactions=interactions_train)
#                 print("computing precision_at_k...")
#                 val_precision = precision_at_k(model, interactions_val, k=rank).mean()
#                 end = time.time()
                
#                 print(f"LightFM {loss} model with rank={rank}, no_components={no_components}, user_alpha={user_alpha}:")
#                 print(f"Precision: {val_precision}")
#                 print(f"Time cost: {end-start}")

In [11]:
# Group the interactions by user_id and compute user-based statistics
user_features = total.groupby('user_id')['count'].sum().reset_index()

# Create a feature matrix from the user-based statistics
user_features_matrix = np.zeros((len(user_features), 1))
user_features_matrix[:, 0] = user_features['count']

# Align the feature matrix with the interaction matrices
user_features_train = csr_matrix(user_features_matrix[interactions_train.row])
user_features_val = csr_matrix(user_features_matrix[interactions_val.row])

# Train the model using the interaction matrices and user features
# model = LightFM()
# model.fit(interactions_train, user_features=user_features_train)

# # Evaluate the model using precision at K
# k = 5
# precision = lightfm.evaluation.precision_at_k(model, interactions_test, user_features=user_features_test, k=k)

### 1. rank = 100, no_components = 10, loss = warp, user_alpha = 0.1 

In [14]:
k=100
no_components=10
loss='warp'
user_alpha=0.1

In [15]:
print(f"start training warp model")
start = time.time()
print("initiating LightFM...")
model = LightFM(no_components=no_components,
                loss=loss,
                user_alpha=user_alpha)
print("fitting LightFM...")
model = model.fit(interactions=interactions_train, sample_weight=weights_train, epochs=1, verbose=False)
end = time.time()
print(f"Time cost (fit): {end-start}")

start training warp model
initiating LightFM...
fitting LightFM...
Time cost (fit): 1059.4583690166473


In [16]:
print("computing precision_at_k...")
start = time.time()
val_precision = precision_at_k(model, interactions_val, k=k).mean()
end = time.time()

print(f"LightFM model with rank={k}, no_components={no_components}, loss={loss}, user_alpha={user_alpha}:")
print(f"Precision at k={k}: {val_precision}")
print(f"Time cost (evaluate): {end-start}")

computing precision_at_k...
LightFM model with rank=100, no_components=10, loss=warp, user_alpha=0.1:
Precision at k=100: 1.0775861483125482e-05
Time cost (evaluate): 2093.286360025406


### 2. rank = 100, no_components = 10, loss = warp, user_alpha = 0.01

In [17]:
k=100
no_components=10
loss='warp'
user_alpha=0.01

In [18]:
print(f"start training warp model")
start = time.time()
print("initiating LightFM...")
model1 = LightFM(no_components=no_components,
                loss=loss,
                user_alpha=user_alpha)
print("fitting LightFM...")
model1 = model1.fit(interactions=interactions_train, sample_weight=weights_train, epochs=1, verbose=False)
end = time.time()
print(f"Time cost (fit): {end-start}")

start training warp model
initiating LightFM...
fitting LightFM...
Time cost (fit): 170.1923041343689


In [19]:
print("computing precision_at_k...")
start = time.time()
val_precision = precision_at_k(model1, interactions_val, k=k).mean()
end = time.time()

print(f"LightFM model with rank={k}, no_components={no_components}, loss={loss}, user_alpha={user_alpha}:")
print(f"Precision at k={k}: {val_precision}")
print(f"Time cost (evaluate): {end-start}")

computing precision_at_k...
LightFM model with rank=100, no_components=10, loss=warp, user_alpha=0.01:
Precision at k=100: 8.081896339717787e-06
Time cost (evaluate): 2080.643155813217


### 3. rank = 100, no_components = 10, loss = warp, user_alpha = 0.001

In [20]:
k=100
no_components=10
loss='warp'
user_alpha=0.001

In [21]:
print(f"start training warp model")
start = time.time()
print("initiating LightFM...")
model2 = LightFM(no_components=no_components,
                loss=loss,
                user_alpha=user_alpha)
print("fitting LightFM...")
model2 = model2.fit(interactions=interactions_train, sample_weight=weights_train, epochs=1, verbose=False)
end = time.time()
print(f"Time cost (fit): {end-start}")

start training warp model
initiating LightFM...
fitting LightFM...
Time cost (fit): 102.2452621459961


In [22]:
print("computing precision_at_k...")
start = time.time()
val_precision = precision_at_k(model2, interactions_val, k=k).mean()
end = time.time()

print(f"LightFM model with rank={k}, no_components={no_components}, loss={loss}, user_alpha={user_alpha}:")
print(f"Precision at k={k}: {val_precision}")
print(f"Time cost: {end-start}")
print(f"Time cost (evaluate): {end-start}")

computing precision_at_k...
LightFM model with rank=100, no_components=10, loss=warp, user_alpha=0.001:
Precision at k=100: 0.0
Time cost: 2140.420850753784
Time cost (evaluate): 2140.420850753784


### 4. rank = 100, no_components = 20, loss = warp, user_alpha = 0.1

In [23]:
k=100
no_components=20  # dimensionality of the feature latent embeddings.
loss='warp'
user_alpha=0.1

In [24]:
print(f"start training warp model")
start = time.time()
print("initiating LightFM...")
model3 = LightFM(no_components=no_components,
                loss=loss,
                user_alpha=user_alpha)
print("fitting LightFM...")
model3 = model3.fit(interactions=interactions_train, sample_weight=weights_train, epochs=1, verbose=False)
end = time.time()
print(f"Time cost (fit): {end-start}")

start training warp model
initiating LightFM...
fitting LightFM...
Time cost (fit): 1310.4581167697906


In [25]:
print("computing precision_at_k...")
start = time.time()
val_precision = precision_at_k(model3, interactions_val, k=k).mean()
end = time.time()

print(f"LightFM model with rank={k}, no_components={no_components}, loss={loss}, user_alpha={user_alpha}:")
print(f"Precision at k={k}: {val_precision}")
print(f"Time cost (evaluate): {end-start}")

computing precision_at_k...
LightFM model with rank=100, no_components=20, loss=warp, user_alpha=0.1:
Precision at k=100: 1.0775861483125482e-05
Time cost (evaluate): 1601.2254269123077


### 5. rank = 100, no_components = 20, loss = warp, user_alpha = 1

In [26]:
k=100
no_components=20
loss='warp'
user_alpha=1

In [27]:
print(f"start training warp model")
start = time.time()
print("initiating LightFM...")
model4 = LightFM(no_components=no_components,
                loss=loss,
                user_alpha=user_alpha)
print("fitting LightFM...")
model4 = model4.fit(interactions=interactions_train, sample_weight=weights_train, epochs=1, verbose=False)
end = time.time()
print(f"Time cost (fit): {end-start}")

start training warp model
initiating LightFM...
fitting LightFM...
Time cost (fit): 10761.031217336655


In [28]:
print("computing precision_at_k...")
start = time.time()
val_precision = precision_at_k(model4, interactions_val, k=k).mean()
end = time.time()

print(f"LightFM model with rank={k}, no_components={no_components}, loss={loss}, user_alpha={user_alpha}:")
print(f"Precision at k={k}: {val_precision}")
print(f"Time cost (evaluate): {end-start}")

computing precision_at_k...
LightFM model with rank=100, no_components=20, loss=warp, user_alpha=1:
Precision at k=100: 8.081896339717787e-06
Time cost (evaluate): 1587.9050660133362


### 6. rank = 100, no_components = 10, loss = warp, user_alpha = 0.1, item_alpha = 1

In [12]:
k=100
no_components=10
loss='warp'
user_alpha=0.1
item_alpha=1

In [15]:
print(f"start training warp model")
start = time.time()
print("initiating LightFM...")
model5 = LightFM(no_components=no_components,
                loss=loss,
                user_alpha=user_alpha,
                item_alpha=item_alpha)
print("fitting LightFM...")
model5 = model5.fit(interactions=interactions_train, sample_weight=weights_train, epochs=1, verbose=False)
end = time.time()
print(f"Time cost (fit): {end-start}")

start training warp model
initiating LightFM...
fitting LightFM...
Time cost (fit): 15353.839386940002


In [16]:
print("computing precision_at_k...")
start = time.time()
val_precision = precision_at_k(model5, interactions_val, k=k).mean()
end = time.time()

print(f"LightFM model with rank={k}, no_components={no_components}, loss={loss}, user_alpha={user_alpha}, item_alpha={item_alpha}:")
print(f"Precision at k={k}: {val_precision}")
print(f"Time cost (evaluate): {end-start}")

computing precision_at_k...
LightFM model with rank=100, no_components=10, loss=warp, user_alpha=0.1, item_alpha=1:
Precision at k=100: 0.0
Time cost (evaluate): 1651.2142357826233


### 7. rank = 100, no_components = 10, loss = warp, user_alpha = 0.1, item_alpha = 0.1

In [None]:
k=100
no_components=10
loss='warp'
user_alpha=0.1
item_alpha=0.1

In [17]:
print(f"start training warp model")
start = time.time()
print("initiating LightFM...")
model6 = LightFM(no_components=no_components,
                loss=loss,
                user_alpha=user_alpha,
                item_alpha=item_alpha)
print("fitting LightFM...")
model6 = model6.fit(interactions=interactions_train, sample_weight=weights_train, epochs=1, verbose=False)
end = time.time()
print(f"Time cost (fit): {end-start}")

start training warp model
initiating LightFM...
fitting LightFM...
Time cost (fit): 15282.193259000778


In [18]:
print("computing precision_at_k...")
start = time.time()
val_precision = precision_at_k(model6, interactions_val, k=k).mean()
end = time.time()

print(f"LightFM model with rank={k}, no_components={no_components}, loss={loss}, user_alpha={user_alpha}, item_alpha={item_alpha}:")
print(f"Precision at k={k}: {val_precision}")
print(f"Time cost (evaluate): {end-start}")

computing precision_at_k...
LightFM model with rank=100, no_components=10, loss=warp, user_alpha=0.1, item_alpha=1:
Precision at k=100: 5.442176643555285e-06
Time cost (evaluate): 1650.066615819931


### 8. rank = 100, no_components = 10, loss = warp, user_alpha = 0.1, item_alpha = 0.01

In [19]:
k=100
no_components=10
loss='warp'
user_alpha=0.1
item_alpha=0.01

In [20]:
print(f"start training warp model")
start = time.time()
print("initiating LightFM...")
model7 = LightFM(no_components=no_components,
                loss=loss,
                user_alpha=user_alpha,
                item_alpha=item_alpha)
print("fitting LightFM...")
model7 = model7.fit(interactions=interactions_train, sample_weight=weights_train, epochs=1, verbose=False)
end = time.time()
print(f"Time cost (fit): {end-start}")

start training warp model
initiating LightFM...
fitting LightFM...
Time cost (fit): 1612.7370300292969


In [21]:
print("computing precision_at_k...")
start = time.time()
val_precision = precision_at_k(model7, interactions_val, k=k).mean()
end = time.time()

print(f"LightFM model with rank={k}, no_components={no_components}, loss={loss}, user_alpha={user_alpha}, item_alpha={item_alpha}:")
print(f"Precision at k={k}: {val_precision}")
print(f"Time cost (evaluate): {end-start}")

computing precision_at_k...
LightFM model with rank=100, no_components=10, loss=warp, user_alpha=0.1, item_alpha=0.01:
Precision at k=100: 0.0
Time cost (evaluate): 1675.434231042862


### 9. rank = 100, no_components = 10, loss = brp, user_alpha = 0.1, item_alpha = 0.1

In [25]:
k=100
no_components=10
loss='bpr'
user_alpha=0.1
item_alpha=0.1

In [26]:
print(f"start training warp model")
start = time.time()
print("initiating LightFM...")
model8 = LightFM(no_components=no_components,
                loss=loss,
                user_alpha=user_alpha,
                item_alpha=item_alpha)
print("fitting LightFM...")
model8 = model8.fit(interactions=interactions_train, sample_weight=weights_train, epochs=1, verbose=False)
end = time.time()
print(f"Time cost (fit): {end-start}")

start training warp model
initiating LightFM...
fitting LightFM...
Time cost (fit): 1569.547702074051


In [28]:
print("computing precision_at_k...")
start = time.time()
val_precision = precision_at_k(model8, interactions_val, k=k).mean()
end = time.time()

print(f"LightFM model with rank={k}, no_components={no_components}, loss={loss}, user_alpha={user_alpha}, item_alpha={item_alpha}:")
print(f"Precision at k={k}: {val_precision}")
print(f"Time cost (evaluate): {end-start}")

computing precision_at_k...
LightFM model with rank=100, no_components=10, loss=bpr, user_alpha=0.1, item_alpha=0.1:
Precision at k=100: 0.0
Time cost (evaluate): 1668.3383531570435


# model on testing set

In [30]:
k=100
no_components=10
loss='warp'
user_alpha=0.1
item_alpha=0

In [31]:
print(f"start training warp model")
start = time.time()
print("initiating LightFM...")
model_final = LightFM(no_components=no_components,
                loss=loss,
                user_alpha=user_alpha,
                item_alpha=item_alpha)
print("fitting LightFM...")
model_final = model_final.fit(interactions=interactions_train, sample_weight=weights_train, epochs=1, verbose=False)
end = time.time()
print(f"Time cost (fit): {end-start}")

start training warp model
initiating LightFM...
fitting LightFM...
Time cost (fit): 1059.0853538513184


In [32]:
print("computing precision_at_k...")
start = time.time()
val_precision = precision_at_k(model_final, interactions_test, k=k).mean()
end = time.time()

print(f"LightFM model with rank={k}, no_components={no_components}, loss={loss}, user_alpha={user_alpha}, item_alpha={item_alpha}:")
print(f"Precision at k={k}: {val_precision}")
print(f"Time cost (evaluate): {end-start}")

computing precision_at_k...
LightFM model with rank=100, no_components=10, loss=warp, user_alpha=0.1, item_alpha=0:
Precision at k=100: 5.216913632466458e-05
Time cost (evaluate): 1985.3897778987885
