In [1]:
import pandas as pd
import numpy as np
import scipy.sparse as sp
import copy
import pickle
import time

In [2]:
from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k, auc_score



In [3]:
from thisproject import *

In [4]:
# Load the book catalogue once. `book_map` holds only the id/title/authors/series
# columns of the same file, so it is sliced from `books` instead of parsing the
# CSV a second time; `.copy()` keeps it independent of `books`.
books = pd.read_csv('data/books_transform.csv')
book_map = books[['id', 'title', 'authors', 'series']].copy()
ratings = pd.read_csv('data/ratings.csv')
item_features = sp.load_npz('data/item_features.npz')

In [5]:
# Passed as the second argument to DatasetFaster.fit() in the cells below —
# presumably the number of items (books) in the catalogue; TODO confirm.
NITEMS = 10000

# lightfm

In [7]:
def sample_ratings(ratings_data: pd.DataFrame, sample_size: float = 0.05) -> pd.DataFrame:
    '''Randomly samples dataset by users.

    Parameters
    ----------
    ratings_data : DataFrame with a `user_id` column.
    sample_size : share of unique users to keep. It is forwarded to sklearn's
        `train_test_split` as `test_size`; a float of 1.0 or more keeps every
        user instead of raising.

    Returns
    -------
    A new DataFrame with all rows of `ratings_data` that belong to the
    sampled users.
    '''

    # A float fraction >= 1.0 means "everything"; train_test_split rejects it.
    if isinstance(sample_size, (float, np.floating)) and sample_size >= 1.0:
        return ratings_data.copy()

    from sklearn.model_selection import train_test_split

    # Sample from the frame that was passed in, not from the notebook-level
    # `ratings` global, so the function works on any ratings table.
    _, random_users = train_test_split(
        ratings_data['user_id'].unique(),
        test_size=sample_size,
        random_state=322,  # fixed seed: the same users are drawn on every call
    )
    return ratings_data.loc[ratings_data['user_id'].isin(random_users)]

# Keep the ratings of a random ~5% of users for the quick experiments below.
ratings_random = sample_ratings(ratings, sample_size=0.05)

In [9]:
# DatasetFaster is presumably provided by the `thisproject` star-import above.
# It is fitted with the number of sampled users and NITEMS, then converts the
# ratings frame into the two objects that are handed to LightFM below as
# `interactions` and `sample_weight`.
dataset = DatasetFaster()
dataset.fit(ratings_random.user_id.nunique(), NITEMS)
interactions, weights = dataset.build_interactions(ratings_random)

In [10]:
# WARP-loss LightFM model; every other hyper-parameter is left at its default.
# NOTE(review): no random_state is passed here, so this fit is not seeded.
model = LightFM(learning_rate=0.05, loss = 'warp')

In [11]:
# Train for 10 epochs on the sampled interactions, using `weights` as the
# per-interaction sample weights.
model.fit(
    interactions=interactions,
    sample_weight=weights,
    epochs=10
)
# Overwrite the learned per-item biases with zeros (presumably so that
# recommendations are not dominated by item popularity — TODO confirm intent).
model.item_biases = np.zeros_like(model.item_biases)

In [27]:
# Helper that is not defined in this notebook (presumably from `thisproject`);
# presumably returns the ratings of example user #2 in the same layout as
# `ratings` — TODO confirm.
user_rating_test = fetch_user_ratings_dataset(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['user_id'] = np.repeat(0, df.shape[0])


In [28]:
# Rebuild the matrices with the new user's ratings passed alongside the sample.
interactions_new, weights_new = dataset.build_interactions(ratings_random, user_rating_test)

# fit_partial() resumes training from the model's current state rather than
# re-initialising it, so the existing model is updated with the rebuilt data.
model.fit_partial(
    interactions=interactions_new,
    sample_weight=weights_new
)

<lightfm.lightfm.LightFM at 0x22b5aec94c0>

In [48]:
# Show the recommended book titles for the new user (predict_list is not
# defined in this notebook; presumably a `thisproject` helper).
predict_list(model, user_rating_test)

array(['To Kill a Mockingbird', '1984', 'Animal Farm',
       'The Girl with the Dragon Tattoo (Millennium, #1)', 'Life of Pi',
       'Slaughterhouse-Five', 'Lord of the Flies', 'Of Mice and Men',
       'The Road', "The Handmaid's Tale"], dtype=object)

# neptune

In [11]:
import neptune.new as neptune
from getpass import getpass

In [12]:
# Prompt for the Neptune API token interactively so the secret is never stored
# in the notebook source; it is passed as `api_token` to neptune.init() below.
TOKEN = getpass()

········


In [13]:
# Hyper-parameters that evaluate() unpacks into the LightFM constructor.
# Per the LightFM documentation, `k`/`n` only matter for the 'warp-kos' loss and
# `rho`/`epsilon` only for the 'adadelta' schedule; they are listed here with
# explicit values anyway.
params = {
    'loss': 'warp',
    'learning_schedule': 'adagrad',
    'no_components': 100,
    'learning_rate': 0.05,
    'k': 5,
    'n': 10,
    'rho': 0.95,
    'epsilon': 1e-06,
    'max_sampled': 10,
    'item_alpha': 0.0,
    'user_alpha': 0.0,
    'random_state': 322,
}
# Default number of training epochs for the sweeps below.
epochs = 20

In [14]:
def evaluate(run, model_params, epochs=10, sample_size=0.05):

    '''Fits and evaluates a model with given parameters, and logs to neptune.

    Parameters
    ----------
    run : Neptune run object that receives the parameters and metrics.
    model_params : dict unpacked into the LightFM constructor.
    epochs : number of training epochs.
    sample_size : share of users drawn by sample_ratings().

    Logs `parameters`, `epochs`, `sample_size`, train/test precision@10,
    train/test AUC and the elapsed wall-clock `time` to `run`. Returns None.
    '''

    run['parameters'] = model_params
    run['epochs'].log(epochs)
    run['sample_size'].log(sample_size)

    start = time.time()

    ratings_random = sample_ratings(ratings, sample_size=sample_size)

    dataset = DatasetFaster()
    dataset.fit(ratings_random.user_id.nunique(), NITEMS)
    # Only the weights matrix is used below, so the interactions matrix is
    # neither kept nor split (the extra split was dead work included in `time`).
    _, weights = dataset.build_interactions(ratings_random)
    weights_train, weights_test = random_train_test_split(
        weights, test_percentage=0.2, random_state=322
    )

    model = LightFM(**model_params)
    # NOTE(review): the weights matrix is passed as `interactions` and no
    # `sample_weight` is given, unlike the exploratory fit earlier in this
    # notebook — confirm this is intended.
    model.fit(
        interactions=weights_train,
        epochs=epochs
    )
    # Overwrite the learned item biases with zeros before scoring.
    model.item_biases = np.zeros_like(model.item_biases)

    train_precision = precision_at_k(
        model=model,
        test_interactions=weights_train,
        k=10
    )
    run['train_precision'].log(train_precision.mean())

    # Passing train_interactions excludes known training pairs from the test
    # ranking.
    test_precision = precision_at_k(
        model=model,
        test_interactions=weights_test,
        train_interactions=weights_train,
        k=10
    )
    run['test_precision'].log(test_precision.mean())

    train_auc = auc_score(
        model=model,
        test_interactions=weights_train
    )
    run['train_auc'].log(train_auc.mean())

    test_auc = auc_score(
        model=model,
        test_interactions=weights_test,
        train_interactions=weights_train
    )
    run['test_auc'].log(test_auc.mean())

    # Elapsed time covers sampling, dataset building, training and evaluation.
    run['time'].log(time.time() - start)

In [None]:
# Sweep the user-sample fraction from 1% to 100%, one Neptune run per value.
for sample_fraction in np.linspace(0.01, 1, 10):
    run = neptune.init(
        project='fant0md/testing',
        api_token=TOKEN
    )
    try:
        run['model'] = 'LightFM'
        evaluate(run, model_params=params, epochs=20, sample_size=sample_fraction)
    finally:
        # Close the run even if evaluate() raises or the cell is interrupted,
        # so no run is left open on the Neptune side.
        run.stop()

https://app.neptune.ai/fant0md/testing/e/TES-138
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 4 operations to synchronize with Neptune. Do not kill this process.


All 4 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-139
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-140
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-141
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-142
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [26]:
# Sweep the user-sample fraction over 1%..21% in five steps, one Neptune run each.
for sample_fraction in np.linspace(0.01, 0.21, 5):
    run = neptune.init(
        project='fant0md/testing',
        api_token=TOKEN
    )
    try:
        run['model'] = 'LightFM'
        evaluate(run, model_params=params, epochs=epochs, sample_size=sample_fraction)
    finally:
        # Close the run even if evaluate() raises or the cell is interrupted.
        run.stop()

https://app.neptune.ai/fant0md/testing/e/TES-17
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-18
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 2 operations to synchronize with Neptune. Do not kill this process.


All 2 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-19
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 2 operations to synchronize with Neptune. Do not kill this process.


All 2 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-20
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 4 operations to synchronize with Neptune. Do not kill this process.


All 4 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-21
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 2 operations to synchronize with Neptune. Do not kill this process.


All 2 operations synced, thanks for waiting!


In [71]:
# Sweep the number of training epochs (1..30) on a fixed 5% user sample.
# range() yields plain Python ints rather than numpy integers.
for n_epochs in range(1, 31):
    run = neptune.init(
        project='fant0md/testing',
        api_token=TOKEN
    )
    try:
        run['model'] = 'LightFM'
        evaluate(run, model_params=params, epochs=n_epochs, sample_size=0.05)
    finally:
        # Close the run even if evaluate() raises or the cell is interrupted.
        run.stop()

https://app.neptune.ai/fant0md/testing/e/TES-104
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-105
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-106
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-107
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-108
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-109
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-110
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-111
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-112
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-113
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-114
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-115
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-116
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-117
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-118
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-119
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-120
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-121
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-122
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-123
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-124
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-125
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-126
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-127
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-128
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-129
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-130
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-131
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-132
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!
https://app.neptune.ai/fant0md/testing/e/TES-133
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 5 operations to synchronize with Neptune. Do not kill this process.


All 5 operations synced, thanks for waiting!


## Setup

In [4]:
%load_ext autoreload
%autoreload 2
from thisproject import *

In [5]:
# Load every rating and order the rows by user id in one chained expression.
ratings = pd.read_csv('data/ratings.csv').sort_values('user_id')

In [6]:
# Final LightFM configuration; setup_model() unpacks it into the constructor.
best_params = {
    'loss': 'warp',
    'learning_schedule': 'adagrad',
    'no_components': 100,
    'learning_rate': 0.05,
    'k': 5,
    'n': 10,
    'rho': 0.95,
    'epsilon': 1e-06,
    'max_sampled': 10,
    'item_alpha': 0.0,
    'user_alpha': 0.0,
    'random_state': 322,
}
# Number of epochs used for the final fit.
best_epochs = 20

In [7]:
def setup_model(model_params, epochs, item_features=None, name='1'):

    '''Trains LightFM on the full `ratings` table and pickles the fitted model.

    model_params -- keyword arguments for the LightFM constructor.
    epochs -- number of training epochs.
    item_features -- accepted for interface compatibility; the current
        implementation does not pass it to the model.
    name -- suffix of the output file, which is written as lightfm<name>.pickle
        in the working directory.
    '''

    import pickle

    builder = DatasetFaster()
    builder.fit(ratings.user_id.nunique(), NITEMS)
    # Only the second object returned by build_interactions() is used for fitting.
    _, rating_weights = builder.build_interactions(ratings)

    fitted = LightFM(**model_params)
    fitted.fit(interactions=rating_weights, epochs=epochs)
    # Replace the learned item biases with zeros before the model is saved.
    fitted.item_biases = np.zeros_like(fitted.item_biases)

    with open(f'lightfm{name}.pickle', 'wb') as out_file:
        pickle.dump(fitted, out_file)

In [8]:
# Train on all ratings with the chosen configuration; with the default
# name='1' the model is written to lightfm1.pickle.
setup_model(best_params, epochs=best_epochs)