In [1]:
import numpy as np
import pandas as pd
from lightfm import LightFM
import itertools
from lightfm.evaluation import precision_at_k
from scipy.sparse import csr_matrix, lil_matrix, coo_matrix
from time import time



In [None]:
def create_interactions(train, test, val):
    """
    Build dense user x track interaction tables for the three data splits.

    Each split is pivoted with users ('user_id_numer') as rows, tracks
    ('track_id_numer') as columns and 'count' as the cell value
    (``pd.pivot_table`` combines duplicate user/track pairs with its default
    aggregation). The three tables are then re-indexed onto one shared set of
    rows and columns -- the sorted union of the users and tracks seen in any
    split -- so that they all have the same shape and position (i, j) refers
    to the same user/track pair in every table. Pivoting each split on its
    own would give tables of different shapes whose columns do not line up.

    Parameters
    ----------
    train, test, val : pandas.DataFrame
        Must contain the columns 'user_id_numer', 'track_id_numer' and
        'count'.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_interaction, test_interaction, val_interaction)``; missing
        user/track pairs are filled with 0.
    """
    pivots = [
        pd.pivot_table(frame, index='user_id_numer', columns='track_id_numer', values='count')
        for frame in (train, test, val)
    ]

    # Collect every user and every track that appears in any split.
    users = pivots[0].index
    tracks = pivots[0].columns
    for pivot in pivots[1:]:
        users = users.union(pivot.index)
        tracks = tracks.union(pivot.columns)

    # Re-index onto the shared axes; pairs absent from a split become 0.
    train_interaction, test_interaction, val_interaction = (
        pivot.reindex(index=users, columns=tracks).fillna(0) for pivot in pivots
    )

    return train_interaction, test_interaction, val_interaction

In [None]:
def create_matrix(train_interaction, test_interaction, val_interaction):
    """
    Convert the three interaction tables into SciPy CSR sparse matrices.

    Each argument's ``.values`` array is wrapped in a ``csr_matrix``.

    Returns
    -------
    tuple
        ``(train_matrix, test_matrix, val_matrix)``, in the same order as
        the arguments.
    """
    tables = (train_interaction, test_interaction, val_interaction)
    return tuple(csr_matrix(table.values) for table in tables)

In [None]:
def sample_hyperparameters(random_state=None):
    """
    Yield an endless stream of randomly sampled hyperparameter sets.

    Every yielded dict holds exactly one value per hyperparameter, drawn
    uniformly from that hyperparameter's candidate list, so it can be
    unpacked straight into a model constructor. (Yielding the candidate
    lists themselves would hand lists to the model and make every draw
    identical.)

    Parameters
    ----------
    random_state : int or None, optional
        Seed for the sampler. ``None`` (the default) gives a different
        sequence on every call; an int makes the sequence reproducible.

    Yields
    ------
    dict
        ``{"no_components": int, "learning_rate": float or int}``
    """
    candidates = {
        "no_components": [5, 10, 15, 20],
        "learning_rate": [.01, .1, 1],
    }
    rng = np.random.RandomState(random_state)

    while True:
        # Index into the plain Python lists so the yielded values keep their
        # native Python types rather than becoming NumPy scalars.
        yield {
            name: values[rng.randint(len(values))]
            for name, values in candidates.items()
        }

In [None]:
def random_search(train, val, m_iter, num_samples=10):
    """
    Random hyperparameter search: fit one LightFM model per sampled setting.

    Parameters
    ----------
    train : sparse matrix
        Interactions used to fit each candidate model.
    val : sparse matrix
        Interactions the candidate models are scored on.
    m_iter : int
        Number of training epochs per candidate.
    num_samples : int, optional
        How many hyperparameter settings to try (default 10). This used to be
        read from an undefined global, which raised ``NameError`` on the first
        iteration.

    Yields
    ------
    tuple
        ``(score, hyperparams, model)`` where ``score`` is the mean of
        ``precision_at_k`` over users (library-default ``k``), ``hyperparams``
        is the sampled dict and ``model`` is the fitted LightFM instance.
    """
    for hyperparams in itertools.islice(sample_hyperparameters(), num_samples):

        model = LightFM(**hyperparams)
        model.fit(train, epochs=m_iter)

        # NOTE: this is mean precision@k, not mean average precision.
        score = precision_at_k(model, val).mean()

        yield (score, hyperparams, model)

In [None]:
def train_model(train_matrix, rank, reg, m_iter):
    """
    Fit a LightFM model on the training interactions and return it.

    Parameters
    ----------
    train_matrix : sparse matrix
        Interactions to fit on.
    rank : int
        Passed to LightFM as ``no_components``.
    reg : float
        Passed to LightFM as ``learning_rate`` (despite the name, it is not
        used as a regularisation strength).
    m_iter : int
        Number of training epochs.

    Returns
    -------
    The value returned by ``LightFM.fit``.
    """
    # Fixed random_state keeps the final fit reproducible.
    lightfm_kwargs = dict(random_state=123, learning_rate=reg, no_components=rank)
    return LightFM(**lightfm_kwargs).fit(train_matrix, epochs=m_iter)

In [None]:
def test_model(model, test_matrix):
    """
    Score a fitted model on held-out interactions.

    Parameters
    ----------
    model : fitted LightFM model
    test_matrix : sparse matrix
        Interactions to evaluate against.

    Returns
    -------
    float
        Mean of ``precision_at_k(model, test_matrix)`` over users
        (library-default ``k``).
    """
    # Evaluate on the matrix that was passed in; the previous code referenced
    # an undefined name `val` and never used `test_matrix`.
    return precision_at_k(model, test_matrix).mean()

In [None]:
def main(data_dir='data', m_iter=4):
    """
    Run the full pipeline: load the splits, tune, retrain and evaluate.

    Steps:
      1. Read ``train_df.csv``, ``test_df.csv`` and ``val_df.csv`` from
         ``data_dir``.
      2. Build interaction tables and convert them to sparse matrices.
      3. Random-search hyperparameters, scored on the validation matrix.
      4. Retrain with the best hyperparameters and score on the test matrix.

    Progress, scores and timings are printed; nothing is returned.

    Parameters
    ----------
    data_dir : str, optional
        Directory holding the three CSV files (default ``'data'``).
    m_iter : int, optional
        Training epochs used for both the search and the final fit
        (default 4).
    """
    train_df = pd.read_csv('{}/train_df.csv'.format(data_dir))
    test_df = pd.read_csv('{}/test_df.csv'.format(data_dir))
    val_df = pd.read_csv('{}/val_df.csv'.format(data_dir))

    train_interaction, test_interaction, val_interaction = create_interactions(train_df, test_df, val_df)
    train_matrix, test_matrix, val_matrix = create_matrix(train_interaction, test_interaction, val_interaction)

    st = time()
    # Keep the candidate with the highest validation score.
    (score, hyperparams, model) = max(random_search(train_matrix, val_matrix, m_iter=m_iter), key=lambda x: x[0])
    end = round(time() - st, 3)

    print("Best score {} at {}".format(score, hyperparams))
    print("Hyperparameter tuning took {}".format(end))

    st = time()
    # Pass the training *matrix* (not the train_model function) and translate
    # the search's keys into train_model's parameter names.
    model = train_model(
        train_matrix,
        rank=hyperparams["no_components"],
        reg=hyperparams["learning_rate"],
        m_iter=m_iter,
    )
    bestMAP = test_model(model, test_matrix)
    end = round(time() - st, 3)

    print("Best MAP on test data: {}".format(bestMAP))
    print("Final model training and fitting took {}".format(end))
    