In [1]:
import numpy as np
import pandas as pd
import joblib
from collections import defaultdict
from sklearn.preprocessing import LabelEncoder
from lightfm.inference import _precompute_representation

In [2]:
def create_lightfm_features(df):
    """Add LightFM-derived score columns to ``df`` in place.

    Relies on the notebook-level names ``model``, ``model_text``,
    ``_user_repr`` and ``_user_repr_biases``; they must be defined before
    this function is called.

    ``df`` must contain ``pid`` and ``tid`` columns. Their values are used
    directly as indices into the bias / embedding arrays.

    Columns written (first from ``model``; then the same set with a
    ``_text`` suffix from ``model_text`` and the precomputed user
    representation):

    * ``pid_bias`` / ``tid_bias`` -- bias looked up per row
    * ``lightfm_dot_product`` -- row-wise sum of the elementwise product of
      the two embedding rows
    * ``lightfm_prediction`` -- dot product + ``pid_bias`` + ``tid_bias``
    * ``lightfm_rank`` -- descending rank of the prediction within each
      ``pid`` group (largest prediction gets the smallest rank)

    Returns ``None``; the caller's frame is modified.
    """
    user_idx = df['pid']
    item_idx = df['tid']

    # ---- scores from `model` ------------------------------------------
    df['pid_bias'] = model.user_biases[user_idx]
    df['tid_bias'] = model.item_biases[item_idx]

    user_vecs = model.user_embeddings[user_idx]
    item_vecs = model.item_embeddings[item_idx]
    elementwise = user_vecs * item_vecs
    df['lightfm_dot_product'] = elementwise.sum(axis=1)
    df['lightfm_prediction'] = df['lightfm_dot_product'] + df['pid_bias'] + df['tid_bias']

    df['lightfm_rank'] = df.groupby('pid')['lightfm_prediction'].rank(ascending=False)

    # ---- scores from `model_text` -------------------------------------
    # The user side comes from the precomputed per-user arrays rather than
    # from model_text's own user_* attributes.
    df['pid_bias_text'] = _user_repr_biases[user_idx]
    df['tid_bias_text'] = model_text.item_biases[item_idx]

    user_vecs_text = _user_repr[user_idx]
    item_vecs_text = model_text.item_embeddings[item_idx]
    elementwise_text = user_vecs_text * item_vecs_text
    df['lightfm_dot_product_text'] = elementwise_text.sum(axis=1)
    df['lightfm_prediction_text'] = df['lightfm_dot_product_text'] + df['pid_bias_text'] + df['tid_bias_text']

    df['lightfm_rank_text'] = df.groupby('pid')['lightfm_prediction_text'].rank(ascending=False)

In [3]:
# Load the two fitted models and the user-feature matrix.
# The path is handed straight to joblib.load so that joblib opens and closes
# the file itself; wrapping it in a bare open(path, 'rb') would leave the
# handle open for the lifetime of the kernel.
# NOTE: joblib.load unpickles arbitrary objects -- only load files you trust.
model = joblib.load('models/lightfm_model.pkl')
model_text = joblib.load('models/lightfm_model_text.pkl')
user_features = joblib.load('models/user_features.pkl')

# Precompute the user-side arrays for the text model once, up front.
# create_lightfm_features indexes both results by `pid`.
# NOTE(review): presumably this combines `user_features` with the per-feature
# embeddings/biases into one row per user -- confirm against lightfm.inference.
_user_repr, _user_repr_biases = _precompute_representation(
    features=user_features,
    feature_embeddings=model_text.user_embeddings,
    feature_biases=model_text.user_biases,
)

In [4]:
# Read the three candidate tables that the LightFM features are added to.
train, val, test = [
    pd.read_hdf(candidate_path)
    for candidate_path in (
        'df_data/ii_candidate.hdf',
        'df_data/iii_candidate.hdf',
        'df_data/test_candidate.hdf',
    )
]

In [5]:
# create_lightfm_features mutates each frame in place, so nothing is reassigned.
for frame in (train, val, test):
    create_lightfm_features(frame)

In [6]:
# Persist each frame to its own HDF file, all under the same key.
for frame, out_path in (
    (train, 'df_data/ii_lightfm_features.hdf'),
    (val, 'df_data/iii_lightfm_features.hdf'),
    (test, 'df_data/test_lightfm_features.hdf'),
):
    frame.to_hdf(out_path, key='abc')