# Imports

In [27]:
import pandas as pd
import heapq
from collections import defaultdict
from operator import itemgetter
from lightfm.data import Dataset
from lightfm import lightfm
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import auc_score
import numpy as np
import glob
import os
import sys
import scipy

from data_helper import *

In [48]:
def generate_feature_list(dataframe, features_name):
    """
    Flatten the selected feature columns into one flat series of labels.

    Every selected value is converted with ``str``; the values of a row are
    joined with commas and split on commas again, so a value that itself
    contains a comma contributes several labels. Labels are not
    de-duplicated.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding the feature columns.
    features_name : list
        Names of the feature columns available in ``dataframe``.

    Returns
    -------
    pandas.Series
        All feature labels as strings, row by row, with a fresh 0..n-1
        index. Empty when ``dataframe`` has no rows.
    """
    selected = dataframe[features_name]
    if len(selected) == 0:
        # A row-wise apply over zero rows does not produce strings, so the
        # ``.str`` accessor below would fail; there is nothing to flatten.
        return pd.Series([], dtype=object)

    joined = selected.apply(lambda row: ','.join(row.map(str)), axis=1)
    # explode() flattens the per-row lists directly; the previous
    # apply(pd.Series).stack() built one Series per row for the same result.
    return joined.str.split(',').explode().reset_index(drop=True)

def func(x):
    """
    Render the values of ``x`` as one comma-separated string.

    ``x`` must provide ``.map`` (e.g. a pandas Series); each element is
    converted with ``str`` before joining.
    """
    as_text = x.map(str)
    return ','.join(as_text)

def create_features(dataframe, features_name, id_col):
    """
    Pair every id with the list of feature labels found on its row.

    Labels are produced with the same encoding as ``generate_feature_list``:
    each selected value is converted with ``str``, the row is joined with
    commas and split on commas again.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding the id column and the feature columns.
    features_name : list
        Names of the feature columns to read from ``dataframe``.
    id_col : str
        Name of the column holding the id of each row.

    Returns
    -------
    list
        One ``(id, [label, ...])`` tuple per row, in row order. Empty when
        ``dataframe`` has no rows.
    """
    if len(dataframe) == 0:
        # A row-wise apply over zero rows does not produce strings, so the
        # ``.str`` accessor below would fail; there is nothing to pair.
        return []

    joined = dataframe[features_name].apply(
        lambda row: ','.join(row.map(str)), axis=1)
    labels = joined.str.split(',')
    return list(zip(dataframe[id_col], labels))

def calculate_auc_score(lightfm_model, interactions_matrix,
                        question_features, professional_features,
                        num_threads=16, train_interactions=None):
    """
    Measure the mean ROC AUC metric for a model.
    A perfect score is 1.0.

    Parameters
    ----------
    lightfm_model : LightFM model
        A fitted lightfm model.
    interactions_matrix :
        A lightfm interactions matrix to score the model against.
    question_features :
        Item feature matrix; forwarded to ``auc_score`` as ``item_features``.
    professional_features :
        User feature matrix; forwarded to ``auc_score`` as ``user_features``.
    num_threads : int, optional
        Number of threads forwarded to ``auc_score``. Defaults to 16, the
        value that used to be hard-coded here.
    train_interactions : optional
        Forwarded to ``auc_score`` unchanged. Pass the training interactions
        when ``interactions_matrix`` is a held-out set so that already-seen
        pairs are excluded from the ranking. Defaults to ``None``.

    Returns
    -------
    The mean of the scores returned by ``auc_score`` (a number, not a
    string).
    """
    scores = auc_score(
        lightfm_model, interactions_matrix,
        train_interactions=train_interactions,
        item_features=question_features,
        user_features=professional_features,
        num_threads=num_threads)
    return scores.mean()

In [29]:
# Load the feature frames and the interaction records; get_features and
# get_interactions are provided by the data_helper star import.
raw_customer_features, raw_product_features = get_features()

# reset_index(level=0) moves index level 0 into the first column of each frame.
customer_features = raw_customer_features.reset_index(level=0)
product_features = raw_product_features.reset_index(level=0)

interactions = get_interactions()

# Column 0 is the former index level (presumably the id column used by the
# later cells -- confirm against data_helper), so only columns 1.. are features.
customer_feature_columns = customer_features.columns[1:].tolist()
product_feature_columns = product_features.columns[1:].tolist()

customer_features_list = generate_feature_list(customer_features, customer_feature_columns)
product_features_list = generate_feature_list(product_features, product_feature_columns)

In [30]:
# Register every customer id, product id and feature label with the Dataset
# so the later build_* calls can map them.
dataset = Dataset()
dataset.fit(
    users=customer_features['customer_unique_id'].tolist(),
    items=product_features['product_id'].tolist(),
    user_features=customer_features_list,
    item_features=product_features_list,
)

In [31]:
# One (customer, product, review_score) tuple per interaction record.
customer_product_interaction = list(zip(
    interactions['customer_unique_id'],
    interactions['product_id'],
    interactions['review_score'],
))

# NOTE: `interactions` is rebound here from the loaded records to the matrix
# returned by build_interactions, so re-running this cell requires re-running
# the loading cell first.
interactions, weights = dataset.build_interactions(customer_product_interaction)

In [32]:
# Pair each id with its feature labels, skipping column 0 as in the loading cell.
customer_feature_tuples = create_features(
    customer_features, customer_features.columns[1:].tolist(), 'customer_unique_id')
product_feature_tuples = create_features(
    product_features, product_features.columns[1:].tolist(), 'product_id')

# NOTE: customer_features / product_features are rebound from DataFrames to the
# objects returned by the Dataset builders; this cell expects the DataFrames,
# so re-run the loading cell before re-running this one.
customer_features = dataset.build_user_features(data=customer_feature_tuples)
product_features = dataset.build_item_features(data=product_feature_tuples)

In [46]:
# Train model
# 100 epochs took a little over ten minutes in the recorded run of this cell.
model = lightfm.LightFM(
    loss='warp',
    no_components=150,
    learning_rate=0.05,
    random_state=2022,
)
model.fit(
    interactions,
    user_features=customer_features,
    item_features=product_features,
    sample_weight=weights,
    epochs=100,
    num_threads=16,
    verbose=True,
)

Epoch: 100%|██████████████████████████████████| 100/100 [10:39<00:00,  6.39s/it]


<lightfm.lightfm.LightFM at 0x1315ea850>

In [None]:
# `interactions` is the same matrix that was passed to model.fit, so this is an
# in-sample (training) AUC; it does not measure performance on unseen data.
calculate_auc_score(
    lightfm_model=model,
    interactions_matrix=interactions,
    question_features=product_features,
    professional_features=customer_features,
)