In [2]:
import pandas as pd
from apyori import apriori

# GET DATA
model = 'svd'
city = 'Philadelphia'
k=10
factor=0.1

preds = pd.read_pickle(f"../data/clean/predictions_{model}_{city}.pkl")
trainset = pd.read_pickle(f"../data/clean/train.pkl")

In [3]:
# step 1: recs variable
recs = pd.read_pickle(f"../data/clean/recommendations_{model}_{city}_top_{k}.pkl")

# step 2: create association rules
results = pd.DataFrame(list(apriori(recs['recommendations'])))
cols = ['items_base', 'items_add', 'confidence', 'lift']
df_associations=results.explode('ordered_statistics')

df_associations[cols] = pd.DataFrame(
    df_associations['ordered_statistics'].to_list(),
    columns = cols
)
df_associations.drop(columns='ordered_statistics', inplace=True)

# filter according to rules: min_supp = 0.1, min_conf = 0.1, min_lift = 0.1, max_len= 2
df_associations_filter = df_associations[
    (df_associations.apply(lambda x: len(x['items_base'])==len(x['items_add']), axis=1)) & 
    (df_associations['support']>=0.1) &
    (df_associations['confidence']>=0.1) &
    (df_associations['lift']>=0.1)
    ].copy(deep=True).drop_duplicates()

df_associations_filter['X'] = df_associations_filter['items_base'].apply(lambda x: list(x)[0])
df_associations_filter['Y'] = df_associations_filter['items_add'].apply(lambda x: list(x)[0])
df_associations_filter.drop(columns=['items_base', 'items_add', 'items'], inplace=True)

In [4]:
final_recs = recs.drop(columns=['actual']).explode('recommendations')
rec_interpretations = pd.DataFrame()
# step 3: for each user
for uid, rec_u in final_recs.groupby('user_id'):
    # step 4: compute the list {unseen} of items Y where X ⇒ Y if X ∈ {train} and Y not {train}
    # step 5: order unseen by supp/conf/lift
    iid_user = trainset[trainset['uid'] == uid]['iid'].drop_duplicates()
    assoc_user = df_associations_filter[(df_associations_filter['X'].isin(iid_user)) & 
                                        (~df_associations_filter['Y'].isin(iid_user))]\
                                            .drop_duplicates()\
                                                .sort_values(by=['support', 'confidence', 'lift'], ascending=False)

    
    # step 6: merge with recommendations
    recs_assoc_u = pd.merge(rec_u, assoc_user[['X', 'Y']], left_on='recommendations', right_on='Y', how='left')
    rec_interpretations = pd.concat([rec_interpretations, recs_assoc_u])

In [9]:
# % of interpretations: 
rec_interpretations.dropna().shape[0]*100/rec_interpretations.shape[0]

0.992825459200381