# Demo of Association Rules (AR) RecSys Explanations (Post-hoc approach)

#### Imports

In [1]:
import pathlib
import os, sys
import random
import pandas as pd
from typing import List, Tuple, Dict, Any

import plotly.express as px
from jupyter_dash import JupyterDash
from dash import Dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import ipywidgets as widgets

import warnings
warnings.filterwarnings("ignore")

# Run from two levels above the current working directory, and expose that
# directory's parent on sys.path so the `Code.*` packages below can be imported.
working_dir = pathlib.Path.cwd().parent.parent
root_working_dir = str(working_dir.parent)
os.chdir(working_dir)
print(f"Current working directory is: {working_dir}\n")
sys.path.append(root_working_dir)


from Code.Demos.Models.Src.ARPostHocExplanation import ARPostHocExplainerDemo
from Code.Demos.Models.Src.ModelBasedExplanation import ModelBasedExplainerDemo
from Code.Utils.Src.Enums import ExplanationType
from Code.Utils.Src.Utils import Helpers
import Code.Model.Src.Constants as c
import Code.DataAccessLayer.Src.Constants as c2

Current working directory is: C:\Development\Python\MyGitHub\Hands-on-Intro-to-building-Explainability-for-RecSys-(Pydata-2023)\ExplainableRecsys\Code



#### Run AR explainability demo based on cached recommendations and explanation models

In [2]:
def runDemoUsingCachedModelDatasets() -> Tuple[Any, Any, pd.DataFrame]:
    """
    Load the cached AR recommender and explainer via Helpers.readPickleFromFile,
    report the demo results (explanation samples excluded) and hand the objects back
    :return: Best recommender, explainer and explanations
    """
    recommender = Helpers.readPickleFromFile(c.MF_BEST_AR_TRAIN_RECOMMENDER_PATH)
    explainer = Helpers.readPickleFromFile(c.MF_BEST_AR_EXPLANATION_MODEL_PATH)
    # Positional inputs of the report, in the order reportDemoResults receives them
    report_inputs = (
        explainer.explanation_metrics,
        explainer.explanations_df,
        recommender.recommendation_metrics,
        recommender.recommendations_df,
    )
    ModelBasedExplainerDemo.reportDemoResults(*report_inputs, is_report_explanations=False)
    return recommender, explainer, explainer.explanations_df

def reportBestExplanations(best_explanations_df: pd.DataFrame) -> pd.DataFrame:
    """
    Display a sample of the generated explanations, with the item columns renamed
    to association-rule terminology
    :param best_explanations_df: Explanations; must contain the columns userId, itemId,
        rank, explanations_as_list, explanation_score and n_explanations
    :return: New data frame restricted to those columns, where "itemId" is renamed to
        "antecedent - recommended item" and "explanations_as_list" to
        "consequent - explanation"; the input frame is not modified
    """
    print("Sample of generated explanations:")
    relevant_explanations_columns = ["userId", "itemId", "rank", "explanations_as_list",
                                     "explanation_score", "n_explanations"]
    # Select and rename in one non-mutating chain: renaming a column selection in place
    # is the pattern pandas reports with SettingWithCopyWarning.
    best_explanations_df = best_explanations_df[relevant_explanations_columns].rename(
        columns={
            "explanations_as_list": "consequent - explanation",
            "itemId": "antecedent - recommended item",
        }
    )
    display(best_explanations_df.head())
    Helpers.createConsoleDivider()
    return best_explanations_df

# Load the cached recommender/explainer and report the demo results, then display a
# sample of the explanations and keep the column-renamed frame for the cells below.
ar_best_recommender, ar_best_explainer, best_explanations_df = runDemoUsingCachedModelDatasets()
best_explanations_df = reportBestExplanations(best_explanations_df)

Sample of generated recommendations:
+--------+--------+--------+
|  userId|  itemId|   rank |
+--------+--------+--------+
|    0.0 |  636.0 |    1.0 |
|    0.0 | 1086.0 |    2.0 |
|    0.0 |  980.0 |    3.0 |
|    0.0 |  418.0 |    4.0 |
|    0.0 | 1633.0 |    5.0 |
|    0.0 | 1315.0 |    6.0 |
|    0.0 | 1142.0 |    7.0 |
|    0.0 |  560.0 |    8.0 |
|    0.0 |  304.0 |    9.0 |
|    0.0 | 1510.0 |   10.0 |
+--------+--------+--------+



Recommendation metrics:
{'hit_ratio': 0.0021, 'ndcg': 0.0024}



Explanation metrics:
{'fidelity': 0.015058324496288411}



Sample of generated explanations:


Unnamed: 0,userId,antecedent - recommended item,rank,consequent - explanation,explanation_score,n_explanations
83,1.0,94.0,7.0,"[216, 297, 156, 31]","[0.3636, 0.3256, 0.3192, 0.3942]",4
6,5.0,9.0,10.0,[357],[0.1867],1
659,7.0,740.0,9.0,[357],[0.1867],1
501,20.0,622.0,4.0,"[471, 356, 101, 102, 231, 357, 408, 423, 367, ...","[0.2517, 0.2917, 0.4249, 0.386, 0.391, 0.3446,...",16
83,21.0,94.0,3.0,"[102, 247]","[0.3571, 0.3323]",2







#### Sample 30 users and the corresponding explanations of their recommendations

In [3]:
def createSampleUserIds(n_users: int=30) -> Tuple[int, List[int]]:
    """
    Creates a reproducible (fixed seed) random sample of user IDs taken from
    ar_best_recommender.metadata.dataset.userId
    :param n_users: Number of users to sample
    :return: Tuple of (number of sampled users, list of sampled user IDs)
    :raises ValueError: If n_users exceeds the number of distinct user IDs
        (raised by random.sample)
    """
    # The default is 30 because the body used to overwrite n_users with 30, so a call
    # without arguments keeps returning 30 users while an explicit value is now honoured.
    # random.sample() requires a sequence: passing a set was deprecated in Python 3.9 and
    # raises TypeError from 3.11. Sorting the distinct IDs also makes the seeded draw
    # independent of set iteration order.
    distinct_user_ids = sorted(set(ar_best_recommender.metadata.dataset.userId))
    random.seed(100)
    sample_users = random.sample(distinct_user_ids, n_users)
    return n_users, sample_users

# Draw the user sample, then preview the first 15 sampled user IDs as the cell output.
n_users, sample_users = createSampleUserIds()
sample_users[:15]

[149, 470, 465, 789, 178, 722, 402, 749, 358, 443, 519, 819, 112, 545, 124]

In [8]:
# Keep one explanation row per sampled user (drop_duplicates keeps the first row of each
# userId). Chaining the filter and the de-duplication builds a new frame instead of
# modifying a filtered selection in place, which pandas flags with SettingWithCopyWarning.
sample_explanations_df = (
    best_explanations_df[best_explanations_df.userId.isin(sample_users)]
    .drop_duplicates(subset=['userId'])
)
sample_explanations_df

Unnamed: 0,userId,antecedent - recommended item,rank,consequent - explanation,explanation_score,n_explanations
199,149.0,217.0,9.0,"[256, 31, 288, 34, 36, 297, 47, 49, 307, 311, ...","[0.5, 0.5205, 0.5241, 0.6141, 0.3663, 0.6688, ...",20
166,182.0,235.0,10.0,[357],[0.1848],1
228,209.0,241.0,3.0,[357],[0.2038],1
118,402.0,241.0,4.0,"[357, 101]","[0.272, 0.2038]",2
351,663.0,361.0,9.0,"[49, 357, 289]","[0.3124, 0.2336, 0.3257]",3


#### Get the content being recommended, i.e., movies

In [9]:
# Read the pipe-separated item file (no header row), index it by its first column and
# keep only the second column, exposed under the name 'movie'.
content_df = (
    pd.read_csv(c2.MOVIELENS_ITEM_PATH, sep='|', encoding = "ISO-8859-1", skiprows=0, engine='python', header=None)
    .set_index(0)[[1]]
    .rename(columns={1: 'movie'})
)
# Show 10 random rows as the cell output
content_df.sample(10)

Unnamed: 0_level_0,movie
0,Unnamed: 1_level_1
1345,"Day the Sun Turned Cold, The (Tianguo niezi) (..."
866,Michael (1996)
72,"Mask, The (1994)"
1354,Venice/Venice (1992)
195,"Terminator, The (1984)"
863,"Garden of Finzi-Contini, The (Giardino dei Fin..."
1320,Homage (1995)
1282,"Grass Harp, The (1995)"
154,Monty Python's Life of Brian (1979)
1439,Jason's Lyric (1994)


#### Compute the ranked explanations for the highly ranked recommendations

In [10]:

def computeBestARExplanations(user_id: int) -> pd.DataFrame:
    """
    Computes the best item recommendation explanations based on the top-k AR confidence metric.
    Steps:
        - Select the sampled explanation rows of the user, lowest rank value first
        - For the first of those rows, order the explanations by descending explanation score
            - Map the antecedent (recommended item) movie name
            - Map the consequent (explanation item) movie name
    The recommended movie's name is printed as a side effect.
    :param user_id: Selected user_id
    :return: Data frame with the columns explanation_item_id, explaination_movie and score,
        ordered by descending score
    :raises IndexError: If sample_explanations_df holds no row for user_id
    """
    train_dataset = ar_best_recommender.train_metadata_clone
    # Sort the per-user selection (a new frame) rather than sorting the shared
    # sample_explanations_df in place, so a call leaves module-level state untouched.
    explanations_per_user = sample_explanations_df[
        sample_explanations_df.userId == user_id
    ].sort_values(by=['rank'], ascending=True)
    if explanations_per_user.empty:
        raise IndexError(f"No explanations available for user_id={user_id}")
    antecedent = explanations_per_user["antecedent - recommended item"].iloc[0]
    explanations = explanations_per_user["consequent - explanation"].iloc[0]
    explanation_scores = explanations_per_user["explanation_score"].iloc[0]
    # sorted() is stable, so explanations with equal scores keep their original order.
    ranked_explanations = sorted(
        zip(explanations, explanation_scores), key=lambda pair: pair[1], reverse=True
    )
    # Building the frame from (item, score) records avoids unpacking zip(*...), which
    # fails when the explanation list is empty.
    results_df = pd.DataFrame(ranked_explanations, columns=["explanation_item_id", "score"])
    explaination_movie = list(content_df.loc[train_dataset.getOriginalItemId(results_df.explanation_item_id)].movie)
    # NOTE(review): explanation items are passed through getOriginalItemId before the title
    # lookup, but the recommended item is looked up by its raw ID. Confirm that both kinds
    # of ID are in the same ID space as content_df's index.
    recommended_movie = "Recommended movie: " + content_df.loc[int(antecedent)].movie
    results_df["explaination_movie"] = explaination_movie
    results_df = results_df[["explanation_item_id", "explaination_movie", "score"]]
    print(recommended_movie)
    return results_df

def onSelectUser(user_id: int):
    """
    Widget callback: computes and displays the ranked explanations of the selected user
    :param user_id: User ID
    """
    display(computeBestARExplanations(user_id))
    
# Offer the sampled user IDs as widget options; onSelectUser is called with the selected ID.
# The trailing semicolon keeps the notebook from echoing the value returned by interact().
widgets.interact(onSelectUser, user_id=sample_explanations_df.userId.tolist());

interactive(children=(Dropdown(description='user_id', options=(149.0, 182.0, 209.0, 402.0, 663.0), value=149.0…