# Demo of K-Nearest Neighbours (KNN) RecSys Explanations (Post-hoc approach)

#### Imports

In [1]:
import pathlib
import os, sys
import random
import pandas as pd
from typing import List, Tuple, Dict, Any

import plotly.express as px
from jupyter_dash import JupyterDash
from dash import Dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import ipywidgets as widgets

import warnings
warnings.filterwarnings("ignore")

# Resolve the project directories relative to the directory the notebook was launched from.
# NOTE(review): both paths are derived from os.getcwd() and the chdir below changes it,
# so re-running this cell walks further up the tree - restart the kernel before re-running.
root_working_dir = str(pathlib.Path(os.getcwd()).parent.parent.parent)
working_dir = pathlib.Path(os.getcwd()).parent.parent
# Make the second-level ancestor the working directory for the rest of the notebook.
os.chdir(working_dir)
print(f"Current working directory is: {working_dir}\n")
# Put the third-level ancestor on sys.path so the `Code.*` imports below can be resolved.
sys.path.append(root_working_dir)


from Code.Demos.Models.Src.KNNPostHocExplanation import KNNPostHocExplainerDemo
from Code.Demos.Models.Src.ModelBasedExplanation import ModelBasedExplainerDemo
from Code.Utils.Src.Enums import ExplanationType
from Code.Utils.Src.Utils import Helpers
import Code.Model.Src.Constants as c
import Code.DataAccessLayer.Src.Constants as c2

Current working directory is: C:\Development\Python\MyGitHub\Hands-on-Intro-to-building-Explainability-for-RecSys-(Pydata-2023)\ExplainableRecsys\Code



#### Run KNN explainability demo based on cached recommendations and explanation models

In [2]:
def runDemoUsingCachedModelDatasets() -> Tuple[Any, Any, pd.DataFrame]:
    """
    Run the KNN explainability demo from the cached (pickled) recommender and explainer models
    :return: Tuple of (best KNN recommender, best KNN explainer, explanations dataframe of the explainer)
    """
    # NOTE: these are pickle files - only load caches produced by a trusted run of this project.
    recommender = Helpers.readPickleFromFile(c.MF_BEST_KNN_TRAIN_RECOMMENDER_PATH)
    explainer = Helpers.readPickleFromFile(c.MF_BEST_KNN_EXPLANATION_MODEL_PATH)

    # Positional order expected by the report: explanation metrics/frame first, then recommendation metrics/frame.
    report_inputs = (
        explainer.explanation_metrics,
        explainer.explanations_df,
        recommender.recommendation_metrics,
        recommender.recommendations_df,
    )
    ModelBasedExplainerDemo.reportDemoResults(*report_inputs, is_report_explanations=False)
    return recommender, explainer, explainer.explanations_df

def reportBestExplanations(best_explanations_df: pd.DataFrame) -> pd.DataFrame:
    """
    Prints a sample of the generated explanations and returns the trimmed, renamed explanations
    :param best_explanations_df: Explanations dataframe; must contain the columns
        userId, itemId, rank, explanations_as_list, explanation_score and n_explanations
    :return: New dataframe restricted to the relevant columns, with "itemId" and
        "explanations_as_list" renamed to their antecedent/consequent labels
    """
    print("Sample of generated explanations:")
    relevant_explanations_columns = ["userId", "itemId", "rank", "explanations_as_list",
                                     "explanation_score", "n_explanations"]
    renamed_columns = {
        "explanations_as_list": "consequent - explanation",
        "itemId": "antecedent - recommended item"}
    # Non-inplace rename: renaming in place on a column slice is the chained-assignment
    # pattern pandas warns about, and the input frame should not be touched anyway.
    report_df = best_explanations_df[relevant_explanations_columns].rename(columns=renamed_columns)
    display(report_df.head())
    Helpers.createConsoleDivider()
    return report_df

# Load the cached KNN recommender/explainer and print the demo report.
knn_best_recommender, knn_best_explainer, best_explanations_df = runDemoUsingCachedModelDatasets()
# Rebind to the trimmed, renamed explanations frame that the later cells filter on.
best_explanations_df = reportBestExplanations(best_explanations_df)

Sample of generated recommendations:
+--------+--------+--------+
|  userId|  itemId|   rank |
+--------+--------+--------+
|    0.0 | 1462.0 |    1.0 |
|    0.0 | 1665.0 |    2.0 |
|    0.0 | 1176.0 |    3.0 |
|    0.0 | 1645.0 |    4.0 |
|    0.0 |  788.0 |    5.0 |
|    0.0 |  623.0 |    6.0 |
|    0.0 |  836.0 |    7.0 |
|    0.0 |  453.0 |    8.0 |
|    0.0 |  993.0 |    9.0 |
|    0.0 |  661.0 |   10.0 |
+--------+--------+--------+



Recommendation metrics:
{'hit_ratio': 0.0011, 'ndcg': 0.002}



Explanation metrics:
{'fidelity': 0.13573700954400816}



Sample of generated explanations:


Unnamed: 0,userId,antecedent - recommended item,rank,consequent - explanation,explanation_score,n_explanations
598,0.0,623.0,6.0,[329],[0.4479],1
1235,1.0,1319.0,5.0,"[608, 1104, 380]","[0.1246, 0.2508, 0.1195]",3
584,1.0,646.0,9.0,"[216, 297]","[0.4271, 0.4385]",2
403,2.0,482.0,2.0,"[217, 101]","[0.3977, 0.3965]",2
1080,3.0,1294.0,2.0,[59],[0.239],1







#### Sample 30 users and the corresponding explanations of their recommendations

In [3]:
def createSampleUserIds(n_users: int=30) -> Tuple[int, List[int]]:
    """
    Creates a reproducible random sample of user ids from the recommender's dataset
    :param n_users: Number of users to sample. Defaults to 30, the value that was
        previously hard-coded inside the function regardless of the argument
    :return: Tuple of (number of users requested, list of sampled user ids)
    """
    # Fixed seed so that every run of the demo picks the same users.
    random.seed(100)
    # random.sample() requires a sequence: passing a set is deprecated since Python 3.9
    # and raises TypeError from Python 3.11. Sorting the unique ids also makes the draw
    # independent of set iteration order.
    unique_user_ids = sorted(set(knn_best_recommender.metadata.dataset.userId))
    sample_users = random.sample(unique_user_ids, n_users)
    return n_users, sample_users

# Draw the user sample used by the following cells.
n_users, sample_users = createSampleUserIds()
# Preview the first 15 sampled user ids.
sample_users[:15]

[149, 470, 465, 789, 178, 722, 402, 749, 358, 443, 519, 819, 112, 545, 124]

#### Sample explanations

In [11]:
# Keep only the explanations of the sampled users, one row (the first encountered) per user.
# drop_duplicates is chained rather than applied in place so that the result is an
# independent frame instead of an in-place modification of a boolean-mask slice.
sample_explanations_df = (
    best_explanations_df[best_explanations_df.userId.isin(sample_users)]
    .drop_duplicates(subset=['userId'])
)
sample_explanations_df

Unnamed: 0,userId,antecedent - recommended item,rank,consequent - explanation,explanation_score,n_explanations
601,49.0,644.0,8.0,"[136, 141]","[0.3659, 0.3777]",2
219,82.0,273.0,3.0,"[101, 240, 118, 254, 31]","[0.5374, 0.5236, 0.5958, 0.5385, 0.5307]",5
303,124.0,333.0,5.0,"[200, 216, 669, 423]","[0.401, 0.4006, 0.4244, 0.3979]",4
1547,149.0,1605.0,1.0,[660],[0.1045],1
504,178.0,544.0,5.0,[471],[0.4633],1
865,182.0,1042.0,1.0,[719],[0.2086],1
980,208.0,1044.0,8.0,"[1113, 978, 773]","[0.2821, 0.2582, 0.2892]",3
1090,235.0,1121.0,8.0,"[715, 116]","[0.3394, 0.3108]",2
394,343.0,522.0,1.0,[148],[0.35],1
1354,358.0,1469.0,1.0,[365],[0.2239],1


#### Get the content being recommended, i.e. the movies

In [12]:
# Load the pipe-separated item file without a header row, index it by the first column
# and keep only the second column, exposed under the name "movie".
content_df = (
    pd.read_csv(
        c2.MOVIELENS_ITEM_PATH,
        sep='|',
        encoding="ISO-8859-1",
        skiprows=0,
        engine='python',
        header=None,
    )
    .set_index(0)[[1]]
    .rename(columns={1: 'movie'})
)
# Show 10 random rows as a sanity check.
content_df.sample(10)

Unnamed: 0_level_0,movie
0,Unnamed: 1_level_1
183,Alien (1979)
335,How to Be a Player (1997)
1149,Walkabout (1971)
514,Annie Hall (1977)
1306,Delta of Venus (1994)
337,"House of Yes, The (1997)"
1497,"Line King: Al Hirschfeld, The (1996)"
130,Kansas City (1996)
785,Only You (1994)
950,Georgia (1995)


#### Compute the ranked explanations for the highly ranked recommendations

In [13]:
def computeBestKNNExplanations(user_id: int) -> pd.DataFrame:
    """
    Computes the ranked explanations of the best-ranked recommendation of a user,
    ordered by their explanation score.
    Steps:
        - Select the rows of the user and order them by rank in ascending order (rank 1 first)
        - For the first of those rows, order the explanation items by score in descending order
            - Map the consequent (explanation item) ids to movie names
            - Map the antecedent (recommended item) id to its movie name and print it
    :param user_id: Selected user_id
    :return: Dataframe with columns explanation_item_id, explaination_movie and score
    :raises IndexError: If the sampled explanations contain no row for user_id
    """
    train_dataset = knn_best_recommender.train_metadata_clone
    explanations_per_user = sample_explanations_df[sample_explanations_df.userId == user_id]
    if explanations_per_user.empty:
        raise IndexError(f"No explanations available for user_id={user_id}")
    # Sort a per-user copy instead of sorting the shared module-level frame in place on every call.
    explanations_per_user = explanations_per_user.sort_values(by=['rank'], ascending=True)
    antecedent = explanations_per_user["antecedent - recommended item"].iloc[0]
    explanations = explanations_per_user["consequent - explanation"].iloc[0]
    explanation_scores = explanations_per_user["explanation_score"].iloc[0]

    # Pair every explanation item with its score, highest score first.
    results = sorted(zip(explanations, explanation_scores), key=lambda pair: pair[1], reverse=True)
    explanation_item, score = zip(*results)
    results_df = pd.DataFrame({
        "explanation_item_id": explanation_item,
        "score": score,
    })
    # Explanation ids are translated through the train metadata before the title lookup.
    explaination_movie = list(content_df.loc[train_dataset.getOriginalItemId(results_df.explanation_item_id)].movie)
    # NOTE(review): the recommended item id is looked up directly, without getOriginalItemId -
    # confirm that it is already an original item id.
    recommended_movie = "Recommended movie: " + content_df.loc[int(antecedent)].movie
    results_df["explaination_movie"] = explaination_movie
    results_df = results_df[["explanation_item_id", "explaination_movie", "score"]]
    print(recommended_movie)
    return results_df

def onSelectUser(user_id: int):
    """
    Widget callback: renders the ranked KNN explanations of the selected user
    :param user_id: User ID picked in the dropdown
    """
    display(computeBestKNNExplanations(user_id))
    
# Offer the sampled users in ascending id order so the dropdown is stable and easy to scan
# (the trailing semicolon suppresses the notebook echo of the returned object).
widgets.interact(onSelectUser, user_id=sorted(set(sample_explanations_df.userId.tolist())));

interactive(children=(Dropdown(description='user_id', options=(519.0, 402.0, 149.0, 789.0, 663.0, 545.0, 674.0…