# Demo of Factorization Machine with LIME RecSys Explanations (Post-hoc Approach)

#### Imports

In [1]:
import pathlib
import os, sys
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from typing import List, Tuple, Dict, Any

import plotly.express as px
from jupyter_dash import JupyterDash
from dash import Dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import ipywidgets as widgets

import warnings
warnings.filterwarnings("ignore")

# Three levels above the current directory; appended to sys.path below,
# presumably so the absolute `Code....` imports that follow can be resolved.
root_working_dir = str(pathlib.Path(os.getcwd()).parent.parent.parent)
# Two levels above the current directory; becomes the process working directory.
working_dir = pathlib.Path(os.getcwd()).parent.parent
os.chdir(working_dir)
print(f"Current working directory is: {working_dir}\n")
# NOTE(review): both paths are derived from os.getcwd(), which the os.chdir()
# call above changes, so re-running this cell climbs further up the tree --
# confirm the cell is only meant to be executed once per kernel session.
sys.path.append(root_working_dir)


from Code.Demos.Models.Src.FMLimePostHocExplanation import FMLimePostHocExplainerDemo
from Code.Demos.Models.Src.ModelBasedExplanation import ModelBasedExplainerDemo
from Code.Demos.Visualization.Src.ExplainabilityVisualization import ExplainerVisualizer
from Code.Utils.Src.Enums import ExplanationType
from Code.Utils.Src.Utils import Helpers
import Code.Model.Src.Constants as c
import Code.DataAccessLayer.Src.Constants as c2

Current working directory is: C:\Development\Python\MyGitHub\Hands-on-Intro-to-building-Explainability-for-RecSys-(Pydata-2023)\ExplainableRecsys\Code



#### Run the FM-LIME explainability demo using cached recommendation and explanation models

In [2]:
def runDemoUsingCachedModelDatasets() -> Tuple[Any, Any, pd.DataFrame]:
    """
    Run the FM-LIME explainability demo from cached artefacts: load the stored
    FM recommender and its LIME explanation model via Helpers.readPickleFromFile
    and hand their metrics and dataframes to ModelBasedExplainerDemo.reportDemoResults
    :return: Tuple of (recommender, explainer, the explainer's explanations dataframe)
    """
    recommender = Helpers.readPickleFromFile(c.FM_BEST_TRAIN_RECOMMENDER_PATH)
    explainer = Helpers.readPickleFromFile(c.FM_BEST_LIME_EXPLANATION_MODEL_PATH)
    # Positional order expected by the report: explanation metrics and frame
    # first, then the recommendation metrics and frame.
    report_arguments = (
        explainer.explanation_metrics,
        explainer.explanations_df,
        recommender.recommendation_metrics,
        recommender.recommendations_df,
    )
    ModelBasedExplainerDemo.reportDemoResults(*report_arguments, is_report_explanations=False)
    return recommender, explainer, explainer.explanations_df

def reportBestExplanations(best_explanations_df: pd.DataFrame):
    """
    Show the first rows of the explanations dataframe under a heading
    :param best_explanations_df: Explanations to preview
    """
    print("Sample of generated explanations:")
    preview_df = best_explanations_df.head()
    display(preview_df)
    Helpers.createConsoleDivider()
    
# Load the cached models once; the names bound here are reused by later cells.
fm_best_recommender, fm_best_explainer, best_explanations_df = runDemoUsingCachedModelDatasets()
reportBestExplanations(best_explanations_df)

Sample of generated recommendations:
+--------+--------+--------+
|  userId|  itemId|   rank |
+--------+--------+--------+
|    0.0 |  200.0 |    1.0 |
|    0.0 |  239.0 |    2.0 |
|    0.0 |   99.0 |    3.0 |
|    0.0 |   66.0 |    4.0 |
|    0.0 |   31.0 |    5.0 |
|    0.0 |  174.0 |    6.0 |
|    0.0 |  100.0 |    7.0 |
|    0.0 |  273.0 |    8.0 |
|    0.0 |  240.0 |    9.0 |
|    0.0 |  185.0 |   10.0 |
+--------+--------+--------+



Recommendation metrics:
{'hit_ratio': 0.1303, 'ndcg': 0.3209}



Explanation metrics:
{'fidelity': 0.744}



Sample of generated explanations:


Unnamed: 0,userId,itemId,Crime,Children's,Adventure,Musical,unknown,Action,Animation,Comedy,...,Drama,Romance,Documentary,Western,Thriller,Horror,Mystery,War,Film-Noir,Fantasy
0,848,309,-0.198,0.186,0.173,0.172,-0.163,0.152,0.107,-0.044,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,694,1225,-0.008,-0.352,-0.007,0.064,0.0,-0.359,0.147,-0.297,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,9,288,0.089,-0.628,-0.048,0.0,0.0,-0.596,0.463,-0.294,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,97,257,0.0,-0.677,-0.002,0.0,0.0,-0.355,0.295,-0.365,...,0.742,0.569,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,580,562,0.012,-0.3,-0.086,0.0,0.0,-0.456,0.202,-0.246,...,0.0,0.0,0.132,0.0,0.0,0.0,0.0,0.0,0.0,0.0







#### Sample users from the training dataset and the corresponding explanations of their recommendations

In [3]:
# User IDs of the (user, item) instances stored on the explainer; duplicates are
# kept because the column is converted to a list as-is.
sample_users = fm_best_explainer.sample_instances_user_item_df.userId.tolist()
# Preview the first 15 entries as the cell output.
sample_users[:15]

[848, 694, 9, 97, 580, 184, 455, 8, 98, 102, 364, 499, 474, 710, 530]

#### Sample explanations

In [4]:
# Minimum R2_Score an explanation row must exceed to be kept. The original cell
# also built an unused mask with 0.4; 0.30 is the value that was actually applied.
# NOTE(review): R2_Score is presumably the fit quality of the local LIME
# surrogate -- confirm against the explainer implementation.
R2_SCORE_THRESHOLD = 0.30

# Keep only explanations that belong to the sampled users ...
sample_explanations_df = best_explanations_df[best_explanations_df.userId.isin(sample_users)]
# ... and whose R2_Score is above the threshold.
sample_explanations_df = sample_explanations_df[sample_explanations_df.R2_Score > R2_SCORE_THRESHOLD]
sample_explanations_df

Unnamed: 0,userId,itemId,Crime,Children's,Adventure,Musical,unknown,Action,Animation,Comedy,...,Drama,Romance,Documentary,Western,Thriller,Horror,Mystery,War,Film-Noir,Fantasy
0,97,257,0.0,-0.677,-0.002,0.0,0.0,-0.355,0.295,-0.365,...,0.742,0.569,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,696,595,0.058,-0.505,-0.143,0.0,0.0,-0.583,0.316,-0.088,...,0.435,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,516,462,0.235,-0.857,-0.206,0.0,0.0,-0.172,0.068,-0.454,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,516,124,0.0,-0.824,-0.169,0.0,-0.163,-0.099,0.13,-0.347,...,0.269,0.049,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,114,195,0.0,-0.313,0.039,0.245,0.0,-0.477,0.355,0.0,...,0.524,0.0,0.0,0.0,0.0,0.0,0.0,0.262,0.0,0.0
0,703,956,-0.062,-0.45,0.011,0.0,0.0,-0.652,0.319,-0.06,...,0.542,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,206,275,0.0,-0.592,-0.098,0.0,-0.108,-0.223,0.345,-0.278,...,0.367,0.0,0.0,0.0,0.0,0.0,0.0,0.262,0.0,0.0
0,179,416,0.0,-0.031,-0.031,0.0,0.0,-0.362,0.243,-0.392,...,0.512,0.0,0.0,0.0,-0.013,0.0,0.0,0.0,0.0,0.0
0,717,1473,0.146,-0.636,-0.04,0.0,-0.108,-0.413,0.432,-0.247,...,0.366,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,97,471,0.0,-0.657,-0.025,0.0,0.0,-0.379,0.223,-0.267,...,0.793,0.0,0.0,0.0,0.099,0.0,0.0,0.0,0.0,0.0


#### Get the content being recommended, i.e., movies

In [5]:
# Read the pipe-separated MovieLens item file without a header row, index it by
# its first column and keep only the second column, exposed as 'movie'.
content_df = (
    pd.read_csv(
        c2.MOVIELENS_ITEM_PATH,
        sep='|',
        encoding="ISO-8859-1",
        skiprows=0,
        engine='python',
        header=None,
    )
    .set_index(0)[[1]]
    .rename(columns={1: 'movie'})
)
# Show ten randomly chosen titles as the cell output.
content_df.sample(10)

Unnamed: 0_level_0,movie
0,Unnamed: 1_level_1
990,Anna Karenina (1997)
1185,In the Army Now (1994)
416,Old Yeller (1957)
830,Power 98 (1995)
14,"Postino, Il (1994)"
207,Cyrano de Bergerac (1990)
39,Strange Days (1995)
740,Jane Eyre (1996)
1128,Heidi Fleiss: Hollywood Madam (1995)
329,Desperate Measures (1998)


#### Visualize the FM - Lime explanations

In [6]:
def filterNonZeroCoefficents(
        feature_names: List[str],
        coefficient_values: List[float]
) -> Tuple[List[str], List[float]]:
    """
    Drop every (feature, coefficient) pair whose coefficient equals zero
    :param feature_names: Feature names, aligned position-by-position with coefficient_values
    :param coefficient_values: Coefficient values
    :return: Feature names and coefficient values of the non-zero entries, in their original order
    """
    coefficients = np.array(coefficient_values)
    names = np.array(feature_names)
    # Integer positions (not a boolean mask) so that the selection is driven
    # purely by the coefficient array.
    kept_positions = np.nonzero(coefficients != 0.0)[0]
    kept_names = names[kept_positions].tolist()
    kept_coefficients = coefficients[kept_positions].tolist()
    return kept_names, kept_coefficients

def visualizeLimeFMExplainabilityPerUserId(
        explanations_df: pd.DataFrame,
        user_id: int,
        item_id: int,
        movie_title: str="Star Wars",
        title="Movielens Lime Explainability"):
    """
    Plot the non-zero genre coefficients of one (user, item) explanation as a
    horizontal bar chart; positive coefficients are green, the rest red
    :param explanations_df: Explanations
    :param user_id: User ID of the explanation row to plot
    :param item_id: Item ID of the explanation row to plot
    :param movie_title: Movie title shown in the chart title
    :param title: Leading part of the chart title
    """
    is_user = explanations_df[c2.MOVIELENS_USER_ID_COLUMN_KEY] == user_id
    is_item = explanations_df[c2.MOVIELENS_ITEM_ID_COLUMN_KEY] == item_id
    matching_rows = explanations_df[is_user & is_item]
    # Only the first matching row is plotted; indexing [0] raises IndexError
    # when no row matches the (user, item) pair.
    genre_coefficients = matching_rows[c2.MOVIELENS_GENRE_COLUMNS].to_dict('records')[0]
    genres, weights = filterNonZeroCoefficents(
        list(genre_coefficients),
        list(genre_coefficients.values()),
    )
    bar_positions = np.arange(len(weights)) + .5
    bar_colors = []
    for weight in weights:
        bar_colors.append('green' if weight > 0 else 'red')
    plt.barh(bar_positions, weights, align='center', color=bar_colors)
    plt.yticks(bar_positions, genres, rotation=45)
    plt.title(f"{title} for user ID={user_id} and Movie title: {movie_title}")
    plt.show()


In [7]:
def selectMovie(user_id) -> List[str]:
    """
    Build the "<item id> | <movie title>" labels for the items explained for a user
    :param user_id: User ID to look up in sample_explanations_df
    :return: One label per explained item; an empty list when the user has no
             explanation rows (previously an implicit None)
    """
    user_rows = sample_explanations_df[sample_explanations_df.userId == user_id]
    item_ids = user_rows.itemId.tolist()
    if not item_ids:
        return []
    # Map the item IDs used in the explanations to the IDs that index content_df.
    id_lookup_df = fm_best_recommender.dataset.original_item_id_df
    original_item_ids = id_lookup_df.loc[item_ids].item_Id
    movies = content_df.loc[original_item_ids].movie.tolist()
    return [f"{item_id} | {movie}" for item_id, movie in zip(item_ids, movies)]

def visualizeExplanations(user_id):
    """
    Refresh the movie dropdown for the given user and plot the explanation of
    the movie that is currently selected in it
    :param user_id: User ID whose explanations are shown
    """
    movie_with_id = selectMovie(user_id)
    if not movie_with_id:
        # Nothing to choose from: clear the dropdown instead of failing on an
        # empty selection further down.
        dropdown_item.options = []
        print(f"No explanations available for user ID={user_id}")
        return
    dropdown_item.options = movie_with_id
    # Labels have the form "<item id> | <title>"; split on the first "|" only so
    # a title containing "|" is kept whole, and strip the padding around both parts.
    raw_item_id, _, raw_title = dropdown_item.value.partition("|")
    ExplainerVisualizer.visualizeLimeFMExplainabilityPerUserId(
            sample_explanations_df,
            user_id=user_id,
            item_id=int(raw_item_id),
            movie_title=raw_title.strip()
    )

def onSelectUser(user_control, item_control):
    """
    Callback registered with widgets.interact; redraws the explanation for the
    selected user
    :param user_control: Current value of the user dropdown (a user ID)
    :param item_control: Current value of the movie dropdown; not read here,
                         visualizeExplanations reads the movie dropdown itself
    """
    visualizeExplanations(user_control) 

# Movie dropdown starts without options; visualizeExplanations fills it in for
# whichever user is currently selected.
dropdown_item = widgets.Dropdown(description="Movie Items",
                 layout={'width': 'max-content'})
# One entry per distinct user ID present in sample_explanations_df.
dropdown_user = widgets.Dropdown(options=list(set(sample_explanations_df.userId.tolist())), 
                                 description="User ID",
                                 layout={'width': 'max-content'}
                                )
# Bind both dropdowns to onSelectUser's keyword arguments.
widgets.interact(onSelectUser, user_control=dropdown_user, item_control=dropdown_item)
display(dropdown_item);


interactive(children=(Dropdown(description='User ID', layout=Layout(width='max-content'), options=(3, 516, 911…

Dropdown(description='Movie Items', layout=Layout(width='max-content'), options=('3 | Legends of the Fall (199…