# Demo of Explainable Matrix Factorization (EMF) RecSys Explanations

#### Imports

In [1]:
import random
import pandas as pd
import umap
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import os, sys
from typing import List, Tuple, Dict, Any

import plotly.express as px
from jupyter_dash import JupyterDash
from dash import Dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import ipywidgets as widgets

# Resolve two ancestors of the current working directory: three levels up
# (added to sys.path) and two levels up (becomes the new working directory).
root_working_dir = str(pathlib.Path(os.getcwd()).parent.parent.parent)
working_dir = pathlib.Path(os.getcwd()).parent.parent
# NOTE(review): both paths are derived from os.getcwd(), which this chdir
# changes, so re-running the cell without restarting the kernel moves two
# further levels up the tree.
os.chdir(working_dir)
# Make the root importable so the `Code.*` imports below resolve.
sys.path.append(root_working_dir)

from Code.Demos.Models.Src.EMFExplanation import EMFExplainerDemo
from Code.Demos.Models.Src.ModelBasedExplanation import ModelBasedExplainerDemo
from Code.Utils.Src.Enums import ExplanationType
from Code.Utils.Src.Utils import Helpers
import Code.Model.Src.Constants as c
import Code.DataAccessLayer.Src.Constants as c2

print(f"Current working directory is: {working_dir}")

  warn("Tensorflow not installed; ParametricUMAP will be unavailable")


Current working directory is: C:\Development\Python\MyGitHub\Hands-on-Intro-to-building-Explainability-for-RecSys-(Pydata-2023)\ExplainableRecsys\Code


#### Run demo based on cached recommendation and explanation models

In [2]:
def runDemoUsingCachedModelDatasets() -> Tuple[Any, Any, pd.DataFrame]:
    """
    Loads the cached EMF recommender and explainer and reports their results
    :return: Tuple of (recommender, explainer, explanations data frame)
    """
    # Both objects are read with Helpers.readPickleFromFile from the paths
    # configured in the model constants module
    recommender = Helpers.readPickleFromFile(c.EMF_BEST_TRAIN_RECOMMENDER_PATH)
    explainer = Helpers.readPickleFromFile(c.EMF_BEST_EXPLANATION_MODEL_PATH)
    explanations_df = explainer.explanations_df

    # Report explanation metrics/rows first, then recommendation metrics/rows
    ModelBasedExplainerDemo.reportDemoResults(
        explainer.explanation_metrics,
        explainer.explanations_df,
        recommender.recommendation_metrics,
        recommender.recommendations_df,
    )
    return recommender, explainer, explanations_df

emf_best_recommender, emf_best_explainer, emf_best_explanations_df = runDemoUsingCachedModelDatasets()

Sample of generated recommendations:
+--------+--------+--------+
|  userId|  itemId|   rank |
+--------+--------+--------+
|    0.0 | 1255.0 |    1.0 |
|    0.0 |  569.0 |    2.0 |
|    0.0 |  174.0 |    3.0 |
|    0.0 |  180.0 |    4.0 |
|    0.0 |  200.0 |    5.0 |
|    0.0 |   36.0 |    6.0 |
|    0.0 |   66.0 |    7.0 |
|    0.0 |  277.0 |    8.0 |
|    0.0 | 1271.0 |    9.0 |
|    0.0 |  130.0 |   10.0 |
+--------+--------+--------+



Recommendation metrics:
{'hit_ratio': 0.0326, 'ndcg': 0.0473}



Sample of generated explanations:
+--------------+--------------+--------------+--------------------+--------------+
|     userId   |     itemId   |      rank    |     explanations   |      expl    |
+--------------+--------------+--------------+--------------------+--------------+
|          0.0 |       1255.0 |          1.0 |                 {} |          0.0 |
|          0.0 |        569.0 |          2.0 |             {5: 1} |       0.1786 |
|          0.0 |        174.0 |         

#### Sample 10 users and the corresponding explanations of their recommendations

In [3]:
def createSampleUserIds(n_users: int=10, user_ids=None) -> Tuple[int, List[int]]:
    """
    Creates a reproducible random sample of user IDs
    :param n_users: Number of users to sample
    :param user_ids: Optional iterable of candidate user IDs; when omitted the
        userId column of the cached recommender's dataset is used
    :return: Number of sampled users and the sampled user IDs
    :raises ValueError: Raised by random.sample when n_users is negative or
        exceeds the number of distinct user IDs
    """
    if user_ids is None:
        user_ids = emf_best_recommender.metadata.dataset.userId
    # random.sample() requires a sequence: passing a set was deprecated in
    # Python 3.9 and raises TypeError from 3.11. Sorting the distinct IDs also
    # makes the seeded draw independent of set iteration order.
    candidates = sorted(set(user_ids))
    # Fixed seed so the same users are drawn on every run
    random.seed(100)
    sample_users = random.sample(candidates, n_users)
    return n_users, sample_users

n_users, sample_users = createSampleUserIds()
sample_users

[149, 470, 465, 789, 178, 722, 402, 749, 358, 443]

#### Extract user and item latent matrix embeddings

In [4]:
def getUserAndItemEmbeddings() -> Tuple[np.ndarray, np.ndarray]:
    """
    Fetches the latent embeddings from the cached recommender model
    :return: Tuple ordered as (item embeddings, user embeddings)
    """
    model = emf_best_recommender.model
    # Item embeddings come first in the returned tuple, user embeddings second
    return model.item_embedding(), model.user_embedding()

item_embeddings, user_embeddings = getUserAndItemEmbeddings()

In [5]:
item_embeddings[:5,:5]

array([[0.28880102, 0.27965317, 0.28332145, 0.31038678, 0.25747679],
       [0.24594684, 0.31372757, 0.28079141, 0.36052146, 0.28872038],
       [0.0503809 , 0.09121301, 0.06787067, 0.09281201, 0.08766888],
       [0.19133009, 0.2040272 , 0.19122277, 0.17295551, 0.19297063],
       [0.33383508, 0.24262068, 0.25434719, 0.34200586, 0.27527593]])

In [6]:
user_embeddings[:5,:5]

array([[0.19558857, 0.2484944 , 0.19516628, 0.16640047, 0.21407196],
       [0.11547596, 0.25366705, 0.2036018 , 0.06258813, 0.23222268],
       [0.34112671, 0.25342754, 0.3532337 , 0.26305076, 0.2883578 ],
       [0.37611507, 0.17981696, 0.2360513 , 0.30416478, 0.2420981 ],
       [0.12642996, 0.18202362, 0.19503056, 0.1562379 , 0.20768361]])

#### Get explainability matrix

In [7]:
def getExplainabilityMatrix() -> np.ndarray:
    """
    Looks up the explainability matrix held by the cached recommender model
    :return: Explainability matrix
    """
    model = emf_best_recommender.model
    return model.explainability_matrix

explainability_matrix = getExplainabilityMatrix()
explainability_matrix[:5,:5]


array([[0.21621622, 0.26086957, 0.        , 0.2       , 0.        ],
       [0.        , 0.2826087 , 0.        , 0.36666667, 0.25806452],
       [0.        , 0.15217391, 0.        , 0.13333333, 0.        ],
       [0.        , 0.2173913 , 0.        , 0.6       , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ]])

#### Get the explanations for sample users

In [8]:
def convertTupleListToDict(row: pd.Series):
    """
    Splits a sequence of (item, contribution) pairs into two parallel lists
    :param row: Iterable whose elements are indexable pairs
    :return: Dict with the keys "item" and "contribution", each holding a list
    """
    # Materialise once so single-pass iterables can be read for both columns
    pairs = list(row)
    return {
        "item": [pair[0] for pair in pairs],
        "contribution": [pair[1] for pair in pairs],
    }

# Keep only the explanation rows that belong to the sampled users, then
# preview five of them.
# NOTE(review): .sample(5) is called without random_state, so the previewed
# rows presumably differ between runs — confirm whether that is intended.
sample_explanations_df = emf_best_explanations_df[emf_best_explanations_df.userId.isin(sample_users)]
sample_explanations_df.sample(5)

Unnamed: 0,userId,itemId,rank,explanations,expl
54,465.0,66.0,9.0,{},0.0
527,178.0,569.0,5.0,{5: 1},0.0
122,402.0,247.0,1.0,"{3: 2, 4: 3, 5: 5}",0.7391
1197,178.0,1255.0,1.0,{},0.0
483,178.0,522.0,10.0,{},0.0


#### Reduce dimensionality of embeddings to 2-D

In [9]:
def reduceEmbeddingDimension() -> Tuple[np.ndarray, np.ndarray]:
    """
    Projects the item and user embeddings with two separately fitted UMAP models
    :return: Tuple ordered as (reduced item embeddings, reduced user embeddings)
    """
    # Default UMAP settings are used and no random_state is passed, so the
    # projections are presumably not reproducible between runs (TODO confirm)
    item_reducer = umap.UMAP()
    reduced_items = item_reducer.fit_transform(item_embeddings)

    user_reducer = umap.UMAP()
    reduced_users = user_reducer.fit_transform(user_embeddings)

    return reduced_items, reduced_users

dim_reduce_item_embeddings, dim_reduce_user_embeddings = reduceEmbeddingDimension()

#### Simple visualization UI for explanations

In [18]:
def plotModelLatentFactors(user_id: int, metadata: Any):
    """
    Plots the model latent factors
    :param user_id: Sampled user ID; also used as the row index into the
        explainability matrix and the reduced user embeddings
    :param metadata: Movielens metadata exposing a `dataset` frame with
        userId and itemId columns
    """
    rated_items = metadata.dataset[metadata.dataset.userId == user_id].itemId

    item_emb_for_user_id_df = pd.DataFrame.from_dict({
            'x': dim_reduce_item_embeddings[:,0],
            'y': dim_reduce_item_embeddings[:,1],
            'expl': explainability_matrix[user_id, :]})
    # Ascending sort so the items with the highest explainability are drawn last
    item_emb_for_user_id_df = item_emb_for_user_id_df.sort_values(by=['expl'])

    # Explicit figure/axes so every layer is drawn onto the same plot
    fig, ax = plt.subplots()
    sns.scatterplot(data=item_emb_for_user_id_df, x="x", y="y", hue="expl", ax=ax)

    # Highlight the items this user has rated
    ax.scatter(dim_reduce_item_embeddings[rated_items,0],
            dim_reduce_item_embeddings[rated_items,1],
            marker='o', c='#c3e0f4')

    # Mark the user with the 2-D reduced embedding. The raw latent vector's
    # first two components are not coordinates in the reduced space the items
    # are plotted in.
    # NOTE(review): items and users are reduced by two independently fitted
    # UMAP models, so the marker's position relative to the items is only
    # indicative.
    ax.scatter(dim_reduce_user_embeddings[user_id,0],
            dim_reduce_user_embeddings[user_id,1],
            marker='X', s=100, c='#419ede')
    ax.set_title(f"Model Latent Factor Visualization for user ID {user_id}")
    plt.show()
    
def onSelectUser(user_id: int):
    """
    Widget callback that redraws the latent factor plot for the chosen user
    :param user_id: User ID picked in the dropdown
    """
    recommender_metadata = emf_best_recommender.metadata
    plotModelLatentFactors(user_id, recommender_metadata)
    
widgets.interact(onSelectUser, user_id=sample_users);

interactive(children=(Dropdown(description='user_id', options=(149, 470, 465, 789, 178, 722, 402, 749, 358, 44…