# Demo of Alternating Least Squares (ALS) RecSys Explanations

#### Imports

In [1]:
import pathlib
import os, sys
import random
import pandas as pd
from typing import List, Tuple, Dict, Any

import plotly.express as px
from jupyter_dash import JupyterDash
from dash import Dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# Pin the directory the kernel was started in the first time this cell runs. The paths below are
# derived from it and the cell then changes the working directory, so re-reading os.getcwd() on a
# re-run would climb two more levels and register a wrong sys.path entry.
_notebook_launch_dir = globals().setdefault("_NOTEBOOK_LAUNCH_DIR", pathlib.Path(os.getcwd()))

root_working_dir = str(_notebook_launch_dir.parent.parent.parent)
working_dir = _notebook_launch_dir.parent.parent
os.chdir(working_dir)
print(f"Current working directory is: {working_dir}\n")

# Register the root directory so that the `Code.*` imports that follow can be resolved; the
# membership check keeps re-runs of this cell from stacking duplicate entries on sys.path.
if root_working_dir not in sys.path:
    sys.path.append(root_working_dir)

from Code.Demos.Models.Src.ALSExplanation import ALSExplainerDemo
from Code.Demos.Models.Src.ModelBasedExplanation import ModelBasedExplainerDemo
from Code.Utils.Src.Enums import ExplanationType
from Code.Utils.Src.Utils import Helpers
import Code.Model.Src.Constants as c
import Code.DataAccessLayer.Src.Constants as c2



Current working directory is: C:\Development\Python\MyGitHub\Hands-on-Intro-to-building-Explainability-for-RecSys-(Pydata-2023)\ExplainableRecsys\Code



#### Run demo based on cached recommendations and explanation models

In [2]:
def runDemoUsingCachedModelDatasets() -> Tuple[Any, Any, pd.DataFrame]:
    """
    Loads the cached ALS recommender and explainer, reports their results and hands them back
    :return: Tuple of (recommender, explainer, prettified explanations dataframe)
    """
    # NOTE(review): readPickleFromFile presumably unpickles these files - only point the
    # constants at artifacts from a trusted source.
    recommender = Helpers.readPickleFromFile(c.ALS_BEST_TRAIN_RECOMMENDER_PATH)
    explainer = Helpers.readPickleFromFile(c.ALS_BEST_EXPLANATION_MODEL_PATH)

    # The explanations are prettified once and the same frame is both reported and returned
    pretty_explanations_df = Helpers.prettifyExplanations(explainer.explanations_df)

    explanation_metrics = explainer.explanation_metrics
    recommendation_metrics = recommender.recommendation_metrics
    recommendations_df = recommender.recommendations_df
    ModelBasedExplainerDemo.reportDemoResults(
        explanation_metrics,
        pretty_explanations_df,
        recommendation_metrics,
        recommendations_df,
    )

    return recommender, explainer, pretty_explanations_df

# Load the cached models once; the recommender and the explanations frame are reused by later cells.
als_best_recommender, als_best_explainer, best_explanations_df = runDemoUsingCachedModelDatasets()

Sample of generated recommendations:
+--------+--------+--------+
|  userId|  itemId|   rank |
+--------+--------+--------+
|    0.0 |  366.0 |    1.0 |
|    0.0 |   33.0 |    2.0 |
|    0.0 |  148.0 |    3.0 |
|    0.0 |  247.0 |    4.0 |
|    0.0 |  133.0 |    5.0 |
|    0.0 |   68.0 |    6.0 |
|    0.0 |  491.0 |    7.0 |
|    0.0 |  367.0 |    8.0 |
|    0.0 |  226.0 |    9.0 |
|    0.0 |  404.0 |   10.0 |
+--------+--------+--------+



Recommendation metrics:
{'hit_ratio': 0.22144785413141901, 'ndcg': 0.24363069095732878}



Sample of generated explanations:
+--------------+--------------+--------------+----------------------------------------------------------------------------------------------------------------------------------------------+
|     userId   |     itemId   |      rank    |                                                                  explanations                                                                |
+--------------+--------------+--------------+---

#### Sample 10 users and the corresponding explanations of their recommendations

In [3]:
def createSampleUserIds(n_users: int=10) -> Tuple[int, List[int]]:
    """
    Creates a reproducible random sample of user ids taken from the recommender's dataset
    :param n_users: Number of users to sample
    :return: The number of sampled users and the list of sampled user ids
    :raises ValueError: If n_users is larger than the number of distinct user ids
    """
    # random.sample needs a sequence (sets are rejected from Python 3.11 on); sorting the distinct
    # ids also fixes the population order, so the seeded draw below is repeatable.
    unique_user_ids = sorted(set(als_best_recommender.metadata.dataset.userId))
    # Fixed seed: every run of the notebook picks the same users
    random.seed(100)
    sample_users = random.sample(unique_user_ids, n_users)
    return n_users, sample_users

# Draw the sample once; the bare `sample_users` expression shows the chosen ids as the cell output.
n_users, sample_users = createSampleUserIds()
sample_users

[149, 470, 465, 789, 178, 722, 402, 749, 358, 443]

In [4]:
def convertTupleListToDict(row: pd.Series):
    """
    Splits a sequence of (item, contribution) pairs into two parallel lists
    :param row: Iterable of pairs; element 0 of each pair is the item, element 1 its contribution
    :return: Dictionary with the keys "item" and "contribution", each holding a list
    """
    # Pull out the first two elements of every entry in a single pass over the input
    pairs = [(entry[0], entry[1]) for entry in row]
    return {
        "item": [item for item, _ in pairs],
        "contribution": [contribution for _, contribution in pairs],
    }

# Keep only the explanations of the sampled users. The explicit `.copy()` yields an independent
# frame, so the column assignment below does not write into a slice of best_explanations_df
# (assigning on the filtered slice directly triggers pandas' SettingWithCopyWarning).
sample_explanations_df = best_explanations_df[best_explanations_df.userId.isin(sample_users)].copy()
# Turn each list of (item, contribution) tuples into a dict of two parallel lists
sample_explanations_df["explanations"] = sample_explanations_df.explanations.apply(convertTupleListToDict)
sample_explanations_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


Unnamed: 0,userId,itemId,rank,explanations
299,149.0,329.0,1.0,"{'item': [93, 247, 234, 31, 362, 496, 462, 808..."
91,149.0,102.0,2.0,"{'item': [256, 462, 399, 488, 138, 592, 94, 49..."
140,149.0,156.0,3.0,"{'item': [138, 399, 496, 216, 506, 47, 249, 31..."
199,149.0,217.0,4.0,"{'item': [93, 354, 256, 249, 297, 401, 550, 30..."
703,149.0,750.0,5.0,"{'item': [258, 234, 137, 311, 75, 592, 365, 48..."
...,...,...,...,...
47,789.0,53.0,6.0,"{'item': [244, 254, 623, 239, 725, 347, 492, 1..."
271,789.0,297.0,7.0,"{'item': [239, 216, 1130, 623, 43, 347, 547, 5..."
29,789.0,31.0,8.0,"{'item': [216, 347, 200, 571, 725, 191, 43, 24..."
466,789.0,497.0,9.0,"{'item': [995, 118, 1130, 139, 65, 561, 60, 62..."


#### Get the content being recommended, i.e. movies

In [5]:
# Read the pipe-separated MovieLens item file (no header row), index it by its first column and
# keep only the second column, exposed under the name 'movie'.
content_df = (
    pd.read_csv(
        c2.MOVIELENS_ITEM_PATH,
        sep='|',
        encoding="ISO-8859-1",
        skiprows=0,
        engine='python',
        header=None,
    )
    .set_index(0)[[1]]
    .rename(columns={1: 'movie'})
)

In [6]:
# Seeded so that the preview shows the same rows on every Restart & Run All
content_df.sample(10, random_state=100)

Unnamed: 0_level_0,movie
0,Unnamed: 1_level_1
1639,Bitter Sugar (Azucar Amargo) (1996)
1072,"Pyromaniac's Love Story, A (1995)"
586,Terminal Velocity (1994)
739,Pretty Woman (1990)
1128,Heidi Fleiss: Hollywood Madam (1995)
1513,Sprung (1997)
440,Amityville II: The Possession (1982)
851,Two or Three Things I Know About Her (1966)
829,Fled (1996)
1603,Angela (1995)


#### Simple visualization UI of explanation

In [7]:
def runExplanationVisualizer(port: int = 8099, max_rank: int = 10):
    """
    Runs the Explanation visualization
    Builds a Dash app with a user dropdown, a rank slider and a pie chart showing the item
    contributions behind the selected recommendation, then serves it inline in the notebook.
    :param port: Port the Dash server listens on
    :param max_rank: Highest rank that can be selected on the slider
    """
    app = JupyterDash(__name__)
    app.layout = html.Div([
        html.P("userId:"),
        dcc.Dropdown(
            id='userId',
            value=sample_users[0],
            options=[{'value': x, 'label': x}
                     for x in sample_users],
            clearable=False
        ),
        html.P("Rank:"),
        dcc.Slider(
            id='rank',
            min=1,
            max=max_rank,
            value=1,
            step=1,
            marks={i: str(i) for i in range(1, max_rank + 1)}
        ),
        dcc.Graph(id="pie-chart"),
    ])

    @app.callback(
        Output("pie-chart", "figure"),
        [Input("userId", "value"),
         Input("rank", "value")])
    def generateChart(userId, rank):
        """
        Builds the pie chart for the recommendation selected through the dropdown and the slider
        :param userId: User id chosen in the dropdown
        :param rank: Rank chosen on the slider
        :return: Figure for the "pie-chart" graph
        """
        train_dataset = als_best_recommender.train_metadata_clone
        matches = sample_explanations_df[
            (sample_explanations_df['userId'] == userId) & (sample_explanations_df['rank'] == rank)]
        if matches.empty:
            # No explanation stored for this user/rank pair: show an empty, titled figure
            # instead of letting the callback fail on the row lookup below.
            return {
                "data": [],
                "layout": {"title": {"text": f"No explanation available for user {userId} at rank {rank}"}},
            }

        # Work on the first matching row as scalars; calling int() on a whole Series is
        # deprecated in pandas and breaks as soon as more than one row matches.
        selected = matches.iloc[0]
        contributions_df = pd.DataFrame.from_dict(selected['explanations'])
        # Replace the item ids with movie titles (ids are first passed through
        # getOriginalItemId - presumably internal -> MovieLens ids; verify against the dataset class)
        contributions_df['item'] = list(
            content_df.loc[train_dataset.getOriginalItemId(contributions_df.item)].movie)
        title = "Recommended movie: " + content_df.loc[int(selected['itemId'])].movie
        fig = px.pie(contributions_df, values='contribution', names='item', title=title)
        return fig

    # Serve CSS/JS assets from the local Dash installation
    app.css.config.serve_locally = True
    app.scripts.config.serve_locally = True

    # Run app and display result inline in the notebook
    app.run_server(mode='inline', debug=True, port=port)

# Start the Dash app; it is rendered in this cell's output.
runExplanationVisualizer()