### Imports

In [10]:
import plotly.express as px
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import pandas as pd
pd.options.mode.chained_assignment = None
import random

from recoxplainer.config import cfg
from recoxplainer.data_reader import DataReader 
from recoxplainer.models import ALS, EMFModel, GMFModel, BPR
from recoxplainer.models.mlp_model import MLPModel
from recoxplainer.recommender import Recommender 
from recoxplainer.evaluator import Splitter, Evaluator
from recoxplainer.explain import ALSExplainer
from recoxplainer.evaluator import ExplanationEvaluator

## Prepare data

Load the MovieLens 100k dataset using the `ml100k` settings from the project config:

In [11]:
# Build the dataset reader; the `ml100k` section of the project config is
# unpacked into keyword arguments.
data = DataReader(**cfg.ml100k)

Re-map the user and item IDs so that they are consecutive:

In [12]:
# Called for its effect on `data` (the return value is not used).
# Later cells translate item ids back with `train.get_original_item_id`
# before looking up movie titles.
data.make_consecutive_ids_in_dataset()

Because ALS works on implicit feedback, we need to binarize the ratings:

In [13]:
# Binarize the feedback with a threshold of 1 (return value not used).
# NOTE(review): the exact comparison rule (>= vs >) is defined in
# DataReader.binarize — confirm there.
data.binarize(binary_threshold=1)

Prepare train and test sets:

In [14]:
# Split the prepared data into the train and test sets used by every later cell.
# NOTE(review): frac=0.1 is presumably the held-out share — confirm against
# Splitter.split_leave_n_out.
sp = Splitter()
train, test = sp.split_leave_n_out(data, frac=0.1)

## Model

In [15]:
# NOTE(review): this constructs a second DataReader and only displays its repr;
# the result is not assigned to anything. Looks like a leftover cell —
# consider removing it.
DataReader(**cfg.ml100k)

<recoxplainer.data_reader.data_reader.DataReader at 0x7fe12e5a3a20>

In [16]:
# Instantiate the ALS model; hyper-parameters come from the `model.als`
# section of the project config.
als = ALS(**cfg.model.als)



In [17]:
# Train the ALS model on the training split.
als.fit(train)


Changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.



  0%|          | 0/10 [00:00<?, ?it/s]

True

## Recommendations

In [18]:
# Wrap the training data and the fitted ALS model in a Recommender.
recommender = Recommender(train, als)

In [19]:
# Produce ranked recommendations for every user.
recommendations = recommender.recommend_all()

Recommending for users:   0%|          | 0/943 [00:00<?, ?it/s]

In [20]:
# Show the recommendation table (columns: userId, itemId, rank).
recommendations

Unnamed: 0,userId,itemId,rank
162,0.0,166.0,1.0
144,0.0,148.0,2.0
32,0.0,33.0,3.0
389,0.0,404.0,4.0
354,0.0,366.0,5.0
...,...,...,...
390,942.0,403.0,6.0
257,942.0,269.0,7.0
187,942.0,198.0,8.0
165,942.0,175.0,9.0


## Evaluations

In [21]:
# The evaluator is built on the held-out test split.
eva = Evaluator(test)

In [22]:
# Hit ratio of the recommendations, measured by the test-set evaluator.
eva.cal_hit_ratio(recommendations)

0.23338690790685565

In [23]:
# NDCG of the recommendations, measured by the test-set evaluator.
eva.cal_ndcg(recommendations)

0.24356550055588594

## Explain

In [24]:
# Explainer built from the fitted model, its recommendations and the training data.
expl = ALSExplainer(als, recommendations, train)

In [25]:
# Compute explanations for the recommendations; the displayed result carries
# them in an `explanations` column next to userId, itemId and rank.
explanations = expl.explain_recommendations()

Computing explanations:   0%|          | 0/9430 [00:00<?, ?it/s]

In [26]:
# Preview the first 10 explained recommendations.
explanations.head(10)

Unnamed: 0,userId,itemId,rank,explanations
162,0.0,166.0,1.0,"{'item': [179, 632, 377, 291, 947, 656, 221, 8..."
144,0.0,148.0,2.0,"{'item': [512, 665, 522, 365, 377, 431, 1006, ..."
32,0.0,33.0,3.0,"{'item': [291, 377, 1006, 528, 947, 179, 632, ..."
389,0.0,404.0,4.0,"{'item': [649, 380, 487, 1006, 751, 291, 512, ..."
354,0.0,366.0,5.0,"{'item': [528, 466, 291, 438, 550, 1006, 179, ..."
153,0.0,157.0,6.0,"{'item': [649, 302, 522, 0, 466, 521, 1045, 49..."
542,0.0,568.0,7.0,"{'item': [0, 487, 438, 550, 751, 329, 466, 365..."
415,0.0,432.0,8.0,"{'item': [1006, 487, 947, 550, 522, 512, 834, ..."
16,0.0,17.0,9.0,"{'item': [632, 512, 83, 302, 365, 649, 1006, 9..."
129,0.0,133.0,10.0,"{'item': [512, 423, 92, 487, 302, 380, 550, 66..."


In [27]:
# The explanation evaluator is parameterised with the number of users in the
# training set.
ex = ExplanationEvaluator(train.num_user)

In [28]:
# Model fidelity of the computed explanations.
# NOTE(review): the metric's exact definition lives in
# ExplanationEvaluator.model_fidelity — confirm there.
ex.model_fidelity(explanations)

1.0

### Example

In [29]:
# Pick up to 10 distinct users to showcase and keep only their explanations.
# random.sample requires a sequence: sampling directly from a set was
# deprecated in Python 3.9 and raises TypeError from 3.11, so the unique ids
# are materialised as a sorted list first. The sample size is capped so a
# dataset with fewer than 10 users does not raise ValueError.
# Call random.seed(...) beforehand if the same users are needed on every run.
user_pool = sorted(set(data.dataset.userId))
sample_users = random.sample(user_pool, min(10, len(user_pool)))
sample_expl = explanations[explanations.userId.isin(sample_users)]

In [30]:
# Movie-title lookup table built from u.item: column 0 of the file becomes the
# index and column 1 is kept as the single `movie` column.
content = (
    pd.read_csv(
        'datasets/ml-100k/u.item',
        sep='|',
        encoding="ISO-8859-1",
        skiprows=0,
        engine='python',
        header=None,
    )
    .set_index(0)[[1]]
    .rename(columns={1: 'movie'})
)

In [31]:
# Show the title lookup table (index: column 0 of u.item, one `movie` column).
content

Unnamed: 0_level_0,movie
0,Unnamed: 1_level_1
1,Toy Story (1995)
2,GoldenEye (1995)
3,Four Rooms (1995)
4,Get Shorty (1995)
5,Copycat (1995)
...,...
1678,Mat' i syn (1997)
1679,B. Monkey (1998)
1680,Sliding Doors (1998)
1681,You So Crazy (1994)


In [33]:
# Raw, unprocessed read of the same u.item file that `content` is built from
# (pipe-separated, no header row, so columns are numbered).
# NOTE(review): `content2` is only displayed in the next cell and not used
# by the remaining cells shown here — looks like an exploratory leftover.
content2 = pd.read_csv('datasets/ml-100k/u.item', sep='|', encoding = "ISO-8859-1", skiprows=0, engine='python', header=None)

In [34]:
# Inspect all raw columns of u.item.
content2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,1678,Mat' i syn (1997),06-Feb-1998,,http://us.imdb.com/M/title-exact?Mat%27+i+syn+...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1678,1679,B. Monkey (1998),06-Feb-1998,,http://us.imdb.com/M/title-exact?B%2E+Monkey+(...,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,0
1679,1680,Sliding Doors (1998),01-Jan-1998,,http://us.imdb.com/Title?Sliding+Doors+(1998),0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1680,1681,You So Crazy (1994),01-Jan-1994,,http://us.imdb.com/M/title-exact?You%20So%20Cr...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [261]:
# Show the randomly sampled user ids.
sample_users

[380, 821, 656, 62, 824, 92, 909, 115, 621, 923]

In [262]:
# Show the explained recommendations restricted to the sampled users.
sample_expl

Unnamed: 0,userId,itemId,rank,explanations
115,62.0,131.0,1.0,"{'item': [525, 173, 924, 745, 631, 434, 455, 5..."
108,62.0,124.0,2.0,"{'item': [48, 1224, 153, 348, 762, 302, 1067, ..."
154,62.0,178.0,3.0,"{'item': [259, 1067, 765, 745, 924, 516, 17, 6..."
36,62.0,40.0,4.0,"{'item': [1067, 206, 516, 446, 668, 251, 467, ..."
133,62.0,151.0,5.0,"{'item': [446, 302, 1034, 259, 434, 455, 361, ..."
...,...,...,...,...
471,923.0,512.0,6.0,"{'item': [681, 665, 243, 148, 630, 969, 552, 8..."
461,923.0,502.0,7.0,"{'item': [176, 214, 681, 49, 189, 148, 117, 25..."
470,923.0,511.0,8.0,"{'item': [72, 184, 395, 305, 89, 173, 302, 40,..."
314,923.0,346.0,9.0,"{'item': [243, 1120, 214, 184, 395, 1106, 681,..."


In [263]:
# Expand the explanation attached to the first sampled recommendation into a
# table and display it.
first_explanation = sample_expl.explanations.iloc[0]
df = pd.DataFrame.from_dict(first_explanation)
df

Unnamed: 0,item,contribution
57,525,0.319582
48,173,0.294722
49,924,0.273884
69,745,0.229873
46,631,0.210698
37,434,0.20684
45,455,0.197954
8,516,0.181597
56,1034,0.16228
35,56,0.158601


In [264]:
# Textual version of the explanation for the first sampled recommendation
# (the same row `df` was built from).
# The recommended movie used to be hard-coded as content.loc[280]; since
# `sample_users` is drawn at random, that id only matched one particular run.
# It is now derived from the row itself: `itemId` is the re-mapped id, so it is
# translated with train.get_original_item_id before indexing `content`,
# exactly as is done for the explaining items below.
recommended_item = sample_expl.itemId.iloc[[0]].astype(int)
print("You are recommended to watch movie")
print(content.loc[train.get_original_item_id(recommended_item)].iloc[0])
print("because you watched:")
list(content.loc[train.get_original_item_id(df.item)].movie)

You are recommended to watch movie
movie    Up Close and Personal (1996)
Name: 280, dtype: object
because you watched:


['Down Periscope (1996)',
 'First Wives Club, The (1996)',
 'Celtic Pride (1996)',
 'Addicted to Love (1997)',
 'Harriet the Spy (1996)',
 'Jack (1996)',
 'Chamber, The (1996)',
 'Substitute, The (1996)',
 'White Squall (1996)',
 'Sound of Music, The (1965)']

In [265]:
app = JupyterDash(__name__)

# Dropdown listing the sampled users; the first one is pre-selected and the
# selection cannot be cleared.
user_dropdown = dcc.Dropdown(
    id='userId',
    options=[{'value': uid, 'label': uid} for uid in sample_users],
    value=sample_users[0],
    clearable=False,
)

# Slider over recommendation ranks 1..10, one labelled mark per rank.
rank_slider = dcc.Slider(
    id='rank',
    min=1,
    max=10,
    step=1,
    value=1,
    marks={r: str(r) for r in range(1, 11)},
)

# Page layout: both controls followed by the pie chart filled in by the callback.
app.layout = html.Div(children=[
    html.P("userId:"),
    user_dropdown,
    html.P("Rank:"),
    rank_slider,
    dcc.Graph(id="pie-chart"),
])

@app.callback(
    Output("pie-chart", "figure"),
    [Input("userId", "value"),
     Input("rank", "value")])
def generate_chart(userId, rank):
    """Build the explanation pie chart for one sampled recommendation.

    Args:
        userId: user id selected in the dropdown (one of ``sample_users``).
        rank: recommendation rank selected on the slider.

    Returns:
        A plotly pie figure with one slice per explaining movie, sized by its
        ``contribution`` and titled with the recommended movie.

    Raises:
        PreventUpdate: when ``sample_expl`` has no row for (userId, rank), so
            the current figure is kept instead of failing on an empty selection.
    """
    from dash.exceptions import PreventUpdate

    selected = sample_expl[(sample_expl['userId'] == userId) & (sample_expl['rank'] == rank)]
    if selected.empty:
        raise PreventUpdate

    # Build a dataframe from the `explanations` column of the selected row.
    contributions = pd.DataFrame.from_dict(selected.explanations.iloc[0])
    # Replace the explaining item ids by movie titles; ids are translated back
    # to the original ones because `content` is indexed by the ids from u.item.
    contributions['item'] = list(
        content.loc[train.get_original_item_id(contributions['item'])].movie
    )

    # The recommended item needs the same translation: indexing `content` with
    # the re-mapped itemId directly would title the chart with another movie.
    recommended_item = selected.itemId.iloc[[0]].astype(int)
    recommended_title = list(
        content.loc[train.get_original_item_id(recommended_item)].movie
    )[0]

    return px.pie(contributions, values='contribution', names='item', title=recommended_title)

# Start the Dash app and render it inline, directly below this cell.
app.run_server(mode='inline')