# How `explain()` in Implicit works

In [2]:
# Loading dependencies
import argparse
import codecs
import logging
import time
import tqdm
import h5py
from scipy.sparse import coo_matrix, csr_matrix
import scipy.sparse as sp
from random import randint
import pickle
import os
import itertools
import numpy as np
import pandas as pd

#### wordcloud dependencies
import matplotlib.pyplot as plt # data visualization library
from wordcloud import WordCloud, STOPWORDS #used to generate world cloud
import seaborn as sns

from implicit.als import AlternatingLeastSquares
from implicit.approximate_als import (AnnoyAlternatingLeastSquares, FaissAlternatingLeastSquares,
                                      NMSLibAlternatingLeastSquares)

# Loading Last.fm dataset

In [3]:
# borrowed from https://github.com/benfred/implicit, modified to read the file locally instead of downloading it.
def get_lastfm(filename='../flaskapp/data/lastfm_360k.hdf5'):
    """Load the Last.fm play counts from a local HDF5 file.

    Args:
        filename: path of the HDF5 file to read. The default is the path this
            notebook has always used, so ``get_lastfm()`` keeps working.

    Returns:
        A tuple ``(artists, users, plays)``: the file's ``artist`` and ``user``
        datasets as numpy arrays, and ``plays`` as a scipy CSR matrix built
        from the ``artist_user_plays`` group.

    Raises:
        KeyError: if an expected group or dataset is missing from the file.
    """
    with h5py.File(filename, 'r') as f:
        # Index instead of .get(): a missing key then fails right here with a
        # KeyError rather than later as an AttributeError on None.
        group = f['artist_user_plays']
        # The group holds the three raw arrays of a CSR matrix.
        plays = csr_matrix((group['data'], group['indices'], group['indptr']))
        return np.array(f['artist']), np.array(f['user']), plays

In [4]:
# Load the artist array, the user array and the play-count matrix from the local HDF5 file.
artists, users, plays = get_lastfm()

# Training the baseline model

In [5]:
# Set the variable BEFORE building the model: implicit inspects
# OPENBLAS_NUM_THREADS when the ALS model is constructed and warns if it is not "1".
# NOTE(review): OpenBLAS itself reads this variable when the library is loaded,
# so to actually limit BLAS threading it should be exported before the kernel
# starts (or at least before numpy is imported) - confirm for this environment.
os.environ['OPENBLAS_NUM_THREADS'] = "1"
model = AlternatingLeastSquares(factors=128, regularization=20, iterations=15)



In [6]:
# Train the ALS model on the play-count matrix (about 4.5 minutes in the recorded run).
model.fit(plays)

100%|██████████| 15.0/15 [04:29<00:00, 21.30s/it]


# Picking an example user to recommend for

In [30]:
# Hard-coded example user; used below as a column index into `plays`.
u_id = 513
# Names of the artists in this user's column of `plays` (row indices of the stored entries).
[artists[a] for a in plays.getcol(u_id).tocoo().row]
# The output is the user's items; it comes out alphabetically sorted -
# presumably because artist indices follow name order (TODO confirm).

['animal collective',
 'apparat',
 'architecture in helsinki',
 'azeda booth',
 'band of horses',
 'beck',
 'blitzen trapper',
 'bloc party',
 'blonde redhead',
 "bonnie 'prince' billy",
 'brightblack morning light',
 'built to spill',
 'crystal stilts',
 'deerhunter',
 'department of eagles',
 'fleet foxes',
 'glass candy',
 'grizzly bear',
 'health',
 'iron & wine',
 'jim james',
 'meanest man contest',
 'mgmt',
 'modest mouse',
 'nick cave & the bad seeds',
 'nick lowe',
 'of montreal',
 'panther',
 'ratatat',
 'rogue wave',
 'seamonster¹',
 'spoon',
 'starfucker',
 'stephen malkmus',
 'the black keys',
 'the cool kids',
 'the dodos',
 'the national',
 'the pharmacy',
 'the raconteurs',
 'the walkmen',
 'the white stripes',
 'tv on the radio']

# Printing baseline recommendations

In [19]:
def printRecs(u_id, N=20):
    """Print the top-N baseline recommendations for one user.

    Each line has the form ``<artist name>(<artist id>) score: <score>``.

    Args:
        u_id: index of the user to recommend for.
        N: number of recommendations to print (defaults to 20, the value
            this notebook has always used).
    """
    # `plays` is indexed artist x user; recommend() is given the transposed
    # (user x artist) matrix. Transposing directly avoids the two full copies
    # of the play matrix the previous version made on every call.
    user_plays = plays.T.tocsr()
    for artistid, score in model.recommend(u_id, user_plays, recalculate_user=False, N=N):
        print(str(artists[artistid]) + "(" + str(artistid) + ")" + ' score: ' + str(score))

In [20]:
# Time one full recommendation call for the chosen user.
t0 = time.time()
printRecs(u_id)
print()
elapsed = time.time() - t0
print("elapsed: " + str(elapsed))

bon iver(52989) score: 0.95276856
andrew bird(24304) score: 0.9154794
my morning jacket(194761) score: 0.89223385
beirut(43089) score: 0.8891827
broken social scene(56783) score: 0.8805647
the shins(262555) score: 0.8518601
vampire weekend(274818) score: 0.84480655
kings of leon(161432) score: 0.8363707
arcade fire(30091) score: 0.829973
beach house(41971) score: 0.8244278
girl talk(125452) score: 0.8239067
dr. dog(99656) score: 0.8174257
menomena(185743) score: 0.8120258
the flaming lips(256038) score: 0.80339265
okkervil river(203087) score: 0.79054725
the decemberists(254804) score: 0.7901379
sufjan stevens(245610) score: 0.78894305
caribou(62539) score: 0.7873214
interpol(142048) score: 0.78666675
elliott smith(106496) score: 0.78244257

elapsed: 1.0600829124450684


In [31]:
def explain(u_id, item_id, N=10):
    """Explain why `item_id` is recommended to `u_id` using the model's `explain()`.

    Args:
        u_id: index of the user the recommendation was made for.
        item_id: index of the recommended artist to explain.
        N: number of top contributing items to return (defaults to 10, the
            value this notebook has always used).

    Returns:
        A tuple ``(top_score_explained, top_contributions)`` - the first two
        values returned by ``model.explain``; its third return value is
        discarded.
    """
    # `plays` is indexed artist x user; the model is given the transposed
    # (user x artist) matrix. Transposing directly avoids the two full copies
    # of the play matrix the previous version made on every call.
    user_plays = plays.T.tocsr()
    top_score_explained, top_contributions, _ = model.explain(
        userid=u_id, user_items=user_plays, itemid=item_id, user_weights=None, N=N)
    return top_score_explained, top_contributions

In [32]:
# Explain the top recommendation from the list above: bon iver (artist id 52989), score 0.95276856.
top_score_explained, top_contributions = explain(u_id, 52989)

In [33]:
# Total score returned by model.explain() for this user/artist pair.
# NOTE(review): it differs slightly from the 0.95276856 printed by recommend();
# presumably explain() recomputes the user's weights from user_items while
# recommend(recalculate_user=False) uses the stored user factors - confirm.
top_score_explained

0.9375970576153871

In [34]:
# One line per contributing artist: "<artist name>, <contribution>".
for artist_idx, contribution in top_contributions:
    line = artists[artist_idx] + ", " + str(contribution)
    print(line)

fleet foxes, 0.16996315155682637
bonnie 'prince' billy, 0.1263029009510567
department of eagles, 0.09213594575253842
band of horses, 0.09157829037040019
mgmt, 0.08192097391784169
the national, 0.07711950032447022
iron & wine, 0.05156663545217489
deerhunter, 0.04969274501101792
tv on the radio, 0.039296525839094654
the dodos, 0.03469614815419167
