In [16]:
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
from recoxplainer.evaluator import Splitter, Evaluator
from recoxplainer.config import cfg
from recoxplainer.data_reader import DataReader 

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

###### Loading Data

In [2]:
ratings_df = pd.read_csv('datasets/ml-1m/ratings.csv', sep=',', encoding='latin-1')
movies_df = pd.read_csv('datasets/ml-1m/movies.csv', sep=',',encoding='latin-1')

In [3]:
data = DataReader(**cfg.ml1m)
data.make_consecutive_ids_in_dataset()
data.binarize(binary_threshold=1)
sp = Splitter()
train, test = sp.split_leave_n_out(data, frac=0.1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


###### Preprocessing movies dataframe

In [4]:
movies_df.head()

Unnamed: 0,origin_iid,itemId,title,genre
0,1193,0,One Flew Over the Cuckoo's Nest (1975),Drama
1,661,1,James and the Giant Peach (1996),Animation|Children's|Musical
2,914,2,My Fair Lady (1964),Musical|Romance
3,3408,3,Erin Brockovich (2000),Drama
4,2355,4,"Bug's Life, A (1998)",Animation|Children's|Comedy


So each movie has a unique ID, a title with its release year along with it (Which may contain unicode characters) and several different genres in the same field. 

Let's remove the year from the __title__ column by using pandas' replace function and store in a new __year__ column.

Using regular expressions to find a year stored between parentheses<br>
We specify the parantheses so we don't conflict with movies that have years in their titles

In [5]:
movies_df['year'] = movies_df.title.str.extract('(\(\d\d\d\d\))',expand=False)
movies_df['year']

0       (1975)
1       (1996)
2       (1964)
3       (2000)
4       (1998)
         ...  
3701    (1998)
3702    (1998)
3703    (1999)
3704    (1973)
3705    (1998)
Name: year, Length: 3706, dtype: object

In [6]:
#Removing paranthesis
movies_df['year'] = movies_df.year.str.extract('(\d\d\d\d)',expand=False)
movies_df['year']

0       1975
1       1996
2       1964
3       2000
4       1998
        ... 
3701    1998
3702    1998
3703    1999
3704    1973
3705    1998
Name: year, Length: 3706, dtype: object

In [7]:
movies_df['title']

0            One Flew Over the Cuckoo's Nest (1975)
1                  James and the Giant Peach (1996)
2                               My Fair Lady (1964)
3                            Erin Brockovich (2000)
4                              Bug's Life, A (1998)
                           ...                     
3701                             Modulations (1998)
3702                          Broken Vessels (1998)
3703                              White Boys (1999)
3704                       One Little Indian (1973)
3705    Five Wives, Three Secretaries and Me (1998)
Name: title, Length: 3706, dtype: object

In [8]:
#Applying the strip function to get rid of any ending whitespace characters that may have appeared
movies_df['title'] = movies_df['title'].apply(lambda x: x.strip())

In [9]:
movies_df.head()

Unnamed: 0,origin_iid,itemId,title,genre,year
0,1193,0,One Flew Over the Cuckoo's Nest (1975),Drama,1975
1,661,1,James and the Giant Peach (1996),Animation|Children's|Musical,1996
2,914,2,My Fair Lady (1964),Musical|Romance,1964
3,3408,3,Erin Brockovich (2000),Drama,2000
4,2355,4,"Bug's Life, A (1998)",Animation|Children's|Comedy,1998


With that, let's also split the values in the __Genres__ column into a __list of Genres__ to simplify future use. This can be achieved by applying Python's split string function on the correct column.

In [10]:
#Every genre is separated by a | so we simply have to call the split function on |
movies_df['genre'] = movies_df.genre.str.split('|')
movies_df.head()

Unnamed: 0,origin_iid,itemId,title,genre,year
0,1193,0,One Flew Over the Cuckoo's Nest (1975),[Drama],1975
1,661,1,James and the Giant Peach (1996),"[Animation, Children's, Musical]",1996
2,914,2,My Fair Lady (1964),"[Musical, Romance]",1964
3,3408,3,Erin Brockovich (2000),[Drama],2000
4,2355,4,"Bug's Life, A (1998)","[Animation, Children's, Comedy]",1998


Since keeping genres in a list format isn't optimal for the content-based recommendation system technique, we will use the One Hot Encoding technique to convert the list of genres to a vector where each column corresponds to one possible value of the feature. This encoding is needed for feeding categorical data. In this case, we store every different genre in columns that contain either 1 or 0. 1 shows that a movie has that genre and 0 shows that it doesn't. Let's also store this dataframe in another variable since genres won't be important for our first recommendation system.

In [11]:
#Copying the movie dataframe into a new one since we won't need to use the genre information in our first case.
moviesWithGenres_df = movies_df.copy()

#For every row in the dataframe, iterate through the list of genres and place a 1 into the corresponding column
for index, row in movies_df.iterrows():
    for genre in row['genre']:
        moviesWithGenres_df.at[index, genre] = 1
        
#Filling in the NaN values with 0 to show that a movie doesn't have that column's genre
moviesWithGenres_df = moviesWithGenres_df.fillna(0)
moviesWithGenres_df.head()

Unnamed: 0,origin_iid,itemId,title,genre,year,Drama,Animation,Children's,Musical,Romance,...,Fantasy,Sci-Fi,War,Thriller,Crime,Mystery,Western,Horror,Film-Noir,Documentary
0,1193,0,One Flew Over the Cuckoo's Nest (1975),[Drama],1975,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,661,1,James and the Giant Peach (1996),"[Animation, Children's, Musical]",1996,0.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,914,2,My Fair Lady (1964),"[Musical, Romance]",1964,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3408,3,Erin Brockovich (2000),[Drama],2000,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2355,4,"Bug's Life, A (1998)","[Animation, Children's, Comedy]",1998,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Next, let's look at the ratings dataframe.

In [12]:
ratings_df.head()

Unnamed: 0,origin_uid,origin_iid,rating,timestamp,userId,itemId
0,1,1193,5,978300760,0,0
1,1,661,3,978302109,0,1
2,1,914,3,978301968,0,2
3,1,3408,4,978300275,0,3
4,1,2355,5,978824291,0,4


In [13]:
#Drop removes a specified row or column from a dataframe
ratings_df = ratings_df.drop('timestamp', 1)
ratings_df.head()

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId
0,1,1193,5,0,0
1,1,661,3,0,1
2,1,914,3,0,2
3,1,3408,4,0,3
4,1,2355,5,0,4


# Getting 10 Recommendations for all users

In [19]:
usersList = ratings_df.groupby(by='userId')
ids = []
recommendationsList = []
for x in range (0,6040):
    user = usersList.get_group(x)
    inputTitle = movies_df[movies_df['itemId'].isin(user['itemId'].tolist())]
    user = pd.merge(user, inputTitle)
    user = user.drop('genre', 1).drop('year', 1)
    hotEnc = moviesWithGenres_df[moviesWithGenres_df['itemId'].isin(user['itemId'].tolist())]
    hotEnc = hotEnc.reset_index(drop=True)
    userGenre = hotEnc.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
    profile = userGenre.transpose().dot(user['rating'])
    allMoviesGenreTable = moviesWithGenres_df.set_index(moviesWithGenres_df['itemId'])
    allMoviesGenreTable = allMoviesGenreTable.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
    recommendations_df = ((allMoviesGenreTable*profile).sum(axis=1))/(profile.sum())
    recommendations_df = recommendations_df.sort_values(ascending=False)
    recommendations = movies_df.loc[movies_df['itemId'].isin(recommendations_df.head(10).keys())]
    recommendations_array = np.asarray(recommendations)
    for i in range (len(recommendations_array)):
        ids.append(x)
        recommendationsList.append(recommendations_array[i][2])
allUsersRecommendations_df = pd.DataFrame(list(zip(ids, recommendationsList)), columns =['userId', 'movie'])
        

In [20]:
allUsersRecommendations_df.head(10)

Unnamed: 0,userId,movie
0,0,"Wizard of Oz, The (1939)"
1,0,Pocahontas (1995)
2,0,Hercules (1997)
3,0,Aladdin (1992)
4,0,"Little Mermaid, The (1989)"
5,0,Watership Down (1978)
6,0,"Jungle Book, The (1967)"
7,0,Lady and the Tramp (1955)
8,0,Space Jam (1996)
9,0,Steamboat Willie (1940)


# Explanations

In [21]:
#Computing Assosciation Rules
rules = None
item_sets = [
        [item for item in ratings_df[ratings_df.userId == user].itemId]
        for user in ratings_df.userId.unique()
    ]

te = TransactionEncoder()
te_ary = te.fit(item_sets).transform(item_sets)

df = pd.DataFrame(te_ary, columns=te.columns_)

frequent_itemsets = apriori(df,
                            min_support=.1,
                            use_colnames=True,
                            max_len=2)

rules = association_rules(frequent_itemsets,
                            metric="lift",
                            min_threshold=.1)
rules = rules[(rules['confidence'] > .1) &
               (rules['lift'] > .1)]

rules.consequents = [list(row.consequents)[0] for _, row in rules.iterrows()]
rules.antecedents = [list(row.antecedents)[0] for _, row in rules.iterrows()]

rules = rules[["consequents", "antecedents", "confidence"]]

In [22]:
usersListAR = ratings_df.groupby(by='userId')
pointerAR = 0
explanationsAR = []
for x in range (0,6040):
    user_ratingsAR = usersListAR.get_group(x).itemId.values #give it userID
    counterAR = 0
    while counterAR < 10:
        titleAR = recommendationsList[pointerAR]
        #print(title)
        recommendedMovieAR = movies_df.loc[movies_df.title == titleAR]
        #print(recommendedMovie)
        #rec_Origin_id = int(recommendedMovie.origin_iid)
        rec_item_idAR = int(recommendedMovieAR.itemId)
        #print(rec_item_id)
        ARs = rules[rules.consequents == rec_item_idAR]
        explanationAR =  ARs[ARs.antecedents.isin(user_ratingsAR)]
        #print(explanationAR)
        explanationsAR.append({x for x in explanationAR.antecedents})
        pointerAR = pointerAR + 1
        counterAR = counterAR + 1
allUsersRecommendations_df['explanations'] = explanationsAR

In [23]:
allUsersRecommendations_df

Unnamed: 0,userId,movie,explanations
0,0,"Wizard of Oz, The (1939)","{0, 4, 5, 7, 8, 10, 13, 15, 19, 22, 23, 26, 27..."
1,0,Pocahontas (1995),{}
2,0,Hercules (1997),{}
3,0,Aladdin (1992),"{4, 5, 38, 40, 9, 10, 44, 13, 15, 48, 50, 19, ..."
4,0,"Little Mermaid, The (1989)","{33, 5, 40, 9, 10, 44, 13, 48, 19, 22, 26}"
...,...,...,...
60395,6039,"Purple Rose of Cairo, The (1985)",{}
60396,6039,Brassed Off (1996),{}
60397,6039,Twelfth Night (1996),{}
60398,6039,Best Men (1997),{}


# Model Fidelity Calculation

In [24]:
expl = allUsersRecommendations_df[[len(x) > 0 for x in allUsersRecommendations_df.explanations]]
fidelity = expl.groupby('userId')['movie'].count() / 10
modelFidelity = sum(fidelity)/6040

In [25]:
modelFidelity

0.406705298013247

# Single user testing

# Testing getting 2 users very similar and testing stability of explanations

In [26]:
groupedUserRatings = ratings_df.groupby('userId')
testUser = groupedUserRatings.get_group(6)
testUser.shape

(31, 5)

In [27]:
testUser

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId
523,7,648,4,6,58
524,7,861,4,6,430
525,7,2916,5,6,62
526,7,3578,3,6,68
527,7,3793,3,6,237
528,7,1610,5,6,78
529,7,589,5,6,97
530,7,6,4,6,255
531,7,442,4,6,107
532,7,733,5,6,195


In [28]:
testUserTitles = movies_df[movies_df['itemId'].isin(testUser['itemId'].tolist())]
testUserTitles

Unnamed: 0,origin_iid,itemId,title,genre,year
22,1270,22,Back to the Future (1985),"[Comedy, Sci-Fi]",1985
48,2028,48,Saving Private Ryan (1998),"[Action, Drama, War]",1998
58,648,58,Mission: Impossible (1996),"[Action, Adventure, Mystery]",1996
62,2916,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
68,3578,68,Gladiator (2000),"[Action, Drama]",2000
74,3107,74,Backdraft (1991),"[Action, Drama]",1991
78,1610,78,"Hunt for Red October, The (1990)","[Action, Thriller]",1990
90,3256,90,Patriot Games (1992),"[Action, Thriller]",1992
92,110,92,Braveheart (1995),"[Action, Drama, War]",1995
97,589,97,Terminator 2: Judgment Day (1991),"[Action, Sci-Fi, Thriller]",1991


In [29]:
testUser = pd.merge(testUser, testUserTitles)
testUser

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId,title,genre,year
0,7,648,4,6,58,Mission: Impossible (1996),"[Action, Adventure, Mystery]",1996
1,7,861,4,6,430,Supercop (1992),"[Action, Thriller]",1992
2,7,2916,5,6,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
3,7,3578,3,6,68,Gladiator (2000),"[Action, Drama]",2000
4,7,3793,3,6,237,X-Men (2000),"[Action, Sci-Fi]",2000
5,7,1610,5,6,78,"Hunt for Red October, The (1990)","[Action, Thriller]",1990
6,7,589,5,6,97,Terminator 2: Judgment Day (1991),"[Action, Sci-Fi, Thriller]",1991
7,7,6,4,6,255,Heat (1995),"[Action, Crime, Thriller]",1995
8,7,442,4,6,107,Demolition Man (1993),"[Action, Sci-Fi]",1993
9,7,733,5,6,195,"Rock, The (1996)","[Action, Adventure, Thriller]",1996


In [30]:
testUser = testUser.drop('genre', 1).drop('year', 1)
testUser

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId,title
0,7,648,4,6,58,Mission: Impossible (1996)
1,7,861,4,6,430,Supercop (1992)
2,7,2916,5,6,62,Total Recall (1990)
3,7,3578,3,6,68,Gladiator (2000)
4,7,3793,3,6,237,X-Men (2000)
5,7,1610,5,6,78,"Hunt for Red October, The (1990)"
6,7,589,5,6,97,Terminator 2: Judgment Day (1991)
7,7,6,4,6,255,Heat (1995)
8,7,442,4,6,107,Demolition Man (1993)
9,7,733,5,6,195,"Rock, The (1996)"


In [31]:
testHotEnc = moviesWithGenres_df[moviesWithGenres_df['itemId'].isin(testUser['itemId'].tolist())]
testHotEnc = testHotEnc.reset_index(drop=True)
testUserGenre = testHotEnc.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
#print(testUserGenre.transpose().shape)
#print(testUser['rating'].shape)
testUserProfile = testUserGenre.transpose().dot(testUser['rating'])
testUserAllMoviesGenreTable = moviesWithGenres_df.set_index(moviesWithGenres_df['itemId'])
testUserAllMoviesGenreTable = testUserAllMoviesGenreTable.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
testUserRecommendations_df = ((testUserAllMoviesGenreTable*testUserProfile).sum(axis=1))/(testUserProfile.sum())
testUserRecommendations_df = testUserRecommendations_df.sort_values(ascending=False)
testUserRecommendations = movies_df.loc[movies_df['itemId'].isin(testUserRecommendations_df.head(10).keys())]
testUserRecommendations

Unnamed: 0,origin_iid,itemId,title,genre,year
62,2916,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
86,1544,86,"Lost World: Jurassic Park, The (1997)","[Action, Adventure, Sci-Fi, Thriller]",1997
133,1917,133,Armageddon (1998),"[Action, Adventure, Sci-Fi, Thriller]",1998
277,1127,277,"Abyss, The (1989)","[Action, Adventure, Sci-Fi, Thriller]",1989
702,1129,702,Escape from New York (1981),"[Action, Adventure, Sci-Fi, Thriller]",1981
1058,1876,1058,Deep Impact (1998),"[Action, Drama, Sci-Fi, Thriller]",1998
1103,1591,1103,Spawn (1997),"[Action, Adventure, Sci-Fi, Thriller]",1997
1198,849,1198,Escape from L.A. (1996),"[Action, Adventure, Sci-Fi, Thriller]",1996
1265,2322,1265,Soldier (1998),"[Action, Adventure, Sci-Fi, Thriller, War]",1998
1562,2344,1562,Runaway Train (1985),"[Action, Adventure, Drama, Thriller]",1985


In [32]:
testUser2 = groupedUserRatings.get_group(6)
testUser2.shape

(31, 5)

In [33]:
testUser2

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId
523,7,648,4,6,58
524,7,861,4,6,430
525,7,2916,5,6,62
526,7,3578,3,6,68
527,7,3793,3,6,237
528,7,1610,5,6,78
529,7,589,5,6,97
530,7,6,4,6,255
531,7,442,4,6,107
532,7,733,5,6,195


In [34]:
testUserTitles2 = movies_df[movies_df['itemId'].isin(testUser2['itemId'].tolist())]
testUserTitles2

Unnamed: 0,origin_iid,itemId,title,genre,year
22,1270,22,Back to the Future (1985),"[Comedy, Sci-Fi]",1985
48,2028,48,Saving Private Ryan (1998),"[Action, Drama, War]",1998
58,648,58,Mission: Impossible (1996),"[Action, Adventure, Mystery]",1996
62,2916,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
68,3578,68,Gladiator (2000),"[Action, Drama]",2000
74,3107,74,Backdraft (1991),"[Action, Drama]",1991
78,1610,78,"Hunt for Red October, The (1990)","[Action, Thriller]",1990
90,3256,90,Patriot Games (1992),"[Action, Thriller]",1992
92,110,92,Braveheart (1995),"[Action, Drama, War]",1995
97,589,97,Terminator 2: Judgment Day (1991),"[Action, Sci-Fi, Thriller]",1991


In [35]:
testUser2 = pd.merge(testUser2, testUserTitles2)
testUser2

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId,title,genre,year
0,7,648,4,6,58,Mission: Impossible (1996),"[Action, Adventure, Mystery]",1996
1,7,861,4,6,430,Supercop (1992),"[Action, Thriller]",1992
2,7,2916,5,6,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
3,7,3578,3,6,68,Gladiator (2000),"[Action, Drama]",2000
4,7,3793,3,6,237,X-Men (2000),"[Action, Sci-Fi]",2000
5,7,1610,5,6,78,"Hunt for Red October, The (1990)","[Action, Thriller]",1990
6,7,589,5,6,97,Terminator 2: Judgment Day (1991),"[Action, Sci-Fi, Thriller]",1991
7,7,6,4,6,255,Heat (1995),"[Action, Crime, Thriller]",1995
8,7,442,4,6,107,Demolition Man (1993),"[Action, Sci-Fi]",1993
9,7,733,5,6,195,"Rock, The (1996)","[Action, Adventure, Thriller]",1996


In [36]:
testUser2 = testUser2.drop('genre', 1).drop('year', 1)
testUser2

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId,title
0,7,648,4,6,58,Mission: Impossible (1996)
1,7,861,4,6,430,Supercop (1992)
2,7,2916,5,6,62,Total Recall (1990)
3,7,3578,3,6,68,Gladiator (2000)
4,7,3793,3,6,237,X-Men (2000)
5,7,1610,5,6,78,"Hunt for Red October, The (1990)"
6,7,589,5,6,97,Terminator 2: Judgment Day (1991)
7,7,6,4,6,255,Heat (1995)
8,7,442,4,6,107,Demolition Man (1993)
9,7,733,5,6,195,"Rock, The (1996)"


In [37]:
testUser2.at[0,'origin_iid']=3408
testUser2.at[0,'itemId']=3
testUser2.at[0,'rating']=3
testUser2.at[0,'title']='Erin Brockovich (2000)'

In [38]:
testUser2

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId,title
0,7,3408,3,6,3,Erin Brockovich (2000)
1,7,861,4,6,430,Supercop (1992)
2,7,2916,5,6,62,Total Recall (1990)
3,7,3578,3,6,68,Gladiator (2000)
4,7,3793,3,6,237,X-Men (2000)
5,7,1610,5,6,78,"Hunt for Red October, The (1990)"
6,7,589,5,6,97,Terminator 2: Judgment Day (1991)
7,7,6,4,6,255,Heat (1995)
8,7,442,4,6,107,Demolition Man (1993)
9,7,733,5,6,195,"Rock, The (1996)"


In [39]:
testHotEnc2 = moviesWithGenres_df[moviesWithGenres_df['itemId'].isin(testUser2['itemId'].tolist())]
testHotEnc2 = testHotEnc2.reset_index(drop=True)
testUserGenre2 = testHotEnc2.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
#print(testUserGenre.transpose().shape)
#print(testUser['rating'].shape)
testUserProfile2 = testUserGenre2.transpose().dot(testUser2['rating'])
testUserAllMoviesGenreTable2 = moviesWithGenres_df.set_index(moviesWithGenres_df['itemId'])
testUserAllMoviesGenreTable2 = testUserAllMoviesGenreTable2.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
testUserRecommendations_df2 = ((testUserAllMoviesGenreTable2*testUserProfile2).sum(axis=1))/(testUserProfile2.sum())
testUserRecommendations_df2 = testUserRecommendations_df2.sort_values(ascending=False)
testUserRecommendations2 = movies_df.loc[movies_df['itemId'].isin(testUserRecommendations_df2.head(10).keys())]
testUserRecommendations2

Unnamed: 0,origin_iid,itemId,title,genre,year
62,2916,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
86,1544,86,"Lost World: Jurassic Park, The (1997)","[Action, Adventure, Sci-Fi, Thriller]",1997
133,1917,133,Armageddon (1998),"[Action, Adventure, Sci-Fi, Thriller]",1998
277,1127,277,"Abyss, The (1989)","[Action, Adventure, Sci-Fi, Thriller]",1989
702,1129,702,Escape from New York (1981),"[Action, Adventure, Sci-Fi, Thriller]",1981
1058,1876,1058,Deep Impact (1998),"[Action, Drama, Sci-Fi, Thriller]",1998
1103,1591,1103,Spawn (1997),"[Action, Adventure, Sci-Fi, Thriller]",1997
1198,849,1198,Escape from L.A. (1996),"[Action, Adventure, Sci-Fi, Thriller]",1996
1265,2322,1265,Soldier (1998),"[Action, Adventure, Sci-Fi, Thriller, War]",1998
1562,2344,1562,Runaway Train (1985),"[Action, Adventure, Drama, Thriller]",1985


In [40]:
testUserRecommendations

Unnamed: 0,origin_iid,itemId,title,genre,year
62,2916,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
86,1544,86,"Lost World: Jurassic Park, The (1997)","[Action, Adventure, Sci-Fi, Thriller]",1997
133,1917,133,Armageddon (1998),"[Action, Adventure, Sci-Fi, Thriller]",1998
277,1127,277,"Abyss, The (1989)","[Action, Adventure, Sci-Fi, Thriller]",1989
702,1129,702,Escape from New York (1981),"[Action, Adventure, Sci-Fi, Thriller]",1981
1058,1876,1058,Deep Impact (1998),"[Action, Drama, Sci-Fi, Thriller]",1998
1103,1591,1103,Spawn (1997),"[Action, Adventure, Sci-Fi, Thriller]",1997
1198,849,1198,Escape from L.A. (1996),"[Action, Adventure, Sci-Fi, Thriller]",1996
1265,2322,1265,Soldier (1998),"[Action, Adventure, Sci-Fi, Thriller, War]",1998
1562,2344,1562,Runaway Train (1985),"[Action, Adventure, Drama, Thriller]",1985


In [50]:
#testing explanations for both test users
#test user 1 
testUser1Ratings = testUser['itemId']

In [77]:
ARs = rules[rules.consequents == 1562]
explanationAR =  ARs[ARs.antecedents.isin(testUser1Ratings)]
explanation = {x for x in explanationAR.antecedents}
explanation

set()

In [78]:
#testing explanations for both test users
#test user 2 
testUser2Ratings = testUser2['itemId']

In [79]:
ARs2 = rules[rules.consequents == 1562]
explanationAR2 =  ARs2[ARs2.antecedents.isin(testUser2Ratings)]
explanation2 = {x for x in explanationAR2.antecedents}
explanation2

set()

# End of stability testing