In [1]:
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
from recoxplainer.evaluator import Splitter, Evaluator
from recoxplainer.config import cfg
from recoxplainer.data_reader import DataReader 

<a id="ref2"></a>
<h1><center>Content Based Filtering</center></h1>

###### Loading Data

In [2]:
ratings_df = pd.read_csv('datasets/ml-1m/ratings.csv', sep=',', encoding='latin-1')
movies_df = pd.read_csv('datasets/ml-1m/movies.csv', sep=',',encoding='latin-1')

In [3]:
data = DataReader(**cfg.ml1m)
data.make_consecutive_ids_in_dataset()
data.binarize(binary_threshold=1)
sp = Splitter()
train, test = sp.split_leave_n_out(data, frac=0.1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


###### Preprocessing movies dataframe

In [4]:
movies_df.head()

Unnamed: 0,origin_iid,itemId,title,genre
0,1193,0,One Flew Over the Cuckoo's Nest (1975),Drama
1,661,1,James and the Giant Peach (1996),Animation|Children's|Musical
2,914,2,My Fair Lady (1964),Musical|Romance
3,3408,3,Erin Brockovich (2000),Drama
4,2355,4,"Bug's Life, A (1998)",Animation|Children's|Comedy


So each movie has a unique ID, a title with its release year along with it (Which may contain unicode characters) and several different genres in the same field. 

Let's remove the year from the __title__ column by using pandas' replace function and store in a new __year__ column.

Using regular expressions to find a year stored between parentheses<br>
We specify the parantheses so we don't conflict with movies that have years in their titles

In [5]:
movies_df['year'] = movies_df.title.str.extract('(\(\d\d\d\d\))',expand=False)
movies_df['year']

0       (1975)
1       (1996)
2       (1964)
3       (2000)
4       (1998)
         ...  
3701    (1998)
3702    (1998)
3703    (1999)
3704    (1973)
3705    (1998)
Name: year, Length: 3706, dtype: object

In [6]:
#Removing paranthesis
movies_df['year'] = movies_df.year.str.extract('(\d\d\d\d)',expand=False)
movies_df['year']

0       1975
1       1996
2       1964
3       2000
4       1998
        ... 
3701    1998
3702    1998
3703    1999
3704    1973
3705    1998
Name: year, Length: 3706, dtype: object

In [7]:
movies_df['title']

0            One Flew Over the Cuckoo's Nest (1975)
1                  James and the Giant Peach (1996)
2                               My Fair Lady (1964)
3                            Erin Brockovich (2000)
4                              Bug's Life, A (1998)
                           ...                     
3701                             Modulations (1998)
3702                          Broken Vessels (1998)
3703                              White Boys (1999)
3704                       One Little Indian (1973)
3705    Five Wives, Three Secretaries and Me (1998)
Name: title, Length: 3706, dtype: object

In [8]:
#Applying the strip function to get rid of any ending whitespace characters that may have appeared
movies_df['title'] = movies_df['title'].apply(lambda x: x.strip())

In [9]:
movies_df.head()

Unnamed: 0,origin_iid,itemId,title,genre,year
0,1193,0,One Flew Over the Cuckoo's Nest (1975),Drama,1975
1,661,1,James and the Giant Peach (1996),Animation|Children's|Musical,1996
2,914,2,My Fair Lady (1964),Musical|Romance,1964
3,3408,3,Erin Brockovich (2000),Drama,2000
4,2355,4,"Bug's Life, A (1998)",Animation|Children's|Comedy,1998


With that, let's also split the values in the __Genres__ column into a __list of Genres__ to simplify future use. This can be achieved by applying Python's split string function on the correct column.

In [10]:
#Every genre is separated by a | so we simply have to call the split function on |
movies_df['genre'] = movies_df.genre.str.split('|')
movies_df.head()

Unnamed: 0,origin_iid,itemId,title,genre,year
0,1193,0,One Flew Over the Cuckoo's Nest (1975),[Drama],1975
1,661,1,James and the Giant Peach (1996),"[Animation, Children's, Musical]",1996
2,914,2,My Fair Lady (1964),"[Musical, Romance]",1964
3,3408,3,Erin Brockovich (2000),[Drama],2000
4,2355,4,"Bug's Life, A (1998)","[Animation, Children's, Comedy]",1998


Since keeping genres in a list format isn't optimal for the content-based recommendation system technique, we will use the One Hot Encoding technique to convert the list of genres to a vector where each column corresponds to one possible value of the feature. This encoding is needed for feeding categorical data. In this case, we store every different genre in columns that contain either 1 or 0. 1 shows that a movie has that genre and 0 shows that it doesn't. Let's also store this dataframe in another variable since genres won't be important for our first recommendation system.

In [11]:
#Copying the movie dataframe into a new one since we won't need to use the genre information in our first case.
moviesWithGenres_df = movies_df.copy()

#For every row in the dataframe, iterate through the list of genres and place a 1 into the corresponding column
for index, row in movies_df.iterrows():
    for genre in row['genre']:
        moviesWithGenres_df.at[index, genre] = 1
        
#Filling in the NaN values with 0 to show that a movie doesn't have that column's genre
moviesWithGenres_df = moviesWithGenres_df.fillna(0)
moviesWithGenres_df.head()

Unnamed: 0,origin_iid,itemId,title,genre,year,Drama,Animation,Children's,Musical,Romance,...,Fantasy,Sci-Fi,War,Thriller,Crime,Mystery,Western,Horror,Film-Noir,Documentary
0,1193,0,One Flew Over the Cuckoo's Nest (1975),[Drama],1975,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,661,1,James and the Giant Peach (1996),"[Animation, Children's, Musical]",1996,0.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,914,2,My Fair Lady (1964),"[Musical, Romance]",1964,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3408,3,Erin Brockovich (2000),[Drama],2000,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2355,4,"Bug's Life, A (1998)","[Animation, Children's, Comedy]",1998,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Next, let's look at the ratings dataframe.

In [12]:
ratings_df.head()

Unnamed: 0,origin_uid,origin_iid,rating,timestamp,userId,itemId
0,1,1193,5,978300760,0,0
1,1,661,3,978302109,0,1
2,1,914,3,978301968,0,2
3,1,3408,4,978300275,0,3
4,1,2355,5,978824291,0,4


In [13]:
#Drop removes a specified row or column from a dataframe
ratings_df = ratings_df.drop('timestamp', 1)
ratings_df.head()

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId
0,1,1193,5,0,0
1,1,661,3,0,1
2,1,914,3,0,2
3,1,3408,4,0,3
4,1,2355,5,0,4


# Getting 10 Recommendations for all users

In [14]:
usersList = ratings_df.groupby(by='userId')
ids = []
recommendationsList = []
for x in range (0,6040):
    user = usersList.get_group(x)
    inputTitle = movies_df[movies_df['itemId'].isin(user['itemId'].tolist())]
    user = pd.merge(user, inputTitle)
    user = user.drop('genre', 1).drop('year', 1)
    hotEnc = moviesWithGenres_df[moviesWithGenres_df['itemId'].isin(user['itemId'].tolist())]
    hotEnc = hotEnc.reset_index(drop=True)
    userGenre = hotEnc.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
    profile = userGenre.transpose().dot(user['rating'])
    allMoviesGenreTable = moviesWithGenres_df.set_index(moviesWithGenres_df['itemId'])
    allMoviesGenreTable = allMoviesGenreTable.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
    recommendations_df = ((allMoviesGenreTable*profile).sum(axis=1))/(profile.sum())
    recommendations_df = recommendations_df.sort_values(ascending=False)
    recommendations = movies_df.loc[movies_df['itemId'].isin(recommendations_df.head(10).keys())]
    recommendations_array = np.asarray(recommendations)
    for i in range (len(recommendations_array)):
        ids.append(x)
        recommendationsList.append(recommendations_array[i][2])
allUsersRecommendations_df = pd.DataFrame(list(zip(ids, recommendationsList)), columns =['userId', 'movie'])
        

In [15]:
allUsersRecommendations_df.head(10)

Unnamed: 0,userId,movie
0,0,"Wizard of Oz, The (1939)"
1,0,Pocahontas (1995)
2,0,Hercules (1997)
3,0,Aladdin (1992)
4,0,"Little Mermaid, The (1989)"
5,0,Watership Down (1978)
6,0,"Jungle Book, The (1967)"
7,0,Lady and the Tramp (1955)
8,0,Space Jam (1996)
9,0,Steamboat Willie (1940)


# Explanations

# Getting similar items to all movies in a dict

In [16]:
knn_items_dict = {}
num_items = int(ratings_df[['itemId']].nunique())
num_users = int(ratings_df[['userId']].nunique())

In [17]:
#generating knn items dict for all movies
ds = np.zeros((num_items, num_users))
ds[ratings_df.itemId, ratings_df.userId] = ratings_df.rating
ds = sparse.csr_matrix(ds)
sim_matrix = cosine_similarity(ds)
min_val = sim_matrix.min() - 1

for i in range(num_items):
            sim_matrix[i, i] = min_val
            knn_to_item_i = (-sim_matrix[i, :]).argsort()[:10]
            knn_items_dict[i] = knn_to_item_i

# Getting user ratings in an array

In [19]:
usersList = ratings_df.groupby(by='userId')
pointer = 0
explanations = []
for x in range (0,6040):
    user_ratings = usersList.get_group(x).itemId.values #give it userID
    counter = 0
    while counter < 10:
        title = recommendationsList[pointer]
        #print(title)
        recommendedMovie = movies_df.loc[movies_df.title == title]
        #print(recommendedMovie)
        #rec_Origin_id = int(recommendedMovie.origin_iid)
        rec_item_id = int(recommendedMovie.itemId)
        #print(rec_item_id)
        sim_items = knn_items_dict[rec_item_id]
        explanation =  set(sim_items) & set(user_ratings)
        explanations.append(explanation)
        pointer = pointer + 1
        counter = counter + 1
allUsersRecommendations_df['explanations'] = explanations

9
25
32
33
207
582
596
608
1341
2286
91
124
136
458
1036
1058
1389
1562
1612
1677
5
148
189
192
385
675
740
779
935
1317
62
64
86
124
133
702
1058
1103
1198
1265
91
163
347
590
711
713
1612
1928
2638
3285
170
207
252
608
1376
1625
1695
1785
1842
2850
62
86
133
277
702
1058
1103
1198
1265
1562
91
136
229
458
1036
1058
1389
1562
1612
1677
91
136
347
590
711
713
1389
1612
1677
3285
5
148
189
385
558
675
740
779
935
1695
252
398
409
412
1256
1492
2398
2850
2906
3301
136
163
347
587
590
711
713
1677
2638
3285
62
64
86
124
133
277
702
1103
1198
1265
163
229
347
590
711
713
982
1612
2638
3285
136
163
347
590
711
713
1612
1677
2638
3285
5
148
189
385
675
740
779
935
1695
1797
62
86
124
133
277
702
1058
1103
1198
1265
124
163
590
711
779
935
1036
1220
2203
3285
189
192
209
512
555
740
909
1000
1220
1265
62
86
133
277
512
702
1058
1103
1198
1265
32
207
388
608
1000
1016
1220
1341
2203
2350
5
148
189
385
512
675
740
909
1265
1797
62
133
277
512
702
740
909
1058
1198
1265
163
347
590
711
713
865
1

2850
124
163
347
590
711
713
740
1562
2638
3285
163
347
512
590
711
713
740
909
2638
3285
64
86
124
133
189
512
740
1058
1198
1265
62
64
86
124
189
512
702
740
1198
1265
91
124
136
458
909
1058
1389
1562
1612
1677
252
398
412
1012
1038
1256
1625
2906
3301
3316
62
86
133
277
702
1058
1103
1198
1265
1562
91
209
555
990
1058
1220
1265
1389
1562
1612
145
163
347
590
711
713
909
1797
2638
3285
207
608
865
918
1038
1492
1695
1842
2487
2544
163
207
347
474
590
608
711
713
2638
3285
62
86
124
133
277
702
740
1103
1198
1265
163
252
347
590
674
711
713
2299
2638
3285
64
124
133
189
512
740
1103
1198
1265
1612
145
163
347
590
711
713
740
1928
2638
3285
163
347
512
590
711
713
909
1797
2638
3285
163
347
474
590
711
713
1612
1928
2638
3285
73
137
305
343
662
911
921
1368
1612
1723
44
60
64
124
189
277
558
740
767
1265
5
148
189
385
675
711
740
779
935
1797
163
347
398
412
590
711
713
1376
2638
3285
136
163
347
474
590
711
713
1612
2638
3285
145
163
347
457
590
711
713
2253
2638
3285
91
124
512
740


3285
163
347
457
474
590
711
713
1612
2638
3285
145
163
347
457
590
711
713
2253
2638
3285
398
409
865
1012
1256
1354
1492
1651
1987
3316
75
252
398
865
918
1492
1595
2850
2906
3316
163
170
347
398
590
711
713
2398
2638
3285
124
163
347
590
711
713
782
2622
2638
3285
64
91
124
416
458
714
1220
1265
1389
1612
163
347
590
711
713
1061
1797
1928
2638
3285
91
163
229
711
713
982
1389
1612
1928
3285
32
33
207
596
608
1016
1220
1341
1842
2286
91
124
136
512
1058
1265
1389
1562
1612
1677
124
163
347
590
711
713
1389
1612
2638
3285
163
347
590
711
713
740
779
935
2638
3285
62
91
126
702
1067
1265
1389
1562
1612
1797
62
86
133
277
512
702
1103
1198
1220
1265
145
163
347
457
590
711
713
1928
2638
3285
163
347
590
711
713
740
909
1612
2638
3285
145
163
347
457
590
711
713
2253
2638
3285
91
124
136
711
1058
1265
1389
1562
1612
1677
62
86
124
189
192
512
740
909
1198
1265
252
674
865
1625
1651
1987
2299
2487
2544
3085
136
163
347
590
711
713
909
1677
2638
3285
398
409
412
674
865
1376
1492
1625
198

163
347
512
590
711
713
909
2638
3285
124
163
347
590
711
713
1058
1612
2638
3285
163
229
347
590
711
713
982
1359
2638
3285
32
124
207
558
608
779
935
1220
1341
2203
558
628
655
741
1341
1695
1941
2203
2277
3055
170
207
608
865
918
1012
1061
1695
1842
2544
32
207
558
596
608
741
1341
1695
1941
2203
252
409
412
865
918
1061
1354
1492
2487
3316
163
347
457
590
711
713
740
1928
2638
3285
62
86
124
133
277
702
740
1103
1198
1265
136
163
347
590
711
713
1677
1928
2638
3285
32
33
207
388
596
608
1341
1737
2286
2350
409
474
674
797
1038
1595
2906
3085
3301
3316
62
86
133
209
555
702
740
990
1000
1265
124
163
347
590
711
713
740
1562
2638
3285
5
124
148
189
192
385
512
675
740
909
5
124
148
385
675
740
779
935
1036
1562
51
121
136
229
921
982
1286
1677
1928
1953
163
347
590
711
713
1354
1359
2638
3018
3285
91
112
124
662
1036
1058
1389
1562
1612
2572
163
347
590
711
713
1376
1928
2487
2638
3285
124
163
347
512
590
711
713
1058
2638
3285
163
347
474
590
711
713
792
1928
2638
3285
64
124
458
58

921
1058
1612
2638
124
189
209
512
555
740
990
1000
1058
1265
51
121
136
229
893
909
921
982
1410
1677
75
170
207
608
1038
1359
1651
1695
2487
3085
91
124
136
458
1058
1332
1389
1562
1612
2210
398
412
674
865
918
1038
1492
2487
3301
3347
209
555
740
947
990
1000
1214
1220
1265
3560
25
32
33
207
388
596
608
1737
2286
2350
136
163
347
590
711
713
909
1677
2638
3285
112
124
322
512
662
988
1058
1612
1720
2139
163
347
590
711
713
1058
1389
1612
2638
3285
32
207
381
398
608
918
1376
1767
2622
3301
145
163
347
590
711
713
1928
2253
2638
3285
163
347
474
590
711
713
1928
2253
2638
3285
163
347
590
711
713
1038
1651
2638
3285
3347
91
124
512
662
1058
1220
1265
1389
1562
1612
163
229
347
590
711
713
982
1612
2638
3285
124
163
347
590
711
713
740
909
2638
3285
75
170
409
674
1012
1625
1987
2850
3018
3347
163
347
457
512
590
711
713
1797
2638
3285
398
918
1256
1625
2544
2906
3018
3085
3301
3347
145
163
347
590
711
713
1612
1928
2638
3285
62
86
133
189
277
512
740
1198
1220
1265
5
32
148
207
385
5

189
347
590
711
713
740
2638
3285
75
412
674
865
1256
1595
1785
1987
2398
3085
5
148
189
385
558
675
740
779
1341
1695
62
86
124
133
277
702
740
1103
1198
1265
163
347
474
590
711
713
865
1785
2638
3285
62
124
133
277
512
702
1058
1103
1198
1265
163
347
590
711
713
909
1797
1928
2638
3285
163
347
590
711
713
740
909
1928
2638
3285
75
170
674
918
1625
1651
1987
2544
3018
3347
91
136
909
1058
1265
1389
1562
1612
1677
1723
148
189
512
558
675
740
779
935
1341
1695
75
412
918
1012
1595
1987
2906
3018
3301
3347
192
202
209
555
740
885
909
990
2806
2930
32
124
189
512
558
740
1000
1053
1094
1220
62
124
133
277
512
740
1058
1103
1265
1562
398
409
412
1038
1256
1376
1651
2544
3018
3316
62
86
133
277
512
702
740
1103
1198
1265
32
33
207
558
596
608
935
1341
1695
2286
252
409
412
1038
1651
1785
2544
2906
3301
3316
62
86
124
277
512
702
1058
1103
1198
1265
91
163
347
590
711
713
1389
1612
2638
3285
75
409
412
674
865
1061
1354
1359
1376
1922
5
148
252
385
398
675
1036
1359
1612
1797
112
124
322
6

674
918
1061
1256
1354
1359
2487
3347
163
189
347
512
590
711
713
740
2638
3285
91
163
347
590
711
713
1389
1612
2638
3285
62
86
124
133
702
1058
1103
1198
1220
1265
124
163
189
512
740
909
1058
1220
1265
3285
91
124
136
458
662
1058
1389
1562
1612
1677
136
163
347
474
590
711
713
921
2638
3285
62
64
86
124
277
702
714
1058
1220
1265
121
163
229
347
590
711
713
1928
2638
3285
409
412
1012
1038
1061
1651
1785
2906
3301
3347
163
347
474
590
711
713
1256
1987
2638
3285
398
409
412
918
1012
1785
2299
2398
2906
3018
252
398
674
865
1012
1061
1625
1987
3085
3316
163
347
590
711
713
1922
2638
3285
3301
3347
91
121
136
229
662
921
982
1389
1612
1677
5
124
148
189
385
675
740
779
935
1036
121
246
909
945
955
977
1009
1231
2232
2928
124
189
209
512
555
740
990
1000
1058
1265
136
163
347
590
711
713
1612
1677
2638
3285
189
192
512
711
740
909
1058
1075
1265
1797
145
163
347
457
590
711
713
2253
2638
3285
163
347
590
711
713
909
1797
2638
3285
3347
91
124
590
711
713
1058
1389
1612
2638
3285
170
4

713
1036
2638
3285
91
163
347
590
711
713
1389
1612
2638
3285
136
163
347
590
711
713
1058
1677
2638
3285
163
347
474
590
711
713
1247
1928
2638
3285
163
347
512
590
711
713
740
909
2638
3285
145
163
347
590
711
713
1612
1928
2638
3285
145
163
347
457
590
711
713
2253
2638
3285
163
347
590
711
713
865
1987
2638
2906
3285
124
163
347
590
711
713
1389
1612
2638
3285
163
347
474
590
711
713
988
1928
2638
3285
381
512
909
988
1767
1797
1922
1928
2622
3301
92
99
124
458
546
753
1109
1228
1472
1699
252
409
412
674
865
1359
1987
2398
2906
3316
136
163
347
458
590
711
713
1677
2638
3285
145
163
347
457
590
711
713
2253
2638
3285
163
347
590
711
713
988
1928
2622
2638
3285
412
674
1061
1651
1785
1922
2398
3018
3301
3316
145
163
347
590
711
713
740
2253
2638
3285
305
474
662
909
911
921
1058
1368
1612
2712
124
189
347
457
512
711
740
1053
1075
3285
64
112
124
133
189
277
512
740
1058
1265
64
124
458
512
714
1220
1265
1317
1562
1797
124
163
347
590
711
713
1389
1612
2638
3285
5
148
189
385
512
67

711
713
740
909
2638
3285
163
252
347
590
711
713
1376
1651
2638
3285
91
124
136
458
713
1389
1562
1612
1677
2638
91
229
512
711
982
1058
1389
1612
1797
1928
91
112
124
229
982
1036
1058
1389
1612
1677
32
33
207
531
596
608
1341
1695
1842
2286
62
86
124
133
277
702
740
1103
1198
1265
91
163
347
590
711
713
1389
1612
2638
3285
64
124
136
458
587
909
1058
1265
1562
1677
91
124
136
229
458
982
1389
1562
1612
1677
75
170
398
412
1012
1061
1625
2299
2850
3347
170
398
409
412
865
1038
1492
1922
3018
3347
75
412
1061
1354
1359
1376
1651
1922
2398
3301
163
189
347
512
590
711
713
740
2638
3285
44
60
64
124
189
558
740
767
1094
1265
252
412
674
1256
1785
2544
3018
3085
3301
3316
209
555
702
740
990
1000
1103
1198
1220
1265
189
192
512
558
740
779
909
935
1220
1797
163
347
590
711
713
1562
1612
1928
2638
3285
124
189
512
558
740
909
1058
1220
1265
1797
91
512
740
909
1058
1389
1562
1612
1797
1928
163
347
590
711
713
740
909
1928
2638
3285
124
145
163
347
590
711
713
740
2638
3285
124
163
347
590

62
86
133
277
702
740
1058
1103
1198
1265
32
33
207
608
779
935
1220
1341
2203
2286
32
33
207
388
596
608
1220
1341
2203
2286
91
112
124
458
662
1036
1058
1389
1562
1612
64
124
189
512
740
1058
1103
1220
1265
1562
145
163
347
457
590
711
713
2253
2638
3285
5
32
148
207
385
608
675
1341
1695
1842
163
347
474
590
711
713
1928
2638
3116
3285
163
347
590
711
713
988
1928
2253
2638
3285
75
170
252
412
1012
1038
1061
1256
1651
1922
163
347
512
590
711
713
909
1797
1928
2638
163
347
512
590
711
713
909
1928
2638
3285
83
124
137
196
197
199
1003
1562
1612
1839
75
409
412
590
865
1354
2544
3018
3085
3347
5
148
347
385
512
675
713
740
1797
2638
32
163
207
347
608
740
779
935
1053
1220
136
229
343
512
909
921
982
1612
1677
1928
163
347
512
590
711
713
1612
1797
2638
3285
136
163
347
590
711
713
982
1677
2638
3285
5
148
163
189
347
385
675
713
740
1797
163
347
457
590
711
713
740
2253
2638
3285
145
163
347
590
711
713
1928
2253
2638
3285
62
133
209
277
512
555
740
990
1198
1265
91
347
590
711
713


1928
2299
2638
3285
91
458
512
909
1058
1389
1562
1612
1797
1928
124
163
347
457
590
711
713
2253
2638
3285
163
347
512
590
711
713
909
1797
2638
3285
163
347
474
590
711
713
988
1928
2638
3285
136
163
347
458
590
711
713
1677
2638
3285
163
347
512
590
711
713
740
909
2638
3285
79
91
136
458
1058
1389
1562
1612
1677
2210
91
124
136
458
590
1058
1389
1562
1612
1677
5
64
124
148
189
385
675
740
1036
1265
84
124
163
347
590
711
713
1612
2638
3285
5
32
148
189
385
675
740
779
935
2203
124
163
347
512
590
711
713
740
2638
3285
91
124
347
590
711
713
1058
1389
1562
1612
32
124
512
779
935
1058
1220
1265
1562
1612
91
229
428
958
982
1260
1389
1491
1612
1797
163
347
409
590
711
713
1354
1595
2638
3285
192
202
209
512
555
740
909
990
1000
1220
163
347
398
590
711
713
1928
2544
2638
3285
398
1012
1038
1492
1651
1922
1987
2906
3316
3347
163
347
512
590
711
713
909
1797
2638
3285
865
918
1012
1354
1359
1376
1492
1987
2544
3085
62
86
133
209
277
702
1058
1103
1198
1265
124
136
163
347
587
590
711
7

1695
1737
1842
163
347
590
711
713
1651
1987
2398
2638
3285
918
1012
1038
1061
1492
1595
1785
1987
2487
2544
91
124
136
458
662
1058
1389
1562
1612
1677
62
86
124
133
277
702
740
1103
1198
1265
347
512
590
711
713
740
909
1797
2638
3285
91
163
347
590
711
1389
1562
1612
2638
3285
62
86
124
133
277
702
1103
1198
1265
1562
91
170
229
982
1359
1389
1612
1922
3018
3085
170
252
412
865
1376
1651
2398
2850
2906
3301
75
398
409
412
865
918
1922
1987
3301
3316
209
512
555
740
909
990
1058
1928
2806
2930
62
124
133
189
277
702
740
1103
1198
1265
124
163
347
590
711
713
909
1797
2638
3285
124
136
163
347
590
711
713
1058
2638
3285
86
124
133
189
192
512
740
909
1198
1265
60
558
628
655
741
1341
1695
1941
2203
2277
91
398
1389
1492
1562
1612
1928
2544
2906
3347
136
163
229
347
590
711
713
982
1677
3285
84
347
381
590
711
713
1767
2253
2622
3285
91
124
512
740
909
1058
1389
1562
1612
1797
124
163
347
590
711
713
740
1058
2638
3285
163
347
590
711
713
792
1506
1865
2638
3285
163
347
590
711
713
105

32
189
207
558
608
740
779
935
1053
1341
64
86
124
189
740
1000
1058
1198
1265
1562
170
252
409
412
674
1061
1376
1595
1625
1922
91
124
136
458
711
1058
1389
1562
1612
1677
86
124
133
555
702
740
1058
1103
1198
1265
5
148
189
192
385
675
740
779
935
1317
64
91
112
124
740
1036
1058
1389
1562
1612
124
163
347
590
711
713
740
909
2638
3285
5
126
148
189
192
385
675
740
1067
1562
91
124
136
229
982
1058
1389
1562
1612
1677
145
163
347
457
590
711
713
2253
2638
3285
62
124
277
458
512
702
1058
1265
1562
1677
91
136
229
458
909
982
1058
1389
1612
1677
192
202
209
555
603
740
909
990
1000
1006
91
115
124
416
420
1389
1612
1847
2336
3367
91
124
163
347
458
1036
1389
1612
2638
3285
124
163
347
458
590
711
713
1058
1612
2638
192
202
209
555
651
740
909
990
1000
1265
5
148
189
207
385
608
675
740
1695
1797
145
163
347
457
590
711
713
2253
2638
3285
412
865
918
1012
1492
1987
2398
3018
3301
3316
91
124
136
458
1036
1058
1265
1389
1562
1612
145
163
347
457
590
711
713
1797
2638
3285
91
163
347
590

909
1058
1928
2638
3285
91
136
512
909
1058
1389
1562
1612
1677
1797
145
163
347
457
590
711
713
2253
2638
3285
163
347
474
590
711
713
829
1928
2638
3285
75
674
1038
1061
1492
1595
1785
1987
2487
2544
252
918
1038
1354
1359
1785
2398
2906
3018
3301
207
252
608
1061
1651
1695
1842
2487
3018
3316
124
163
347
590
711
713
1058
1612
2638
3285
252
412
1256
1359
1492
1625
1785
2487
3018
3316
145
163
347
457
590
711
713
2253
2638
3285
124
189
347
512
711
713
740
909
2638
3285
163
347
474
590
711
713
1928
2638
3285
3316
170
412
865
1354
1376
1625
1922
2544
2850
3085
674
740
1012
1038
1061
1651
1785
2299
2398
3347
91
136
347
590
711
1058
1389
1612
1677
3285
163
347
409
590
711
713
1785
2638
3018
3285
124
163
189
347
590
713
740
1058
2638
3285
91
124
136
458
1058
1265
1389
1562
1612
1677
91
1012
1354
1376
1389
1612
1651
1797
3085
3316
163
347
474
590
711
713
1651
2638
3285
3347
44
60
189
558
628
655
740
767
2203
2277
163
347
381
590
711
713
782
1612
2638
3285
91
136
458
1058
1389
1562
1612
1672


1317
3285
409
412
865
918
1359
1376
1595
1625
1987
2850
512
590
711
713
909
1389
1491
1612
1797
1928
91
163
590
713
1389
1562
1612
1797
2638
3285
148
163
347
590
711
713
1036
1612
2638
3285
62
124
277
512
740
1000
1058
1103
1220
1265
86
124
133
189
512
702
740
1058
1103
1265
124
458
512
740
909
1058
1389
1562
1612
1928
163
347
590
711
713
1376
1612
1928
2638
3285
192
209
603
740
885
909
990
1006
2806
2930
163
347
457
474
590
711
713
1928
2638
3285
207
608
782
1012
1036
1695
1842
1922
2299
3018
163
347
474
590
711
713
829
2638
3116
3285
62
86
124
133
277
702
1058
1103
1198
1265
62
86
124
133
277
702
740
1103
1198
1265
62
124
133
277
702
1058
1103
1198
1220
1265
145
163
347
457
590
711
713
2253
2638
3285
163
229
347
590
711
713
982
1612
2638
3285
163
189
347
512
590
711
713
740
2638
3285
148
189
192
385
512
675
740
909
1053
1797
75
163
347
590
674
711
713
1922
2638
3285
163
347
457
590
711
713
740
909
2638
3285
75
252
1061
1354
1492
1625
1987
2299
2850
3018
75
170
918
1061
1256
1595
1651

512
740
1058
1220
1265
1562
170
674
1038
1354
1359
1625
1651
2299
2906
3347
163
347
590
711
713
909
1797
1928
2638
3285
91
163
347
590
711
713
1389
1612
2638
3285
136
229
458
662
921
982
1058
1562
1612
1677
62
86
124
512
702
740
1058
1103
1198
1265
252
865
1061
1354
1359
1492
1922
2299
3018
3316
145
163
347
457
590
711
713
1928
2638
3285
75
398
409
412
918
1595
1922
2850
3085
3347
163
347
512
590
711
713
740
1075
2638
3285
145
163
347
457
590
711
713
2253
2638
3285
91
163
347
590
711
713
1389
1612
2638
3285
91
136
458
1058
1265
1389
1562
1612
1677
3687
62
86
133
277
512
702
1103
1198
1220
1265
136
163
347
590
711
713
1612
1677
2638
3285
91
163
347
457
590
711
713
1612
2638
3285
163
347
590
711
713
2398
2638
2906
3085
3285
124
189
192
512
702
740
909
1000
1220
1265
163
347
457
590
711
713
1928
2253
2638
3285
145
163
347
457
590
711
713
2253
2638
3285
145
163
347
590
711
713
1928
2253
2638
3285
64
124
458
714
1058
1103
1220
1265
1562
1612
91
136
163
713
1058
1389
1612
1677
1928
3285
5
91

2638
3285
163
347
412
590
711
713
918
2638
2850
3285
163
189
347
512
558
590
711
713
740
909
145
163
347
512
590
711
713
1797
2638
3285
124
163
347
590
711
713
1058
1612
2638
3285
91
409
674
1012
1256
1359
1389
1612
2487
3316
163
347
590
711
713
740
1612
1797
2638
3285
91
124
136
229
458
662
982
1058
1389
1612
62
86
133
189
740
1000
1094
1198
1220
1265
5
148
192
385
675
740
1038
1354
2544
3018
91
136
229
347
590
982
1389
1612
1677
3285
163
347
474
590
711
713
1625
2638
3085
3285
398
409
412
865
1354
1359
1376
2299
2398
2906
124
133
189
512
702
740
1058
1198
1265
1562
145
163
347
457
590
711
713
2253
2638
3285
124
189
512
575
647
740
909
1251
1265
1562
91
228
229
428
458
958
982
1389
1562
1612
86
124
133
189
512
558
740
1053
1075
1265
163
347
590
711
713
918
1061
1612
2638
3285
145
163
347
590
711
713
909
2253
2638
3285
32
33
207
596
608
797
1737
2286
2614
3078
416
865
1061
1256
1612
1651
1785
2299
2487
2906
124
163
347
587
590
711
713
1677
2638
3285
92
99
122
124
406
458
546
753
1698
1

163
347
474
590
711
713
1612
1928
2638
3285
124
163
347
590
711
713
1562
1612
2638
3285
5
148
189
385
512
675
740
909
1797
2638
163
347
590
711
713
909
1612
1797
2638
3285
124
163
347
457
590
711
713
2253
2638
3285
398
1061
1354
1359
1492
1625
1651
2299
2398
3018
409
412
674
1354
1359
1376
1612
1785
1922
2398
91
163
347
590
711
713
1389
1612
2638
3285
192
202
209
555
603
740
909
990
1000
2712
75
207
252
608
674
918
1376
1651
2299
2544
412
918
1012
1492
1595
1612
1922
1928
3301
3347
145
163
347
457
590
711
713
2253
2638
3285
64
86
124
189
719
740
767
775
1036
1265
124
163
347
590
711
713
1058
1612
2638
3285
62
86
124
512
702
1058
1103
1198
1220
1265
91
163
347
590
711
713
1389
1612
2638
3285
5
189
385
512
558
675
740
1053
1075
1797
48
124
458
473
753
1228
1562
1612
1698
1699
32
33
207
388
596
608
1341
1737
2286
2350
91
136
347
590
711
1389
1612
1677
2638
3285
62
86
124
133
277
702
1103
1198
1220
1265
163
189
347
512
590
711
713
740
2638
3285
91
163
347
590
711
713
1389
1612
2638
3285
16

990
1000
1006
2556
62
86
124
133
277
702
1103
1198
1265
1562
75
409
412
865
1038
1061
1359
1625
2299
3347
124
189
558
740
935
1000
1094
1220
1265
2203
62
64
86
124
133
277
702
1058
1198
1265
32
33
207
596
608
1341
1695
1737
1842
2286
121
305
474
662
921
1260
1368
1491
1612
1928
163
347
590
711
713
909
1612
1797
2638
3285
163
347
512
590
711
713
909
2253
2638
3285
62
86
133
277
512
702
1058
1103
1198
1265
124
163
347
590
711
713
1562
1612
2638
3285
76
163
347
590
711
713
792
1865
2638
3285
91
124
136
587
1036
1058
1389
1562
1612
1677
163
207
347
590
711
713
909
1797
2638
3285
62
124
133
189
277
512
702
740
1220
1265
202
209
532
555
651
909
990
991
2556
3030
62
86
133
277
512
702
740
1103
1198
1265
91
163
347
590
711
713
1389
1612
2638
3285
91
118
229
469
720
982
1036
1061
1389
1612
163
347
512
590
711
713
740
909
2638
3285
398
409
412
865
1012
1038
1061
1492
1922
2544
163
347
590
711
713
909
1389
1612
2638
3285
163
347
512
711
713
740
909
1797
2638
3285
145
163
347
457
590
711
713
2253


163
458
1058
1389
1562
1612
1928
2638
674
865
1061
1359
2487
2544
2850
2906
3018
3316
73
91
136
662
921
1058
1389
1612
1677
1723
62
64
124
133
702
740
1094
1103
1198
1265
91
163
347
590
711
713
1389
1612
2638
3285
207
512
558
608
740
988
1695
1797
1842
2622
91
163
347
590
711
713
1389
1612
2638
3285
5
148
207
385
558
608
675
740
1695
1797
91
163
347
590
711
713
1389
1612
2638
3285
124
163
347
512
711
713
740
1058
1562
1612
62
86
124
133
277
702
740
1103
1198
1265
32
207
512
558
608
740
779
935
1341
1842
136
163
347
590
711
713
1612
1677
2638
3285
145
163
347
457
590
711
713
2253
2638
3285
163
347
590
711
713
909
1797
2253
2638
3285
5
148
385
675
740
779
909
935
1797
2638
163
347
474
590
711
713
829
2638
3116
3285
163
347
412
590
711
713
740
1354
2638
3285
136
229
347
590
711
713
982
1677
2638
3285
412
1038
1354
1359
1785
1928
2487
2544
2906
3085
163
347
457
590
711
713
740
2253
2638
3285
62
86
124
512
740
909
1058
1220
1265
1562
62
86
124
133
512
702
1058
1103
1220
1265
124
163
347
590

1103
1198
1265
91
124
136
458
1058
1265
1389
1562
1612
1677
145
163
347
590
711
713
1928
2253
2638
3285
124
189
512
740
909
1058
1265
1562
1612
1797
5
148
385
675
1354
1359
1797
3018
3085
3316
62
86
124
133
702
740
1058
1103
1198
1265
163
192
347
590
711
713
740
909
2638
3285
91
163
347
590
711
713
1389
1612
2638
3285
48
92
99
124
458
473
546
1109
1472
1698
75
252
412
1359
1625
1785
1922
1987
2544
2850
163
347
512
590
711
713
909
1797
2638
3285
91
229
409
412
982
1389
1612
1651
1785
1928
5
64
148
189
385
675
740
808
1695
1797
163
347
590
711
713
1625
2638
2906
3018
3285
124
145
163
347
457
590
711
713
2638
3285
91
124
136
458
1058
1265
1389
1562
1612
1677
84
145
163
347
457
590
711
713
2638
3285
674
918
1012
1256
1354
1359
1595
1651
1922
2850
163
512
590
711
713
740
909
1797
2638
3285
136
163
347
590
711
713
1612
1677
2638
3285
145
163
347
590
711
713
1612
2253
2638
3285
32
207
558
608
779
935
1341
1695
1842
2286
91
512
590
740
909
1058
1389
1612
1797
1928
163
347
590
711
713
909
1612


347
457
590
711
713
1612
2253
2638
3285
192
209
512
555
603
740
909
990
1797
2806
62
86
124
133
277
702
1103
1198
1220
1265
558
628
655
741
1341
1695
1941
2203
2277
3055
163
347
590
711
713
740
909
1797
2638
3285
91
118
124
153
458
469
720
1036
1389
1612
5
32
148
189
385
558
675
740
779
935
163
347
512
590
740
1058
1389
1612
1797
1928
163
347
512
590
711
713
909
1928
2638
3285
145
163
347
457
590
711
713
2253
2638
3285
5
148
189
385
512
558
675
740
1053
1075
163
347
512
590
711
713
740
1797
2638
3285
124
347
590
711
713
1058
1562
1612
2638
3285
62
86
133
277
702
740
1000
1103
1198
1265
918
1012
1038
1359
1376
1595
2299
2544
2850
3301
91
136
458
1058
1389
1562
1612
1677
2780
3692
91
163
512
711
713
1058
1389
1612
1797
1928
91
124
163
347
713
1036
1389
1562
1612
1797
163
347
590
711
713
1058
1562
1612
2638
3285
62
133
277
1058
1103
1198
1265
1389
1562
1612
192
202
209
512
555
740
909
990
1000
1265
163
347
512
590
711
713
909
1797
2638
3285
412
1038
1354
1359
1376
1595
1651
2299
2487
3018

711
713
1389
1612
1928
2638
3285
62
86
189
512
702
740
1058
1103
1198
1265
62
86
136
512
909
1058
1265
1290
1677
2175
5
148
163
189
385
512
675
740
909
1797
32
33
207
381
596
608
1341
1767
1842
2286
91
124
136
229
458
982
1058
1389
1612
1677
163
347
457
590
711
713
909
2253
2638
3285
76
163
347
590
711
713
1506
1865
2638
3285
163
347
512
590
711
713
909
1797
2638
3285
62
124
133
189
740
1000
1094
1103
1220
1265
163
347
512
590
711
713
740
909
1797
2638
91
136
163
229
347
982
1389
1612
1677
2638
170
252
412
1492
1785
2544
2850
2906
3018
3301
75
381
474
918
1038
1061
1651
1767
1785
2622
145
163
347
457
590
711
713
2253
2638
3285
170
674
1012
1256
1492
1785
1987
2398
2850
2906
91
229
982
1061
1256
1354
1389
1612
1922
2544
64
91
124
720
1003
1036
1058
1389
1562
1612
5
148
189
385
512
675
740
909
1797
2638
163
347
590
711
713
1922
2299
2638
3285
3347
91
124
136
1036
1058
1389
1562
1612
1677
1797
145
163
347
457
590
711
713
2253
2638
3285
136
163
347
474
590
711
713
1677
2638
3285
62
86
124


64
124
458
714
1058
1220
1265
1389
1562
1612
409
1061
1256
1359
1651
2299
2398
2544
2850
3085
163
347
398
590
711
713
2544
2638
3018
3285
76
170
792
1061
1256
1506
1651
1785
1865
3085
124
133
189
558
702
740
1000
1094
1198
1265
145
163
347
457
590
711
713
2253
2638
3285
136
163
347
590
711
713
1612
1677
2638
3285
62
64
86
124
133
277
702
1103
1198
1265
32
207
558
608
779
935
1220
1341
1695
2203
44
60
189
558
740
767
1000
1094
1220
1265
64
86
133
189
277
512
702
740
1198
1265
75
398
409
412
1012
1651
1987
2487
2906
3301
124
163
347
590
711
713
1058
1612
2638
3285
5
148
189
192
385
558
675
740
779
935
75
163
347
590
711
713
1376
1492
2638
3285
163
347
474
590
711
713
1247
1928
2638
3285
145
163
347
457
590
711
713
2253
2638
3285
145
163
347
457
590
711
713
2253
2638
3285
163
347
590
711
713
1612
1928
2487
2638
3285
163
347
457
474
590
711
713
1928
2638
3285
75
163
347
590
711
713
1376
2638
3285
3347
145
163
347
457
590
711
713
2253
2638
3285
44
124
189
558
740
779
935
1094
2203
3055
163


409
412
608
1038
1492
1625
1842
1987
124
163
347
590
711
713
1036
1612
2638
3285
32
207
558
608
740
808
1341
1695
1737
1842
145
163
347
512
590
711
713
740
2638
3285
62
124
189
512
740
1058
1103
1198
1220
1265
75
163
347
590
711
713
2638
2906
3285
3301
163
347
474
590
711
713
1376
2638
3085
3285
163
189
347
512
711
713
740
909
1797
2638
32
189
512
740
779
909
935
1053
1075
1797
124
136
458
587
662
1058
1265
1562
1612
1677
121
136
229
590
711
713
982
1677
2638
3285
163
347
590
711
713
918
1595
2299
2638
3285
163
347
590
674
711
713
1061
1492
2638
3285
32
33
207
582
596
608
1220
1341
1842
2286
75
207
412
608
1376
1625
1922
1987
2850
3085
163
347
590
711
713
740
1612
1797
2638
3285
75
170
252
1038
1256
1595
1987
2398
2487
3085
398
409
412
674
1012
1038
2398
2850
3085
3301
145
163
347
590
711
713
740
2253
2638
3285
32
33
207
596
608
1341
1695
1737
1842
2286
124
189
512
740
909
1058
1075
1220
1265
1797
6
124
587
854
1036
1081
1082
1265
1562
3186
62
86
133
277
512
702
740
1103
1198
1265
145


558
740
1053
1075
1265
26
322
662
958
1058
1389
1612
1720
1723
2139
145
163
347
457
590
711
713
1612
2638
3285
86
133
209
512
555
740
990
1058
1198
1265
145
163
347
457
590
711
713
2253
2638
3285
163
347
590
711
713
909
1612
1928
2638
3285
44
64
124
189
512
558
740
767
1075
1265
124
145
163
347
457
590
711
713
2638
3285
163
347
512
590
711
713
740
909
2638
3285
252
1012
1038
1492
1797
2398
2850
2906
3018
3301
252
412
674
865
918
1492
1785
2850
3316
3347
91
163
347
590
711
713
1389
1612
2638
3285
5
32
148
558
675
740
779
935
1341
1695
163
347
590
711
713
918
1376
2638
3018
3285
163
347
590
711
713
1922
1987
2638
3285
3301
145
163
347
457
590
711
713
2253
2638
3285
62
86
124
133
277
702
740
1103
1198
1265
75
409
674
1038
1061
1785
2487
2544
2906
3085
124
145
163
347
590
711
713
2253
2638
3285
124
163
347
590
711
713
1562
1612
2638
3285
75
91
207
608
782
1036
1389
1612
1651
3085
91
124
136
458
1036
1058
1389
1562
1612
1677
91
124
136
458
1058
1220
1265
1562
1612
1677
76
124
458
474
662
79

474
590
711
713
829
2638
3116
3285
62
86
124
209
555
740
990
1000
1058
1265
91
163
229
347
590
711
982
1389
1612
3285
5
148
189
385
512
675
740
1075
1317
1797
75
398
412
865
918
2398
2850
3301
3316
3347
84
163
347
590
711
713
1562
1928
2638
3285
124
163
347
590
711
713
1625
2398
2638
3285
145
163
347
457
590
711
713
1797
2638
3285
124
163
347
590
711
713
740
1612
2638
3285
62
124
133
512
702
740
1058
1103
1198
1265
398
409
412
674
1256
1625
1785
1922
2850
3316
145
163
347
457
590
711
713
2253
2638
3285
5
148
163
347
590
675
711
713
2638
3285
32
33
40
207
596
608
1341
1695
1842
2286
124
189
558
740
779
935
1000
1094
1220
1265
124
163
347
590
711
713
720
1036
2638
3285
163
347
512
590
711
713
909
1797
2638
3285
409
412
1256
1595
1625
1922
2398
2487
2906
3085
5
148
163
189
347
385
590
675
740
1797
412
674
1256
1492
1922
1987
2544
3018
3301
3316
163
347
398
590
711
713
1256
1612
2638
3285
163
347
457
590
711
713
1928
2638
2712
3285
163
347
590
674
711
713
918
1612
2638
3285
91
163
347
590


3285
207
608
1012
1038
1595
1625
1842
1922
2544
3347
163
347
512
711
713
740
909
1797
2638
3285
91
163
347
458
590
711
713
1612
2638
3285
163
347
398
474
590
711
713
1612
2638
3285
145
163
347
457
590
711
1928
2253
2638
3285
163
347
590
711
713
909
1612
2253
2638
3285
70
123
196
197
199
416
623
1839
3158
3704
91
124
590
713
1058
1389
1562
1612
2638
3285
91
136
458
714
1058
1220
1265
1389
1612
1677
124
163
347
590
711
713
1389
1612
2638
3285
136
163
229
347
587
590
982
1677
2638
3285
5
64
124
148
189
385
675
740
1265
1562
62
86
133
277
702
1058
1103
1198
1265
1612
64
124
189
512
702
740
1058
1075
1220
1265
145
163
347
457
590
711
713
2253
2638
3285
91
163
347
458
711
713
1389
1612
2638
3285
62
64
86
124
702
740
775
1094
1103
1265
5
64
124
148
189
385
675
740
1036
1058
163
347
412
590
711
713
1256
2398
2638
3285
207
608
865
1012
1359
2487
2850
3085
3301
3347
91
124
347
512
740
1058
1389
1562
1612
3285
91
163
347
590
711
713
1389
1612
2638
3285
170
398
674
865
1359
1376
2299
2487
3316
334

91
136
229
662
921
982
1058
1389
1612
1677
136
163
347
590
711
713
1612
1677
2638
3285
163
347
398
409
590
711
713
1038
2638
3285
5
32
148
207
385
608
675
740
1053
1797
75
1038
1061
1651
1785
2544
2906
3085
3316
3347
86
202
209
555
740
909
990
1000
1058
1265
124
163
347
590
711
713
1036
2253
2638
3285
91
124
347
590
1036
1058
1389
1562
1612
3285
170
865
918
1595
1625
1987
2398
2850
3018
3085
91
124
163
347
590
711
713
1036
1389
1612
91
136
163
711
713
1058
1389
1562
1612
1677
5
148
189
385
575
647
675
740
1251
1695
91
163
347
590
711
713
1389
1612
2638
3285
124
458
512
713
740
1058
1075
1220
1265
2638
62
86
124
133
277
702
1103
1198
1265
1562
48
124
458
461
546
662
921
1239
1612
1699
163
347
474
590
711
713
1247
2638
3116
3285
163
347
512
590
711
713
909
1797
2638
3285
5
91
126
148
385
675
1389
1562
1612
1797
5
189
192
575
647
740
779
909
935
1251
163
347
590
711
713
1928
2544
2638
3085
3285
512
711
713
740
909
1058
1612
1797
2638
3285
62
124
133
189
277
702
740
1103
1198
1265
91
163
3

713
1612
2638
3285
124
163
347
458
587
590
711
1036
1562
3285
170
207
398
412
608
1256
1695
2299
2487
3018
91
343
512
909
1260
1389
1491
1612
1797
1928
62
64
124
133
189
512
740
1058
1265
1562
163
347
474
590
711
713
1612
1928
2638
3285
91
124
163
458
713
1036
1389
1612
2638
3285
62
86
124
133
277
702
1058
1103
1198
1265
91
252
398
412
1389
1612
1928
2398
2850
3301
79
91
136
458
1058
1389
1562
1612
1677
1928
75
163
347
590
711
713
1625
1922
2638
3285
62
86
91
124
512
1058
1265
1389
1562
1612
145
163
347
590
711
713
988
1058
2638
3285
91
229
958
982
1036
1058
1389
1562
1612
2638
163
347
590
711
713
909
1612
1797
2638
3285
62
86
124
133
277
702
1058
1103
1198
1265
91
124
347
590
711
1058
1389
1612
2638
3285
124
133
136
1058
1103
1220
1265
1562
1612
1677
44
60
64
124
189
740
767
775
1265
1317
5
148
189
192
385
558
675
740
1341
1695
91
163
347
711
713
1389
1612
1928
2638
3285
62
86
124
133
277
702
1103
1198
1265
1562
145
163
347
457
590
711
713
2253
2638
3285
163
347
590
674
711
713
1354
1

In [20]:
allUsersRecommendations_df

Unnamed: 0,userId,movie,explanations
0,0,"Wizard of Oz, The (1939)","{8, 26, 44, 45}"
1,0,Pocahontas (1995),"{32, 33, 34, 35, 37, 10, 46}"
2,0,Hercules (1997),"{33, 34, 35, 37, 10, 16, 25}"
3,0,Aladdin (1992),"{4, 37, 40, 8, 10, 45}"
4,0,"Little Mermaid, The (1989)","{33, 37, 10, 45, 46}"
...,...,...,...
60395,6039,"Purple Rose of Cairo, The (1985)","{392, 170, 1962, 1292, 1651, 183, 797}"
60396,6039,Brassed Off (1996),"{176, 382}"
60397,6039,Twelfth Night (1996),"{417, 382, 221, 361}"
60398,6039,Best Men (1997),{}


# Model Fidelity Calculation

In [21]:
expl = allUsersRecommendations_df[[len(x) > 0 for x in allUsersRecommendations_df.explanations]]
fidelity = expl.groupby('userId')['movie'].count() / 10
modelFidelity = sum(fidelity)/6040

In [22]:
modelFidelity

0.5736754966887422

# Single user testing

# Testing getting 2 users very similar and testing stability of explanations

In [23]:
groupedUserRatings = ratings_df.groupby('userId')
testUser = groupedUserRatings.get_group(6)
testUser.shape

(31, 5)

In [24]:
testUser

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId
523,7,648,4,6,58
524,7,861,4,6,430
525,7,2916,5,6,62
526,7,3578,3,6,68
527,7,3793,3,6,237
528,7,1610,5,6,78
529,7,589,5,6,97
530,7,6,4,6,255
531,7,442,4,6,107
532,7,733,5,6,195


In [25]:
testUserTitles = movies_df[movies_df['itemId'].isin(testUser['itemId'].tolist())]
testUserTitles

Unnamed: 0,origin_iid,itemId,title,genre,year
22,1270,22,Back to the Future (1985),"[Comedy, Sci-Fi]",1985
48,2028,48,Saving Private Ryan (1998),"[Action, Drama, War]",1998
58,648,58,Mission: Impossible (1996),"[Action, Adventure, Mystery]",1996
62,2916,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
68,3578,68,Gladiator (2000),"[Action, Drama]",2000
74,3107,74,Backdraft (1991),"[Action, Drama]",1991
78,1610,78,"Hunt for Red October, The (1990)","[Action, Thriller]",1990
90,3256,90,Patriot Games (1992),"[Action, Thriller]",1992
92,110,92,Braveheart (1995),"[Action, Drama, War]",1995
97,589,97,Terminator 2: Judgment Day (1991),"[Action, Sci-Fi, Thriller]",1991


In [26]:
testUser = pd.merge(testUser, testUserTitles)
testUser

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId,title,genre,year
0,7,648,4,6,58,Mission: Impossible (1996),"[Action, Adventure, Mystery]",1996
1,7,861,4,6,430,Supercop (1992),"[Action, Thriller]",1992
2,7,2916,5,6,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
3,7,3578,3,6,68,Gladiator (2000),"[Action, Drama]",2000
4,7,3793,3,6,237,X-Men (2000),"[Action, Sci-Fi]",2000
5,7,1610,5,6,78,"Hunt for Red October, The (1990)","[Action, Thriller]",1990
6,7,589,5,6,97,Terminator 2: Judgment Day (1991),"[Action, Sci-Fi, Thriller]",1991
7,7,6,4,6,255,Heat (1995),"[Action, Crime, Thriller]",1995
8,7,442,4,6,107,Demolition Man (1993),"[Action, Sci-Fi]",1993
9,7,733,5,6,195,"Rock, The (1996)","[Action, Adventure, Thriller]",1996


In [27]:
testUser = testUser.drop('genre', 1).drop('year', 1)
testUser

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId,title
0,7,648,4,6,58,Mission: Impossible (1996)
1,7,861,4,6,430,Supercop (1992)
2,7,2916,5,6,62,Total Recall (1990)
3,7,3578,3,6,68,Gladiator (2000)
4,7,3793,3,6,237,X-Men (2000)
5,7,1610,5,6,78,"Hunt for Red October, The (1990)"
6,7,589,5,6,97,Terminator 2: Judgment Day (1991)
7,7,6,4,6,255,Heat (1995)
8,7,442,4,6,107,Demolition Man (1993)
9,7,733,5,6,195,"Rock, The (1996)"


In [28]:
testHotEnc = moviesWithGenres_df[moviesWithGenres_df['itemId'].isin(testUser['itemId'].tolist())]
testHotEnc = testHotEnc.reset_index(drop=True)
testUserGenre = testHotEnc.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
#print(testUserGenre.transpose().shape)
#print(testUser['rating'].shape)
testUserProfile = testUserGenre.transpose().dot(testUser['rating'])
testUserAllMoviesGenreTable = moviesWithGenres_df.set_index(moviesWithGenres_df['itemId'])
testUserAllMoviesGenreTable = testUserAllMoviesGenreTable.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
testUserRecommendations_df = ((testUserAllMoviesGenreTable*testUserProfile).sum(axis=1))/(testUserProfile.sum())
testUserRecommendations_df = testUserRecommendations_df.sort_values(ascending=False)
testUserRecommendations = movies_df.loc[movies_df['itemId'].isin(testUserRecommendations_df.head(10).keys())]
testUserRecommendations

Unnamed: 0,origin_iid,itemId,title,genre,year
62,2916,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
86,1544,86,"Lost World: Jurassic Park, The (1997)","[Action, Adventure, Sci-Fi, Thriller]",1997
133,1917,133,Armageddon (1998),"[Action, Adventure, Sci-Fi, Thriller]",1998
277,1127,277,"Abyss, The (1989)","[Action, Adventure, Sci-Fi, Thriller]",1989
702,1129,702,Escape from New York (1981),"[Action, Adventure, Sci-Fi, Thriller]",1981
1058,1876,1058,Deep Impact (1998),"[Action, Drama, Sci-Fi, Thriller]",1998
1103,1591,1103,Spawn (1997),"[Action, Adventure, Sci-Fi, Thriller]",1997
1198,849,1198,Escape from L.A. (1996),"[Action, Adventure, Sci-Fi, Thriller]",1996
1265,2322,1265,Soldier (1998),"[Action, Adventure, Sci-Fi, Thriller, War]",1998
1562,2344,1562,Runaway Train (1985),"[Action, Adventure, Drama, Thriller]",1985


In [29]:
testUser2 = groupedUserRatings.get_group(6)
testUser2.shape

(31, 5)

In [30]:
testUser2

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId
523,7,648,4,6,58
524,7,861,4,6,430
525,7,2916,5,6,62
526,7,3578,3,6,68
527,7,3793,3,6,237
528,7,1610,5,6,78
529,7,589,5,6,97
530,7,6,4,6,255
531,7,442,4,6,107
532,7,733,5,6,195


In [31]:
testUserTitles2 = movies_df[movies_df['itemId'].isin(testUser2['itemId'].tolist())]
testUserTitles2

Unnamed: 0,origin_iid,itemId,title,genre,year
22,1270,22,Back to the Future (1985),"[Comedy, Sci-Fi]",1985
48,2028,48,Saving Private Ryan (1998),"[Action, Drama, War]",1998
58,648,58,Mission: Impossible (1996),"[Action, Adventure, Mystery]",1996
62,2916,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
68,3578,68,Gladiator (2000),"[Action, Drama]",2000
74,3107,74,Backdraft (1991),"[Action, Drama]",1991
78,1610,78,"Hunt for Red October, The (1990)","[Action, Thriller]",1990
90,3256,90,Patriot Games (1992),"[Action, Thriller]",1992
92,110,92,Braveheart (1995),"[Action, Drama, War]",1995
97,589,97,Terminator 2: Judgment Day (1991),"[Action, Sci-Fi, Thriller]",1991


In [32]:
testUser2 = pd.merge(testUser2, testUserTitles2)
testUser2

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId,title,genre,year
0,7,648,4,6,58,Mission: Impossible (1996),"[Action, Adventure, Mystery]",1996
1,7,861,4,6,430,Supercop (1992),"[Action, Thriller]",1992
2,7,2916,5,6,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
3,7,3578,3,6,68,Gladiator (2000),"[Action, Drama]",2000
4,7,3793,3,6,237,X-Men (2000),"[Action, Sci-Fi]",2000
5,7,1610,5,6,78,"Hunt for Red October, The (1990)","[Action, Thriller]",1990
6,7,589,5,6,97,Terminator 2: Judgment Day (1991),"[Action, Sci-Fi, Thriller]",1991
7,7,6,4,6,255,Heat (1995),"[Action, Crime, Thriller]",1995
8,7,442,4,6,107,Demolition Man (1993),"[Action, Sci-Fi]",1993
9,7,733,5,6,195,"Rock, The (1996)","[Action, Adventure, Thriller]",1996


In [33]:
testUser2 = testUser2.drop('genre', 1).drop('year', 1)
testUser2

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId,title
0,7,648,4,6,58,Mission: Impossible (1996)
1,7,861,4,6,430,Supercop (1992)
2,7,2916,5,6,62,Total Recall (1990)
3,7,3578,3,6,68,Gladiator (2000)
4,7,3793,3,6,237,X-Men (2000)
5,7,1610,5,6,78,"Hunt for Red October, The (1990)"
6,7,589,5,6,97,Terminator 2: Judgment Day (1991)
7,7,6,4,6,255,Heat (1995)
8,7,442,4,6,107,Demolition Man (1993)
9,7,733,5,6,195,"Rock, The (1996)"


In [34]:
testUser2.at[0,'origin_iid']=3408
testUser2.at[0,'itemId']=3
testUser2.at[0,'rating']=3
testUser2.at[0,'title']='Erin Brockovich (2000)'

In [35]:
testUser2

Unnamed: 0,origin_uid,origin_iid,rating,userId,itemId,title
0,7,3408,3,6,3,Erin Brockovich (2000)
1,7,861,4,6,430,Supercop (1992)
2,7,2916,5,6,62,Total Recall (1990)
3,7,3578,3,6,68,Gladiator (2000)
4,7,3793,3,6,237,X-Men (2000)
5,7,1610,5,6,78,"Hunt for Red October, The (1990)"
6,7,589,5,6,97,Terminator 2: Judgment Day (1991)
7,7,6,4,6,255,Heat (1995)
8,7,442,4,6,107,Demolition Man (1993)
9,7,733,5,6,195,"Rock, The (1996)"


In [36]:
testHotEnc2 = moviesWithGenres_df[moviesWithGenres_df['itemId'].isin(testUser2['itemId'].tolist())]
testHotEnc2 = testHotEnc2.reset_index(drop=True)
testUserGenre2 = testHotEnc2.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
#print(testUserGenre.transpose().shape)
#print(testUser['rating'].shape)
testUserProfile2 = testUserGenre2.transpose().dot(testUser2['rating'])
testUserAllMoviesGenreTable2 = moviesWithGenres_df.set_index(moviesWithGenres_df['itemId'])
testUserAllMoviesGenreTable2 = testUserAllMoviesGenreTable2.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
testUserRecommendations_df2 = ((testUserAllMoviesGenreTable2*testUserProfile2).sum(axis=1))/(testUserProfile2.sum())
testUserRecommendations_df2 = testUserRecommendations_df2.sort_values(ascending=False)
testUserRecommendations2 = movies_df.loc[movies_df['itemId'].isin(testUserRecommendations_df2.head(10).keys())]
testUserRecommendations2

Unnamed: 0,origin_iid,itemId,title,genre,year
62,2916,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
86,1544,86,"Lost World: Jurassic Park, The (1997)","[Action, Adventure, Sci-Fi, Thriller]",1997
133,1917,133,Armageddon (1998),"[Action, Adventure, Sci-Fi, Thriller]",1998
277,1127,277,"Abyss, The (1989)","[Action, Adventure, Sci-Fi, Thriller]",1989
702,1129,702,Escape from New York (1981),"[Action, Adventure, Sci-Fi, Thriller]",1981
1058,1876,1058,Deep Impact (1998),"[Action, Drama, Sci-Fi, Thriller]",1998
1103,1591,1103,Spawn (1997),"[Action, Adventure, Sci-Fi, Thriller]",1997
1198,849,1198,Escape from L.A. (1996),"[Action, Adventure, Sci-Fi, Thriller]",1996
1265,2322,1265,Soldier (1998),"[Action, Adventure, Sci-Fi, Thriller, War]",1998
1562,2344,1562,Runaway Train (1985),"[Action, Adventure, Drama, Thriller]",1985


In [37]:
testUserRecommendations

Unnamed: 0,origin_iid,itemId,title,genre,year
62,2916,62,Total Recall (1990),"[Action, Adventure, Sci-Fi, Thriller]",1990
86,1544,86,"Lost World: Jurassic Park, The (1997)","[Action, Adventure, Sci-Fi, Thriller]",1997
133,1917,133,Armageddon (1998),"[Action, Adventure, Sci-Fi, Thriller]",1998
277,1127,277,"Abyss, The (1989)","[Action, Adventure, Sci-Fi, Thriller]",1989
702,1129,702,Escape from New York (1981),"[Action, Adventure, Sci-Fi, Thriller]",1981
1058,1876,1058,Deep Impact (1998),"[Action, Drama, Sci-Fi, Thriller]",1998
1103,1591,1103,Spawn (1997),"[Action, Adventure, Sci-Fi, Thriller]",1997
1198,849,1198,Escape from L.A. (1996),"[Action, Adventure, Sci-Fi, Thriller]",1996
1265,2322,1265,Soldier (1998),"[Action, Adventure, Sci-Fi, Thriller, War]",1998
1562,2344,1562,Runaway Train (1985),"[Action, Adventure, Drama, Thriller]",1985


In [38]:
#testing explanations for both test users
#test user 1 
testUser1Ratings = testUser['itemId']
testUser1RatingsArray = np.asarray(testUser1Ratings)
testUser1RatingsArray

array([ 58, 430,  62,  68, 237,  78,  97, 255, 107, 195, 264, 120, 124,
       132, 148, 431,  22, 151, 432, 406,  74, 433, 319,  90, 189,  92,
       434,  48, 113, 150, 156])

In [63]:
sim_items = knn_items_dict[1562]
explanation =  set(sim_items) & set(testUser1RatingsArray)
explanation

set()

In [64]:
#testing explanations for both test users
#test user 2 
testUser2Ratings = testUser2['itemId']
testUser2RatingsArray = np.asarray(testUser2Ratings)
testUser2RatingsArray

array([  3, 430,  62,  68, 237,  78,  97, 255, 107, 195, 264, 120, 124,
       132, 148, 431,  22, 151, 432, 406,  74, 433, 319,  90, 189,  92,
       434,  48, 113, 150, 156])

In [65]:
sim_items = knn_items_dict[1562]
explanation =  set(sim_items) & set(testUser2RatingsArray)
explanation

set()

# End of stability testing

In [528]:
#userInput = [
#            {'title':'James and the Giant Peach (1996)', 'rating':5},
#            {'title':'My Fair Lady (1964)', 'rating':3.5},
#            {'title':'Erin Brockovich (2000)', 'rating':5},
#            {'title':"Modulations (1998)", 'rating':5},
#            {'title':'White Boys (1999)', 'rating':4.5}
#         ] 
inputMovies = pd.DataFrame(userInput)
inputMovies

Unnamed: 0,title,rating
0,James and the Giant Peach (1996),5.0
1,My Fair Lady (1964),3.5
2,Erin Brockovich (2000),5.0
3,Modulations (1998),5.0
4,White Boys (1999),4.5


### Add movieId to input user
With the input complete, let's extract the input movie's ID's from the movies dataframe and add them into it.

In [529]:
#Filtering out the movies by title
inputId = movies_df[movies_df['title'].isin(inputMovies['title'].tolist())]

In [530]:
inputId

Unnamed: 0,origin_iid,itemId,title,genre,year
1,661,1,James and the Giant Peach (1996),"[Animation, Children's, Musical]",1996
2,914,2,My Fair Lady (1964),"[Musical, Romance]",1964
3,3408,3,Erin Brockovich (2000),[Drama],2000
3701,2198,3701,Modulations (1998),[Documentary],1998
3703,2845,3703,White Boys (1999),[Drama],1999


In [531]:
#Then merging it so we can get the movieId. It's implicitly merging it by title.
inputMovies = pd.merge(inputId, inputMovies)

In [532]:
#Dropping information we won't use from the input dataframe
inputMovies = inputMovies.drop('genre', 1).drop('year', 1)

In [533]:
inputMovies

Unnamed: 0,origin_iid,itemId,title,rating
0,661,1,James and the Giant Peach (1996),5.0
1,914,2,My Fair Lady (1964),3.5
2,3408,3,Erin Brockovich (2000),5.0
3,2198,3701,Modulations (1998),5.0
4,2845,3703,White Boys (1999),4.5


### Getting genre table for user movies

We're going to start by learning the input's preferences, so let's get the subset of movies that the input has watched from the Dataframe containing genres defined with binary values.

In [534]:
#Filtering out the movies from the input
userMovies = moviesWithGenres_df[moviesWithGenres_df['itemId'].isin(inputMovies['itemId'].tolist())]
userMovies

Unnamed: 0,origin_iid,itemId,title,genre,year,Drama,Animation,Children's,Musical,Romance,...,Fantasy,Sci-Fi,War,Thriller,Crime,Mystery,Western,Horror,Film-Noir,Documentary
1,661,1,James and the Giant Peach (1996),"[Animation, Children's, Musical]",1996,0.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,914,2,My Fair Lady (1964),"[Musical, Romance]",1964,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3408,3,Erin Brockovich (2000),[Drama],2000,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3701,2198,3701,Modulations (1998),[Documentary],1998,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3703,2845,3703,White Boys (1999),[Drama],1999,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


We'll only need the actual genre table, so let's clean this up a bit by resetting the index and dropping the movieId, title, genres and year columns.

In [535]:
#Resetting the index to avoid future issues
userMovies = userMovies.reset_index(drop=True)

#Dropping unnecessary issues due to save memory and to avoid issues
userGenreTable = userMovies.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
userGenreTable

Unnamed: 0,Drama,Animation,Children's,Musical,Romance,Comedy,Action,Adventure,Fantasy,Sci-Fi,War,Thriller,Crime,Mystery,Western,Horror,Film-Noir,Documentary
0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Start learning the input's preferences

To do this, we're going to turn each genre into weights. We can do this by using the input's reviews and multiplying them into the input's genre table and then summing up the resulting table by column. This operation is actually a dot product between a matrix and a vector, so we can simply accomplish by calling Pandas's "dot" function.

In [536]:
inputMovies['rating']

0    5.0
1    3.5
2    5.0
3    5.0
4    4.5
Name: rating, dtype: float64

In [537]:
#Dot produt to get weights
userProfile = userGenreTable.transpose().dot(inputMovies['rating'])

#The user profile
userProfile

Drama          9.5
Animation      5.0
Children's     5.0
Musical        8.5
Romance        3.5
Comedy         0.0
Action         0.0
Adventure      0.0
Fantasy        0.0
Sci-Fi         0.0
War            0.0
Thriller       0.0
Crime          0.0
Mystery        0.0
Western        0.0
Horror         0.0
Film-Noir      0.0
Documentary    5.0
dtype: float64

Now, we have the weights for every of the user's preferences. This is known as the User Profile. Using this, we can recommend movies that satisfy the user's preferences.

Let's start by extracting the genre table from the original dataframe:

In [538]:
#Now let's get the genres of every movie in our original dataframe
genreTable = moviesWithGenres_df.set_index(moviesWithGenres_df['itemId'])

#And drop the unnecessary information
genreTable = genreTable.drop('origin_iid', 1).drop('itemId', 1).drop('title', 1).drop('genre', 1).drop('year', 1)
genreTable.head()

Unnamed: 0_level_0,Drama,Animation,Children's,Musical,Romance,Comedy,Action,Adventure,Fantasy,Sci-Fi,War,Thriller,Crime,Mystery,Western,Horror,Film-Noir,Documentary
itemId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [539]:
genreTable.shape

(3706, 18)

With the input's profile and the complete list of movies and their genres in hand, we're going to take the weighted average of every movie based on the input profile and recommend the top twenty movies that most satisfy it.

In [540]:
#Multiply the genres by the weights and then take the weighted average
recommendationTable_df = ((genreTable*userProfile).sum(axis=1))/(userProfile.sum())
recommendationTable_df.head()

itemId
0    0.260274
1    0.506849
2    0.328767
3    0.260274
4    0.273973
dtype: float64

In [541]:
#Sort our recommendations in descending order
recommendationTable_df = recommendationTable_df.sort_values(ascending=False)

#Just a peek at the values
recommendationTable_df.head()

itemId
9      0.630137
608    0.602740
25     0.602740
207    0.602740
582    0.534247
dtype: float64

In [542]:
recommendationTable_df.head(20)

itemId
9       0.630137
608     0.602740
25      0.602740
207     0.602740
582     0.534247
612     0.506849
632     0.506849
2188    0.506849
390     0.506849
2183    0.506849
609     0.506849
2206    0.506849
596     0.506849
2223    0.506849
682     0.506849
388     0.506849
2350    0.506849
389     0.506849
10      0.506849
2286    0.506849
dtype: float64

### Now here's the recommendation table!

In [543]:
#The final recommendation table
movies_df.loc[movies_df['itemId'].isin(recommendationTable_df.head(5).keys())]

Unnamed: 0,origin_iid,itemId,title,genre,year
9,919,9,"Wizard of Oz, The (1939)","[Adventure, Children's, Drama, Musical]",1939
25,48,25,Pocahontas (1995),"[Animation, Children's, Musical, Romance]",1995
207,2081,207,"Little Mermaid, The (1989)","[Animation, Children's, Comedy, Musical, Romance]",1989
582,2138,582,Watership Down (1978),"[Animation, Children's, Drama, Fantasy]",1978
608,2080,608,Lady and the Tramp (1955),"[Animation, Children's, Comedy, Musical, Romance]",1955


In [583]:
inputMovies

Unnamed: 0,origin_iid,itemId,title,rating
0,661,1,James and the Giant Peach (1996),5.0
1,914,2,My Fair Lady (1964),3.5
2,3408,3,Erin Brockovich (2000),5.0
3,2198,3701,Modulations (1998),5.0
4,2845,3703,White Boys (1999),4.5


In [601]:
user_input_movies = np.asarray(inputMovies['origin_iid'])

In [602]:
user_input_movies

array([ 661,  914, 3408, 2198, 2845])

In [618]:
sim_items = knn_items_dict[2080]

In [619]:
sim_items

array([ 593, 1059,  439,  484,   59,  902,  513,   52, 1384,  704])

In [620]:
set(user_input_movies) & set(sim_items)

set()

### Advantages and Disadvantages of Content-Based Filtering

##### Advantages
* Learns user's preferences
* Highly personalized for the user

##### Disadvantages
* Doesn't take into account what others think of the item, so low quality item recommendations might happen
* Extracting data is not always intuitive
* Determining what characteristics of the item the user dislikes or likes is not always obvious