In [2]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix

In [3]:
ratingCols = ['UserID','MovieID','Rating','Timestamp']
ratings = pd.read_csv('ml-1m/ratings.dat', sep='::', engine='python',header=None,names=ratingCols)
ratings.head()

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [4]:
userCols = ['UserID','Gender','Age','Occupation','Zip-code']
users = pd.read_csv('ml-1m/users.dat', sep='::', engine='python',header=None,names=userCols)
users.head()

Unnamed: 0,UserID,Gender,Age,Occupation,Zip-code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [5]:
movieCols = ['MovieID','Title','Genres']
movies = pd.read_csv('ml-1m/movies.dat', sep='::', engine='python',header=None,names=movieCols)
movies.head()

Unnamed: 0,MovieID,Title,Genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [6]:
#Starting the process to remove the lesser known movies to remove noise
merge_movie_ratings = pd.merge(ratings,movies,on='MovieID')
drop_extra_columns = ['Timestamp','Genres']
merge_movie_ratings = merge_movie_ratings.drop(drop_extra_columns,axis=1)
merge_movie_ratings.head()

Unnamed: 0,UserID,MovieID,Rating,Title
0,1,1193,5,One Flew Over the Cuckoo's Nest (1975)
1,2,1193,5,One Flew Over the Cuckoo's Nest (1975)
2,12,1193,4,One Flew Over the Cuckoo's Nest (1975)
3,15,1193,4,One Flew Over the Cuckoo's Nest (1975)
4,17,1193,5,One Flew Over the Cuckoo's Nest (1975)


In [7]:
merge_movie_ratings = merge_movie_ratings.dropna(axis = 0, subset=['Title'])

In [8]:
movie_ratingCount = (merge_movie_ratings.groupby(by = ['Title'])['Rating'].count().reset_index())
movie_ratingCount.head()

Unnamed: 0,Title,Rating
0,"$1,000,000 Duck (1971)",37
1,'Night Mother (1986),70
2,'Til There Was You (1997),52
3,"'burbs, The (1989)",303
4,...And Justice for All (1979),199


In [9]:
#Now Renaming the columns in movie_ratingCount
#movie_ratingCount = movie_ratingCount.rename(columns={'Title':'Rating'})[['movieTitle','totalRating']]
movie_ratingCount.columns = ['Title','totalRating']
movie_ratingCount.head()

Unnamed: 0,Title,totalRating
0,"$1,000,000 Duck (1971)",37
1,'Night Mother (1986),70
2,'Til There Was You (1997),52
3,"'burbs, The (1989)",303
4,...And Justice for All (1979),199


In [10]:
rating_with_totalRating = merge_movie_ratings.merge(movie_ratingCount,left_on='Title',right_on='Title',how='left')
rating_with_totalRating.head()

Unnamed: 0,UserID,MovieID,Rating,Title,totalRating
0,1,1193,5,One Flew Over the Cuckoo's Nest (1975),1725
1,2,1193,5,One Flew Over the Cuckoo's Nest (1975),1725
2,12,1193,4,One Flew Over the Cuckoo's Nest (1975),1725
3,15,1193,4,One Flew Over the Cuckoo's Nest (1975),1725
4,17,1193,5,One Flew Over the Cuckoo's Nest (1975),1725


In [11]:
#print(rating_with_totalRating)

In [12]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)
print(movie_ratingCount['totalRating'].describe())

count   3706.000
mean     269.889
std      384.048
min        1.000
25%       33.000
50%      123.500
75%      350.000
max     3428.000
Name: totalRating, dtype: float64


In [13]:
print(movie_ratingCount['totalRating'].quantile(np.arange(.9,1,.01)))

0.900    729.500
0.910    773.550
0.920    825.000
0.930    887.300
0.940    971.400
0.950   1051.500
0.960   1133.800
0.970   1268.100
0.980   1446.600
0.990   1784.900
Name: totalRating, dtype: float64


In [14]:
threshold = 100
rating_popular_movie = movie_ratingCount.query('totalRating >= @threshold')
print(rating_popular_movie)

RuntimeError: module compiled against API version 0xc but this version of numpy is 0xb

                                                 Title  totalRating
3                                   'burbs, The (1989)          303
4                        ...And Justice for All (1979)          199
6                    10 Things I Hate About You (1999)          700
7                                101 Dalmatians (1961)          565
8                                101 Dalmatians (1996)          364
9                                  12 Angry Men (1957)          616
10                            13th Warrior, The (1999)          750
12                         2 Days in the Valley (1996)          286
13                                     20 Dates (1998)          139
14                 20,000 Leagues Under the Sea (1954)          575
15                               200 Cigarettes (1999)          181
16                        2001: A Space Odyssey (1968)         1716
17                                         2010 (1984)          470
20                                      28 Days 

In [15]:
rating_with_totalRating_pivot = rating_with_totalRating.pivot(index='Title',columns='UserID',values='Rating').fillna(0)
rating_with_totalRating_matrix = csr_matrix(rating_with_totalRating_pivot.values)

In [16]:
#print(rating_with_totalRating_matrix)

In [17]:
from sklearn.neighbors import NearestNeighbors
model_knn = NearestNeighbors(metric='cosine',algorithm = 'brute')
model_knn.fit(rating_with_totalRating_matrix)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
         metric_params=None, n_jobs=None, n_neighbors=5, p=2, radius=1.0)

In [120]:
from PIL import Image
import matplotlib.pyplot as plt
from tmdbv3api import TMDb, Movie
from imageio import imread
tmdb = TMDb()
tmdb.api_key = 'f85a92e0b3635a045d81d26e262c119d'
tmdb.language = 'en'
tmdb.debug = True
m = Movie()
prefix = 'https://image.tmdb.org/t/p/w200/'


In [121]:
query_index = np.random.choice(rating_with_totalRating_pivot.shape[0])
distances, indices = model_knn.kneighbors(rating_with_totalRating_pivot.iloc[query_index, :].values.reshape(1, -1), n_neighbors = 10)
L = []
for i in range(0, len(distances.flatten())):
    if i == 0:
        print('Recommendations for {0}:\n'.format(rating_with_totalRating_pivot.index[query_index]))
    else:
        print('{0}: {1}, with distance of {2}:'.format(i, rating_with_totalRating_pivot.index[indices.flatten()[i]], distances.flatten()[i]))
        s = rating_with_totalRating_pivot.index[indices.flatten()[i]]
        search = m.search(s[:-6]) 
        suffix = search[0].poster_path
        url = prefix + suffix
        L.append(url)

Recommendations for Force of Evil (1948):

1: Out of the Past (1947), with distance of 0.6553015365521164:


INFO:tmdbv3api.tmdb:{'page': 1, 'total_results': 3, 'total_pages': 1, 'results': [{'vote_count': 204, 'id': 678, 'video': False, 'vote_average': 7.7, 'title': 'Out of the Past', 'popularity': 9.776, 'poster_path': '/6M1Qeuq0K5THG95S9nzvEIRHsyt.jpg', 'original_language': 'en', 'original_title': 'Out of the Past', 'genre_ids': [18, 9648, 53, 10749], 'backdrop_path': '/aO6MUtT9D1G0LkYHOrxG8itPjiU.jpg', 'adult': False, 'overview': 'Jeff Bailey seems to be a mundane gas station owner in remote Bridgeport, CA. He is dating local girl Ann Miller and lives a quiet life. But Jeff has a secret past, and when a mysterious stranger arrives in town, Jeff is forced to return to the dark world he had tried to escape.', 'release_date': '1947-11-13'}, {'vote_count': 0, 'id': 112111, 'video': False, 'vote_average': 0, 'title': 'Out of the Past', 'popularity': 1.127, 'poster_path': '/w1qo4mipdl8M4Ja1vWCFOkv12it.jpg', 'original_language': 'en', 'original_title': 'Out of the Past', 'genre_ids': [99], 'back

2: Murder, My Sweet (1944), with distance of 0.6594703172351895:


INFO:tmdbv3api.tmdb:{'page': 1, 'total_results': 1, 'total_pages': 1, 'results': [{'vote_count': 79, 'id': 1834, 'video': False, 'vote_average': 7.4, 'title': 'Murder, My Sweet', 'popularity': 5.647, 'poster_path': '/7zWUcJejShfLvj9CsfvtSUJuINB.jpg', 'original_language': 'en', 'original_title': 'Murder, My Sweet', 'genre_ids': [80, 18, 9648, 53], 'backdrop_path': '/guY9nJqHIcqmwWJNQ7xzI0CNDXV.jpg', 'adult': False, 'overview': "Gumshoe Philip Marlowe is hired by the oafish Moose Malloy to track down his former girlfriend. He's also hired to accompany an effeminate playboy buy back some jewels. When the exchange results in the playboy's murder, Marlowe can't leave the case alone, and soon discovers it's related to Malloy's. As he gets drawn deeper into a complex web of intrigue by a mysterious blonde, the detective finds his own life in increasing jeopardy.", 'release_date': '1944-12-09'}]}


3: Crossfire (1947), with distance of 0.6839915996557888:


INFO:tmdbv3api.tmdb:{'page': 1, 'total_results': 21, 'total_pages': 2, 'results': [{'vote_count': 42, 'id': 28120, 'video': False, 'vote_average': 6.6, 'title': 'Crossfire', 'popularity': 4.597, 'poster_path': '/gF8ahTXpsqvM6OdC5sqe3K1ncts.jpg', 'original_language': 'en', 'original_title': 'Crossfire', 'genre_ids': [80, 18, 9648, 53], 'backdrop_path': '/wYHTUAmtSmjjsFeXNdN2WwkvHPn.jpg', 'adult': False, 'overview': 'A man is murdered, apparently by one of a group of soldiers just out of the army. But which one? And why?', 'release_date': '1947-08-15'}, {'vote_count': 14, 'id': 423878, 'video': False, 'vote_average': 5, 'title': 'Crossfire', 'popularity': 3.359, 'poster_path': '/enaA9kvPGai5jRMGw4M1J01lmu2.jpg', 'original_language': 'en', 'original_title': 'Crossfire', 'genre_ids': [18, 53, 10770], 'backdrop_path': '/cZG8ubpcnBKF2nzmtQWQe01uzcL.jpg', 'adult': False, 'overview': "When National Guard soldier Samantha Harrison returns from the front lines of Iraq, she realizes that none of 

4: Gilda (1946), with distance of 0.7186485690864213:


INFO:tmdbv3api.tmdb:{'page': 1, 'total_results': 12, 'total_pages': 1, 'results': [{'vote_count': 215, 'id': 3767, 'video': False, 'vote_average': 7.7, 'title': 'Gilda', 'popularity': 9.479, 'poster_path': '/hz2gG5D3GHaaPZUSnRNjMC3Wl1r.jpg', 'original_language': 'en', 'original_title': 'Gilda', 'genre_ids': [18, 10749, 53], 'backdrop_path': '/tCqI3EtSxVxgddWeQtyaT1rRlYf.jpg', 'adult': False, 'overview': "A gambler discovers an old flame while in Argentina, but she's married to his new boss.", 'release_date': '1946-02-14'}, {'vote_count': 17, 'id': 511972, 'video': False, 'vote_average': 7.2, 'title': 'Love, Gilda', 'popularity': 4.879, 'poster_path': '/jJVp7JvPkFLeXKuS2YSLIC2zuK4.jpg', 'original_language': 'en', 'original_title': 'Love, Gilda', 'genre_ids': [99], 'backdrop_path': '/lWRt82Uc5AU2TKyeyQjUrRsv9Eg.jpg', 'adult': False, 'overview': 'Diaries, audiotapes, videotapes and testimonies from friends and colleagues offer insight into the life and career of Gilda Radner -- the belove

5: Lady Eve, The (1941), with distance of 0.7297070935991997:


INFO:tmdbv3api.tmdb:{'page': 1, 'total_results': 4, 'total_pages': 1, 'results': [{'vote_count': 135, 'id': 3086, 'video': False, 'vote_average': 7.4, 'title': 'The Lady Eve', 'popularity': 8.176, 'poster_path': '/lJYD3CMgKtv12hazSHc7xt3i2uq.jpg', 'original_language': 'en', 'original_title': 'The Lady Eve', 'genre_ids': [35, 10749], 'backdrop_path': '/mcBllhz7mbnlzEZLJhge0YbPSR.jpg', 'adult': False, 'overview': "It's no accident when wealthy Charles falls for Jean. Jean is a con artist with her sights set on Charles' fortune. Matters complicate when Jean starts falling for her mark. When Charles suspects Jean is a gold digger, he dumps her. Jean, fixated on revenge and still pining for the millionaire, devises a plan to get back in Charles' life. With love and payback on her mind, she re-introduces herself to Charles, this time as an aristocrat named Lady Eve Sidwich.", 'release_date': '1941-02-25'}, {'vote_count': 0, 'id': 241026, 'video': False, 'vote_average': 0, 'title': 'More Love

6: Asphalt Jungle, The (1950), with distance of 0.7299436901543591:


INFO:tmdbv3api.tmdb:{'page': 1, 'total_results': 1, 'total_pages': 1, 'results': [{'vote_count': 180, 'id': 16958, 'video': False, 'vote_average': 7.6, 'title': 'The Asphalt Jungle', 'popularity': 9.022, 'poster_path': '/kIkPJnMVvFPt08siIJB5G8h8WDO.jpg', 'original_language': 'en', 'original_title': 'The Asphalt Jungle', 'genre_ids': [80, 18], 'backdrop_path': '/tJMttP21HHBkcp1QR5UCmBddgVP.jpg', 'adult': False, 'overview': 'Recently paroled from prison, legendary burglar "Doc" Riedenschneider, with funding from Alonzo Emmerich, a crooked lawyer, gathers a small group of veteran criminals together in the Midwest for a big jewel heist.', 'release_date': '1950-05-22'}]}


7: Six of a Kind (1934), with distance of 0.7317008464462376:


INFO:tmdbv3api.tmdb:{'page': 1, 'total_results': 1, 'total_pages': 1, 'results': [{'vote_count': 2, 'id': 127973, 'video': False, 'vote_average': 7, 'title': 'Six of a Kind', 'popularity': 0.948, 'poster_path': '/jgFaWCqzv27kzscpAM6sSHPjuz7.jpg', 'original_language': 'en', 'original_title': 'Six of a Kind', 'genre_ids': [35], 'backdrop_path': '/xPA8OU74hm5YEKiBuDqMxFAAv5M.jpg', 'adult': False, 'overview': "The Whinneys share expenses for their trip to Hollywood with George and Gracie and their great Dane. A clerk in Whinney's bank has put fifty thousand dollars in a suitcase, hoping to rob Whinney on the road, but instead Whinney takes another road and is himself arrested in Nevada.", 'release_date': '1934-02-09'}]}


8: Macao (1952), with distance of 0.7504868553792778:


INFO:tmdbv3api.tmdb:{'page': 1, 'total_results': 13, 'total_pages': 1, 'results': [{'vote_count': 12, 'id': 26282, 'video': False, 'vote_average': 5.9, 'title': 'Macao', 'popularity': 3.171, 'poster_path': '/1hvGaESoUw1nIXOr3mI1hWYmvtr.jpg', 'original_language': 'en', 'original_title': 'Macao', 'genre_ids': [12, 18, 10749], 'backdrop_path': '/tuSnADcaBbP9YmjdZ8b8Eh2Z0EZ.jpg', 'adult': False, 'overview': 'A man on the run in the Far East is mistaken for an undercover cop.', 'release_date': '1952-04-30'}, {'vote_count': 1, 'id': 591268, 'video': False, 'vote_average': 10, 'title': 'The Dragon of Macao', 'popularity': 3.601, 'poster_path': '/pxbdLluLnWvc9zmI6tZhKDL2OOZ.jpg', 'original_language': 'ja', 'original_title': 'マカオの竜', 'genre_ids': [80, 28], 'backdrop_path': '/sDLuxKaCAdkVfuSpDs0x10ik8qy.jpg', 'adult': False, 'overview': 'Ryu (Akira Kobayashi) sets foot on the Yokohama port with a suitcase filled with 300 thousand dollars. He was sent by a jewelry company in Hong Kong to retrieve

9: Freedom for Us (À nous la liberté ) (1931), with distance of 0.7528855543745865:


INFO:tmdbv3api.tmdb:{'page': 1, 'total_results': 0, 'total_pages': 1, 'results': []}


IndexError: list index out of range

In [135]:

for i in range(6):
    plt.subplot(2,3,i)
    plt.figure()
    #plt.subplot(2, 3, i)
    image = imread(L[i])
    poster = Image.fromarray(image)
    fig = plt.imshow(image, cmap = 'gray')
    fig.axes.get_xaxis().set_visible(False)
    fig.axes.get_yaxis().set_visible(False)

ValueError: num must be 1 <= num <= 6, not 0

<Figure size 432x288 with 0 Axes>