In [2]:
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.feature_extraction.text import CountVectorizer

### Creating an Item-Based Recommender

In [3]:
# Load the title-indexed matrix and the movie list, then preview the first rows of the matrix
df = pd.read_csv('data/item_based.csv', index_col='title')
movies_df = pd.read_csv('ml-10M100K/movies.csv')
df.head()

Unnamed: 0_level_0,3,6,9,12,18,24,27,30,33,36,...,71535,71538,71541,71544,71547,71550,71553,71559,71562,71565
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Round Midnight (1986),,,,,,,,,,,...,,,,,,,,,,
'Til There Was You (1997),,,,,,,,,,,...,,,,,,,,,,
"'burbs, The (1989)",,,,,2.5,,,,,,...,,,,,,,2.5,,,
'night Mother (1986),,,,,,,,,,,...,,,,,,,,,,
*batteries not included (1987),,,,,,,,,,,...,,,,,,,,,,


In [4]:
# Missing entries are replaced with 0 so the frame can be stored as a
# compressed sparse row (CSR) matrix
df_sparse = sparse.csr_matrix(df.fillna(value=0))

In [5]:
# Cosine distance between every pair of rows (titles) of the sparse matrix;
# smaller values mean the two rows point in a more similar direction
recommender = pairwise_distances(df_sparse, metric='cosine')

In [6]:
# Sanity check: the pairwise distance matrix should be square,
# with one row and one column per row of df_sparse
recommender.shape

(10444, 10444)

In [7]:
# Label both axes of the distance matrix with the movie titles so that
# distances can be looked up by title, then preview the first three rows
recommender_df = pd.DataFrame(
    data=recommender,
    index=df.index,
    columns=df.index,
)
recommender_df.head(3)

title,'Round Midnight (1986),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),*batteries not included (1987),"...All the Marbles (California Dolls, The) (1981)",...And God Spoke (1993),...And Justice for All (1979),1-900 (06) (1994),10 (1979),...,"Zorro, the Gay Blade (1981)",Zulu (1964),Zus & Zo (2001),[REC] (2007),eXistenZ (1999),loudQUIETloud: A Film About the Pixies (2006),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Round Midnight (1986),0.0,1.0,0.967295,1.0,0.970086,1.0,1.0,0.956182,1.0,0.929238,...,0.956964,0.896233,0.818078,1.0,0.978199,1.0,0.973195,0.969703,0.956141,0.910811
'Til There Was You (1997),1.0,0.0,0.946326,0.928409,0.951665,1.0,1.0,0.914804,0.9956,0.967857,...,0.974586,0.995166,1.0,1.0,0.96853,1.0,0.96165,0.955062,0.955323,0.993645
"'burbs, The (1989)",0.967295,0.946326,0.0,0.948625,0.827449,0.958881,0.978141,0.878311,1.0,0.874861,...,0.928564,0.932674,0.938172,0.985205,0.87863,0.929283,0.905966,0.968174,0.694802,0.981467


In [8]:
# Search the recommendation engine and print the closest picks for each matching title

def top_ten_recommendations(search, n=10):
    """Print the ``n`` closest titles for every movie whose title contains ``search``.

    The search is a case-insensitive, literal substring match against
    ``movies_df['title']``. Matching titles that are not present in
    ``recommender_df`` are skipped.

    Parameters
    ----------
    search : str
        Text to look for inside the movie titles. It is matched literally,
        so characters such as ``(``, ``[`` or ``*`` need no escaping.
    n : int, default 10
        Number of recommendations to print per matching title.

    Returns
    -------
    None
        Results are printed; a single message is printed when no matching
        title is available in ``recommender_df``.
    """
    # regex=False: titles contain regex metacharacters ("[REC] (2007)",
    # "*batteries not included (1987)") which would otherwise mis-match or raise.
    # na=False: a missing title must count as "no match" instead of producing
    # a NaN mask that cannot be used for boolean indexing.
    title_matches = movies_df['title'].str.lower().str.contains(
        search.lower(), regex=False, na=False
    )

    found = False
    for title in movies_df.loc[title_matches, 'title'].values:
        if title not in recommender_df.index:
            continue
        found = True
        # Remove the movie itself by label rather than by position: with ties
        # at distance 0 (or floating-point noise) it is not guaranteed to sort first.
        closest = recommender_df[title].drop(title).sort_values().head(n)
        print(title)
        print(" ")
        print(closest)
        print(" ")
        print(" ")

    if not found:
        print(f"No movies matching '{search}' were found in the recommender.")

In [12]:
# A partial title is enough: the search matches any title containing this text, ignoring case
top_ten_recommendations('Monty Python and the Holy Grai')

Monty Python and the Holy Grail (1975)
 
title
Monty Python's Life of Brian (1979)                                               0.350954
Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)    0.396825
Star Wars: Episode V - The Empire Strikes Back (1980)                             0.404323
Princess Bride, The (1987)                                                        0.421192
Back to the Future (1985)                                                         0.429259
Groundhog Day (1993)                                                              0.440186
Indiana Jones and the Last Crusade (1989)                                         0.440687
Star Wars: Episode IV - A New Hope (1977)                                         0.447785
Fish Called Wanda, A (1988)                                                       0.451528
Blade Runner (1982)                                                               0.458461
Name: Monty Python and the Holy Grail (1975