In [46]:
#import libraries
import pandas as pd
import numpy as np
import random
from sklearn.decomposition import NMF

In [47]:
# Load the three ml-latest-small tables (ratings, movies, tags) from CSV.
ratings, movies, tags = (
    pd.read_csv(csv_path)
    for csv_path in (
        'ml-latest-small/ratings.csv',
        'ml-latest-small/movies.csv',
        'ml-latest-small/tags.csv',
    )
)

In [48]:
def input_cleanup(df, filter_name, user_threshold=600, movie_vote_threshold=9):
    '''
    Return a copy of df in which the ratings selected by filter_name are
    reassigned to 3 (to neutralize them). df itself is never modified, and the
    row index and column order of the copy are left as they were.

    Supported filter_name values (the names describe the intent; the actual
    cut-offs are the two threshold parameters):
    1. top25_users_neutralize: neutralizes every rating given by users who
       submitted more than user_threshold ratings.
    2. bottom75_users_neutralize: neutralizes every rating given by users who
       submitted user_threshold ratings or fewer (the exact complement of 1).
    3. top25_voted_neutralize: neutralizes every rating of movies that received
       more than movie_vote_threshold ratings **note: selection is by the
       number of ratings received, not by the rating values**
    4. rate4_5_neutralize: neutralizes every rating of movies whose mean rating
       is exactly 4 or exactly 5.
    5. combo: combination of 2 and 3.

    Any other filter_name returns the unmodified copy.

    Parameters
    ----------
    df : DataFrame with at least the columns 'userId', 'movieId' and 'rating'.
    filter_name : str, one of the names listed above.
    user_threshold : number of ratings separating the two user groups.
    movie_vote_threshold : number of received ratings above which a movie
        counts as heavily voted.

    Returns
    -------
    DataFrame : the neutralized copy of df.
    '''
    cleaned = df.copy()

    # Per-row aggregates (one value per rating row), so every filter can be
    # expressed as a plain row mask instead of re-indexing the frame.
    ratings_per_user = cleaned.groupby('userId')['rating'].transform('count')
    ratings_per_movie = cleaned.groupby('movieId')['rating'].transform('count')
    mean_per_movie = cleaned.groupby('movieId')['rating'].transform('mean')

    heavy_users = ratings_per_user > user_threshold
    # "<=" (not "<") so a user sitting exactly on the threshold belongs to one
    # of the two groups instead of silently falling into neither.
    light_users = ratings_per_user <= user_threshold
    popular_movies = ratings_per_movie > movie_vote_threshold
    mean_is_4_or_5 = (mean_per_movie == 4) | (mean_per_movie == 5)

    masks = {
        'top25_users_neutralize': heavy_users,
        'bottom75_users_neutralize': light_users,
        'top25_voted_neutralize': popular_movies,
        'rate4_5_neutralize': mean_is_4_or_5,
        'combo': light_users | popular_movies,
    }

    selected = masks.get(filter_name)
    if selected is not None:
        # Positional boolean array: independent of whatever index df carries.
        cleaned.loc[selected.to_numpy(), 'rating'] = 3
    return cleaned

In [49]:
# Keep an untouched baseline, then build one neutralized variant per filter.
ratings_default = ratings.copy()
ratings1, ratings2, ratings3, ratings4, ratings5 = (
    input_cleanup(ratings, filter_name)
    for filter_name in (
        'top25_users_neutralize',
        'bottom75_users_neutralize',
        'top25_voted_neutralize',
        'rate4_5_neutralize',
        'combo',
    )
)

In [50]:
def nmf_model(df, n_components=20, random_state=None):
    '''
    Build an NMF model from a ratings table. The function:
    1. reshapes the input so userId is the index, movieId are the columns and
       the values are the ratings; user/movie pairs without a rating are
       filled with 3 (neutral)
    2. records the number of movie columns from (1) for the recommender function
    3. records the movieId column labels from (1) for the recommender function
    4. converts the table from (1) to an array
    5. fits an NMF model with n_components components on the array from (4)
    6. extracts the fitted component-by-movie matrix for the recommender function

    Parameters
    ----------
    df : DataFrame with the columns 'userId', 'movieId' and 'rating'; each
        (userId, movieId) pair may appear only once, otherwise the pivot fails.
    n_components : number of NMF components (default 20).
    random_state : forwarded to NMF; pass an int to make the fit repeatable.
        The default None leaves the estimator unseeded.

    Returns
    -------
    (num_movies, movieId, model, components) : number of movie columns, their
        movieId labels, the fitted NMF estimator and its components_ matrix.
    '''
    #1
    rating_table = df.pivot(index='userId', columns='movieId', values='rating').fillna(3)
    #2, #3
    num_movies = rating_table.shape[1]
    movieId = rating_table.columns
    #4, #5
    model = NMF(n_components=n_components, random_state=random_state)
    model.fit(np.array(rating_table))
    #6
    return num_movies, movieId, model, model.components_

In [51]:
# Baseline: factorize the unmodified ratings.
(
    num_movies_default,
    movieId_default,
    model_default,
    component_default,
) = nmf_model(ratings_default)

In [52]:
# Variant 1: ratings cleaned with 'top25_users_neutralize'.
(
    num_movies1,
    movieId1,
    model1,
    component1,
) = nmf_model(ratings1)

In [53]:
# Variant 2: ratings cleaned with 'bottom75_users_neutralize'.
(
    num_movies2,
    movieId2,
    model2,
    component2,
) = nmf_model(ratings2)

In [54]:
# Variant 3: ratings cleaned with 'top25_voted_neutralize'.
(
    num_movies3,
    movieId3,
    model3,
    component3,
) = nmf_model(ratings3)

In [55]:
# Variant 4: ratings cleaned with 'rate4_5_neutralize'.
(
    num_movies4,
    movieId4,
    model4,
    component4,
) = nmf_model(ratings4)

In [56]:
# Variant 5: ratings cleaned with 'combo'.
(
    num_movies5,
    movieId5,
    model5,
    component5,
) = nmf_model(ratings5)

In [57]:
def movie_indeces(dfmovie, dftags):
    '''
    Build three lookup dictionaries keyed by movieId.

    dfmovie supplies the 'title' and 'genres' columns, dftags the 'tag'
    column. When a movieId occurs on several rows, the value from the last
    such row wins, so a movie with several tags keeps only its last tag.

    Returns (movie_title_index, movie_genre_index, movie_tag_index).
    '''
    def lookup(frame, value_column):
        # Pair each movieId with the value found on the same row.
        return dict(zip(frame['movieId'], frame[value_column]))

    return (
        lookup(dfmovie, 'title'),
        lookup(dfmovie, 'genres'),
        lookup(dftags, 'tag'),
    )

In [58]:
# movieId -> title / genres / tag lookups used by the recommender.
(
    movie_title_index,
    movie_genre_index,
    movie_tag_index,
) = movie_indeces(movies, tags)

In [59]:
# Stand-in for a user's three favourite films: a random pick from all titles.
# A dedicated, seeded generator keeps the pick identical on every
# Restart & Run All without touching the global `random` state.
top_movie_names = random.Random(42).sample(list(movie_title_index.values()), 3)
top_movie_names

['Avengers: Age of Ultron (2015)',
 'East-West (Est-ouest) (1999)',
 "Bride of Chucky (Child's Play 4) (1998)"]

In [60]:
def new_user(movie_array, movie_title_index, num_movies, movieId=None):
    '''
    Creates a new user array based on the user's top pick input.

    The result is a (1, num_movies) integer array filled with 3 (neutral) in
    which the positions of the chosen titles are set to 5.

    Parameters
    ----------
    movie_array : iterable of movie titles chosen by the user.
    movie_title_index : dict mapping movieId -> title.
    num_movies : number of columns of the array to create.
    movieId : optional sequence of movieIds giving the column order of the
        rating matrix the array will be used with. When supplied, a title is
        placed at the column of its movieId. When omitted (legacy behaviour),
        a title is placed at its position among movie_title_index's values,
        which only matches the rating matrix if both list exactly the same
        movies in the same order.

    If several entries carry the same title, the first one is used.

    Raises
    ------
    ValueError : a title in movie_array cannot be located.
    IndexError : a located position is not smaller than num_movies.
    '''
    if movieId is None:
        ordered_titles = list(movie_title_index.values())
    else:
        ordered_titles = [movie_title_index.get(movie_id) for movie_id in movieId]

    # Built once instead of re-listing and scanning the titles for every pick;
    # setdefault keeps the first occurrence, like list.index did.
    first_position = {}
    for position, title in enumerate(ordered_titles):
        first_position.setdefault(title, position)

    picked_positions = []
    for title in movie_array:
        if title not in first_position:
            raise ValueError(f'{title!r} is not among the known movie titles')
        picked_positions.append(first_position[title])

    profile = np.full((1, num_movies), 3, dtype=int)
    profile[0][picked_positions] = 5
    return profile

In [61]:
# Build the profile array for the sampled top picks.
# NOTE: this assignment rebinds the name `new_user` from the function defined
# above to the array it returns. Re-running this cell without first re-running
# the cell that defines the function fails, because `new_user` is then an
# array and no longer callable.
new_user=new_user(top_movie_names, movie_title_index, num_movies_default)

In [62]:
def movie_recommender(new_user, model, component1, movieId, movie_genre_index, movie_tag_index, movie_title_index, top_movie_names=None):
    '''
    Takes the user profile array and calculates recommendations top to bottom by:
    1. calculating a weight for each film for that user with the NMF model
    2. attaching the genre and tag of each movieId
    3. replacing the movieId with the movie title (ids without a title are kept)
    4. removing the films the user already identified as top picks
    5. ordering the table so the highest weighted recommendations come first

    Parameters
    ----------
    new_user : array of shape (1, number of movies) holding the user's ratings.
    model : fitted model exposing transform(); used to project new_user.
    component1 : component-by-movie matrix multiplied with the projection.
    movieId : movieIds in the column order of component1; any sequence works.
    movie_genre_index, movie_tag_index, movie_title_index : dicts keyed by
        movieId.
    top_movie_names : titles to leave out of the result. When None, the
        module-level variable `top_movie_names` is used if it exists (this is
        what earlier revisions always did); if it does not exist nothing is
        left out.

    Returns
    -------
    DataFrame with the columns 'movieId' (holding titles), 'weight', 'genre'
    and 'tag', sorted by descending weight. The row labels are the column
    positions of the films in component1.
    '''
    if top_movie_names is None:
        # Backward compatibility with callers that rely on the notebook global.
        top_movie_names = globals().get('top_movie_names', ())

    #1
    user_profile = model.transform(new_user)
    weights = np.dot(user_profile, component1)[0]
    #2
    recommendations = pd.DataFrame({
        'movieId': pd.Series(list(movieId)),
        'weight': weights,
    })
    recommendations['genre'] = recommendations['movieId'].map(movie_genre_index)
    recommendations['tag'] = recommendations['movieId'].map(movie_tag_index)
    #3
    titles = recommendations['movieId'].map(movie_title_index)
    # Ids missing from the title index stay as they are.
    recommendations['movieId'] = titles.where(titles.notna(), recommendations['movieId'])
    #4
    already_picked = recommendations['movieId'].isin(list(top_movie_names))
    recommendations = recommendations[~already_picked]
    #5
    return recommendations.sort_values('weight', ascending=False)

In [63]:
# Baseline: recommendations from the model fitted on the unmodified ratings.
recommendations_default = movie_recommender(
    new_user,
    model_default,
    component_default,
    movieId_default,
    movie_genre_index,
    movie_tag_index,
    movie_title_index,
)
recommendations_default

Unnamed: 0,movieId,weight,genre,tag
314,Forrest Gump (1994),3.243143,Comedy|Drama|Romance|War,touching
277,"Shawshank Redemption, The (1994)",3.230061,Crime|Drama,Morgan Freeman
224,Star Wars: Episode IV - A New Hope (1977),3.141556,Action|Adventure|Sci-Fi,space opera
0,Toy Story (1995),3.117652,Adventure|Animation|Children|Comedy|Fantasy,fun
1938,"Matrix, The (1999)",3.111467,Action|Sci-Fi|Thriller,post apocalyptic
...,...,...,...,...
1234,I Know What You Did Last Summer (1997),2.942517,Horror|Mystery|Thriller,
3086,Hannibal (2001),2.941113,Horror|Thriller,
3162,Scarface (1983),2.941066,Action|Crime|Drama,Al Pacino
396,Free Willy (1993),2.939768,Adventure|Children|Drama,


In [64]:
# top25_users_neutralize: model fitted with the ratings of the most active
# users (the "top 25%") neutralized.
recommendations1 = movie_recommender(
    new_user,
    model1,
    component1,
    movieId_default,
    movie_genre_index,
    movie_tag_index,
    movie_title_index,
)
recommendations1

Unnamed: 0,movieId,weight,genre,tag
314,Forrest Gump (1994),3.128929,Comedy|Drama|Romance|War,touching
277,"Shawshank Redemption, The (1994)",3.099005,Crime|Drama,Morgan Freeman
1938,"Matrix, The (1999)",3.094056,Action|Sci-Fi|Thriller,post apocalyptic
224,Star Wars: Episode IV - A New Hope (1977),3.090673,Action|Adventure|Sci-Fi,space opera
2144,American Beauty (1999),3.083730,Drama|Romance,
...,...,...,...,...
6517,"Bourne Ultimatum, The (2007)",2.955853,Action|Crime|Thriller,Robert Ludlum
504,Home Alone (1990),2.948833,Children|Comedy,christmas
315,Four Weddings and a Funeral (1994),2.947010,Comedy|Romance,wedding
2034,"Blair Witch Project, The (1999)",2.942413,Drama|Horror|Thriller,video


In [65]:
# bottom75_users_neutralize: model fitted with the ratings of the less active
# users (the "bottom 75%") neutralized.
recommendations2 = movie_recommender(
    new_user,
    model2,
    component2,
    movieId_default,
    movie_genre_index,
    movie_tag_index,
    movie_title_index,
)
recommendations2

Unnamed: 0,movieId,weight,genre,tag
123,Apollo 13 (1995),3.010985,Adventure|Drama|IMAX,space
1290,Titanic (1997),3.010544,Drama|Romance,romance
692,Some Like It Hot (1959),3.010192,Comedy|Crime,men in drag
933,"Sting, The (1973)",3.009014,Comedy|Crime,The Entertainer
5719,Million Dollar Baby (2004),3.008583,Drama,boxing
...,...,...,...,...
607,Striptease (1996),2.992419,Comedy|Crime,
1144,Anaconda (1997),2.992185,Action|Adventure|Thriller,
1260,Starship Troopers (1997),2.991879,Action|Sci-Fi,
4696,Battle Royale (Batoru rowaiaru) (2000),2.991411,Action|Drama|Horror|Thriller,violence


In [66]:
# top25_voted_neutralize: model fitted with the most-voted films neutralized
# **note: selected by how many ratings a film received, not by its rating**
recommendations3 = movie_recommender(
    new_user,
    model3,
    component3,
    movieId_default,
    movie_genre_index,
    movie_tag_index,
    movie_title_index,
)
recommendations3

Unnamed: 0,movieId,weight,genre,tag
3639,Witness for the Prosecution (1957),3.021361,Drama|Mystery|Thriller,court
9600,"Three Billboards Outside Ebbing, Missouri (2017)",3.019424,Crime|Drama,
720,"Adventures of Robin Hood, The (1938)",3.016666,Action|Adventure|Romance,swashbuckler
3559,Life as a House (2001),3.016233,Drama,
4206,Monty Python Live at the Hollywood Bowl (1982),3.016088,Comedy,spoof
...,...,...,...,...
3522,Joy Ride (2001),2.986991,Adventure|Thriller,
1789,Psycho (1998),2.985486,Crime|Horror|Thriller,
271,Stuart Saves His Family (1995),2.985006,Comedy,
3799,Jason X (2002),2.983748,Horror|Sci-Fi|Thriller,


In [67]:
# rate4_5_neutralize: model fitted with films rated 4 or 5 on average
# neutralized.
recommendations4 = movie_recommender(
    new_user,
    model4,
    component4,
    movieId_default,
    movie_genre_index,
    movie_tag_index,
    movie_title_index,
)
recommendations4

Unnamed: 0,movieId,weight,genre,tag
277,"Shawshank Redemption, The (1994)",3.204807,Crime|Drama,Morgan Freeman
314,Forrest Gump (1994),3.169978,Comedy|Drama|Romance|War,touching
224,Star Wars: Episode IV - A New Hope (1977),3.118594,Action|Adventure|Sci-Fi,space opera
1938,"Matrix, The (1999)",3.109376,Action|Sci-Fi|Thriller,post apocalyptic
2144,American Beauty (1999),3.104039,Drama|Romance,
...,...,...,...,...
6293,Borat: Cultural Learnings of America for Make ...,2.947301,Comedy,
2034,"Blair Witch Project, The (1999)",2.945774,Drama|Horror|Thriller,video
5722,Charlie and the Chocolate Factory (2005),2.945010,Adventure|Children|Comedy|Fantasy|IMAX,roald dahl
315,Four Weddings and a Funeral (1994),2.945009,Comedy|Romance,wedding


In [68]:
# combo: model fitted with filters 2 and 3 applied together.
recommendations5 = movie_recommender(
    new_user,
    model5,
    component5,
    movieId_default,
    movie_genre_index,
    movie_tag_index,
    movie_title_index,
)
recommendations5

Unnamed: 0,movieId,weight,genre,tag
1507,Blank Check (1994),3.005855,Children|Comedy,
4743,Bullitt (1968),3.005752,Action|Crime|Drama|Thriller,
3537,Iron Monkey (Siu nin Wong Fei-hung ji: Tit Ma ...,3.005689,Action|Comedy,
669,First Kid (1996),3.005309,Children|Comedy,
3716,Vampire Hunter D: Bloodlust (Banpaia hantâ D) ...,3.005280,Animation|Fantasy|Horror|Sci-Fi,
...,...,...,...,...
7329,Sex and the City 2 (2010),2.995321,Comedy|Drama|Romance,
3154,Josie and the Pussycats (2001),2.995123,Comedy,
1284,Home Alone 3 (1997),2.995067,Children|Comedy,
3799,Jason X (2002),2.995020,Horror|Sci-Fi|Thriller,
