In [13]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pickle
import json

### Building input data matrix for modeling 

In [14]:
# Read the three raw CSV tables from the ml-latest-small folder
# (presumably the MovieLens "latest small" dataset -- confirm provenance).
ratings, movies, tags = (
    pd.read_csv(csv_path)
    for csv_path in (
        'ml-latest-small/ratings.csv',
        'ml-latest-small/movies.csv',
        'ml-latest-small/tags.csv',
    )
)

In [15]:
# Load the lookup used to relabel movieId columns with titles.
# JSON object keys are always strings, so they are cast back to int here.
with open('vm/movie_title_index.json', 'r') as title_file:
    movie_title_index = {
        int(movie_id): str(title)
        for movie_id, title in json.load(title_file).items()
    }

In [16]:
# Build the user x movie rating matrix: one row per userId, one column per movieId.
# Unrated (user, movie) pairs are filled with 2.5 (presumably a neutral mid-scale rating).
Rtrue = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(2.5)
# Relabel the movieId columns with their titles. rename(columns=...) maps the labels
# in one step without transposing the matrix; ids missing from movie_title_index
# keep their numeric label.
Rtrue = Rtrue.rename(columns=movie_title_index)

In [17]:
# Persist the rating matrix as CSV; per the original note this file is the
# import source for the webserver.
Rtrue.to_csv(path_or_buf='input_data.csv')

In [18]:
# Reload the matrix from the CSV just written, using the first column
# (the userId row labels) as the index.
# NOTE(review): after this round-trip all column labels are strings -- confirm
# downstream lookups expect titles as str.
Rtrue = pd.read_csv(filepath_or_buffer='input_data.csv', index_col=0)

### creating sample user input

In [19]:
# Sample input for a new user: movie title -> rating.
# Ratings are kept as strings here and converted to float when the
# user vector is built.
user_flask = {
    'Grumpier Old Men (1995)': '5',
    'Jumanji (1995)': '5',
    'Waiting to Exhale (1995)': '5',
}

In [20]:
# Give the new user an id one past the largest existing userId, so it cannot
# collide with an existing row label even when userIds are not the contiguous
# range 1..N. For contiguous ids this equals len(Rtrue) + 1; an empty matrix
# yields 1.
new_user_Id = int(Rtrue.index.max()) + 1 if len(Rtrue) else 1

In [21]:
# Build a one-row rating vector for the new user with the same columns as Rtrue:
# every movie starts at the 2.5 fill value, then the titles rated in user_flask
# are overwritten with the user's ratings.
unknown_titles = [title for title in user_flask if title not in Rtrue.columns]
if unknown_titles:
    # Assigning an unknown title would silently add a new column and break the
    # column alignment with Rtrue that the models below rely on.
    raise KeyError(
        "user_flask contains titles missing from Rtrue: {}".format(unknown_titles)
    )
new_user_vector = pd.DataFrame(2.5, index=[new_user_Id], columns=Rtrue.columns)
for title, rating in user_flask.items():
    # user_flask stores ratings as strings, so cast before storing
    new_user_vector.loc[new_user_Id, title] = float(rating)

### Recommender based on the cosine-similarity model

In [22]:
# Stack the new user's row under the existing rating matrix.
# pd.concat is used because DataFrame.append was removed in pandas 2.0;
# with default arguments it produces the same row-wise result.
R_new_user = pd.concat([Rtrue, new_user_vector])

In [23]:
# Pairwise cosine similarity between all users (new user included),
# labelled on both axes by user id.
user_similarity = cosine_similarity(R_new_user)
simi_new_user = pd.DataFrame(
    user_similarity,
    index=R_new_user.index,
    columns=R_new_user.index,
)
# Similarity of the new user to every *other* user (self-similarity removed).
is_existing_user = simi_new_user.index != new_user_Id
simi_new_user2 = simi_new_user.loc[is_existing_user, new_user_Id]
# Similarity-weighted average of the existing users' ratings, one score per movie.
weighted_ratings = np.dot(simi_new_user2, Rtrue) / simi_new_user2.sum()
results2 = pd.DataFrame(weighted_ratings, index=Rtrue.columns).reset_index()

In [24]:
def _read_json(path):
    """Return the parsed content of the JSON file at ``path``."""
    with open(path, 'r') as handle:
        return json.load(handle)


# Lookup tables used below to decorate the recommendation table
# (mapped against the 'movieId' column, which holds the Rtrue column labels).
movie_tags_index = _read_json('vm/movie_tags_index.json')
movie_genres_index = _read_json('vm/movie_genres_index.json')
movie_time_index = _read_json('vm/movie_time_index.json')

In [26]:
# Name the score columns, then attach genre / tag / time-block metadata
# looked up by the value in the 'movieId' column.
results2.columns = ['movieId', 'weight']
for column_name, lookup in (
    ('genre', movie_genres_index),
    ('tag', movie_tags_index),
    ('time_block', movie_time_index),
):
    results2[column_name] = results2['movieId'].map(lookup)
# recommendations is an alias of results2 (same object, not a copy).
recommendations = results2

In [27]:
# Remove, in place, the movies the new user already rated so they are not
# recommended back.
already_rated = recommendations['movieId'].isin(list(user_flask))
recommendations.drop(index=recommendations.index[already_rated], inplace=True)

In [30]:
# Show the five highest-weighted movies.
recommendations.sort_values(by='weight', ascending=False).head(5)

Unnamed: 0,movieId,weight,genre,tag,time_block
277,"Shawshank Redemption, The (1994)",3.501692,Crime|Drama,Morgan Freeman,talk show
314,Forrest Gump (1994),3.396671,Comedy|Drama|Romance|War,touching,tv dinner
257,Pulp Fiction (1994),3.352905,Comedy|Crime|Drama|Thriller,witty,talk show
1938,"Matrix, The (1999)",3.270053,Action|Sci-Fi|Thriller,post apocalyptic,talk show
510,"Silence of the Lambs, The (1991)",3.258805,Crime|Horror|Thriller,suspense,talk show


### recommender based on NMF models

In [39]:
# Load the previously built model artifacts.
# NOTE: pickle.load can execute arbitrary code from the file -- only load
# artifacts produced by this project, never files from an untrusted source.
# Each file is opened in a `with` block so the handle is closed after loading.
with open('flask_app2/combo', 'rb') as model_file:
    combo = pickle.load(model_file)
with open('flask_app2/top25_movies', 'rb') as model_file:
    top25_movies = pickle.load(model_file)
with open('flask_app2/bottom75_user_neutral', 'rb') as model_file:
    bottom75_user_neutral = pickle.load(model_file)

In [40]:
# Project the new user's ratings through the loaded model and reconstruct a
# score for every movie: transform() gives the user's component weights, and
# multiplying by components_ maps them back onto the movie columns.
user_profile = combo.transform(new_user_vector)
results = np.dot(user_profile, combo.components_)
# One row per movie: column label from Rtrue plus the reconstructed score.
results2 = pd.DataFrame({'movieId': Rtrue.columns, 'weight': results[0]})

In [42]:
# Attach genre / tag / time-block metadata looked up by the value in the
# 'movieId' column.
for column_name, lookup in (
    ('genre', movie_genres_index),
    ('tag', movie_tags_index),
    ('time_block', movie_time_index),
):
    results2[column_name] = results2['movieId'].map(lookup)
# recommendations is an alias of results2 (same object, not a copy).
recommendations = results2

In [45]:
# Remove, in place, the movies the new user already selected so they are not
# recommended back.
already_rated = recommendations['movieId'].isin(list(user_flask))
recommendations.drop(index=recommendations.index[already_rated], inplace=True)

In [47]:
# Show the five rows with the LOWEST weight (ascending sort).
# NOTE(review): the cosine-similarity section sorts descending to show the best
# matches; confirm whether ascending order is intended here.
recommendations.sort_values(by='weight', ascending=True).head(5)

Unnamed: 0,movieId,weight,genre,tag,time_block
3154,Josie and the Pussycats (2001),2.495896,Comedy,,tv dinner
4096,Eight Crazy Nights (Adam Sandler's Eight Crazy...,2.496069,Animation|Comedy|Musical,,talk show
3799,Jason X (2002),2.496353,Horror|Sci-Fi|Thriller,,late night
7329,Sex and the City 2 (2010),2.496357,Comedy|Drama|Romance,,late night
4793,Calendar Girls (2003),2.496391,Comedy,,tv dinner
