In [81]:
import pandas as pd
import numpy as np
from sklearn.decomposition import NMF
from fuzzywuzzy import process

In [68]:
file_path = './ml-latest-small/'

## Importação dos dados

In [69]:
movies = pd.read_csv(file_path + "movies.csv")
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [70]:
df = pd.read_csv(file_path + 'ratings.csv')
df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


## Criando matriz de notas

In [None]:
del df['timestamp']
ratings = df.pivot('movieId', 'userId')
ratings.fillna(0.0, inplace=True) #insert 0 if Na/NaN
ratings.head()

# Inserindo nome de filmes para busca

In [71]:
movie_1 = input("Please provide a movie title: ")
movie_2 = input("Please provide another movie title: ")
movie_3 = input("Please provide another movie title: ")

Please provide a movie title: Cinderella
Please provide another movie title: Toy Story
Please provide another movie title: Mulan


In [72]:
movie_titles = movies['title'].tolist()
# print(movie_titles)
movie_1c = process.extractOne(movie_1, movie_titles)[0]
movie_2c = process.extractOne(movie_2, movie_titles)[0]
movie_3c = process.extractOne(movie_3, movie_titles)[0]
movie_list = [movie_1c, movie_2c, movie_3c]
# print(movie_list)

# Pegando lista de ids dos filmes escolhidos

In [79]:
id_list = []
print(movie_list)
for i in movie_list:
    a = movies['movieId'][movies['title'] == i]
    id_list.append(a)
for i in id_list:
    print("olar",i)

['Cinderella (1950)', 'Toy Story (1995)', 'Mulan (1998)']
olar 780    1022
Name: movieId, dtype: int64
olar 0    1
Name: movieId, dtype: int64
olar 1390    1907
Name: movieId, dtype: int64


## Colocar o ranking de preferencia do input no ranking

In [88]:
user_list = pd.Series(
    np.zeros(ratings.shape[0]), index=ratings.index)
for i in id_list:
    user_list[i] = 5


## Treinamento

In [132]:
model = NMF(n_components = 25,init='nndsvd', random_state=1) # n_components  means how many features want to train
model.fit(ratings)                                                #'nndsvd' better for sparseness
W = model.transform(ratings) 
profile = np.dot(user_list, W) #if use ratings gets error due to different shapes
ranking = np.dot(profile, W.T)
ranking = pd.Series(ranking, index=ratings.index)

## Recomendação Final

In [136]:
print()
print("Here are your recommendations: ")
print()
result = pd.DataFrame({'title': movies['title'], 'rank': ranking})
result.sort_values('rank', ascending=False, inplace=True)
print(result.head(25))


Here are your recommendations: 

                                                  title       rank
1                                        Jumanji (1995)  42.104540
260                                    Quiz Show (1994)  39.098294
356                        Age of Innocence, The (1993)  36.982455
780                                   Cinderella (1950)  34.700205
1073                               Jerry Maguire (1996)  34.121747
318                               I Love Trouble (1994)  33.309554
32                                          Babe (1995)  31.517024
296                                   Virtuosity (1995)  30.502313
608                                        Heavy (1995)  30.494520
1210                       187 (One Eight Seven) (1997)  30.449644
364                        Beverly Hills Cop III (1994)  29.141080
593          Cemetery Man (Dellamorte Dellamore) (1994)  28.344536
588                        Great White Hype, The (1996)  27.948771
648                         