In [1]:
#!/usr/bin/env python
# coding: utf-8

import pandas as pd
import numpy as np
from sklearn.decomposition import NMF
from fuzzywuzzy import process



In [2]:
R = pd.read_csv('../data/UserRatingTitles-withoutYear.csv', index_col=0)

In [3]:
R.shape

(610, 9724)

In [4]:
R.fillna(2.5, inplace=True)

In [5]:
model = NMF(19)
model.fit(R)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200,
    n_components=19, random_state=None, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [6]:
Q = pd.DataFrame(model.components_, columns=R.columns)

P = pd.DataFrame(model.transform(R), index=R.index)

In [7]:
Q.shape, P.shape

((19, 9724), (610, 19))

In [8]:
test_user_input = {
     'toy Story': '5',
     'Jumanyi': '3',
     'Grupmyer Old Men': '4'
    }

REALITY = {'movie1': 'Toy Story (1995)', 
           'rating1': '5', 
           'movie2': 'Jumanji (1995)', 
           'rating2': '5', 
           'movie3': 'Grumpier Old Men (1995)', 
           'rating3': '5'}

In [9]:
def convert_flask_dict(flask_dict):
    new_keys = list(flask_dict.values())[::2]
    new_vals = list(flask_dict.values())[1::2]
    
    return dict(zip(new_keys, new_vals))
    

In [10]:
test_user_input = convert_flask_dict(REALITY)

In [11]:
new_user_vector = pd.DataFrame([np.nan]*len(R.columns), index=R.columns).transpose()

In [16]:
test_user_input

{'Toy Story (1995)': '5',
 'Jumanji (1995)': '5',
 'Grumpier Old Men (1995)': '5'}

In [13]:
# Loop to check whether user_id is there
for key, value in test_user_input.items():

    if key in new_user_vector.columns:
        new_user_vector.loc[:, key] = float(value)
        
        
    else:
        closest_match = process.extract(key, R.columns)[0][0]


        new_user_vector.loc[:, closest_match] = float(value)
        
        
        if len(closest_match) < 0.5*len(key):
            
            closest_match = process.extract(key, R.columns)[1][0]
            new_user_vector.loc[:, closest_match] = float(value)
            # print(closest_match)

In [17]:
new_user_vector

Unnamed: 0,Toy Story,Jumanji,Grumpier Old Men,Waiting to Exhale,Father of the Bride Part II,Heat,Sabrina,Tom and Huck,Sudden Death,GoldenEye,...,Gintama: The Movie,anohana: The Flower We Saw That Day - The Movie,Silver Spoon,Love Live! The School Idol Movie,Jon Stewart Has Left the Building,Black Butler: Book of the Atlantic,No Game No Life: Zero,Flint,Bungo Stray Dogs: Dead Apple,Andrew Dice Clay: Dice Rules
0,5.0,5.0,5.0,,,,,,,,...,,,,,,,,,,


In [18]:
# Fill in the missing values
new_user_vector_filled = new_user_vector.fillna(2.5)

# Calculate the hidden profile with nmf.transform # user-feature_matrix of new user
hidden_profile = model.transform(new_user_vector_filled)

# Calculate the predictions using np.dot
rating_prediction = pd.DataFrame(np.dot(hidden_profile, model.components_), columns= new_user_vector.columns)

# Create a boolean mask to filter out the positions where the data was originally NaN
bool_mask = np.isnan(new_user_vector.values[0])

# Find the movies that have not yet been seen
movies_not_seen = rating_prediction.columns[bool_mask]

# Find recommendations for unseen movies
movies_not_seen_df = rating_prediction[movies_not_seen].T

# Get recommendations
films_recommended = movies_not_seen_df.sort_values(by=0, ascending=False).index[:3]

In [None]:
def get_recommendations(user_input):
    flask_user_input = user_input
    # flask_user_input = {
    # 'toy Story': '5',
    #  'Jumanyi': '3',
    #  'Grupmyer Old Men': '4'
    #  }

    new_user_vector = pd.DataFrame([np.nan]*len(R.columns), index=R.columns).transpose()

    # Loop to check whether user_id is there
    for key, value in flask_user_input.items():
        if key in new_user_vector.columns:
            new_user_vector.loc[:, key] = float(value)
        else:
            closest_match = process.extract(key, R.columns)[0][0]
            new_user_vector.loc[:, closest_match] = float(value)
            if len(process.extract(key, R.columns)[0][0]) < 0.5*len(key):
                closest_match = process.extract(key, R.columns)[1][0]
                new_user_vector.loc[:, closest_match] = float(value)
                # print(closest_match)

    # Fill in the missing values
    new_user_vector_filled = new_user_vector.fillna(2.5)

    # Calculate the hidden profile with nmf.transform # user-feature_matrix of new user
    hidden_profile = model.transform(new_user_vector_filled)

    # Calculate the predictions using np.dot
    rating_prediction = pd.DataFrame(np.dot(hidden_profile, model.components_), columns= new_user_vector.columns)

    # Create a boolean mask to filter out the positions where the data was originally NaN
    bool_mask = np.isnan(new_user_vector.values[0])

    # Find the movies that have not yet been seen
    movies_not_seen = rating_prediction.columns[bool_mask]

    # Find recommendations for unseen movies
    movies_not_seen_df = rating_prediction[movies_not_seen].T

    # Get recommendations
    films_recommended = movies_not_seen_df.sort_values(by=0, ascending=False).index[:3]
    return films_recommended
#print(get_recommendations(user_input))

In [19]:
films_recommended

Index(['Shawshank Redemption, The', 'Forrest Gump',
       'Star Wars: Episode IV - A New Hope'],
      dtype='object')