In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib
import re
import ipywidgets as widgets
from IPython.display import display
import json

In [2]:
# Objects persisted with joblib. `vector` is later used through .transform()
# and `vec_metric` is what queries are compared against with cosine similarity,
# so they are presumably a fitted TfidfVectorizer and its title matrix -- TODO confirm.
# joblib.load unpickles its input: only point it at files from a trusted source.
vector, vec_metric = (joblib.load(pkl) for pkl in ('vectorizer.pkl', 'vec_metric.pkl'))

# Tabular inputs: the cleaned movie catalogue and the ratings table under ml-25m/.
movie = pd.read_csv('clean_movie.csv')
ratings = pd.read_csv('ml-25m/ratings.csv')

In [3]:
# Preview the first five catalogue rows to check which columns were loaded.
movie.head(n=5)

Unnamed: 0,movieId,clean_title,genres
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji,Adventure|Children|Fantasy
2,3,Grumpier Old Men,Comedy|Romance
3,4,Waiting to Exhale,Comedy|Drama|Romance
4,5,Father of the Bride Part II,Comedy


In [4]:
def clean_title(title):
    """Return `title` with everything except ASCII letters, digits and spaces removed."""
    disallowed = re.compile('[^a-zA-Z0-9 ]')
    return disallowed.sub('', title)

In [5]:
def search(title, top_n=5):
    """Return the catalogue rows whose titles best match `title`, best match first.

    The query is normalised with clean_title(), vectorised with the module-level
    `vector`, and scored against `vec_metric` using cosine similarity.

    Parameters
    ----------
    title : str
        Free-text movie title to look up.
    top_n : int, default 5
        Maximum number of rows to return; capped at the number of candidates.

    Returns
    -------
    pandas.DataFrame
        Rows of `movie`, ordered by decreasing similarity to the query.
    """
    title = clean_title(title)
    query_vec = vector.transform([title])
    # flatten() turns the (1, n) similarity matrix into a 1-D score array.
    similarity = cosine_similarity(query_vec, vec_metric).flatten()

    # Never ask for more rows than exist: argpartition raises when kth is out of bounds.
    top_n = min(top_n, similarity.size)
    if top_n <= 0:
        return movie.iloc[:0]

    # argpartition only guarantees that the top_n largest scores sit in the tail;
    # their relative order is undefined, so rank that short slice explicitly.
    candidates = np.argpartition(similarity, -top_n)[-top_n:]
    ranked = candidates[np.argsort(similarity[candidates])[::-1]]
    return movie.iloc[ranked]

In [6]:
def MovieId(name):
    """Look up `name` with search() and return the movieId of the first row it returns."""
    first_hit = search(name).iloc[0]
    return first_hit['movieId']

In [17]:
def recommendation(movieId, like_threshold=4, min_share=0.2, top_n=10,
                   ratings_df=None, movie_df=None):
    """Recommend titles liked disproportionately often by fans of `movieId`.

    A rating counts as a "like" when it is strictly greater than
    `like_threshold`. Users who liked `movieId` are the "similar" users. Every
    movie liked by more than `min_share` of them becomes a candidate and is
    scored as

        (share of similar users who liked it) /
        (share of all users who liked at least one candidate and liked it)

    so movies that are simply popular with everyone are ranked down.

    Parameters
    ----------
    movieId : int
        Identifier of the seed movie.
    like_threshold : float, default 4
        Ratings must exceed this value to count as a like.
    min_share : float, default 0.2
        Minimum fraction of similar users that must like a candidate.
    top_n : int, default 10
        Maximum number of titles returned.
    ratings_df : pandas.DataFrame, optional
        Table with 'userId', 'movieId' and 'rating' columns. Defaults to the
        module-level `ratings`.
    movie_df : pandas.DataFrame, optional
        Table with 'movieId' and 'clean_title' columns. Defaults to the
        module-level `movie`.

    Returns
    -------
    pandas.Series
        The 'clean_title' values of the best-scoring candidates, best first.
        Empty when nobody liked `movieId`.
    """
    if ratings_df is None:
        ratings_df = ratings
    if movie_df is None:
        movie_df = movie

    # Every step below only looks at likes, so filter the ratings table once.
    liked = ratings_df[ratings_df['rating'] > like_threshold]

    similar_user = liked.loc[liked['movieId'] == movieId, 'userId'].unique()
    if len(similar_user) == 0:
        # Nothing to base a recommendation on; skip the remaining scans and
        # the division by zero they would involve.
        return movie_df['clean_title'].iloc[:0]

    # Share of similar users who liked each movie, keeping those above min_share.
    similar_likes = liked.loc[liked['userId'].isin(similar_user), 'movieId']
    similar_user_rec = similar_likes.value_counts() / len(similar_user)
    similar_user_rec = similar_user_rec[similar_user_rec > min_share]

    # Share of all users (who liked any candidate) who liked each candidate.
    all_user = liked[liked['movieId'].isin(similar_user_rec.index)]
    all_user_rec = all_user['movieId'].value_counts() / len(all_user['userId'].unique())

    rec_percent = pd.concat([similar_user_rec, all_user_rec], axis=1)
    rec_percent.columns = ['similar', 'all']
    rec_percent['score'] = rec_percent['similar'] / rec_percent['all']
    rec_percent = rec_percent.sort_values(by='score', ascending=False)

    # Attach catalogue data to the best candidates and return only their titles.
    best = rec_percent.head(top_n)
    return best.merge(movie_df, left_index=True, right_on='movieId')['clean_title']
    

In [20]:

# Spot-check the recommender for one hard-coded movieId, shown as a plain list of titles.
recommendation(186079).tolist()

[]

In [11]:
# Free-text box in which the user types a movie title.
movie_input_name = widgets.Text(
    description='Movie Title:',
    value='',
    disabled=False,
)

# Output area that on_type() clears and refills with recommendations.
recommendation_list = widgets.Output()

def on_type(data):
    """Refresh the recommendation panel when the text box value changes.

    `data` is the change notification handed over by observe(); only its
    'new' entry (the current text) is read. Inputs of five characters or
    fewer just clear the panel and never trigger a lookup.
    """
    with recommendation_list:
        recommendation_list.clear_output()
        typed = data['new']
        if len(typed) <= 5:
            return
        # The first row returned by search() is taken as the movie the user means.
        best_match_id = search(typed).iloc[0]['movieId']
        display(recommendation(best_match_id))
            
# Call on_type for every change of the text box's `value` trait, then render
# the input box followed by the results panel.
movie_input_name.observe(handler=on_type, names='value')
display(movie_input_name, recommendation_list)

Text(value='', description='Movie Title:')

Output()