In [13]:
import re
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Movie catalogue; later code reads its "movieId", "title" and "genres" columns.
movies = pd.read_csv("data/movies.csv")
# Per-user ratings; later code reads its "userId", "movieId" and "rating" columns.
ratings = pd.read_csv("data/ratings.csv")

def clean_title(title):
    """Return ``title`` without "(YYYY)" year tags and without outer whitespace.

    Every parenthesised run of exactly four digits is removed, wherever it
    occurs in the string; other parenthesised text is kept.  Whitespace left
    in the middle of the string by a removal is not collapsed.
    """
    without_year = re.sub(r'\(\d{4}\)', '', title)
    return without_year.strip()

# Normalise every catalogue title (drop the "(YYYY)" tag) so that catalogue
# titles and cleaned user queries share the same form.
movies["title"] = movies["title"].map(clean_title)
# One text document per movie: its title followed by its genre string.
movies["combine"] = movies["title"].str.cat(movies["genres"], sep=" ")
# Unigram + bigram TF-IDF model fitted on those documents; `tfidf` is the
# resulting matrix for the whole catalogue, in `movies` row order.
vectorizer = TfidfVectorizer(ngram_range=(1, 2))
tfidf = vectorizer.fit_transform(movies["combine"])

def _users_who_liked(title):
    """Return the ids of users who rated the movie titled ``title`` 4 or higher.

    ``title`` is compared for exact equality with ``movies["title"]``; when
    several movies share the title, the first one is used.  A title that is
    not in the catalogue yields an empty array instead of raising, so the
    caller can fall back to text similarity alone.
    """
    movie_ids = movies.loc[movies["title"] == title, "movieId"]
    if movie_ids.empty:
        # Slice-then-unique gives an empty array that keeps the userId dtype.
        return ratings["userId"].iloc[:0].unique()
    liked_it = (ratings["movieId"] == movie_ids.iloc[0]) & (ratings["rating"] >= 4)
    return ratings.loc[liked_it, "userId"].unique()


def _collaborative_scores(similar_users):
    """Score movies that ``similar_users`` like more often than users in general.

    Returns the rows of ``movies`` for every scored movie, extended with the
    columns "similar_users", "all_users" and "suggest".  The frame is empty
    (but has those columns) when ``similar_users`` is empty.
    """
    liked = ratings["rating"] >= 4

    # Share of the similar users who liked each movie; keep those above 20%.
    fan_share = ratings[ratings["userId"].isin(similar_users) & liked]["movieId"].value_counts() / len(similar_users)
    fan_share = fan_share[fan_share > 0.2]

    # Share of users liking each kept movie, among all users who liked at
    # least one of the kept movies.
    candidates = ratings[ratings["movieId"].isin(fan_share.index) & liked]
    overall_share = candidates["movieId"].value_counts() / len(candidates["userId"].unique())

    rec_percentage = pd.concat([fan_share, overall_share], axis=1)
    rec_percentage.columns = ["similar_users", "all_users"]
    rec_percentage = rec_percentage.fillna(0)
    # Ratio of the two shares, divided by 10 before it is mixed with the
    # text-similarity score.
    rec_percentage["suggest"] = (rec_percentage["similar_users"] / rec_percentage["all_users"]) / 10
    rec_percentage = rec_percentage.sort_values("suggest", ascending=False)
    return rec_percentage.merge(movies, left_index=True, right_on="movieId")


def recommend_movie(title):
    """Rank the catalogue against ``title`` by text and rating-based evidence.

    Two scores are computed per movie and blended:

    * ``similarity`` - linear kernel between the TF-IDF vector of the cleaned
      query and each movie's "title + genres" vector (this equals cosine
      similarity when the vectors are L2-normalised, which is
      ``TfidfVectorizer``'s default).
    * ``suggest`` - how much more often the movie is liked (rating >= 4) by
      users who liked the queried movie than by users in general; 0 for
      movies without such evidence.

    ``total_score`` is ``(similarity + 2 * suggest) / 3``.

    If the cleaned title does not exactly match a catalogue title, no rating
    evidence is available: ``suggest`` is 0 everywhere and the ranking is
    based on text similarity only (previously this case raised IndexError).

    Parameters
    ----------
    title : str
        Movie title as typed by the user; a "(YYYY)" tag and outer
        whitespace are removed before matching.

    Returns
    -------
    pandas.DataFrame
        One row per distinct title with the columns "title", "similarity",
        "suggest" and "total_score", sorted by "total_score" descending.
    """
    title = clean_title(title)

    # Text-based score of every catalogue movie against the query.
    query_vector = vectorizer.transform([title])
    similarity = linear_kernel(query_vector, tfidf).flatten()
    similarity_df = pd.DataFrame(similarity, index=movies["title"])
    similarity_df.columns = ["similarity"]

    # Rating-based score; an empty frame when the title is not in the catalogue.
    rec = _collaborative_scores(_users_who_liked(title))

    # Left merge keeps every catalogue movie; movies without a rating-based
    # score get NaN, which becomes 0 below.
    merged_df = pd.merge(similarity_df, rec, on="title", how="left")
    merged_df = merged_df.drop(columns=["movieId", "genres", "combine", "similar_users", "all_users"])
    merged_df = merged_df.fillna(0)
    merged_df["total_score"] = (merged_df["similarity"] + merged_df["suggest"] * 2) / 3
    merged_df = merged_df.sort_values("total_score", ascending=False)
    # Titles can repeat; sorting first means the best-scored row is the one kept.
    merged_df = merged_df.drop_duplicates(subset="title")

    return merged_df

### Type the title of the movie you want recommendations for. The title must match exactly (e.g. "Toy Story") so that other movies can be recommended for you.

In [14]:
# Output area that will hold the recommendation table.
recommendation_list = widgets.Output()
# Free-text box in which the user types a movie title.
movie_name_input = widgets.Text(value='', description='Movie Title:', disabled=False)

def on_type(data):
    """Refresh the recommendation area after the title text box changes.

    ``data`` is the change dictionary passed by ``observe``; its "new" entry
    is the text currently in the box.  The previous output is always cleared.
    Blank input shows nothing, and a title that cannot be looked up shows a
    short message instead of a traceback.
    """
    title = data["new"].strip()
    with recommendation_list:
        recommendation_list.clear_output()
        if not title:
            # Box was emptied: leave the output area blank.
            return
        try:
            recommendations = recommend_movie(title)
        except IndexError:
            # Raised when the lookup finds no movie with this exact title,
            # e.g. while the user is still typing.
            print(f'No movie titled "{title}" was found. Type the exact title, e.g. "Toy Story".')
            return
        display(recommendations)

# Call on_type whenever the text box's "value" trait changes.
movie_name_input.observe(on_type, names='value')
# Render the text box followed by the results area.
display(movie_name_input, recommendation_list)

Text(value='', description='Movie Title:')

Output()