In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re
import ipywidgets as widgets
from IPython.display import display

In [2]:
# Load the movie catalogue; later cells use its movieId, title and genres columns.
movies = pd.read_csv("movies.csv")

In [3]:
# Load the links table, which supplies the tmdbId that goes with each movieId.
movie_links = pd.read_csv("links.csv")

In [4]:
# Cast tmdbId to pandas' nullable Int64 dtype: integer IDs, with <NA> for any missing entries.
movie_links['tmdbId'] = movie_links['tmdbId'].astype('Int64')

In [5]:
# Attach each movie's tmdbId from the links table. The left join keeps every
# row of `movies`; a movie with no matching link gets <NA>.
# Any existing tmdbId column is dropped first so the cell can be re-run safely:
# without that, a second run merges against the already-merged frame and pandas
# produces tmdbId_x / tmdbId_y instead of a single tmdbId column.
movies = movies.drop(columns="tmdbId", errors="ignore").merge(
    movie_links[["movieId", "tmdbId"]], on="movieId", how="left"
)

In [6]:
# Preview the merged frame to confirm the tmdbId column was attached.
movies

Unnamed: 0,movieId,title,genres,tmdbId
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,862
1,2,Jumanji (1995),Adventure|Children|Fantasy,8844
2,3,Grumpier Old Men (1995),Comedy|Romance,15602
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,31357
4,5,Father of the Bride Part II (1995),Comedy,11862
...,...,...,...,...
62418,209157,We (2018),Drama,499546
62419,209159,Window of the Soul (2001),Documentary,63407
62420,209163,Bad Poems (2018),Comedy|Drama,553036
62421,209169,A Girl Thing (2001),(no genres listed),162892


In [7]:
import re

def clean_title(title):
    """Return `title` with every character except ASCII letters, digits and spaces removed."""
    disallowed = re.compile("[^a-zA-Z0-9 ]")
    return disallowed.sub("", title)

In [8]:
# Store a punctuation-free copy of every title; the TF-IDF index is built from this column.
movies["clean_title"] = movies["title"].map(clean_title)

In [10]:
# TF-IDF vectorizer over unigrams and bigrams (ngram_range=(1, 2)).
vectorizer = TfidfVectorizer(ngram_range=(1, 2))

In [11]:
# Fit the vectorizer on the cleaned titles and encode them as a TF-IDF matrix;
# search() compares each query against this matrix.
tfidf = vectorizer.fit_transform(movies["clean_title"])

In [12]:
def search(title, top_n=5):
    """Return the movies whose titles best match `title`, best match first.

    The query is cleaned with clean_title(), encoded with the fitted
    `vectorizer`, and compared against every row of `tfidf` by cosine
    similarity.

    Parameters
    ----------
    title : str
        Free-text title to look up.
    top_n : int, default 5
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to `top_n` rows of `movies`, ordered by decreasing similarity,
        so `.iloc[0]` is the closest match. Rows are returned even when
        their similarity is zero.
    """
    title = clean_title(title)
    query_vec = vectorizer.transform([title])
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # Clamp so a catalogue smaller than top_n does not make argpartition raise.
    k = min(top_n, similarity.size)
    if k <= 0:
        return movies.iloc[[]]

    # argpartition only guarantees that the k largest scores occupy the last k
    # slots, not that those slots are ordered, so rank the candidates explicitly.
    candidates = np.argpartition(similarity, -k)[-k:]
    ranked = candidates[np.argsort(-similarity[candidates], kind="stable")]
    return movies.iloc[ranked]

In [13]:
# Load the user ratings; find_similar_movies() reads its userId, movieId and rating columns.
ratings = pd.read_csv("ratings.csv")

In [61]:
def find_similar_movies(movie_id, min_rating=1, min_share=0.50, top_n=10):
    """Recommend movies that are over-represented among the raters of `movie_id`.

    "Similar users" are the users who gave `movie_id` a rating of at least
    `min_rating`. Every movie rated (at `min_rating` or above) by more than
    `min_share` of those users becomes a candidate. Each candidate is scored as

        share of similar users who rated it
        ------------------------------------------------------------
        share of users who rated it, among everyone who rated any candidate

    so a score above 1 means the movie is more common among similar users
    than among that wider group. The queried movie itself normally appears in
    the result, because every similar user rated it.

    Parameters
    ----------
    movie_id : int
        movieId to base the recommendations on.
    min_rating : float, default 1
        Ratings below this value are ignored everywhere in the computation.
    min_share : float, default 0.50
        A candidate must be rated by strictly more than this fraction of the
        similar users.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Columns "score", "title" and "genres", highest score first. Empty
        when no user rated `movie_id` at `min_rating` or above.
    """
    # Apply the rating floor once instead of re-filtering for every step.
    qualifying = ratings[ratings["rating"] >= min_rating]

    similar_users = qualifying.loc[qualifying["movieId"] == movie_id, "userId"].unique()
    if len(similar_users) == 0:
        # Nobody to learn from: return an explicit empty result rather than
        # relying on how an empty Series behaves when divided by zero.
        return pd.DataFrame(columns=["score", "title", "genres"])

    # Fraction of similar users who rated each movie, thresholded to candidates.
    similar_user_recs = qualifying.loc[qualifying["userId"].isin(similar_users), "movieId"]
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
    similar_user_recs = similar_user_recs[similar_user_recs > min_share]

    # Fraction of users who rated each candidate, among users who rated any candidate.
    all_users = qualifying[qualifying["movieId"].isin(similar_user_recs.index)]
    all_user_recs = all_users["movieId"].value_counts() / all_users["userId"].nunique()

    rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)
    rec_percentages.columns = ["similar", "all"]
    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]
    rec_percentages = rec_percentages.sort_values("score", ascending=False)

    top = rec_percentages.head(top_n)
    return top.merge(movies, left_index=True, right_on="movieId")[["score", "title", "genres"]]


In [43]:
# Define widgets for interactive search
import ipywidgets as widgets
from IPython.display import display

# Text box pre-filled with an example title, plus an output area that holds the results.
movie_name_input = widgets.Text(value='Toy Story', description='Movie Title:', disabled=False)
recommendation_list = widgets.Output()

# Define event handling for user input
def on_type(data):
    """Refresh the recommendation panel whenever the title box changes.

    Parameters
    ----------
    data : mapping
        Change notification from the text widget; only its "new" entry (the
        current text) is read.
    """
    with recommendation_list:
        recommendation_list.clear_output()
        title = data["new"]
        # With a blank box every title scores zero similarity, yet search()
        # would still return rows; skip the lookup so that no recommendations
        # for an unrelated movie are shown.
        if not title.strip():
            return
        results = search(title)
        if not results.empty:
            # Recommend from the first row that search() returned.
            movie_id = results.iloc[0]["movieId"]
            display(find_similar_movies(movie_id))

# Call on_type every time the text box's value changes.
movie_name_input.observe(on_type, names='value')

# Render the text box followed by the results area.
display(movie_name_input, recommendation_list)

Text(value='Toy Story', description='Movie Title:')

Output()

In [62]:
import ipywidgets as widgets
from IPython.display import display

# NOTE(review): this cell repeats the earlier widget cell line for line. Running
# both rebinds movie_name_input, recommendation_list and on_type to new objects,
# so consider keeping only one copy.
# Text box pre-filled with an example title, plus an output area that holds the results.
movie_name_input = widgets.Text(
    value='Toy Story',
    description='Movie Title:',
    disabled=False
)
recommendation_list = widgets.Output()

# Define event handling for user input
def on_type(data):
    """Refresh the recommendation panel whenever the title box changes.

    Parameters
    ----------
    data : mapping
        Change notification from the text widget; only its "new" entry (the
        current text) is read.
    """
    with recommendation_list:
        recommendation_list.clear_output()
        title = data["new"]
        # With a blank box every title scores zero similarity, yet search()
        # would still return rows; skip the lookup so that no recommendations
        # for an unrelated movie are shown.
        if not title.strip():
            return
        results = search(title)
        if not results.empty:
            # Recommend from the first row that search() returned.
            movie_id = results.iloc[0]["movieId"]
            display(find_similar_movies(movie_id))

# Call on_type every time the text box's value changes.
movie_name_input.observe(on_type, names='value')

# Render the text box followed by the results area.
display(movie_name_input, recommendation_list)

Text(value='Toy Story', description='Movie Title:')

Output()