In [2]:
import pandas as pd
movies = pd.read_csv("movies.csv")
print(movies)
import re
def clean_title(title):
    return re.sub("[^a-zA-Z0-9 ]", "", title)

movies["clean_title"] = movies["title"].apply(clean_title)
print(movies)
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(ngram_range=(1, 2))
tfidf = vectorizer.fit_transform(movies["clean_title"])
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def search(title):
    title = clean_title(title)
    query_vec = vectorizer.transform([title])
    similarity = cosine_similarity(query_vec, tfidf).flatten()
    indices = np.argpartition(similarity, -5)[-5:]
    results = movies.iloc[indices][::-1]
    return results

import ipywidgets as widgets
from IPython.display import display

movie_input = widgets.Text(
    value="toy story",
    description="Movie Title:",
    disabled=False
)

movie_list = widgets.Output()
def on_type(change):
    with movie_list:
        movie_list.clear_output()
        title = change["new"]
        if len(title) > 5:
            display(search(title))
movie_input.observe(on_type, names='value')
display(movie_input, movie_list)

ratings = pd.read_csv("ratings.csv")
print(ratings)
ratings.dtypes

movie_id = 1
similar_users = ratings[(ratings["movieId"] == movie_id) & (ratings["rating"] >= 5)]["userId"].unique()
(similar_users)

similar_user_recs = ratings[(ratings["userId"].isin(similar_users))&(ratings["rating"]>4)]["movieId"]
print(similar_user_recs)
similar_user_recs = similar_user_recs.value_counts()
similar_user_recs = similar_user_recs[similar_user_recs > float(0.1)]
print(similar_user_recs)

all_users =ratings[(ratings["movieId"].isin(similar_user_recs.index))&(ratings["rating"] > 4)]
all_users_recs = len(all_users["userId"].unique())
print(all_users_recs)

if not isinstance(similar_user_recs, (pd.Series, pd.DataFrame)):
    similar_user_recs = pd.Series(similar_user_recs)

if not isinstance(all_users_recs, (pd.Series, pd.DataFrame)):
    all_users_recs = pd.Series(all_users_recs)


rec_percentages = pd.concat([similar_user_recs,all_users_recs],axis=1)
rec_percentages.columns = ["similar","all"]
print(rec_percentages)

rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]
rec_percentages = rec_percentages.sort_values("score", ascending=False)
print(rec_percentages)
rec_percentages.head(10).merge(movies,left_index=True, right_on="movieId")


def find_similar_movies(movie_id):
  similar_users = ratings[(ratings["movieId"] == movie_id) & (ratings["rating"] > 5)]["userId"].unique()
  similar_user_recs = ratings[(ratings["userId"].isin(similar_users))&(ratings["rating"]>4)]["movieId"]


  similar_user_recs = similar_user_recs.value_counts()
  similar_user_recs = similar_user_recs[similar_user_recs > float(0.1)]

  all_users =ratings[(ratings["movieId"].isin(similar_user_recs.index))&(ratings["rating"] > 4)]
  all_users_recs = len(all_users["userId"].unique())


  rec_percentages = pd.concat([similar_user_recs,all_users_recs],axis=1)
  rec_percentages.columns = ["similar","all"]

  rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]

  rec_percentages = rec_percentages.sort_values("score", ascending=False)
  return rec_percentages.head(10).merge(movies,left_index=True, right_on="movieId")[["score","title","genres"]]

movie_name_input = widgets.Text(
    value="Toy Story",
    description="Movie Title:",
    disabled=False
)

recommendation_list = widgets.Output()

def on_type(data):
    with recommendation_list:
        recommendation_list.clear_output()
        title = data["new"]
        if len(title) > 5:
            results = search(title)
            movie_id = results.iloc[0]["movieId"]
            display(find_similar_movies(movie_id))
movie_name_input.observe(on_type,names="value")

display(movie_name_input,recommendation_list)


similar_users_filtered = [user for user in similar_users if list(similar_users).count(user) > 1]
print("Similar Users (filtered):", similar_users_filtered)




       movieId                               title  \
0            1                    Toy Story (1995)   
1            2                      Jumanji (1995)   
2            3             Grumpier Old Men (1995)   
3            4            Waiting to Exhale (1995)   
4            5  Father of the Bride Part II (1995)   
...        ...                                 ...   
62418   209157                           We (2018)   
62419   209159           Window of the Soul (2001)   
62420   209163                    Bad Poems (2018)   
62421   209169                 A Girl Thing (2001)   
62422   209171      Women of Devil's Island (1962)   

                                            genres  
0      Adventure|Animation|Children|Comedy|Fantasy  
1                       Adventure|Children|Fantasy  
2                                   Comedy|Romance  
3                             Comedy|Drama|Romance  
4                                           Comedy  
...                              

Text(value='toy story', description='Movie Title:')

Output()

          userId  movieId  rating   timestamp
0              1      296     5.0  1147880044
1              1      306     3.5  1147868817
2              1      307     5.0  1147868828
3              1      665     5.0  1147878820
4              1      899     3.5  1147868510
...          ...      ...     ...         ...
25000090  162541    50872     4.5  1240953372
25000091  162541    55768     2.5  1240951998
25000092  162541    56176     2.0  1240950697
25000093  162541    58559     4.0  1240953434
25000094  162541    63876     5.0  1240952515

[25000095 rows x 4 columns]
5101           1
5105          34
5111         110
5114         150
5127         260
            ... 
24998388    3706
24998389    3735
24998391    3763
24998392    4187
24998393    4321
Name: movieId, Length: 912084, dtype: int64
movieId
1         13506
318        5599
260        5464
356        4690
296        4628
          ...  
27306         1
71732         1
4739          1
190187        1
97957         1
Name

Text(value='Toy Story', description='Movie Title:')

Output()

Similar Users (filtered): []
