In [6]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')
print(movies.head())
print(ratings.head())

   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931


In [8]:
# content based similarity
# split to seperate cols
genres_split = movies['genres'].str.get_dummies(sep='|')
content_sim = cosine_similarity(genres_split)

content_sim_df = pd.DataFrame(content_sim,index = movies['movieId'],columns = movies['movieId'])

content_sim_df.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.774597,0.316228,0.258199,0.447214,0.0,0.316228,0.632456,0.0,0.258199,...,0.447214,0.316228,0.316228,0.447214,0.0,0.67082,0.774597,0.0,0.316228,0.447214
2,0.774597,1.0,0.0,0.0,0.0,0.0,0.0,0.816497,0.0,0.333333,...,0.0,0.0,0.0,0.0,0.0,0.288675,0.333333,0.0,0.0,0.0
3,0.316228,0.0,1.0,0.816497,0.707107,0.0,1.0,0.0,0.0,0.0,...,0.353553,0.0,0.5,0.0,0.0,0.353553,0.408248,0.0,0.0,0.707107
4,0.258199,0.0,0.816497,1.0,0.57735,0.0,0.816497,0.0,0.0,0.0,...,0.288675,0.408248,0.816497,0.0,0.0,0.288675,0.333333,0.57735,0.0,0.57735
5,0.447214,0.0,0.707107,0.57735,1.0,0.0,0.707107,0.0,0.0,0.0,...,0.5,0.0,0.707107,0.0,0.0,0.5,0.57735,0.0,0.0,1.0


In [9]:
# collaborative item-item ratings
user_item = ratings.pivot(index = 'userId',columns = 'movieId',values='rating').fillna(0)

#cosine sim b/w items (transpose so items x users)
collab_sim = cosine_similarity(user_item.T)
collab_sim_df = pd.DataFrame(collab_sim,index = user_item.columns,columns = user_item.columns)

collab_sim_df.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.410562,0.296917,0.035573,0.308762,0.376316,0.277491,0.131629,0.232586,0.395573,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.410562,1.0,0.282438,0.106415,0.287795,0.297009,0.228576,0.172498,0.044835,0.417693,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.296917,0.282438,1.0,0.092406,0.417802,0.284257,0.402831,0.313434,0.30484,0.242954,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.035573,0.106415,0.092406,1.0,0.188376,0.089685,0.275035,0.158022,0.0,0.095598,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.308762,0.287795,0.417802,0.188376,1.0,0.298969,0.474002,0.283523,0.335058,0.218061,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
#hybrid
 
alpha = 0.5

common_movies = list(set(content_sim_df.index) & set(collab_sim_df.index))
hybrid_sim = alpha * collab_sim_df.loc[common_movies,common_movies] + (1-alpha) * content_sim_df.loc[common_movies,common_movies]
hybrid_sim_df = pd.DataFrame(hybrid_sim,index=common_movies,columns=common_movies)

hybrid_sim_df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,98239,98243,131013,131023,32728,163809,32743,98279,65514,98296
1,1.0,0.592579,0.306572,0.146886,0.377988,0.188158,0.296859,0.382042,0.116293,0.326886,...,0.053143,0.448156,0.191176,0.029775,0.021268,0.28372,0.042535,0.267375,0.05268,0.042535
2,0.592579,1.0,0.141219,0.053207,0.143897,0.148505,0.114288,0.494497,0.022418,0.375513,...,0.042037,0.441179,0.027415,0.0,0.0,0.200312,0.067292,0.020773,0.081758,0.040375
3,0.306572,0.141219,1.0,0.454451,0.562454,0.142128,0.701416,0.156717,0.15242,0.121477,...,0.0,0.011316,0.263031,0.0,0.0,0.030385,0.0,0.38482,0.0,0.060771
4,0.146886,0.053207,0.454451,1.0,0.382863,0.044843,0.545766,0.079011,0.0,0.047799,...,0.0,0.0,0.204124,0.0,0.166667,0.166667,0.166667,0.288675,0.166667,0.166667
5,0.377988,0.143897,0.562454,0.382863,1.0,0.149484,0.590554,0.141762,0.167529,0.10903,...,0.0,0.021198,0.409792,0.0,0.0,0.0,0.0,0.534453,0.0,0.066965


In [11]:
 # Recommend similar movies 
def hybrid_recommend(movie_title, top_n=5):
    # find movieId
    row = movies[movies['title'] == movie_title]
    if row.empty:
        return f"Movie '{movie_title}' not found."

    movie_id = row['movieId'].values[0]
    
    if movie_id not in hybrid_sim_df.index:
        return f"No hybrid similarity available for '{movie_title}'."
    
    # get similarity scores
    sim_scores = hybrid_sim_df[movie_id].sort_values(ascending=False)
    sim_scores = sim_scores.drop(movie_id)  # exclude itself
    
    # map back to titles
    rec_ids = sim_scores.index[:top_n]
    recommendations = movies[movies['movieId'].isin(rec_ids)][['movieId', 'title']]
    
    return recommendations

In [12]:
hybrid_recommend('Grumpier Old Men (1995)')

Unnamed: 0,movieId,title
6,7,Sabrina (1995)
157,186,Nine Months (1995)
238,276,Milk Money (1994)
514,597,Pretty Woman (1990)
577,708,"Truth About Cats & Dogs, The (1996)"
