In [6]:
import pandas as pd

# Read the two input tables from CSV files in the working directory.
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")

# Attach each movie's title and genres to its rating rows via the shared
# 'movieId' key (default inner join: ratings without a matching movie are dropped).
data = ratings.merge(movies, on='movieId')
data.tail()



Unnamed: 0,userId,movieId,rating,timestamp,title,genres
100831,610,166534,4.0,1493848402,Split (2017),Drama|Horror|Thriller
100832,610,168248,5.0,1493850091,John Wick: Chapter Two (2017),Action|Crime|Thriller
100833,610,168250,5.0,1494273047,Get Out (2017),Horror
100834,610,168252,5.0,1493846352,Logan (2017),Action|Sci-Fi
100835,610,170875,3.0,1493846415,The Fate of the Furious (2017),Action|Crime|Drama|Thriller


Movie Matrix

In [21]:
# User x title rating matrix: one row per userId, one column per movie title.
# pivot_table aggregates with its default (mean) if a user/title pair occurs
# more than once; user/title pairs with no rating are filled with 0.
movie_matrix = (
    data
    .pivot_table(values='rating', index='userId', columns='title')
    .fillna(0)
)
movie_matrix.head(20)


title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Cosine Similarity

In [9]:
from sklearn.metrics.pairwise import cosine_similarity

# cosine_similarity compares rows, so transpose the matrix to put one movie
# (its vector of per-user ratings) on each row.
movie_similarity = cosine_similarity(movie_matrix.T)

# Wrap the raw array in a DataFrame labelled by title on both axes so that
# similarities can be looked up by movie name.
movie_similarity_df = pd.DataFrame(
    data=movie_similarity,
    index=movie_matrix.columns,
    columns=movie_matrix.columns,
)


In [10]:
# Show the raw similarity array as a quick sanity check; the '...' in the
# printed output stands for rows/columns elided from the large matrix.
print(movie_similarity)

[[1.         0.         0.         ... 0.32732684 0.         0.        ]
 [0.         1.         0.70710678 ... 0.         0.         0.        ]
 [0.         0.70710678 1.         ... 0.         0.         0.        ]
 ...
 [0.32732684 0.         0.         ... 1.         0.         0.        ]
 [0.         0.         0.         ... 0.         1.         0.        ]
 [0.         0.         0.         ... 0.         0.         1.        ]]


Recommendation Function

In [11]:
def recommend_movies(movie_name, n=5):
    """Print the ``n`` titles most similar to ``movie_name``.

    Similarity scores are read from the notebook-level
    ``movie_similarity_df`` (a title-by-title DataFrame of scores).

    Parameters
    ----------
    movie_name : str
        Title to look up. It must match a column label of
        ``movie_similarity_df`` exactly.
    n : int, default 5
        Maximum number of recommendations to print. Values <= 0 print
        only the header line.

    Returns
    -------
    None
        Results are printed rather than returned. If the title is not a
        column of ``movie_similarity_df``, "Movie not found." is printed.
    """
    if movie_name not in movie_similarity_df.columns:
        print("Movie not found.")
        return
    similar_scores = movie_similarity_df[movie_name]
    # Remove the queried title by label rather than assuming it sorts into
    # position 0: any other title with a tying score (e.g. an identical rating
    # vector, similarity 1.0) may sort ahead of it, in which case slicing
    # [1:n+1] would recommend the movie to itself and drop a real match.
    candidates = similar_scores.drop(labels=movie_name, errors="ignore")
    # A stable sort keeps tied titles in a deterministic order.
    ranked = candidates.sort_values(ascending=False, kind="mergesort")
    # Clamp n so a negative value cannot turn into a from-the-end slice.
    similar_movies = ranked.iloc[:max(n, 0)]
    print(f"\nTop {n} recommendations for '{movie_name}':\n")
    for i, (movie, score) in enumerate(similar_movies.items(), 1):
        print(f"{i}. {movie} (Similarity Score: {score:.2f})")


Test it!

In [20]:
# Demo: print recommendations for one title, using the function's default n.
recommend_movies("Babes in Toyland (1961)")


Top 5 recommendations for 'Babes in Toyland (1961)':

1. Newsies (1992) (Similarity Score: 0.65)
2. Selena (1997) (Similarity Score: 0.62)
3. Pecker (1998) (Similarity Score: 0.53)
4. Great Expectations (1946) (Similarity Score: 0.53)
5. Unfaithfully Yours (1948) (Similarity Score: 0.53)
