<a href="https://colab.research.google.com/github/jayaprabhapalani/Movie-Recommendation-System-Item-Based-Collaborative-Filtering/blob/main/Movie_Recommendation_System_(Item_Based_CF).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Load Dataset
import pandas as pd

# Load ratings
ratings = pd.read_csv("https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
                      sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])

# Load movie titles
movies = pd.read_csv("https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
                     sep='|', encoding='latin-1',
                     names=["movie_id", "title", "release_date", "video_release_date", "IMDb_URL",
                            "unknown", "Action", "Adventure", "Animation", "Children's", "Comedy",
                            "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror",
                            "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western"])

# Merge ratings with movie titles
data = pd.merge(ratings, movies[['movie_id', 'title']], on='movie_id')
data.head()


Unnamed: 0,user_id,movie_id,rating,timestamp,title
0,196,242,3,881250949,Kolya (1996)
1,186,302,3,891717742,L.A. Confidential (1997)
2,22,377,1,878887116,Heavyweights (1994)
3,244,51,2,880606923,Legends of the Fall (1994)
4,166,346,1,886397596,Jackie Brown (1997)


**pivot table**

Rows → users

Columns → movies

Values → ratings

NaN means the user hasn’t rated that movie.

In [2]:
#Create User-Item Matrix
user_item_matrix = data.pivot_table(index='user_id', columns='title', values='rating')
user_item_matrix.head()


title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,2.0,5.0,,,3.0,4.0,,,...,,,,5.0,3.0,,,,4.0,
2,,,,,,,,,1.0,,...,,,,,,,,,,
3,,,,,2.0,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,2.0,,,,,4.0,,,...,,,,4.0,,,,,4.0,


**Compute Item-Item Similarity**

Let’s choose cosine similarity using pandas' .corr().

In [3]:
# Get the ratings for a specific movie
target_movie = "Star Wars (1977)"
target_ratings = user_item_matrix[target_movie]

# Correlate Star Wars with all other movies
similar_movies = user_item_matrix.corrwith(target_ratings)

# Create a DataFrame
corr_df = pd.DataFrame(similar_movies, columns=['correlation'])
corr_df.dropna(inplace=True)
corr_df = corr_df.sort_values(by='correlation', ascending=False)

corr_df.head(10)


  c /= stddev[:, None]
  c /= stddev[None, :]
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)


Unnamed: 0_level_0,correlation
title,Unnamed: 1_level_1
Hollow Reed (1996),1.0
Stripes (1981),1.0
No Escape (1994),1.0
Man of the Year (1995),1.0
Cosi (1996),1.0
Commandments (1997),1.0
Golden Earrings (1947),1.0
"Scarlet Letter, The (1926)",1.0
Safe Passage (1994),1.0
"Beans of Egypt, Maine, The (1994)",1.0


**Filter Movies Based on Popularity**

To avoid recommending obscure movies, we’ll only consider movies that have at least 50 ratings.

In [4]:
# Count how many ratings each movie has
ratings_count = data.groupby('title').size()

# Merge with correlation dataframe
corr_df['num_ratings'] = ratings_count

# Recommend top movies similar to Star Wars with more than 50 ratings
recommendations = corr_df[corr_df['num_ratings'] > 50].sort_values('correlation', ascending=False)

recommendations.head(10)


Unnamed: 0_level_0,correlation,num_ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
Star Wars (1977),1.0,583
"Empire Strikes Back, The (1980)",0.747981,367
Return of the Jedi (1983),0.672556,507
Raiders of the Lost Ark (1981),0.536117,420
Giant (1956),0.488093,51
"Life Less Ordinary, A (1997)",0.411638,53
Austin Powers: International Man of Mystery (1997),0.377433,130
"Sting, The (1973)",0.367538,241
Indiana Jones and the Last Crusade (1989),0.350107,331
Pinocchio (1940),0.347868,101
