<a href="https://colab.research.google.com/github/jenwsj/CODSOFT/blob/main/recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# Download and unzip MovieLens 100k dataset (contains u.data and u.item)
!wget -nc https://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip -o ml-100k.zip


--2025-06-25 09:05:06--  https://files.grouplens.org/datasets/movielens/ml-100k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4924029 (4.7M) [application/zip]
Saving to: ‘ml-100k.zip’


2025-06-25 09:05:07 (9.81 MB/s) - ‘ml-100k.zip’ saved [4924029/4924029]

Archive:  ml-100k.zip
   creating: ml-100k/
  inflating: ml-100k/allbut.pl       
  inflating: ml-100k/mku.sh          
  inflating: ml-100k/README          
  inflating: ml-100k/u.data          
  inflating: ml-100k/u.genre         
  inflating: ml-100k/u.info          
  inflating: ml-100k/u.item          
  inflating: ml-100k/u.occupation    
  inflating: ml-100k/u.user          
  inflating: ml-100k/u1.base         
  inflating: ml-100k/u1.test         
  inflating: ml-100k/u2.base         
  inflating: ml-100k/u2.test         
  inflating: ml-100k/u3.base  

In [11]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load ratings
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])

# Load movie titles
movies = pd.read_csv('ml-100k/u.item', sep='|', encoding='latin-1', header=None, usecols=[0, 1])
movies.columns = ['movie_id', 'title']

# Merge datasets
df = pd.merge(ratings, movies, on='movie_id')

# Create user-movie rating matrix
user_movie_matrix = df.pivot_table(index='user_id', columns='title', values='rating').fillna(0)

# Compute cosine similarity
user_similarity = cosine_similarity(user_movie_matrix)

# Recommendation function
def recommend_movies(user_id, num_recommendations=5):
    similarity_scores = list(enumerate(user_similarity[user_id - 1]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    similar_users = [i[0] + 1 for i in similarity_scores[1:6]]
    similar_users_data = df[df['user_id'].isin(similar_users)]
    top_movies = (similar_users_data.groupby('title')['rating']
                  .mean().sort_values(ascending=False)
                  .head(num_recommendations))
    return top_movies

# Recommend for user 1
print("Recommended movies for user 1:")
print(recommend_movies(user_id=1))


Recommended movies for user 1:
title
Wings of Desire (1987)                5.0
Babe (1995)                           5.0
Raiders of the Lost Ark (1981)        5.0
People vs. Larry Flynt, The (1996)    5.0
Angels and Insects (1995)             5.0
Name: rating, dtype: float64
