In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
!wget http://files.grouplens.org/datasets/movielens/ml-latest-small.zip
!unzip ml-latest-small.zip
movies = pd.read_csv("ml-latest-small/movies.csv")
ratings = pd.read_csv("ml-latest-small/ratings.csv")

print(movies.head())
print(ratings.head())
# TF-IDF on genres
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Map movie index
indices = pd.Series(movies.index, index=movies['title']).drop_duplicates()

def recommend_content(movie_title, n=5):
    """Return the titles of the ``n`` movies most similar to ``movie_title``.

    Similarity scores are read from the module-level ``cosine_sim`` matrix,
    using the row position that ``indices`` maps ``movie_title`` to.

    Parameters
    ----------
    movie_title : str
        Title exactly as it appears among the labels of ``indices``.
    n : int, default 5
        Maximum number of recommendations; values below 1 give an empty result.

    Returns
    -------
    pandas.Series
        Entries of ``movies['title']`` ordered from most to least similar.
        The query movie itself is never included.

    Raises
    ------
    KeyError
        If ``movie_title`` is not a label of ``indices``.
    """
    if movie_title not in indices.index:
        raise KeyError(f"Unknown movie title: {movie_title!r}")

    idx = indices[movie_title]
    # A title that occurs more than once yields a Series of positions;
    # fall back to the first occurrence instead of failing further down.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    scores = np.asarray(cosine_sim[idx])
    # Stable sort on the negated scores: descending similarity, ties kept in
    # ascending row order.
    ranked = np.argsort(-scores, kind="stable")
    # Remove the query movie by position rather than assuming it ranks first:
    # other movies can tie with it at the top score and sort ahead of it.
    ranked = ranked[ranked != idx]
    return movies['title'].iloc[ranked[:max(n, 0)]]

# Example: genre-based neighbours of one title
print(recommend_content(movie_title="Toy Story (1995)"))
from sklearn.metrics.pairwise import cosine_similarity

# User-item rating matrix: one row per userId, one column per movieId;
# (user, movie) pairs without a rating are filled with 0.
user_movie_ratings = (
    ratings
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)

# Pairwise cosine similarity between the users' rating rows, wrapped in a
# DataFrame labelled by userId on both axes.
user_sim = cosine_similarity(user_movie_ratings)
user_sim_df = pd.DataFrame(
    data=user_sim,
    index=user_movie_ratings.index,
    columns=user_movie_ratings.index,
)

def recommend_collaborative(user_id, n=5):
    """Recommend up to ``n`` movie titles that ``user_id`` has not rated yet.

    Other users are visited from most to least similar according to the
    ``user_sim_df[user_id]`` column. Each neighbour's rows in ``ratings`` are
    scanned from highest to lowest rating, and movies that ``user_id`` has no
    rating for are collected until ``n`` titles have been found.

    Parameters
    ----------
    user_id
        A column label of ``user_sim_df``.
    n : int, default 5
        Maximum number of titles to return; values below 1 give an empty
        result.

    Returns
    -------
    pandas.Series
        Titles (Series named ``title`` with a fresh 0..k-1 index) in the order
        they were picked. Holds fewer than ``n`` entries when the neighbours
        do not supply enough unseen movies.

    Raises
    ------
    KeyError
        If ``user_id`` is not a column of ``user_sim_df``.
    """
    # Drop the user by label instead of assuming they sort first, then rank
    # the remaining users by similarity (stable sort keeps ties deterministic).
    neighbours = (
        user_sim_df[user_id]
        .drop(labels=user_id, errors="ignore")
        .sort_values(ascending=False, kind="stable")
        .index
    )

    # movieId -> title lookup; only movies present here can be recommended.
    title_by_id = movies.drop_duplicates(subset='movieId').set_index('movieId')['title']

    # Movies the target user already rated, plus anything picked so far.
    seen = set(ratings.loc[ratings['userId'] == user_id, 'movieId'])
    picked = []

    for sim_user in neighbours:
        if len(picked) >= n:
            break
        ranked = ratings[ratings['userId'] == sim_user].sort_values(
            by='rating', ascending=False, kind="stable"
        )
        for movie_id in ranked['movieId']:
            if len(picked) >= n:
                break
            if movie_id in seen or movie_id not in title_by_id.index:
                continue
            seen.add(movie_id)
            picked.append(movie_id)

    return title_by_id.reindex(picked).reset_index(drop=True)

# Example: collaborative-filtering picks for the user with userId 1
print(recommend_collaborative(user_id=1))



--2025-08-17 07:52:17--  http://files.grouplens.org/datasets/movielens/ml-latest-small.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 978202 (955K) [application/zip]
Saving to: ‘ml-latest-small.zip’


2025-08-17 07:52:17 (6.60 MB/s) - ‘ml-latest-small.zip’ saved [978202/978202]

Archive:  ml-latest-small.zip
   creating: ml-latest-small/
  inflating: ml-latest-small/links.csv  
  inflating: ml-latest-small/tags.csv  
  inflating: ml-latest-small/ratings.csv  
  inflating: ml-latest-small/README.txt  
  inflating: ml-latest-small/movies.csv  
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride