<a href="https://colab.research.google.com/github/balajigund/movie-recommendation-model/blob/main/movie_recommendation_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Show which NumPy release this runtime currently has installed.
import numpy

numpy_version = numpy.__version__
print(numpy_version)



2.4.1


In [None]:
# ================================
# ONE-CELL MOVIE RECOMMENDER (COLAB)
# Handles NumPy + Surprise issues
# ================================

import sys
import os

# ---------- FIX NUMPY + SURPRISE ----------
try:
    import surprise
    import numpy
    if numpy.__version__.startswith("2"):
        raise ImportError
except:
    print("Fixing NumPy & scikit-surprise compatibility...")

    !pip uninstall -y numpy scikit-surprise
    !pip install numpy==1.26.4
    !pip install scikit-surprise

    print("Restarting runtime automatically...")
    os._exit(0)   # forces runtime restart

# ---------- IMPORTS ----------
import pandas as pd
import zipfile
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse

# ---------- DOWNLOAD DATA ----------
!wget -q https://files.grouplens.org/datasets/movielens/ml-latest-small.zip

with zipfile.ZipFile("ml-latest-small.zip", 'r') as zip_ref:
    zip_ref.extractall()

# ---------- LOAD DATA ----------
ratings = pd.read_csv("ml-latest-small/ratings.csv")
movies = pd.read_csv("ml-latest-small/movies.csv")

print("Ratings:", ratings.shape)
print("Movies:", movies.shape)

# ---------- PREPARE DATA ----------
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(
    ratings[['userId', 'movieId', 'rating']],
    reader
)

trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# ---------- TRAIN MODEL ----------
model = SVD()
model.fit(trainset)

predictions = model.test(testset)
print("RMSE:", rmse(predictions))

# ---------- RECOMMENDATION FUNCTION ----------
def recommend_movies(user_id, n=5, ratings_df=None, movies_df=None, algo=None):
    """Return the ``n`` unrated movies with the highest predicted rating.

    Args:
        user_id: raw user id as it appears in the ``userId`` column.
        n: maximum number of recommendations; values below 0 are treated as 0.
        ratings_df: DataFrame with ``userId`` and ``movieId`` columns.
            Defaults to the notebook-level ``ratings``.
        movies_df: DataFrame with ``movieId``, ``title`` and ``genres``
            columns. Defaults to the notebook-level ``movies``.
        algo: object whose ``predict(user_id, movie_id)`` result exposes an
            ``est`` attribute. Defaults to the notebook-level ``model``.

    Returns:
        DataFrame with ``title`` and ``genres`` columns, ordered from the
        highest to the lowest predicted rating. Row labels are those of
        ``movies_df``.
    """
    if ratings_df is None:
        ratings_df = ratings
    if movies_df is None:
        movies_df = movies
    if algo is None:
        algo = model

    # A set gives a constant-time "already rated?" check instead of scanning a
    # list once per catalogue entry.
    watched = set(ratings_df.loc[ratings_df['userId'] == user_id, 'movieId'].tolist())

    preds = [
        (movie_id, algo.predict(user_id, movie_id).est)
        for movie_id in movies_df['movieId'].tolist()
        if movie_id not in watched
    ]
    preds.sort(key=lambda pair: pair[1], reverse=True)
    top_ids = [movie_id for movie_id, _ in preds[:max(n, 0)]]

    # An `isin` filter alone returns rows in catalogue order, which throws the
    # ranking away; reorder the selected rows by their rank in `top_ids`.
    rank_of = {movie_id: rank for rank, movie_id in enumerate(top_ids)}
    top = movies_df[movies_df['movieId'].isin(top_ids)]
    order = top['movieId'].map(rank_of).to_numpy().argsort(kind="stable")
    return top.iloc[order][['title', 'genres']]

# ---------- TEST ----------
# The heading previously contained the mis-decoded bytes of the clapperboard
# emoji; the literal now holds the intended character.
print("\n🎬 Recommended Movies for User 1:\n")
print(recommend_movies(user_id=1, n=5))


Fixing NumPy & scikit-surprise compatibility...
Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
[0mCollecting numpy==1.26.4
  Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)
Installing collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
shap 0.50.0 requires numpy>=2, but you have numpy 1.26.4 which is incompatible.
opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
jaxlib 0.7.2 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
pytensor 2.36.3 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
opencv-python 4.1

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4-cp312-cp312-linux_x86_64.whl
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


In [1]:
# Re-check the NumPy release that is importable in the current kernel session.
import numpy

active_numpy_version = numpy.__version__
print(active_numpy_version)


1.26.4


In [2]:
# `recommend_movies` must already be defined in the current kernel session;
# after a runtime restart, re-run the cell that defines it before this one.
user_1_recommendations = recommend_movies(user_id=1, n=5)
user_1_recommendations


NameError: name 'recommend_movies' is not defined

In [None]:
# ===== SIMPLE MOVIE RECOMMENDER =====

# 1. Fix NumPy + Surprise
!pip uninstall -y numpy scikit-surprise
!pip install numpy==1.26.4
!pip install scikit-surprise

# 2. Imports
import pandas as pd
import zipfile
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse

# 3. Download dataset
!wget -q https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
with zipfile.ZipFile("ml-latest-small.zip", 'r') as zip_ref:
    zip_ref.extractall()

# 4. Load data
ratings = pd.read_csv("ml-latest-small/ratings.csv")
movies = pd.read_csv("ml-latest-small/movies.csv")

# 5. Prepare data
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(
    ratings[['userId', 'movieId', 'rating']], reader
)

trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# 6. Train model
model = SVD()
model.fit(trainset)

predictions = model.test(testset)
print("RMSE:", rmse(predictions))

# 7. Recommendation function
def recommend_movies(user_id, n=5, ratings_df=None, movies_df=None, algo=None):
    """Recommend up to ``n`` movies the user has not rated, best prediction first.

    Args:
        user_id: raw user id as stored in the ``userId`` column.
        n: maximum number of rows to return; negative values behave like 0.
        ratings_df: ratings table with ``userId`` and ``movieId`` columns
            (defaults to the notebook-level ``ratings``).
        movies_df: catalogue with ``movieId``, ``title`` and ``genres`` columns
            (defaults to the notebook-level ``movies``).
        algo: fitted predictor; ``algo.predict(user_id, movie_id).est`` is used
            as the score (defaults to the notebook-level ``model``).

    Returns:
        DataFrame holding ``title`` and ``genres`` for the selected movies,
        sorted by descending predicted rating.
    """
    ratings_df = ratings if ratings_df is None else ratings_df
    movies_df = movies if movies_df is None else movies_df
    algo = model if algo is None else algo

    # Set membership keeps the per-movie "already rated?" test cheap.
    seen = set(ratings_df.loc[ratings_df['userId'] == user_id, 'movieId'].tolist())

    scored = []
    for movie_id in movies_df['movieId'].tolist():
        if movie_id in seen:
            continue
        scored.append((movie_id, algo.predict(user_id, movie_id).est))

    scored.sort(key=lambda pair: pair[1], reverse=True)
    top_ids = [movie_id for movie_id, _ in scored[:max(n, 0)]]

    # Filtering with `isin` yields catalogue order, not score order, so put the
    # selected rows back into the order of `top_ids`.
    position = {movie_id: rank for rank, movie_id in enumerate(top_ids)}
    picked = movies_df[movies_df['movieId'].isin(top_ids)]
    by_rank = picked['movieId'].map(position).to_numpy().argsort(kind="stable")
    return picked.iloc[by_rank][['title', 'genres']]

# 8. Test
# Display the five best predictions for user 1 as the cell's output.
demo_recommendations = recommend_movies(user_id=1, n=5)
demo_recommendations


Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: scikit-surprise 1.1.4
Uninstalling scikit-surprise-1.1.4:
  Successfully uninstalled scikit-surprise-1.1.4
Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)
Installing collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
shap 0.50.0 requires numpy>=2, but you have numpy 1.26.4 which is incompatible.
opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
jaxlib 0.7.2 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
pytensor 2.36.3 req

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4-cp312-cp312-linux_x86_64.whl
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


In [1]:
# =====================================
# MOVIE RECOMMENDATION SYSTEM (COLAB)
# Content-Based Filtering
# Dataset: MovieLens Latest Small
# =====================================

import pandas as pd
import zipfile
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# -------- Download dataset --------
!wget -q https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
with zipfile.ZipFile("ml-latest-small.zip", 'r') as zip_ref:
    zip_ref.extractall()

# -------- Load data --------
movies = pd.read_csv("ml-latest-small/movies.csv")

# -------- Prepare content --------
movies['genres'] = movies['genres'].str.replace('|', ' ', regex=False)

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# -------- Similarity matrix --------
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# -------- Recommendation function --------
def recommend_movie(title, n=5, catalog=None, similarity=None):
    """Return the ``n`` movies most similar to ``title``, excluding ``title`` itself.

    Args:
        title: exact movie title to look up in the ``title`` column.
        n: maximum number of recommendations; values below 0 are treated as 0.
        catalog: DataFrame with ``title`` and ``genres`` columns. Defaults to
            the notebook-level ``movies``.
        similarity: square similarity matrix whose row/column ``i`` belongs to
            row position ``i`` of ``catalog``. Defaults to the notebook-level
            ``cosine_sim``.

    Returns:
        DataFrame with ``title`` and ``genres`` columns ordered by descending
        similarity (ties keep catalogue order), or the string
        ``"Movie not found in dataset"`` when ``title`` is absent.
    """
    if catalog is None:
        catalog = movies
    if similarity is None:
        similarity = cosine_sim

    # Work with row positions, not index labels: the similarity matrix and
    # `.iloc` are both positional.
    hits = np.flatnonzero((catalog['title'] == title).to_numpy())
    if hits.size == 0:
        return "Movie not found in dataset"
    query_pos = int(hits[0])

    scores = np.asarray(similarity[query_pos], dtype=float)
    # Stable sort on the negated scores gives descending similarity, with ties
    # kept in catalogue order.
    ranked = np.argsort(-scores, kind="stable")
    # Drop the query movie explicitly. Skipping "the first result" is wrong
    # whenever another movie ties with it (e.g. an identical genre string) and
    # sorts ahead of it.
    ranked = ranked[ranked != query_pos][:max(n, 0)]

    return catalog.iloc[ranked][['title', 'genres']]

# -------- Test --------
# Display the five closest matches for one known title as the cell's output.
toy_story_matches = recommend_movie("Toy Story (1995)", n=5)
toy_story_matches


user_id = 1
hdasd




Unnamed: 0,title,genres
1706,Antz (1998),Adventure Animation Children Comedy Fantasy
2355,Toy Story 2 (1999),Adventure Animation Children Comedy Fantasy
2809,"Adventures of Rocky and Bullwinkle, The (2000)",Adventure Animation Children Comedy Fantasy
3000,"Emperor's New Groove, The (2000)",Adventure Animation Children Comedy Fantasy
3568,"Monsters, Inc. (2001)",Adventure Animation Children Comedy Fantasy


In [2]:
# =====================================
# MOVIE RECOMMENDATION SYSTEM
# Input: Genre
# Output: Movie Names
# Dataset: MovieLens Latest Small
# =====================================

import pandas as pd
import zipfile

# -------- Download dataset --------
!wget -q https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
with zipfile.ZipFile("ml-latest-small.zip", 'r') as zip_ref:
    zip_ref.extractall()

# -------- Load data --------
movies = pd.read_csv("ml-latest-small/movies.csv")

# -------- Clean genres --------
movies['genres'] = movies['genres'].str.lower()

# -------- Recommendation function --------
def recommend_by_genre(user_genre, n=10, catalog=None):
    """Return the first ``n`` movies whose genre string contains ``user_genre``.

    Matching is a case-insensitive, literal substring test.

    Args:
        user_genre: genre text to look for, e.g. ``"action"``.
        n: maximum number of rows to return.
        catalog: DataFrame with ``title`` and ``genres`` columns. Defaults to
            the notebook-level ``movies``.

    Returns:
        DataFrame with ``title`` and ``genres`` columns in catalogue order, or
        the string ``"No movies found for this genre"`` when nothing matches.
    """
    if catalog is None:
        catalog = movies

    wanted = user_genre.lower()

    # regex=False: the input is plain text, so characters like "+" or "(" are
    #   matched literally instead of being parsed as a pattern.
    # na=False: rows without a genre count as "no match" rather than putting
    #   NaN into the boolean mask.
    # .str.lower(): do not depend on the caller having lower-cased the column.
    matches = catalog['genres'].str.lower().str.contains(wanted, regex=False, na=False)
    filtered = catalog[matches]

    if filtered.empty:
        return "No movies found for this genre"

    return filtered[['title', 'genres']].head(n)

# -------- User Input --------
# The genre is fixed to "action" here; the result is shown as the cell output.
action_picks = recommend_by_genre("action", n=10)
action_picks


Unnamed: 0,title,genres
5,Heat (1995),action|crime|thriller
8,Sudden Death (1995),action
9,GoldenEye (1995),action|adventure|thriller
14,Cutthroat Island (1995),action|adventure|romance
19,Money Train (1995),action|comedy|crime|drama|thriller
22,Assassins (1995),action|crime|thriller
38,Dead Presidents (1995),action|crime|drama
40,Mortal Kombat (1995),action|adventure|fantasy
59,Lawnmower Man 2: Beyond Cyberspace (1996),action|sci-fi|thriller
62,From Dusk Till Dawn (1996),action|comedy|horror|thriller
