In [1]:
import numpy as np
import pandas as pd
from fuzzywuzzy import process
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors



In [2]:
ratings = pd.read_csv("ratings.csv")
ratings.head()

Unnamed: 0,movieId,userId,rating
0,42594,0,6.0
1,42594,1,5.0
2,65891,2,7.0
3,11202,3,9.0
4,11202,4,9.0


In [3]:
movies = pd.read_csv("movies.csv")
movie_metadata = movies[["title", "genre"]]
movie_metadata.head()

Unnamed: 0,title,genre
0,Scream and Scream Again,"Horror, Science Fiction"
1,Jenny,"Drama, Romance"
2,The Adventures of Gerard,"Adventure, Comedy, Drama"
3,...tick...tick...tick...,"Drama, Action"
4,Last of the Mobile Hot Shots,Drama


In [4]:
movies = ratings.merge(movies, on="movieId")
movies.head()

Unnamed: 0,movieId,userId,rating,title,cast_and_crew,year,poster_path,genre
0,42594,0,6.0,Scream and Scream Again,Gordon Hessler (director); Christopher Wicking...,1970,/49m9QJ2ubKuVtdDj9B7XqgCyriv.jpg,"Horror, Science Fiction"
1,42594,1,5.0,Scream and Scream Again,Gordon Hessler (director); Christopher Wicking...,1970,/49m9QJ2ubKuVtdDj9B7XqgCyriv.jpg,"Horror, Science Fiction"
2,65891,2,7.0,The Dunwich Horror,"Daniel Haller (director); Curtis Hanson, Henry...",1970,/q2KoYMsXi8LHW3ApNimgzWEx2dP.jpg,Horror
3,11202,3,9.0,Patton,Franklin J. Schaffner (director); Francis Ford...,1970,/rLM7jIEPTjj4CF7F1IrzzNjLUCu.jpg,"War, Drama, History"
4,11202,4,9.0,Patton,Franklin J. Schaffner (director); Francis Ford...,1970,/rLM7jIEPTjj4CF7F1IrzzNjLUCu.jpg,"War, Drama, History"


In [5]:
user_item_matrix = ratings.pivot(index=['userId'], columns=['movieId'], values='rating').fillna(0)
user_item_matrix

movieId,11,12,13,14,16,18,20,22,24,28,...,1040148,1056360,1059811,1071215,1072371,1072790,1075794,1092329,1101224,1189198
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11669,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11670,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11671,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
model = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=10, n_jobs=-1)
model.fit(user_item_matrix)

In [7]:
def movie_recommender_engine(movie_name, matrix, cf_model, n_recs):
    # Fit model on matrix
    cf_model.fit(matrix)
    
    # Extract input movie ID
    match = process.extractOne(movie_name, movies['title'])
    if match is None or match[1] < 80:
        print(f"No close match found for movie '{movie_name}'.")
        return None
    
    movie_id = match[2]
    
    # Calculate neighbour distances
    distances, indices = cf_model.kneighbors(matrix.iloc[movie_id].values.reshape(1, -1), n_neighbors=n_recs)
    
    # List to store recommendations
    cf_recs = []
    for i in range(n_recs):
        movie_index = indices.squeeze()[i]
        if movie_index >= len(movies):
            continue
        cf_recs.append({'Title': movies['title'].iloc[movie_index], 'Distance': distances.squeeze().tolist()[i]})
    
    # Create DataFrame for recommendations
    df = pd.DataFrame(cf_recs)
    
    return df

In [8]:
n_recs = 10
movie_recommender_engine('Batman', user_item_matrix, model, n_recs)

Unnamed: 0,Title,Distance
0,Batman,0.0
1,Batman,0.0
2,Batman,0.0
3,Batman,0.0
4,Batman,0.0
5,Batman,0.0
6,Batman,0.0
7,Batman,0.0
8,Spider-Man: Homecoming,1.0
9,Spider-Man: Homecoming,1.0


<hr>