In [67]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [68]:

# Load and preprocess data
ratings = pd.read_csv(
    "https://s3-us-west-2.amazonaws.com/recommender-tutorial/ratings.csv")
movies = pd.read_csv(
    "https://s3-us-west-2.amazonaws.com/recommender-tutorial/movies.csv")


In [69]:
ratings.head(10)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
5,1,70,3.0,964982400
6,1,101,5.0,964980868
7,1,110,4.0,964982176
8,1,151,5.0,964984041
9,1,157,5.0,964984100


In [70]:
ratings.shape

(100836, 4)

In [71]:
movies.tail(10)

Unnamed: 0,movieId,title,genres
9732,193565,Gintama: The Movie (2010),Action|Animation|Comedy|Sci-Fi
9733,193567,anohana: The Flower We Saw That Day - The Movi...,Animation|Drama
9734,193571,Silver Spoon (2014),Comedy|Drama
9735,193573,Love Live! The School Idol Movie (2015),Animation
9736,193579,Jon Stewart Has Left the Building (2015),Documentary
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation
9741,193609,Andrew Dice Clay: Dice Rules (1991),Comedy


In [72]:
# 

In [73]:
movies.shape

(9742, 3)

In [74]:
def create_matrix(df):
    N = len(df['userId'].unique())
    M = len(df['movieId'].unique())
    user_mapper = dict(zip(np.unique(df["userId"]), list(range(N))))
    movie_mapper = dict(zip(np.unique(df["movieId"]), list(range(M))))
    user_index = [user_mapper[i] for i in df['userId']]
    movie_index = [movie_mapper[i] for i in df['movieId']]
    # print(user_index)
    # print(movie_index)
    X = csr_matrix((df["rating"], (movie_index, user_index)), shape=(M, N))
    return X, user_mapper, movie_mapper


X, user_mapper, movie_mapper = create_matrix(ratings)


In [75]:
X

<9724x610 sparse matrix of type '<class 'numpy.float64'>'
	with 100836 stored elements in Compressed Sparse Row format>

In [76]:
# user_mapper

In [77]:
# movie_mapper
for i in movie_mapper:
    if movie_mapper[i] == 2047:
        print("yes there is")

yes there is


In [95]:
def find_similar_movies(movie_id, X, k, metric='cosine', show_distance=False):
    neighbour_ids = []
    if movie_id not in movie_mapper:
        return neighbour_ids  # Return an empty list if the movie_id is not found
    movie_ind = movie_mapper[movie_id]
    movie_vec = X[movie_ind]
    k += 1
    kNN = NearestNeighbors(n_neighbors=k, algorithm="brute", metric=metric)
    kNN.fit(X)
    movie_vec = movie_vec.reshape(1, -1)
    neighbour = kNN.kneighbors(movie_vec, return_distance=show_distance)
    # print("NearestNeighbors",neighbour)
    for i in range(0, k):
        n = neighbour.item(i)
        # print("*"*80)
        # print(n,"nnnnnnn")
        # print(movie_mapper[n],"mapped nnnnnnn")
        neighbour_ids.append(movie_mapper[n])
    
    # print("neighbour_ids before ===> ",neighbour_ids)
    neighbour_ids.pop(0)
    # print("neighbour_ids ===> ",neighbour_ids)
    return neighbour_ids



def recommend_movies(movie_id: int):
    similar_ids = find_similar_movies(movie_id, X, k=2)
    # print(similar_ids)
    if not similar_ids:
        return {"error": "Movie ID not found or not enough data to make recommendations."}

    movie_title = movies[movies['movieId'] == movie_id]['title'].values[0]

    recommended_movies = []
    for i in similar_ids:
        title = movies[movies['movieId'] == i]['title'].values[0]
        recommended_movies.append(title)

    return {"movie_title": movie_title, "recommended_movies": recommended_movies}

movie_id = 184
recommend_movies(movie_id)

{'movie_title': 'Nadja (1994)',
 'recommended_movies': ['Geronimo: An American Legend (1993)',
  'Eye for an Eye (1996)']}