## 1. Required Imports

In [6]:
import sys
import os
sys.path.append(os.path.abspath("../"))

import pandas as pd
import numpy as np

## 2. Load Data

In [7]:
from src.data_loader import load_data

# Load the TMDB movie table and immediately replace whatever index it came
# with by a default 0..n-1 integer index (the old index is discarded).
df = load_data("../data/TMDB_movies.csv").reset_index(drop=True)

## 3. Select Features for Collaborative Filtering

In [8]:
# Numeric per-movie columns used as the KNN input; missing values become 0.
feature_columns = ['vote_average', 'vote_count', 'popularity']
features = df[feature_columns].fillna(0)

In [9]:
from sklearn.preprocessing import MinMaxScaler

# Learn each column's min/max, then rescale every column into [0, 1] so that
# no single feature dominates the distance computation by sheer magnitude.
scaler = MinMaxScaler().fit(features)
scaled_features = scaler.transform(features)

## 4. Train KNN Model

In [10]:
from sklearn.neighbors import NearestNeighbors

# Exhaustive (brute-force) neighbour search using cosine distance between the
# scaled feature rows; row i of the fitted data corresponds to row i of df.
model_knn = NearestNeighbors(algorithm='brute', metric='cosine')
model_knn.fit(scaled_features)

## 5. Create Recommendation Function

In [11]:
# Reverse index for movie lookup: lower-cased title -> row number in df.
# df was given a fresh 0..n-1 index when it was loaded, so these row numbers
# are also valid positions into scaled_features.
indices = pd.Series(df.index, index=df['title'].str.lower())
# Series.drop_duplicates() compares *values* (the row numbers, which are
# already unique), so it never removed repeated titles. De-duplicate on the
# index labels instead, keeping the first occurrence of each title, so that
# a lookup always yields a single row number rather than a Series.
indices = indices[~indices.index.duplicated(keep='first')]


def recommend_collaborative(title, top_n=5):
    """Return the movies whose scaled feature vectors are nearest to `title`.

    Neighbours are found with the fitted `model_knn` over `scaled_features`
    (per-movie vote_average / vote_count / popularity), not from per-user
    ratings.

    Parameters
    ----------
    title : str
        Movie title to look up; matching is case-insensitive. When a title
        occurs more than once in the dataset, its first occurrence is used.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or str
        Up to `top_n` rows of `df` with the columns title, genre,
        vote_average and popularity, ordered nearest first. If the title is
        not in the dataset, a "not found" message string is returned instead.
    """
    title = title.lower()
    if title not in indices:
        return f"❌ Movie '{title}' not found in dataset."

    idx = int(indices[title])
    movie_vector = scaled_features[idx].reshape(1, -1)

    # Request one extra neighbour to leave room for the query movie itself,
    # but never more than the number of fitted rows (kneighbors would raise).
    n_neighbors = min(top_n + 1, len(scaled_features))
    _, indices_knn = model_knn.kneighbors(movie_vector, n_neighbors=n_neighbors)

    # Remove the query movie by row number rather than by position: with
    # cosine distance, ties (or an all-zero vector) mean it is not guaranteed
    # to come back as neighbour 0.
    neighbour_rows = indices_knn.flatten()
    recommended_indices = neighbour_rows[neighbour_rows != idx][:top_n]

    return df[['title', 'genre', 'vote_average', 'popularity']].iloc[recommended_indices]

## 6. Test

In [12]:
# Smoke test: request five neighbours for one title and display the result.
recommend_collaborative(title="Avatar", top_n=5)

Unnamed: 0,title,genre,vote_average,popularity
8495,2012,"action,adventure,science fiction",5.8,134.916
801,The Avengers,"science fiction,action,adventure",7.7,278.98
4720,Jurassic World,"action,adventure,science fiction,thriller",6.7,212.653
1028,Deadpool,"action,adventure,comedy",7.6,230.309
9910,Zoolander 2,comedy,4.8,19.258
