# In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from fuzzywuzzy import fuzz
from typing import List
from fastapi import FastAPI

app = FastAPI()

# Load the movie dataset into a DataFrame.
peliculas = pd.read_csv("C:/Users/Fede/Documents/henry/Labs/Fanny/peliculas.csv")

# Rest of the code ...

# Drop, in place, every row that is missing one of the columns used to build
# the similarity features. The index is not reset, so index labels may no
# longer match row positions after this step.
peliculas.dropna(
    inplace=True,
    subset=['belongs_to_collection', 'genres', 'release_date'],
)

# Normalise titles once (lower-case, surrounding whitespace removed) so that
# lookups can compare against them directly.
peliculas['title'] = peliculas['title'].str.lower().str.strip()

# One text "document" per movie: collection, genres and release date joined
# by single spaces.
peliculas['combined_features'] = (
    peliculas['belongs_to_collection']
    .astype(str)
    .str.cat(
        [
            peliculas['genres'].astype(str),
            peliculas['release_date'].astype(str),
        ],
        sep=' ',
    )
)

# TF-IDF representation of the per-movie documents.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(peliculas['combined_features'])

# Pairwise dot products between all movie vectors. With TfidfVectorizer's
# default L2 row normalisation these are cosine similarities.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

def get_recommendations(title: str) -> List[str]:
    """Return up to five titles most similar to the movie best matching *title*.

    The query is lower-cased and stripped, then fuzzy-matched
    (``fuzz.partial_ratio``) against every title in ``peliculas``. The row
    with the highest score (first one on ties) is taken as the reference
    movie, and the movies with the highest ``cosine_sim`` values against it
    are returned, excluding the reference movie itself.

    Args:
        title: Free-text movie title to look up.

    Returns:
        A list with at most five titles from ``peliculas['title']``, ordered
        from most to least similar. Empty when the dataset has no rows.
    """
    import heapq

    if peliculas.empty:
        return []

    title = title.lower().strip()

    # Titles are already lower-cased and stripped at load time, so they are
    # compared as-is. Missing (non-string) titles score 0 instead of raising.
    match_scores = peliculas['title'].apply(
        lambda x: fuzz.partial_ratio(x, title) if isinstance(x, str) else 0
    )

    # ``cosine_sim`` is indexed by row *position*. Rows dropped at load time
    # leave gaps in the index labels, so the label returned by ``idxmax``
    # cannot be used here; take the positional argmax instead.
    best_position = int(match_scores.to_numpy().argmax())

    # Exclude the reference movie explicitly rather than assuming it sorts
    # first: other movies can tie with it at the top similarity value.
    candidates = (
        (position, score)
        for position, score in enumerate(cosine_sim[best_position])
        if position != best_position
    )

    # nlargest is equivalent to sorted(..., reverse=True)[:5] without sorting
    # the whole row.
    top_matches = heapq.nlargest(5, candidates, key=lambda pair: pair[1])
    top_positions = [position for position, _ in top_matches]

    return peliculas['title'].iloc[top_positions].tolist()

@app.get('/recommendation/{title}')
def recommend(title: str):
    """Serve recommendations for the movie title given in the URL path.

    Returns a dict whose ``'recommended_movies'`` key holds the list produced
    by ``get_recommendations(title)``.
    """
    return {'recommended_movies': get_recommendations(title)}

