In [1]:
import pandas as pd 
import ast
import json
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, MultiLabelBinarizer
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_array
from sklearn.neighbors import NearestNeighbors

In [2]:
data = pd.read_csv('datasets/raw/movie_tmdb.csv' , sep=',')
movie = pd.read_csv('datasets/raw/movie.csv', sep=',')

""" Changement des formats facilitant les traitements """

# Convertir les chaînes en listes de dictionnaires
data['spoken_languages'] = data['spoken_languages'].apply(ast.literal_eval)
# Convertir la chaîne en liste python avec ast.literal_eval (plus sûr que json.loads si la chaîne n'est pas en format JSON strict)
data['production_countries'] = data['production_countries'].apply(ast.literal_eval)

In [None]:
data

In [None]:
# Trier les genres qu'il nous faut
# Trier le spoken_language en fr ou French ou Fr
# Changer le format de la realese_date en date
# Changer le nom de imdb_id en tconst
# Rajouter une colonne décennie
# Rajouter les informations sur les acteurs, actrices et directeurs avec l'api
# Trier sur les meilleurs films top 5 %  des films par décennie
# Voir lequel des vite_average et vote_count gardé
# Merger avec movie_csv
# Détermination des types de colonnes 
# data_fr['spoken_languages'].apply(type).value_counts()
# Détermination des types de colonnes 
# movie_tmdb['production_countries'].apply(type).value_counts()

In [3]:
#convertir les colonnes (genres, actors, actresses, directors) en liste
movie['genres'] = movie['genres'].apply(lambda x: x.split(',') if isinstance(x, str) else x)
movie['actors'] = movie['actors'].apply(lambda x: x.split(',') if isinstance(x, str) else x)
movie['actresses'] = movie['actresses'].apply(lambda x: x.split(',') if isinstance(x, str) else x)
movie['directors'] = movie['directors'].apply(lambda x: x.split(',') if isinstance(x, str) else x)

In [15]:
data_fr = data.copy()

# Filtrer pour ne garder que les films exclusivement en français
data_fr = data_fr[data_fr['spoken_languages'].apply(lambda langs: any(lang.get('iso_639_1') in  ['fr', 'en'] for lang in langs))]

# Remplir les vides dans poster_path par backdrop_path
data_fr['poster_path'] = data_fr['poster_path'].fillna(data_fr['backdrop_path'])

# Supprimer les colonnes inutiles
data_fr = data_fr.drop(columns=['adult','backdrop_path','belongs_to_collection','budget', 'homepage','production_companies',
                                'revenue','origin_country','tagline','video','title'])

""" Realease date formatage et création de 2 colonnes"""
#Changement du format de la released_date 
data_fr['release_date'] = pd.to_datetime(data_fr['release_date'])

# Rajout de la colonne decennie et start year
data_fr['startYear'] = data_fr['release_date'].dt.year.astype('Int64')

# Créer la colonne décennie
data_fr['decennie'] = (data_fr['startYear'] // 10) * 10

""" Genre Formatage """
# Convertir les chaînes en listes de dictionnaires
data_fr['genres'] = data_fr['genres'].apply(ast.literal_eval)

# Liste des genres à exclure
genres_exclus = {'Documentaire', 'Musique', 'Téléfilm'}

movie_tmdb = data_fr.copy() 

# Filtrer pour supprimer les films contenant ces genres
movie_tmdb = movie_tmdb[~movie_tmdb['genres'].apply(lambda genre_list: any(g['name'] in genres_exclus for g in genre_list))]

# Changement du nom imdb_id en tconst
movie_tmdb.rename(columns={'imdb_id': 'tconst'}, inplace=True)

""" Réinitialisation de l'index """
# Réinitialiser les index
movie_tmdb.reset_index(drop=True, inplace=True)

# Affichage
movie_tmdb

Unnamed: 0,genres,id,tconst,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,runtime,spoken_languages,status,vote_average,vote_count,startYear,decennie
0,"[{'id': 10751, 'name': 'Familial'}, {'id': 35,...",552524,tt11655566,en,Lilo & Stitch,L’histoire touchante et drôle d’une petite fil...,733.0556,/71IjwRa88OJMYJBntId7nn0eFHy.jpg,"[{'iso_3166_1': 'US', 'name': 'United States o...",2025-05-17,108,"[{'english_name': 'Spanish', 'iso_639_1': 'es'...",Released,7.118,325,2025,2020
1,"[{'id': 10751, 'name': 'Familial'}, {'id': 35,...",950387,tt3566834,en,A Minecraft Movie,Quatre mésadaptés traversent soudainement un p...,469.6691,/cq9z69AyIXeL2H14bqHE5ukm3M9.jpg,"[{'iso_3166_1': 'SE', 'name': 'Sweden'}, {'iso...",2025-03-31,101,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,6.500,1546,2025,2020
2,"[{'id': 27, 'name': 'Horreur'}, {'id': 9648, '...",574475,tt9619824,en,Final Destination Bloodlines,"Stefani, 18 ans, fait d’affreux cauchemars. Da...",364.4598,/4uI8C2zcfLWRhZDBgd0oTlZjV9j.jpg,"[{'iso_3166_1': 'US', 'name': 'United States o...",2025-05-14,110,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,7.033,498,2025,2020
3,"[{'id': 27, 'name': 'Horreur'}, {'id': 9648, '...",1232546,tt30955489,en,Until Dawn,Un an après la mystérieuse disparition de sa s...,296.7954,/7lX0gaDIoEn7T8p8uDn7gk5SdOn.jpg,"[{'iso_3166_1': 'US', 'name': 'United States o...",2025-04-23,103,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,6.473,496,2025,2020
4,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",1197306,tt9150192,en,A Working Man,Levon Cade a quitté une carrière militaire pre...,298.6536,/hVUb6AoL79cgMFPRRg70IpJGc6h.jpg,"[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2025-03-26,116,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,6.671,1062,2025,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2762,"[{'id': 27, 'name': 'Horreur'}, {'id': 12, 'na...",1301333,tt32242519,en,Lone Star Shark,,0.2608,/nFj2YlFzPtQKoXSWgajR1UBQQWw.jpg,"[{'iso_3166_1': 'US', 'name': 'United States o...",2025-02-01,65,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,0.000,0,2025,2020
2763,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",1397413,tt33320905,en,South Side Hero,,0.2218,/qnaJquOCbFSQRChwBHy6REJbxUs.jpg,[],2024-12-03,91,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,0.000,1,2024,2020
2764,"[{'id': 35, 'name': 'Comédie'}]",1379382,tt34253921,en,Fortune Feimster: Crushing It,,0.2877,/nzr70rq2m82zJc4SbGeFBVa2yuD.jpg,[],2024-12-02,58,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,5.200,9,2024,2020
2765,"[{'id': 18, 'name': 'Drame'}]",1383592,tt28308317,nl,Soft Leaves,,0.2662,/70b82KsPUkH61R9mF8xLKfpwnSq.jpg,"[{'iso_3166_1': 'BE', 'name': 'Belgium'}]",2025-02-02,94,"[{'english_name': 'Dutch', 'iso_639_1': 'nl', ...",Released,7.000,3,2025,2020


In [7]:
movie_tmdb['genres'] = movie_tmdb['genres'].apply(
    lambda genre_list: [g.get('name') for g in genre_list if isinstance(g, dict) and g.get('name')]
)

movie_tmdb['spoken_languages'] = movie_tmdb['spoken_languages'].apply(
    lambda langs: [l.get('iso_639_1') for l in langs if isinstance(l, dict) and l.get('iso_639_1')]
)

movie_tmdb['production_countries'] = movie_tmdb['production_countries'].apply(lambda countries: [c['name'] for c in countries])
movie_tmdb

Unnamed: 0,genres,id,tconst,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,runtime,spoken_languages,status,vote_average,vote_count,startYear,decennie
0,"[Familial, Comédie, Science-Fiction]",552524,tt11655566,en,Lilo & Stitch,L’histoire touchante et drôle d’une petite fil...,733.0556,/71IjwRa88OJMYJBntId7nn0eFHy.jpg,[United States of America],2025-05-17,108,"[es, en]",Released,7.118,325,2025,2020
1,"[Familial, Comédie, Aventure, Fantastique]",950387,tt3566834,en,A Minecraft Movie,Quatre mésadaptés traversent soudainement un p...,469.6691,/cq9z69AyIXeL2H14bqHE5ukm3M9.jpg,"[Sweden, United States of America]",2025-03-31,101,[en],Released,6.500,1546,2025,2020
2,"[Horreur, Mystère]",574475,tt9619824,en,Final Destination Bloodlines,"Stefani, 18 ans, fait d’affreux cauchemars. Da...",364.4598,/4uI8C2zcfLWRhZDBgd0oTlZjV9j.jpg,[United States of America],2025-05-14,110,[en],Released,7.033,498,2025,2020
3,"[Horreur, Mystère]",1232546,tt30955489,en,Until Dawn,Un an après la mystérieuse disparition de sa s...,296.7954,/7lX0gaDIoEn7T8p8uDn7gk5SdOn.jpg,[United States of America],2025-04-23,103,[en],Released,6.473,496,2025,2020
4,"[Action, Crime, Thriller]",1197306,tt9150192,en,A Working Man,Levon Cade a quitté une carrière militaire pre...,298.6536,/hVUb6AoL79cgMFPRRg70IpJGc6h.jpg,"[United Kingdom, United States of America]",2025-03-26,116,"[en, ru, es]",Released,6.671,1062,2025,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2762,"[Horreur, Aventure]",1301333,tt32242519,en,Lone Star Shark,,0.2608,/nFj2YlFzPtQKoXSWgajR1UBQQWw.jpg,[United States of America],2025-02-01,65,[en],Released,0.000,0,2025,2020
2763,"[Action, Crime, Thriller]",1397413,tt33320905,en,South Side Hero,,0.2218,/qnaJquOCbFSQRChwBHy6REJbxUs.jpg,[],2024-12-03,91,[en],Released,0.000,1,2024,2020
2764,[Comédie],1379382,tt34253921,en,Fortune Feimster: Crushing It,,0.2877,/nzr70rq2m82zJc4SbGeFBVa2yuD.jpg,[],2024-12-02,58,[en],Released,5.200,9,2024,2020
2765,[Drame],1383592,tt28308317,nl,Soft Leaves,,0.2662,/70b82KsPUkH61R9mF8xLKfpwnSq.jpg,[Belgium],2025-02-02,94,"[nl, en, de, ja]",Released,7.000,3,2025,2020


In [8]:
# Changement du nom pour faciliter la concaténation
movie.rename(columns={'decennie_x': 'decennie'}, inplace=True)
movie_tmdb.rename(columns={'runtime': 'runtimeMinutes'}, inplace=True)

# Concaténation et non pas une merge car on rajoute des lignes en plus afin d'enrichir notre dataframe
films =  pd.concat([movie, movie_tmdb], ignore_index=True)

# Suppression des doublons
films.drop_duplicates(subset='id', inplace=True)

# changer le Format de released_date en date time
films['release_date'] = pd.to_datetime(films['release_date'], errors='coerce')

films['startYear'] = films['startYear'].astype('Int64')

films['decennie'] = films['decennie'].astype('Int64')

# Start year à partir de 1960
films = films[films['startYear'] >= 1980]
# Filtrer la note à partir de 5.8
#films = films[films['vote_average'] >= 5.8]

# Reset de l'index 
films.reset_index(drop=True, inplace=True)

films

Unnamed: 0,id,tconst,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,status,vote_average,vote_count,decennie,startYear,runtimeMinutes,genres,actors,actresses,directors
0,24,tt0266697,en,Kill Bill: Vol. 1,"An assassin is shot by her ruthless employer, ...",31.5560,/v7TaX8kXMXs5yFFGR41guUDNcnB.jpg,['US'],2003-10-10,"['en', 'ja', 'fr']",Released,7.970,15748,2000,2003,111.0,"[Action, Crime, Thriller]","[David Carradine, Michael Madsen, Shin'ichi ...","[Uma Thurman, Daryl Hannah, Lucy Liu, Vivic...",[Quentin Tarantino]
1,75,tt0116996,en,Mars Attacks!,A fleet of Martian spacecraft surrounds the wo...,21.0510,/hll4O5vSAfnZDb6JbnP06GPtz7b.jpg,['US'],1996-12-12,"['en', 'fr']",Released,6.394,4864,1990,1996,106.0,"[Comedy, Sci-Fi]","[Jack Nicholson, Jack Nicholson, Pierce Bros...","[Sarah Jessica Parker, Annette Bening, Glenn...",
2,76,tt0112471,en,Before Sunrise,A young man and woman meet on a train in Europ...,22.3250,/kf1Jb1c2JAOqjuzA3H4oDM263uB.jpg,"['AT', 'CH', 'US']",1995-01-27,"['en', 'de', 'fr']",Released,7.968,3547,1990,1995,101.0,"[Comedy, Drama, Romance]","[Ethan Hawke, Hanno Pöschl, Karl Bruckschwai...","[Julie Delpy, Andrea Eckert, Erni Mangold]",[Richard Linklater]
3,80,tt0381681,en,Before Sunset,"Nine years later, Jesse travels across Europe ...",16.1840,/gycdE1ARByGQcK4fYR2mgpU6OO.jpg,['US'],2004-02-10,"['en', 'fr']",Released,7.818,2943,2000,2004,80.0,"[Drama, Romance]","[Ethan Hawke, Vernon Dobtcheff, Rodolphe Pau...","[Julie Delpy, Louise Lemoine Torrès, Mariane...",[Richard Linklater]
4,101,tt0110413,en,Léon: The Professional,"Léon, the top hit man in New York, has earned ...",44.7010,/yI6X2cCM5YPJtxMhUd3dPGqDAhw.jpg,"['US', 'FR']",1994-09-14,"['en', 'fr', 'it']",Released,8.300,13202,1990,1994,110.0,"[Action, Crime, Drama]","[Jean Reno, Gary Oldman, Danny Aiello, Pete...",[Natalie Portman],[Luc Besson]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10234,1301333,tt32242519,en,Lone Star Shark,,0.2608,/nFj2YlFzPtQKoXSWgajR1UBQQWw.jpg,[United States of America],2025-02-01,[en],Released,0.000,0,2020,2025,65.0,"[Horreur, Aventure]",,,
10235,1397413,tt33320905,en,South Side Hero,,0.2218,/qnaJquOCbFSQRChwBHy6REJbxUs.jpg,[],2024-12-03,[en],Released,0.000,1,2020,2024,91.0,"[Action, Crime, Thriller]",,,
10236,1379382,tt34253921,en,Fortune Feimster: Crushing It,,0.2877,/nzr70rq2m82zJc4SbGeFBVa2yuD.jpg,[],2024-12-02,[en],Released,5.200,9,2020,2024,58.0,[Comédie],,,
10237,1383592,tt28308317,nl,Soft Leaves,,0.2662,/70b82KsPUkH61R9mF8xLKfpwnSq.jpg,[Belgium],2025-02-02,"[nl, en, de, ja]",Released,7.000,3,2020,2025,94.0,[Drame],,,


                                                    Machine Learning

In [9]:


films = films.copy()

# nouveau dataframe avec uniquement les deux premiers acteurs, actrices et un réalisateur 
films[['acteurs_1', 'acteurs_2']] = films['actors'].apply(lambda x: pd.Series(x[:2]) if isinstance(x, list) else pd.Series([None, None]))  
films['actrices'] = films['actresses'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None)
films['realisateurs'] = films['directors'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None)

films['acteurs_1'] = films['acteurs_1'].fillna('unknown_actor')
films['acteurs_2'] = films['acteurs_2'].fillna('unknown_actor')
films['actrices'] = films['actrices'].fillna('unknown_actress')
films['realisateurs'] = films['realisateurs'].fillna('unknown_director')
films['genres'] = films['genres'].fillna('unknown_genre')

# Suppression des colonnes actors, acrtress et directors
films = films.drop(columns=['tconst','actors', 'actresses', 'directors'])    

# Nettoyage des lignes manquantes
#films = films.dropna()

films.reset_index(drop=True, inplace=True)

films


Unnamed: 0,id,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,status,vote_average,vote_count,decennie,startYear,runtimeMinutes,genres,acteurs_1,acteurs_2,actrices,realisateurs
0,24,en,Kill Bill: Vol. 1,"An assassin is shot by her ruthless employer, ...",31.5560,/v7TaX8kXMXs5yFFGR41guUDNcnB.jpg,['US'],2003-10-10,"['en', 'ja', 'fr']",Released,7.970,15748,2000,2003,111.0,"[Action, Crime, Thriller]",David Carradine,Michael Madsen,Uma Thurman,Quentin Tarantino
1,75,en,Mars Attacks!,A fleet of Martian spacecraft surrounds the wo...,21.0510,/hll4O5vSAfnZDb6JbnP06GPtz7b.jpg,['US'],1996-12-12,"['en', 'fr']",Released,6.394,4864,1990,1996,106.0,"[Comedy, Sci-Fi]",Jack Nicholson,Jack Nicholson,Sarah Jessica Parker,unknown_director
2,76,en,Before Sunrise,A young man and woman meet on a train in Europ...,22.3250,/kf1Jb1c2JAOqjuzA3H4oDM263uB.jpg,"['AT', 'CH', 'US']",1995-01-27,"['en', 'de', 'fr']",Released,7.968,3547,1990,1995,101.0,"[Comedy, Drama, Romance]",Ethan Hawke,Hanno Pöschl,Julie Delpy,Richard Linklater
3,80,en,Before Sunset,"Nine years later, Jesse travels across Europe ...",16.1840,/gycdE1ARByGQcK4fYR2mgpU6OO.jpg,['US'],2004-02-10,"['en', 'fr']",Released,7.818,2943,2000,2004,80.0,"[Drama, Romance]",Ethan Hawke,Vernon Dobtcheff,Julie Delpy,Richard Linklater
4,101,en,Léon: The Professional,"Léon, the top hit man in New York, has earned ...",44.7010,/yI6X2cCM5YPJtxMhUd3dPGqDAhw.jpg,"['US', 'FR']",1994-09-14,"['en', 'fr', 'it']",Released,8.300,13202,1990,1994,110.0,"[Action, Crime, Drama]",Jean Reno,Gary Oldman,Natalie Portman,Luc Besson
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10234,1301333,en,Lone Star Shark,,0.2608,/nFj2YlFzPtQKoXSWgajR1UBQQWw.jpg,[United States of America],2025-02-01,[en],Released,0.000,0,2020,2025,65.0,"[Horreur, Aventure]",unknown_actor,unknown_actor,unknown_actress,unknown_director
10235,1397413,en,South Side Hero,,0.2218,/qnaJquOCbFSQRChwBHy6REJbxUs.jpg,[],2024-12-03,[en],Released,0.000,1,2020,2024,91.0,"[Action, Crime, Thriller]",unknown_actor,unknown_actor,unknown_actress,unknown_director
10236,1379382,en,Fortune Feimster: Crushing It,,0.2877,/nzr70rq2m82zJc4SbGeFBVa2yuD.jpg,[],2024-12-02,[en],Released,5.200,9,2020,2024,58.0,[Comédie],unknown_actor,unknown_actor,unknown_actress,unknown_director
10237,1383592,nl,Soft Leaves,,0.2662,/70b82KsPUkH61R9mF8xLKfpwnSq.jpg,[Belgium],2025-02-02,"[nl, en, de, ja]",Released,7.000,3,2020,2025,94.0,[Drame],unknown_actor,unknown_actor,unknown_actress,unknown_director


In [None]:
import requests
# Fonction pour récupérer le trailer ou vidéo ==> charge environ 35 min
API_KEY = "3d79d38c50b154c6468f96037645635e"

def get_trailer_url(movie_id):
    url = f"https://api.themoviedb.org/3/movie/{movie_id}/videos"
    params = {
        "api_key": API_KEY,
        "language": "fr-FR"
    }
    
    try:
        response = requests.get(url, params=params)
        data = response.json().get("results", [])
        
        # Cherche un trailer YouTube (en fr ou pas)
        for video in data:
            if video["type"] == "Trailer" and video["site"] == "YouTube":
                return f"https://www.youtube.com/watch?v={video['key']}"
    except Exception as e:
        print(f"Erreur pour le film {movie_id}: {e}")
    
    return None  # Si pas de trailer

# Ajouter une colonne 'trailer_url' avec les liens YouTube
films["trailer_url"] = films["id"].apply(get_trailer_url)


In [28]:
def get_best_trailer(movie_id):
    for lang in ["fr-FR", "en-US", "es-ES","de-DE","it-IT","ru-RU","ja-JP"]:  # Tu peux remettre ["fr-FR", "en-US", ""] si tu veux essayer plusieurs langues
        url = f"https://api.themoviedb.org/3/movie/{movie_id}/videos"
        params = {"api_key": API_KEY, "language": lang}
        try:
            r = requests.get(url, params=params)
            results = r.json().get("results", [])

            # 1. Chercher un trailer officiel
            for video in results:
                if (
                    video["site"] == "YouTube"
                    and video["type"] == "Trailer"
                    and video.get("official") == True
                ):
                    return f"https://www.youtube.com/watch?v={video['key']}"

            # 2. Sinon un trailer non-officiel
            for video in results:
                if video["site"] == "YouTube" and video["type"] == "Trailer":
                    return f"https://www.youtube.com/watch?v={video['key']}"

            # 3. Sinon un clip ou autre (si vraiment rien d'autre)
            for video in results:
                if video["site"] == "YouTube":
                    return f"https://www.youtube.com/watch?v={video['key']}"

        except Exception as e:
            print(f"Erreur avec {movie_id}: {e}")
            continue

    return None  # si aucun résultat


# Créer un masque pour les lignes où trailer_url est vide
mask = films["trailer_url"].isna()

# Appliquer la fonction SEULEMENT sur les lignes manquantes
films.loc[mask, "trailer_url"] = films.loc[mask, "id"].apply(get_best_trailer)

In [26]:
# Remplir overview 
def get_overview(movie_id):
    for lang in ["fr-FR", "en-US", "es-ES","de-DE","it-IT","ru-RU","ja-JP"]:
        url = f"https://api.themoviedb.org/3/movie/{movie_id}"
        params = {"api_key": API_KEY, "language": lang}
        try:
            r = requests.get(url, params=params)
            data = r.json()
            overview = data.get("overview")
            if overview:  # non vide
                return overview
        except Exception as e:
            print(f"Erreur TMDB overview pour {movie_id}: {e}")
            continue
    return None

mask = films["overview"].isna() | (films["overview"].str.strip() == "")
films.loc[mask, "overview"] = films.loc[mask, "id"].apply(get_overview)

In [27]:
# Remplir poster_path
def get_poster_path(movie_id):
    for lang in ["fr-FR", "en-US", "es-ES","de-DE","it-IT","ru-RU","ja-JP"]:
        url = f"https://api.themoviedb.org/3/movie/{movie_id}"
        params = {"api_key": API_KEY, "language": lang}
        try:
            r = requests.get(url, params=params)
            data = r.json()
            poster_path = data.get("poster_path")
            if poster_path:  # non vide
                return poster_path
        except Exception as e:
            print(f"Erreur TMDB poster_path pour {movie_id}: {e}")
            continue
    return None

mask = films["poster_path"].isna() | (films["poster_path"].str.strip() == "")
films.loc[mask, "poster_path"] = films.loc[mask, "id"].apply(get_poster_path)

In [29]:
# Exportation du dataframe en csv
films.to_csv('C:/Users/sirnb/OneDrive/Bureau/WildCode/Projets/Projet_2_App/Scripts/datasets/raw/films.csv', index=False)
films

Unnamed: 0,id,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,status,...,vote_count,decennie,startYear,runtimeMinutes,genres,acteurs_1,acteurs_2,actrices,realisateurs,trailer_url
0,24,en,Kill Bill: Vol. 1,"An assassin is shot by her ruthless employer, ...",31.5560,/v7TaX8kXMXs5yFFGR41guUDNcnB.jpg,['US'],2003-10-10,"['en', 'ja', 'fr']",Released,...,15748,2000,2003,111.0,"['Action', 'Crime', 'Thriller']",David Carradine,Michael Madsen,Uma Thurman,Quentin Tarantino,https://www.youtube.com/watch?v=sXwXk3jcrfU
1,75,en,Mars Attacks!,A fleet of Martian spacecraft surrounds the wo...,21.0510,/hll4O5vSAfnZDb6JbnP06GPtz7b.jpg,['US'],1996-12-12,"['en', 'fr']",Released,...,4864,1990,1996,106.0,"['Comedy', 'Sci-Fi']",Jack Nicholson,Jack Nicholson,Sarah Jessica Parker,unknown_director,https://www.youtube.com/watch?v=NzC5qlbSNAs
2,76,en,Before Sunrise,A young man and woman meet on a train in Europ...,22.3250,/kf1Jb1c2JAOqjuzA3H4oDM263uB.jpg,"['AT', 'CH', 'US']",1995-01-27,"['en', 'de', 'fr']",Released,...,3547,1990,1995,101.0,"['Comedy', 'Drama', 'Romance']",Ethan Hawke,Hanno Pöschl,Julie Delpy,Richard Linklater,https://www.youtube.com/watch?v=IQ0ZrOYMdtA
3,80,en,Before Sunset,"Nine years later, Jesse travels across Europe ...",16.1840,/gycdE1ARByGQcK4fYR2mgpU6OO.jpg,['US'],2004-02-10,"['en', 'fr']",Released,...,2943,2000,2004,80.0,"['Drama', 'Romance']",Ethan Hawke,Vernon Dobtcheff,Julie Delpy,Richard Linklater,https://www.youtube.com/watch?v=at3hf6am2OQ
4,101,en,Léon: The Professional,"Léon, the top hit man in New York, has earned ...",44.7010,/yI6X2cCM5YPJtxMhUd3dPGqDAhw.jpg,"['US', 'FR']",1994-09-14,"['en', 'fr', 'it']",Released,...,13202,1990,1994,110.0,"['Action', 'Crime', 'Drama']",Jean Reno,Gary Oldman,Natalie Portman,Luc Besson,https://www.youtube.com/watch?v=vSi5jdziv3I
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10234,1301333,en,Lone Star Shark,A group of virtual hunters unleash a deadly sh...,0.2608,/nFj2YlFzPtQKoXSWgajR1UBQQWw.jpg,['United States of America'],2025-02-01,['en'],Released,...,0,2020,2025,65.0,"['Horreur', 'Aventure']",unknown_actor,unknown_actor,unknown_actress,unknown_director,https://www.youtube.com/watch?v=9pNQFMKo9zo
10235,1397413,en,South Side Hero,After serving a 10-year sentence on a RICO cha...,0.2218,/qnaJquOCbFSQRChwBHy6REJbxUs.jpg,[],2024-12-03,['en'],Released,...,1,2020,2024,91.0,"['Action', 'Crime', 'Thriller']",unknown_actor,unknown_actor,unknown_actress,unknown_director,https://www.youtube.com/watch?v=tK3Vl70gxzM
10236,1379382,en,Fortune Feimster: Crushing It,From no longer being her mom's stand-in husban...,0.2877,/nzr70rq2m82zJc4SbGeFBVa2yuD.jpg,[],2024-12-02,['en'],Released,...,9,2020,2024,58.0,['Comédie'],unknown_actor,unknown_actor,unknown_actress,unknown_director,https://www.youtube.com/watch?v=7sbpA9MpU8E
10237,1383592,nl,Soft Leaves,After her father is involved in a serious acci...,0.2662,/70b82KsPUkH61R9mF8xLKfpwnSq.jpg,['Belgium'],2025-02-02,"['nl', 'en', 'de', 'ja']",Released,...,3,2020,2025,94.0,['Drame'],unknown_actor,unknown_actor,unknown_actress,unknown_director,https://www.youtube.com/watch?v=U62q9Vj3jGE


In [None]:
"""# Custom transformer for MultiLabelBinarizer
class MultiLabelBinarizerPipelineFriendly(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.mlb = MultiLabelBinarizer()

    def fit(self, X, y=None):
        self.mlb.fit(X)
        return self

    def transform(self, X):
        return self.mlb.transform(X)

    def get_feature_names_out(self, input_features=None):
        return self.mlb.classes_
    
# Define the ColumnTransformer and the pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('acteurs', OneHotEncoder(), ['acteurs_1', 'acteurs_2','actrices']),
        ('réalisateur', OneHotEncoder(), ['realisateurs']),
        ('status', OneHotEncoder (), ['status']),
        ('genres', MultiLabelBinarizerPipelineFriendly(), 'genres'),
        ('annee', OrdinalEncoder(), ['decennie']),
        ('note', 'passthrough', ['vote_average'])
    ]
)

# Create the pipeline with preprocessing
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor)
])

processed_data = pipeline.fit_transform(films)

from scipy.sparse import issparse

if issparse(processed_data):
    processed_data = processed_data.toarray()

df_processed = pd.DataFrame(
    processed_data.toarray() if issparse(processed_data) else processed_data,
    columns=pipeline.named_steps['preprocessor'].get_feature_names_out()
)
#df_processed = pd.concat([films["id"], df_processed], axis=1)
#df_processed

k= 5
nn_model = NearestNeighbors(n_neighbors=k+1, algorithm='auto', metric='euclidean')
nn_model.fit(df_processed)


# Exemple de recommandation pour un item
def recommend_similar_items(index: int, data: pd.DataFrame=df_processed, model: NearestNeighbors=nn_model, original_data: pd.DataFrame=films, n_neighbors: int=3) -> tuple[list[float], list[float]]:
    Trouve les n éléments les plus proches pour un élément donné.

    Args:
        index: Index de l'élément de référence.
        data: Données transformées utilisées pour KNN.
        model: Modèle KNN pré-entraîné.
        original_data: Données originales (pour affichage).
        n_neighbors: Nombre de voisins à recommander.

    Returns:
        voisins: Indices et distances des voisins.
    
    _, indices = model.kneighbors(data, n_neighbors=n_neighbors) # _ nous permet d'ignorer la variable distance
    print(f"Recommandations pour l'élément {index}:\n")
    display(original_data.iloc[indices[index][0:]]) # On veut afficher uniquement les voisins et pas le point de référence avec cad
                              #(si je fais un affichage des voisins de l index 0 je veux pas le point 0 qui s affiche)

recommend_similar_items(index=44, n_neighbors=11)
"""

Recommandations pour l'élément 44:



Unnamed: 0,id,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,status,vote_average,vote_count,decennie,startYear,runtimeMinutes,genres,acteurs_1,acteurs_2,actrices,realisateurs
44,508,en,Love Actually,Eight London couples try to deal with their re...,19.095,/7QPeVsr9rcFU9Gl90yg0gTOTpVv.jpg,"['FR', 'GB']",2003-09-07,"['en', 'fr', 'pt']",Released,7.105,5907,2000,2003,135.0,"[Comedy, Drama, Romance]",Hugh Grant,Liam Neeson,Martine McCutcheon,Richard Curtis
98,1555,fr,L'Auberge espagnole,A strait-laced French student moves into an ap...,9.534,/qKq4uLZDAJhBfpRsfPRYouQ5jW5.jpg,"['FR', 'ES']",2002-06-19,"['it', 'fr', 'es', 'en', 'ca', 'da', 'de']",Released,7.105,1004,2000,2002,122.0,"[Comedy, Drama, Romance]",Romain Duris,Federico D'Anna,Judith Godrèche,Cédric Klapisch
936,17379,en,Last Holiday,The discovery that she has a terminal illness ...,12.293,/khcU8msd2NooJwmcvwbWguvkCzU.jpg,['US'],2006-01-13,"['en', 'fr']",Released,7.098,612,2000,2006,112.0,"[Comedy, Drama, Romance]",LL Cool J,Timothy Hutton,Queen Latifah,Wayne Wang
4642,44524,fr,Rois et Reine,"Shortly before her wedding, art gallery direct...",3.303,/oNG2gAM1xqZnV8iTXn5qXF8IMyv.jpg,['FR'],2004-09-03,"['de', 'en', 'fr']",Released,7.2,67,2000,2004,150.0,"[Comedy, Drama, Romance]",Geoffrey Carey,Thierry Bosc,Emmanuelle Devos,Arnaud Desplechin
6037,77284,fr,Osmose,"Abel awash in his conquests phone numbers, Rem...",0.901,/ulG3VmpOtj1zflul4E3L9KMEyuT.jpg,['FR'],2004-05-05,"['en', 'fr']",Released,7.0,2,2000,2003,75.0,"[Comedy, Drama, Romance]",Romain Duris,Clément Sibony,Mathilde Bertrandy,Raphael Fejtö
441,8424,fr,Jeux d'enfants,"As adults, best friends Julien and Sophie cont...",12.664,/vvjC5LSMANKvGGDHjIWj1YomPdN.jpg,"['BE', 'FR', 'US']",2003-09-17,['fr'],Released,7.26,1660,2000,2003,93.0,"[Comedy, Drama, Romance]",Guillaume Canet,Thibault Verhaeghe,Marion Cotillard,Yann Samuell
1035,19913,en,(500) Days of Summer,"Tom, greeting-card writer and hopeless romanti...",34.459,/f9mbM0YMLpYemcWx6o2WeiYQLDP.jpg,['US'],2009-07-17,"['en', 'fr', 'sv']",Released,7.267,9091,2000,2009,95.0,"[Comedy, Drama, Romance]",Joseph Gordon-Levitt,Geoffrey Arend,Zooey Deschanel,Marc Webb
728,12555,cs,Obsluhoval jsem anglického krále,"Prague, Czechoslovakia, during the inter-war p...",4.593,/mwuabUjZMSg8ngHScjcKCmFO0jA.jpg,"['CZ', 'SK']",2007-01-11,"['cs', 'en', 'fr', 'de']",Released,6.931,80,2000,2006,113.0,"[Comedy, Drama, Romance]",Ivan Barnev,Oldrich Kaiser,Julia Jentsch,Jirí Menzel
4066,25985,en,Jump Tomorrow,"When shy, straitlaced George (Tunde Adebimpe) ...",1.798,/kYEQLfbrV7HITy5RuEHLhEI44Ty.jpg,"['GB', 'US']",2001-11-09,"['en', 'fr', 'es']",Released,6.9,15,2000,2001,97.0,"[Comedy, Drama, Romance]",Tunde Adebimpe,Hippolyte Girardot,Amy Sedaris,Joel Hopkins
412,7973,ar,سكر بنات,In a beauty salon in Beirut the lives of five ...,7.677,/jvAwQDPpXRfeJZpiBvgc2HxLsnn.jpg,"['LB', 'FR']",2007-08-09,"['fr', 'ar']",Released,6.894,199,2000,2007,95.0,"[Comedy, Drama, Romance]",Adel Karam,Dimitri Staneofski,Nadine Labaki,Nadine Labaki


# Même modèle mais avec des paramètres différents 

In [36]:
import pandas as pd
import joblib
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MultiLabelBinarizer
from scipy.sparse import issparse

# ---------------------- Custom Transformer ----------------------
class MultiLabelBinarizerPipelineFriendly(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.mlb = MultiLabelBinarizer()

    def fit(self, X, y=None):
        self.mlb.fit(X)
        return self

    def transform(self, X):
        return self.mlb.transform(X)

    def get_feature_names_out(self, input_features=None):
        return [f"genres_{cls}" for cls in self.mlb.classes_]

# ---------------------- Prétraitement Optimisé ----------------------
preprocessor = ColumnTransformer(
    transformers=[
        ('acteurs', OneHotEncoder(), ['acteurs_1', 'acteurs_2', 'actrices']),
        ('realisateur', OneHotEncoder(), ['realisateurs']),
        ('status', OneHotEncoder(), ['status']),
        ('genres', MultiLabelBinarizerPipelineFriendly(), 'genres'),
        ('annee', OrdinalEncoder(), ['decennie']),
        ('note', 'passthrough', ['vote_average']),
        ('popularite', StandardScaler(), ['popularity']),
        ('duree', StandardScaler(), ['runtimeMinutes'])
    ]
)

pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

# ---------------------- Chargement des données ----------------------
films = pd.read_csv("datasets/raw/films.csv")  # Chargez vos données ici

# Vérifiez s'il existe déjà un fichier traité
try:
    df_processed = joblib.load('datasets/raw/processed_films.pkl')
    nn_model = joblib.load('datasets/raw/nn_model.pkl')
    distances_all, indices_all = joblib.load('datasets/raw/nn_distances.pkl')
except FileNotFoundError:
    processed_data = pipeline.fit_transform(films)
    if issparse(processed_data):
        processed_data = processed_data.toarray()
    columns = pipeline.named_steps['preprocessor'].get_feature_names_out()
    df_processed = pd.DataFrame(processed_data, columns=columns)

    # Sauvegarde
    joblib.dump(df_processed, 'datasets/raw/processed_films.pkl')

    nn_model = NearestNeighbors(n_neighbors=10, algorithm='auto', metric='euclidean')
    nn_model.fit(df_processed)
    joblib.dump(nn_model, 'datasets/raw/nn_model.pkl')

    distances_all, indices_all = nn_model.kneighbors(df_processed)
    joblib.dump((distances_all, indices_all), 'datasets/raw/nn_distances.pkl')

# ---------------------- Fonction de recommandation ----------------------
def recommend_similar_items(index: int,
                             data: pd.DataFrame = df_processed,
                             model: NearestNeighbors = nn_model,
                             original_data: pd.DataFrame = films,
                             distances=distances_all,
                             indices=indices_all,
                             n_neighbors: int = 5,
                             sort_by: str = None,
                             min_popularity: float = None,
                             runtime_range: tuple = None) -> pd.DataFrame:

    neighbor_distances = distances[index][0:n_neighbors+1]
    neighbors_idx = indices[index][0:n_neighbors+1]

    recommendations = original_data.iloc[neighbors_idx].copy()
    recommendations['distance'] = neighbor_distances

    if min_popularity is not None:
        recommendations = recommendations[recommendations['popularity'] >= min_popularity]

    if runtime_range is not None:
        min_runtime, max_runtime = runtime_range
        recommendations = recommendations[
            recommendations['runtimeMinutes'].between(min_runtime, max_runtime)
        ]

    if sort_by and sort_by in recommendations.columns:
        recommendations = recommendations.sort_values(by=sort_by, ascending=False)

    print(f"\nRecommandations pour le film : {original_data.iloc[index]['original_title']}\n")
    return recommendations

# ---------------------- Exemple d'utilisation ----------------------
display(recommend_similar_items(
    index= 1825,
    n_neighbors=10
))

#print(recommandations[['original_title', 'popularity', 'runtimeMinutes', 'distance']])


Recommandations pour le film : John Wick: Chapter 4



Unnamed: 0,id,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,status,...,decennie,startYear,runtimeMinutes,genres,acteurs_1,acteurs_2,actrices,realisateurs,trailer_url,distance
1825,603692,en,John Wick: Chapter 4,"With the price on his head ever increasing, Jo...",1170.178,/vZloFAK7NmvMGKE7VkF5UHaz0I.jpg,"['DE', 'US']",2023-03-22,"['cn', 'fr', 'en', 'es', 'it', 'ja', 'la', 'ru']",Released,...,2020,2023,169.0,"['Action', 'Crime', 'Thriller']",Keanu Reeves,Laurence Fishburne,Aimée Kwan,Chad Stahelski,https://www.youtube.com/watch?v=JjBZ2iEBcxM,0.0
854,1102776,fr,AKA,A steely special ops agent finds his morality ...,988.329,/3BSxAjiporlwQTWzaHZ9Yrl5C9D.jpg,['FR'],2023-04-28,['fr'],Released,...,2020,2023,122.0,"['Action', 'Crime', 'Thriller']",Alban Lenoir,Éric Cantona,Sveva Alviti,Morgan S. Dalibert,https://www.youtube.com/watch?v=gWQ8dWfUDoo,9.042016
7479,552524,en,Lilo & Stitch,L’histoire touchante et drôle d’une petite fil...,733.0556,/71IjwRa88OJMYJBntId7nn0eFHy.jpg,['United States of America'],2025-05-17,"['es', 'en']",Released,...,2020,2025,108.0,"['Familial', 'Comédie', 'Science-Fiction']",unknown_actor,unknown_actor,unknown_actress,unknown_director,https://www.youtube.com/watch?v=fc4tWQJWq-8,20.277524
2389,505642,en,Black Panther: Wakanda Forever,"Queen Ramonda, Shuri, M’Baku, Okoye and the Do...",620.645,/sv1xJUazXeYqALzczSZ3O6nkH75.jpg,['US'],2022-11-09,"['en', 'fr', 'ht', 'es', 'xh']",Released,...,2020,2022,161.0,"['Action', 'Adventure', 'Drama']",Winston Duke,Tenoch Huerta,Letitia Wright,Ryan Coogler,https://www.youtube.com/watch?v=DlGIWM_e9vg,24.883123
7480,950387,en,A Minecraft Movie,Quatre mésadaptés traversent soudainement un p...,469.6691,/cq9z69AyIXeL2H14bqHE5ukm3M9.jpg,"['Sweden', 'United States of America']",2025-03-31,['en'],Released,...,2020,2025,101.0,"['Familial', 'Comédie', 'Aventure', 'Fantastiq...",unknown_actor,unknown_actor,unknown_actress,unknown_director,https://www.youtube.com/watch?v=sjvO4KhI-CM,31.832475
7481,574475,en,Final Destination Bloodlines,"Stefani, 18 ans, fait d’affreux cauchemars. Da...",364.4598,/4uI8C2zcfLWRhZDBgd0oTlZjV9j.jpg,['United States of America'],2025-05-14,['en'],Released,...,2020,2025,110.0,"['Horreur', 'Mystère']",unknown_actor,unknown_actor,unknown_actress,unknown_director,https://www.youtube.com/watch?v=X0QsjrFUcq0,36.442703
7483,1197306,en,A Working Man,Levon Cade a quitté une carrière militaire pre...,298.6536,/hVUb6AoL79cgMFPRRg70IpJGc6h.jpg,"['United Kingdom', 'United States of America']",2025-03-26,"['en', 'ru', 'es']",Released,...,2020,2025,116.0,"['Action', 'Crime', 'Thriller']",unknown_actor,unknown_actor,unknown_actress,unknown_director,https://www.youtube.com/watch?v=SNHSqCD7b7g,39.13505
7482,1232546,en,Until Dawn,Un an après la mystérieuse disparition de sa s...,296.7954,/7lX0gaDIoEn7T8p8uDn7gk5SdOn.jpg,['United States of America'],2025-04-23,['en'],Released,...,2020,2025,103.0,"['Horreur', 'Mystère']",unknown_actor,unknown_actor,unknown_actress,unknown_director,https://www.youtube.com/watch?v=-PmA__F5uz0,39.477846
7484,575265,en,Mission: Impossible - The Final Reckoning,Ethan Hunt se rend à Londres avec son équipe d...,286.3732,/AozMgdALZuR1hDPZt2a1aXiWmL4.jpg,['United States of America'],2025-05-17,"['fr', 'en', 'iu']",Released,...,2020,2025,163.0,"['Action', 'Aventure', 'Thriller']",unknown_actor,unknown_actor,unknown_actress,unknown_director,https://www.youtube.com/watch?v=Si8mScZ-RgY,39.606786
1447,766507,en,Prey,"When danger threatens her camp, the fierce and...",267.138,/ujr5pztc1oitbe7ViMUOilFaJ7s.jpg,['US'],2022-08-02,"['en', 'fr']",Released,...,2020,2022,100.0,"['Action', 'Adventure', 'Horror']",Dakota Beavers,Dane DiLiegro,Amber Midthunder,Dan Trachtenberg,https://www.youtube.com/watch?v=DcuPy47vOJ8,40.701923


# Modèle prédisant un ou plus de films ayant au moins un des acteurs/actrices

In [34]:
def recommend_by_actors(index: int,
                        original_data: pd.DataFrame = films,
                        min_popularity: float = None,
                        decade: str = None,
                        sort_by: str = 'popularity',
                        top_n: int = 10) -> pd.DataFrame:
    def normalize_name(name):
        return name.strip().lower().replace("é", "e").replace("-", " ")

    # Nettoyage
    for col in ['acteurs_1', 'acteurs_2', 'actrices']:
        original_data[col] = original_data[col].fillna('').astype(str).str.strip().str.title()

    film_ref = original_data.iloc[index]
    acteurs_ref = set([
        film_ref['acteurs_1'],
        film_ref['acteurs_2'],
        film_ref['actrices']
    ])

    def has_common_actor(row):
        acteurs_row = set([
            row['acteurs_1'],
            row['acteurs_2'],
            row['actrices']
        ])
        return not acteurs_ref.isdisjoint(acteurs_row)

    filtered = original_data[original_data.index != index]
    filtered = filtered[filtered.apply(has_common_actor, axis=1)]

    if min_popularity is not None:
        filtered = filtered[filtered['popularity'] >= min_popularity]

    if decade is not None:
        filtered = filtered[filtered['decennie'] == decade]

    if sort_by and sort_by in filtered.columns:
        filtered = filtered.sort_values(by=sort_by, ascending=False)

    print(f"\nFilms avec {', '.join(acteurs_ref)} comme acteur(s)/actrice(s):\n")
    print(f"\nRecommandations avec au moins un acteur commun avec « {film_ref['original_title']} » :\n")
    return filtered.head(top_n)

display(recommend_by_actors(
    index= 42,
    min_popularity= None,
    top_n=10
))


Films avec Carrie Anne Moss, Keanu Reeves, Laurence Fishburne comme acteur(s)/actrice(s):


Recommandations avec au moins un acteur commun avec « The Matrix Revolutions » :



Unnamed: 0,id,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,status,...,vote_count,decennie,startYear,runtimeMinutes,genres,acteurs_1,acteurs_2,actrices,realisateurs,trailer_url
1825,603692,en,John Wick: Chapter 4,"With the price on his head ever increasing, Jo...",1170.178,/vZloFAK7NmvMGKE7VkF5UHaz0I.jpg,"['DE', 'US']",2023-03-22,"['cn', 'fr', 'en', 'es', 'it', 'ja', 'la', 'ru']",Released,...,1341,2020,2023,169.0,"['Action', 'Crime', 'Thriller']",Keanu Reeves,Laurence Fishburne,Aimee Kwan,Chad Stahelski,https://www.youtube.com/watch?v=JjBZ2iEBcxM
98,1921,en,Sweet November,Nelson is a man devoted to his advertising car...,15.02,/3biEHZSBSYqSkawU2sQyNbLcjgE.jpg,['US'],2001-02-16,"['en', 'fr']",Released,...,1152,2000,2001,119.0,"['Drama', 'Romance']",Keanu Reeves,Jason Isaacs,Charlize Theron,Pat O'Connor,https://www.youtube.com/watch?v=cYR4vL5ydUI
236,6964,en,Something's Gotta Give,Harry Sanborn is an aged music industry exec w...,13.93,/4fQJjwbDf1KELvJJMcmAV6AxJdH.jpg,['US'],2003-12-12,"['fr', 'en']",Released,...,1396,2000,2003,128.0,"['Comedy', 'Drama', 'Romance']",Jack Nicholson,Keanu Reeves,Diane Keaton,Nancy Meyers,https://www.youtube.com/watch?v=fyB1Awfq3YU


In [33]:
def normalize_name(name):
    return name.strip().lower().replace("é", "e").replace("è", "e").replace("-", " ").replace("_"," ").replace("ê"," ").replace("ô","o")

def get_films_by_actor(actor_name: str, original_data: pd.DataFrame = films) -> pd.DataFrame:
    # Normaliser les colonnes d'acteurs
    for col in ['acteurs_1', 'acteurs_2', 'actrices']:
        original_data[col] = original_data[col].fillna('').astype(str).apply(normalize_name)

    actor_name_normalized = normalize_name(actor_name)

    filtered = original_data[
        (original_data['acteurs_1'] == actor_name_normalized) |
        (original_data['acteurs_2'] == actor_name_normalized) |
        (original_data['actrices'] == actor_name_normalized)
    ]

    print(f"\nFilms avec l'acteur/l'actrice : {actor_name.title()}\n")
    return filtered[['original_title', 'decennie', 'popularity', 'vote_average']]
display(get_films_by_actor("Keanu Reeves"))



Films avec l'acteur/l'actrice : Keanu Reeves



Unnamed: 0,original_title,decennie,popularity,vote_average
42,The Matrix Revolutions,2000,26.049,6.705
98,Sweet November,2000,15.02,6.87
236,Something's Gotta Give,2000,13.93,6.622
1825,John Wick: Chapter 4,2020,1170.178,7.926


In [47]:
def recommend_by_director(index: int,
                          original_data: pd.DataFrame = films,
                          min_popularity: float = None,
                          decade: str = None,
                          sort_by: str = 'popularity',
                          top_n: int = 10) -> pd.DataFrame:
    def normalize_name(name):
        return name.strip().lower().replace("é", "e").replace("-", " ")

    # Nettoyage du nom du réalisateur
    original_data['realisateurs'] = original_data['realisateurs'].fillna('').astype(str).apply(normalize_name)

    film_ref = original_data.iloc[index]
    director_ref = normalize_name(film_ref['realisateurs'])

    # Filtrer films avec le même réalisateur (exact match)
    filtered = original_data[(original_data.index != index) & (original_data['realisateurs'] == director_ref)]

    if min_popularity is not None:
        filtered = filtered[filtered['popularity'] >= min_popularity]

    if decade is not None:
        filtered = filtered[filtered['decennie'] == decade]

    if sort_by and sort_by in filtered.columns:
        filtered = filtered.sort_values(by=sort_by, ascending=False)

    print(f"\nFilms réalisés par {director_ref.title()} :\n")
    print(f"\nRecommandations avec le même réalisateur que « {film_ref['original_title']} » :\n")
    return filtered.head(top_n)

display(recommend_by_director(
    index=44,
    sort_by='popularity',
    min_popularity=None,
    top_n=10
))


Films réalisés par James Cameron :


Recommandations avec le même réalisateur que « Titanic » :



Unnamed: 0,id,tconst,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,...,vote_average,vote_count,decennie,startYear,runtimeMinutes,genres,acteurs_1,acteurs_2,actrices,realisateurs
3813,36955,tt0111503,en,True Lies,"A fearless, globe-trotting, terrorist-battling...",32.758,/pweFTnzzTfGK68woSVkiTgjLzWm.jpg,['US'],1994-07-15,"['en', 'fr', 'ar', 'de']",...,7.0,3518,1990.0,1994.0,141.0,"['Action', 'Comedy', 'Thriller']",arnold schwarzenegger,tom arnold,jamie lee curtis,james cameron
