In [18]:
import pandas as pd 
import ast
import json
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, MultiLabelBinarizer
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_array
from sklearn.neighbors import NearestNeighbors

In [19]:
data = pd.read_csv('datasets/raw/movie_tmdb.csv' , sep=',')
movie = pd.read_csv('datasets/raw/movie.csv', sep=',')

""" Changement des formats facilitant les traitements """

# Convertir les chaînes en listes de dictionnaires
data['spoken_languages'] = data['spoken_languages'].apply(ast.literal_eval)
# Convertir la chaîne en liste python avec ast.literal_eval (plus sûr que json.loads si la chaîne n'est pas en format JSON strict)
data['production_countries'] = data['production_countries'].apply(ast.literal_eval)

In [None]:
data

In [None]:
# Trier les genres qu'il nous faut
# Trier le spoken_language en fr ou French ou Fr
# Changer le format de la realese_date en date
# Changer le nom de imdb_id en tconst
# Rajouter une colonne décennie
# Rajouter les informations sur les acteurs, actrices et directeurs avec l'api
# Trier sur les meilleurs films top 5 %  des films par décennie
# Voir lequel des vite_average et vote_count gardé
# Merger avec movie_csv
# Détermination des types de colonnes 
# data_fr['spoken_languages'].apply(type).value_counts()
# Détermination des types de colonnes 
# movie_tmdb['production_countries'].apply(type).value_counts()

In [21]:
#convertir les colonnes (genres, actors, actresses, directors) en liste
movie['genres'] = movie['genres'].apply(lambda x: x.split(',') if isinstance(x, str) else x)
movie['actors'] = movie['actors'].apply(lambda x: x.split(',') if isinstance(x, str) else x)
movie['actresses'] = movie['actresses'].apply(lambda x: x.split(',') if isinstance(x, str) else x)
movie['directors'] = movie['directors'].apply(lambda x: x.split(',') if isinstance(x, str) else x)

In [51]:
data_fr = data.copy()

# Filtrer pour ne garder que les films exclusivement en français
data_fr = data_fr[data_fr['spoken_languages'].apply(lambda langs: any(lang.get('iso_639_1') == 'fr' for lang in langs))]

# Supprimer les colonnes inutiles
data_fr = data_fr.drop(columns=['adult','backdrop_path','belongs_to_collection','budget', 'homepage','production_companies',
                                'revenue','origin_country','tagline','video','title'])

""" Realease date formatage et création de 2 colonnes"""
#Changement du format de la released_date 
data_fr['release_date'] = pd.to_datetime(data_fr['release_date'])

# Rajout de la colonne decennie et start year
data_fr['startYear'] = data_fr['release_date'].dt.year.astype('Int64')

# Créer la colonne décennie
data_fr['decennie'] = (data_fr['startYear'] // 10) * 10

""" Genre Formatage """
# Convertir les chaînes en listes de dictionnaires
data_fr['genres'] = data_fr['genres'].apply(ast.literal_eval)

# Liste des genres à exclure
genres_exclus = {'Documentaire', 'Musique', 'Téléfilm'}

movie_tmdb = data_fr.copy() 

# Filtrer pour supprimer les films contenant ces genres
movie_tmdb = movie_tmdb[~movie_tmdb['genres'].apply(lambda genre_list: any(g['name'] in genres_exclus for g in genre_list))]

# Changement du nom imdb_id en tconst
movie_tmdb.rename(columns={'imdb_id': 'tconst'}, inplace=True)

""" Réinitialisation de l'index """
# Réinitialiser les index
movie_tmdb.reset_index(drop=True, inplace=True)

# Affichage
movie_tmdb

Unnamed: 0,genres,id,tconst,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,runtime,spoken_languages,status,vote_average,vote_count,startYear,decennie
0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",575265,tt9603208,en,Mission: Impossible - The Final Reckoning,Ethan Hunt se rend à Londres avec son équipe d...,286.3732,/AozMgdALZuR1hDPZt2a1aXiWmL4.jpg,"[{'iso_3166_1': 'US', 'name': 'United States o...",2025-05-17,163,"[{'english_name': 'French', 'iso_639_1': 'fr',...",Released,7.113,462,2025,2020
1,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",1144430,tt29768333,fr,Balle perdue 3,"Dans un dernier opus chargé en adrénaline, le ...",112.4993,/xccJs9vGCEvch1H9SIMSjNqrLKU.jpg,"[{'iso_3166_1': 'BE', 'name': 'Belgium'}, {'is...",2025-05-06,111,"[{'english_name': 'French', 'iso_639_1': 'fr',...",Released,6.727,150,2025,2020
2,"[{'id': 28, 'name': 'Action'}, {'id': 878, 'na...",939243,tt18259086,en,Sonic the Hedgehog 3,"Sonic, Knuckles et Tails sont à nouveau réunis...",80.0790,/gERwLGTa6JGN4qXjkip13eDaxy1.jpg,"[{'iso_3166_1': 'US', 'name': 'United States o...",2024-12-19,100,"[{'english_name': 'Spanish', 'iso_639_1': 'es'...",Released,7.715,2632,2024,2020
3,"[{'id': 80, 'name': 'Crime'}, {'id': 18, 'name...",959604,tt27490099,fr,L'Amour ouf,"Les années 80, dans le nord de la France. Jack...",52.1345,/6akNNv4KyrguZUiG4uemfV6toVq.jpg,"[{'iso_3166_1': 'BE', 'name': 'Belgium'}, {'is...",2024-10-16,166,"[{'english_name': 'French', 'iso_639_1': 'fr',...",Released,7.229,793,2024,2020
4,"[{'id': 12, 'name': 'Aventure'}, {'id': 35, 'n...",1137350,tt30840798,en,The Phoenician Scheme,Zsa-Zsa Korda est un richissime homme d’affair...,36.0999,/aWucFFSKSo5k6dKn4WQXCQiu6MP.jpg,"[{'iso_3166_1': 'US', 'name': 'United States o...",2025-05-23,102,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,7.000,52,2025,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,"[{'id': 18, 'name': 'Drame'}]",1000085,tt34798218,fr,Morlaix,Jeune lycéenne marquée par le récent décès de ...,0.2135,/nUTE76LKG45ntFkgGFz1lpluzQR.jpg,"[{'iso_3166_1': 'ES', 'name': 'Spain'}, {'iso_...",2025-03-14,124,"[{'english_name': 'French', 'iso_639_1': 'fr',...",Released,0.000,0,2025,2020
433,"[{'id': 35, 'name': 'Comédie'}]",1019864,tt28487662,fr,Comme un prince,"Souleyman, 27 ans, champion de boxe en pleine ...",0.3786,/esp2ki9rwiZzwpK5pz2gk7Tpyhn.jpg,"[{'iso_3166_1': 'FR', 'name': 'France'}]",2024-01-17,90,"[{'english_name': 'French', 'iso_639_1': 'fr',...",Released,5.539,38,2024,2020
434,"[{'id': 10749, 'name': 'Romance'}, {'id': 878,...",1200277,tt7264528,fr,Vous n’êtes pas seuls,"Léo, un livreur de pizza solitaire et anxieux,...",0.3962,/hV0GMf8Tjag4upsPh1UY0MRAxNK.jpg,"[{'iso_3166_1': 'CA', 'name': 'Canada'}]",2024-12-12,105,"[{'english_name': 'French', 'iso_639_1': 'fr',...",Released,7.000,1,2024,2020
435,"[{'id': 35, 'name': 'Comédie'}, {'id': 53, 'na...",1211886,tt4726640,ht,Kidnapping Inc.,Le fils d'un riche candidat à la présidence ha...,0.3690,/rAU6nOHcR3NQTpxgPF4yoeWFW9r.jpg,"[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso...",2024-01-22,107,"[{'english_name': 'French', 'iso_639_1': 'fr',...",Released,8.667,3,2024,2020


In [52]:
movie_tmdb['genres'] = movie_tmdb['genres'].apply(
    lambda genre_list: [g.get('name') for g in genre_list if isinstance(g, dict) and g.get('name')]
)

movie_tmdb['spoken_languages'] = movie_tmdb['spoken_languages'].apply(
    lambda langs: [l.get('iso_639_1') for l in langs if isinstance(l, dict) and l.get('iso_639_1')]
)

movie_tmdb['production_countries'] = movie_tmdb['production_countries'].apply(lambda countries: [c['name'] for c in countries])
movie_tmdb

Unnamed: 0,genres,id,tconst,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,runtime,spoken_languages,status,vote_average,vote_count,startYear,decennie
0,"[Action, Aventure, Thriller]",575265,tt9603208,en,Mission: Impossible - The Final Reckoning,Ethan Hunt se rend à Londres avec son équipe d...,286.3732,/AozMgdALZuR1hDPZt2a1aXiWmL4.jpg,[United States of America],2025-05-17,163,"[fr, en, iu]",Released,7.113,462,2025,2020
1,"[Action, Crime, Thriller, Drame]",1144430,tt29768333,fr,Balle perdue 3,"Dans un dernier opus chargé en adrénaline, le ...",112.4993,/xccJs9vGCEvch1H9SIMSjNqrLKU.jpg,"[Belgium, France]",2025-05-06,111,[fr],Released,6.727,150,2025,2020
2,"[Action, Science-Fiction, Comédie, Familial]",939243,tt18259086,en,Sonic the Hedgehog 3,"Sonic, Knuckles et Tails sont à nouveau réunis...",80.0790,/gERwLGTa6JGN4qXjkip13eDaxy1.jpg,"[United States of America, Japan]",2024-12-19,100,"[es, en, ja, fr]",Released,7.715,2632,2024,2020
3,"[Crime, Drame, Romance]",959604,tt27490099,fr,L'Amour ouf,"Les années 80, dans le nord de la France. Jack...",52.1345,/6akNNv4KyrguZUiG4uemfV6toVq.jpg,"[Belgium, France]",2024-10-16,166,[fr],Released,7.229,793,2024,2020
4,"[Aventure, Comédie]",1137350,tt30840798,en,The Phoenician Scheme,Zsa-Zsa Korda est un richissime homme d’affair...,36.0999,/aWucFFSKSo5k6dKn4WQXCQiu6MP.jpg,"[United States of America, Germany]",2025-05-23,102,"[en, fr, es]",Released,7.000,52,2025,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,[Drame],1000085,tt34798218,fr,Morlaix,Jeune lycéenne marquée par le récent décès de ...,0.2135,/nUTE76LKG45ntFkgGFz1lpluzQR.jpg,"[Spain, France]",2025-03-14,124,[fr],Released,0.000,0,2025,2020
433,[Comédie],1019864,tt28487662,fr,Comme un prince,"Souleyman, 27 ans, champion de boxe en pleine ...",0.3786,/esp2ki9rwiZzwpK5pz2gk7Tpyhn.jpg,[France],2024-01-17,90,[fr],Released,5.539,38,2024,2020
434,"[Romance, Science-Fiction, Comédie, Horreur]",1200277,tt7264528,fr,Vous n’êtes pas seuls,"Léo, un livreur de pizza solitaire et anxieux,...",0.3962,/hV0GMf8Tjag4upsPh1UY0MRAxNK.jpg,[Canada],2024-12-12,105,[fr],Released,7.000,1,2024,2020
435,"[Comédie, Thriller, Drame]",1211886,tt4726640,ht,Kidnapping Inc.,Le fils d'un riche candidat à la présidence ha...,0.3690,/rAU6nOHcR3NQTpxgPF4yoeWFW9r.jpg,"[Canada, France, Haiti]",2024-01-22,107,"[fr, ht]",Released,8.667,3,2024,2020


In [100]:
# Changement du nom pour faciliter la concaténation
movie.rename(columns={'decennie_x': 'decennie'}, inplace=True)
movie_tmdb.rename(columns={'runtime': 'runtimeMinutes'}, inplace=True)

# Concaténation et non pas une merge car on rajoute des lignes en plus afin d'enrichir notre dataframe
films =  pd.concat([movie, movie_tmdb], ignore_index=True)

# Suppression des doublons
films.drop_duplicates(subset='id', inplace=True)

# changer le Format de released_date en date time
films['release_date'] = pd.to_datetime(films['release_date'], errors='coerce')

films['startYear'] = films['startYear'].astype('Int64')

films['decennie'] = films['decennie'].astype('Int64')

# Start year à partir de 1960
films = films[films['startYear'] >= 1960]
# Filtrer la note à partir de 5.8
#films = films[films['vote_average'] >= 5.8]

# Reset de l'index 
films.reset_index(drop=True, inplace=True)

films

Unnamed: 0,id,tconst,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,status,vote_average,vote_count,decennie,startYear,runtimeMinutes,genres,actors,actresses,directors
0,8773,tt0055747,fr,L'Amour à vingt ans,Love at Twenty unites five directors from five...,3.7700,/aup2QCYCsyEeQfpboXy0f4uj8aE.jpg,"['DE', 'FR', 'IT', 'JP', 'PL']",1962-06-22,"['it', 'ja', 'pl', 'fr', 'de']",Released,6.700,41,1960,1962,110.0,"[Drama, Romance]","[Jean-Pierre Léaud, Patrick Auffay, François...","[Marie-France Pisier, Rosy Varte, Cristina G...","[Shintarô Ishihara, Marcel Ophüls, Renzo Ros..."
1,24,tt0266697,en,Kill Bill: Vol. 1,"An assassin is shot by her ruthless employer, ...",31.5560,/v7TaX8kXMXs5yFFGR41guUDNcnB.jpg,['US'],2003-10-10,"['en', 'ja', 'fr']",Released,7.970,15748,2000,2003,111.0,"[Action, Crime, Thriller]","[David Carradine, Michael Madsen, Shin'ichi ...","[Uma Thurman, Daryl Hannah, Lucy Liu, Vivic...",[Quentin Tarantino]
2,75,tt0116996,en,Mars Attacks!,A fleet of Martian spacecraft surrounds the wo...,21.0510,/hll4O5vSAfnZDb6JbnP06GPtz7b.jpg,['US'],1996-12-12,"['en', 'fr']",Released,6.394,4864,1990,1996,106.0,"[Comedy, Sci-Fi]","[Jack Nicholson, Jack Nicholson, Pierce Bros...","[Sarah Jessica Parker, Annette Bening, Glenn...",
3,76,tt0112471,en,Before Sunrise,A young man and woman meet on a train in Europ...,22.3250,/kf1Jb1c2JAOqjuzA3H4oDM263uB.jpg,"['AT', 'CH', 'US']",1995-01-27,"['en', 'de', 'fr']",Released,7.968,3547,1990,1995,101.0,"[Comedy, Drama, Romance]","[Ethan Hawke, Hanno Pöschl, Karl Bruckschwai...","[Julie Delpy, Andrea Eckert, Erni Mangold]",[Richard Linklater]
4,80,tt0381681,en,Before Sunset,"Nine years later, Jesse travels across Europe ...",16.1840,/gycdE1ARByGQcK4fYR2mgpU6OO.jpg,['US'],2004-02-10,"['en', 'fr']",Released,7.818,2943,2000,2004,80.0,"[Drama, Romance]","[Ethan Hawke, Vernon Dobtcheff, Rodolphe Pau...","[Julie Delpy, Louise Lemoine Torrès, Mariane...",[Richard Linklater]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9707,1000085,tt34798218,fr,Morlaix,Jeune lycéenne marquée par le récent décès de ...,0.2135,/nUTE76LKG45ntFkgGFz1lpluzQR.jpg,"[Spain, France]",2025-03-14,[fr],Released,0.000,0,2020,2025,124.0,[Drame],,,
9708,1019864,tt28487662,fr,Comme un prince,"Souleyman, 27 ans, champion de boxe en pleine ...",0.3786,/esp2ki9rwiZzwpK5pz2gk7Tpyhn.jpg,[France],2024-01-17,[fr],Released,5.539,38,2020,2024,90.0,[Comédie],,,
9709,1200277,tt7264528,fr,Vous n’êtes pas seuls,"Léo, un livreur de pizza solitaire et anxieux,...",0.3962,/hV0GMf8Tjag4upsPh1UY0MRAxNK.jpg,[Canada],2024-12-12,[fr],Released,7.000,1,2020,2024,105.0,"[Romance, Science-Fiction, Comédie, Horreur]",,,
9710,1211886,tt4726640,ht,Kidnapping Inc.,Le fils d'un riche candidat à la présidence ha...,0.3690,/rAU6nOHcR3NQTpxgPF4yoeWFW9r.jpg,"[Canada, France, Haiti]",2024-01-22,"[fr, ht]",Released,8.667,3,2020,2024,107.0,"[Comédie, Thriller, Drame]",,,


                                                    Machine Learning

In [102]:


films = films.copy()

# nouveau dataframe avec uniquement les deux premiers acteurs, actrices et un réalisateur 
films[['acteurs_1', 'acteurs_2']] = films['actors'].apply(lambda x: pd.Series(x[:2]) if isinstance(x, list) else pd.Series([None, None]))  
films['actrices'] = films['actresses'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None)
films['realisateurs'] = films['directors'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None)

films['acteurs_1'] = films['acteurs_1'].fillna('unknown_actor')
films['acteurs_2'] = films['acteurs_2'].fillna('unknown_actor')
films['actrices'] = films['actrices'].fillna('unknown_actress')
films['realisateurs'] = films['realisateurs'].fillna('unknown_director')
films['genres'] = films['genres'].fillna('unknown_genre')

# Suppression des colonnes actors, acrtress et directors
films = films.drop(columns=['tconst','actors', 'actresses', 'directors'])    

# Nettoyage des lignes manquantes
#films = films.dropna()

films.reset_index(drop=True, inplace=True)

films


Unnamed: 0,id,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,status,vote_average,vote_count,decennie,startYear,runtimeMinutes,genres,acteurs_1,acteurs_2,actrices,realisateurs
0,8773,fr,L'Amour à vingt ans,Love at Twenty unites five directors from five...,3.7700,/aup2QCYCsyEeQfpboXy0f4uj8aE.jpg,"['DE', 'FR', 'IT', 'JP', 'PL']",1962-06-22,"['it', 'ja', 'pl', 'fr', 'de']",Released,6.700,41,1960,1962,110.0,"[Drama, Romance]",Jean-Pierre Léaud,Patrick Auffay,Marie-France Pisier,Shintarô Ishihara
1,24,en,Kill Bill: Vol. 1,"An assassin is shot by her ruthless employer, ...",31.5560,/v7TaX8kXMXs5yFFGR41guUDNcnB.jpg,['US'],2003-10-10,"['en', 'ja', 'fr']",Released,7.970,15748,2000,2003,111.0,"[Action, Crime, Thriller]",David Carradine,Michael Madsen,Uma Thurman,Quentin Tarantino
2,75,en,Mars Attacks!,A fleet of Martian spacecraft surrounds the wo...,21.0510,/hll4O5vSAfnZDb6JbnP06GPtz7b.jpg,['US'],1996-12-12,"['en', 'fr']",Released,6.394,4864,1990,1996,106.0,"[Comedy, Sci-Fi]",Jack Nicholson,Jack Nicholson,Sarah Jessica Parker,unknown_director
3,76,en,Before Sunrise,A young man and woman meet on a train in Europ...,22.3250,/kf1Jb1c2JAOqjuzA3H4oDM263uB.jpg,"['AT', 'CH', 'US']",1995-01-27,"['en', 'de', 'fr']",Released,7.968,3547,1990,1995,101.0,"[Comedy, Drama, Romance]",Ethan Hawke,Hanno Pöschl,Julie Delpy,Richard Linklater
4,80,en,Before Sunset,"Nine years later, Jesse travels across Europe ...",16.1840,/gycdE1ARByGQcK4fYR2mgpU6OO.jpg,['US'],2004-02-10,"['en', 'fr']",Released,7.818,2943,2000,2004,80.0,"[Drama, Romance]",Ethan Hawke,Vernon Dobtcheff,Julie Delpy,Richard Linklater
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9707,1000085,fr,Morlaix,Jeune lycéenne marquée par le récent décès de ...,0.2135,/nUTE76LKG45ntFkgGFz1lpluzQR.jpg,"[Spain, France]",2025-03-14,[fr],Released,0.000,0,2020,2025,124.0,[Drame],unknown_actor,unknown_actor,unknown_actress,unknown_director
9708,1019864,fr,Comme un prince,"Souleyman, 27 ans, champion de boxe en pleine ...",0.3786,/esp2ki9rwiZzwpK5pz2gk7Tpyhn.jpg,[France],2024-01-17,[fr],Released,5.539,38,2020,2024,90.0,[Comédie],unknown_actor,unknown_actor,unknown_actress,unknown_director
9709,1200277,fr,Vous n’êtes pas seuls,"Léo, un livreur de pizza solitaire et anxieux,...",0.3962,/hV0GMf8Tjag4upsPh1UY0MRAxNK.jpg,[Canada],2024-12-12,[fr],Released,7.000,1,2020,2024,105.0,"[Romance, Science-Fiction, Comédie, Horreur]",unknown_actor,unknown_actor,unknown_actress,unknown_director
9710,1211886,ht,Kidnapping Inc.,Le fils d'un riche candidat à la présidence ha...,0.3690,/rAU6nOHcR3NQTpxgPF4yoeWFW9r.jpg,"[Canada, France, Haiti]",2024-01-22,"[fr, ht]",Released,8.667,3,2020,2024,107.0,"[Comédie, Thriller, Drame]",unknown_actor,unknown_actor,unknown_actress,unknown_director


In [103]:
# Exportation du dataframe en csv
films.to_csv('C:/Users/sirnb/OneDrive/Bureau/WildCode/Projets/Projet_2_App/Scripts/datasets/raw/films.csv', index=False)
films

Unnamed: 0,id,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,status,vote_average,vote_count,decennie,startYear,runtimeMinutes,genres,acteurs_1,acteurs_2,actrices,realisateurs
0,8773,fr,L'Amour à vingt ans,Love at Twenty unites five directors from five...,3.7700,/aup2QCYCsyEeQfpboXy0f4uj8aE.jpg,"['DE', 'FR', 'IT', 'JP', 'PL']",1962-06-22,"['it', 'ja', 'pl', 'fr', 'de']",Released,6.700,41,1960,1962,110.0,"[Drama, Romance]",Jean-Pierre Léaud,Patrick Auffay,Marie-France Pisier,Shintarô Ishihara
1,24,en,Kill Bill: Vol. 1,"An assassin is shot by her ruthless employer, ...",31.5560,/v7TaX8kXMXs5yFFGR41guUDNcnB.jpg,['US'],2003-10-10,"['en', 'ja', 'fr']",Released,7.970,15748,2000,2003,111.0,"[Action, Crime, Thriller]",David Carradine,Michael Madsen,Uma Thurman,Quentin Tarantino
2,75,en,Mars Attacks!,A fleet of Martian spacecraft surrounds the wo...,21.0510,/hll4O5vSAfnZDb6JbnP06GPtz7b.jpg,['US'],1996-12-12,"['en', 'fr']",Released,6.394,4864,1990,1996,106.0,"[Comedy, Sci-Fi]",Jack Nicholson,Jack Nicholson,Sarah Jessica Parker,unknown_director
3,76,en,Before Sunrise,A young man and woman meet on a train in Europ...,22.3250,/kf1Jb1c2JAOqjuzA3H4oDM263uB.jpg,"['AT', 'CH', 'US']",1995-01-27,"['en', 'de', 'fr']",Released,7.968,3547,1990,1995,101.0,"[Comedy, Drama, Romance]",Ethan Hawke,Hanno Pöschl,Julie Delpy,Richard Linklater
4,80,en,Before Sunset,"Nine years later, Jesse travels across Europe ...",16.1840,/gycdE1ARByGQcK4fYR2mgpU6OO.jpg,['US'],2004-02-10,"['en', 'fr']",Released,7.818,2943,2000,2004,80.0,"[Drama, Romance]",Ethan Hawke,Vernon Dobtcheff,Julie Delpy,Richard Linklater
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9707,1000085,fr,Morlaix,Jeune lycéenne marquée par le récent décès de ...,0.2135,/nUTE76LKG45ntFkgGFz1lpluzQR.jpg,"[Spain, France]",2025-03-14,[fr],Released,0.000,0,2020,2025,124.0,[Drame],unknown_actor,unknown_actor,unknown_actress,unknown_director
9708,1019864,fr,Comme un prince,"Souleyman, 27 ans, champion de boxe en pleine ...",0.3786,/esp2ki9rwiZzwpK5pz2gk7Tpyhn.jpg,[France],2024-01-17,[fr],Released,5.539,38,2020,2024,90.0,[Comédie],unknown_actor,unknown_actor,unknown_actress,unknown_director
9709,1200277,fr,Vous n’êtes pas seuls,"Léo, un livreur de pizza solitaire et anxieux,...",0.3962,/hV0GMf8Tjag4upsPh1UY0MRAxNK.jpg,[Canada],2024-12-12,[fr],Released,7.000,1,2020,2024,105.0,"[Romance, Science-Fiction, Comédie, Horreur]",unknown_actor,unknown_actor,unknown_actress,unknown_director
9710,1211886,ht,Kidnapping Inc.,Le fils d'un riche candidat à la présidence ha...,0.3690,/rAU6nOHcR3NQTpxgPF4yoeWFW9r.jpg,"[Canada, France, Haiti]",2024-01-22,"[fr, ht]",Released,8.667,3,2020,2024,107.0,"[Comédie, Thriller, Drame]",unknown_actor,unknown_actor,unknown_actress,unknown_director


In [104]:
# Custom transformer for MultiLabelBinarizer
class MultiLabelBinarizerPipelineFriendly(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.mlb = MultiLabelBinarizer()

    def fit(self, X, y=None):
        self.mlb.fit(X)
        return self

    def transform(self, X):
        return self.mlb.transform(X)

    def get_feature_names_out(self, input_features=None):
        return self.mlb.classes_
    
# Define the ColumnTransformer and the pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('acteurs', OneHotEncoder(), ['acteurs_1', 'acteurs_2','actrices']),
        ('réalisateur', OneHotEncoder(), ['realisateurs']),
        ('status', OneHotEncoder (), ['status']),
        ('genres', MultiLabelBinarizerPipelineFriendly(), 'genres'),
        ('annee', OrdinalEncoder(), ['decennie']),
        ('note', 'passthrough', ['vote_average'])
    ]
)

# Create the pipeline with preprocessing
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor)
])

processed_data = pipeline.fit_transform(films)

from scipy.sparse import issparse

if issparse(processed_data):
    processed_data = processed_data.toarray()

df_processed = pd.DataFrame(
    processed_data.toarray() if issparse(processed_data) else processed_data,
    columns=pipeline.named_steps['preprocessor'].get_feature_names_out()
)
#df_processed = pd.concat([films["id"], df_processed], axis=1)
#df_processed

k= 5
nn_model = NearestNeighbors(n_neighbors=k+1, algorithm='auto', metric='euclidean')
nn_model.fit(df_processed)


# Exemple de recommandation pour un item
def recommend_similar_items(index: int, data: pd.DataFrame=df_processed, model: NearestNeighbors=nn_model, original_data: pd.DataFrame=films, n_neighbors: int=3) -> tuple[list[float], list[float]]:
    """
    Trouve les n éléments les plus proches pour un élément donné.

    Args:
        index: Index de l'élément de référence.
        data: Données transformées utilisées pour KNN.
        model: Modèle KNN pré-entraîné.
        original_data: Données originales (pour affichage).
        n_neighbors: Nombre de voisins à recommander.

    Returns:
        voisins: Indices et distances des voisins.
    """
    

    _, indices = model.kneighbors(data, n_neighbors=n_neighbors) # _ nous permet d'ignorer la variable distance
    print(f"Recommandations pour l'élément {index}:\n")
    display(original_data.iloc[indices[index][0:]]) # On veut afficher uniquement les voisins et pas le point de référence avec cad
                              #(si je fais un affichage des voisins de l index 0 je veux pas le point 0 qui s affiche)

recommend_similar_items(index=44, n_neighbors=11)


Recommandations pour l'élément 44:



Unnamed: 0,id,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,status,vote_average,vote_count,decennie,startYear,runtimeMinutes,genres,acteurs_1,acteurs_2,actrices,realisateurs
44,508,en,Love Actually,Eight London couples try to deal with their re...,19.095,/7QPeVsr9rcFU9Gl90yg0gTOTpVv.jpg,"['FR', 'GB']",2003-09-07,"['en', 'fr', 'pt']",Released,7.105,5907,2000,2003,135.0,"[Comedy, Drama, Romance]",Hugh Grant,Liam Neeson,Martine McCutcheon,Richard Curtis
98,1555,fr,L'Auberge espagnole,A strait-laced French student moves into an ap...,9.534,/qKq4uLZDAJhBfpRsfPRYouQ5jW5.jpg,"['FR', 'ES']",2002-06-19,"['it', 'fr', 'es', 'en', 'ca', 'da', 'de']",Released,7.105,1004,2000,2002,122.0,"[Comedy, Drama, Romance]",Romain Duris,Federico D'Anna,Judith Godrèche,Cédric Klapisch
936,17379,en,Last Holiday,The discovery that she has a terminal illness ...,12.293,/khcU8msd2NooJwmcvwbWguvkCzU.jpg,['US'],2006-01-13,"['en', 'fr']",Released,7.098,612,2000,2006,112.0,"[Comedy, Drama, Romance]",LL Cool J,Timothy Hutton,Queen Latifah,Wayne Wang
4642,44524,fr,Rois et Reine,"Shortly before her wedding, art gallery direct...",3.303,/oNG2gAM1xqZnV8iTXn5qXF8IMyv.jpg,['FR'],2004-09-03,"['de', 'en', 'fr']",Released,7.2,67,2000,2004,150.0,"[Comedy, Drama, Romance]",Geoffrey Carey,Thierry Bosc,Emmanuelle Devos,Arnaud Desplechin
6037,77284,fr,Osmose,"Abel awash in his conquests phone numbers, Rem...",0.901,/ulG3VmpOtj1zflul4E3L9KMEyuT.jpg,['FR'],2004-05-05,"['en', 'fr']",Released,7.0,2,2000,2003,75.0,"[Comedy, Drama, Romance]",Romain Duris,Clément Sibony,Mathilde Bertrandy,Raphael Fejtö
441,8424,fr,Jeux d'enfants,"As adults, best friends Julien and Sophie cont...",12.664,/vvjC5LSMANKvGGDHjIWj1YomPdN.jpg,"['BE', 'FR', 'US']",2003-09-17,['fr'],Released,7.26,1660,2000,2003,93.0,"[Comedy, Drama, Romance]",Guillaume Canet,Thibault Verhaeghe,Marion Cotillard,Yann Samuell
1035,19913,en,(500) Days of Summer,"Tom, greeting-card writer and hopeless romanti...",34.459,/f9mbM0YMLpYemcWx6o2WeiYQLDP.jpg,['US'],2009-07-17,"['en', 'fr', 'sv']",Released,7.267,9091,2000,2009,95.0,"[Comedy, Drama, Romance]",Joseph Gordon-Levitt,Geoffrey Arend,Zooey Deschanel,Marc Webb
728,12555,cs,Obsluhoval jsem anglického krále,"Prague, Czechoslovakia, during the inter-war p...",4.593,/mwuabUjZMSg8ngHScjcKCmFO0jA.jpg,"['CZ', 'SK']",2007-01-11,"['cs', 'en', 'fr', 'de']",Released,6.931,80,2000,2006,113.0,"[Comedy, Drama, Romance]",Ivan Barnev,Oldrich Kaiser,Julia Jentsch,Jirí Menzel
4066,25985,en,Jump Tomorrow,"When shy, straitlaced George (Tunde Adebimpe) ...",1.798,/kYEQLfbrV7HITy5RuEHLhEI44Ty.jpg,"['GB', 'US']",2001-11-09,"['en', 'fr', 'es']",Released,6.9,15,2000,2001,97.0,"[Comedy, Drama, Romance]",Tunde Adebimpe,Hippolyte Girardot,Amy Sedaris,Joel Hopkins
412,7973,ar,سكر بنات,In a beauty salon in Beirut the lives of five ...,7.677,/jvAwQDPpXRfeJZpiBvgc2HxLsnn.jpg,"['LB', 'FR']",2007-08-09,"['fr', 'ar']",Released,6.894,199,2000,2007,95.0,"[Comedy, Drama, Romance]",Adel Karam,Dimitri Staneofski,Nadine Labaki,Nadine Labaki


# Même modèle mais avec des paramètres différents 

In [105]:
import pandas as pd
import joblib
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MultiLabelBinarizer
from scipy.sparse import issparse

# ---------------------- Custom Transformer ----------------------
class MultiLabelBinarizerPipelineFriendly(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.mlb = MultiLabelBinarizer()

    def fit(self, X, y=None):
        self.mlb.fit(X)
        return self

    def transform(self, X):
        return self.mlb.transform(X)

    def get_feature_names_out(self, input_features=None):
        return [f"genres_{cls}" for cls in self.mlb.classes_]

# ---------------------- Prétraitement Optimisé ----------------------
preprocessor = ColumnTransformer(
    transformers=[
        ('acteurs', OneHotEncoder(), ['acteurs_1', 'acteurs_2', 'actrices']),
        ('realisateur', OneHotEncoder(), ['realisateurs']),
        ('status', OneHotEncoder(), ['status']),
        ('genres', MultiLabelBinarizerPipelineFriendly(), 'genres'),
        ('annee', OrdinalEncoder(), ['decennie']),
        ('note', 'passthrough', ['vote_average']),
        ('popularite', StandardScaler(), ['popularity']),
        ('duree', StandardScaler(), ['runtimeMinutes'])
    ]
)

pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

# ---------------------- Chargement des données ----------------------
films = pd.read_csv("datasets/raw/films.csv")  # Chargez vos données ici

# Vérifiez s'il existe déjà un fichier traité
try:
    df_processed = joblib.load('datasets/raw/processed_films.pkl')
    nn_model = joblib.load('datasets/raw/nn_model.pkl')
    distances_all, indices_all = joblib.load('datasets/raw/nn_distances.pkl')
except FileNotFoundError:
    processed_data = pipeline.fit_transform(films)
    if issparse(processed_data):
        processed_data = processed_data.toarray()
    columns = pipeline.named_steps['preprocessor'].get_feature_names_out()
    df_processed = pd.DataFrame(processed_data, columns=columns)

    # Sauvegarde
    joblib.dump(df_processed, 'datasets/raw/processed_films.pkl')

    nn_model = NearestNeighbors(n_neighbors=10, algorithm='auto', metric='euclidean')
    nn_model.fit(df_processed)
    joblib.dump(nn_model, 'datasets/raw/nn_model.pkl')

    distances_all, indices_all = nn_model.kneighbors(df_processed)
    joblib.dump((distances_all, indices_all), 'datasets/raw/nn_distances.pkl')

# ---------------------- Fonction de recommandation ----------------------
def recommend_similar_items(index: int,
                             data: pd.DataFrame = df_processed,
                             model: NearestNeighbors = nn_model,
                             original_data: pd.DataFrame = films,
                             distances=distances_all,
                             indices=indices_all,
                             n_neighbors: int = 5,
                             sort_by: str = None,
                             min_popularity: float = None,
                             runtime_range: tuple = None) -> pd.DataFrame:

    neighbor_distances = distances[index][0:n_neighbors+1]
    neighbors_idx = indices[index][0:n_neighbors+1]

    recommendations = original_data.iloc[neighbors_idx].copy()
    recommendations['distance'] = neighbor_distances

    if min_popularity is not None:
        recommendations = recommendations[recommendations['popularity'] >= min_popularity]

    if runtime_range is not None:
        min_runtime, max_runtime = runtime_range
        recommendations = recommendations[
            recommendations['runtimeMinutes'].between(min_runtime, max_runtime)
        ]

    if sort_by and sort_by in recommendations.columns:
        recommendations = recommendations.sort_values(by=sort_by, ascending=False)

    print(f"\nRecommandations pour le film : {original_data.iloc[index]['original_title']}\n")
    return recommendations

# ---------------------- Exemple d'utilisation ----------------------
display(recommend_similar_items(
    index= 9295,
    n_neighbors=10
))

#print(recommandations[['original_title', 'popularity', 'runtimeMinutes', 'distance']])


Recommandations pour le film : Survivre



Unnamed: 0,id,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,status,...,vote_count,decennie,startYear,runtimeMinutes,genres,acteurs_1,acteurs_2,actrices,realisateurs,distance
9295,1196470,fr,Survivre,Une catastrophe bouleverse la planète : les pô...,16.7712,/901kRttzfLScns7ss7RyKqG6d2A.jpg,['France'],2024-06-19,['fr'],Released,...,237,2020,2024,90.0,"['Science-Fiction', 'Thriller', 'Aventure', 'A...",unknown_actor,unknown_actor,unknown_actress,unknown_director,0.0
9660,1280016,fr,2029,"Dans un univers post-apocalyptique, les clans ...",0.2405,/ynbUFP3Z267u7Sn4hNpJOowZbnv.jpg,['France'],2024-02-23,['fr'],Released,...,1,2020,2024,82.0,"['Thriller', 'Action', 'Aventure', 'Science-Fi...",unknown_actor,unknown_actor,unknown_actress,unknown_director,1.274665
9306,1000075,fr,Largo Winch : Le Prix de l'argent,"Depuis l’enlèvement brutal de son fils Noom, L...",8.3417,/hextHjNWD79MAJWux4ScvvzUdrR.jpg,"['Belgium', 'France']",2024-07-31,"['fr', 'en']",Released,...,243,2020,2024,100.0,"['Action', 'Aventure', 'Thriller']",unknown_actor,unknown_actor,unknown_actress,unknown_director,1.882353
9474,1261504,fr,Rapide,Max a toujours aimé aller vite. Elle ne sait p...,1.1871,/nQCNtMrIPTojOK2OqX5jcrpheW8.jpg,['France'],2025-04-16,['fr'],Released,...,17,2020,2025,98.0,"['Action', 'Aventure', 'Thriller']",unknown_actor,unknown_actor,unknown_actress,unknown_director,1.979621
9367,1108354,fr,KALI: L'ange de la Vengeance,"Lisa, alias Kali, ancienne des Forces Spéciale...",3.0072,/vN5g21wH8ptEknCMzl5G7ZWcVFe.jpg,['France'],2024-05-31,['fr'],Released,...,130,2020,2024,96.0,"['Action', 'Thriller', 'Aventure']",unknown_actor,unknown_actor,unknown_actress,unknown_director,2.279719
9333,1124127,en,Air Force One Down,Lors de sa première mission à bord d'Air Force...,4.1609,/4VBzIkP0EJWZysH5LiIhXxJwaDU.jpg,['United States of America'],2024-02-09,"['en', 'fr']",Released,...,143,2020,2024,84.0,"['Action', 'Thriller']",unknown_actor,unknown_actor,unknown_actress,unknown_director,2.365578
9382,1018634,fr,Roqya,Une femme vit de la contrebande d'animaux exot...,2.2573,/t0mY6CaJr1Cuta8HTIm6cAhkeKI.jpg,['France'],2024-05-15,['fr'],Released,...,26,2020,2024,97.0,"['Thriller', 'Horreur', 'Action']",unknown_actor,unknown_actor,unknown_actress,unknown_director,2.396579
9293,757725,en,Shadow Force,Kyrah et Isaac étaient les chefs d'une unité s...,21.5277,/uvamlgCZdrIgLlVULp44EYX4xdl.jpg,['United States of America'],2025-05-01,"['de', 'en', 'fr']",Released,...,12,2020,2025,103.0,"['Action', 'Thriller']",unknown_actor,unknown_actor,unknown_actress,unknown_director,2.408191
9409,1025527,fr,Six jours,"Malik, inspecteur de police, assiste impuissan...",1.2992,/foyiQu23zq4WhmffnQkFiNAvqcJ.jpg,['France'],2025-01-01,['fr'],Released,...,27,2020,2025,95.0,"['Thriller', 'Action']",unknown_actor,unknown_actor,unknown_actress,unknown_director,2.412225
9446,1242765,en,Wynonna Earp: Vengeance,Wynonna Earp rentre chez elle pour affronter s...,0.9529,/xobQ2MsXEoKcs3OdWNvY4vdFeVy.jpg,['Canada'],2024-09-13,"['en', 'fr']",Released,...,22,2020,2024,96.0,"['Action', 'Thriller']",unknown_actor,unknown_actor,unknown_actress,unknown_director,2.413073


# Modèle prédisant un ou plus de films ayant au moins un des acteurs/actrices

In [90]:
def recommend_by_actors(index: int,
                        original_data: pd.DataFrame = films,
                        min_popularity: float = None,
                        decade: str = None,
                        sort_by: str = 'popularity',
                        top_n: int = 10) -> pd.DataFrame:
    def normalize_name(name):
        return name.strip().lower().replace("é", "e").replace("-", " ")

    # Nettoyage
    for col in ['acteurs_1', 'acteurs_2', 'actrices']:
        original_data[col] = original_data[col].fillna('').astype(str).str.strip().str.title()

    film_ref = original_data.iloc[index]
    acteurs_ref = set([
        film_ref['acteurs_1'],
        film_ref['acteurs_2'],
        film_ref['actrices']
    ])

    def has_common_actor(row):
        acteurs_row = set([
            row['acteurs_1'],
            row['acteurs_2'],
            row['actrices']
        ])
        return not acteurs_ref.isdisjoint(acteurs_row)

    filtered = original_data[original_data.index != index]
    filtered = filtered[filtered.apply(has_common_actor, axis=1)]

    if min_popularity is not None:
        filtered = filtered[filtered['popularity'] >= min_popularity]

    if decade is not None:
        filtered = filtered[filtered['decennie'] == decade]

    if sort_by and sort_by in filtered.columns:
        filtered = filtered.sort_values(by=sort_by, ascending=False)

    print(f"\nFilms avec {', '.join(acteurs_ref)} comme acteur(s)/actrice(s):\n")
    print(f"\nRecommandations avec au moins un acteur commun avec « {film_ref['original_title']} » :\n")
    return filtered.head(top_n)

display(recommend_by_actors(
    index= 2112,
    min_popularity= None,
    top_n=10
))


Films avec Bae Doona, Ilian Bergala, Alain Chabat comme acteur(s)/actrice(s):


Recommandations avec au moins un acteur commun avec « #JeSuisLà » :



Unnamed: 0,id,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,status,vote_average,vote_count,decennie,startYear,runtimeMinutes,genres,acteurs_1,acteurs_2,actrices,realisateurs
6596,102207,fr,Sur la piste du Marsupilami,When reporter Dan Geraldo (Alain Chabat) arriv...,10.307,/yn3ydorp7YigPcnEZgNMvdNHPaF.jpg,"['BE', 'FR', 'MX']",2012-04-03,['fr'],Released,5.255,776,2010,2012,105.0,"['Adventure', 'Comedy', 'Family']",Jamel Debbouze,Alain Chabat,Géraldine Nakache,Alain Chabat
847,15097,fr,La Cité de la peur,A second-class horror movie has to be shown at...,9.381,/jBzDbxsEiCUCiYcpDLpvbQ6kN2U.jpg,['FR'],1994-03-09,"['en', 'fr', 'sv']",Released,7.507,1096,1990,1994,100.0,"['Comedy', 'Horror']",Alain Chabat,Alain Chabat,Chantal Lauby,Alain Berbérian
23,300,fr,La Science des rêves,A man entranced by his dreams and imagination ...,8.741,/1qCq228LsNtUseCnNE7Naw6NBUz.jpg,"['FR', 'IT']",2006-02-11,"['en', 'fr', 'es']",Released,7.052,655,2000,2006,105.0,"['Comedy', 'Drama', 'Fantasy']",Gael García Bernal,Alain Chabat,Charlotte Gainsbourg,Michel Gondry
291,4482,fr,Gazon maudit,"After learning of her husband's infidelities, ...",8.523,/xlLnHizshGBwMiNrnop2kg2nYzE.jpg,['FR'],1995-01-18,"['fr', 'es']",Released,6.0,270,1990,1995,104.0,"['Comedy', 'Drama', 'Romance']",Alain Chabat,Ticky Holgado,Victoria Abril,Josiane Balasko
3141,451500,fr,Santa & Cie,Christmas is on its way and with it disaster. ...,8.311,/mrz2fC1F9H8kbdKu2vOwyeMAzvg.jpg,['FR'],2017-12-06,"['fr', 'pt']",Released,6.281,534,2010,2017,92.0,"['Comedy', 'Family', 'Fantasy']",Alain Chabat,Pio Marmaï,Golshifteh Farahani,Alain Chabat
1812,735697,fr,Incroyable mais vrai,Alain and Marie moved to the suburb house of t...,7.418,/yBLlwTXweDzBKgEbYz5ZVTRQaZX.jpg,['FR'],2022-06-15,['fr'],Released,6.019,232,2020,2022,74.0,"['Comedy', 'Drama', 'Fantasy']",Alain Chabat,Benoît Magimel,Léa Drucker,Quentin Dupieux
7476,183836,fr,Les Gamins,"Newly engaged, Thomas meets his future father-...",6.771,/tRCGzqHllKheDaIlr5ACbRb4pXm.jpg,['FR'],2013-04-17,['fr'],Released,6.1,489,2010,2013,95.0,['Comedy'],Alain Chabat,Max Boublil,Sandrine Kiberlain,Anthony Marciano
7454,179150,fr,Réalité,A wanna-be director is given 48 hours by a pro...,6.729,/l6aty0lfqsTefZ5xduwT7D44N1q.jpg,"['BE', 'FR']",2014-08-28,"['fr', 'en']",Released,7.188,401,2010,2014,95.0,['Comedy'],Alain Chabat,Jonathan Lambert,Élodie Bouchez,Quentin Dupieux
1037,19955,fr,Prête-moi ta main,"Life is easy for 43-year-old Luis, a happy sin...",6.608,/jfeJwdtZpsAR6ap9kSrBg7c3bSL.jpg,['FR'],2006-11-01,['fr'],Released,6.374,273,2000,2006,90.0,"['Comedy', 'Romance']",Alain Chabat,Wladimir Yordanoff,Charlotte Gainsbourg,Éric Lartigau
953,17684,fr,Chouchou,"Choukri, alias Chouchou, a transvestite Maghre...",6.309,/n5hHf0x8i62kbUycqHgkop7jUDy.jpg,['FR'],2003-03-19,['fr'],Released,5.3,417,2000,2003,105.0,['Comedy'],Gad Elmaleh,Alain Chabat,Catherine Frot,Merzak Allouache


In [46]:
def normalize_name(name):
    return name.strip().lower().replace("é", "e").replace("è", "e").replace("-", " ").replace("_"," ").replace("ê"," ").replace("ô","o")

def get_films_by_actor(actor_name: str, original_data: pd.DataFrame = films) -> pd.DataFrame:
    # Normaliser les colonnes d'acteurs
    for col in ['acteurs_1', 'acteurs_2', 'actrices']:
        original_data[col] = original_data[col].fillna('').astype(str).apply(normalize_name)

    actor_name_normalized = normalize_name(actor_name)

    filtered = original_data[
        (original_data['acteurs_1'] == actor_name_normalized) |
        (original_data['acteurs_2'] == actor_name_normalized) |
        (original_data['actrices'] == actor_name_normalized)
    ]

    print(f"\nFilms avec l'acteur/l'actrice : {actor_name.title()}\n")
    return filtered[['original_title', 'decennie', 'popularity', 'vote_average']]
display(get_films_by_actor("Keanu Reeves"))



Films avec l'acteur/l'actrice : Keanu Reeves



Unnamed: 0,original_title,decennie,popularity,vote_average
45,The Matrix Revolutions,2000.0,26.049,6.705
128,Sweet November,2000.0,15.02,6.87
372,Something's Gotta Give,2000.0,13.93,6.622
1893,John Wick: Chapter 4,2020.0,1170.178,7.926


In [47]:
def recommend_by_director(index: int,
                          original_data: pd.DataFrame = films,
                          min_popularity: float = None,
                          decade: str = None,
                          sort_by: str = 'popularity',
                          top_n: int = 10) -> pd.DataFrame:
    def normalize_name(name):
        return name.strip().lower().replace("é", "e").replace("-", " ")

    # Nettoyage du nom du réalisateur
    original_data['realisateurs'] = original_data['realisateurs'].fillna('').astype(str).apply(normalize_name)

    film_ref = original_data.iloc[index]
    director_ref = normalize_name(film_ref['realisateurs'])

    # Filtrer films avec le même réalisateur (exact match)
    filtered = original_data[(original_data.index != index) & (original_data['realisateurs'] == director_ref)]

    if min_popularity is not None:
        filtered = filtered[filtered['popularity'] >= min_popularity]

    if decade is not None:
        filtered = filtered[filtered['decennie'] == decade]

    if sort_by and sort_by in filtered.columns:
        filtered = filtered.sort_values(by=sort_by, ascending=False)

    print(f"\nFilms réalisés par {director_ref.title()} :\n")
    print(f"\nRecommandations avec le même réalisateur que « {film_ref['original_title']} » :\n")
    return filtered.head(top_n)

display(recommend_by_director(
    index=44,
    sort_by='popularity',
    min_popularity=None,
    top_n=10
))


Films réalisés par James Cameron :


Recommandations avec le même réalisateur que « Titanic » :



Unnamed: 0,id,tconst,original_language,original_title,overview,popularity,poster_path,production_countries,release_date,spoken_languages,...,vote_average,vote_count,decennie,startYear,runtimeMinutes,genres,acteurs_1,acteurs_2,actrices,realisateurs
3813,36955,tt0111503,en,True Lies,"A fearless, globe-trotting, terrorist-battling...",32.758,/pweFTnzzTfGK68woSVkiTgjLzWm.jpg,['US'],1994-07-15,"['en', 'fr', 'ar', 'de']",...,7.0,3518,1990.0,1994.0,141.0,"['Action', 'Comedy', 'Thriller']",arnold schwarzenegger,tom arnold,jamie lee curtis,james cameron
