In [22]:
import duckdb
import pandas as pd
import sys
from pathlib import Path

# Project root on the author's machine. Every other path below is derived from it,
# so relocating the project only requires editing this one line.
project_root = Path(r"F:\DataAnalystSimplon\Projet_recommandation_film")

# Make the project's `src` directory importable.
# Guarded so that re-running this cell does not stack duplicate entries on sys.path.
src_dir = str(project_root / "src")
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

# Cleaned TMDB movie table used as the base of the enrichment.
csv_path = project_root / "data" / "clean" / "tmdb_final.csv"

# IMDb TSV dumps (people, crew ids per title, principal cast/crew per title).
# NOTE(review): the folder name "row" is kept exactly as in the original paths;
# it may be a typo for "raw" — confirm on disk before renaming.
imdb_dir = project_root / "data" / "row"
name_path = imdb_dir / "name.basics.tsv"
crew_path = imdb_dir / "title.crew.tsv"
title_path = imdb_dir / "title.principals.tsv"

In [2]:
# Load the cleaned TMDB movie table; its `imdb_id` column is the key used by the
# IMDb filtering and merge cells further down.
movies = pd.read_csv(csv_path)

In [3]:
# Schema and non-null counts of the freshly loaded table.
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6200 entries, 0 to 6199
Data columns (total 20 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   backdrop_path                      4676 non-null   object 
 1   id                                 6200 non-null   int64  
 2   imdb_id                            6200 non-null   object 
 3   original_language                  6200 non-null   object 
 4   original_title                     6200 non-null   object 
 5   overview                           6200 non-null   object 
 6   poster_path                        6200 non-null   object 
 7   release_date                       6200 non-null   object 
 8   runtime                            6200 non-null   int64  
 9   title                              6200 non-null   object 
 10  video                              6200 non-null   bool   
 11  vote_average                       6200 non-null   float

In [4]:
# DuckDB connection (in-memory database, rebuilt every time this cell runs).
con = duckdb.connect(database=':memory:')

# Create one table per IMDb TSV file. The files are tab-separated and use the
# two-character marker \N for missing values, which is mapped to SQL NULL here.
imdb_sources = {
    "crew": crew_path,
    "principals": title_path,
    "names": name_path,
}
for table_name, tsv_path in imdb_sources.items():
    # Double any single quote so the path remains a valid SQL string literal.
    tsv_literal = str(tsv_path).replace("'", "''")
    con.execute(
        f"CREATE TABLE {table_name} AS "
        f"SELECT * FROM read_csv_auto('{tsv_literal}', delim='\t', nullstr='\\N')"
    )

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

<_duckdb.DuckDBPyConnection at 0x1ea46cc8b70>

In [5]:
# IMDb title ids (tconst) of the movies to keep, as a plain Python list.
tconst_list = movies["imdb_id"].to_list()

In [6]:
# Remove the derived tables left over from a previous run: the cells below build
# them with plain CREATE TABLE, which fails when the table already exists.
for stale_table in ("crew_filtered", "principals_filtered", "crew_named", "actors_named"):
    con.execute(f"DROP TABLE IF EXISTS {stale_table}")

<_duckdb.DuckDBPyConnection at 0x1ea46cc8b70>

In [7]:
# Expose the selected movie ids to DuckDB as a one-column relation instead of
# splicing thousands of quoted ids into the SQL text: no manual quoting, and an
# empty id list no longer produces the invalid `IN ()`.
con.register("crew_movie_ids", pd.DataFrame({"tconst": tconst_list}, dtype="object"))

# Keep only the crew rows of the selected movies; the returned frame holds the row count.
con.execute("""
    CREATE TABLE crew_filtered AS
    SELECT * FROM crew
    WHERE tconst IN (SELECT tconst FROM crew_movie_ids)
""").df()

Unnamed: 0,Count
0,6189


In [8]:
# Expose the selected movie ids to DuckDB as a one-column relation instead of
# splicing thousands of quoted ids into the SQL text: no manual quoting, and an
# empty id list no longer produces the invalid `IN ()`.
con.register("principals_movie_ids", pd.DataFrame({"tconst": tconst_list}, dtype="object"))

# Keep only the principal cast/crew rows of the selected movies; the returned frame
# holds the row count.
con.execute("""
    CREATE TABLE principals_filtered AS
    SELECT * FROM principals
    WHERE tconst IN (SELECT tconst FROM principals_movie_ids)
""").df()

Unnamed: 0,Count
0,103114


In [9]:
# Project the id columns needed downstream. No join with `names` happens in this
# cell: `directors` / `writers` still hold comma-separated nconst ids, which a later
# cell resolves to person names.
# The TSV files are loaded with nullstr='\N', so missing credits are already SQL NULL;
# the former `CASE WHEN col = '\N' THEN ''` branches could therefore never match
# (a comparison with NULL is never true) and have been removed. Missing credits
# remain NULL exactly as before.
con.execute("""
    CREATE TABLE crew_named AS
    SELECT
        c.tconst,
        c.directors,
        c.writers
    FROM crew_filtered c
""").df()

Unnamed: 0,Count
0,6189


In [10]:
# One row per acting credit (category actor / actress) of the selected movies,
# with the person's name looked up in `names`. The LEFT JOIN keeps credits whose
# nconst has no match in `names`; their actor_name is NULL.
actors_named_sql = """
    CREATE TABLE actors_named AS
    SELECT 
        p.tconst, 
        n.primaryName AS actor_name,
        p.category
    FROM principals_filtered p
    LEFT JOIN names n
    ON p.nconst = n.nconst
    WHERE p.category IN ('actor','actress')
"""
con.execute(actors_named_sql).df()

Unnamed: 0,Count
0,46826


In [11]:
# Director and writer ids per movie, split from the comma-separated strings into lists.
crew_lists_sql = """
    SELECT tconst,
           split(directors, ',') AS director_list,
           split(writers, ',') AS writer_list
    FROM crew_named
"""

# Actor names per movie, collected into a single list per tconst.
actors_lists_sql = """
    SELECT tconst,
           list(actor_name) AS actor_list
    FROM actors_named
    GROUP BY tconst
"""

crew_lists = con.execute(crew_lists_sql).df()
actors_lists = con.execute(actors_lists_sql).df()

In [12]:
# Inspect the per-movie director / writer id lists (still nconst ids, not names).
# NOTE(review): a bare `crew_lists` as the last expression would give the rich table display.
print(crew_lists)

         tconst                                      director_list  \
0     tt0961097                                        [nm0074426]   
1     tt0962782                                        [nm0069342]   
2     tt0963989                                        [nm0415245]   
3     tt0969701                                        [nm0494504]   
4     tt0973844  [nm0000766, nm0000801, nm0000806, nm0001005, n...   
...         ...                                                ...   
6184  tt9369620                             [nm8263214, nm8441994]   
6185  tt9372054                                        [nm0084485]   
6186  tt9408232                                        [nm4766133]   
6187  tt9430820                                        [nm3251986]   
6188  tt9437598  [nm10326714, nm10326715, nm10326716, nm1032671...   

                                            writer_list  
0                                [nm0443592, nm0074426]  
1                     [nm6570587, nm0069342

In [13]:
# Inspect the per-movie actor name lists (one row per tconst).
# NOTE(review): a bare `actors_lists` as the last expression would give the rich table display.
print(actors_lists)

          tconst                                         actor_list
0      tt0035515  [Gisèle Pascal, Jean Parédès, André Roussin, L...
1      tt0064327  [Jean Parédès, Bernadette Lafont, Francis Lax,...
2      tt0034471  [Gisèle Pascal, Henri Poupon, Raimu, Louis Jou...
3      tt0207724  [Lucien Pascal, Marcel Pérès, Roger Saget, Ode...
4      tt0167000  [Gisèle Pascal, Simone Renant, Germaine Ledoye...
...          ...                                                ...
5268  tt10655822  [Louise Massin, Mona Behr, Benjamin Siksou, Ga...
5269   tt5821240                 [Ines Le Poullennec, Dylan Pedron]
5270   tt0176958                         [Muriel Simon, Joel Simon]
5271   tt0132649                            [Marie-Anne Malleville]
5272   tt5082718                              [Stéphane Ronchewski]

[5273 rows x 2 columns]


In [14]:
def build_credit_names_table(con, table_name, id_column, names_column):
    """Create (or replace) a table with the list of person names credited on each movie.

    The comma-separated nconst string in `crew_named.<id_column>` is exploded to one
    row per id, each id is looked up in `names`, and the results are aggregated back
    into one list per `tconst`.

    Parameters
    ----------
    con : DuckDB connection holding the `crew_named` and `names` tables.
    table_name : name of the table to create or replace.
    id_column : column of `crew_named` to resolve (`directors` or `writers`).
    names_column : name of the resulting list column.

    Notes
    -----
    * The three name arguments are interpolated into the SQL text as identifiers;
      pass trusted constants only.
    * Ids without a match in `names` fall back to the raw nconst.
    * Movies whose id string is NULL produce no rows and are absent from the table,
      as before.
    * Names are aggregated in the order the ids appear in the source string. The
      previous `ORDER BY 1` inside `array_agg` referred to the constant 1 rather than
      a column and imposed no order (the recorded output was not name-sorted).
    * The previous `[` / `]` stripping was dropped: the raw id strings are plain
      comma-separated ids and contain no brackets.
    """
    con.execute(f"""
        CREATE OR REPLACE TABLE {table_name} AS
        SELECT
            s.tconst,
            array_agg(COALESCE(n.primaryName, s.nconst) ORDER BY s.credit_rank) AS {names_column}
        FROM (
            SELECT
                ids.tconst,
                trim(ids.nconst_raw) AS nconst,
                ids.credit_rank
            FROM (
                -- Two unnest() calls in the same SELECT advance together, pairing
                -- every id with its 1-based position in the original string.
                SELECT
                    tconst,
                    unnest(split({id_column}, ',')) AS nconst_raw,
                    unnest(range(1, len(split({id_column}, ',')) + 1)) AS credit_rank
                FROM crew_named
            ) AS ids
        ) AS s
        LEFT JOIN names n ON s.nconst = n.nconst
        GROUP BY s.tconst
    """)


# 1) Directors: explode the ids, join with names, re-aggregate into a list of names.
build_credit_names_table(con, "directors_named", "directors", "director_names")

# 2) Writers: same logic.
build_credit_names_table(con, "writers_named", "writers", "writer_names")

<_duckdb.DuckDBPyConnection at 0x1ea46cc8b70>

In [15]:
# Pull both name tables into pandas first ...
directors_df = con.execute("SELECT * FROM directors_named").df()
writers_df = con.execute("SELECT * FROM writers_named").df()

# ... then show them, directors before writers.
print(directors_df)
print(writers_df)

          tconst                             director_names
0      tt3674140     [Wim Wenders, Juliano Ribeiro Salgado]
1      tt0021312  [Sergei Eisenstein, Grigoriy Aleksandrov]
2      tt0097884                              [Louis Malle]
3      tt3716530                           [Paul Verhoeven]
4      tt0092593                              [Louis Malle]
...          ...                                        ...
6168   tt4228810                           [Vincent Garenq]
6169  tt13460024                           [Thierry Berrod]
6170  tt11349900                              [Chloé Mazlo]
6171   tt5196472                             [Morgan Simon]
6172   tt3985394                              [Laurence Ly]

[6173 rows x 2 columns]
          tconst                                       writer_names
0      tt0421974  [Gérard Pirès, Gilles Malençon, Albert Uderzo,...
1      tt0082269  [Jean-Jacques Beineix, Daniel Odier, Jean Van ...
2     tt14851374                             [Charl

In [16]:
# Attach the per-movie actor list (left join: movies without acting credits get NaN).
# Dropping any existing `actor_list` first keeps the cell idempotent: re-running it
# would otherwise produce `actor_list_x` / `actor_list_y` columns.
# validate='many_to_one' asserts that `actors_lists` has one row per tconst, so the
# merge cannot duplicate movie rows.
movies = (
    movies
    .drop(columns='actor_list', errors='ignore')
    .merge(actors_lists, left_on='imdb_id', right_on='tconst', how='left', validate='many_to_one')
    .drop(columns='tconst')
)
movies

Unnamed: 0,backdrop_path,id,imdb_id,original_language,original_title,overview,poster_path,release_date,runtime,title,...,vote_average,vote_count,year,decade,genres_list,production_companies_name_list,production_countries_list,production_companies_country_list,spoken_languages_list,actor_list
0,/uJlc4aNPF3Y8yAqahJTKBwgwPVW.jpg,8773,tt0055747,fr,L'Amour à vingt ans,Love at Twenty unites five directors from five...,/aup2QCYCsyEeQfpboXy0f4uj8aE.jpg,1962-06-22,110,Love at Twenty,...,6.700,41,1962,1960,"['Drama', 'Romance']","['Ulysse Productions', 'Unitec Films', 'Cinese...","['DE', 'FR', 'IT', 'JP', 'PL']","['', 'NZ', 'IT', 'JP', 'DE', 'PL', '']","['it', 'ja', 'pl', 'fr', 'de']","[Marie-France Pisier, Eleonora Rossi Drago, Ch..."
1,/s9rWmLANsVlSV4XmuC0IUcseGzO.jpg,108,tt0108394,fr,Trois couleurs : Bleu,Julie is haunted by her grief after living thr...,/33wsWxzsNstI8N7dvuwzFmj1qBd.jpg,1993-08-01,98,Three Colors: Blue,...,7.700,1421,1993,1990,['Drama'],"['France 3 Cinéma', 'CED Productions', 'Miramax']","['FR', 'PL', 'CH']","['FR', '', 'US']","['fr', 'pl']","[Florence Pernel, Hugues Quester, Benoît Régen..."
2,/2eBL3wXDZqemW8SJ1RNxqtx6bgJ.jpg,109,tt0111507,fr,Trois couleurs : Blanc,Polish immigrant Karol Karol finds himself out...,/fdIet3NSa27gobMbaUml66oCQNT.jpg,1994-01-26,92,Three Colors: White,...,7.482,989,1994,1990,"['Comedy', 'Drama', 'Mystery']","['Le Studio Canal+', 'France 3 Cinéma', 'Eurim...","['FR', 'PL']","['FR', 'FR', 'FR', 'US']","['fr', 'hu', 'pl']","[Cezary Pazura, Jerzy Nowak, Zbigniew Zamachow..."
3,/A9IY3j3Hwf4Q8Q9w5QxSQPYSvCu.jpg,110,tt0111495,fr,Trois couleurs : Rouge,"Valentine, a student model in Geneva, struggle...",/JHmsBiX1tjCKqAul1lzC20WcAW.jpg,1994-05-12,100,Three Colors: Red,...,7.957,1160,1994,1990,"['Drama', 'Mystery', 'Romance']","['Zespół Filmowy TOR', 'Le Studio Canal+', 'Fr...","['FR', 'PL', 'CH']","['PL', 'FR', 'FR', 'CH']",['fr'],"[Jean Schlegel, Elzbieta Jasinska, Samuel Le B..."
4,/63HcRYJI4LKeo8sINlL71ZtlUFY.jpg,147,tt0053198,fr,Les Quatre Cents Coups,"For young Parisian boy Antoine Doinel, life is...",/12PuU23kkDLvTd0nb8hMlE3oShB.jpg,1959-06-03,99,The 400 Blows,...,8.071,1843,1959,1950,['Drama'],"['Les Films du Carrosse', 'Sédif Productions']",['FR'],"['FR', 'FR']","['en', 'fr']","[Albert Rémy, Richard Kanayan, Daniel Couturie..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6195,,228590,tt0197505,fr,Gala,"Gala (1961), a more polished version of Pourvu...",/fZ6jgGNN5gVDVn3M7F0QP5spfuP.jpg,1961-01-01,20,Gala,...,6.100,7,1961,1960,[],[],[],[],[],"[Claude Melki, Gésip Légitimus, Dolly Bell, Ge..."
6196,,228655,tt2007418,fr,Les trois disparitions de Soad Hosni,This haunting and beautifully formed documenta...,/ttVoObIPbqBflOguONw9Dm6w1qa.jpg,2011-07-10,70,The Three Disappearances of Soad Hosni,...,6.000,1,2011,2010,['Documentary'],"['Joun Films', 'Sharjah Art Foundation', 'CNC']","['FR', 'LB', 'AE']",[],"['ar', 'en', 'fr']",
6197,,231154,tt1982764,fr,Le repas fantastique,A gentleman having lunch at a hostelry experie...,/uD8Bgyo3kIaUHUUPU5PgMsZY4Wv.jpg,1903-01-18,2,Le repas fantastique,...,6.000,2,1903,1900,"['Comedy', 'Fantasy']",['Société Lumière'],['FR'],[],['xx'],
6198,,232151,tt0268168,fr,Le bain des dames de la cour,"Through an invisible keyhole, the viewer peeps...",/vBXTVwKN0FmCzW8R1gvnJCqnmM3.jpg,1904-10-01,1,Court Ladies Bathing,...,6.375,8,1904,1900,[],['Pathé Frères'],['FR'],[],['xx'],


In [17]:
# Attach the per-movie director names (left join: movies without a director row get NaN).
# Dropping any existing `director_names` first keeps the cell idempotent: re-running it
# would otherwise produce `director_names_x` / `director_names_y` columns.
# validate='many_to_one' asserts that `directors_df` has one row per tconst, so the
# merge cannot duplicate movie rows.
movies = (
    movies
    .drop(columns='director_names', errors='ignore')
    .merge(directors_df, left_on='imdb_id', right_on='tconst', how='left', validate='many_to_one')
    .drop(columns='tconst')
)
movies

Unnamed: 0,backdrop_path,id,imdb_id,original_language,original_title,overview,poster_path,release_date,runtime,title,...,vote_count,year,decade,genres_list,production_companies_name_list,production_countries_list,production_companies_country_list,spoken_languages_list,actor_list,director_names
0,/uJlc4aNPF3Y8yAqahJTKBwgwPVW.jpg,8773,tt0055747,fr,L'Amour à vingt ans,Love at Twenty unites five directors from five...,/aup2QCYCsyEeQfpboXy0f4uj8aE.jpg,1962-06-22,110,Love at Twenty,...,41,1962,1960,"['Drama', 'Romance']","['Ulysse Productions', 'Unitec Films', 'Cinese...","['DE', 'FR', 'IT', 'JP', 'PL']","['', 'NZ', 'IT', 'JP', 'DE', 'PL', '']","['it', 'ja', 'pl', 'fr', 'de']","[Marie-France Pisier, Eleonora Rossi Drago, Ch...","[François Truffaut, Marcel Ophüls, Shintarô Is..."
1,/s9rWmLANsVlSV4XmuC0IUcseGzO.jpg,108,tt0108394,fr,Trois couleurs : Bleu,Julie is haunted by her grief after living thr...,/33wsWxzsNstI8N7dvuwzFmj1qBd.jpg,1993-08-01,98,Three Colors: Blue,...,1421,1993,1990,['Drama'],"['France 3 Cinéma', 'CED Productions', 'Miramax']","['FR', 'PL', 'CH']","['FR', '', 'US']","['fr', 'pl']","[Florence Pernel, Hugues Quester, Benoît Régen...",[Krzysztof Kieslowski]
2,/2eBL3wXDZqemW8SJ1RNxqtx6bgJ.jpg,109,tt0111507,fr,Trois couleurs : Blanc,Polish immigrant Karol Karol finds himself out...,/fdIet3NSa27gobMbaUml66oCQNT.jpg,1994-01-26,92,Three Colors: White,...,989,1994,1990,"['Comedy', 'Drama', 'Mystery']","['Le Studio Canal+', 'France 3 Cinéma', 'Eurim...","['FR', 'PL']","['FR', 'FR', 'FR', 'US']","['fr', 'hu', 'pl']","[Cezary Pazura, Jerzy Nowak, Zbigniew Zamachow...",[Krzysztof Kieslowski]
3,/A9IY3j3Hwf4Q8Q9w5QxSQPYSvCu.jpg,110,tt0111495,fr,Trois couleurs : Rouge,"Valentine, a student model in Geneva, struggle...",/JHmsBiX1tjCKqAul1lzC20WcAW.jpg,1994-05-12,100,Three Colors: Red,...,1160,1994,1990,"['Drama', 'Mystery', 'Romance']","['Zespół Filmowy TOR', 'Le Studio Canal+', 'Fr...","['FR', 'PL', 'CH']","['PL', 'FR', 'FR', 'CH']",['fr'],"[Jean Schlegel, Elzbieta Jasinska, Samuel Le B...",[Krzysztof Kieslowski]
4,/63HcRYJI4LKeo8sINlL71ZtlUFY.jpg,147,tt0053198,fr,Les Quatre Cents Coups,"For young Parisian boy Antoine Doinel, life is...",/12PuU23kkDLvTd0nb8hMlE3oShB.jpg,1959-06-03,99,The 400 Blows,...,1843,1959,1950,['Drama'],"['Les Films du Carrosse', 'Sédif Productions']",['FR'],"['FR', 'FR']","['en', 'fr']","[Albert Rémy, Richard Kanayan, Daniel Couturie...",[François Truffaut]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6195,,228590,tt0197505,fr,Gala,"Gala (1961), a more polished version of Pourvu...",/fZ6jgGNN5gVDVn3M7F0QP5spfuP.jpg,1961-01-01,20,Gala,...,7,1961,1960,[],[],[],[],[],"[Claude Melki, Gésip Légitimus, Dolly Bell, Ge...",[Jean-Daniel Pollet]
6196,,228655,tt2007418,fr,Les trois disparitions de Soad Hosni,This haunting and beautifully formed documenta...,/ttVoObIPbqBflOguONw9Dm6w1qa.jpg,2011-07-10,70,The Three Disappearances of Soad Hosni,...,1,2011,2010,['Documentary'],"['Joun Films', 'Sharjah Art Foundation', 'CNC']","['FR', 'LB', 'AE']",[],"['ar', 'en', 'fr']",,[Rania Stephan]
6197,,231154,tt1982764,fr,Le repas fantastique,A gentleman having lunch at a hostelry experie...,/uD8Bgyo3kIaUHUUPU5PgMsZY4Wv.jpg,1903-01-18,2,Le repas fantastique,...,2,1903,1900,"['Comedy', 'Fantasy']",['Société Lumière'],['FR'],[],['xx'],,[Gaston Velle]
6198,,232151,tt0268168,fr,Le bain des dames de la cour,"Through an invisible keyhole, the viewer peeps...",/vBXTVwKN0FmCzW8R1gvnJCqnmM3.jpg,1904-10-01,1,Court Ladies Bathing,...,8,1904,1900,[],['Pathé Frères'],['FR'],[],['xx'],,


In [18]:
# Attach the per-movie writer names (left join: movies without a writer row get NaN).
# Dropping any existing `writer_names` first keeps the cell idempotent: re-running it
# would otherwise produce `writer_names_x` / `writer_names_y` columns.
# validate='many_to_one' asserts that `writers_df` has one row per tconst, so the
# merge cannot duplicate movie rows.
movies = (
    movies
    .drop(columns='writer_names', errors='ignore')
    .merge(writers_df, left_on='imdb_id', right_on='tconst', how='left', validate='many_to_one')
    .drop(columns='tconst')
)
movies

Unnamed: 0,backdrop_path,id,imdb_id,original_language,original_title,overview,poster_path,release_date,runtime,title,...,year,decade,genres_list,production_companies_name_list,production_countries_list,production_companies_country_list,spoken_languages_list,actor_list,director_names,writer_names
0,/uJlc4aNPF3Y8yAqahJTKBwgwPVW.jpg,8773,tt0055747,fr,L'Amour à vingt ans,Love at Twenty unites five directors from five...,/aup2QCYCsyEeQfpboXy0f4uj8aE.jpg,1962-06-22,110,Love at Twenty,...,1962,1960,"['Drama', 'Romance']","['Ulysse Productions', 'Unitec Films', 'Cinese...","['DE', 'FR', 'IT', 'JP', 'PL']","['', 'NZ', 'IT', 'JP', 'DE', 'PL', '']","['it', 'ja', 'pl', 'fr', 'de']","[Marie-France Pisier, Eleonora Rossi Drago, Ch...","[François Truffaut, Marcel Ophüls, Shintarô Is...","[François Truffaut, Shintarô Ishihara, Marcel ..."
1,/s9rWmLANsVlSV4XmuC0IUcseGzO.jpg,108,tt0108394,fr,Trois couleurs : Bleu,Julie is haunted by her grief after living thr...,/33wsWxzsNstI8N7dvuwzFmj1qBd.jpg,1993-08-01,98,Three Colors: Blue,...,1993,1990,['Drama'],"['France 3 Cinéma', 'CED Productions', 'Miramax']","['FR', 'PL', 'CH']","['FR', '', 'US']","['fr', 'pl']","[Florence Pernel, Hugues Quester, Benoît Régen...",[Krzysztof Kieslowski],"[Krzysztof Kieslowski, Agnieszka Holland, Slaw..."
2,/2eBL3wXDZqemW8SJ1RNxqtx6bgJ.jpg,109,tt0111507,fr,Trois couleurs : Blanc,Polish immigrant Karol Karol finds himself out...,/fdIet3NSa27gobMbaUml66oCQNT.jpg,1994-01-26,92,Three Colors: White,...,1994,1990,"['Comedy', 'Drama', 'Mystery']","['Le Studio Canal+', 'France 3 Cinéma', 'Eurim...","['FR', 'PL']","['FR', 'FR', 'FR', 'US']","['fr', 'hu', 'pl']","[Cezary Pazura, Jerzy Nowak, Zbigniew Zamachow...",[Krzysztof Kieslowski],"[Krzysztof Kieslowski, Agnieszka Holland, Edwa..."
3,/A9IY3j3Hwf4Q8Q9w5QxSQPYSvCu.jpg,110,tt0111495,fr,Trois couleurs : Rouge,"Valentine, a student model in Geneva, struggle...",/JHmsBiX1tjCKqAul1lzC20WcAW.jpg,1994-05-12,100,Three Colors: Red,...,1994,1990,"['Drama', 'Mystery', 'Romance']","['Zespół Filmowy TOR', 'Le Studio Canal+', 'Fr...","['FR', 'PL', 'CH']","['PL', 'FR', 'FR', 'CH']",['fr'],"[Jean Schlegel, Elzbieta Jasinska, Samuel Le B...",[Krzysztof Kieslowski],"[Krzysztof Kieslowski, Agnieszka Holland, Edwa..."
4,/63HcRYJI4LKeo8sINlL71ZtlUFY.jpg,147,tt0053198,fr,Les Quatre Cents Coups,"For young Parisian boy Antoine Doinel, life is...",/12PuU23kkDLvTd0nb8hMlE3oShB.jpg,1959-06-03,99,The 400 Blows,...,1959,1950,['Drama'],"['Les Films du Carrosse', 'Sédif Productions']",['FR'],"['FR', 'FR']","['en', 'fr']","[Albert Rémy, Richard Kanayan, Daniel Couturie...",[François Truffaut],"[François Truffaut, Marcel Moussy]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6195,,228590,tt0197505,fr,Gala,"Gala (1961), a more polished version of Pourvu...",/fZ6jgGNN5gVDVn3M7F0QP5spfuP.jpg,1961-01-01,20,Gala,...,1961,1960,[],[],[],[],[],"[Claude Melki, Gésip Légitimus, Dolly Bell, Ge...",[Jean-Daniel Pollet],[Jean-Daniel Pollet]
6196,,228655,tt2007418,fr,Les trois disparitions de Soad Hosni,This haunting and beautifully formed documenta...,/ttVoObIPbqBflOguONw9Dm6w1qa.jpg,2011-07-10,70,The Three Disappearances of Soad Hosni,...,2011,2010,['Documentary'],"['Joun Films', 'Sharjah Art Foundation', 'CNC']","['FR', 'LB', 'AE']",[],"['ar', 'en', 'fr']",,[Rania Stephan],[Rania Stephan]
6197,,231154,tt1982764,fr,Le repas fantastique,A gentleman having lunch at a hostelry experie...,/uD8Bgyo3kIaUHUUPU5PgMsZY4Wv.jpg,1903-01-18,2,Le repas fantastique,...,1903,1900,"['Comedy', 'Fantasy']",['Société Lumière'],['FR'],[],['xx'],,[Gaston Velle],
6198,,232151,tt0268168,fr,Le bain des dames de la cour,"Through an invisible keyhole, the viewer peeps...",/vBXTVwKN0FmCzW8R1gvnJCqnmM3.jpg,1904-10-01,1,Court Ladies Bathing,...,1904,1900,[],['Pathé Frères'],['FR'],[],['xx'],,,


In [19]:
# Schema check after the three merges (actor_list, director_names, writer_names added).
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6200 entries, 0 to 6199
Data columns (total 23 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   backdrop_path                      4676 non-null   object 
 1   id                                 6200 non-null   int64  
 2   imdb_id                            6200 non-null   object 
 3   original_language                  6200 non-null   object 
 4   original_title                     6200 non-null   object 
 5   overview                           6200 non-null   object 
 6   poster_path                        6200 non-null   object 
 7   release_date                       6200 non-null   object 
 8   runtime                            6200 non-null   int64  
 9   title                              6200 non-null   object 
 10  video                              6200 non-null   bool   
 11  vote_average                       6200 non-null   float

In [23]:
# Project-local helper; presumably resolved through the `src` directory that the
# first cell inserts into sys.path, so that cell must have run first.
from cinema_de_la_cite.data.save_dataframe_to_csv import save_dataframe_clean
# Export the enriched movie table.
# NOTE(review): the output location is decided inside the helper from `project_root`
# and the file name — confirm there where "tmdb_processed.csv" ends up.
save_dataframe_clean(movies, "tmdb_processed.csv", project_root)