### Pipeline, step by step

In [44]:
import sys
import os
sys.path.append(os.getcwd())

import logging
import pandas as pd
from data_pipelines.scrapers.filmladder import FilmladderScraper
from data_pipelines.scrapers.imdb import IMDBFetcher, IMDBScraper
from data_pipelines.scrapers.letterboxd import LetterboxdScraper
from data_pipelines.daily_pipeline import extract_unique_movies, get_new_movies
from data_pipelines.daily_pipeline import process_cinemas, process_screenings, process_enriched_movies
from data_pipelines.save_to_db import save_all_to_db

%load_ext autoreload
%autoreload 2

logging.basicConfig(level=logging.INFO)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Mark the start of a pipeline run in the log output.
logging.info("Starting daily data pipeline...")

# 1️⃣ Scrape Filmladder (returns two DataFrames)
# The result of run() is unpacked as (screenings, cinemas), in that order.
# NOTE(review): the recorded output shows a chromedriver lookup, so this
# presumably drives a real browser and needs Chrome installed -- confirm.
filmladder = FilmladderScraper()
screenings_df, cinemas_df = filmladder.run()

INFO:root:Starting daily data pipeline...
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [/Users/ardsnijders/.wdm/drivers/chromedriver/mac64/133.0.6943.141/chromedriver-mac-x64/chromedriver] found in cache


In [None]:
# 2️⃣ Remove duplicates (imax, 2d, etc), add unique ids
# NOTE(review): both names are rebound to their processed versions, so
# re-running this cell feeds already-processed frames back into the helpers.
# Confirm process_screenings / process_cinemas are safe to apply twice, or
# re-run the scrape cell first.
screenings_df = process_screenings(screenings_df)
cinemas_df = process_cinemas(cinemas_df)

In [25]:
# Derive the movie table from the processed screenings (deduplicated, judging
# by the helper's name), then preview it; head() shows five rows by default.
scraped_movies = extract_unique_movies(screenings_df)
scraped_movies.head()

Unnamed: 0,movie_id,title,year,movie_link
0,378909531a,favoriten,2024,https://www.filmladder.nl/film/favoriten-2024/...
1,430904b120,merckx,2025,https://www.filmladder.nl/film/merckx-2025/pop...
2,e7bc07ba2a,a complete unknown,2024,https://www.filmladder.nl/film/a-complete-unkn...
3,ff1d83f7ce,alpha,2024,https://www.filmladder.nl/film/alpha-2024/popu...
4,172e369640,anora,2024,https://www.filmladder.nl/film/anora-2024/popu...


In [39]:
# Spot-check: show every screening row recorded for one specific movie_id.
screenings_df.loc[screenings_df["movie_id"] == "74077e7959"]

Unnamed: 0,cinema_name,title,year,show_datetime,ticket_url,rating,movie_link,poster_url,movie_id,cinema_id
1753,pathé arena,vaiana 2 (ov),2024,2025-03-02 13:00:00+01:00,https://www.filmladder.nl/kaartjes/1208921225,6.8★,https://www.filmladder.nl/film/vaiana-2-ov-202...,https://assets.filmladder.nl/uploads/depot_ima...,74077e7959,a8798dbb51
2344,pathé de munt,vaiana 2 (ov),2024,2025-02-27 16:10:00+01:00,https://www.filmladder.nl/kaartjes/1208921681,6.8★,https://www.filmladder.nl/film/vaiana-2-ov-202...,https://assets.filmladder.nl/uploads/depot_ima...,74077e7959,6c790260aa
2345,pathé de munt,vaiana 2 (ov),2024,2025-02-28 17:30:00+01:00,https://www.filmladder.nl/kaartjes/1208921682,6.8★,https://www.filmladder.nl/film/vaiana-2-ov-202...,https://assets.filmladder.nl/uploads/depot_ima...,74077e7959,6c790260aa
2346,pathé de munt,vaiana 2 (ov),2024,2025-03-01 17:30:00+01:00,https://www.filmladder.nl/kaartjes/1208921683,6.8★,https://www.filmladder.nl/film/vaiana-2-ov-202...,https://assets.filmladder.nl/uploads/depot_ima...,74077e7959,6c790260aa
2347,pathé de munt,vaiana 2 (ov),2024,2025-03-03 16:10:00+01:00,https://www.filmladder.nl/kaartjes/1208921684,6.8★,https://www.filmladder.nl/film/vaiana-2-ov-202...,https://assets.filmladder.nl/uploads/depot_ima...,74077e7959,6c790260aa
2348,pathé de munt,vaiana 2 (ov),2024,2025-03-04 16:10:00+01:00,https://www.filmladder.nl/kaartjes/1208921685,6.8★,https://www.filmladder.nl/film/vaiana-2-ov-202...,https://assets.filmladder.nl/uploads/depot_ima...,74077e7959,6c790260aa
2349,pathé de munt,vaiana 2 (ov),2024,2025-03-05 16:10:00+01:00,https://www.filmladder.nl/kaartjes/1208921686,6.8★,https://www.filmladder.nl/film/vaiana-2-ov-202...,https://assets.filmladder.nl/uploads/depot_ima...,74077e7959,6c790260aa


In [26]:
# Widen pandas' text rendering so wide frames are wrapped less aggressively.
pd.options.display.width = 1000

# Presumably keeps only movies not yet stored; in the recorded run no Movies
# table existed and the whole scraped list came back -- confirm the filter.
new_movies = get_new_movies(scraped_movies=scraped_movies)
new_movies

No existing Movies table found


Unnamed: 0,movie_id,title,year,movie_link
0,378909531a,favoriten,2024,https://www.filmladder.nl/film/favoriten-2024/...
1,430904b120,merckx,2025,https://www.filmladder.nl/film/merckx-2025/pop...
2,e7bc07ba2a,a complete unknown,2024,https://www.filmladder.nl/film/a-complete-unkn...
3,ff1d83f7ce,alpha,2024,https://www.filmladder.nl/film/alpha-2024/popu...
4,172e369640,anora,2024,https://www.filmladder.nl/film/anora-2024/popu...
...,...,...,...,...
145,d6ba2e07ad,from here to eternity,1953,https://www.filmladder.nl/film/from-here-to-et...
146,3927581269,memoir of a snail,2024,https://www.filmladder.nl/film/memoir-of-a-sna...
147,767834fde2,paths of glory,1957,https://www.filmladder.nl/film/paths-of-glory-...
148,42e553e73d,platoon,1986,https://www.filmladder.nl/film/platoon-1986/po...


In [27]:
# Run the IMDBFetcher in headless mode over the new movies.
# The result replaces `new_movies`, so this cell is not idempotent: running it
# again passes the fetcher's previous output back in as input.
# NOTE(review): a later cell merges on an "imdb_link" column that the frame
# does not have before this step, so run() presumably adds it -- confirm.
scraper = IMDBFetcher(headless=True)
new_movies = scraper.run(new_movies)

INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [/Users/ardsnijders/.wdm/drivers/chromedriver/mac64/133.0.6943.141/chromedriver-mac-x64/chromedriver] found in cache
Scraping Progress: 100%|██████████| 150/150 [04:44<00:00,  1.90s/it]


In [33]:
# Rebinds `scraper` from the IMDBFetcher to a headless IMDBScraper. The merge
# cell that follows reads `scraper.metadata_results`, so it must run while
# `scraper` still refers to this instance.
# NOTE(review): the value stored in `enriched_new_movies` here is overwritten
# by the merge cell -- confirm whether run()'s return value is needed at all.
scraper = IMDBScraper(headless=True)
enriched_new_movies = scraper.run(new_movies)

INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [/Users/ardsnijders/.wdm/drivers/chromedriver/mac64/133.0.6943.141/chromedriver-mac-x64/chromedriver] found in cache
Scraping IMDb Metadata: 100%|██████████| 147/147 [13:16<00:00,  5.42s/it]


In [34]:
# Turn the records collected on `scraper` into a frame, attach them to the
# movie list with a left join on "imdb_link" (movies without a match keep
# their row), then run the project's post-processing on the joined frame.
# `scraper` must still be the IMDBScraper instance at this point.
metadata_df = pd.DataFrame(scraper.metadata_results)
enriched_new_movies = pd.merge(
    new_movies,
    metadata_df,
    how="left",
    on="imdb_link",
)
enriched_new_movies = process_enriched_movies(enriched_new_movies)

In [45]:

# LetterboxdScraper is already imported in the notebook's import cell, so the
# duplicate mid-notebook import is dropped; all imports stay in one place.
# run() returns the frame kept as `watchlist_df`.
lb_scraper = LetterboxdScraper()
watchlist_df = lb_scraper.run()

INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [/Users/ardsnijders/.wdm/drivers/chromedriver/mac64/133.0.6943.141/chromedriver-mac-x64/chromedriver] found in cache


In [52]:
# Display the enriched movie frame: the Filmladder columns plus the IMDb
# metadata columns (pandas truncates the middle rows in the rendered output).
enriched_new_movies

Unnamed: 0,movie_id,title,year,movie_link,imdb_link,imdb_year,rating,genres,content_rating,duration,director,writers,actors,rating_count,plot,release_date,keywords,poster_url,trailer_url
0,378909531a,favoriten,2024,https://www.filmladder.nl/film/favoriten-2024/...,https://www.imdb.com/title/tt31015543,2024,7.7,"[Documentary, Back to top]",,PT1H58M,[Ruth Beckermann],"[Ruth Beckermann, Elisabeth Menasse]",[Ilkay Idiskut],203.0,Explores teaching and learning in Vienna where...,2024-09-19,[],https://m.media-amazon.com/images/M/MV5BN2VhNz...,
1,430904b120,merckx,2025,https://www.filmladder.nl/film/merckx-2025/pop...,https://www.imdb.com/title/tt35488509,2025,7.4,"[Documentary, Back to top]",,PT1H24M,"[Christophe Hermans, Boris Tilquin]","[Christophe Hermans, Boris Tilquin]",[Eddy Merckx],35.0,Eddy Merckx is more than a champion cyclist; i...,2025-02-26,[],https://m.media-amazon.com/images/M/MV5BMWQ5Zm...,
2,e7bc07ba2a,a complete unknown,2024,https://www.filmladder.nl/film/a-complete-unkn...,https://www.imdb.com/title/tt11563598,2025,7.6,"[Docudrama, Period Drama, Biography, Drama, Mu...",15,PT2H21M,[James Mangold],"[James Mangold, Jay Cocks, Elijah Wald]","[Timothée Chalamet, Edward Norton, Elle Fanning]",37486.0,"In 1961, an unknown 19-year-old Bob Dylan arri...",2025-01-17,"[1960s,johnny cash character,bob dylan charact...",https://m.media-amazon.com/images/M/MV5BZjIyOD...,https://www.imdb.com/video/imdb/vi2538325529
3,ff1d83f7ce,alpha,2024,https://www.filmladder.nl/film/alpha-2024/popu...,https://www.imdb.com/title/tt28797114,2025,6.9,"[Drama, Back to top]",,PT1H40M,[Jan-Willem van Ewijk],[Jan-Willem van Ewijk],"[Reinout Scholten van Aschat, Gijs Scholten va...",192.0,"Rein, snowboard teacher, works in the Alps. Re...",2025-02-13,[],https://m.media-amazon.com/images/M/MV5BMjIxOG...,
4,172e369640,anora,2024,https://www.filmladder.nl/film/anora-2024/popu...,https://www.imdb.com/title/tt28607951,2024,7.7,"[Raunchy Comedy, Romantic Comedy, Steamy Roman...",18,PT2H19M,[Sean Baker],[Sean Baker],"[Mikey Madison, Paul Weissman, Yura Borisov]",105929.0,A young escort from Brooklyn meets and impulsi...,2024-11-01,"[female protagonist,nudity,russian oligarch,la...",https://m.media-amazon.com/images/M/MV5BYThiN2...,https://www.imdb.com/video/imdb/vi944621081
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143,d6ba2e07ad,from here to eternity,1953,https://www.filmladder.nl/film/from-here-to-et...,https://www.imdb.com/title/tt0045793,1954,7.6,"[Drama, Romance, War, Back to top]",PG,PT1H58M,[Fred Zinnemann],"[Daniel Taradash, James Jones]","[Burt Lancaster, Montgomery Clift, Deborah Kerr]",52335.0,"At a U.S. Army base in 1941 Hawaii, a pugilist...",1954-01-25,"[extramarital affair,prostitute,marital separa...",https://m.media-amazon.com/images/M/MV5BYjNhMD...,https://www.imdb.com/video/imdb/vi4249681945
144,3927581269,memoir of a snail,2024,https://www.filmladder.nl/film/memoir-of-a-sna...,https://www.imdb.com/title/tt23770030,2025,7.8,"[Adult Animation, Stop Motion Animation, Anima...",15,PT1H35M,[Adam Elliot],[Adam Elliot],"[Jacki Weaver, Sarah Snook, Charlotte Belsey]",12797.0,A bittersweet memoir of a melancholic woman ca...,2025-02-14,"[stop motion,stop motion animation,for grown u...",https://m.media-amazon.com/images/M/MV5BMWQzMz...,https://www.imdb.com/video/imdb/vi1982973465
145,767834fde2,paths of glory,1957,https://www.filmladder.nl/film/paths-of-glory-...,https://www.imdb.com/title/tt0050825,1957,8.4,"[Epic, Psychological Drama, War Epic, Drama, W...",PG,PT1H28M,[Stanley Kubrick],"[Stanley Kubrick, Calder Willingham, Jim Thomp...","[Kirk Douglas, Ralph Meeker, Adolphe Menjou]",224013.0,A colonel defends three of his soldiers in a c...,1957-12-20,"[trench warfare,military officer,world war one...",https://m.media-amazon.com/images/M/MV5BNmQ2Nm...,https://www.imdb.com/video/imdb/vi763627801
146,42e553e73d,platoon,1986,https://www.filmladder.nl/film/platoon-1986/po...,https://www.imdb.com/title/tt0091763,1987,8.1,"[Drama, War, Back to top]",15,PT2H,[Oliver Stone],[Oliver Stone],"[Charlie Sheen, Tom Berenger, Willem Dafoe]",452050.0,"Chris Taylor, a neophyte recruit in Vietnam, f...",1987-04-24,"[vietnam war,corpse,vietnam,jungle,marijuana]",https://m.media-amazon.com/images/M/MV5BZjE4Yj...,https://www.imdb.com/video/imdb/vi212245529


In [49]:
# Saving the scraped watchlist is currently disabled; uncomment the two lines
# below to enable it.
# from data_pipelines.save_to_db import save_watchlist
# save_watchlist(watchlist_df)
# NOTE(review): with the save disabled, this cell only re-displays the
# enriched movie frame that another cell already shows.
enriched_new_movies

Unnamed: 0,movie_id,title,year,movie_link,imdb_link,imdb_year,rating,genres,content_rating,duration,director,writers,actors,rating_count,plot,release_date,keywords,poster_url,trailer_url
0,378909531a,favoriten,2024,https://www.filmladder.nl/film/favoriten-2024/...,https://www.imdb.com/title/tt31015543,2024,7.7,"[Documentary, Back to top]",,PT1H58M,[Ruth Beckermann],"[Ruth Beckermann, Elisabeth Menasse]",[Ilkay Idiskut],203.0,Explores teaching and learning in Vienna where...,2024-09-19,[],https://m.media-amazon.com/images/M/MV5BN2VhNz...,
1,430904b120,merckx,2025,https://www.filmladder.nl/film/merckx-2025/pop...,https://www.imdb.com/title/tt35488509,2025,7.4,"[Documentary, Back to top]",,PT1H24M,"[Christophe Hermans, Boris Tilquin]","[Christophe Hermans, Boris Tilquin]",[Eddy Merckx],35.0,Eddy Merckx is more than a champion cyclist; i...,2025-02-26,[],https://m.media-amazon.com/images/M/MV5BMWQ5Zm...,
2,e7bc07ba2a,a complete unknown,2024,https://www.filmladder.nl/film/a-complete-unkn...,https://www.imdb.com/title/tt11563598,2025,7.6,"[Docudrama, Period Drama, Biography, Drama, Mu...",15,PT2H21M,[James Mangold],"[James Mangold, Jay Cocks, Elijah Wald]","[Timothée Chalamet, Edward Norton, Elle Fanning]",37486.0,"In 1961, an unknown 19-year-old Bob Dylan arri...",2025-01-17,"[1960s,johnny cash character,bob dylan charact...",https://m.media-amazon.com/images/M/MV5BZjIyOD...,https://www.imdb.com/video/imdb/vi2538325529
3,ff1d83f7ce,alpha,2024,https://www.filmladder.nl/film/alpha-2024/popu...,https://www.imdb.com/title/tt28797114,2025,6.9,"[Drama, Back to top]",,PT1H40M,[Jan-Willem van Ewijk],[Jan-Willem van Ewijk],"[Reinout Scholten van Aschat, Gijs Scholten va...",192.0,"Rein, snowboard teacher, works in the Alps. Re...",2025-02-13,[],https://m.media-amazon.com/images/M/MV5BMjIxOG...,
4,172e369640,anora,2024,https://www.filmladder.nl/film/anora-2024/popu...,https://www.imdb.com/title/tt28607951,2024,7.7,"[Raunchy Comedy, Romantic Comedy, Steamy Roman...",18,PT2H19M,[Sean Baker],[Sean Baker],"[Mikey Madison, Paul Weissman, Yura Borisov]",105929.0,A young escort from Brooklyn meets and impulsi...,2024-11-01,"[female protagonist,nudity,russian oligarch,la...",https://m.media-amazon.com/images/M/MV5BYThiN2...,https://www.imdb.com/video/imdb/vi944621081
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143,d6ba2e07ad,from here to eternity,1953,https://www.filmladder.nl/film/from-here-to-et...,https://www.imdb.com/title/tt0045793,1954,7.6,"[Drama, Romance, War, Back to top]",PG,PT1H58M,[Fred Zinnemann],"[Daniel Taradash, James Jones]","[Burt Lancaster, Montgomery Clift, Deborah Kerr]",52335.0,"At a U.S. Army base in 1941 Hawaii, a pugilist...",1954-01-25,"[extramarital affair,prostitute,marital separa...",https://m.media-amazon.com/images/M/MV5BYjNhMD...,https://www.imdb.com/video/imdb/vi4249681945
144,3927581269,memoir of a snail,2024,https://www.filmladder.nl/film/memoir-of-a-sna...,https://www.imdb.com/title/tt23770030,2025,7.8,"[Adult Animation, Stop Motion Animation, Anima...",15,PT1H35M,[Adam Elliot],[Adam Elliot],"[Jacki Weaver, Sarah Snook, Charlotte Belsey]",12797.0,A bittersweet memoir of a melancholic woman ca...,2025-02-14,"[stop motion,stop motion animation,for grown u...",https://m.media-amazon.com/images/M/MV5BMWQzMz...,https://www.imdb.com/video/imdb/vi1982973465
145,767834fde2,paths of glory,1957,https://www.filmladder.nl/film/paths-of-glory-...,https://www.imdb.com/title/tt0050825,1957,8.4,"[Epic, Psychological Drama, War Epic, Drama, W...",PG,PT1H28M,[Stanley Kubrick],"[Stanley Kubrick, Calder Willingham, Jim Thomp...","[Kirk Douglas, Ralph Meeker, Adolphe Menjou]",224013.0,A colonel defends three of his soldiers in a c...,1957-12-20,"[trench warfare,military officer,world war one...",https://m.media-amazon.com/images/M/MV5BNmQ2Nm...,https://www.imdb.com/video/imdb/vi763627801
146,42e553e73d,platoon,1986,https://www.filmladder.nl/film/platoon-1986/po...,https://www.imdb.com/title/tt0091763,1987,8.1,"[Drama, War, Back to top]",15,PT2H,[Oliver Stone],[Oliver Stone],"[Charlie Sheen, Tom Berenger, Willem Dafoe]",452050.0,"Chris Taylor, a neophyte recruit in Vietnam, f...",1987-04-24,"[vietnam war,corpse,vietnam,jungle,marijuana]",https://m.media-amazon.com/images/M/MV5BZjE4Yj...,https://www.imdb.com/video/imdb/vi212245529


In [35]:
# Disabled NaN -> None conversion, kept for reference:
# enriched_new_movies = enriched_new_movies.where(pd.notna(enriched_new_movies), None)
# Persist the cinemas, enriched movies and screenings in a single call.
# NOTE(review): the movie frame contains missing values (e.g. trailer_url) and
# list-valued columns (genres, actors, ...); confirm save_all_to_db handles
# both now that the conversion above is commented out.
save_all_to_db(cinemas_df=cinemas_df,
               movies_df=enriched_new_movies,
               screenings_df=screenings_df)