In [1]:
import random
import time
import bs4
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from fake_useragent import FakeUserAgent
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains

In [2]:
# Factory for a Chrome WebDriver that presents a randomly chosen User-Agent
def create_driver_with_random_user_agent():
    """Start a new Chrome session with a random User-Agent string.

    The User-Agent comes from ``FakeUserAgent().random``; the chromedriver
    binary is resolved through ``ChromeDriverManager().install()``.

    Returns:
        The ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()`` on it.
    """
    random_user_agent = FakeUserAgent().random

    # Command-line switches handed to Chrome, applied in this order.
    chrome_switches = (
        f'--user-agent={random_user_agent}',
        "--disable-extensions",
        "--disable-gpu",
        "--disable-software-rasterizer",
        "--no-sandbox",
        "--disable-features=VizDisplayCompositor",
    )

    chrome_options = Options()
    for switch in chrome_switches:
        chrome_options.add_argument(switch)

    # webdriver_manager supplies the path of a chromedriver executable.
    chrome_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chrome_service, options=chrome_options)

# Instantiate a WebDriver with a random User-Agent
driver = create_driver_with_random_user_agent()

# Page listing the Amsterdam cinemas and their programme
url = "https://www.filmladder.nl/amsterdam/bioscopen"

# Start from a known value so that a failed fetch can never leave an
# undefined (or stale, from an earlier run) `html_content` behind for the
# cells that consume it.
html_content = ""

try:
    # Navigation is inside the try block as well: if it fails, the browser
    # must still be shut down by the finally clause below.
    driver.get(url)

    # Scroll to the bottom of the page to simulate user interaction
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)  # Wait a bit for content to load after scrolling

    # Scroll a bit more to simulate user activity
    driver.execute_script("window.scrollBy(0, 500);")
    time.sleep(3)  # Wait for more content to load

    # Now fetch the page source with the likely loaded content
    html_content = driver.page_source
    print(html_content)  # Or use BeautifulSoup to parse it

except Exception as e:
    # Best-effort scrape: report the problem and keep the notebook running.
    print(f"An error occurred: {e}")

finally:
    # Always close the browser, whatever happened above, so that no
    # Chrome/chromedriver process is left running.
    driver.quit()

<html lang="nl"><head>
    <!-- Google tag (gtag.js) -->
    <script async="" src="//c.amazon-adsystem.com/aax2/apstag.js"></script><script async="" src="//resources.planetnine.com/player/latest/Pnvp.js?t=0.5610900016920164&amp;h=www.filmladder.nl"></script><script async="" src="https://www.googletagmanager.com/gtag/js?id=G-FXGH1XSDLV"></script>
    <script>
      window.dataLayer = window.dataLayer || [];
      function gtag(){dataLayer.push(arguments);}
      gtag('js', new Date());

      gtag('config', 'G-FXGH1XSDLV');
    </script>

    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width">
    <meta name="theme-color" content="#FFCC02">

    <link rel="apple-touch-icon-precomposed" type="image/png" href="/assets/apple-touch-icon-144x144-af857028c3108d31c1be3c58e48fb488b2aee4fe36160f5a674fa708427176df.png" sizes="144x144">
    <link rel="apple-touch-icon-precomposed" type="image/png" href="/assets/ap

In [4]:
from bs4 import BeautifulSoup

def _find_nested(node, *steps):
    """Follow a chain of ``find`` lookups, stopping at the first miss.

    Args:
        node: Starting tag, or None.
        *steps: ``(tag_name, attrs)`` pairs; ``attrs`` is a dict of keyword
            arguments forwarded to ``find`` for that step.

    Returns:
        The tag reached after the last step, or None as soon as any lookup
        in the chain finds nothing.
    """
    for tag_name, attrs in steps:
        if node is None:
            return None
        node = node.find(tag_name, **attrs)
    return node


def parse_films(html_content):
    """Extract per-cinema film listings from the page HTML.

    Every ``div.cinema`` element is scanned for ``div.hall`` entries; each
    entry yields one record. Missing elements or attributes never raise:
    the affected field is filled with a placeholder string instead.

    Args:
        html_content: HTML source of the listing page. An empty or None
            value yields an empty list.

    Returns:
        A list of dicts with the keys ``cinema``, ``title``, ``url``,
        ``image``, ``rating`` and ``screenings``. ``screenings`` is a list
        of dicts with the keys ``day``, ``time``, ``startDate`` and
        ``ticket_url``.
    """
    if not html_content:
        return []

    soup = BeautifulSoup(html_content, 'html.parser')
    movies_data = []

    for cinema in soup.find_all("div", class_="cinema"):
        # Resolve div -> h3 -> a step by step; a plain chained .find() would
        # raise AttributeError on the first missing level.
        cinema_name_tag = _find_nested(
            cinema,
            ("div", {"class_": "info cinema-name"}),
            ("h3", {}),
            ("a", {}),
        )
        cinema_name = cinema_name_tag.text.strip() if cinema_name_tag is not None else "Unknown Cinema"

        for hall in cinema.find_all("div", class_="hall"):
            # Extract movie title and URL
            movie_title_tag = _find_nested(hall, ("h4", {}), ("a", {}))
            if movie_title_tag is not None:
                movie_title = movie_title_tag.text.strip()
                movie_url = movie_title_tag.get("href", "Unknown URL")
            else:
                movie_title = "Unknown Title"
                movie_url = "Unknown URL"

            # Extract image URL; .get() avoids a KeyError when the poster
            # tag carries no data-src attribute.
            img_tag = hall.find("img", class_="poster")
            img_url = (img_tag.get("data-src") if img_tag is not None else None) or "No Image"

            # Extract rating
            rating_tag = hall.find("span", class_="star-rating")
            rating = rating_tag.text.strip() if rating_tag is not None else "No Rating"

            # Extract screenings. The class string (including the
            # "perfomances" spelling) is the one this lookup has always
            # used; it must match the page markup, so it is left as is.
            screenings = []
            for day in hall.find_all("div", class_="day with-perfomances"):
                day_name_tag = day.find("span", class_="name full")
                day_name = day_name_tag.text.strip() if day_name_tag is not None else "Unknown Day"

                # Named `performance` rather than `time` so the loop variable
                # does not shadow the `time` module imported by the notebook.
                for performance in day.find_all("div", itemprop="startDate"):
                    ticket_link = performance.find("a")
                    if ticket_link is not None:
                        time_str = ticket_link.text.strip()
                        ticket_url = ticket_link.get("href", "Unknown URL")
                    else:
                        time_str = "Unknown Time"
                        ticket_url = "Unknown URL"

                    screenings.append({
                        "day": day_name,
                        "time": time_str,
                        "startDate": performance.get("content", "Unknown Start Date"),
                        "ticket_url": ticket_url
                    })

            # Store movie data
            movies_data.append({
                "cinema": cinema_name,
                "title": movie_title,
                "url": movie_url,
                "image": img_url,
                "rating": rating,
                "screenings": screenings
            })

    return movies_data

# Parse the page source captured by the scraping cell
movies = parse_films(html_content)

# Write one human-readable record per film, followed by its screenings
for movie in movies:
    summary_lines = [
        f"Cinema: {movie['cinema']}",
        f"Title: {movie['title']}",
        f"URL: {movie['url']}",
        f"Image: {movie['image']}",
        f"Rating: {movie['rating']}",
        "Screenings:",
    ]
    print("\n".join(summary_lines))

    for screening in movie["screenings"]:
        print(f"  {screening['day']} at {screening['time']} (Start Date: {screening['startDate']}) - {screening['ticket_url']}")

    # Blank separator between records (the "\n" plus print's own newline)
    print("\n")



Cinema: Bijlmerbios
Title: Favoriten
URL: https://www.filmladder.nl/film/favoriten-2024/popup/amsterdam
Image: https://assets.filmladder.nl/uploads/depot_image/asset/001/049/812/1049812/small_36b8e9fd3a14abc8.jpg
Rating: 7.7★
Screenings:
  donderdag at 21:00 (Start Date: 2025-02-27T21:00:00+01:00) - https://www.filmladder.nl/kaartjes/1208540575
  vrijdag at 19:45 (Start Date: 2025-02-28T19:45:00+01:00) - https://www.filmladder.nl/kaartjes/1208629906
  zaterdag at 21:00 (Start Date: 2025-03-01T21:00:00+01:00) - https://www.filmladder.nl/kaartjes/1208633393


Cinema: Bijlmerbios
Title: Merckx
URL: https://www.filmladder.nl/film/merckx-2025/popup/amsterdam
Image: https://assets.filmladder.nl/uploads/depot_image/asset/001/054/533/1054533/small_df453d27bff4fd5e.jpg
Rating: 7.5★
Screenings:
  morgen at 19:45 (Start Date: 2025-02-26T19:45:00+01:00) - https://www.filmladder.nl/kaartjes/1208525211
  donderdag at 19:00 (Start Date: 2025-02-27T19:00:00+01:00) - https://www.filmladder.nl/kaartjes/