In [3]:

!pip install selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import csv

# Initialize WebDriver (Make sure you have ChromeDriver installed and in PATH)
def initialize_driver():
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # Run in headless mode (no GUI)
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--window-size=1920x1080")
    options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(options=options)
    return driver

# Extract branch addresses from a single page
def get_branch_addresses(driver, page_url):
    print(f"Scraping page: {page_url}")
    driver.get(page_url)  # Navigate to the page
    time.sleep(2)  # Wait for the page to load (adjust if needed)
    
    # Find all address elements
    address_elements = driver.find_elements(By.CSS_SELECTOR, 'span.d-block.text-heading.font-size-sm.text-uppercase')
    addresses = []
    for element in address_elements:
        address = element.text.strip()  # Extract and clean text
        print(f"Found address: {address}")  # Debugging line
        addresses.append({"Address": address})
    
    return addresses

# Save all collected addresses to a CSV file
def save_to_csv(data, filename="deutsche_bank_branches.csv"):
    print(f"Saving {len(data)} addresses to {filename}...")
    with open(filename, "w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=["Address"])
        writer.writeheader()
        writer.writerows(data)
    print(f"Data saved successfully to {filename}!")

# Main function to scrape all pages
def main():
    base_url = "https://deutsche.banklocationmaps.com/en/branches/deu?page={}&partner=hide"
    total_pages = 39  # Number of pages to scrape
    all_addresses = []
    
    # Initialize Selenium WebDriver
    driver = initialize_driver()
    
    try:
        for page in range(1, total_pages + 1):
            page_url = base_url.format(page)
            addresses = get_branch_addresses(driver, page_url)
            all_addresses.extend(addresses)
    except Exception as e:
        print(f"Error occurred: {e}")
    finally:
        driver.quit()  # Ensure the browser is closed

    # Save to CSV
    save_to_csv(all_addresses)

if __name__ == "__main__":
    main()


Defaulting to user installation because normal site-packages is not writeable
Collecting selenium
  Downloading selenium-4.27.1-py3-none-any.whl.metadata (7.1 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.27.0-py3-none-any.whl.metadata (8.6 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting websocket-client~=1.8 (from selenium)
  Downloading websocket_client-1.8.0-py3-none-any.whl.metadata (8.0 kB)
Collecting attrs>=23.2.0 (from trio~=0.17->selenium)
  Downloading attrs-24.3.0-py3-none-any.whl.metadata (11 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Collecting h11<1,>=0.9.0 (from wsproto>=0.14->trio-websocket~=0.9->selenium)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Dow



ModuleNotFoundError: No module named 'selenium'

In [5]:
import requests

# Define the URL
URL = "https://deutsche.banklocationmaps.com/en/branches/deu?page=1&partner=hide"

# Define headers copied from your browser
HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept-Language": "en-US,en;q=0.9",
    "DNT": "1",
    "Sec-CH-UA": '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    "Sec-CH-UA-Mobile": "?0",
    "Sec-CH-UA-Platform": '"macOS"',
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
}

# Send a GET request
try:
    response = requests.get(URL, headers=HEADERS, timeout=10)
    response.raise_for_status()
    print("Request successful!")
    print(f"Status Code: {response.status_code}")
    print(f"Page Content (truncated): {response.text[:500]}")  # Print first 500 characters of the content
except requests.RequestException as e:
    print(f"Request failed: {e}")


Request failed: 403 Client Error: Forbidden for url: https://deutsche.banklocationmaps.com/en/branches/deu?page=1&partner=hide


In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv

# Initialize WebDriver
def initialize_driver():
    options = webdriver.ChromeOptions()
    # Comment out the following line for debugging purposes
    # options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(options=options)
    return driver

# Extract branch addresses from a single page
def scrape_page(driver, url):
    print(f"Scraping URL: {url}")
    driver.get(url)

    try:
        # Wait for the address elements to load
        WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.media-body.pl-3 span.d-block.text-heading.font-size-sm.text-uppercase'))
        )
    except Exception as e:
        print(f"Error waiting for page content: {e}")
        return []

    # Locate and extract branch addresses
    branch_elements = driver.find_elements(By.CSS_SELECTOR, 'div.media-body.pl-3 span.d-block.text-heading.font-size-sm.text-uppercase')
    addresses = []
    for branch in branch_elements:
        # Extract full address and handle <br> tags
        address = branch.get_attribute("innerHTML").replace("<br>", " ").strip()
        print(f"Extracted address: {address}")
        addresses.append(address)

    print(f"Found {len(addresses)} addresses on this page.")
    return addresses

# Save extracted addresses to a CSV file
def save_to_csv(data, filename="deutsche_bank_branches.csv"):
    print(f"Saving {len(data)} addresses to {filename}...")
    with open(filename, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Address"])  # Header row
        for address in data:
            writer.writerow([address])
    print(f"Data saved successfully to {filename}!")

# Main function to scrape all pages
def main():
    base_url = "https://deutsche.banklocationmaps.com/en/branches/deu?page={}&partner=hide"
    total_pages = 39  # Number of pages to scrape
    all_addresses = []

    driver = initialize_driver()
    try:
        for page in range(1, total_pages + 1):
            url = base_url.format(page)
            addresses = scrape_page(driver, url)
            all_addresses.extend(addresses)
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        driver.quit()

    # Save all extracted addresses to CSV
    save_to_csv(all_addresses)

if __name__ == "__main__":
    main()


Scraping URL: https://deutsche.banklocationmaps.com/en/branches/deu?page=1&partner=hide
Extracted address: Bohlweg 24 38100 Braunschweig
Extracted address: Marienplatz 21 80331 München
Extracted address: Berliner Allee 61 40212 Düsseldorf
Extracted address: Janusz-Korczak-Straße 3-5 12627 Berlin
Extracted address: Promenadeplatz 15 80333 München
Extracted address: Roßmarkt 18 60311 Frankfurt am Main
Extracted address: Stuttgarter Straße 81 70469 Stuttgart
Extracted address: Koblenzer Straße 7 57072 Siegen
Extracted address: Schweizer Straße 28a/30 60594 Frankfurt am Main
Extracted address: Otto-Suhr-Allee 6 10585 Berlin
Extracted address: Schönhauser Allee 120 10437 Berlin
Extracted address: Taunusanlage 12 60325 Frankfurt am Main
Extracted address: Leipziger Straße 17 60487 Frankfurt am Main
Extracted address: Schloßstraße 114 12163 Berlin
Extracted address: Leopoldstraße 53 80802 München
Extracted address: Rotteckring 3 79098 Freiburg
Extracted address: Bredeneyer Straße 156-158 4513

In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv

# Initialize WebDriver
def initialize_driver():
    options = webdriver.ChromeOptions()
    # Comment out the following line for debugging purposes
    # options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(options=options)
    return driver

# Extract branch addresses from a single page
def scrape_page(driver, url):
    print(f"Scraping URL: {url}")
    driver.get(url)

    print("Waiting for manual cookie consent. Please accept the cookies in the browser window...")
    time.sleep(60)  # Adjust the delay if needed (e.g., 15 seconds)

    try:
        # Wait for the address elements to load
        WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.media-body.pl-3 span.d-block.text-heading.font-size-sm.text-uppercase'))
        )
    except Exception as e:
        print(f"Error waiting for page content: {e}")
        return []

    # Locate and extract branch addresses
    branch_elements = driver.find_elements(By.CSS_SELECTOR, 'div.media-body.pl-3 span.d-block.text-heading.font-size-sm.text-uppercase')
    addresses = []
    for branch in branch_elements:
        # Extract full address and handle <br> tags
        address = branch.get_attribute("innerHTML").replace("<br>", " ").strip()
        print(f"Extracted address: {address}")
        addresses.append(address)

    print(f"Found {len(addresses)} addresses on this page.")
    return addresses

# Save extracted addresses to a CSV file
def save_to_csv(data, filename="deutsche_bank_branches.csv"):
    print(f"Saving {len(data)} addresses to {filename}...")
    with open(filename, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Address"])  # Header row
        for address in data:
            writer.writerow([address])
    print(f"Data saved successfully to {filename}!")

# Main function to scrape all pages
def main():
    base_url = "https://deutsche.banklocationmaps.com/en/branches/deu?page={}&partner=hide"
    total_pages = 39  # Number of pages to scrape
    all_addresses = []

    driver = initialize_driver()
    try:
        for page in range(1, total_pages + 1):
            url = base_url.format(page)
            addresses = scrape_page(driver, url)
            all_addresses.extend(addresses)
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        driver.quit()

    # Save all extracted addresses to CSV
    save_to_csv(all_addresses)

if __name__ == "__main__":
    main()


Scraping URL: https://deutsche.banklocationmaps.com/en/branches/deu?page=1&partner=hide
Waiting for manual cookie consent. Please accept the cookies in the browser window...
An error occurred: name 'time' is not defined
Saving 0 addresses to deutsche_bank_branches.csv...
Data saved successfully to deutsche_bank_branches.csv!
