In [None]:
import os
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from datetime import datetime, timedelta
import time

In [None]:
def initialize_webdriver(initial_url="https://www.google.com", wait_seconds=5):
    """Start a headless Chrome WebDriver and open ``initial_url``.

    The browser is configured with a fixed user-agent string and with
    download preferences that save files into the current working
    directory without showing a prompt.

    Args:
        initial_url: Page to load right after the browser starts.
        wait_seconds: Seconds to sleep after the initial navigation so the
            page can load and a session can be established.

    Returns:
        The ready-to-use ``webdriver.Chrome`` instance. The caller owns it
        and is responsible for calling ``quit()``.

    Raises:
        Exception: Whatever Selenium raises while starting the browser or
            loading ``initial_url``. If the navigation fails, the browser
            that was just started is shut down before the error propagates.
    """
    options = webdriver.ChromeOptions()

    # Replicate the user-agent and other common headers
    options.add_argument("start-maximized")
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
    # NOTE(review): the three "accept*" values below are passed as browser
    # command-line arguments, not as HTTP request headers; they may have no
    # effect at all -- confirm against the Chrome switch list.
    options.add_argument("accept=text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
    options.add_argument("accept-language=en-US,en;q=0.5")
    options.add_argument("accept-encoding=gzip, deflate")
    options.add_argument("--headless")

    # Preferences to automatically download files without prompting
    cwd = os.getcwd()  # Downloads are written to the current working directory
    prefs = {
        "download.default_directory": cwd,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True
    }
    options.add_experimental_option("prefs", prefs)

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    try:
        # Navigate to the initial URL and give the session time to settle.
        driver.get(initial_url)
        time.sleep(wait_seconds)
    except Exception:
        # Do not leak the freshly started browser process when the first
        # navigation fails; the caller never receives a handle to quit it.
        driver.quit()
        raise

    return driver

In [None]:
def download_csv(driver, base_url, start_date, end_date, initial_url,
                 wait_seconds=5, max_retries=3):
    """Visit one download URL per day from ``start_date`` to ``end_date``.

    The driver first opens ``initial_url`` and then, for every date in the
    inclusive range, navigates to ``base_url`` with the date appended in
    ``YYYYMMDD`` form.

    Args:
        driver: WebDriver-like object exposing ``get(url)`` and ``quit()``.
        base_url: URL prefix to which the formatted date is appended.
        start_date: First date to request (inclusive).
        end_date: Last date to request (inclusive).
        initial_url: Page opened before the loop, and again by any
            replacement driver created after a disconnection.
        wait_seconds: Seconds to sleep after each navigation.
        max_retries: Maximum number of re-initialisations attempted for a
            single date when the error message contains "disconnected".
            Once exhausted, the date is skipped like any other failure.

    Returns:
        The driver that is live when the loop ends. This is a different
        object from the ``driver`` argument if a disconnection forced a
        re-initialisation, so callers should quit the returned instance.
    """
    # Navigate to the initial URL
    driver.get(initial_url)
    time.sleep(wait_seconds)  # Give the page time to load and the session to be established

    current_date = start_date
    retries = 0  # Re-initialisations already spent on the current date

    while current_date <= end_date:
        formatted_date = current_date.strftime('%Y%m%d')
        url = f"{base_url}{formatted_date}"

        try:
            driver.get(url)
            time.sleep(wait_seconds)  # Wait for the download to complete (adjust as needed)
            # Reaching this line only means the navigation did not raise;
            # the presence of the file on disk is not checked here.
            print(f"Succeeded: downloaded file {formatted_date} from {url}...")

        except Exception as e:
            print(f"Failed. Error: {e}")
            # Check for disconnection and re-initialize the WebDriver,
            # but only a bounded number of times so a permanently broken
            # browser cannot spin this loop forever.
            if "disconnected" in str(e) and retries < max_retries:
                retries += 1
                print("Re-initializing WebDriver due to disconnection...")
                try:
                    driver.quit()  # Quit the old instance
                except Exception as quit_error:
                    # A disconnected driver may fail to quit cleanly; that
                    # must not prevent the replacement from being created.
                    print(f"Failed. Error: {quit_error}")
                driver = initialize_webdriver(initial_url)  # Re-initialize WebDriver
                continue  # Retry the current date after reinitializing

        current_date += timedelta(days=1)
        retries = 0  # The retry budget is per date

    return driver

In [None]:
if __name__ == "__main__":

    # URL to initiate the session
    initial_url = "https://www.iso-ne.com/isoexpress/web/reports/operations/-/tree/gen-fuel-mix"

    # Base URL without the date
    base_url = "https://www.iso-ne.com/transform/csv/genfuelmix?start="

    # Define the start and end dates (both are included in the download loop)
    start_date = datetime(2018, 1, 1)  # Start date (January 1, 2018)
    end_date = datetime(2018, 1, 2)  # End date (January 2, 2018)

    # Initialize the WebDriver with the specified initial URL, so the session
    # is opened on the report site rather than on the function's default page.
    driver = initialize_webdriver(initial_url)

    try:
        # Download the files. download_csv can swap in a new driver after a
        # disconnection; if it returns one, keep that instance so the browser
        # that is actually running gets closed. If it returns nothing, the
        # driver created above is kept.
        driver = download_csv(driver, base_url, start_date, end_date, initial_url) or driver
    finally:
        # Always shut the browser down, even when the download loop raises.
        driver.quit()