In [4]:
import os
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

class CricketdataDownloader:
    def __init__(self, website_url, destination_dir):
        self.website_url = website_url
        self.destination_dir = destination_dir
        self.browser = None

        # Set up Chrome options for Selenium
        self.chrome_options = Options()
        preferences = {"download.default_directory": self.destination_dir, "profile.default_content_settings.popups": 0}
        self.chrome_options.add_experimental_option("prefs", preferences)

    def initialize_browser(self):
        """Initialize the WebDriver."""
        service = Service()  # Provide the path to chromedriver if needed
        self.browser = webdriver.Chrome(service=service, options=self.chrome_options)

    def fetch_json_files(self):
        """Automate the download of JSON files for Test, ODI, and T20 matches."""
        try:
            # Navigate to the Cricsheet matches page
            self.browser.get(self.website_url)
            time.sleep(3)  # Allow time for the page to load

            # Define match types and their corresponding CSS selectors or XPath
            match_types = {
                "Test matches": '//a[contains(@href, "tests_json.zip")]',
                "One-day internationals": '//a[contains(@href, "odis_json.zip")]',
                "T20 internationals": '//a[contains(@href, "t20s_json.zip")]',
            }

            for match_type, xpath in match_types.items():
                print(f"Downloading {match_type}...")
                try:
                    # Locate and click the download link
                    download_link = self.browser.find_element(By.XPATH, xpath)
                    download_link.click()
                    time.sleep(5)  # Wait for the file to download
                except Exception as e:
                    print(f"Error downloading {match_type}: {e}")

            print("Download completed for all match types.")
        except Exception as e:
            print(f"An error occurred during scraping: {e}")

    def close_browser(self):
        """Close the Selenium WebDriver."""
        if self.browser:
            self.browser.quit()

    def execute(self):
        """Execute the scraping process."""
        self.initialize_browser()
        self.fetch_json_files()
        self.close_browser()

if __name__ == "__main__":
    # Define the URL and download directory
    WEBSITE_URL = "https://cricsheet.org/matches/"
    DESTINATION_DIR = os.path.join(os.getcwd(), "cricket_downloads")  # Set to a preferred download directory

    # Ensure the download directory exists
    if not os.path.exists(DESTINATION_DIR):
        os.makedirs(DESTINATION_DIR)

    # Run the downloader
    downloader = CricketdataDownloader(WEBSITE_URL, DESTINATION_DIR)
    downloader.execute()


Error downloading T20 internationals: HTTPConnectionPool(host='localhost', port=55025): Read timed out. (read timeout=120)
Download completed for all match types.
