In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

from data import countries_meta_data
from tqdm import tqdm
import os
import requests

# Scrape the data URL shown for every location offered in the dropdown of the
# raw-data page, collecting the displayed text of each into `data_urls`.

# Upper bound on how many dropdown entries to visit.
NUM_LOCATIONS = 247
# Seconds to wait for each page element before Selenium raises TimeoutException.
WAIT_SECONDS = 100

driver = webdriver.Chrome()  # Use your specific driver, e.g., Firefox() if using Firefox

data_urls = []

try:
    # Everything that touches the live browser sits inside the try so the
    # driver is always shut down, even if the initial page load fails.
    driver.maximize_window()  # Maximize the browser window
    driver.get("https://health.google.com/covid-19/open-data/raw-data")

    # One wait object is enough; each .until() call polls independently.
    wait = WebDriverWait(driver, WAIT_SECONDS)

    # Wrap the range with tqdm to display a progress bar
    for i in tqdm(range(NUM_LOCATIONS), desc="Scraping"):
        # Wait until the mat-chip-option is present
        mat_chip_option = wait.until(
            EC.presence_of_element_located((By.TAG_NAME, "mat-chip-option"))
        )

        # Scroll to the mat-chip-option and click to open the options
        ActionChains(driver).move_to_element(mat_chip_option).click().perform()

        # Wait for the dropdown menu to appear
        menu_content = wait.until(
            EC.visibility_of_element_located((By.CLASS_NAME, "mat-mdc-menu-content"))
        )

        # Collect the menu entries; the entry at index i is the one to select.
        buttons = menu_content.find_elements(By.TAG_NAME, "button")
        if i >= len(buttons):
            # The menu has fewer entries than NUM_LOCATIONS (or none at all):
            # stop cleanly instead of raising IndexError on buttons[i].
            tqdm.write(
                f"Menu offers only {len(buttons)} option(s); stopping at index {i}"
            )
            break

        buttons[i].click()

        # Locate the first element with tag 'copyable-field'
        copyable_field = wait.until(
            EC.presence_of_element_located((By.TAG_NAME, "copyable-field"))
        )

        # The text lives in the first div nested inside the field's first div.
        first_div = copyable_field.find_element(By.TAG_NAME, "div")
        inner_div = first_div.find_element(By.TAG_NAME, "div")

        # Store the visible text of the innermost div
        div_content = inner_div.text
        data_urls.append(div_content)

finally:
    # Close the driver
    driver.quit()


ModuleNotFoundError: No module named 'selenium'

In [None]:
# Download one CSV per country code into the local 'data' directory.
# Relies on `countries_meta_data`, `os`, `requests` and `tqdm` imported in the
# first cell.

# Seconds to wait on a single request before giving up on that file.
REQUEST_TIMEOUT = 30

# The code is read from index 4 of each metadata row; shorter rows are skipped.
country_codes = [sub_array[4] for sub_array in countries_meta_data if len(sub_array) > 4]

# De-duplicate while keeping first-seen order, so every run downloads the
# files in the same sequence (a plain set() gives an arbitrary order).
unique_country_codes = list(dict.fromkeys(country_codes))

# Directory to store the downloaded CSV files
os.makedirs('data', exist_ok=True)

# Download each CSV file
for code in tqdm(unique_country_codes, desc="Downloading CSV files"):
    url = f'https://storage.googleapis.com/covid19-open-data/v3/location/{code}.csv'
    try:
        # Without a timeout a stalled connection would block the loop forever.
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Report network-level failures per file and carry on with the rest.
        tqdm.write(f'Failed to download {code}.csv: {exc}')
        continue

    # Check if the request was successful
    if response.status_code == 200:
        file_path = os.path.join('data', f'{code}.csv')
        with open(file_path, 'wb') as file:
            file.write(response.content)
    else:
        tqdm.write(f'Failed to download {code}.csv: {response.status_code}')
