In [None]:
# -----********************-----

# Created Time: 2024/09/06

# Author: Tara Liu, Yiyi He

### Use Case

# This notebook downloads ERA5-Land data in GRIB format, one file per hour.

# Main function: download_era5_data(start_date, end_date, area, variables, folder_path)

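# To use:

# 1. Add your CDS API key in the configuration cell.

# 2. Change folder_path to the folder where the GRIB files should be saved.

# 3. Set the main function's parameters in the example-usage cell and run it.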

# -----********************-----

In [None]:
# Import libraries
import cdsapi
import os
import time
from datetime import datetime, timedelta

In [None]:
# Please input your API key below and run the cell.
# Tip: avoid sharing or committing the notebook with a real key filled in.

# NOTE(review): "cds-beta" was the endpoint in use when this notebook was written;
# confirm the current API URL in the CDS API setup instructions.
CDS_API_URL = "https://cds-beta.climate.copernicus.eu/api"
CDS_API_KEY = ""

# The rc file is written as plain "name: value" lines, without surrounding braces.
config_content = f"url: {CDS_API_URL}\nkey: {CDS_API_KEY.strip()}\n"

# Resolve the rc file in the current user's home directory instead of assuming root.
config_path = os.path.expanduser('~/.cdsapirc')

if CDS_API_KEY.strip():
    with open(config_path, 'w') as config_file:
        config_file.write(config_content)
    print("Configuration file created successfully.")
else:
    # Never overwrite an existing (possibly valid) configuration with an empty key.
    print(f"No API key provided; {config_path} was left untouched.")


Configuration file created successfully.


In [None]:
# Create the CDS API client. It is stored in the module-level name `c`, which
# download_era5_data() uses for every retrieve call.
# NOTE(review): presumably cdsapi picks up the url/key from the .cdsapirc file
# written by the configuration cell — confirm against the cdsapi docs.
c = cdsapi.Client()

# Folder where the GRIB files will be saved; change this to your own location.
# NOTE(review): this looks like a Colab Google Drive mount path; no mounting is
# done in the cells shown here, so Drive likely has to be mounted beforehand — confirm.
folder_path = '/content/gdrive/MyDrive/GRIB'

# Create the folder (and any missing parents); no error if it already exists.
os.makedirs(folder_path, exist_ok=True)

def get_existing_files(folder):
    """Collect the date/time keys of the ERA5-Land GRIB files already in a folder.

    Only files named exactly ``era5_land_YYYYMMDD_HHMM.grib`` are recognised.
    Every other directory entry (other ``.grib`` files, partial downloads,
    unrelated files) is ignored instead of raising or producing a bogus key.

    params:

    folder: str, path of the directory to scan (must exist)

    returns:

    set of str, one "YYYYMMDD_HHMM" key per recognised file
    """
    prefix, suffix = 'era5_land_', '.grib'
    existing_files = set()
    for filename in os.listdir(folder):
        if not (filename.startswith(prefix) and filename.endswith(suffix)):
            continue
        # What remains between prefix and suffix should be "YYYYMMDD_HHMM".
        stamp = filename[len(prefix):-len(suffix)]
        date_part, separator, time_part = stamp.partition('_')
        if (separator
                and len(date_part) == 8 and date_part.isdigit()
                and len(time_part) == 4 and time_part.isdigit()):
            existing_files.add(f"{date_part}_{time_part}")
    return existing_files

def print_existing_files_range(existing_files):
    """Print the earliest and latest timestamps covered by the existing files.

    params:

    existing_files: iterable of str, keys in "YYYYMMDD_HHMM" form
                    (the form returned by get_existing_files)

    Keys that do not parse as a valid date/time are skipped so that a stray
    file cannot abort the run from inside a status message. If no usable key
    remains (or the input is empty/None), "No existing files found." is printed.
    """
    timestamps = []
    for key in existing_files or ():
        try:
            timestamps.append(datetime.strptime(key, "%Y%m%d_%H%M"))
        except ValueError:
            # Not a valid timestamp key; ignore it.
            continue

    if not timestamps:
        print("No existing files found.")
        return

    # Only the extremes are needed, so there is no reason to sort everything.
    print(f"Existing files cover from {min(timestamps)} to {max(timestamps)}")

def download_era5_data(start_date, end_date, area, variables, folder_path):
    """
    Downloads ERA5-Land data for every hour of every day in a date range.

    One GRIB file is written per hour, named era5_land_YYYYMMDD_HHMM.grib.
    Hours whose file already exists in folder_path are skipped.

    Each download is first written to a temporary "<name>.grib.part" file and
    only renamed to its final name once the retrieve call has returned, so an
    interrupted download cannot leave a truncated .grib file that a later run
    would mistake for a finished one.

    params:

    start_date: datetime object, the first day of the range (inclusive)
    end_date: datetime object, the last day of the range (inclusive)
    area: list of floats, the bounding box coordinates [N, W, S, E]
    variables: str or list of str, the variable name(s) to download
    folder_path: str, the folder path to save the GRIB files

    raises:

    Any exception raised by the CDS client or by the final rename is
    propagated unchanged after the partial file has been removed.
    """
    # Get existing files
    existing_files = get_existing_files(folder_path)
    print_existing_files_range(existing_files)

    # Loop through each day in the range
    current_date = start_date
    while current_date <= end_date:
        # Extract the year, month, and day
        year = current_date.strftime('%Y')
        month = current_date.strftime('%m')
        day = current_date.strftime('%d')

        # Loop through all hours (00:00 to 23:00)
        for hour in range(24):
            # Format the hour as two digits
            hour_str = f"{hour:02d}:00"
            hour_for_filename = f"{hour:02d}00"

            # Filename for the GRIB file with the date and hour in the name
            filename = os.path.join(folder_path, f"era5_land_{year}{month}{day}_{hour_for_filename}.grib")

            # Check if the file already exists
            file_key = f"{year}{month}{day}_{hour_for_filename}"
            if file_key in existing_files:
                print(f"File for {year}-{month}-{day} {hour_str} already exists. Skipping download.")
                continue

            # Download into a side file that the existing-file scan does not
            # recognise (it does not end in ".grib").
            partial_filename = filename + ".part"
            try:
                # Make the API call to retrieve data for the specific day and hour
                c.retrieve(
                    'reanalysis-era5-land',
                    {
                        'variable': variables,
                        'year': year,
                        'month': month,
                        'day': day,
                        'time': hour_str,  # Request for this specific hour
                        'area': area,
                        'format': 'grib',
                    },
                    partial_filename
                )
                # Publish the finished file under its final name.
                os.replace(partial_filename, filename)
            finally:
                # After a successful rename this is a no-op; after a failure it
                # discards the incomplete download.
                if os.path.exists(partial_filename):
                    os.remove(partial_filename)

            # Print a message when the file is saved
            print(f"Downloaded data for {year}-{month}-{day} {hour_str} and saved as {filename}")

        # Move to the next day
        current_date += timedelta(days=1)



In [None]:

# Example usage: one full calendar year over a fixed bounding box
start_date = datetime(year=2023, month=1, day=1)
end_date = datetime(year=2023, month=12, day=31)

# Bounding box, ordered North, West, South, East
area = [31.0954, 72.4750, 12.4319, 94.802]

variables = [
    '2m_temperature',
    '10m_u_component_of_wind',
    '10m_v_component_of_wind',
    'total_precipitation',
]

# Kick off the download with the settings above
download_era5_data(
    start_date=start_date,
    end_date=end_date,
    area=area,
    variables=variables,
    folder_path=folder_path,
)
