In [1]:
import datetime

In [2]:
def get_month_range(date: datetime.date):
    """Compute the bounds of the calendar month that precedes ``date``.

    Parameters:
        date (datetime.date): Any date; only its year and month matter.

    Returns:
        tuple: ``(first_day, last_day)`` of the month before the one
                containing ``date``.
    """
    one_day = datetime.timedelta(days=1)
    # Day 1 of the month that contains the input date.
    current_month_start = date.replace(day=1)
    # Stepping back a single day from there lands on the previous month's end.
    previous_month_end = current_month_start - one_day
    previous_month_start = previous_month_end.replace(day=1)
    return previous_month_start, previous_month_end

In [3]:
def get_previous_month_dates(today: "datetime.date | None" = None) -> dict:
    """Return the date range and year/month strings of the third previous month.

    The third previous month is the calendar month three months before the
    month that contains ``today`` (for example January when ``today`` falls
    in April, or October of the previous year when ``today`` falls in January).

    Parameters:
        today (datetime.date, optional): Reference date. When omitted,
            ``datetime.date.today()`` is used.

    Returns:
        dict: A dictionary containing the following key-value pairs:
            - 'first_day_third_prev': The first day of the third previous month
                                        (datetime.date).
            - 'last_day_third_prev': The last day of the third previous month
                                        (datetime.date).
            - 'sl_year': The year of the third previous month in 'YYYY' format (str).
            - 'sl_month': The month of the third previous month in 'MM' format (str).
    """
    if today is None:
        today = datetime.date.today()

    # Work on a linear month counter (year * 12 + zero-based month). Going back
    # past January then also moves the year back, and because no day-of-month
    # is carried over, dates such as the 31st cannot produce an invalid date
    # in a shorter month.
    month_index = today.year * 12 + (today.month - 1) - 3
    year, month_zero_based = divmod(month_index, 12)
    first_day_third_prev = datetime.date(year, month_zero_based + 1, 1)

    # The last day is the day before the first day of the following month.
    next_year, next_month_zero_based = divmod(month_index + 1, 12)
    last_day_third_prev = (
        datetime.date(next_year, next_month_zero_based + 1, 1)
        - datetime.timedelta(days=1)
    )

    sl_year = f"{first_day_third_prev.year:04d}"
    sl_month = f"{first_day_third_prev.month:02d}"

    return {
        'first_day_third_prev': first_day_third_prev,
        'last_day_third_prev': last_day_third_prev,
        'sl_year': sl_year,
        'sl_month': sl_month,
    }

In [4]:
import re
def replace_non_alphanumeric_with_hyphen(input_string: str) -> str:
    """
    Swap every character outside the kept set for a hyphen.

    Only lowercase ASCII letters, digits and hyphens are kept; any other
    character (uppercase letters included) becomes '-'.

    Args:
        input_string (str): The text to sanitise.

    Returns:
        str: A string of the same length in which each character outside the
            kept set has been replaced by a hyphen.
    """
    disallowed = re.compile(r'[^a-z0-9-]')
    return disallowed.sub('-', input_string)

In [5]:
# Look up the third previous month's bounds once and unpack them together.
dates_data = get_previous_month_dates()
start_date, end_date = (
    dates_data['first_day_third_prev'],
    dates_data['last_day_third_prev'],
)

In [7]:
import os
target_path = 'gs://gcp-public-data-arco-era5/ar/full_37-1h-0p25deg-chunk-1.zarr-v3'
# Keep the final path component and drop its extension before sanitising it.
job_name = os.path.splitext(target_path.split('/')[-1])[0]
job_name = f"zarr-data-ingestion-{replace_non_alphanumeric_with_hyphen(job_name)}-{start_date}-to-{end_date}"

In [8]:
# Display the assembled job name to check its format.
job_name

'zarr-data-ingestion-full-37-1h-0p25deg-chunk-1-2024-01-01-to-2024-01-31'

In [4]:
# Settings interpolated into the pipeline options below.
bucket = 'dabhis_temp'
project = 'grid-intelligence'
sdk_container_image = 'gcr.io/grid/arco-era5:model'
zarr_avro_conversion_network = ''
zarr_avro_conversion_subnet = 'regions/us-central1/subnetworks/arco-era5-subnet'

# Base options; every fragment ends with a space so further flags can be appended.
command = (
    f"--temp_location gs://{bucket}/temp "
    f"--runner DataflowRunner --project {project} --region us-central1 "
    f"--sdk_container_image {sdk_container_image} "
    f"--experiments use_runner_v2 --disk_size_gb 300 --machine_type n1-highmem-4 "
)

# The networking flags are appended only when both the network and the
# subnetwork are non-empty.
if all((zarr_avro_conversion_network, zarr_avro_conversion_subnet)):
    command += (
        f"--no_use_public_ips --network {zarr_avro_conversion_network} "
        f"--subnetwork {zarr_avro_conversion_subnet}"
    )
command

'--temp_location gs://dabhis_temp/temp --runner DataflowRunner --project grid-intelligence --region us-central1 --sdk_container_image gcr.io/grid/arco-era5:model --experiments use_runner_v2 --disk_size_gb 300 --machine_type n1-highmem-4 '