In [1]:
import pandas as pd

# NOTE(review): the star import supplies get_short_from_long used below; switch
# to an explicit import once it is confirmed nothing else from utils is needed.
from utils import *

# Column order of the metadata table (and of the CSV written at the end).
columns = [
    "long_name",
    "short_name",
    "start_datetime",
    "end_datetime",
    "temporal_resolution",
    "temporal_aggregation",
    "min_lat",
    "max_lat",
    "min_lon",
    "max_lon",
    "spatial_resolution",
    "file_path",
]

# raw data
# One record per (variable, calendar year) for 2014 through 2023. Records are
# collected in a plain list so the DataFrame can be built in a single step
# instead of being enlarged one row at a time with df.loc[len(df)].
records = []
for long_name in ["2m_temperature", "total_precipitation", "surface_pressure"]:
    short_name = get_short_from_long(long_name)
    for year in range(2014, 2024):
        records.append(
            {
                "long_name": long_name,
                "short_name": short_name,
                # Each raw file is described as spanning one full calendar year.
                "start_datetime": f"{year}-01-01 00:00:00",
                "end_datetime": f"{year}-12-31 23:59:59",
                "temporal_resolution": "hourly",
                # Raw files carry no aggregation; None is written as an empty CSV field.
                "temporal_aggregation": None,
                "min_lat": -90,
                "max_lat": 90,
                "min_lon": -180,
                "max_lon": 180,
                "spatial_resolution": 0.25,
                "file_path": f"/data/era5/raw/{long_name}/{long_name}-{year}.nc",
            }
        )

# Passing `columns` pins the column order regardless of dict key order.
df = pd.DataFrame(records, columns=columns)

df.to_csv("raw_data_metadata.csv", index=False)

In [2]:
# Fresh, empty metadata table for the pre-computed products. The cells below
# append rows to `df`, and the last cell writes it to pre_compute_metadata.csv.
# Column names are grouped by theme; concatenation keeps the column order.
variable_columns = ["long_name", "short_name"]
temporal_columns = [
    "start_datetime",
    "end_datetime",
    "temporal_resolution",
    "temporal_aggregation",
]
spatial_columns = ["min_lat", "max_lat", "min_lon", "max_lon", "spatial_resolution"]
location_columns = ["file_path"]

df = pd.DataFrame(
    columns=variable_columns + temporal_columns + spatial_columns + location_columns
)

In [3]:
# temperature pre-compute data
# Appends one row to `df` for every (temporal resolution, aggregation) pair of
# the 2m_temperature product: 4 resolutions x 3 aggregations = 12 rows, each
# describing a global 0.25-degree file covering 2014 through 2023.
for resolution in ["daily", "weekly", "monthly", "yearly"]:
    for statistic in ["mean", "min", "max"]:
        variable = "2m_temperature"
        first_year, last_year = 2014, 2023
        year_span = f"{first_year}-{last_year}"
        # len(df) is the next free integer label, so this adds a new row.
        df.loc[len(df)] = {
            "long_name": variable,
            "short_name": get_short_from_long(variable),
            "start_datetime": f"{first_year}-01-01 00:00:00",
            "end_datetime": f"{last_year}-12-31 23:59:59",
            "temporal_resolution": resolution,
            "temporal_aggregation": statistic,
            "min_lat": -90,
            "max_lat": 90,
            "min_lon": -180,
            "max_lon": 180,
            "spatial_resolution": 0.25,
            "file_path": f"/data/era5/precomputed/2m_temperature/{resolution}_{statistic}_{year_span}.nc",
        }

In [4]:
# precipitation pre-compute data
# Appends one row to `df` per (product, aggregation) pair of total_precipitation.
# The products differ only in the fields listed in the table below, so they are
# described once as data instead of as three copy-pasted loops. Row order is:
# weekly, monthly, yearly, daily-p1, daily-p2 (each with mean, min, max).
long_name = "total_precipitation"
# Looked up once: the variable name is the same for every row in this cell.
short_name = get_short_from_long(long_name)

precipitation_products = [
    # (temporal_resolution, start_year, end_year, min_lat, max_lat, file_suffix)
    # Weekly/monthly/yearly aggregates: a single global file each.
    ("weekly", 2014, 2023, -90, 90, ""),
    ("monthly", 2014, 2023, -90, 90, ""),
    ("yearly", 2014, 2023, -90, 90, ""),
    # Daily aggregates are split into two latitude bands (-p1 / -p2 files).
    # NOTE(review): the p2 band starts in 2019 while p1 starts in 2014, so
    # latitudes below 30 have no daily rows for 2014-2018; confirm intentional.
    ("daily", 2014, 2023, 30, 90, "-p1"),
    ("daily", 2019, 2023, -90, 30, "-p2"),
]

for (
    temporal_resolution,
    start_year,
    end_year,
    min_lat,
    max_lat,
    file_suffix,
) in precipitation_products:
    for agg in ["mean", "min", "max"]:
        record = {
            "long_name": long_name,
            "short_name": short_name,
            "start_datetime": f"{start_year}-01-01 00:00:00",
            "end_datetime": f"{end_year}-12-31 23:59:59",
            "temporal_resolution": temporal_resolution,
            "temporal_aggregation": agg,
            "min_lat": min_lat,
            "max_lat": max_lat,
            "min_lon": -180,
            "max_lon": 180,
            "spatial_resolution": 0.25,
            "file_path": f"/data/era5/precomputed/total_precipitation/{temporal_resolution}_{agg}_{start_year}-{end_year}{file_suffix}.nc",
        }
        # len(df) is the next free integer label, so this adds a new row.
        df.loc[len(df)] = record

In [5]:
# pressure pre-compute data
# Appends one row to `df` per (latitude band, temporal resolution, aggregation)
# of surface_pressure: 2 bands x 4 resolutions x 3 aggregations = 24 rows, all
# covering 2014 through 2023. The two bands differ only in the fields listed in
# the table below, so they share one loop instead of two copy-pasted ones.
# All p1 rows are added before any p2 row.
long_name = "surface_pressure"
# Looked up once: the variable name is the same for every row in this cell.
short_name = get_short_from_long(long_name)
start_year = 2014
end_year = 2023

pressure_bands = [
    # (min_lat, max_lat, spatial_resolution, file_suffix)
    (30, 90, 0.25, "-p1"),
    # NOTE(review): the p2 band is recorded at 0.5 degrees while p1 is at 0.25;
    # confirm the coarser resolution is intentional.
    (-90, 30, 0.5, "-p2"),
]

for min_lat, max_lat, spatial_resolution, file_suffix in pressure_bands:
    for temporal_resolution in ["daily", "weekly", "monthly", "yearly"]:
        for agg in ["mean", "min", "max"]:
            record = {
                "long_name": long_name,
                "short_name": short_name,
                "start_datetime": f"{start_year}-01-01 00:00:00",
                "end_datetime": f"{end_year}-12-31 23:59:59",
                "temporal_resolution": temporal_resolution,
                "temporal_aggregation": agg,
                "min_lat": min_lat,
                "max_lat": max_lat,
                "min_lon": -180,
                "max_lon": 180,
                "spatial_resolution": spatial_resolution,
                "file_path": f"/data/era5/precomputed/surface_pressure/{temporal_resolution}_{agg}_{start_year}-{end_year}{file_suffix}.nc",
            }
            # len(df) is the next free integer label, so this adds a new row.
            df.loc[len(df)] = record

In [6]:
# Write the accumulated pre-computed metadata table to disk; index=False keeps
# the integer row labels out of the CSV.
precompute_csv_path = "pre_compute_metadata.csv"
df.to_csv(precompute_csv_path, index=False)