In [26]:
# ! pip install fme cfgrib eccodes --quiet

# ACE2-ERA5

In [1]:
import os
import sys
import subprocess
from pathlib import Path
import yaml
import urllib.request
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


def download_ace_model(base_dir):
    """
    Download ACE2-ERA5 model checkpoint and data from Hugging Face.

    Args:
        base_dir (str or Path): Directory where files will be downloaded.

    Returns:
        dict: Paths to downloaded files
    """
    base_dir = Path(base_dir)
    base_dir.mkdir(parents=True, exist_ok=True)

    print(f"Downloading ACE2-ERA5 model into {base_dir}...")

    # URLs for ACE2-ERA5 model files
    base_url = "https://huggingface.co/allenai/ACE2-ERA5/resolve/main"

    files_to_download = {
        "checkpoint": f"{base_url}/ace2_era5_ckpt.tar",
        "config": f"{base_url}/inference_config.yaml",
        "initial_conditions": f"{base_url}/initial_conditions/ic_2020.nc",
        "forcing_data": f"{base_url}/forcing_data/forcing_2020.nc"
    }

    downloaded_files = {}

    for name, url in files_to_download.items():
        print(f"Downloading {name}...")
        if name == "initial_conditions":
            target_dir = base_dir / "initial_conditions"
            target_dir.mkdir(exist_ok=True)
            output_path = target_dir / "ic_2020.nc"
        elif name == "forcing_data":
            target_dir = base_dir / "forcing_data"
            target_dir.mkdir(exist_ok=True)
            output_path = target_dir / "forcing_2020.nc"
        else:
            output_path = base_dir / url.split("/")[-1]

        urllib.request.urlretrieve(url, output_path)
        downloaded_files[name] = output_path
        print(f"  Saved to: {output_path}")

    return downloaded_files


def create_inference_config(base_dir, downloaded_files):
    """
    Create inference configuration file.

    Args:
        base_dir (Path): Base directory
        downloaded_files (dict): Paths to downloaded files

    Returns:
        Path to configuration file
    """
    print("Creating inference configuration...")

    with open(downloaded_files["config"], 'r') as f:
        config = yaml.safe_load(f)

    config['checkpoint_path'] = str(downloaded_files["checkpoint"])
    config['initial_condition']['path'] = str(downloaded_files["initial_conditions"])
    config['forcing_loader']['dataset']['data_path'] = str(base_dir / "forcing_data")

    config['n_forward_steps'] = 20
    config['forward_steps_in_memory'] = 5
    config['experiment_dir'] = str(base_dir / "inference_output")

    config_path = base_dir / "custom_config.yaml"
    with open(config_path, 'w') as f:
        yaml.dump(config, f, default_flow_style=False)

    print(f"Configuration saved to: {config_path}")
    return config_path


def run_inference(config_path):
    """Run ACE model inference."""
    print("Running ACE model inference...")
    try:
        result = subprocess.run([
            sys.executable, '-m', 'fme.ace.inference',
            str(config_path)
        ], capture_output=True, text=True, check=True)
        print("✓ Inference completed successfully!")
        return True
    except subprocess.CalledProcessError as e:
        print(f"✗ Inference failed: {e}")
        print("STDOUT:", e.stdout)
        print("STDERR:", e.stderr)
        return False


def analyze_results(output_dir):
    """Analyze and visualize inference results."""
    output_path = Path(output_dir)
    print(f"Analyzing results in {output_path}...")

    output_files = list(output_path.glob("*.nc"))
    if not output_files:
        print("No output files found.")
        return

    for file_path in output_files:
        print(f"\n--- {file_path.name} ---")
        try:
            ds = xr.open_dataset(file_path)
            print(f"Dimensions: {dict(ds.dims)}")
            print(f"Variables: {list(ds.data_vars)}")
            _create_simple_plot(ds, output_path)
        except Exception as e:
            print(f"Error analyzing {file_path.name}: {e}")


def _create_simple_plot(ds, output_dir):
    """Create a simple visualization of the data."""
    try:
        plot_vars = [var for var in ds.data_vars if len(ds[var].dims) >= 2]
        if not plot_vars:
            return
        var_name = plot_vars[0]
        data = ds[var_name]
        if 'time' in data.dims:
            data = data.isel(time=0)
        while len(data.dims) > 2:
            for dim in data.dims:
                if dim not in ['lat', 'lon', 'latitude', 'longitude']:
                    data = data.isel({dim: 0})
                    break
        if len(data.dims) == 2:
            fig, ax = plt.subplots(figsize=(10, 6))
            im = ax.contourf(data.values)
            plt.colorbar(im, ax=ax, label=var_name)
            ax.set_title(f"ACE Model Output: {var_name}")
            plot_path = output_dir / f"{var_name}_plot.png"
            plt.savefig(plot_path, dpi=150, bbox_inches='tight')
            plt.close()
            print(f"  Visualization saved: {plot_path}")
    except Exception as e:
        print(f"  Could not create visualization: {e}")


# if __name__ == "__main__":
#     # Specify your target directory here
#     base_dir = Path("./ace_model_data")  # Change this path as desired

#     downloaded_files = download_ace_model(base_dir)
#     config_path = create_inference_config(base_dir, downloaded_files)
#     if run_inference(config_path):
#         analyze_results(base_dir / "inference_output")


## Initial conditions

These are the features available

- PRESsfc
- surface_temperature
- TMP2m
- Q2m
- UGRD10m
- VGRD10m
- air_temperature_0
- specific_total_water_0
- eastward_wind_0
- northward_wind_0
- air_temperature_1
- specific_total_water_1
- eastward_wind_1
- northward_wind_1
- air_temperature_2
- specific_total_water_2
- eastward_wind_2
- northward_wind_2
- air_temperature_3
- specific_total_water_3
- eastward_wind_3
- northward_wind_3
- air_temperature_4
- specific_total_water_4
- eastward_wind_4
- northward_wind_4
- air_temperature_5
- specific_total_water_5
- eastward_wind_5
- northward_wind_5
- air_temperature_6
- specific_total_water_6
- eastward_wind_6
- northward_wind_6
- air_temperature_7
- specific_total_water_7
- eastward_wind_7
- northward_wind_7

In [2]:
# Step 1: Download model and data (initial conditions)
base_dir = Path("../data/ace_model_data")
downloaded_files = download_ace_model(base_dir)

Downloading ACE2-ERA5 model into ../data/ace_model_data...
Downloading checkpoint...
  Saved to: ../data/ace_model_data/ace2_era5_ckpt.tar
Downloading config...
  Saved to: ../data/ace_model_data/inference_config.yaml
Downloading initial_conditions...
  Saved to: ../data/ace_model_data/initial_conditions/ic_2020.nc
Downloading forcing_data...
  Saved to: ../data/ace_model_data/forcing_data/forcing_2020.nc


In [3]:
file_path = f"{base_dir}/initial_conditions/ic_2020.nc"
ds = xr.open_dataset(file_path)

df = ds.to_dataframe()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PRESsfc,surface_temperature,TMP2m,Q2m,UGRD10m,VGRD10m,air_temperature_0,specific_total_water_0,eastward_wind_0,northward_wind_0,...,eastward_wind_5,northward_wind_5,air_temperature_6,specific_total_water_6,eastward_wind_6,northward_wind_6,air_temperature_7,specific_total_water_7,eastward_wind_7,northward_wind_7
time,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2020-01-01,-89.236642,0.5,69749.734375,244.977448,247.028275,0.000455,-3.439817,1.287437,242.565979,0.000003,-5.478108,2.409519,...,-1.379189,0.912131,243.907776,0.000350,-5.428240,1.618374,246.405853,0.000442,-1.553904,2.793084
2020-01-01,-89.236642,1.5,69732.710938,244.980804,247.030670,0.000455,-3.470093,1.198408,242.565826,0.000003,-5.525220,2.316550,...,-1.311625,0.868136,243.898239,0.000350,-5.467637,1.588786,246.404877,0.000442,-1.600728,2.641466
2020-01-01,-89.236642,2.5,69715.156250,244.983902,247.032806,0.000454,-3.500574,1.109444,242.565674,0.000003,-5.570292,2.222988,...,-1.244004,0.825252,243.888153,0.000350,-5.502250,1.557197,246.403381,0.000442,-1.652020,2.483254
2020-01-01,-89.236642,3.5,69697.078125,244.986725,247.034683,0.000454,-3.531254,1.020573,242.565475,0.000003,-5.613327,2.128852,...,-1.176559,0.783300,243.877472,0.000350,-5.531794,1.523608,246.401672,0.000442,-1.707595,2.318596
2020-01-01,-89.236642,4.5,69678.468750,244.989319,247.036301,0.000454,-3.562122,0.931820,242.565308,0.000003,-5.654424,2.034307,...,-1.109073,0.742150,243.866287,0.000350,-5.556032,1.487929,246.399689,0.000441,-1.767010,2.147442
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-01,89.236642,355.5,99792.945312,260.037048,260.057556,0.001222,2.019123,-5.798048,198.101654,0.000003,17.742641,6.724038,...,-0.838141,-7.172268,253.571869,0.000837,-1.250476,-7.509150,258.878357,0.001207,1.399752,-9.496107
2020-12-01,89.236642,356.5,99789.906250,260.047241,260.070740,0.001223,1.915941,-5.810715,198.101395,0.000003,17.869431,6.441854,...,-0.998493,-7.191439,253.568604,0.000837,-1.385639,-7.511917,258.895325,0.001210,1.145988,-9.516674
2020-12-01,89.236642,357.5,99786.859375,260.057373,260.084015,0.001225,1.812483,-5.823605,198.101074,0.000003,17.990612,6.157196,...,-1.156823,-7.208270,253.565186,0.000837,-1.513650,-7.513063,258.911865,0.001213,0.893108,-9.533369
2020-12-01,89.236642,358.5,99783.812500,260.067505,260.097351,0.001226,1.708780,-5.836714,198.100647,0.000003,18.106155,5.870431,...,-1.313173,-7.222762,253.561447,0.000837,-1.634741,-7.512548,258.928131,0.001216,0.641489,-9.546026


In [4]:
file_path = f"{base_dir}/forcing_data/forcing_2020.nc"
ds = xr.open_dataset(file_path)

df = ds.to_dataframe()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,DSWRFtoa,HGTsfc,land_fraction,ocean_fraction,sea_ice_fraction,surface_temperature,global_mean_co2,ak_0,bk_0,ak_1,...,ak_4,bk_4,ak_5,bk_5,ak_6,bk_6,ak_7,bk_7,ak_8,bk_8
time,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2020-01-01 00:00:00,-89.236642,0.5,540.471680,2716.394531,1.0,0.000000,0.000000,244.977448,0.000411,0.0,0.0,5119.89502,...,20087.085938,0.203491,15596.695312,0.438391,8880.453125,0.680643,3057.265625,0.873929,0.0,1.0
2020-01-01 00:00:00,-89.236642,1.5,540.288269,2718.293945,1.0,0.000000,0.000000,244.980804,0.000411,0.0,0.0,5119.89502,...,20087.085938,0.203491,15596.695312,0.438391,8880.453125,0.680643,3057.265625,0.873929,0.0,1.0
2020-01-01 00:00:00,-89.236642,2.5,540.100098,2720.221680,1.0,0.000000,0.000000,244.983902,0.000411,0.0,0.0,5119.89502,...,20087.085938,0.203491,15596.695312,0.438391,8880.453125,0.680643,3057.265625,0.873929,0.0,1.0
2020-01-01 00:00:00,-89.236642,3.5,539.914734,2722.177490,1.0,0.000000,0.000000,244.986725,0.000411,0.0,0.0,5119.89502,...,20087.085938,0.203491,15596.695312,0.438391,8880.453125,0.680643,3057.265625,0.873929,0.0,1.0
2020-01-01 00:00:00,-89.236642,4.5,539.736816,2724.160400,1.0,0.000000,0.000000,244.989319,0.000411,0.0,0.0,5119.89502,...,20087.085938,0.203491,15596.695312,0.438391,8880.453125,0.680643,3057.265625,0.873929,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-31 18:00:00,89.236642,355.5,0.000000,-0.718468,0.0,0.000002,0.999998,262.447021,0.000414,0.0,0.0,5119.89502,...,20087.085938,0.203491,15596.695312,0.438391,8880.453125,0.680643,3057.265625,0.873929,0.0,1.0
2020-12-31 18:00:00,89.236642,356.5,0.000000,-0.750450,0.0,0.000002,0.999998,262.473846,0.000414,0.0,0.0,5119.89502,...,20087.085938,0.203491,15596.695312,0.438391,8880.453125,0.680643,3057.265625,0.873929,0.0,1.0
2020-12-31 18:00:00,89.236642,357.5,0.000000,-0.782188,0.0,0.000002,0.999998,262.500122,0.000414,0.0,0.0,5119.89502,...,20087.085938,0.203491,15596.695312,0.438391,8880.453125,0.680643,3057.265625,0.873929,0.0,1.0
2020-12-31 18:00:00,89.236642,358.5,0.000000,-0.813683,0.0,0.000003,0.999997,262.525879,0.000414,0.0,0.0,5119.89502,...,20087.085938,0.203491,15596.695312,0.438391,8880.453125,0.680643,3057.265625,0.873929,0.0,1.0


## Inference

In [5]:
# Step 2: Create configuration
config_path = create_inference_config(base_dir, downloaded_files)

Creating inference configuration...
Configuration saved to: ../data/ace_model_data/custom_config.yaml


In [6]:
with open(config_path, 'r') as f:
    print(f.read())

checkpoint_path: ../data/ace_model_data/ace2_era5_ckpt.tar
data_writer:
  names:
  - TMP2m
  - VGRD10m
  - PRATEsfc
  save_monthly_files: false
  save_prediction_files: true
experiment_dir: ../data/ace_model_data/inference_output
forcing_loader:
  dataset:
    data_path: ../data/ace_model_data/forcing_data
  num_data_workers: 4
forward_steps_in_memory: 5
initial_condition:
  path: ../data/ace_model_data/initial_conditions/ic_2020.nc
  start_indices:
    times:
    - '2020-01-01T00:00:00'
logging:
  log_to_file: true
  log_to_screen: true
  log_to_wandb: false
  project: ace
n_forward_steps: 20



In [7]:
# Step 3: Run inference
success = run_inference(config_path)

Running ACE model inference...
✓ Inference completed successfully!


### Outputs

In [8]:
file_path = f"{base_dir}/inference_output/autoregressive_predictions.nc"
ds = xr.open_dataset(file_path)

df = ds.to_dataframe()
df

To continue decoding into a timedelta64 dtype, either set `decode_timedelta=True` when opening this dataset, or add the attribute `dtype='timedelta64[ns]'` to this variable on disk.
To opt-in to future behavior, set `decode_timedelta=False`.
  ds = xr.open_dataset(file_path)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,TMP2m,VGRD10m,PRATEsfc,init_time,valid_time
sample,time,lat,lon,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0 days 00:00:00,-89.236641,0.5,247.070145,0.027962,0.000000,2020-01-01 06:00:00,2020-01-01 06:00:00
0,0 days 00:00:00,-89.236641,1.5,247.088303,-0.062476,0.000000,2020-01-01 06:00:00,2020-01-01 06:00:00
0,0 days 00:00:00,-89.236641,2.5,247.099655,-0.155643,0.000000,2020-01-01 06:00:00,2020-01-01 06:00:00
0,0 days 00:00:00,-89.236641,3.5,247.117340,-0.246716,0.000000,2020-01-01 06:00:00,2020-01-01 06:00:00
0,0 days 00:00:00,-89.236641,4.5,247.131592,-0.341724,0.000000,2020-01-01 06:00:00,2020-01-01 06:00:00
0,...,...,...,...,...,...,...,...
0,4 days 18:00:00,89.236641,355.5,247.530701,-2.194218,0.000003,2020-01-01 06:00:00,2020-01-06 00:00:00
0,4 days 18:00:00,89.236641,356.5,247.558029,-2.176298,0.000003,2020-01-01 06:00:00,2020-01-06 00:00:00
0,4 days 18:00:00,89.236641,357.5,247.580887,-2.158167,0.000003,2020-01-01 06:00:00,2020-01-06 00:00:00
0,4 days 18:00:00,89.236641,358.5,247.599197,-2.141026,0.000003,2020-01-01 06:00:00,2020-01-06 00:00:00


In [19]:
# # These are only the latitudes, longitudes, times to predict
# file_path = f"{base_dir}/inference_output/autoregressive_target.nc"
# ds = xr.open_dataset(file_path)

# df = ds.to_dataframe()
# df

# Datasets

Check https://cds.climate.copernicus.eu/datasets?q=era5+single&limit=30

We could use the daily or monthly means, also they have other potential datasets for downstream tasks.


## ERA5 hourly data on single levels from 1940 to present
So I think there is no need for the emulator, there is this reanalysis with hourly data https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels?tab=overview

**ERA5 Dataset Variable Descriptions**

- **`number`** — Ensemble member index (for ensemble datasets; typically `0` for the deterministic ERA5 run).  
- **`step`** — Forecast lead time in hours (e.g., `0` = analysis, `1` = 1-hour forecast).  
- **`surface`** — Model surface level height, usually `0 m` for single-level variables.  
- **`valid_time`** — Actual timestamp of the data (analysis or forecast time + step).  

Below are the ERA5 variables included in this dataset download:

- **`u10`** — 10 m *U-component of wind* (east–west wind speed).  
  Positive values indicate wind blowing *toward the east*.

- **`v10`** — 10 m *V-component of wind* (north–south wind speed).  
  Positive values indicate wind blowing *toward the north*.

- **`d2m`** — 2 m *Dew point temperature* (Kelvin).  
  Indicates the temperature at which air becomes saturated (humidity measure).

- **`t2m`** — 2 m *Air temperature* (Kelvin).  
  To convert to Celsius: `t2m - 273.15`.

- **`msl`** — *Mean sea level pressure* (Pascals).  
  Pressure reduced to sea level; often converted to hPa as `msl / 100`.

- **`mwd`** — *Mean wave direction* (degrees).  
  The average direction from which waves are coming, relative to true north.

- **`mwp`** — *Mean wave period* (seconds).  
  Average time interval between successive wave crests.

- **`sst`** — *Sea surface temperature* (Kelvin).  
  Temperature of the ocean surface; available over water grid cells only.

- **`swh`** — *Significant height of combined wind waves and swell* (meters).  
  Represents the average height (from trough to crest) of the highest one-third of waves.

- **`sp`** — *Surface pressure* (Pascals).  
  Atmospheric pressure at the model’s surface level.

- **`tp`** — *Total precipitation* (meters).  
  Accumulated liquid and frozen water equivalent (rain + snow) since the previous time step.


In [22]:
# ! wget https://object-store.os-api.cci2.ecmwf.int/cci2-prod-cache-1/2025-11-04/fae342affae481084dfcaefe814d3049.zip

--2025-11-04 03:58:55--  https://object-store.os-api.cci2.ecmwf.int/cci2-prod-cache-1/2025-11-04/fae342affae481084dfcaefe814d3049.zip
Resolving object-store.os-api.cci2.ecmwf.int (object-store.os-api.cci2.ecmwf.int)... 136.156.136.3
Connecting to object-store.os-api.cci2.ecmwf.int (object-store.os-api.cci2.ecmwf.int)|136.156.136.3|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1208863318 (1.1G) [application/zip]
Saving to: ‘fae342affae481084dfcaefe814d3049.zip’


2025-11-04 03:59:45 (23.6 MB/s) - ‘fae342affae481084dfcaefe814d3049.zip’ saved [1208863318/1208863318]



In [23]:
# ! unzip fae342affae481084dfcaefe814d3049.zip

Archive:  fae342affae481084dfcaefe814d3049.zip
  inflating: data.grib               


In [9]:
file_path = "../data/data.grib"
ds = xr.open_dataset(
    file_path,
    engine="cfgrib",
    backend_kwargs={
        "errors": "ignore"
    }
)

df = ds.to_dataframe()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,number,step,surface,valid_time,u10,v10,d2m,t2m,msl,meanSea,sst,sp,u100,v100,u10n,v10n
time,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2000-01-01 00:00:00,90.0,0.00,0,0 days,0.0,2000-01-01 00:00:00,0.303375,-0.373459,262.655884,264.692444,100318.0625,0.0,271.460449,100317.429688,0.330719,-0.439285,0.306808,-0.369476
2000-01-01 00:00:00,90.0,0.25,0,0 days,0.0,2000-01-01 00:00:00,0.303375,-0.373459,262.655884,264.692444,100318.0625,0.0,271.460449,100317.429688,0.330719,-0.439285,0.306808,-0.369476
2000-01-01 00:00:00,90.0,0.50,0,0 days,0.0,2000-01-01 00:00:00,0.303375,-0.373459,262.655884,264.692444,100318.0625,0.0,271.460449,100317.429688,0.330719,-0.439285,0.306808,-0.369476
2000-01-01 00:00:00,90.0,0.75,0,0 days,0.0,2000-01-01 00:00:00,0.303375,-0.373459,262.655884,264.692444,100318.0625,0.0,271.460449,100317.429688,0.330719,-0.439285,0.306808,-0.369476
2000-01-01 00:00:00,90.0,1.00,0,0 days,0.0,2000-01-01 00:00:00,0.303375,-0.373459,262.655884,264.692444,100318.0625,0.0,271.460449,100317.429688,0.330719,-0.439285,0.306808,-0.369476
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2000-01-02 23:00:00,-90.0,358.75,0,0 days,0.0,2000-01-02 23:00:00,0.075516,0.025253,240.765442,243.958466,99217.0625,0.0,,68354.351562,0.044052,0.133057,-0.111832,0.046509
2000-01-02 23:00:00,-90.0,359.00,0,0 days,0.0,2000-01-02 23:00:00,0.075516,0.025253,240.765442,243.958466,99217.0625,0.0,,68354.351562,0.044052,0.133057,-0.111832,0.046509
2000-01-02 23:00:00,-90.0,359.25,0,0 days,0.0,2000-01-02 23:00:00,0.075516,0.025253,240.765442,243.958466,99217.0625,0.0,,68354.351562,0.044052,0.133057,-0.111832,0.046509
2000-01-02 23:00:00,-90.0,359.50,0,0 days,0.0,2000-01-02 23:00:00,0.075516,0.025253,240.765442,243.958466,99217.0625,0.0,,68354.351562,0.044052,0.133057,-0.111832,0.046509


In [10]:
coordinates = [[x[1], x[2]] for x in df.index.values]
coordinates = pd.DataFrame(coordinates, columns=['latitude', 'longitude']).drop_duplicates().reset_index(drop=True)
coordinates

Unnamed: 0,latitude,longitude
0,90.0,0.00
1,90.0,0.25
2,90.0,0.50
3,90.0,0.75
4,90.0,1.00
...,...,...
1038235,-90.0,358.75
1038236,-90.0,359.00
1038237,-90.0,359.25
1038238,-90.0,359.50


In [9]:
# ! pip install cdsapi --quiet

In [11]:
# ! cat ~/.cdsapirc

cat: /root/.cdsapirc: No such file or directory


In [11]:
# api key required
import cdsapi

dataset = "derived-era5-single-levels-daily-statistics"
request = {
    "product_type": "reanalysis",
    "variable": [
        "10m_u_component_of_wind",
        "10m_v_component_of_wind",
        "2m_dewpoint_temperature",
        "2m_temperature",
        "mean_sea_level_pressure",
        "mean_wave_direction",
        "mean_wave_period",
        "sea_surface_temperature",
        "significant_height_of_combined_wind_waves_and_swell",
        "surface_pressure",
        "total_precipitation"
    ],
    "year": "2000",
    "month": ["01"],
    "day": [
        "01", "02", "03",
        "04", "05", "06",
        "07", "08", "09",
        "10", "11", "12",
        "13", "14", "15",
        "16", "17", "18",
        "19", "20", "21",
        "22", "23", "24",
        "25", "26", "27",
        "28", "29", "30",
        "31"
    ],
    "daily_statistic": "daily_mean",
    "time_zone": "utc+00:00",
    "frequency": "1_hourly"
}

client = cdsapi.Client()
client.retrieve(dataset, request).download()

Exception: Missing/incomplete configuration file: /home/bic/gregbell/.cdsapirc