In [1]:
import atlite
from atlite.datasets.era5 import features as era5_feature_map
import xarray as xr

# Define the global Cutout with 0.25° resolution
cutout = atlite.Cutout(
    path='data/global_cutout_2019',
    module='era5',
    x=slice(-180, 180),
    y=slice(-90, 90),
    time='2019',
    dx=0.25,
    dy=0.25,
    overwrite=False,  # Set to True to force re-download
    show_progress=True,
)

# Prepare the cutout (this will download and cache the ERA5 data as needed)
cutout.prepare(monthly_requests=True)
# global 2019 took 38h on ethernet connection


  warn(


<Cutout "global_cutout_2019">
 x = -180.00 ⟷ 179.75, dx = 0.25
 y = -90.00 ⟷ 89.75, dy = 0.25
 time = 2019-01-01 ⟷ 2019-12-31, dt = h
 module = era5
 prepared_features = ['height', 'wind', 'influx', 'temperature', 'runoff']

In [2]:
# Select region for testing
region = {
    "x": slice(-1.0, 1.0),     #
    "y": slice(9.0, 11.0)      #
}

sample_ds = cutout.data.sel(**region)
print(sample_ds)


<xarray.Dataset> Size: 48MB
Dimensions:               (x: 9, y: 9, time: 8760)
Coordinates:
  * x                     (x) float64 72B -1.0 -0.75 -0.5 -0.25 ... 0.5 0.75 1.0
  * y                     (y) float64 72B 9.0 9.25 9.5 9.75 ... 10.5 10.75 11.0
  * time                  (time) datetime64[ns] 70kB 2019-01-01 ... 2019-12-3...
    lon                   (x) float64 72B dask.array<chunksize=(9,), meta=np.ndarray>
    lat                   (y) float64 72B dask.array<chunksize=(9,), meta=np.ndarray>
Data variables: (12/15)
    height                (y, x) float32 324B dask.array<chunksize=(9, 9), meta=np.ndarray>
    wnd100m               (time, y, x) float32 3MB dask.array<chunksize=(100, 9, 9), meta=np.ndarray>
    wnd_shear_exp         (time, y, x) float64 6MB dask.array<chunksize=(100, 9, 9), meta=np.ndarray>
    wnd_azimuth           (time, y, x) float32 3MB dask.array<chunksize=(100, 9, 9), meta=np.ndarray>
    roughness             (time, y, x) float32 3MB dask.array<chunksize=

In [3]:
import atlite
import geopandas as gpd
import geodatasets
from shapely.ops import unary_union
import xarray as xr
import pandas as pd

# ─────────────────────────────────────────────────────────────
# 1. Wrap your sample as a temporary Cutout
# ─────────────────────────────────────────────────────────────
sample_cutout = atlite.Cutout(path="sample_cutout", data=sample_ds)

# ─────────────────────────────────────────────────────────────
# 2. Land mask using Natural Earth
# ─────────────────────────────────────────────────────────────
land_geom = unary_union(gpd.read_file(geodatasets.get_path("naturalearth_land")).geometry)
grid = sample_cutout.grid.reset_index()
grid["on_land"] = grid.geometry.centroid.within(land_geom)

coord_y = sample_cutout.coords["y"].values
coord_x = sample_cutout.coords["x"].values
mask_table = (
    grid.set_index(["y", "x"])["on_land"]
    .unstack("x")
    .reindex(index=coord_y, columns=coord_x)
)
land_mask = xr.DataArray(
    mask_table.values.astype(bool),
    coords={"y": coord_y, "x": coord_x},
    dims=("y", "x")
)

print("Grid shape (ny, nx):", land_mask.shape)
print("Onshore cells:", int(land_mask.sum().item()))
print("Offshore cells:", int((~land_mask).sum().item()))
print(grid[["y", "x", "on_land"]])

# ─────────────────────────────────────────────────────────────
# 3. Uniform layout (1 MW per cell)
# ─────────────────────────────────────────────────────────────
def unit_layout(mask: xr.DataArray | None = None) -> xr.DataArray:
    layout = sample_cutout.data["height"].copy(deep=False)
    layout[:] = 1.0
    if mask is not None:
        layout = layout.where(mask, 0.0)
    return layout

# ─────────────────────────────────────────────────────────────
# 4. Compute hourly capacity factor time series
# ─────────────────────────────────────────────────────────────
cf_wind_on = sample_cutout.wind(
    layout=unit_layout(land_mask),
    turbine="Vestas_V112_3MW",
    capacity_factor=True,
    per_unit=True
)

cf_wind_off = sample_cutout.wind(
    layout=unit_layout(~land_mask),
    turbine="NREL_ReferenceTurbine_5MW_offshore",
    capacity_factor=True,
    per_unit=True
)

cf_solar = sample_cutout.pv(
    layout=unit_layout(land_mask).compute(),
    panel="CSi",
    orientation='latitude_optimal',
    tracking=None, #"tilted_horizontal",
    capacity_factor=True,
    per_unit=True
)

# ─────────────────────────────────────────────────────────────
# 5. Confirm output
# ─────────────────────────────────────────────────────────────
print(cf_wind_on)
print(cf_wind_off)
print(cf_solar)


Grid shape (ny, nx): (9, 9)
Onshore cells: 81
Offshore cells: 0
       y     x  on_land
0    9.0 -1.00     True
1    9.0 -0.75     True
2    9.0 -0.50     True
3    9.0 -0.25     True
4    9.0  0.00     True
..   ...   ...      ...
76  11.0  0.00     True
77  11.0  0.25     True
78  11.0  0.50     True
79  11.0  0.75     True
80  11.0  1.00     True

[81 rows x 3 columns]



  grid["on_land"] = grid.geometry.centroid.within(land_geom)
power curves will default to True in atlite relase v0.2.15.
power curves will default to True in atlite relase v0.2.15.
power curves will default to True in atlite relase v0.2.15.


<xarray.DataArray (time: 8760, dim_0: 1)> Size: 70kB
array([[0.44199642],
       [0.43033208],
       [0.40158301],
       ...,
       [0.22949103],
       [0.22653586],
       [0.25022666]], shape=(8760, 1))
Coordinates:
  * time     (time) datetime64[ns] 70kB 2019-01-01 ... 2019-12-31T23:00:00
  * dim_0    (dim_0) int64 8B 0
Attributes:
    units:    p.u.
<xarray.DataArray (time: 8760, dim_0: 1)> Size: 70kB
array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], shape=(8760, 1))
Coordinates:
  * time     (time) datetime64[ns] 70kB 2019-01-01 ... 2019-12-31T23:00:00
  * dim_0    (dim_0) int64 8B 0
Attributes:
    units:    p.u.
<xarray.DataArray (time: 8760, dim_0: 1)> Size: 70kB
array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], shape=(8760, 1))
Coordinates:
  * time     (time) datetime64[ns] 70kB 2019-01-01 ... 2019-12-31T23:00:00
  * dim_0    (dim_0) int64 8B 0
Attributes:
    units:    p.u.


In [4]:
print(sample_cutout.data["influx_toa"].shape)


(8760, 9, 9)


In [5]:
# Inspect CF arrays as a single wide table (hours × lat_lon columns plus aggregated means)
import pandas as pd

pd.set_option("display.max_columns", 50)

def cf_matrix(da: xr.DataArray | None, label: str, max_hours: int = 24) -> pd.DataFrame | None:
    if da is None:
        return None

    time_dim = next((d for d in da.dims if "time" in d.lower()), da.dims[0])
    spatial_dims = [d for d in da.dims if d != time_dim and "technology" not in d.lower()]
    if not spatial_dims:
        return None

    if len(spatial_dims) == 1:
        cell_dim = "cell"
        stacked = da.rename({spatial_dims[0]: cell_dim})
    else:
        cell_dim = "cell"
        stacked = da.stack({cell_dim: spatial_dims})

    n_cells = stacked.sizes[cell_dim]
    print(f"{label}: {n_cells} spatial cells")

    time_index = pd.Index(stacked[time_dim].values[:max_hours], name="time")
    matrix = stacked.transpose(time_dim, cell_dim).values[:max_hours, :]

    cell_index = stacked.get_index(cell_dim)
    lat_vals = lon_vals = None
    if isinstance(cell_index, pd.MultiIndex):
        name_pairs = list(zip(cell_index.names, range(len(cell_index.names))))
        lat_level = next((idx for name, idx in name_pairs if name and any(tok in name.lower() for tok in ("lat", "y"))), 0)
        lon_level = next((idx for name, idx in name_pairs if name and any(tok in name.lower() for tok in ("lon", "x"))), 1)
        lat_vals = cell_index.get_level_values(lat_level).astype(float)
        lon_vals = cell_index.get_level_values(lon_level).astype(float)
    elif "grid" in globals():
        lat_vals = grid.geometry.centroid.y.values[:matrix.shape[1]]
        lon_vals = grid.geometry.centroid.x.values[:matrix.shape[1]]

    if lat_vals is None or lon_vals is None:
        col_labels = [f"loc{i:03d} {label}" for i in range(matrix.shape[1])]
    else:
        col_labels = [f"{lat:.2f}_{lon:.2f} {label}" for lat, lon in zip(lat_vals, lon_vals)]

    df = pd.DataFrame(matrix, index=time_index, columns=col_labels)
    df[f"aggregated_{label}"] = stacked.mean(cell_dim).values[:max_hours]
    return df

matrices = [
    cf_matrix(cf_wind_on, "cf_wind_on"),
    cf_matrix(cf_wind_off, "cf_wind_off"),
    cf_matrix(cf_solar, "cf_solar"),
]
combined = pd.concat([m for m in matrices if m is not None], axis=1)
print("Combined CF matrix shape:", combined.shape)
display(combined.head())


cf_wind_on: 1 spatial cells
cf_wind_off: 1 spatial cells
cf_solar: 1 spatial cells
Combined CF matrix shape: (24, 6)



  lat_vals = grid.geometry.centroid.y.values[:matrix.shape[1]]

  lon_vals = grid.geometry.centroid.x.values[:matrix.shape[1]]

  lat_vals = grid.geometry.centroid.y.values[:matrix.shape[1]]

  lon_vals = grid.geometry.centroid.x.values[:matrix.shape[1]]

  lat_vals = grid.geometry.centroid.y.values[:matrix.shape[1]]

  lon_vals = grid.geometry.centroid.x.values[:matrix.shape[1]]


Unnamed: 0_level_0,9.00_-1.00 cf_wind_on,aggregated_cf_wind_on,9.00_-1.00 cf_wind_off,aggregated_cf_wind_off,9.00_-1.00 cf_solar,aggregated_cf_solar
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-01 00:00:00,0.441996,0.441996,0.0,0.0,0.0,0.0
2019-01-01 01:00:00,0.430332,0.430332,0.0,0.0,0.0,0.0
2019-01-01 02:00:00,0.401583,0.401583,0.0,0.0,0.0,0.0
2019-01-01 03:00:00,0.367697,0.367697,0.0,0.0,0.0,0.0
2019-01-01 04:00:00,0.349065,0.349065,0.0,0.0,0.0,0.0
