In [1]:
import xarray as xr
import numpy as np

def trim_to_PNW(data):
    """Trim data to Pacific Northwest region
    as defined in Bartusek et al. (2021)"""

    ## lon/lat range
    lat_range = [60, 40]
    lon_range = [230, 250]

    return data.sel(latitude=slice(*lat_range), longitude=slice(*lon_range))


def landarea_weighted_mean(data, lsm=None):
    """Get landarea-weighted mean on regular lon-lat grid.
    Specifically, weight by cosine of latitude"""

    ## get cos(lat)
    cos_lat = np.cos(np.deg2rad(data.latitude))

    ## multiply by fraction of land, if land-sea mask is provided
    if lsm is None:
        weights = cos_lat

    else:
        weights = cos_lat * lsm

    return data.weighted(weights=weights).mean(["latitude", "longitude"])

def preprocess(data, lsm):
    """pre-processing function to reduce data size"""

    ## trim LSM and data to Pac NW
    lsm_PNW = trim_to_PNW(lsm)
    data_PNW = trim_to_PNW(data)

    ## weighted mean
    data_PNW_mean = landarea_weighted_mean(data_PNW, lsm=lsm_PNW)

    return data_PNW_mean


In [5]:
## open dataset and select relevant variables
data = xr.open_zarr(
    "gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr",
    chunks={"time":48},
)

lsm = data["land_sea_mask"]
data = data[["2m_temperature","2m_dewpoint_temperature","surface_pressure"]]

In [8]:
data_prepped = preprocess(data, lsm=lsm)

In [9]:
import time

In [None]:
start = time.time()
data_prepped.load()
end = time.time()

In [None]:
end

# scratch

In [14]:
data = xr.open_zarr(
    'gs://gcp-public-data-arco-era5/co/single-level-reanalysis.zarr-v2/',
    # chunks={"time":24},
    consolidated=True,
)

## select relevant variables
# data = data[["t2m","d2m","sp",]]

# ## downsample in time
# data = data.isel(time=slice(None,None,4))
# data = data.sel(time=slice("1950","2023"))

In [26]:
data["tcc"].attrs["GRIB_name"]

'Total cloud cover'

In [17]:
list(data)

['cape',
 'd2m',
 'hcc',
 'istl1',
 'istl2',
 'istl3',
 'istl4',
 'lcc',
 'mcc',
 'msl',
 'p79.162',
 'p80.162',
 'siconc',
 'skt',
 'sp',
 'sst',
 'stl1',
 'stl2',
 'stl3',
 'stl4',
 'swvl1',
 'swvl2',
 'swvl3',
 'swvl4',
 't2m',
 'tcc',
 'tciw',
 'tclw',
 'tcrw',
 'tcsw',
 'tcw',
 'tcwv',
 'tsn',
 'u10',
 'u100',
 'v10',
 'v100',
 'z']

In [30]:
# data2 = xr.open_zarr("gs://weatherbench2/datasets/era5/1959-2023_01_10-full_37-1h-0p25deg-chunk-1.zarr")
data2 = xr.open_zarr("gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr")

In [39]:
data2[["2m_temperature","2m_dewpoint_temperature","surface_pressure","land_sea_mask"]]

Unnamed: 0,Array,Chunk
Bytes,361.80 GiB,3.96 MiB
Shape,"(93544, 721, 1440)","(1, 721, 1440)"
Dask graph,93544 chunks in 2 graph layers,93544 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 361.80 GiB 3.96 MiB Shape (93544, 721, 1440) (1, 721, 1440) Dask graph 93544 chunks in 2 graph layers Data type float32 numpy.ndarray",1440  721  93544,

Unnamed: 0,Array,Chunk
Bytes,361.80 GiB,3.96 MiB
Shape,"(93544, 721, 1440)","(1, 721, 1440)"
Dask graph,93544 chunks in 2 graph layers,93544 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [35]:
list(data2)

['10m_u_component_of_wind',
 '10m_v_component_of_wind',
 '10m_wind_speed',
 '2m_dewpoint_temperature',
 '2m_temperature',
 'above_ground',
 'ageostrophic_wind_speed',
 'angle_of_sub_gridscale_orography',
 'anisotropy_of_sub_gridscale_orography',
 'boundary_layer_height',
 'divergence',
 'eddy_kinetic_energy',
 'geopotential',
 'geopotential_at_surface',
 'geostrophic_wind_speed',
 'high_vegetation_cover',
 'integrated_vapor_transport',
 'lake_cover',
 'land_sea_mask',
 'lapse_rate',
 'leaf_area_index_high_vegetation',
 'leaf_area_index_low_vegetation',
 'low_vegetation_cover',
 'mean_sea_level_pressure',
 'mean_surface_latent_heat_flux',
 'mean_surface_net_long_wave_radiation_flux',
 'mean_surface_net_short_wave_radiation_flux',
 'mean_surface_sensible_heat_flux',
 'mean_top_downward_short_wave_radiation_flux',
 'mean_top_net_long_wave_radiation_flux',
 'mean_top_net_short_wave_radiation_flux',
 'mean_vertically_integrated_moisture_divergence',
 'potential_vorticity',
 'relative_humidi

In [31]:
data2.time

In [12]:
((data.latitude > 30) & (data.latitude<40)).sum().compute()

In [3]:
data["sp"]

Unnamed: 0,Array,Chunk
Bytes,2.15 TiB,49.63 MiB
Shape,"(1089864, 542080)","(24, 542080)"
Dask graph,45411 chunks in 2 graph layers,45411 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.15 TiB 49.63 MiB Shape (1089864, 542080) (24, 542080) Dask graph 45411 chunks in 2 graph layers Data type float32 numpy.ndarray",542080  1089864,

Unnamed: 0,Array,Chunk
Bytes,2.15 TiB,49.63 MiB
Shape,"(1089864, 542080)","(24, 542080)"
Dask graph,45411 chunks in 2 graph layers,45411 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.14 MiB 4.14 MiB Shape (542080,) (542080,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  1,

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.14 MiB 4.14 MiB Shape (542080,) (542080,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  1,

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.32 MiB,192 B
Shape,"(1089864,)","(24,)"
Dask graph,45411 chunks in 2 graph layers,45411 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 8.32 MiB 192 B Shape (1089864,) (24,) Dask graph 45411 chunks in 2 graph layers Data type datetime64[ns] numpy.ndarray",1089864  1,

Unnamed: 0,Array,Chunk
Bytes,8.32 MiB,192 B
Shape,"(1089864,)","(24,)"
Dask graph,45411 chunks in 2 graph layers,45411 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray


In [19]:
ml_surface[["t2m","d2m","sp",]].sel(time=slice("1950","2023")).isel(time=slice(None,None,4))

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.14 MiB 4.14 MiB Shape (542080,) (542080,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  1,

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.14 MiB 4.14 MiB Shape (542080,) (542080,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",542080  1,

Unnamed: 0,Array,Chunk
Bytes,4.14 MiB,4.14 MiB
Shape,"(542080,)","(542080,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.24 MiB,96 B
Shape,"(162168,)","(12,)"
Dask graph,13514 chunks in 4 graph layers,13514 chunks in 4 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 1.24 MiB 96 B Shape (162168,) (12,) Dask graph 13514 chunks in 4 graph layers Data type datetime64[ns] numpy.ndarray",162168  1,

Unnamed: 0,Array,Chunk
Bytes,1.24 MiB,96 B
Shape,"(162168,)","(12,)"
Dask graph,13514 chunks in 4 graph layers,13514 chunks in 4 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,327.48 GiB,24.81 MiB
Shape,"(162168, 542080)","(12, 542080)"
Dask graph,13514 chunks in 4 graph layers,13514 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 327.48 GiB 24.81 MiB Shape (162168, 542080) (12, 542080) Dask graph 13514 chunks in 4 graph layers Data type float32 numpy.ndarray",542080  162168,

Unnamed: 0,Array,Chunk
Bytes,327.48 GiB,24.81 MiB
Shape,"(162168, 542080)","(12, 542080)"
Dask graph,13514 chunks in 4 graph layers,13514 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,327.48 GiB,24.81 MiB
Shape,"(162168, 542080)","(12, 542080)"
Dask graph,13514 chunks in 4 graph layers,13514 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 327.48 GiB 24.81 MiB Shape (162168, 542080) (12, 542080) Dask graph 13514 chunks in 4 graph layers Data type float32 numpy.ndarray",542080  162168,

Unnamed: 0,Array,Chunk
Bytes,327.48 GiB,24.81 MiB
Shape,"(162168, 542080)","(12, 542080)"
Dask graph,13514 chunks in 4 graph layers,13514 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,327.48 GiB,24.81 MiB
Shape,"(162168, 542080)","(12, 542080)"
Dask graph,13514 chunks in 4 graph layers,13514 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 327.48 GiB 24.81 MiB Shape (162168, 542080) (12, 542080) Dask graph 13514 chunks in 4 graph layers Data type float32 numpy.ndarray",542080  162168,

Unnamed: 0,Array,Chunk
Bytes,327.48 GiB,24.81 MiB
Shape,"(162168, 542080)","(12, 542080)"
Dask graph,13514 chunks in 4 graph layers,13514 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
