In [1]:
import sys
import os

# Make the parent of the notebook directory importable and switch the
# working directory to it, so the relative "data/..." paths used below resolve.
# The parent is resolved only on the first execution and remembered in the
# kernel namespace: re-running this cell must not climb one more directory
# (the original unconditional os.chdir("../") did exactly that).
if "_PROJECT_ROOT" not in globals():
    _PROJECT_ROOT = os.path.dirname(os.getcwd())

# Avoid stacking duplicate sys.path entries on repeated runs.
if _PROJECT_ROOT not in sys.path:
    sys.path.append(_PROJECT_ROOT)

os.chdir(_PROJECT_ROOT)

# ERA5 Dataset

In [2]:
import xarray as xr
import numpy as np
import glob
import torch
import era5_data_proc
from neural_lam import constants

import os

# Location of the raw ERA5 NetCDF files, taken from the project constants.
RAW_ERA5_PATH = constants.ERA5UKConstants.RAW_ERA5_PATH

## Choose dataset

In [3]:
# Pick which preprocessed dataset to verify: leave exactly one
# `dataset` / `subset` pair uncommented.
#   dataset -- directory name under data/ that holds the preprocessed files
#   subset  -- era5_data_proc function applied to the raw xarray dataset
# NOTE(review): the "*_coarse" variants reuse the subset function of their
# non-coarse counterpart -- presumably coarsening happens elsewhere; confirm.

# dataset = "era5_uk"
# subset = era5_data_proc.uk_subset

# dataset = "era5_uk_small"
# subset = era5_data_proc.uk_small_subset

# dataset = "era5_uk_big"
# subset = era5_data_proc.uk_big_subset

# dataset = "era5_uk_big_coarse"
# subset = era5_data_proc.uk_big_subset

dataset = "era5_uk_max"
subset = era5_data_proc.uk_max_subset

# dataset = "era5_uk_max_coarse"
# subset = era5_data_proc.uk_max_subset

In [4]:
# Additional Options
# Position used to pick the time step under test: it indexes both the raw
# file's time axis and the sorted list of preprocessed sample files.
TIME_SAMPLE = 2

## Verify nwp_xy.npy and time step files

In [5]:
# Load one raw ERA5 time step to serve as the reference for the checks below.
# The first 2022 NetCDF file (in sorted filename order) is cropped with the
# selected `subset` function, then the TIME_SAMPLE-th entry of its time axis
# is extracted.
nc_files = sorted(glob.glob(f'{RAW_ERA5_PATH}/2022*.nc'))
if not nc_files:
    # Fail with an actionable message instead of a bare IndexError below.
    raise FileNotFoundError(
        f"No raw ERA5 files matching {RAW_ERA5_PATH}/2022*.nc"
    )
nc_file = nc_files[0]
# print(nc_file)

data = xr.open_dataset(nc_file)
data = subset(data)
time = data['time'].values[TIME_SAMPLE]

# Use this to select a specific time
# time = np.datetime64('2022-06-01T00:00:00')
# print(time)
sample = data.sel(time=time)

In [6]:
# Load the preprocessed time step that is compared against the raw sample.
# NOTE(review): the file is picked by position (TIME_SAMPLE) in the sorted
# listing, so it only refers to the same timestamp as the raw sample if the
# files line up one-to-one with the raw time axis -- confirm via the
# filename printed below.
sample_dir_path = f"data/{dataset}/samples/train"
sample_files = sorted(glob.glob(f'{sample_dir_path}/2022*.npy'))
if len(sample_files) <= TIME_SAMPLE:
    # Fail with an actionable message instead of a bare IndexError below.
    raise FileNotFoundError(
        f"Need at least {TIME_SAMPLE + 1} files matching "
        f"{sample_dir_path}/2022*.npy, found {len(sample_files)}"
    )
sample_file = sample_files[TIME_SAMPLE]
print(sample_file)

time_step_data = np.load(sample_file)
print("Time Step Data Shape:")
print(time_step_data.shape)

data/era5_uk_max/samples/train/20220101120000.npy
Time Step Data Shape:
(18769, 48)


In [7]:
# Read the static grid coordinates and flatten them to one row per node.
static_dir_path = f"data/{dataset}/static"
nwp_xy_path = os.path.join(static_dir_path, "nwp_xy.npy")
grid_xy = torch.tensor(np.load(nwp_xy_path))  # (2, N_y, N_x)
print("Grid Shape:", grid_xy.shape, sep="\n")

# Collapse the two spatial axes, then move the coordinate pair to the last axis.
grid_xy = grid_xy.reshape(2, -1).transpose(0, 1)  # (N_y * N_x, 2)
print("Flat Grid Shape", grid_xy.shape, sep="\n")


Grid Shape:
torch.Size([2, 137, 137])
Flat Grid Shape
torch.Size([18769, 2])


In [8]:
# Pick a fixed grid node and show its preprocessed features so they can be
# compared with the raw data at the same coordinates.
test_point = 201
# Each grid_xy row is (lon, lat), with longitude in the -180/180 convention.
# Only the longitude is wrapped to 0/360: applying `% 360` to the latitude
# as well would turn any negative latitude into a value above 270.
test_xy = grid_xy[test_point].clone()  # clone so grid_xy itself is not modified
test_xy[0] = test_xy[0] % 360
print("Test coordinates:")
print(test_xy)
time_step_data[test_point]

Test coordinates:
tensor([356.0000,  71.7500])


array([ 1.90510579e+05,  1.26135692e+05,  9.46227681e+04,  6.46130599e+04,
        4.99181637e+04,  3.75969349e+04,  1.27103653e+04,  5.75432470e+02,
        3.17917909e-06,  3.17917909e-06,  7.64446294e-06,  1.88076726e-05,
        5.52741573e-05,  2.36862367e-04,  1.03912503e-03,  1.54035314e-03,
        1.98235449e+02,  2.11257869e+02,  2.18707808e+02,  2.27516789e+02,
        2.31953293e+02,  2.39363989e+02,  2.56560606e+02,  2.65431549e+02,
        2.70918719e+01,  2.72530217e+01,  2.86542049e+01,  2.02307172e+01,
        1.46396408e+01,  6.95088660e+00, -2.58699255e+00,  6.26531739e+00,
        4.07343270e+00,  5.55407710e+00,  3.80116544e+00,  1.10483692e+01,
        1.34100582e+01,  1.90450726e+01, -2.53440184e+00, -1.21127027e+01,
       -1.21631978e-02, -1.49101344e-02, -4.38021071e-03, -1.49510029e-01,
       -4.64492094e-01, -5.33672471e-02, -1.06016866e-01, -3.18495769e-02])

In [9]:
# Look up the same grid node in the raw xarray sample for comparison.
sample.sel({"longitude": test_xy[0], "latitude": test_xy[1]})

## Grid Features

### Grid Features with Static Variables

In [10]:
# Load the static grid feature tensor stored for the selected dataset and
# show its shape.
# NOTE(review): torch.load unpickles the file -- only use it on trusted data.
grid_features_path = f"./data/{dataset}/static/grid_features.pt"
grid_features = torch.load(grid_features_path)
grid_features.shape

torch.Size([18769, 5])

## Verify static variables

In [11]:
# Open the raw static-variables file for inspection.
# NOTE(review): this rebinds `data`, which earlier held the cropped raw ERA5
# dataset; later cells that read `data` depend on which cell ran last.
dataset_path = f"{RAW_ERA5_PATH}/static_variables.nc"
data = xr.open_dataset(dataset_path)
data

In [12]:
# NOTE(review): unexplained arithmetic. 6 * 8 = 48 equals the per-node feature
# count of the time-step sample and 5 equals the last dimension of
# grid_features, so this is presumably a total feature count -- confirm, and
# document what the 4 * 3 term stands for.
6 * 8 + 4 * 3 + 5 

65

In [13]:
# Surface geopotential ("z") is expected to be time invariant, yet the stored
# fields are not identical at every time step: the values changed sometime in
# October 2022. About 99.7% of the values still agree.

static_times = data['time'].values
sample1 = data.sel(time=static_times[3])["z"].values
sample2 = data.sel(time=static_times[4])["z"].values
# fraction of values that agree between the two time steps
np.isclose(sample1, sample2).mean()

0.9972260748959778

### Select a spatial location from xarray and .npy to test

In [14]:
# Pick a fixed grid node and fetch the raw static variables at its
# coordinates, to compare against the stored grid features.
test_point = 100
# Each grid_xy row is (lon, lat). Only the longitude is wrapped to 0/360:
# applying `% 360` to the latitude as well would turn any negative latitude
# into a value above 270.
test_xy = grid_xy[test_point].clone()  # clone so grid_xy itself is not modified
test_xy[0] = test_xy[0] % 360
print(test_xy)

data.sel(time=data['time'].values[0], longitude=test_xy[0], latitude=test_xy[1]).to_array()

tensor([ 5., 72.])


In [15]:
# Stored grid features of the same node, for comparison with the raw
# static variables selected at its coordinates.
grid_features[test_point]

tensor([ 0.3090,  0.0872,  0.9962, -0.0752,  0.0000])