### ERA5 regrid + spatial avg. ###

Regridding daily ERA5 data to the same resolution as HadGEM3-GC31-MM and then applying the UK landmask to get the spatial mean.

In [1]:
# Standard-library imports
import os
import sys
import time
import argparse

# Third-party imports
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import shapely.geometry
import cartopy.io.shapereader as shpreader
import iris

# Specific imports
from tqdm import tqdm
from datetime import datetime, timedelta

  _set_context_ca_bundle_path(ca_bundle_path)


In [2]:
# Load my specific functions
# The directory is appended to sys.path first; presumably it is what lets the two
# imports below resolve (functions.py / bias_adjust.py) — confirm they live there.
# NOTE(review): hardcoded absolute home-directory path; the notebook will not run
# for another user without editing it.
sys.path.append("/home/users/benhutch/unseen_functions")
import functions as funcs
import bias_adjust as ba

In [4]:
# Directory and filename of the ERA5 file used to test loading with iris
dir_test = "/gws/nopw/j04/canari/users/benhutch/ERA5/year_month/"

filename_test = "ERA5_EU_T_U10_V10_msl2021_01.nc"

# form the full path
file_test = os.path.join(dir_test, filename_test)

if os.path.isfile(file_test):
    # load only the "msl" variable from the file and show the cube summary
    cube = iris.load_cube(file_test, "msl")
    print(cube)
else:
    # Report the missing file instead of skipping silently: otherwise `cube` is
    # left undefined with no indication of why.
    print(f"Test file not found: {file_test}")

air_pressure_at_mean_sea_level / (Pa)           (time: 744; latitude: 153; longitude: 301)
    Dimension coordinates:
        time                                         x              -               -
        latitude                                     -              x               -
        longitude                                    -              -               x
    Auxiliary coordinates:
        expver                                       x              -               -
    Scalar coordinates:
        realization                             0
    Attributes:
        Conventions                             'CF-1.7'
        GRIB_NV                                 np.int64(0)
        GRIB_Nx                                 np.int64(301)
        GRIB_Ny                                 np.int64(153)
        GRIB_centre                             'ecmf'
        GRIB_centreDescription                  'European Centre for Medium-Range Weather Forecasts'
        GRIB_cfName     



In [8]:
from ncdata.iris_xarray import cubes_from_xarray, cubes_to_xarray

# Select the "u10" variable and squeeze it (no conversion to an iris cube happens here).
# NOTE(review): `ds` is not defined in any cell visible here (presumably an xarray
# Dataset opened in a removed cell) — confirm, or this fails on a fresh kernel.
ds_u10 = ds["u10"].squeeze()

In [9]:
# Display ds_u10 to inspect the selected variable
ds_u10

In [12]:
# List the coordinates attached to ds_u10.
# NOTE: this only displays them — the expver coordinate is not removed by this cell.
ds_u10.coords

Coordinates:
    number      int64 8B 0
  * valid_time  (valid_time) datetime64[ns] 6kB 2021-01-01 ... 2021-01-31T23:...
  * latitude    (latitude) float64 1kB 72.0 71.75 71.5 71.25 ... 34.5 34.25 34.0
  * longitude   (longitude) float64 2kB -40.0 -39.75 -39.5 ... 34.5 34.75 35.0
    expver      (valid_time) <U4 12kB '0001' '0001' '0001' ... '0001' '0001'

In [2]:
# Root directory holding the ERA5 files
era5_dir = "/gws/nopw/j04/canari/users/benhutch/ERA5"

# Daily ERA5 file to load (the filename indicates t2m, 1950-2020)
temp_fname = "ERA5_t2m_daily_1950_2020.nc"

# One HadGEM3-GC31-MM hindcast file; only its grid is needed, as the regrid target
hadgem_fpath = (
    "/badc/cmip6/data/CMIP6/DCPP/MOHC/HadGEM3-GC31-MM/dcppA-hindcast/"
    "s1960-r1i1p1f2/day/sfcWind/gn/files/d20200417/"
    "sfcWind_day_HadGEM3-GC31-MM_dcppA-hindcast_s1960-r1i1p1f2_gn_19601101-19601230.nc"
)

In [3]:
# load the era5 data using iris
# The path is built from era5_dir and temp_fname, which are set in the path-configuration cell.
era5_cube = iris.load_cube(os.path.join(era5_dir, temp_fname))

In [4]:
# load the hadgem data using iris
# This cube is later subset and passed to regrid() as the target grid for ERA5.
hadgem_cube = iris.load_cube(hadgem_fpath)



In [5]:
# Display the ERA5 cube summary (name, shape, coordinates, attributes)
era5_cube

2 Metre Temperature (K),time,latitude,longitude
Shape,25933,214,304
Dimension coordinates,,,
time,x,-,-
latitude,-,x,-
longitude,-,-,x
Attributes,,,
Conventions,'CF-1.6','CF-1.6','CF-1.6'
history,'2021-02-05 11:50:14 GMT by grib_to_netcdf-2.16.0: /opt/ecmwf/eccodes/bin/grib_to_netcdf ...','2021-02-05 11:50:14 GMT by grib_to_netcdf-2.16.0: /opt/ecmwf/eccodes/bin/grib_to_netcdf ...','2021-02-05 11:50:14 GMT by grib_to_netcdf-2.16.0: /opt/ecmwf/eccodes/bin/grib_to_netcdf ...'


In [6]:
# Report the longitude and latitude extent of the ERA5 cube
for _label, _coord_name in (("lon", "longitude"), ("lat", "latitude")):
    _points = era5_cube.coord(_coord_name).points
    print(f"ERA5 min {_label}: ", _points.min())
    print(f"ERA5 max {_label}: ", _points.max())

ERA5 min lon:  -45.0
ERA5 max lon:  40.219
ERA5 min lat:  29.929733
ERA5 max lat:  89.784874


In [7]:
# Report the longitude and latitude extent of the HadGEM cube
for _label, _coord_name in (("lon", "longitude"), ("lat", "latitude")):
    _points = hadgem_cube.coord(_coord_name).points
    print(f"HadGEM min {_label}: ", _points.min())
    print(f"HadGEM max {_label}: ", _points.max())

HadGEM min lon:  0.4166666567325592
HadGEM max lon:  359.58331298828125
HadGEM min lat:  -89.72222137451172
HadGEM max lat:  89.72223663330078


In [8]:
# Mean spacing between consecutive ERA5 grid points along each horizontal axis
lon_res, lat_res = (
    np.diff(era5_cube.coord(axis_name).points).mean()
    for axis_name in ("longitude", "latitude")
)

# Show the ERA5 grid spacing (a negative value means the coordinate is descending)
print("ERA5 lon res: ", lon_res)
print("ERA5 lat res: ", lat_res)

ERA5 lon res:  0.28125083
ERA5 lat res:  -0.28101006


In [9]:
# Mean spacing between consecutive HadGEM grid points along each horizontal axis
# (lon_res / lat_res are reused, so they now hold the HadGEM values)
lon_res, lat_res = (
    np.diff(hadgem_cube.coord(axis_name).points).mean()
    for axis_name in ("longitude", "latitude")
)

# Show the HadGEM grid spacing
print("HadGEM lon res: ", lon_res)
print("HadGEM lat res: ", lat_res)

HadGEM lon res:  0.8333332861520851
HadGEM lat res:  0.5555555975474071


In [10]:
# Re-express the HadGEM longitudes on -180..180 (they load as roughly 0..360) and
# keep only the 0..90 latitude range.
# NOTE: this rebinds hadgem_cube, so every later cell sees the restricted cube and
# the global cube is no longer available under this name.
hadgem_cube = hadgem_cube.intersection(longitude=(-180, 180), latitude=(0, 90))

In [11]:
# Bounding box of the European domain used to subset both datasets
eu_grid = dict(
    lon1=-40,  # lower longitude bound, degrees east
    lon2=30,  # upper longitude bound, degrees east
    lat1=30,  # lower latitude bound, degrees north
    lat2=80,  # upper latitude bound, degrees north
)

In [24]:
# Compare the longitude/latitude extent of the ERA5 cube and the HadGEM cube
for _source, _cube in (("ERA5", era5_cube), ("HadGEM", hadgem_cube)):
    for _label, _coord_name in (("lon", "longitude"), ("lat", "latitude")):
        _points = _cube.coord(_coord_name).points
        print(f"{_source} min {_label}: ", _points.min())
        print(f"{_source} max {_label}: ", _points.max())

ERA5 min lon:  -45.0
ERA5 max lon:  40.219
ERA5 min lat:  29.929733
ERA5 max lat:  89.784874
HadGEM min lon:  -179.5833282470703
HadGEM max lon:  179.5833282470703
HadGEM min lat:  -0.27777099609375
HadGEM max lat:  89.72223663330078


In [25]:
# Display the bounding box that the subsetting cell uses
eu_grid

{'lon1': -40, 'lon2': 30, 'lat1': 30, 'lat2': 80}

In [12]:
# Cut both cubes down to the same European bounding box (ERA5 first, then HadGEM)
era5_cube_eu, hadgem_cube_eu = (
    source_cube.intersection(
        longitude=(eu_grid["lon1"], eu_grid["lon2"]),
        latitude=(eu_grid["lat1"], eu_grid["lat2"]),
    )
    for source_cube in (era5_cube, hadgem_cube)
)

In [13]:
# regrid the ERA5 data to the HadGEM grid
# hadgem_cube_eu supplies the target grid; the iris Linear scheme is used.
era5_cube_eu_regrid = era5_cube_eu.regrid(hadgem_cube_eu, iris.analysis.Linear())

In [14]:
# Display the regridded ERA5 cube summary to check its new shape
era5_cube_eu_regrid

2 Metre Temperature (K),time,latitude,longitude
Shape,25933,91,85
Dimension coordinates,,,
time,x,-,-
latitude,-,x,-
longitude,-,-,x
Attributes,,,
Conventions,'CF-1.6','CF-1.6','CF-1.6'
history,'2021-02-05 11:50:14 GMT by grib_to_netcdf-2.16.0: /opt/ecmwf/eccodes/bin/grib_to_netcdf ...','2021-02-05 11:50:14 GMT by grib_to_netcdf-2.16.0: /opt/ecmwf/eccodes/bin/grib_to_netcdf ...','2021-02-05 11:50:14 GMT by grib_to_netcdf-2.16.0: /opt/ecmwf/eccodes/bin/grib_to_netcdf ...'


In [15]:
# Report the longitude and latitude extent of the regridded ERA5 cube
for _label, _coord_name in (("lon", "longitude"), ("lat", "latitude")):
    _points = era5_cube_eu_regrid.coord(_coord_name).points
    print(f"ERA5 min {_label}: ", _points.min())
    print(f"ERA5 max {_label}: ", _points.max())

ERA5 min lon:  -39.583343505859375
ERA5 max lon:  30.41666603088379
ERA5 min lat:  29.72222900390625
ERA5 max lat:  79.72223663330078


In [16]:
# Grid spacing of the regridded ERA5 cube, for comparison with HadGEM below
lon_res, lat_res = (
    np.diff(era5_cube_eu_regrid.coord(axis_name).points).mean()
    for axis_name in ("longitude", "latitude")
)

print("ERA5 lon res: ", lon_res)
print("ERA5 lat res: ", lat_res)

# Grid spacing of the HadGEM cube; lon_res / lat_res end up holding these values
lon_res, lat_res = (
    np.diff(hadgem_cube.coord(axis_name).points).mean()
    for axis_name in ("longitude", "latitude")
)

print("HadGEM lon res: ", lon_res)
print("HadGEM lat res: ", lat_res)

ERA5 lon res:  0.8333334468659901
ERA5 lat res:  0.5555556403266059
HadGEM lon res:  0.8333333097311847
HadGEM lat res:  0.5555556026505836


In [17]:
# Hardcoded variables
# NOTE(review): an earlier comment here said "Try the canadian model", but the value
# below is not CanESM5 — confirm which model is intended.
model = "CESM1-1-CAM5-CMIP5"
experiment = "dcppA-hindcast"
freq = "Amon" # go back to using monthly data

# Set up the arguments
variable = "tas" # Obs Tas already regridded to HadGEM grid 
country = "United Kingdom"
season = "ONDJFM"
model_season = "ONDJFM"
first_year = 1960
last_year = 2017
model_fcst_year = 1
lead_year = "1-9" # only 1-9 for CanESM5 and MPI-ESM1-2-HR
# NOTE(review): the stated rule is "True for temperature, false for wind speeds",
# yet variable is "tas" and detrend is False — confirm this is deliberate.
detrend = False
bias_correct = "None" # No bias correction for tas months
percentile = 10

# Save directory
save_dir = "/gws/nopw/j04/canari/users/benhutch/plots/unseen"

# list of valid bias corrections; bias_correct is checked against this list
valid_bias_corrections = [
    "None",
    "linear_scaling",
    "variance_scaling",
    "quantile_mapping",
    "quantile_delta_mapping",
    "scaled_distribution_mapping",
]

# Set up the output directory for the dfs
output_dir_dfs = "/gws/nopw/j04/canari/users/benhutch/unseen/saved_dfs"

In [19]:
# Reject bias-correction methods that are not in the supported list
if bias_correct not in valid_bias_corrections:
    raise ValueError(f"Bias correction {bias_correct} not recognised")

# Observed (ERA5) variable name for each supported model variable
obs_var_by_variable = {"tas": "t2m", "sfcWind": "si10"}
if variable not in obs_var_by_variable:
    raise ValueError("Variable not recognised")
obs_var = obs_var_by_variable[variable]

# Calendar months (1-12) making up each supported season, in season order
months_by_season = {
    "DJF": [12, 1, 2],
    "D": [12],
    "NDJ": [11, 12, 1],
    "OND": [10, 11, 12],
    "JFM": [1, 2, 3],
    "MAM": [3, 4, 5],
    "JJA": [6, 7, 8],
    "SON": [9, 10, 11],
    "ONDJFM": [10, 11, 12, 1, 2, 3],
    "NDJFM": [11, 12, 1, 2, 3],
}
if season not in months_by_season:
    raise ValueError("Season not recognised")
months = list(months_by_season[season])

# Lead months to extract from the model, depending on the model and forecast year.
# For the shorter seasons the full ONDJFM lead window is taken and subset later.
full_window_seasons = ["ONDJFM", "OND", "NDJ", "DJF", "JFM", "D"]

# CanESM5 is tested first so that its dedicated lead months are actually used.
# Previously "CanESM5" was also listed in the generic model list, which made this
# branch unreachable.
# NOTE(review): confirm that 10-15 (not 12-17) are the intended CanESM5 leads.
if model == "CanESM5":
    if model_fcst_year == 1 and season in full_window_seasons:
        lead_months = [10, 11, 12, 13, 14, 15]
    else:
        # Without this, lead_months would be left undefined for other combinations
        raise ValueError("Model forecast year and season not recognised")
elif model in [
    "MPI-ESM1-2-HR",
    "HadGEM3-GC31-MM",
    "BCC-CSM2-MR",
    "CMCC-CM2-SR5",
    "CESM1-1-CAM5-CMIP5",
]:
    if model_fcst_year == 0 and season == "NDJFM":
        lead_months = [1, 2, 3, 4, 5]
    elif model_fcst_year == 1 and season in full_window_seasons:
        lead_months = [12, 13, 14, 15, 16, 17]
    else:
        raise ValueError("Model forecast year and season not recognised")
else:
    raise ValueError("Model not recognised")

In [22]:
# create the mask
# Built for `country` using the regridded ERA5 cube.
# NOTE(review): the shape and values of MASK_MATRIX are decided inside
# funcs.create_masked_matrix, which is not visible here — presumably a 2D
# latitude x longitude array; confirm before relying on broadcasting.
MASK_MATRIX = funcs.create_masked_matrix(
    country=country,
    cube=era5_cube_eu_regrid,
)

Found Country United Kingdom


In [23]:
%%time

# apply the mask to the observed data
# Multiplies the regridded ERA5 cube's data array by MASK_MATRIX.
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt. Accessing
# `.data` presumably loads the whole (25933, 91, 85) cube into memory — confirm, and
# consider masking a time subset or working lazily.
obs_values = era5_cube_eu_regrid.data * MASK_MATRIX

KeyboardInterrupt: 