## 0. Import Libraries

In [4]:
from datetime import datetime, timedelta
from glob import glob

import numpy as np
import pandas as pd
import pyproj
import rioxarray
import salem
import xarray as xr
from shapely.geometry import mapping

import cartopy
import matplotlib.pyplot as plt
import proplot
from matplotlib.colors import BoundaryNorm, ListedColormap
from cartopy.feature import ShapelyFeature
from cartopy.io.shapereader import Reader
from dea_tools.spatial import xr_vectorize, xr_rasterize
from wrf import ll_to_xy
import matplotlib.patheffects as pe
from netCDF4 import Dataset
from SPAEF_metric import SPAEF

import warnings
warnings.filterwarnings('ignore')

## 1. Simulation Data

In [5]:
# variable extracted from every input file, and the folder the files live in
variable, general_path = 'PRCP', 'data'

# option labels describing the ensemble members
# (presumably microphysics scheme and land-use scenario -- confirm against
# the run configuration)
micro_options = ['LIN', 'THOMPSON', 'WSM6']
luse_options = ['urban', 'nourban']

### 1.1 ERA5

#### 1.1.1 Data Options

In [6]:
# forcing dataset whose runs are loaded in this section
run_data = 'era5'

# Paths of every file under `general_path` whose name contains the forcing
# name followed by the variable name.
# glob() returns matches in arbitrary, filesystem-dependent order, so the
# result is sorted: the member order along 'ens' follows this list, and the
# hard-coded simulation labels used for the metric matrix rely on that order.
data_path = sorted(glob(fr'{general_path}/*{run_data}**{variable}*'))

In [7]:
# show the file paths matched by the glob pattern
data_path

['data/era5_LIN_nourban_PRCP_wrfout_d03_2017-07-18.nc',
 'data/era5_LIN_urban_PRCP_wrfout_d03_2017-07-18.nc',
 'data/era5_THOMPSON_nourban_PRCP_wrfout_d03_2017-07-18.nc',
 'data/era5_THOMPSON_urban_PRCP_wrfout_d03_2017-07-18.nc',
 'data/era5_WSM6_nourban_PRCP_wrfout_d03_2017-07-18.nc',
 'data/era5_WSM6_urban_PRCP_wrfout_d03_2017-07-18.nc']

#### 1.1.2 Open Data

In [8]:
# open data
dt = xr.open_mfdataset(data_path,
                       concat_dim='ens')[variable]

# instead of 201 grid we want 200 grid in each direction
dt = dt.isel(south_north=slice(0,200),
             west_east=slice(0,200))

# assign projection and dim info
dt = dt.rio.write_crs(dt.attrs['pyproj_srs'])
dt_era5 = dt.rio.set_spatial_dims(x_dim='west_east',
                             y_dim='south_north')

# sum over time dimension
dt_era5_sum = dt_era5.sum(dim='time')

In [9]:
# inspect the time-summed ERA5 array (one 2-D field per ensemble member)
dt_era5_sum

Unnamed: 0,Array,Chunk
Bytes,0.92 MiB,156.25 kiB
Shape,"(6, 200, 200)","(1, 200, 200)"
Count,48 Tasks,6 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 0.92 MiB 156.25 kiB Shape (6, 200, 200) (1, 200, 200) Count 48 Tasks 6 Chunks Type float32 numpy.ndarray",200  200  6,

Unnamed: 0,Array,Chunk
Bytes,0.92 MiB,156.25 kiB
Shape,"(6, 200, 200)","(1, 200, 200)"
Count,48 Tasks,6 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 156.25 kiB 156.25 kiB Shape (200, 200) (200, 200) Count 27 Tasks 1 Chunks Type float32 numpy.ndarray",200  200,

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 156.25 kiB 156.25 kiB Shape (200, 200) (200, 200) Count 27 Tasks 1 Chunks Type float32 numpy.ndarray",200  200,

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray


#### 1.1.3 Define Members

In [10]:
# ensemble member list
ensemble_members = list(dt.ens.values)

# urban and nourban members
urban_members = [ens for ens in ensemble_members if not 'nourban' in ens ]
nourban_members = [ens for ens in ensemble_members if 'nourban' in ens ]

#### 1.1.4 Ensemble Mean For Urban and Nourban

In [11]:
# mean across the urban members of the time-summed ERA5 field
dt_era5_sum_urban_ens = (
    dt_era5_sum
    .sel(ens=urban_members)
    .mean(dim='ens')
)

# same ensemble mean, but keeping the time dimension
dt_era5_urban_ens = (
    dt_era5
    .sel(ens=urban_members)
    .mean(dim='ens')
)

In [12]:
# inspect the urban ensemble mean of the time-summed ERA5 field
dt_era5_sum_urban_ens

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,55 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 156.25 kiB 156.25 kiB Shape (200, 200) (200, 200) Count 55 Tasks 1 Chunks Type float32 numpy.ndarray",200  200,

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,55 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 156.25 kiB 156.25 kiB Shape (200, 200) (200, 200) Count 27 Tasks 1 Chunks Type float32 numpy.ndarray",200  200,

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 156.25 kiB 156.25 kiB Shape (200, 200) (200, 200) Count 27 Tasks 1 Chunks Type float32 numpy.ndarray",200  200,

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray


### 1.2 GFS

#### 1.2.1 Data Options

In [13]:
# forcing dataset whose runs are loaded in this section
run_data = 'gfs'

# Paths of every file under `general_path` whose name contains the forcing
# name followed by the variable name.
# glob() returns matches in arbitrary, filesystem-dependent order, so the
# result is sorted: the member order along 'ens' follows this list, and the
# hard-coded simulation labels used for the metric matrix rely on that order.
data_path = sorted(glob(fr'{general_path}/*{run_data}**{variable}*'))

In [14]:
# show the file paths matched by the glob pattern
data_path

['data/gfs_LIN_nourban_PRCP_wrfout_d03_2017-07-18.nc',
 'data/gfs_LIN_urban_PRCP_wrfout_d03_2017-07-18.nc',
 'data/gfs_THOMPSON_nourban_PRCP_wrfout_d03_2017-07-18.nc',
 'data/gfs_THOMPSON_urban_PRCP_wrfout_d03_2017-07-18.nc',
 'data/gfs_WSM6_nourban_PRCP_wrfout_d03_2017-07-18.nc',
 'data/gfs_WSM6_urban_PRCP_wrfout_d03_2017-07-18.nc']

#### 1.2.2 Open Data

In [15]:
# open data
dt = xr.open_mfdataset(data_path,
                       concat_dim='ens')[variable]

# instead of 201 grid we want 200 grid in each direction
dt = dt.isel(south_north=slice(0,200),
             west_east=slice(0,200))

# assign projection and dim info
dt = dt.rio.write_crs(dt.attrs['pyproj_srs'])
dt_gfs = dt.rio.set_spatial_dims(x_dim='west_east',
                             y_dim='south_north')

# sum over time dimension
dt_gfs_sum = dt_gfs.sum(dim='time')

In [16]:
# inspect the time-summed GFS array (one 2-D field per ensemble member)
dt_gfs_sum

Unnamed: 0,Array,Chunk
Bytes,0.92 MiB,156.25 kiB
Shape,"(6, 200, 200)","(1, 200, 200)"
Count,48 Tasks,6 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 0.92 MiB 156.25 kiB Shape (6, 200, 200) (1, 200, 200) Count 48 Tasks 6 Chunks Type float32 numpy.ndarray",200  200  6,

Unnamed: 0,Array,Chunk
Bytes,0.92 MiB,156.25 kiB
Shape,"(6, 200, 200)","(1, 200, 200)"
Count,48 Tasks,6 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 156.25 kiB 156.25 kiB Shape (200, 200) (200, 200) Count 27 Tasks 1 Chunks Type float32 numpy.ndarray",200  200,

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 156.25 kiB 156.25 kiB Shape (200, 200) (200, 200) Count 27 Tasks 1 Chunks Type float32 numpy.ndarray",200  200,

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray


#### 1.2.3 Ensemble Mean For Urban and Nourban

In [17]:
# mean across the urban members of the time-summed GFS field
dt_gfs_sum_urban_ens = (
    dt_gfs_sum
    .sel(ens=urban_members)
    .mean(dim='ens')
)

# same ensemble mean, but keeping the time dimension
dt_gfs_urban_ens = (
    dt_gfs
    .sel(ens=urban_members)
    .mean(dim='ens')
)

In [18]:
# inspect the urban ensemble mean of the time-summed GFS field
dt_gfs_sum_urban_ens

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,55 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 156.25 kiB 156.25 kiB Shape (200, 200) (200, 200) Count 55 Tasks 1 Chunks Type float32 numpy.ndarray",200  200,

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,55 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 156.25 kiB 156.25 kiB Shape (200, 200) (200, 200) Count 27 Tasks 1 Chunks Type float32 numpy.ndarray",200  200,

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 156.25 kiB 156.25 kiB Shape (200, 200) (200, 200) Count 27 Tasks 1 Chunks Type float32 numpy.ndarray",200  200,

Unnamed: 0,Array,Chunk
Bytes,156.25 kiB,156.25 kiB
Shape,"(200, 200)","(200, 200)"
Count,27 Tasks,1 Chunks
Type,float32,numpy.ndarray


## 2. Intra-Simulation Metric Calculation

### 2.1 Concat All Ensemble Members

In [49]:
# stack the time-summed urban members of both forcings along the first
# axis: ERA5 members first, GFS members after them
era5_gfs_concat = np.concatenate(
    [summed.sel(ens=urban_members).values
     for summed in (dt_era5_sum, dt_gfs_sum)]
)

### 2.2 Concat Ensemble Mean

In [50]:
# put the two urban ensemble-mean fields behind a new leading axis:
# index 0 is ERA5, index 1 is GFS
ens_era5_gfs_concat = np.stack([dt_era5_sum_urban_ens.values,
                                dt_gfs_sum_urban_ens.values])

### 2.3 Concat All Simulations

In [51]:
# individual members first, ensemble means last, all along the first axis
simulations = np.concatenate((era5_gfs_concat, ens_era5_gfs_concat), axis=0)

In [52]:
# (number of simulations, south_north, west_east)
simulations.shape

(8, 200, 200)

### 2.4 Calculate Metrics

In [53]:
# data used in simulation numpy array in order
simulation_data_order = [
    'era5-lin',
    'era5-thompson',
    'era5-wsm6',
    'gfs-lin',
    'gfs-thompson',
    'gfs-wsm6',
    'ens-era5',
    'ens-gfs'
]

In [54]:
# Pairwise SPAEF score between every ordered pair of simulations.
# The matrix is sized from the data instead of a hard-coded 8 so it stays
# correct if simulations are added or removed, and the label list is checked
# against it so rows/columns cannot be silently mislabelled.
n_simulations = len(simulations)
assert n_simulations == len(simulation_data_order), \
    'simulation_data_order must contain one label per simulation'

metrics = np.zeros((n_simulations, n_simulations), dtype=np.float64)

for i, s1 in enumerate(simulations):

    print(f'next first data is {simulation_data_order[i]}')
    for j, s2 in enumerate(simulations):

        print(f'>> next second data is {simulation_data_order[j]}')

        # SPAEF returns four values; only the first (the score) is stored.
        # Both (i, j) and (j, i) are evaluated rather than mirroring one half.
        SPAef, alpha, beta, gamma = SPAEF(s1, s2)

        metrics[i, j] = SPAef

    print('END ------------------')

next first data is era5-lin
>> next second data is era5-lin
>> next second data is era5-thompson
>> next second data is era5-wsm6
>> next second data is gfs-lin
>> next second data is gfs-thompson
>> next second data is gfs-wsm6
>> next second data is ens-era5
>> next second data is ens-gfs
END ------------------
next first data is era5-thompson
>> next second data is era5-lin
>> next second data is era5-thompson
>> next second data is era5-wsm6
>> next second data is gfs-lin
>> next second data is gfs-thompson
>> next second data is gfs-wsm6
>> next second data is ens-era5
>> next second data is ens-gfs
END ------------------
next first data is era5-wsm6
>> next second data is era5-lin
>> next second data is era5-thompson
>> next second data is era5-wsm6
>> next second data is gfs-lin
>> next second data is gfs-thompson
>> next second data is gfs-wsm6
>> next second data is ens-era5
>> next second data is ens-gfs
END ------------------
next first data is gfs-lin
>> next second data is

In [59]:
# Label both axes of the metric matrix with the simulation names.
# The label list is passed directly as `columns`: wrapping it in another list
# builds a MultiIndex whose entries are 1-tuples such as ('era5-lin',), which
# breaks the column headers and plain label-based column access.
df = pd.DataFrame(metrics,
                  index=simulation_data_order,
                  columns=simulation_data_order)

In [60]:
# render the metric table with a red background gradient
df.style.background_gradient(cmap="Reds")

Unnamed: 0,"('era5-lin',)",Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
era5-lin,1.0,0.405058,0.688453,0.41347,0.349114,0.470367,0.737699,0.489903
era5-thompson,0.400094,1.0,0.2664,0.197037,0.294638,0.351379,0.429175,0.307943
era5-wsm6,0.688479,0.27242,1.0,0.248763,0.218401,0.346818,0.709324,0.327769
gfs-lin,0.398771,0.196744,0.233512,1.0,0.70837,0.643436,0.210431,0.768703
gfs-thompson,0.329516,0.293592,0.197424,0.70829,1.0,0.583002,0.16072,0.695891
gfs-wsm6,0.47015,0.352698,0.346322,0.654441,0.59865,1.0,0.417556,0.856429
ens-era5,0.755991,0.485236,0.720351,0.280762,0.244266,0.435053,1.0,0.425147
ens-gfs,0.489517,0.308841,0.327056,0.782952,0.71439,0.856423,0.405556,1.0
