# Compare USDM & SPI
a. stein 4.27.2022

Building on `explore/usdm_spi_explore_workflow.ipynb`, this notebook applies the lessons learned and the workflow developed there to compare the USDM more thoroughly against SPIs computed over various intervals.

In [1]:
%pylab inline
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import xarray as xr
import rioxarray
import rasterio as rio
import pandas as pd
import geopandas as gpd

from tqdm.autonotebook import tqdm

import sys
sys.path.append('../../')
import ndrought.wrangle as wrangle
import ndrought.compare as compare

Populating the interactive namespace from numpy and matplotlib


  if sys.path[0] == '':


Load in data

In [2]:
# Root folder holding the drought-measure NetCDF files.
dm_path = '/pool0/home/steinadi/data/drought/drought_impact/data/drought_measures'

# USDM & SPI already clipped in WA folders
usdm = xr.open_dataset(f'{dm_path}/usdm/USDM_WA_20000104_20220412.nc')

# One SPI file per accumulation interval; the file name is spi_<interval>.nc.
spi_intervals = ['1y', '2y', '5y', '14d', '30d', '90d', '180d', '270d']
spi_list = [
    xr.open_dataset(f'{dm_path}/spi/WA/spi_{interval}.nc')
    for interval in spi_intervals
]

# Keep the individual per-interval names available, in the same order as spi_intervals.
spi_1y, spi_2y, spi_5y, spi_14d, spi_30d, spi_90d, spi_180d, spi_270d = spi_list

Check if SPI dates and coordinates all match

In [3]:
# Compare every SPI dataset against the 1-year SPI on its time and spatial
# coordinates. np.array_equal is used instead of an elementwise `!=` so that
# arrays of different lengths are reported as a mismatch rather than relying on
# (version-dependent) broadcasting behaviour of the comparison operator.
# Nothing is printed when all coordinates agree.
for spi, interval in zip(spi_list[1:], spi_intervals[1:]):
    if not np.array_equal(spi_1y['day'].values, spi['day'].values):
        print(f'Time mismatch ({interval})')
    if not np.array_equal(spi_1y['lat'].values, spi['lat'].values):
        print(f'lat mismatch ({interval})')
    if not np.array_equal(spi_1y['lon'].values, spi['lon'].values):
        print(f'lon mismatch ({interval})')

No mismatches were reported, so all SPI datasets share the same dates and grid. That means I only need to make one set of paired dates and upscale the USDM to one of the SPI grids.

In [4]:
# Tag the 1-year SPI and the USDM arrays with the same CRS (EPSG:4326) ...
spi_1y_da = spi_1y['spi'].rio.write_crs('EPSG:4326', inplace=True)
usdm_da = usdm['USDM'].rio.write_crs('EPSG:4326', inplace=True)

# ... then put the USDM onto the SPI grid so the two can be compared cell by cell.
usdm_upscale = usdm_da.rio.reproject_match(spi_1y_da)

In [7]:
# Display all USDM dates directly from the dataset. This is computed inline
# because `usdm_dates` is not assigned until a later cell, so a bare
# `usdm_dates` here raises NameError when the notebook is run top to bottom.
pd.DatetimeIndex(usdm['date'].values)

DatetimeIndex(['2000-01-04', '2000-01-11', '2000-01-18', '2000-01-25',
               '2000-02-01', '2000-02-08', '2000-02-15', '2000-02-22',
               '2000-02-29', '2000-03-07',
               ...
               '2022-02-08', '2022-02-15', '2022-02-22', '2022-03-01',
               '2022-03-08', '2022-03-15', '2022-03-22', '2022-03-29',
               '2022-04-05', '2022-04-12'],
              dtype='datetime64[ns]', length=1163, freq=None)

In [31]:
# USDM dates, keeping only those that fall before 2022.
usdm_dates = pd.DatetimeIndex(usdm['date'].values)
before_2022 = usdm_dates < '2022'
usdm_dates = usdm_dates[before_2022]

# SPI dates are taken from the 1-year SPI.
spi_dates = pd.DatetimeIndex(spi_1y['day'].values)
# (disabled) optional lower bound on the SPI dates:
#spi_dates = spi_dates[spi_dates >= '1999-12-31']

# Match each USDM date with an SPI date; the SPI column is labelled 'SPI Date'.
pair_dates = compare.pair_to_usdm_date(usdm_dates, spi_dates, 'SPI Date', realign=True)
pair_dates

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0,SPI Date,USDM Date
0,1999-12-31,2000-01-04
1,2000-01-10,2000-01-11
2,2000-01-15,2000-01-18
3,2000-01-25,2000-01-25
4,2000-01-30,2000-02-01
...,...,...
1143,2021-11-26,2021-11-30
1144,2021-12-06,2021-12-07
1145,2021-12-11,2021-12-14
1146,2021-12-21,2021-12-21


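Add in the Hydrologic Year, computed by shifting each USDM date forward by 3 months so that October–December dates are assigned to the following calendar year (TODO: double-check that I have the counting right).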

In [32]:
# Shift every USDM date forward by three months and take the calendar year of
# the shifted date: Oct-Dec dates land in the next year, Jan-Sep dates stay put.
usdm_pair_dates = pd.DatetimeIndex(pair_dates['USDM Date'])
usdm_pair_dates_shifted = usdm_pair_dates + pd.DateOffset(months=3)
pair_dates['USDM Hydrologic Year'] = usdm_pair_dates_shifted.year
pair_dates

Unnamed: 0,SPI Date,USDM Date,USDM Hydrologic Year
0,1999-12-31,2000-01-04,2000
1,2000-01-10,2000-01-11,2000
2,2000-01-15,2000-01-18,2000
3,2000-01-25,2000-01-25,2000
4,2000-01-30,2000-02-01,2000
...,...,...,...
1143,2021-11-26,2021-11-30,2022
1144,2021-12-06,2021-12-07,2022
1145,2021-12-11,2021-12-14,2022
1146,2021-12-21,2021-12-21,2022


Convert SPI to USDM categories ...

In [41]:
# SPI dates that were paired with a USDM date; identical for every interval,
# so look them up once outside the loop.
spi_pair_days = pair_dates['SPI Date'].values

# Map interval label -> SPI converted to USDM categories on the paired dates.
spi_usdmcat = {}
for spi, interval in tqdm(zip(spi_list, spi_intervals), total=len(spi_intervals)):
    spi_on_pair_days = spi['spi'].sel(day=spi_pair_days)
    spi_usdmcat[interval] = compare.spi_to_usdmcat_multtime(spi_on_pair_days)

  0%|          | 0/8 [00:00<?, ?it/s]

This looks like it might take a while to run. TODO: save this dataset to disk once it is done so it does not need to be recomputed.

Make some preliminary plots

In [None]:
# One panel per hydrologic year showing the mean (USDM - SPI) category difference.
fig, axs = plt.subplots(5,4, figsize=(20, 20), sharex=True, sharey=True)

# NOTE(review): `usdm_resample_sel_timeless` and `spi_usdmcat_sel_timeless` are
# not created in any cell visible in this notebook section; they presumably need
# to be built from `usdm_upscale` / `spi_usdmcat` with an 'index' dimension
# aligned to `pair_dates.index` -- TODO confirm before running.
# NOTE(review): zip stops at the shorter input. The 5x4 grid has 20 axes while
# the range holds 21 years, so 2020 is never drawn -- TODO confirm intended.
for year, ax in zip(np.arange(2000, 2021, 1), axs.ravel()):
    # Row labels of the date pairs whose USDM date falls in this hydrologic year.
    hyear_idxs = pair_dates[pair_dates['USDM Hydrologic Year'] == year].index.values

    category_diff = (
        usdm_resample_sel_timeless.sel(index=hyear_idxs)
        - spi_usdmcat_sel_timeless.sel(index=hyear_idxs)
    )
    category_diff.mean(dim='index').plot(ax=ax, vmin=-5, vmax=5, cmap='PuOr')

    ax.set_title(f"USDM {year}'s Hydrologic Year ({len(hyear_idxs)} times)")
    # Per-panel axis labels are cleared; shared labels are set on the figure below.
    ax.set_xlabel('')
    ax.set_ylabel('')

fig.supylabel('latitude [degrees_north]', x=-0.01)
fig.supxlabel('longitude [degrees_east]', y=-0.01)
fig.suptitle('Mean Difference between Upscaled USDM and 1-yr SPI', y=1.01)

plt.tight_layout()