# Clip & Categorize SPI

a. stein 1.19.2023

In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import os
import sys

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

import xarray as xr
import rioxarray
import rasterio as rio
import pandas as pd
import geopandas as gpd

from tqdm.notebook import tqdm

In [2]:
# root directory of the drought-measure files; the SPI paths used in this
# notebook are all built from it
dm_path = '/pool0/home/steinadi/data/drought/drought_impact/data/drought_measures'

In [3]:
# open one SPI file for inspection (file name suggests the 1-year interval — confirm)
spi_1y = xr.open_dataset(f'{dm_path}/spi/spi1y.nc')

In [4]:
# bare expression: as the last line of a notebook cell this shows the dataset summary
spi_1y

In [5]:
def dm_to_usdmcat(da:xr.DataArray):
    """Categorizes drought measure based on USDM categories.

    Uses the mapping scheme presented by USDM (https://droughtmonitor.unl.edu/About/AbouttheData/DroughtClassification.aspx)
    Where Neutral is -1, D0 is 0, D1 is 1, D2, is 2, D3 is 3, and D4 is 4.

    The category boundaries are the 30th, 20th, 10th, 5th and 2nd percentiles
    of all non-NaN values in ``da`` taken together. NaN cells stay NaN.

    Parameters
    ----------
    da : xr.DataArray
        Contains SPI values. It is not modified.
    
    Returns
    -------
    xr.DataArray
        DataArray formatted the same as da but using USDM categories. If da
        holds no non-NaN values, an unchanged copy is returned.

    """

    # Take a private copy of the values. `da.values` can be the very array
    # backing `da`, so writing categories into it would overwrite the
    # caller's data.
    origin = np.array(da.values, copy=True)

    # percentiles are only meaningful over the cells that hold data
    valid = origin[~np.isnan(origin)]
    if valid.size == 0:
        # nothing to categorize (empty or all-NaN input)
        return da.copy()

    # calculate percentiles
    p30, p20, p10, p5, p2 = np.percentile(valid, [30, 20, 10, 5, 2])

    # Every mask is computed from `origin`, never from `cats`, so a freshly
    # written category number can not be picked up by a later comparison.
    cats = origin.copy()

    # assign neutral
    cats[origin > p30] = -1
    # assign D0
    cats[(origin <= p30) & (origin > p20)] = 0
    # assign D1
    cats[(origin <= p20) & (origin > p10)] = 1
    # assign D2
    cats[(origin <= p10) & (origin > p5)] = 2
    # assign D3
    cats[(origin <= p5) & (origin > p2)] = 3
    # assign D4
    cats[origin <= p2] = 4

    # same dims/coords/attrs as the input, with the categories as data;
    # works for any number of dimensions
    return da.copy(data=cats)

In [6]:
def dm_to_usdmcat_multtime(ds:xr.Dataset):
    """Categorizes a multi-time drought measure into USDM categories.

    Each entry of the 'time' coordinate is selected on its own, the
    selections are concatenated back together along 'time', and the result
    is handed to dm_to_usdmcat in a single call. See dm_to_usdmcat for the
    category scheme.

    Because dm_to_usdmcat is called once on the concatenated object, its
    percentile thresholds are drawn from all times together, not per time
    step.
    NOTE(review): confirm that pooled thresholds (rather than one
    categorization per time step) are what is intended here.

    Parameters
    ----------
    ds : xr.Dataset
        Drought measure (e.g. SPI) with a 'time' coordinate.
        NOTE(review): dm_to_usdmcat reads ``.values`` as an array, which
        suggests a DataArray is what actually gets passed — confirm.

    Returns
    -------
    xr.Dataset
        Drought measure categorized by dm_to_usdmcat.
    """

    # one selection per time stamp, in coordinate order
    per_time = []
    for stamp in ds['time'].values:
        per_time.append(ds.sel(time=stamp))

    # stitch the selections back together along 'time' before categorizing
    stacked = xr.concat(per_time, dim='time')

    return dm_to_usdmcat(stacked)

In [7]:
# SPI accumulation intervals to process; each one names an input file
# spi{interval}.nc and an output file CONUS_105W/spi_{interval}.nc
spi_intervals = ['1y', '2y', '5y', '14d', '30d', '90d', '180d', '270d']

for interval in tqdm(spi_intervals):
    out_path = f'{dm_path}/spi/CONUS_105W/spi_{interval}.nc'

    # the context manager closes the source file even if a step below fails
    with xr.open_dataset(f'{dm_path}/spi/spi{interval}.nc') as spi_ds:
        # tag the data with a geographic CRS so rioxarray can reproject it
        spi_da = spi_ds['spi'].rio.write_crs('EPSG:4326', inplace=True)
        # slice(-105) has no lower bound: keep longitudes up to -105
        spi_da_clip = spi_da.sel(lon=slice(-105))
        spi_da_clip = spi_da_clip.rename({'day':'time'})
        spi_da_clip = spi_da_clip.rio.reproject('EPSG:5070')

        # edit a copy of the attributes, then attach it back
        attrs = dict(spi_da_clip.attrs)
        attrs['Clipping'] = 'This selection has been clipped to everything west of longitude 105 degrees within CONUS. EPSG:5070 was picked to preserve area for future computations.'
        # drop 'grid_mapping' when present; not having it is not an error
        attrs.pop('grid_mapping', None)
        spi_da_clip.attrs = attrs

        # Remove any previous output first. A missing file is the only
        # failure that is safe to ignore; anything else should surface.
        try:
            os.remove(out_path)
        except FileNotFoundError:
            pass

        spi_da_clip.to_netcdf(out_path)

    # release the large arrays before the next interval is loaded
    del spi_da, spi_da_clip
    

  0%|          | 0/8 [00:00<?, ?it/s]