# Calculating SPEI-12 at 0.5 degrees (1950-2023) for every gridcell

This Python notebook uses the xclim package to calculate the SPEI-12 (and, further below, the SPEI-3 and SPEI-6) for every grid cell at 0.5 degrees resolution.

In [1]:
import xclim
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import pint
from xclim import indices
from xclim.core import units
from xclim.indices import standardized_precipitation_evapotranspiration_index
import pandas as pd
import spei as si  # si for standardized index

# Keep variable attributes (such as units) on the results of xarray operations.
xr.set_options(keep_attrs=True)

<xarray.core.options.set_options at 0x7ff79f65e850>

## Definitions

The definition of the SPEI calculation. `pe` is the water balance (P - PET). `spei_period` is the accumulation window (in months) for which you want to calculate the SPEI. The distribution used is the log-logistic distribution (`dist = "fisk"`). Because the water balance can become negative, and the distribution does not allow that, an offset has to be applied, which shifts all the data towards positive values. `cal_start` and `cal_end` are the start and end of the calibration period. This makes sure that you can later add more years without changing the values of the previous years. `dir` is the name of the output NetCDF file.

In [2]:
def SPEI_calculation(pe, spei_period, offset, cal_start, cal_end, dir,
                     out_dir="/scratch/ruiij001/Data/SPEI/0_5_degrees_apr_2024/"):
    """Compute the SPEI with xclim and write the result to a NetCDF file.

    Parameters
    ----------
    pe : xarray.DataArray
        Water balance (precipitation minus potential evapotranspiration),
        passed to xclim as the first positional argument.
    spei_period : int
        Accumulation window, forwarded to xclim as ``window``.
    offset : str
        Offset added to the water balance (e.g. ``'20 mm/d'``), forwarded to xclim.
    cal_start, cal_end : str
        Start and end date of the calibration period, forwarded to xclim.
    dir : str
        File name of the output NetCDF file; it is appended to ``out_dir``.
    out_dir : str, optional
        Folder prefix (including the trailing separator) the file is written to.
        Defaults to the folder that was previously hard-coded.

    Returns
    -------
    None
        The result is only written to disk.
    """
    print("calculating spei")
    SPEI = standardized_precipitation_evapotranspiration_index(
        pe,
        window=spei_period,
        dist="fisk",
        freq="MS",
        offset=offset,
        cal_start=cal_start,
        cal_end=cal_end,
    )

    # Strip these attributes before saving (presumably because they cannot be
    # serialised to NetCDF -- TODO confirm). `pop` with a default is used so that
    # an attribute that is absent does not abort the run with a KeyError after
    # the expensive calculation has already been set up.
    for attr_name in ("freq", "time_indexer", "units", "offset"):
        SPEI.attrs.pop(attr_name, None)

    print("saving")
    SPEI.to_netcdf(path=out_dir + dir)
    print("done")

## Load the data

Load the land-sea mask first. The data also cover the ocean, but we only need the land grid cells.

In [3]:
# Land-sea mask on the 0.5 degree grid, averaged over its time dimension.
landmask = xr.open_dataarray(
    "/scratch/6196306/ERA5/land-sea-mask_0_5.nc"
).mean(dim="time")

Load the total precipitation data. Note that this data is in m, and we want it in mm, so we multiply by 1000. If necessary, resample the data to monthly. Then use the land/sea mask to keep only the land data.

In [4]:
# Total precipitation: scale by 1000 (m -> mm) and keep only cells where the
# land-sea mask is at least 0.5. The file is already monthly; if it were not,
# append `.resample(time="MS").mean()` to the expression below.
total_prec_mm = (
    xr.open_dataset(
        "/scratch/6196306/ERA5/total_precipitation/era5_total_precipitation_1950-2023_monthly_0_5.nc"
    )["tp"]
    * 1000
).where(landmask >= 0.5)
total_prec_mm

Load all the PET data files. Note that this is daily data, so we resample to monthly data. Also use the land/sea mask.

In [5]:
# Daily PET files (one glob pattern, opened lazily as a single dataset), masked
# to land cells and averaged per calendar month.
pet = (
    xr.open_mfdataset("/scratch/6196306/PET/PenmanMonteith/pm_fao56_*_daily_0_5_v3.nc")["PM_FAO_56"]
    .where(landmask >= 0.5)
    .resample(time="1MS")
    .mean()
)
pet

Unnamed: 0,Array,Chunk
Bytes,878.03 MiB,11.87 MiB
Shape,"(888, 360, 720)","(12, 360, 720)"
Dask graph,74 chunks in 379 graph layers,74 chunks in 379 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 878.03 MiB 11.87 MiB Shape (888, 360, 720) (12, 360, 720) Dask graph 74 chunks in 379 graph layers Data type float32 numpy.ndarray",720  360  888,

Unnamed: 0,Array,Chunk
Bytes,878.03 MiB,11.87 MiB
Shape,"(888, 360, 720)","(12, 360, 720)"
Dask graph,74 chunks in 379 graph layers,74 chunks in 379 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


Make sure the lon and lat values are the same for both datasets.

In [6]:
# Check if the longitude and latitude values are the same.
# The raw coordinate arrays are compared on purpose: `pet['lon'] == total_prec_mm['lon']`
# would first align both coordinates on their own index (inner join), so only the
# shared values would be compared and the result would always be True, even for
# grids that differ in values or in length.
lon_same = np.array_equal(pet['lon'].values, total_prec_mm['lon'].values)
lat_same = np.array_equal(pet['lat'].values, total_prec_mm['lat'].values)

if lon_same and lat_same:
    print("Longitude and Latitude values are the same for both datasets.")
else:
    print("Longitude and/or Latitude values are different between the datasets.")

Longitude and Latitude values are the same for both datasets.


If not: uncomment these lines

In [7]:
# pet['lon'] = total_prec_mm['lon']
# pet['lat'] = total_prec_mm['lat']

Calculate the water balance (P - PET), which is the input of the SPEI calculation. Make sure the units of the xarray are set to mm/d, because the xclim package expects this. If necessary, select only the overlapping time period to filter out the bad data. Check that the shape is still 888x360x720.

In [8]:
# Water balance P - PET, with both terms labelled as mm/d.
# To restrict both inputs to a common period, add e.g.
# `.sel(time=slice("1955-01-01", "2023-12-31"))` to each term before `.assign_attrs`.
pe = (
    total_prec_mm.assign_attrs(units="mm/d")
    - pet.assign_attrs(units="mm/d")
)
pe

Unnamed: 0,Array,Chunk
Bytes,878.03 MiB,11.87 MiB
Shape,"(888, 360, 720)","(12, 360, 720)"
Dask graph,74 chunks in 382 graph layers,74 chunks in 382 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 878.03 MiB 11.87 MiB Shape (888, 360, 720) (12, 360, 720) Dask graph 74 chunks in 382 graph layers Data type float32 numpy.ndarray",720  360  888,

Unnamed: 0,Array,Chunk
Bytes,878.03 MiB,11.87 MiB
Shape,"(888, 360, 720)","(12, 360, 720)"
Dask graph,74 chunks in 382 graph layers,74 chunks in 382 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


For distributions bounded by zero (e.g. “gamma”, “fisk”), an offset must be added to the water budget to make sure there are no negative values. Keep the offset as small as possible to minimize its influence on the results.

In [9]:
# Smallest water-balance value over all time steps and grid cells; used to choose the SPEI offset.
pe.min().values

array(-17.229572, dtype=float32)

## Calculate SPEI-12

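Use the minimum found above to choose the offset parameter: the offset has to be larger than the absolute value of that minimum (here 20 mm/d is used). Then specify the start and the end of the calibration period. Note that the mean of the SPEI is now only 0 over the calibration period, i.e. between the start and the end date.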

In [11]:
# SPEI-12: 12-month window, calibrated on 1950-2020.
# The file name is stored in `output_file` rather than `dir`, so that the
# builtin `dir()` is not shadowed in the notebook namespace.
output_file = "SPEI12_monthly_1950_2023_0_5_degree.nc"
offset = '20 mm/d'  # larger than the absolute value of the minimum of `pe`
cal_start = "1950-01-01"
cal_end = "2020-12-31"
spei_period = 12

SPEI_calculation(
    pe=pe,
    spei_period=spei_period,
    offset=offset,
    cal_start=cal_start,
    cal_end=cal_end,
    dir=output_file,
)

calculating spei


  pr, _ = preprocess_standardized_index(pr, freq=freq, window=window, **indexer)
  return self.array[key]


saving


  return func(*(_execute_task(a, cache) for a in args))


done


## Calculate SPEI-3 and SPEI-6 also

Use the same offset (larger than the absolute value of the minimum of the water balance) and the same calibration period as for the SPEI-12. Note that the mean of the SPEI is again only 0 over the calibration period, i.e. between the start and the end date.

In [9]:
# SPEI-3: 3-month window, calibrated on 1950-2020.
# The file name is stored in `output_file` rather than `dir`, so that the
# builtin `dir()` is not shadowed in the notebook namespace.
output_file = "SPEI03_monthly_1950_2023_0_5_degree.nc"
offset = '20 mm/d'  # larger than the absolute value of the minimum of `pe`
cal_start = "1950-01-01"
cal_end = "2020-12-31"
spei_period = 3

SPEI_calculation(
    pe=pe,
    spei_period=spei_period,
    offset=offset,
    cal_start=cal_start,
    cal_end=cal_end,
    dir=output_file,
)

calculating spei


  pr, _ = preprocess_standardized_index(pr, freq=freq, window=window, **indexer)
  return self.array[key]


saving


  return func(*(_execute_task(a, cache) for a in args))


done


In [10]:
# SPEI-6: 6-month window, calibrated on 1950-2020.
# The file name is stored in `output_file` rather than `dir`, so that the
# builtin `dir()` is not shadowed in the notebook namespace.
output_file = "SPEI06_monthly_1950_2023_0_5_degree.nc"
offset = '20 mm/d'  # larger than the absolute value of the minimum of `pe`
cal_start = "1950-01-01"
cal_end = "2020-12-31"
spei_period = 6

SPEI_calculation(
    pe=pe,
    spei_period=spei_period,
    offset=offset,
    cal_start=cal_start,
    cal_end=cal_end,
    dir=output_file,
)

calculating spei


  pr, _ = preprocess_standardized_index(pr, freq=freq, window=window, **indexer)
  return self.array[key]


saving


  return func(*(_execute_task(a, cache) for a in args))


done
