## Process SPEI

In this notebook, previously prepared monthly SPEI data (with a 3-month integration period) is further processed to retrieve spatially aggregated mean values per state and for Germany. For comparison, calculated SPEI values as well as SPEI values downloaded from the SPEI database are processed.

In [1]:
# import libraries
import os
import glob
import xarray as xr
import pandas as pd
import geopandas as gpd
import rasterio
import odc.stac
import pathlib
from pathlib import Path
import warnings

In [2]:
# set working directory
os.chdir("E:/Master/Thesis/3_Data")
print("Current working directory: {0}".format(os.getcwd()))

Current working directory: E:\Master\Thesis\3_Data


In [3]:
warnings.filterwarnings("ignore", category = UserWarning)
warnings.filterwarnings("ignore", category = RuntimeWarning)

In [4]:
# load calculated SPEI values
spei_calc = xr.open_dataset("./SPEI/SPEI_calc_3M_GER.nc",  decode_coords="all",decode_times=True)

In [5]:
# calculate mean values per month for Germany
spei_monthly_spatial_means = spei_calc.mean(['lon','lat'], skipna=True).to_dataframe()
spei_monthly_spatial_means

Unnamed: 0_level_0,spatial_ref,number,expver,spei
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1981-01-01,0,0,0001,
1981-02-01,0,0,0001,
1981-03-01,0,0,0001,1.104737
1981-04-01,0,0,0001,0.487875
1981-05-01,0,0,0001,0.942274
...,...,...,...,...
2024-08-01,0,0,0001,0.103173
2024-09-01,0,0,0001,0.377631
2024-10-01,0,0,0001,0.432200
2024-11-01,0,0,0001,0.725536


In [6]:
# drop unneeded columns
spei_monthly_spatial_means = spei_monthly_spatial_means.drop(['spatial_ref', 'number', 'expver'], axis=1)

In [7]:
# rename column with spei value to show area
spei_monthly_spatial_means.rename({"spei":"GER"}, axis=1, inplace=True)

In [8]:
# now, calculate monthly mean values for all federal states
# get list of shapefiles of all states
directory = "./Administrative_Borders"
files = list(pathlib.Path(directory).glob('*.shp'))

# loop over filelist to process each state
for f in range(len(files)):
    file = files[f]

    # load state shapefile 
    state = str(file)[23:25]
    state_shp = gpd.read_file(file)

    # reproject shapefile to match netcdf data
    state_shp = state_shp.to_crs(4326)

    # extract one date to create mask of state with dimensions of data
    spei_ex = spei_calc.sel(time = "1999-01-16", method = "nearest")

    # create mask of research area using the dimensions of the exemplary data
    state_mask = rasterio.features.geometry_mask(state_shp.geometry, 
                                                out_shape=spei_ex.odc.geobox.shape,
                                                transform=spei_ex.odc.geobox.affine,
                                                all_touched=False,
                                                invert=False)
    
    state_mask = xr.DataArray(state_mask, dims=("lat", "lon"))

    # mask monthly data
    spei_state = spei_calc["spei"].where(~state_mask)

    # calculate monthly mean value of state and extract to dataframe
    spei_state_spatial_means = spei_state.mean(['lon','lat'], skipna=True).to_dataframe()

    # drop unneeded columns
    spei_state_spatial_means = spei_state_spatial_means.drop(['spatial_ref', 'number', 'expver'], axis=1)

    # add monthly mean values of state to dataframe
    spei_monthly_spatial_means[state] = spei_state_spatial_means["spei"]  

In [9]:
spei_monthly_spatial_means

Unnamed: 0_level_0,GER,BB,BE,BW,BY,HB,HE,HH,MV,NI,NW,RP,SH,SL,SN,ST,TH
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1981-01-01,,,,,,,,,,,,,,,,,
1981-02-01,,,,,,,,,,,,,,,,,
1981-03-01,1.104737,1.632426,1.746350,0.697386,0.598061,1.379188,0.864433,1.577839,1.846079,1.443129,1.184516,0.683540,1.278637,0.738324,0.996884,1.522720,1.026491
1981-04-01,0.487875,1.320793,1.395118,-0.298439,-0.305321,0.609777,0.346060,0.704479,1.098270,0.730765,0.511157,0.358173,0.753467,0.344538,1.138513,1.165615,0.647111
1981-05-01,0.942274,1.526463,1.549036,0.261711,-0.026845,0.965457,1.030334,1.449035,1.587263,1.276644,0.981767,0.840808,1.434535,0.768999,1.483128,1.549525,1.396486
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-01,0.103173,-0.179879,0.028901,-0.290004,-0.017282,0.329947,0.476913,0.052571,-0.044993,0.358615,0.309930,0.511465,0.364297,0.504465,-0.351673,0.083671,0.202653
2024-09-01,0.377631,-0.094189,-0.065112,-0.094615,0.661819,0.392929,0.730617,0.220732,-0.233116,0.545776,0.635503,0.769683,0.221513,0.771268,0.180429,0.245942,0.476700
2024-10-01,0.432200,-0.295220,-0.522106,0.166255,0.890684,0.253686,0.444684,0.354799,-0.265597,0.461833,0.907982,0.803636,-0.136091,0.815450,0.531297,0.300877,0.510053
2024-11-01,0.725536,0.295557,0.290777,0.659038,0.969622,0.694819,0.737220,1.297173,0.495750,0.868465,0.835349,0.948132,0.451627,1.164597,0.550190,0.557546,0.649389


In [10]:
# save as csv dataset
spei_monthly_spatial_means.to_csv("./Tables/Spatial_Means/SPEI_calc_3M_Spatial_Means_GER.csv")

In [11]:
# now follow the same steps for the SPEI downloaded from the official database
spei_db = xr.open_dataset("./SPEI/SPEI_database_3M_GER.nc",  decode_coords="all",decode_times=True)

In [12]:
# calculate mean values per month for Germany
spei_monthly_spatial_means = spei_db.mean(['lon','lat'], skipna=True).to_dataframe()
spei_monthly_spatial_means

Unnamed: 0_level_0,crs,spei
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1981-01-16,-2147483647,0.607773
1981-02-15,-2147483647,0.428334
1981-03-16,-2147483647,1.462595
1981-04-16,-2147483647,0.414364
1981-05-16,-2147483647,0.989989
...,...,...
2023-08-16,-2147483647,-0.460030
2023-09-16,-2147483647,-0.471239
2023-10-16,-2147483647,-0.012287
2023-11-16,-2147483647,0.478862


In [13]:
# drop unneeded columns
spei_monthly_spatial_means = spei_monthly_spatial_means.drop(['crs'], axis=1)

In [14]:
# rename column with spei value to show area
spei_monthly_spatial_means.rename({"spei":"GER"}, axis=1, inplace=True)

In [15]:
# now, calculate monthly mean values for all federal states
# get list of shapefiles of all states
directory = "./Administrative_Borders"
files = list(pathlib.Path(directory).glob('*.shp'))

# loop over filelist to process each state
for f in range(len(files)):
    file = files[f]

    # load state shapefile 
    state = str(file)[23:25]
    state_shp = gpd.read_file(file)

    # reproject shapefile to match netcdf data
    state_shp = state_shp.to_crs(4326)

    # extract one date to create mask of state with dimensions of data
    spei_ex = spei_db.sel(time = "1999-01-16", method = "nearest")

    # create mask of research area using the dimensions of the exemplary data
    state_mask = rasterio.features.geometry_mask(state_shp.geometry, 
                                                out_shape=spei_ex.odc.geobox.shape,
                                                transform=spei_ex.odc.geobox.affine,
                                                all_touched=False,
                                                invert=False)
    
    state_mask = xr.DataArray(state_mask, dims=("lat", "lon"))

    # mask monthly data
    spei_state = spei_db["spei"].where(~state_mask)

    # calculate monthly mean value of state and extract to dataframe
    spei_state_spatial_means = spei_state.mean(['lon','lat'], skipna=True).to_dataframe()

    # drop unneeded columns
    spei_state_spatial_means = spei_state_spatial_means.drop(['crs'], axis=1)

    # add monthly mean values of state to dataframe
    spei_monthly_spatial_means[state] = spei_state_spatial_means["spei"]  

In [16]:
spei_monthly_spatial_means

Unnamed: 0_level_0,GER,BB,BE,BW,BY,HB,HE,HH,MV,NI,NW,RP,SH,SL,SN,ST,TH
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1981-01-16,0.607773,0.565035,,0.106986,0.409815,,0.324426,,0.702715,0.969783,1.041045,0.602449,1.028769,0.318042,0.220727,0.878921,0.603984
1981-02-15,0.428334,0.409415,,0.394279,0.372011,,0.083464,,0.596588,0.580492,0.711728,0.544609,0.618191,0.463044,-0.058358,0.434976,0.162313
1981-03-16,1.462595,1.923707,,0.828452,0.687931,,1.440621,,2.037362,2.092129,1.843710,1.134964,2.073601,0.981582,1.123724,1.864254,1.509660
1981-04-16,0.414364,0.659946,,-0.524944,-0.665220,,0.759310,,0.779167,1.231069,0.907411,0.382356,1.303435,0.194570,0.356025,0.793413,0.775035
1981-05-16,0.989989,0.753511,,0.363841,-0.002812,,1.584745,,0.980581,1.839702,1.647625,1.065686,1.656462,0.737659,0.693799,1.291243,1.586843
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-16,-0.460030,-0.537199,,-1.039619,-0.782729,,-0.191217,,-0.481509,-0.145232,-0.094738,-0.468440,-0.149590,-0.566938,-0.378525,-0.501915,-0.092353
2023-09-16,-0.471239,-0.905429,,-0.645603,-0.592919,,-0.186239,,-0.721173,-0.453638,0.151269,0.128012,-0.143911,-0.025872,-0.862886,-0.792627,-0.485541
2023-10-16,-0.012287,-0.123462,,-0.618178,-0.500541,,0.232834,,-0.022274,0.283141,0.508528,0.241747,0.767914,0.097657,-0.088114,-0.060078,0.179207
2023-11-16,0.478862,0.199532,,0.411966,0.103318,,0.583835,,0.540354,0.766807,0.955216,0.758425,1.359429,0.696160,-0.179522,0.432082,0.129020


In [17]:
# save as csv dataset
spei_monthly_spatial_means.to_csv("./Tables/Spatial_Means/SPEI_database_3M_Spatial_Means_GER.csv")