In [1]:
import xarray as xr
import rioxarray as rio
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from rasterio.features import rasterize
from shapely.geometry import Point

### Import boundaries for spatial distribution

In [2]:
# Read the shapefile
shapefile_path = '//gmvstorage.gmv.es/storage/anin/Groundwater/2011 Freshwater Ecosystem Priority Areas 9 Water Management Areas/9WMA.shp'
areas = gpd.read_file(shapefile_path)
areas = areas.set_crs('EPSG:4326')
# Field in the shapefile table of attributes to be used for the averaging of the SGI in this case  "SECONDARY" for secondary catchments
grouping_field = 'WMA_NewID' 


In [15]:
areas

Unnamed: 0,WMA_NewID,WMA_NewN,Water_Curr,Water_Futu,geometry
0,1,Limpopo,0,0,"POLYGON ((29.67462 -22.13940, 29.68016 -22.139..."
1,2,Olifants,0,0,"POLYGON ((30.88171 -22.30560, 30.88370 -22.305..."
2,6,Orange,0,0,"POLYGON ((20.07278 -24.83449, 20.07467 -24.834..."
3,5,Vaal,0,0,"POLYGON ((23.28425 -25.27973, 23.28543 -25.281..."
4,9,Berg-Olifants,0,0,"POLYGON ((19.30589 -29.86658, 19.31224 -29.866..."
5,7,Mzimvubu-Tsitsikama,0,0,"POLYGON ((29.18880 -29.92530, 29.19085 -29.927..."
6,8,Breede-Gouritz,0,0,"POLYGON ((22.04727 -32.00736, 22.04929 -32.008..."
7,4,Pongola-Umzikhulu,0,0,"POLYGON ((32.06005 -26.87098, 32.06392 -26.873..."
8,3,Inkomati-Usuthu,0,0,"POLYGON ((31.88512 -24.17270, 31.88913 -24.175..."


#### Read SPEI and SPI

In [3]:
fN_spei = "//gmvstorage.gmv.es/storage/anin/SPEI/outputs/SPEI3.nc"
fN_spi= "//gmvstorage.gmv.es/storage/anin/SPI/outputs/SPI.nc"

spei = xr.open_dataset(fN_spei)
spi = xr.open_dataset(fN_spi)

# Select time range matching the available GWL observations.
start_date = "1999-10-01"
end_date = "2022-07-01"

spei = spei.sel(time=slice(start_date, end_date))
spei= spei.rio.write_crs(4326, inplace=True)
spei = spei.rename({'__xarray_dataarray_variable__': 'spei'})

spi = spi.sel(time=slice(start_date, end_date))
spi = spi.rio.write_crs(4326, inplace=True)
spi = spi.rename({'__xarray_dataarray_variable__': 'spi'})

#### Rasterize the shapefile to be able to perform zonal statistics over rasterized data (ERA5)

In [4]:

shapes = ((geom, value) for geom, value in zip(areas.geometry, areas['WMA_NewID']))

eco_regions = rasterize(
    shapes=shapes,
    out_shape=(len(spi.y.data), len(spi.x.data)),
    transform=spi.rio.transform(),
    default_value=0,
)

#### Compute Zonal Statistics

In [25]:
eco_spi = {}
eco_spei= {}

for id_basin in areas['WMA_NewID']:

    spi_aux = spi.spi.data[:, eco_regions==id_basin]
    spi_mean = np.nanmean(spi_aux,axis=1)
    eco_spi.update({areas['WMA_NewN'][areas['WMA_NewID']==id_basin].values[0]: spi_mean})

    spei_aux = spei.spei.data[:, eco_regions==id_basin]
    spei_mean = np.nanmean(spei_aux,axis=1)
    eco_spei.update({areas['WMA_NewN'][areas['WMA_NewID']==id_basin].values[0]: spei_mean})


In [None]:
df_spi = pd.DataFrame.from_dict(eco_spi).T
df_spei = pd.DataFrame.from_dict(eco_spei).T

#### Import GWL stations data

In [8]:
# Import data and metadata

# Load Metadata 
metadata_path = "//gmvstorage.gmv.es/storage/anin/Groundwater/processed_data/stations_metadadata22_active.xlsx"
meta_df = pd.read_excel(metadata_path)
meta_df.set_index('Station',inplace=True)

# Select the columns that are necessary for this code
meta_df = meta_df.iloc[:, 0:2]

# Load data
df_path = "//gmvstorage.gmv.es/storage/anin/Groundwater/processed_data/stations_data22_active.xlsx"
df = pd.read_excel(df_path)
df = df.iloc[:,1:]
df.set_index('date',inplace=True)

# Create a pandas time series for the plot function to work
dates = pd.to_datetime(df.index)

### Compute the SGI for all the stations

In [9]:
# Import the modified sgi function which includes scaling 
from sgi_tools import compute_sgi

# Define groundwater index scale factor
scale = 3

In [10]:
# Create a new DataFrame to store the SGI values
df_sgi = pd.DataFrame()

# Apply the SGI function to each column of the DataFrame

for column in df.columns:
    df_sgi[column] = compute_sgi(df[column],scale)

# Reset the index & Transpose the DataFrame
dates = df_sgi.index.to_pydatetime()
df_sgi = df_sgi.reset_index(drop = True)
df_sgi = df_sgi.T

  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = compute_sgi(df[column],scale)
  df_sgi[column] = c

#### Perform the average of the SGI values at each timestep for each catchment in the shapefile

In [11]:
# Join with coordinate information in metadata dataframe
df_latlon = df_sgi.join(meta_df)

# Convert the stations dataframe into a GeoDataFrame by specifying the geometry column with the coordinates:

geometry = [Point(xy) for xy in zip(df_latlon['Longitude'], df_latlon['Latitude'])]
stations_gdf = gpd.GeoDataFrame(df_latlon, geometry=geometry)

# Perform a spatial join between the polygons and the stations to determine which stations fall within each polygon:
stations_by_polygon = gpd.sjoin(areas, stations_gdf, how='inner', op='contains')

# Select the columns based on their numeric names
time_step_columns = [col for col in stations_by_polygon.columns if str(col).isdigit()]

# Group the stations by polygon and time step, and calculate the average for each group:
sgi_averages = stations_by_polygon.groupby(['WMA_NewN'])[time_step_columns].mean()

  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: None

  stations_by_polygon = gpd.sjoin(areas, stations_gdf, how='inner', op='contains')
