# Libraries

In [1]:
import os
import xarray as xr
import rioxarray as rxr
from shapely.geometry import mapping
import geopandas as gpd
from rasterio.enums import Resampling
from datetime import datetime, timedelta
import calendar
from calendar import monthrange
import numpy as np

# Directories

In [2]:
import os

# Base directory of the project: read from the PROJECT_DIR environment
# variable when it is set, otherwise a path below the current working directory
main_dir = os.environ.get(
    'PROJECT_DIR',
    os.path.join(os.getcwd(), 'data', 'ANIN', 'Generating Indices', 'CDI'),
)

# Folder with the FAPAR anomaly rasters (crop mask applied)
FAPARa_path = os.path.join(main_dir, 'FAPAR_Anomaly_crop_mask', 'RT1')

# Folder with the Soil Moisture Anomaly (SMA) rasters
SMA_path = os.path.join(main_dir, 'SMA')

# Folder with the SPI outputs; the SPI_PATH environment variable overrides it
# (useful when the data live on a network location)
SPI_path = os.environ.get('SPI_PATH', os.path.join(main_dir, 'SPI', 'outputs'))

# Folder with the shapefile of the area of interest (AOI)
Boundary = os.path.join(main_dir, 'Boundary')

# Echo every resolved path so that it can be checked visually
for label, location in (
    ('Main Directory:', main_dir),
    ('FAPARa Path:', FAPARa_path),
    ('SMA Path:', SMA_path),
    ('SPI Path:', SPI_path),
    ('Boundary Path:', Boundary),
):
    print(label, location)


# FAPAR anomaly data 

In [14]:
# Load every FAPAR anomaly raster into a dictionary.
# The key is the date of the raster as a string (str(datetime)),
# the value is the 2-D (band 1) DataArray.
FAPAR_a = {}
# Looping through each tif file of FAPAR anomaly
for image in os.listdir(FAPARa_path):
    # Only read GeoTIFF files: sidecar files that GIS tools leave next to
    # the rasters (e.g. *.aux.xml, *.ovr) would otherwise break the
    # raster opening / date parsing below
    if not image.lower().endswith(('.tif', '.tiff')):
        continue
    # Path for the tif file
    path = os.path.join(FAPARa_path, image)
    # Open the tif file as xarray
    fapar = rxr.open_rasterio(path, masked=True)
    # The file name is sliced at fixed offsets from its end: everything
    # before the last 32 characters is the month name ...
    month = image[:-32]
    # ... and the first 4 of those last 32 characters are the year
    year = image[-32:-28]
    # Convert month name + year to a date, then to a string
    # to be the key in the dictionary
    key = str(datetime.strptime(month + year, "%B%Y"))
    # .sel(band=1) is used to have a dataarray with only
    # x and y dimensions to be the same as those for SPI
    FAPAR_a[key] = fapar.sel(band=1)

# Fail with a clear message instead of a NameError on `fapar` further down
if not FAPAR_a:
    raise FileNotFoundError('No FAPAR anomaly tif files found in ' + FAPARa_path)

# Reorder the dictionary from the first date to the last date.
# The keys are 'YYYY-MM-DD HH:MM:SS' strings, so sorting them as text
# is the same as sorting them by date.
myKeys = sorted(FAPAR_a)
FAPAR_a = {k: FAPAR_a[k] for k in myKeys}
# Take the crs of the project from the last FAPAR raster that was opened;
# `fapar` is also reused later as the reference grid for reprojection
crs_project = fapar.rio.crs

# SMA data

In [20]:
# Load every SMA raster into a dictionary.
# The key is the date of the raster as a string (str(datetime)),
# the value is the 2-D (band 1) DataArray on the FAPAR grid.
SMA = {}
# Looping through each tif file of SMA
for image in os.listdir(SMA_path):
    # Only read GeoTIFF files: sidecar files that GIS tools leave next to
    # the rasters (e.g. *.aux.xml, *.ovr) would otherwise break the
    # raster opening / date parsing below
    if not image.lower().endswith('.tif'):
        continue
    # Path for the tif file
    path = os.path.join(SMA_path, image)
    # Open the tif file as xarray
    sma = rxr.open_rasterio(path, masked=True)
    # Reproject SMA onto the grid of `fapar` (the last FAPAR raster opened
    # when the FAPAR data were loaded), resampling with the bilinear method
    sma = sma.rio.reproject_match(fapar, resampling=Resampling.bilinear)
    # The file name without its '.tif' extension is parsed as year + month
    # (format %Y%m) and used as key for the dictionary
    key = str(datetime.strptime(image[:-4], "%Y%m"))
    # .sel(band=1) is used to have a dataarray with only
    # x and y dimensions to be the same as those for SPI
    SMA[key] = sma.sel(band=1)

# Reorder the dictionary from the first date to the last date.
# The keys are 'YYYY-MM-DD HH:MM:SS' strings, so sorting them as text
# is the same as sorting them by date.
myKeys = sorted(SMA)
SMA = {k: SMA[k] for k in myKeys}

In [24]:
# Open the netcdf file of SPI.
# os.path.join is used (instead of a hard-coded '\\') so that the path
# also works outside Windows, like the other paths of this notebook.
SPI_nc = rxr.open_rasterio(os.path.join(SPI_path, 'SPI.nc'), masked=True)
# Giving CRS
SPI_nc.rio.write_crs(crs_project, inplace=True)
# Dictionary of SPI rasters; the key is the date as a string (str(datetime))
SPI = {}
# 1980 is the start date for the time band of the netcdf file:
# the time values are numbers of days after 1980-01-01.
# start_datetime is the reference used to turn them into dates.
start_datetime = datetime(1980, 1, 1, 0, 0, 0)
# Period to keep, expressed as days after start_datetime:
# from 1 July 2020 (14792, included) up to 1 January 2023 (15706, excluded)
first_day = (datetime(2020, 7, 1) - start_datetime).days
end_day = (datetime(2023, 1, 1) - start_datetime).days
# Looping through the time values, which are listed in the
# attribute 'NETCDF_DIM_time_VALUES'
for d in SPI_nc.attrs['NETCDF_DIM_time_VALUES']:
    # Skip dates before July 2020 and after December 2022
    if d < first_day or d >= end_day:
        continue
    # Convert the days after 1980 to a date
    date = start_datetime + timedelta(days=d)
    # Select the SPI that corresponds to this date
    spi = SPI_nc.sel(time=date)
    # Reproject SPI onto the grid of `fapar`, resampling with the
    # bilinear method and using NaN as the nodata value
    spi = spi.rio.reproject_match(fapar, resampling=Resampling.bilinear, nodata=np.nan)
    # The key is the date
    key = str(date)
    # Add the raster to the dictionary
    SPI[key] = spi

# Reorder the dictionary from the first date to the last date.
# The keys are 'YYYY-MM-DD HH:MM:SS' strings, so sorting them as text
# is the same as sorting them by date.
myKeys = sorted(SPI)
SPI = {k: SPI[k] for k in myKeys}

# CDI

In [26]:
# Stack SPI, SMA and FAPAR anomaly of the same month into one xarray.
# The layers are concatenated along a new 'index' dimension in the order
#   0 = SPI, 1 = SMA, 2 = FAPAR anomaly

# Dictionary of stacks; the key is the date string shared by the three inputs
CDI_stack = {}
# Looping using keys for FAPAR anomaly data
for i in FAPAR_a:
    # Skip months that are missing from SMA or SPI. An explicit membership
    # test is used instead of a bare `except:` so that only a missing
    # month is skipped and real errors are not silently swallowed.
    if i not in SMA or i not in SPI:
        continue
    # FAPAR anomaly for the corresponding month
    F = FAPAR_a[i]
    # SMA for the corresponding month
    SM = SMA[i]
    # SPI for the corresponding month
    SP = SPI[i]
    # Concatenation of the three layers, stored under the date key
    CDI_stack[i] = xr.concat([SP, SM, F], 'index')

In [30]:
# Load the shapefile
def load_shape_file(filepath):
    """Read the shape file that will be used to mask a grid.

    Args:
        filepath: Path to *.shp file
    Returns:
        The object returned by `gpd.read_file` for that path.
    """
    loaded = gpd.read_file(filepath)
    # Tell the user what the next step is
    notice = """Shapefile loaded. To prepare for masking, run the function
        `select_shape`."""
    print(notice)
    return loaded

#Create the mask
def select_shape(shpfile, col_code='ISO3_CODE', country_codes=('ZAF', 'LSO', 'SWZ')):
    """Merge the selected features of the shapefile into one mask polygon.

    Args:
        shpfile: GeoDataFrame loaded through `load_shape_file`.
        col_code: (str) name of the column used to filter the features.
            (Run print(shpfile) to see options)
        country_codes: values of `col_code` to keep. The defaults are the
            codes 'ZAF', 'LSO' and 'SWZ'.
    Returns:
        GeoDataFrame with a single row whose geometry is the union of the
        selected features. No CRS is set on the returned GeoDataFrame.
    Raises:
        ValueError: if no feature matches `country_codes`.
    """

    # Extract the rows whose `col_code` value is one of `country_codes`
    selected_rows = shpfile[shpfile[col_code].isin(list(country_codes))]

    # An empty selection would produce an empty mask and only fail later,
    # when clipping, with a much less helpful message
    if selected_rows.empty:
        raise ValueError(
            'No feature of the shapefile has %s in %s'
            % (col_code, list(country_codes))
        )

    # Combine the selected polygons into a single polygon
    unioned_polygon = selected_rows.geometry.unary_union

    # Convert the unioned polygon to a geopandas dataframe with a single row
    mask_polygon = gpd.GeoDataFrame(geometry=[unioned_polygon])

    print("""Mask created.""")

    return mask_polygon
# Load the country boundaries and build the AOI mask.
# os.path.join is used (instead of a hard-coded '\\') so that the path
# also works outside Windows.
shpfile = load_shape_file(os.path.join(Boundary, 'CNTR_RG_01M_2020_4326.shp'))
AOI = select_shape(shpfile)
# Define a crs to the AOI.
# NOTE(review): this only labels the AOI with the CRS of the FAPAR raster,
# it does not reproject the coordinates - presumably both are EPSG:4326
# (see the shapefile name); confirm.
AOI.crs = fapar.rio.crs

Shapefile loaded. To prepare for masking, run the function
        `select_shape`.
Mask created.


In [44]:
# Use the thresholds of FAPAR anomaly, SMA, and SPI to calculate the CDI
# of every month in CDI_stack and export it as a GeoTIFF.
# Class codes written to the raster:
#   1 = Watch, 2 = Warning, 3 = Alert,
#   4 = Partial recovery, 5 = Full recovery
# Pixels that fall in none of the classes are set to NaN.

# Output folder of the CDI rasters; created when it does not exist yet
CDI_dir = os.path.join(main_dir, 'CDI')
os.makedirs(CDI_dir, exist_ok=True)

# Key of the previous entry of CDI_stack. The first entry is skipped
# because the recovery classes need the SPI of the previous month.
# NOTE(review): "previous" means the previous entry of CDI_stack; if a month
# was skipped while stacking, this is not the calendar month before.
i_1 = None
# looping through stacks of data
for i in CDI_stack:
    # skip the first image
    # but save it as i_1 to be used with the next month
    if i_1 is None:
        i_1 = i
        continue
    # Layers of the stack, in stacking order: 0 = SPI, 1 = SMA, 2 = FAPAR
    spi_now = CDI_stack[i][0]
    sma_now = CDI_stack[i][1]
    fapar_now = CDI_stack[i][2]
    # SPI of the previous entry
    spi_prev = CDI_stack[i_1][0]

    # Watch class: when SPI is less than -1; everything else
    # (including no data) is 0 for now
    CDI = xr.where(spi_now < -1, 1, 0)
    # Warning class: where SPI < -1 and SMA < -1
    CDI = xr.where((sma_now < -1) & (CDI == 1), 2, CDI)
    # Alert class: where SPI < -1 and FAPAR anomaly < -1.
    # The SPI condition is tested directly: testing CDI == 1 here would
    # exclude the pixels just set to Warning, so a pixel with SPI, SMA and
    # FAPAR anomaly all below -1 could never become Alert.
    CDI = xr.where((fapar_now < -1) & (spi_now < -1), 3, CDI)
    # Partial recovery: where FAPAR anomaly < -1 and SPI m-1 < -1 and SPI > -1
    CDI = xr.where((fapar_now < -1) & (spi_now > -1) & (spi_prev < -1), 4, CDI)
    # Full recovery: where FAPAR anomaly > -1 and SPI m-1 < -1 and SPI > -1
    CDI = xr.where((fapar_now > -1) & (spi_now > -1) & (spi_prev < -1), 5, CDI)
    # make no data as nan
    CDI = CDI.where(CDI != 0)
    # CRS
    CDI = CDI.rio.write_crs(crs_project)
    # Clipping to the AOI to delete pixels outside AOI which are few
    clipped_CDI = CDI.rio.clip(AOI.geometry.apply(mapping),
                               crs=AOI.crs,
                               all_touched=True,
                               from_disk=True).squeeze()
    # Export the clipped CDI as tif file (the first 10 characters of the
    # key are the date, 'YYYY-MM-DD'). The clipped raster is written:
    # exporting `CDI` would throw the clipping above away.
    clipped_CDI.rio.to_raster(os.path.join(CDI_dir, i[:10] + '_CDI.tif'))
    # Remember this month so that its SPI is available
    # as "previous month" in the next iteration
    i_1 = i
