# Libraries

In [19]:
import os
import xarray as xr
import rioxarray as rxr
from shapely.geometry import mapping
import geopandas as gpd
from rasterio.enums import Resampling
from datetime import datetime
from calendar import monthrange

In [16]:
# Root data directory: taken from the PROJECT_DIR environment variable when it
# is set, otherwise built relative to the current working directory.
main_dir = os.environ.get(
    'PROJECT_DIR',
    os.path.join(os.getcwd(), 'data', 'ANIN', 'Generating Indices', 'VCI',
                 'NDVI-16', 'Trial', 'Tiles'),
)
# Sub-folders of the root, in order:
#   Copernicus - year folders of Copernicus data for the AOI
#   Boundary   - folder holding the country-boundary shapefile
#   Output_dir - folder that receives the exported monthly rasters
Copernicus, Boundary, Output_dir = (
    os.path.join(main_dir, sub_folder)
    for sub_folder in ('Copernicus', 'Boundary', 'Copernicus_monthly_WA')
)

In [17]:
# Load the shapefile
def load_shape_file(filepath):
    """Read a shapefile from disk so it can later be used to mask a grid.

    Args:
        filepath: Path to the *.shp file.

    Returns:
        The object produced by `gpd.read_file` for that path.
    """
    loaded = gpd.read_file(filepath)
    # Point the user at the next step of the workflow.
    print("Shapefile loaded. To prepare for masking, run the function\n"
          "        `select_shape`.")
    return loaded

#Create the mask
def select_shape(shpfile, col_code='ISO3_CODE', country_codes=('ZAF', 'LSO', 'SWZ')):
    """Build a single-row mask from the shapes of the selected countries.

    Args:
        shpfile: GeoDataFrame loaded through `load_shape_file`.
        col_code: (str) name of the column that holds the country codes.
            (Run print(shpfile) to see the available columns.)
        country_codes: iterable of codes to keep from the `col_code` column.

    Returns:
        GeoDataFrame with one row whose geometry is the union of all selected
        shapes. It carries the CRS of `shpfile`.

    Raises:
        ValueError: if no row of `shpfile` matches `country_codes`.
    """
    wanted_codes = list(country_codes)

    # Keep only the rows whose code column matches one of the requested codes
    selected_rows = shpfile[shpfile[col_code].isin(wanted_codes)]
    if selected_rows.empty:
        # An empty mask cannot clip anything; fail here with a clear message
        raise ValueError(
            "No shape found with " + col_code + " in " + repr(wanted_codes))

    # Combine the selected polygons into a single geometry. `union_all` is the
    # replacement for the deprecated `unary_union`; fall back to the latter on
    # geopandas versions that do not provide it yet.
    geometries = selected_rows.geometry
    if hasattr(geometries, 'union_all'):
        unioned_polygon = geometries.union_all()
    else:
        unioned_polygon = geometries.unary_union

    # Wrap the union in a one-row GeoDataFrame, keeping the source CRS so the
    # mask still knows which coordinate system its vertices are expressed in
    mask_polygon = gpd.GeoDataFrame(geometry=[unioned_polygon], crs=shpfile.crs)

    print("""Mask created.""")

    return mask_polygon
# Build the area-of-interest (AOI) mask from the boundary shapefile
boundary_shapefile = os.path.join(Boundary, 'CNTR_RG_01M_2020_4326.shp')
shpfile = load_shape_file(boundary_shapefile)
AOI = select_shape(shpfile)

Shapefile loaded. To prepare for masking, run the function
        `select_shape`.
Mask created.


In [23]:
# Layout of the monthly NDVI data built below:
# `years_copernicus` maps each year to a dictionary that maps each month name
# to that month's NDVI raster clipped to the AOI.

# Year folders, each containing month folders of 10-day Copernicus NDVI data.
# Entries that are not folders are ignored.
Copernicus_years = sorted(
    entry for entry in os.listdir(Copernicus)
    if os.path.isdir(os.path.join(Copernicus, entry))
)
# Dictionary that collects the monthly data of every year
years_copernicus = {}
# Loop through all available year folders
for year in Copernicus_years:
    # Dictionary that collects the monthly NDVI data of the current year
    months_copernicus = {}
    # Export folder of the current year (created on first write)
    years_cop = os.path.join(Output_dir, year)
    # Loop through each month folder of the year
    for month in sorted(os.listdir(os.path.join(Copernicus, year))):
        # Folder that holds the 10-day composite files of this month
        path = os.path.join(Copernicus, year, month)
        if not os.path.isdir(path):
            # Stray file next to the month folders: nothing to process
            continue
        # The first three entries (sorted by name) are used as the three
        # 10-day composites of the month
        NDVI_10Days = sorted(os.listdir(path))
        if len(NDVI_10Days) < 3:
            raise ValueError(
                'Expected three 10-day NDVI files in ' + path
                + ', found ' + str(len(NDVI_10Days)))

        # The month folder is named after the month ('%B'); its length in days
        # sets how much of the month each composite covers
        Number_of_Month = datetime.strptime(month, '%B').month
        nMonth_days = monthrange(int(year), Number_of_Month)[1]
        # Composites cover days 1-10, 11-20 and 21-end of month
        day_weights = (10 / nMonth_days,
                       10 / nMonth_days,
                       (nMonth_days - 20) / nMonth_days)

        dekads = []
        try:
            # Open the three composites; masked=True turns no-data into NaN
            for file_name in NDVI_10Days[:3]:
                dekads.append(rxr.open_rasterio(os.path.join(path, file_name),
                                                masked=True))
            first_10Days = dekads[0]
            # Scale factor and offset from the metadata of the first composite,
            # used to convert stored values to real NDVI. They are applied to
            # all three files, i.e. assumed identical across the month.
            Scale_factor = first_10Days.attrs['scale_factor']
            Offset = first_10Days.attrs['add_offset']
            # CRS of the Copernicus NDVI data
            Copernicus_crs = first_10Days.rio.crs

            # Day-weighted real NDVI of each composite, plus the weight each
            # composite actually contributes per pixel (0 where it is NaN)
            weighted_ndvi = []
            valid_weights = []
            for dekad, weight in zip(dekads, day_weights):
                weighted_ndvi.append((dekad * Scale_factor + Offset) * weight)
                valid_weights.append(
                    dekad.notnull().astype(dekad.dtype) * weight)

            # `sum` skips NaN, so a plain weighted sum would report 0 for a
            # pixel missing in all three composites and would be biased low
            # where only some are missing. Dividing by the weight of the valid
            # composites gives the day-weighted mean of what was observed, and
            # pixels without any observation stay NaN.
            weighted_sum = xr.concat(weighted_ndvi, dim='band').sum(dim='band')
            weight_total = xr.concat(valid_weights, dim='band').sum(dim='band')
            Monthly_average = (weighted_sum / weight_total).where(weight_total > 0)
        finally:
            # Release the file handles; the monthly result is already computed
            for dekad in dekads:
                dekad.close()

        # Assign the CRS to the monthly data
        Monthly_average.rio.write_crs(Copernicus_crs, inplace=True)

        # Clip the monthly NDVI data to the AOI
        Monthly_average_AOI = Monthly_average.rio.clip(
            AOI.geometry.apply(mapping),
            crs=Copernicus_crs, all_touched=True,
            from_disk=True).squeeze()

        # Store the month in the dictionary of the current year
        months_copernicus[month] = Monthly_average_AOI

        # Optional: export the monthly data to a tiff file.
        # makedirs also creates Output_dir itself when it does not exist yet.
        os.makedirs(years_cop, exist_ok=True)
        Monthly_average_AOI.rio.to_raster(os.path.join(years_cop, year + month + '.tif'))

    # Store the dictionary with the monthly data of the year
    years_copernicus[year] = months_copernicus

print('Dictionaries of NDVI data of all months per year is created')

Dictionaries of NDVI data of all months per year is created
