## ET correction using wofs

##### 1 - Reproject DEA WOfS from its projected coordinate system to a geographic coordinate system using QGIS
##### 2 - Apply 0.75 threshold to wofs for extracting reservoirs
##### 3 - Create nc using wofs_mask rasters
##### 4 - Download GLEAM nc global dataset from s3 bucket
##### 5 - Create daily tif files using GLEAM global nc
##### 6 - Clip global tif to Africa extent
##### 7 - Time conversion daily to monthly
##### 8 - Create GLEAM nc as a single file using monthly tif
##### 9 - Download global SSEBop from s3 bucket
##### 10 - Clip to Africa extent and rename the file
##### 11 - Apply ET correction for the SSEBop using GLEAM ET and WOFS mask ncs

##### Note - Time range 2003 - 2021

#### 2 - Apply 0.75 threshold to wofs for extracting reservoirs

In [None]:
import os
import glob
import xarray as xr
import pandas as pd
import numpy as np
import rasterio
import time

# Folder holding the reprojected WOfS rasters used as input for the threshold.
wofs_folder = r'/efs/CWA/rs_input_tifs/wofs/wofs_reproject'
# Folder that receives the thresholded mask rasters.
output_folder = r'/efs/CWA/rs_input_tifs/wofs/wofs_mask'
# Every GeoTIFF found directly inside the input folder (unsorted).
wofs_fhs = glob.glob(os.path.join(wofs_folder, '*.tif'))

In [None]:
# Threshold every WOfS raster at 0.75 and write a mask raster with the same
# file name: 1 where the value is >= 0.75, NaN everywhere else.
for fh in sorted(wofs_fhs):
    start = time.time()
    filename = os.path.basename(fh)

    # Read everything that is needed inside a context manager so the source
    # file handle is released after each raster instead of leaking.
    with rasterio.open(fh) as dataset:
        profile = {'driver': 'GTiff', 'dtype': 'float32', 'nodata': -9999, 'width':dataset.shape[1],
                   'height':dataset.shape[0] , 'count': 1, 'crs': dataset.crs, 'transform': dataset.transform,
                   'compress':'lzw', 'num_threads':'all_cpus', 'blockxsize':128, 'blockysize':128, 'tiled':True}
        data = dataset.read(1)

    # NOTE(review): non-water pixels are written as NaN although the profile
    # declares -9999 as nodata -- confirm that downstream readers expect NaN.
    wofs_nw = np.where(data >= 0.75, 1, np.nan).astype('float32')

    outpath = os.path.join(output_folder, filename)

    with rasterio.open(outpath, "w", **profile) as dst:
        dst.write(wofs_nw, indexes=1)

    print(outpath)
    end = time.time()
    print('Total time:', end - start)

#### 3 - create nc using wofs_mask rasters

In [None]:
# %% netCDF files from
import os
import glob
import sys

sys.path.append(r'/efs/CWA/scripts')
import createNC_cmi_v5 as createNC_cmi
# import  createNC_cmi
import time

start = time.time()

basedir = r"/efs/CWA/rs_input_tifs"
# NOTE(review): the thresholding step earlier in this notebook writes its masks
# to 'wofs/wofs_mask', while this cell reads 'wofs/3_wofs_mask' -- confirm that
# the folder below is the one that actually holds the mask rasters.
pathwatermask = os.path.join(basedir, 'wofs', '3_wofs_mask')

template = r"/efs/CWA/static_datasets/template/L1_LCC_21_Resample_1km_correct_nodata.tif"

shp_path = r"/efs/CWA/static_datasets/shapefile/Africa_adm0_buffer.shp"
save_location = r"/efs/CWA/netcdf_files_test"


# Prefix of the output file name; also handed to make_netcdf.
name = 'AFR'

# One entry per netCDF file to build: key -> [input folder, dimension names,
# attributes]. The layout is the one consumed by createNC_cmi.make_netcdf,
# whose implementation is not visible here.
datasets = {
                     'wofs'  :   [pathwatermask,
                                  ('time','latitude', 'longitude'),
                                  {'units': 'None', 'source': 'DEA', 'quantity':'WOFS'}]
          }

nc_files = []

for d in datasets:
    filesAll = glob.glob(os.path.join(datasets[d][0],'*.tif'))
    # Surface a wrong or empty input folder instead of letting it pass silently.
    if not filesAll:
        print('WARNING: no .tif files found in {}'.format(datasets[d][0]))
    dataset = {d: datasets[d]}
    # Output name: <name>_<quantity>_<source>.nc
    fname = name+'_'+datasets[d][2]['quantity']+'_'+ datasets[d][2]['source']+'.nc'
    nc_file = os.path.join(save_location, fname)
    nc_files.append(nc_file)
    succes = createNC_cmi.make_netcdf(nc_file, dataset, shp_path, template, name)

end = time.time()
print('\n',end - start)

#### 4 - Download GLEAM nc global dataset from s3 bucket

In [None]:
import os
import glob
import sys
import numpy as np

# Imported here because this cell's import list does not include it.
import subprocess

# Download one GLEAM daily netCDF file per year (2003 to 2021 inclusive).
years = np.arange(2003,2022)
for i in years:
    source = 's3://iwmi-wa/Mansoor_ec2_data/data_from_sftp/v3.6a/daily/{}/Ep_{}_GLEAM_v3.6a.nc'.format(i, i)
    disti = '/efs/CWA/rs_input_tifs/GLEAM/nc_global/Ep_{}_GLEAM_v3.6a.nc'.format(i)
    # Pass the command as an argument list (no shell involved) and inspect the
    # exit code, so a failed copy is reported instead of being ignored.
    # A missing 'aws' executable raises FileNotFoundError right away.
    result = subprocess.run(['aws', 's3', 'cp', source, disti])
    if result.returncode != 0:
        print('WARNING: download failed for {} (exit code {})'.format(i, result.returncode))

#### 5 - Create daily tif files using GLEAM global nc

In [None]:
import os
import numpy as np
import pandas as pd
import xarray as xr
import gdal
import osr
import glob
from datetime import datetime
import subprocess

def Save_as_tiff(name='', data='', geo='', projection=''):
    """
    Save a 2D array as a single-band, LZW-compressed Float32 GeoTIFF.

    The nodata value of the band is set to -9999.

    Keyword arguments:
    name -- string, path of the output file
    data -- [array], 2D dataset of the geotiff (rows, columns)
    geo -- [minimum lon, pixelsize, rotation, maximum lat, rotation,
            pixelsize], (geospatial dataset)
    projection -- well-known geographic CS name (e.g. "WGS84"), EPSG code
                  or WKT string; an empty string selects WGS84

    Returns an empty tuple.
    Raises IOError when the output file cannot be created.
    """

    # save as a geotiff
    driver = gdal.GetDriverByName("GTiff")
    dst_ds = driver.Create(name, int(data.shape[1]), int(data.shape[0]), 1, gdal.GDT_Float32, ['COMPRESS=LZW'])
    if dst_ds is None:
        # Create() returns None on failure when GDAL exceptions are disabled.
        raise IOError("Could not create GeoTIFF: %s" % name)

    srse = osr.SpatialReference()
    if projection == '':
        srse.SetWellKnownGeogCS("WGS84")
    else:
        # Try the value as a well-known name first. A return code of 6
        # (OGRERR_FAILURE) or an exception means it was not recognised.
        try:
            recognised = srse.SetWellKnownGeogCS(projection) != 6
        except Exception:
            recognised = False

        if not recognised:
            # Fall back to an EPSG code, and finally to a WKT definition.
            try:
                srse.ImportFromEPSG(int(projection))
            except Exception:
                srse.ImportFromWkt(projection)

    dst_ds.SetProjection(srse.ExportToWkt())
    dst_ds.GetRasterBand(1).SetNoDataValue(-9999)
    dst_ds.SetGeoTransform(geo)
    dst_ds.GetRasterBand(1).WriteArray(data)
    # Dropping the reference flushes and closes the file.
    dst_ds = None
    return ()


nc_files = glob.glob(r'/efs/CWA/rs_input_tifs/GLEAM/nc_global/*.nc')
output_tif = r"/efs/CWA/rs_input_tifs/GLEAM/daily"
parameter = 'Ep'

# Export every time step of every GLEAM netCDF file as its own GeoTIFF.
for fh in sorted(nc_files):
    # Context manager so the netCDF handle is released after each file.
    with xr.open_dataset(fh) as nc_data:
        data_range = nc_data['time'][:].values
        # The first data variable of the file is the one that gets exported.
        var_name = list(nc_data.keys())[0]

        # The georeferencing is identical for all time steps: build it once.
        # NOTE(review): lon[0]/lat[0] are used directly as the raster origin.
        # If these coordinates are cell centres the output is shifted by half
        # a pixel -- confirm against the GLEAM files.
        bottom_left_x = float(nc_data['lon'][0].values)
        bottom_left_y = float(nc_data['lat'][0].values)
        cellsize1 = 0.25
        cellsize2 = -0.25
        geo = [bottom_left_x, cellsize1, 0, bottom_left_y, 0, cellsize2]

        for idx, i in enumerate(data_range):
            str_date = pd.Timestamp(i).strftime("%Y.%m.%d")
            print(str_date)

            # The time index has to follow the loop; a fixed index of 0 would
            # write the first day of the file into every daily tif.
            # NOTE(review): assumes the first axis of the variable is time and
            # the remaining axes are (lat, lon) -- confirm the dimension order.
            data = nc_data[var_name][idx].values
            data[np.isnan(data)] = -9999

            geotif = r'{}/{}_GLEAM_global_v3.6a_mm.day_{}.tif'.format(output_tif, parameter, str_date)
            print(geotif)
            Save_as_tiff(name=geotif, data=data, geo=geo, projection="WGS84")


#### 6 - Clip global tif to Africa extent

In [None]:
import os
import shapefile
import glob
import watertools.General.raster_conversions as RC
import watertools.General.data_conversions as DC

### OPTION 2: Read from shapefile
# BASIN_SHP = r"C:\_WA\Ethiopia\shp\riftvalley_lakes_basin_10km_buff.shp" #path to Basin shapefile

# shape=shapefile.Reader(BASIN_SHP)
# xmin,ymin,xmax,ymax=shape.bbox

# Input folder with the global daily rasters and output folder for the clips.
path = r'/efs/CWA/rs_input_tifs/GLEAM/daily'
output_path = r'/efs/CWA/rs_input_tifs/GLEAM/daily_AFR'

filesAll = glob.glob(os.path.join(path, '*.tif'))

for file in sorted(filesAll):
    # Clip window in degrees, rebuilt for every file.
    latlim, lonlim = [-36, 38], [-20, 54]

    # Same file name as the input, with 'global' swapped for 'AFR'.
    filename = file.rsplit("/", 1)[-1]
    filepath = os.path.join(output_path, filename.replace('global', 'AFR'))

    # Cut the raster to the window, then store the result as a new tiff.
    data, geo, proj = RC.clip_data(file, latlim, lonlim)
    print(filepath)
    DC.Save_as_tiff(name=filepath, data=data, geo=geo, projection='WGS84')

#### 7 - Time conversion daily to monthly

In [None]:
from watertools.Functions.Time_Conversions import Day_to_monthly_flux as DM

In [None]:
import numpy as np
import os
import glob
import pandas as pd
import gdal
import calendar


def Nearest_Interpolate(Dir_in, Startdate, Enddate, format_in = None, format_out = None, Dir_out = None, AOI = None):
    """
    Calculate monthly tiff files from daily tiff files by summing the days.

    Despite its name this function does not interpolate: for every month
    between Startdate and Enddate it adds up all daily rasters of that month.
    NaN, -9999 and the nodata value of the first input file count as 0.
    Months without any daily file are reported and skipped.

    Parameters
    ----------
    Dir_in : str
        Path to the input data
    Startdate : str
        Contains the start date of the model 'yyyy-mm-dd'
    Enddate : str
        Contains the end date of the model 'yyyy-mm-dd'
    format_in : str, optional
        File name template of the daily files with ``{yyyy}``, ``{mm}`` and
        ``{dd}`` placeholders. When omitted, all ``*.tif`` files in Dir_in are
        used and a month is matched on ``yyyy.mm`` in the file name.
    format_out : str, optional
        File name template of the monthly files, formatted with ``yyyy``,
        ``mm`` and ``dd`` (always 1). When omitted, the name of a daily file
        of the month is reused with 'mm.day' replaced by 'mm.month' and the
        date suffix replaced by 'yyyy.mm.01', inside a 'monthly' subfolder
        of Dir_out.
    Dir_out : str
        Path to the output data, default is same as Dir_in
    AOI : numpy.ndarray, optional
        Array the monthly sum is multiplied with (e.g. a mask).

    Raises
    ------
    FileNotFoundError
        When no daily input file is found in Dir_in.
    """
    # import WA+ modules
    import watertools.General.data_conversions as DC
    import watertools.General.raster_conversions as RC

    # The glob patterns below are relative, so work from the input directory.
    # NOTE: this changes the working directory of the whole process.
    os.chdir(Dir_in)

    # First day of every month in the requested period
    Dates = pd.date_range(Startdate, Enddate, freq='MS')

    # Find all daily files
    if format_in is None:
        files = glob.glob('*.tif')
    else:
        files = glob.glob(format_in.replace(":02d","").format(yyyy= "*", mm = "*", dd = "*"))

    if not files:
        raise FileNotFoundError("No daily tiff files found in %s" % Dir_in)

    # Get array information and define projection
    geo_out, proj, size_X, size_Y = RC.Open_array_info(files[0])
    # NOTE(review): takes the second-to-last quoted token of the projection
    # string as the EPSG code -- presumably the WKT authority code.
    if int(proj.split('"')[-2]) == 4326:
        proj = "WGS84"

    # Get the No Data Value of the first file, then release the handle
    dest = gdal.Open(files[0])
    NDV = dest.GetRasterBand(1).GetNoDataValue()
    dest = None

    # Define output directory
    if Dir_out is None:
        Dir_out = Dir_in
    os.makedirs(Dir_out, exist_ok=True)

    # loop over the months and sum the days
    for date in Dates:
        Year = date.year
        Month = date.month
        if format_in is None:
            files_one_year = glob.glob('*%d.%02d*.tif' % (Year, Month))
        else:
            files_one_year = glob.glob(format_in.replace(":02d","").format(yyyy=Year, mm = "%02d" %Month, dd = "*"))

        # Get amount of days in month
        Amount_days_in_month = calendar.monthrange(Year, Month)[1]

        if len(files_one_year) != Amount_days_in_month:
            print("One day is missing!!! month %s year %s" %(Month, Year))
            print("Days found = %d" %len(files_one_year))

        if not files_one_year:
            # Nothing to sum and no daily file name to derive the output name
            # from: writing an all-zero raster would only hide the gap.
            print("No daily files for month %s year %s, month skipped" %(Month, Year))
            continue

        # Accumulate the daily rasters
        Month_data = np.zeros([size_Y, size_X])
        for file_one_year in files_one_year:
            file_path = os.path.join(Dir_in, file_one_year)

            Day_data = RC.Open_tiff_array(file_path)
            Day_data[np.isnan(Day_data)] = 0.0
            Day_data[Day_data == -9999] = 0.0
            if NDV is not None:
                Day_data[Day_data == NDV] = 0.0
            Month_data += Day_data

        if isinstance(AOI, np.ndarray):
            Month_data = Month_data * AOI

        # Define output name
        if format_out is None:
            # Reuse the name of the last daily file of this month.
            file_name = files_one_year[-1].replace('mm.day', 'mm.month')
            output_name = os.path.join(Dir_out, 'monthly', file_name)
            # Swap the trailing 'yyyy.mm.dd.tif' (14 characters) for the
            # first day of the month.
            output_name = output_name[:-14] + '%d.%02d.01.tif' % (date.year, date.month)
        else:
            output_name = os.path.join(Dir_out, format_out.format(yyyy = date.year,  mm = date.month, dd = 1))

        # Make sure the target folder (e.g. the 'monthly' subfolder) exists
        output_dir = os.path.dirname(output_name)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save tiff file
        DC.Save_as_tiff(output_name, Month_data, geo_out, proj)
        print(output_name)
    return

In [None]:
# Folder with the daily rasters to aggregate.
Dir_in = r'/efs/CWA/rs_input_tifs/GLEAM/daily_AFR'
# Period to process; one monthly raster is produced per month start in it.
Startdate = '2003-01-01'
Enddate = '2021-12-31'
# With no format_out given, Nearest_Interpolate writes into a 'monthly'
# subfolder of Dir_out, i.e. /efs/CWA/rs_input_tifs/GLEAM/monthly here.
Dir_out = r'/efs/CWA/rs_input_tifs/GLEAM/'

Nearest_Interpolate(Dir_in, Startdate, Enddate, Dir_out=Dir_out)

##### 8 - Create GLEAM nc as a single file using monthly tif

In [None]:
# %% netCDF files from
import os
import glob
import sys

sys.path.append(r'/efs/CWA/scripts')
import createNC_cmi_v5 as createNC_cmi
# import  createNC_cmi
import time

start = time.time()

basedir = r"/efs/CWA/rs_input_tifs"
# Folder with the monthly GLEAM rasters
pathGLEAM = os.path.join(basedir,'GLEAM','monthly')

template = r"/efs/CWA/static_datasets/template/L1_LCC_21_Resample_1km_correct_nodata.tif"

shp_path = r"/efs/CWA/static_datasets/shapefile/Africa_adm0_buffer.shp"
save_location = r"/efs/CWA/netcdf_files_test"


# Prefix of the output file name; also handed to make_netcdf.
name = 'Africa'

# One entry per netCDF file to build: key -> [input folder, dimension names,
# attributes]. The layout is the one consumed by createNC_cmi.make_netcdf,
# whose implementation is not visible here.
datasets = {
                     'ETref'  :   [pathGLEAM,
                                  ('time','latitude', 'longitude'),
                                  {'units': 'mm.month', 'source': 'GLEAM', 'quantity':'ETref'}]
          }

nc_files = []

for d in datasets:
    filesAll = glob.glob(os.path.join(datasets[d][0],'*.tif'))
    # Surface a wrong or empty input folder instead of letting it pass silently.
    if not filesAll:
        print('WARNING: no .tif files found in {}'.format(datasets[d][0]))
    dataset = {d: datasets[d]}
    # Output name: <name>_<quantity>_<source>.nc
    fname = name+'_'+datasets[d][2]['quantity']+'_'+ datasets[d][2]['source']+'.nc'
    nc_file = os.path.join(save_location, fname)
    nc_files.append(nc_file)
    succes = createNC_cmi.make_netcdf(nc_file, dataset, shp_path, template, name)

end = time.time()
print('\n',end - start)

##### 9 - Download global SSEBop from s3 bucket

##### SSEBop download using aws cli

##### 10 - Clip to Africa extent and rename the file

In [None]:
import os
import shapefile
import glob
import watertools.General.raster_conversions as RC
import watertools.General.data_conversions as DC
from tqdm import tqdm
### OPTION 2: Read from shapefile
# BASIN_SHP = r"C:\_WA\Ethiopia\shp\riftvalley_lakes_basin_10km_buff.shp" #path to Basin shapefile

# shape=shapefile.Reader(BASIN_SHP)
# xmin,ymin,xmax,ymax=shape.bbox

# Input folder with the global SSEBop rasters and output folder for the clips.
path = r'/efs/CWA/rs_input_tifs/ET/SSEBop/global'
output_path = r'/efs/CWA/rs_input_tifs/ET/SSEBop/AFR'

filesAll = glob.glob(os.path.join(path, '*.tif'))

for file in tqdm(sorted(filesAll)):
    # Clip window in degrees, rebuilt for every file.
    latlim, lonlim = [-36, 38], [-20, 54]

    filename = file.rsplit("/", 1)[-1]
    # The date is parsed from the part of the name before the first '.' and
    # '_': characters 1-4 are the year, everything after that is the month.
    # NOTE(review): assumes names start with one character followed by
    # YYYYMM -- confirm against the downloaded files.
    date_token = filename.split('.')[0].split('_')[0]
    year = date_token[1:5]
    month = date_token[5:]

    nw_filename = 'SSEBopETv5_actual_mm_monthly_{}.{}.01.tif'.format(year, month)
    filepath = os.path.join(output_path, nw_filename)

    # Cut the raster to the window, then store the result under the new name.
    data, geo, proj = RC.clip_data(file, latlim, lonlim)
    DC.Save_as_tiff(name=filepath, data=data, geo=geo, projection='WGS84')

#### 11 - Create SSEBop nc 

In [None]:
# %% netCDF files from
import os
import glob
import sys

sys.path.append(r'/efs/CWA/scripts')
import createNC_cmi_v5 as createNC_cmi
# import  createNC_cmi
import time

start = time.time()

basedir = r"/efs/CWA/rs_input_tifs"
# Folder with the Africa-clipped monthly SSEBop rasters
pathSSEBop = os.path.join(basedir,'ET', 'SSEBop','AFR')

template = r"/efs/CWA/static_datasets/template/L1_LCC_21_Resample_1km_correct_nodata.tif"

shp_path = r"/efs/CWA/static_datasets/shapefile/Africa_adm0_buffer.shp"
save_location = r"/efs/CWA/netcdf_files_test"


# Prefix of the output file name; also handed to make_netcdf.
name = 'AFR'

# One entry per netCDF file to build: key -> [input folder, dimension names,
# attributes]. The layout is the one consumed by createNC_cmi.make_netcdf,
# whose implementation is not visible here.
# NOTE(review): the key is 'ETref' although the attributes describe actual ET
# ('ETa') -- looks like a copy-paste leftover; confirm before renaming, since
# the key may end up as the variable name in the netCDF file.
datasets = {
                     'ETref'  :   [pathSSEBop,
                                  ('time','latitude', 'longitude'),
                                  {'units': 'mm.month', 'source': 'SSEBop', 'quantity':'ETa'}]
          }

nc_files = []

for d in datasets:
    filesAll = glob.glob(os.path.join(datasets[d][0],'*.tif'))
    # Surface a wrong or empty input folder instead of letting it pass silently.
    if not filesAll:
        print('WARNING: no .tif files found in {}'.format(datasets[d][0]))
    dataset = {d: datasets[d]}
    # Output name: <name>_<quantity>_<source>.nc
    fname = name+'_'+datasets[d][2]['quantity']+'_'+ datasets[d][2]['source']+'.nc'
    nc_file = os.path.join(save_location, fname)
    nc_files.append(nc_file)
    succes = createNC_cmi.make_netcdf(nc_file, dataset, shp_path, template, name)

end = time.time()
print('\n',end - start) #5306.594389677048

#### 11 - Resample yearly wofs to monthly 

In [None]:
import numpy as np
import pandas as pd
import xarray as xr
import os

def open_nc(input_nc, chunksize=None, layer=None):
    """
    Open a netCDF file with xarray and optionally pick one of its variables.

    input_nc  -- path of the netCDF file
    chunksize -- None to open without chunking, otherwise a sequence whose
                 items 0, 1 and 2 are the chunk sizes for 'time',
                 'longitude' and 'latitude' (in that order)
    layer     -- int: return the variable at that position of the dataset
                 keys; str: return the variable with that name; anything
                 else: return the whole dataset
    """
    open_kwargs = {}
    if chunksize is not None:
        open_kwargs['chunks'] = {'time': chunksize[0],
                                 'longitude': chunksize[1],
                                 'latitude': chunksize[2]}
    dts = xr.open_dataset(input_nc, **open_kwargs)

    # Select a data array by name ...
    if type(layer) is str:
        return dts[layer]
    # ... or by position in the list of dataset keys
    if type(layer) is int:
        return dts[list(dts.keys())[layer]]
    # No usable selector: hand back the full dataset
    return dts

In [None]:
def resample_to_monthly_dataset(yearly_nc, sample_nc,
                                start_month=0,
                                output=None,
                                chunksize=None):
    '''
    Resample a yearly netCDF dataset to a monthly netCDF dataset, where the
    value of each month is the same as the value of its year.

    yearly_nc: a yearly dataset to resample to monthly
    sample_nc: a monthly dataset to sample 'time' dimension
    start_month: the index of start month.
        default start_month = 0 means yearly value resample from the first
        month in sample_nc. A positive value drops that many leading months,
        so the first year covers only 12 - start_month months of sample_nc.
        Must be >= 0.
    output: path of the monthly netCDF file; defaults to yearly_nc with
        '.nc' replaced by '_resampled_monthly.nc'
    chunksize: passed to open_nc for both inputs and used as the netCDF
        chunk sizes of the output variable (None = no chunking).
        NOTE(review): open_nc reads it as [time, longitude, latitude] while
        the output is written as (time, latitude, longitude) -- harmless for
        equal lat/lon chunk sizes, confirm otherwise.

    The number of resampled months has to match the length of the 'time'
    dimension of sample_nc.

    Returns the path of the written file.
    Raises ValueError when start_month is negative.
    '''
    if start_month < 0:
        raise ValueError('start_month must be >= 0, got {0}'.format(start_month))

    dts1=open_nc(yearly_nc,chunksize=chunksize,layer=0)
    dts2=open_nc(sample_nc,chunksize=chunksize,layer=0)
    try:
        # Year index for every output month: each year 12 times, minus the
        # months that fall before the first month of sample_nc.
        n_years = len(dts1.time)
        year_of_month = [i for i in range(n_years) for _ in range(12)][start_month:]

        # One indexing operation instead of growing the array month by month
        # with xr.concat (which copied the whole stack on every step).
        dts = dts1.isel(time=year_of_month)
        dts['time']=dts2['time']
        #change coordinates order to [time,latitude,longitude]
        dts=dts.transpose('time','latitude','longitude')

        dts.attrs=dts1.attrs
        dts.name = dts1.name

        comp = dict(zlib=True,
                    least_significant_digit=2,
                    chunksizes=chunksize)
        if output is None:
            output=yearly_nc.replace('.nc','_resampled_monthly.nc')
        encoding = {dts.name: comp}
        dts.to_netcdf(output,encoding=encoding)
    finally:
        # Release the input files even when resampling or writing fails
        dts1.close()
        dts2.close()
    print('Save monthly LU datacube as {0}'.format(output))
    return output

In [None]:
from dask.distributed import Client
# Create a dask.distributed client with default arguments.
client = Client()
# Last expression of the cell, so the notebook displays the dashboard link.
client.dashboard_link

In [None]:
# Yearly water-mask netCDF to be repeated for every month.
WOFS_nc = r'/efs/CWA/netcdf_files_test/AFR_WOFS_DEA.nc'
# Monthly netCDF whose 'time' coordinate is copied onto the result.
sample_nc = r'/efs/CWA/netcdf_files_test/Africa_ETref_GLEAM.nc'
# Chunk sizes used for reading the inputs and for the output encoding.
chunksize = [1, 900, 900]
# output=None: the result is written next to WOFS_nc with the suffix
# '_resampled_monthly.nc'; the returned value is that path.
monthly_nc = resample_to_monthly_dataset(WOFS_nc, sample_nc,
                                start_month=0,
                                output=None,
                                chunksize=chunksize)

##### 11 - Apply ET correction for the SSEBop using GLEAM ET and WOFS mask ncs

In [None]:
import os
import xarray as xr
from dask.diagnostics import ProgressBar

def open_nc(nc,timechunk=1,chunksize=1000):
    """
    Open a netCDF file and return its first variable as a chunked array.

    nc        -- path of the netCDF file
    timechunk -- chunk size along 'time' (only used when the dataset has a
                 'time' dimension)
    chunksize -- chunk size along both 'latitude' and 'longitude'

    Returns a tuple (chunked data array, name of the variable).

    NOTE: this definition replaces the open_nc defined earlier in this
    notebook, which has a different signature and return value.
    """
    dts = xr.open_dataset(nc)
    key = list(dts.keys())[0]

    # Spatial chunking always applies; time chunking only when there is a
    # time dimension to chunk.
    chunk_spec = {"latitude": chunksize, "longitude": chunksize}
    if 'time' in dts.dims:
        chunk_spec = {"time": timechunk, **chunk_spec}

    var = dts[key].chunk(chunk_spec)
    return var, key

if __name__ == '__main__':
    # Inputs: actual ET (SSEBop), reference ET (GLEAM) and the monthly water mask
    aet_path = r'/efs/CWA/netcdf_files_test/AFR_ETa_SSEBop.nc'
    ret_path = r'/efs/CWA/netcdf_files_test/Africa_ETref_GLEAM.nc'
    watermask_path = r'/efs/CWA/netcdf_files_test/AFR_WOFS_DEA_resampled_monthly.nc'

    # Only the data arrays are needed here, the variable names are discarded
    aet = open_nc(aet_path)[0]
    ret = open_nc(ret_path)[0]
    mask = open_nc(watermask_path)[0]

    # Where the mask equals 1 use 0.75 x reference ET, elsewhere keep the
    # actual ET unchanged
    corrected_et = xr.where(mask==1,ret*0.75,aet)

    ### Describe the result
    attrs = {'units': 'mm/month', 'source': 'SSEBop', 'quantity':'ETa'}
    corrected_et.name = 'SSEBop'
    corrected_et.attrs = attrs

    ### Write netCDF file next to the actual ET input
    root_f = os.path.dirname(aet_path)
    et_correct_nc = os.path.join(root_f, 'AFR_ETa_SSEBop_wofs_corrected.nc')
    chunks = [1,900,900]
    comp = dict(zlib=True, complevel=9, least_significant_digit=2, chunksizes=chunks)
    encoding = {"SSEBop": comp}
    print("\n\nwriting the Monthly corrected ET netcdf file using water mask\n\n")
    with ProgressBar():
        corrected_et.to_netcdf(et_correct_nc, encoding=encoding)
