## INITIALIZE REQUIREMENTS

### Load Dependencies

In [1]:
%matplotlib inline
# !pip install python-dotenv
# from dotenv import load_dotenv
# load_dotenv()

import os, glob, warnings, datacube, rasterio, folium, json
import numpy as np
import xarray as xr
import geopandas as gpd
import matplotlib.pyplot as plt
import rioxarray as rio
from rasterio.merge import merge
from rasterio.plot import show
import contextily as cx

from scipy.ndimage import uniform_filter
from scipy.ndimage import variance
from skimage.filters import threshold_minimum
from datacube.utils.geometry import Geometry

from deafrica_tools.spatial import xr_rasterize
from deafrica_tools.datahandling import load_ard
from deafrica_tools.plotting import display_map, rgb
from deafrica_tools.areaofinterest import define_area

 # G-Drive Dependencies
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaFileUpload
from google.oauth2 import service_account

from IPython.display import clear_output
from IPython.display import display

warnings.filterwarnings("ignore")

In [2]:
# G-Drive Scopes and Credentials
# The 'creds' folder contains three files: credentials.json (service account),
# credentials_2.json (Desktop app OAuth client) and token.json (user account token).

# OAuth scope list passed to every credential constructor in this notebook.
SCOPES = ["https://www.googleapis.com/auth/drive"]
# Client-secrets file used by the installed-app OAuth flow in create_user_token;
# create_token reads this same path.
credential_path = "../creds/credentials_2.json"
# Cached user token: read by create_user_token when it exists and written back by it.
u_credential_path = "../creds/token.json"

### Connect to the datacube

Connect to the datacube so we can access DEA data.
The `app` parameter is a unique name for the analysis which is based on the notebook file name.

In [3]:
dc = datacube.Datacube(app="Radar_water_detection")

### G-Drive Folder IDs and Timerange Definitions

In [4]:
# Create folder ids by copying the ID from the g-drive folder url
# NOTE: in the code visible in this notebook, iterate_grid uploads only to
# REF_TEST_PRF_ID (pre-flood outputs) and REF_TEST_F_ID (flood outputs); the other
# IDs below are not referenced by any visible cell.
FLOOD_MEAN_ID = "196YsHy1SXjNDnja6LjVhhlvoIt-Jg91l"
FLOOD_MEDIAN_ID = "1Qhum99pKi1Qyon8hcp4K8S_DGaJh5a5e"
PREFLOOD_MEAN_ID = "1K0KqGlLxdUCsXg4771k_uf_x2UwbHlqa"
PREFLOOD_MEDIAN_ID = "1Ovu5Q58xZRGpKsvowVr49klOrgVoZ0XQ"

FLOOD_MEAN_TEST_ID = "1bv76i244wyJzfpWN57wM7Yh4jQ38gove"
PREFLOOD_MEAN_TEST_ID = "1msS6VuX_8UptluKIuQG5PJNOrwqYtacY"

REF_TEST_PRF_ID = "1-GUFFarvB1jpuTBIJ-qJAPkVroVDLXk8"
REF_TEST_F_ID = "10rwk3OW_fBL8ws7K7kt4P5EFjzpuyWp4"

In [5]:
# Define main time period of analysis
# (passed as the `time` query to load_ard in iterate_grid)
timerange = ('2024-02', '2024-09')

# Define sub-periods of analysis - should be within main time period
# Each list is used as the label list for `.sel(time=..., method="nearest")` in
# iterate_grid; its first and last entries are also written to the metadata text
# file as the "Time Range".
pre_flood = ['2024-02', '2024-03', '2024-04']
flood = ['2024-05', '2024-06', '2024-07', '2024-08', '2024-09']

# Run 1. aoi-threshold.ipynb to get the value of th_aoi and store it here.
# th_aoi is compared with the Lee-filtered VH band after conversion to dB
# (10 * log10); pixels strictly below it are classified as water.
th_aoi = -27.395682

### G-Drive Function Definitions

In [14]:
def create_user_token():
    """Return Google Drive user credentials, creating or refreshing them if needed.

    Cached credentials are read from ``u_credential_path`` when that file exists.
    If they are missing or not valid, they are either refreshed (expired
    credentials that carry a refresh token) or obtained through the installed-app
    OAuth flow using the client secrets at ``credential_path``. The OAuth flow
    starts a local server, so the first run has to happen where that is possible.

    Returns:
        The credentials object to pass to ``build(..., credentials=...)``.
    """
    creds = None
    if os.path.exists(u_credential_path):
        creds = Credentials.from_authorized_user_file(u_credential_path, SCOPES)

    # Cached credentials are still usable: nothing to refresh and nothing to
    # rewrite on disk (this function is called for every upload batch).
    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(credential_path, SCOPES)
        creds = flow.run_local_server(port=0)

    # Save the refreshed / newly issued credentials for the next run.
    with open(u_credential_path, "w") as token:
        token.write(creds.to_json())
    return creds


def create_token():
    """Build service-account credentials from ``credential_path`` for ``SCOPES``.

    Flagged by the original author as due to be deprecated.

    NOTE(review): ``credential_path`` points at ``credentials_2.json``, which the
    configuration comment describes as the Desktop-app file rather than the
    service-account file (``credentials.json``) - confirm this is the intended key.
    """
    return service_account.Credentials.from_service_account_file(
        credential_path, scopes=SCOPES
    )


def list_gdrive():
    """Print the name and ID of up to 20 files returned by a Drive ``files().list`` call.

    Uses the user credentials from ``create_user_token``. Prints "No files found."
    when the response has no files. An ``HttpError`` is printed rather than raised.
    Always returns ``None``.
    """
    u_creds = create_user_token()
    try:
        drive = build("drive", "v3", credentials=u_creds)
        request = drive.files().list(
            pageSize=20, fields="nextPageToken, files(id, name)"
        )
        listing = request.execute()
        items = listing.get("files", [])
        if items:
            print("Files:")
            for item in items:
                print(f"{item['name']} ({item['id']})")
        else:
            print("No files found.")
    except HttpError as error:
        print(f"An error occurred: {error}")


def upload_to_gdrive(file_paths, folder_id=None):
    """Upload local files to Google Drive, deleting each file once it has uploaded.

    Args:
        file_paths: Iterable of local file paths. A single path given as a string
            or ``os.PathLike`` is treated as a one-element list.
        folder_id: ID of the destination Drive folder. When ``None`` the request
            carries no ``parents`` entry instead of sending ``[None]``.

    Returns:
        None. An ``HttpError`` is printed and the affected file is skipped (and
        kept on disk); it is not re-raised, so callers cannot detect a failed
        upload through an exception. Other exception types propagate.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(file_paths, (str, os.PathLike)):
        file_paths = [file_paths]

    u_creds = create_user_token()
    try:
        # Create the g-drive API client once for the whole batch, using the
        # desktop app and user credentials.
        service = build("drive", "v3", credentials=u_creds)
    except HttpError as error:
        print(f"An error occurred: {error}")
        return

    for f_path in file_paths:
        f_name = os.path.basename(f_path)
        file_metadata = {"name": f_name}
        if folder_id is not None:
            file_metadata["parents"] = [folder_id]

        try:
            # 5 MB chunk size for the resumable upload
            media = MediaFileUpload(f_path, chunksize=5242880, resumable=True)
            # pylint: disable=maybe-no-member
            service.files().create(body=file_metadata, media_body=media).execute()
        except HttpError as error:
            print(f"An error occurred: {error}")
            continue

        print("\033[32m" + "{} UPLOADED SUCCESSFULLY".format(f_name) + "\033[0m")

        # Delete from sandbox to save disc space
        os.remove(f_path)
    return

# TODO: Add delete function

### Filter and Classifier Functions

In [7]:
# Function to apply lee filtering on S1 image. Speckle Filter
def lee_filter(da, size):
    """
    Apply lee filter of specified window size.
    Adapted from https://stackoverflow.com/questions/39785970/speckle-lee-filter-in-python

    Args:
        da: Object exposing the image as an array through ``.values``.
        size: Window size passed to ``uniform_filter`` for the local statistics.

    Returns:
        Array (not a DataArray) with the same shape as ``da.values``: the local
        mean plus the deviation from it, scaled by
        ``local_variance / (local_variance + overall_variance)``.
        Where that denominator is exactly zero (e.g. a constant image) the weight
        is taken as 0, so the output is the local mean rather than NaN.
    """
    img = da.values
    img_mean = uniform_filter(img, size)
    img_sqr_mean = uniform_filter(img**2, size)
    img_variance = img_sqr_mean - img_mean**2

    overall_variance = variance(img)

    # Guarded division: avoids 0/0 -> NaN on zero-variance input while leaving
    # every other pixel's weight unchanged.
    denominator = img_variance + overall_variance
    img_weights = np.divide(
        img_variance,
        denominator,
        out=np.zeros_like(denominator),
        where=denominator != 0,
    )
    img_output = img_mean + img_weights * (img - img_mean)

    return img_output

# Classifier Function
def S1_water_classifier(da, threshold):
    """Flag values strictly below ``threshold`` and wrap the mask in a dataset.

    Args:
        da: Array-like supporting ``<`` whose comparison result has ``to_dataset``.
        threshold: Cut-off value; entries below it are marked ``True``.

    Returns:
        The result of ``to_dataset`` with the mask stored under "s1_water".
    """
    return (da < threshold).to_dataset(name="s1_water")

In [8]:
# TODO - Add mean and median generators as functions

In [9]:
def _print_status(message, color_code):
    """Print ``message`` after two blank lines, wrapped in the given ANSI colour code."""
    print("\n\n" + color_code + message + "\033[0m")


def _export_period_mean(water, months, label, out_dir, cell_id, lat_range, lon_range, crs):
    """Write one sub-period's mean water raster and its metadata text file.

    Args:
        water: Per-time-step water classification (``S1.water``).
        months: Time labels handed to ``sel(time=..., method="nearest")``; the
            first and last are written to the metadata as the time range.
        label: Sub-period tag used in the file names ("PRE_FLOOD" or "FLOOD").
        out_dir: Output directory including the trailing slash; created if missing.
        cell_id: Grid cell ID used in the file names and the metadata header.
        lat_range: Latitude bounds written to the metadata file.
        lon_range: Longitude bounds written to the metadata file.
        crs: Coordinate reference system written to the metadata file.

    Returns:
        ``[raster_path, meta_path]`` - the two files that were written.
    """
    os.makedirs(out_dir, exist_ok=True)

    # NOTE(review): with method="nearest" this presumably selects one acquisition
    # per listed label (the one closest to it) rather than every scene in the
    # sub-period - confirm that is the intended definition of the period mean.
    period_mean = water.sel(time=months, method="nearest").mean(dim="time")

    stem = "CELL_" + str(cell_id) + "_" + label + "_MEAN"
    raster_path = out_dir + stem + ".tif"
    period_mean.rio.to_raster(raster_path)

    meta_text = (
        "### Meta Data - GRID CELL ID = " + str(cell_id) + " ###" + "\n"
        + "Time Range: " + months[0] + " - " + months[-1] + "\n"
        + "Lat Range: " + str(lat_range) + " Lon Range: " + str(lon_range) + "\n"
        + "Coordinate Reference System: " + str(crs)
    )
    meta_path = out_dir + stem + "_META.txt"
    with open(meta_path, mode="w") as f:
        f.write(meta_text)

    return [raster_path, meta_path]


def iterate_grid(aoi_m, c):
    """Classify water for every grid cell and upload pre-flood / flood mean rasters.

    For each area of interest: load Sentinel-1 ``vv``/``vh`` for ``timerange``,
    Lee-filter VH per time step, convert to dB, threshold with ``th_aoi``, then
    write and upload the mean water raster plus a metadata text file for the
    ``pre_flood`` and ``flood`` sub-periods.

    Args:
        aoi_m: List of GeoJSON-like dicts; ``aoi["features"][0]["geometry"]`` is
            used as the cell polygon (EPSG:4326).
        c: List parallel to ``aoi_m``; element ``[2]`` of each entry is the cell ID.

    Returns:
        ``e_log``: list of ``[centroid_x, centroid_y, cell_id, code]`` entries,
        where ``code`` is "P" (data loading failed), "U-PRF" (pre-flood upload
        raised) or "U-F" (flood upload raised). Empty when nothing was logged.
    """
    PRF_FOLDER_ID = REF_TEST_PRF_ID
    F_FOLDER_ID = REF_TEST_F_ID

    # (file-name label, time labels, output directory, Drive folder, e_log code)
    periods = (
        ("PRE_FLOOD", pre_flood, "output/preflood/", PRF_FOLDER_ID, "U-PRF"),
        ("FLOOD", flood, "output/flood/", F_FOLDER_ID, "U-F"),
    )

    e_log = []
    total = len(aoi_m)

    # `cell` is the 1-based position in the grid; enumerate keeps it in step with
    # the loop even when a cell fails.
    for cell, (aoi, i) in enumerate(zip(aoi_m, c), start=1):
        cell_id = i[2]
        geopolygon = Geometry(aoi["features"][0]["geometry"], crs="epsg:4326")
        geopolygon_gdf = gpd.GeoDataFrame(geometry=[geopolygon], crs=geopolygon.crs)
        g = geopolygon_gdf.centroid
        centroid_x, centroid_y = g.x[0], g.y[0]
        position = "GRID CELL ID {} NO. {}/{} CENTROID ({}, {})".format(
            cell_id, cell, total, round(centroid_y, 5), round(centroid_x, 5)
        )
        _print_status("PROCESSING " + position, "\033[32m")

        # Get the latitude and longitude range of the geopolygon
        bounds = geopolygon_gdf.total_bounds
        lat_range = (bounds[1], bounds[3])
        lon_range = (bounds[0], bounds[2])

        # Load Sentinel1 data
        try:
            S1 = load_ard(
                dc=dc,
                products=["s1_rtc"],
                measurements=["vv", "vh"],
                y=lat_range,
                x=lon_range,
                time=timerange,
                output_crs="EPSG:6933",
                resolution=(-20, 20),
                group_by="solar_day",
                dtype="native",
            )
        except Exception as e:
            # Log error aoi centroids and keep looping
            e_log.append([centroid_x, centroid_y, cell_id, "P"])
            _print_status(
                "ERROR PROCESSING " + position + ". LOGGED CENTROID INFO in e_log",
                "\033[31m",
            )
            print(e)
            continue

        # The lee filter above doesn't handle null values
        # We therefore set null values to 0 before applying the filter
        valid = np.isfinite(S1)
        S1 = S1.where(valid, 0)

        # Create a new entry in the dataset holding the filtered VH data
        S1["filtered_vh"] = S1.vh.groupby("time").apply(lee_filter, size=7)

        # Null pixels should remain null
        S1["filtered_vh"] = S1.filtered_vh.where(valid.vh)

        # Convert the digital numbers to dB
        S1["filtered_vh"] = 10 * np.log10(S1.filtered_vh)

        S1["water"] = S1_water_classifier(S1.filtered_vh, th_aoi).s1_water

        # Creating outputs
        # Export to raster - upload to g-drive - delete from sandbox
        for label, months, out_dir, folder_id, error_code in periods:
            outputs = _export_period_mean(
                S1.water, months, label, out_dir, cell_id,
                lat_range, lon_range, geopolygon.crs,
            )
            try:
                # upload_to_gdrive prints HttpError itself, so only other
                # exception types reach this handler.
                upload_to_gdrive(outputs, folder_id)
            except Exception as e:
                e_log.append([centroid_x, centroid_y, cell_id, error_code])
                _print_status(
                    "ERROR UPLOADING " + position + ". LOGGED CENTROID INFO in e_log",
                    "\033[31m",
                )
                print(e)

        clear_output()

    if len(e_log) == 0:
        print("\n\n" + "\033[32m" + "GRID PROCESSED AND UPLOADED SUCCESSFULLY" + "\033[0m" + "\n\n")

    # return e_log to be run again
    return e_log

In [10]:
# Create the AOI mosaic (aoi_m): one area of interest per grid-cell centroid
def gen_aoim(c, b):
    """Build one area of interest per centroid and process the whole grid.

    Args:
        c: List of centroid entries; element ``[1]`` and element ``[0]`` of each
            entry are passed to ``define_area`` as its first and second argument.
        b: Value forwarded to ``define_area`` as ``buffer``.

    Returns:
        The error log returned by ``iterate_grid``, so failed cells can be re-run.
    """
    aoi_m = [define_area(entry[1], entry[0], buffer=b) for entry in c]
    return iterate_grid(aoi_m, c)

In [11]:
# Visualize input file
def view_input(grid, grid_c):
    """Display the input grid on a folium map with a cell-ID label at each centroid.

    Args:
        grid: GeoDataFrame of grid cells; drawn as a GeoJson layer and used to
            centre the map and fit its bounds.
        grid_c: Iterable of centroid entries where ``[0]`` is x, ``[1]`` is y and
            ``[2]`` is the label text shown on the map.

    Returns:
        None. The map is rendered with ``display``.
    """
    # dissolve() merges all cells into one geometry; take its centroid as a
    # scalar point so folium receives plain numbers rather than one-element Series.
    center = grid.dissolve().centroid.iloc[0]
    # Named grid_map to avoid shadowing the builtin `map`.
    grid_map = folium.Map(location=[center.y, center.x], tiles="CartoDB Positron")
    folium.GeoJson(grid).add_to(grid_map)

    for entry in grid_c:
        x, y, cell_id = entry[0], entry[1], entry[2]
        folium.Marker(
            location=[y, x],
            popup=f"Centroid: {y}, {x}",
            icon=folium.DivIcon(
                icon_size=(10, 10),
                icon_anchor=(0, 0),
                html='<div style="font-size: 10pt">{}</div>'.format(cell_id),
            ),
        ).add_to(grid_map)

    # total_bounds is (minx, miny, maxx, maxy); folium expects [lat, lon] corners.
    min_x, min_y, max_x, max_y = grid.total_bounds.tolist()
    grid_map.fit_bounds([[min_y, min_x], [max_y, max_x]])
    display(grid_map)

### Upload Gridded Vector File

In [12]:
# Read the gridded vector file from the sandbox disc; it must be in the 'input' folder.
# Other inputs used during development:
#   "input/Lake Chad.geojson", "input/TCD_55KM_BASE.geojson", "input/TCD_55KM_ERR.geojson"
grid = gpd.read_file("input/TCD_55KM_4CTEST.geojson")

# Build the centroid list c: one [x, y, cell_id, None] entry per grid cell, with
# coordinates rounded to 5 decimals and cell IDs counted from 1.
# The trailing None is reserved for the "P" or "U" error value.
g = grid.centroid
c = [
    [round(point.x, 5), round(point.y, 5), cell_id, None]
    for cell_id, point in enumerate(g, start=1)
]

# For a quick trial run, truncate the list first, e.g. c = c[:10]
view_input(grid, c)


### Run Application

In [15]:
# get e_log with centroids, cell_id and error message
# Calling gen_aoim will run the entire Application
# 0.25 is forwarded as `buffer` to define_area for every centroid
# (presumably in degrees - confirm against the define_area documentation).
e_log = gen_aoim(c, 0.25)

# Number of logged failures; 0 means every cell loaded and uploaded without a logged error.
print(len(e_log))



[32mGRID PROCESSED AND UPLOADED SUCCESSFULLY[0m


0


In [None]:
# Save the error log so that failed cells can be re-run later.
# Rows are converted field by field: passing the mixed-type rows through
# np.array would coerce every value (coordinates and cell ID included) to a
# string before it reached the JSON file.
error_rows = [
    [float(x), float(y), int(cell_id), code] for x, y, cell_id, code in e_log
]
with open('error_centroids.json', 'w') as filehandle:
    json.dump(error_rows, filehandle)

## Write Outputs To Disc

In [None]:
# Mosaic the per-cell GeoTIFFs of each sub-period into
# output/<period>/Merged_<period>.tif. Periods without any tiles are skipped.
periods = ['flood', 'preflood', 'postflood']

for period in periods:
    period_dir = "output/" + period
    merged_path = "output/{}/Merged_{}.tif".format(period, period)
    merged_name = os.path.basename(merged_path)

    # Sorted for a reproducible merge order; the previous mosaic is excluded so
    # that re-running the cell does not merge the output into itself.
    tile_paths = [
        path
        for path in sorted(glob.glob(os.path.join(period_dir, "*.tif")))
        if os.path.basename(path) != merged_name
    ]
    if not tile_paths:
        continue

    tiles = []
    try:
        for path in tile_paths:
            tiles.append(rasterio.open(path))
        mosaic, out_trans = merge(tiles)
        # Profile of the last tile is the template for the mosaic's metadata.
        out_meta = tiles[-1].meta.copy()
    finally:
        # Always release the dataset handles, even if opening or merging failed.
        for tile in tiles:
            tile.close()

    out_meta.update({"driver": "GTiff",
                     "height": mosaic.shape[1],
                     "width": mosaic.shape[2],
                     "transform": out_trans
                     })
    with rasterio.open(merged_path, "w", **out_meta) as dest:
        dest.write(mosaic)
    # The merged raster is kept on disc; it is not uploaded to g-drive here.

In [None]:
# Inspect the Drive account: count the files returned by one listing request
# (up to 1000 IDs) and show the storage quota as the cell output.
creds = create_user_token()
service = build("drive", "v3", credentials=creds)

listing_request = service.files().list(fields="files(id)", pageSize=1000)
results = listing_request.execute()
items = results.get("files")
print(len(items))

quota_request = service.about().get(fields="storageQuota")
storage = quota_request.execute()
storage

In [None]:
# for item in items:
#     try:
#         response = service.files().delete(fileId=item['id']).execute()
#         print('File Deleted')
#     except HttpError as error:
#         print(f"An error occurred: {error}")