In [1]:
import ee
import time
import random
import os
from google.oauth2.credentials import Credentials
from multiprocessing import Pool
import pandas as pd

In [2]:
# Details to get Google Earth Engine API access
# https://www.kaggle.com/code/pcjimmmy/how-to-get-the-secret-and-tokens-and-access
# Each secret is read from an environment variable when it is set, so real values
# never have to be saved inside the notebook. The original placeholder strings are
# kept as fallbacks for anyone who prefers to edit them in place.
credentials = Credentials(
        None,
        refresh_token=os.environ.get("GEE_REFRESH_TOKEN", "MY_REFRESH_TOKEN"),
        token_uri=ee.oauth.TOKEN_URI,
        client_id=os.environ.get("GEE_CLIENT_ID", "MY_CLIENT_ID"),
        client_secret=os.environ.get("GEE_CLIENT_SECRET", "MY_CLIENT_SECRET"),
        scopes=ee.oauth.SCOPES)

In [3]:
# Start the Earth Engine session using the OAuth credentials object created in the previous cell.
ee.Initialize(credentials=credentials)

#### Set the `year_` variable to the year your GEDI L4A data is from.
#### As mentioned before, we downloaded GEDI data for a pre-defined **region**. Provide the GEE path of that region asset in the `pre_defined_region_path_on_gee` variable.
#### We created a merged .shp file of all the downloaded GEDI L4A data and uploaded it to GEE. Provide the GEE path of the merged GEDI L4A data in the `merged_gedi_l4a_path_on_gee` variable.

In [4]:
# Define the datasets
year_ = 2021

# Region polygons, ordered by their "Id" property so that positions are stable.
pre_defined_region_path_on_gee = "projects/ee-mygeeusername/assets/NC_forests_all_time"
all_nc_forests = ee.FeatureCollection(pre_defined_region_path_on_gee).sort("Id")

# Merged GEDI L4A shots uploaded as a table asset.
merged_gedi_l4a_path_on_gee = "projects/ee-mygeeusername/assets/merged_precision_2021"
table = ee.FeatureCollection(merged_gedi_l4a_path_on_gee)

# MODIS image collections.
modis_product = ee.ImageCollection("MODIS/061/MCD43A4")
LAI_FPAR = ee.ImageCollection("MODIS/061/MCD15A3H")
terra_primary_gpp_npp = ee.ImageCollection("MODIS/061/MOD17A3HGF")
aqua_primary_gpp_npp = ee.ImageCollection("MODIS/061/MYD17A3HGF")

# Ancillary layers.
ecoregionsL3 = ee.FeatureCollection("EPA/Ecoregions/2013/L3")
usgs_land_cover = ee.Image("USGS/NLCD_RELEASES/2020_REL/NALCMS")

In [5]:
# Date window used by every filterDate() call in run_mp.
year_to_calculate_start = year_
year_to_calculate_end = year_
start_date = ee.Date.fromYMD(year_to_calculate_start, 1, 1)
# filterDate() treats its end argument as exclusive, so the window has to stop at
# January 1 of the following year; stopping at December 31 would drop that day's images.
end_date = ee.Date.fromYMD(year_to_calculate_end + 1, 1, 1)

#### We saved the data to Google Drive rather than to a GCP bucket, so create a Drive folder named after the value of the `GDRIVE_FOLDER` variable. For example, we created a folder called **MAPPED_GEDI_MODIS_DATA_2021**, and the mapped GEDI-MODIS data was saved there. Make sure you perform all of these Google operations with the same account. By default, GEE has access to your Drive.

In [None]:
# Folder name handed to ee.batch.Export.table.toDrive in run_mp and later listed by get_all_csv.
GDRIVE_FOLDER = "MAPPED_GEDI_MODIS_DATA_2021"

In [6]:
def run_mp(index_to_run):
    """Build the GEDI-to-MODIS mapping for one region polygon and export it to Google Drive.

    The polygon at position ``index_to_run`` of ``all_nc_forests`` is used as the
    region of interest. Mean MODIS indices (June-September and the whole date
    window), GPP/NPP and NASADEM terrain bands are stacked, sampled on the MODIS
    grid, and every sample is annotated with the GEDI shots from ``table`` that
    intersect its bounding box. The result is exported as a CSV into
    ``GDRIVE_FOLDER``.

    Args:
        index_to_run: Zero-based position of the region polygon to process.

    Returns:
        None. Progress, skips and failures are reported with ``print``; exceptions
        are caught and printed rather than propagated.
    """
    try:
        # Random 1-2 second pause before talking to Earth Engine.
        time.sleep(random.randrange(1, 3))
        modis_projection = modis_product.first().projection()

        def to_modis_projection(feature):
            """Re-express a feature's geometry in the MODIS projection."""
            return feature.transform(modis_projection, ee.ErrorMargin(0.001))

        # toList(count, offset) fetches exactly the requested polygon, so no
        # hard-coded upper bound on the collection size is needed.
        merged_forests = ee.FeatureCollection(
            all_nc_forests.map(to_modis_projection).toList(1, index_to_run))
        agbd_table = table.map(to_modis_projection)

        roi = merged_forests.geometry()
        specific_feature = agbd_table.filterBounds(roi).filter(ee.Filter.eq('l4_quality', '1'))
        # Only the count is needed here; downloading every feature with getInfo()
        # is slow and can exceed the client-side element limit.
        if specific_feature.size().getInfo() == 0:
            print("Skipping for index {} since there are no values".format(index_to_run))
            return

        # Define calculation functions
        def calculate_ndvi(image):
            """Add an NDVI band computed from Band2 and Band1."""
            ndvi = image.normalizedDifference(["Nadir_Reflectance_Band2", "Nadir_Reflectance_Band1"])
            return image.addBands(ndvi.rename("NDVI"))

        def calculate_evi(image):
            """Return a single-band EVI image from Band1 (red), Band2 (nir) and Band3 (blue)."""
            G = 2.5
            C1 = 6
            C2 = 7.5
            L = 1
            # Each reflectance band is multiplied by 0.0001 before use.
            red = image.select('Nadir_Reflectance_Band1').multiply(0.0001)
            nir = image.select('Nadir_Reflectance_Band2').multiply(0.0001)
            blue = image.select('Nadir_Reflectance_Band3').multiply(0.0001)
            numerator = nir.subtract(red).multiply(G)
            denominator = nir.add(red.multiply(C1)).subtract(blue.multiply(C2)).add(L)
            return numerator.divide(denominator).rename('EVI')

        def calculate_ndwi(image):
            """Return a single-band NDWI image computed from Band2 and Band5."""
            ndwi = image.normalizedDifference(["Nadir_Reflectance_Band2", "Nadir_Reflectance_Band5"])
            return ndwi.rename("NDWI")

        def clip_function(image):
            """Clip an image to the region of interest."""
            return image.clip(roi)

        def filter_bands_ndvi(image):
            """Keep pixels whose Band1 and Band2 mandatory quality flags equal 0."""
            mask_red = image.select("BRDF_Albedo_Band_Mandatory_Quality_Band1").eq(0)
            mask_nir = image.select("BRDF_Albedo_Band_Mandatory_Quality_Band2").eq(0)
            return image.updateMask(mask_red).updateMask(mask_nir)

        def filter_bands_evi(image):
            """Keep pixels whose Band1, Band2 and Band3 mandatory quality flags equal 0."""
            mask_red = image.select("BRDF_Albedo_Band_Mandatory_Quality_Band1").eq(0)
            mask_nir = image.select("BRDF_Albedo_Band_Mandatory_Quality_Band2").eq(0)
            mask_blue = image.select("BRDF_Albedo_Band_Mandatory_Quality_Band3").eq(0)
            return image.updateMask(mask_nir).updateMask(mask_red).updateMask(mask_blue)

        def filter_bands_ndwi(image):
            """Keep pixels whose Band2 and Band5 mandatory quality flags equal 0."""
            mask_nir = image.select("BRDF_Albedo_Band_Mandatory_Quality_Band2").eq(0)
            mask_swir = image.select("BRDF_Albedo_Band_Mandatory_Quality_Band5").eq(0)
            return image.updateMask(mask_nir).updateMask(mask_swir)

        def mask_clouds_in_fpar(image):
            """Return the Fpar band masked to pixels where bits 3-4 of FparLai_QC are 0."""
            qc = image.select('FparLai_QC')
            cloud_state = qc.bitwiseAnd(int('11000', 2)).rightShift(3)
            mask = cloud_state.eq(0)
            return image.select("Fpar").updateMask(mask)

        def mask_clouds_in_lai(image):
            """Return the Lai band masked to pixels where bits 3-4 of FparLai_QC are 0."""
            qc = image.select('FparLai_QC')
            cloud_state = qc.bitwiseAnd(int('11000', 2)).rightShift(3)
            mask = cloud_state.eq(0)
            return image.select("Lai").updateMask(mask)

        def mean_composite(collection, band, out_name, steps=(), growing_season=False, scale=None):
            """Clip, date-filter, apply per-image steps, then mean-reduce one band on the MODIS grid."""
            images = collection.map(clip_function).filterDate(start_date, end_date)
            if growing_season:
                # Restrict to months 6 through 9.
                images = images.filter(ee.Filter.calendarRange(6, 9, 'month'))
            for step in steps:
                images = images.map(step)
            composite = images.select(band).reduce(ee.Reducer.mean())
            if scale is not None:
                composite = composite.multiply(scale)
            return composite.rename(out_name).reproject(crs=modis_projection)

        fpar_source = LAI_FPAR.select("Fpar", "FparLai_QC")
        lai_source = LAI_FPAR.select("Lai", "FparLai_QC")

        # Compute mean values for each product (June-September only)
        modis_ndvi_mean = mean_composite(
            modis_product, "NDVI", "NDVI_mean", (filter_bands_ndvi, calculate_ndvi), growing_season=True)
        modis_evi_mean = mean_composite(
            modis_product, "EVI", "EVI_mean", (filter_bands_evi, calculate_evi), growing_season=True)
        modis_ndwi_mean = mean_composite(
            modis_product, "NDWI", "NDWI_mean", (filter_bands_ndwi, calculate_ndwi), growing_season=True)
        modis_fpar_mean = mean_composite(
            fpar_source, "Fpar", "Fpar_mean", (mask_clouds_in_fpar,), growing_season=True)
        modis_lai_mean = mean_composite(
            lai_source, "Lai", "Lai_mean", (mask_clouds_in_lai,), growing_season=True)

        # Similarly compute annual means
        modis_ndvi_mean_annual = mean_composite(
            modis_product, "NDVI", "NDVI_mean_annual", (filter_bands_ndvi, calculate_ndvi))
        modis_evi_mean_annual = mean_composite(
            modis_product, "EVI", "EVI_mean_annual", (filter_bands_evi, calculate_evi))
        modis_ndwi_mean_annual = mean_composite(
            modis_product, "NDWI", "NDWI_mean_annual", (filter_bands_ndwi, calculate_ndwi))
        modis_fpar_mean_annual = mean_composite(
            fpar_source, "Fpar", "Fpar_mean_annual", (mask_clouds_in_fpar,))
        modis_lai_mean_annual = mean_composite(
            lai_source, "Lai", "Lai_mean_annual", (mask_clouds_in_lai,))

        # GPP / NPP means are multiplied by 0.0001 after the reduction.
        terra_gpp_mean_annual = mean_composite(
            terra_primary_gpp_npp.select("Gpp"), "Gpp", "Terra_gpp_mean_annual", scale=0.0001)
        aqua_gpp_mean_annual = mean_composite(
            aqua_primary_gpp_npp.select("Gpp"), "Gpp", "Aqua_gpp_mean_annual", scale=0.0001)
        terra_npp_mean_annual = mean_composite(
            terra_primary_gpp_npp.select("Npp"), "Npp", "Terra_npp_mean_annual", scale=0.0001)
        aqua_npp_mean_annual = mean_composite(
            aqua_primary_gpp_npp.select("Npp"), "Npp", "Aqua_npp_mean_annual", scale=0.0001)

        # DEM processing: keep elevations above 0 and derive slope and aspect.
        elevation = ee.Image('NASA/NASADEM_HGT/001').select('elevation')
        dem = elevation.updateMask(elevation.gt(0)).clip(roi)
        slope = ee.Terrain.slope(dem)
        aspect = ee.Terrain.aspect(dem)

        def to_modis_grid(image):
            """Average a terrain image onto the MODIS projection."""
            return image.reduceResolution(
                reducer=ee.Reducer.mean(), maxPixels=1024*10).reproject(crs=modis_projection)

        dem_resampled = to_modis_grid(dem)
        slope_resampled = to_modis_grid(slope)
        aspect_resampled = to_modis_grid(aspect)

        # Combine images
        combined_image = modis_ndvi_mean.addBands([
            modis_evi_mean, modis_ndwi_mean, modis_fpar_mean, modis_lai_mean,
            dem_resampled, slope_resampled, aspect_resampled,
            modis_ndvi_mean_annual, modis_evi_mean_annual, modis_ndwi_mean_annual,
            modis_fpar_mean_annual, modis_lai_mean_annual, terra_gpp_mean_annual,
            aqua_gpp_mean_annual, terra_npp_mean_annual, aqua_npp_mean_annual
        ])

        # Mask the stack wherever the slope exceeds 30.
        slope_mask = slope.lte(30).eq(1)
        slope_mask_reproj = slope_mask.reproject(crs=slope.projection())
        combined_image = combined_image.updateMask(slope_mask_reproj)

        # Feature processing
        def error_mask(feature):
            """Flag a shot as valid when agbd_se / agbd is at most 0.5."""
            # 'agbd' goes through ee.Number.parse while 'agbd_se' is used directly.
            agbd = ee.Number.parse(feature.get('agbd'))
            agbd_se = ee.Number(feature.get('agbd_se'))
            relative_se = agbd_se.divide(agbd)
            valid = relative_se.lte(0.5)
            return feature.set('valid', valid)

        def buffer_agbd_points(feature):
            """Replace a shot by the bounding box of its 12.5-unit buffer."""
            # presumably the 12.5 m radius of a GEDI footprint -- TODO confirm
            return feature.buffer(12.5).bounds()

        filtered_features = specific_feature.map(error_mask).filter(ee.Filter.eq('valid', 1))
        filtered_features = filtered_features.map(buffer_agbd_points)

        modis_points = combined_image.sample(region=roi, projection=modis_projection, geometries=True)

        def bounding_circle_func(feature):
            """Buffer a sampled point by 231.5 units."""
            # presumably half the width of a MODIS pixel -- TODO confirm
            return feature.buffer(231.5)

        def bounding_box_func(feature):
            """Replace a feature by its bounding box."""
            return feature.bounds()

        bounding_circles = modis_points.map(bounding_circle_func)
        bounding_boxes = bounding_circles.map(bounding_box_func)

        def calculate_overlap_percentage(modis_feature, agbd_feature):
            """Store on the shot the fraction of its area that lies inside the MODIS box."""
            modis_geometry = modis_feature.geometry()
            agbd_geometry = agbd_feature.geometry().transform(modis_geometry.projection(), ee.ErrorMargin(0.001))
            intersection = modis_geometry.intersection(agbd_geometry, ee.ErrorMargin(0.001))
            intersection_area = intersection.area(ee.ErrorMargin(0.001))
            gedi_area = agbd_geometry.area(ee.ErrorMargin(0.001))
            overlap_percentage = intersection_area.divide(gedi_area)
            return agbd_feature.set('overlap_percentage', overlap_percentage)

        # Sampled band values that are written back onto each output feature.
        sampled_band_names = [
            'NDVI_mean', 'EVI_mean', 'NDWI_mean', 'Fpar_mean', 'Lai_mean',
            'elevation', 'aspect', 'slope',
            'NDVI_mean_annual', 'EVI_mean_annual', 'NDWI_mean_annual',
            'Fpar_mean_annual', 'Lai_mean_annual',
            'Terra_gpp_mean_annual', 'Aqua_gpp_mean_annual',
            'Terra_npp_mean_annual', 'Aqua_npp_mean_annual',
        ]

        def sample_points_from_feature_with_all(feature):
            """Attach ecoregion keys and intersecting GEDI shots to one MODIS box, or return null."""
            sampled_points = filtered_features.filterBounds(feature.geometry())
            valid_sampled_points = sampled_points.map(
                lambda point: calculate_overlap_percentage(feature, point))
            ecoregion_matches = ecoregionsL3.filterBounds(feature.geometry())

            def ecoregion_key(key):
                """Read `key` from the first intersecting ecoregion, or 'None' when there is none."""
                return ee.Algorithms.If(
                    ecoregion_matches.size().gt(0),
                    ecoregion_matches.first().get(key),
                    ee.String('None')
                )

            properties = {
                'Ecoregion_l1': ecoregion_key('l1_key'),
                'Ecoregion_l2': ecoregion_key('l2_key'),
                'Ecoregion_l3': ecoregion_key('l3_key'),
                'degrade_flag': valid_sampled_points.aggregate_array('degrade_fl'),
            }
            properties.update({band: feature.get(band) for band in sampled_band_names})
            properties.update({
                'agbd_points': valid_sampled_points.aggregate_array('agbd'),
                'land_cover_points': valid_sampled_points.aggregate_array('land_cover'),
                'overlap': valid_sampled_points.aggregate_array('overlap_percentage'),
            })
            # Boxes without any GEDI shot map to null and are dropped by map(..., True).
            return ee.Algorithms.If(
                valid_sampled_points.size().gt(0),
                feature.set(properties),
                None
            )

        sampled_points_collection_with_all = bounding_boxes.map(sample_points_from_feature_with_all, True)

        # As above, ask only for the element count instead of the whole collection.
        if sampled_points_collection_with_all.size().getInfo() == 0:
            print("Skipping for index {} since there are no values".format(index_to_run))
            return

        fileName = 'Python_sample_points_{}'.format(str(index_to_run))
        description = 'exported_sampledPoints_with_NDVI_{}'.format(str(index_to_run))
        export_task = ee.batch.Export.table.toDrive(
            collection=sampled_points_collection_with_all,
            description=description,
            folder=GDRIVE_FOLDER,
            fileNamePrefix=fileName,
            fileFormat='CSV'
        )
        export_task.start()
        while export_task.active():
            time.sleep(10)

        # An inactive task is not necessarily a successful one: report the real outcome.
        final_status = export_task.status()
        final_state = final_status.get("state")
        if final_state == "COMPLETED":
            print("task completed for {}".format(index_to_run))
        else:
            print("task failed for {} with {}".format(
                index_to_run, final_status.get("error_message", final_state)))
    except Exception as exp:
        print("task failed for {} with {}".format(index_to_run, exp))


In [7]:
# Half-open index range [start, limit_range) of region polygons to process.
start = 0
# size().getInfo() fetches the number of region polygons from Earth Engine.
limit_range = int(all_nc_forests.size().getInfo())

In [None]:
# Fan run_mp out over a pool of worker processes, one job per region index.
no_of_prc = 15
matrix = [*range(start, limit_range)]
if __name__ == '__main__':
  try:
    with Pool(processes=no_of_prc) as pool:
      pool.map(run_mp, matrix)
  except Exception as pool_error:
    print(pool_error)
    print("Error in something")

In [None]:
def get_all_csv(folder):
    """Return the paths of the ``.csv`` files in ``folder`` ordered by their numeric suffix.

    File names are expected to end in ``_<index>.csv``. Entries that are not CSV
    files are ignored, so stray files no longer break the integer sort key. CSV
    files whose suffix is not an integer are kept and placed after the numbered
    ones, ordered by name.

    Args:
        folder: Directory to list.

    Returns:
        A list of ``os.path.join(folder, name)`` paths.
    """
    def sort_key(file_name):
        # Same parsing as before: text after the last "_" and before ".csv".
        suffix = file_name.split("_")[-1].split(".csv")[0]
        try:
            return (0, int(suffix), file_name)
        except ValueError:
            return (1, 0, file_name)

    csv_names = [name for name in os.listdir(folder) if name.endswith(".csv")]
    return [os.path.join(folder, name) for name in sorted(csv_names, key=sort_key)]

# get_all_csv lists this name with os.listdir, so it must resolve to a local directory
# holding the exported CSVs (NOTE(review): presumably a mounted or downloaded copy of the Drive folder).
csv_2021 = get_all_csv(GDRIVE_FOLDER)

In [None]:
def read_csv(csv_list):
    """Load one batch of CSV files.

    Args:
        csv_list: Iterable of file paths; paths not ending in ``.csv`` are skipped.

    Returns:
        A list with one DataFrame per CSV path, in input order.
    """
    loaded_frames = [pd.read_csv(path) for path in csv_list if path.endswith('.csv')]
    print("Finished doing batch")
    return loaded_frames


def merge_csv(all_csv, processes = 4):
    """Read the given CSV files in parallel batches and merge them into one DataFrame.

    Args:
        all_csv: List of CSV file paths.
        processes: Number of worker processes in the pool.

    Returns:
        Whatever ``merge_dataframe`` returns for the per-batch lists of DataFrames.
    """
    # Floor division yields 0 when there are fewer files than processes, and a
    # step of 0 makes range() raise ValueError, so use at least one file per batch.
    batch_size = max(1, len(all_csv) // processes)
    all_batches = [all_csv[i:i + batch_size] for i in range(0, len(all_csv), batch_size)]
    with Pool(processes=processes) as pool:
        dataframes = pool.map(read_csv, all_batches)
    return merge_dataframe(dataframes)


def merge_dataframe(dataframes):
    """Concatenate batches of DataFrames into a single DataFrame.

    Args:
        dataframes: Iterable of batches, each batch being a list of DataFrames.

    Returns:
        One DataFrame with a fresh integer index holding all rows in batch order,
        or ``None`` when there is no DataFrame at all.
    """
    # Flatten first and concatenate once: an empty batch would make pd.concat
    # raise, and concatenating repeatedly in a loop copies the data every time.
    frames = [frame for batch in dataframes for frame in batch]
    if not frames:
        return None
    return pd.concat(frames, ignore_index=True)

#### Define the `FOLDER_PATH_TO_SAVE_MERGED_CSV_2021` folder variable. This is where the CSVs for your `year` will be merged and stored as one single CSV file.
#### Repeat all the steps up to and including this notebook for each of the years 2019, 2020, and 2021.
#### Your final CSV file should contain the merged data from 2019, 2020, and 2021. We train one single model on all 3 years combined.

In [None]:
# Merge every per-region CSV for this year and write the result as one file.
FOLDER_PATH_TO_SAVE_MERGED_CSV_2021 = "MERGED_FOLDER_PATH_FOR_CSV"
os.makedirs(FOLDER_PATH_TO_SAVE_MERGED_CSV_2021, exist_ok=True)
if __name__ == "__main__":
  merged_csv_path = os.path.join(FOLDER_PATH_TO_SAVE_MERGED_CSV_2021, "merged_2021_overlap.csv")
  dataframes = merge_csv(csv_2021, processes=32)
  dataframes.to_csv(merged_csv_path, index=False)