In [None]:
import ee
import pandas as pd 
import numpy as np
from typing import List
# Interactive OAuth flow: prompts for a verification code and saves an authorization token.
ee.Authenticate()

To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://accounts.google.com/o/oauth2/auth?client_id=517222506229-vsmmajv00ul0bs7p89v5m89qs8eb9359.apps.googleusercontent.com&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fearthengine+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdevstorage.full_control&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&response_type=code&code_challenge=IRtEtWmlj9VR5BwP6uhTMJ7YiQJGbnErFD-qQ-i4HOg&code_challenge_method=S256

The authorization workflow will generate a code, which you should paste in the box below. 
Enter verification code: [REDACTED]

Successfully saved authorization token.


In [None]:
# Start the Earth Engine client session; every ee.* call below depends on it.
ee.Initialize()

In [None]:
def _extract_and_append_date(image: ee.Image, input_list: ee.List) -> ee.List:
    """Accumulator step: return ``input_list`` with ``image``'s date appended.

    Shaped for use as the callback of ``ee.ImageCollection.iterate``, where
    ``input_list`` is the running result from the previous step.
    """
    acquisition_date = ee.Date(image.date())
    accumulated = ee.List(input_list)
    return accumulated.add(acquisition_date)


def _create_yearly_list(collection: ee.ImageCollection) -> ee.List:
    """Return the sorted, de-duplicated list of years covered by ``collection``.

    The dates are gathered server-side by iterating over the collection, then
    reduced to their year component.
    """

    def _year_of(date):
        return ee.Date(date).get("year")

    all_dates = ee.List(collection.iterate(_extract_and_append_date, ee.List([])))
    return all_dates.map(_year_of).distinct().sort()


def _extract_medoid_image(
    year: int,
    collection: ee.ImageCollection,
    start_day: str = "06-20",
    end_day: str = "09-10",
) -> ee.Image:
    """Build the pixel-wise medoid composite of ``collection`` for one year.

    Images acquired between ``start_day`` and ``end_day`` of ``year`` are
    compared with their per-band median. For every pixel, the observation with
    the smallest sum of squared band differences from that median is kept.
    All bands are weighted equally.

    Open question (from the original author): for a deforestation analysis,
    would weighting the NIR and SWIR bands more heavily do better?

    Parameters
    ----------
    year: int
      The year for which the medoid should be calculated.
    collection: ee.ImageCollection
      The collection whose medoid should be calculated.
    start_day: str
      First day of the window (inclusive), formatted as 'mm-dd'.
    end_day: str
      Last day of the window (inclusive), formatted as 'mm-dd'.

    Returns
    -------
    ee.Image
      A 7-band image: band 0 is the minimum squared distance, bands 1-6 are
      the bands of the winning observation. ``system:time_start`` is set to
      August 1st of ``year``.
    """
    start_month, start_dom = start_day.split("-")
    end_month, end_dom = end_day.split("-")

    window_start = ee.Date.fromYMD(ee.Number(year), int(start_month), int(start_dom))
    # filterDate excludes its end bound, so push it one day past end_day.
    window_end = ee.Date.fromYMD(
        ee.Number(year), int(end_month), int(end_dom)
    ).advance(1, "day")
    in_window = collection.filterDate(window_start, window_end)

    # A year without any image would break the reducers below; substitute a
    # fully masked six-band placeholder in that case.
    placeholder = ee.ImageCollection([ee.Image([0, 0, 0, 0, 0, 0]).mask(ee.Image(0))])
    has_images = in_window.toList(1).length().gt(0)
    candidates = ee.ImageCollection(
        ee.Algorithms.If(has_images, in_window, placeholder)
    )
    band_median = candidates.median()

    def _with_squared_distance(image: ee.Image) -> ee.Image:
        # Prepend the distance so it becomes the band that Reducer.min ranks on.
        squared_diff = ee.Image(image).subtract(band_median).pow(ee.Image.constant(2))
        return squared_diff.reduce("sum").addBands(image)

    scored = ee.ImageCollection(candidates.map(_with_squared_distance))
    # min(7): pick, per pixel, the 7-band tuple whose first band is smallest.
    medoid = scored.reduce(ee.Reducer.min(7))
    return medoid.set("system:time_start", ee.Date.fromYMD(year, 8, 1).millis())


def _generate_medoid_collection(
    collection: ee.ImageCollection, start_day: str, end_day: str
) -> ee.ImageCollection:
    """Compute one medoid image per year found in ``collection``.

    Parameters
    ----------
    collection: ee.ImageCollection
      The collection whose yearly medoids should be calculated.
    start_day: str
      First day of the yearly window (inclusive), formatted as 'mm-dd'.
    end_day: str
      Last day of the yearly window (inclusive), formatted as 'mm-dd'.

    Returns
    -------
    ee.ImageCollection
      A collection of yearly medoid images for the provided collection.
    """
    yearly_medoids = _create_yearly_list(collection).map(
        lambda year: _extract_medoid_image(year, collection, start_day, end_day)
    )
    return ee.ImageCollection.fromImages(yearly_medoids)


def _mask_landsat_sr(image: ee.Image) -> ee.Image:
    """Mask water, cloud-shadow, snow, and cloud pixels of a Landsat SR image.

    The decision is read from the image's ``pixel_qa`` band: a pixel stays
    valid only when bits 2 (water), 3 (cloud shadow), 4 (snow), and 5 (cloud)
    are all unset.
    """
    pixel_qa = image.select("pixel_qa")
    # Bit positions in pixel_qa that flag a pixel as unusable.
    rejected_bits = {"water": 2, "cloud_shadow": 3, "snow": 4, "cloud": 5}

    keep = None
    for bit_position in rejected_bits.values():
        bit_is_clear = pixel_qa.bitwiseAnd(1 << bit_position).eq(0)
        keep = bit_is_clear if keep is None else keep.And(bit_is_clear)

    return image.updateMask(keep)


def _prepare_images(
    image: ee.Image, input_bands: List[str], output_bands: List[str]
) -> ee.Image:
    """Resample, QA-mask, and rename the bands of a single Landsat image.

    The image is flagged for bilinear resampling, its ``system:time_start`` is
    copied over explicitly, the QA mask is applied, and finally
    ``input_bands`` are selected and renamed to ``output_bands``.
    Intended to be specialised per sensor (TM and OLI collections).

    Parameters
    ----------
    image: ee.Image
      The image to prepare.
    input_bands: List[str]
      The bands to select from the image.
    output_bands: List[str]
      The labels for the selected bands in the output image.

    Returns
    -------
    ee.Image
      The input image, resampled, with QA mask applied, and with bands renamed.
    """
    acquisition_time = image.get("system:time_start")
    bilinear = image.resample("bilinear").set("system:time_start", acquisition_time)
    masked = _mask_landsat_sr(bilinear)
    return masked.select(input_bands, output_bands)


def _build_TM_collection(
    sensor: str,
    aoi: ee.Geometry,
    start_year: int = 1985,
    start_day: str = "06-20",
    end_year: int = 2020,
    end_day: str = "09-10",
) -> ee.ImageCollection:
    """Given the sensor, area of interest, and date boundaries, return a collection
    of TM (or ETM+) Landsat images. Intended for use on Landsat 5 and Landsat 7
    images.

    Note that the date filter spans the whole interval from
    ``start_year``-``start_day`` to ``end_year``-``end_day``; it does not
    restrict each year to the seasonal window.

    Parameters
    ----------
    sensor: str
      The sensor to use -- expects one of 'LT05' or 'LE07'.
    aoi: ee.Geometry
      The area of interest for the collection. This will be used in a filterBounds
      call on the collection to reduce size.
    start_year: int
      The first year (inclusive) to get data.
    start_day: str
      The first day (inclusive) to get data. Formatted as 'mm-dd'.
    end_year: int
      The last year (inclusive) to get data.
    end_day: str
      The last day (inclusive) to get data. Formatted as 'mm-dd'.

    Returns
    -------
    ee.ImageCollection
      The filtered collection, QA-masked, with bands B1-B5 and B7 selected.
    """
    # TM/ETM+ band names already match the target naming, so no renaming occurs.
    tm_bands = ["B1", "B2", "B3", "B4", "B5", "B7"]

    def _prepare_TM(image: ee.Image) -> ee.Image:
        return _prepare_images(
            image=image, input_bands=tm_bands, output_bands=tm_bands
        )

    # NOTE(review): Collection 1 ("C01") Landsat assets are believed to be
    # deprecated in Earth Engine -- confirm this ID is still served.
    collection = (
        ee.ImageCollection(f"LANDSAT/{sensor}/C01/T1_SR")
        .filterBounds(aoi)
        .filterDate(
            ee.Date(f"{start_year}-{start_day}"),
            # filterDate excludes its end bound; add a day to keep end_day.
            ee.Date(f"{end_year}-{end_day}").advance(1, "day"),
        )
    )
    return collection.map(_prepare_TM)


def _build_OLI_collection(
    sensor: str,
    aoi: ee.Geometry,
    start_year: int = 1985,
    start_day: str = "06-20",
    end_year: int = 2020,
    end_day: str = "09-10",
) -> ee.ImageCollection:
    """Given the sensor, area of interest, and date boundaries, return a collection
    of TM-equivalent Landsat images from an OLI Landsat collection. Intended for
    use on Landsat 8 images.

    This function applies a linear transformation to harmonize Landsat 7 (ETM) and
    Landsat 8 (OLI) images. Transformation developed by Roy et al. and implemented
    by Justin Braaten at:
    https://github.com/eMapR/LT-GEE/blob/master/scripts/python/lt_gee_bap_test.py#L60

    Parameters
    ----------
    sensor: str
      The sensor to use -- expects 'LC08'.
    aoi: ee.Geometry
      The area of interest for the collection. This will be used in a filterBounds
      call on the collection to reduce size.
    start_year: int
      The first year (inclusive) to get data.
    start_day: str
      The first day (inclusive) to get data. Formatted as 'mm-dd'.
    end_year: int
      The last year (inclusive) to get data.
    end_day: str
      The last day (inclusive) to get data. Formatted as 'mm-dd'.

    Returns
    -------
    ee.ImageCollection
      The filtered collection with OLI bands scaled and renamed to TM-equivalents.
    """

    def _harmonization_Roy(image: ee.Image) -> ee.Image:
        """Taken verbatim from Justin Braaten's implementation at:
        https://github.com/eMapR/LT-GEE/blob/b0e92a0c198bdd1a794e1e9b8f4db8fc7fa06054/scripts/python/lt_gee_bap_test.py#L60"""
        slopes = ee.Image.constant([0.9785, 0.9542, 0.9825, 1.0073, 1.0171, 0.9949])
        intercepts = ee.Image.constant(
            [-0.0095, -0.0016, -0.0022, -0.0021, -0.0030, 0.0029]
        )

        # Per band: (value - intercept * 10000) / slope, stored as 16-bit ints.
        # Band arithmetic drops image properties, so the timestamp is re-attached.
        return (
            image.subtract(intercepts.multiply(10000))
            .divide(slopes)
            .toShort()
            .set("system:time_start", image.get("system:time_start"))
        )

    def _prepare_OLI(image: ee.Image) -> ee.Image:
        # OLI bands B2-B7 are relabelled to the TM band names B1-B5, B7.
        return _prepare_images(
            image=image,
            input_bands=["B2", "B3", "B4", "B5", "B6", "B7"],
            output_bands=["B1", "B2", "B3", "B4", "B5", "B7"],
        )

    # NOTE(review): Collection 1 ("C01") Landsat assets are believed to be
    # deprecated in Earth Engine -- confirm this ID is still served.
    collection = (
        ee.ImageCollection(f"LANDSAT/{sensor}/C01/T1_SR")
        .filterBounds(aoi)
        .filterDate(
            ee.Date(f"{start_year}-{start_day}"),
            # filterDate excludes its end bound; add a day to keep end_day.
            ee.Date(f"{end_year}-{end_day}").advance(1, "day"),
        )
    )
    return collection.map(_prepare_OLI).map(_harmonization_Roy)


def _build_combined_Landsat(
    aoi: ee.Geometry,
    start_year: int = 1985,
    start_day: str = "06-20",
    end_year: int = 2020,
    end_day: str = "09-10",
) -> ee.ImageCollection:
    """
    Given an area of interest and date bounds, return a collection containing
    Landsat 5, 7, and 8 images. A linear rescaling is applied to Landsat 8 bands,
    which are from the Operational Land Imager (OLI) instrument, to convert them
    to Thematic Mapper-equivalent values.

    Parameters
    ----------
    aoi: ee.Geometry
      The area of interest for the collection.
    start_year: int
      The first year (inclusive) to get data.
    start_day: str
      The first day (inclusive) to get data. Formatted as 'mm-dd'.
    end_year: int
      The last year (inclusive) to get data.
    end_day: str
      The last day (inclusive) to get data. Formatted as 'mm-dd'.

    Returns
    -------
    ee.ImageCollection
      The TM-equivalent bands from Landsat 5, 7, and 8 for the time period and
      region of interest.
    """
    # The same spatial and temporal filter applies to all three sensors.
    shared_filter = dict(
        aoi=aoi,
        start_year=start_year,
        start_day=start_day,
        end_year=end_year,
        end_day=end_day,
    )

    landsat5 = _build_TM_collection(sensor="LT05", **shared_filter)
    landsat7 = _build_TM_collection(sensor="LE07", **shared_filter)
    landsat8 = _build_OLI_collection(sensor="LC08", **shared_filter)

    return ee.ImageCollection(landsat5.merge(landsat7).merge(landsat8))


def build_SR_collection(
    aoi: ee.Geometry, start_year: int, start_day: str, end_year: int, end_day: str
) -> ee.ImageCollection:
    """
    Build a collection of yearly medoid composites from Landsat 5, 7, and 8
    surface-reflectance images over an area of interest.

    Python attempt at implementing:
    https://emapr.github.io/LT-GEE/api.html#buildsrcollection

    Parameters
    ----------
    aoi: ee.Geometry
      The area of interest for the collection.
    start_year: int
      The first year (inclusive) to get data.
    start_day: str
      The first day (inclusive) to get data. Formatted as 'mm-dd'.
    end_year: int
      The last year (inclusive) to get data.
    end_day: str
      The last day (inclusive) to get data. Formatted as 'mm-dd'.

    Returns
    -------
    ee.ImageCollection
      One medoid image per year, with bands B1, B2, B3, B4, B5, and B7.
    """
    all_sensors = _build_combined_Landsat(
        aoi, start_year, start_day, end_year, end_day
    )
    yearly_medoids = _generate_medoid_collection(all_sensors, start_day, end_day)

    # Band 0 of each medoid image is the distance score; keep only bands 1-6
    # and give them back their TM names.
    reflectance_indices = [1, 2, 3, 4, 5, 6]
    reflectance_names = ["B1", "B2", "B3", "B4", "B5", "B7"]
    return yearly_medoids.select(reflectance_indices, reflectance_names)

In [None]:
# Area of interest: a 0.5 x 0.5 degree box spanning -124.1..-123.6 in the first
# coordinate and 42.88..43.38 in the second.
aoi = ee.Geometry.Polygon(
        [[[-124.1, 43.38],
          [-124.1, 42.88],
          [-123.6, 42.88],
          [-123.6, 43.38]]])

In [None]:
# User-owned Earth Engine assets consumed by the cells below.
# NOTE(review): the hudak_stands_* tables presumably come from the CSVs written
# near the end of this notebook (hudak_stands_2031.csv / hudak_stands_2051.csv) -- confirm.
hudak_sites_2031 = ee.FeatureCollection('users/connorr/hudak_stands_2031')
hudak_sites_2051 = ee.FeatureCollection('users/connorr/hudak_stands_2051')
gedi_canopy_data_fc = ee.FeatureCollection('users/connorr/gediL2A_with_canopy_metrics')
hudak_agb_2016 = ee.Image('users/connorr/agb_2016_hudak')

In [None]:
# Stand properties that can serve as regression targets.
# NOTE(review): this list is not referenced by any later visible cell; the target
# is passed as a literal string instead.
hudak_sites_bands = ee.List(['frac_change_AGB_live', 'frac_change_AGB_total'])

In [None]:
def reduce_features_to_image(fc: ee.FeatureCollection, aoi: ee.Geometry, bands: ee.List):
  """Rasterize selected feature properties into one multi-band image.

  The collection is first restricted to ``aoi``. Each property named in
  ``bands`` is then painted with ``reduceToImage`` (first-value reducer) and
  added as a band named after the property.

  Returns the raw result of ``ee.List.iterate``; callers wrap it in
  ``ee.Image``.
  """
  features_in_aoi = fc.filterBounds(aoi)

  def _append_property_band(property_name, accumulated):
    property_image = features_in_aoi.reduceToImage(properties=[property_name], reducer=ee.Reducer.first())
    return ee.Image(accumulated).addBands(property_image.rename([property_name]))

  # Start from an image with no bands so only the requested properties end up in it.
  bandless_image = ee.Image().select()
  return bands.iterate(_append_property_band, bandless_image)

In [None]:
# GEDI-derived properties to rasterize as predictor bands.
gedi_canopy_bands = ee.List(["canopy_avg", "canopy_min", "canopy_max", "canopy_p10", "canopy_p25", "canopy_p50", "canopy_p75", "canopy_p90", "canopy_std", "d01", "d02", "d03", "d04", "dns"])
# Alternative reduced predictor set (disabled):
# gedi_canopy_bands = ee.List(["canopy_max", "canopy_p10", "canopy_p90", "canopy_std", "d02", "d03", "d04"])

In [None]:
def assemble_input_features(aoi, gedi_collection, gedi_bands, year):
  """Stack the predictor bands for ``year`` over ``aoi``.

  The result contains the rasterized ``gedi_bands`` of ``gedi_collection``, a
  band "NDVI_jul" computed from the July (07-01 to 07-31) Landsat medoid of
  ``year``, and the bands of the notebook-level ``hudak_agb_2016`` image.
  """
  gedi_features = ee.Image(reduce_features_to_image(gedi_collection, aoi, gedi_bands))

  july_medoids = build_SR_collection(aoi, start_year=year, start_day='07-01', end_year=year, end_day='07-31')
  # NDVI from the B4 / B3 bands of the first medoid image.
  ndvi_jul = july_medoids.first().normalizedDifference(["B4", "B3"]).rename("NDVI_jul")

  return gedi_features.addBands(ndvi_jul).addBands(hudak_agb_2016)

In [None]:
def assemble_targets(aoi, hudak_sites, target_band):
  """Rasterize the ``target_band`` property of ``hudak_sites`` within ``aoi``
  and return it as a single-band image."""
  target_image = ee.Image(reduce_features_to_image(hudak_sites, aoi, ee.List([target_band])))
  return target_image.select(target_band)

In [None]:
# X: predictor stack (GEDI canopy bands, July 2020 NDVI, Hudak 2016 AGB).
# Y: target raster, the frac_change_AGB_live property of the 2031 stand table.
X = assemble_input_features(aoi, gedi_canopy_data_fc, gedi_canopy_bands, 2020)
Y = assemble_targets(aoi, hudak_sites_2031, "frac_change_AGB_live")

In [None]:
# Names of every predictor band; passed to the models as their input properties.
bands = X.bandNames()

In [None]:
# Validity masks: where the GEDI canopy_max band has data, and where a stand target exists.
gedi_mask = X.select("canopy_max").mask()
hudak_site_mask = Y.mask()

In [None]:
# Predictors and target in a single image so they can be sampled together.
input_features = X.addBands(Y)

In [None]:
# Two training populations, both restricted to pixels that have a stand target:
#   * with GEDI coverage    -> all bands are available;
#   * without GEDI coverage -> only NDVI_jul and "b1" are used as predictors.
# NOTE(review): "b1" is presumably the band name of hudak_agb_2016 -- confirm.

input_features_w_gedi = input_features.updateMask(gedi_mask.And(hudak_site_mask))
input_features_no_gedi = input_features.select(["NDVI_jul", "b1", "frac_change_AGB_live"]).updateMask(gedi_mask.Not().And(hudak_site_mask))

In [None]:
# Sample the GEDI-covered image within the AOI at 300 m into a table, and add the
# "random" column used for the train/validation split below.
input_data_w_gedi = input_features_w_gedi.sample(factor=1, region=aoi, scale=300).randomColumn()

In [None]:
# Split on the "random" column: values below 0.7 train, the rest validate.
training = input_data_w_gedi.filter(ee.Filter.lt("random", 0.7))
validation = input_data_w_gedi.filter(ee.Filter.gte("random", 0.7))

In [None]:
def run_boosted_grid_search(training, validation, input_bands, target_band):
  """Grid-search gradient-boosted regression hyperparameters on Earth Engine.

  For every combination of tree count, loss, sampling rate, maximum node count
  and learning rate, a ``smileGradientTreeBoost`` regressor is trained on
  ``training`` and scored on ``validation``. Each combination triggers one
  blocking ``getInfo()`` round-trip, so the search is slow.

  Parameters
  ----------
  training
    Feature collection used to fit each model.
  validation
    Feature collection used to score each model.
  input_bands
    Names of the predictor properties.
  target_band: str
    Name of the property to predict. It is also the property read back from the
    classified validation features, so it must be a plain Python string.

  Returns
  -------
  pd.DataFrame
    One row per combination with columns n_trees, loss, sampling_rate,
    max_nodes, learning_rate, r_squared (squared Pearson correlation between
    actual and predicted values) and rmse.
  """
  tree_counts = [100, 200, 500]
  losses = ["LeastAbsoluteDeviation"]  # also available: "LeastSquares", "Huber"
  sampling_rates = [0.5, 0.7, 0.9]
  max_node_counts = [2, 4]
  learning_rates = [0.001, 0.005, 0.01]

  # Collect plain dicts and build the frame once at the end; DataFrame.append
  # copied the frame on every call and no longer exists in pandas >= 2.0.
  records = []
  for _n in tree_counts:
    for _l in losses:
      for _s in sampling_rates:
        for _leaves in max_node_counts:
          for _lr in learning_rates:
            model = (
              ee.Classifier.smileGradientTreeBoost(numberOfTrees=_n, loss=_l, samplingRate=_s, maxNodes=_leaves, shrinkage=_lr, seed=42)
              .setOutputMode("REGRESSION")
              .train(training, target_band, input_bands)
            )
            holdout = validation.classify(model, "predicted")

            # Read back the requested target rather than a hard-coded property name.
            test = holdout.select(["predicted", target_band]).getInfo()
            properties = [feature["properties"] for feature in test["features"]]
            actual = np.array([p[target_band] for p in properties])
            predicted = np.array([p["predicted"] for p in properties])

            r_squared = np.corrcoef(actual, predicted)[0, 1] ** 2
            RMSE = np.sqrt(np.mean((predicted - actual) ** 2))

            records.append({"n_trees": _n, "loss": _l, "sampling_rate": _s, "max_nodes": _leaves, "learning_rate": _lr, "r_squared": r_squared, "rmse": RMSE})
            print(f"N_trees: {_n} || loss: {_l} || sampling rate: {_s} || max nodes: {_leaves} || learning rate: {_lr} || r-squared: {r_squared} || rmse: {RMSE}")

  return pd.DataFrame(records)

In [None]:
# With Hudak AGB 2016 as input: `bands` holds every band of X, including the AGB image's.
df = run_boosted_grid_search(training, validation, bands, "frac_change_AGB_live")

N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 2 || learning rate: 0.001 || r-squared: 0.2767524548276043 || rmse: 0.27239047103955477
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 2 || learning rate: 0.005 || r-squared: 0.26091667618533304 || rmse: 0.2652205054963387
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 2 || learning rate: 0.01 || r-squared: 0.24862757763829244 || rmse: 0.25861185875673104
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 4 || learning rate: 0.001 || r-squared: 0.25061762044716046 || rmse: 0.2720337668919761
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 4 || learning rate: 0.005 || r-squared: 0.23499468202111706 || rmse: 0.26337968883674145
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 4 || learning rate: 0.01 || r-squared: 0.23758265700298892 || rmse: 0.255

In [None]:
# Five best hyperparameter combinations by r-squared.
df.sort_values(by=["r_squared"], ascending=False).head()

Unnamed: 0,learning_rate,loss,max_nodes,n_trees,r_squared,rmse,sampling_rate
0,0.001,LeastAbsoluteDeviation,2.0,100.0,0.276752,0.27239,0.5
18,0.001,LeastAbsoluteDeviation,2.0,200.0,0.2762,0.270362,0.5
6,0.001,LeastAbsoluteDeviation,2.0,100.0,0.272694,0.272534,0.7
24,0.001,LeastAbsoluteDeviation,2.0,200.0,0.271194,0.270699,0.7
7,0.005,LeastAbsoluteDeviation,2.0,100.0,0.270191,0.265782,0.7


In [None]:
# Refit one configuration from the grid search (100 trees, shrinkage 0.005,
# sampling rate 0.7, 2 nodes) and report its squared correlation on the holdout.
# NOTE(review): per the saved ranking this is row 7, not the top-ranked row -- confirm the choice is intended.
model = ee.Classifier.smileGradientTreeBoost(numberOfTrees=100, shrinkage=0.005, samplingRate=0.7, maxNodes=2, seed=42).setOutputMode("REGRESSION").train(training, "frac_change_AGB_live", bands)

holdout = validation.classify(model, "predicted")

# Pull the predictions client-side to score them with numpy.
test = holdout.select(["predicted", "frac_change_AGB_live"]).getInfo()
actual = [x["properties"]["frac_change_AGB_live"] for x in test["features"]]
predicted = [x["properties"]["predicted"] for x in test["features"]]

corr_matrix = np.corrcoef(actual, predicted)
corr_xy = corr_matrix[0, 1]
print(corr_xy**2)

0.2701909107568643


In [None]:
# Predict wherever GEDI data exists, using whatever `model` currently holds.
classified_w_gedi = input_features.updateMask(gedi_mask).classify(model)

In [None]:
# Same sampling and 70/30 split as above, for the pixels without GEDI coverage.
input_data_no_gedi = input_features_no_gedi.sample(factor=1, region=aoi, scale=300).randomColumn()
training_no_gedi = input_data_no_gedi.filter(ee.Filter.lt("random", 0.7))
validation_no_gedi = input_data_no_gedi.filter(ee.Filter.gte("random", 0.7))

In [None]:
# Fallback model for pixels without GEDI: predictors are NDVI_jul and "b1" only.
# NOTE(review): these hyperparameters match the best row of df_no_gedi, which is
# only computed in a later cell -- the notebook appears to have been run out of order.
model_no_gedi = ee.Classifier.smileGradientTreeBoost(numberOfTrees=200, shrinkage=0.001, samplingRate=0.5, maxNodes=2, seed=42).setOutputMode("REGRESSION").train(training_no_gedi, "frac_change_AGB_live", ee.List(["NDVI_jul", "b1"]))

holdout_no_gedi = validation_no_gedi.classify(model_no_gedi, "predicted")

# Pull the predictions client-side to score them with numpy.
test_no_gedi = holdout_no_gedi.select(["predicted", "frac_change_AGB_live"]).getInfo()
actual_no_gedi = [x["properties"]["frac_change_AGB_live"] for x in test_no_gedi["features"]]
predicted_no_gedi = [x["properties"]["predicted"] for x in test_no_gedi["features"]]

# Overwrites corr_matrix / corr_xy from the GEDI model's evaluation.
corr_matrix = np.corrcoef(actual_no_gedi, predicted_no_gedi)
corr_xy = corr_matrix[0, 1]
print(corr_xy**2)

0.18248123079238726


In [None]:
# Predict with the fallback model wherever GEDI data is absent.
classified_no_gedi = input_features.updateMask(gedi_mask.Not()).classify(model_no_gedi)

In [None]:
# Merge the two complementary prediction rasters into one image.
classified_col = ee.ImageCollection.fromImages([classified_w_gedi, classified_no_gedi])
# Convert the predicted fractional change into a biomass value:
# agb_2016 * (1 + predicted fractional change).
combined = classified_col.mosaic().add(ee.Image(1)).multiply(hudak_agb_2016)

In [None]:
# Sanity check: value range of the combined prediction over the AOI at 30 m.
print(combined.reduceRegion(ee.Reducer.minMax(), aoi, scale=30).getInfo())

{'classification_max': 936.4993450418115, 'classification_min': 11.686710719019175}


In [None]:
# Define (but do not yet start) an export of the prediction to a user asset at 30 m.
task = ee.batch.Export.image.toAsset(combined, description="hudak_2031_agb", assetId="users/connorr/2031_predicted_agb", region=aoi, scale=30)

In [None]:
# Submit the export task to Earth Engine.
task.start()

In [None]:
# Poll the export task.
# NOTE(review): the saved output is a NameError, so this cell was apparently run
# in a kernel where `task` was not defined -- re-run after the export cells.
task.status()

NameError: ignored

In [None]:
# Same range check at 300 m.
# NOTE(review): the saved range here (~0.05-0.12) is very different from the 30 m
# check above (~11-936); `combined` presumably held a different image when this
# cell last ran -- confirm.
print(combined.reduceRegion(ee.Reducer.minMax(), aoi, scale=300).getInfo())

{'classification_max': 0.12216296046972275, 'classification_min': 0.05194784328341484}


In [None]:
# Hyperparameter search for the fallback (no-GEDI) model.
df_no_gedi = run_boosted_grid_search(training_no_gedi, validation_no_gedi, ee.List(["NDVI_jul", "b1"]), "frac_change_AGB_live")

N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 2 || learning rate: 0.001 || r-squared: 0.17902688970626188 || rmse: 0.15677066134651507
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 2 || learning rate: 0.005 || r-squared: 0.1725429640694186 || rmse: 0.15246832258043336
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 2 || learning rate: 0.01 || r-squared: 0.1601829557410202 || rmse: 0.149960455489535
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 4 || learning rate: 0.001 || r-squared: 0.07567765279601157 || rmse: 0.15703572288606754
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 4 || learning rate: 0.005 || r-squared: 0.060014040222735834 || rmse: 0.15403675092806896
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 4 || learning rate: 0.01 || r-squared: 0.09012338837079417 || rmse: 0.151

In [None]:
# Fallback-model search results, best r-squared first.
df_no_gedi.sort_values(by=["r_squared"], ascending=False)

Unnamed: 0,learning_rate,loss,max_nodes,n_trees,r_squared,rmse,sampling_rate
18,0.001,LeastAbsoluteDeviation,2.0,200.0,0.182481,0.15561,0.5
0,0.001,LeastAbsoluteDeviation,2.0,100.0,0.179027,0.156771,0.5
1,0.005,LeastAbsoluteDeviation,2.0,100.0,0.172543,0.152468,0.5
36,0.001,LeastAbsoluteDeviation,2.0,500.0,0.170662,0.152834,0.5
2,0.01,LeastAbsoluteDeviation,2.0,100.0,0.160183,0.14996,0.5
42,0.001,LeastAbsoluteDeviation,2.0,500.0,0.157445,0.153531,0.7
24,0.001,LeastAbsoluteDeviation,2.0,200.0,0.156941,0.155983,0.7
19,0.005,LeastAbsoluteDeviation,2.0,200.0,0.154447,0.150298,0.5
6,0.001,LeastAbsoluteDeviation,2.0,100.0,0.150526,0.15701,0.7
7,0.005,LeastAbsoluteDeviation,2.0,100.0,0.142159,0.153609,0.7


In [None]:
# Without Hudak AGB 2016 as input
# NOTE(review): as written this call is identical to the "with Hudak AGB" search
# above (`bands` still comes from X, which includes the AGB image). The saved
# output also has a different format from the current print statement, so it was
# presumably produced by an earlier version of the function and of `bands`.
run_boosted_grid_search(training, validation, bands, "frac_change_AGB_live")

N_trees: 100 || loss: LeastSquares || r-squared: 0.06180051425429168 || rmse: 0.2329074704005871
N_trees: 100 || loss: LeastAbsoluteDeviation || r-squared: 0.11512977489983252 || rmse: 0.24714269008985013
N_trees: 100 || loss: Huber || r-squared: 0.06302718951531323 || rmse: 0.2469534032856114
N_trees: 200 || loss: LeastSquares || r-squared: 0.07188151483454239 || rmse: 0.23098979676002532
N_trees: 200 || loss: LeastAbsoluteDeviation || r-squared: 0.12257918711895713 || rmse: 0.238296114054766
N_trees: 200 || loss: Huber || r-squared: 0.07223047712865668 || rmse: 0.23946122452239016
N_trees: 500 || loss: LeastSquares || r-squared: 0.06992807888405357 || rmse: 0.23326084326771404
N_trees: 500 || loss: LeastAbsoluteDeviation || r-squared: 0.10740421649155739 || rmse: 0.2312250466049165
N_trees: 500 || loss: Huber || r-squared: 0.0808789950083201 || rmse: 0.23537509266655232


In [None]:
# Regressor with 200 trees and otherwise default hyperparameters.
# Rebinds `model`, the name used earlier to build classified_w_gedi.
model = ee.Classifier.smileGradientTreeBoost(200, seed=42).setOutputMode("REGRESSION").train(training, "frac_change_AGB_live", bands)

In [None]:
# Apply the model to the validation set; predictions go in the "predicted" property.
holdout = validation.classify(model, "predicted")

In [None]:
# Download predicted and actual values to the client.
test = holdout.select(["predicted", "frac_change_AGB_live"]).getInfo()

In [None]:
# Unpack the downloaded features into parallel lists.
actual = [x["properties"]["frac_change_AGB_live"] for x in test["features"]]
predicted = [x["properties"]["predicted"] for x in test["features"]]

In [None]:
# r_squared here is the squared Pearson correlation between actual and predicted.
corr_matrix = np.corrcoef(actual, predicted)
corr_xy = corr_matrix[0, 1]
r_squared = corr_xy**2

In [None]:
# Report the holdout r-squared.
print(r_squared)

0.15356861319861204


In [None]:
# Root-mean-square error of the holdout predictions.
print(np.sqrt(np.mean((np.array(predicted) - np.array(actual)) ** 2)))

0.25319489496533926


In [None]:
# Confirm the model's output mode.
print(model.mode().getInfo())

REGRESSION


In [None]:
# Value range of two NDVI images over the AOI at 300 m.
# NOTE(review): ndvi_jan2020 and ndvi_jul2020 are not defined in any visible cell;
# this cell relies on leftover kernel state.
print(ndvi_jan2020.reduceRegion(ee.Reducer.minMax(), aoi, 300).getInfo())
print(ndvi_jul2020.reduceRegion(ee.Reducer.minMax(), aoi, 300).getInfo())

{'NDVI_max': 0.9916839916839917, 'NDVI_min': 0.026124004933288484}
{'NDVI_max': 0.9236396890717878, 'NDVI_min': 0.2605105105105105}


In [None]:
# Sum of each band's mask over the AOI at 300 m.
# NOTE(review): gedi_features only exists as a local inside assemble_input_features;
# no visible cell defines it at notebook level.
print(gedi_features.mask().reduceRegion(ee.Reducer.sum(), aoi, 300).getInfo())

{'canopy_avg': 303, 'canopy_max': 303, 'canopy_min': 303, 'canopy_p10': 303, 'canopy_p25': 303, 'canopy_p50': 303, 'canopy_p75': 303, 'canopy_p90': 303, 'canopy_std': 303, 'd01': 303, 'd02': 303, 'd03': 303, 'd04': 303, 'dns': 303}


In [None]:
# Sum of the target band's mask over the AOI at 300 m.
# NOTE(review): hudak_site_images_2031 is not defined in any visible cell.
print(hudak_site_images_2031.select('frac_change_AGB_live').mask().reduceRegion(ee.Reducer.sum(), aoi, 300).getInfo())

{'frac_change_AGB_live': 444}


In [None]:
# Number of distinct stands inside the AOI.
# NOTE(review): hudak_sites_fc is not defined in any visible cell.
print(hudak_sites_fc.filterBounds(aoi).aggregate_count_distinct("StandID").getInfo())

458


In [None]:
# Load the stand-level table and the plot metadata from the working directory.
hudak_stand_data = pd.read_csv("combined_hudak_stand_data.csv")
plot_data = pd.read_csv("PlotData.csv")

In [None]:
# Preview the plot metadata.
plot_data.head()

Unnamed: 0,PROJECT,STAND_ID,PLOT_ID,LIDAR_UNIT,LIDAR_YEAR,PLOT_TYPE,VARIANT,INV_YEAR,LATITUDE,LONGITUDE,STATE,COUNTY,REGION,FOREST,DISTRICT,LOCATION,PV_CODE,PV_REF_CODE,AGE,ASPECT,SLOPE,ELEVFT,BASAL_AREA_FACTOR,INV_PLOT_SIZE,BRK_DBH,NUM_PLOTS,NONSTK_PLOTS,SAM_WT,STK_PCNT,DG_TRANS,DG_MEASURE,HTG_TRANS,HTG_MEASURE,MORT_MEASURE,COMMENTS_1,COMMENTS_2,COMMENTS_3,COMMENTS_4
0,USFSR6_FSVeg,060502000020001_0001,,Tulalip,2013,Fixed_Area,WC,2016,48.025538,-121.668052,53.0,61.0,6.0,5.0,2.0,,CHF135,621.0,,71,31,1873,0.0,1.0,999.0,1.0,0.0,,0.54,0.0,5.0,0.0,5.0,5.0,,,,
1,USFSR6_FSVeg,060502000040001_0001,,Tulalip,2013,Fixed_Area,WC,2016,48.027919,-121.475637,53.0,61.0,6.0,5.0,2.0,,CHF135,621.0,,275,65,2694,0.0,1.0,999.0,1.0,0.0,,1.0,0.0,5.0,0.0,5.0,5.0,,,,
2,USFSR6_FSVeg,060502000050001_0001,,Tulalip,2013,Fixed_Area,WC,2016,48.037383,-121.696034,53.0,61.0,6.0,5.0,2.0,,CHF135,621.0,,38,23,2753,0.0,1.0,999.0,1.0,0.0,,0.9,0.0,5.0,0.0,5.0,5.0,,,,
3,USFSR6_FSVeg,060502000060001_0001,,Tulalip,2013,Fixed_Area,WC,2016,48.037724,-121.668653,53.0,61.0,6.0,5.0,2.0,,CHF135,621.0,,238,65,1923,0.0,1.0,999.0,1.0,0.0,,1.0,0.0,5.0,0.0,5.0,5.0,,,,
4,USFSR6_FSVeg,060502000070001_0001,,Tulalip,2013,Fixed_Area,WC,2016,48.038081,-121.641048,53.0,61.0,6.0,5.0,2.0,,CHF135,621.0,,333,46,3114,0.0,1.0,999.0,1.0,0.0,,1.0,0.0,10.0,0.0,5.0,5.0,,,,


In [None]:
# 2021 rows act as the baseline against which other years are compared.
agb_2021 = hudak_stand_data[hudak_stand_data.Year == 2021].drop("Year", axis="columns")

In [None]:
# Attach the 2021 baseline to every stand-year row: "_x" columns hold the row's
# own year, "_y" columns hold the 2021 values.
merged = pd.merge(hudak_stand_data, agb_2021, on=["StandID", "LATITUDE", "LONGITUDE"])

In [None]:
# Absolute change relative to 2021 (year value minus 2021 value) ...
merged["delta_AGB_live"] = merged.Aboveground_Total_Live_x - merged.Aboveground_Total_Live_y
# ... where the "total" variant is computed from Total_Stand_Carbon.
merged["delta_AGB_total"] = merged.Total_Stand_Carbon_x - merged.Total_Stand_Carbon_y
# Fractional change: the delta divided by the 2021 value.
merged["frac_change_AGB_live"] = merged.delta_AGB_live / merged.Aboveground_Total_Live_y 
merged["frac_change_AGB_total"] = merged.delta_AGB_total / merged.Total_Stand_Carbon_y

In [None]:
# Mean absolute and fractional change per year.
merged.groupby("Year").agg({"delta_AGB_live": "mean", "delta_AGB_total": "mean", "frac_change_AGB_live": "mean", "frac_change_AGB_total": "mean"})

Unnamed: 0_level_0,delta_AGB_live,delta_AGB_total,frac_change_AGB_live,frac_change_AGB_total
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2031,11.572351,5.626219,0.162188,0.076955
2051,28.389304,22.879619,0.429093,0.249151


In [None]:
# Keep only the two years of interest. This rebinds `merged`, so the unfiltered
# table is no longer available afterwards.
merged = merged[merged.Year.isin([2031, 2051])]

In [None]:
# Join the stand table with the plot metadata on stand ID and coordinates.
# NOTE(review): this rebinds `df` (the grid-search results earlier in the notebook)
# and the result is not used by any later visible cell.
df = pd.merge(merged, plot_data, left_on=["StandID", "LATITUDE", "LONGITUDE"], right_on=["STAND_ID", "LATITUDE", "LONGITUDE"])

In [None]:
# Preview the filtered table with all "_x" / "_y" columns.
merged.head()

Unnamed: 0,MgmtID_x,StandID,Year,Aboveground_Total_Live_x,Aboveground_Merch_Live_x,Belowground_Live_x,Belowground_Dead_x,Standing_Dead_x,Forest_Down_Dead_Wood_x,Forest_Floor_x,Forest_Shrub_Herb_x,Total_Stand_Carbon_x,Total_Removed_Carbon_x,RunTitle_x,LATITUDE,LONGITUDE,MgmtID_y,Aboveground_Total_Live_y,Aboveground_Merch_Live_y,Belowground_Live_y,Belowground_Dead_y,Standing_Dead_y,Forest_Down_Dead_Wood_y,Forest_Floor_y,Forest_Shrub_Herb_y,Total_Stand_Carbon_y,Total_Removed_Carbon_y,RunTitle_y,delta_AGB_live,delta_AGB_total,frac_change_AGB_live,frac_change_AGB_total
3,A009,CoosBay_101_01,2031,193.166183,182.171799,83.177101,92.069321,395.990723,61.748306,13.619035,0.438048,840.208679,0,Run 9,42.990002,-123.800528,A009,226.832886,214.583954,96.988709,118.004913,399.597931,34.028915,16.281956,0.300685,892.036011,0,Run 9,-33.666702,-51.827332,-0.148421,-0.0581
5,A009,CoosBay_101_01,2051,194.39801,184.22139,83.878418,40.691887,348.082428,81.401436,13.658746,0.496693,762.607666,0,Run 9,42.990002,-123.800528,A009,226.832886,214.583954,96.988709,118.004913,399.597931,34.028915,16.281956,0.300685,892.036011,0,Run 9,-32.434875,-129.428345,-0.14299,-0.145093
15,A009,CoosBay_101_03,2031,323.896545,297.460785,86.654762,59.787716,261.603302,58.613071,14.447535,0.2,805.202942,0,Run 9,43.251176,-123.904846,A009,340.40155,313.068024,92.60247,76.375282,278.145325,32.954269,17.116972,0.2,837.795898,0,Run 9,-16.505005,-32.592957,-0.048487,-0.038903
17,A009,CoosBay_101_03,2051,317.491425,294.868927,84.676468,32.735943,239.334549,71.476341,14.56735,0.2,760.482056,0,Run 9,43.251176,-123.904846,A009,340.40155,313.068024,92.60247,76.375282,278.145325,32.954269,17.116972,0.2,837.795898,0,Run 9,-22.910126,-77.313843,-0.067303,-0.092282
27,A009,CoosBay_101_04,2031,244.136566,223.369171,68.119621,10.887504,49.169796,30.640354,13.932786,0.2,417.08667,0,Run 9,43.546825,-123.699342,A009,243.326248,221.705109,67.17968,14.188003,58.779743,25.167381,14.525542,0.2,423.366608,0,Run 9,0.810318,-6.279938,0.00333,-0.014833


In [None]:
# Mean absolute change per remaining year.
merged.groupby("Year").agg({"delta_AGB_live": "mean", "delta_AGB_total": "mean"})

Unnamed: 0_level_0,delta_AGB_live,delta_AGB_total
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2031,11.572351,5.626219
2051,28.389304,22.879619


In [None]:
# Largest total change, checked before trimming the columns.
merged.delta_AGB_total.max()

169.6044388

In [None]:
# Keep only identifiers, coordinates, the year's stock values and the derived
# change columns; all other "_x" / "_y" columns are dropped from here on.
merged = merged[["StandID", "Year", "LATITUDE", "LONGITUDE", "Aboveground_Total_Live_x", "Total_Stand_Carbon_x", "delta_AGB_live", "delta_AGB_total", "frac_change_AGB_live", "frac_change_AGB_total"]]

In [None]:
# Same check after trimming the columns; the value should be unchanged.
merged.delta_AGB_total.max()

169.6044388

In [None]:
# Preview the trimmed table.
merged.head()

Unnamed: 0,StandID,Year,LATITUDE,LONGITUDE,Aboveground_Total_Live_x,Total_Stand_Carbon_x,delta_AGB_live,delta_AGB_total,frac_change_AGB_live,frac_change_AGB_total
3,CoosBay_101_01,2031,42.990002,-123.800528,193.166183,840.208679,-33.666702,-51.827332,-0.148421,-0.0581
5,CoosBay_101_01,2051,42.990002,-123.800528,194.39801,762.607666,-32.434875,-129.428345,-0.14299,-0.145093
15,CoosBay_101_03,2031,43.251176,-123.904846,323.896545,805.202942,-16.505005,-32.592957,-0.048487,-0.038903
17,CoosBay_101_03,2051,43.251176,-123.904846,317.491425,760.482056,-22.910126,-77.313843,-0.067303,-0.092282
27,CoosBay_101_04,2031,43.546825,-123.699342,244.136566,417.08667,0.810318,-6.279938,0.00333,-0.014833


In [None]:
# Shorten the "_x"-suffixed biomass column names. The result is reassigned
# instead of renaming in place, so the cell produces a fresh frame and never
# mutates an object another name might still reference.
merged = merged.rename(
    columns={
        "Aboveground_Total_Live_x": "AGB_live",
        "Total_Stand_Carbon_x": "AGB_total",
    }
)

In [None]:
# Preview the first five rows after the rename.
merged.head(5)

Unnamed: 0,StandID,Year,LATITUDE,LONGITUDE,AGB_live,AGB_total,delta_AGB_live,delta_AGB_total,frac_change_AGB_live,frac_change_AGB_total
3,CoosBay_101_01,2031,42.990002,-123.800528,193.166183,840.208679,-33.666702,-51.827332,-0.148421,-0.0581
5,CoosBay_101_01,2051,42.990002,-123.800528,194.39801,762.607666,-32.434875,-129.428345,-0.14299,-0.145093
15,CoosBay_101_03,2031,43.251176,-123.904846,323.896545,805.202942,-16.505005,-32.592957,-0.048487,-0.038903
17,CoosBay_101_03,2051,43.251176,-123.904846,317.491425,760.482056,-22.910126,-77.313843,-0.067303,-0.092282
27,CoosBay_101_04,2031,43.546825,-123.699342,244.136566,417.08667,0.810318,-6.279938,0.00333,-0.014833


In [None]:
# One frame per projection year; "Year" is constant within each, so drop it.
stands_2031 = merged.loc[merged["Year"] == 2031].drop(columns="Year")
stands_2051 = merged.loc[merged["Year"] == 2051].drop(columns="Year")

In [None]:
# Write each per-year table to CSV without the pandas index column.
stands_2031.to_csv(path_or_buf="hudak_stands_2031.csv", index=False)
stands_2051.to_csv(path_or_buf="hudak_stands_2051.csv", index=False)

In [None]:
# Total standing-dead carbon per projection year.
# NOTE(review): this needs a `merged` that still has "Standing_Dead_x" and all
# projection years, i.e. the frame as it was before the cell that reassigns
# `merged` to a column subset. Confirm the cell order on a fresh kernel.
merged.groupby("Year")[["Standing_Dead_x"]].sum()

Unnamed: 0_level_0,Standing_Dead_x
Year,Unnamed: 1_level_1
2010,8401.299335
2020,42174.904675
2021,42372.055635
2031,35978.092332
2041,34107.956585
2051,32555.513942
2061,31742.880716
2071,30728.278262
2081,30425.821307
2091,29256.588919


In [None]:
# Preview the first five rows.
# NOTE(review): the recorded output shows the full "_x"/"_y" column set, so this
# ran against `merged` before it was trimmed — confirm the cell order.
merged.head(5)

Unnamed: 0,MgmtID_x,StandID,Year,Aboveground_Total_Live_x,Aboveground_Merch_Live_x,Belowground_Live_x,Belowground_Dead_x,Standing_Dead_x,Forest_Down_Dead_Wood_x,Forest_Floor_x,Forest_Shrub_Herb_x,Total_Stand_Carbon_x,Total_Removed_Carbon_x,RunTitle_x,LATITUDE,LONGITUDE,MgmtID_y,Aboveground_Total_Live_y,Aboveground_Merch_Live_y,Belowground_Live_y,Belowground_Dead_y,Standing_Dead_y,Forest_Down_Dead_Wood_y,Forest_Floor_y,Forest_Shrub_Herb_y,Total_Stand_Carbon_y,Total_Removed_Carbon_y,RunTitle_y,delta_AGB_live,delta_AGB_total
0,A009,CoosBay_101_01,2010,592.320007,545.131348,209.947678,11.811847,52.856281,29.800003,13.32,0.2,910.255798,0,Run 9,42.990002,-123.800528,A009,226.832886,214.583954,96.988709,118.004913,399.597931,34.028915,16.281956,0.300685,892.036011,0,Run 9,365.487122,18.219788
1,A009,CoosBay_101_01,2020,231.969269,217.148773,98.269714,121.681816,403.367432,28.39426,15.164009,0.286885,899.133423,0,Run 9,42.990002,-123.800528,A009,226.832886,214.583954,96.988709,118.004913,399.597931,34.028915,16.281956,0.300685,892.036011,0,Run 9,5.136383,7.097412
2,A009,CoosBay_101_01,2021,226.832886,214.583954,96.988709,118.004913,399.597931,34.028915,16.281956,0.300685,892.036011,0,Run 9,42.990002,-123.800528,A009,226.832886,214.583954,96.988709,118.004913,399.597931,34.028915,16.281956,0.300685,892.036011,0,Run 9,0.0,0.0
3,A009,CoosBay_101_01,2031,193.166183,182.171799,83.177101,92.069321,395.990723,61.748306,13.619035,0.438048,840.208679,0,Run 9,42.990002,-123.800528,A009,226.832886,214.583954,96.988709,118.004913,399.597931,34.028915,16.281956,0.300685,892.036011,0,Run 9,-33.666702,-51.827332
4,A009,CoosBay_101_01,2041,194.179474,183.430344,83.576584,61.010204,368.634369,73.866402,13.639281,0.472956,795.379211,0,Run 9,42.990002,-123.800528,A009,226.832886,214.583954,96.988709,118.004913,399.597931,34.028915,16.281956,0.300685,892.036011,0,Run 9,-32.653412,-96.656799


In [None]:
# Preview the first five rows of the per-stand table.
hudak_stand_data.head(5)

Unnamed: 0,MgmtID,StandID,Year,Aboveground_Total_Live,Aboveground_Merch_Live,Belowground_Live,Belowground_Dead,Standing_Dead,Forest_Down_Dead_Wood,Forest_Floor,Forest_Shrub_Herb,Total_Stand_Carbon,Total_Removed_Carbon,RunTitle,LATITUDE,LONGITUDE
0,A009,CoosBay_101_01,2010,592.320007,545.131348,209.947678,11.811847,52.856281,29.800003,13.32,0.2,910.255798,0,Run 9,42.990002,-123.800528
1,A009,CoosBay_101_01,2020,231.969269,217.148773,98.269714,121.681816,403.367432,28.39426,15.164009,0.286885,899.133423,0,Run 9,42.990002,-123.800528
2,A009,CoosBay_101_01,2021,226.832886,214.583954,96.988709,118.004913,399.597931,34.028915,16.281956,0.300685,892.036011,0,Run 9,42.990002,-123.800528
3,A009,CoosBay_101_01,2031,193.166183,182.171799,83.177101,92.069321,395.990723,61.748306,13.619035,0.438048,840.208679,0,Run 9,42.990002,-123.800528
4,A009,CoosBay_101_01,2041,194.179474,183.430344,83.576584,61.010204,368.634369,73.866402,13.639281,0.472956,795.379211,0,Run 9,42.990002,-123.800528


In [None]:
# Load the plot-level table from the working directory.
plot_df = pd.read_csv(filepath_or_buffer="PlotData.csv")

In [None]:
# Inner-join stand rows to plot rows on the stand identifier, which is spelled
# differently in the two tables.
df = hudak_stand_data.merge(plot_df, left_on="StandID", right_on="STAND_ID")

In [None]:
# Preview the first five rows of the joined stand/plot table.
df.head(5)

Unnamed: 0,MgmtID,StandID,Year,Aboveground_Total_Live,Aboveground_Merch_Live,Belowground_Live,Belowground_Dead,Standing_Dead,Forest_Down_Dead_Wood,Forest_Floor,Forest_Shrub_Herb,Total_Stand_Carbon,Total_Removed_Carbon,RunTitle,LATITUDE_x,LONGITUDE_x,PROJECT,STAND_ID,PLOT_ID,LIDAR_UNIT,LIDAR_YEAR,PLOT_TYPE,VARIANT,INV_YEAR,LATITUDE_y,LONGITUDE_y,STATE,COUNTY,REGION,FOREST,DISTRICT,LOCATION,PV_CODE,PV_REF_CODE,AGE,ASPECT,SLOPE,ELEVFT,BASAL_AREA_FACTOR,INV_PLOT_SIZE,BRK_DBH,NUM_PLOTS,NONSTK_PLOTS,SAM_WT,STK_PCNT,DG_TRANS,DG_MEASURE,HTG_TRANS,HTG_MEASURE,MORT_MEASURE,COMMENTS_1,COMMENTS_2,COMMENTS_3,COMMENTS_4,area
0,A009,CoosBay_101_01,2010,592.320007,545.131348,209.947678,11.811847,52.856281,29.800003,13.32,0.2,910.255798,0,Run 9,42.990002,-123.800528,CoosBay,CoosBay_101_01,,BLMSouthCoast2009,2009,Fixed_Area,PN,2010,42.990002,-123.800528,41.0,,,,,712.0,,,,220,83,2155,-8.0,50.0,5.5,1.0,,,,,,,,,,,,,0.02
1,A009,CoosBay_101_01,2020,231.969269,217.148773,98.269714,121.681816,403.367432,28.39426,15.164009,0.286885,899.133423,0,Run 9,42.990002,-123.800528,CoosBay,CoosBay_101_01,,BLMSouthCoast2009,2009,Fixed_Area,PN,2010,42.990002,-123.800528,41.0,,,,,712.0,,,,220,83,2155,-8.0,50.0,5.5,1.0,,,,,,,,,,,,,0.02
2,A009,CoosBay_101_01,2021,226.832886,214.583954,96.988709,118.004913,399.597931,34.028915,16.281956,0.300685,892.036011,0,Run 9,42.990002,-123.800528,CoosBay,CoosBay_101_01,,BLMSouthCoast2009,2009,Fixed_Area,PN,2010,42.990002,-123.800528,41.0,,,,,712.0,,,,220,83,2155,-8.0,50.0,5.5,1.0,,,,,,,,,,,,,0.02
3,A009,CoosBay_101_01,2031,193.166183,182.171799,83.177101,92.069321,395.990723,61.748306,13.619035,0.438048,840.208679,0,Run 9,42.990002,-123.800528,CoosBay,CoosBay_101_01,,BLMSouthCoast2009,2009,Fixed_Area,PN,2010,42.990002,-123.800528,41.0,,,,,712.0,,,,220,83,2155,-8.0,50.0,5.5,1.0,,,,,,,,,,,,,0.02
4,A009,CoosBay_101_01,2041,194.179474,183.430344,83.576584,61.010204,368.634369,73.866402,13.639281,0.472956,795.379211,0,Run 9,42.990002,-123.800528,CoosBay,CoosBay_101_01,,BLMSouthCoast2009,2009,Fixed_Area,PN,2010,42.990002,-123.800528,41.0,,,,,712.0,,,,220,83,2155,-8.0,50.0,5.5,1.0,,,,,,,,,,,,,0.02


In [None]:
# Distinct BASAL_AREA_FACTOR values in the joined table.
df["BASAL_AREA_FACTOR"].unique()

array([-8.])

In [None]:
# "area" is the reciprocal of INV_PLOT_SIZE.
# NOTE(review): presumably INV_PLOT_SIZE is plots per acre, which would make
# "area" the plot size in acres — confirm the units.
df["area"] = df["INV_PLOT_SIZE"].rtruediv(1)
df["area"].unique()

array([0.02])

In [None]:
import math

In [None]:
# Radius of a circle with the plot's area, from area = pi * r**2.
# Uses math.pi rather than the 3.14 approximation, and names the inputs.
# NOTE(review): presumably 4046.86 is square metres per acre and 0.02 is the
# plot area in acres (the single value of df.area), which gives a radius in
# metres — confirm.
SQ_METRES_PER_ACRE = 4046.86
plot_area_acres = 0.02
plot_radius_m = math.sqrt(SQ_METRES_PER_ACRE * plot_area_acres / math.pi)
plot_radius_m

5.077024556169592

In [None]:
# Build the predictor image X and the target image Y with helpers defined
# elsewhere in this notebook.
# NOTE(review): the helper signatures are not visible here. Presumably 2020 is
# the imagery year and "frac_change_AGB_live" is the property rasterised into
# Y — confirm against the helper definitions.
X = assemble_input_features(aoi, gedi_canopy_data_fc, gedi_canopy_bands, 2020)
Y = assemble_targets(aoi, hudak_sites_2051, "frac_change_AGB_live")

In [None]:
# Names of every band in X; used below as the predictor list for the model
# trained on GEDI-covered pixels.
bands = X.bandNames()

In [None]:
# Masks used to split pixels into "has GEDI canopy data" and "is a Hudak site".
# gedi_mask is the mask of the "canopy_max" band; hudak_site_mask is the mask
# of the target image.
gedi_mask = X.select("canopy_max").mask()
hudak_site_mask = Y.mask()

In [None]:
# Stack the target band(s) of Y onto the predictors so each sampled pixel
# carries both the inputs and the label.
input_features = X.addBands(Y)

In [None]:
# Two training stacks, one per model:
#   * with GEDI:    pixels where both gedi_mask and hudak_site_mask are set,
#                   keeping every band;
#   * without GEDI: Hudak-site pixels where gedi_mask is NOT set, restricted
#                   to "NDVI_jul", "b1" and the target band.
input_features_w_gedi = input_features.updateMask(
    gedi_mask.And(hudak_site_mask)
)
input_features_no_gedi = (
    input_features
    .select(["NDVI_jul", "b1", "frac_change_AGB_live"])
    .updateMask(gedi_mask.Not().And(hudak_site_mask))
)

In [None]:
# Sample every unmasked pixel (factor=1) at 300 m inside the AOI, then attach a
# "random" column used for the train/validation split.
input_data_w_gedi = (
    input_features_w_gedi
    .sample(region=aoi, scale=300, factor=1)
    .randomColumn()
)

In [None]:
# Split on the "random" column: rows below 0.7 go to training and the rest to
# validation. The two filters are complementary, so no row is in both sets.
training = input_data_w_gedi.filter(ee.Filter.lt("random", 0.7))
validation = input_data_w_gedi.filter(ee.Filter.gte("random", 0.7))

In [None]:
def _score_predictions(actual, predicted):
    """Return ``(r_squared, rmse)`` for paired actual / predicted values.

    ``r_squared`` is the squared Pearson correlation from ``np.corrcoef``, not
    the coefficient of determination, so it is insensitive to bias and scale
    errors in the predictions. ``rmse`` is the root-mean-square error.
    """
    corr_xy = np.corrcoef(actual, predicted)[0, 1]
    rmse = np.sqrt(np.mean((np.array(predicted) - np.array(actual)) ** 2))
    return corr_xy ** 2, rmse


def run_boosted_grid_search(training, validation, input_bands, target_band):
    """Grid-search gradient-boosted regression hyperparameters on Earth Engine.

    For every combination in a fixed grid (number of trees, loss, sampling
    rate, max nodes, shrinkage) this trains a ``smileGradientTreeBoost``
    regressor on ``training``, predicts on ``validation`` and scores the
    predictions. Each combination does one blocking ``getInfo()`` round trip.

    Args:
        training: feature collection used to fit each model.
        validation: feature collection the fitted model is applied to.
        input_bands: names of the predictor properties.
        target_band: name of the property to predict. It is also the property
            read back from the holdout as the ground truth.

    Returns:
        pandas.DataFrame with one row per grid point, in grid order, and
        columns ``n_trees``, ``loss``, ``sampling_rate``, ``max_nodes``,
        ``learning_rate``, ``r_squared`` and ``rmse``.
    """
    from itertools import product

    columns = ["n_trees", "loss", "sampling_rate", "max_nodes", "learning_rate", "r_squared", "rmse"]
    grid = product(
        [100, 200, 500],             # numberOfTrees
        ["LeastAbsoluteDeviation"],  # loss; "LeastSquares" and "Huber" are the other options
        [0.5, 0.7, 0.9],             # samplingRate
        [2, 4],                      # maxNodes
        [0.001, 0.005, 0.01],        # shrinkage (learning rate)
    )

    # Collect plain records and build the frame once. DataFrame.append copied
    # the whole frame on every call and no longer exists in pandas >= 2.0.
    records = []
    for _n, _l, _s, _leaves, _lr in grid:
        model = (
            ee.Classifier.smileGradientTreeBoost(
                numberOfTrees=_n, loss=_l, samplingRate=_s, maxNodes=_leaves, shrinkage=_lr, seed=42
            )
            .setOutputMode("REGRESSION")
            .train(training, target_band, input_bands)
        )
        holdout = validation.classify(model, "predicted")

        # Read back the caller's target property rather than a hard-coded name,
        # so the search also works for other targets.
        holdout_info = holdout.select(["predicted", target_band]).getInfo()
        actual = [x["properties"][target_band] for x in holdout_info["features"]]
        predicted = [x["properties"]["predicted"] for x in holdout_info["features"]]

        r_squared, rmse = _score_predictions(actual, predicted)

        records.append({"n_trees": _n, "loss": _l, "sampling_rate": _s, "max_nodes": _leaves, "learning_rate": _lr, "r_squared": r_squared, "rmse": rmse})
        print(f"N_trees: {_n} || loss: {_l} || sampling rate: {_s} || max nodes: {_leaves} || learning rate: {_lr} || r-squared: {r_squared} || rmse: {rmse}")

    return pd.DataFrame(records, columns=columns)

In [None]:
# With Hudak AGB 2016 as input
# Grid search over the GEDI-covered samples, using every band of X as a predictor.
# NOTE(review): this rebinds `df`, which earlier cells use for the joined
# stand/plot table, so that table is no longer reachable after this cell.
df = run_boosted_grid_search(training, validation, bands, "frac_change_AGB_live")

N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 2 || learning rate: 0.001 || r-squared: 0.14290049209274547 || rmse: 0.8145639804118315
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 2 || learning rate: 0.005 || r-squared: 0.13972115549194747 || rmse: 0.7984900124817153
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 2 || learning rate: 0.01 || r-squared: 0.1361103223648596 || rmse: 0.7833986163565684
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 4 || learning rate: 0.001 || r-squared: 0.13032643052527032 || rmse: 0.8126770398557339
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 4 || learning rate: 0.005 || r-squared: 0.13915207478130487 || rmse: 0.7916228974027816
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 4 || learning rate: 0.01 || r-squared: 0.11955295077650434 || rmse: 0.775266

In [None]:
# Top five grid points by squared correlation on the holdout.
df.sort_values(by="r_squared", ascending=False).head(5)

Unnamed: 0,learning_rate,loss,max_nodes,n_trees,r_squared,rmse,sampling_rate
12,0.001,LeastAbsoluteDeviation,2.0,100.0,0.166771,0.813201,0.9
13,0.005,LeastAbsoluteDeviation,2.0,100.0,0.165683,0.79671,0.9
14,0.01,LeastAbsoluteDeviation,2.0,100.0,0.165215,0.787563,0.9
30,0.001,LeastAbsoluteDeviation,2.0,200.0,0.164828,0.807779,0.9
34,0.005,LeastAbsoluteDeviation,4.0,200.0,0.16458,0.782224,0.9


In [None]:
# Fit the chosen configuration on the GEDI-covered training samples.
model = (
    ee.Classifier
    .smileGradientTreeBoost(numberOfTrees=100, shrinkage=0.005, samplingRate=0.9, maxNodes=2, seed=42)
    .setOutputMode("REGRESSION")
    .train(training, "frac_change_AGB_live", bands)
)

# Predict on the holdout and pull the actual / predicted values client-side.
holdout = validation.classify(model, "predicted")
test = holdout.select(["predicted", "frac_change_AGB_live"]).getInfo()

actual = [feat["properties"]["frac_change_AGB_live"] for feat in test["features"]]
predicted = [feat["properties"]["predicted"] for feat in test["features"]]

# Squared Pearson correlation between actual and predicted values.
corr_matrix = np.corrcoef(actual, predicted)
corr_xy = corr_matrix[0, 1]
print(corr_xy ** 2)

0.16568323294433732


In [None]:
# Apply the GEDI model to every pixel where gedi_mask is set, not only the
# Hudak sites. The prediction band gets the default name "classification".
classified_w_gedi = input_features.updateMask(gedi_mask).classify(model)

In [None]:
# Same sampling and 70/30 split as the GEDI case, for Hudak-site pixels that
# have no GEDI coverage.
input_data_no_gedi = (
    input_features_no_gedi
    .sample(region=aoi, scale=300, factor=1)
    .randomColumn()
)
training_no_gedi = input_data_no_gedi.filter(
    ee.Filter.lt("random", 0.7)
)
validation_no_gedi = input_data_no_gedi.filter(
    ee.Filter.gte("random", 0.7)
)

In [None]:
# Grid search for the fallback model, which uses only "NDVI_jul" and "b1" as
# predictors on the samples without GEDI coverage.
df_no_gedi = run_boosted_grid_search(training_no_gedi, validation_no_gedi, ee.List(["NDVI_jul", "b1"]), "frac_change_AGB_live")

N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 2 || learning rate: 0.001 || r-squared: 0.04611197667018767 || rmse: 0.54488816294135
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 2 || learning rate: 0.005 || r-squared: 0.044973710144601824 || rmse: 0.5385239027947738
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 2 || learning rate: 0.01 || r-squared: 0.03300632597611977 || rmse: 0.5348653086091321
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 4 || learning rate: 0.001 || r-squared: 0.04080364711186553 || rmse: 0.5442989736638645
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 4 || learning rate: 0.005 || r-squared: 0.027269674159874975 || rmse: 0.5370607225504445
N_trees: 100 || loss: LeastAbsoluteDeviation || sampling rate: 0.5 || max nodes: 4 || learning rate: 0.01 || r-squared: 0.02277213436815181 || rmse: 0.53326

In [None]:
# Top five grid points for the no-GEDI model by squared correlation.
df_no_gedi.sort_values(by="r_squared", ascending=False).head(5)

Unnamed: 0,learning_rate,loss,max_nodes,n_trees,r_squared,rmse,sampling_rate
52,0.005,LeastAbsoluteDeviation,4.0,500.0,0.05929,0.524952,0.9
9,0.001,LeastAbsoluteDeviation,4.0,100.0,0.055586,0.54438,0.7
35,0.01,LeastAbsoluteDeviation,4.0,200.0,0.054657,0.526245,0.9
27,0.001,LeastAbsoluteDeviation,4.0,200.0,0.053847,0.542145,0.7
50,0.01,LeastAbsoluteDeviation,2.0,500.0,0.051802,0.528449,0.9


In [None]:
# Fit the best no-GEDI configuration from the grid search.
model_no_gedi = (
    ee.Classifier
    .smileGradientTreeBoost(numberOfTrees=500, shrinkage=0.005, samplingRate=0.9, maxNodes=4, seed=42)
    .setOutputMode("REGRESSION")
    .train(training_no_gedi, "frac_change_AGB_live", ee.List(["NDVI_jul", "b1"]))
)

# Predict on the holdout and pull the actual / predicted values client-side.
holdout_no_gedi = validation_no_gedi.classify(model_no_gedi, "predicted")
test_no_gedi = holdout_no_gedi.select(["predicted", "frac_change_AGB_live"]).getInfo()

actual_no_gedi = [feat["properties"]["frac_change_AGB_live"] for feat in test_no_gedi["features"]]
predicted_no_gedi = [feat["properties"]["predicted"] for feat in test_no_gedi["features"]]

# Squared Pearson correlation between actual and predicted values.
corr_matrix = np.corrcoef(actual_no_gedi, predicted_no_gedi)
corr_xy = corr_matrix[0, 1]
print(corr_xy ** 2)

0.05928961532963118


In [None]:
# Apply the fallback model to every pixel where gedi_mask is NOT set.
# NOTE(review): input_features carries more bands than the two the model was
# trained on. Presumably classify() only reads the trained input properties —
# confirm.
classified_no_gedi = input_features.updateMask(gedi_mask.Not()).classify(model_no_gedi)

In [None]:
# Merge the two predictions into one image. Their masks are complementary
# (gedi_mask versus its negation), so mosaic() fills each pixel from one of them.
classified_col = ee.ImageCollection.fromImages([classified_w_gedi, classified_no_gedi])
# Convert the predicted fractional change into an absolute value:
# (1 + predicted fraction) * hudak_agb_2016.
# NOTE(review): presumably hudak_agb_2016 is the 2016 AGB baseline image — confirm.
combined = classified_col.mosaic().add(ee.Image(1)).multiply(hudak_agb_2016)

In [None]:
# Sanity check: min and max of the combined prediction over the AOI at 30 m.
print(combined.reduceRegion(ee.Reducer.minMax(), aoi, scale=30).getInfo())

{'classification_max': 1039.9997681230307, 'classification_min': 13.633646354079247}


In [None]:
# Define (but do not start) a batch export of the combined prediction to an
# Earth Engine asset at 30 m over the AOI.
# NOTE(review): the assetId is hard-coded to one user's asset folder — change
# it before running under another account.
task = ee.batch.Export.image.toAsset(combined, description="hudak_2051_agb", assetId="users/connorr/agb_2051_predicted", region=aoi, scale=30)

In [None]:
# Submit the export task; it runs asynchronously on Earth Engine.
task.start()

In [None]:
# Poll the task once; re-run this cell until 'state' is 'COMPLETED' or 'FAILED'.
task.status()

{'attempt': 1,
 'creation_timestamp_ms': 1612219409588,
 'description': 'hudak_2051_agb',
 'destination_uris': ['https://code.earthengine.google.com/?asset=projects/earthengine-legacy/assets/users/connorr/agb_2051_predicted'],
 'id': '6NGZEY2RN2QZNZEOZFYBBPJA',
 'name': 'projects/earthengine-legacy/operations/6NGZEY2RN2QZNZEOZFYBBPJA',
 'start_timestamp_ms': 1612219427403,
 'state': 'COMPLETED',
 'task_type': 'EXPORT_IMAGE',
 'update_timestamp_ms': 1612219776717}