In [1]:
# Earth Engine Python client and the geemap helpers used throughout the notebook.
import ee
import geemap
import collections
# Re-create the `collections.Callable` alias from `collections.abc.Callable`;
# presumably a dependency still looks the name up in the old place — TODO confirm.
collections.Callable = collections.abc.Callable
# Initialize the Earth Engine client before any ee.* objects are built below.
ee.Initialize()

In [10]:
# Interactive geemap map that the layers below are added to.
Map = geemap.Map()


# Earth Engine assets used by the analysis.
s2 = ee.ImageCollection("COPERNICUS/S2_SR")  # Sentinel-2 SR image collection
basin = ee.FeatureCollection("WWF/HydroSHEDS/v1/Basins/hybas_7")  # HydroSHEDS basin features
gcp = ee.FeatureCollection("users/ujavalgandhi/e2e/arkavathy_gcps")  # points sampled later for their 'landcover' property
alos = ee.Image("JAXA/ALOS/AW3D30/V2_2")  # ALOS image; its 'AVE_DSM' band supplies elevation and slope

# Keep only the basin feature with this HYBAS_ID and use its geometry as the
# study-area boundary for filtering, clipping and region statistics.
arkavathy = basin.filter(ee.Filter.eq('HYBAS_ID', 4071139640))
boundary = arkavathy.geometry()
# Display settings for the B4/B3/B2 bands.
# NOTE(review): rgbVis is not referenced again in the visible cells; the RGB
# layer is drawn with visParams instead.
rgbVis = {
  'min': 0.0,
  'max': 3000,
  'bands': ['B4', 'B3', 'B2'],
}
# Function to mask cloud, cirrus and cloud-shadow pixels in a Sentinel-2 SR image

def maskCloudAndShadowsSR(image):
    """Mask out cloudy, cirrus and cloud-shadow pixels of one image.

    A pixel is kept only when all three conditions hold:
      * its 'MSK_CLDPRB' value is below 10,
      * its 'SCL' value is not 10 (cirrus),
      * its 'SCL' value is not 3 (cloud shadow).

    Snow is not masked. The previous version selected the 'MSK_SNWPRB' band
    but never used it, so that dead selection has been removed; the resulting
    mask is unchanged.

    Args:
        image: image object providing the 'MSK_CLDPRB' and 'SCL' bands.

    Returns:
        The value of ``image.updateMask(mask)`` for the combined mask.
    """
    lowCloud = image.select('MSK_CLDPRB').lt(10)
    scl = image.select('SCL')
    shadow = scl.eq(3)   # 3 = cloud shadow
    cirrus = scl.eq(10)  # 10 = cirrus
    # Low cloud probability AND not cirrus AND not cloud shadow.
    mask = lowCloud.And(cirrus.neq(1)).And(shadow.neq(1))
    return image.updateMask(mask)

# 2019 scenes over the basin with under 30% cloudy pixels, cloud-masked per
# image and reduced to the bands whose names match 'B.*'.
filtered = (
    s2.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 30))
    .filter(ee.Filter.date('2019-01-01', '2020-01-01'))
    .filter(ee.Filter.bounds(boundary))
    .map(maskCloudAndShadowsSR)
    .select('B.*')
)

# Per-pixel median of the filtered scenes, clipped to the basin boundary.
composite = filtered.median().clip(boundary)

visParams = {
    'bands': ['B4', 'B3', 'B2'],
    'min': 0,
    'max': 3000,
    'gamma': 1.2,
}
Map.centerObject(boundary)
Map.addLayer(composite, visParams, 'RGB')

def addIndices(image):
    """Return `image` with four spectral-index bands appended.

    Bands are added in this order:
      * 'ndvi'  - normalized difference of B8 and B4
      * 'ndbi'  - normalized difference of B11 and B8
      * 'mndwi' - normalized difference of B3 and B11
      * 'bsi'   - ((B11 + B4) - (B8 + B2)) / ((B11 + B4) + (B8 + B2))
    """
    def _normDiff(bandPair, label):
        # One normalized-difference band, renamed to `label`.
        return image.normalizedDifference(bandPair).rename([label])

    bsiOperands = {
        'X': image.select('B11'),  # swir1
        'Y': image.select('B4'),   # red
        'A': image.select('B8'),   # nir
        'B': image.select('B2'),   # blue
    }
    bareSoil = image.expression(
        '(( X + Y ) - (A + B)) /(( X + Y ) + (A + B)) ', bsiOperands
    ).rename('bsi')

    withIndices = image
    for extraBand in (
        _normDiff(['B8', 'B4'], 'ndvi'),
        _normDiff(['B11', 'B8'], 'ndbi'),
        _normDiff(['B3', 'B11'], 'mndwi'),
        bareSoil,
    ):
        withIndices = withIndices.addBands(extraBand)
    return withIndices

# Append the spectral-index bands produced by addIndices to the composite.
composite = addIndices(composite)

# Calculate Slope and Elevation
# Both layers are derived from the 'AVE_DSM' band of the ALOS image.
elev = alos.select('AVE_DSM').rename('elev')
slope = ee.Terrain.slope(alos.select('AVE_DSM')).rename('slope')

# Stack 'elev' and 'slope' onto the composite as additional bands.
composite = composite.addBands(elev).addBands(slope)

# Normalize the image

# Machine learning algorithms work best on images when all features have
# the same range

# Function to Normalize Image
# Pixel Values should be between 0 and 1
# Formula is (x - xmin) / (xmax - xmin)
#**************************************************************************
def normalize(image):
    """Rescale every band of `image` with (x - min) / (max - min).

    The per-band minimum and maximum are obtained with reduceRegion over the
    module-level `boundary` geometry, and the values are looked up in the
    order given by image.bandNames().

    Args:
        image: the image to rescale.

    Returns:
        The image produced by subtracting the minima and dividing by the
        (max - min) range.
    """
    def _bandExtremes(reducer):
        # Same region settings for both statistics; only the reducer differs.
        return image.reduceRegion(
            reducer=reducer,
            geometry=boundary,
            scale=20,
            maxPixels=1e9,
            bestEffort=True,
            tileScale=16,
        )

    names = image.bandNames()
    lowest = ee.Image.constant(_bandExtremes(ee.Reducer.min()).values(names))
    highest = ee.Image.constant(_bandExtremes(ee.Reducer.max()).values(names))

    span = highest.subtract(lowest)
    return image.subtract(lowest).divide(span)

# Rescale all composite bands with normalize() before sampling.
composite = normalize(composite)
# Add a random column and split the GCPs into training and validation set
gcp = gcp.randomColumn()

# This being a simpler classification, we take 60% of the points
# for training (random < 0.6) and the remaining 40% for validation.
# Normal recommended ratio is
# 70% training, 30% validation
trainingGcp = gcp.filter(ee.Filter.lt('random', 0.6))
validationGcp = gcp.filter(ee.Filter.gte('random', 0.6))
# Show the validation points on the map (added without a layer name).
Map.addLayer(validationGcp)
# Overlay the point on the image to get training data.
# Only the 'landcover' property of each point is carried over.
training = composite.sampleRegions(
  collection=trainingGcp,
  properties=['landcover'],
  scale=10,
  tileScale=16
)
#print(training.getInfo())
# Train a classifier.
# Random forest built with 50 as its argument, predicting 'landcover' from
# every band of the composite.
classifier = ee.Classifier.smileRandomForest(50) \
.train(
  features=training,
  classProperty='landcover',
  inputProperties=composite.bandNames()
)

#**************************************************************************
# Feature Importance
#**************************************************************************

# Run .explain() to see what the classifier looks like
#print(classifier.explain().getInfo())

# Calculate variable importance
importance = ee.Dictionary(classifier.explain().get('importance'))

# Calculate relative importance
# The total gets its own name: binding it to `sum` would replace Python's
# built-in sum() for every cell that runs after this one.
importanceTotal = importance.values().reduce(ee.Reducer.sum())

def func_wuc(key, val):
    """Express one importance value as a percentage of the total.

    Used as the callback for importance.map().

    Args:
        key: dictionary key of the entry (not used in the calculation).
        val: importance value of the entry.

    Returns:
        val * 100 / importanceTotal, built with ee.Number operations.
    """
    return (ee.Number(val).multiply(100)).divide(importanceTotal)

relativeImportance = importance.map(func_wuc)

#print(relativeImportance.getInfo())

#**************************************************************************
# Hyperparameter Tuning
#**************************************************************************

# Sample the composite at the validation points; these samples are what the
# tuning functions below classify to measure accuracy.
test = composite.sampleRegions(
  collection=validationGcp,
  properties=['landcover'],
  scale=10,
  tileScale=16
)

# Tune the numberOfTrees parameter.
# Candidate tree counts from 10 to 150 in steps of 10.
numTreesList = ee.List.sequence(10, 150, 10)

def func_tree(numTrees):
    """Train a random forest with `numTrees` trees and score it on `test`.

    The classifier is trained on the module-level `training` samples using
    every band of `composite` as input.

    Args:
        numTrees: value passed to ee.Classifier.smileRandomForest.

    Returns:
        The accuracy of the error matrix comparing 'landcover' with
        'classification' on the classified `test` samples.
    """
    forest = ee.Classifier.smileRandomForest(numTrees)
    trained = forest.train(
        features=training,
        classProperty='landcover',
        inputProperties=composite.bandNames(),
    )

    # Here a table is classified instead of an image;
    # classifiers work on both images and tables.
    confusion = test.classify(trained).errorMatrix('landcover', 'classification')
    return confusion.accuracy()

# One validation accuracy per candidate number of trees.
accuracies = numTreesList.map(func_tree)

# Tuning Multiple Parameters
# We can tune many parameters together using
# nested map() functions
# Let's tune 2 parameters
# numTrees and bagFraction
# numTreesList is assigned the same sequence as earlier in this cell.
numTreesList = ee.List.sequence(10, 150, 10)
# Candidate bag fractions from 0.1 to 0.9 in steps of 0.1.
bagFractionList = ee.List.sequence(0.1, 0.9, 0.1)

def func_trees(numTrees):
    """Score one tree count against every entry of `bagFractionList`.

    For each bag fraction a random forest is trained on `training` with all
    bands of `composite` as inputs, then evaluated on the `test` samples.

    Args:
        numTrees: number of trees used for every classifier in this sweep.

    Returns:
        The result of mapping over `bagFractionList`: one geometry-less
        ee.Feature per bag fraction with the properties 'accuracy',
        'numberOfTrees' and 'bagFraction'.
    """
    def _evaluate(bagFraction):
        forest = ee.Classifier.smileRandomForest(
            numberOfTrees=numTrees,
            bagFraction=bagFraction,
        )
        trained = forest.train(
            features=training,
            classProperty='landcover',
            inputProperties=composite.bandNames(),
        )

        # Here a table is classified instead of an image;
        # classifiers work on both images and tables.
        confusion = test.classify(trained).errorMatrix('landcover', 'classification')
        record = {
            'accuracy': confusion.accuracy(),
            'numberOfTrees': numTrees,
            'bagFraction': bagFraction,
        }
        return ee.Feature(None, record)

    return bagFractionList.map(_evaluate)
    

# func_trees yields one list per tree count; flatten() merges them into a
# single list of features. This overwrites the single-parameter `accuracies`
# computed earlier in this cell.
accuracies = numTreesList.map(func_trees).flatten()
resultFc = ee.FeatureCollection(accuracies)

# Export the result as CSV if you wish

# Display the map as the cell output.
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

In [3]:
# Create a FeatureCollection so we can chart it
# It holds a single geometry-less feature whose properties are the
# relative importance values.
importanceFc = ee.FeatureCollection([
  ee.Feature(None, relativeImportance)
])

In [4]:
# Inspect the Python-side type of the collection object.
type(importanceFc)

ee.featurecollection.FeatureCollection

In [None]:
# Convert the importance collection with geemap's ee_to_pandas helper.
# NOTE(review): this cell has no execution count, so it appears not to have
# been run when the notebook was saved — confirm it works.
df = geemap.ee_to_pandas(importanceFc)

In [17]:
#simple bar graph displaying importance of each band
#code to base graph on in future
#chart = ui.Chart.feature.byProperty({
#  'features': importanceFc
#}).setOptions({
#      'title': 'Feature Importance',
#      'vAxis': {'title': 'Importance'},
#      'hAxis': {'title': 'Feature'}
#  })
#print(chart)

In [11]:
# Recompute the single-parameter tuning so that `accuracies` again holds one
# accuracy value per entry of numTreesList; the earlier cell left it holding
# the flattened features from the two-parameter tuning.
accuracies = numTreesList.map(func_tree)

ee.ee_list.List

In [13]:
# Fetch the evaluated accuracies into a local Python object via getInfo().
accuracy = accuracies.getInfo()

In [15]:
# NOTE(review): pandas is imported here rather than alongside the other
# imports in the first cell.
import pandas as pd
# Wrap the fetched accuracies in a DataFrame; no number-of-trees column is
# attached at this point.
data = pd.DataFrame(accuracy)

In [22]:
#simple graph displaying accuracy in relation to number of trees (every ten)
#either add in new column which will be x, 15 rows that 10 step up to 150, or just make that set at time of graphing
#code to base graph on in future
#var chart = ui.Chart.array.values({
  #array: ee.Array(accuracies),
  #axis: 0,
  #xLabels: numTreesList
  #}).setOptions({
  #    title: 'Hyperparameter Tuning for the numberOfTrees Parameters',
  #    vAxis: {title: 'Validation Accuracy'},
  #    hAxis: {title: 'Number of Trees', gridlines: {count: 15}}
  #});
#print(chart)