In [1]:
import ee
import geemap
import pandas as pd
import os
from datetime import datetime
from tqdm import tqdm 
import time
from datetime import datetime

In [2]:
geemap.set_proxy(port=7890)
ee.Authenticate()
ee.Initialize(project='socd-liuziyan')

## import socd and AlphaEarth data

In [3]:
soc_data = ee.FeatureCollection('projects/socd-liuziyan/assets/1_final_socd_0_100cm')

embedding_dataset = ee.ImageCollection('GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL')
embedding_2024 = embedding_dataset.filterDate('2024-01-01', '2025-01-01').mosaic()

## building and tuning the RF model

In [None]:
# Split the SOC data into training (80%) and validation (20%) sets
soc_data = soc_data.randomColumn(columnName='random', seed=42)
training_fc = soc_data.filter(ee.Filter.lt('random', 0.8))
validation_fc = soc_data.filter(ee.Filter.gte('random', 0.8))

# Sample the embedding image at the locations of the SOC data
training = embedding_2024.sampleRegions(
    collection=training_fc, properties=['socd_100cm'], scale=10, tileScale=16
)
test = embedding_2024.sampleRegions(
    collection=validation_fc, properties=['socd_100cm'], scale=10, tileScale=16
)

# Tune the number of trees in Random Forest
band_names = embedding_2024.bandNames()
numTreesList = ee.List.sequence(0, 200, 20)

def tune(num):
    num = ee.Number(num)
    model = ee.Classifier.smileRandomForest(
        numberOfTrees=num,
        variablesPerSplit=8,
        minLeafPopulation=5,
        bagFraction=0.8,
        seed=42
    ).setOutputMode('REGRESSION').train(
        features=training,
        classProperty='socd_100cm',
        inputProperties=band_names
    )
    preds = test.classify(model).map(
        lambda f: f.set(
            'residual', ee.Number(f.get('classification')).subtract(f.get('socd_100cm')),
            'sq_residual', ee.Number(f.get('classification')).subtract(f.get('socd_100cm')).pow(2)
        )
    )
    mse = ee.Number(preds.reduceColumns(ee.Reducer.mean(), ['sq_residual']).get('mean'))
    rmse = ee.Number(mse).sqrt()
    return ee.Feature(None, {'numberOfTrees': num, 'rmse': rmse})

tuning_fc = ee.FeatureCollection(numTreesList.map(tune))
best = tuning_fc.sort('rmse', True).first()
optimal_num_trees = ee.Number(best.get('numberOfTrees'))


In [25]:
print('optimal numberOfTrees =', ee.Number(optimal_num_trees).getInfo())

optimal numberOfTrees = 160


In [26]:
# Training the optimal model
optimal_model = ee.Classifier.smileRandomForest(
    numberOfTrees=160,
    variablesPerSplit=8,
    minLeafPopulation=5,
    bagFraction=0.8,
    seed=42
).setOutputMode('REGRESSION').train(
    features=training,
    classProperty='socd_100cm',
    inputProperties=band_names
)

## predict the SOCD in mountain areas by random forest

In [27]:
mountain_mask = ee.Image('projects/socd-liuziyan/assets/global_mountain_classification_UNEP')
kernel = ee.Kernel.circle(radius=2500, units='meters', normalize=True)
dilated = mountain_mask.focal_max(kernel=kernel, iterations=1)
closed_mask = dilated.focal_min(kernel=kernel, iterations=1)

embedding_2024_masked = embedding_2024.updateMask(closed_mask)
orgc_prediction = embedding_2024_masked.classify(optimal_model)
orgc_prediction = orgc_prediction.rename('socd_100cm_prediction')

In [28]:
Map = geemap.Map(center=[20,0], zoom=2)
prediction_vis = {
    'min': 0,
    'max': 50,
    'palette': [    
        '#543005', '#bc8120', '#dfc27d', '#f6e8c3',
        '#f5f5f5', '#c7eae5', '#80cdc1', '#35978f',
        '#01665e', '#003c30'
    ]
}
Map.addLayer(orgc_prediction, prediction_vis, 'SOCD Prediction')

In [29]:
Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(child…

## building the SVR and LR model for comparison

In [30]:
svm_model = ee.Classifier.libsvm({
    'svmType': 'EPSILON_SVR',   # 支持向量回归
    'kernelType': 'RBF',
    'gamma': 0.1,
    'cost': 10,
    'epsilon': 0.1
}).setOutputMode('REGRESSION').train(
    features=training,
    classProperty='socd_100cm',
    inputProperties=band_names
)

linear_model = ee.Classifier.smileLinearRegression().setOutputMode('REGRESSION').train(
    features=training,
    classProperty='socd_100cm',
    inputProperties=band_names
)

AttributeError: type object 'Classifier' has no attribute 'smileLinearRegression'