# Set up the environment

Before running the notebook, please install the necessary packages and environment by running the following shell commands in your terminal:

```bash
# Create the conda environment from the provided environment file
conda env create -f ../conda_env_pkgs.yml -n soc_model_env

# Activate the new environment
conda activate soc_model_env

# Launch Jupyter Notebook from within the environment
jupyter notebook
```


In [87]:
import json
import ee
import geemap
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
from sklearn.metrics import mean_squared_error, r2_score

# Authenticate and initialize Earth Engine.
# ee.Authenticate() runs the credential flow; ee.Initialize() then binds this
# session to the Cloud project named below.
# NOTE(review): the project ID is hard-coded to a personal project — other users
# will presumably need to substitute their own before running the notebook.
ee.Authenticate()
ee.Initialize(project= "ee-christopherharrellgis")

# Create the interactive geemap map that later cells add layers to.
# Alternative constructor with a satellite basemap, kept for reference:
#Map = geemap.Map(basemap = "SATELLITE")
Map = geemap.Map()
# Bare last expression so the notebook renders the map widget as the cell output.
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [107]:
# Global variables
# Inclusive year bounds passed to ee.Filter.calendarRange(..., 'year') by the
# image-collection filters in later cells.
start_year = 2023
end_year = 2024
# Month bounds passed to ee.Filter.calendarRange(..., 'month').
# NOTE(review): month_end < month_start. Per the Earth Engine documentation,
# calendarRange wraps in that case (value >= start OR value <= end), so 3..2
# appears to select all twelve months — confirm this is the intent.
month_start = 3
month_end = 2
# Upper bound (percent) for the scene-level cloud-cover metadata filters.
max_cloud_cover = 20

#### Import SOC Samples and Study Area as Feature Collections

In [None]:
# Create a FeatureCollection from SOC_samples.csv
def create_feature_collection(df):
    """Build an ee.FeatureCollection of point features from a samples table.

    Every row of ``df`` becomes one ee.Feature whose geometry is the point
    ``[longitude, latitude]`` and whose properties are the row's ``plot_no``,
    ``MgC_per_ha`` and ``MgC_SE`` values.

    Args:
        df: pandas DataFrame with the columns ``longitude``, ``latitude``,
            ``plot_no``, ``MgC_per_ha`` and ``MgC_SE``.

    Returns:
        ee.FeatureCollection holding one feature per row, in row order.
    """
    property_columns = ('plot_no', 'MgC_per_ha', 'MgC_SE')

    def to_feature(record):
        # Earth Engine points are given as [x, y], i.e. longitude first.
        location = ee.Geometry.Point([record['longitude'], record['latitude']])
        attributes = {column: record[column] for column in property_columns}
        return ee.Feature(location, attributes)

    return ee.FeatureCollection([to_feature(record) for _, record in df.iterrows()])

# --- Study area -------------------------------------------------------------
# Boundary file (GeoJSON), given relative to the notebook's working directory.
study_area_geojson = '../soc/data/study_area.geojson'

# Parse the GeoJSON into a dict and hand it to Earth Engine as a FeatureCollection.
with open(study_area_geojson) as f:
    geojson_data = json.load(f)
study_area = ee.FeatureCollection(geojson_data)

# --- SOC samples ------------------------------------------------------------
# Read the sample table, then turn each row into a point feature.
soc_samples_df = pd.read_csv("../soc/data/SOC_samples.csv")
soc_samples_points = create_feature_collection(soc_samples_df)

# --- Map layers -------------------------------------------------------------
# Both layers are registered hidden (final argument False); the view is then
# centred on the study area.
Map.addLayer(study_area, {'color': 'red'}, 'Study Area', False)
Map.addLayer(soc_samples_points, {'color': 'yellow'}, 'Sample Points', False)
Map.centerObject(study_area, zoom=11)

In [None]:
""" wdpa = ee.FeatureCollection('WCMC/WDPA/current/polygons')

# Filter WDPA features that spatially intersect with your study area
overlapping_pas = wdpa.filter(ee.Filter.intersects('.geo', soc_samples_points.geometry()))

# Get the list of names
protected_areas_with_names = overlapping_pas.filter(ee.Filter.notNull(['NAME']))

# Get the names as a list
protected_area_names = protected_areas_with_names.aggregate_array('NAME')

# Print the names of protected areas that contain sample points
print('Protected Areas containing sample points:')
print(protected_area_names.getInfo())

Map.addLayer(overlapping_pas, {'color': 'blue'}, 'PAS') """

#### Generate an Image for each environmental covariate
- Elevation (Copernicus DEM)
- Slope (Copernicus DEM)
- Mean Annual Precipitation (MAP) (TerraClimate)
- Mean Annual Temperature (MAT) (TerraClimate)
- Mean NDVI (Sentinel-2 SR)
- Mean EVI (Sentinel-2 SR)
- ESA Landcover Classification (ESA WorldCover)

In [108]:

# Copernicus GLO-30 DEM: mosaic the tiles, keep the 'DEM' band and pin the
# result to an EPSG:4326 grid with scale=10 before deriving slope from it.
# NOTE(review): scale=10 is finer than the 30 m product this cell is labelled
# with — confirm the oversampling is intended before using slope as a covariate.
dem = (
    ee.ImageCollection('COPERNICUS/DEM/GLO30')
    .mosaic()
    .select('DEM')
    .reproject(crs='EPSG:4326', scale=10)
)
slope = ee.Terrain.slope(dem)

# Display settings: elevation stretched over 0-3000, slope over 0-60, both with
# the same six-colour ramp.
vis_params_elevation = dict(
    min=0,
    max=3000,
    palette=['#00FFFF', '#0000FF', '#008000', '#FFFF00', '#FF0000', '#800000'],
)
vis_params_slope = dict(
    min=0,
    max=60,
    palette=['#00FFFF', '#0000FF', '#008000', '#FFFF00', '#FF0000', '#800000'],
)

# Register both rasters clipped to the study area, hidden by default.
Map.addLayer(dem.clip(study_area), vis_params_elevation, "DEM", False)
Map.addLayer(slope.clip(study_area), vis_params_slope, "slope", False)


In [130]:
def config_TC_bands(image):
    """Reduce a TerraClimate image to pr, tmmn and tmmx with rescaled temperatures.

    Args:
        image: image exposing the bands 'pr', 'tmmn' and 'tmmx'.

    Returns:
        The 'pr' band with 'tmmn' and 'tmmx' (each multiplied by 0.1) added,
        carrying over the 'system:time_start' property of ``image``.
    """
    def rescaled(band_name):
        # Multiply by 0.1 and keep the original band name.
        return image.select(band_name).multiply(0.1).rename(band_name)

    temperature_bands = [rescaled('tmmn'), rescaled('tmmx')]
    stacked = image.select('pr').addBands(temperature_bands, overwrite=True)
    return stacked.copyProperties(image, ['system:time_start'])

def monthly_avg_temp(img):
    """Return a single-band 'tavg' image equal to (tmmx + tmmn) / 2.

    Args:
        img: image exposing the bands 'tmmx' and 'tmmn'.

    Returns:
        The averaged band renamed to 'tavg', with all properties of ``img``
        copied onto it.
    """
    operands = {name: img.select(name) for name in ('tmmx', 'tmmn')}
    mean_temperature = img.expression('(tmmx + tmmn) / 2', operands)
    return mean_temperature.rename('tavg').copyProperties(img, img.propertyNames())

# TerraClimate images for the configured year/month ranges over the study
# area, with bands normalised by config_TC_bands.
imgCol_TC = (
    ee.ImageCollection('IDAHO_EPSCOR/TERRACLIMATE')
    .filter(ee.Filter.calendarRange(start_year, end_year, 'year'))
    .filter(ee.Filter.calendarRange(month_start, month_end, 'month'))
    .filterBounds(study_area)
    .map(config_TC_bands)
)

terraclimate_temp = imgCol_TC.select(['tmmn', 'tmmx'])
terraclimate_precip = imgCol_TC.select(['pr'])

# Temperature: mean over the collection of each image's (tmmx + tmmn) / 2, and
# the per-band standard deviation of tmmn / tmmx across the collection.
tc_avg_monthly_temp = terraclimate_temp.map(monthly_avg_temp).mean()
tc_stddev_monthly_temp = (
    terraclimate_temp
    .reduce(ee.Reducer.stdDev())
    .rename(['std_tmmn', 'std_tmmx'])
)

# Precipitation: mean and standard deviation of 'pr' across the collection.
tc_avg_monthly_precip = terraclimate_precip.mean()
tc_stddev_monthly_precip = terraclimate_precip.reduce(ee.Reducer.stdDev())

# Display settings for the four climate layers.
avg_temp_vis_params = dict(
    min=-10,
    max=40,
    palette=['blue', 'cyan', 'green', 'yellow', 'orange', 'red', 'darkred'],
)
stddev_temp_vis_params = dict(
    min=0,
    max=15,
    palette=['white', 'lightyellow', 'yellow', 'orange', 'red', 'darkred'],
)
avg_precip_vis_params = dict(
    min=0,
    max=2500,
    palette=['lightblue', 'blue', 'darkblue', 'purple', 'darkred'],
)
stddev_precip_vis_params = dict(
    min=0,
    max=500,
    palette=['white', 'lightgreen', 'green', 'yellow', 'orange', 'red'],
)

# Register every layer clipped to the study area and hidden by default.
Map.addLayer(tc_avg_monthly_temp.clip(study_area), avg_temp_vis_params, 'temp_avg', False)
Map.addLayer(
    tc_stddev_monthly_temp.select('std_tmmn').clip(study_area),
    stddev_temp_vis_params, 'tmmn_stdev', False,
)
Map.addLayer(
    tc_stddev_monthly_temp.select('std_tmmx').clip(study_area),
    stddev_temp_vis_params, 'tmmx_stdev', False,
)
Map.addLayer(tc_avg_monthly_precip.clip(study_area), avg_precip_vis_params, 'precip_avg', False)
Map.addLayer(tc_stddev_monthly_precip.clip(study_area), stddev_precip_vis_params, 'precip_stdev', False)


In [144]:
def config_s2_bands(img):
    """Add reflectance-scaled copies of B2/B4/B8 named B/R/NIR.

    This is the ONLY place where Sentinel-2 digital numbers are converted to
    reflectance (x 0.0001). Scaling more than once silently breaks any index
    with an additive constant, such as EVI.

    Args:
        img: Sentinel-2 SR image whose B2, B4 and B8 bands still hold the
            catalogue's unscaled digital numbers.

    Returns:
        ``img`` with three extra bands (B, R, NIR) holding B2, B4 and B8
        multiplied by 0.0001, keeping the 'system:time_start' property.
    """
    bands = ['B2', 'B4', 'B8']
    new_bands = ['B', 'R', 'NIR']
    scaled = img.select(bands).multiply(0.0001).rename(new_bands)
    return img.addBands(scaled).copyProperties(img, ['system:time_start'])


def mask_s2_clouds(image):
    """Mask cloud-affected pixels using the SCL band.

    Pixels whose SCL class is 3, 7, 8, 9 or 10 are masked out. Band values are
    left untouched: reflectance scaling happens once, in config_s2_bands.

    Args:
        image: Sentinel-2 SR image that includes the 'SCL' band.

    Returns:
        ``image`` with the mask applied and all of its properties copied over.
    """
    scl = image.select(['SCL'])
    cloudShadow = scl.eq(3)
    cloudsLow = scl.eq(7)
    cloudsMed = scl.eq(8)
    cloudsHigh = scl.eq(9)
    cirrus = scl.eq(10)
    mask = (cloudShadow.Or(cloudsLow).Or(cloudsMed).Or(cloudsHigh).Or(cirrus).Not())
    return image.updateMask(mask).copyProperties(image, image.propertyNames())


# Sentinel-2 surface reflectance for the configured period over the study area.
# - The HARMONIZED collection is used because the plain 'COPERNICUS/S2_SR'
#   collection is superseded and its 2022+ scenes carry a +1000 DN offset.
# - The scene-level cloud filter uses the shared max_cloud_cover setting.
# - Masking needs SCL, so the band subset is taken only after mask_s2_clouds.
imgCol_S2_SR = (
    ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    .filter(ee.Filter.calendarRange(start_year, end_year, 'year'))
    .filter(ee.Filter.calendarRange(month_start, month_end, 'month'))
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', max_cloud_cover))
    .filterBounds(study_area)
    .map(mask_s2_clouds)
    .select(['B2', 'B4', 'B8'])
    .map(config_s2_bands)
)


def calc_ndvi(img):
    """Return a single-band 'NDVI' image computed from the NIR and R bands.

    Args:
        img: image carrying the 'NIR' and 'R' bands added by config_s2_bands.

    Returns:
        normalizedDifference(['NIR', 'R']) renamed to 'NDVI', with all
        properties of ``img`` copied onto it.
    """
    ndvi = img.normalizedDifference(['NIR', 'R']).rename('NDVI')
    return ndvi.copyProperties(img, img.propertyNames())


def calc_evi(img):
    """Return a single-band 'EVI' image computed from scaled reflectance.

    EVI = 2.5 * (NIR - RED) / (NIR + 6 * RED - 7.5 * BLUE + 1). Because of the
    '+ 1' term the inputs must be reflectance, so the scaled NIR/R/B bands are
    used rather than the raw B8/B4/B2 digital numbers.

    Args:
        img: image carrying the 'NIR', 'R' and 'B' bands added by
            config_s2_bands.

    Returns:
        The EVI band, with all properties of ``img`` copied onto it.
    """
    evi = img.expression(
        '2.5 * ((NIR - RED) / (NIR + 6 * RED - 7.5 * BLUE + 1))',
        {
            'NIR': img.select('NIR'),
            'RED': img.select('R'),
            'BLUE': img.select('B')
        }
    ).rename('EVI')
    return evi.copyProperties(img, img.propertyNames())


# Per-pixel means of the two indices across the filtered collection.
mean_ndvi = imgCol_S2_SR.map(calc_ndvi).mean()

mean_evi = imgCol_S2_SR.map(calc_evi).mean()

# EVI visualization: values stretched over 0-1 on a purple-white-green ramp.
evi_vis = {
    'min': 0.0,
    'max': 1.0,
    'palette': ['purple', 'white', 'green']
}

# NDVI visualization: values stretched over 0-1 on a blue-white-green ramp.
ndvi_vis = {
    'min': 0.0,
    'max': 1.0,
    'palette': ['blue', 'white', 'green']
}

# Values noted by the author, presumably from an earlier evaluation of
# ndvi_stats below — TODO confirm they are still current:
#NDVI_max:0.6291760206222534
#NDVI_min:-0.10195349901914597
# Min/max of mean_ndvi inside the study area, reduced with scale=30.
# This only builds the request: nothing in this cell fetches or prints it.
ndvi_stats = mean_ndvi.reduceRegion(
    reducer=ee.Reducer.minMax(),
    geometry=study_area,  
    scale=30, 
    maxPixels=1e13  
)

# Only the EVI layer is registered (hidden by default); the NDVI layer is
# currently disabled.
#Map.addLayer(mean_ndvi.clip(study_area), ndvi_vis, 'NDVI', False)
Map.addLayer(mean_evi.clip(study_area), evi_vis, 'EVI', False)


In [98]:
# ESA WorldCover v200: take the collection's first image and display its
# 'Map' band, clipped to the study area.
lc = ee.ImageCollection('ESA/WorldCover/v200').first()
lc_vis = dict(bands=['Map'])
Map.addLayer(lc.clip(study_area), lc_vis, 'Land Cover')

In [None]:
# This script was adapted from an Open-Source Github repo here: https://github.com/leonsnill/lst_landsat/blob/master/lst_landsat.py


# Pixels whose LST is not greater than this value are masked by fun_mask_lst.
# LST has 273.15 subtracted in fun_lst, so this is presumably degrees Celsius.
t_threshold = 0

# Algorithm Specifications
# min/max ndvi: ndvi_v is the upper and ndvi_s the lower NDVI bound used by
# fun_fvc and fun_epsilon.
# NOTE(review): ndvi_s is negative, so fun_epsilon's condition
# 'NDVI < NDVI_s && NDVI >= 0' can never be true with these values.
ndvi_v = 0.62
ndvi_s = -0.1

# Veg, soil, water emissivity
epsilon_v = 0.985
epsilon_s = 0.96
epsilon_w = 0.99

# Coefficients for Landsat 8
# Consumed three at a time by fun_af1 (indices 0-2), fun_af2 (3-5) and
# fun_af3 (6-8) as the WV**2, WV and constant terms respectively.
cs_l8 = [0.04019, 0.02916, 1.01523,
         -0.38333, -1.50294, 0.20324,
         0.00918, 1.36072, -0.27514]

def config_l8_bands(img):
    """Rename Landsat 8 Collection 2 Level-2 bands and convert them to physical units.

    The Collection 2 Level-2 products store scaled integers that need both a
    multiplier and an offset. The Collection 1 factors used previously
    (0.0001 and 0.1) give meaningless values on these bands.

    Args:
        img: image from 'LANDSAT/LC08/C02/T1_L2' exposing SR_B2..SR_B7 and
            ST_B10.

    Returns:
        Image with bands B, G, R, NIR, SWIR1, SWIR2 (surface reflectance) and
        TIR (Kelvin), keeping the 'system:time_start' property. All other
        bands, including QA_PIXEL, are dropped, so cloud masking must run
        before this function.
    """
    bands = ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']
    thermal_band = ['ST_B10']
    new_bands = ['B', 'G', 'R', 'NIR', 'SWIR1', 'SWIR2']
    new_thermal_bands = ['TIR']
    # Surface reflectance = DN * 0.0000275 - 0.2
    vnirswir = img.select(bands).multiply(0.0000275).add(-0.2).rename(new_bands)
    # Temperature in Kelvin = DN * 0.00341802 + 149.0
    # NOTE(review): ST_B10 is the Level-2 surface-temperature band, while the
    # downstream gamma/delta functions treat TIR as a brightness temperature —
    # confirm that this is the intended input.
    tir = img.select(thermal_band).multiply(0.00341802).add(149.0).rename(new_thermal_bands)
    return vnirswir.addBands(tir).copyProperties(img, ['system:time_start'])

# Cloud mask for Surface Reflectance products
def mask_l8_clouds(img):
    """Mask cloud and cloud-shadow pixels using the Collection 2 QA_PIXEL band.

    In Collection 2 QA_PIXEL, bit 3 flags cloud and bit 4 flags cloud shadow
    (bit 5 is snow). The Collection 1 layout used previously (bit 3 = shadow,
    bit 5 = cloud) masked snow and left cloud shadows in place.

    Args:
        img: Landsat Collection 2 image that includes the 'QA_PIXEL' band.

    Returns:
        ``img`` with every pixel masked whose cloud or cloud-shadow bit is set.
    """
    cloud_bit = 1 << 3
    cloud_shadow_bit = 1 << 4
    qa = img.select('QA_PIXEL')
    # A pixel is kept only when both flags are clear.
    mask = qa.bitwiseAnd(cloud_bit).eq(0).And(
           qa.bitwiseAnd(cloud_shadow_bit).eq(0))
    return img.updateMask(mask)

# Radiometric Calibration
def fun_radcal(img):
    """Append a 'RADIANCE' band produced by ee.Algorithms.Landsat.calibratedRadiance.

    Args:
        img: Landsat image; the result is renamed with a single name, so it is
            expected to carry exactly one band.

    Returns:
        ``img`` with the calibrated band added as 'RADIANCE'.

    NOTE(review): this is mapped over the T1_TOA collection — confirm that
    calibratedRadiance is meant to run on TOA rather than raw DN imagery.
    """
    return img.addBands(
        ee.Algorithms.Landsat.calibratedRadiance(img).rename('RADIANCE')
    )

# L to ee.Image
def fun_l_addband(img):
    """Copy the joined radiance image into ``img`` as band 'L'.

    Reads the image stored under the 'L' property (the match key of join_l),
    takes its 'RADIANCE' band and appends it to ``img`` renamed to 'L'.
    """
    matched_radiance = ee.Image(img.get('L'))
    return img.addBands(matched_radiance.select('RADIANCE').rename('L'))

# NDVI - required for emissivity calculation
def fun_ndvi(img):
    """Append an 'NDVI' band: the normalized difference of the NIR and R bands.

    Args:
        img: image exposing the bands 'NIR' and 'R'.

    Returns:
        ``img`` with the extra band 'NDVI'.
    """
    return img.addBands(img.normalizedDifference(['NIR', 'R']).rename('NDVI'))

# FVC (Fraction Vegetation Cover) - required for emissivity calculation
def fun_fvc(img):
    """Append an 'FVC' band: ((NDVI - ndvi_s) / (ndvi_v - ndvi_s)) ** 2.

    Args:
        img: image exposing the band 'NDVI'.

    Returns:
        ``img`` with the extra band 'FVC'. The bounds come from the
        module-level ndvi_s / ndvi_v values at call time.
    """
    operands = {
        'NDVI': img.select('NDVI'),
        'NDVI_s': ndvi_s,
        'NDVI_v': ndvi_v,
    }
    squared_ratio = img.expression('((NDVI-NDVI_s)/(NDVI_v-NDVI_s))**2', operands)
    return img.addBands(squared_ratio.rename('FVC'))

# Scale Emissivity - required for LST calculation
def fun_epsilon_scale(img):
    """Append 'EPSILON_SCALE': epsilon_s + (epsilon_v - epsilon_s) * FVC.

    Args:
        img: image exposing the band 'FVC'.

    Returns:
        ``img`` with the extra band 'EPSILON_SCALE'. The emissivities come
        from the module-level epsilon_s / epsilon_v values at call time.
    """
    operands = {
        'FVC': img.select('FVC'),
        'epsilon_s': epsilon_s,
        'epsilon_v': epsilon_v,
    }
    blended = img.expression('epsilon_s+(epsilon_v-epsilon_s)*FVC', operands)
    return img.addBands(blended.rename('EPSILON_SCALE'))

# Emissivity (Epsilon) - required for LST calculation
def fun_epsilon(img):
    """Append an 'EPSILON' band chosen piecewise from the pixel's NDVI.

    Starting from a copy of the NDVI band, four ``where`` replacements are
    applied in sequence, each overwriting pixels where its condition holds:

    1. NDVI > ndvi_v                 -> epsilon_v
    2. NDVI < ndvi_s and NDVI >= 0   -> epsilon_s
    3. NDVI < 0                      -> epsilon_w
    4. ndvi_s <= NDVI <= ndvi_v      -> the image's 'EPSILON_SCALE' band

    Every condition is evaluated on the original NDVI band, and a later
    replacement wins where conditions overlap, so the order matters.

    Args:
        img: image exposing the bands 'NDVI' and 'EPSILON_SCALE'.

    Returns:
        ``img`` with the extra band 'EPSILON'.

    NOTE(review): with the module-level ndvi_s = -0.1, condition 2 can never be
    true, and pixels with -0.1 <= NDVI < 0 end up with EPSILON_SCALE (step 4
    overrides step 3) — confirm the thresholds are as intended.
    """
    # Working copy of NDVI that keeps the acquisition timestamp.
    pseudo = img.select(['NDVI']).set('system:time_start', img.get('system:time_start'))
    # 1. Above the upper NDVI bound.
    epsilon = pseudo.where(img.expression('NDVI > NDVI_v',
                                         {'NDVI': img.select('NDVI'),
                                          'NDVI_v': ndvi_v}), epsilon_v)
    # 2. Non-negative NDVI below the lower bound.
    epsilon = epsilon.where(img.expression('NDVI < NDVI_s && NDVI >= 0',
                                          {'NDVI': img.select('NDVI'),
                                           'NDVI_s': ndvi_s}), epsilon_s)
    # 3. Negative NDVI.
    epsilon = epsilon.where(img.expression('NDVI < 0',
                                          {'NDVI': img.select('NDVI')}), epsilon_w)
    # 4. Between the two bounds: use the FVC-scaled emissivity.
    epsilon = epsilon.where(img.expression('NDVI <= NDVI_v && NDVI >= NDVI_s',
                                          {'NDVI': img.select('NDVI'),
                                           'NDVI_v': ndvi_v,
                                           'NDVI_s': ndvi_s}), img.select('EPSILON_SCALE')).rename('EPSILON')
    return img.addBands(epsilon)

# Scale WV content
def fun_wv_scale(img):
    """Append 'WV_SCALED': the joined water-vapour image multiplied by 0.1.

    Reads the image stored under the 'WV' property (the match key of join_wv),
    multiplies it by 0.1, renames it to 'WV_SCALED' and requests bilinear
    resampling before adding it to ``img``.
    """
    joined_wv = ee.Image(img.get('WV'))
    return img.addBands(
        joined_wv.multiply(0.1).rename('WV_SCALED').resample('bilinear')
    )

# Atmospheric Functions - required for LST calculation
def _atmospheric_function(img, first_coeff, band_name):
    """Append a band equal to a*WV**2 + b*WV + c, evaluated on 'WV_SCALED'.

    a, b and c are the three consecutive cs_l8 entries starting at index
    ``first_coeff``. They are spliced into the expression text with str().
    """
    a, b, c = (str(value) for value in cs_l8[first_coeff:first_coeff + 3])
    polynomial = img.expression(
        '(' + a + '*(WV**2))+(' + b + '*WV)+(' + c + ')',
        {
            'WV': img.select('WV_SCALED')
        }
    )
    return img.addBands(polynomial.rename(band_name))

def fun_af1(img):
    """Append band 'AF1', built from cs_l8[0], cs_l8[1] and cs_l8[2]."""
    return _atmospheric_function(img, 0, 'AF1')

def fun_af2(img):
    """Append band 'AF2', built from cs_l8[3], cs_l8[4] and cs_l8[5]."""
    return _atmospheric_function(img, 3, 'AF2')

def fun_af3(img):
    """Append band 'AF3', built from cs_l8[6], cs_l8[7] and cs_l8[8]."""
    return _atmospheric_function(img, 6, 'AF3')

# Gamma Function - required for LST calculation
def fun_gamma(img):
    """Append band 'GAMMA' = (BT**2) / (1324 * L).

    BT is read from the image's 'TIR' band and L from its 'L' band.
    """
    operands = {'BT': img.select('TIR'), 'L': img.select('L')}
    return img.addBands(
        img.expression('(BT**2)/(1324*L)', operands).rename('GAMMA')
    )

# Delta Function - required for LST calculation
def fun_delta(img):
    """Append band 'DELTA' = BT - (BT**2) / 1324, with BT read from 'TIR'."""
    operands = {'BT': img.select('TIR')}
    return img.addBands(
        img.expression('BT-((BT**2)/1324)', operands).rename('DELTA')
    )

# Land Surface Temperature calculation
def fun_lst(img):
    """Append band 'LST' from the previously computed intermediate bands.

    LST = GAMMA * ((1 / EPSILON) * (AF1 * L + AF2) + AF3) + DELTA - 273.15

    Args:
        img: image exposing the bands GAMMA, DELTA, EPSILON, AF1, AF2, AF3
            and L.

    Returns:
        ``img`` with the extra band 'LST'.
    """
    input_bands = ('GAMMA', 'DELTA', 'EPSILON', 'AF1', 'AF2', 'AF3', 'L')
    # Each expression variable is bound to the band of the same name.
    operands = {band: img.select(band) for band in input_bands}
    surface_temperature = img.expression(
        '(GAMMA*(((1/EPSILON)*(AF1*L+AF2))+AF3)+DELTA)-273.15',
        operands,
    )
    return img.addBands(surface_temperature.rename('LST'))

def fun_mask_lst(img):
    """Mask every pixel whose 'LST' value is not greater than t_threshold.

    Args:
        img: image exposing the band 'LST'.

    Returns:
        ``img`` with the mask applied to all of its bands.
    """
    return img.updateMask(img.select('LST').gt(t_threshold))

# Create maxDifference-filter to match TOA and SR products
# Pairing rule shared by both joins: two images match when their
# 'system:time_start' values differ by at most two days (in milliseconds).
maxDiffFilter = ee.Filter.maxDifference(
    difference=2 * 86_400_000,
    leftField='system:time_start',
    rightField='system:time_start',
)

# Define joins
# Each join keeps the single best match per primary image. The match is stored
# under matchKey and the measured difference under 'timeDiff'.
join_wv = ee.Join.saveBest(matchKey='WV', measureKey='timeDiff')
join_l = ee.Join.saveBest(matchKey='L', measureKey='timeDiff')

# Main Execution

# Landsat 8 OLI-TIRS, TOA collection: only the thermal infrared band B10.
imgCol_L8_TOA = (
    ee.ImageCollection('LANDSAT/LC08/C02/T1_TOA')
    .filterBounds(study_area)
    .filter(ee.Filter.calendarRange(start_year, end_year, 'year'))
    .filter(ee.Filter.calendarRange(month_start, month_end, 'month'))
    .filter(ee.Filter.lt('CLOUD_COVER_LAND', max_cloud_cover))
    .select(['B10'])
)

# Landsat 8 Level-2 collection: cloud-masked first (needs QA_PIXEL), then
# renamed and rescaled by config_l8_bands.
imgCol_L8_SR = (
    ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
    .filterBounds(study_area)
    .filter(ee.Filter.calendarRange(start_year, end_year, 'year'))
    .filter(ee.Filter.calendarRange(month_start, month_end, 'month'))
    .filter(ee.Filter.lt('CLOUD_COVER_LAND', max_cloud_cover))
    .map(mask_l8_clouds)
    .map(config_l8_bands)
)

# NCEP/NCAR Water Vapor Product
imgCol_WV = (
    ee.ImageCollection('NCEP_RE/surface_wv')
    .filterBounds(study_area)
    .filter(ee.Filter.calendarRange(start_year, end_year, 'year'))
    .filter(ee.Filter.calendarRange(month_start, month_end, 'month'))
)

# Attach radiance: calibrate the TOA images, join the closest-in-time one to
# each Level-2 image under 'L', then copy it in as band 'L'.
imgCol_L8_TOA = imgCol_L8_TOA.map(fun_radcal)
imgCol_L8_SR = ee.ImageCollection(
    join_l.apply(imgCol_L8_SR, imgCol_L8_TOA, maxDiffFilter)
).map(fun_l_addband)

# Attach water vapour the same way, under 'WV' -> band 'WV_SCALED'.
imgCol_L8_SR = ee.ImageCollection(
    join_wv.apply(imgCol_L8_SR, imgCol_WV, maxDiffFilter)
).map(fun_wv_scale)

# Remaining per-image steps, applied in dependency order:
# atmospheric functions -> delta/gamma -> NDVI/FVC/emissivity -> LST -> LST mask.
for _lst_step in (
    fun_af1, fun_af2, fun_af3,
    fun_delta, fun_gamma,
    fun_ndvi, fun_fvc, fun_epsilon_scale, fun_epsilon,
    fun_lst, fun_mask_lst,
):
    imgCol_L8_SR = imgCol_L8_SR.map(_lst_step)

# Calculate mean LST
mean_lst = imgCol_L8_SR.select('LST').mean()

vis_params_lst = dict(
    min=-10,
    max=30,
    palette=['#313695', '#74add1', '#fdae61', '#a50026'],
)

Map.addLayer(mean_lst.clip(study_area), vis_params_lst, 'Mean LST')

EEException: reduce.mean: Error in map(ID=LC08_168060_20240622):
Image.select: Band pattern 'B10' did not match any bands. Available bands: [SR_B1, SR_B2, SR_B3, SR_B4, SR_B5, SR_B6, SR_B7, SR_QA_AEROSOL, ST_B10, ST_ATRAN, ST_CDIST, ST_DRAD, ST_EMIS, ST_EMSD, ST_QA, ST_TRAD, ST_URAD, QA_PIXEL, QA_RADSAT]