<a href="https://colab.research.google.com/github/gactyxc/NDCI-mGMM/blob/main/NDCI_mGMM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the Earth Engine Python client. The leading "!" is an IPython shell
# escape, so this cell only runs inside a notebook.
! pip install earthengine-api
import ee
# Authenticate, then initialise the client against a specific Cloud project.
ee.Authenticate()
ee.Initialize(project='crops-mapping-gaoyuan')
# geemap is imported later, in the main-procedure cell.
!pip install geemap

In [None]:
# Global variable definitions shared by the cells below.
year = 2020                  # target identification year
startDoy, endDoy = 180, 210  # start / end of the identification window

# Study region: rectangle spanning 124.0-125.0 by 44.2-45.2 in EPSG:4326,
# built with planar (non-geodesic) edges.
roi = ee.Geometry.Rectangle(
    coords=[124.0, 44.2, 125.0, 45.2],
    proj='EPSG:4326',
    geodesic=False,
)

# part-1: data preprocessing

In [None]:
# common functions
# - sentinel2_collection: get the Sentinel-2 image collection for the study date range and study area
# - maskClouds: remove cloudy pixels
# function to exclude bad data at scene edges
def maskEdges(s2_img):
    """Keep only the pixels that are valid in both the B8A and B9 bands.

    The validity mask of B8A is itself masked by the validity mask of B9, and
    the combined mask is then applied to every band of ``s2_img``.
    """
    b8a_valid = s2_img.select('B8A').mask()
    b9_valid = s2_img.select('B9').mask()
    return s2_img.updateMask(b8a_valid.updateMask(b9_valid))

# Function to mask clouds in Sentinel-2 imagery.
def maskClouds(img):
    """Mask every pixel whose cloud probability is 50 or higher.

    The cloud-probability image is read from the ``cloud_mask`` property of
    ``img`` (attached by the join in ``sentinel2_collection``); its
    ``probability`` band is compared against the threshold.
    """
    probability_limit = 50
    cloud_probability = ee.Image(img.get('cloud_mask')).select('probability')
    return img.updateMask(cloud_probability.lt(probability_limit))

def sentinel2_collection(start_data, end_data, roi):
    """Build an edge- and cloud-masked Sentinel-2 collection with short band names.

    Args:
        start_data: start of the date range handed to ``ee.Filter.date``.
        end_data: end of the date range handed to ``ee.Filter.date``.
        roi: geometry used to filter both source collections spatially.

    Returns:
        An ``ee.ImageCollection`` whose images carry the bands
        B, G, R, RE1, RE2, RE3, NIR, RE4, SWIR1 and SWIR2.
    """
    # Source band name -> short name used by the vegetation-index functions.
    band_renames = {
        'B2': 'B',
        'B3': 'G',
        'B4': 'R',
        'B5': 'RE1',
        'B6': 'RE2',
        'B7': 'RE3',
        'B8': 'NIR',
        'B8A': 'RE4',
        'B11': 'SWIR1',
        'B12': 'SWIR2',
    }

    # One space/time filter shared by the reflectance and cloud-probability data.
    space_time_filter = ee.Filter.And(
        ee.Filter.geometry(roi), ee.Filter.date(start_data, end_data))

    reflectance = (
        ee.ImageCollection("COPERNICUS/S2_HARMONIZED")
        .filter(space_time_filter)
        .map(maskEdges)
    )
    cloud_probability = (
        ee.ImageCollection("COPERNICUS/S2_CLOUD_PROBABILITY")
        .filter(space_time_filter)
    )

    # Attach to each reflectance image, under the 'cloud_mask' property, the
    # cloud-probability image that has the same system:index.
    same_index = ee.Filter.equals(
        leftField="system:index", rightField="system:index")
    joined = ee.Join.saveFirst('cloud_mask').apply(
        primary=reflectance,
        secondary=cloud_probability,
        condition=same_index,
    )

    # Mask the clouds, then keep and rename only the bands of interest.
    return (
        ee.ImageCollection(joined)
        .map(maskClouds)
        .select(list(band_renames), list(band_renames.values()))
    )

# function to add all related VIs
# add NDCI, a normalized difference composite index for maize identification
def addNDCI(img):
    """Append an int16 ``NDCI`` band to ``img``.

    The band is computed as
    ``2500 * (10000 - SWIR1 - G) / (7.5 * RE1 - SWIR1 + 20000)``.

    Only the new NDCI band is cast to int16. Casting the whole image would
    also turn fractional bands that are already present (LSWI and EVI are
    added before this function runs) into integers, which breaks the
    fractional thresholds applied to them afterwards.

    Args:
        img: image providing the ``RE1``, ``G`` and ``SWIR1`` bands.

    Returns:
        ``img`` with the extra int16 ``NDCI`` band; existing bands are untouched.
    """
    NDCI = img.expression('NDCI = 2500 * (10000-SWIR1 - G) / (7.5 * RE1 - SWIR1 + 20000)', {
        'RE1': img.select('RE1'),
        'G': img.select('G'),
        'SWIR1': img.select('SWIR1')
    })
    return img.addBands(NDCI.toInt16())

# add LSWI
def addLSWI(image):
    """Append an ``LSWI`` band computed as (NIR - SWIR1) / (NIR + SWIR1)."""
    # Map each variable of the expression to the band of the same name.
    inputs = {name: image.select(name) for name in ('SWIR1', 'NIR')}
    lswi_band = image.expression('LSWI = (NIR - SWIR1) / (NIR + SWIR1)', inputs)
    return image.addBands(lswi_band)

# add EVI
def addEVI(image):
    """Append an ``EVI`` band.

    The band is computed as
    ``2.5 * (NIR - R) / (NIR + 6 * R - 7.5 * B + 10000)``.
    NOTE(review): the additive 10000 presumably matches reflectance stored on
    a 0-10000 scale -- confirm against the input data.
    """
    # Map each variable of the expression to the band of the same name.
    inputs = {name: image.select(name) for name in ('NIR', 'R', 'B')}
    evi_band = image.expression(
        'EVI = 2.5 * (NIR - R) / (NIR + 6 * R - 7.5 * B + 10000)', inputs)
    return image.addBands(evi_band)

# part-2: get NDCI map

In [None]:
# build the time series of masked NDCI images for the identification window
def get_NDCI_map(year, startDoy, endDoy, roi):
    """Return the masked NDCI time series for a study region.

    Args:
        year: the target identification year.
        startDoy: start of the optimal identification window, applied as a
            day offset from 1 January of ``year``.
        endDoy: end of the optimal identification window, applied the same way.
        roi: target region of study.

    Returns:
        An ``ee.ImageCollection`` of 10-day median composites, each holding a
        single int16 band ``NDCI_mask`` (NDCI with rejected pixels set to 0).
    """
    # define the start and end time of identification
    yearStart = ee.Date.fromYMD(year, 1, 1)
    startDate = yearStart.advance(startDoy, 'day')
    endDate = yearStart.advance(endDoy, 'day')

    # cloud-masked Sentinel-2 images between 1 January and 31 December of `year`
    s2SR_imgCol = sentinel2_collection(yearStart,
                                       ee.Date.fromYMD(year, 12, 31), roi)

    # Composite start times, 10 days apart, expressed in epoch milliseconds.
    # NOTE(review): ee.List.sequence includes its end value, so when the window
    # length is a multiple of 10 days the last composite starts at endDate and
    # covers the 10 days after it -- confirm this is intended.
    tenDaysMillis = 1000 * 60 * 60 * 24 * 10
    dates = ee.List.sequence(startDate.millis(), endDate.millis(), tenDaysMillis)

    # function to resample time resolution of image collection to 10 day
    def resampleTo10Days(date):
        windowStart = ee.Date(date)
        windowEnd = windowStart.advance(10, 'day')
        composite = s2SR_imgCol.filterDate(windowStart, windowEnd).median()
        # system:time_start is stored as epoch milliseconds (a number), not as
        # an ee.Date object, so that date-based tools can read it.
        return composite.set('system:time_start', windowStart.millis())

    # Apply the time resampling function, then add the indices.
    resampledImages = ee.ImageCollection(dates.map(resampleTo10Days))
    resampledImages = resampledImages.map(addLSWI)
    resampledImages = resampledImages.map(addEVI)
    resampledImages = resampledImages.map(addNDCI)

    # get the NDCI band and set the rejected pixels to 0
    def NDCI_mask(image):
        LSWI = image.select('LSWI')
        EVI = image.select('EVI')
        # Rejected pixels: EVI <= 0.35 and LSWI + 0.05 >= EVI.
        rejected = EVI.lte(0.35).And(LSWI.add(ee.Image.constant(0.05)).gte(EVI))
        valid_mask = rejected.Not()
        NDCI = image.select('NDCI').multiply(valid_mask).rename('NDCI_mask')
        NDCI = NDCI.toInt16()
        # Add the band to the source image before selecting it, as the
        # original code did, so the result stays derived from `image`.
        return image.addBands(NDCI).select('NDCI_mask')

    NDCI_Images = resampledImages.map(NDCI_mask)
    return NDCI_Images

# part-3: mGMM construction

In [None]:
# ------ step-1. get the random sample in each 1°×1° grid
def get_random_sample(roi, sampleSize=None):
    """Draw random points inside ``roi`` and keep those that fall on cropland.

    The ESA WorldCover v100 map is used as the cropland mask (pixels whose
    class value equals 40).

    Args:
        roi: target region of study.
        sampleSize: number of random points generated before filtering.
            Defaults to 124000 (described by the original author as 0.1% of
            the number of pixels in a grid).

    Returns:
        An ``ee.FeatureCollection`` of the points lying on cropland. Each
        point carries ``inside_mask`` (always 1 after filtering) and ``ID``
        (its ``system:index``), so the ``properties=['ID']`` lookup in
        ``extract_points_value`` finds a value to copy. Because points are
        filtered after generation, fewer than ``sampleSize`` are returned.
    """
    if sampleSize is None:
        sampleSize = 124000

    ESA_landmap = ee.ImageCollection("ESA/WorldCover/v100")
    ESA_croplandMask = ESA_landmap.first().eq(40)

    randomPoints = ee.FeatureCollection.randomPoints(
        region=roi, points=sampleSize, seed=1234, maxError=1
        )

    # Restrict the mask to the region and hide every non-cropland pixel, so
    # that sampling a non-cropland location yields no value.
    roi_croplandMask = ESA_croplandMask.clip(roi)
    roi_croplandMask = roi_croplandMask.updateMask(roi_croplandMask)

    def mask_points(point):
        # Read the mask value ('Map' band) under the point.
        isInsideMask = roi_croplandMask.reduceRegion(
            reducer=ee.Reducer.first(),
            geometry=point.geometry(),
            scale=10,
            maxPixels=1
        ).getNumber('Map')
        # Random points have no attributes of their own; expose the feature
        # index as 'ID' so sampled rows can be traced back to their point.
        return (point
                .set('inside_mask', isInsideMask)
                .set('ID', point.get('system:index')))

    maskedPoints = randomPoints.map(mask_points)
    finalPoints = maskedPoints.filter(ee.Filter.eq('inside_mask', 1))
    return finalPoints

In [None]:
#------ step-2. extract the NDCI value of each image for each random point
def extract_points_value(imgCol, pts, *,
                         description='testRegion_cropSamples_VI',
                         folder='cropSamples_VI'):
    """Sample every image of ``imgCol`` at ``pts`` and export the values to Drive.

    Args:
        imgCol: the extracted time series images.
        pts: extraction points; their ``ID`` property is copied to the samples.
        description: name of the export task (defaults to the original
            hard-coded value).
        folder: Google Drive folder receiving the CSV (defaults to the
            original hard-coded value).

    Returns:
        The export task, already started, so the caller can poll its status.
        (Callers that ignore the return value are unaffected.)
    """
    # Sample one image at the points (10 m scale).
    def extract_values(image):
        return ee.Image(image).sampleRegions(
            collection=pts,
            properties=['ID'],  # Properties to include from points
            scale=10
        )

    # Map over the collection on the server instead of looping on the client:
    # this avoids a blocking getInfo() round trip just to learn the number of
    # images. The per-image results are then merged into one table.
    combined_results = ee.FeatureCollection(imgCol.map(extract_values)).flatten()

    task = ee.batch.Export.table.toDrive(
        collection=combined_results,
        description=description,
        folder=folder,
        fileFormat='CSV'
    )
    task.start()
    return task

# main procedure

In [None]:
import geemap
##---- main procedure: build the NDCI series, draw the samples, export their values
imgCol = get_NDCI_map(year, startDoy, endDoy, roi)
pts = get_random_sample(roi)
extract_points_value(imgCol, pts)

# Preview the study region. The map is centred on `roi` itself rather than on
# fixed coordinates, so the 'Region' layer is always inside the initial view.
m = geemap.Map()
m.center_object(roi, 8)
m.add_layer(roi, {'color': 'yellow'}, 'Region')
m