In [2]:
# Earth Engine client plus the standard-library modules used by the cells below.
import ee
import sys
import os
import re
import math

# Put the parent directory on sys.path so the local `modules` package can be imported.
sys.path.append(os.path.abspath('../'))

# Project-local helpers. The star imports place their public names directly in the
# notebook namespace; later cells use names such as ENDMEMBERS, getFractions and
# getMosaic that are not defined in this notebook -- presumably they come from
# these modules (TODO confirm which module provides which name).
from modules.SpectralIndexes import *
from modules.Miscellaneous import *
from modules.Mosaic import *
from modules.SmaAndNdfi import * 
from modules import Map

# Initialize the Earth Engine client library for this Cloud project.
ee.Initialize(project='ee-barbaracostaipam')

In [None]:
## Define version to be processed
version_in = '5'      ## version of the sample points that are read
version_out = '11'    ## version tag written into the output folder and table names

## Set output folder (one training table per region/year is exported here)
dirout = 'projects/mapbiomas-workspace/COLECAO_DEV/COLECAO9_DEV/CERRADO/training/v' + version_out + '/'

## Set regions ids (1..38) and years (1985..2023)
regions = list(range(1, 39))
years = list(range(1985, 2024))

## set mosaic date range (suffixes appended to the year)
dateStart = '-01-01'
dateEnd = '-12-31'

## list files already present in the output folder
files = ee.data.listAssets({'parent': dirout})
## .get() tolerates a response without an 'assets' key (e.g. an empty folder)
files = [asset['name'] for asset in files.get('assets', [])]
# Remove the legacy prefix so names are comparable with the ids built below
files = [file.replace('projects/earthengine-legacy/assets/', '') for file in files]

# Generate expected asset ids; built from `dirout` so the folder is defined in one place
expected = [
    f'{dirout}train_col9_reg{region}_{year}_v{version_out}'
    for region in regions for year in years
]

# Find missing entries (set lookup avoids rescanning the file list for every entry;
# the order of `expected` is preserved)
existing_assets = set(files)
missing = [entry for entry in expected if entry not in existing_assets]

## Landsat 32-day composite collection and the reflectance bands read from it
collectionId = 'LANDSAT/COMPOSITES/C02/T1_L2_32DAY'
spectralBands = [
    'blue', 'red', 'green',
    'nir', 'swir1', 'swir2',
]

## Endmembers used by the spectral mixture step (ENDMEMBERS is not defined in
## this notebook -- presumably provided by one of the star imports)
endmembers = ENDMEMBERS['landsat-8']

## Biome raster; `cerrado` keeps only the pixels whose value equals 4
biomes = ee.Image('projects/mapbiomas-workspace/AUXILIAR/biomas-2019-raster')
cerrado = biomes.updateMask(biomes.eq(4))

## Classification regions (vector)
regionsCollection = ee.FeatureCollection(
    'users/dh-conciani/collection7/classification_regions/vector_v2'
)

## Sample points for the input version
samples = ee.FeatureCollection(
    f'users/dh-conciani/collection9/sample/points/samplePoints_v{version_in}'
)

## Time since last fire; the 2022 band is reused as 2023 and 2024
fire_age = ee.Image('users/barbarasilvaIPAM/collection8/masks/fire_age_v2')
fire_age = (
    fire_age
    .addBands(fire_age.select('classification_2022').rename('classification_2023'))
    .addBands(fire_age.select('classification_2022').rename('classification_2024'))
)

In [None]:
## Training-table names end in ..._reg<region>_<year>_v<version>; capture region and
## year together so a 4-digit run elsewhere in the path cannot be taken for the year
asset_pattern = re.compile(r"_reg(\d+)_(\d{4})_v[^/]*$")

## For every missing training table: build the annual mosaic for its region/year,
## sample it at the sample points and start an export task to `dirout`
for obj in missing:
    print(obj)

    ## get region id and year; skip names that do not follow the pattern instead of
    ## silently reusing the values from the previous iteration
    match = asset_pattern.search(obj)
    if match is None:
        print('skipping, unexpected asset name: ' + obj)
        continue
    region_list = int(match.group(1))
    year = int(match.group(2))

    ## Subset region (feature collection is reused below to filter the samples)
    region_fc = regionsCollection.filter(ee.Filter.eq('mapb', region_list))
    region_i = region_fc.geometry()

    ## Compute additional bands
    geo_coordinates = ee.Image.pixelLonLat().clip(region_i)

    ## Get latitude: (latitude + 5) * -1 * 1000, cast to Int16
    lat = geo_coordinates.select('latitude')\
        .add(5)\
        .multiply(-1)\
        .multiply(1000)\
        .toInt16()

    ## Get longitude: degrees -> radians, sine, * -1 * 10000, cast to Int16
    lon_sin = geo_coordinates.select('longitude')\
        .multiply(math.pi)\
        .divide(180)\
        .sin()\
        .multiply(-1)\
        .multiply(10000)\
        .toInt16()\
        .rename('longitude_sin')

    ## Cosine: same transformation using the cosine
    lon_cos = geo_coordinates.select('longitude')\
        .multiply(math.pi)\
        .divide(180)\
        .cos()\
        .multiply(-1)\
        .multiply(10000)\
        .toInt16()\
        .rename('longitude_cos')

    ## Get height above nearest drainage
    hand = ee.ImageCollection("users/gena/global-hand/hand-100")\
        .mosaic()\
        .toInt16()\
        .clip(region_i)\
        .rename('hand')

    ##########################################################  build mosaic
    ## date range of the year being processed
    date_start = str(year) + dateStart
    date_end = str(year) + dateEnd

    collection = ee.ImageCollection(collectionId)\
        .filter(ee.Filter.date(date_start, date_end))\
        .filter(ee.Filter.bounds(region_i))\
        .select(spectralBands)

    ## apply scaling factor (time properties are copied back because
    ## multiply() returns a new image)
    collection = collection.map(
        lambda image: image.multiply(10000).copyProperties(image, ['system:time_start', 'system:time_end'])
    )

    ## apply SMA (getFractions is a project helper; see its module for details)
    collection = collection.map(
        lambda image: getFractions(image, endmembers)
    )

    ## calculate SMA indexes
    collection = collection\
        .map(getNDFI)\
        .map(getSEFI)\
        .map(getWEFI)\
        .map(getFNS)

    ## calculate Spectral indexes
    collection = collection\
        .map(getCAI)\
        .map(getEVI2)\
        .map(getGCVI)\
        .map(getHallCover)\
        .map(getHallHeigth)\
        .map(getNDVI)\
        .map(getNDWI)\
        .map(getPRI)\
        .map(getSAVI)

    ## reduce the collection to one mosaic for the year
    ## NOTE(review): the percentile arguments are interpreted by the project helper
    ## getMosaic -- presumably NDVI percentiles separate dry/wet periods; confirm there
    mosaic = getMosaic(
        collection=collection,
        dateStart=date_start,
        dateEnd=date_end,
        percentileBand='ndvi',
        percentileDry=25,
        percentileWet=75)

    ## get other bands
    mosaic = getSlope(mosaic)
    mosaic = getEntropyG(mosaic)
    mosaic = mosaic.clip(region_i)

    ## Join the mapbiomas mosaic with the auxiliary bands
    mosaic = mosaic\
        .addBands(lat)\
        .addBands(lon_sin)\
        .addBands(lon_cos)\
        .addBands(hand)\
        .addBands(fire_age.select('classification_' + str(year)).rename('fire_age').clip(region_i))\
        .addBands(ee.Image(year).int16().rename('year'))

    #print(mosaic.bandNames().getInfo())

    ######################################################### end of mosaic building

    ## Subset sample points for each region
    samples_ij = samples.filterBounds(region_fc)

    ## optional random subset (disabled)
    ##  add a random column
    #samples_ij = samples_ij.randomColumn("random")
    ## keep the points whose random value is below 0.60
    #samples_ij = samples_ij.filter(ee.Filter.lt("random", 0.60))
    ###############################################################

    ## Get training samples
    training_i = mosaic.sampleRegions(collection=samples_ij,
                                      scale=30,
                                      geometries=True,
                                      tileScale=4)

    ## number of sample points in the region (before null filtering)
    print('number of points: ' + str(samples_ij.size().getInfo()))

    ## Remove NA or NULL from extracted data (every mosaic band must be present)
    training_i = training_i.filter(ee.Filter.notNull(mosaic.bandNames().getInfo()))

    ## Build task to export data; the table name doubles as task description
    asset_name = 'train_col9_reg' + str(region_list) + '_' + str(year) + '_v' + version_out
    task = ee.batch.Export.table.toAsset(
        training_i,
        asset_name,
        dirout + asset_name)

    ## Start task
    task.start()
    print ('============================================')