### Harmonic analysis to extract the seasonality of NDVI time series
  
Documentation:   
https://developers.google.com/earth-engine/tutorials/community/time-series-modeling;   https://docs.google.com/document/d/1mNIRB90jwLuASO1JYas1kuOXCLbOoy1Z4NlV1qIXM10/edit  

Test data: Landsat; test band: NDVI  

For Sentinel-2, see:  
https://medium.com/@moraesd90/creating-monthly-ndvi-composites-sentinel-2-on-google-earth-engine-a5c2d49bc9ca

In [49]:
import os
import os.path as osp
import ee 
ee.Initialize()
import geemap
import math
import numpy as np
import geopandas as gpd
import pandas as pd

### change the working directory and result path to store all the outputs

In [2]:
# Directory holding the input GeoJSON files; it is also made the working directory.
cwd = '/mnt/poseidon/remotesensing/arctic/data/training/testData_unmixingRegression/'
os.chdir(cwd)
# Echo the working directory to confirm the chdir took effect. The original
# referenced `os.getcwd` without calling it, which did nothing.
print(os.getcwd())

# Input GeoJSON files; `cwd` already ends with '/', so plain concatenation is safe.
purePFT = cwd+'purePFT_merged_fCover_Macander2017_geometry.geojson'
randPFT = cwd+'randomPts_fCover_10kmDist_Macander2017_geometry.geojson'

### import region of interest (roi)

In [12]:
# Columns needed on the Earth Engine side: the point id and its geometry.
keep_cols = ['id', 'geometry']

# --- first point set ---
pureGJ = gpd.read_file(purePFT)
pureGJ = pureGJ.reset_index(drop=True)
pureGJ_simple = pureGJ[keep_cols].set_index('id')
# drop rows without a geometry so the upload to GEE does not fail
pureGJ_simple = pureGJ_simple.loc[pureGJ_simple['geometry'].notna()]
purePoints = geemap.gdf_to_ee(pureGJ_simple)

# --- second point set, handled the same way ---
randGJ = gpd.read_file(randPFT)
randGJ = randGJ.reset_index(drop=True)
randGJ_simple = randGJ[keep_cols].set_index('id')
randGJ_simple = randGJ_simple.loc[randGJ_simple['geometry'].notna()]
randPoints = geemap.gdf_to_ee(randGJ_simple)

# CRS string used by `reproject` further down.
proj = 'EPSG:4326'

In [37]:
# Centre of interest and a rectangular region of interest around it.
coi = ee.Geometry.Point([-155.6, 69.818])
# Buffer distance is sqrt(900000000000) / 2; bounds() turns the buffered
# shape into its bounding rectangle.
bufferDistance = ee.Number(900000000000).sqrt().divide(2)
roi = coi.buffer(bufferDistance, 1).bounds()

# Show both point sets on a hybrid basemap centred on `coi`.
Map = geemap.Map(basemap='HYBRID')
Map.centerObject(coi, 6)
Map.addLayer(purePoints, {}, 'observation_points')
Map.addLayer(randPoints, {'color': 'red'}, 'rand_observation_points')
# Leaving `Map` as the last expression renders the widget in the notebook.
Map

Map(center=[69.818, -155.6], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(child…

### required functions  
Some of these functions must be adapted before they can be used with Sentinel-2 data.

### --- landsat 8 surface reflectance ----

In [38]:
def reproject(image):
    """Return ``image`` reprojected to the module-level ``proj`` CRS at scale 30."""
    target = {'crs': proj, 'scale': 30}
    return image.reproject(**target)
    
def maskL8sr(image): #!! change to Sentinel-2
    """Mask cloud and cloud-shadow pixels using the ``pixel_qa`` band.

    A pixel is kept only where QA bits 3 (cloud shadow) and 5 (cloud) are
    both zero. The masked image is divided by 10000, reduced to the bands
    matching ``B[0-9]*`` (which drops the QA bands) and given the input's
    ``system:time_start`` property.
    """
    qa = image.select('pixel_qa')

    # Each test is true where the corresponding flag bit is unset.
    shadowFree = qa.bitwiseAnd(1 << 3).eq(0)
    cloudFree = qa.bitwiseAnd(1 << 5).eq(0)
    clear = shadowFree.And(cloudFree)

    masked = image.updateMask(clear)
    scaled = masked.divide(10000)
    bandsOnly = scaled.select("B[0-9]*")
    return bandsOnly.copyProperties(image, ["system:time_start"])

### check the corresponding bands for Sentinel data
def addNDVI(image): #!!! check the band information of Sentinel-2
    """Append a float ``NDVI`` band: the normalized difference of ``B5`` and ``B4``."""
    bandPair = ['B5', 'B4']
    ndviBand = image.normalizedDifference(bandPair)
    ndviBand = ndviBand.rename('NDVI')
    return image.addBands(ndviBand.float())

### add constant and time band
def addVariables(image):
    """Append a float ``t`` band (years since 1970-01-01) and a constant band of 1.

    The time value comes from the image's ``system:time_start`` property.
    """
    acquired = ee.Date(image.get("system:time_start"))
    epoch = ee.Date('1970-01-01')
    elapsedYears = acquired.difference(epoch, 'year')

    timeBand = ee.Image(elapsedYears).rename('t').float()
    constantBand = ee.Image.constant(1)
    return image.addBands(timeBand).addBands(constantBand)
    

### load image collection  
change to Sentinel-2 if needed

In [51]:
# NOTE(review): this is a Landsat Collection 1 asset id; confirm it is still
# served by Earth Engine. Moving to Collection 2 would also need new band and
# QA names in maskL8sr / addNDVI.
imageCollection = ee.ImageCollection("LANDSAT/LC08/C01/T1_SR")

## suppose we only input one year time series
startdate = '2019-01-01'
enddate = '2019-12-31'

## filter the image collection by selected range, cloud masking, add NDVI, additional variables for harmonic analysis
# filterDate() treats its end date as exclusive, so advance it by one day to
# keep images acquired on `enddate` itself.
filteredLandsat = (
    imageCollection.filterBounds(roi)
    .filterDate(ee.Date(startdate), ee.Date(enddate).advance(1, 'day'))
    .map(maskL8sr)
    .map(addNDVI)
    .map(addVariables)
    .map(reproject)
)
# Sanity check: list the band names of the first image.
print(filteredLandsat.first().bandNames().getInfo())

['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B10', 'B11', 'NDVI', 't', 'constant']


#### detrend the time series first and then calculate the harmonic index (phase, amplitude)

In [52]:
###---- estimate the linear trend over time -----
independents = ee.List(['constant', 't'])
dependent = ee.String('NDVI')  ## can be any vegetation indices

# The regression input lists the independent bands first, then the dependent one.
regressionBands = independents.add(dependent)
trendReducer = ee.Reducer.linearRegression(independents.length(), 1)
trend = filteredLandsat.select(regressionBands).reduce(trendReducer)

# Turn the 'coefficients' array band into one band per independent variable.
coefficientArray = trend.select('coefficients').arrayProject([0])
coefficients = coefficientArray.arrayFlatten([independents])


## remove the trend from the time series before harmonic analysis
def detrendImageCollection(image):
    """Return the ``dependent`` band of ``image`` minus the fitted linear trend.

    The trend value is the sum over the ``independents`` bands multiplied by
    the module-level ``coefficients``. The result keeps the dependent band's
    name and the input's ``system:time_start`` property.
    """
    observed = image.select(dependent)
    trendValue = image.select(independents).multiply(coefficients).reduce('sum')
    residual = observed.subtract(trendValue).rename(dependent)
    return residual.copyProperties(image, ["system:time_start"])


detrended = filteredLandsat.map(detrendImageCollection)
# Inspect the first detrended image as a sanity check.
firstDetrended = detrended.first()
print(firstDetrended.getInfo())

{'type': 'Image', 'bands': [{'id': 'NDVI', 'data_type': {'type': 'PixelType', 'precision': 'double'}, 'crs': 'EPSG:4326', 'crs_transform': [1, 0, 0, 0, 1, 0]}], 'properties': {'system:time_start': 1552251198570, 'system:index': 'LC08_066013_20190310'}}


In [53]:
#### Number of harmonics to extract. The first spans the whole time series,
#### the second half of it, and so on: for a one-year series, 1 is the annual
#### cycle and 2 the half-year cycle.
harmonics = 4
# Frequencies 1..harmonics, built locally so they always follow `harmonics`
# (the original hard-coded 4 and fetched the list from the server).
harmonicFrequencies = [*range(1, harmonics + 1)]


def getNames(base, lst_freq):
    """Return a list of ``ee.String`` names, ``base`` followed by each frequency.

    Args:
        base: name prefix, e.g. ``'cos_'``.
        lst_freq: iterable of frequencies; each is appended via ``str()``.
    """
    return [ee.String(base + str(freq)) for freq in lst_freq]


cosNames = getNames('cos_', harmonicFrequencies)
sinNames = getNames('sin_', harmonicFrequencies)
# Independent variables of the harmonic regression: constant, time, then all
# cosine terms followed by all sine terms.
independents = ee.List(['constant', 't']).cat(cosNames).cat(sinNames)

In [54]:
# # Function to add a constant band.
def addConstant(image):
    """Append a band with the value 1 to ``image``."""
    onesBand = ee.Image(1)
    return image.addBands(onesBand)

# # Function to add a time band.
def addTime(image):
    """Append a float ``t`` band: years since 1970-01-01 multiplied by 2*pi.

    The date is read from the image's ``system:time_start`` property.
    """
    acquired = ee.Date(image.get('system:time_start'))
    elapsedYears = acquired.difference(ee.Date('1970-01-01'), 'year')
    radiansBand = ee.Image(elapsedYears.multiply(2 * math.pi)).rename('t').float()
    return image.addBands(radiansBand)

def addHarmonics(image):
    """Append cosine and sine bands of ``t`` times each harmonic frequency.

    Bands are named after the module-level ``cosNames`` and ``sinNames``;
    the frequencies come from ``harmonicFrequencies``.
    """
    freqImage = ee.Image.constant(harmonicFrequencies)
    # One band per frequency holding t * frequency.
    angles = ee.Image(image).select('t').multiply(freqImage)
    cosBands = angles.cos().rename(cosNames)
    sinBands = angles.sin().rename(sinNames)
    return image.addBands(cosBands).addBands(sinBands)

# add band count for each image, used for removing images with no bands, i.e., count = 0
def addCount(image):
    """Set the ``count`` property of ``image`` to its number of bands."""
    nBands = image.bandNames().length()
    return image.set('count', nBands)

harmonicLandsat = detrended.map(addTime).map(addConstant).map(addHarmonics).map(addCount)
print(harmonicLandsat.first().bandNames().getInfo())

# Fit the harmonic regression here so that the coefficients exist before the
# mapped function below reads them. Without this, a fresh top-to-bottom run
# fails with NameError on `harmonicTrendCoefficients`.
harmonicTrendCoefficients = (
    harmonicLandsat.select(independents.add(dependent))
    .reduce(ee.Reducer.linearRegression(independents.length(), 1))
    .select('coefficients')
    .arrayProject([0])
    .arrayFlatten([independents])
)

### fit the harmonic models to the original observations, this might be helpful if we want a smoothed time series
# 'fitted' is the sum of each independent band times its coefficient.
fittedHarmonic = harmonicLandsat.map(
    lambda image: image.addBands(
        image.select(independents)
        .multiply(harmonicTrendCoefficients)
        .reduce('sum')
        .rename('fitted')
    )
).map(reproject)

['NDVI', 't', 'constant', 'cos_1', 'cos_2', 'cos_3', 'cos_4', 'sin_1', 'sin_2', 'sin_3', 'sin_4']


In [55]:
# fit the harmonic trend
# Parentheses replace the backslash continuations: the original had a comment
# after a trailing backslash, which is a SyntaxError.
# NOTE(review): the original comment said the filtered curve is used in case
# the Landsat series has gaps, but the selection below takes `dependent`
# (NDVI), not the 'fitted' band - confirm which one was intended.
harmonicTrend = (
    fittedHarmonic.select(independents.add(dependent))
    .reduce(ee.Reducer.linearRegression(independents.length(), 1))
)

# extract the coefficients for calculating the harmonic indices
harmonicTrendCoefficients = (
    harmonicTrend.select('coefficients')
    .arrayProject([0])
    .arrayFlatten([independents])
)

In [44]:
# # extract the first, second harmonic variables
# phase_1 = harmonicTrendCoefficients.select('cos_1')\
#                 .atan2(harmonicTrendCoefficients.select('sin_1'));
# amplitude_1 = harmonicTrendCoefficients.select('cos_1')\
#                 .hypot(harmonicTrendCoefficients.select('sin_1'));

### create a pseudo image containing all harmonic indices as bands for export

In [56]:
# Build one phase and one amplitude image per harmonic, appended in the order
# phase_k, amplitude_k. The accumulator is no longer called `list`, which
# shadowed the builtin, and the loop follows `harmonicFrequencies` instead of
# a hard-coded range(4).
harmonicBands = []
for freq in harmonicFrequencies:
    cosCoef = harmonicTrendCoefficients.select(f'cos_{freq}')
    sinCoef = harmonicTrendCoefficients.select(f'sin_{freq}')
    harmonicBands.append(cosCoef.atan2(sinCoef).rename(f'phase_{freq}'))
    harmonicBands.append(cosCoef.hypot(sinCoef).rename(f'amplitude_{freq}'))

# Convert the image collection to the bands of a single image; toBands()
# prefixes each band name with the image index (e.g. '0_phase_1').
harmonicIDX = ee.ImageCollection(harmonicBands).toBands()

# print(phase.getInfo())
# print(harmonicIDX.bandNames().getInfo())

### sample points from image collection

In [57]:
# geemap.extract_values_to_points(in_fc, landsat7, os.path.join(outpath, 'landsat.csv'))

# sample an image at our observation points
def sample_raster(image, fcollection, scale=10, projection='EPSG:4326', geometries=False):
    """Sample ``image`` at the features of ``fcollection`` via ``sampleRegions``.

    Args:
        image: object exposing ``sampleRegions`` (an ``ee.Image`` in this notebook).
        fcollection: features to sample at, passed as ``collection``.
        scale: passed through as ``scale`` (default 10).
        projection: passed through as ``projection`` (default ``'EPSG:4326'``).
        geometries: passed through as ``geometries`` (default ``False``).

    Returns:
        Whatever ``image.sampleRegions`` returns.
    """
    sampling_options = {
        'collection': fcollection,
        'scale': scale,
        'projection': projection,
        'geometries': geometries,
    }
    return image.sampleRegions(**sampling_options)

def fc_to_df(fc, idx_col):
    """Convert a FeatureCollection into a pandas DataFrame indexed by ``idx_col``.

    Args:
        fc: object whose ``getInfo()`` returns a dict with a ``'features'``
            list; each feature is a dict with a ``'properties'`` dict.
        idx_col: name of the property to use as the DataFrame index.

    Returns:
        A DataFrame with one row per feature and one column per property
        (minus ``idx_col``, which becomes the index). If there are no
        features, an empty DataFrame whose index is named ``idx_col`` is
        returned instead of raising ``KeyError`` from ``set_index``.
    """
    # getInfo() pulls the whole collection to the client as plain dicts.
    features = fc.getInfo()['features']
    records = [feature['properties'] for feature in features]

    if not records:
        return pd.DataFrame(index=pd.Index([], name=idx_col))

    return pd.DataFrame(records).set_index(idx_col)

In [58]:
# Sample the harmonic-index image at each point set (sample_raster defaults:
# scale=10, projection='EPSG:4326'), then pull the results into DataFrames
# indexed by the point 'id'.
harmonicIDX_purePoints = sample_raster(image=harmonicIDX, fcollection=purePoints)
harmonicIDX_randPoints = sample_raster(image=harmonicIDX, fcollection=randPoints)

harmonicIDX_purePoints_df = fc_to_df(fc=harmonicIDX_purePoints, idx_col='id')
harmonicIDX_randPoints_df = fc_to_df(fc=harmonicIDX_randPoints, idx_col='id')

In [59]:
# Preview the first five sampled rows.
harmonicIDX_purePoints_df.head(5)

Unnamed: 0_level_0,0_phase_1,1_amplitude_1,2_phase_2,3_amplitude_2,4_phase_3,5_amplitude_3,6_phase_4,7_amplitude_4
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
103,-1.575566,229.738605,-1.582822,72.107721,-1.598657,20.338515,-1.611502,3.20109
104,1.440207,1032.817935,1.313311,311.456917,1.193688,82.082035,1.102005,12.734946
105,-1.9682,177.27139,-2.397047,56.790156,-2.84731,15.696199,3.019477,2.383692
106,-2.056948,185.649725,-2.543231,55.47547,-3.00975,13.990364,2.896202,1.915779
111,1.833212,2226.059595,2.107487,669.814892,2.398105,172.668913,2.692681,24.605748


In [63]:
## write the sampled harmonic indices of both point sets to CSV
outpath = '/mnt/poseidon/remotesensing/arctic/data/training/testData_unmixingRegression'
purePointsCsv = f'{outpath}/macandar2017_purePFT_year2019_harmonic_v01_filtered.csv'
randPointsCsv = f'{outpath}/macandar2017_randomsample_year2019_harmonic_v01_filtered.csv'

harmonicIDX_purePoints_df.to_csv(purePointsCsv)
harmonicIDX_randPoints_df.to_csv(randPointsCsv)

In [None]:
### export the harmonic variables for the roi
# geemap.ee_export_image(phase_1, 
#                        result_path+'L8NDVI_phase_1.tif', scale=30,crs=proj,
#                        region=roi, file_per_band=False)