In [1]:
import geemap
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pdb
from IPython.display import display
import ee
import os

In [2]:
#ee.Authenticate()
#geemap.update_package()

ee.Initialize()

Map = geemap.Map(center=[37.395746,-75.876094], zoom=10)

##Adding every plot coordinate
allplots_fc= pd.read_csv('C:/Users/arj26323/Documents/Data/Biomass datasets/Virginia/Coordinates and biomass/va_allpoints.csv')
allplots_fc = allplots_fc[allplots_fc['liveMass'].notna()]
allplots_fc = allplots_fc[allplots_fc['Latitude'].notna()]
allplots_fc = allplots_fc.drop(columns = ['deadMass', 'collectDate', 'totalMass', 'Ell', 'MSL', 'latitude', 'longitude'])

##Group df1 and average (midpoint) the replicates (a/b)
df_temp=allplots_fc.copy()

df_temp = df_temp.groupby(['Year','locationID','Transect','Latitude','Longitude'],as_index = False).aggregate(
    {
        'liveMass':[np.mean, np.size]
    }
)

df_temp.columns = [
    'Year','locationID','Transect','Latitude','Longitude','liveMass','N'
]

allplots_fc = df_temp

display(allplots_fc)

fc_all = geemap.pandas_to_ee(allplots_fc, latitude = "Latitude", longitude = "Longitude")

Map.addLayer(fc_all, {}, "fc_all")

Unnamed: 0,Year,locationID,Transect,Latitude,Longitude,liveMass,N
0,1999,1,A,37.167147,-75.940768,46.72,2
1,1999,1,A,37.174764,-75.942417,399.68,2
2,1999,1,A,37.180976,-75.940766,441.68,2
3,1999,1,A,37.287699,-75.929487,669.60,2
4,1999,1,A,37.345970,-75.901065,875.52,2
...,...,...,...,...,...,...,...
1003,2017,3,B,37.287875,-75.929008,96.08,2
1004,2017,3,C,37.167768,-75.944280,26.56,2
1005,2017,3,C,37.449973,-75.671952,279.36,1
1006,2018,1,C,37.396051,-75.876056,255.04,1


In [3]:
##Function to cloud mask from the pixel_qa band of Landsat 5/8 SR data.
def maskL5sr(image):
  ## Bits 3 and 5 are cloud shadow and cloud, respectively.
  cloudShadowBitMask = 1 << 3
  cloudsBitMask = 1 << 5

  ##Get the pixel QA band.
  qa = image.select('pixel_qa')

  ##Both flags should be set to zero, indicating clear conditions.
  mask = qa.bitwiseAnd(cloudShadowBitMask).eq(0) \
      .And(qa.bitwiseAnd(cloudsBitMask).eq(0))

  ##Return the masked image, scaled to reflectance, without the QA bands.
  return image.updateMask(mask).divide(10000) \
      .select("B[0-9]*") \
      .copyProperties(image, ["system:time_start"])

In [60]:
##TIDAL FILTERING; from Narron et al. 2022
##Utilizes L8 bands 4 and 6 for NDWI, and bands 3 and 4 (for pheno)
##Does it work for Landsat 5?

def addFLATS(image):
    flats = ee.Image(0).expression(
        '1/(1+2.718281828459045**-(-1.57 + 20*(RED-SWIR)/(RED+SWIR) + 68.6*(GREEN-RED)/(GREEN+RED)))', {
            'SWIR': image.select('B6'),
            'RED': image.select('B4'),
            'GREEN': image.select('B3')
        })
    
    return image.addBands(flats.rename('flats'))

##Notes: This is setup for Landsat 8 - apply to l5?

def addFLATSL5(image):
    flats = ee.Image(0).expression(
        '1/(1+2.718281828459045**-(-1.57 + 20*(RED-SWIR)/(RED+SWIR) + 68.6*(GREEN-RED)/(GREEN+RED)))', {
            'SWIR': image.select('B5'),
            'RED': image.select('B3'),
            'GREEN': image.select('B2')
        })
    
    return image.addBands(flats.rename('flats'))

##MASKING FLATS
def maskFLATS(image):
    mask1 = image.select('flats').lte(0.1) #less than or equal to 0.1 - change?
    return image.updateMask(mask1)

##ADDING NDVI (for min/max variables)
def addL5ndvi(image):
    ndvi = image.expression(
        '(NIR-RED)/(RED+NIR)', {
            'NIR': image.select('B4'),
            'RED': image.select('B3'),
            'GREEN': image.select('B2')
        })
    
    return image.addBands(ndvi.rename('ndvi'))

def addL8ndvi(image):
    ndvi = image.expression(
        '(NIR-RED)/(RED+NIR)', {
            'NIR': image.select('B5'),
            'RED': image.select('B4'),
            'GREEN': image.select('B3')
        })
    
    return image.addBands(ndvi.rename('ndvi'))

In [61]:
##Pixel extraction functions - addDate for dateless images/collections
def addDate(image):
    img_date = ee.Date(image.date())
    img_date = ee.Number.parse(img_date.format('YYYYMMdd'))
    return image.addBands(ee.Image(img_date).rename('imagedate').toInt())

##For Landsat images:
def rasterExtraction(image):
    feature = image.sampleRegions(
        collection = fc_all,
        scale = 30 
    )
    return feature

##FOR 10m DEM:
def demExtraction(image):
    feature = image.sampleRegions(
        collection = fc_all,
        scale = 10 
    )
    return feature

##FOR 1m DEM:
def dem1Extraction(image):
    feature = image.sampleRegions(
        collection = fc_all,
        scale = 1 
    )
    return feature

In [62]:
##Adding DEM
dem = ee.Image('USGS/3DEP/10m') ##This is 1/3 arc second, or 10 m.
dem1 = ee.ImageCollection('USGS/3DEP/1m')

##Set visualization parameters.
dem_params = {
    'min': 0,
    'max': 4000,
    'palette': ['006633', 'E5FFCC', '662A00', 'D8D8D8', 'F5F5F5'],
}

Map.addLayer(dem, dem_params, '10m DEM')
Map.addLayer(dem1, dem_params, '1m DEM')

In [74]:
##Calculating mean pixel values for time periods within each year

def monthly_Avg (collection, years):
  avg = []
  for year in years: #Originally had a for month in months subloop, with (month,month,'month') being a filter and set month
      Monthly_avg = collection.filter(ee.Filter.calendarRange(year, year, 'year')) \
                              .filter(ee.Filter.calendarRange(5, 10, 'month')) \
                              .mean() \
                              .set({'year': year})
      avg.append (Monthly_avg)
  return ee.ImageCollection.fromImages(avg)

## Compute monthly averages
# monthly_sowing_Avg = monthly_Avg (ndvi_sowSeason, years, sowingMonths)

##Months and years are lists

years_ls5 = range(1999, 2012)
years_ls7 = range(2012, 2013)
years_ls8 = range(2013, 2021)
years_dm = range(2000, 2021)

months = range(5,11)
months_daymet = range(1,12)

ls5_collect = ee.ImageCollection('LANDSAT/LT05/C01/T1_SR').filterBounds(fc_all).map(maskL5sr).map(addFLATSL5).map(maskFLATS).map(addL5ndvi)
ls7_collect = ee.ImageCollection('LANDSAT/LE07/C01/T1_SR').filterBounds(fc_all).map(maskL5sr).map(addFLATS).map(maskFLATS).map(addL5ndvi)
ls8_collect = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR').filterBounds(fc_all).map(maskL5sr).map(addFLATS).map(maskFLATS).map(addL8ndvi)

monthly_ls5 = monthly_Avg(ls5_collect, years = years_ls5)
monthly_ls7 = monthly_Avg(ls7_collect, years = years_ls7)
monthly_ls8 = monthly_Avg(ls8_collect, years = years_ls8)

monthly_ls5.size().getInfo()

13

In [75]:
list_5 = monthly_ls5.toList(monthly_ls5.size())
list_7 = monthly_ls7.toList(monthly_ls7.size())
list_8 = monthly_ls8.toList(monthly_ls8.size())

ls5_1999 = ee.Image(list_5.get(0))
ls5_2000 = ee.Image(list_5.get(1))
ls5_2001 = ee.Image(list_5.get(2))
ls5_2002 = ee.Image(list_5.get(3)) ##No oct data for 2002 - consider increasing FLATS threshold; update: not flats, prob cloud
ls5_2003 = ee.Image(list_5.get(4)) 
ls5_2004 = ee.Image(list_5.get(5)) ##No oct data
ls5_2005 = ee.Image(list_5.get(6))
ls5_2006 = ee.Image(list_5.get(7)) 
ls5_2007 = ee.Image(list_5.get(8)) ##No oct data 
ls5_2008 = ee.Image(list_5.get(9))
ls5_2009 = ee.Image(list_5.get(10))
ls5_2010 = ee.Image(list_5.get(11))
ls5_2011 = ee.Image(list_5.get(12))

ls7_2012 = ee.Image(list_7.get(0))

ls8_2013 = ee.Image(list_8.get(0)) ##No oct data
ls8_2014 = ee.Image(list_8.get(1))
ls8_2015 = ee.Image(list_8.get(2))
ls8_2016 = ee.Image(list_8.get(3))
ls8_2017 = ee.Image(list_8.get(4))
ls8_2018 = ee.Image(list_8.get(5))
ls8_2019 = ee.Image(list_8.get(6))
ls8_2020 = ee.Image(list_8.get(7))

# ls5_2002.getInfo()

In [76]:
##Daymet

def dm_Avg (collection, years):
  avg = []
  for year in years: #Originally had a for month in months subloop, with (month,month,'month') being a filter
      Monthly_avg = collection.filter(ee.Filter.calendarRange(year, year, 'year')) \
                              .filter(ee.Filter.calendarRange(1, 12, 'month')) \
                              .mean() \
                              .set({'year': year})
      avg.append (Monthly_avg)
  return ee.ImageCollection.fromImages(avg)

daymet = ee.ImageCollection('NASA/ORNL/DAYMET_V4').filterBounds(fc_all)

monthly_dm = dm_Avg(daymet, years = years_dm)

dm_list = monthly_dm.toList(monthly_dm.size())

dm_1999 = ee.Image(dm_list.get(0))
dm_2000 = ee.Image(dm_list.get(1))
dm_2001 = ee.Image(dm_list.get(2))
dm_2002 = ee.Image(dm_list.get(3))
dm_2003 = ee.Image(dm_list.get(4))
dm_2004 = ee.Image(dm_list.get(5))
dm_2005 = ee.Image(dm_list.get(6))
dm_2006 = ee.Image(dm_list.get(7))
dm_2007 = ee.Image(dm_list.get(8))
dm_2008 = ee.Image(dm_list.get(9))
dm_2009 = ee.Image(dm_list.get(10))
dm_2010 = ee.Image(dm_list.get(11))
dm_2011 = ee.Image(dm_list.get(12))
dm_2012 = ee.Image(dm_list.get(13))
dm_2013 = ee.Image(dm_list.get(14))
dm_2014 = ee.Image(dm_list.get(15))
dm_2015 = ee.Image(dm_list.get(16))
dm_2016 = ee.Image(dm_list.get(17))
dm_2017 = ee.Image(dm_list.get(18))
dm_2018 = ee.Image(dm_list.get(19))

# dm_2020.getInfo()

In [77]:
##Monthly/yearly extraction - NOTE: Takes a while to run
##SUBSET BY YEAR

dem_vals = geemap.ee_to_pandas(demExtraction(dem)) ##10m dataset
# dem_vals = geemap.ee_to_pandas(dem1.map(dem1Extraction).flatten()) ##ONE METER DATASET

# daymet_vals = geemap.ee_to_pandas(monthly_dm.map(rasterExtraction).flatten())
xdm_1999 = geemap.ee_to_pandas(rasterExtraction(dm_1999))
xdm_2000 = geemap.ee_to_pandas(rasterExtraction(dm_2000))
xdm_2001 = geemap.ee_to_pandas(rasterExtraction(dm_2001))
xdm_2002 = geemap.ee_to_pandas(rasterExtraction(dm_2002))
xdm_2003 = geemap.ee_to_pandas(rasterExtraction(dm_2003))
xdm_2004 = geemap.ee_to_pandas(rasterExtraction(dm_2004))
xdm_2005 = geemap.ee_to_pandas(rasterExtraction(dm_2005))
xdm_2006 = geemap.ee_to_pandas(rasterExtraction(dm_2006))
xdm_2007 = geemap.ee_to_pandas(rasterExtraction(dm_2007))
xdm_2008 = geemap.ee_to_pandas(rasterExtraction(dm_2008))
xdm_2009 = geemap.ee_to_pandas(rasterExtraction(dm_2009))
xdm_2010 = geemap.ee_to_pandas(rasterExtraction(dm_2010))
xdm_2011 = geemap.ee_to_pandas(rasterExtraction(dm_2011))
xdm_2012 = geemap.ee_to_pandas(rasterExtraction(dm_2012))
xdm_2013 = geemap.ee_to_pandas(rasterExtraction(dm_2013))
xdm_2014 = geemap.ee_to_pandas(rasterExtraction(dm_2014))
xdm_2015 = geemap.ee_to_pandas(rasterExtraction(dm_2015))
xdm_2016 = geemap.ee_to_pandas(rasterExtraction(dm_2016))
xdm_2017 = geemap.ee_to_pandas(rasterExtraction(dm_2017))
xdm_2018 = geemap.ee_to_pandas(rasterExtraction(dm_2018))

##Raster vals
# geemap.ee_to_pandas(monthly_ls5.map(rasterExtraction).flatten())
x_1999 = geemap.ee_to_pandas(rasterExtraction(ls5_1999))
x_2000 = geemap.ee_to_pandas(rasterExtraction(ls5_2000))
x_2001 = geemap.ee_to_pandas(rasterExtraction(ls5_2001))
x_2002 = geemap.ee_to_pandas(rasterExtraction(ls5_2002))
x_2003 = geemap.ee_to_pandas(rasterExtraction(ls5_2003))
x_2004 = geemap.ee_to_pandas(rasterExtraction(ls5_2004))
x_2005 = geemap.ee_to_pandas(rasterExtraction(ls5_2005))
x_2006 = geemap.ee_to_pandas(rasterExtraction(ls5_2006))
x_2007 = geemap.ee_to_pandas(rasterExtraction(ls5_2007))
x_2008 = geemap.ee_to_pandas(rasterExtraction(ls5_2008))
x_2009 = geemap.ee_to_pandas(rasterExtraction(ls5_2009))
x_2010 = geemap.ee_to_pandas(rasterExtraction(ls5_2010))
x_2011 = geemap.ee_to_pandas(rasterExtraction(ls5_2011))
x_2012 = geemap.ee_to_pandas(rasterExtraction(ls7_2012))
x_2013 = geemap.ee_to_pandas(rasterExtraction(ls8_2013))
x_2014 = geemap.ee_to_pandas(rasterExtraction(ls8_2014))
x_2015 = geemap.ee_to_pandas(rasterExtraction(ls8_2015))
x_2016 = geemap.ee_to_pandas(rasterExtraction(ls8_2016))
x_2017 = geemap.ee_to_pandas(rasterExtraction(ls8_2017))
x_2018 = geemap.ee_to_pandas(rasterExtraction(ls8_2018))

display(x_2000)

# ls8_vals = geemap.ee_to_pandas(monthly_ls8.map(rasterExtraction).flatten())

Unnamed: 0,Year,locationID,Latitude,liveMass,Longitude,Transect,N,ndvi
0,1999,1,37.167147,46.72,-75.940768,A,2,0.230167
1,1999,1,37.174764,399.68,-75.942417,A,2,0.294216
2,1999,1,37.180976,441.68,-75.940766,A,2,0.219033
3,1999,1,37.287699,669.60,-75.929487,A,2,0.556824
4,1999,1,37.345970,875.52,-75.901065,A,2,0.559309
...,...,...,...,...,...,...,...,...
920,2017,3,37.287875,96.08,-75.929008,B,2,0.704938
921,2017,3,37.167768,26.56,-75.944280,C,2,0.480000
922,2017,3,37.449973,279.36,-75.671952,C,1,0.762797
923,2018,1,37.396051,255.04,-75.876056,C,1,0.635090


In [78]:
##Subsetting each year
x_1999 = x_1999[x_1999['Year'] == 1999]
x_2000 = x_2000[x_2000['Year'] == 2000]
x_2001 = x_2001[x_2001['Year'] == 2001]
x_2002 = x_2002[x_2002['Year'] == 2002]
x_2003 = x_2003[x_2003['Year'] == 2003]
x_2004 = x_2004[x_2004['Year'] == 2004]
x_2005 = x_2005[x_2005['Year'] == 2005]
x_2006 = x_2006[x_2006['Year'] == 2006]
x_2007 = x_2007[x_2007['Year'] == 2007]
x_2008 = x_2008[x_2008['Year'] == 2008]
x_2009 = x_2009[x_2009['Year'] == 2009]
x_2010 = x_2010[x_2010['Year'] == 2010]
x_2011 = x_2011[x_2011['Year'] == 2011]
x_2012 = x_2012[x_2012['Year'] == 2012]
x_2013 = x_2013[x_2013['Year'] == 2013]
x_2014 = x_2014[x_2014['Year'] == 2014]
x_2015 = x_2015[x_2015['Year'] == 2015]
x_2016 = x_2016[x_2016['Year'] == 2016]
x_2017 = x_2017[x_2017['Year'] == 2017]
x_2018 = x_2018[x_2018['Year'] == 2018]

landsat_list = [
    x_1999,x_2000,x_2001,x_2002,x_2003,x_2004,x_2005,x_2006,x_2007,x_2008,x_2009,x_2010,x_2011,x_2012,x_2013,x_2014,x_2015,
    x_2016,x_2017,x_2018
]

landsat_extract = pd.concat(landsat_list)

xdm_1999 = xdm_1999[xdm_1999['Year'] == 1999]
xdm_2000 = xdm_2000[xdm_2000['Year'] == 2000]
xdm_2001 = xdm_2001[xdm_2001['Year'] == 2001]
xdm_2002 = xdm_2002[xdm_2002['Year'] == 2002]
xdm_2003 = xdm_2003[xdm_2003['Year'] == 2003]
xdm_2004 = xdm_2004[xdm_2004['Year'] == 2004]
xdm_2005 = xdm_2005[xdm_2005['Year'] == 2005]
xdm_2006 = xdm_2006[xdm_2006['Year'] == 2006]
xdm_2007 = xdm_2007[xdm_2007['Year'] == 2007]
xdm_2008 = xdm_2008[xdm_2008['Year'] == 2008]
xdm_2009 = xdm_2009[xdm_2009['Year'] == 2009]
xdm_2010 = xdm_2010[xdm_2010['Year'] == 2010]
xdm_2011 = xdm_2011[xdm_2011['Year'] == 2011]
xdm_2012 = xdm_2012[xdm_2012['Year'] == 2012]
xdm_2013 = xdm_2013[xdm_2013['Year'] == 2013]
xdm_2014 = xdm_2014[xdm_2014['Year'] == 2014]
xdm_2015 = xdm_2015[xdm_2015['Year'] == 2015]
xdm_2016 = xdm_2016[xdm_2016['Year'] == 2016]
xdm_2017 = xdm_2017[xdm_2017['Year'] == 2017]
xdm_2018 = xdm_2018[xdm_2018['Year'] == 2018]

daymet_list = [
    xdm_1999,xdm_2000,xdm_2001,xdm_2002,xdm_2003,xdm_2004,xdm_2005,xdm_2006,xdm_2007,xdm_2008,xdm_2009,xdm_2010,xdm_2011,
    xdm_2012,xdm_2013,xdm_2014,xdm_2015,xdm_2016,xdm_2017,xdm_2018
]

daymet_extract = pd.concat(daymet_list)

dfx = pd.merge(landsat_extract, dem_vals, how = 'left')

df = pd.merge(dfx, daymet_extract, how = 'right')

display(df)

Unnamed: 0,Year,locationID,Latitude,liveMass,Longitude,Transect,N,ndvi,elevation,swe,tmax,srad,tmin,vp,prcp,dayl
0,1999,1,37.167147,46.72,-75.940768,A,2,0.250916,0.157865,0.019534,19.864056,313.638367,9.932028,1402.325928,3.752466,43200.109375
1,1999,1,37.174764,399.68,-75.942417,A,2,0.307150,0.170457,0.019534,19.864056,313.638367,9.932028,1402.325928,3.752466,43200.109375
2,1999,1,37.180976,441.68,-75.940766,A,2,0.414314,0.417727,0.019945,19.852356,313.898651,9.920822,1401.186523,3.718082,43200.109375
3,1999,1,37.287699,669.60,-75.929487,A,2,0.618321,0.601948,0.428658,19.680822,316.296082,9.763808,1387.684692,3.468603,43200.109375
4,1999,1,37.345970,875.52,-75.901065,A,2,0.424025,0.476658,0.374959,19.591616,317.127838,9.724110,1383.485474,3.346000,43200.109375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1003,2017,3,37.287875,96.08,-75.929008,B,2,0.760856,1.035588,0.610603,20.352081,304.125092,11.159507,1558.774902,3.166384,43200.109375
1004,2017,3,37.167768,26.56,-75.944280,C,2,0.552287,0.627959,0.893397,20.576056,302.493103,11.425890,1587.020996,3.330740,43200.109375
1005,2017,3,37.449973,279.36,-75.671952,C,1,0.670178,1.261021,0.618466,20.156055,304.111847,11.119096,1550.952148,3.260986,43200.109375
1006,2018,1,37.396051,255.04,-75.876056,C,1,0.570687,0.957294,0.047479,20.908657,310.197113,11.111095,1521.967773,2.771370,43200.109375


In [79]:
##Bands and indices
df['Sensor'] = np.where(df['Year']<2013, 'Landsat 5', 'Landsat 8') ##make sure no other sensors are being used

df.loc[df['Year'] == 2012, 'Sensor'] = 'Landsat 7'

df['ndvi'] = np.where(df['Sensor'] == 'Landsat 8', (df['B5']-df['B4'])/(df['B5']+df['B4']), \
                      (df['B4']-df['B3'])/(df['B4']+df['B3'])) ##ndvi conditional based on whether sensor is Landsat-5 or 8

df['Blue_band'] = np.where(df['Sensor'] == 'Landsat 8', df['B2'], df['B1'])
df['Green_band'] = np.where(df['Sensor'] == 'Landsat 8', df['B3'], df['B2'])
df['Red_band'] = np.where(df['Sensor'] == 'Landsat 8', df['B4'], df['B3'])
df['NIR_band'] = np.where(df['Sensor'] == 'Landsat 8', df['B5'], df['B4'])
df['SWIR1_band'] = np.where(df['Sensor'] == 'Landsat 8', df['B6'], df['B5'])
df['SWIR2_band'] = np.where(df['Sensor'] == 'Landsat 8', df['B7'], df['B7'])

##Variables from Byrd et al. 2018 (make sure calculations are accurate):
df['savi'] = ((df['NIR_band']-df['Red_band'])*1.5)/(df['NIR_band']+df['Red_band']+0.5)
df['wdrvi5'] = (0.5*df['NIR_band']-df['Red_band'])/(0.5*df['NIR_band']+df['Red_band'])
df['nd_r_g'] = (df['Red_band']-df['Green_band'])/(df['Red_band']+df['Green_band'])
df['nd_g_b'] = (df['Green_band']-df['Blue_band'])/(df['Green_band']+df['Blue_band'])
df['nd_swir2_nir'] = (df['SWIR2_band']-df['NIR_band'])/(df['SWIR2_band']+df['NIR_band'])
df['nd_swir2_r'] = (df['SWIR2_band']-df['Red_band'])/(df['SWIR2_band']+df['Red_band'])

##EXPORT
out_dir = os.path.expanduser('~/Downloads')
out_csv = os.path.join(out_dir, 'va_min.csv')
# df.to_csv(out_csv, index = False)

In [None]:
##Next step: ML