In [20]:
import ee
import geemap
import pandas as pd
import geopandas as gpd
import glob
from pathlib import Path
import numpy as np
import time

In [21]:
# Interactive Earth Engine login, left disabled here.
# NOTE(review): presumably only needed when no credentials are stored yet — confirm.
#ee.Authenticate()
# Start the Earth Engine client session used by every ee.* call in the cells below.
ee.Initialize()

In [22]:
# input files: paths are defined in the next cell and read into GeoDataFrames further below

In [23]:
# GeoJSON point layers used in this notebook; both live in the same
# testData_unmixingRegression directory.
purePFT = (
    '/mnt/poseidon/remotesensing/arctic/data/training/testData_unmixingRegression/'
    'purePFT_merged_fCover_Macander2017_geometry.geojson'
)
randPFT = (
    '/mnt/poseidon/remotesensing/arctic/data/training/testData_unmixingRegression/'
    'randomPts_fCover_10kmDist_Macander2017_geometry.geojson'
)

In [24]:
# Cloud / shadow masking parameters read by the Sentinel-2 helper functions below.
CLOUD_FILTER = 90  # scenes are kept when CLOUDY_PIXEL_PERCENTAGE <= this value
CLD_PRB_THRESH = 50  # s2cloudless 'probability' greater than this is flagged as cloud
NIR_DRK_THRESH = 0.15  # B8 below this value times 1e4 marks a dark (potential shadow) pixel
CLD_PRJ_DIST = 1  # multiplied by 10 and passed to directionalDistanceTransform for shadow projection
BUFFER = 10  # dilation of the combined mask: focalMax is called with BUFFER*2/20

In [25]:
# Run configuration: image date window, region-of-interest selector, directories and band list.
START_DATE = '2019-06-01'  # start of the filterDate window for both Sentinel-2 collections
END_DATE = '2019-08-31'  # end of the filterDate window for both Sentinel-2 collections
COUNTRY = ''  # compared with ADM0_NAME when ROI is 'STATE' or 'COUNTRY'
STATE = 'AK'  # compared with ADM1_NAME ('STATE') or the 'states' property ('WATERSHED')
GEOJSON_PATH = ''  # file handed to geemap.geojson_to_ee when ROI is 'BBOX'
ROI = 'WATERSHED' #STATE, COUNTRY, BBOX, or WATERSHED
# NOTE(review): INPUT_DIR and OUTPUT_DIR are not referenced by any cell visible in this notebook.
INPUT_DIR = '/mnt/poseidon/remotesensing/arctic/data/vectors/alaska_plot_data/v1'
OUTPUT_DIR = '/mnt/poseidon/remotesensing/arctic/data/vectors/data_testing'
# Bands selected by apply_cld_shdw_mask after the cloud/shadow mask has been applied.
BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12']

In [26]:
# Build the region-of-interest FeatureCollection (admin_fcol) chosen by ROI.
#   'STATE'     -> FAO GAUL level-1 units matching both COUNTRY and STATE
#   'COUNTRY'   -> FAO GAUL level-1 units matching COUNTRY
#   'BBOX'      -> features loaded from the GeoJSON file at GEOJSON_PATH
#   'WATERSHED' -> USGS HUC06 watersheds whose 'states' property contains STATE
if ROI == 'STATE':
    admin_fcol = (ee.FeatureCollection("FAO/GAUL/2015/level1")
                  .filterMetadata('ADM0_NAME', 'equals', COUNTRY)
                  .filterMetadata('ADM1_NAME', 'equals', STATE))

elif ROI == 'COUNTRY':
    admin_fcol = (ee.FeatureCollection("FAO/GAUL/2015/level1")
                  .filterMetadata('ADM0_NAME', 'equals', COUNTRY))

elif ROI == 'BBOX':
    admin_fcol = geemap.geojson_to_ee(GEOJSON_PATH)

elif ROI == 'WATERSHED':
    admin_fcol = (ee.FeatureCollection("USGS/WBD/2017/HUC06")
                  .filterMetadata('states', 'contains', STATE))

else:
    # Fail loudly: without this branch a mistyped ROI would leave admin_fcol
    # undefined, or silently keep the value from an earlier run of this cell
    # in the same kernel.
    raise ValueError(
        f"Unsupported ROI {ROI!r}; expected 'STATE', 'COUNTRY', 'BBOX', or 'WATERSHED'"
    )

In [59]:
# Load both point layers into GeoDataFrames; reset_index(drop=True) replaces
# whatever index the reader produced with a fresh positional one.
pureGJ = gpd.read_file(purePFT).reset_index(drop=True)
randGJ = gpd.read_file(randPFT).reset_index(drop=True)

In [60]:
# Slim copies holding only the geometry, indexed by the 'id' column; these are
# the frames later converted to Earth Engine FeatureCollections.
pureGJ_simple = pureGJ[['id', 'geometry']].set_index('id')
randGJ_simple = randGJ[['id', 'geometry']].set_index('id')

In [61]:
# Keep only rows that actually carry a geometry; rows with a missing geometry
# are dropped before the frames are handed to Earth Engine.
pureGJ_simple = pureGJ_simple.loc[pureGJ_simple['geometry'].notna()]
randGJ_simple = randGJ_simple.loc[randGJ_simple['geometry'].notna()]

In [62]:
# Convert the cleaned GeoDataFrames into Earth Engine FeatureCollections.
# randPoints is the collection sampled against the Sentinel-2 composites below;
# purePoints is only added to the map in the visible cells.
purePoints = geemap.gdf_to_ee(pureGJ_simple)
randPoints = geemap.gdf_to_ee(randGJ_simple)

In [49]:
# Interactive map centred at (65, -153) showing both point sets: pure-PFT points
# with default styling, random points in red. No watershed layer is added here.
Map = geemap.Map(center=(65, -153), zoom=4, basemap='HYBRID')
Map.addLayer(purePoints, {}, 'observation_points')
Map.addLayer(randPoints, {'color': 'red'}, 'rand_observation_points')
# Bare last expression so the notebook renders the map widget.
Map

Map(center=[65, -153], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(T…

In [40]:
def get_s2_sr_cld_col(aoi, start_date, end_date):
    """Pair Sentinel-2 surface-reflectance scenes with their s2cloudless images.

    Args:
        aoi: Earth Engine geometry or collection passed to ``filterBounds``.
        start_date: Start of the ``filterDate`` window.
        end_date: End of the ``filterDate`` window.

    Returns:
        ee.ImageCollection of ``COPERNICUS/S2_SR_HARMONIZED`` scenes whose
        ``CLOUDY_PIXEL_PERCENTAGE`` is at most ``CLOUD_FILTER``. Each scene carries
        the first ``COPERNICUS/S2_CLOUD_PROBABILITY`` image with the same
        ``system:index`` in its ``s2cloudless`` property.
    """
    # Scene-level pre-filter on the cloud percentage reported in the metadata.
    scene_cloud_ok = ee.Filter.lte('CLOUDY_PIXEL_PERCENTAGE', CLOUD_FILTER)

    sr_scenes = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    sr_scenes = sr_scenes.filterBounds(aoi).filterDate(start_date, end_date)
    sr_scenes = sr_scenes.filter(scene_cloud_ok)

    cloud_prob_scenes = ee.ImageCollection('COPERNICUS/S2_CLOUD_PROBABILITY')
    cloud_prob_scenes = cloud_prob_scenes.filterBounds(aoi).filterDate(start_date, end_date)

    # Both collections share 'system:index', so that property is the join key.
    same_scene = ee.Filter.equals(
        leftField='system:index',
        rightField='system:index',
    )
    joined = ee.Join.saveFirst('s2cloudless').apply(
        primary=sr_scenes,
        secondary=cloud_prob_scenes,
        condition=same_scene,
    )
    return ee.ImageCollection(joined)

def add_cloud_bands(img):
    """Append the cloud probability and a binary cloud flag to ``img``.

    Args:
        img: Image whose ``s2cloudless`` property holds the matching cloud
            probability image (as attached by ``get_s2_sr_cld_col``).

    Returns:
        ``img`` with two extra bands: ``probability`` and ``clouds``, where
        ``clouds`` is 1 when the probability exceeds ``CLD_PRB_THRESH``.
    """
    probability = ee.Image(img.get('s2cloudless')).select('probability')
    cloud_flag = probability.gt(CLD_PRB_THRESH).rename('clouds')
    extra_bands = ee.Image([probability, cloud_flag])
    return img.addBands(extra_bands)


def add_shadow_bands(img):
    """Append dark-pixel, cloud-projection and shadow bands to ``img``.

    Args:
        img: Image that already has the ``clouds`` band from ``add_cloud_bands``
            as well as the ``SCL`` and ``B8`` bands.

    Returns:
        ``img`` with three extra bands: ``dark_pixels`` (dark NIR pixels that are
        not SCL class 6, i.e. water), ``cloud_transform`` (area reached by
        projecting clouds along the shadow direction) and ``shadows`` (the
        product of the two).
    """
    SR_BAND_SCALE = 1e4
    nir_cutoff = NIR_DRK_THRESH*SR_BAND_SCALE

    # Dark NIR pixels, with water (SCL == 6) excluded, are shadow candidates.
    land_only = img.select('SCL').neq(6)
    dark = img.select('B8').lt(nir_cutoff).multiply(land_only).rename('dark_pixels')

    # Direction in which clouds cast their shadow (assumes UTM projection).
    azimuth = ee.Number(90).subtract(ee.Number(img.get('MEAN_SOLAR_AZIMUTH_ANGLE')))

    # Project the cloud band along that direction over CLD_PRJ_DIST*10.
    projected = img.select('clouds').directionalDistanceTransform(azimuth, CLD_PRJ_DIST*10)
    projected = projected.reproject(crs=img.select(0).projection(), scale=100)
    projected = projected.select('distance').mask().rename('cloud_transform')

    # A shadow is a dark pixel that also lies inside the projected cloud area.
    shadow = projected.multiply(dark).rename('shadows')

    return img.addBands(ee.Image([dark, projected, shadow]))


def add_cld_shdw_mask(img):
    """Build the combined cloud/shadow mask and append it as ``cloudmask``.

    Args:
        img: Image accepted by ``add_cloud_bands``.

    Returns:
        The image returned by ``add_shadow_bands(add_cloud_bands(img))`` plus a
        ``cloudmask`` band that is 1 for cloud or shadow and 0 otherwise.
    """
    with_components = add_shadow_bands(add_cloud_bands(img))

    # 1 wherever either the cloud flag or the shadow flag is set.
    cloud_band = with_components.select('clouds')
    shadow_band = with_components.select('shadows')
    combined = cloud_band.add(shadow_band).gt(0)

    # Erode small patches, then dilate the remainder by the BUFFER input.
    # Working at 20 m is for speed; clouds are assumed not to need 10 m precision.
    dilation = BUFFER*2/20
    cleaned = combined.focalMin(2).focalMax(dilation)
    cleaned = cleaned.reproject(crs=img.select([0]).projection(), scale=20)
    cleaned = cleaned.rename('cloudmask')

    return with_components.addBands(cleaned)


def apply_cld_shdw_mask(img):
    """Mask out cloud/shadow pixels and keep only the bands listed in ``BANDS``.

    Args:
        img: Image carrying the ``cloudmask`` band (1 = cloud or shadow).

    Returns:
        ``img`` masked wherever ``cloudmask`` is non-zero, restricted to ``BANDS``.
    """
    # Invert the mask so clear pixels are 1 and cloud/shadow pixels are 0.
    clear = img.select('cloudmask').Not()
    masked = img.updateMask(clear)
    return masked.select(BANDS)

In [81]:
# Scenes intersecting the random points within the configured date window,
# each paired with its cloud-probability image.
s2_sr_cld_col = get_s2_sr_cld_col(randPoints, START_DATE, END_DATE)
# Per scene: derive the cloud/shadow mask, then apply it and keep only BANDS.
s2_sr = s2_sr_cld_col.map(add_cld_shdw_mask).map(apply_cld_shdw_mask)

In [89]:
# Inspect how many features were uploaded as random points.
# NOTE(review): size() presumably returns a server-side Earth Engine object;
# a .getInfo() call may be needed to see the plain number — confirm.
randPoints.size()

In [82]:
# Collapse the masked image collection into two composites: one reduced with
# the max reducer and one with the median reducer.
# The reducer name ends up as a band suffix (e.g. B1_median in the table below).
s2_sr_max = s2_sr.reduce(ee.Reducer.max())
s2_sr_med = s2_sr.reduce(ee.Reducer.median())

In [83]:
# sample sentinel 2 imagery using our observation points
def sample_raster(image, fcollection, scale=10, projection='EPSG:4326', geometries=False):
    """Sample ``image`` at the features of ``fcollection``.

    Thin wrapper around ``image.sampleRegions`` that forwards every argument
    by keyword.

    Args:
        image: Object exposing ``sampleRegions`` (an Earth Engine image here).
        fcollection: Features to sample at; forwarded as ``collection``.
        scale: Forwarded as ``scale`` (default 10).
        projection: Forwarded as ``projection`` (default ``'EPSG:4326'``).
        geometries: Forwarded as ``geometries`` (default ``False``).

    Returns:
        Whatever ``image.sampleRegions`` returns.
    """
    sampling_options = {
        'collection': fcollection,
        'scale': scale,
        'projection': projection,
        'geometries': geometries,
    }
    return image.sampleRegions(**sampling_options)

In [84]:
# Sample both composites at the random points using sample_raster's defaults
# (scale=10, projection='EPSG:4326', geometries=False).
max_samples = sample_raster(s2_sr_max, randPoints)
med_samples = sample_raster(s2_sr_med, randPoints)

In [85]:
def fc_to_df(fc, idx_col):
    """Convert a FeatureCollection into a pandas DataFrame of its properties.

    Args:
        fc: Object whose ``getInfo()`` returns a dict with a ``'features'`` list;
            each feature is a dict with a ``'properties'`` mapping.
        idx_col: Name of the property to use as the DataFrame index.

    Returns:
        DataFrame with one row per feature and one column per property, indexed
        by ``idx_col``. When the collection has no features, an empty DataFrame
        whose index is named ``idx_col`` is returned instead of failing.

    Raises:
        KeyError: If features exist but none of them carries ``idx_col``.
    """
    # getInfo() transfers the whole collection to the client in one call.
    # NOTE(review): very large collections may exceed what getInfo() can
    # return — confirm against the Earth Engine limits before scaling up.
    features = fc.getInfo()['features']
    records = [feature['properties'] for feature in features]

    if not records:
        # An empty record list yields a frame without columns, on which
        # set_index(idx_col) would raise a confusing KeyError.
        return pd.DataFrame(index=pd.Index([], name=idx_col))

    return pd.DataFrame(records).set_index(idx_col)

In [86]:
# Pull the sampled feature properties to the client as DataFrames indexed by 'id'.
max_df = fc_to_df(max_samples, 'id')
med_df = fc_to_df(med_samples, 'id')

In [90]:
# Attach the sampled max and median band values to the original random-point
# table, matching rows on 'id' (a column in randGJ, the index in max_df/med_df).
# NOTE(review): no `how` is given, so pandas' default join type applies; points
# without a sampled row presumably drop out — confirm this is intended.
maxSamp = pd.merge(randGJ, max_df, on='id')
medmaxSamp = pd.merge(maxSamp, med_df, on='id')

In [91]:
# Show the merged table; the notebook rendering truncates rows and columns.
medmaxSamp

Unnamed: 0,id,SAMPLE_1,SAMPLE_2,SAMPLE_3,SAMPLE_4,SAMPLE_5,SAMPLE_6,SAMPLE_7,xcoord,ycoord,...,B1_median,B2_median,B3_median,B4_median,B5_median,B6_median,B7_median,B8A_median,B8_median,B9_median
0,2,2.0,4.0,9.0,3.0,27.0,48.0,7.0,-151.733578,70.185051,...,303.0,448.000000,616.000000,775.00,1107.5,1669.0,1906.0,2227.000,2098.0,1691.50
1,3,1.0,18.0,24.0,4.0,26.0,51.0,0.0,-158.534556,69.985351,...,282.0,423.000000,644.000000,774.00,1336.0,2074.0,2328.0,2650.000,2460.0,2678.00
2,5,2.0,24.0,37.0,4.0,35.0,62.0,0.0,-157.014720,69.245473,...,526.0,407.000000,665.000000,792.00,1496.0,2649.0,2977.0,3170.500,3175.0,3038.00
3,6,4.0,30.0,14.0,14.0,22.0,53.0,2.0,-148.840472,68.900334,...,252.0,374.500000,585.000000,563.50,1134.0,2227.5,2478.5,2840.000,2733.0,2688.50
4,7,0.0,22.0,28.0,5.0,32.0,61.0,0.0,-152.873300,69.367081,...,307.5,426.333333,646.791667,802.00,1336.5,2260.0,2554.5,2909.500,2840.5,2903.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505,992,21.0,10.0,14.0,6.0,22.0,49.0,2.0,-148.854747,69.629570,...,370.0,478.500000,648.500000,854.00,1210.0,1709.5,1906.0,2269.000,2053.0,2369.00
506,994,1.0,49.0,32.0,14.0,32.0,55.0,2.0,-149.262393,69.307968,...,256.5,317.333333,558.500000,529.75,1182.0,2574.5,2888.5,3281.375,3073.0,3104.25
507,995,2.0,31.0,43.0,5.0,35.0,56.0,2.0,-153.079714,68.963388,...,307.0,411.000000,683.500000,794.50,1420.0,2381.0,2680.0,3014.500,2939.0,3011.50
508,997,0.0,0.0,0.0,0.0,0.0,0.0,100.0,-156.510059,70.470835,...,824.0,790.000000,875.000000,643.00,594.0,484.0,490.0,502.000,616.0,823.00


In [92]:
# Directory for the exported CSV; this is the same directory the input GeoJSON
# paths (purePFT / randPFT) point into.
outpath = '/mnt/poseidon/remotesensing/arctic/data/training/testData_unmixingRegression'

In [93]:
# Write the merged sample table to CSV in outpath. No index argument is passed,
# so the frame's index is written with to_csv's default setting.
# NOTE(review): the file name spells 'macandar2017' while the input files use
# 'Macander2017' — confirm which spelling downstream code expects.
medmaxSamp.to_csv(f'{outpath}/macandar2017_randomsample_summer2019_medmax_v01.csv')