# Setup software libraries

Install needed libraries to the notebook VM.  Authenticate as necessary.

In [None]:
# Cloud authentication.
# The google.colab package is only importable inside a Colab runtime.
# authenticate_user() starts the interactive Google sign-in flow for the
# person running the notebook.
from google.colab import auth
auth.authenticate_user()

In [None]:
# Earth Engine install to notebook VM.
!pip install earthengine-api

In [None]:
# Import, authenticate and initialize the Earth Engine library.
import ee
# Authenticate() runs the sign-in flow and Initialize() then sets up the client;
# every later cell that touches ee.* relies on both having run.
# NOTE(review): recent earthengine-api releases may require
# ee.Initialize(project=...) - confirm against the installed version.
ee.Authenticate()
ee.Initialize()

In [None]:
# Tensorflow setup.
import tensorflow as tf

# TF 1.x needs eager execution switched on explicitly. TF 2.x is eager by
# default and no longer exposes tf.enable_eager_execution, so calling it
# unconditionally raises AttributeError there. Only call it when it exists.
if hasattr(tf, 'enable_eager_execution'):
  tf.enable_eager_execution()
print(tf.__version__)

## Specify your Cloud Storage Bucket
You must have write access to a bucket to run this demo!  To run it read-only, use the demo bucket below, but note that writes to this bucket will not work.

In [None]:
# INSERT YOUR BUCKET HERE:
# Cloud Storage bucket name that the export tasks further down pass as
# `bucket=` when writing the TFRecord files.
BUCKET = 'ee-tf'

In [None]:
# normalize inputs
def basicNorm(img):
  """Min-max rescale every band of `img` independently.

  Args:
    img: ee.Image with one or more bands.

  Returns:
    ee.Image (float) with the same band names as `img`, where each band is
    (value - band_min) / (band_max - band_min). The per-band min and max come
    from a reduceRegion call at scale 0.5 with maxPixels 1e13.
  """
  img = img.toFloat()
  bandNames = img.bandNames()
  imgMinMax = img.reduceRegion(
      reducer=ee.Reducer.minMax(), scale=0.5, maxPixels=1e13)

  # reduceRegion with Reducer.minMax() keys its result '<band>_min' and
  # '<band>_max'. Look the statistics up by name, band by band, instead of by
  # position in the alphabetically sorted key list: the positional lookup is
  # only right for single-band images and would scale every band of a
  # multi-band image by the range of whichever band sorts first.
  def _stat_image(suffix):
    """Constant image holding one statistic per band, named like `img`."""
    keys = bandNames.map(lambda name: ee.String(name).cat(suffix))
    return imgMinMax.toImage(keys).rename(bandNames)

  imgMin = _stat_image('_min')
  imgMax = _stat_image('_max')

  normImg = img.subtract(imgMin).divide(imgMax.subtract(imgMin))
  return normImg.toFloat()


## Set other global variables

In [None]:
# Folder inside the bucket and base names used for the exported files.
FOLDER = 'tahoe-ogfw-03112020-array-256'
TRAINING_BASE = 'Training_tahoe'
EVAL_BASE = 'Eval_tahoe'

# Response variable and the input (predictor) band names for the model.
# FEATURES is the predictors followed by the response.
RESPONSE = 'class'
BANDS = ['R', 'G', 'B', 'NIR', 'L', 'O', 'ND']
FEATURES = [*BANDS, RESPONSE]

# Size (in pixels, per side) of the square patches expected by the model.
KERNEL_SIZE = 256


# Imagery

Gather and set up the imagery to use for inputs (predictors).

In [None]:
# Source mosaic; the lower-case 'nir' band is renamed to 'NIR'.
uas = (
    ee.Image("projects/sig-ee/PC271-AquaticVegetation/tahoeMosaic_mask")
    .select(['R', 'G', 'B', 'nir'], ['R', 'G', 'B', 'NIR'])
)

# CIE1931 luminance (0.2126 R + 0.7152 G + 0.0722 B), used as greyscale model
# input outside the model's "fovea".
uas_l = (
    uas.select('R').multiply(0.2126)
    .add(
        uas.select('G').multiply(0.7152)
        .add(uas.select('B').multiply(0.0722))
    )
    .rename('L')
)

# Normalized difference of NIR and R.
ndvi = uas.normalizedDifference(['NIR', 'R']).rename('ND')

# Additional single-band input, exposed to the model as 'O'.
dr40 = (
    ee.Image("projects/sig-ee/PC271-AquaticVegetation/DR_40_grn_int_lkth_null_upadted")
    .rename('O')
)

# Other candidate inputs, currently unused:
# dpth = ee.Image("projects/sig-ee/PC271-AquaticVegetation/dpth_lkth").rename('dpth')
# rug = ee.Image("projects/sig-ee/PC271-AquaticVegetation/rugosity").rename('rug')

# Normalize each source separately, then stack them into one image. The
# resulting band order (R, G, B, NIR, L, O, ND) matches BANDS.
image = ee.Image.cat([
    basicNorm(uas),
    basicNorm(uas_l),
    basicNorm(dr40),
    basicNorm(ndvi),
])


Prepare the response (what we want to predict).  Combine the plant and no-plant sites, then convert them to an image for sampling.

In [None]:
def add1(f):
  """Return feature `f` with its 'class' property set to 1.0 (plant)."""
  return f.set('class', 1.0)


def add0(f):
  """Return feature `f` with its 'class' property set to 0.0 (no plant)."""
  return f.set('class', 0.0)


# Label the two site collections and merge them into one.
plants = ee.FeatureCollection('projects/sig-ee/PC271-AquaticVegetation/SamplePloy/PlantSites').map(add1)
no_plants = ee.FeatureCollection('projects/sig-ee/PC271-AquaticVegetation/SamplePloy/NoPlantSites_12172019').map(add0)
samples = plants.merge(no_plants).randomColumn()

# Rasterize the 'class' property so it can be stacked with the predictors.
sampleImg = samples.reduceToImage(['class'], ee.Reducer.mean()).toFloat().rename('class')

# Select the predictor bands explicitly before appending the response, so that
# re-running this cell does not keep stacking extra copies of the 'class' band
# onto `image`.
image = ee.Image.cat([image.select(BANDS), sampleImg])
# Debug aid: print(image.bandNames().getInfo())


In [None]:
# Split the labelled sites on the 'random' column added by randomColumn():
# values below 0.7 go to training, the rest to evaluation.
samples = plants.merge(no_plants).randomColumn()
trainingSamples = samples.filter(ee.Filter.lt('random', 0.7))
evalSamples = samples.filter(ee.Filter.gte('random', 0.7))


def _class_count(collection, label):
  """Fetch the number of features in `collection` whose 'class' equals `label`."""
  return collection.filter(ee.Filter.eq('class', label)).size().getInfo()


# Each count is a blocking getInfo() call; these will time out occasionally.
print('Train NP', _class_count(trainingSamples, 0))
print('Eval NP', _class_count(evalSamples, 0))
print('Eval P', _class_count(evalSamples, 1))
print('Train P', _class_count(trainingSamples, 1))

Stack the 2D images to create a single image from which samples can be taken.  Convert the image into an array image in which each pixel stores 256x256 patches of pixels for each band.  This is a key step that bears emphasis: to export training patches, convert a multi-band image to [an array image](https://developers.google.com/earth-engine/arrays_array_images#array-images) using [`neighborhoodToArray()`](https://developers.google.com/earth-engine/api_docs#eeimageneighborhoodtoarray), then sample the image at points.

In [None]:
featureStack = image

# Build a KERNEL_SIZE x KERNEL_SIZE kernel of ones (a list of rows, each a list
# of weights). The row is deliberately not called `list`: that would shadow
# the Python builtin for every later cell in the notebook.
kernel_row = ee.List.repeat(1, KERNEL_SIZE)
lists = ee.List.repeat(kernel_row, KERNEL_SIZE)
kernel = ee.Kernel.fixed(KERNEL_SIZE, KERNEL_SIZE, lists)

# Array image: each pixel holds the kernel-sized neighborhood of every band.
arrays = featureStack.neighborhoodToArray(kernel)


Use some pre-made geometries to sample the stack in strategic locations.  Specifically, these are hand-made polygons in which to take the 256x256 samples.  Display the sampling polygons on a map, red for training polygons, blue for evaluation.

# Sampling

![alt text](https://i.imgur.com/42nx7Zf.png)

The mapped data look reasonable so take a sample from each polygon and merge the results into a single export.  The key step is sampling the array image at points, to get all the pixels in a 256x256 neighborhood at each point.  It's worth noting that to build the training and testing data for the FCNN, you export a single TFRecord file that contains patches of pixel values in each record.  You do NOT need to export each training/testing patch to a different image.  Since each record potentially contains a lot of data (especially with big patches or many input bands), some manual sharding of the computation is necessary to avoid the `computed value too large` error.  Specifically, the following code takes multiple (smaller) samples within each geometry, merging the results to get a single export.

In [None]:
# Convert the feature collections to lists for iteration.
trainingPolysList = trainingSamples.toList(trainingSamples.size())
evalPolysList = evalSamples.toList(evalSamples.size())

# These numbers determined experimentally.
n = 200  # Number of shards in each polygon.
N = 200  # Total sample size in each polygon.


def _sample_geometry(geometry, shards, total):
  """Sample `arrays` inside `geometry` in several small pieces and merge them.

  Args:
    geometry: ee.Geometry to sample within.
    shards: number of separate sample() calls to make (each with its own seed).
    total: total number of pixels requested across all shards.

  Returns:
    ee.FeatureCollection containing the merged samples of every shard.
  """
  # numPixels is a count, so use integer division instead of passing a float.
  per_shard = total // shards
  merged = ee.FeatureCollection([])
  for seed in range(shards):
    shard = arrays.sample(
      region=geometry,
      scale=0.5,
      numPixels=per_shard,  # Size of the shard.
      seed=seed,
      tileScale=8
    )
    merged = merged.merge(shard)
  return merged


def _start_export(collection, description):
  """Start a TFRecord export of `collection` to Cloud Storage; return the task.

  The file is written to BUCKET under FOLDER/<description> and contains the
  FEATURES columns (input bands plus the response).
  """
  export_task = ee.batch.Export.table.toCloudStorage(
    collection=collection,
    description=description,
    bucket=BUCKET,
    fileNamePrefix=FOLDER + '/' + description,
    fileFormat='TFRecord',
    selectors=FEATURES
  )
  export_task.start()
  return export_task


# Export all the training data (in many pieces), with one task
# per geometry.
for g in range(trainingSamples.size().getInfo()):
  geomSample = _sample_geometry(
      ee.Feature(trainingPolysList.get(g)).geometry(), n, N)
  desc = TRAINING_BASE + '_g' + str(g)
  task = _start_export(geomSample, desc)

# Export all the evaluation data.
for g in range(evalSamples.size().getInfo()):
  geomSample = _sample_geometry(
      ee.Feature(evalPolysList.get(g)).geometry(), n, N)
  desc = EVAL_BASE + '_g' + str(g)
  task = _start_export(geomSample, desc)