## How to contribute a slicing function for Xingu

Slicing functions define a spatial and temporal area that can be used to evaluate automatically generated models in Xingu

### Login GEE API

In [8]:
# Import the Earth Engine API and initialize it.
import ee
# Trigger the authentication flow.
ee.Authenticate()
# Initialize the library.
ee.Initialize()

  warn("IPython.utils.traitlets has moved to a top-level traitlets package.")


To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://accounts.google.com/o/oauth2/auth?client_id=517222506229-vsmmajv00ul0bs7p89v5m89qs8eb9359.apps.googleusercontent.com&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fearthengine+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdevstorage.full_control&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&response_type=code

The authorization workflow will generate a code, which you should paste in the box below. 
Enter verification code: 4/wgFfG5vBBNUX3xbiQjGUQJGuYzg2VjUrEZc_ngI9B4zevjCVi-8m1lw

Successfully saved authorization token.


## Specify base maps

In [9]:
# Use these bands for prediction.
bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7']
# Use Landsat 8 surface reflectance data.
l8sr = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')
ndvi = ee.ImageCollection('LANDSAT/LC08/C01/T1_32DAY_NDVI')
hansen = ee.Image("UMD/hansen/global_forest_change_2018_v1_6")
#l7sr = ee.ImageCollection('LANDSAT/LC07/C01/T1_SR')
#sentinel2 = ee.ImageCollection('COPERNICUS/S2_SR').filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))

## get nighttime lights
nightlight_col = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG')

def maskS2clouds(image):
  qa = image.select('QA60');
  # Bits 10 and 11 are clouds and cirrus, respectively.
  cloudBitMask = 1 << 10;
  cirrusBitMask = 1 << 11;

  # Both flags should be set to zero, indicating clear conditions.
  mask = qa.bitwiseAnd(cloudBitMask).eq(0).And(qa.bitwiseAnd(cirrusBitMask).eq(0));

  return image.updateMask(mask).divide(10000);
                                                 
                                                          
## Cloud masking function.
def maskL8sr(image):
  cloudShadowBitMask = ee.Number(2).pow(3).int()
  cloudsBitMask = ee.Number(2).pow(5).int()
  qa = image.select('pixel_qa')
  mask = qa.bitwiseAnd(cloudShadowBitMask).eq(0).And(
    qa.bitwiseAnd(cloudsBitMask).eq(0))
  return image.updateMask(mask).select(bands).divide(10000)



## Get median composites for years 2013-2019

In [10]:
images_per_year = dict()
for year in range(2013, 2019):
    print(year)
    images_per_year[year] = {'ls8sr':  l8sr.filterDate('{}-01-01'.format(year), '{}-12-31'.format(year)).map(maskL8sr).median()}
    images_per_year[year]['nightlight'] = nightlight_col.filterDate('{}-01-01'.format(year), '{}-12-31'.format(year)).median()
    images_per_year[year]['ndvi'] = ndvi.filterDate('{}-01-01'.format(year), '{}-12-31'.format(year)).median()
    lossYear = hansen.select(['lossyear'])
    images_per_year[year]['hansen'] = lossYear.gt(year - 2000)


2013
2014
2015
2016
2017
2018


## Get all available scenario regions

In [11]:
import os
import geojson
base_dir = os.getcwd()
all_geojson_paths = [os.path.join(base_dir, x) for x in os.listdir(base_dir) if x.endswith('.geojson')]
all_geojsons = []
for geojson_path in all_geojson_paths:
    with open(geojson_path) as f:
        gj = geojson.load(f)
        all_geojsons.append(gj)
print('found {} scenario regions'.format(len(all_geojsons)))
#all_geojsons = sorted(all_geojsons)
import random
random.seed(123)
random.shuffle(all_geojsons)
train_range = round(0.7 * len(all_geojsons))
val_range = round(0.8 * len(all_geojsons))
train_regions = all_geojsons[:train_range]
val_regions = all_geojsons[train_range:val_range]
test_regions = all_geojsons[val_range:]


found 16 scenario regions


In [12]:
#get sample train region
#train_region = train_regions[0]

In [13]:
def get_coordinates_for_region(region):
    region = train_region["coordinates"][0]
    region_inv = [[x[1],x[0]] for x in region]
    x_center = sum(x[0] for x in region_inv) / len(region_inv)
    y_center = sum(x[1] for x in region_inv) / len(region_inv)
    edges = region_inv
    center_coords = [x_center, y_center]
    return edges, center_coords



## This is how we visualize it in colab

In [14]:
def get_scaled_img(image, target_geometry):
    minMax = image.reduceRegion(reducer= ee.Reducer.minMax(), geometry= target_geometry,scale= 30, maxPixels= 10e9,   # tileScale: 16
    );
    def scale(name):
        name = ee.String(name);
        #print('band name: {}'.format(name))
        band = image.select(name);
        
        return band.unitScale(ee.Number(minMax.get(name.cat('_min'))), ee.Number(minMax.get(name.cat('_max'))))
                    # eventually multiply by 100 to get range 0-100
                    #.multiply(100); 
    scaled_bands = image.bandNames().map(scale)
    unitScale = ee.ImageCollection.fromImages(scaled_bands)
    unitScale_bands = unitScale.toBands().rename(image.bandNames());
    
#    meanDict = unitScale_bands.reduceRegion(reducer=  ee.Reducer.mean(), geometry =target_geometry, scale=30 )
#    minMaxDict = unitScale_bands.reduceRegion(reducer=  ee.Reducer.minMax(), geometry =target_geometry, scale=30 )
#    print(meanDict.getInfo())
#    print(minMaxDict.getInfo())
    
    return unitScale_bands

def getMinMax(image, target_geometry):
    minMaxDict = image.reduceRegion(reducer=  ee.Reducer.minMax(), geometry=target_geometry, scale=30 )
    print(minMaxDict.getInfo())

def maskGeometry(image, geometry):
   mask =  ee.Image.constant(1).clip(geometry).mask()
   return image.updateMask(mask)

In [15]:

import folium

def display_img(image, center_location, value_name, mask_region):
    mapIdDict = image.getMapId({'bands': [value_name], 'min': 0.0, 'max': 1})
    folium_map = folium.Map(location=center_location)
    folium.TileLayer(
        tiles=mapIdDict['tile_fetcher'].url_format,
        attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
        overlay=True,
        name='median composite',
      ).add_to(folium_map)
    folium_map.add_child(folium.LayerControl())

    line_color='red'
    fill_color='red'
    weight=2
    text='Selected Region'
    
    folium_map.add_child(folium.vector_layers.Polygon(locations=mask_region, color=line_color, fill_color=fill_color,
                                                  weight=weight, popup=(folium.Popup(text))))

    return folium_map

### We use folium to visualize the map

In [16]:
def compute_and_display_yeardiff(images_per_year, year, img_type, value_name, region):
    image = images_per_year[year][img_type]
    img_cur = maskGeometry(images_per_year[year][img_type], region)
    img_old = maskGeometry(images_per_year[year-1][img_type], region)
    imgs_diff = img_cur.select(value_name).subtract(img_old.select(value_name))
    #normalized_diff
    #imgs_normdiff = imgs_diff.divide(img_cur.select(value_name).add(img_old.select(value_name)))

    #Define a kernel.
    #kernel = ee.Kernel.circle(radius= 15);
    #dilated = maskGeometry(imgs_normdiff.focal_max(kernel= kernel, iterations= 2), region)
    #roi_image = maskGeometry(image, region)
    scaled_img = get_scaled_img(imgs_diff, region)
    edges, center_coords = get_coordinates_for_region(region)
    
    folium_map = display_img(scaled_img, center_location=center_coords, value_name=value_name, mask_region=edges)
    return folium_map, scaled_img

def compute_and_display_year(images_per_year, year, img_type, value_name, region):
    image = images_per_year[year][img_type]
    img_cur = maskGeometry(images_per_year[year][img_type], region)
    scaled_img = get_scaled_img(img_cur, region)
    edges, center_coords = get_coordinates_for_region(region)
    folium_map = display_img(scaled_img, center_location=center_coords, value_name=value_name, mask_region=edges)
    return folium_map, scaled_img


## Display year for demo  

In [17]:
year = 2016
train_region = train_regions[0]
folium_map, scaled_img = compute_and_display_year(images_per_year, year=year, img_type='hansen', value_name='lossyear', region=train_region)
#folium_map, scaled_yeardiff = compute_and_display_yeardiff(images_per_year, year=year, img_type='ndvi', value_name='NDVI', region=train_region)
folium_map

## Collect "year difference" features for NDVI and nightlight

In [20]:

datasets = dict()

set_types = ['train', 'val', 'eval']
set_regions = [train_regions, val_regions, test_regions]


In [32]:
#print(folium.__version__)

for set_type, regions in zip(set_types, set_regions):
    for i, region in enumerate(regions):
        print('collecting features for region {} out of {}'.format(i+1, len(regions)))
        #print(region)
        scaled_features = dict()
        gt_labels = dict()
        
        for year in range(2015, 2018):
            if year not in scaled_features:
                scaled_features[year] = dict()
            for img_type, value_name in zip(['ndvi', 'nightlight'], ['NDVI', 'avg_rad']):
                if img_type not in scaled_features[year]:
                    scaled_features[year][img_type] = dict()

                _, scaled_yeardiff = compute_and_display_yeardiff(images_per_year, year=year, img_type=img_type, value_name=value_name, region=region)
                _, scaled_img = compute_and_display_year(images_per_year, year=year, img_type=img_type, value_name=value_name, region=region)
                scaled_features[year][img_type] = {'default': scaled_img, 'yeardiff': scaled_yeardiff}
            #yeardiff
            folium_map, scaled_img = compute_and_display_year(images_per_year, year=year, img_type='hansen', value_name='lossyear', region=region)
            gt_labels[year] = scaled_img
            if set_type not in datasets:
                datasets[set_type] = dict()
            datasets[set_type][i] = {'features': scaled_features, 'labels': gt_labels}




collecting features for region 1 out of 11
collecting features for region 2 out of 11
collecting features for region 3 out of 11
collecting features for region 4 out of 11
collecting features for region 5 out of 11
collecting features for region 6 out of 11
collecting features for region 7 out of 11
collecting features for region 8 out of 11
collecting features for region 9 out of 11
collecting features for region 10 out of 11
collecting features for region 11 out of 11
collecting features for region 1 out of 2
collecting features for region 2 out of 2
collecting features for region 1 out of 3
collecting features for region 2 out of 3
collecting features for region 3 out of 3


## Merge features and labels for all regions of their respective train/val/test sets


In [21]:
for set_type, regions in zip(set_types, set_regions):
    num_regions = len(regions)
    datasets[set_type]['merged'] = {'features': dict(), 'labels': dict()}
    for year in range(2015, 2018):
        datasets[set_type]['merged']['features'][year] = dict() 
        datasets[set_type]['merged']['labels'][year] = dict() 
        for img_type, value_name in zip(['ndvi', 'nightlight'], ['NDVI', 'avg_rad']):
            datasets[set_type]['merged']['features'][year][img_type] = dict() 
            #for i, region in enumerate(regions):
            for feature_type in ['default', 'yeardiff']:
                new_band_name = value_name + '_' + feature_type
                datasets[set_type]['merged']['features'][year][img_type][feature_type] = ee.ImageCollection([datasets[set_type][i]['features'][year][img_type][feature_type].select([value_name], [new_band_name]) for i in range(num_regions)])
                #datasets[set_type]['merged']['features'][year][img_type][feature_type] = ee.ImageCollection([datasets[set_type][i]['features'][year][img_type][feature_type] for i in range(num_regions)])
        datasets[set_type]['merged']['labels'][year] = ee.ImageCollection([datasets[set_type][i]['labels'][year].select(['lossyear'],['gt_lossyear']) for i in range(num_regions)])

KeyError: 'train'

## Inspect training and validation polygons

In [22]:
trainingPolys = ee.FeatureCollection(train_regions)
evalPolys = ee.FeatureCollection(test_regions)
valPolys = ee.FeatureCollection(val_regions)

polyImage = ee.Image(0).byte().paint(trainingPolys, 1).paint(valPolys, 2).paint(evalPolys, 3)
polyImage = polyImage.updateMask(polyImage)

mapid = polyImage.getMapId({'min': 1, 'max': 3, 'palette': ['red', 'green', 'blue']})
map = folium.Map(zoom_start=5)
folium.TileLayer(
    tiles=mapid['tile_fetcher'].url_format,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='training polygons',
  ).add_to(map)
map.add_child(folium.LayerControl())
map

In [57]:
import tensorflow as tf

##Create a demo feature from the training data
#imageStack = ee.Image.cat([
#     datasets['train']['merged']['features'][2016]['ndvi']['default'],
#     datasets['train']['merged']['features'][2016]['ndvi']['yeardiff'],
#     datasets['train']['merged']['features'][2016]['nightlight']['default'],
#     datasets['train']['merged']['features'][2016]['nightlight']['yeardiff']
#]).float()
#combinedFeatures = datasets['train']['merged']['features'][2016]['ndvi']['default'].merge(
#     datasets['train']['merged']['features'][2016]['ndvi']['yeardiff'])
#     datasets['train']['merged']['features'][2016]['nightlight']['default'],
#     datasets['train']['merged']['features'][2016]['nightlight']['yeardiff']
#])
#def mergeBands (image, previous, bands=['ndvi_default']):
#  return ee.Image(previous).addBands(image, [''])


#combinedFeatures = dict()
#labelMosaic = dict()
merged_feature_and_gt = dict()
for set_type in set_types:
    if set_type not in merged_feature_and_gt:
        merged_feature_and_gt[set_type] = dict()
    for year in range(2015, 2018):
    
        labelMosaic = datasets[set_type]['merged']['labels'][2016].mosaic()
        combinedFeatures = ee.ImageCollection([
             datasets[set_type]['merged']['features'][2016]['ndvi']['default'].mosaic(),
             datasets[set_type]['merged']['features'][2016]['ndvi']['yeardiff'].mosaic(),
             datasets[set_type]['merged']['features'][2016]['nightlight']['default'].mosaic(),
             datasets[set_type]['merged']['features'][2016]['nightlight']['yeardiff'].mosaic()
        ])
        merged_feature_and_gt[set_type][year] = combinedFeatures.merge(labelMosaic.select(['gt_lossyear'],['class'])).toBands()
        old_band_names = merged_feature_and_gt[set_type][year].bandNames().getInfo()
        new_band_names = ['ndvi_default', 'ndvi_yeardiff', 'nightlight_default', 'nightlight_yeardiff', 'class']
        #rename bands
        merged_feature_and_gt[set_type][year] = merged_feature_and_gt[set_type][year].select(old_band_names, new_band_names)
#old_band_names = merged_feature_and_gt[2016].bandNames().getInfo()
#new_band_names = ['ndvi_default', 'ndvi_yeardiff', 'nightlight_default', 'nightlight_yeardiff', 'class']
#band_names =merged_feature_and_gt['train'][2016].bandNames().getInfo() 
#print('band names: {}'.format(band_names))



#mapid = merged_feature_and_gt['train'][2016].getMapId({'bands': ['ndvi_default'], 'min': 0, 'max': 1})
mapid = merged_feature_and_gt['val'][2016].getMapId({'bands': ['ndvi_default'], 'min': 0, 'max': 1})
map = folium.Map(zoom_start=5)
folium.TileLayer(
    tiles=mapid['tile_fetcher'].url_format,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='ground truth forest loss',
  ).add_to(map)
map.add_child(folium.LayerControl())
map



In [23]:
import ten
BANDS = ['ndvi_default', 'ndvi_yeardiff', 'nightlight_default', 'nightlight_yeardiff']
LABEL = 'class'
FEATURES = BANDS + [LABEL]

# Specify the size and shape of patches expected by the model.
KERNEL_SIZE = 256
KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]
COLUMNS = [
  tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for f in FEATURES 
]
FEATURES_DICT = dict(zip(FEATURES, COLUMNS))

list = ee.List.repeat(1, KERNEL_SIZE)
lists = ee.List.repeat(list, KERNEL_SIZE)
kernel = ee.Kernel.fixed(KERNEL_SIZE, KERNEL_SIZE, lists)

year=2016
train_featureStack = merged_feature_and_gt['train'][year]
train_arrays = train_featureStack.neighborhoodToArray(kernel)

val_featureStack = merged_feature_and_gt['val'][year]
val_arrays = val_featureStack.neighborhoodToArray(kernel)

eval_featureStack = merged_feature_and_gt['eval'][year]
eval_arrays = eval_featureStack.neighborhoodToArray(kernel)


NameError: name 'tf' is not defined

In [None]:

# Convert the feature collections to lists for iteration.
trainingPolysList = trainingPolys.toList(trainingPolys.size())
evalPolysList = evalPolys.toList(evalPolys.size())
valPolysList = valPolys.toList(valPolys.size())

In [6]:

# Specify names locations for outputs in Cloud Storage. 
FOLDER = 'xingu_demo_v1'
TRAINING_BASE = 'training_patches'
VAL_BASE = 'val_patches'
EVAL_BASE = 'eval_patches'
BUCKET = 'xingu_data'

# These numbers determined experimentally.
#n = 200 # Number of shards in each polygon.
#N = 2000 # Total sample size in each polygon.
n = 20 # Number of shards in each polygon.
N = 100 # Total sample size in each polygon.


In [61]:
# Export all the training data (in many pieces), with one task 
# per geometry.
training_polys = trainingPolys.size().getInfo()
print('training polygons: {}'.format(training_polys))
print("uploading {} samples per region to google cloud..".format(N))
for g in range(training_polys):
  geomSample = ee.FeatureCollection([])
  for i in range(n):
    sample = train_arrays.sample(
      region = ee.Feature(trainingPolysList.get(g)).geometry(), 
      scale = 30, 
      numPixels = N / n, # Size of the shard.
      seed = i,
      tileScale = 8
    )
    geomSample = geomSample.merge(sample)
  
  desc = TRAINING_BASE + '_g' + str(g)
  task = ee.batch.Export.table.toCloudStorage(
    collection = geomSample,
    description = desc, 
    bucket = BUCKET, 
    fileNamePrefix = FOLDER + '/' + desc,
    fileFormat = 'TFRecord',
    selectors = FEATURES 
  )
  print('(train) starting cloud storage upload')
  task.start()
    
    
for g in range(valPolys.size().getInfo()):
  geomSample = ee.FeatureCollection([])
  for i in range(n):
    sample = val_arrays.sample(
      region = ee.Feature(valPolysList.get(g)).geometry(), 
      scale = 30, 
      numPixels = N / n, # Size of the shard.
      seed = i,
      tileScale = 8
    )
    geomSample = geomSample.merge(sample)
  
  desc = VAL_BASE + '_g' + str(g)
  task = ee.batch.Export.table.toCloudStorage(
    collection = geomSample,
    description = desc, 
    bucket = BUCKET, 
    fileNamePrefix = FOLDER + '/' + desc,
    fileFormat = 'TFRecord',
    selectors = FEATURES 
  )
  print('(val) starting cloud storage upload')
  task.start()
    

for g in range(evalPolys.size().getInfo()):
  geomSample = ee.FeatureCollection([])
  for i in range(n):
    sample = eval_arrays.sample(
      region = ee.Feature(evalPolysList.get(g)).geometry(), 
      scale = 30, 
      numPixels = N / n, # Size of the shard.
      seed = i,
      tileScale = 8
    )
    geomSample = geomSample.merge(sample)
  
  desc = EVAL_BASE + '_g' + str(g)
  task = ee.batch.Export.table.toCloudStorage(
    collection = geomSample,
    description = desc, 
    bucket = BUCKET, 
    fileNamePrefix = FOLDER + '/' + desc,
    fileFormat = 'TFRecord',
    selectors = FEATURES 
  )
  print('(eval) starting cloud storage upload')
  task.start()


training polygons: 11
uploading 100 samples per region to google cloud..
(train) starting cloud storage upload
(train) starting cloud storage upload
(train) starting cloud storage upload
(train) starting cloud storage upload
(train) starting cloud storage upload
(train) starting cloud storage upload
(train) starting cloud storage upload
(train) starting cloud storage upload
(train) starting cloud storage upload
(train) starting cloud storage upload
(train) starting cloud storage upload
(val) starting cloud storage upload
(val) starting cloud storage upload
(eval) starting cloud storage upload
(eval) starting cloud storage upload
(eval) starting cloud storage upload


In [72]:
#GS_AUTH_JSON = os.path.join(home, 'google_cloud/xingu_service.json')
def parse_tfrecord(example_proto):
  """The parsing function.
  Read a serialized example into the structure defined by FEATURES_DICT.
  Args:
    example_proto: a serialized Example.
  Returns: 
    A dictionary of tensors, keyed by feature name.
  """
  return tf.io.parse_single_example(example_proto, FEATURES_DICT)


def to_tuple(inputs):
  """Function to convert a dictionary of tensors to a tuple of (inputs, outputs).
  Turn the tensors returned by parse_tfrecord into a stack in HWC shape.
  Args:
    inputs: A dictionary of tensors, keyed by feature name.
  Returns: 
    A dtuple of (inputs, outputs).
  """
  inputsList = [inputs.get(key) for key in FEATURES]
  stacked = tf.stack(inputsList, axis=0)
  # Convert from CHW to HWC
  stacked = tf.transpose(stacked, [1, 2, 0])
  return stacked[:,:,:len(BANDS)], stacked[:,:,len(BANDS):]


def list_blobs_with_prefix(bucket_name, prefix):
#    storage_client = storage.Client.from_service_account_json(
#            GS_AUTH_JSON)
    storage_client = storage.Client()

    # Note: Client.list_blobs requires at least package version 1.17.0.
    blobs = storage_client.list_blobs(
        bucket_name, prefix=prefix, delimiter=delimiter
    )

    return blobs

def get_files_for_blobs(bucket_name, blobs):
#    storage_client = storage.Client.from_service_account_json(
#            GS_AUTH_JSON)
    storage_client = storage.Client()

    files= []
    for blob in blobs:
        blob_files = storage_client.list_blobs(
            bucket_name, prefix=prefix, delimiter=delimiter
        )

    return blobs


#def get_dataset(bucket, pattern):
def get_dataset(pattern):
    """Function to read, parse and format to tuple a set of input tfrecord files.
    Get all the files matching the pattern, parse and convert to tuple.
    Args:
    pattern: A file pattern to match in a Cloud Storage bucket.
    Returns: 
    A tf.data.Dataset
    """
  #glob = tf.gfile.Glob(pattern)
    glob = tf.io.gfile.glob(pattern)
    print('dataset matches: {}'.format(glob))
    dataset = tf.data.TFRecordDataset(glob, compression_type='GZIP')
    #print(dataset)
    dataset = dataset.map(parse_tfrecord, num_parallel_calls=5)
    dataset = dataset.map(to_tuple, num_parallel_calls=5)
    return dataset

In [73]:
from pathlib import Path
#home = str(Path.home())

## Authorize Google Storage

from google.cloud import storage
# Instantiates a client
storage_client = storage.Client()
#storage_client = storage.Client.from_service_account_json(GS_AUTH_JSON)


# The name for the new bucket

# Make an authenticated API request
buckets = storage_client.list_buckets()
for bucket in buckets:
    print(bucket.name)
#bucket = storage_client.list_bucket(BUCKET)


xingu_data


In [74]:
# Sizes of the training and evaluation datasets.
TRAIN_SIZE = 16000
EVAL_SIZE = 8000
VAL_SIZE = 6000

# Specify model training parameters.
BATCH_SIZE = 16
EPOCHS = 50
BUFFER_SIZE = 3000
OPTIMIZER = 'SGD'
LOSS = 'MeanSquaredError'
METRICS = ['RootMeanSquaredError']

In [4]:
def get_training_dataset():
    """Get the preprocessed training dataset
    Returns: 
    A tf.data.Dataset of training data.
    """
    glob = 'gs://' + BUCKET + '/' + FOLDER + '/' + TRAINING_BASE + '*'
    dataset = get_dataset(glob)
    dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
    return dataset

training = get_training_dataset()

NameError: name 'BUCKET' is not defined

In [3]:
#first_training_sample = iter(training.take(1)).next()
import tensorflow_datasets as tfds
np_dataset_train = tfds.as_numpy(training)


NameError: name 'training' is not defined

In [2]:
import numpy as np
import tqdm
#all_samples = list(np_dataset_train)
all_features = []#list(np_dataset_train)
all_labels = []#list(np_dataset_train)
for sample in tqdm.tqdm(np_dataset_train):
    feat_sample, label_sample = sample
    all_features.append(feat_sample)
    all_labels.append(label_sample)
    #print(label_sample)


NameError: name 'np_dataset_train' is not defined

In [None]:
train_features_path = 'xingu_demo_v1_train_features.npy'
train_labels_path = 'xingu_demo_v1_train_labels.npy'
all_features_np = np.array(all_features)
all_labels_np = np.array(all_labels)
np.save(train_features_path, all_features_np)
np.save(train_labels_path, all_labels_np)


In [71]:
#inspect data

print(first_training_sample[0].shape)
numpy_test =first_training_sample[0].numpy()
print(numpy_test)


(16, 256, 256, 4)
[[[[6.7669576e-01 4.8915616e-01 1.5808110e-03 4.2670196e-01]
   [6.8123525e-01 4.7705665e-01 1.5808110e-03 4.2670196e-01]
   [6.7849588e-01 4.8527911e-01 1.5808110e-03 4.2670196e-01]
   ...
   [8.1073135e-01 4.8042423e-01 1.4606729e-03 4.2566425e-01]
   [8.0519527e-01 4.7699094e-01 1.4606729e-03 4.2566425e-01]
   [8.3492845e-01 5.0669253e-01 1.4606729e-03 4.2566425e-01]]

  [[6.8544567e-01 4.9352220e-01 1.5808110e-03 4.2670196e-01]
   [7.0655853e-01 4.8961857e-01 1.5808110e-03 4.2670196e-01]
   [6.9226313e-01 4.8721275e-01 1.5808110e-03 4.2670196e-01]
   ...
   [8.3686662e-01 5.0549346e-01 1.4606729e-03 4.2566425e-01]
   [8.3248365e-01 5.0441867e-01 1.4606729e-03 4.2566425e-01]
   [8.6755431e-01 5.3563303e-01 1.4606729e-03 4.2566425e-01]]

  [[6.9102162e-01 4.8917937e-01 1.5808110e-03 4.2670196e-01]
   [7.0474422e-01 4.9829191e-01 1.5808110e-03 4.2670196e-01]
   [6.9755954e-01 4.8937619e-01 1.5808110e-03 4.2670196e-01]
   ...
   [8.4538281e-01 5.2332193e-01 1.4606729e

In [None]:
def get_val_dataset():
	"""Get the preprocessed valuation dataset
  Returns: 
    A tf.data.Dataset of evaluation data.
  """
	glob = 'gs://' + BUCKET + '/' + FOLDER + '/' + VAL_BASE + '*'
	dataset = get_dataset(glob)
	dataset = dataset.batch(1).repeat()
	return dataset

validation = get_val_dataset()

In [None]:

first_validation_sample = iter(validation.take(1)).next()
print(first_training_sample[0].shape)

In [None]:
from tensorflow.python.keras import layers
from tensorflow.python.keras import losses
from tensorflow.python.keras import models
from tensorflow.python.keras import metrics
from tensorflow.python.keras import optimizers

def conv_block(input_tensor, num_filters):
	encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)
	encoder = layers.BatchNormalization()(encoder)
	encoder = layers.Activation('relu')(encoder)
	encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)
	encoder = layers.BatchNormalization()(encoder)
	encoder = layers.Activation('relu')(encoder)
	return encoder

def encoder_block(input_tensor, num_filters):
	encoder = conv_block(input_tensor, num_filters)
	encoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)
	return encoder_pool, encoder

def decoder_block(input_tensor, concat_tensor, num_filters):
	decoder = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)
	decoder = layers.concatenate([concat_tensor, decoder], axis=-1)
	decoder = layers.BatchNormalization()(decoder)
	decoder = layers.Activation('relu')(decoder)
	decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
	decoder = layers.BatchNormalization()(decoder)
	decoder = layers.Activation('relu')(decoder)
	decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
	decoder = layers.BatchNormalization()(decoder)
	decoder = layers.Activation('relu')(decoder)
	return decoder

def get_model():
	inputs = layers.Input(shape=[None, None, len(BANDS)]) # 256
	encoder0_pool, encoder0 = encoder_block(inputs, 32) # 128
	encoder1_pool, encoder1 = encoder_block(encoder0_pool, 64) # 64
	encoder2_pool, encoder2 = encoder_block(encoder1_pool, 128) # 32
	encoder3_pool, encoder3 = encoder_block(encoder2_pool, 256) # 16
	encoder4_pool, encoder4 = encoder_block(encoder3_pool, 512) # 8
	center = conv_block(encoder4_pool, 1024) # center
	decoder4 = decoder_block(center, encoder4, 512) # 16
	decoder3 = decoder_block(decoder4, encoder3, 256) # 32
	decoder2 = decoder_block(decoder3, encoder2, 128) # 64
	decoder1 = decoder_block(decoder2, encoder1, 64) # 128
	decoder0 = decoder_block(decoder1, encoder0, 32) # 256
	outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)

	model = models.Model(inputs=[inputs], outputs=[outputs])

	model.compile(
		optimizer=optimizers.get(OPTIMIZER), 
		loss=losses.get(LOSS),
		metrics=[metrics.get(metric) for metric in METRICS])

	return model

In [None]:
m = get_model()

m.fit(
    x=training, 
    epochs=EPOCHS, 
    steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE))
#    validation_data=validation,
#    validation_steps=VAL_SIZE)