## Benchmarking 

Sentinel-2 image patches of 7x7 pixels for 10 bands including labels \
\
train data part 1 (includes index files): https://drive.google.com/open?id=14zjSZhqeikghe2HKnxzJJgRkJGZPGx7T \
train data part 2: https://drive.google.com/open?id=1KffO6s2xA842cuMzDvA1CXiQ8Wb6o-9N \
validation data: https://drive.google.com/open?id=1PSeyevz9KUV7XLf0sPWyw_BI5_TheZ5b \
test data: https://drive.google.com/open?id=1_hj9DXf3VK9E4OegsDyTXjMJv4q-dUUf \
\
Data format: TFRecord files whose records follow the `featureDict` schema defined below; each file contains approx. 20 samples.

In [1]:
from __future__ import print_function
%matplotlib inline
import os
import numpy as np
import pandas as pd
import tensorflow.compat.v1 as tf
import png
import matplotlib.pyplot as plt
#import sklearn

import ee
#import ee.mapclient
from IPython.display import Image
import IPython.display as display

ee.Initialize()

In [2]:
# Switch the compat.v1 API to TF1 behavior, then try to turn eager execution
# back on so that datasets can be iterated directly in the cells below.
tf.disable_v2_behavior()
try:
    tf.enable_eager_execution()
except Exception as err:
    # Best effort: continue without eager execution, but report why it failed
    # instead of hiding the problem.
    print("Eager execution could not be enabled: {}".format(err))

# NOTE(review): TensorFlow is already imported at this point; confirm that the
# log level is still picked up when set this late.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

Instructions for updating:
non-resource variables are not supported in the long term


### Import and parse TFRecord data

In [None]:
## Prepare file paths

no_files_train = 3000
no_files_validate = 1000

# Training shards: one path per zero-padded 4-digit index.
filenames_train = [
    "../data/EE_data/EE_data_training/train_patches_" + "{0:04}".format(i) + ".tfrecord"
    for i in range(no_files_train)
]

# Validation shards get their own list and their own file count; previously
# they were appended to the training list using the training count, which left
# filenames_validate empty and mixed validation paths into the training set.
filenames_validate = [
    "../data/EE_data/EE_data_validation/validate_patches_" + "{0:04}".format(i) + ".tfrecord"
    for i in range(no_files_validate)
]

print(len(filenames_train))
print(len(filenames_validate))

In [3]:
## Import data set from TFRecord files

# Build a dataset that reads the serialized records from all training files.
dataset_train = tf.data.TFRecordDataset(filenames_train)
#print(len(list(dataset_train)))

# Decode the first 100 serialized records into tf.train.Example protos and
# print them for visual inspection.
for raw_record in dataset_train.take(100):
    example = tf.train.Example.FromString(raw_record.numpy())
    print(example)

3000
features {
  feature {
    key: "B11"
    value {
      float_list {
        value: 0.23675625026226044
        value: 0.23054374754428864
        value: 0.2245812565088272
        value: 0.22049999237060547
        value: 0.218299999833107
        value: 0.21524375677108765
        value: 0.21133124828338623
        value: 0.23711875081062317
        value: 0.22853125631809235
        value: 0.221243754029274
        value: 0.21649999916553497
        value: 0.2143000066280365
        value: 0.21133124828338623
        value: 0.20759373903274536
        value: 0.2381500005722046
        value: 0.2290000021457672
        value: 0.2204499989748001
        value: 0.2142687439918518
        value: 0.2104562520980835
        value: 0.20631875097751617
        value: 0.20185624063014984
        value: 0.2394937425851822
        value: 0.23194999992847443
        value: 0.22219999134540558
        value: 0.21380624175071716
        value: 0.20676875114440918
        value: 0.20020624995

In [4]:
# Keys of the band features stored in every record.
bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12']

# Keys of the cover labels stored in every record.
label_names = [
    'vegetation_elementstree_element_cover_label',
    'vegetation_elementsshrub_element_cover_label',
    'vegetation_elementspalm_element_cover_label',
    'vegetation_elementsbamboo_element_cover_label',
    'vegetation_elementscrop_element_cover_label',
    'infrastructure_elementshouse_element_cover_label',
    'infrastructure_elementsother_buildings_element_cover_label',
    'infrastructure_elementspaved_road_element_cover_label',
    'infrastructure_elementsunpaved_road_element_cover_label',
    'water_bodieslake_water_cover_label',
    'water_bodiesriver_water_cover_label',
    'total_water_bodies_cover_label',
]

# Parsing schema: one string id, a 7x7 float32 patch per band and a single
# float32 value per label. Entries are added in the order id, bands, labels.
featureDict = {'public_id': tf.io.FixedLenFeature(shape=[1], dtype=tf.string)}
featureDict.update(
    (band, tf.io.FixedLenFeature(shape=[7,7], dtype=tf.float32)) for band in bands
)
featureDict.update(
    (label, tf.io.FixedLenFeature(shape=[1], dtype=tf.float32)) for label in label_names
)

In [6]:
## Parse TFRecord dataset

def _parse_function(example):
  """Parse serialized `tf.Example` input with the `featureDict` schema.

  Args:
    example: serialized input handed to `tf.io.parse_example`.

  Returns:
    The result of `tf.io.parse_example(example, featureDict)`.
  """
  parsed = tf.io.parse_example(example, featureDict)
  return parsed

# Variant 1 (disabled): batch first, then parse whole batches.
#batches = dataset_train.batch(100, drop_remainder=False)
#parsedDataset_train = batches.map(_parse_function)

# Variant 2 (disabled): parse record by record with _parse_function.
#parsedDataset_train = dataset_train.map(_parse_function)

# Variant 3 (active): parse every record on its own with parse_single_example.
parsedDataset_train = dataset_train.map(
    lambda serialized: tf.io.parse_single_example(serialized, featureDict)
)

print(parsedDataset_train)
#print(len(list(parsedDataset_train)))

# Walk over the first 100 parsed records and print their running index.
for i, parsed_record in enumerate(parsedDataset_train.take(100)):
    print(i)
    #print(repr(parsed_record))
    

<DatasetV1Adapter shapes: {B11: (7, 7), B12: (7, 7), B2: (7, 7), B3: (7, 7), B4: (7, 7), B5: (7, 7), B6: (7, 7), B7: (7, 7), B8: (7, 7), B8A: (7, 7), infrastructure_elementshouse_element_cover_label: (1,), infrastructure_elementsother_buildings_element_cover_label: (1,), infrastructure_elementspaved_road_element_cover_label: (1,), infrastructure_elementsunpaved_road_element_cover_label: (1,), public_id: (1,), total_water_bodies_cover_label: (1,), vegetation_elementsbamboo_element_cover_label: (1,), vegetation_elementscrop_element_cover_label: (1,), vegetation_elementspalm_element_cover_label: (1,), vegetation_elementsshrub_element_cover_label: (1,), vegetation_elementstree_element_cover_label: (1,), water_bodieslake_water_cover_label: (1,), water_bodiesriver_water_cover_label: (1,)}, types: {B11: tf.float32, B12: tf.float32, B2: tf.float32, B3: tf.float32, B4: tf.float32, B5: tf.float32, B6: tf.float32, B7: tf.float32, B8: tf.float32, B8A: tf.float32, infrastructure_elementshouse_eleme

InvalidArgumentError: Feature: B11 (data type: float) is required but could not be found.
	 [[{{node ParseSingleExample/ParseExample/ParseExampleV2}}]]

In [7]:
## Import and parse TFRecord dataset V4

def _parse_(serialized_example):
    """Parse one serialized record into a (features, label) pair.

    Args:
        serialized_example: a single serialized record, parsed with the
            `featureDict` schema.

    Returns:
        A tuple `(features, label)`: `features` maps every band key to its
        values reshaped to `(49,)`, and `label` is the
        'vegetation_elementstree_element_cover_label' value cast to int32.
    """
    parsed = tf.parse_single_example(serialized_example, featureDict)

    band_keys = ('B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12')
    # Flatten each band patch to a vector of 49 values.
    features = {key: tf.reshape(parsed[key], (49,)) for key in band_keys}

    label = tf.cast(parsed['vegetation_elementstree_element_cover_label'], tf.int32)

    return (features, label)


def tfrecord_train_input_fn(batch_size=32, shuffle_buffer_size=1000):
    """Build the training input for the estimators from the TFRecord files.

    Reads `filenames_train`, parses every record with `_parse_`, shuffles the
    parsed records and groups them into batches.

    Args:
        batch_size: number of records per batch.
        shuffle_buffer_size: size of the shuffle buffer. The previous code
            called `.shuffle(True)`, which passes `True` as the buffer size,
            i.e. a buffer of one element and therefore no shuffling at all.

    Returns:
        The `get_next()` result of a one-shot iterator over the batched
        dataset, i.e. the `(features, label)` structure produced by `_parse_`.
    """
    tfrecord_dataset = tf.data.TFRecordDataset(filenames_train)
    tfrecord_dataset = (
        tfrecord_dataset
        .map(_parse_)
        .shuffle(buffer_size=shuffle_buffer_size)
        .batch(batch_size)
    )
    # Function form recommended by the deprecation notice for the
    # Dataset.make_one_shot_iterator() method.
    tfrecord_iterator = tf.data.make_one_shot_iterator(tfrecord_dataset)

    return tfrecord_iterator.get_next()

### Classification

In [8]:
from tensorflow.keras import layers
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.ensemble import RandomForestClassifier

# A utility method to create a feature column
# and to transform a batch of data
def demo(feature_column, example_batch=None):
  """Apply a feature column to a batch of parsed records and print the result.

  Args:
    feature_column: feature column (or list of columns) handed to
      `layers.DenseFeatures`.
    example_batch: optional dict of feature tensors to transform. When omitted,
      the first batch of 5 records from `parsedDataset_train` is used.
  """
  if example_batch is None:
    # The original referenced `parsedTrain_dataset`, a name that is not defined
    # in this notebook, and passed the dataset itself rather than a batch of
    # features to the layer.
    # NOTE(review): iterating the dataset like this requires eager execution.
    example_batch = next(iter(parsedDataset_train.batch(5)))
  feature_layer = layers.DenseFeatures(feature_column)
  print(feature_layer(example_batch).numpy())

In [9]:
# One numeric feature column per band, each declared with the flattened
# shape (49,). An earlier variant declared the columns with shape=(7,7).
B2, B3, B4, B5, B6, B7, B8, B8A, B11, B12 = (
    tf.feature_column.numeric_column(key=band_key, shape=(49,))
    for band_key in ('B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12')
)

In [13]:
import tempfile

# Every estimator gets its own checkpoint directory. Previously both shared
# one model_dir, so two different architectures would have written to and
# restored from the same checkpoints.
model_dir = tempfile.mkdtemp()
model2_dir = tempfile.mkdtemp()

# Fully connected classifier with two hidden layers of 100 units on the ten
# flattened band columns.
model = tf.estimator.DNNClassifier(
    hidden_units=[100,100],
    model_dir=model_dir,
    n_classes=10,
    feature_columns=[B2, B3, B4, B5, B6, B7, B8, B8A, B11, B12]
)

# Baseline classifier used as a reference for the DNN.
model2 = tf.estimator.BaselineClassifier(
    model_dir=model2_dir,
    n_classes=10,
    weight_column=None,
    label_vocabulary=None,
    optimizer='Ftrl',
    config=None
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/hr/r8bv52hn6sj5ny319b4nfg0h0000gn/T/tmp139zpmvj', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_mo

In [11]:
# Switch eager execution off before training the DNN classifier.
tf.compat.v1.disable_eager_execution()

# Train for 200 steps on batches of 32 records.
model.train(input_fn=lambda: tfrecord_train_input_fn(32), steps=200)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/hr/r8bv52hn6sj5ny319b4nfg0h0000gn/T/tmpf8mea8v1/model.ckpt.


InvalidArgumentError: Feature: B11 (data type: float) is required but could not be found.
	 [[{{node ParseSingleExample/ParseExample/ParseExampleV2}}]]
	 [[IteratorGetNext]]

In [14]:
# Train the baseline classifier for 200 steps on batches of 32 records.
model2.train(input_fn=lambda: tfrecord_train_input_fn(32), steps=200)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/hr/r8bv52hn6sj5ny319b4nfg0h0000gn/T/tmp139zpmvj/model.ckpt.


InvalidArgumentError: Feature: B11 (data type: float) is required but could not be found.
	 [[{{node ParseSingleExample/ParseExample/ParseExampleV2}}]]
	 [[IteratorGetNext]]