<a href="https://colab.research.google.com/github/jdilger/TensorFlowNotebooks/blob/master/onehot_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import tensorflow as tf

In [4]:
indices = [[0, 1, 2],
           [4, 5, 1],
           [0, 1, 2]]
depth = 6
tf.one_hot(indices, depth)

<tf.Tensor: shape=(3, 3, 6), dtype=float32, numpy=
array([[[1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0.]],

       [[0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1.],
        [0., 1., 0., 0., 0., 0.]],

       [[1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0.]]], dtype=float32)>

In [2]:
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import callbacks
from tensorflow.keras import backend as K

import ee
try:
  ee.Initialize()
except:
  ee.Authenticate()
  ee.Initialize()

# Cloud authentication.
from google.colab import auth
auth.authenticate_user()

In [76]:
# Specify cloud storage bucket to save data too
BUCKET = 'landfire'

# Specify names locations for outputs in Cloud Storage. 
FOLDER = 'lucas/pools2fire_tf'
TRAINING_BASE = 'training_patches_v11'
TESTING_BASE = 'testing_patches_v11'
VAL_BASE = 'val_patches_v4'

BANDS = ['agfast', 'agmed', 'agslo', 'agvfast', 'bgfast', 'bgslo', 'bgvfast',
         'croot', 'froot', 'foliage', 'merch', 'otherw', 'snbran', 'snstem',
         'ldist', 'elevation', 'slope', 'aspect', 'age']
CATEGORICAL = ['pyrome_freq']
CATEGORICAL_DICt = {
    'pyrome_freq':10
}
RESPONSE = ['BP']
FEATURES = CATEGORICAL + BANDS + RESPONSE
FEATURES

['pyrome_freq',
 'agfast',
 'agmed',
 'agslo',
 'agvfast',
 'bgfast',
 'bgslo',
 'bgvfast',
 'croot',
 'froot',
 'foliage',
 'merch',
 'otherw',
 'snbran',
 'snstem',
 'ldist',
 'elevation',
 'slope',
 'aspect',
 'age',
 'BP']

10

In [4]:
# specify a kernel/image size to use for the model
KERNEL_SIZE = 256

# create an EE kernel opject from the kernel size
list = ee.List.repeat(1, KERNEL_SIZE)
lists = ee.List.repeat(list, KERNEL_SIZE)
kernel = ee.Kernel.fixed(KERNEL_SIZE, KERNEL_SIZE, lists)

In [None]:
kernel.getInfo()

In [77]:
# Sizes of the training and evaluation datasets.
# based on sizes of exported data and spliting performed earlier
# ~20 counties with 300 samples per county = ~6000 samples
# ~60% are training, ~25% are testing, ~15% are validation
TRAIN_SIZE = 3 
TEST_SIZE =  3
VAL_SIZE = 3

# Specify model training parameters.
BATCH_SIZE = 1
EPOCHS = 3
BUFFER_SIZE = 1#1500 # setting too large will give an Out of Memory (OOM) error

# Specify the size and shape of patches expected by the model.
KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]
COLUMNS = [
  tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES
]
FEATURES_DICT = dict(zip(FEATURES, COLUMNS))

In [78]:
FEATURES_DICT

{'BP': FixedLenFeature(shape=[256, 256], dtype=tf.float32, default_value=None),
 'age': FixedLenFeature(shape=[256, 256], dtype=tf.float32, default_value=None),
 'agfast': FixedLenFeature(shape=[256, 256], dtype=tf.float32, default_value=None),
 'agmed': FixedLenFeature(shape=[256, 256], dtype=tf.float32, default_value=None),
 'agslo': FixedLenFeature(shape=[256, 256], dtype=tf.float32, default_value=None),
 'agvfast': FixedLenFeature(shape=[256, 256], dtype=tf.float32, default_value=None),
 'aspect': FixedLenFeature(shape=[256, 256], dtype=tf.float32, default_value=None),
 'bgfast': FixedLenFeature(shape=[256, 256], dtype=tf.float32, default_value=None),
 'bgslo': FixedLenFeature(shape=[256, 256], dtype=tf.float32, default_value=None),
 'bgvfast': FixedLenFeature(shape=[256, 256], dtype=tf.float32, default_value=None),
 'croot': FixedLenFeature(shape=[256, 256], dtype=tf.float32, default_value=None),
 'elevation': FixedLenFeature(shape=[256, 256], dtype=tf.float32, default_value=None)

In [116]:
def parse_tfrecord(example_proto):
    """The parsing function.
    Read a serialized example into the structure defined by FEATURES_DICT.
    Args:
    example_proto: a serialized Example.
    Returns: 
    A dictionary of tensors, keyed by feature name.
    """
    return tf.io.parse_single_example(example_proto, FEATURES_DICT)


def to_tuple_both(inputs):
  # cast to int32 for one hot, it stays as original input for some reason 
  one_hot_int = [tf.one_hot(tf.cast(inputs.get(key), tf.int32), CATEGORICAL_DICt.get(key)) for key in CATEGORICAL]
  stacked = tf.stack(one_hot_int,axis=-1)
  
  stacked = tf.squeeze(stacked)

  # contin
  conInputsList = [inputs.get(key) for key in FEATURES if key not in CATEGORICAL]
  conStacked = tf.stack(conInputsList, axis=0)
  conStacked = tf.transpose(conStacked, [1, 2, 0])

  # stack and split off predictor
  outStack = tf.concat([conStacked[:,:,:len(BANDS)], stacked],axis=-1), conStacked[:,:,len(BANDS):]
  return outStack

def to_tuple(inputs):
    """Function to convert a dictionary of tensors to a tuple of (inputs, outputs).
    Turn the tensors returned by parse_tfrecord into a stack in HWC shape.
    Args:
    inputs: A dictionary of tensors, keyed by feature name.
    Returns: 
    A dtuple of (inputs, outputs).
    """
    inputsList = [inputs.get(key) for key in FEATURES]
    stacked = tf.stack(inputsList, axis=0)
    # Convert from CHW to HWC
    stacked = tf.transpose(stacked, [1, 2, 0])
    return stacked[:,:,:len(BANDS)], stacked[:,:,len(BANDS):]

def to_tuple_cat(inputs):
    """Function to convert a dictionary of tensors to a tuple of (inputs, outputs).
    Turn the tensors returned by parse_tfrecord into a stack in HWC shape.
    Args:
    inputs: A dictionary of tensors, keyed by feature name.
    Returns: 
    A dtuple of (inputs, outputs).
    """
    inputsList = [tf.one_hot(inputs.get(key), CATEGORICAL_DICt.get(key)) for key in CATEGORICAL]
    stacked = tf.stack(inputsList, axis=0)
    # Convert from CHW to HWC
    stacked = tf.transpose(stacked, [1, 2, 0])
    return stacked[:,:,:len(CATEGORICAL)], stacked[:,:,len(CATEGORICAL):]

def get_dataset(pattern,training=False):
    """Function to read, parse and format to tuple a set of input tfrecord files.
    Get all the files matching the pattern, parse and convert to tuple.
    Args:
    pattern: A file pattern to match in a Cloud Storage bucket.
    Returns: 
    A tf.data.Dataset
    """
    glob = tf.io.gfile.glob(pattern)
    dataset = tf.data.TFRecordDataset(glob, compression_type='GZIP')
    dataset = dataset.map(parse_tfrecord, num_parallel_calls=5)
    dataset = dataset.map(to_tuple_both, num_parallel_calls=5)
    return dataset

In [117]:
# custom decoder block to upsample the features in the network
# this specific decoder block uses a cov2d -> concat -> conv2d * n -> bilinear upsample
def decoder_block(input_tensor, concat_tensor=None, nFilters=512,nConvs=2,i=0,name_prefix="decoder_block"):
    deconv = input_tensor
    for j in range(nConvs):
        deconv = layers.Conv2D(nFilters, 3, activation='relu',
                               padding='same',name=f"{name_prefix}{i}_deconv{j+1}")(deconv)
        deconv = layers.BatchNormalization(name=f"{name_prefix}{i}_batchnorm{j+1}")(deconv)
        if j == 0:
            if concat_tensor is not None:
                 deconv = layers.concatenate([deconv,concat_tensor],name=f"{name_prefix}{i}_concat")
            deconv = layers.Dropout(0.2, seed=0+i,name=f"{name_prefix}{i}_dropout")(deconv)
    
    up = layers.UpSampling2D(interpolation='bilinear',name=f"{name_prefix}{i}_upsamp")(deconv)
    return up
# here we define the network using the VGG-16 encoder 
# and build our decoder from there
# tf_run.final_activation = 'sigmoid'#TODO change this
# specify an input tensor with an arbitrary shape for x and y dims
# has sample length channels as landsat bands we exported

inTensor = layers.Input(shape=[None,None,len(BANDS)+sum(CATEGORICAL_DICt.values())],name="input")#todo change len

# grab the vgg-16 encoder and build based off our input tensor
vgg16 = keras.applications.VGG19(include_top=False,weights=None,input_tensor=inTensor)

# grab the input and output tensors
base_in = vgg16.input
base_out = vgg16.output

# extract the tensors we will use to concatenate our decoders with
concat_layers = ["block5_conv3","block4_conv3","block3_conv3","block2_conv2","block1_conv2"]
concat_tensors = [vgg16.get_layer(layer).output for layer in concat_layers]

# define the decoder branch

decoder0 = decoder_block(base_out, nFilters=512,nConvs=1,i=0) # center block with no upsampling
decoder1 = decoder_block(decoder0, concat_tensor=concat_tensors[0], nFilters=512,nConvs=1,i=2) 
decoder2 = decoder_block(decoder1, concat_tensor=concat_tensors[1], nFilters=256,nConvs=1,i=3) 
decoder3 = decoder_block(decoder2, concat_tensor=concat_tensors[2], nFilters=128,nConvs=1,i=4) 
decoder4 = decoder_block(decoder3, concat_tensor=concat_tensors[3], nFilters=64,nConvs=1,i=5) 
# concat the final decoder block with the first encoder output
# drop out correlated connections in spatial space
outBranch = layers.concatenate([decoder4,concat_tensors[4]],name="out_block_concat1")
outBranch = layers.SpatialDropout2D(rate=0.2,seed=0,name="out_block_spatialdrop")(outBranch)

# perform some additional convolutions before predicting probabilites
outBranch = layers.Conv2D(64, 3, activation='relu', 
                          padding='same',name="out_block_conv1")(outBranch)
outBranch = layers.BatchNormalization(name="out_block_batchnorm1")(outBranch)
outBranch = layers.Conv2D(64, 3, activation='relu', 
                          padding='same',name="out_block_conv2")(outBranch)
outBranch = layers.BatchNormalization(name="out_block_batchnorm2")(outBranch)
# final convolution and softmax activation to get output probabilities
# nodes will equal the number of classes
outBranch = layers.Conv2D(len(RESPONSE), (1, 1),name='final_conv')(outBranch)#to cahnge len back
output = layers.Activation('sigmoid', name="final_out")(outBranch)
 
# declare our model with the inputs from the encoder and outputs from the decoder
model = models.Model(inputs=[base_in], outputs=[output],name="vgg16-unet")

# define an adaptive learning rate based on training
lr_schedule = keras.optimizers.schedules.InverseTimeDecay(
  0.001,
  decay_steps=500,
  decay_rate=1,
  staircase=False)

# compile the model
# uses Adam loss with adaptive learning rate
# soft dice loss as opjective function
# outputs accuracy, precision, recall, and f1
model.compile(optimizer=keras.optimizers.Adam(lr_schedule),
              loss='mean_absolute_error',#dice_loss,
              metrics=['mean_squared_error',#keras.metrics.categorical_accuracy,
                       tf.keras.metrics.RootMeanSquaredError(),
                       tf.keras.metrics.MeanAbsoluteError()
                       ])

# display the model summary to see layers and parameters
model.summary()

Model: "vgg16-unet"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, None, None,  0           []                               
                                 29)]                                                             
                                                                                                  
 block1_conv1 (Conv2D)          (None, None, None,   16768       ['input[0][0]']                  
                                64)                                                               
                                                                                                  
 block1_conv2 (Conv2D)          (None, None, None,   36928       ['block1_conv1[0][0]']           
                                64)                                                      

In [118]:
def get_training_dataset():
    """Get the preprocessed training dataset
    Returns: 
    A tf.data.Dataset of training data.
    """
    glob = 'gs://' + BUCKET + '/' + FOLDER + '/' + TRAINING_BASE + '*'
    dataset = get_dataset(glob,training=True)
    dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
    return dataset

training = get_training_dataset()

In [119]:
def get_testing_dataset():
	"""Get the preprocessed evaluation dataset
  Returns: 
    A tf.data.Dataset of evaluation data.
  """
	glob = 'gs://' + BUCKET + '/' + FOLDER + '/' + TESTING_BASE + '*'
	dataset = get_dataset(glob)
	dataset = dataset.batch(1).repeat()
	return dataset

testing = get_testing_dataset()

In [120]:
# train the model!!!
history = model.fit(x=training,
                    epochs=EPOCHS,
                    steps_per_epoch=(TRAIN_SIZE // BATCH_SIZE),
                    validation_data=testing,
                    validation_steps=TEST_SIZE,
                    # callbacks=[earlyStopping],
                   )

Epoch 1/3
Epoch 2/3
Epoch 3/3


##dev area

figuring out shapes


In [58]:
 pattern = 'gs://' + BUCKET + '/' + FOLDER + '/' + TRAINING_BASE + '*'
 glob = tf.io.gfile.glob(pattern)
 dataset = tf.data.TFRecordDataset(glob, compression_type='GZIP')
 print(len(glob))
 dataset = dataset.map(parse_tfrecord).map(to_tuple_both)

#  parsed = tf.io.parse_single_example(dataset, FEATURES_DICT)

17


In [110]:
from tensorflow.python.ops.gen_array_ops import one_hot
# for key in CATEGORICAL: print(CATEGORICAL_DICt[key])
singleGlob = "gs://landfire/lucas/pools2fire_tf/testing_patches_v11_i17.tfrecord.gz"
glob = tf.io.gfile.glob(singleGlob)
dataset = tf.data.TFRecordDataset(glob, compression_type='GZIP').map(parse_tfrecord)

def ok(inputs):
  # cast to int32 for one hot, it stays as original input for some reason 
  one_hot_int = [tf.one_hot(tf.cast(inputs.get(key), tf.int32), CATEGORICAL_DICt.get(key)) for key in CATEGORICAL]

  stacked = tf.stack(one_hot_int,axis=-1)
  # squeeze to remove extra dimention
  return tf.squeeze(stacked)
def reg(inputs):
    """Function to convert a dictionary of tensors to a tuple of (inputs, outputs).
    Turn the tensors returned by parse_tfrecord into a stack in HWC shape.
    Args:
    inputs: A dictionary of tensors, keyed by feature name.
    Returns: 
    A dtuple of (inputs, outputs).
    """
    inputsList = [inputs.get(key) for key in FEATURES]
    stacked = tf.stack(inputsList, axis=0)
    print('CHW',stacked)
    # Convert from CHW to HWC
    stacked = tf.transpose(stacked, [1, 2, 0])
    return stacked[:,:,:len(BANDS)], stacked[:,:,len(BANDS):]

def okidoki(inputs):
  # cast to int32 for one hot, it stays as original input for some reason 
  one_hot_int = [tf.one_hot(tf.cast(inputs.get(key), tf.int32), CATEGORICAL_DICt.get(key)) for key in CATEGORICAL]
  stacked = tf.stack(one_hot_int,axis=-1)

  stacked = tf.squeeze(stacked)

  # contin
  conInputsList = [inputs.get(key) for key in BANDS if key not in CATEGORICAL]
  conStacked = tf.stack(conInputsList, axis=0)
  conStacked = tf.transpose(conStacked, [1, 2, 0])

  # stack and split off predictor tf.concat([,stacked], axis-1)
  outStack = tf.concat([conStacked[:,:,:len(BANDS)], stacked],axis=-1), conStacked[:,:,len(BANDS):]
  return outStack
inputs = dataset
inputsListCate =  dataset.map(okidoki)
# dataset
print(inputsListCate)



# inputsListCon =  dataset.map(to_tuple)
# inputsListCon
# shape=(256, 256, 19)

<MapDataset element_spec=(TensorSpec(shape=(256, 256, 29), dtype=tf.float32, name=None), TensorSpec(shape=(256, 256, 0), dtype=tf.float32, name=None))>


In [99]:
a = [[1],[2],[3]]
v = 1
print(a[:v])

[[1]]


In [61]:
# Print the first parsed record to check.
from pprint import pprint
pprint(iter(inputsListCate).next())

<tf.Tensor: shape=(256, 256, 31), dtype=float32, numpy=
array([[[1.913e+03, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
         0.000e+00],
        [1.913e+03, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
         0.000e+00],
        [1.913e+03, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
         0.000e+00],
        ...,
        [1.913e+03, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
         0.000e+00],
        [1.913e+03, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
         0.000e+00],
        [1.913e+03, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
         0.000e+00]],

       [[1.913e+03, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
         0.000e+00],
        [1.913e+03, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
         0.000e+00],
        [1.913e+03, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
         0.000e+00],
        ...,
        [1.913e+03, 0.000e+00, 0.000e+00, ..., 0.000e+00, 0.000e+00,
         0.000e+00],
        [1.913e+

In [None]:
def get_training_dataset():
    """Get the preprocessed training dataset
    Returns: 
    A tf.data.Dataset of training data.
    """
    glob = 'gs://' + tf_run.bucket + '/' + tf_run.folder + '/' + tf_sample.training + '*'
    dataset = get_dataset(glob,training=True)
    dataset = dataset.shuffle(tf_run.buffer_size).batch(tf_run.batch_size).repeat()
    return dataset

training = get_training_dataset()