In [None]:
#@title Copyright 2023 Google LLC. { display-mode: "form" }
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

<table class="ee-notebook-buttons" align="left"><td>
<a target="_blank"  href="http://colab.research.google.com/github/google/earthengine-community/blob/master/guides/linked/Earth_Engine_TensorFlow_Vertex_AI.ipynb">
    <img src="https://www.tensorflow.org/images/colab_logo_32px.png" /> Run in Google Colab</a>
</td><td>
<a target="_blank"  href="https://github.com/google/earthengine-community/blob/master/guides/linked/Earth_Engine_TensorFlow_Vertex_AI.ipynb"><img width=32px src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" /> View source on GitHub</a></td></table>

# Connecting Earth Engine to a Vertex AI hosted model

This notebook demonstrates training a per-pixel neural network in TensorFlow, hosting the model on Vertex AI, and using it in Earth Engine for interactive prediction through `ee.Model.fromVertexAi`.

**Running this demo may incur charges to your Google Cloud Account!**

In [None]:
from google.colab import auth
from pprint import pprint

import ee
import folium
import google
import tensorflow as tf
from tensorflow import keras

## Authentication and Initialization

In [None]:
# Authenticate the Colab user.  Presumably this is what makes credentials
# available to the google.auth.default() call in a later cell -- confirm.
auth.authenticate_user()

In [None]:
# Google Cloud project ID, used to initialize Earth Engine and passed as
# --project to the gcloud commands.  REPLACE WITH YOUR CLOUD PROJECT!
PROJECT = 'your-project'

In [None]:
# Fetch the default credentials; the second return value is not needed here.
credentials, _ = google.auth.default()

# Initialize Earth Engine for PROJECT against the high-volume API endpoint.
ee.Initialize(
    credentials,
    project=PROJECT,
    opt_url='https://earthengine-highvolume.googleapis.com',
)

# Define variables

The training data are land cover labels with a single vector of Landsat 8 pixel values (`BANDS`) as predictors.  See [this example notebook](http://colab.research.google.com/github/google/earthengine-api/blob/master/python/examples/ipynb/Earth_Engine_TensorFlow_DNN_from_scratch.ipynb) for details on how to generate these training data.

In [None]:
# Output bucket for trained models. REPLACE WITH YOUR WRITABLE BUCKET!
OUTPUT_BUCKET = 'your-bucket'

# Cloud region passed to the gcloud commands and used in the endpoint path.
REGION = 'us-central1'

# Cloud Storage bucket with training and testing datasets.
DATA_BUCKET = 'ee-docs-demos'

# Model name and the Cloud Storage location derived from it.
MODEL_NAME = 'vertex_dnn_demo'
MODEL_DIR = f'gs://{OUTPUT_BUCKET}/{MODEL_NAME}'
# Container image passed to `gcloud ai models upload`.
CONTAINER_IMAGE = 'us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-11:latest'

# Display name (before the '_test' suffix) of the serving endpoint.
ENDPOINT_NAME = 'dnn_demo_endpoint'

# Training and testing dataset file names in the Cloud Storage bucket.
TRAIN_FILE_PREFIX = 'Training_demo'
TEST_FILE_PREFIX = 'Testing_demo'
file_extension = '.tfrecord.gz'
TRAIN_FILE_PATH = f'gs://{DATA_BUCKET}/{TRAIN_FILE_PREFIX}{file_extension}'
TEST_FILE_PATH = f'gs://{DATA_BUCKET}/{TEST_FILE_PREFIX}{file_extension}'

# Name of the property that stores the label on each point.  The labels are
# consecutive integer indices starting from zero.
LABEL = 'landcover'
# Number of label values, i.e. number of classes in the classification.
N_CLASSES = 3

# Landsat 8 surface reflectance collection used for the predictors.
L8SR = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
# Bands used for prediction.
BANDS = ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']

# Property names used in the export of training/testing data; they also
# define the mapping between names and data when reading into TensorFlow
# datasets.  The label comes last.
FEATURE_NAMES = [*BANDS, LABEL]

# One fixed-length float32 feature of shape [1] per feature name.
columns = [
  tf.io.FixedLenFeature(shape=[1], dtype=tf.float32) for _ in FEATURE_NAMES
]

# Feature name -> fixed-length feature specification.
FEATURES_DICT = {name: column for name, column in zip(FEATURE_NAMES, columns)}

# Attribution string shown on the folium tile layers.
ATTRIBUTION = 'Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>'

# Read data

### Check existence of the data files

Check that you have permission to read the training and testing files in the data Cloud Storage bucket (`DATA_BUCKET`).

In [None]:
# Report whether the training file is reachable.
if tf.io.gfile.exists(TRAIN_FILE_PATH):
  print('Found training file.')
else:
  print('No training file found.')

# Report whether the testing file is reachable.
if tf.io.gfile.exists(TEST_FILE_PATH):
  print('Found testing file.')
else:
  print('No testing file found.')

## Read into a `tf.data.Dataset`

Here we are going to read a file in Cloud Storage into a `tf.data.Dataset`.  ([These TensorFlow docs](https://www.tensorflow.org/guide/data) explain more about reading data into a `tf.data.Dataset`.)  Check that you can read examples from the file.  The purpose here is to ensure that we can read from the file without an error.  The actual content is not necessarily human readable.  Note that we will use all data (both the training and the testing files) for training.


In [None]:
# Create a dataset from the gzipped TFRecord files in Cloud Storage.  Both the
# training and the testing file are listed, so every record goes into training.
train_dataset = tf.data.TFRecordDataset(
    [TRAIN_FILE_PATH, TEST_FILE_PATH], compression_type='GZIP')

# Print the first record to check that the files can be read.  The built-in
# next() is used instead of the Python-2-style `.next()` method so this only
# depends on the standard iterator protocol.
print(next(iter(train_dataset)))

## Parse the dataset

Now we need to make a parsing function for the data in the TFRecord files.  The data comes in flattened 2D arrays per record and we want to use the first part of the array for input to the model and the last element of the array as the class label.  The parsing function reads data from a serialized `Example` proto (i.e. [`example.proto`](https://github.com/tensorflow/tensorflow/blob/r1.12/tensorflow/core/example/example.proto)) into a dictionary in which the keys are the feature names and the values are the tensors storing the value of the features for that example.

In [None]:
def parse_tfrecord(example_proto):
  """Parse a serialized example into model inputs and an integer label.

  Args:
    example_proto: Serialized `Example` proto, parsed against FEATURES_DICT.

  Returns:
    A `(features, label)` tuple.  `features` maps each remaining feature name
    to its parsed value nested two levels deep, i.e. (1) -> (1, 1, 1).
    `label` is the LABEL feature cast to int32 and expanded to (1, 1, 1).
  """
  parsed = tf.io.parse_example(example_proto, FEATURES_DICT)

  # Take the label out so that only the predictors are left in `parsed`.
  label = tf.cast(parsed.pop(LABEL), tf.int32)
  # (1) -> (1, 1, 1)
  label = tf.expand_dims(label, axis=0)
  label = tf.expand_dims(label, axis=0)

  # (1) -> (1, 1, 1) for every predictor.
  features = {name: [[value]] for name, value in parsed.items()}
  return (features, label)

# Map the parsing function over the dataset.
parsed_dataset = train_dataset.map(parse_tfrecord, num_parallel_calls=4)

# Print the first parsed record to check.  The built-in next() is used instead
# of the Python-2-style `.next()` method so this only depends on the standard
# iterator protocol.
check = next(iter(parsed_dataset.take(1)))
pprint(check)

# Model setup

Make a densely-connected convolutional model, where the convolution occurs in a 1x1 kernel.  This allows Earth Engine to apply the model spatially, as demonstrated below.  Note that the model used here is purely for demonstration purposes.

## Create the Keras model
This is a very basic [Keras functional model](https://www.tensorflow.org/guide/keras/functional).

In [None]:
# Note that the input is a [1, 1, P] vector of P bands, i.e. a 1x1 kernel.
inputs = keras.Input(shape=(1, 1, len(BANDS)))

# Hidden layer: 64 filters applied with a 1x1 kernel, followed by dropout.
hidden = keras.layers.Conv2D(
    filters=64, kernel_size=(1, 1), activation=tf.nn.relu)(inputs)
hidden = keras.layers.Dropout(rate=0.1)(hidden)

# Output layer: one softmax value per class, again with a 1x1 kernel.
class_outputs = keras.layers.Conv2D(
    filters=N_CLASSES, kernel_size=(1, 1), activation=tf.nn.softmax)(hidden)

model = keras.Model(inputs=inputs, outputs=class_outputs)

# A Layer to stack and reshape the input tensors.
class MyPreprocessing(keras.layers.Layer):
  """Layer that stacks the per-band input tensors along the last axis."""

  def __init__(self, **kwargs):
    super().__init__(**kwargs)

  def call(self, features_dict):
    """Concatenate the BANDS entries of `features_dict`, in BANDS order.

    Args:
      features_dict: Mapping from band name to tensor.

    Returns:
      The concatenated tensor: (None, 1, 1, 1) -> (None, 1, 1, P).
    """
    band_tensors = [features_dict[band] for band in BANDS]
    return tf.concat(band_tensors, axis=3)

  def get_config(self):
    """Return the base layer config; this layer adds nothing to it."""
    return super().get_config()


# A Model that wraps the base model with the preprocessing layer.
class MyModel(keras.Model):
  """Model that runs a preprocessing layer and then a backbone model."""

  def __init__(self, preprocessing, backbone, **kwargs):
    """Store the two stages.

    Args:
      preprocessing: Callable applied to the input features dictionary.
      backbone: Callable applied to the output of `preprocessing`.
      **kwargs: Forwarded to `keras.Model`.
    """
    super().__init__(**kwargs)
    self.preprocessing = preprocessing
    self.backbone = backbone

  def call(self, features_dict):
    """Return the backbone's output for the preprocessed features."""
    return self.backbone(self.preprocessing(features_dict))

  def get_config(self):
    """Return the base config; the two stages are not added to it."""
    return super().get_config()

# Wrap the base model so that it accepts a dictionary of per-band tensors.
wrapped_model = MyModel(preprocessing=MyPreprocessing(), backbone=model)

In [None]:
# Compile the model with the specified loss and optimizer functions.
wrapped_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

# Fit the model to the training data.  Lucky number 13.
wrapped_model.fit(parsed_dataset.batch(4), epochs=13)

In [None]:
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
wrapped_model.summary()

## De/serialization

De/serialization prepares the model for hosting on Google Cloud.  Specifically, you need to provide image data to the Vertex AI API by sending the image data as base64-encoded text ([reference](https://cloud.google.com/vertex-ai/docs/general/base64)).  Wrap the trained model in de/serialization layers to handle the conversion.

In [None]:
class DeSerializeInput(tf.keras.layers.Layer):
  """Layer that turns base64-encoded serialized tensors into float32 tensors."""

  def __init__(self, **kwargs):
    super().__init__(**kwargs)

  def call(self, inputs_dict):
    """Decode every entry of `inputs_dict`.

    Args:
      inputs_dict: Mapping from input name to a tensor of base64 strings.

    Returns:
      A dictionary with the same keys.  Each value is obtained by
      base64-decoding the strings and parsing each one as a float32 tensor.
    """
    def decode(encoded):
      return tf.map_fn(
          lambda serialized: tf.io.parse_tensor(serialized, tf.float32),
          tf.io.decode_base64(encoded),
          fn_output_signature=tf.float32)

    return {name: decode(encoded) for name, encoded in inputs_dict.items()}

  def get_config(self):
    """Return the base layer config; this layer adds nothing to it."""
    return super().get_config()


class ReSerializeOutput(tf.keras.layers.Layer):
  """Layer that serializes and base64-encodes each element of its input."""

  def __init__(self, **kwargs):
    super().__init__(**kwargs)

  def call(self, output_tensor):
    """Encode `output_tensor` element by element along its first axis.

    Args:
      output_tensor: Tensor to encode.

    Returns:
      A string tensor holding, for each element, the base64 encoding of its
      serialized form.
    """
    def encode(element):
      return tf.io.encode_base64(tf.io.serialize_tensor(element))

    return tf.map_fn(encode, output_tensor, fn_output_signature=tf.string)

  def get_config(self):
    """Return the base layer config; this layer adds nothing to it."""
    return super().get_config()

# Layers that convert between base64 text and tensors at the model boundary.
input_deserializer = DeSerializeInput()
# This layer serializes, so it is named accordingly.
output_serializer = ReSerializeOutput()

# One scalar string input per band, named after the band.
serialized_inputs = {
    b: tf.keras.Input(shape=[], dtype='string', name=b) for b in BANDS
}

# Chain: decode inputs -> trained model -> encode outputs.  Each intermediate
# tensor gets its own name so that `updated_model` only ever refers to the
# final Keras model.
updated_model_input = input_deserializer(serialized_inputs)
model_output = wrapped_model(updated_model_input)
serialized_output = output_serializer(model_output)
updated_model = tf.keras.Model(serialized_inputs, serialized_output)

In [None]:
# Draw a diagram of the model that wraps the trained model in the
# de/serialization layers.
tf.keras.utils.plot_model(updated_model)

## Save the trained model

Export the trained model to TensorFlow `SavedModel` format in your cloud storage bucket.  The [Cloud Platform storage browser](https://console.cloud.google.com/storage/browser) is useful for checking on these saved models.

In [None]:
# Recursively remove anything already stored at the export location, then
# save the wrapped model there.  On a first run there is presumably nothing to
# remove, so an error message from gsutil is expected -- confirm.
!gsutil rm -r {MODEL_DIR + '_test'}
updated_model.save(MODEL_DIR + '_test')

# Deploy the model on Vertex AI

### [Upload the model](https://cloud.google.com/sdk/gcloud/reference/ai/models/upload)
Add an entry to the model registry that points to the location of the saved model and a container image needed to run the model.

In [None]:
# Delete any model registered under the ID used by the upload cell below.
# NOTE(review): presumably this reports an error if no such model exists yet,
# e.g. on a first run -- confirm.
!gcloud ai models delete {MODEL_NAME + '_test'} --project={PROJECT} --region={REGION}

In [None]:
# Register the saved model: --artifact-uri is the Cloud Storage location it was
# saved to and --container-image-uri is the serving image.  The same
# '<MODEL_NAME>_test' string is used as description, display name and model ID.
!gcloud ai models upload \
  --artifact-uri={MODEL_DIR + '_test'} \
  --project={PROJECT} \
  --region={REGION} \
  --container-image-uri={CONTAINER_IMAGE} \
  --description={MODEL_NAME + '_test'} \
  --display-name={MODEL_NAME + '_test'} \
  --model-id={MODEL_NAME + '_test'}

### [Create an endpoint](https://cloud.google.com/sdk/gcloud/reference/ai/endpoints/create)

Create an endpoint from which to serve the model.

In [None]:
# Create an endpoint with display name '<ENDPOINT_NAME>_test' in REGION.
# NOTE(review): re-running this cell presumably creates another endpoint with
# the same display name -- confirm, and delete extras to avoid charges.
!gcloud ai endpoints create \
  --display-name={ENDPOINT_NAME + '_test'} \
  --region={REGION} \
  --project={PROJECT}

### [Deploy the model to the endpoint](https://cloud.google.com/sdk/gcloud/reference/ai/endpoints/deploy-model)

First, look up the endpoint ID, then deploy the model.

In [None]:
# List the endpoints whose display name matches '<ENDPOINT_NAME>_test',
# printing only their IDs.  The `!` assignment captures the command output as a
# list of lines.
ENDPOINT_ID = !gcloud ai endpoints list \
  --project={PROJECT} \
  --region={REGION} \
  --filter=displayName:{ENDPOINT_NAME + '_test'} \
  --format="value(ENDPOINT_ID.scope())"
# Keep only the last captured line as the endpoint ID.
# NOTE(review): if several endpoints share the display name, this picks
# whichever is listed last -- confirm that is the intended one.
ENDPOINT_ID = ENDPOINT_ID[-1]

In [None]:
# Deploy the uploaded model (referenced by its model ID) to the endpoint found
# in the previous cell.
!gcloud ai endpoints deploy-model {ENDPOINT_ID} \
  --project={PROJECT} \
  --region={REGION} \
  --model={MODEL_NAME + '_test'} \
  --display-name={MODEL_NAME + '_test'}

# Connect to the hosted model from Earth Engine

1. Generate the input imagery.  This should be done in exactly the same way as the training data were generated.  See [this example notebook](http://colab.research.google.com/github/google/earthengine-api/blob/master/python/examples/ipynb/TF_demo1_keras.ipynb) for details.
2. Connect to the hosted model.
3. Use the model to make predictions.
4. Display the results.

Note that it may take the model a couple of minutes to spin up before it is ready to make predictions.

In [None]:
def maskL8sr(image):
  """Mask and rescale a Landsat 8, collection 2 image.

  Args:
    image: The image to process.

  Returns:
    The image with its optical and thermal bands replaced by rescaled
    versions, masked by the QA_PIXEL and QA_RADSAT tests, and reduced to the
    bands matching 'SR_B*.'.
  """
  # Keep pixels whose five lowest QA_PIXEL bits are all zero (presumably the
  # fill, dilated cloud, cirrus, cloud and cloud shadow flags -- confirm
  # against the dataset documentation).
  clear_mask = image.select('QA_PIXEL').bitwiseAnd(0b11111).eq(0)
  # Keep pixels with no QA_RADSAT bits set.
  unsaturated_mask = image.select('QA_RADSAT').eq(0)

  # Apply the linear scale and offset to the optical and thermal bands.
  scaled_optical = image.select('SR_B.').multiply(0.0000275).add(-0.2)
  scaled_thermal = image.select('ST_B.*').multiply(0.00341802).add(149.0)

  # Overwrite the original bands with the rescaled ones.
  rescaled = image.addBands(scaled_optical, None, True)
  rescaled = rescaled.addBands(scaled_thermal, None, True)

  masked = rescaled.updateMask(clear_mask).updateMask(unsaturated_mask)
  return masked.select('SR_B*.')


# The image input data is a 2018 cloud-masked median composite.
# NOTE(review): the end date of filterDate is presumably exclusive, so images
# from 2018-12-31 are left out -- confirm this matches how the training data
# were generated before changing it.
image = L8SR.filterDate('2018-01-01', '2018-12-31').map(maskL8sr).median()

# Get a URL to serve image tiles.
mapid = image.getMapId({'bands': ['SR_B4', 'SR_B3', 'SR_B2'], 'min': 0, 'max': 0.3})

# Use folium to visualize the imagery.  The variable is called `folium_map`
# rather than `map` so that the Python built-in map() is not shadowed for the
# rest of the notebook session.
folium_map = folium.Map(location=[38., -122.5])

# Inputs.
folium.TileLayer(
    tiles=mapid['tile_fetcher'].url_format,
    attr=ATTRIBUTION,
    overlay=True,
    name='median composite',
  ).add_to(folium_map)

# Full resource path of the endpoint the model was deployed to.
endpoint_path = (
    'projects/' + PROJECT + '/locations/' + REGION + '/endpoints/' + str(ENDPOINT_ID))

# Connect to the hosted model.  The single output band, 'output', is declared
# as a float array with one dimension.
vertex_model = ee.Model.fromVertexAi(
  endpoint=endpoint_path,
  inputTileSize=[8, 8],
  proj=ee.Projection('EPSG:4326').atScale(30),
  fixInputProj=True,
  outputBands={'output': {
      'type': ee.PixelType.float(),
      'dimensions': 1
    }
  })

# Predict on the same bands, as float, that the model was trained on.
predictions = vertex_model.predictImage(image.select(BANDS).float())
# Turn the array-valued output band into one named band per class.
probabilities = predictions.arrayFlatten([['bare', 'veg', 'water']])
probability_vis = {'bands': ['bare', 'veg', 'water'], 'min': 0.2, 'max': 0.5}
probability_mapid = probabilities.getMapId(probability_vis)

# Predictions.
folium.TileLayer(
    tiles=probability_mapid['tile_fetcher'].url_format,
    attr=ATTRIBUTION,
    overlay=True,
    name='predictions',
  ).add_to(folium_map)

folium_map.add_child(folium.LayerControl())
display(folium_map)