In [None]:
#@title Copyright 2023 Google LLC. { display-mode: "form" }
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

<table class="ee-notebook-buttons" align="left"><td>
<a target="_blank"  href="http://colab.research.google.com/github/google/earthengine-community/blob/master/guides/linked/Earth_Engine_PyTorch_Vertex_AI.ipynb">
    <img src="https://www.tensorflow.org/images/colab_logo_32px.png" /> Run in Google Colab</a>
</td><td>
<a target="_blank"  href="https://github.com/google/earthengine-community/blob/master/guides/linked/Earth_Engine_PyTorch_Vertex_AI.ipynb"><img width=32px src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" /> View source on GitHub</a></td></table>

# Connecting Earth Engine to a Vertex AI hosted PyTorch model

This notebook demonstrates training a per-pixel neural network in PyTorch, hosting the model on Vertex AI, and using it in Earth Engine for interactive prediction using `ee.Model.fromVertexAi` with the `'ND_ARRAYS'` payloadFormat parameter.

**Running this demo may incur charges to your Google Cloud Account!**

# Set up

In [None]:
from google.colab import auth
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

import ee

import folium
import google
import pandas as pd
import torch
import torch.optim as optim
import torch.nn as nn
import numpy as np

In [None]:
# Authenticate the notebook via Colab's auth helper.
# Later cells (the gcloud commands and google.auth.default()) presumably rely
# on the credentials established here — TODO confirm.
auth.authenticate_user()

In [None]:
# REPLACE WITH YOUR CLOUD PROJECT!
# ID of the Google Cloud project used by the gcloud commands, the Vertex AI
# endpoint path and ee.Initialize() further down in this notebook.
PROJECT = 'your-project'

In [None]:
# Make PROJECT the active project in the gcloud configuration.
!gcloud config set project {PROJECT}

## Define variables


In [None]:
""" Training variables"""

# Cloud Storage bucket with training and testing datasets.
DATA_BUCKET = 'ee-docs-demos'
TRAIN_FILE_PATH = f'gs://{DATA_BUCKET}/Pytorch_training_demo.csv'
TEST_FILE_PATH = f'gs://{DATA_BUCKET}/Pytorch_testing_demo.csv'

# Training parameters.
# Landsat 8 bands used as predictors; their count is the model's input depth.
BANDS = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7']
# Spatial size of one training sample (a single pixel).
INPUT_TILE_X_DIM = 1
INPUT_TILE_Y_DIM = 1
BATCH_SIZE = 4
# Name of the label column in the training/testing CSV files.
OUTPUT_CLASS = 'landcover'


""" Model deployment variables"""

# Output bucket for trained models. REPLACE WITH YOUR WRITABLE BUCKET!
OUTPUT_BUCKET = 'gs://your-bucket'

# Metadata for model deployment
REGION = 'us-central1'
MODEL_NAME = 'vertex_pytorch_demo'
# Cloud Storage location the model artifacts are copied to.
MODEL_DIR = f'{OUTPUT_BUCKET}/{MODEL_NAME}'
CONTAINER_IMAGE = 'us-docker.pkg.dev/vertex-ai/prediction/pytorch-cpu.1-12:latest'
ENDPOINT_NAME = 'vertex_pytorch_demo_endpoint'


""" Earth Engine visualization variables"""

# Attribution string attached to every folium tile layer.
ATTRIBUTION = 'Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>'

# Prepare training data

The training data are landcover labels with a single vector of Landsat 8 pixel values (`BANDS`) as predictors. See [this Code Editor script](https://code.earthengine.google.com/a96ddba52131951f5613c88a0ceb8a96) for an example of how to generate this training data.

## Read into tensors



In [None]:
# Install additional package
!pip install gcsfs

In [None]:
# Load the training CSV from Cloud Storage into a Pandas dataframe.
df_train = pd.read_csv(TRAIN_FILE_PATH)

# Build the predictor and label tensors straight from the dataframe columns:
# band values become float32 features, class labels become int64 targets.
features = torch.tensor(df_train[BANDS].values, dtype=torch.float32)
target = torch.tensor(df_train[OUTPUT_CLASS].values, dtype=torch.long)

## Reshape tensors

 Once we have the data as tensors, we need to reshape the features and target to have the shape that our PyTorch model will expect as input.

In [None]:
# Lay the features out as (sample, band, x, y) and the labels as
# (sample, x, y); -1 lets PyTorch infer the number of samples.
feature_shape = (-1, len(BANDS), INPUT_TILE_X_DIM, INPUT_TILE_Y_DIM)
target_shape = (-1, INPUT_TILE_X_DIM, INPUT_TILE_Y_DIM)

reshaped_features = features.reshape(feature_shape)
print(reshaped_features.shape)

reshaped_targets = target.reshape(target_shape)
print(reshaped_targets.shape)

## Load tensors into a `DataLoader`

Finally, we load the tensors into a PyTorch DataLoader, which makes it easier to batch and shuffle the data for training.

In [None]:
# Pair every feature tile with its label, then serve the pairs in shuffled
# mini-batches of BATCH_SIZE samples.
train_dataset = TensorDataset(reshaped_features, reshaped_targets)
train_loader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=BATCH_SIZE,
)

# Model setup

Now we will define and train a simple landcover classification model with 2 convolutional layers. Note that the model used here is purely for demonstration purposes.

## Create the model

In [None]:
class ClassificationModel(nn.Module):
    """Per-pixel landcover classifier built from two 1x1 convolutions.

    The input has one channel per entry in BANDS. The output has 3 channels
    holding class scores that are softmax-normalized along dim 1.
    """

    def __init__(self):
        super().__init__()
        # 1x1 kernels: every pixel is classified independently of its
        # neighbours.
        self.conv1 = nn.Conv2d(len(BANDS), 15, kernel_size=1)
        self.conv2 = nn.Conv2d(15, 3, kernel_size=1)
        self.dropout = nn.Dropout(p=0.1)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Runs conv -> ReLU -> dropout -> conv -> softmax on `x`."""
        hidden = self.dropout(torch.relu(self.conv1(x)))
        return self.softmax(self.conv2(hidden))

# Create an instance of the model
model = ClassificationModel()

# Define the loss function and optimizer.
#
# The model's forward pass already ends in a softmax, so its outputs are class
# probabilities. nn.CrossEntropyLoss expects unnormalized logits and applies
# log-softmax itself; feeding it probabilities would normalize twice, which
# flattens the gradients and puts an artificial floor under the loss. Taking
# the log of the probabilities and applying the negative log-likelihood loss
# gives the actual cross entropy while keeping the model output unchanged.
_nll_loss = nn.NLLLoss()


def criterion(probabilities, targets):
    """Cross-entropy loss for a model that outputs class probabilities.

    Args:
        probabilities: Tensor of per-class probabilities with the class
            dimension at dim 1 (the output of `model`).
        targets: Tensor of integer class indices, shaped like `probabilities`
            without the class dimension.

    Returns:
        Scalar tensor with the mean negative log-likelihood.
    """
    # Clamp to the smallest positive normal value so that log() stays finite
    # if a probability underflows to exactly 0.
    floor = torch.finfo(probabilities.dtype).tiny
    return _nll_loss(torch.log(probabilities.clamp_min(floor)), targets)


optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [None]:
# Fit the model to the training data.
num_epochs = 13

# Put the model in training mode explicitly so that dropout is active even
# when this cell is re-run after the evaluation cell has called model.eval().
model.train()

for epoch in range(num_epochs):
  for inputs, targets in train_loader:
    # Forward pass
    output = model(inputs)
    loss = criterion(output, targets)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # Print the loss of the epoch's last batch every other epoch
  if epoch % 2 == 0:
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

## Test the model

The model is trained! Now let's test how well our model is able to classify our target landcover classes. When we prepared the training data, we reserved 30% of it for testing. Let's read that testing data from Cloud Storage now and prepare it for our model.

In [None]:
# Load the held-out testing CSV and prepare it the same way as the training
# data: float32 band values, int64 labels, reshaped to the model's layout.
df_test = pd.read_csv(TEST_FILE_PATH)

testing_features = torch.tensor(df_test[BANDS].values, dtype=torch.float32)
testing_target = torch.tensor(df_test[OUTPUT_CLASS].values, dtype=torch.long)

reshaped_testing_features = testing_features.reshape(
    -1, len(BANDS), INPUT_TILE_X_DIM, INPUT_TILE_Y_DIM)
reshaped_testing_target = testing_target.reshape(
    -1, INPUT_TILE_X_DIM, INPUT_TILE_Y_DIM)

Now we can pass the testing features to our model and compare the returned predictions with the ground truth targets to get an accuracy percentage.

In [None]:
# Switch the model to evaluation mode and skip gradient tracking while
# scoring the testing data.
model.eval()
with torch.no_grad():
    test_predictions = model(reshaped_testing_features)
    testing_loss = criterion(test_predictions, reshaped_testing_target)

print("Loss is:", testing_loss)

# The predicted class is the index along dim 1 with the highest score;
# accuracy is the fraction of pixels where it matches the label.
test_predicted_labels = test_predictions.argmax(dim=1)
accuracy = test_predicted_labels.eq(reshaped_testing_target).float().mean()
print("Accuracy is:", accuracy)

# Save model artifacts

Once we're happy with our model architecture and accuracy, we can move on to preparing all of the model artifacts we'll need to deploy it on Vertex AI.

## Save the model's script module

In [None]:
!mkdir pytorch_model
script_module = torch.jit.script(model)
script_module.save('pytorch_model/script_module.pt')

## Write a custom handler

Now we need to specify a handler for our model. We could use a TorchServe default handler or write our own custom one. Here, we'll define a custom handler because our model returns its predictions as a tensor of class probabilities, but we only want to return the class with the highest probability for each pixel.

In [None]:
%%writefile pytorch_model/custom_handler.py

import logging

import torch
from ts.torch_handler.base_handler import BaseHandler
import numpy as np


logger = logging.getLogger(__name__)


class ClassifierHandler(BaseHandler):
  """Handler that returns the most likely class instead of class scores."""

  def postprocess(self, data):
    """Collapses the class scores in `data` to class indices.

    The index of the largest value along dim 1 is taken, cast to float and
    returned as a (nested) Python list.
    """
    class_ids = data.argmax(dim=1)
    return class_ids.to(torch.float32).tolist()

  def handle(self, data, context):
    """Runs preprocess -> inference -> postprocess on the request `data`.

    `preprocess` and `inference` are not overridden here, so the versions
    inherited from BaseHandler are used.
    """
    self.context = context

    model_input = self.preprocess(data)
    raw_output = self.inference(model_input)
    return self.postprocess(raw_output)

## Save a model archive

In [None]:
!pip install torch-model-archiver

In [None]:
# Package the serialized TorchScript module and the custom handler into a
# model archive written to pytorch_model/ (presumably named model.mar after
# --model-name — TODO confirm). -f forces overwriting an existing archive.
!torch-model-archiver -f \
  --model-name model \
  --version 1.0 \
  --serialized-file 'pytorch_model/script_module.pt' \
  --handler 'pytorch_model/custom_handler.py' \
  --export-path pytorch_model/

## Copy files to GCS

In [None]:
# Create the bucket if it doesn't exist
# NOTE(review): the command runs unconditionally; when OUTPUT_BUCKET already
# exists gcloud presumably just reports an error that can be ignored — confirm.
!gcloud storage buckets create {OUTPUT_BUCKET}

In [None]:
# Recursively copy the local pytorch_model directory (script module, custom
# handler and model archive) to MODEL_DIR in Cloud Storage.
!gsutil cp -r pytorch_model {MODEL_DIR}

# Deploy to Vertex AI

### Upload the model

Add an entry to the model registry that points to the location of the saved model and a container image needed to run the model. See [Vertex Docs](https://cloud.google.com/sdk/gcloud/reference/ai/models/upload) for more info.

In [None]:
# Register the artifacts stored under MODEL_DIR as a Vertex AI model that is
# served with the CONTAINER_IMAGE container. MODEL_NAME doubles as the model
# ID, display name and description.
!gcloud ai models upload \
  --artifact-uri={MODEL_DIR} \
  --project={PROJECT} \
  --region={REGION} \
  --container-image-uri={CONTAINER_IMAGE} \
  --description={MODEL_NAME} \
  --display-name={MODEL_NAME} \
  --model-id={MODEL_NAME}

### Create an endpoint

Create an endpoint from which to serve the model. See [Vertex Docs](https://cloud.google.com/sdk/gcloud/reference/ai/endpoints/create) for more info.

In [None]:
# Create a Vertex AI endpoint with display name ENDPOINT_NAME in REGION.
!gcloud ai endpoints create \
  --display-name={ENDPOINT_NAME} \
  --region={REGION} \
  --project={PROJECT}

### Deploy the model to the endpoint

First, look up the endpoint ID, then deploy the model. See [Vertex Docs](https://cloud.google.com/sdk/gcloud/reference/ai/endpoints/deploy-model) for more info.

In [None]:
# Capture the output lines of `gcloud ai endpoints list`, filtered to the
# endpoint whose display name is ENDPOINT_NAME and formatted to print only
# its ID.
ENDPOINT_ID = !gcloud ai endpoints list \
  --project={PROJECT} \
  --region={REGION} \
  --filter=displayName:{ENDPOINT_NAME} \
  --format="value(ENDPOINT_ID.scope())"
# Keep only the last captured line — presumably the ID itself, with any gcloud
# status messages on the lines before it (TODO confirm).
ENDPOINT_ID = ENDPOINT_ID[-1]

In [None]:
# Deploy the uploaded model (referenced by its ID, MODEL_NAME) to the endpoint
# looked up in the previous cell.
!gcloud ai endpoints deploy-model {ENDPOINT_ID} \
  --project={PROJECT} \
  --region={REGION} \
  --model={MODEL_NAME} \
  --display-name={MODEL_NAME}

# Connect to the hosted model from Earth Engine

1. Generate the input imagery.  This should be done in exactly the same way as the training data were generated.  See [this example notebook](http://colab.research.google.com/github/google/earthengine-api/blob/master/python/examples/ipynb/TF_demo1_keras.ipynb) for details.
2. Connect to the hosted model.
3. Use the model to make predictions.
4. Display the results.

Note that it may take the model a couple of minutes to spin up before it is ready to make predictions.

In [None]:
# Initialize Earth Engine for PROJECT with the default Google credentials,
# pointing the client at the high-volume endpoint.
credentials, _ = google.auth.default()
ee.Initialize(
    credentials,
    project=PROJECT,
    opt_url='https://earthengine-highvolume.googleapis.com',
)

In [None]:
def _add_tile_layer(target_map, ee_mapid, layer_name):
  """Adds the Earth Engine tiles described by `ee_mapid` to `target_map`.

  Args:
    target_map: The folium map that receives the layer.
    ee_mapid: Result of an Earth Engine `getMapId` call.
    layer_name: Label shown for the layer in the layer control.
  """
  folium.TileLayer(
      tiles=ee_mapid['tile_fetcher'].url_format,
      attr=ATTRIBUTION,
      overlay=True,
      name=layer_name,
  ).add_to(target_map)


# The image input data is a 2018 cloud-masked median composite.
landsatCollection = (
    ee.ImageCollection('LANDSAT/LC08/C02/T1')
    .filterDate('2018-01-01', '2018-12-31'))

composite = ee.Algorithms.Landsat.simpleComposite(
    collection=landsatCollection, asFloat=True)

# Get a URL to serve image tiles.
mapid = composite.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})

# Use folium to visualize the imagery.
# NOTE: the name `map` shadows Python's built-in map() from here on.
map = folium.Map(location=[38., -122.5])

# Inputs.
_add_tile_layer(map, mapid, 'median composite')

# Full resource path of the Vertex AI endpoint that hosts the model.
endpoint_path = '/'.join([
    'projects', PROJECT, 'locations', REGION, 'endpoints', str(ENDPOINT_ID)])

# Connect to the hosted model.
output_bands = {'output': {'type': ee.PixelType.float(), 'dimensions': 0}}
vertex_model = ee.Model.fromVertexAi(
    endpoint=endpoint_path,
    inputTileSize=[50, 50],
    proj=ee.Projection('EPSG:4326').atScale(30),
    fixInputProj=True,
    outputBands=output_bands,
    payloadFormat='ND_ARRAYS',
)

# Predict on the same bands the model was trained with.
predictions = vertex_model.predictImage(composite.select(BANDS))

# Class indices 0-2 are drawn in red, green and blue respectively.
predictions_vis = {'min': 0, 'max': 2, 'palette': ['red', 'green', 'blue']}
predictions_mapid = predictions.getMapId(predictions_vis)
_add_tile_layer(map, predictions_mapid, 'predictions')

map.add_child(folium.LayerControl())
display(map)