In [None]:
#@title Copyright 2023 Google LLC. { display-mode: "form" }
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

<table class="ee-notebook-buttons" align="left"><td>
<a target="_blank"  href="http://colab.research.google.com/github/google/earthengine-community/blob/master/guides/linked/Earth_Engine_AutoML_Vertex_AI.ipynb">
    <img src="https://www.tensorflow.org/images/colab_logo_32px.png" /> Run in Google Colab</a>
</td><td>
<a target="_blank"  href="https://github.com/google/earthengine-community/blob/master/guides/linked/Earth_Engine_AutoML_Vertex_AI.ipynb"><img width=32px src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" /> View source on GitHub</a></td></table>


# Crop Classification with AutoML and Earth Engine

This notebook demonstrates how to use the Vertex AI Python SDK to train and deploy an AutoML model and then connect to it from Earth Engine using `ee.Model.fromVertexAi` with the `payloadFormat` parameter set to `RAW_JSON`.


**Note:** This demo was presented at Geo for Good 2023 but used the Google Cloud Console UI and the Earth Engine Code Editor rather than a Python Colab notebook. See the [recording](https://youtu.be/_BjtxPSO1Ho?t=4068) and the [slides](https://docs.google.com/presentation/d/1e5ppyNts-KJx1YIawP-04QxkAXelOrjzKJ0HPpwNILY/edit#slide=id.g28dc28cb8c7_0_0) for that presentation.

This notebook was adapted from that demo as well as the [AutoML Text Classification](https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/automl/automl-text-classification.ipynb) example.

**Running this demo may incur charges to your Google Cloud Account!**

# Set up

In [None]:
from google.colab import auth

from google.cloud import aiplatform, storage
from google.cloud.aiplatform import jobs

In [None]:
# The Google Cloud project passed to gcloud, the Vertex AI SDK and Earth Engine below.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

# The location passed to aiplatform.init() below.
REGION = "us-central1"  # @param {type: "string"}

# The display name of your model (this can be any string).
MODEL_NAME = "[model-name]" # @param {type: "string"}

In [None]:
# Authenticate the notebook user through google.colab.auth.
auth.authenticate_user()

# Set the gcloud project. The `!` prefix runs the line as a shell command and
# IPython substitutes the value of PROJECT_ID for $PROJECT_ID.
! gcloud config set project $PROJECT_ID

# Initialize the Vertex AI Python SDK with the project and region chosen above.
aiplatform.init(project=PROJECT_ID, location=REGION)

# Create a Vertex AI `Dataset` resource

The training data you use to train your model depends on your objective and can come from a variety of places. In this demo, we generated the training data using the Code Editor and exported it to GCS as a CSV file.

See the script used to generate and export the data here: https://code.earthengine.google.com/a3304c5bb365d32d9692ec0e4011641c

In [None]:
# The GCS URI of the CSV file that the Code Editor script exported the training data to.
# NOTE: despite its name, this constant holds a full object URI (ending in .csv), not a bucket.
GCS_BUCKET = 'gs://ee-docs-demos/object_based_classififcation_demo_45e12596da31631aa2e785997346b491.csv'

Once the data is stored in GCS, you can use the Vertex AI Python SDK to create a `TabularDataset`, which is what you'll use to train the model in the next section.

Creating a dataset is a long-running operation, so this next step can take a while. The `create()` method waits for the operation to complete, outputting statements as the operation progresses. The statements contain the full name of the dataset that you use in the following section.

**Note**: You can close the notebook while you wait for this operation to complete.

In [None]:
# Create a tabular dataset from the exported CSV. `dataset` is passed to the
# training job's run() call in the next section.
dataset = aiplatform.TabularDataset.create(
    display_name='demo-dataset',
    gcs_source=GCS_BUCKET
)

# Train model

Now you can begin training your model. Training the model is a two part process:

1. **Define the training job.** You must provide a display name and the type of training you want when you define the training job.
2. **Run the training job.** When you run the training job, you need to supply a reference to the dataset to use for training. At this step, you can also configure the data split percentages.

You do not need to specify [data splits](https://cloud.google.com/vertex-ai/docs/general/ml-use). If you don't provide these values, the training job defaults to 80% training / 10% testing / 10% validation.

As with importing data into the dataset, training your model can take a substantial amount of time. The client library prints out operation status messages while the training pipeline operation processes. You must wait for the training process to complete before you can get the resource name and ID of your new model, which is required for model evaluation and model deployment.

**Note:** You can close the notebook while you wait for the operation to complete.

In [None]:
# Define the training job: an AutoML tabular job whose prediction type is
# classification. Nothing is trained until run() is called on it.
job = aiplatform.AutoMLTabularTrainingJob(
    display_name='demo-training-job',
    optimization_prediction_type="classification"
)

In [None]:
# Run the training job (this may take a few hours to complete).
# `crop_index` is the dataset column the model is trained to predict. The
# returned `model` is used by the evaluation and deployment cells below.
model = job.run(
    dataset=dataset,
    model_display_name=MODEL_NAME,
    target_column = 'crop_index',
    sync=True
)

# Evaluate model

After your model training has finished, you can review its evaluation scores using the `list_model_evaluations()` method. The result can be iterated over to inspect each evaluation slice. You can see further evaluation details by visiting the Vertex AI section of the Google Cloud Console.

In [None]:
# Fetch the evaluations recorded for the trained model.
model_evaluations = model.list_model_evaluations()

# Print each evaluation as a plain dictionary.
for model_evaluation in model_evaluations:
    print(model_evaluation.to_dict())

# Deploy to Vertex AI

Now you can deploy the model to an endpoint! Deploying to an endpoint is necessary for actually connecting to the model from Earth Engine and getting online predictions from it. When you deploy the model to an endpoint, a copy of the model is made on the endpoint with a new resource name and display name.

In [None]:
# Deploy the trained model to an endpoint, reusing MODEL_NAME as the display
# name of the deployed copy.
endpoint = model.deploy(deployed_model_display_name=MODEL_NAME, sync=True)

# Keep the endpoint's resource name and display it as the cell output once the
# model is deployed. It is passed as `endpoint=` to ee.Model.fromVertexAi below.
ENDPOINT_ID = endpoint.resource_name
ENDPOINT_ID

# Connect to the model from Earth Engine

Now that the model is deployed to an endpoint, you can get online predictions from it. From Earth Engine, you will prepare the prediction input to send to your model, connect to the hosted model and get predictions from it, then visualize the results.

### Set up and authentication

In [None]:
# Import the submodule explicitly: a bare `import google` does not load
# `google.auth` by itself, so `google.auth.default()` would only work when
# some other package happened to import it first.
import google.auth
import geemap
import ee

# Authenticate to Earth Engine with the default credentials of this
# environment, then initialize the client against the high-volume endpoint
# URL using PROJECT_ID as the project.
credentials, _ = google.auth.default()
ee.Initialize(credentials, project=PROJECT_ID, opt_url='https://earthengine-highvolume.googleapis.com')

In [None]:
# Region that the training data was generated from. It is used to compute the
# spectral stats of the prediction input and it also determines which class
# values the model can return.
trainingRegion = ee.Geometry.Polygon(
    [[
        [-121.89511299133301, 38.98496606984683],
        [-121.89511299133301, 38.909335196675435],
        [-121.69358253479004, 38.909335196675435],
        [-121.69358253479004, 38.98496606984683],
    ]],
    None,   # proj
    False,  # geodesic
)

# Geometry of the single field to get a prediction for. More properties are
# added to this FeatureCollection before it is sent to Vertex.
fieldGeometry = ee.Geometry.Polygon(
    [[
        [-121.79220199584975, 38.96437397212627],
        [-121.79220199584975, 38.953228092306446],
        [-121.78310394287124, 38.953228092306446],
        [-121.78310394287124, 38.96437397212627],
    ]],
    None,   # proj
    False,  # geodesic
)
predictInput = ee.FeatureCollection(
    [ee.Feature(fieldGeometry, {"system:index": "0"})]
)

# Names of the feature properties that are sent to Vertex for prediction.
PREDICTORS = [
    "R", "G", "B", "N",
    "R_mean", "G_mean", "B_mean", "N_mean",
    "area", "perimeter", "width", "height",
]

# Mosaic of the 2020 NAIP imagery that intersects the training region.
naipMosaic = (
    ee.ImageCollection('USDA/NAIP/DOQQ')
    .filterDate('2020-01-01', '2021-01-01')
    .filterBounds(trainingRegion)
    .mosaic()
)
# Clip to the training region, divide the band values by 255 (presumably to
# rescale 8-bit values to [0, 1] -- confirm) and keep the four bands used here.
naipImage = (
    ee.Image(naipMosaic)
    .clip(trainingRegion)
    .divide(255)
    .select(['R', 'G', 'B', 'N'])
)

# 2020 Cropland Data Layer image; used later to look up crop class names.
cdl = ee.Image("USDA/NASS/CDL/2020")

### Prepare the prediction input

In [None]:
def addProperties(f):
  '''Adds the spectral and shape properties listed in PREDICTORS to a feature.

  Args:
    f: The feature to annotate. Only its geometry is read.

  Returns:
    The feature with the NAIP band statistics (the `<band>_mean` values plus
    each band's standard deviation stored under the bare band name) and the
    `area`, `perimeter`, `width` and `height` properties set.
  '''
  geometry = f.geometry()

  # Per-band mean and standard deviation of the NAIP pixels inside the
  # geometry. The `<band>_stdDev` entries are renamed to the bare band names
  # ('B', 'G', 'R', 'N') that PREDICTORS uses.
  spectralStats = naipImage.reduceRegion(
    reducer=ee.Reducer.mean().combine(
      reducer2=ee.Reducer.stdDev(),
      sharedInputs=True
    ),
    geometry=geometry,
    scale=1
  ).rename(
    ['B_stdDev', 'G_stdDev', 'R_stdDev', 'N_stdDev'],
    ['B', 'G', 'R', 'N']
  )

  # Minimum and maximum longitude/latitude covered by the geometry; width and
  # height are the differences between those extremes.
  coords = ee.Image.pixelLonLat().reduceRegion(
    reducer=ee.Reducer.minMax(),
    geometry=geometry,
    scale=1
  )
  height = ee.Number(coords.get('latitude_max')).subtract(coords.get('latitude_min'))
  width = ee.Number(coords.get('longitude_max')).subtract(coords.get('longitude_min'))

  area = geometry.area(1)
  perimeter = geometry.perimeter(1)

  return f.set(spectralStats).set({
    'area': area,
    'perimeter': perimeter,
    'width': width,
    'height': height
  })

def toString(p):
  '''Returns a mapper that converts property `p` of a feature to a string.

  Args:
    p: Name of the property to convert.

  Returns:
    A function that takes a feature and returns it with property `p`
    replaced by `ee.String.encodeJSON` of its current value.
  '''
  # The inner function has its own name so that it no longer shadows its
  # feature parameter.
  def encodeProperty(feature):
    return feature.set(p, ee.String.encodeJSON(feature.get(p)))
  return encodeProperty

# Attach the spectral and shape properties that the model expects.
predictInput = predictInput.map(addProperties)

# Auto ML prediction needs every predictor value as a string, so encode the
# predictors one at a time.
# Note: predictInput is reassigned from itself here, so re-running this cell
# without first re-running the cell that defines predictInput encodes the
# values a second time.
for predictor in PREDICTORS:
  predictInput = predictInput.map(toString(predictor))

### Get predictions

In [None]:
# Connect to the hosted model.
# - endpoint: the resource name of the endpoint the model was deployed to.
# - inputProperties: the feature properties sent to the model.
# - outputProperties: declares the `classes` and `scores` outputs, each as a
#   one-dimensional float array.
vertex_model = ee.Model.fromVertexAi(
  endpoint=ENDPOINT_ID,
  inputProperties=PREDICTORS,
  outputProperties={
    'classes': {'type': ee.PixelType.float(), 'dimensions': 1},
    'scores': {'type': ee.PixelType.float(), 'dimensions': 1},
  },
  payloadFormat='RAW_JSON'
  )


# Request predictions for the features in the prepared collection.
predictions = vertex_model.predictProperties(predictInput)

# Print the raw predictions returned from Vertex.
print(predictions.getInfo())

### Postprocess and visualize predictions

When we generated the training data, we created a FeatureCollection of only the commonly occurring crops in our region of interest and used only those crops to train the model. Now you'll re-create that FeatureCollection so that you have a mapping of the possible classes returned from Vertex and what crop name each maps to.

In [None]:
# Class names and class values stored as properties on the CDL image.
names = ee.List(cdl.get('cropland_class_names'))
values = ee.List(cdl.get('cropland_class_values'))

# Integer cropland band, plus a mask selecting class values below 100.
cropland = cdl.select('cropland').int()
cropMask = cropland.lt(100)

# Histogram of the masked class values inside the training region.
cropHistogram = cropland.updateMask(cropMask).reduceRegion(
  reducer=ee.Reducer.frequencyHistogram(),
  geometry=trainingRegion,
  scale=30
)
cropFrequencies = ee.Dictionary(cropHistogram.get('cropland'))


def _toCropFeature(code, frequency):
  """Builds a geometry-less feature from one histogram entry."""
  return ee.Feature(None, {'code': ee.Number.parse(code).int(), 'size': frequency})


# One feature per crop code; keep only the commonly occurring crops.
cropsFeatures = ee.FeatureCollection(cropFrequencies.map(_toCropFeature).values())
cropsFeatures = cropsFeatures.filter(ee.Filter.gt('size', 1000))

# The position of a code in this list is its consecutive integer index.
indicesArray = cropsFeatures.aggregate_array('code')


def _withNameAndIndex(f):
  """Sets the crop name and the consecutive index for a crop feature."""
  code = ee.Number(f.get('code')).int()
  return f.set({
    'name': names.get(values.indexOf(code)),
    'index': indicesArray.indexOf(code)
  })


# Final lookup table, used to find the name of a crop from its index.
cropsFeatures = cropsFeatures.map(_withNameAndIndex)

In [None]:
# There should only be one element in the returned FeatureCollection, so we can extract it by calling first().
prediction = predictions.first()

# Determine the name of crop that corresponds to the index returned from Vertex.
predictedClassIndex = ee.Array(prediction.get('classes')).get(ee.Array(prediction.get('scores')).argmax())
predictedClassName = cropsFeatures.filter(ee.Filter.eq('index', predictedClassIndex)).first().get('name')

print(predictedClassName.getInfo())

# Add the name to the FeatureCollection for visualizing on a Map.
processedFc = predictInput.map(
    lambda f:
      f.set({
        'crop_name': predictedClassName
      })
)

In [None]:
# Build an interactive map centered on the training region.
Map = geemap.Map(zoom=15)
Map.centerObject(trainingRegion)

# Base layer: the NAIP imagery.
Map.addLayer(naipImage)

# Overlay: the predicted field, drawn with a yellow outline and orange fill.
style = dict(color='yellow', fillColor="orange")
Map.addLayer(processedFc.style(**style))

# Label the field with its predicted crop name.
Map.add_labels(processedFc, "crop_name", font_size="12pt", font_family="arial", font_weight="bold")

# The map is the last expression of the cell so that it is displayed.
Map