<a href="https://colab.research.google.com/github/boothmanrylan/canadaMSSForestDisturbances/blob/main/SpatioTemporalUNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup


In [None]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell is executed so edits to them take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Authenticate this Colab runtime with the user's Google account.
from google.colab import auth
auth.authenticate_user()

# Google Cloud project that the gcloud commands in this notebook operate on.
PROJECT_ID = "api-project-269347469410"
!gcloud config set project {PROJECT_ID}

In [None]:
!pip install --quiet msslib

In [None]:
!git clone --quiet https://github.com/boothmanrylan/canadaMSSForestDisturbances.git
%cd canadaMSSForestDisturbances
from mss_forest_disturbances import constants, grid, preprocessing, model

In [None]:
import os
import math
import json

import tensorflow as tf
from tensorflow.python.tools import saved_model_utils
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, BoundaryNorm

# Config

In [None]:
# Import the model module under an alias: this cell binds the name `model` to
# the built model below, so looking up build_model through `model` would fail
# the second time the cell is run.
from mss_forest_disturbances import model as model_lib

# Glob over every ecozone directory of the exported TFRecords.
BASE_PATH = f'gs://{constants.BUCKET}/scratch/test_export4/ecozone*/'

# Shard 00000 of each ecozone is used for testing.
TEST_PATTERN = os.path.join(BASE_PATH, '*-00000-of-*.tfrecord.gz')
# NOTE(review): the final [1-9] also skips shards whose number ends in 0
# (00010, 00020, ...), so those shards are in neither split -- confirm that
# this is intended.
TRAIN_PATTERN = os.path.join(BASE_PATH, '*-000[0-9][1-9]-of*.tfrecord.gz')

# With train=True, build_dataset also returns a subset that is handed to
# build_model as `normalization_subset`; the two names must match.
train_dataset, normalization_subset = dataset.build_dataset(
    pattern=TRAIN_PATTERN,
    parse_options=constants.DEFAULT_PARSE_OPTIONS,
    train=True,
)
test_dataset = dataset.build_dataset(
    pattern=TEST_PATTERN,
    parse_options=constants.DEFAULT_PARSE_OPTIONS,
    train=False
)

# `model` is the built model from here on (later cells call model.compile and
# model.fit); the module itself stays reachable as `model_lib`.
model = model_lib.build_model(
    normalization_subset=normalization_subset,
    **constants.DEFAULT_MODEL_OPTIONS
)

# Seeded random generator for reproducible randomness.
RNG = tf.random.Generator.from_seed(42, alg="philox")

# AI Platform Hosting Config
REGION = "us-central1"
MODEL_DIR = f"gs://{constants.BUCKET}/scratch/models/"
EEIFIED_DIR = f"gs://{constants.BUCKET}/scratch/eeified_models/test_model_hosting/"
MODEL_NAME = "test_model"
ENDPOINT_NAME = "test_endpoint"



### Model architecture explanation


In [None]:
def calc_erf(kernels, dilation_rates):
    """Return the effective receptive field of a stack of dilated convolutions.

    Each layer with kernel size ``k`` and dilation rate ``d`` covers an
    effective kernel of ``k + (k - 1) * (d - 1)`` pixels. The receptive field
    of the stack is the sum of the effective kernels minus one pixel of
    overlap for every layer after the first.

    Args:
        kernels: Sequence of kernel sizes, one per layer.
        dilation_rates: Sequence of dilation rates, one per layer.

    Returns:
        The receptive field size as a numpy scalar.
    """
    sizes = np.asarray(kernels)
    rates = np.asarray(dilation_rates)
    effective_kernels = sizes + (sizes - 1) * (rates - 1)
    shared_pixels = len(kernels) - 1
    return effective_kernels.sum() - shared_pixels

In [None]:
# Display the effective receptive field for the kernel sizes in KERNELS and
# the dilation rates in DILATION_RATES.
# NOTE(review): KERNELS and DILATION_RATES are not defined in any cell shown
# in this notebook -- confirm where they are meant to come from.
calc_erf(KERNELS, DILATION_RATES)

# Train Model

In [None]:
# checkpoint to save progress during training and for easier loading of the
# model later on, but need to use model.save(...) for EEification
checkpoint_path = os.path.join(MODEL_DIR, "test", "checkpoints")
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
)

model.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4)
)

# Per-class loss weights: the classes listed in disturbance_classes get
# high_importance, every other class index gets low_importance.
low_importance = 1
high_importance = 2
disturbance_classes = [4, 5, 6, 7]
# NOTE(review): NUM_OUTPUTS is not defined in any cell shown in this notebook
# -- confirm where the number of output classes should come from.
class_weight = {
    x: (high_importance if x in disturbance_classes else low_importance)
    for x in range(NUM_OUTPUTS)
}

# Uncomment to start from the weights saved at checkpoint_path.
# model.load_weights(checkpoint_path)
model.fit(
    train_dataset,
    steps_per_epoch=50,
    epochs=20,
    callbacks=[checkpoint],
    class_weight=class_weight,
)

# Model Hosting

In [None]:
# Delete the hosted model whose ID is MODEL_NAME; presumably this clears a
# previous upload so the same model ID can be reused by the upload cell below.
!gcloud ai models delete {MODEL_NAME} --project={PROJECT_ID} --region={REGION}

In [None]:
# upload the model
CONTAINER_IMAGE = 'us-docker.pkg.dev/vertex-ai/prediction/tf2-gpu.2-11:latest'

!gcloud ai models upload \
    --project={PROJECT_ID} \
    --artifact-uri={SAVED_MODEL_PATH} \
    --region={REGION} \
    --container-image-uri={CONTAINER_IMAGE} \
    --description={MODEL_NAME} \
    --display-name={MODEL_NAME} \
    --model-id={MODEL_NAME}

In [None]:
# create endpoint for model
!gcloud ai endpoints create \
    --display-name={ENDPOINT_NAME} \
    --region={REGION} \
    --project={PROJECT_ID}

In [None]:
# deploy the model

# may need to filter, if you have multiple of these
# List the IDs of every endpoint in the project/region; the shell output is
# captured as a list with one entry per output line.
ENDPOINT_ID = !gcloud ai endpoints list \
    --project={PROJECT_ID} \
    --region={REGION} \
    --format="value(ENDPOINT_ID.scope())"
# Keep only the last line of the listing as the endpoint to deploy to.
ENDPOINT_ID = ENDPOINT_ID[-1]

# Deploy the uploaded model to that endpoint on an n1-standard-8 machine with
# a single NVIDIA T4 accelerator.
!gcloud ai endpoints deploy-model {ENDPOINT_ID} \
    --project={PROJECT_ID} \
    --region={REGION} \
    --model={MODEL_NAME} \
    --machine-type=n1-standard-8 \
    --accelerator=type=nvidia-tesla-t4,count=1 \
    --display-name={MODEL_NAME}

# Verify Model Hosting Was Successful

In [None]:
# Authenticate with Earth Engine and initialize the client library before any
# ee.* objects are created in the cells below.
import ee
ee.Authenticate()
ee.Initialize()

In [None]:
# NOTE(review): this repeats the clone and %cd from the Setup section. If the
# Setup cells already ran in this runtime, the working directory is already
# inside the repository -- presumably this cell is only needed when this
# section is run in a fresh runtime; confirm.
!git clone https://github.com/boothmanrylan/canadaMSSForestDisturbances.git
%cd canadaMSSForestDisturbances

In [None]:
!pip install --quiet msslib
!pip install --quiet geemap

In [None]:
from mss_forest_disturbances import data
import geemap
from msslib import msslib

In [None]:
# Show an interactive map. Draw an area of interest on it before running the
# next cell, which reads the first drawn feature from Map.draw_features.
Map = geemap.Map()
Map

In [None]:
# The area of interest is the first feature drawn on the map above. Fail with
# an actionable message instead of a bare IndexError when nothing was drawn.
if not Map.draw_features:
    raise ValueError(
        "No features have been drawn on the map: draw an area of interest "
        "on the map above, then re-run this cell."
    )
aoi = Map.draw_features[0]
year = 1990

# All MSS images intersecting the area of interest for the chosen year and
# day-of-year range, with no cloud-cover filtering (maxCloudCover=100).
collection = msslib.getCol(
    aoi=aoi.geometry(),
    yearRange=[year, year],
    doyRange=data.DOY_RANGE,
    maxCloudCover=100
)

# Sort ascending by the CLOUD_COVER property and keep the first image.
image = collection.sort('CLOUD_COVER').first()

Map.addLayer(image, msslib.visDn2, "Image")

In [None]:
# Take the first ecozone feature that intersects the area of interest and read
# its ECOZONE_ID property.
ecozone = ee.FeatureCollection(data.ECOZONES).filterBounds(aoi.geometry()).first()
ecozone_id = ecozone.getNumber('ECOZONE_ID')
# prepare_image_for_export returns the model input image together with a
# target label; only the input image is used in the cells shown below.
prepared_image, target_label = data.prepare_image_for_export(image)
# Attach the ecozone ID to the image as its 'ecozone' property.
prepared_image = prepared_image.set('ecozone', ecozone_id)

In [None]:
# Vertex AI resource name of the deployed endpoint. This is a '/'-separated
# identifier rather than a filesystem path, so it is formatted directly
# instead of being assembled with os.path.join (whose separator depends on
# the platform).
endpoint_path = (
    f"projects/{PROJECT_ID}/locations/{REGION}/endpoints/{ENDPOINT_ID}"
)

# Wrap the endpoint as an Earth Engine model. Input tiles are
# PATCH_SIZE x PATCH_SIZE with OVERLAP pixels of overlap, in the project's
# default projection; the output is a single float band named 'label'.
# NOTE(review): METADATA is not defined in any cell shown in this notebook --
# confirm which image properties should be forwarded via inputProperties.
hosted_model = ee.Model.fromVertexAi(
    endpoint=endpoint_path,
    inputTileSize=(constants.PATCH_SIZE, constants.PATCH_SIZE),
    inputOverlapSize=(constants.OVERLAP, constants.OVERLAP),
    inputProperties=METADATA,
    proj=data.get_default_projection(),
    fixInputProj=True,
    outputBands={
        'label': {
            'type': ee.PixelType.float(),
            'dimensions': 1
        },
    },
    maxPayloadBytes=3000000,
)

In [None]:
# Run the hosted model over the prepared image.
prediction = hosted_model.predictImage(prepared_image)

# Export the prediction to an Earth Engine asset over the footprint of the
# source image at a scale of 60, using "mode" as the default pyramiding policy.
task = ee.batch.Export.image.toAsset(
    image=prediction,
    description="test_vertex_ai_hosting",
    assetId="projects/api-project-269347469410/assets/rylan-mssforestdisturbances/scratch/test_vertex_ai_hosting",
    pyramidingPolicy={".default": "mode"},
    region=image.geometry(),
    scale=60,
    crs=data.get_default_projection(),
)
# Submit the export as a batch task.
task.start()

# TODO
* __not enough disturbances in exported data__
* Add index to distinguish new harvest from old harvest
    * red / ndvi
    * need way to prove/argue that this is a useful spectral index
* Add index to distinguish new burn scar from old burn scar
* temporal model
    * write code
    * figure out how to export training data
* Figure out how to run colab with a paid backend
* Calling the Vertex AI hosted model through Earth Engine and exporting the result is very slow (24 minutes for one image). Batch export and running everything in Google Cloud is likely faster but more expensive, and for the next step we need to be able to look at pixels through time, which will be more difficult outside of Earth Engine.
