In [None]:
## Imports
import ee
from google.colab import auth
import tensorflow as tf
import folium
from google.cloud import storage
import numpy as np
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter
from tqdm import tqdm
import pandas as pd
from copy import copy
import time
import matplotlib.pyplot as plt
from glob import glob

# Install and import rasterio
!pip install rasterio==1.3.3
import rasterio

# Install and import catboost
!pip install catboost
from catboost import CatBoostClassifier

In [None]:
## Authentication cell

# Authenticate the Google account with a service-account key file. The
# interactive Colab flow is kept here for reference only:
# auth.authenticate_user()
from google.oauth2.service_account import Credentials

# Path to the service-account JSON key file. It is blank in this notebook.
# NOTE(review): presumably this has to be filled in before the cell is run.
service_account_key_path = ''
credentials = Credentials.from_service_account_file(service_account_key_path)

## Accesses S2 imagery to make irrigation predictions
#### The following portion of this notebook loads a pretrained model, accesses the S2 imagery uploaded to GCP, processes the imagery, makes predictions, and then uploads the results back to GCP.


In [None]:
## Load model

# Specify bucket and model name
bucket_name = 'gee_irrigation_detection'
model_name = 'Nigeria_transformer' #'catboost_trained_2021_Nigeria'
model_uri = f'saved_models/{model_name}'
region = 'Nigeria29'

# Load in model. The model family is inferred from a substring of model_name,
# and the prediction cell branches on the same substrings.
if "transformer" in model_name:
    # The Keras model is read from a local directory under /content. The
    # bucket copy would be at f'gs://{bucket_name}/{model_uri}'.
    model = tf.keras.models.load_model(f'/content/{model_name}')
elif "catboost" in model_name:
    # Use the service-account credentials from the authentication cell, the
    # same way the other cells that talk to the bucket do.
    storage_client = storage.Client(credentials=credentials)

    # Download the serialized model into memory and load it from the bytes
    blob = storage_client.bucket(bucket_name).blob(model_uri).download_as_bytes()
    model = CatBoostClassifier()
    model.load_model(blob=blob)
else:
    # Fail here rather than leaving `model` undefined for the later cells
    raise ValueError(
        f"Unrecognized model_name {model_name!r}: expected it to contain "
        "'transformer' or 'catboost'")

In [None]:
# Load raw image filenames

# Build a storage client from the service-account credentials and list every
# object stored under this region's inference-imagery prefix
storage_client = storage.Client(credentials=credentials)
inference_prefix = f'smoothed_imagery/imagery_for_inference/{region}'
blobs = storage_client.list_blobs(bucket_name, prefix=inference_prefix)

# Keep only the object names that contain '.tif'.
# Can change this line if you want to take a subset of images
blob_names = (entry.name for entry in blobs)
cleaned_images = [name for name in blob_names if '.tif' in name]

In [None]:
## Define function for finding irrigible pixels

def find_irrigible_px(smoothed_ts):
    """Flag the rows of ``smoothed_ts`` that pass every irrigability filter.

    Each row is treated as one pixel's EVI timeseries (percentiles are taken
    along axis 1). A row is kept only when all of the following hold:

      * its 10th-percentile EVI (plus machine epsilon) is <= 0.2
      * its 90th-percentile EVI is >= 0.2
      * the 90th:10th percentile ratio is >= 2
      * its maximum over columns 1..35 (the dry-season slice) is >= 0.2

    Args:
        smoothed_ts: 2-D array with one row per pixel and one column per
            timestep.

    Returns:
        Boolean array with one entry per row; True where all filters pass.
    """
    # Low / high EVI per row. Epsilon is added to the low value, presumably so
    # the ratio below never divides by an exact zero.
    evi_p10 = np.percentile(smoothed_ts, q=10, axis=1) + np.finfo(float).eps
    evi_p90 = np.percentile(smoothed_ts, q=90, axis=1)

    # Peak EVI inside the dry-season portion of the timeseries
    dry_season = slice(1, 36)
    dry_season_peak = np.max(smoothed_ts[:, dry_season], axis=-1)

    # One boolean vector per filter; a pixel must satisfy all of them
    checks = (
        evi_p10 <= 0.2,
        evi_p90 >= 0.2,
        (evi_p90 / evi_p10) >= 2,
        dry_season_peak >= 0.2,
    )

    return np.logical_and.reduce(checks)


In [None]:
## Make model predictions and save locally
import os

# Define out directory
out_dir = f'model_predictions/{model_name}/{region}'

# Create the out directory once, up front, if it doesn't exist
os.makedirs(out_dir, exist_ok=True)

# Constants used to normalize the timeseries before inference.
# NOTE(review): presumably the training-set mean / std -- confirm.
ts_offset = 0.2555
ts_scale = 0.16886

# Predictions at or above this value are written out as 1, all others as 0
irrigated_threshold = 0.5

# Iterate through S2 imagery
for image in cleaned_images:
    image_uri = f'gs://{bucket_name}/{image}'
    print(f'Processing and predicting over image: {image_uri}')
    with rasterio.open(image_uri, 'r', driver='GTiff') as src:

        # Define outfile name and profile: same metadata as the input image,
        # but a single uint8 band that holds the 0/1 prediction
        out_profile = src.meta.copy()
        out_profile['dtype'] = rasterio.uint8
        out_profile['count'] = 1
        out_name = f'{out_dir}/{image.split("/")[-1]}'

        # Open outfile
        with rasterio.open(out_name, 'w+', **out_profile) as dest:

            # Iterate through windowed reads (block windows of band 1)
            for ji, window in tqdm(src.block_windows(1)):
                image_window = src.read(window=window)
                n_bands, n_rows, n_cols = image_window.shape

                # Move the band axis last, then flatten to one row per pixel
                imagery_stack = image_window.transpose(1, 2, 0).reshape(
                    n_rows * n_cols, n_bands)

                # Smooth timeseries -- If pulling from
                # gs://gee_irrigation_detection/smoothed_imagery/imagery_for_inference
                # imagery is already smoothed + interp. If not, will need to
                # change next line.
                smoothed_ts = imagery_stack

                # Find valid px
                valid_px = find_irrigible_px(smoothed_ts).astype(bool)

                # Output starts as all zeros; pixels that fail the filters
                # are never passed to the model and keep the value 0
                prediction_output = np.zeros(len(smoothed_ts), dtype=np.uint8)

                # Only call the model when the window has candidate pixels
                if valid_px.any():

                    # Take valid pixels and normalize
                    valid_ts = (smoothed_ts[valid_px, :] - ts_offset) / ts_scale

                    # Branch for deep learning / transformer models
                    if 'transformer' in model_name:
                        # Set up as a batched tf.data.Dataset
                        ds = tf.data.Dataset.from_tensor_slices(valid_ts).batch(512)

                        # Make predictions batch by batch, keeping column 1
                        # of the model output.
                        # NOTE(review): presumably the irrigated-class
                        # probability -- confirm against the model.
                        predictions_list = []
                        for features in ds:
                            predictions = model(features, training=False)
                            predictions_list.append(predictions[:, 1].numpy())

                        predictions_array = np.concatenate(predictions_list)

                    # Branch for catboost model
                    elif 'catboost' in model_name:
                        predictions_array = np.ravel(model.predict(valid_ts))

                    else:
                        raise ValueError(
                            f"Unrecognized model_name {model_name!r}: expected "
                            "it to contain 'transformer' or 'catboost'")

                    # Write the thresholded 0/1 labels. Writing the raw
                    # predictions would truncate every probability below 1.0
                    # to 0 when cast to uint8.
                    prediction_output[valid_px] = (
                        predictions_array >= irrigated_threshold)

                ## Write out, restoring the (band, row, col) window shape
                dest.write(prediction_output.reshape(1, n_rows, n_cols),
                           window=window)

In [None]:
## Export to GCP

# Define export function
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Upload one local file to a bucket and print a confirmation.

    A new storage client is built on every call from the notebook-level
    service-account ``credentials``.

    Args:
        bucket_name: Name of the destination bucket.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Object name to write inside the bucket.
    """
    client = storage.Client(credentials=credentials)
    target = client.get_bucket(bucket_name).blob(destination_blob_name)
    target.upload_from_filename(source_file_name)

    message = 'File {} uploaded to {}.'.format(source_file_name,
                                               destination_blob_name)
    print(message)

# Gather every .tif written to the local prediction directory and upload
# each one, reusing its local relative path as the object name in the bucket
files_to_export = glob(f"{out_dir}/*.tif")
for file_name in files_to_export:
    upload_blob(bucket_name,
                source_file_name=file_name,
                destination_blob_name=file_name)