In [None]:
## Imports
from google.colab import auth
from google.cloud import storage
import numpy as np
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter
from tqdm import tqdm
import pandas as pd
from copy import copy
import time
from glob import glob
import os

# Install and import rasterio
!pip install rasterio==1.3.3
import rasterio

In [None]:
# Authenticate the Google account of the current Colab user.
# NOTE(review): presumably this provides the credentials that the later
# storage.Client() calls and gs:// raster reads rely on -- confirm.
auth.authenticate_user()

#### Collect S2 raw imagery for interpolation and smoothing


In [None]:
## Load raw image filenames

# Bucket holding the imagery, and the region whose images are processed
bucket_name = 'gee_irrigation_detection'
region = 'Nigeria29'

# Initialize the client and list every object under the region's raw-imagery prefix
storage_client = storage.Client()
blobs = storage_client.list_blobs(
    bucket_name,
    prefix=f'raw_imagery/imagery_for_inference/{region}',
)

# Keep only object names containing '.tif'.
# Slice or filter this list if you want to process a subset of images.
raw_images = [
    blob_name
    for blob_name in (blob.name for blob in blobs)
    if '.tif' in blob_name
]

In [None]:
# Optional: uncomment the next line to process only a subset of the listed images
# raw_images=raw_images[25:29]

In [None]:
## Define functions for processing S2 images for prediction


def temporal_interp_and_smoothing(imagery_stack, window_length=5, polyorder=3):
    """Gap-fill and smooth the timeseries of every pixel in an imagery stack.

    Zeros are treated as missing observations. Each pixel's timeseries is
    linearly interpolated across its gaps, smoothed with a Savitzky-Golay
    filter, and clipped to the range [0, 1]. Pixels with no valid observation
    at any timestep come back as all zeros.

    Args:
        imagery_stack: 3-D array indexed as (timestep, row, column).
        window_length: Window length handed to ``savgol_filter``. The stack
            must contain at least this many timesteps, otherwise
            ``savgol_filter`` raises ``ValueError``.
        polyorder: Polynomial order handed to ``savgol_filter``.

    Returns:
        Array with the same (timestep, row, column) shape as the input.
    """
    # Work in float32 so that NaN can be used to mark missing observations
    stack = imagery_stack.astype(np.float32)
    n_steps, height, width = stack.shape

    # Wrap-around padding: the last timestep is placed before the first and
    # the first after the last, so a gap at either end of the series can be
    # interpolated against the opposite end
    padded = np.concatenate([stack[-1:], stack, stack[:1]], axis=0)

    # One row per pixel, one column per (padded) timestep
    df = pd.DataFrame(
        padded.transpose(1, 2, 0).reshape(height * width, n_steps + 2)
    )

    # Change 0s to NaNs for interpolation
    df = df.replace(0, np.nan)

    # Interpolate. limit_direction='both' is required here: the pandas default
    # only fills forward, which leaves leading NaNs in place whenever the
    # first and last timesteps are both missing. The smoothing filter would
    # then spread those NaNs over neighbouring timesteps and they would all be
    # zeroed below. Rows that are entirely NaN are still left as is.
    df = df.interpolate(method='linear', axis=1, limit_direction='both')

    # Drop the two padding columns to recover the original timesteps
    interpolated_ts = df.iloc[:, 1:-1].to_numpy()

    # Smooth along the time axis
    smoothed_ts = savgol_filter(interpolated_ts,
                                window_length=window_length,
                                polyorder=polyorder,
                                axis=1)

    # Fill all remaining NaNs (pixels with no valid observation) with zeros
    smoothed_ts = np.nan_to_num(smoothed_ts, nan=0)

    # Clip
    smoothed_ts = np.clip(smoothed_ts, 0, 1)

    # Restore the (timestep, row, column) layout of the input
    return smoothed_ts.reshape(height, width, n_steps).transpose(2, 0, 1)

In [None]:
# Define out directory
out_dir = f'smoothed_imagery/imagery_for_inference/{region}'

# Create the out directory once, up front, if it doesn't exist
os.makedirs(out_dir, exist_ok=True)

# Report the size of the job once instead of repeating it for every image
print(f'Total number of images for smoothing + interpolation: {len(raw_images)}')

# Iterate through S2 imagery
for image in raw_images:
    image_uri = f'gs://{bucket_name}/{image}'
    print(f'Smoothing and interpolating image: {image_uri}')
    with rasterio.open(image_uri, 'r', driver='GTiff') as src:

        # Load profile and compute total number of windowed reads.
        # Cast to a plain int so the progress bar does not show a float total.
        profile = src.profile
        total_windows = int(
            np.ceil(profile['width'] / profile['blockxsize']) *
            np.ceil(profile['height'] / profile['blockysize'])
        )

        # The output reuses the source profile, but stores float32 values
        out_profile = profile.copy()
        out_profile['dtype'] = 'float32'

        # Output file keeps the source file's base name
        out_name = f'{out_dir}/{image.split("/")[-1]}'

        # Open outfile
        with rasterio.open(out_name, 'w+', **out_profile) as dest:

            # Iterate through windowed reads; each window is read from the
            # source, smoothed, and written to the same window of the output
            for _, window in tqdm(src.block_windows(1), total=total_windows):
                image_window = src.read(window=window)

                # Smooth timeseries
                smoothed_ts = temporal_interp_and_smoothing(image_window)

                ## Write out
                dest.write(smoothed_ts, window=window)

In [None]:
## Export to GCP

# Define export function
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Upload one local file to a bucket and print a confirmation.

    Args:
        bucket_name: Name of the destination bucket.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Object name to create inside the bucket.
    """
    # A new client is created on every call; the bucket is looked up through
    # it and the destination blob handle is derived from the bucket
    target_blob = (
        storage.Client()
        .get_bucket(bucket_name)
        .blob(destination_blob_name)
    )
    target_blob.upload_from_filename(source_file_name)

    message = 'File {} uploaded to {}.'.format(source_file_name,
                                               destination_blob_name)
    print(message)

# Gather every .tif file written to the local out directory
files_to_export = glob(f"{out_dir}/*.tif")

# Each file's local relative path is also used as its object name in the bucket
for local_path in files_to_export:
    upload_blob(bucket_name,
                source_file_name=local_path,
                destination_blob_name=local_path)