In [None]:
# Enable importing of utilities.
import os
import sys
import warnings

# NOTEBOOK_ROOT is expected to name the directory that holds the `utils` package
# imported by later cells.
notebook_root = os.environ.get('NOTEBOOK_ROOT')
if notebook_root:
    # Skip the append when the entry is already present so that re-running this
    # cell does not keep growing sys.path.
    if notebook_root not in sys.path:
        sys.path.append(notebook_root)
else:
    # An unset variable used to append None to sys.path, which the import system
    # silently ignores; report the real problem here instead.
    warnings.warn("NOTEBOOK_ROOT is not set; imports from the 'utils' package may fail.")

# ARDC Training: Python Notebooks
Task-A: Cloud-free Mosaics and K-means Clustering

> ### Import the Datacube Configuration

In [None]:
# Data Cube client library plus the project's data-access helper module.
import datacube
import utils.data_cube_utilities.data_access_api as dc_api  

# Configure S3 access with requester-pays enabled; this runs before any cell loads data.
from datacube.utils.aws import configure_s3_access
configure_s3_access(requester_pays=True)

# Create the helper API and a Datacube connection, then assign the connection to
# the helper so both go through the same `dc` object.
api = dc_api.DataAccessApi()
dc = datacube.Datacube(app = 'ardc_task_a')
api.dc = dc

>### Browse the available Data Cubes   

In [None]:
# List the products known to the Data Cube, then keep only the rows whose
# 'format' column equals 'NetCDF'.
# NOTE(review): products stored in another format are hidden by this filter —
# confirm the product selected in the next code cell still appears here.
list_of_products = dc.list_products()
netCDF_products = list_of_products[list_of_products['format'] == 'NetCDF']
# Bare last expression so the notebook displays the filtered table.
netCDF_products

>### Pick a product  
>Use the platform and product names from the previous block to select a Data Cube.  

In [None]:
# Change the data platform and data cube here

# Both names are passed to the extent query and to dc.load() in later cells.
platform = 'LANDSAT_7'
product = 'ls7_usgs_sr_scene'

> #### Display Latitude-Longitude and Time Bounds of the Data Cube

In [None]:
from utils.data_cube_utilities.dc_time import _n64_to_datetime, dt_to_str

# Query the extents of the selected platform/product.
# NOTE(review): measurements=[] presumably avoids loading any band data so that
# only the coordinates come back — confirm against get_full_dataset_extent.
extents = api.get_full_dataset_extent(platform = platform, product = product, measurements=[])

# Reduce each coordinate array to a (min, max) pair.
latitude_extents = (min(extents['latitude'].values),max(extents['latitude'].values))
longitude_extents = (min(extents['longitude'].values),max(extents['longitude'].values))
time_extents = (min(extents['time'].values),max(extents['time'].values))

print("Latitude Extents:", latitude_extents)
print("Longitude Extents:", longitude_extents)
# The time bounds are passed through _n64_to_datetime and then dt_to_str before printing.
print("Time Extents:", list(map(dt_to_str, map(_n64_to_datetime, time_extents))))

# Visualize Data Cube Region

In [None]:
## The code below renders a map that can be used to orient yourself with the region.
## It is drawn from the full-product latitude/longitude extents computed in the previous code cell.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude = latitude_extents, longitude = longitude_extents)

> ### Pick a smaller analysis region and display that region
> Try to keep your region smaller than 0.2-deg x 0.2-deg for rapid processing.  
> Pick the time extents for your mosaic product (keep them to 1 year or less).

In [None]:
# NOTE: this cell overwrites the latitude_extents / longitude_extents / time_extents
# values computed from the full product earlier in the notebook; every later cell
# uses the values assigned here. Each spatial extent is a (min, max) pair.

## Vietnam - Central Lam Dong Province ##
# longitude_extents = (107.80, 108.00)
# latitude_extents  = (11.70, 11.90)

## Zanzibar - Zanzibar City
latitude_extents  = (-6.25, -6.07)
longitude_extents = (39.15, 39.29)

# (start, end) date strings handed to dc.load() as the `time` query.
time_extents = ('2015-01-01', '2015-12-31')

In [None]:
# Redraw the map, this time for the smaller analysis region chosen in the previous code cell.
display_map(latitude = latitude_extents, longitude = longitude_extents)

## Load the dataset and the required spectral bands or other parameters
After loading, you will view the Xarray dataset. Notice that the dimensions give the number of pixels along the latitude and longitude axes, as well as the number of time slices (`time`) in your time series.

In [None]:
# Load the six spectral bands plus the pixel_qa band for the chosen region and
# time range. pixel_qa is required by the cloud-masking functions defined later.
landsat_dataset = dc.load(latitude = latitude_extents,
                          longitude = longitude_extents,
                          platform = platform,
                          time = time_extents,
                          product = product,
                          measurements = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2', 'pixel_qa']) 

In [None]:
# View the dimensions and sample content from the cube
# (a bare expression makes the notebook display the dataset).
landsat_dataset

# Display Example Images  

>#### Single band visualization   
> For a quick inspection, let's look at two images. The first image will allow the selection of any band (red, blue, green, nir, swir1, swir2) to produce a grey-scale image of any band. The second image will mask clouds with bright red on an RGB image. 
> Select the desired acquisition (time slice) in the block below. Indices are zero-based, so you can select from 0 to N-1, where N is the number of time slices (the `time` dimension) shown in the block above. Change the comment statements below to select the band for the first image.

In [None]:
# Zero-based index of the acquisition (time slice) to display. It is used with
# .isel(time=...) below, so valid values run from 0 to (number of time slices - 1).
# NOTE(review): 10 is out of range if fewer than 11 acquisitions were loaded.
acquisition_number = 10

In [None]:
%matplotlib inline
# Grey-scale plot of a single band for the selected acquisition.
# Exactly one of the lines below should be uncommented to choose the band.
# landsat_dataset.red.isel(time = acquisition_number).plot(cmap = "Greys")
landsat_dataset.green.isel(time = acquisition_number).plot(cmap = "Greys")
# landsat_dataset.blue.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.nir.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.swir1.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.swir2.isel(time = acquisition_number).plot(cmap = "Greys")

>#### Define Cloud Masking Function   
Removes clouds and cloud shadows based on the Landsat pixel QA information
This is only for reference ... nothing to modify here

In [None]:
import numpy as np  

# pixel_qa values that this notebook accepts as usable observations.
# NOTE(review): these look like the Landsat pixel_qa flag bits (2 = clear,
# 4 = water, 8 = cloud shadow) combined with 64 for low cloud confidence —
# confirm against the product guide for the sensor in use.
_QA_CLEAR = 2 + 64
_QA_WATER = 4 + 64
_QA_SHADOW = 8 + 64


def _clean_pixel_mask(dataset, include_shadows = False):
    """Return a boolean array that is True where pixel_qa marks a usable pixel.

    A pixel is usable when its pixel_qa value is exactly the clear or water
    value (and, when `include_shadows` is True, the shadow value). Any other
    value, including the same flags combined with additional bits, is treated
    as not usable.

    Parameters:
        dataset: object exposing `pixel_qa.values` as a NumPy array.
        include_shadows: when True, shadow pixels also count as usable.

    Returns:
        Boolean NumPy array with the same shape as `dataset.pixel_qa.values`.
    """
    accepted_values = [_QA_CLEAR, _QA_WATER]
    if include_shadows:
        accepted_values.append(_QA_SHADOW)
    return np.isin(dataset.pixel_qa.values, accepted_values)


def generate_cloud_mask(dataset, include_shadows = False):
    """Return a boolean array that is True where a pixel should be masked out.

    This is the element-wise inverse of the usable-pixel mask, so True marks
    every pixel that is not clear or water (or shadow, when `include_shadows`
    is True).

    Parameters:
        dataset: object exposing `pixel_qa.values` as a NumPy array.
        include_shadows: when True, shadow pixels are NOT masked.

    Returns:
        Boolean NumPy array with the same shape as `dataset.pixel_qa.values`.
    """
    return np.invert(_clean_pixel_mask(dataset, include_shadows))


def remove_clouds(dataset, include_shadows = False):
    """Return `dataset.where(mask)`, where `mask` is the usable-pixel mask.

    Parameters:
        dataset: object exposing `pixel_qa.values` and a `where(mask)` method.
        include_shadows: when True, shadow pixels are kept rather than masked.

    Returns:
        Whatever `dataset.where` returns for the usable-pixel mask.
    """
    return dataset.where(_clean_pixel_mask(dataset, include_shadows))

> #### Mask clouds from your selected acquisition and visualize the scene and mask 
Now we will look at two RGB images, where the second image shows the cloud, cloud shadow and no-data mask in RED. The scene is the same as the acquisition selected above.

In [None]:
# cloud_mask is True where a pixel is NOT flagged clear or water; it is used
# below to paint those pixels on the RGB image.
cloud_mask = generate_cloud_mask(landsat_dataset)
# cloudless is the dataset passed through Dataset.where() with the inverse
# (clean) mask. It is not referenced again in the cells shown here.
cloudless = remove_clouds(landsat_dataset)

In [None]:
import matplotlib.pyplot as plt
from utils.data_cube_utilities.dc_rgb import rgb

# Unmasked RGB image of the selected acquisition, for comparison with the
# painted version drawn in a later cell.
rgb(landsat_dataset, time_index = acquisition_number)
plt.show()

In [None]:
# Colour triplet used below to paint the masked pixels.
red = [255,0,0]

In [None]:
# Same acquisition as above, with every pixel where cloud_mask is True painted in `red`.
rgb(landsat_dataset,time_index = acquisition_number,
    paint_on_mask = [(cloud_mask, red)])


# Cleaning up the clouds and creating a cloud-free mosaic
Remember that this process will filter clouds from the entire time series stack

> **Most Recent Pixel Mosaic**  
>Masks clouds from imagery and uses the most recent cloud-free pixels.  

In [None]:
from utils.data_cube_utilities.dc_mosaic import create_mosaic

def mrf_mosaic(dataset):
    """Build a mosaic of `dataset` with create_mosaic, using only clean pixels.

    The mask comes from the pixel_qa band, which is bundled with most Landsat
    products. generate_cloud_mask marks the pixels to discard, so its result is
    inverted to obtain the mask of pixels to keep.
    """
    pixels_to_discard = generate_cloud_mask(dataset)
    pixels_to_keep = ~pixels_to_discard
    mosaic = create_mosaic(dataset, clean_mask = pixels_to_keep)
    return mosaic

In [None]:
# Collapse the whole time series into a single most-recent-pixel composite.
recent_composite = mrf_mosaic(landsat_dataset)

In [None]:
# Grey-scale view of the composite's near-infrared band.
recent_composite.nir.plot(cmap = "Greys")

In [None]:
# RGB rendering of the most-recent-pixel composite.
rgb(recent_composite)

----  
> **Median Mosaic**  
>  Masks clouds from imagery and uses the median-valued cloud-free pixels in the time series.  

In [None]:
from utils.data_cube_utilities.dc_mosaic import create_median_mosaic

def median_mosaic(dataset):
    """Build a mosaic of `dataset` with create_median_mosaic, using only clean pixels.

    The mask comes from the pixel_qa band, which is bundled with most Landsat
    products. generate_cloud_mask marks the pixels to discard, so its result is
    inverted to obtain the mask of pixels to keep.
    """
    pixels_to_discard = generate_cloud_mask(dataset)
    pixels_to_keep = ~pixels_to_discard
    mosaic = create_median_mosaic(dataset, clean_mask = pixels_to_keep)
    return mosaic

In [None]:
# Collapse the whole time series into a single median composite; this is the
# image clustered later in the notebook.
median_composite = median_mosaic(landsat_dataset)

In [None]:
# Grey-scale view of the median composite's near-infrared band.
median_composite.nir.plot(cmap = "Greys")

In [None]:
# RGB rendering of the median composite.
rgb(median_composite)

## Select bands used for clustering

In [None]:
# Band names handed to kmeans_cluster_dataset below; each must be one of the
# measurements loaded into landsat_dataset.
cluster_bands = ['red', 'green', 'blue', 'swir1']

# Perform K-Means clustering and view the output

In [None]:
def figure_ratio(ds, fixed_width = 10):
    """Return a (width, height) figure size that follows the dataset's grid shape.

    The width is always `fixed_width`; the height is scaled by the ratio of the
    number of latitude entries to the number of longitude entries.

    Parameters:
        ds: object whose `latitude` and `longitude` attributes support len().
        fixed_width: width of the resulting figure size.

    Returns:
        Tuple (width, height).
    """
    n_rows = len(ds.latitude)
    n_cols = len(ds.longitude)
    width_per_column = fixed_width / n_cols
    return (fixed_width, n_rows * width_per_column)

In [None]:
from utils.data_cube_utilities.dc_clustering import kmeans_cluster_dataset

# change the number of clusters in the line below, as desired
# this example uses the "median composite" image from above
# NOTE(review): no random seed is passed here; if kmeans_cluster_dataset does not
# fix one internally, cluster labels may differ between runs — confirm.
classification_x =  kmeans_cluster_dataset(median_composite, cluster_bands, n_clusters=8)

In [None]:
# Plot the k-means classification result; figure_ratio sizes the figure so its
# height follows the latitude/longitude pixel counts of the result.
classification_x.plot(figsize = figure_ratio(classification_x))