In [None]:
# Enable importing of utilities.
import sys
import os
import warnings

# NOTEBOOK_ROOT presumably names the directory holding the `utils` package
# imported by later cells -- TODO confirm against the deployment environment.
notebook_root = os.environ.get('NOTEBOOK_ROOT')
if notebook_root is None:
    # Never put None on sys.path: it is ignored by the import system and only
    # hides the real cause of the import errors that would follow.
    warnings.warn("NOTEBOOK_ROOT is not set; importing `utils` may fail.")
elif notebook_root not in sys.path:
    # Keep the cell idempotent: re-running it must not add duplicate entries.
    sys.path.append(notebook_root)

# ARDC Training: Python Notebooks
Task-E: This notebook demonstrates 2D transect analyses and 3D Hovmöller plots. We will run these for NDVI (Normalized Difference Vegetation Index, land) and TSM (Total Suspended Matter, water quality) to show how the data vary along a line (transect), both spatially for a single time slice and spatio-temporally across the entire time series.

> ### Import the Datacube Configuration

In [None]:
import xarray as xr
import numpy as np

import datacube
import utils.data_cube_utilities.data_access_api as dc_api  

from datacube.utils.aws import configure_s3_access
configure_s3_access(requester_pays=True)

api = dc_api.DataAccessApi()
dc = api.dc

>### Browse the available Data Cubes   

In [None]:
list_of_products = dc.list_products()
netCDF_products = list_of_products[list_of_products['format'] == 'NetCDF']
netCDF_products

>### Pick a product  
>Use the platform and product names from the previous block to select a Data Cube.  

In [None]:
# Change the data platform and data cube here

platform = 'LANDSAT_7'
product = 'ls7_usgs_sr_scene'
collection = 'c1'
level = 'l2'

> #### Display Latitude-Longitude and Time Bounds of the Data Cube

In [None]:
from utils.data_cube_utilities.dc_time import _n64_to_datetime, dt_to_str

extents = api.get_full_dataset_extent(platform = platform, product = product, measurements=[])

latitude_extents = (min(extents['latitude'].values),max(extents['latitude'].values))
longitude_extents = (min(extents['longitude'].values),max(extents['longitude'].values))
time_extents = (min(extents['time'].values),max(extents['time'].values))

print("Latitude Extents:", latitude_extents)
print("Longitude Extents:", longitude_extents)
print("Time Extents:", list(map(dt_to_str, map(_n64_to_datetime, time_extents))))

# Visualize Data Cube Region

In [None]:
## The code below renders a map that can be used to orient yourself with the region.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude = latitude_extents, longitude = longitude_extents)

> ### Pick a smaller analysis region and display that region
Try to keep your region to less than 0.2-deg x 0.2-deg for rapid processing. You can click on the map above to find the Lat-Lon coordinates of any location. You will want to identify a region with an inland water body and some vegetation. Pick a time window of several years.

In [None]:
# Analysis region and time window. These assignments replace the full-extent
# tuples computed earlier; uncomment one of the alternatives to switch region.

## Vietnam - Central Lam Dong Province ##
# longitude_extents = (107.0, 107.2)
# latitude_extents  = (11.7, 12.0)

## Vietnam Ho Tri An Lake
# longitude_extents = (107.0, 107.2)
# latitude_extents  = (11.1, 11.3)

## Senegal - Delta du Saloum
latitude_extents = (13.55, 14.12)
longitude_extents = (-16.80, -16.38)

# (start, end) dates of the time window passed to dc.load below.
time_extents = ('2005-01-01', '2005-12-31')

In [None]:
display_map(latitude = latitude_extents, longitude = longitude_extents)

## Load the dataset and the required spectral bands or other parameters
After loading, you will view the xarray Dataset. Notice that the dimensions give the number of pixels along the latitude and longitude dimensions as well as the number of time slices (time) in your time series.

In [None]:
landsat_dataset = dc.load(latitude = latitude_extents,
                          longitude = longitude_extents,
                          platform = platform,
                          time = time_extents,
                          product = product,
                          measurements = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2', 'pixel_qa']) 

In [None]:
landsat_dataset
#view the dimensions and sample content from the cube

## Preparing the data
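We will use the Landsat pixel_qa band to build masks for clear, water, cloud-shadow, and cloud pixels; the clear and water pixels together form the clean mask used to filter out clouds. Next, we will calculate the values of NDVI (vegetation index) and TSM (water quality).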

In [None]:
from utils.data_cube_utilities.clean_mask import landsat_qa_clean_mask

plt_col_lvl_params = dict(platform=platform, collection=collection, level=level)
clear_xarray  = landsat_qa_clean_mask(landsat_dataset, cover_types=['clear'], **plt_col_lvl_params)
water_xarray  = landsat_qa_clean_mask(landsat_dataset, cover_types=['water'], **plt_col_lvl_params)
shadow_xarray = landsat_qa_clean_mask(landsat_dataset, cover_types=['cld_shd'], **plt_col_lvl_params) 
cloud_xarray  = landsat_qa_clean_mask(landsat_dataset, cover_types=['cloud'], **plt_col_lvl_params) 

In [None]:
clean_xarray = (clear_xarray | water_xarray).rename("clean_mask")

In [None]:
def NDVI(dataset):
    """
    Compute the Normalized Difference Vegetation Index: (nir - red) / (nir + red).

    Parameters
    ----------
    dataset : object exposing `nir` and `red` attributes
        Typically an xarray.Dataset holding the near-infrared and red bands.

    Returns
    -------
    The element-wise index, renamed to "NDVI". Pixels where `nir + red` is zero
    are not special-cased and yield NaN or +/-inf.
    """
    def _as_float(band):
        # Integer bands (e.g. int16) can wrap around in `nir + red`, silently
        # corrupting the ratio, so widen them first. Floating-point bands are
        # passed through untouched to keep their dtype.
        if np.issubdtype(band.dtype, np.integer):
            return band.astype('float64')
        return band

    nir = _as_float(dataset.nir)
    red = _as_float(dataset.red)
    return ((nir - red) / (nir + red)).rename("NDVI")

In [None]:
ndvi_xarray = NDVI(landsat_dataset)  # Vegetation Index

In [None]:
from utils.data_cube_utilities.dc_water_quality import tsm

tsm_xarray = tsm(landsat_dataset, clean_mask = water_xarray.values.astype(bool) ).tsm

### Combine everything into one XARRAY for further analysis

In [None]:
combined_dataset = xr.merge([landsat_dataset,
          clean_xarray,
          clear_xarray,
          water_xarray,
          shadow_xarray,
          cloud_xarray,                  
          ndvi_xarray,
          tsm_xarray])

# Copy original crs to merged dataset 
combined_dataset = combined_dataset.assign_attrs(landsat_dataset.attrs)

## Define a path for a transect  
A transect is just a line that will run across our region of interest. Use the display map above to find the end points of your desired line. If you click on the map it will give you precise Lat-Lon positions for a point.

### Start with a line across a mix of water and land

In [None]:
# Water and Land Mixed Examples

mid_lon = np.mean(longitude_extents)
mid_lat = np.mean(latitude_extents)

# North-South Path
start = (latitude_extents[0], mid_lon)
end = (latitude_extents[1], mid_lon)

# East-West Path
# start = (mid_lat, longitude_extents[0])
# end = (mid_lat, longitude_extents[1])

# East-West Path for Lake Ho Tri An
# start = ( 11.25, 107.02 )
# end   = ( 11.25, 107.18 )

### Plot the transect line

In [None]:
import folium
import numpy as np  
from folium.features import CustomIcon

def plot_a_path(points , zoom = 15):
    """
    Draw a path on an interactive folium map centred on the path's mean position.

    Parameters
    ----------
    points : iterable of (latitude, longitude) pairs
        Vertices of the path, in drawing order.
    zoom : int, optional
        Initial zoom level handed to folium (default 15).

    Returns
    -------
    folium.Map
        The map with the path added as an orange polyline.

    Raises
    ------
    ValueError
        If `points` is empty.
    """
    points = list(points)
    if not points:
        raise ValueError("plot_a_path requires at least one (latitude, longitude) point")

    lats, lons = zip(*points)

    the_map = folium.Map(location=[np.mean(lats), np.mean(lons)], zoom_start = zoom, tiles='http://mt1.google.com/vt/lyrs=y&z={z}&x={x}&y={y}', attr = "Google Attribution")
    the_map.add_child(folium.PolyLine(locations=points, weight=5, color = 'orange'))

    return the_map  

plot_a_path([start,end])    

### Find the nearest pixels along the transect path

In [None]:
from utils.data_cube_utilities.transect import line_scan

import numpy as np

def get_index_at(coords, ds):
    """
    Return the integer (latitude, longitude) index pair of the pixel nearest to a point.

    Parameters
    ----------
    coords : (latitude, longitude) pair
        The location to look up, in the same units as the coordinates of `ds`.
    ds : object exposing `latitude.values` and `longitude.values`
        Typically an xarray.Dataset or xarray.DataArray.

    Returns
    -------
    (int, int)
        Positions along the latitude and longitude axes. If several coordinate
        values are equally close, the first one is used.
    """
    lat, lon = coords[0], coords[1]

    # Search the 1-D coordinate arrays directly. This avoids selecting whole
    # slices of the dataset just to read one coordinate, and always yields a
    # scalar index (calling int() on a 1-element array is deprecated in NumPy
    # and fails outright when a coordinate value is duplicated).
    lat_index = np.abs(np.asarray(ds.latitude.values) - lat).argmin()
    lon_index = np.abs(np.asarray(ds.longitude.values) - lon).argmin()

    return (int(lat_index), int(lon_index))

def create_pixel_trail(start, end, ds):
    """
    Collect the pixels of `ds` that lie on the path between two locations.

    `start` and `end` are (latitude, longitude) pairs; each is snapped to an
    index pair with `get_index_at`. The index pairs in between come from
    `line_scan.line_scan` (presumably a rasterised straight line -- confirm
    against the transect utility).

    Returns a list with one `ds.isel(latitude=..., longitude=...)` selection
    per index pair, in the order produced by the line scan.
    """
    first_index = get_index_at(start, ds)
    last_index = get_index_at(end, ds)

    trail = []
    for lat_idx, lon_idx in line_scan.line_scan(first_index, last_index):
        trail.append(ds.isel(latitude = lat_idx, longitude = lon_idx))
    return trail

In [None]:
list_of_pixels_along_segment = create_pixel_trail(start, end, landsat_dataset)

## Groundwork for Transect (2-D) and Hovmöller (3-D) Plots

In [None]:
import xarray
import matplotlib.pyplot as plt  
from matplotlib.ticker import FuncFormatter  
from datetime import datetime  
import time

def plot_list_of_pixels(list_of_pixels, band_name, y = None): 
    """
    Plot values sampled along a transect.

    A single acquisition is drawn as a scatter plot (2-D transect); several
    acquisitions are drawn as an image with time on the vertical axis
    (Hovmöller plot).

    Parameters
    ----------
    list_of_pixels : sequence
        Per-pixel selections along the transect, ordered from start to end.
        Each item must expose `latitude`, `longitude`, `time` and `values`.
    band_name : str
        Name of the plotted quantity; used as the y-axis label of the transect.
    y : numpy.datetime64 or array of numpy.datetime64, optional
        Acquisition time(s) of the data. When omitted, the time coordinate of
        the first pixel is used.

    Raises
    ------
    ValueError
        If `list_of_pixels` is empty.
    """
    if len(list_of_pixels) == 0:
        raise ValueError("list_of_pixels must contain at least one pixel")

    first_pixel, last_pixel = list_of_pixels[0], list_of_pixels[-1]

    # The signature advertises y=None, so fall back to the pixels' own times
    # instead of failing on `None.size`.
    if y is None:
        y = first_pixel.time.values
    y = np.asarray(y)

    start = (
             "{0:.2f}".format(float(first_pixel.latitude.values )),
             "{0:.2f}".format(float(first_pixel.longitude.values))
            )  
    end = (
             "{0:.2f}".format(float(last_pixel.latitude.values)),
             "{0:.2f}".format(float(last_pixel.longitude.values))
            )
    
    def reformat_n64(t):
        # Truncate to whole days inside NumPy. This works for any datetime64
        # unit and avoids casting nanosecond counts to the platform `int`.
        day = np.asarray(t).astype('datetime64[D]').ravel()[0]
        return str(day).replace('-', '.')
    
    def figure_ratio(x, y, fixed_width = 10):
        # Scale the height so the figure keeps the x:y aspect of the data.
        return (fixed_width, y * (fixed_width / x))
    
    # Rows are acquisitions and columns are pixels once several times are present.
    pixel_array = np.transpose([pix.values for pix in list_of_pixels])
    
    #If the data has one acquisition, then plot transect (2-D), else Hovmöller (3-D) 
    if y.size == 1:
        plt.figure(figsize = (15,5))
        plt.scatter(np.arange(pixel_array.size), pixel_array)
        plt.title("Transect (2-D) \n Acquisition date: {}".format(reformat_n64(y)))
        plt.xlabel("Pixels along the transect \n {} -  {} \n ".format(start,end))
        plt.ylabel(band_name)

    else:
        time_values = first_pixel.time.values
        n_times = len(time_values)

        def format_time_tick(x, pos):
            # Label only ticks that map to a real acquisition. A negative
            # index would otherwise wrap around to the last date.
            idx = int(x)
            return reformat_n64(time_values[idx]) if 0 <= idx < n_times else ""

        figure = plt.figure(figsize = figure_ratio(len(list_of_pixels),
                                                   len(first_pixel.values),
                                                   fixed_width = 15))

        ax = plt.gca()
        cax = ax.imshow(pixel_array, interpolation='none')
        figure.colorbar(cax,fraction=0.110, pad=0.04)

        ax.set_title("Hovmöller (3-D) \n Acquisition range: {} -  {} \n ".format(reformat_n64(y[0]),reformat_n64(y[-1])))
        plt.xlabel("Pixels along the transect \n {} -  {} \n ".format(start,end))
        ax.get_yaxis().set_major_formatter(FuncFormatter(format_time_tick))
        plt.ylabel("Time")
    plt.show()

In [None]:
def transect_plot(start,
                  end,
                  da):
    """
    Plot the values of a data array along the transect from `start` to `end`.

    Parameters
    ----------
    start, end : (latitude, longitude) pairs
        End points of the transect.
    da : xarray.DataArray
        The variable to plot. Its `time` coordinate decides the plot type:
        one acquisition gives a 2-D transect, several give a Hovmöller plot.

    Raises
    ------
    TypeError
        If a Dataset is passed instead of a single DataArray.
    """
    # isinstance also rejects Dataset subclasses; TypeError is still an
    # Exception, so callers catching the previous generic Exception keep working.
    if isinstance(da, xarray.Dataset):
        raise TypeError('You should be passing in a data-array, not a Dataset')

    pixels = create_pixel_trail(start, end, da)
    plot_list_of_pixels(pixels, da.name, y = da.time.values)

In [None]:
pixels = create_pixel_trail(start, end, landsat_dataset)

In [None]:
t = 2
subset = list( map(lambda x: x.isel(time = t), pixels))

### Mask Clouds

In [None]:
from utils.data_cube_utilities.clean_mask import landsat_qa_clean_mask

clean_mask = landsat_qa_clean_mask(landsat_dataset, platform=platform, 
                                   collection=collection, level=level)

In [None]:
cloudless_dataset = landsat_dataset.where(clean_mask)

### Select an acquisition date and then plot a 2D transect without clouds

In [None]:
# Zero-based index of the single acquisition used for the 2-D transect plots.
# It is passed to isel(time=...), so it must be smaller than the length of the
# `time` dimension of the dataset shown above.
acquisition_number = 10

In [None]:
#If plotted will create the 2-D transect
cloudless_dataset_for_acq_no = cloudless_dataset.isel(time = acquisition_number) 

In [None]:
# If plotted, this subset creates the 3-D Hovmoller plot for a portion of the time series.
# The slice is end-exclusive: acquisitions min_acq up to max_acq - 1 are selected.
min_acq = 1
max_acq = 4

cloudless_dataset_from_1_to_acq_no = cloudless_dataset.isel(time = slice(min_acq, max_acq)) 

## Select one of the XARRAY parameters for analysis

In [None]:
band = 'green'

### Create a 2D Transect plot of the "band" for one date

In [None]:
transect_plot(start, end, cloudless_dataset_for_acq_no[band])

### Create a 2D Transect plot of NDVI for one date

In [None]:
transect_plot(start, end, NDVI(cloudless_dataset_for_acq_no))

### Create a 3D Hovmoller plot of NDVI for the entire time series

In [None]:
transect_plot(start, end, NDVI(cloudless_dataset))

### Create a 2D Transect plot of water existence for one date

In [None]:
transect_plot(start, end, water_xarray.isel(time = acquisition_number))

### Create a 3D Hovmoller plot of water extent for the entire time series

In [None]:
transect_plot(start, end, water_xarray)

### Create a 2D Transect plot of water quality (TSM) for one date

In [None]:
transect_plot(start, end, tsm_xarray.isel(time = acquisition_number))

### Create a 3D Hovmoller plot of water quality (TSM) for the entire time series

In [None]:
transect_plot(start, end, tsm_xarray)