In [None]:
# Enable importing of utilities.
import sys
import os
sys.path.append(os.environ.get('NOTEBOOK_ROOT'))

import xarray as xr

# ARDC Training: Python Notebooks
Task-F: Data Export
<br><br>
The code in this notebook subsets a data cube, selects a specific set of variables, creates a new xarray Dataset, and then exports that data to a GeoTIFF file and a CSV file. These output files can be used in other software programs (e.g. QGIS, ArcGIS, Excel) for more specific analyses. We will keep the region small so that we can control file sizes. 

> ### Import the Datacube Configuration

In [None]:
# Open a Data Cube connection and attach it to the project's data access API wrapper.
import datacube
import utils.data_cube_utilities.data_access_api as dc_api  

from datacube.utils.aws import configure_s3_access
# Configure S3 access with the requester-pays option switched on.
configure_s3_access(requester_pays=True)

api = dc_api.DataAccessApi()
dc = datacube.Datacube(app = 'ardc_task_f')
# Hand the Datacube instance created above to the API wrapper so both share it.
api.dc = dc

>### Browse the available Data Cubes on the storage platform    
> You might want to learn more about what data is stored and how it is stored.


In [None]:
# Fetch the product table from the Data Cube, then keep only the rows whose
# 'format' column equals 'NetCDF'.
list_of_products = dc.list_products()
netCDF_products = list_of_products[list_of_products['format'] == 'NetCDF']
# Show the filtered table as the cell output.
netCDF_products

>### Pick a product  
>Use the platform and product names from the previous block to select a Data Cube.  

In [None]:
# Change the data platform and data cube here
# This data export notebook will only work for Landsat-7 datasets

# `platform` and `product` are passed to the extent query and to the data load below.
platform = 'LANDSAT_7'
product = 'ls7_usgs_sr_scene'
# `collection` and `level` are forwarded to the clean-mask helpers later in the notebook.
collection = 'c1'
level = 'l2'

In [None]:
from utils.data_cube_utilities.dc_time import _n64_to_datetime, dt_to_str

# Query the full extent of the chosen product; an empty measurement list is passed.
extents = api.get_full_dataset_extent(platform = platform, product = product, measurements=[])

# Reduce each coordinate to a (min, max) pair.
latitude_extents = (min(extents['latitude'].values),max(extents['latitude'].values))
longitude_extents = (min(extents['longitude'].values),max(extents['longitude'].values))
time_extents = (min(extents['time'].values),max(extents['time'].values))

print("Latitude Extents:", latitude_extents)
print("Longitude Extents:", longitude_extents)
# Each time bound goes through _n64_to_datetime and then dt_to_str before printing.
print("Time Extents:", list(map(dt_to_str, map(_n64_to_datetime, time_extents))))

# Visualize Data Cube Region

In [None]:
## The code below renders a map of the product's full latitude/longitude extent
## (computed in the previous cell) that can be used to orient yourself with the region.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude = latitude_extents, longitude = longitude_extents)

> ### Pick a smaller analysis region and display that region
Try to keep your region to less than 0.2-deg x 0.2-deg for rapid processing. You can click on the map above to find the Lat-Lon coordinates of any location. Pick a time window of 1 year to keep the file small.

In [None]:
# Analysis region and time window. These assignments overwrite the full-product
# extents computed earlier, so every later cell works on this smaller subset.
# Each tuple is ordered (min, max).

## Vietnam - Central Lam Dong Province ##
# longitude_extents = (105.2, 105.5)
# latitude_extents  = (11.25, 11.55)

## Kenya - Mombasa
longitude_extents = (39.55, 39.74)
latitude_extents = (-4.12, -3.98)

# One calendar year, given as (start, end) date strings.
time_extents = ('2010-01-01', '2010-12-31')

In [None]:
# Render the map again, this time for the smaller analysis region selected above.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude = latitude_extents, longitude = longitude_extents)

In [None]:
# Load the analysis region and time window for the chosen product.
# Six spectral bands plus the pixel_qa band are requested; the data is split
# into dask chunks of one time step by up to 1000 x 1000 pixels.
landsat_dataset = dc.load(latitude = latitude_extents,
                          longitude = longitude_extents,
                          platform = platform,
                          time = time_extents,
                          product = product,
                          measurements = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2', 'pixel_qa'],
                          dask_chunks={'time':1, 'latitude':1000, 'longitude':1000})

In [None]:
# Display the loaded xarray Dataset as the cell output.
landsat_dataset # this is a printout of the first few values of each parameter in the XARRAY

# Create Several Common Application Products

>## Unpack pixel_qa
This is the Landsat-7 pixel quality data that is used to screen for clouds, shadows, snow, etc. These values can be quite valuable when doing an analysis in a GIS tool, but you will not need all of them.

In [None]:
from utils.data_cube_utilities.clean_mask import landsat_clean_mask_full, landsat_qa_clean_mask

# obtain the clean mask
# The platform/collection/level triple is shared by every mask helper call below.
plt_col_lvl_params = dict(platform=platform, collection=collection, level=level)
# NOTE(review): `clean_mask` is not referenced again in the cells visible here;
# later cells build `clean_xarray` from the clear and water masks instead - confirm it is still needed.
clean_mask = landsat_clean_mask_full(dc, landsat_dataset, product=product, **plt_col_lvl_params)
# One mask per pixel_qa cover type.
clear_xarray  = landsat_qa_clean_mask(landsat_dataset, cover_types=['clear'], **plt_col_lvl_params)
water_xarray  = landsat_qa_clean_mask(landsat_dataset, cover_types=['water'], **plt_col_lvl_params)
shadow_xarray = landsat_qa_clean_mask(landsat_dataset, cover_types=['cld_shd'], **plt_col_lvl_params) 
cloud_xarray  = landsat_qa_clean_mask(landsat_dataset, cover_types=['cloud'], **plt_col_lvl_params)

In [None]:
# A pixel is treated as clean when it is flagged clear OR water; the result is named "clean_mask".
clean_xarray = (clear_xarray | water_xarray).rename("clean_mask")

> ##  Spectral Indices
Below are a number of common spectral indices. 

In [None]:
def NDVI(dataset):
    """Normalized difference of the `nir` and `red` bands, named "NDVI".

    Args:
        dataset: object exposing `nir` and `red` attributes that support
            arithmetic (in this notebook, the loaded Landsat Dataset).

    Returns:
        (nir - red) / (nir + red), renamed to "NDVI".
    """
    band_difference = dataset.nir - dataset.red
    band_sum = dataset.nir + dataset.red
    ratio = band_difference / band_sum
    return ratio.rename("NDVI")

In [None]:
def NDWI(dataset):
    """Normalized difference of the `green` and `nir` bands, named "NDWI".

    Args:
        dataset: object exposing `green` and `nir` attributes that support
            arithmetic (in this notebook, the loaded Landsat Dataset).

    Returns:
        (green - nir) / (green + nir), renamed to "NDWI".
    """
    band_difference = dataset.green - dataset.nir
    band_sum = dataset.green + dataset.nir
    ratio = band_difference / band_sum
    return ratio.rename("NDWI")

In [None]:
def NDBI(dataset):
    """Normalized difference of the `swir2` and `nir` bands, named "NDBI".

    Args:
        dataset: object exposing `swir2` and `nir` attributes that support
            arithmetic (in this notebook, the loaded Landsat Dataset).

    Returns:
        (swir2 - nir) / (swir2 + nir), renamed to "NDBI".
    """
    # NOTE(review): NDBI is often written with the swir1 band; this notebook
    # uses swir2 - confirm that is intended.
    band_difference = dataset.swir2 - dataset.nir
    band_sum = dataset.swir2 + dataset.nir
    ratio = band_difference / band_sum
    return ratio.rename("NDBI")

In [None]:
def EVI(dataset, c1 = None, c2 = None, L = None):
    """Enhanced Vegetation Index built from the `nir`, `red` and `blue` bands.

    Computes (nir - red) / (nir + c1 * red - c2 * blue + L) and names the
    result "EVI".

    Args:
        dataset: object exposing `nir`, `red` and `blue` attributes that
            support arithmetic (in this notebook, the loaded Landsat Dataset).
        c1: coefficient applied to the red band. Defaults to 6 when None.
        c2: coefficient applied to the blue band. Defaults to 7.5 when None.
        L: additive term in the denominator. Defaults to 1 when None.

    Returns:
        The index, renamed to "EVI".
    """
    # A coefficient left as None used to raise a TypeError (None * band).
    # Fall back to the values this notebook passes explicitly when it calls EVI.
    c1 = 6 if c1 is None else c1
    c2 = 7.5 if c2 is None else c2
    L = 1 if L is None else L

    # NOTE(review): the commonly published EVI formula also multiplies by a
    # gain factor (2.5); none is applied here - confirm that is intended.
    denominator = dataset.nir + (c1 * dataset.red) - (c2 * dataset.blue) + L
    return ((dataset.nir - dataset.red) / denominator).rename("EVI")

In [None]:
# Evaluate each spectral index on the loaded Landsat dataset.
ndbi_xarray = NDBI(landsat_dataset)  # Urbanization - Reds
ndvi_xarray = NDVI(landsat_dataset)  # Dense Vegetation - Greens
ndwi_xarray = NDWI(landsat_dataset)  # High Concentrations of Water - Blues  
evi_xarray = EVI(landsat_dataset, c1 = 6, c2 = 7.5, L = 1 ) # Enhanced Vegetation Index

>## TSM
This is the Total Suspended Matter (TSM) index, which measures the quality of water using a simple equation with one of the Landsat bands. For the analysis below we will use the water pixels from the Landsat "pixel_qa" band so that the code runs faster than it would using the WOFS water analysis. 

In [None]:
from utils.data_cube_utilities.dc_water_quality import tsm

# The pixel_qa water mask is passed as a plain boolean array for `clean_mask`;
# only the `tsm` variable of the result is kept.
tsm_xarray = tsm(landsat_dataset, clean_mask = water_xarray.values.astype(bool) ).tsm

# Combine Everything  

In [None]:
# Merge the loaded bands, the pixel_qa masks, the spectral indices and TSM into one Dataset.
combined_dataset = xr.merge([landsat_dataset,
          clean_xarray,
          clear_xarray,
          water_xarray,
          shadow_xarray,
          cloud_xarray,                  
          evi_xarray,
          ndbi_xarray,
          ndvi_xarray,
          ndwi_xarray,
          tsm_xarray])

# Copy original crs to merged dataset 
# (all attributes of the loaded dataset are assigned to the merged result)
combined_dataset = combined_dataset.assign_attrs(landsat_dataset.attrs)

combined_dataset  # this is a printout of the first few values of each parameter in the XARRAY

## Export CSV
This section will be used to create a CSV export file for a given pixel. You will identify the pixel by selecting a specific Lat-Lon position and then the code will find the closest pixel to that point (nearest neighbor). Use the map at the top of this notebook to view your region and pick a Lat-Lon location. You can find an exact location by clicking on the map. The CSV file will contain the time series data for each XARRAY parameter.

In [None]:
# Lat and Lon coordinates extracted from the map above.
# They must lie inside the analysis region selected earlier (Kenya - Mombasa:
# latitude -4.12 to -3.98, longitude 39.55 to 39.74). The nearest-neighbor
# lookup in the next cell does not fail for an outside point; it silently
# returns a pixel on the edge of the region instead.
pixel_lat = -4.05
pixel_lon = 39.65

In [None]:
# Pick the grid cell closest to the requested point; the time dimension is not
# selected on, so `pixel` still carries one entry per time step.
pixel = combined_dataset.sel(latitude  = pixel_lat,
                             longitude = pixel_lon,
                              method = 'nearest') # nearest neighbor selection  

In [None]:
import xarray as xr
import csv

def ts_pixel_to_csv(pixel: "xr.Dataset",
                    csv_file_name: str):
    """Write the time series of a single pixel to a CSV file.

    The first row is a header: 'time' followed by the names of the data
    variables. Each following row holds one time step: the time coordinate
    value and then the value of every data variable at that step.

    Args:
        pixel: dataset for one pixel with a `time` dimension (as produced by
            selecting a single latitude/longitude from the combined dataset).
        csv_file_name: path of the CSV file to create or overwrite.
    """
    variable_names = list(pixel.data_vars)

    # newline='' leaves line endings to the csv module; without it the writer's
    # '\r\n' terminator is translated again on Windows and blank rows appear.
    with open(csv_file_name, 'w', newline='') as out:
        csv_out = csv.writer(out)
        csv_out.writerow(['time'] + variable_names)

        for index in range(len(pixel.time)):
            row = pixel.isel(time = index)
            csv_out.writerow([row.time.values] + [row[name].values for name in variable_names])

In [None]:
# Output path for the pixel time-series CSV (relative to the notebook's working directory).
csv_name = 'test.csv'

In [None]:
# Write the selected pixel's time series to the CSV file named above.
ts_pixel_to_csv(pixel, csv_name)

## Export GeoTIFF
This section will be used to create a GeoTIFF export.

----  
File formatting  

In [None]:
import time
def time_to_string(t):
    """Format a nanosecond-resolution timestamp as 'YYYY_MM_DD_HH_MM_SS' (UTC).

    Args:
        t: a scalar time value with an `astype` method whose integer view is
            nanoseconds since the Unix epoch (e.g. one element of a
            datetime64[ns] time coordinate).

    Returns:
        The timestamp as a string suitable for use in a file name.
    """
    # Ask for int64 explicitly: plain `int` is platform-dependent in NumPy and
    # can be 32 bits wide, which overflows nanosecond timestamps.
    nanoseconds = t.astype('int64')
    # Convert to a plain Python float so time.gmtime receives a native number.
    seconds = float(nanoseconds) / 1000000000
    return time.strftime("%Y_%m_%d_%H_%M_%S", time.gmtime(seconds))

----  
This function can be used to write a single time slice from an xarray to geotiff

In [None]:
import numpy as np

from utils.data_cube_utilities import dc_utilities

def export_slice_to_geotiff(ds, path):
    """Write a single time slice of a dataset to a GeoTIFF file.

    Every data variable of `ds` is cast to float32 and passed as a band to
    `dc_utilities.write_geotiff_from_xr`, using the EPSG:4326 CRS.

    Args:
        ds: dataset holding one time slice.
        path: destination path of the GeoTIFF file.
    """
    # The band list comes from the slice being written, not from a notebook
    # global, so the function works for any dataset passed in.
    band_names = list(ds.data_vars.keys())
    dc_utilities.write_geotiff_from_xr(path,
                                        ds.astype(np.float32),
                                        band_names,
                                        crs="EPSG:4326")

----  
For each time slice in a dataset we call `export_slice_to_geotiff`  

In [None]:
def export_xarray_to_geotiff(ds, path):
    """Export every time slice of a dataset to its own GeoTIFF file.

    Each file is named `<path>_<timestamp>.tif`, where the timestamp comes
    from `time_to_string`.

    Args:
        ds: dataset with a `time` coordinate.
        path: file name prefix, optionally including a directory
            (e.g. "geotiffs/landsat7").
    """
    import os

    # Create the target directory up front; writing fails when it does not exist.
    output_dir = os.path.dirname(path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for t in ds.time:
        time_slice_xarray = ds.sel(time = t)
        export_slice_to_geotiff(time_slice_xarray,
                                path + "_" + time_to_string(t) + ".tif")

## Start Export
This is where we will start the GeoTIFF export and review the final product. The lines after this text box have been "commented out" so that they can be run at the end, after you have completed the creation of the XARRAY above and reviewed the data. 

In [None]:
# export_xarray_to_geotiff(combined_dataset, "geotiffs/landsat7")

----  
Check to see what files exist in `geotiffs`

In [None]:
# !ls -lah geotiffs/*.tif