# Product Transform Deriver

This notebooks derives transformations between products (e.g. Landsat 8 Collection 2 Level 2 to Landsat 8 Collection 1 Level 2).

In [1]:
import os
import sys
sys.path.append(os.environ.get('NOTEBOOK_ROOT'))

import numpy as np
import xarray as xr
from scipy.interpolate import CubicSpline
import matplotlib.pyplot as plt

import datacube
from utils.data_cube_utilities.clean_mask import landsat_qa_clean_mask, landsat_clean_mask_invalid

dc = datacube.Datacube()

from datacube.utils.rio import configure_s3_access
configure_s3_access(requester_pays=True)

<botocore.credentials.Credentials at 0x7fc16b6c8400>

In [2]:
dc.list_products()

Unnamed: 0_level_0,name,description,label,lon,platform,format,instrument,creation_time,product_type,time,lat,crs,resolution,tile_size,spatial_dimensions
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2,ls8_l2_c2,Landsat 8 Collection 1 Level2 Surface Reflecta...,,,LANDSAT_8_C2_L2,GeoTiff,OLI_TIRS,,LaSRCollection2,,,EPSG:4326,"(-0.00027777777778, 0.00027777777778)",,"(latitude, longitude)"
1,ls8_usgs_sr_scene,Landsat 8 USGS Collection 1 Level2 Surface Ref...,,,LANDSAT_8,GeoTiff,OLI_TIRS,,LaSRC,,,EPSG:4326,"(-0.00027777777778, 0.00027777777778)",,"(latitude, longitude)"


In [3]:
ds_params = {'orig': dict(product='ls8_l2_c2', platform='LANDSAT_8', 
                          collection='c2', level='l2'),
             'dst': dict(product='ls8_usgs_sr_scene', platform='LANDSAT_8', 
                         collection='c1', level='l2')}

# Freetown, Sierra Leone
lat = (8.07, 8.52)
lon = (-13.34, -13.07)
# Sierra Leone
# lat = (6.8, 10.0)
# lon = (-13.4, -10.2)
time = ('2016-01-01', '2016-03-31')

composite_dask_chunks = dict(time=1, latitude=500, longitude=500)

**TODO:** 
1. Use strided geographic chunks for the **dc.load()** calls.
2. Obtain median mosaic for each chunk.
3. Update a map of integer band values of the origin dataset 
   to integer band values of the destination dataset.
4. Once all geographic chunks have been processed,

## Landsat 8 Collection 2 Level 2 to Landsat 8 Collection 1 Level 2

**Get data of matching shapes.**

In [28]:
ds_map = {}
for orig_dst, params in ds_params.items():
    ds = dc.load(product=params['product'], lon=lon, lat=lat, time=time, 
                 dask_chunks=composite_dask_chunks)
#     stride = 100 # Reduce the amount of data to process.
#     ds = ds.isel(latitude=slice(0, len(ds.latitude), stride), 
#                  longitude=slice(0, len(ds.longitude), stride))
    if params['platform'] in ['LANDSAT_5', 'LANDSAT_7', 'LANDSAT_8']:
        clean_mask = landsat_qa_clean_mask(ds, platform=params['platform'], 
                                           collection=params['collection'], level=params['level']) & \
                     landsat_clean_mask_invalid(ds, platform=params['platform'], 
                                                collection=params['collection'], level=params['level'])
    else: 
        clean_mask = xr.full_like(ds, True)
    ds = ds.where(clean_mask)
    ds_mean_comp = ds.mean('time').persist()
    ds_map[orig_dst] = ds_mean_comp

  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)


In [29]:
# from utils.data_cube_utilities.dc_water_classifier import NDWI
# ndwi_orig = NDWI(ds_map['orig'])
# print(ndwi_orig.min().compute())
# print(ndwi_orig.mean().compute())
# print(ndwi_orig.max().compute())

In [30]:
# ndwi_dst = NDWI(ds_map['dst'])
# print(ndwi_dst.min().compute())
# print(ndwi_dst.mean().compute())
# print(ndwi_dst.max().compute())

In [31]:
# print(ds_map['orig'].min().compute())
# print(ds_map['orig'].mean().compute())
# print(ds_map['orig'].max().compute())

In [32]:
# print(ds_map['dst'].min().compute())
# print(ds_map['dst'].mean().compute())
# print(ds_map['dst'].max().compute())

In [33]:
# orig_dst = 'orig'
# band = 'green'
# print(ds_map[orig_dst][band].min().compute())
# print(ds_map[orig_dst][band].mean().compute())
# print(ds_map[orig_dst][band].max().compute())

In [34]:
# orig_dst = 'dst'
# print(ds_map[orig_dst][band].min().compute())
# print(ds_map[orig_dst][band].mean().compute())
# print(ds_map[orig_dst][band].max().compute())

In [35]:
# ls8_l2_c2_mean_comp = ls8_l2_c2.mean('time').persist()

In [36]:
# ls8_l2_c2_min = ls8_l2_c2_mean_comp.min()
# ls8_l2_c2_mean = ls8_l2_c2_mean_comp.mean()
# ls8_l2_c2_max = ls8_l2_c2_mean_comp.max()

# ls8_l2_c2_mean_comp

In [37]:
# ls8_l2_c1 = dc.load(product='ls8_usgs_sr_scene', lon=lon, lat=lat, time=time, 
#                     dask_chunks=composite_dask_chunks)

In [38]:
# ls8_l2_c1_mean_comp = ls8_l2_c1.mean('time').persist()

**Derive transform**

In [39]:
# for orig_dst in ds_map:
#     print(f"Mean of {orig_dst}:")
#     print(ds_map[orig_dst].mean().compute())

In [40]:
# print(len(x))
# print(np.isfinite(x).sum())
# print(len(y))
# print(y[np.isfinite(y)].sum())
# # unique, counts = np.unique(x, return_counts=True)
# # count_sort_inds = np.argsort(counts)
# # unique[count_sort_inds], counts[count_sort_inds]

In [41]:
ds_map['orig'].mean().compute()

In [44]:
from utils.data_cube_utilities import dc_utilities
import importlib
importlib.reload(dc_utilities)
from utils.data_cube_utilities.dc_utilities import convert_range

converted_ds = convert_range(ds_map['orig'], from_platform=ds_params['orig']['platform'], 
             from_collection=ds_params['orig']['collection'], from_level=ds_params['orig']['level'],
             to_platform=ds_params['dst']['platform'], 
             to_collection=ds_params['dst']['collection'], to_level=ds_params['dst']['level']).mean()

In [45]:
converted_ds / ds_map['dst'].mean().compute()

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Count,20 Tasks,1 Chunks
Type,float64,numpy.ndarray
Array Chunk Bytes 8 B 8 B Shape () () Count 20 Tasks 1 Chunks Type float64 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Count,20 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Count,20 Tasks,1 Chunks
Type,float64,numpy.ndarray
Array Chunk Bytes 8 B 8 B Shape () () Count 20 Tasks 1 Chunks Type float64 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Count,20 Tasks,1 Chunks
Type,float64,numpy.ndarray


In [43]:
ds_map['dst'].mean().compute()

In [None]:
orig_prod_data_vars = set(ls8_l2_c2_mean_comp.data_vars.keys())
dst_prod_data_vars = set(ls8_l2_c1_mean_comp.data_vars.keys())
data_vars = orig_prod_data_vars.intersection(dst_prod_data_vars)

for data_var in data_vars:
#     ls8_l2_c2_mean_comp[data_var]
#     poly_coefs = np.polyfit(ls8_l2_c2_mean_comp[data_var], ls8_l2_c1_mean_comp[data_var], deg=3)
    x = ds_map['orig'][data_var].values.flatten()
    y = ds_map['dst'][data_var].values.flatten()
    unique_x_inds = np.unique(x, return_index=True)[1]
    x, y = x[unique_x_inds], y[unique_x_inds]
    both_finite = np.isfinite(x) & np.isfinite(y)
    x, y = x[both_finite], y[both_finite]
    sorted_inds = np.argsort(x)
    stride = 500
#     cs = CubicSpline(x[sorted_inds][::stride], y[sorted_inds][::stride])
    from utils.data_cube_utilities.curve_fitting import gaussian_filter_fit
    x_smooth, y_smooth = gaussian_filter_fit(x, y)
    plt.plot(x[::stride], y[::stride])
    plt.title(f'{data_var} x,y')
    plt.show()
    plt.plot(x_smooth[::stride], y_smooth[::stride])
    plt.title(f'{data_var} x,cs(x)')
    plt.show()
    break