# Topic 3: Analysis In-Place / Data Proximate Compute 

---

## Import Required Packages

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from datetime import datetime
import os
import requests
from collections import defaultdict
import numpy as np
import xarray as xr
import rasterio as rio        # https://rasterio.readthedocs.io/en/latest/
from rasterio.plot import show
import rioxarray       # https://corteva.github.io/rioxarray/stable/index.html
import geopandas
import pyproj
from pyproj import Proj
from shapely.ops import transform
import geoviews as gv
from cartopy import crs
import hvplot.xarray
import holoviews as hv
gv.extension('bokeh', 'matplotlib')

---

## Set GDAL Configuration Options

**Set the GDAL configuration options for this session**

In [None]:
# Both cookie options point at the same file in the user's home directory.
cookie_path = os.path.expanduser('~/cookies.txt')

# GDAL configuration options for this session, keyed by option name.
env = {
    'GDAL_DISABLE_READDIR_ON_OPEN': 'EMPTY_DIR',
    'AWS_NO_SIGN_REQUEST': 'YES',
    'GDAL_MAX_RAW_BLOCK_CACHE_SIZE': '200000000',
    'GDAL_SWATH_SIZE': '200000000',
    'VSI_CURL_CACHE_SIZE': '200000000',
    'GDAL_HTTP_COOKIEFILE': cookie_path,
    'GDAL_HTTP_COOKIEJAR': cookie_path,
}

# Publish the options as environment variables of this process.
os.environ.update(env)

**Read in geoJSON for subsetting**

In [None]:
# Read the field boundaries and keep the geometry of the row labelled 0
# as the region of interest.
field = geopandas.read_file('./data/ne_w_agfields.geojson')
fieldShape = field.loc[0, 'geometry']

In [None]:
# Open a single HLS GeoTIFF only to read its coordinate reference system as a string.
foa_url = (
    "https://lpdaac.earthdata.nasa.gov/lp-prod-protected/HLSS30.015/"
    "HLS.S30.T13TGF.2020191T172901.v1.5.B04.tif"
)
with rio.open(foa_url) as dataset:
    hls_proj = dataset.crs.to_string()

In [None]:
# Source coordinate system of the ROI: geographic lon/lat on the WGS84 datum
geo_CRS = Proj('+proj=longlat +datum=WGS84 +no_defs', preserve_units=True)

# Transformer from the ROI coordinate system to the CRS read from the HLS file
project = pyproj.Transformer.from_proj(proj_from=geo_CRS, proj_to=hls_proj)

# Apply the transformation to every coordinate of the field geometry
fsUTM = transform(project.transform, fieldShape)

**Query the LP DAAC Provider STAC Catalog**

In [None]:
# Search endpoint of the CMR STAC API for the LPCLOUD provider
cmr_stac_search = 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD/search'  

In [None]:
# Search parameters sent as the JSON body of the STAC request:
# a page size, a bounding box string, a date range and the two HLS collections.
params = dict(
    limit=100,
    bbox='-101.67271614074707,41.04754380304359,-101.65344715118408,41.06213891056728',
    datetime='2020-01-01T00:00:00Z/2021-01-01T23:59:59Z',
    collections=['HLSS30.v1.5', 'HLSL30.v1.5'],
)

In [None]:
# POST the search to the STAC endpoint. The timeout keeps the notebook from
# hanging indefinitely on a stalled connection, and raise_for_status() turns an
# HTTP error into an explicit exception instead of a confusing KeyError/JSON
# error on the next line.
response = requests.post(cmr_stac_search, json=params, timeout=60)
response.raise_for_status()

# NOTE(review): only this single response is read, so at most `limit` items are
# returned -- confirm whether additional result pages need to be followed.
hls_items = response.json()['features']

In [None]:
# Asset names of the NIR, red, blue and quality (Fmask) layers for each product.
s30_evi_bands = ['B8A', 'B04', 'B02', 'Fmask']  # NIR RED BLUE Quality for S30
l30_evi_bands = ['B05', 'B04', 'B02', 'Fmask']  # NIR RED BLUE Quality for L30

# Collect the href of every wanted asset, item by item, in search-result order.
evi_band_links = []
for item in hls_items:
    # Any collection other than HLSS30.v1.5 is treated as L30.
    evi_bands = s30_evi_bands if item['collection'] == 'HLSS30.v1.5' else l30_evi_bands
    evi_band_links.extend(
        asset['href']
        for asset_name, asset in item['assets'].items()
        if asset_name in evi_bands
    )

In [None]:
# Group the asset links by tile ID, which is the sixth dot-separated field
# counted from the end of each link.
tile_dicts = defaultdict(list)
for link in evi_band_links:
    tile_dicts[link.split('.')[-6]].append(link)

In [None]:
# Pull out the link lists of the two tile IDs; because tile_dicts is a
# defaultdict(list), a tile with no links yields an empty list.
tile_links_T14TKL, tile_links_T13TGF = (
    tile_dicts[tile_id] for tile_id in ('T14TKL', 'T13TGF')
)

In [None]:
# Group the T13TGF links by band name, which is the dot-separated field just
# before the file extension.
bands_dicts = defaultdict(list)
for link in tile_links_T13TGF:
    bands_dicts[link.split('.')[-2]].append(link)

In [None]:
# Preview the first ten B04 links
bands_dicts['B04'][:10]

In [None]:
# Swap the HTTPS host prefix of the first ten B04 links for '/vsis3/' and
# write the resulting paths to files.txt, one per line.
lines = [
    x.replace('https://lpdaac.earthdata.nasa.gov/', '/vsis3/') + '\n'
    for x in bands_dicts['B04'][:10]
]
with open('files.txt', 'w') as f:
    f.writelines(lines)

**Start up a dask client**

In [None]:
from dask.distributed import Client

In [None]:
# Start a dask.distributed Client configured with two workers;
# the bare name on the last line displays the client in the notebook.
client = Client(n_workers=2)
client

**Create a function to read HLS data as a time series xarray**

In [None]:
def create_ts_xr(path_list, chunks=None):
    """Open a list of HLS rasters and stack them into one time-series DataArray.

    Parameters
    ----------
    path_list : list of str
        Paths or URLs of the rasters. The acquisition date is parsed from each
        name: the fifth dot-separated field from the end must start with the
        date as ``YYYYDDD`` (year + day of year) followed by ``T``, e.g.
        ``HLS.S30.T13TGF.2020191T172901.v1.5.B04.tif``.
    chunks : dict, optional
        Chunk sizes forwarded to ``rioxarray.open_rasterio``. Defaults to
        ``{'x': 1024, 'y': 1024}``.

    Returns
    -------
    xarray.DataArray
        The rasters concatenated along a new ``time`` dimension, in the same
        order as ``path_list``. Only the date is kept; the part of the field
        after ``T`` is discarded.

    Raises
    ------
    ValueError
        If ``path_list`` is empty, or if a date field does not match ``%Y%j``.
    """
    # Fail early with a clear message instead of an error from deep inside xarray.
    if not path_list:
        raise ValueError('path_list is empty: no rasters to build a time series from')
    if chunks is None:
        chunks = {'x': 1024, 'y': 1024}

    # Convert the date in each path from 'year + day of year' to a datetime
    time_d_list = [
        datetime.strptime(path.split('.')[-5].split('T')[0], '%Y%j')
        for path in path_list
    ]
    # Create an xarray time variable to concatenate along
    time_var = xr.Variable('time', time_d_list)

    # lock=False and the chunk sizes are forwarded to open_rasterio unchanged
    rasters = [rioxarray.open_rasterio(path, lock=False, chunks=chunks) for path in path_list]

    return xr.concat(rasters, dim=time_var)

**Create an xarray time series for the near-infrared band (B8A)**

In [None]:
%%time
# Build the NIR time series from the B8A links (the cell magic above must stay on the first line)
nir = create_ts_xr(bands_dicts['B8A'])

In [None]:
# Display the NIR time-series DataArray
nir

**Clip the xarray by our geoJSON boundary**

In [None]:
# Clip the NIR series to the reprojected field geometry, then drop the
# length-one 'band' dimension.
nir_clipped = (
    nir.rio.clip([fsUTM])
       .squeeze('band', drop=True)
)
nir_clipped

**Load the time series into memory**

In [None]:
# Load the clipped NIR series into memory and display it
nir_clipped = nir_clipped.load()
nir_clipped

In [None]:
# Interactive image plot of the clipped NIR series, passing the HLS CRS string as `crs`
nir_clipped.hvplot.image(
    x='x',
    y='y',
    crs=hls_proj,
    cmap='PuOr',
    width=800,
    height=600,
    colorbar=True,
)

**Read, clip, and load time series for the red band (B04)**

In [None]:
# Build the red-band time series from the B04 links
red = create_ts_xr(bands_dicts['B04'])

In [None]:
# Display the red-band time-series DataArray
red

In [None]:
# Clip the red series to the field geometry, drop the 'band' dimension and
# load the result into memory in one chain.
red_clipped = (
    red.rio.clip([fsUTM])
       .squeeze('band', drop=True)
       .load()
)
red_clipped

In [None]:
# Interactive image plot of the clipped red series, passing the HLS CRS string as `crs`
red_clipped.hvplot.image(
    x='x',
    y='y',
    crs=hls_proj,
    cmap='PuOr',
    width=800,
    height=600,
    colorbar=True,
)

**Calculate NDVI using the clipped nir and red xarrays**

In [None]:
# NDVI = (NIR - red) / (NIR + red), computed element-wise on the clipped series.
# NOTE(review): pixels where NIR + red == 0 are not handled specially here.
band_difference = nir_clipped - red_clipped
band_sum = nir_clipped + red_clipped
ndvi = band_difference / band_sum

In [None]:
# Display the NDVI DataArray
ndvi

In [None]:
# Interactive NDVI image plot with the colour limits fixed to 0.0-1.0
ndvi.hvplot.image(
    x='x',
    y='y',
    crs=hls_proj,
    cmap='YlGn',
    width=800,
    height=600,
    colorbar=True,
).opts(clim=(0.0, 1.0))

**Use the Fmask to screen out clouds, cloud shadows, snow/ice, and water**

In [None]:
%%time
# Build the Fmask time series from the Fmask links (the cell magic above must stay on the first line)
fmask = create_ts_xr(bands_dicts['Fmask'])

In [None]:
# Display the Fmask time-series DataArray
fmask

In [None]:
# Clip the Fmask series to the field geometry and drop the 'band' dimension
# (not loaded into memory yet).
fmask_clipped = (
    fmask.rio.clip([fsUTM])
         .squeeze('band', drop=True)
)

In [None]:
# Inspect the clipped Fmask for 2020-11-21 before it is loaded
fmask_clipped.sel(time='2020-11-21')

In [None]:
# Load the clipped Fmask series into memory.
# NOTE(review): `f` is a terse name for the loaded mask; consider a descriptive one.
f = fmask_clipped.load()

In [None]:
# Inspect the loaded Fmask values for 2020-11-21
f.sel(time='2020-11-21')

**Keep ndvi data values where Fmask equals 0 (no clouds, no cloud shadow, no snow/ice, no water)**

In [None]:
# Keep NDVI only where the loaded Fmask equals 0; every other pixel becomes NaN.
# NOTE(review): if Fmask is bit-packed (see the HLS user guide), `== 0` also
# rejects pixels whose only set bits are non-cloud flags -- confirm this is intended.
ndvi_filtered = ndvi.where(f==0, np.nan)

In [None]:
# Display the Fmask-filtered NDVI DataArray
ndvi_filtered

In [None]:
# Inspect the filtered NDVI for 2020-11-21
ndvi_filtered.sel(time='2020-11-21')

In [None]:
# Interactive plot of the Fmask-filtered NDVI with the colour limits fixed to 0.0-1.0
ndvi_filtered.hvplot.image(
    x='x',
    y='y',
    crs=hls_proj,
    cmap='YlGn',
    width=800,
    height=600,
    colorbar=True,
).opts(clim=(0.0, 1.0))

**Create a monthly aggregate**

In [None]:
# Average the filtered NDVI over time within each calendar month, then plot
# one image per month (selected through the 'month' groupby widget).
ndvi_filtered.groupby('time.month').mean('time').hvplot.image(
    x='x',
    y='y',
    crs=hls_proj,
    groupby='month',
    cmap='YlGn',
    width=800,
    height=600,
    colorbar=True,
).opts(clim=(0.0, 1.0))

## References

- https://tutorial.dask.org/index.html
- https://examples.dask.org/applications/satellite-imagery-geotiff.html

---

# [Next: Topic 4 - Migrating ArcGIS Function Workflows](Topic_4_Migrating_ArcGIS_Function_Workflows.ipynb)