# Load and save Landsat data using ODC


I needed to use the `dea/20220225` module to load the HDF5 files

`dea/20230710`

`odc-geo` needs to be installed for `dea_tools` to work:

`pip install -v --no-binary :all: --upgrade-strategy only-if-needed --prefix /g/data/os22/chad_tmp/climate-carbon-interactions/envs/EXTRA_PYTHON_LIBS odc-geo`

### Load packages
Load key Python packages and any supporting functions for the analysis.

In [1]:
import datacube
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import folium

import sys
sys.path.insert(1, '/g/data/os22/chad_tmp/dea-notebooks/Tools/')
from dea_tools.dask import create_local_dask_cluster
from dea_tools.bandindices import calculate_indices
from dea_tools.datahandling import load_ard

# Special import: odc-geo is missing from the dea/20230710 module, so add the extra site-packages path
sys.path.insert(1, '/g/data/os22/chad_tmp/climate-carbon-interactions/envs/EXTRA_PYTHON_LIBS/lib/python3.8/site-packages')
from odc.geo.xr import assign_crs

In [None]:
# Start a local dask cluster via the dea_tools helper and keep the returned client.
client = create_local_dask_cluster(return_client=True)

### Analysis parameters

* `path`: The path to the input vector file from which we will extract training data. A default geojson is provided.
* `field`: The name of the column in your shapefile attribute table that contains the class labels. **The class labels must be integers.**
* `time`: The date range to analyse (e.g. `('2013', '2016')`).
* `measurements`: The bands of the imagery to load.
* `resampling`: The resampling method to use, given per band.
* `resolution`: The pixel size of the loaded data, in the units of `output_crs` (e.g. `(-1000, 1000)`).
* `output_crs`: The coordinate reference system the data is loaded into (e.g. `'epsg:3577'`).

In [None]:
# Alternative configuration (fractional cover product), kept for reference:
# product='ga_ls_fc_3'
# time = ('1987', '2022')
# measurements =  ['pv', 'npv', 'bs']
# resampling = {'*':'average'}
# resolution = (-1000,1000)
# output_crs='epsg:3577'
# dask_chunks=dict(x=2000, y=2000)

# Bands requested from the ARD product.
measurements = ['nbart_nir', 'nbart_red', 'nbart_green']

# Each requested band is resampled with 'average'; the '*' entry is set to
# 'mode' (presumably the fallback for any other band -- confirm against the
# datacube documentation).
resampling = {**dict.fromkeys(measurements, 'average'), '*': 'mode'}

# Output grid definition and dask chunking passed through to the loader.
resolution = (-1000, 1000)
output_crs = 'epsg:3577'
dask_chunks = {'x': 4000, 'y': 4000, 'time': 1}

# Optional settings that are currently disabled:
# filters=[("opening", 2), ("dilation", 2)]
# central_lat = -33.5597
# central_lon = 150.231667
# # Set the buffer to load around the central coordinates
# buffer = 0.07

## Load data

In [None]:
# Open a datacube connection; `app` is just the label given to this session.
dc = datacube.Datacube(app='test')

In [None]:
# Load the imagery one calendar year at a time, derive NDVI/NDWI and reduce
# to monthly means.
#
# NOTE(review): only the Landsat 5 product ('ga_ls5t_ard_3') is requested even
# though the year range runs to 2022 -- confirm whether later sensors should
# be added to `products`.
years = [str(i) for i in range(1988, 2023)]
for y in years:
    print(y)

    # Query for a single year; everything except `time` comes from the
    # analysis parameters defined earlier in the notebook.
    query = {
        'time': y,
        'measurements': measurements,
        'resolution': resolution,
        'output_crs': output_crs,
        'resampling': resampling,
        'dask_chunks': dask_chunks,
        'group_by': 'solar_day'
    }

    ds = load_ard(dc=dc,
              products=[
                    'ga_ls5t_ard_3'
              ],
              # mask_contiguity=True,
              mask_pixel_quality=True,
              # mask_filters=filters,
              **query
             )

    # drop=True: presumably keeps only the computed indices and discards the
    # input bands -- confirm against the dea_tools documentation.
    ds = calculate_indices(ds, ['NDVI', 'NDWI'], collection='ga_ls_3', drop=True)

    # ds = ds.resample(time='QS-DEC').mean()
    # Monthly means. The `loffset` argument of `resample` is deprecated (and
    # removed in newer xarray releases), so the 14-day label shift is applied
    # to the resampled time index explicitly instead.
    ds = ds.resample(time='MS').mean()
    ds['time'] = ds.get_index('time') + pd.Timedelta(14, 'd')

    # Stop after the first year: the export step below is disabled, so `ds`
    # holds only that year when later cells use it.
    break

#     del ds.attrs['grid_mapping']
#     del ds.time.attrs['units']
#     for var in ds.data_vars:
#         del ds[var].attrs['grid_mapping']

#     ds.to_netcdf('data/FC_median_'+y+'.nc')
    

In [None]:
%%time
# Evaluate `ds` and rebind the name to the computed result; the cell magic
# above reports how long this takes.
ds = ds.compute()

In [None]:
# Align the time axes of `ds` and `wo`.
# NOTE(review): `wo` is not defined in any cell visible here -- it must be
# created elsewhere before this cell can run.

# Convert both time coordinates to hour precision.
ds_time = np.array(ds.time, dtype='datetime64[h]')
wo_time = np.array(wo.time, dtype='datetime64[h]')

# Replace each dataset's time coordinate with the hour-precision values.
ds['time'] = ds_time
wo['time'] = wo_time

# Pick, from each dataset, the time steps nearest to the other's time steps.
# The second selection uses the already-subset `ds`.
ds = ds.sel(time=wo.time, method='nearest')
wo = wo.sel(time=ds.time, method='nearest')

In [None]:
# Show the raw timestamp at position 116 of the `ds` time axis.
ds.time.isel(time=116).values

In [None]:
# Display weekly means of `ds` (the result is not stored).
ds.resample(time='1W').mean()

In [None]:
# Count how many weekly time labels compare equal between `wo` and `ds`.
# NOTE(review): `wo` is not defined in the cells visible here.
np.sum(wo.resample(time='1W').mean().time == ds.resample(time='1W').mean().time)

In [None]:
# Element-wise comparison of the raw time values of `ds` and `wo`.
ds.time.values == wo.time.values

In [None]:
# Show the first `ds` timestamp converted to day precision.
np.array(ds.time.values[0], dtype='datetime64[D]')

In [None]:
# Count the positions where the hour-precision time arrays are equal.
np.sum(ds_time == wo_time)

In [None]:
# Show the second-to-last time step of `ds`.
ds.time.isel(time=-2)# ==wo.time.isel(time=19)

In [None]:
# Show the second-to-last time step of `wo`.
wo.time.isel(time=-2)# == ds.time.values

In [None]:
# Display the full time coordinate of `ds`.
ds.time

In [None]:
# Show the first raw timestamp of `ds`.
ds.time.values[0]

In [None]:
# Show the first `ds` timestamp converted to hour precision.
np.array(ds.time.values[0], dtype='datetime64[h]')

In [None]:
%%time
# Evaluate `ds` again and time it; this repeats the earlier compute cell.
ds = ds.compute()

In [None]:
# Plot the first time step of the `pv` variable as an image.
# NOTE(review): `pv` only appears in the commented-out fractional cover
# measurement list; confirm that `ds` contains it with the active settings.
ds.pv.isel(time=0).plot.imshow(size=6)

In [None]:
# Build an interactive map and add the first time step of `pv` to it.
# NOTE(review): `pv` only appears in the commented-out fractional cover
# measurement list; confirm that `ds` contains it with the active settings.
m = folium.Map(control_scale = True)
ds.pv.isel(time=0).odc.add_to(m, robust=True)

# Zoom the map to the bounds reported for the data
m.fit_bounds(ds.pv.odc.map_bounds())

# Add Esri World Imagery tiles as a toggleable overlay layer.
tile = folium.TileLayer(
        tiles = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
        attr = 'Esri',
        name = 'Esri Satellite',
        overlay = True,
        control = True
       ).add_to(m)

# Add the layer switcher and render the map in the notebook.
folium.LayerControl().add_to(m)
display(m)

## Stitch together output

In [None]:
import os

In [None]:
# Gather the paths of all NetCDF files in the output folder, sorted by path,
# and report how many were found.
base = 'data/'
files = sorted(base + name for name in os.listdir(base) if name.endswith('.nc'))
print(len(files))

In [None]:
# Open all listed NetCDF files as a single dataset.
ds = xr.open_mfdataset(files) #chunks=dict(latitude=1000, longitude=1000)
# ds = assign_crs(ds, crs='epsg:4326')

In [None]:
# Write the combined dataset to a single NetCDF file.
# NOTE(review): the filename says 1987_2002 while the settings elsewhere in
# the notebook refer to 2022 -- confirm the intended year range.
ds.to_netcdf('Fractional_cover_LS_annual_1987_2002.nc')