# Testing

* **Products used:** `ls8_sr`, `ls7_sr`


### Background 

### Description


## Getting started


### Load packages
Load key Python packages and any supporting functions for the analysis.

In [None]:
import datacube
import datacube.utils.rio
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import stats
import pandas as pd
import xarray as xr
import geopandas as gpd
from odc.io.cgroups import get_cpu_quota
from sklearn.metrics import mean_absolute_error
from datacube.utils.geometry import assign_crs
from deafrica_tools.datahandling import load_ard
from deafrica_tools.plotting import map_shapefile
from deafrica_tools.bandindices import calculate_indices
from deafrica_tools.classification import collect_training_data

# Set the default rasterio configuration (aws='auto', cloud defaults enabled);
# this will speed up loading data
datacube.utils.rio.set_default_rio_config(cloud_defaults=True, aws='auto')

### Analysis parameters


In [None]:
# Vector file of labelled training samples, and the column holding the label
path = 'data/harmonization_training_sample.geojson'
field = 'Class'

# Datacube load settings; these are collected into the `query` dictionary
time = ('2013', '2020')
measurements = ['red', 'nir']
resampling = {
    'red': 'bilinear',
    'nir': 'bilinear',
    '*': 'nearest',
}
resolution = (-30, 30)
output_crs = 'epsg:6933'

In [None]:
import os

# Number of worker processes available to this session.
# `get_cpu_quota()` may return None when no CPU quota is set, in which case
# `round()` would raise a TypeError; fall back to the machine's CPU count.
# A fractional quota below 0.5 would round to 0, so always keep one worker.
cpu_quota = get_cpu_quota()
if cpu_quota is None:
    ncpus = os.cpu_count() or 1
else:
    ncpus = max(1, round(cpu_quota))
print(f'ncpus = {ncpus}')

## View the selected location
The next cells load the training samples from `path` and print the number of crop and non-crop samples.
To display the samples on an interactive map, uncomment the `input_data.explore(...)` cell below.
Zoom in and out of that map to get a better understanding of the area of interest.

In [None]:
# Read the training samples (a GeoJSON file, see `path`) into a GeoDataFrame
# and preview the first five rows
input_data = gpd.read_file(path)
input_data.head(n=5)

In [None]:
# Report how many samples carry each class label (1 = crop, 0 = non-crop).
# The label column is looked up through `field` so that this summary stays in
# step with the column passed to `collect_training_data`.
class_labels = input_data[field]
print(f'crop = {(class_labels == 1).sum()}')
print(f'noncrop = {(class_labels == 0).sum()}')

In [None]:
# input_data.explore(column=field, cmap='viridis')

In [None]:
# Datacube query shared by every training sample; built from the analysis
# parameters defined earlier in the notebook
query = dict(
    time=time,
    measurements=measurements,
    resolution=resolution,
    output_crs=output_crs,
    resampling=resampling,
)

def _merge_sensor_bands(ds_ls8, ds_ls7):
    """Suffix every band with its sensor name and merge both datasets.

    Parameters
    ----------
    ds_ls8, ds_ls7 : xarray.Dataset
        Landsat 8 and Landsat 7 datasets whose variables share the same names.

    Returns
    -------
    xarray.Dataset
        One dataset holding ``<band>_ls8`` and ``<band>_ls7`` variables,
        merged with ``compat='override'``.
    """
    ds_ls8 = ds_ls8.rename({band: band + '_ls8' for band in ds_ls8.data_vars})
    ds_ls7 = ds_ls7.rename({band: band + '_ls7' for band in ds_ls7.data_vars})
    return xr.merge([ds_ls8, ds_ls7], compat='override')


def feature_layers(query):
    """Build paired Landsat 8 / Landsat 7 features for one datacube query.

    Loads ``ls8_sr`` and ``ls7_sr``, computes NDVI for both, aligns Landsat 7
    to the Landsat 8 time steps (nearest acquisition within 2 days), masks
    values outside the open interval (0, 1), and keeps, per pixel, the time
    step at which the Landsat 7 NDVI is highest.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded to ``load_ard`` for the Landsat 8 load.
        ``time``, ``measurements`` and ``output_crs`` are also read from it
        for the Landsat 7 load and for the CRS of the result.

    Returns
    -------
    xarray.Dataset
        Variables named ``<band>_ls8`` and ``<band>_ls7``. If no time step
        can be selected, a dataset with the same variables filled with NaN.
    """
    # connect to the datacube
    dc = datacube.Datacube(app='feature_layers')

    # load Landsat 8 and add NDVI
    ds_ls = load_ard(dc=dc, products=['ls8_sr'], verbose=False, **query)
    ds_ls = calculate_indices(ds_ls, 'NDVI', collection='c2', drop=False)

    # load Landsat 7 onto the Landsat 8 grid. Time range and bands are taken
    # from the query that was passed in (falling back to the previous values)
    # so that both sensors always cover the same request.
    # NOTE(review): the query's `resampling` is not applied to this load, so
    # the two sensors may be resampled differently -- confirm this is intended.
    ds_ls7 = load_ard(
        dc=dc,
        products=['ls7_sr'],
        like=ds_ls.geobox,
        time=query.get('time', time),
        measurements=query.get('measurements', ['red', 'nir']),
        verbose=False,
    )
    ds_ls7 = calculate_indices(ds_ls7, 'NDVI', collection='c2', drop=False)

    # match LS7 at LS8 times with 2-day tolerance
    ds_ls7 = ds_ls7.reindex(time=ds_ls.time, method='nearest', tolerance='2D')

    # remove outliers: keep only values strictly between 0 and 1
    ds_ls7 = ds_ls7.where((ds_ls7 > 0) & (ds_ls7 < 1))
    ds_ls = ds_ls.where((ds_ls > 0) & (ds_ls < 1))

    try:
        # time step of the highest Landsat 7 NDVI, applied to both sensors.
        # The inputs are not reassigned here, so the fallback below always
        # starts from the untouched time series.
        t_max = ds_ls7.NDVI.idxmax('time')
        result = _merge_sensor_bands(
            ds_ls.sel(time=t_max), ds_ls7.sel(time=t_max)
        )
    except Exception:
        # Best-effort fallback (e.g. no valid LS7/LS8 pair): return a dataset
        # with the same variables as a successful run, filled with NaN.
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # and SystemExit still stop the run.
        result = _merge_sensor_bands(ds_ls.mean('time'), ds_ls7.mean('time'))
        # every remaining value is below 1, so this masks everything to NaN
        result = result.where(result > 1000)

    return assign_crs(result, crs=query.get('output_crs', 'epsg:6933'))
        

In [None]:
%%time
# Extract features for the samples from row 7500 onwards (index reset so it
# starts at 0 again), using `feature_layers` to build each sample's features.
# NOTE(review): `ncpus` is hard-coded to 25 here rather than using the value
# derived from the CPU quota earlier in the notebook -- confirm this is intended.
training_subset = input_data[7500:].reset_index(drop=True)
column_names, model_input = collect_training_data(
    gdf=training_subset,
    dc_query=query,
    ncpus=25,
    field=field,
    feature_func=feature_layers,
)

In [None]:
# Tabulate the collected training data under the returned column names and
# preview the first five rows
df = pd.DataFrame(model_input, columns=column_names)
df.head(n=5)

In [None]:
from pathlib import Path

# Save the extracted features. The output folder is created first so that the
# export cannot fail on a missing directory after the extraction has run.
output_csv = Path('results/ndvi_ls8_ls7_7500_10000.csv')
output_csv.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_csv)

In [None]:
# 1330 start time