## ewf-ext-02-03-06 - NDVI long term averages of growing season time series per parcel

NDVI long term averages of growing season time series per parcel

---

### <a name="service"></a>Service definition

In [1]:
# Service descriptor: title, abstract and identifier of this workflow.
service = dict([('title', 'NDVI long term averages of growing season time series per parcel'),
                ('abstract', 'NDVI long term averages of growing season time series per parcel'),
                ('id', 'ewf-ext-02-03-06')])

### <a name="parameter"></a>Parameter Definition

In [2]:
# Region of interest as a WKT polygon; its 'value' is written as the geometry
# entry of the output .properties file.
regionOfInterest = dict([('id', 'regionOfInterest'),
                         ('value', 'POLYGON ((-8.864205 38.88616500000001, -8.864205 38.986165, -8.964205000000002 38.986165, -8.964205000000002 38.88616500000001, -8.864205 38.88616500000001))'),
                         ('title', 'WKT Polygon for the Region of Interest'),
                         ('abstract', 'Set the value of WKT Polygon')])

In [3]:
# Name (label) of the region of interest.
# NOTE(review): not referenced by the workflow cells visible in this notebook;
# the region token of the output name is taken from the input file name instead - confirm.
nameOfRegion = dict([('id', 'nameOfRegion'),
                     ('value', 'P001'),
                     ('title', 'Name of Region'),
                     ('abstract', 'Name of the region of interest'),
                     ('minOccurs', '1')])

In [4]:
# Catalogue index of the NDVI growing season statistics; its 'value' is the first
# half of the '<index>:<apikey>' credentials passed to ciop.search.
aggIndex = dict([('id', 'aggIndex'),
                 ('value', 'better-ext-02-03-02'),
                 ('title', 'NDVI growing season statistics catalog index'),
                 ('abstract', 'index to access ndvi catalog'),
                 ('minOccurs', '1')])

In [5]:
# API key paired with aggIndex in the credentials passed to ciop.search.
# The value is empty here - presumably supplied at run time (TODO confirm);
# do not commit a real key in this cell.
aggApikey = dict([('id', 'aggApikey'),
                  ('value', ''),
                  ('title', 'NDVI growing season statistics catalog apikey'),
                  ('abstract', 'apikey to access ndvi catalog'),
                  ('minOccurs', '1')])

### <a name="runtime"></a>Runtime parameter definition

**Input identifiers**

These are the identifiers of the NDVI statistics products.

In [6]:
# Catalogue identifiers of the yearly NDVI statistics products to average.
# According to the catalogue titles shown in the metadata table further down, they are,
# in this order: LE07_ndviStats_P001_2015005_2015365.xlsx,
# LE07_ndviStats_P001_2016005_2016365.xlsx and LE07_ndviStats_P001_2017005_2017365.xlsx.
# NOTE(review): the visible workflow cells read input_references, not this tuple - confirm it is still needed.

input_identifiers = ('97BB90B4EE53686D5254029BD0EA464E17303967','C7EDAD587D159682876390A2B37F3D23FFF07794','390E80B53CE5E8438EFE620936E55E79CC0EB476')

**Input references**

These are the catalogue references of the NDVI statistics products.

In [7]:
# One catalogue search URL per input product; the uid query parameter of each URL
# is the matching entry of input_identifiers.
input_references = ['https://catalog.terradue.com/better-ext-02-03-02/search?format=atom&uid=97BB90B4EE53686D5254029BD0EA464E17303967','https://catalog.terradue.com/better-ext-02-03-02/search?format=atom&uid=C7EDAD587D159682876390A2B37F3D23FFF07794','https://catalog.terradue.com/better-ext-02-03-02/search?format=atom&uid=390E80B53CE5E8438EFE620936E55E79CC0EB476']

**Data path**

This path defines where the data is staged-in. 

In [8]:
# Folder the input workbooks are read from (joined with each enclosure file name to build file_list).
# NOTE(review): this points at the ewf-ext-02-03-02 workspace although this service is
# ewf-ext-02-03-06 - confirm that is intended.
data_path = "/workspace/dev/ewf-ext-02-03-02/src/main/app-resources/notebook/libexec"

**Aux folders**

In [9]:
# Folder the output workbook is written to; with the empty string os.path.join returns
# the bare file name, so the output lands in the current working directory.
output_folder = ''

#### Import Modules

In [10]:
import os
import shutil

import sys
import string
import numpy as np
from osgeo import gdal, ogr, osr
from shapely.wkt import loads

import datetime

import pdb

import pandas as pd
import geopandas as gpd

import cioppy
# Shared cioppy client, used below for logging (ciop.log) and catalogue searches (ciop.search).
ciop = cioppy.Cioppy()

#### Auxiliary vars

In [11]:
# NOTE(review): this flag is not referenced by any cell visible in this notebook - confirm it is still needed.
check_results = True

#### Auxiliary methods

In [12]:
def get_input_metadata(input_refs):
    """Query the catalogue for every product reference and collect the metadata.

    Args:
        input_refs: iterable of catalogue search URLs, one per product.

    Returns:
        A geopandas GeoDataFrame with one row per reference, in the order of
        ``input_refs``, whose 'startdate' and 'enddate' columns have been
        converted with ``pandas.to_datetime``.

    Raises:
        IndexError: if a reference does not match any catalogue entry.
    """
    # credentials are the same for every query: '<index>:<apikey>' built from
    # the aggIndex / aggApikey notebook parameters
    creds = '{}:{}'.format(aggIndex['value'], aggApikey['value'])

    result_prod = []

    for product_ref in input_refs:

        entries = ciop.search(end_point=product_ref,
                              params=[],
                              output_fields='self,identifier,startdate,enclosure,startdate,enddate,wkt,title',
                              creds=creds)

        # fail with an explicit message instead of a bare "list index out of range"
        if len(entries) == 0:
            raise IndexError('No catalogue entry found for %s' % product_ref)

        # since the search is by identifier, only the first entry is kept
        result_prod.append(entries[0])

    input_metadata = gpd.GeoDataFrame.from_dict(result_prod)

    input_metadata['startdate'] = pd.to_datetime(input_metadata['startdate'])
    input_metadata['enddate'] = pd.to_datetime(input_metadata['enddate'])

    return input_metadata

def get_formatted_date(date_str):
    """Format a datetime as 'YYYY-MM-DDT00:00:00Z'.

    Despite its name, ``date_str`` must be a ``datetime.datetime`` instance,
    not a string. Only the year, month and day are used: the time part of the
    result is always the literal '00:00:00Z'.
    """
    midnight_format = '%Y-%m-%dT00:00:00Z'
    return datetime.datetime.strftime(date_str, midnight_format)


def write_properties_file(output_name, first_date, last_date, region_of_interest):
    """Write the '<output_name>.properties' file that accompanies an output.

    The file holds three entries: 'title' ('Output <output_name>'), 'date'
    ('<first>/<last>', both formatted by get_formatted_date) and 'geometry'.

    Args:
        output_name: path of the output the properties describe; the
            properties file is created next to it with '.properties' appended.
        first_date: datetime of the start of the covered period.
        last_date: datetime of the end of the covered period.
        region_of_interest: geometry text written as-is (a WKT polygon in this notebook).
    """
    title = 'Output %s' % output_name

    first_date = get_formatted_date(first_date)
    last_date = get_formatted_date(last_date)

    # Text mode: the content is str, which a binary-mode ('wb') handle rejects
    # with a TypeError on Python 3. On POSIX the bytes written are unchanged.
    with open(output_name + '.properties', 'w') as properties_file:
        properties_file.write('title=%s\n' % title)
        properties_file.write('date=%s/%s\n' % (first_date, last_date))
        properties_file.write('geometry=%s' % (region_of_interest))

#### Workflow

##### Load metadata from catalog

In [13]:
# announce the step in the processing log
message = 'Loading metadata from catalog'
ciop.log('INFO', message)

# fetch the catalogue metadata of every input product and order it by startdate
input_metadata = get_input_metadata(input_references).sort_values(by='startdate')

# show the resulting table
input_metadata

reporter:status:2019-11-04T15:23:12.477802 [INFO   ] [user process] Loading metadata from catalog
2019-11-04T15:23:12.477802 [INFO   ] [user process] Loading metadata from catalog


Unnamed: 0,enclosure,enddate,identifier,self,startdate,title,wkt
0,https://store.terradue.com/better-ext-02-03-02...,2015-12-31,97BB90B4EE53686D5254029BD0EA464E17303967,https://catalog.terradue.com/better-ext-02-03-...,2015-01-05,Output LE07_ndviStats_P001_2015005_2015365.xlsx,"POLYGON((-8.864205 38.886165,-8.864205 38.9861..."
1,https://store.terradue.com/better-ext-02-03-02...,2016-12-30,C7EDAD587D159682876390A2B37F3D23FFF07794,https://catalog.terradue.com/better-ext-02-03-...,2016-01-05,Output LE07_ndviStats_P001_2016005_2016365.xlsx,"POLYGON((-8.864205 38.886165,-8.864205 38.9861..."
2,https://store.terradue.com/better-ext-02-03-02...,2017-12-31,390E80B53CE5E8438EFE620936E55E79CC0EB476,https://catalog.terradue.com/better-ext-02-03-...,2017-01-05,Output LE07_ndviStats_P001_2017005_2017365.xlsx,"POLYGON((-8.864205 38.886165,-8.864205 38.9861..."


##### Compute Long Term Averages

In [14]:
# Local path of every input workbook: data_path joined with the file name taken
# from the product enclosure URL (anything after a '?' is removed).
file_list = []
for enclosure in input_metadata['enclosure']:
    workbook_name = os.path.basename(enclosure).split('?')[0]
    file_list.append(os.path.join(data_path, workbook_name))

# one sheet per variable is expected in every workbook
var_names = ['start_growing_season', 'end_growing_season', 'dif_ndvi', 'cumulative_ndvi', 'peak_ndvi']


def _load_sheet(workbook_path, sheet):
    """Read one sheet of a workbook, without the 'Unnamed: 0' column if it has one."""
    sheet_df = pd.read_excel(workbook_path, sheet_name=sheet)

    # presumably a saved index column that carries no data - confirm
    if 'Unnamed: 0' in sheet_df.columns:
        sheet_df = sheet_df.drop(columns=['Unnamed: 0'])

    return sheet_df


# data: variable name -> list of pandas DataFrames, one per input workbook
# (same order as file_list)
data = {}
for var in var_names:
    data[var] = [_load_sheet(workbook_path, var) for workbook_path in file_list]

In [15]:

def compute_long_term_average(season_frames, var):
    """Average one variable across seasons, row by row.

    Args:
        season_frames: list of DataFrames, one per season and ordered from the
            first to the last season, each with 'start_date', 'end_date' and
            ``var`` columns.
        var: name of the column to average.

    Returns:
        DataFrame with the 'start_date' of the first season, the 'end_date' of
        the last season and the per-row mean of ``var``.
    """
    # stack the seasons; rows sharing an index label are averaged together
    # NOTE(review): assumes every season lists the same rows in the same order - confirm
    stacked = pd.concat(season_frames)

    # Average only the variable column: the other columns are not kept, and
    # averaging the whole frame fails on pandas >= 2.0 if any column is non-numeric.
    var_means = stacked.groupby(level=0)[var].mean()

    return pd.concat([season_frames[0]['start_date'],
                      season_frames[-1]['end_date'],
                      var_means], axis=1)


# long term average table of every variable, keyed by variable name
LTA_data = {}

for var in var_names:
    LTA_data[var] = compute_long_term_average(data[var], var)

#### Write output

In [16]:
# The mission, product and region tokens are the first three '_'-separated
# parts of the first input file name.
name_parts = file_list[0].rsplit('/', 1)[-1].split('_')
mission, prod, aoi_name = name_parts[0], name_parts[1], name_parts[2]

# Covered period, read from row 0 of the start_growing_season table.
season_table = LTA_data['start_growing_season']
first_start = season_table['start_date'][0]
last_end = season_table['end_date'][0]

start_date = str(first_start.year)
end_date = str(last_end.year)

# LTA_<mission>_<product>_<region>_<first year>_<last year>.xlsx inside output_folder
excel_output_name = os.path.join(
    output_folder,
    '_'.join(['LTA', mission, prod, aoi_name, start_date, end_date]) + '.xlsx')

print(excel_output_name)

# one sheet per variable, named after the variable
with pd.ExcelWriter(excel_output_name) as writer:
    for key, lta_frame in LTA_data.items():
        lta_frame.to_excel(writer, sheet_name=key)

# companion metadata file: <excel_output_name>.properties
write_properties_file(excel_output_name, first_start, last_end, regionOfInterest['value'])

LTA_LE07_ndviStats_P001_2015_2017.xlsx
