## ewf-ext-02-03-06 - NDVI long term averages of growing season time series per parcel

NDVI long term averages of growing season time series per parcel

---

### <a name="service"></a>Service definition

In [15]:
# Service metadata: display title, abstract and unique identifier of this application.
service = {
    'title': 'NDVI long term averages of growing season time series per parcel',
    'abstract': 'NDVI long term averages of growing season time series per parcel',
    'id': 'ewf-ext-02-03-06',
}

### <a name="parameter"></a>Parameter Definition

In [16]:
# Region of interest, given as a WKT polygon.
regionOfInterest = {
    'id': 'regionOfInterest',
    'value': 'POLYGON ((-8.864205 38.88616500000001, -8.864205 38.986165, -8.964205000000002 38.986165, -8.964205000000002 38.88616500000001, -8.864205 38.88616500000001))',
    'title': 'WKT Polygon for the Region of Interest',
    'abstract': 'Set the value of WKT Polygon',
}

# Short name of the region of interest.
nameOfRegion = {
    'id': 'nameOfRegion',
    'value': 'P001',
    'title': 'Name of Region',
    'abstract': 'Name of the region of interest',
    'minOccurs': '1',
}

### <a name="runtime"></a>Runtime parameter definition

**Input identifiers**

These are the identifiers of the NDVI statistics files.

In [17]:
# NDVI statistics workbooks to average, one entry per growing season
# (here the 2015 season is listed twice).
input_identifiers = (
    'LE07_ndviStats_P001_2015005_2015365.xlsx',
    'LE07_ndviStats_P001_2015005_2015365.xlsx',
)

**Input references**

These are the MODIS stack catalogue references.

In [18]:
# One catalogue search URL per input identifier, in the same order.
input_references = tuple(map('https://catalog.terradue.com/modis/search?format=atom&uid={0}'.format, input_identifiers))

**Data path**

This path defines where the data is staged in.

In [19]:
# Directory holding the staged-in input files; the load step joins it with
# the file name of each input identifier.
data_path = "/workspace/dev/ewf-ext-02-03-02/src/main/app-resources/notebook/libexec"

**Aux folders**

In [20]:
# Folder the output workbook is written to; it is joined with the output file
# name, so the empty string yields a path relative to the working directory.
output_folder = ''

#### Import Modules

In [22]:
import os
import shutil

import sys
import string
import numpy as np
from osgeo import gdal, ogr, osr
from shapely.wkt import loads

import datetime

import pdb

import pandas as pd

#### Auxiliary vars

In [23]:
# NOTE(review): presumably toggles optional result checks; it is not read by
# any of the cells visible in this notebook - confirm before relying on it.
check_results = True

#### Workflow

Load data

In [24]:
# Resolve every input identifier to its staged-in file. Only the last
# '/'-separated component is kept, so identifiers given as paths also work.
file_list = [os.path.join(data_path, in_id.split('/')[-1]) for in_id in input_identifiers]

# Growing-season variables; each one is read from the worksheet of the same name.
var_names = ['start_growing_season', 'end_growing_season', 'dif_ndvi', 'cumulative_ndvi', 'peak_ndvi']

# Parse each workbook only once: with a list of sheet names read_excel returns
# a {sheet name: DataFrame} dictionary instead of re-opening the file per sheet.
workbooks = [pd.read_excel(path, sheet_name=var_names) for path in file_list]

# load data into a python dictionary
# key -> variable name
# content -> list of pandas dataframes, one per season (TS), in input order
data = {}

for var in var_names:
    # 'Unnamed: 0' is presumably the header-less index column saved with the
    # sheet; it carries no data, and errors='ignore' keeps the load working
    # for workbooks that were written without it.
    data[var] = [sheets[var].drop(columns=['Unnamed: 0'], errors='ignore') for sheets in workbooks]

Compute Long Term Averages

In [25]:

# new python dictionary to store LTAs
# key -> variable name
# content -> dataframe with start_date, end_date and the mean of the variable
LTA_data = {}

# for each variable, compute the mean across all seasons
for var in var_names:

    seasons = data[var]

    # stack the seasons on top of each other; every row keeps its original
    # index label, so matching rows of different seasons share a label
    df_concat = pd.concat(seasons)

    # group by row index and average only the variable's own column.
    # Averaging the whole frame would also aggregate columns that are never
    # used and, since pandas 2.0 (numeric_only defaults to False), fails as
    # soon as a sheet contains a text column.
    var_means = df_concat.groupby(df_concat.index)[var].mean()

    # create new dataframe where the first column is the start date of the
    # first set of data, the second is the end date of the last set of data
    # and the third is the mean value of the variable
    LTA_data[var] = pd.concat([seasons[0]['start_date'], seasons[-1]['end_date'], var_means], axis=1)

#### Write output

In [26]:
# The output name reuses the <mission>_<product>_<aoi>_... prefix of the first
# input. Any leading path is stripped first, exactly as the load step does,
# so a path-like identifier cannot leak its directory into the mission field.
name_parts = input_identifiers[0].split('/')[-1].split('_')

mission = name_parts[0]
prod = name_parts[1]
aoi_name = name_parts[2]

# Years covered by the averages, taken from the first row of one variable
# (start date of the first input, end date of the last input).
# .iloc[0] selects that row by position, whatever the index labels are.
start_date = str(LTA_data['start_growing_season']['start_date'].iloc[0].year)
end_date = str(LTA_data['start_growing_season']['end_date'].iloc[0].year)

excel_output_name = '_'.join(['LTA', mission, prod, aoi_name, start_date, end_date]) + '.xlsx'

excel_output_name = os.path.join(output_folder, excel_output_name)

print(excel_output_name)

# write one worksheet per variable; the context manager saves and closes
# the workbook on exit
with pd.ExcelWriter(excel_output_name) as writer:

    for key in LTA_data:

        LTA_data[key].to_excel(writer, sheet_name=key)


LTA_LE07_ndviStats_P001_2015_2015.xlsx
