### <a name="service"></a>Service Definition

In [None]:
# Service descriptor: identifier, display title and abstract of this processing service.
service = dict([('title', 'Long Term Averages'),
                ('abstract', 'Long term averages of aggregated vegetation indicators, aggregated LST and rainfall estimates'),
                ('id', 'wfp-01-03-04')])

### <a name="parameter"></a>Parameter Definition

In [None]:
# Product selector. Its 'value' decides which processing cell runs further down:
# 'FAPAR' or 'LAI' trigger the vegetation-indicator loop, 'CHIRPSv2' the CHIRPS loop.
oe_product = dict([('id', 'oe_product'),
                   ('value', 'CHIRPSv2'),
                   ('title', 'CHIRPSv2, LAI or FAPAR'),
                   ('abstract', 'CHIRPSv2, LAI or FAPAR')])

In [None]:
# Start date parameter (ISO 8601 timestamp).
# NOTE(review): not read by any cell visible in this notebook - presumably consumed by the hosting platform; confirm.
startdate = dict([('id', 'startdate'),
                  ('value', '2014-01-01T00:00Z'),
                  ('title', 'Start date'),
                  ('abstract', 'Start date')]) 

In [None]:
# End date parameter (ISO 8601 timestamp).
# NOTE(review): not read by any cell visible in this notebook - presumably consumed by the hosting platform; confirm.
enddate = dict([('id', 'enddate'),
                ('value', '2018-01-01T00:00Z'),
                ('title', 'End date'),
                ('abstract', 'End date')])

In [None]:
# 'update' parameter: two ISO 8601 timestamps separated by '/'.
# NOTE(review): looks like a catalogue update-time interval, but no visible cell reads it - confirm its purpose.
update = dict([('id', 'update'),
               ('value', '2020-06-01T00:00Z/2020-06-11T00:00Z'),
               ('title', 'update'),
               ('abstract', 'update')])

In [None]:
# Catalogue search endpoint parameter.
# NOTE(review): this dict is not read by any visible cell; the processing cells query the
# per-product URLs listed in input_references instead - confirm whether it is still needed.
catalogue_url = dict([('id', 'catalogue_url'),
                      ('value', 'https://catalog.terradue.com/better-wfp-00007/search'),
                      ('title', 'catalogue url'),
                      ('abstract', 'catalogue url')])

### <a name="runtime"></a>Runtime parameter definition

**Input references**

In [None]:
# Default input references: one catalogue entry URL per product to process.
# Each URL is resolved through ciop.search by get_info further down.
# The original label for this list is "chirps" - presumably these are CHIRPS aggregations
# matching the default oe_product value; confirm before switching products.
input_references = ['https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=D439CE02D08C17357E7F74AE4F705B1AA8B36ED4',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=D802F6B72172CDA321D72900A7CEF999A4425D6F',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=07F4BDE764FFC44A383C456544E254509FC71240',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=449AC1A215D5808A38C276067214069E7B098984',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=B1BDEC2E01B64B1E1F66DE76E45EAC3EC3ADB0B7',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=4CE22627C6210612F47B7EF696AE0F58E50DB966',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=9DC33D4A6F0C56BCED43A69C863C0CAB3FF215C4',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=07E4EE4D7B7969C7EC7E49AFDE6D7C2E8FF7E060',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=5DA7E0E6B4639993E30E96FC0F91E3227CDC2510',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=7FF175AEA419219213F7A0FE744E665205B90075',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=B251E2E5E19E7F408CA3556C80A0F2FF51F97A64',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=44B0123B0518B335A0EAD40AFF753C41C66B7FA1',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=2F0061E6315C54E1FA569C20CE337917A33BBE31',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=17E7E24A8B0632E6C65560CE1630A2DB91EE2466',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=AEBF293831A874781E68687E1B96747AA8CEF2A2',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=2EBE0ABE04CBE6B780276983CB060928CA7299DF',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=4A7721E122974D16ECFDC91F195944F9FF6DF2DE',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=BEAFDC105C63BFF667CFFFCC759C6FD1BCA0F7EE',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=BF4A447F6D0CF6E60EB74DDB86140EC6C4CC9238',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=F5E80E0FEC4FFC22EC940D37DCDE5EAA50FDC25C',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=CF9023C1EB011513A02DA689C12BA7EB479B6210',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=FB5FCBE5C6BD5A98081B1F20FE58D8E2B05021E9',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=F0BAF9029BCAE2BAE1C518FBC0743B88FFCBB08B',
                    'https://catalog.terradue.com/better-wfp-00007/search?format=atom&uid=0E731F7CCFA4794AD9CD6CDCB5448A1CD9A6F3B9']

### <a name="workflow"></a>Workflow

#### Import the packages required for processing the data

In [None]:
from osgeo import gdal
import gzip
import shutil
import sys
import numpy as np
import pandas as pd
import math
import re
import os
import cioppy
import requests

# Make the local helper module (aux_functions) importable from two candidate locations.
# NOTE(review): presumably the first path is the deployed application and the second the
# development workspace - confirm.
sys.path.append('/application/notebook/libexec')
sys.path.append('/workspace/wfp-01-03-04/src/main/app-resources/notebook/libexec')
from aux_functions import calc_average, matrix_sum, write_output_image, get_matrix_list

# Shared cioppy client used by the catalogue-search helpers defined below.
ciop = cioppy.Cioppy()

#### Auxiliary methods

In [None]:
def get_info(row):
    """Resolve one catalogue reference into a metadata Series.

    Queries the catalogue at ``row['catalogue_url']`` through the shared
    ``ciop`` client and keeps only the first hit.

    Parameters:
        row: mapping-like object (e.g. a DataFrame row) with a
            'catalogue_url' entry.

    Returns:
        pandas.Series holding the requested output fields (self, enclosure,
        startdate, enddate, wkt, updated, title), with 'startdate' and
        'enddate' converted by ``pd.to_datetime``.

    Raises:
        IndexError: if the search returns no results.
    """
    hits = ciop.search(end_point=row['catalogue_url'],
                       params=[],
                       output_fields='self,enclosure,startdate,enddate,wkt,updated,title',
                       model='GeoTime')

    entry = pd.Series(hits[0])

    # Parse both date fields so that the .dt accessors can be used downstream.
    for date_field in ('startdate', 'enddate'):
        entry[date_field] = pd.to_datetime(entry[date_field])

    return entry


In [None]:
def get_product(url, dest):
    """Download ``url`` and store the response body at ``dest``.

    Parameters:
        url: address of the product to download.
        dest: local file path the product is written to.

    Returns:
        The HTTP status code of the response. The file at ``dest`` is only
        written when the status code is 200, so an error page is never stored
        under the product's file name.
    """
    r = requests.get(url)

    if r.status_code == 200:
        # Context manager guarantees the handle is flushed and closed before
        # the caller opens the file again.
        with open(dest, 'wb') as product_file:
            product_file.write(r.content)

    return r.status_code

In [None]:
def get_metadata(filepath):
    """Read the georeferencing and band metadata of a raster file.

    Parameters:
        filepath: path of a raster that ``gdal.Open`` can read.

    Returns:
        Tuple ``(projection, geotransform, no_data_value, data_type)`` where
        the last two values are taken from band 1 of the dataset.
    """
    dataset = gdal.Open(filepath)
    projection = dataset.GetProjection()
    geotransform = dataset.GetGeoTransform()

    # Both band-level values come from the first band.
    first_band = dataset.GetRasterBand(1)
    return (projection,
            geotransform,
            first_band.GetNoDataValue(),
            first_band.DataType)

In [None]:
def get_formatted_date(product_reference):
    """Return the start and end date of a catalogue entry.

    Parameters:
        product_reference: catalogue URL of the product, used as the search
            end point.

    Returns:
        Tuple ``(startdate, enddate)`` exactly as reported by the first
        search result.

    Raises:
        IndexError: if the search returns no results.
    """
    first_hit = ciop.search(end_point=product_reference,
                            params=[],
                            output_fields='identifier,startdate,enddate',
                            model="GeoTime")[0]
    return first_hit['startdate'], first_hit['enddate']

In [None]:
def remove_match_duplicates(gdf_match):
    """Keep only the most recently updated product(s) of each start year.

    For every year present in the 'startdate' column, rows whose 'updated'
    value differs from the maximum 'updated' value of that year are dropped.
    Rows sharing the maximum are all kept.

    Parameters:
        gdf_match: DataFrame with a datetime 'startdate' column and an
            'updated' column.

    Returns:
        A DataFrame without the outdated rows; the input frame itself is not
        modified.
    """
    for current_year in gdf_match['startdate'].dt.year.unique():
        same_year = gdf_match[gdf_match['startdate'].dt.year == current_year]

        # A single product for the year cannot be a duplicate.
        if len(same_year.index.values) <= 1:
            continue

        newest_update = max(same_year['updated'])
        stale_labels = same_year[same_year['updated'] != newest_update].index.values
        gdf_match = gdf_match.drop(stale_labels)

    return gdf_match

In [None]:
def write_properties_file(dataframe, output_name):
    """Write the ``<output_name>.properties`` sidecar file for an output.

    The file holds three ``key=value`` lines: a title, the covered date range
    and the geometry of the first product.

    Parameters:
        dataframe: products that went into the output. The first row provides
            the range start and the geometry ('wkt'), the last row the range
            end; both dates are looked up through ``get_formatted_date`` using
            the rows' 'self' reference.
        output_name: name of the output file; '.properties' is appended to it.
    """
    title = 'Output %s' % output_name
    first_date = get_formatted_date(dataframe.iloc[0]['self'])[0]
    last_date = get_formatted_date(dataframe.iloc[-1]['self'])[1]

    # Text mode: the lines are written as str, which a binary-mode handle
    # rejects with TypeError on Python 3.
    with open(output_name + '.properties', 'w') as properties_file:
        properties_file.write('title=%s\n' % title)
        properties_file.write('date=%s/%s\n' % (first_date, last_date))
        properties_file.write('geometry=%s' % (dataframe.iloc[0]['wkt']))

In [None]:
def calc_lta(dataframe):
    """Download the enclosures of ``dataframe`` and average them.

    Every enclosure is downloaded into the 'tmp_data' directory (which must
    already exist); only downloads answered with HTTP 200 take part in the
    average. The downloaded files used for the average are deleted afterwards.

    Parameters:
        dataframe: DataFrame with an 'enclosure' column of download URLs.

    Returns:
        Tuple ``(lta, projection, geotransform, no_data_value, data_type)``.
        The metadata values are read from the first downloaded file. When no
        file could be downloaded, all five values are None so that callers can
        unpack the result unconditionally and test ``lta is None``.
    """
    file_list = []

    for enclosure in dataframe['enclosure'].tolist():
        filepath = os.path.join('tmp_data', os.path.basename(enclosure))
        status = get_product(enclosure, filepath)
        if status == 200:
            file_list.append(filepath)
    print(file_list)

    if not file_list:
        # Same arity as the success path: callers unpack five values.
        return None, None, None, None, None

    # The divisor is the number of files actually downloaded
    # (presumably one per year - confirm against the input selection).
    n_years = len(file_list)
    agr_period_matrix = get_matrix_list(file_list)
    print('Aggregations converted to matrices')
    lta = calc_average(agr_period_matrix, n_years)

    # Metadata must be read before the temporary files are removed.
    projection, geotransform, no_data_value, data_type = get_metadata(file_list[0])
    for file_ in file_list:
        os.remove(file_)

    return lta, projection, geotransform, no_data_value, data_type

In [None]:
def write_output(lta, period_start_date, period_end_date, product_type, period_N, agr_type, region, projection, geo_transform, image_format, no_data_value, data_type):
    """Write a long term average raster and return its file name.

    The name is built as
    ``LTA_<product>_<region>_<N>_<aggregation>_<start month-day>_<end month-day>_<start year>_<end year>.tif``
    (month and day are not zero-padded) and the image itself is written by
    ``write_output_image``.

    Parameters:
        lta: averaged data to write.
        period_start_date, period_end_date: date objects exposing ``month``,
            ``day`` and ``year``.
        product_type, agr_type, region: string name components.
        period_N: name component, converted with ``str``.
        projection, geo_transform, image_format, no_data_value, data_type:
            passed through to ``write_output_image``.

    Returns:
        The generated output file name.
    """
    start_day_month = '{}-{}'.format(period_start_date.month, period_start_date.day)
    end_day_month = '{}-{}'.format(period_end_date.month, period_end_date.day)

    name_parts = ['LTA', product_type, region, str(period_N), agr_type,
                  start_day_month, end_day_month,
                  str(period_start_date.year), str(period_end_date.year)]
    output_name = '_'.join(name_parts) + '.tif'

    write_output_image(output_name, lta, image_format, data_type, projection, geo_transform, no_data_value=no_data_value)
    return output_name

In [None]:
def write_output_chirps(lta, period_start_date, period_end_date, product_type, period_N, agr_type, region, dekad_flag, projection, geo_transform, image_format, no_data_value, data_type):
    """Write a CHIRPS long term average raster and return its file name.

    The name is built as
    ``LTA_<product>_<region>_<N>_<aggregation>_<end month, two digits>-<dekad flag>_<start year>_<end year>.tif``
    and the image itself is written by ``write_output_image``.

    Parameters:
        lta: averaged data to write.
        period_start_date: date object; only its ``year`` is used.
        period_end_date: date object; its ``month`` and ``year`` are used.
        product_type, agr_type, region, dekad_flag: string name components.
        period_N: name component, converted with ``str``.
        projection, geo_transform, image_format, no_data_value, data_type:
            passed through to ``write_output_image``.

    Returns:
        The generated output file name.
    """
    # Zero-padded end month followed by the dekad flag, e.g. '01-<flag>'.
    month_and_dekad = '{0:02}'.format(period_end_date.month) + '-' + dekad_flag

    name_parts = ['LTA', product_type, region, str(period_N), agr_type,
                  month_and_dekad,
                  str(period_start_date.year), str(period_end_date.year)]
    output_name = '_'.join(name_parts) + '.tif'

    write_output_image(output_name, lta, image_format, data_type, projection, geo_transform, no_data_value=no_data_value)

    return output_name

#### Calculate Long Term Averages

In [None]:
# Scratch directory that calc_lta downloads the input enclosures into;
# it is removed again by the last cell of the notebook.
if not os.path.isdir('tmp_data'):
    os.mkdir('tmp_data')

In [None]:
# A single reference may be supplied as a plain string; normalise it to a list.
if isinstance(input_references, str):
    input_references = [input_references]

# One row per input reference, then replace each row with the metadata
# (self, enclosure, startdate, enddate, wkt, updated, title) that get_info
# retrieves from the catalogue.
gpd_data = pd.DataFrame(input_references, columns=['catalogue_url'])
gpd_data = gpd_data.apply(get_info, axis=1)

gpd_data.head(10)

In [None]:
# Order the products chronologically by end date. The processing cells below take the
# first remaining row as the reference product and use the first and last rows of each
# matched group for the start and end year of the output name.
gpd_data = gpd_data.sort_values(by='enddate')
gpd_data.head(10)

In [None]:
if oe_product['value'] in ('FAPAR', 'LAI'):

    # Consume gpd_data one group at a time: take the first remaining product,
    # collect every product that matches it, average the group and then drop
    # the group, until no product is left.
    while not gpd_data.empty:

        # Reference product of this group: the first remaining row.
        l1 = gpd_data.iloc[0]

        # Enclosure file name (extension stripped) split on '_':
        # [0] product type, [1] region, [2] N value, [3] aggregation type.
        name_parts = os.path.splitext(os.path.basename(l1['enclosure']))[0].split('_')

        agr = name_parts[3]
        prod_type = name_parts[0]
        N_value = name_parts[2]
        region = name_parts[1]

        #
        # match those with same start day, start month, end day, end month, wkt,
        # aggregation type and product type.
        # regex=False: the file name fragments are literal text, not patterns.
        #
        match = gpd_data[(gpd_data['startdate'].dt.day == l1['startdate'].day) &
                         (gpd_data['startdate'].dt.month == l1['startdate'].month) &
                         (gpd_data['enddate'].dt.day == l1['enddate'].day) &
                         (gpd_data['enddate'].dt.month == l1['enddate'].month) &
                         (gpd_data['wkt'] == l1['wkt']) &
                         (gpd_data['enclosure'].str.contains(agr, regex=False)) &
                         (gpd_data['enclosure'].str.contains(prod_type, regex=False))]

        indexes = match.index.values

        # An average needs at least two products.
        if len(indexes) > 1:

            print(len(match.index.values))
            match = remove_match_duplicates(match)
            print(len(match.index.values))

            # Inspect the first element before unpacking so that a failure
            # result (lta is None) is skipped whatever its length.
            lta_result = calc_lta(match)
            lta = lta_result[0]
            if lta is not None:
                lta, projection, geo_transform, no_data_value, data_type = lta_result
                filename = write_output(lta, match.iloc[0]['startdate'], match.iloc[-1]['enddate'], prod_type, N_value, agr, region, projection, geo_transform, 'GTiff', no_data_value, data_type)
                print(filename)
                write_properties_file(match, filename)

        gpd_data = gpd_data.drop(indexes)

        # Guarantee progress: if the reference row did not match itself
        # (e.g. a missing wkt compares unequal to itself) it would otherwise
        # stay in gpd_data forever and the loop would never end.
        if l1.name in gpd_data.index:
            gpd_data = gpd_data.drop(l1.name)

In [None]:
if oe_product['value'] == 'CHIRPSv2':

    # Consume gpd_data one group at a time: take the first remaining product,
    # collect every product that matches it, average the group and then drop
    # the group, until no product is left.
    while not gpd_data.empty:

        # Reference product of this group: the first remaining row.
        l1 = gpd_data.iloc[0]

        # Enclosure file name (extension kept) split on '_':
        # [0] product type, [1] region, [2] N value, [3] aggregation type,
        # [4] date string whose last '-'-separated part is the dekad flag.
        name_parts = os.path.basename(l1['enclosure']).split('_')

        agr = name_parts[3]
        prod_type = name_parts[0]
        N_value = name_parts[2]
        region = name_parts[1]

        str_date = name_parts[4]

        # Still carries the file extension here; it is stripped before the
        # flag is used in the output name.
        dekad_flag = str_date.split('-')[-1]

        #
        # match those with same end month, wkt, aggregation type, N value,
        # product type and dekad flag.
        # regex=False: the file name fragments are literal text, not patterns
        # (the dekad flag contains a '.', which is a regex wildcard).
        #
        match = gpd_data[(gpd_data['enddate'].dt.month == l1['enddate'].month) &
                         (gpd_data['wkt'] == l1['wkt']) &
                         (gpd_data['enclosure'].str.contains('_' + agr + '_', regex=False)) &
                         (gpd_data['enclosure'].str.contains('_' + N_value + '_', regex=False)) &
                         (gpd_data['enclosure'].str.contains(prod_type, regex=False)) &
                         (gpd_data['enclosure'].str.contains(dekad_flag, regex=False))]

        indexes = match.index.values

        # An average needs at least two products.
        if len(indexes) > 1:

            print('match att:')
            print(l1['enddate'].month, '_' + agr + '_', '_' + N_value + '_', prod_type, dekad_flag)

            print(len(match.index.values))
            # NOTE(review): remove_match_duplicates is not applied to CHIRPS
            # matches (the call was commented out) - confirm this is intended.

            # Inspect the first element before unpacking so that a failure
            # result (lta is None) is skipped whatever its length.
            lta_result = calc_lta(match)
            lta = lta_result[0]
            if lta is not None:
                lta, projection, geo_transform, no_data_value, data_type = lta_result
                dekad_flag = dekad_flag.split('.')[0] # remove .tif
                filename = write_output_chirps(lta, match.iloc[0]['startdate'], match.iloc[-1]['enddate'], prod_type, N_value, agr, region, dekad_flag, projection, geo_transform, 'GTiff', no_data_value, data_type)
                print(filename)
                write_properties_file(match, filename)

        gpd_data = gpd_data.drop(indexes)

        # Guarantee progress: if the reference row did not match itself
        # (e.g. a missing wkt compares unequal to itself) it would otherwise
        # stay in gpd_data forever and the loop would never end.
        if l1.name in gpd_data.index:
            gpd_data = gpd_data.drop(l1.name)

#### Remove temporary files and folders

In [None]:
# Delete the scratch download directory and everything left in it.
# A failure is only reported, so it does not abort the notebook run.
try:
    shutil.rmtree('tmp_data')
except OSError as e:
    print("Error: %s : %s" % ('tmp_data', e.strerror))

This work is licensed under an [Attribution-ShareAlike 4.0 International License (CC BY-SA 4.0)](http://creativecommons.org/licenses/by-sa/4.0/)

YOU ARE FREE TO:

* Share - copy and redistribute the material in any medium or format.
* Adapt - remix, transform, and build upon the material for any purpose, even commercially.

UNDER THE FOLLOWING TERMS:

* Attribution - You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
* ShareAlike - If you remix, transform, or build upon the material, you must distribute your contributions under the same license as the original.