In [1]:
"""
This script runs post-processing steps for eddy covariance data coming
in one file in the format of europe-fluxdata.eu. This format is very similar
to the ICOS format (the only known difference is the unit of pressure,
which is hPa in europe-fluxdata.eu and kPa in ICOS).

The script covers the following steps:
- spike / outlier detection with mean absolute deviation filter
  after Papale et al. (Biogeosci, 2006)
- ustar filtering after Papale et al. (Biogeosci, 2006)
- carbon flux partitioning with the nighttime method
  of Reichstein et al. (Global Change Biolo, 2005) and
  the daytime method of Lasslop et al. (Global Change Biolo, 2010)
- gap filling with marginal distribution sampling (MDS)
  of Reichstein et al. (Global Change Biolo, 2005)
- flux error estimates using MDS after Lasslop et al. (Biogeosci, 2008)

The script is controlled by a config file in Python's standard configparser
format. The config file includes all possible parameters of used routines.
Default parameter values follow the package REddyProc where appropriate. See
comments in config file for details.

The script currently flags on input all NaN values and given *undefined*
values. Variables should be set to *undefined* in case of other existing flags
before calling the script. Otherwise it should be easy to set the appropriate
flags in the pandas DataFrame dff for the flags after its creation around line
160.

The output file can either have all flagged variables set to *undefined*
and/or can include flag columns for each variable (see config file).

Note, ustar filtering needs at least one full year.

Examples
--------
python postproc_europe-fluxdata.py hesseflux_example.cfg

History
-------
Written, Matthias Cuntz, April 2020
"""

"""

27/09/2021

Integration of Footprint predictor model and satellite images from google earth engine 
to derive empirical remote sensing models and monthly and annual maps.

Written, Mario Alberto Fuentes Monjaraz, October 2021


"""

'\n\n27/09/2021\n\nIntegration of Footprint predictor model and satellite images from google earth engine \nto derive empirical remote sensing models and monthly and annual maps.\n\nWritten, Mario Alberto Fuentes Monjaraz, October 2021\n\n\n'

In [2]:
#Python packages used in the code
from __future__ import division, absolute_import, print_function
import time as ptime
import sys
import configparser
import os.path
import datetime as dt
from datetime import timedelta
import numpy as np
import pandas as pd
import hesseflux as hf
import math
from pyproj import Proj
import matplotlib.pyplot as plt
%matplotlib inline
import ee
import statsmodels.formula.api as smf
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from scipy import stats
from statsmodels.sandbox.regression.predstd import wls_prediction_std
import folium
from folium import plugins

In [3]:
# Function to find, for each given prefix, the first column name beginning with it
def _findfirststart(starts, names):
    """
    Function that finds variables on the head of a table indicating the name or label of the variable  
    and creates a list of the variables located. The 
    """
    hout = []
    for hh in starts:
        for cc in names:
            if cc.startswith(hh):
                hout.append(cc)
                break
    return hout

# The workflow starts here
if __name__ == '__main__':
    t1 = ptime.time()
    
    #*********************************************************************************************************************************************************************
    #1)   Read configuration file
    print('1)   Readding configuration file')
    
    #1.a) Read from command-line interpreter (It must include in the cosole "GPP.py Configs.cfg" located in the same file)
    #if len(sys.argv) <= 1:
    #raise IOError('Input configuration file must be given.')
    #configfile = sys.argv[1]                                                                          #Change 1. Read configuration (different methods)    
                                                                                                       #For this option a Configs folder is required in the main directory
    #1.b)Read from directory path
    #configfile = 'C:/Users/Administrador/OneDrive/Documentos/MSc Thesis/Configs/DNP_e_shape_configuration.cfg'  
    #configfile = 'C:/Users/Usuario/Documents/Pdrive/Final python codes Anna/Codes/'
    configfile = 'Configs/Configs.cfg'                                                
    
    #1.c)Read from gui window
    #configfile = hf.files_from_gui(initialdir='.', title='configuration file')
    
    config = configparser.ConfigParser(interpolation=None)                                             #Constructor
    config.read(configfile)                                                                            #Read configuration file with the constructor
    
    # file path
    datadir   = config['GENERAL'].get('datadir', ".")                                                  #Change 2. Add datadir to read folder from Data folder
    outdir    = config['GENERAL'].get('outdir', ".")
    
    # meteorological data
    meteo_file    = config['DB1FILES'].get('meteo_file', ".")
    
    # program switches                                                                                 #Activates each module of the workflow)
    #------------------------------------------------------------
    outlier   = config['POSTSWITCH'].getboolean('outlier',   True)
    ustar     = config['POSTSWITCH'].getboolean('ustar',     True)
    ustar_noyear  = config['POSTSWITCH'].getboolean('ustar_noyear',     True)                          #Change 3. Add method ustar_noyear to compute the u* filter with a given threshold.
    partition = config['POSTSWITCH'].getboolean('partition', True)                                     #ustar_noyear method has to be used instead of ustar when there is not data 
    fill      = config['POSTSWITCH'].getboolean('fill',      True)                                     #for a full date to calculate automatically a threshold
    fluxerr   = config['POSTSWITCH'].getboolean('fluxerr',   True)
    #------------------------------------------------------------
    daily_values              =  config['POSTSWITCH'].getboolean('daily_values',                True)  #Change 4. All the modules 
    #------------------------------------------------------------
    climatological_footprint  =  config['POSTSWITCH'].getboolean('climatological_footprint ',   True) 
    #------------------------------------------------------------
    vegetation_indices        =  config['POSTSWITCH'].getboolean('vegetation_indices',          True)
    #------------------------------------------------------------
    environmental_variables_station     =  config['POSTSWITCH'].getboolean('environmental_variables_station',          True)
    environmental_variables_satellite   =  config['POSTSWITCH'].getboolean('environmental_variables_satellite',          True)
    tower_observations                  =  config['POSTSWITCH'].getboolean('tower_observations',          True)
    #------------------------------------------------------------
    correlation_analysis        =  config['POSTSWITCH'].getboolean('correlation_analysis',          True)
    correlation_analysis_simple =  config['POSTSWITCH'].getboolean('correlation_analysis',          True)
    calibration_validation      =  config['POSTSWITCH'].getboolean('calibration_validation',          True) 
 
    # input file format
    eufluxfile  = config['POSTIO'].get('inputfile',  '')
    timeformat  = config['POSTIO'].get('timeformat', '%Y%m%d%H%M')
    sep         = config['POSTIO'].get('sep',        ',')
    skiprows    = config['POSTIO'].get('skiprows',   '')
    undef       = config['POSTIO'].getfloat('undef', -9999.)
    swthr       = config['POSTIO'].getfloat('swthr', 10.)
    outputfile  = config['POSTIO'].get('outputfile'  '')
    outundef    = config['POSTIO'].getboolean('outundef',    False)
    outflagcols = config['POSTIO'].getboolean('outflagcols', False)

    # input file variables 
    carbonflux     = config['POSTVAR'].get('carbonflux',        'FC')                                  #Change 4. Add variable to identify the name of the carbon fluxes to compute 
                                                                                                       #Carbon flux name to process in the code (e.g. NEE, FC, FC_1)                                                                                              #This change can be done for all the column names of the input file 
    # mad parameters
    nscan = config['POSTMAD'].getint('nscan', 15)
    nfill = config['POSTMAD'].getint('nfill',  1)
    z     = config['POSTMAD'].getfloat('z',    7)
    deriv = config['POSTMAD'].getint('deriv',  2)
    
    # ustar parameters
    ustarmin       = config['POSTUSTAR'].getfloat('ustarmin',    0.1)
    nboot          = config['POSTUSTAR'].getint('nboot',         1)
    plateaucrit    = config['POSTUSTAR'].getfloat('plateaucrit', 0.95)
    seasonout      = config['POSTUSTAR'].getboolean('seasonout', False)                                #Change 5. Add these parameters in the configuration file                      
    applyustarflag = config['POSTUSTAR'].getboolean('applyustarflag', True)

    # gap-filling parameters
    sw_dev  = config['POSTGAP'].getfloat('sw_dev',  50.)
    ta_dev  = config['POSTGAP'].getfloat('ta_dev',  2.5)
    vpd_dev = config['POSTGAP'].getfloat('vpd_dev', 5.0)
    longgap = config['POSTGAP'].getint('longgap',   60)
    
    # partitioning parameters 
    nogppnight = config['POSTPARTITION'].getboolean('nogppnight', False)
    
    # climatological footprint parameters
    altitude                        = config['CLIMATOLOGICAL'].getfloat('altitude',1.0)                                
    latitude                        = config['CLIMATOLOGICAL'].getfloat('latitude', 36.9985)                        
    longitude                       = config['CLIMATOLOGICAL'].getfloat('longitude', -6.4345)                        
    canopy_height                   = config['CLIMATOLOGICAL'].getfloat('canopy_height ',  0.7)                           
    displacement_height             = config['CLIMATOLOGICAL'].getfloat('displacement_height',  0.2)                          
    roughness_lenght                = config['CLIMATOLOGICAL'].getfloat('roughness_lenght ',  -999)                           
    instrument_height_anenometer    = config['CLIMATOLOGICAL'].getfloat('instrument_height_anenometer',  3.95)
    instrument_height_gas_analyzer  = config['CLIMATOLOGICAL'].getfloat('instrument_height_gas_analyzer',  4.03)
    projection_site                 = config['CLIMATOLOGICAL'].get('projection_site ', '+proj=utm +zone=29 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs') #Change 5.1 Projection of climatological footprint
    
    # vegetation indices parameters 
    max_cloud_coverage              = config['VI'].getint('max_cloud_coverage',         100)
    crs                             = config['VI'].get('crs',                  'EPSG:4326')
    ndviMask                        = config['VI'].getfloat('ndviMask',-100)
    mndviMask                       = config['VI'].getfloat('mndviMask',-100)
 
    #*********************************************************************************************************************************************************************
    #2)   Setting data frames
    print('2)   Formatting data frames')
    t01 = ptime.time()
    
    #------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
    #2.a)   Read eddy covariance files (eufluxfiles)
    print('      Read data: ', eufluxfile)
    
    # Assert iterable                                                                                  #This process reads the names of the eufluxfiles and adds the directory information
    if ',' in eufluxfile:
        eufluxfile = eufluxfile.split(',')
        
        eufluxfile = [ datadir + ee.strip() for ee in eufluxfile ]                                     #Change 6. Add datadir                                                                                 
    else:                                                                                              #If the datadir is not added in this section, the input file need to be in the main directory and not in the datadir file          
        if eufluxfile:                                                                                   
            eufluxfile = [datadir + eufluxfile]
        else:
            try:
                eufluxfile = hf.files_from_gui(
                    initialdir='.', title='europe-fluxdata.eu file(s)')
            except:
                raise IOError("GUI for europe-fluxdata.eu file(s) failed.")

    # Identify rows in the dataframe to skip
    if skiprows == 'None':                                                                             #This process allows to identify the rows to skipt in the data frames
        skiprows = ''
    if skiprows:
        import json  # to analyse int or list, tuple not working
        skiprows = json.loads(skiprows.replace('(', '[').replace(')', ']'))
        
    # Read input files into Panda data frame and check variable availability
    parser = lambda date: dt.datetime.strptime(date, timeformat)                               

    infile = eufluxfile[0]                                                                             #Loads the first file in the eufluxfile list                                                                                          
    df = pd.read_csv(infile, sep, skiprows=skiprows, parse_dates=[0], 
                     date_parser=parser, index_col=0, header=0)
    if len(eufluxfile) > 1:                                                                            #Iterate to integrate all the files in case of data for different years is available 
        for infile in eufluxfile[1:]:                    
            df1 = pd.read_csv(infile, sep, skiprows=skiprows, parse_dates=[0],
                              date_parser=parser, index_col=0, header=0)
            df  = df.append(df1, sort=False)
            
    #------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
    #2.b)   Formatting the input file    
    print('      Formating data: ', eufluxfile)
    
    # Fill null values (NaN) with the undef value (e.g. -9999.)
    df.fillna(undef, inplace=True)
    #df.replace(undef, np.nan, inplace=True)
            
    # Flag.                                                                                             #Create file with flags
    dff              = df.copy(deep=True)
    dff[:]           = 0
    dff[df == undef] = 2                                                                               #(Flag 2) for null values
    #dff[df.isna()]   = 2

    # day / night
    #isday = df['SW_IN'] > swthr                                                                       #This column in the data frame indicates the short wave radiation which
    hsw = ['SW_IN']                                                                                    #can be use to identify difference between day an night. Threshold is set in the configuration file
    hout = _findfirststart(hsw, df.columns)                                                            #Change 7. Use _findfirststart method to look for the SW_IN column
    isday = df[hout[0]] >= swthr
    
    # Remove 'SW_IN' data from the data frame. 
    df['SW_IN']=-9999.                                                                                 #Change 7.1 Remove SW_IN
    df['SW_IN'].replace(-9999., np.nan, inplace=True)                                                                               #Change 7.1 Remove SW_IN. This change is just relevant for the study case of Doñana
       
    # Check Ta in Kelvin
    hta = ['TA']                                                                                       #Change 8. Change TA_ for TA. Allows more flexibility in the column names of the input file
    hout = _findfirststart(hta, df.columns)
    if df[hout[0]].max() < 100.:
        tkelvin = 273.15
    else:
        tkelvin = 0.
        
    # Add tkelvin only where not flagged
    df.loc[dff[hout[0]] == 0, hout[0]] += tkelvin
    
    # Add vpd if not given
    hvpd = ['VPD']
    hout = _findfirststart(hvpd, df.columns)
    if len(hout) == 0:
        hvpd = ['TA', 'RH']                                                                            #Change 9. Change TA_ and RH_ for TA and RH                                                                               
        hout = _findfirststart(hvpd, df.columns)
        if len(hout) != 2:
            raise ValueError('Cannot calculate VPD.')
        ta_id = hout[0]
        rh_id = hout[1]
        if df[ta_id].max() < 100.:
            tk = df[ta_id] + 273.15
        else:
            tk = df[ta_id]
        if df[rh_id].max() > 10.:
            rh = df[rh_id] / 100.
        else:
            rh = df[rh_id]
        vpd = (1. - rh) * hf.esat(tk)
        vpd_id = 'VPD_CALC'
        df[vpd_id] = vpd
        df[vpd_id].where((df[ta_id] != undef) | (df[rh_id] != undef),
                         other=undef, inplace=True)
        dff[vpd_id] = np.where((dff[ta_id] + dff[rh_id]) > 0, 2, 0)                                    #(Flag 2) in 'VPD_CALC'  where ta or rh is not available
        df.loc[dff[vpd_id] == 0, vpd_id] /= 100.                                                       #Converts from 

    # Check VPD in Pa
    hvpd = ['VPD']
    hout = _findfirststart(hvpd, df.columns)
    if df[hout[0]].max() < 10.:     # kPa
        vpdpa = 1000.
    elif df[hout[0]].max() < 100.:  # hPa
        vpdpa = 100.
    else:
        vpdpa = 1.                  # Pa
    df.loc[dff[hout[0]] == 0, hout[0]] *= vpdpa   
    
    # Time stepping                                                                                    #Derives the number of datapoints per day
    dsec  = (df.index[1] - df.index[0]).seconds
    ntday = np.rint(86400 / dsec).astype(np.int)

    #------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
    t02   = ptime.time()                                                                               #Change 9. Change legend of computation time
    strin = ( '{:.1f} [minutes]'.format((t02 - t01) / 60.)                                           
              if (t02 - t01) > 60.
              else '{:d} [seconds]'.format(int(t02 - t01))
            )
    print('     Computation setting data frames in ', strin , end='\n')    

    #********************************************************************************************************************************************************************* 
    # 3)   Outlier detection

    if outlier:
        print('3)   Spike detection')
        t11 = ptime.time()

        # Finds carbon flux data (e.g. NEE or FC)
        houtlier = [carbonflux]                                                                        #Change 10. Process only the carbonflux variable (H and LE could be processed in the same way)                                            
        hout = _findfirststart(houtlier, df.columns)
        print('      Using:', hout)
        
        # Applies the spike detection. Only one call to mad for all variables                         #carbonflux variable can be a list with NEE, H, LE, etc. and the .madspike() requires to be called only once for alll the variables
        sflag = hf.madspikes(df[hout], flag=dff[hout], isday=isday,                                    #This function creates flags with value 2 for outliers that are translated to flags 3 in the dff file
                             undef=undef, nscan=nscan * ntday,                                 
                             nfill=nfill * ntday, z=z, deriv=deriv, plot=False)
        
        for ii, hh in enumerate(hout):
            dff.loc[sflag[hh] == 2, hh] = 3                                                            #(Flag 3) for outlieres
            dff.loc[df[hh] == undef , hh] = 2

        t12   = ptime.time()                                                                           #Change 11. Change legend of computation time  
        strin = ( '{:.1f} [minutes]'.format((t12 - t11) / 60.)                                                
                  if (t12 - t11) > 60.
                  else '{:d} [seconds]'.format(int(t12 - t11))
                )
        print('     Computation outlier detection in ', strin)  
        
    #********************************************************************************************************************************************************************* 
    # 4)   u* filtering (data for a full year)
 
    if  ustar:                                                                                         #This method requires a data set with data for a full year
        print('4)   u* filtering')
        t21 = ptime.time()
        
        #Looking for carbonflux, u*, and temperature data
        hfilt = [carbonflux, 'USTAR', 'TA']                                                            #Change 12. Change 'NEE' for carbonflux variable
        hout  = _findfirststart(hfilt, df.columns)
        assert len(hout) == 3, 'Could not find CO2 flux (NEE or FC), USTAR or TA in input file.'
        print('      Using:', hout)
        
        #Saves a copy of the flags of the carbonflux data
        ffsave = dff[hout[0]].to_numpy()
        
        #Sets a temporal flag 
        iic    = np.where((~isday) & (df[hout[0]] < 0.))[0]                        
        dff.iloc[iic, list(df.columns).index(hout[0])] = 4                                             #(Flag 4). Temporal flag for data with negative values in the carbon fluxes during night
        
        # Applies the u* filtering
        ustars, flag = hf.ustarfilter(df[hout], flag=dff[hout],                                        #Check method to identify why the temporal flag is required in the ustarfilter
                                      isday=isday, undef=undef,                                        #ustarfilter function creates flags with value 2 for outliers that are translated to flags 3 in the dff file                 
                                      ustarmin=ustarmin, nboot=nboot,
                                      plateaucrit=plateaucrit,
                                      seasonout=seasonout,
                                      plot=True)
        dff[hout[0]] = ffsave                                                                          #Return to original flags file without 4-flag
        df  = df.assign(USTAR_TEST=flag)                                                               #Change 14. Change 'USTAR_TEST_1_1_1' column name for 'USTAR_TEST'                                                                
        dff = dff.assign(USTAR_TEST=np.zeros(df.shape[0], dtype=np.int))                               #This line adds a column in the dataframe of flags 
        
        if applyustarflag:
        #if False:
            hustar = [carbonflux]                                                                      #Change 15. Process only the carbonflux variable (H and LE could be processed in the same way)   
            hout = _findfirststart(hustar, df.columns)
            print('      Using:', hout)
            for ii, hh in enumerate(hout):
                dff.loc[flag [hh] == 2, hh] = 5                                                        #(Flag 5) for carbon fluxes with ustar(friction velocity) below calculated threshold
                                
        t22   = ptime.time()                                                                           #Change 16. Change legend of computation time.
        strin = ( '{:.1f} [minutes]'.format((t22 - t21) / 60.)                                           
                  if (t22 - t21) > 60.
                  else '{:d} [seconds]'.format(int(t22 - t21))
                )
        print('     Computation u* filtering detection in ', strin) 

    #------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
    # 4)   u* filtering (data for partial year)

    if  ustar_noyear:                                                                                  #Change 17. ustar_noyear is a copy of ustar without the 'ustarfilter'
        print('4)   u* filtering (less than 1-year data)')                                             #The ustar_noyear method is simple approach to manually set a ustar threshold when 
        t21 = ptime.time()                                                                             #there is no data for a full year required to compute ustar
        
        #Looking for carbonflux, u*, and temperature data
        hfilt = [carbonflux, 'USTAR', 'TA']                                                            
        hout  = _findfirststart(hfilt, df.columns)
        assert len(hout) == 3, 'Could not find CO2 flux (NEE or FC), USTAR or TA in input file.'
        print('      Using:', hout)
        
        ffsave = dff[hout[0]].to_numpy()
        flag = sflag.copy().multiply(0)
        
        #flag.loc[(df['USTAR'] < ustarmin) & (dff[carbonflux] == 2), carbonflux] = 2.
        #flag.loc[(df['USTAR'] < ustarmin) & (dff[carbonflux] == 3), carbonflux] = 2.
        #flag.loc[(df['USTAR'] < ustarmin), carbonflux] = 2.
        flag.loc[(df['USTAR'] < ustarmin) & (dff['USTAR'] != 2) & (dff[carbonflux] != 2), carbonflux] = 2.
        
        dff[hout[0]] = ffsave                         
        df  = df.assign(USTAR_TEST=flag)               
        dff = dff.assign(USTAR_TEST=np.zeros(df.shape[0], dtype=np.int))

        if applyustarflag:
        #if False:
            hustar = [carbonflux]
            hout = _findfirststart(hustar, df.columns)
            print('      Using:', hout)
            for ii, hh in enumerate(hout):
                dff.loc[flag[hh] == 2, hh] = 5 

        t22   = ptime.time()                                                                           
        strin = ( '{:.1f} [minutes]'.format((t22 - t21) / 60.)                                           
                  if (t22 - t21) > 60.
                  else '{:d} [seconds]'.format(int(t22 - t21))
                )
        print('     Computation u* filtering detection in ', strin) 
                
    #********************************************************************************************************************************************************************* 
    # 5)   Flux partitioning

    if partition:
        print('5)   Flux partitioning')
        t31 = ptime.time()
        
        #Looking for carbon flux, global radiation, temperature and vpd data
        hpart = [carbonflux, 'SW_IN', 'TA', 'VPD']                                                     #Change 18. Change 'NEE' for carbonflux variable                                                                      
        hout  = _findfirststart(hpart, df.columns)
        assert len(hout) == 4, 'Could not find CO2 flux (NEE or FC), SW_IN, TA, or VPD in input file.'
        print('      Using:', hout)

        suff = hout[0]                                                                                 #Change 20. Rename with the carbonflux variable              
 
        # nighttime method
        print('      Nighttime partitioning')
        dfpartn = hf.nee2gpp(df[hout], flag=dff[hout], isday=isday,
                             undef=undef, method='reichstein',
                             nogppnight=nogppnight)

        dfpartn.rename(columns=lambda c: c + '_' + suff + '_rei', inplace=True)                          #Change 21. Add '_' before suff and change '1' with '_1'
            
        # falge method                                                                                 #Change 22. Falge method instead of lasslop method
        print('      Falge method')
        dfpartf = hf.nee2gpp(df[hout], flag=dff[hout], isday=isday,
                             undef=undef, method='falge',         
                             nogppnight=nogppnight)  
        
        dfpartf.rename(columns=lambda c: c + '_' + suff + '_fal', inplace=True)
        
        # daytime method                                                                               #Change 23. Day time method 'lasslop' can be integrated as a third method
        print('      Daytime partitioning')
        dfpartd = hf.nee2gpp(df[hout], flag=dff[hout], isday=isday,
                             undef=undef, method='lasslop',
                             nogppnight=nogppnight)
        
        dfpartd.rename(columns=lambda c: c  + '_' + suff + '_las', inplace=True) 

        df = pd.concat([df, dfpartn, dfpartf, dfpartd],  axis=1)

        # take flags from NEE or FC same flag
        for dn in ['rei', 'fal', 'las']:
            for gg in ['GPP', 'RECO']:                                                                 #Change 24. Adds '_' between labels
                dff[gg + '_' + suff + '_'+ dn] = dff[hout[0]]                                          #Takes flags from the carbonflux variable
                
        # flag GPP and RECO if they were not calculated
        for dn in ['rei', 'fal', 'las']:
            for gg in ['GPP', 'RECO']:
                dff.loc[df['GPP' + '_' + suff + '_'+ dn] == undef, gg + '_' + suff + '_'+ dn ] = 2 

        # flag RECO when GPP was not calculated
        #for dn in ['1', '2']:                                                                          #Change 25. This method flags with 2 value the 'RECO' columns when 'GPP was not calculated  
        #    for gg in ['GPP']:                                                                         #('GPP' == undef)
        #        dff.loc[df[gg + '_' + suff + '_'+ dn] == undef, 'RECO' + '_' + suff + '_'+ dn ] = 2 
          
        t32   = ptime.time()
        strin = ( '{:.1f} [minutes]'.format((t32 - t31) / 60.)                                         #Change 26. Change legend of computation time.         
                  if (t32 - t31) > 60.
                  else '{:d} [seconds]'.format(int(t32 - t31))
                )
        print('     Computation flux partitioning detection in ', strin)  
        
    #********************************************************************************************************************************************************************* 
    # 6)   Gap-filling

    if fill:
        print('6)   Gap-filling')
        t41 = ptime.time()

        # The gap-filling call below is handed SW_IN, TA and VPD columns;
        # stop early if any of the three cannot be found in the input.
        meteo = ['SW_IN', 'TA', 'VPD']
        hout = _findfirststart(meteo, df.columns)
        assert len(hout) == 3, 'Could not find SW_IN, TA or VPD in input file.'

        # Names of the partitioned flux columns (used if available)
        rei_gpp = 'GPP_' + carbonflux + '_rei'
        rei_res = 'RECO_' + carbonflux + '_rei'
        fal_gpp = 'GPP_' + carbonflux + '_fal'
        fal_res = 'RECO_' + carbonflux + '_fal'
        las_gpp = 'GPP_' + carbonflux + '_las'
        las_res = 'RECO_' + carbonflux + '_las'

        # Columns to process: measured flux, partitioned fluxes, then the
        # meteorological columns.                                                                      #Change 27. Change names of columns to process
        hfill = [carbonflux,
                 rei_gpp, rei_res, fal_gpp, fal_res, las_gpp, las_res] + meteo

        hout = _findfirststart(hfill, df.columns)
        print('      Using:', hout)

        # With err=False the result is unpacked into the filled data (df_f)
        # and the matching flags (dff_f).
        gapfill_opts = dict(sw_dev=sw_dev, ta_dev=ta_dev, vpd_dev=vpd_dev,
                            longgap=longgap, undef=undef, err=False,
                            verbose=1)
        df_f, dff_f = hf.gapfill(df[hout], flag=dff[hout], **gapfill_opts)

        # The SW_IN, TA and VPD columns are kept in df_f / dff_f: the former
        # drop step for them is disabled.
        
        def _add_f(c):
            return '_'.join(c.split('_')[:-3] + c.split('_')[-3:]  + ['f'])                            #Change 28. 'f' of fill till the end of the name of the column names
        # Give the filled data and their flags the new column names, then
        # append them to the main data / flag frames.
        df_f = df_f.rename(columns=_add_f)
        dff_f = dff_f.rename(columns=_add_f)

        df = pd.concat([df, df_f], axis=1)
        dff = pd.concat([dff, dff_f], axis=1)

        # Report the elapsed time, in minutes above 60 s, otherwise in seconds.                        #Change 29. Change legend of computation time.
        t42 = ptime.time()
        elapsed = t42 - t41
        if elapsed > 60.:
            strin = '{:.1f} [minutes]'.format(elapsed / 60.)
        else:
            strin = '{:d} [seconds]'.format(int(elapsed))
        print('     Computation filling gaps detection in ', strin)
        
    #********************************************************************************************************************************************************************* 
    # 7)   Error estimate

    if fluxerr:
        print('7)   Flux error estimates')
        t51 = ptime.time()

        # The call below is handed SW_IN, TA and VPD columns; stop early if
        # any of the three cannot be found in the input.
        meteo = ['SW_IN', 'TA', 'VPD']
        hout = _findfirststart(meteo, df.columns)
        assert len(hout) == 3, 'Could not find SW_IN, TA or VPD in input file.'

        # Names of the partitioned flux columns (used if available)
        rei_gpp = 'GPP_' + carbonflux + '_rei'
        rei_res = 'RECO_' + carbonflux + '_rei'
        fal_gpp = 'GPP_' + carbonflux + '_fal'
        fal_res = 'RECO_' + carbonflux + '_fal'
        las_gpp = 'GPP_' + carbonflux + '_las'
        las_res = 'RECO_' + carbonflux + '_las'

        # Columns to process: measured flux, partitioned fluxes, then the
        # meteorological columns.                                                                      #Change 30. Change names of columns to process
        hfill = [carbonflux,
                 rei_gpp, rei_res, fal_gpp, fal_res, las_gpp, las_res] + meteo

        hout = _findfirststart(hfill, df.columns)
        print('      Using:', hout)

        # Same gap-filling routine as in step 6, but called with err=True;
        # here the result is a single frame.
        gapfill_opts = dict(sw_dev=sw_dev, ta_dev=ta_dev, vpd_dev=vpd_dev,
                            longgap=longgap, undef=undef, err=True,
                            verbose=1)
        df_f = hf.gapfill(df[hout], flag=dff[hout], **gapfill_opts)

        # The meteorological columns are removed from the error frame.
        hdrop = list(meteo)
        hout = _findfirststart(hdrop, df.columns)
        df_f = df_f.drop(columns=hout)

        # Remember the column names before they receive the error suffix.
        colin = list(df_f.columns)

        def _add_e(c):                                                                                 #Change 31. Create _add_e instead of reusing _add_f
            return '_'.join(c.split('_')[:-3] + c.split('_')[-3:] + ['e'])

        # Rename the error columns with the 'e' (error) suffix and append them to df
        df_f = df_f.rename(columns=_add_e)
        colout = list(df_f.columns)
        df = pd.concat([df, df_f], axis=1)

        # Each error column takes over the flags of the column it was derived
        # from; colin and colout are in the same order.
        for src, dst in zip(colin, colout):
            dff[dst] = dff[src]

        # Report the elapsed time, in minutes above 60 s, otherwise in seconds.                        #Change 32. Change legend of computation time.
        t52 = ptime.time()
        elapsed = t52 - t51
        if elapsed > 60.:
            strin = '{:.1f} [minutes]'.format(elapsed / 60.)
        else:
            strin = '{:d} [seconds]'.format(int(elapsed))
        print('     Computation flux error estimates in ', strin)

    #********************************************************************************************************************************************************************* 
    # 8)   Output
    
    print('8)   Outputfile')
    t61 = ptime.time()

    if not outputfile:
        # No output file configured: ask for a directory and derive the file
        # name from the configuration file.
        try:
            outputdir = hf.directory_from_gui(initialdir='.',
                                              title='Output directory')
        except Exception as exc:
            # Catch Exception, not everything: Ctrl-C / SystemExit must not be
            # reported as a GUI failure. Keep the original error as the cause.
            raise IOError("GUI for output directory failed.") from exc

        # Base name of the configuration file with its extension replaced by
        # '.csv'. splitext leaves names without an extension intact, where
        # slicing at rfind('.') would cut off the last character.
        outputfile = os.path.splitext(os.path.basename(configfile))[0] + '.csv'
        outputfile = outputdir + '/' + outputfile                                                      #Change 33. Change outdir for outputdir to select directly the output folder
    else:
        # NOTE(review): plain string concatenation - this relies on outdir
        # ending with, or outputfile starting with, a path separator; confirm
        # against the configuration file.
        outputfile = outdir + outputfile                                                               #Change 34. Create outputfile in case outputfile and outputdir are available
        
    print('      Write output ', outputfile)

    # Back to original units. Only values whose flag equals 0 are converted.
    # NOTE(review): only the column hout[0] is converted in each case; other
    # columns derived from TA / VPD (e.g. gap-filled copies) are not touched
    # here - confirm that this is intended.
    hta = ['TA']
    hout = _findfirststart(hta, df.columns)
    ta_col = hout[0]
    ta_ok = dff[ta_col] == 0
    df.loc[ta_ok, ta_col] = df.loc[ta_ok, ta_col] - tkelvin

    hvpd = ['VPD']
    hout = _findfirststart(hvpd, df.columns)
    vpd_col = hout[0]
    vpd_ok = dff[vpd_col] == 0
    df.loc[vpd_ok, vpd_col] = df.loc[vpd_ok, vpd_col] / vpdpa

    if outundef:
        print('      Set flags to undef.')
        for cc in df.columns:
            # Gap-filled ('_f') and error ('_e') columns are left untouched.                           #Change 35. Change [-4] for [-1] and exclude error 'e' columns
            if cc.split('_')[-1] not in ('f', 'e'):
                # Write undef wherever the flag is not 0. The result is
                # assigned back to the frame: calling
                # Series.where(..., inplace=True) on df[cc] acts on a
                # temporary selection and does not update df when pandas
                # Copy-on-Write is active.
                df[cc] = df[cc].where(dff[cc] == 0, other=undef)

    def _add_flag(c):
        """Return the name of the flag column belonging to data column `c`."""
        return 'flag_' + c

    if outflagcols:
        # Export a flag column for every variable.
        print('      Add flag columns.')
        dff.rename(columns=_add_flag, inplace=True)

        # no flag columns for flags: drop every flag column with '_TEST' in its name                   #Change 36. Change '_TEST_' for '_TEST'
        dcol = [hh for hh in dff.columns if '_TEST' in hh]
        if dcol:
            dff.drop(columns=dcol, inplace=True)                                                       #Remove the TEST columns
        df = pd.concat([df, dff], axis=1)
    else:
        # Export flag columns only for gap-filled ('_f') and error ('_e') columns.                     #Change 37. Add the error columns 'e' in the condition
        print('      Add flag columns for gap-filled variables.')
        occ = [cc for cc in df.columns if cc.split('_')[-1] in ('f', 'e')]
        dff1 = dff[occ].copy(deep=True)
        # Same prefix helper as in the other branch, so both name flags alike.
        dff1.rename(columns=_add_flag, inplace=True)
        df = pd.concat([df, dff1], axis=1)
    print('      Write.')
    
    
    # Write the table with its index; missing values are written as str(undef)
    # and dates are formatted with timeformat.
    df.to_csv(outputfile, sep=sep, na_rep=str(undef), index=True,
              date_format=timeformat)

    # Report the elapsed time, in minutes above 60 s, otherwise in seconds.                            #Change 37. Change legend of computation time.
    t62 = ptime.time()
    elapsed = t62 - t61
    if elapsed > 60.:
        strin = '{:.1f} [minutes]'.format(elapsed / 60.)
    else:
        strin = '{:d} [seconds]'.format(int(elapsed))
    print('      Creating output file in ', strin)

    #********************************************************************************************************************************************************************* 
    # The following sections are complementary modules used to derive remote-sensing empirical models of GPP.   #Change 39. All code below is additional code to derive the empirical models
    #********************************************************************************************************************************************************************* 
    # 9)   Daily estimations 

    if daily_values:                                                                                    ####Parameter

        print('9)   Daily GPP')
        t71 = ptime.time()

        # Names of the partitioned flux columns. They are rebuilt here so that
        # this section does not depend on an earlier section having defined them.
        rei_gpp = 'GPP_' + carbonflux + '_rei'
        rei_res = 'RECO_' + carbonflux + '_rei'
        fal_gpp = 'GPP_' + carbonflux + '_fal'
        fal_res = 'RECO_' + carbonflux + '_fal'
        las_gpp = 'GPP_' + carbonflux + '_las'
        las_res = 'RECO_' + carbonflux + '_las'

        mean_cols = ['TA_f', 'VPD_f', 'SW_IN_f']
        sum_cols = [carbonflux + '_f',
                    rei_gpp + '_f', rei_res + '_f',
                    fal_gpp + '_f', fal_res + '_f',
                    las_gpp + '_f', las_res + '_f']

        # Daily GPP and environmental drivers.
        # Keep only records whose gap-filled flux and respiration lie inside
        # the plausibility limits.
        gpp = df.copy()
        gpp = gpp[(gpp[carbonflux + '_f'] < 20) & (gpp[carbonflux + '_f'] > -20)]                       ####Parameter
        gpp = gpp[(gpp[rei_res + '_f'] < 15) & (gpp[rei_res + '_f'] > -15)]

        # Mask undefined values BEFORE the fluxes are scaled. Replacing them
        # afterwards cannot work, because a scaled undef no longer equals the
        # undef marker and would enter the daily sums as a large negative value.
        gpp_mean = gpp[mean_cols].replace(undef, np.nan)
        # NOTE(review): the factor 12 * 30 * 60 / 1e6 presumably converts
        # umol CO2 m-2 s-1 into g C m-2 per 30-minute record - confirm the
        # units and the time step of the input data.
        gpp_sum = gpp[sum_cols].replace(undef, np.nan) * 12 * 30 * 60 / 1000000

        gpp_mean = gpp_mean.reset_index()
        gpp_sum = gpp_sum.reset_index()

        gpp_mean['date'] = gpp_mean['TIMESTAMP_START'].dt.date
        gpp_sum['date'] = gpp_sum['TIMESTAMP_START'].dt.date

        # Aggregate only the data columns: the datetime column TIMESTAMP_START
        # cannot be summed and makes groupby().sum() fail on pandas >= 2.
        gpp_mean_daily = gpp_mean.groupby('date')[mean_cols].mean()
        gpp_sum_daily = gpp_sum.groupby('date')[sum_cols].sum()

        df_gpp = pd.concat([gpp_mean_daily, gpp_sum_daily], axis=1)

        # identify beginning and end of the time series
        df_time = df_gpp.reset_index()
        time1 = df_time.iloc[0, 0]
        time2 = df_time.iloc[df_gpp.shape[0] - 1, 0]

        # create a time series with daily frequency so that days without any
        # record appear as rows of NaN
        time_series = pd.date_range(time1, time2, freq="D")
        time_series = pd.DataFrame(time_series).rename(columns={0: 'date'}).set_index('date')
        df_gpp_time = pd.merge(left=time_series, right=df_gpp,
                               how="left", left_index=True, right_index=True)

        # smooth the time series: 3-row rolling mean, then interpolation of
        # the remaining gaps
        df_gpp_smoth = df_gpp_time.rolling(3).mean()                                                 ####Parameter
        df_gpp_smoth = df_gpp_smoth.interpolate(method='akima', order=1, limit_direction='forward')

        # save file of daily GPP; create the target folder first so that the
        # write does not fail on a missing directory
        gpp_daily_file = outdir + "/GPP/GPP_daily.txt"
        os.makedirs(os.path.dirname(gpp_daily_file), exist_ok=True)
        df_gpp_smoth.to_csv(gpp_daily_file)

        t72 = ptime.time()
        strin = ('{:.1f} [minutes]'.format((t72 - t71) / 60.)
                 if (t72 - t71) > 60.
                 else '{:d} [seconds]'.format(int(t72 - t71))
                 )
        print('     Computed daily GPP in ', strin)
        
    #********************************************************************************************************************************************************************* 
    # Finish Correction of Data with the Hesseflux package 

    # Total run time since t1, in minutes above 60 s, otherwise in seconds.                             #Change 38. Change legend of computation time.
    t2 = ptime.time()
    elapsed = t2 - t1
    if elapsed > 60.:
        strin = '{:.1f} [minutes]'.format(elapsed / 60.)
    else:
        strin = '{:d} [seconds]'.format(int(elapsed))
    print('Total time of correction of data ', strin)

    #********************************************************************************************************************************************************************* 

1)   Readding configuration file
2)   Formatting data frames
      Read data:  \NEE\NEE_EU_format_2020.txt, \NEE\NEE_EU_format_2021.txt
      Formating data:  ['Data\\NEE\\NEE_EU_format_2020.txt', 'Data\\NEE\\NEE_EU_format_2021.txt']
     Computation setting data frames in  0 [seconds]
3)   Spike detection
      Using: ['FC_2']
     Computation outlier detection in  1 [seconds]
4)   u* filtering (less than 1-year data)
      Using: ['FC_2', 'USTAR', 'TA']
      Using: ['FC_2']
     Computation u* filtering detection in  0 [seconds]
5)   Flux partitioning
      Using: ['FC_2', 'SW_IN', 'TA', 'VPD']
      Nighttime partitioning
      Falge method
      Daytime partitioning
     Computation flux partitioning detection in  8 [seconds]
6)   Gap-filling
      Using: ['FC_2', 'GPP_FC_2_rei', 'RECO_FC_2_rei', 'GPP_FC_2_fal', 'RECO_FC_2_fal', 'GPP_FC_2_las', 'RECO_FC_2_las', 'SW_IN', 'TA', 'VPD']
  Filling  FC_2
  Filling  GPP_FC_2_rei
  Filling  RECO_FC_2_rei
  Filling  GPP_FC_2_fal
  Filling 