# LST SMOOTHING ANALYSIS

We want to compare: 

- Mid point interpolation with 8-day smoothing
- Daily smoothing

We will be working on a sample of 179 points with:
- MYD LST Night data
- MYD LST Day data


In [1]:
import array
import datetime
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
from termcolor import colored
import time
import xarray as xr

%reload_ext Cython

## Functions

Modified versions of `ws2doptv` and `ws2doptvp` that also return the V-curve.

In [2]:
%%cython

from cpython.array cimport array, clone
from libc.math cimport log, pow, sqrt
cimport numpy as np
import numpy as np

# Double-precision aliases: `tFloat` is the NumPy-level dtype, `dtype_t` the
# C-level element type used for the typed ndarray arguments in this cell.
tFloat = np.double
ctypedef np.double_t dtype_t

cpdef lag1corr(np.ndarray[dtype_t] data1, np.ndarray[dtype_t] data2, double nd):
    """Calculates Lag-1 autocorrelation.

    Computes the correlation coefficient between ``data1`` and ``data2``
    (mean of products minus product of means, divided by the product of the
    population standard deviations), using only the positions where neither
    series equals ``nd``. Passing a series and the same series shifted by one
    step yields its lag-1 autocorrelation.

    Adapted from https://stackoverflow.com/a/29194624/5997555

    Args:
        data1: first data series
        data2: second data series (indexed over the length of ``data1``)
        nd: no-data value (pairs containing it are excluded from the calculation)

    Returns:
        Lag-1 autocorrelation value
    """

    # All loop state is C-typed so both passes run without Python-object
    # arithmetic (the index and the means were previously untyped).
    cdef int M, sub, valid, i
    cdef double sum1, sum2, mean1, mean2
    cdef double var_sum1, var_sum2, cross_sum, std1, std2, cross_mean

    M = data1.size

    # First pass: sums over valid pairs and count of excluded pairs.
    sum1 = 0.
    sum2 = 0.
    sub = 0
    for i in range(M):
        if data1[i] != nd and data2[i] != nd:
            sum1 += data1[i]
            sum2 += data2[i]
        else:
            sub += 1

    # Number of valid pairs; when it is zero the divisions below have a
    # zero denominator.
    valid = M - sub
    mean1 = sum1 / valid
    mean2 = sum2 / valid

    # Second pass: squared deviations and the raw cross product.
    var_sum1 = 0.
    var_sum2 = 0.
    cross_sum = 0.
    for i in range(M):
        if data1[i] != nd and data2[i] != nd:
            var_sum1 += (data1[i] - mean1) ** 2
            var_sum2 += (data2[i] - mean2) ** 2
            cross_sum += (data1[i] * data2[i])

    std1 = (var_sum1 / valid) ** .5
    std2 = (var_sum2 / valid) ** .5
    cross_mean = cross_sum / valid
    return (cross_mean - mean1 * mean2) / (std1 * std2)

cpdef ws2d(np.ndarray[dtype_t] y, double lmda, np.ndarray[dtype_t] w):
    """Whittaker smoother with fixed lambda (S).

    Same recurrence as ``_ws2d`` below, but takes the weights as a numpy
    array instead of a cpython array.

    Args:
        y: time-series numpy array
        lmda: smoothing parameter lambda (S)
        w: weights numpy array (same length as y)

    Returns:
        Smoothed time-series z as a cpython array of doubles with len(y) items
    """
    # NOTE(review): elements 0, 1, m-3 .. m are addressed unconditionally, so
    # series shorter than 4 samples look unsupported -- confirm with callers.
    cdef array dbl_array_template = array('d', [])
    cdef int i, i1, i2, m, n
    cdef array z, d, c, e

    n = y.shape[0]
    m = n - 1

    # Work arrays are left uninitialised (zero=False); every element that is
    # read later is written first by the forward sweep.
    z = clone(dbl_array_template, n, zero=False)
    d = clone(dbl_array_template, n, zero=False)
    c = clone(dbl_array_template, n, zero=False)
    e = clone(dbl_array_template, n, zero=False)

    # Forward sweep: the first two rows are special-cased ...
    d.data.as_doubles[0] = w[0] + lmda
    c.data.as_doubles[0] = (-2 * lmda) / d.data.as_doubles[0]
    e.data.as_doubles[0] = lmda /d.data.as_doubles[0]
    z.data.as_doubles[0] = w[0] * y[0]
    d.data.as_doubles[1] = w[1] + 5 * lmda - d.data.as_doubles[0] * (c.data.as_doubles[0] * c.data.as_doubles[0])
    c.data.as_doubles[1] = (-4 * lmda - d.data.as_doubles[0] * c.data.as_doubles[0] * e.data.as_doubles[0]) / d.data.as_doubles[1]
    e.data.as_doubles[1] =  lmda / d.data.as_doubles[1]
    z.data.as_doubles[1] = w[1] * y[1] - c.data.as_doubles[0] * z.data.as_doubles[0]
    # ... interior rows each depend on the two rows before them ...
    for i in range(2, m-1):
        i1 = i - 1
        i2 = i - 2
        d.data.as_doubles[i]= w[i] + 6 *  lmda - (c.data.as_doubles[i1] * c.data.as_doubles[i1]) * d.data.as_doubles[i1] - (e.data.as_doubles[i2] * e.data.as_doubles[i2]) * d.data.as_doubles[i2]
        c.data.as_doubles[i] = (-4 *  lmda - d.data.as_doubles[i1] * c.data.as_doubles[i1] * e.data.as_doubles[i1])/ d.data.as_doubles[i]
        e.data.as_doubles[i] =  lmda / d.data.as_doubles[i]
        z.data.as_doubles[i] = w[i] * y[i] - c.data.as_doubles[i1] * z.data.as_doubles[i1] - e.data.as_doubles[i2] * z.data.as_doubles[i2]
    # ... and the last two rows (m-1 and m) are special-cased again.
    i1 = m - 2
    i2 = m - 3
    d.data.as_doubles[m - 1] = w[m - 1] + 5 *  lmda - (c.data.as_doubles[i1] * c.data.as_doubles[i1]) * d.data.as_doubles[i1] - (e.data.as_doubles[i2] * e.data.as_doubles[i2]) * d.data.as_doubles[i2]
    c.data.as_doubles[m - 1] = (-2 *  lmda - d.data.as_doubles[i1] * c.data.as_doubles[i1] * e.data.as_doubles[i1]) / d.data.as_doubles[m - 1]
    z.data.as_doubles[m - 1] = w[m - 1] * y[m - 1] - c.data.as_doubles[i1] * z.data.as_doubles[i1] - e.data.as_doubles[i2] * z.data.as_doubles[i2]
    i1 = m - 1
    i2 = m - 2
    d.data.as_doubles[m] = w[m] +  lmda - (c.data.as_doubles[i1] * c.data.as_doubles[i1]) * d.data.as_doubles[i1] - (e.data.as_doubles[i2] * e.data.as_doubles[i2]) * d.data.as_doubles[i2]
    # Back substitution: finish the last two elements, then walk backwards
    # through the series overwriting z in place.
    z.data.as_doubles[m] = (w[m] * y[m] - c.data.as_doubles[i1] * z.data.as_doubles[i1] - e.data.as_doubles[i2] * z.data.as_doubles[i2]) / d.data.as_doubles[m]
    z.data.as_doubles[m - 1] = z.data.as_doubles[m - 1] / d.data.as_doubles[m - 1] - c.data.as_doubles[m - 1] * z.data.as_doubles[m]
    for i in range(m-2, -1, -1):
        z.data.as_doubles[i] = z.data.as_doubles[i] / d.data.as_doubles[i] - c.data.as_doubles[i] * z.data.as_doubles[i + 1] - e.data.as_doubles[i] * z.data.as_doubles[i + 2]
    return z

cpdef ws2dp(np.ndarray[dtype_t] y, double lmda, np.ndarray[dtype_t] w, double p):
  """Whittaker smoother with asymmetric smoothing and fixed lambda (S).

  Repeatedly re-weights the observations (weight ``p`` where the observation
  lies above the current fit, ``1 - p`` otherwise) and re-smooths, for at
  most 10 rounds or until the fit stops changing.

  Args:
      y: time-series numpy array
      lmda: smoothing parameter lambda (S)
      w: weights numpy array
      p: "Envelope" value

  Returns:
      Smoothed time-series array z
  """
  cdef array template = array('d', [])
  cdef int m, i, j
  cdef double y_tmp, z_tmp, p1

  m = y.shape[0]
  i = 0
  j = 0
  p1 = 1-p

  template = array('d', [])
  # z / znew start zero-filled; wa / ww are fully written before being read.
  z = clone(template, m, True)
  znew = clone(template, m, True)
  wa = clone(template, m, False)
  ww = clone(template, m, False)

  # Calculate weights

  for i in range(10):
    for j in range(m):
      y_tmp = y[j]
      z_tmp = z.data.as_doubles[j]

      # asymmetric factor: p above the current fit, 1-p on or below it
      if y_tmp > z_tmp:
        wa.data.as_doubles[j] = p
      else:
        wa.data.as_doubles[j] = p1
      ww.data.as_doubles[j] = w[j] * wa.data.as_doubles[j]

    znew[0:m] = _ws2d(y, lmda, ww)
    # total absolute change between two successive fits
    z_tmp = 0.0
    j = 0
    for j in range(m):
      z_tmp += abs(znew.data.as_doubles[j] - z.data.as_doubles[j])

    # stop only when the fit is exactly unchanged
    if z_tmp == 0.0:
      break

    z[0:m]= znew[0:m]

  # final smooth with the weights from the last round
  z[0:m] = _ws2d(y, lmda, ww)
  return z

cdef _ws2d(np.ndarray[dtype_t] y, double lmda, array[double] w):
    """Internal whittaker function for use in asymmetric smoothing.

    Same recurrence as ``ws2d``, but the weights arrive as a cpython array
    and are read through its raw double buffer.

    Args:
      y: time-series numpy array
      lmda: lambda (s) value
      w: weights as a cpython array of doubles
    Returns:
        smoothed time-series array z (cpython array of doubles)
    """

    cdef array dbl_array_template = array('d', [])
    cdef int i, i1, i2, m, n
    cdef array z, d, c, e

    n = y.shape[0]
    m = n - 1

    # Work arrays are left uninitialised (zero=False); every element that is
    # read later is written first by the forward sweep.
    z = clone(dbl_array_template, n, zero=False)
    d = clone(dbl_array_template, n, zero=False)
    c = clone(dbl_array_template, n, zero=False)
    e = clone(dbl_array_template, n, zero=False)

    # Forward sweep: first two rows special-cased, then interior rows, then
    # the last two rows (m-1 and m).
    d.data.as_doubles[0] = w.data.as_doubles[0] + lmda
    c.data.as_doubles[0] = (-2 * lmda) / d.data.as_doubles[0]
    e.data.as_doubles[0] = lmda /d.data.as_doubles[0]
    z.data.as_doubles[0] = w.data.as_doubles[0] * y[0]
    d.data.as_doubles[1] = w.data.as_doubles[1] + 5 * lmda - d.data.as_doubles[0] * (c.data.as_doubles[0] * c.data.as_doubles[0])
    c.data.as_doubles[1] = (-4 * lmda - d.data.as_doubles[0] * c.data.as_doubles[0] * e.data.as_doubles[0]) / d.data.as_doubles[1]
    e.data.as_doubles[1] =  lmda / d.data.as_doubles[1]
    z.data.as_doubles[1] = w.data.as_doubles[1] * y[1] - c.data.as_doubles[0] * z.data.as_doubles[0]
    for i in range(2, m-1):
        i1 = i - 1
        i2 = i - 2
        d.data.as_doubles[i]= w.data.as_doubles[i] + 6 *  lmda - (c.data.as_doubles[i1] * c.data.as_doubles[i1]) * d.data.as_doubles[i1] - (e.data.as_doubles[i2] * e.data.as_doubles[i2]) * d.data.as_doubles[i2]
        c.data.as_doubles[i] = (-4 *  lmda - d.data.as_doubles[i1] * c.data.as_doubles[i1] * e.data.as_doubles[i1])/ d.data.as_doubles[i]
        e.data.as_doubles[i] =  lmda / d.data.as_doubles[i]
        z.data.as_doubles[i] = w.data.as_doubles[i] * y[i] - c.data.as_doubles[i1] * z.data.as_doubles[i1] - e.data.as_doubles[i2] * z.data.as_doubles[i2]
    i1 = m - 2
    i2 = m - 3
    d.data.as_doubles[m - 1] = w.data.as_doubles[m - 1] + 5 *  lmda - (c.data.as_doubles[i1] * c.data.as_doubles[i1]) * d.data.as_doubles[i1] - (e.data.as_doubles[i2] * e.data.as_doubles[i2]) * d.data.as_doubles[i2]
    c.data.as_doubles[m - 1] = (-2 *  lmda - d.data.as_doubles[i1] * c.data.as_doubles[i1] * e.data.as_doubles[i1]) / d.data.as_doubles[m - 1]
    z.data.as_doubles[m - 1] = w.data.as_doubles[m - 1] * y[m - 1] - c.data.as_doubles[i1] * z.data.as_doubles[i1] - e.data.as_doubles[i2] * z.data.as_doubles[i2]
    i1 = m - 1
    i2 = m - 2
    d.data.as_doubles[m] = w.data.as_doubles[m] +  lmda - (c.data.as_doubles[i1] * c.data.as_doubles[i1]) * d.data.as_doubles[i1] - (e.data.as_doubles[i2] * e.data.as_doubles[i2]) * d.data.as_doubles[i2]
    # Back substitution, overwriting z in place from the end of the series.
    z.data.as_doubles[m] = (w.data.as_doubles[m] * y[m] - c.data.as_doubles[i1] * z.data.as_doubles[i1] - e.data.as_doubles[i2] * z.data.as_doubles[i2]) / d.data.as_doubles[m]
    z.data.as_doubles[m - 1] = z.data.as_doubles[m - 1] / d.data.as_doubles[m - 1] - c.data.as_doubles[m - 1] * z.data.as_doubles[m]
    for i in range(m-2, -1, -1):
        z.data.as_doubles[i] = z.data.as_doubles[i] / d.data.as_doubles[i] - c.data.as_doubles[i] * z.data.as_doubles[i + 1] - e.data.as_doubles[i] * z.data.as_doubles[i + 2]
    return z


cpdef ws2doptv(np.ndarray[dtype_t] y, np.ndarray[dtype_t] w, array[double] llas):
    """Whittaker smoother with normal V-curve optimization of lambda (S).

    This variant additionally returns the V-curve itself so it can be plotted.

    Args:
        y: time-series numpy array
        w: weights numpy array
        llas: array with lambda values to iterate (S-range); the values are
            used as base-10 exponents (lambda = 10 ** llas[i]) and need at
            least two entries
    Returns:
        Tuple ``(z, lopt, v, lamids)``: smoothed time-series array z,
        optimized lambda (S) value lopt, the V-curve values v (one per pair
        of neighbouring llas entries) and lamids, the midpoints of
        neighbouring llas entries (log10 scale) at which v is evaluated
    """
    cdef array template = array('d', [])
    cdef array fits, pens, diff1, lamids, v, z
    cdef int m, m1, m2, nl, nl1, lix, i, k
    cdef double w_tmp, y_tmp, z_tmp, z2, llastep, f1, f2, p1, p2, l, l1, l2, vmin, lopt

    m = y.shape[0]
    m1 = m - 1
    m2 = m - 2
    nl = len(llas)
    nl1 = nl - 1
    i = 0
    k = 0

    template = array('d', [])

    # fits / pens are accumulated with += below, so they start zero-filled
    fits = clone(template, nl, True)
    pens = clone(template, nl, True)
    z = clone(template, m, False)
    diff1 = clone(template, m1, True)
    lamids = clone(template, nl1, False)
    v = clone(template, nl1, False)

    # Compute v-curve
    for lix in range(nl):
        l = pow(10,llas.data.as_doubles[lix])
        z[0:m] = ws2d(y, l, w)
        # fit term: log of the sum of squared weighted residuals
        for i in range(m):
            w_tmp = w[i]
            y_tmp = y[i]
            z_tmp = z.data.as_doubles[i]
            fits.data.as_doubles[lix] += pow(w_tmp * (y_tmp - z_tmp),2)
        fits.data.as_doubles[lix] = log(fits.data.as_doubles[lix])

        # penalty term: log of the sum of squared second differences of z
        for i in range(m1):
            z_tmp = z.data.as_doubles[i]
            z2 = z.data.as_doubles[i+1]
            diff1.data.as_doubles[i] = z2 - z_tmp
        for i in range(m2):
            z_tmp = diff1.data.as_doubles[i]
            z2 = diff1.data.as_doubles[i+1]
            pens.data.as_doubles[lix] += pow(z2 - z_tmp,2)
        pens.data.as_doubles[lix] = log(pens.data.as_doubles[lix])

    # Construct v-curve
    # the step between the first two entries is used for every interval
    llastep = llas[1] - llas[0]

    for i in range(nl1):
        l1 = llas.data.as_doubles[i]
        l2 = llas.data.as_doubles[i+1]
        f1 = fits.data.as_doubles[i]
        f2 = fits.data.as_doubles[i+1]
        p1 = pens.data.as_doubles[i]
        p2 = pens.data.as_doubles[i+1]
        v.data.as_doubles[i] = sqrt(pow(f2 - f1,2) + pow(p2 - p1,2)) / (log(10) * llastep)
        lamids.data.as_doubles[i] = (l1+l2) / 2

    # locate the minimum of the v-curve (first occurrence wins on ties)
    vmin = v.data.as_doubles[k]
    for i in range(1, nl1):
        if v.data.as_doubles[i] < vmin:
            vmin = v.data.as_doubles[i]
            k = i

    lopt = pow(10, lamids.data.as_doubles[k])

    # final smooth with the selected lambda
    z[0:m] = ws2d(y, lopt, w)

    return z, lopt, v, lamids


cpdef ws2doptvp(np.ndarray[dtype_t] y, np.ndarray[dtype_t] w, array[double] llas, double p):
    """Whittaker smoother with asymmetric V-curve optimization of lambda (S).

    This variant additionally returns the V-curve itself so it can be plotted.

    Args:
        y: time-series numpy array
        w: weights numpy array
        llas: array with lambda values to iterate (S-range); the values are
            used as base-10 exponents (lambda = 10 ** llas[i]) and need at
            least two entries
        p: "Envelope" value
    Returns:
        Tuple ``(z, lopt, v, lamids)``: smoothed time-series array z,
        optimized lambda (S) value lopt, the V-curve values v (one per pair
        of neighbouring llas entries) and lamids, the midpoints of
        neighbouring llas entries (log10 scale) at which v is evaluated
    """
    cdef array template = array('d', [])
    cdef array fits, pens, diff1, lamids, v, z
    cdef int m, m1, m2, nl, nl1, lix, i, j, k
    cdef double w_tmp, y_tmp, z_tmp, z2, llastep, fit1, fit2, pen1, pen2, l, l1, l2, vmin, lopt, p1

    m = y.shape[0]
    m1 = m - 1
    m2 = m - 2
    nl = len(llas)
    nl1 = nl - 1
    i = 0
    k = 0
    j = 0
    p1 = 1-p

    template = array('d', [])
    # fits / pens are accumulated with += below, so they start zero-filled
    fits = clone(template, nl, True)
    pens = clone(template, nl, True)
    z = clone(template, m, True)
    znew = clone(template, m, True)
    diff1 = clone(template, m1, True)
    lamids = clone(template, nl1, False)
    v = clone(template, nl1, False)
    wa = clone(template, m, False)
    ww = clone(template, m, False)

    # Compute v-curve
    # z is not reset between lambda values: each lambda's re-weighting starts
    # from the fit obtained for the previous lambda.
    for lix in range(nl):
        l = pow(10,llas.data.as_doubles[lix])

        # asymmetric re-weighting, at most 10 rounds per lambda
        for i in range(10):
          for j in range(m):
            y_tmp = y[j]
            z_tmp = z.data.as_doubles[j]
            if y_tmp > z_tmp:
              wa.data.as_doubles[j] = p
            else:
              wa.data.as_doubles[j] = p1
            ww.data.as_doubles[j] = w[j] * wa.data.as_doubles[j]

          znew[0:m] = _ws2d(y, l, ww)
          z_tmp = 0.0
          j = 0
          for j in range(m):
            z_tmp += abs(znew.data.as_doubles[j] - z.data.as_doubles[j])

          # stop only when the fit is exactly unchanged
          if z_tmp == 0.0:
            break

          z[0:m]= znew[0:m]

        # fit term: log of the sum of squared residuals, scaled by the
        # original weights w (not the asymmetric weights ww)
        for i in range(m):
            w_tmp = w[i]
            y_tmp = y[i]
            z_tmp = z.data.as_doubles[i]
            fits.data.as_doubles[lix] += pow(w_tmp * (y_tmp - z_tmp),2)
        fits.data.as_doubles[lix] = log(fits.data.as_doubles[lix])

        # penalty term: log of the sum of squared second differences of z
        for i in range(m1):
            z_tmp = z.data.as_doubles[i]
            z2 = z.data.as_doubles[i+1]
            diff1.data.as_doubles[i] = z2 - z_tmp
        for i in range(m2):
            z_tmp = diff1.data.as_doubles[i]
            z2 = diff1.data.as_doubles[i+1]
            pens.data.as_doubles[lix] += pow(z2 - z_tmp,2)
        pens.data.as_doubles[lix] = log(pens.data.as_doubles[lix])

    # Construct v-curve
    # the step between the first two entries is used for every interval
    llastep = llas[1] - llas[0]

    for i in range(nl1):
        l1 = llas.data.as_doubles[i]
        l2 = llas.data.as_doubles[i+1]
        fit1 = fits.data.as_doubles[i]
        fit2 = fits.data.as_doubles[i+1]
        pen1 = pens.data.as_doubles[i]
        pen2 = pens.data.as_doubles[i+1]
        v.data.as_doubles[i] = sqrt(pow(fit2 - fit1,2) + pow(pen2 - pen1,2)) / (log(10) * llastep)
        lamids.data.as_doubles[i] = (l1+l2) / 2

    # locate the minimum of the v-curve (first occurrence wins on ties)
    vmin = v.data.as_doubles[k]
    for i in range(1, nl1):
        if v.data.as_doubles[i] < vmin:
            vmin = v.data.as_doubles[i]
            k = i

    lopt = pow(10, lamids.data.as_doubles[k])

    # restart from a zero-filled fit for the final asymmetric smoothing
    del z
    z = clone(template, m, True)

    for i in range(10):
      for j in range(m):
        y_tmp = y[j]
        z_tmp = z.data.as_doubles[j]

        if y_tmp > z_tmp:
          wa.data.as_doubles[j] = p
        else:
          wa.data.as_doubles[j] = p1
        ww.data.as_doubles[j] = w[j] * wa.data.as_doubles[j]

      znew[0:m] = _ws2d(y, lopt, ww)
      z_tmp = 0.0
      j = 0
      for j in range(m):
        z_tmp += abs(znew.data.as_doubles[j] - z.data.as_doubles[j])

      if z_tmp == 0.0:
        break

      z[0:m]= znew[0:m]

    # final smooth with the weights from the last round
    z[0:m] = _ws2d(y, lopt, ww)
    
    return z, lopt, v, lamids

Other functions

In [3]:
def daily_interpolation(rawdates: np.array):
    """Build a gap-free daily date axis spanning the raw acquisition dates.

    The axis starts at the first raw date and extends 16 days past the last
    one, which leaves head-room for values that are shifted forward later.

    Args:
        rawdates: indexable sequence of dates; only the first and the last
            element are read.

    Returns:
        numpy array of ``datetime.date`` objects, one per calendar day.
    """
    # padded end of the axis (16 days after the last acquisition)
    last_day = rawdates[-1] + datetime.timedelta(16)
    day_stamps = pd.date_range(start=rawdates[0], end=last_day)

    # pandas timestamps -> python datetimes -> plain dates
    return np.array([stamp.date() for stamp in day_stamps.to_pydatetime()])

In [5]:
def fromstring(x):
    """Convert a date string to a ``datetime.date``.

    Two layouts are accepted: day-first ``'%d/%m/%Y'`` is tried first, then
    ISO-style ``'%Y-%m-%d'``.

    Args:
        x: date string in one of the two supported layouts.

    Returns:
        The parsed ``datetime.date``.

    Raises:
        ValueError: if ``x`` matches neither layout.
    """
    try:
        return datetime.datetime.strptime(x, '%d/%m/%Y').date()
    except ValueError:
        # Only a format mismatch triggers the fallback; a bare ``except``
        # would also swallow KeyboardInterrupt / SystemExit.
        return datetime.datetime.strptime(x, '%Y-%m-%d').date()

def fromjulian(x):
    """Convert a year + day-of-year string (``'%Y%j'``) to a ``datetime.date``."""
    parsed = datetime.datetime.strptime(x, '%Y%j')
    return parsed.date()

Extracting time series functions

In [6]:
def lst_extract_ts(MYD: pd.DataFrame, MYD8: pd.DataFrame, dataset: str, location: str, date_begin: int, date_end: int):
    """Extract the daily and 8-day series of one location, cropped to whole years.

    Args:
        MYD: daily table, indexed by location, with a 'Date' column.
        MYD8: 8-day table with the same layout.
        dataset: name of the value column to read.
        location: index label of the point to extract.
        date_begin: first year to keep (from 1 January).
        date_end: last year to keep (through 31 December).

    Returns:
        Tuple ``(y, y8, dts, dts8)``: daily values, 8-day values, daily dates
        and 8-day dates, each restricted to the requested years.
    """
    first_day = datetime.date(date_begin, 1, 1)
    last_day = datetime.date(date_end, 12, 31)

    def _crop(values, dates):
        # keep only the samples whose date lies inside [first_day, last_day]
        keep = np.logical_and(dates >= first_day, dates <= last_day)
        return values[keep], dates[keep]

    # pull both raw series before cropping anything
    values8 = MYD8[dataset].loc[location].values
    dates8 = MYD8['Date'].loc[location].values
    values = MYD[dataset].loc[location].values
    dates = MYD['Date'].loc[location].values

    y8, dts8 = _crop(values8, dates8)
    y, dts = _crop(values, dates)

    return (y, y8, dts, dts8)

Smoothing functions

In [7]:
def lst_smoothing_A0(y8: np.ndarray, dts8: np.ndarray, pvalue: float, nopval: bool, lrange, nd: int):
    """Smooth the 8-day series with a V-curve optimised Whittaker filter.

    Args:
        y8: 8-day values.
        dts8: 8-day dates (not used here; kept for a signature parallel to A1).
        pvalue: envelope value for the asymmetric smoother.
        nopval: when true, use the symmetric smoother and ignore ``pvalue``.
        lrange: lambda exponents to scan.
        nd: no-data value; samples equal to it get weight 0.

    Returns:
        Tuple ``(z, lopt, vcurve, l)`` as returned by the smoother.
    """
    # weight 1 for valid observations, 0 for no-data
    w = np.asarray(y8 != nd, dtype='double')
    llas = array.array('d', lrange)

    # symmetric or asymmetric V-curve optimisation
    if nopval:
        result = ws2doptv(y8, w, llas)
    else:
        result = ws2doptvp(y8, w, llas, pvalue)

    z, lopt, vcurve, l = result
    return (z, lopt, vcurve, l)

def lst_smoothing_A1(y8: np.ndarray, dts8: np.ndarray, pvalue: float, nopval: bool, lrange, nd: int):
    """Smooth the 8-day series, then interpolate it to a daily axis.

    The 8-day series is smoothed with a V-curve optimised Whittaker filter.
    Each smoothed value is then placed at the midpoint of its 8-day window
    (acquisition date + 4 days) on a daily axis, and the daily vector is
    filtered again with a very small lambda to fill the days in between.

    Args:
        y8: 8-day values.
        dts8: 8-day acquisition dates.
        pvalue: envelope value for the asymmetric smoother.
        nopval: when true, use the symmetric smoother and ignore ``pvalue``.
        lrange: lambda exponents to scan.
        nd: no-data value; samples equal to it get weight 0.

    Returns:
        Tuple ``(z, lopt, vcurve, l, daily)``: daily smoothed series, the
        optimised lambda, the V-curve, its abscissa and the daily date axis.

    Raises:
        ValueError: if a shifted acquisition date is not on the daily axis.
    """
    # create weights
    w = np.array((y8 != nd) * 1, dtype='double')

    # apply whittaker filter with V-curve
    llas = array.array('d', lrange)
    if nopval:
        z, lopt, vcurve, l = ws2doptv(y8, w, llas)
    else:
        z, lopt, vcurve, l = ws2doptvp(y8, w, llas, pvalue)

    # Temporal interpolation
    daily = daily_interpolation(dts8)
    dvec = np.full(len(daily), nd, dtype='double')

    # Shift observations to the midpoint of the 8-day acquisition window.
    # The positions are recorded in a boolean mask (instead of a sentinel
    # value written into dvec, which collides with the data when nd == 10),
    # and the date -> index table is built once rather than per observation.
    mid_point = datetime.timedelta(8 / 2)
    position = {day: ix for ix, day in enumerate(daily)}
    is_midpoint = np.zeros(len(daily), dtype=bool)
    for d in dts8:
        target = d + mid_point
        if target not in position:
            raise ValueError('{} is not on the daily date axis'.format(target))
        is_midpoint[position[target]] = True

    # place the filtered values in the midpoints (in axis order)
    dvec[is_midpoint] = z

    # recreate weights
    w = np.array((dvec != nd) * 1, dtype='double')

    # refilter with low lambda
    z = ws2d(dvec, 0.0001, w)

    return (z, lopt, vcurve, l, daily)


def lst_smoothing_A2(y: np.ndarray, pvalue: float, nopval: bool, lrange, nd: int):
    """Smooth the daily series with a V-curve optimised Whittaker filter.

    Args:
        y: daily values.
        pvalue: envelope value for the asymmetric smoother.
        nopval: when true, use the symmetric smoother and ignore ``pvalue``.
        lrange: lambda exponents to scan.
        nd: no-data value; samples equal to it get weight 0.

    Returns:
        Tuple ``(z, lopt, vcurve, l)`` as returned by the smoother.
    """
    # observations equal to the no-data value are switched off (weight 0)
    valid = y != nd
    w = np.array(valid * 1, dtype='double')

    exponents = array.array('d', lrange)
    if not nopval:
        z, lopt, vcurve, l = ws2doptvp(y, w, exponents, pvalue)
    else:
        z, lopt, vcurve, l = ws2doptv(y, w, exponents)

    return (z, lopt, vcurve, l)

Print and Plot functions

In [9]:
def lst_print_info(location: str, latlon: dict, lagCorr1: float, lagCorr2: float):
    """Print the selected point, its coordinates and both lag-1 correlations.

    Args:
        location: key of the selected point.
        latlon: mapping from location key to its (lat, lon) entry.
        lagCorr1: lag-1 correlation of the 8-day series.
        lagCorr2: lag-1 correlation of the daily series.
    """
    # ANSI escape codes switching bold text on and off
    bold_on, bold_off = '\033[1m', '\033[0m'

    print(bold_on + 'Selected Point: ', location, bold_off)
    print('(Lat, Lon) =', latlon[location])
    print('\n')

    # correlations are shown rounded to three decimals
    for caption, value in (('LagCorr 8 days', lagCorr1), ('LagCorr daily', lagCorr2)):
        print(caption, round(value, 3))
    print('\n')
    
    
def lst_plot_all(A0: bool, A1: bool, A2: bool, 
                 z0: np.ndarray, z1: np.ndarray, z2: np.ndarray,
                 dts8: np.ndarray, daily: np.ndarray, dts: np.ndarray, 
                 y8: np.ndarray,
                 nd: int, 
                 yauto: bool, ylimits: tuple):
    """Plot the selected smoothed series over the raw 8-day observations.

    Args:
        A0, A1, A2: switches selecting which smoothed series are drawn.
        z0, z1, z2: smoothed series of the three approaches.
        dts8, daily, dts: date axes matching z0, z1 and z2 respectively.
        y8: raw 8-day values (drawn against ``dts8``).
        nd: no-data value, masked out of the raw series before plotting.
        yauto: when false, the y-axis is fixed to ``ylimits``.
        ylimits: y-axis limits used when ``yauto`` is false.
    """
    # no-data samples become NaN so they are not drawn
    y8nan = y8.copy()
    y8nan[y8nan == nd] = np.nan

    plt.figure(figsize=(20,10))

    # (enabled, x axis, smoothed values, colour, legend entry) per approach
    series = (
        (A0, dts8, z0, 'b', 'A0'),
        (A1, daily, z1, 'g', 'A1'),
        (A2, dts, z2, 'r', 'A2'),
    )

    leg = []
    for enabled, dates, smoothed, colour, name in series:
        if not enabled:
            continue
        plt.plot(dates, smoothed, color = colour, alpha = 0.5)
        leg.append(name)

    plt.plot(dts8, y8nan, color = 'grey',  marker = 'o', alpha = 0.5)
    leg.append('raw 8 days values')

    if not yauto:
        plt.ylim(ylimits)

    plt.xlabel('Date', fontsize=15)
    plt.ylabel('LST', fontsize=15)
    plt.legend(leg, fontsize=17)
    plt.show()


    
def lst_plot_vcurve(A0: bool, A1: bool, A2: bool,
                    l0: np.ndarray, l1: np.ndarray, l2: np.ndarray, 
                    vcurve0: np.ndarray, vcurve1: np.ndarray, vcurve2: np.ndarray, 
                    lopt0: float, lopt1: float, lopt2: float):
    """Plot the V-curves of the selected approaches and mark their optima.

    Args:
        A0, A1, A2: switches selecting which approaches are drawn.
        l0, l1, l2: V-curve abscissae (log10 lambda) per approach.
        vcurve0, vcurve1, vcurve2: V-curve values per approach.
        lopt0, lopt1, lopt2: optimised lambda per approach; shown as log10.
    """
    plt.figure(figsize=(20,10))

    # (enabled, abscissa, curve, optimum, colour) per approach
    curves = (
        (A0, l0, vcurve0, lopt0, 'b'),
        (A1, l1, vcurve1, lopt1, 'g'),
        (A2, l2, vcurve2, lopt2, 'r'),
    )

    # all curves are drawn first so the legend entries line up with them
    leg = []
    for enabled, lams, curve, optimum, colour in curves:
        if enabled:
            plt.plot(lams, curve, color = colour, alpha = 0.5, marker = 'o')
            leg.append('lopt: ' + str(round(np.log10(optimum),2)))

    # then one dashed vertical line per optimum
    for enabled, _, _, optimum, colour in curves:
        if enabled:
            plt.axvline(x = np.log10(optimum), ls = '--', color = colour)

    plt.xlabel('log10(l)', fontsize=15)
    plt.ylabel('V', fontsize=15)
    plt.title('V-curves', fontsize=23)
    plt.legend(leg, fontsize=17)
    plt.show()

    
def lst_plot_year(A0: bool, A1: bool, A2: bool,
                  year: int, month: int, 
                  lta_mean: dict, lta_std: dict, 
                  z0: np.ndarray, z1: np.ndarray, z2: np.ndarray, 
                  z0_lta: np.ndarray, z1_lta: np.ndarray, z2_lta: np.ndarray, 
                  dts8: np.ndarray, daily: np.ndarray, dts: np.ndarray, 
                  y: np.ndarray, 
                  nd: int, 
                  yauto: bool, ylimits: tuple):
    """Plot one 12-month window: smoothed series (left) and smoothed LTA (right).

    The window runs from the first day of ``month`` in ``year`` up to, but not
    including, the same day one year later.

    Args:
        A0, A1, A2: switches selecting which approaches are drawn.
        year, month: start of the 12-month window.
        lta_mean, lta_std: long-term mean / standard deviation keyed by
            ``(day, month)``; looked up for every 8-day date in the window.
        z0, z1, z2: smoothed series of the three approaches.
        z0_lta, z1_lta, z2_lta: smoothed long-term averages per approach.
        dts8, daily, dts: date axes matching the A0, A1 and A2 series.
        y: raw daily values (drawn against ``dts``).
        nd: no-data value, masked out before plotting.
        yauto: when false, both y-axes are fixed to ``ylimits``.
        ylimits: y-axis limits used when ``yauto`` is false.
    """
    # no-data samples become NaN so they are not drawn
    ynan = y.copy()
    ynan[ynan == nd] = np.nan

    fig, axs = plt.subplots(1,2, figsize=(20, 10))
    left, right = axs[0], axs[1]

    # boolean selectors for the 12-month window on each date axis
    window_start = datetime.date(year, month, 1)
    window_end = datetime.date(year + 1, month, 1)

    def _in_window(dates):
        return np.logical_and(dates >= window_start, dates < window_end)

    sel8 = _in_window(dts8)
    sel_daily = _in_window(daily)
    sel_raw = _in_window(dts)

    year_dts8 = dts8[sel8]
    year_z0 = np.array(z0)[sel8]
    year_z0_lta = np.array(z0_lta)[sel8]

    year_daily = daily[sel_daily]
    year_z1 = np.array(z1)[sel_daily]
    year_z1_lta = np.array(z1_lta)[sel_daily]

    year_dts = dts[sel_raw]
    year_z2 = np.array(z2)[sel_raw]
    year_z2_lta = np.array(z2_lta)[sel_raw]
    year_ynan = ynan[sel_raw]

    # long-term statistics for every 8-day date inside the window
    day_keys = [(dt.day, dt.month) for dt in year_dts8]
    year_lta_mean = [lta_mean[key] for key in day_keys]
    year_lta_std = [lta_std[key] for key in day_keys]

    enabled = (A0, A1, A2)
    x_axes = (year_dts8, year_daily, year_dts)
    colours = ('b', 'g', 'r')

    # left panel: smoothed series over the raw daily values
    leg = []
    for on, dates, values, colour, name in zip(
            enabled, x_axes, (year_z0, year_z1, year_z2), colours, ('A0', 'A1', 'A2')):
        if on:
            left.plot(dates, values, color = colour, alpha = 0.5)
            leg.append(name)

    leg.append('raw daily values')
    left.plot(year_dts, year_ynan, color = 'grey', marker = 'o', alpha = 0.5)

    if not yauto:
        left.set_ylim(ylimits)

    left.set_xlabel('Date', fontsize=15)
    left.set_ylabel('LST', fontsize=15)
    left.legend(leg, fontsize=17)
    left.set_title('Year ' + str(year), fontsize = 23)

    # right panel: smoothed long-term averages with the +-1 / +-2 sd bands
    leg = []
    for on, dates, values, colour, name in zip(
            enabled, x_axes, (year_z0_lta, year_z1_lta, year_z2_lta), colours,
            ('A0 lta', 'A1 lta', 'A2 lta')):
        if on:
            right.plot(dates, values, color = colour, alpha = 0.5)
            leg.append(name)

    leg.extend(['raw daily values', '+- 2sd', '+-1sd'])

    year_lta_meannan = np.array(year_lta_mean).copy()
    year_lta_meannan[year_lta_meannan == nd] = np.nan
    spread = np.array(year_lta_std)

    right.plot(year_dts8, year_lta_meannan, color = 'grey', marker = 'o', alpha = 0.5)
    right.fill_between(year_dts8, year_lta_meannan - 2*spread, year_lta_meannan + 2*spread, color = 'whitesmoke')
    right.fill_between(year_dts8, year_lta_meannan - spread, year_lta_meannan + spread, color = 'lightgrey')

    if not yauto:
        right.set_ylim(ylimits)

    right.set_xlabel('Date', fontsize=15)
    right.set_ylabel('LTA LST', fontsize=15)
    right.legend(leg, fontsize=15, loc = 'lower right')
    right.set_title('Long Term Averages ' + str(year), fontsize = 20)

    
    


LTA

In [3]:
def lst_lta_dict(y: np.ndarray, dts: np.ndarray, nd: float):
    """Compute the long-term average and spread for every calendar day.

    Values are grouped by ``(day, month)`` of their date, ignoring samples
    equal to ``nd``. The series is scanned once; the earlier version rescanned
    it for every distinct calendar day.

    Args:
        y: values, indexable in step with ``dts``.
        dts: dates (objects exposing ``.day`` and ``.month``).
        nd: no-data value; excluded from the statistics.

    Returns:
        Tuple ``(lta_mean, lta_std)`` of dicts keyed by ``(day, month)``.
        A calendar day with no valid sample maps to ``nd`` in both dicts.
    """
    from collections import defaultdict

    # group the valid samples by calendar day in a single pass
    samples = defaultdict(list)
    for ix, date_sel in enumerate(dts):
        # touching the bucket registers the key even if the value is no-data
        bucket = samples[(date_sel.day, date_sel.month)]
        if y[ix] != nd:
            bucket.append(y[ix])

    lta_mean = {}
    lta_std = {}
    for key, values in samples.items():
        if values:
            lta_mean[key] = np.mean(values)
            lta_std[key] = np.std(values)
        else:
            lta_mean[key] = nd
            lta_std[key] = nd

    return lta_mean, lta_std

Main function

In [42]:
def lst_main(A0: bool, A1: bool, A2: bool,
             MD: tuple, location: str, latlon: dict, dataset: str, 
             pvalue:float, nopval: bool,
             lrange1: tuple, lrange2: tuple, step: float, 
             nd: int,
             date_begin: int, date_end: int, 
             year: int, month: int,
             yauto: bool, ylimits: tuple):
    """Run the full comparison for one location and show all prints and plots.

    Args:
        A0, A1, A2: switches selecting which approaches are plotted.
        MD: pair ``(MYD, MYD8)`` of daily and 8-day tables.
        location: index label of the point to analyse.
        latlon: mapping from location to its coordinates (printed only).
        dataset: name of the value column to analyse.
        pvalue: envelope value for the asymmetric smoother.
        nopval: when true, use the symmetric smoother.
        lrange1: (start, stop) of the lambda exponents for the 8-day series.
        lrange2: (start, stop) of the lambda exponents for the daily series.
        step: spacing of the lambda exponents.
        nd: no-data value.
        date_begin, date_end: first and last year to analyse.
        year, month: start of the 12-month window of the yearly plot.
        yauto: when false, y-axes are fixed to ``ylimits``.
        ylimits: y-axis limits used when ``yauto`` is false.
    """
    # lambda exponent grids for the 8-day and the daily smoothing
    lrange1 = np.arange(lrange1[0], lrange1[1], step)
    lrange2 = np.arange(lrange2[0], lrange2[1], step)
    MYD, MYD8 = MD

    # extract time series
    y, y8, dts, dts8 = lst_extract_ts(MYD, MYD8, dataset, location, date_begin, date_end)

    # long-term averages, once on the daily and once on the 8-day data
    lta_mean, lta_std = lst_lta_dict(y, dts, nd)
    lta_mean8, lta_std8 = lst_lta_dict(y8, dts8, nd)

    # smoothing of the raw series with the three approaches
    z0, lopt0, vcurve0, l0 = lst_smoothing_A0(y8, dts8, pvalue, nopval, lrange1, nd)
    z1, lopt1, vcurve1, l1, daily = lst_smoothing_A1(y8, dts8, pvalue, nopval, lrange1, nd)
    z2, lopt2, vcurve2, l2 = lst_smoothing_A2(y, pvalue, nopval, lrange2, nd)

    # expand the long-term averages back onto the two date axes ...
    lta_daily = np.array([lta_mean[(dt.day, dt.month)] for dt in dts])
    lta_8day = np.array([lta_mean8[(dt.day, dt.month)] for dt in dts8])

    # ... and smooth them with the same three approaches
    z0_lta, lopt0_lta, vcurve0_lta, l0_lta = lst_smoothing_A0(lta_8day, dts8, pvalue, nopval, lrange1, nd)
    z1_lta, lopt1_lta, vcurve1_lta, l1_lta, daily_lta = lst_smoothing_A1(lta_8day, dts8, pvalue, nopval, lrange1, nd)
    z2_lta, lopt2_lta, vcurve2_lta, l2_lta = lst_smoothing_A2(lta_daily, pvalue, nopval, lrange2, nd)

    # lag-1 correlation: each series against itself shifted by one step
    lagCorr1 = lag1corr(np.array(y8[:-1]), np.array(y8[1:]), nd)
    lagCorr2 = lag1corr(np.array(y[:-1]), np.array(y[1:]), nd)

    # prints and plots
    lst_print_info(location, latlon, lagCorr1, lagCorr2)
    lst_plot_all(A0, A1, A2, z0, z1, z2, dts8, daily, dts, y8, nd, yauto, ylimits)
    lst_plot_year(A0, A1, A2, year, month, lta_mean, lta_std,
                  z0, z1, z2, z0_lta, z1_lta, z2_lta,
                  dts8, daily, dts, y, nd, yauto, ylimits)
    lst_plot_vcurve(A0, A1, A2, l0, l1, l2, vcurve0, vcurve1, vcurve2, lopt0, lopt1, lopt2)

## Loading the data

In [21]:
# No-data marker for the LST series.
# NOTE(review): presumably the value passed as `nd` to the functions above -- confirm in the calling cells.
lst_nd = 0

In [22]:
# 8-day MYD11A2 data: read the day / night LST columns from csv (indexed by
# the first selected column) and give them the short names 'LTD' / 'LTN'
lst_MYD8 = pd.read_csv(
    'data/MYD11A2-MYD11A2-006-results.csv',
    index_col=0,
    usecols=[
        'ID',
        'Date',
        'MYD11A2_006_LST_Day_1km',
        'MYD11A2_006_LST_Night_1km',
    ],
).rename(
    columns={
        'MYD11A2_006_LST_Day_1km': "LTD",
        'MYD11A2_006_LST_Night_1km': 'LTN',
    }
)

# the 'Date' strings become datetime.date objects
lst_MYD8['Date'] = lst_MYD8['Date'].apply(fromstring)

In [23]:
# daily MYD11A1 data: the export comes in two halves with identical columns
MYD_h1 = pd.read_csv(
    'data/MYD11A1-MYD11A1-006-results.csv',
    index_col=0,
    usecols=[
        'ID',
        'Date',
        'MYD11A1_006_LST_Day_1km',
        'MYD11A1_006_LST_Night_1km',
    ],
)

MYD_h2 = pd.read_csv(
    'data/MYD11A1h2-MYD11A1-006-results.csv',
    index_col=0,
    usecols=[
        'ID',
        'Date',
        'MYD11A1_006_LST_Day_1km',
        'MYD11A1_006_LST_Night_1km',
    ],
)

# stack both halves and give the LST columns the short names 'LTD' / 'LTN'
lst_MYD = pd.concat([MYD_h1, MYD_h2]).rename(
    columns={
        'MYD11A1_006_LST_Day_1km': "LTD",
        'MYD11A1_006_LST_Night_1km': 'LTN',
    }
)

# the 'Date' strings become datetime.date objects
lst_MYD['Date'] = lst_MYD['Date'].apply(fromstring)