In [1]:
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt
from datetime import datetime, timedelta
import pandas as pd
%matplotlib inline
from scipy.stats import norm, mstats


def mk_test(x, alpha = 0.05):
    """Mann-Kendall test for a monotonic trend in an ordered data sequence.

    Input:
        x:   a vector of data (list, NumPy array or pandas Series), taken in
             the order given. NaN entries are dropped before testing.
        alpha: significance level (0.05 default)

    Output:
        trend: tells the trend ('increasing', 'decreasing' or 'no trend')
        h: True (if trend is present) or False (if trend is absent)
        p: two-sided p value of the significance test
        z: normalized test statistic

    Examples
    --------
      >>> x = np.random.rand(100)
      >>> trend,h,p,z = mk_test(x,0.05)
    """
    # Work on a positional float array so that pandas Series with a
    # non-default index are handled by position, not by label.
    x = np.asarray(x, dtype=float).ravel()
    # A NaN would turn S into NaN, after which no branch below assigns z.
    x = x[~np.isnan(x)]
    n = len(x)

    # calculate S: the sum of sign(x[j] - x[k]) over all pairs k < j
    s = 0.0
    for k in range(n - 1):
        s += np.sign(x[k + 1:] - x[k]).sum()

    # calculate the var(s); tp holds the size of each group of tied values.
    # Groups of size 1 contribute 0, so one formula covers both the tied and
    # the untied case. The tie term is SUBTRACTED: ties reduce the variance.
    _, tp = np.unique(x, return_counts=True)
    var_s = (n*(n-1)*(2*n+5) - np.sum(tp*(tp-1)*(2*tp+5)))/18

    # continuity-corrected normal approximation; s != 0 implies at least two
    # distinct values, hence var_s > 0 and the division is safe
    if s > 0:
        z = (s - 1)/np.sqrt(var_s)
    elif s < 0:
        z = (s + 1)/np.sqrt(var_s)
    else:
        z = 0.0

    # calculate the p_value
    p = 2*(1-norm.cdf(abs(z))) # two tail test
    h = abs(z) > norm.ppf(1-alpha/2)

    if h and z < 0:
        trend = 'decreasing'
    elif h and z > 0:
        trend = 'increasing'
    else:
        trend = 'no trend'

    return trend, h, p, z

# we will plot standard deviation of FMC, and its linear regression
# then use the Mann-Kendall test to determine significance of the linear trend

# because the 5km upscaling did not have a significant effect on correlation
# we will use the 9c (3x3) filtered FMC data set

In [2]:
# Layout of the control-point file, as recorded by the original author:
#
#   info : one entry per site, e.g.
#            ['Majura', -35.2778, 149.1966, 'Grassland']
#            ['Tidbinbilla', -35.4191, 148.9506, 'Grassland']
#            ['Coppins crossing road', -35.2787, 149.0559, 'Grassland']
#          (entry 0 is the site name and entry 3 the vegetation type;
#           entries 1 and 2 are presumably latitude / longitude - TODO confirm)
#
#   data : one dict per site, keyed by a day number, e.g.
#            {38351: [26.116945, 0.0009765625, 0.1806640625],
#             38355: [24.171112, 0.0654296875, 0.2041015625], ...}
#          The code below treats the key as days since 1900-01-01 and reads
#          value 0 as FMC, value 1 as s0 and value 2 as ss.
#
#   data.size = number of sites

# The file holds Python objects (lists / dicts), which NumPy restores through
# pickle. NumPy >= 1.16.3 refuses to do so unless allow_pickle=True is given.
# NOTE: unpickling can execute arbitrary code - only load files you created.
info, data = np.load('created_data/control_points_9c_filter.npy', allow_pickle=True)

# One DataFrame per site, one row per year (2005..2018), holding the yearly
# mean and standard deviation of FMC, s0 and ss.
frames = []

for si in range(info.size):

    site = data[si]
    yd = {'fmc_mean':[],'fmc_sd':[],'ss_mean':[],'ss_sd':[],'s0_mean':[],'s0_sd':[]}
    time = np.array(sorted(site.keys()), dtype = int)

    for y in range(2005,2019):

        # half-open window [t1, t2) for calendar year y, in days since 1900-01-01
        t1 = (datetime(y, 1, 1) - datetime(1900, 1, 1)).days
        t2 = (datetime(y+1, 1, 1) - datetime(1900, 1, 1)).days
        in_year = time[(time >= t1) & (time < t2)]

        fmc = [site[t][0] for t in in_year]
        s0 = [site[t][1] for t in in_year]
        ss = [site[t][2] for t in in_year]

        # A year without observations yields NaN (NumPy also emits a
        # RuntimeWarning), so every frame keeps one row per year.
        yd['fmc_mean'].append(np.nanmean(fmc))
        yd['fmc_sd'].append(np.nanstd(fmc))
        yd['s0_mean'].append(np.nanmean(s0))
        yd['s0_sd'].append(np.nanstd(s0))
        yd['ss_mean'].append(np.nanmean(ss))
        yd['ss_sd'].append(np.nanstd(ss))

    frames.append(pd.DataFrame(yd))

  a = np.array(a, subok=True, copy=True)


In [5]:
#Here we are creating plots of the mean and sd of fmc and s0 and ss for each site and year
x = np.arange(2005,2019)


def mk_info(ax, data, xpct = 0.1, ypct = 0.7, x_start = 2005):
    """Draw a linear fit and the Mann-Kendall result for one series on `ax`.

    Input:
        ax:      matplotlib Axes to draw on
        data:    sequence of values, one per consecutive x step
        xpct:    x position of the annotation, in axes-fraction coordinates
        ypct:    y position of the annotation, in axes-fraction coordinates
        x_start: x value of the first sample; sample i is drawn at
                 x_start + i (2005 default, matching the yearly series
                 plotted in this notebook)

    Output:
        r_value: correlation coefficient of the linear regression
                 (the legend shows its square)
        trend:   trend string returned by mk_test
    """
    # regress against the sample position 0..len(data)-1
    xd = np.arange(len(data))
    slope, intercept, r_value, p_value, std_err = stats.linregress(xd, data)
    strlin = 'r$^2$ = %f' % (r_value**2)

    # the fitted line is fully described by its two end points
    first, last = xd[0], xd[-1]
    ax.plot([first + x_start, last + x_start],
            [slope*first + intercept, slope*last + intercept],
            'k-', label = strlin)

    # values = (trend, h, p, z)
    values = mk_test(data)
    ax.text(xpct, ypct, '%s: p-value = %f' % (values[0], values[2]),
            fontweight='bold', transform=ax.transAxes)

    return r_value, values[0]
    

# One entry per subplot: (CSV column group, DataFrame column, axes position).
# The plotted line label is the CSV group name in upper case.
panels = [
    ('FMC mean', 'fmc_mean', (0, 0)),
    ('FMC std',  'fmc_sd',   (1, 0)),
    ('S0 mean',  's0_mean',  (0, 1)),
    ('S0 std',   's0_sd',    (1, 1)),
    ('SS mean',  'ss_mean',  (0, 2)),
    ('SS std',   'ss_sd',    (1, 2)),
]

# `with` guarantees the CSV is flushed and closed even if a plot or a
# savefig call raises part-way through the site loop.
with open('timeseries_trend.csv', 'w') as csv:
    csv.write('Site,Vegetation,FMC mean,,FMC std,,S0 mean,,S0 std,,SS mean,,SS std\n')

    for si in range(info.size):

        fig, axes = plt.subplots(nrows=2,ncols=3, figsize=(16,8))
        r = {}

        for key, column, pos in panels:
            series = frames[si][column]
            axes[pos].plot(x, series, label = key.upper())
            # mk_info adds the regression line and MK annotation and
            # returns (r_value, trend)
            r[key] = mk_info(axes[pos], series)

        for ax in axes.flatten():
            ax.legend()
            ax.grid()

        fig.suptitle(info[si][0] + ' : ' + info[si][3])
        fig.tight_layout()
        fig.subplots_adjust(top = 0.95)  # leave room for the suptitle
        fig.savefig('images/mannken_%2.2i.png' % si)
        plt.close(fig)  # avoid accumulating open figures across sites

        # Write, per variable, the regression r value and the MK trend.
        # NOTE(review): this is r, not r^2 (the plot legend shows r^2) -
        # confirm which one the CSV is meant to contain.
        csv.write('%s,%s' % (info[si][0], info[si][3]))
        for key, _, _ in panels:
            csv.write(',%f,%s' % (r[key][0], r[key][1]))
        csv.write('\n')

In [23]:
# scatterplots

def linreg(x, y):
    """Fit y = slope*x + intercept and describe the fitted line for plotting.

    Returns three items: the x end points [min(x), max(x)], the fitted y
    values at those two points, and a legend label carrying r squared.
    """
    fit = stats.linregress(x, y)
    x_ends = [np.min(x), np.max(x)]
    y_ends = [fit.slope*x_end + fit.intercept for x_end in x_ends]
    label = 'r$^2$ = %f' % (fit.rvalue**2)
    return x_ends, y_ends, label

x = np.arange(2005,2019)

# One entry per subplot: (axes position, x column, y column, scatter label).
scatter_panels = (
    ((0, 0), 's0_mean', 'fmc_mean', 'FMC/s0 mean'),
    ((1, 0), 's0_sd',   'fmc_sd',   'FMC/s0 std'),
    ((0, 1), 'ss_mean', 'fmc_mean', 'FMC/ss mean'),
    ((1, 1), 'ss_sd',   'fmc_sd',   'FMC/ss std'),
)

# For every site: scatter the yearly FMC statistics against the matching
# s0 / ss statistics, overlay the regression line and save the figure.
for si in range(info.size):

    fig, axes = plt.subplots(nrows=2,ncols=2, figsize=(10,10))
    site_frame = frames[si]

    for pos, xcol, ycol, scatter_label in scatter_panels:
        panel = axes[pos]
        panel.scatter(site_frame[xcol], site_frame[ycol], label = scatter_label)
        xl, yl, l = linreg(site_frame[xcol], site_frame[ycol])
        panel.plot(xl, yl, 'k-', label = l)

    for ax in axes.flatten():
        ax.legend()
        ax.grid()

    fig.suptitle(info[si][0] + ' : ' + info[si][3])
    fig.tight_layout()
    fig.subplots_adjust(top = 0.95)
    fig.savefig('images/yearscatter_%2.2i.png' % si)
    plt.close(fig)