In [31]:
from netCDF4 import Dataset
from datetime import datetime, timedelta
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd
import pickle
from scipy import stats
%matplotlib inline

plt.style.use('ggplot')

# BoM site data and FMC data.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# open this file if it comes from a trusted source.
with open('../Vegetation_sites_monthly/maindata.pkl', 'rb') as f:
    info, data, mdata = pickle.load(f)

# Meaning of the last axis of mdata, as recorded when the pickle was built:
# mdata[site, mi, 0] = np.nanmean(fmc['fmc_mean'][fmcw,a-1:a+2,b-1:b+2])
# mdata[site, mi, 1] = np.nanmean(s0['s0_pct'][smw,c,d])
# mdata[site, mi, 2] = np.nanmean(ss['ss_pct'][smw,c,d])
# mdata[site, mi, 3] = np.nanmean(sd['sd_pct'][smw,c,d])

nsites = len(info)
nyears = 2019 - 2005      # number of whole years analysed
nmonths = 12 * nyears     # length of the analysed monthly time axis
nvars = mdata.shape[2]    # taken from the data instead of hard-coding 4
x = list(range(nmonths))  # month index used as the x axis of the plots

# Fail early with a readable message rather than a broadcast error later on.
if mdata.shape[0] < nsites or mdata.shape[1] < nmonths:
    raise ValueError(
        'mdata has shape %s but at least %d sites and %d months are required'
        % (mdata.shape, nsites, nmonths)
    )

# Compute the long term average per calendar month per site (climatology)
# and subtract it from every month to obtain anomalies.
# The time axis is split into (year, month-of-year) so that averaging over
# the year axis yields one mean per site, calendar month and variable.
monthly = mdata[:nsites, :nmonths, :].reshape(nsites, nyears, 12, nvars)
clim = np.nanmean(monthly, axis=1, keepdims=True)  # NaNs are ignored

# anm has the same shape as mdata; entries outside the analysed
# sites/months (if any) stay NaN.
anm = np.full(mdata.shape, np.nan)
anm[:nsites, :nmonths, :] = (monthly - clim).reshape(nsites, nmonths, nvars)


In [46]:
# Time series plots: one PNG per site, with the FMC anomaly on the left axis
# and the three soil-moisture anomalies on a twin right axis.
soil_series = [(1, 's0'), (2, 'ss'), (3, 'sd')]  # (column in anm, legend label)

for site in range(nsites):
    site_anm = anm[site]

    fig, ax1 = plt.subplots(figsize=(10, 6))
    ax1.set_title(info[site][0] + ': ' + info[site][3])
    ax1.set_xlabel('Months')
    ax1.set_ylabel('FMC')

    # Collect every line handle so both axes share a single legend.
    handles = ax1.plot(x, site_anm[:, 0], label='FMC', color='green')

    ax2 = ax1.twinx()
    ax2.set_ylabel('Soil moisture')
    for column, label in soil_series:
        handles += ax2.plot(x, site_anm[:, column], label=label)

    ax1.legend(handles, [h.get_label() for h in handles], loc=0)

    # Put six evenly spaced ticks on each axis so the two tick sets line up.
    # Both limits are read before any ticks are changed.
    both_axes = (ax1, ax2)
    limits = [axis.get_ylim() for axis in both_axes]
    for axis, (low, high) in zip(both_axes, limits):
        axis.set_yticks(np.linspace(low, high, 6))

    fig.tight_layout()
    fig.savefig('time_series_%2.2i.png' % site)
    plt.close(fig)  # release the figure; many are created in this loop

In [44]:
# Now we can see if there is a (linear) correlation between soil-moisture
# anomalies and FMC anomalies.
#
# One figure per site, with one scatter subplot per soil depth and the
# least-squares line drawn on top whenever it can be fitted.

# r^2 per (site, soil layer). Entries keep the initial value -1 when no
# regression could be computed for that pair.
table = np.zeros((nsites, 3)) - 1
layernames = [None, 'Upper', 'Lower', 'Deep']  # indexed by column of anm

for si in range(nsites):
    fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(10, 4))
    Ylr = anm[si, :, 0]  # FMC anomaly, shared by the three panels

    for j in range(1, 4):
        panel = ax[j-1]
        Xlr = anm[si, :, j]

        # scatter plot
        panel.scatter(Xlr, Ylr, label='_nolegend_')
        panel.set_title(layernames[j] + ' soil layer')

        # keep only the months where both anomalies are finite (drops NaN)
        mask = np.isfinite(Xlr) & np.isfinite(Ylr)
        xs, ys = Xlr[mask], Ylr[mask]

        # A line cannot be fitted to fewer than two points or to a constant
        # x; skip the fit instead of letting one site abort the whole loop.
        if xs.size < 2 or xs.min() == xs.max():
            continue

        # computing linear regression
        slope, intercept, r_value, p_value, std_err = stats.linregress(xs, ys)
        r2 = r_value**2
        table[si, j-1] = r2

        # draw the fitted line across the observed x range
        minx, maxx = xs.min(), xs.max()
        panel.plot(
            [minx, maxx],
            [slope*minx + intercept, slope*maxx + intercept],
            'k-',
            label='r$^2$ = %f' % r2,
        )
        panel.legend()

    fig.suptitle(info[si][0] + ': ' + info[si][3], fontsize=18)
    fig.tight_layout()
    fig.subplots_adjust(top=0.85)  # leave room for the suptitle
    fig.savefig('scatter_%2.2i.png' % si)
    plt.close(fig)  # release the figure; many are created in this loop