# Exploring and comparing the EC tower data with gridded datasets

* Per-site, facet plots:
    * For common in-situ and gridded variables: Ta, VPD, rainfall, incoming solar
        * Correlation matrices
        * 1:1 plots with r2 and MAE
        * Distributions
* Group all sites together
    * Distributions 

     

## Load modules

In [1]:
import os
import scipy as sp
import numpy as np
import pandas as pd
import seaborn as sb
import xarray as xr
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error


## Analysis parameters

In [2]:
# Directory of training-data CSVs; the loading loop lists this directory and
# reads every CSV file it finds. The trailing slash matters where the path is
# concatenated with a file name.
path = '/g/data/os22/chad_tmp/NEE_modelling/results/training_data/'

First, stitch all the training data together and add the site name as a field.

In [3]:
# Read every CSV in `path` and stack them into a single DataFrame, tagging
# each row with a site code taken from the first three characters of the
# file name.
dffs = []
for i in sorted(os.listdir(path)):  # sorted: os.listdir returns names in arbitrary order
    # Match on the extension rather than any file name that merely contains 'csv'
    if not i.endswith('.csv'):
        continue
    # Use a separate name so the loop does not rebind `df` on every iteration
    site_df = pd.read_csv(os.path.join(path, i), index_col='time', parse_dates=True)
    site_df['site_name'] = i[0:3]
    dffs.append(site_df)

# Fail with a clear message instead of pd.concat's "No objects to concatenate"
if not dffs:
    raise FileNotFoundError(f'No CSV files found in {path}')

df = pd.concat(dffs)

## Facet plot of 1:1 comparisons

Function for producing nice 1:1 plots grouped by a variable in the dataframe

In [None]:
def one_to_one(df, x, y, groupby_var, title, col_wrap, adjust, output_path, ax_lim=(None,None)):
    """
    Draw a facet grid of 1:1 scatter plots of `y` against `x` and save it.

    One panel is drawn per unique value of `groupby_var`. Each panel shows the
    points, a fitted regression line (magenta), a dashed black 1:1 line, and
    the Pearson r and mean absolute error computed from that panel's data.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns `x`, `y` and `groupby_var`. Rows with a
        missing value in any of the three are dropped before plotting.
    x, y : str
        Column names plotted on the x axis (labelled "In-situ") and the
        y axis (labelled "Gridded").
    groupby_var : str
        Column whose values define the panels.
    title : str
        Figure-level title.
    col_wrap : int
        Number of panels per row before wrapping.
    adjust : float
        Passed to ``subplots_adjust(top=...)`` to leave room for the title.
    output_path : str
        File the figure is written to.
    ax_lim : tuple, optional
        Limits applied to both the x and y axes of every panel.

    Returns
    -------
    None
    """
    # Import the submodule explicitly: `import scipy as sp` on its own does
    # not guarantee that `sp.stats` has been loaded.
    from scipy import stats

    df = df[[x, y, groupby_var]].dropna()
    fg = sb.FacetGrid(df, col=groupby_var, col_wrap=col_wrap, margin_titles=True)
    fg.map(sb.scatterplot,x,y,color="#338844", edgecolor="white", s=50, lw=1, alpha=0.5)
    fg.map(sb.regplot, x, y, scatter=False, color='m')
    # Regressing x on itself yields the 1:1 line; ci=None skips the bootstrap,
    # whose confidence band would have zero width anyway.
    fg.map(sb.regplot, x, x, scatter=False, ci=None, color='black',
           line_kws={'linestyle':'dashed'})
    fg.set(ylim=ax_lim, xlim=ax_lim)
    fg.set_axis_labels("In-situ", "Gridded")

    def annotate(data,**kws):
        # pearsonr requires at least two samples; leave such panels unannotated
        if len(data) < 2:
            return
        r, _ = stats.pearsonr(data[x], data[y])
        mae = mean_absolute_error(data[x], data[y])
        ax = plt.gca()  # map_dataframe makes the panel's axes current
        ax.text(.05, .8, 'r={:.3f}'.format(r),
                transform=ax.transAxes)
        ax.text(.05, .7, 'mae={:.3g}'.format(mae),
                transform=ax.transAxes)

    fg.map_dataframe(annotate)
    fg.fig.subplots_adjust(top=adjust)
    fg.fig.suptitle(title)
    # Save this grid's own figure rather than whichever figure is "current"
    fg.fig.savefig(output_path)

Site specific 1:1 plots

In [None]:
# Rainfall 1:1 comparison, one panel per site
one_to_one(
    df,
    x='Precip_EC',
    y='precip_RS',
    groupby_var='site_name',
    title='Rainfall (mm)',
    col_wrap=5,
    adjust=0.935,
    output_path='/g/data/os22/chad_tmp/NEE_modelling/results/figs/one_to_one/precip_site_name.png',
    ax_lim=(0, 400),
)


Group by simplified landcover class

In [None]:
# Rainfall 1:1 comparison, one panel per land-cover class
one_to_one(
    df,
    x='Precip_EC',
    y='precip_RS',
    groupby_var='IGBP_class_RS',
    title='Rainfall (mm)',
    col_wrap=3,
    adjust=0.8,
    output_path='/g/data/os22/chad_tmp/NEE_modelling/results/figs/one_to_one/precip_IGBP.png',
    ax_lim=(0, 500),
)

## In-situ and remotely sensed distributions

In [5]:
# Display the column names of the combined frame (`_EC` and `_RS` variables
# plus the `site_name` tag)
df.columns

Index(['NEE_SOLO_EC', 'GPP_SOLO_EC', 'ER_SOLO_EC', 'Ta_EC', 'Sws_EC', 'RH_EC',
       'Precip_EC', 'Fe_EC', 'Fh_EC', 'Fsd_EC', 'Fn_EC', 'Fld_EC', 'VPD_EC',
       'lai_RS', 'LST_RS', 'Fpar_RS', 'soil_moisture_RS', 'LST-Tair_RS',
       'spei_RS', 'solar_RS', 'Ta_RS', 'VPD_RS', 'precip_RS', 'precip_cml_RS',
       'IGBP_class_RS', 'site_name'],
      dtype='object')

In [None]:
# The original cell was an unfinished stub: a bare `def` (a SyntaxError)
# followed by a selection using x / y / groupby_var, which are not defined at
# cell level. Select the rainfall pair and site tag explicitly, dropping rows
# where any of the three is missing.
# NOTE(review): the column choice mirrors the rainfall 1:1 plots - confirm
# which variables this section is meant to compare.
insitu = df[['Precip_EC', 'precip_RS', 'site_name']].dropna()

In [None]:
# The original call used `sns` (seaborn is imported as `sb`) and columns
# ("flipper_length_mm", "species") that do not exist in `df`.
# Reshape the rainfall pair to long form so the two sources can be
# distinguished by hue on a shared axis.
# NOTE(review): rainfall is used as the example variable - swap in other
# _EC/_RS pairs as needed.
rain = (
    df[['Precip_EC', 'precip_RS']]
    .melt(var_name='source', value_name='Rainfall (mm)')
    .dropna()
)
sb.displot(rain, x='Rainfall (mm)', hue='source', kind='kde', multiple='stack')

## CO2 flux correlation matrices

In [None]:
# `path` may point at the training-data directory, which read_csv cannot
# open. Only reload when it is a single file; otherwise keep using the frame
# already held in `df`.
if not os.path.isdir(path):
    df = pd.read_csv(path, index_col='time', parse_dates=True)

In [None]:
# Correlation matrix of the `EC` columns, excluding three flux terms
ec_vars = df.filter(regex='EC').drop(columns=['Fe_EC', 'Fh_EC', 'Fld_EC'])
corr = ec_vars.corr()

# Hide the upper triangle (including the diagonal) so each pair appears once
mask = np.triu(np.ones_like(corr, dtype=bool))

fig, ax = plt.subplots(figsize=(8, 8))
sb.heatmap(corr, mask=mask, annot=True, cmap="bwr_r", cbar=False, ax=ax)
ax.set_title('Correlations between CO2 fluxes and in-situ climate variables');

In [None]:
# CO2 flux columns, and every column whose name contains 'RS'
flux_cols = ['NEE_SOLO_EC', 'GPP_SOLO_EC', 'ER_SOLO_EC']
fluxes = df.loc[:, flux_cols]
rs = df.filter(regex='RS')

In [None]:
# Take the flux and gridded columns straight from `df` instead of joining
# `fluxes` and `rs` on the index. When `df` holds several sites stacked on a
# shared time index (as produced by pd.concat), an index join is many-to-many
# and pairs rows from different sites.
flux_rs = df[[*fluxes.columns, *rs.columns]]

# Correlations are only defined for numeric columns; restrict to those in
# case a selected column (e.g. a class label) is non-numeric.
corr = flux_rs.select_dtypes(include='number').corr()

# Hide the upper triangle (including the diagonal) so each pair appears once
mask = np.triu(np.ones_like(corr, dtype=bool))
fig,ax=plt.subplots(1,1, figsize=(10,10))
sb.heatmap(corr, cmap="bwr_r", annot=True, ax=ax, cbar=False, mask=mask)
plt.title('Correlations between CO2 fluxes and gridded climate variables');

## Compare in-situ and gridded environmental variables

In [None]:
# Display the column names to pick the variable pair to compare
df.columns

In [None]:
# Variable pair to compare and the label used as the plot title
var_ec = 'Fsd_EC'    # column plotted on the "In-situ" axis
var_rs = 'solar_RS'  # column plotted on the "Gridded" axis
var_name = 'ISW'     # presumably "incoming shortwave" - TODO confirm

In [None]:
# linregress is used here but is not otherwise imported in this notebook
from scipy.stats import linregress

x = df[var_ec].values  # independent
y = df[var_rs].values  # dependent

# Keep only the samples where both series are present
mask = ~np.isnan(x) & ~np.isnan(y)

# Linear fit of y on x; the fifth returned value is the standard error of the
# estimated slope
slope, intercept, r, p, sd = linregress(x[mask], y[mask])

mae = mean_absolute_error(x[mask], y[mask])

In [None]:
# Scatter of y against x with the fitted line (dashed magenta) and the
# 1:1 line (solid black)
fig, ax = plt.subplots(1, 1, sharey=True, figsize=(8, 6))
ax.scatter(x, y)
ax.plot(x, intercept + slope * x, '--m')
ax.plot(x, x, '-k')

# Statistics labels, anchored to the scaled data minimum (x) and maximum (y)
text_x = np.nanmin(x*1.05)
r2_label = "r2 = "+str(round(r**2,4))
mae_label = "MAE = "+str(round(mae, 3))
ax.text(text_x, np.nanmax(y*0.95), r2_label)
ax.text(text_x, np.nanmax(y*0.90), mae_label)

ax.set(xlabel='In-situ', ylabel='Gridded')
ax.set_title(var_name);