## Define Variables / Import MetaData

In [None]:
import os
import sys
from pathlib import Path

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import r2_score

In [None]:
# Make the repository root (the parent of the notebook's working directory)
# importable so the project-local `utils`, `plotting` and `config` modules resolve.
parent_dir = str(Path.cwd().parent)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
from utils.functions import import_flux_metadata, import_flux_site_data, import_site_RMSE_data
from utils.plotting import polyfit1d_and_plot
from plotting.plots_generator import import_flux_and_prep_data

In [None]:
from config import FLUX_DATA_PATH, FLUX_METADATA, MICASA_PREPROCESSED_DATA

In [None]:
# Load the flux-site metadata table from the path configured as FLUX_METADATA;
# its "Site ID" column drives the per-site loops below.
fluxnet_meta = import_flux_metadata(FLUX_METADATA)

# Number of FLUXNET points as a predictor variable

In [None]:
# Site identifiers from the metadata; iterated over below to load each site's data.
ids_list = fluxnet_meta["Site ID"]

In [None]:
# Time-resolution code handed to the site-data import helpers
# ("DD" presumably selects daily data — TODO confirm against the helpers).
timedelta = "DD"

In [None]:
def import_and_plot(var, RMSE_type, df_to_join, col_join, xlabel):
    """Load one RMSE results file, join a per-site predictor, and plot the fit.

    Parameters
    ----------
    var : str
        Name of the RMSE column to plot on the y axis (e.g. "NEE_RMSE"); its
        first three characters are used as the start of the plot title.
    RMSE_type : str
        Results-file selector, "ANN" or "GRW". It is inserted into the file
        name "../analysis/RMSE_results_{RMSE_type}.csv" and picks the title
        suffix ("" for "ANN", " (Growing)" for "GRW"). Any other value raises
        KeyError after the file has been loaded.
    df_to_join : pandas.DataFrame
        Frame indexed by site ID that holds the predictor column.
    col_join : str
        Name of the predictor column in ``df_to_join`` (x axis).
    xlabel : str
        x-axis label forwarded to ``polyfit1d_and_plot``.
    """
    rmse_path = f"../analysis/RMSE_results_{RMSE_type}.csv"
    merged = import_site_RMSE_data(FLUX_METADATA, rmse_path).join(
        df_to_join, on="Site ID", how="inner"
    )

    # Title suffix per results type; looked up after loading, as before.
    title_suffix = {"ANN": "", "GRW": " (Growing)"}[RMSE_type]

    polyfit1d_and_plot(merged, col_join, var, xlabel, var[:3] + title_suffix)

In [None]:
# x-axis label for the plots that use the all-quality observation count.
xlabel = "Number of FLUXNET Ameriflux Observations\n(Any quality value)"

## Number of FLUXNET points of all quality

In [None]:
# Build a df of number of obs: one row per site, holding the length of the
# site's data as returned by import_flux_site_data (no quality filtering here).
results = [
    {
        "Site ID": site_ID,
        "Num_obs": len(import_flux_site_data(FLUX_DATA_PATH, site_ID, timedelta)),
    }
    for site_ID in ids_list
]

num_obs = pd.DataFrame(results).set_index("Site ID")

### NEE

In [None]:
# NEE RMSE from the "ANN" results file against the all-quality observation count.
import_and_plot("NEE_RMSE", "ANN", num_obs, "Num_obs", xlabel)

In [None]:
# Same plot using the "GRW" (growing) results file.
import_and_plot("NEE_RMSE", "GRW", num_obs, "Num_obs", xlabel)

### NPP (done manually because of NaN values and outliers)

In [None]:
# Outliers drop: keep only the NPP_RMSE column, restricted to rows below the
# 2.5e-5 cutoff. Rows with a NaN RMSE fail the comparison and are removed too.
df_RMSE_ANN = import_site_RMSE_data(FLUX_METADATA, "../analysis/RMSE_results_ANN.csv")
df_RMSE_dropped_ANN = df_RMSE_ANN.loc[df_RMSE_ANN["NPP_RMSE"] < 2.5e-5, ["NPP_RMSE"]]

In [None]:
# Attach the all-quality observation count to the filtered NPP RMSE values
# (inner join keeps only sites present in both frames), then fit and plot.
data = df_RMSE_dropped_ANN.join(num_obs, on="Site ID", how="inner")
polyfit1d_and_plot(data, "Num_obs",  "NPP_RMSE", xlabel, "NPP, outliers dropped");

In [None]:
# NaN values and outliers drop: filter on the 2.5e-5 NPP RMSE cutoff, join the
# all-quality observation count, then drop any row that still contains a NaN.
data = import_site_RMSE_data(FLUX_METADATA, "../analysis/RMSE_results_GRW.csv")
data = (
    data.loc[data["NPP_RMSE"] < 2.5e-5]
    .join(num_obs, on="Site ID", how="inner")
    .dropna()
)
polyfit1d_and_plot(data, "Num_obs", "NPP_RMSE", xlabel, "NPP (Growing), outliers dropped");

### The growing-season plots look similar to the annual ones, so only the annual plots are shown

## Number of Good Quality values

In [None]:
# Build a df of number of good QA values: for each site, count the non-NaN
# entries of the NEE and GPP_DT columns returned by import_flux_and_prep_data
# (presumably poor-quality values are NaN at that point — TODO confirm).
columns = ["NEE (kgC m-2 s-1)", "GPP_DT (kgC m-2 s-1)"]
results = []
for site_ID in ids_list:
    fluxnet_data = import_flux_and_prep_data(site_ID, timedelta)
    good_counts = fluxnet_data[columns].count()

    # One row per site: "NEE_count_good" / "GPP_count_good" keyed by the
    # first three characters of the source column name.
    site_dict = {"Site ID": site_ID}
    site_dict.update(
        {col_name[:3] + "_count_good": good_counts[col_name] for col_name in columns}
    )
    results.append(site_dict)

num_good = pd.DataFrame(results).set_index("Site ID")

In [None]:
# x-axis label for the plots that use the good-quality measurement count.
xlabel="Number of good quality FLUXNET measurements"

In [None]:
# NEE RMSE from the "ANN" results file against the count of non-NaN NEE values.
import_and_plot("NEE_RMSE", "ANN", num_good, "NEE_count_good", xlabel)

In [None]:
# Same plot using the "GRW" (growing) results file.
import_and_plot("NEE_RMSE", "GRW", num_good, "NEE_count_good", xlabel)

### Do NPP/GPP by hand

In [None]:
# Outliers drop: keep only sites whose NPP RMSE is below the 2.5e-5 cutoff
# (rows with a NaN RMSE fail the comparison and are removed as well).
data = import_site_RMSE_data(FLUX_METADATA, "../analysis/RMSE_results_ANN.csv")
data_NPP_RSME = data[data["NPP_RMSE"] < 2.5e-5]
# This section is about good-quality counts, so join `num_good` and plot its
# GPP count. The previous version joined `num_obs` / "Num_obs", which merely
# repeated the all-quality plot under the "good quality" x-axis label.
data_NPP_RSME = data_NPP_RSME.join(num_good, on="Site ID", how="inner")
polyfit1d_and_plot(data_NPP_RSME, "GPP_count_good", "NPP_RMSE", xlabel, "NPP, outliers dropped");

In [None]:
# Growing results: drop NPP RMSE outliers (cutoff 2.5e-5; NaN RMSE rows fail the
# comparison and are removed too).
data = import_site_RMSE_data(FLUX_METADATA, "../analysis/RMSE_results_GRW.csv")
data_NPP_RSME = data[data["NPP_RMSE"] < 2.5e-5]
# This section is about good-quality counts, so join `num_good` and plot its
# GPP count. The previous version joined `num_obs` / "Num_obs", which merely
# repeated the all-quality plot under the "good quality" x-axis label.
# dropna() then removes any row that still contains a NaN in any column.
data_NPP_RSME = data_NPP_RSME.join(num_good, on="Site ID", how="inner").dropna()
polyfit1d_and_plot(data_NPP_RSME, "GPP_count_good", "NPP_RMSE", xlabel, "NPP (Growing), outliers dropped");

# Plot Percent NaN vs RMSE

In [None]:
# Per-site NaN statistics, indexed by the CSV's 'SiteID' column
# (presumably the source of the *_pct_nan columns plotted below — verify).
nan_results = pd.read_csv('../analysis/nan_results.csv',index_col='SiteID')
nan_results

## Annual

In [None]:
# Load the "ANN" RMSE results and display them for inspection.
df_ANN = import_site_RMSE_data(FLUX_METADATA, '../analysis/RMSE_results_ANN.csv')
df_ANN

In [None]:
# Merge the NaN results onto the RMSE table: rows are matched from the
# 'Site ID' key of df_ANN to the index of nan_results; the inner join keeps
# only sites present in both.
df_ANN = df_ANN.join(nan_results, on='Site ID',how="inner")
df_ANN

In [None]:
# x-axis label for the percent-NaN plots below (reused by the growing-season plots).
xlabel = "Percent (%) NaN values"

In [None]:
# NEE RMSE against the NEE percent-NaN column; the trailing ';' suppresses the
# notebook's echo of the call's return value.
polyfit1d_and_plot(df_ANN, "NEE_pct_nan", "NEE_RMSE", xlabel, "NEE (Annual)");

In [None]:
# NPP RMSE against the GPP percent-NaN column (no outlier filtering yet).
polyfit1d_and_plot(df_ANN, "GPP_pct_nan", "NPP_RMSE", xlabel, "NPP (Annual)");

In [None]:
# Drop outliers: keep just the two plotted columns, restricted to rows whose
# NPP RMSE is below the 2.5e-5 cutoff, then show how many values remain.
df_ANN_dropped = df_ANN.loc[
    df_ANN["NPP_RMSE"] < 2.5e-5, ["NPP_RMSE", "GPP_pct_nan"]
].copy()
df_ANN_dropped.count()

In [None]:
# Repeat the NPP vs. GPP percent-NaN plot on the outlier-filtered frame.
polyfit1d_and_plot(df_ANN_dropped, "GPP_pct_nan", "NPP_RMSE", xlabel, "NPP/GPP (Annual), outliers dropped");

# Growing Season Results

In [None]:
# Load the "GRW" RMSE results and attach the per-site NaN results in one step
# (inner join from the 'Site ID' key to the index of nan_results), then display.
df_GRW = import_site_RMSE_data(
    FLUX_METADATA, "../analysis/RMSE_results_GRW.csv"
).join(nan_results, on="Site ID", how="inner")
df_GRW

In [None]:
# NOTE(review): disabled earlier variant of the load/merge step; `df_meta` is
# not defined anywhere in the visible notebook — presumably superseded by the
# import_site_RMSE_data cell above; confirm before removing.
# Import and merge results
# RMSE_results_GRW = pd.read_csv('../analysis/RMSE_results_GRW.csv',index_col='SiteID')
# df_GRW = df_meta.join(RMSE_results_GRW, on='Site ID', how="inner")

In [None]:
# Display the merged "GRW" table for inspection.
df_GRW

In [None]:
# NEE RMSE against the NEE percent-NaN column, "GRW" results.
polyfit1d_and_plot(df_GRW, "NEE_pct_nan", "NEE_RMSE", xlabel, "NEE (Growing)");

In [None]:
# Report (as a pair of booleans) whether either column used in the next plot
# contains any NaN.
bool(df_GRW["GPP_pct_nan"].isna().any()), bool(df_GRW["NPP_RMSE"].isna().any())

In [None]:
# Remove every row that has a NaN in ANY column (not only the two plotted ones);
# df_GRW is overwritten, so later cells see the reduced table.
df_GRW = df_GRW.dropna()

In [None]:
# NPP RMSE against the GPP percent-NaN column, "GRW" results.
polyfit1d_and_plot(df_GRW, "GPP_pct_nan", "NPP_RMSE", xlabel, "NPP (Growing)");
# NOTE: this call was observed to raise an error, attributed to NaN values;
# presumably that is why df_GRW is passed through dropna() before this cell
# (verify by re-running the notebook top to bottom).

In [None]:
# Keep just the two plotted columns, restricted to rows whose NPP RMSE is
# below the 2.5e-5 outlier cutoff.
df_GRW_dropped = df_GRW.loc[
    df_GRW["NPP_RMSE"] < 2.5e-5, ["NPP_RMSE", "GPP_pct_nan"]
].copy()

In [None]:
# Repeat the NPP vs. GPP percent-NaN plot on the outlier-filtered "GRW" frame.
polyfit1d_and_plot(df_GRW_dropped, "GPP_pct_nan", "NPP_RMSE", xlabel, "NPP (Growing), outliers dropped");

# Climate Classes

In [None]:
# List the metadata column names (used to pick the climate columns in the next cell).
fluxnet_meta.columns.tolist()

In [None]:
# Select the two metadata columns at positions 8 and 9 (presumably the climate
# class code and its description — TODO confirm). NOTE(review): the positional
# slice silently picks different columns if the metadata layout changes.
climate_vars = fluxnet_meta.columns.tolist()[8:10]
# Widen column display so long text values are not truncated.
pd.set_option('display.max_colwidth', 100) 
# Show the unique pairs of values, indexed by the first of the two columns.
fluxnet_meta[climate_vars].drop_duplicates().set_index(climate_vars[0])