## Define Variables / Import MetaData

In [None]:
import os
import sys
from pathlib import Path

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import r2_score

In [None]:
# Put the parent of the current working directory on the import path
# (presumably where the project's `utils` package and `config` module live).
parent_dir = str(Path.cwd().parent)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
from utils.functions import import_flux_metadata, import_site_RMSE_data, polyfit1d_and_plot

In [None]:
from config import FLUX_DATA_PATH, FLUX_METADATA, MICASA_PREPROCESSED_DATA

In [None]:
# Load the flux-site metadata from the path configured as FLUX_METADATA.
# The result is used as a pandas DataFrame in the cells below
# (`.columns`, column selection, `drop_duplicates`).
fluxnet_meta = import_flux_metadata(FLUX_METADATA)

### Climate Classes

In [None]:
# List every metadata column name (useful for locating columns by position).
list(fluxnet_meta.columns)

In [None]:
# Select the two metadata columns at positions 8 and 9 (presumably the
# climate class code and its description — confirm against the column list).
# NOTE(review): positional selection breaks if the column order changes.
climate_vars = list(fluxnet_meta.columns[8:10])
# Widen column display so long text values are not truncated.
pd.set_option('display.max_colwidth', 100)
# Show each distinct pair once, indexed by the first of the two columns.
fluxnet_meta[climate_vars].drop_duplicates().set_index(climate_vars[0])

# Plot Percent NaN vs RMSE

In [None]:
# Read the NaN results table, using the CSV's 'SiteID' column as the index
# so it can be joined onto the per-site RMSE tables.
nan_results = pd.read_csv(
    '../analysis/nan_results.csv',
    index_col='SiteID',
)
nan_results

## Annual

In [None]:
# Load the annual RMSE results through the project helper, which receives the
# metadata path and the results CSV path.
# NOTE(review): presumably returns site metadata merged with the RMSE columns
# (a 'Site ID' column is used for the join below) — confirm in utils.functions.
df_ANN = import_site_RMSE_data(FLUX_METADATA, '../analysis/RMSE_results_ANN.csv')
df_ANN

In [None]:
# Merge the NaN results into the annual table: df_ANN's 'Site ID' column is
# matched against nan_results' index, keeping only sites present in both.
# NOTE(review): this reassigns df_ANN, so re-running this cell on its own joins
# nan_results a second time; pandas is expected to raise on the overlapping
# column names. Re-run the cell that loads df_ANN first.
df_ANN = df_ANN.join(nan_results, on='Site ID',how="inner")
df_ANN

In [None]:
# Label string passed as the fourth argument to every polyfit1d_and_plot call
# in this notebook (presumably used as the x-axis label).
xlabel = "Percent (%) NaN values"

In [None]:
# Annual NEE: pass the NEE_pct_nan and NEE_RMSE columns of df_ANN to the
# project plotting helper (presumably x and y, in that order, with a 1-D
# polynomial fit — confirm in utils.functions).
# The trailing semicolon keeps Jupyter from echoing the call's return value.
polyfit1d_and_plot(df_ANN, "NEE_pct_nan", "NEE_RMSE", xlabel, "NEE (Annual)");

In [None]:
# Annual NPP: pass the GPP_pct_nan and NPP_RMSE columns of df_ANN to the
# project plotting helper (presumably x and y, in that order).
# NOTE(review): the NaN percentage comes from the GPP column while the RMSE is
# for NPP; presumably intentional, but the title only mentions NPP.
# The trailing semicolon keeps Jupyter from echoing the call's return value.
polyfit1d_and_plot(df_ANN, "GPP_pct_nan", "NPP_RMSE", xlabel, "NPP (Annual)");

In [None]:
# Drop the outliers: keep only rows whose NPP_RMSE is below 2.5e-5, and only
# the two columns needed for the plot. Rows with a missing NPP_RMSE fail the
# comparison and are excluded as well.
df_ANN_dropped = df_ANN.loc[
    df_ANN["NPP_RMSE"] < 2.5e-5, ["NPP_RMSE", "GPP_pct_nan"]
].copy()
# Non-null count per column, to check how many sites remain.
df_ANN_dropped.count()

In [None]:
# Annual NPP again, this time on df_ANN_dropped (rows with NPP_RMSE >= 2.5e-5
# removed), passing the GPP_pct_nan and NPP_RMSE columns to the project helper.
# The trailing semicolon keeps Jupyter from echoing the call's return value.
polyfit1d_and_plot(df_ANN_dropped, "GPP_pct_nan", "NPP_RMSE", xlabel, "NPP/GPP (Annual), outliers dropped");

## Growing Season Results

In [None]:
# Load the growing-season RMSE results and, in the same step, inner-join the
# NaN results onto them by matching 'Site ID' against nan_results' index.
df_GRW = import_site_RMSE_data(
    FLUX_METADATA, '../analysis/RMSE_results_GRW.csv'
).join(nan_results, on='Site ID', how="inner")
df_GRW

In [None]:
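# Import and merge results
# (Superseded by the import_site_RMSE_data cell above; kept for reference only.
#  `df_meta` is not defined in the cells above, so this would not run as written.)
# RMSE_results_GRW = pd.read_csv('../analysis/RMSE_results_GRW.csv',index_col='SiteID')
# df_GRW = df_meta.join(RMSE_results_GRW, on='Site ID', how="inner")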

In [None]:
# Display the growing-season table (RMSE results joined with the NaN results).
df_GRW

In [None]:
# Growing-season NEE: pass the NEE_pct_nan and NEE_RMSE columns of df_GRW to
# the project plotting helper (presumably x and y, in that order).
# The trailing semicolon keeps Jupyter from echoing the call's return value.
polyfit1d_and_plot(df_GRW, "NEE_pct_nan", "NEE_RMSE", xlabel, "NEE (Growing)");

In [None]:
# Report, as a (GPP_pct_nan, NPP_RMSE) pair of booleans, whether either column
# used in the growing-season NPP plot contains missing values.
bool(df_GRW["GPP_pct_nan"].isna().any()), bool(df_GRW["NPP_RMSE"].isna().any())

In [None]:
# Remove rows that are missing either value needed for the growing-season NPP
# plot. Restricting the check to these two columns avoids discarding sites
# merely because an unrelated column (e.g. a metadata or NEE field) is NaN,
# which a bare dropna() would do.
df_GRW = df_GRW.dropna(subset=["GPP_pct_nan", "NPP_RMSE"])

In [None]:
# Growing-season NPP: pass the GPP_pct_nan and NPP_RMSE columns of df_GRW to
# the project plotting helper (presumably x and y, in that order).
# The trailing semicolon keeps Jupyter from echoing the call's return value.
polyfit1d_and_plot(df_GRW, "GPP_pct_nan", "NPP_RMSE", xlabel, "NPP (Growing)");
# NOTE: per the original author's note, this call raised an error while df_GRW
# still contained NaNs; the preceding cell now drops them before this runs.

In [None]:
# Drop the outliers: keep only rows whose NPP_RMSE is below 2.5e-5, and only
# the two columns needed for the plot (same threshold as the annual case).
df_GRW_dropped = df_GRW.loc[
    df_GRW["NPP_RMSE"] < 2.5e-5, ["NPP_RMSE", "GPP_pct_nan"]
].copy()

In [None]:
# Growing-season NPP again, this time on df_GRW_dropped (rows with
# NPP_RMSE >= 2.5e-5 removed), passing the GPP_pct_nan and NPP_RMSE columns to
# the project helper.
# The trailing semicolon keeps Jupyter from echoing the call's return value.
polyfit1d_and_plot(df_GRW_dropped, "GPP_pct_nan", "NPP_RMSE", xlabel, "NPP (Growing), outliers dropped");