# Explore workflows to add to .py scripts

## Define Variables / Import MetaData

In [None]:
import os
import sys
from pathlib import Path

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.dates as mdates
from cartopy import crs as ccrs 
import cartopy.feature as cfeature
import pandas as pd
import hvplot.pandas
import xarray as xr
import hvplot.xarray
import geoviews.feature as gf
from sklearn.metrics import root_mean_squared_error
from adjustText import adjust_text

## Functions

In [None]:
# Put the parent of the current working directory on the import path (once),
# so that packages living one level above the notebook can be imported.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
from utils.functions import import_flux_metadata, import_flux_site_data, convert_flux_to_micasa_units, replace_outliers_with_nan, clean_flux_datasets 

## Define variables 

In [None]:
# NOTE: config.py cannot be used from the Jupyter notebook because $NOBACKUP
# is not known in that environment, so the data locations are hard-coded here
# as relative paths instead.
amer_filepath = "../../ameriflux-data/"
mic_filepath = "../preprocessing/intermediates/"

In [None]:
# Turn the hard-coded location strings into Path objects.
FLUX_DATA_PATH = Path(amer_filepath)
MICASA_PREPROCESSED_DATA = Path(mic_filepath)
# The site-search results table lives inside the AmeriFlux data directory.
FLUX_METADATA = FLUX_DATA_PATH / "AmeriFlux-site-search-results-202410071335.tsv"

In [None]:
# Time-resolution code: passed to import_flux_site_data and embedded in the
# preprocessed MiCASA CSV filename.
# NOTE(review): "DD" presumably means daily data — confirm.
timedelta = "DD"

In [None]:
# Set the map projection (plate carrée); the final plotting cell assigns the
# same projection again before using it.
proj=ccrs.PlateCarree()

In [None]:
# Load the site metadata table from the TSV file. Later cells select rows by
# its "Site ID" column and read the latitude/longitude columns from it.
fluxnet_meta = import_flux_metadata(FLUX_METADATA)

In [None]:
# Define site ID
# Exactly one assignment should be active; the commented lines are other
# sites that have been tried.
# site_ID = 'CA-MA1'
# site_ID = 'CA-DBB'
# site_ID = 'CA-LP1'
# site_ID = 'AR-TF1'
# site_ID = 'BR-CST'
site_ID = 'US-Wi3'

In [None]:
# Look up the coordinates of the selected site in the metadata table.
# `.values` returns NumPy arrays (one entry per matching row), not scalars.
_is_selected_site = fluxnet_meta["Site ID"] == site_ID
site_lat = fluxnet_meta.loc[_is_selected_site, "Latitude (degrees)"].values
site_lon = fluxnet_meta.loc[_is_selected_site, "Longitude (degrees)"].values

## Plotting

In [None]:
# Load the flux data for the selected site at the chosen time resolution,
# then show the table as the cell output.
fluxnet_sel = import_flux_site_data(FLUX_DATA_PATH, site_ID, timedelta)
fluxnet_sel

In [None]:
cols = fluxnet_sel.columns.tolist()
list = [cols[0], cols[-1]]
list

In [None]:
new_list = ["NEE (kgC m-2 s-1)", "GPP_DT (kgC m-2 s-1)"]

In [None]:
for old, new in zip(list, new_list):
   fluxnet_sel = convert_flux_to_micasa_units(fluxnet_sel, old, new)
   fluxnet_sel = clean_flux_datasets(fluxnet_sel, new, "NEE_VUT_REF_QC") 

In [None]:
# Display the site table after unit conversion and cleaning.
fluxnet_sel

In [None]:
# Mask GPP outliers
# (per the helper's name, outliers are replaced with NaN; the outlier
# criterion itself is defined in utils.functions, not in this notebook)
fluxnet_sel = replace_outliers_with_nan(fluxnet_sel, "GPP_DT (kgC m-2 s-1)")

In [None]:
############ Import Preprocessed Micasa Data ################
# The file name is built from the site ID and the time-resolution code; the
# first CSV column becomes the index and is parsed as dates.
filename = f"{site_ID}_micasa_{timedelta}.csv"
path = os.path.join(MICASA_PREPROCESSED_DATA, filename)
micasa_ds = pd.read_csv(path, parse_dates=True, index_col=0)

############## Append datasets #########################
# NEE comparison table: start from the MiCASA series so that its index
# defines the rows, then align the flux-tower NEE onto that index.
NEE_ds = micasa_ds["MiCASA NEE (kg m-2 s-1)"].to_frame(name="MiCASA")
NEE_ds["FluxNet"] = fluxnet_sel["NEE (kgC m-2 s-1)"]

In [None]:
# Quick-look line plot of both NEE columns (MiCASA and FluxNet).
NEE_ds.plot()

In [None]:
# NPP
# Comparison table indexed like the MiCASA series. Half of the GPP_DT column
# is used as the flux-tower counterpart to MiCASA NPP.
NPP_ds = micasa_ds["MiCASA NPP (kg m-2 s-1)"].to_frame(name="MiCASA")
NPP_ds["FluxNet DT GPP/2"] = fluxnet_sel["GPP_DT (kgC m-2 s-1)"].div(2)

In [None]:
# Quick-look line plot of both NPP columns (MiCASA and FluxNet DT GPP/2).
NPP_ds.plot()

## Site Info

In [None]:
# Metadata columns to show in the site summary table.
site_subset = [
    "Site ID",
    "Name",
    "Vegetation Description (IGBP)",
    "Climate Class Description (Koeppen)",
    "Elevation (m)",
    "Years of AmeriFlux FLUXNET Data",
]

In [None]:
# Select the summary columns for the chosen site and display them without
# truncating long text cells.
site_sel = fluxnet_meta.loc[fluxnet_meta["Site ID"] == site_ID, site_subset]
with pd.option_context("display.max_colwidth", None):
    display(site_sel)

## Create final static plots

In [None]:
# Final static figure: a locator map on top, the NEE comparison below it and
# the NPP comparison at the bottom. The third grid row is an invisible spacer.
fig, axs = plt.subplots(
    4, 1,
    gridspec_kw={'height_ratios': [1, 2, 0.25, 2], 'hspace': 0.01},
    figsize=(10, 12),
)

# Define the map projection
proj = ccrs.PlateCarree()

# `site_lat` comes from `.values` and is therefore an array. Using an array
# directly in `if` is ambiguous (or an error) unless it holds exactly one
# element, so compare a plain scalar taken from the first matching row.
site_lat_values = np.ravel(site_lat)
if site_lat_values.size == 0:
    raise ValueError(f"No latitude found in the metadata for site {site_ID!r}")
first_site_lat = float(site_lat_values[0])

if first_site_lat >= 20:
    # North America extents
    min_lon, max_lon = -170, -57
    min_lat, max_lat = 25, 74
else:
    # South America extents
    min_lon, max_lon = -90, -30
    min_lat, max_lat = -60, 12

# Replace the placeholder axes in the top grid cell with a map axes.
# Re-using the cell's own SubplotSpec keeps the map inside the gridspec (so
# `height_ratios` / `hspace` apply to it as well), and removing the
# placeholder avoids leaving a hidden extra axes in the figure.
map_spec = axs[0].get_subplotspec()
axs[0].remove()
axs[0] = fig.add_subplot(map_spec, projection=proj, frameon=False)
axs[0].set_extent([min_lon, max_lon, min_lat, max_lat], crs=proj)
# axs[0].add_feature(cfeature.STATES)
# axs[0].add_feature(cfeature.BORDERS)
axs[0].coastlines()

# Mark the site location with a star drawn above the coastlines.
axs[0].scatter(
    site_lon, site_lat,
    marker='*',
    s=500,
    color='yellow',
    edgecolor='black',
    zorder=3,
)

# NOTE(review): pandas may draw regularly spaced time series on its own
# period-based x axis, in which case matplotlib date locators/formatters
# mislabel the ticks. If the labels look wrong, pass `x_compat=True` to
# `.plot()` — confirm against the rendered figure.
NEE_ds.plot(ax=axs[1], ylabel='NEE\n(kgC m$^{-2}$ s$^{-1}$)')
# axs[1].xaxis.set_major_formatter(mdates.ConciseDateFormatter(axs[3].xaxis.get_major_locator()))

axs[2].set_visible(False)

NPP_ds.plot(ax=axs[3], ylabel='NPP\n(kgC m$^{-2}$ s$^{-1}$)')

# Apply the same date-axis formatting to both time-series panels.
date_format = mdates.DateFormatter('%b %Y')
for ts_ax in (axs[1], axs[3]):
    # Set pretty date labels
    ts_ax.xaxis.set_major_locator(mdates.AutoDateLocator())
    # Hide the minor tick labels (the minor tick marks themselves remain)
    ts_ax.tick_params(axis='x', which='minor', labelsize=0, labelcolor='none')
    ts_ax.xaxis.set_major_formatter(date_format)
    ts_ax.set_xlabel('')
fig.suptitle(f'{site_ID}', y=0.9, fontsize=14)

# Calculate RMSE and export to csv

In [None]:
# Drop NA values!
# RMSE needs complete (model, observation) pairs and scikit-learn metrics
# reject NaN input, so drop a row when either column is missing rather than
# checking only the observation column.
NEE_ds_clean = NEE_ds.dropna(subset=['MiCASA', 'FluxNet'])
NPP_ds_clean = NPP_ds.dropna(subset=['MiCASA', 'FluxNet DT GPP/2'])

In [None]:
# RMSE between the MiCASA and FluxNet NEE columns of the NaN-filtered table;
# the bare name shows the value as the cell output.
NEE_RMSE = root_mean_squared_error(NEE_ds_clean.MiCASA, NEE_ds_clean.FluxNet)
NEE_RMSE

In [None]:
# RMSE between MiCASA NPP and the FluxNet GPP/2 column of the NaN-filtered
# table (bracket access is needed because the column name is not a valid
# Python identifier).
NPP_RMSE = root_mean_squared_error(NPP_ds_clean.MiCASA, NPP_ds_clean["FluxNet DT GPP/2"])

# Calculate NaN percentage

In [None]:
# Restrict the site table to the two converted flux columns.
columns = ["NEE (kgC m-2 s-1)", "GPP_DT (kgC m-2 s-1)"]
fluxnet_sub = fluxnet_sel[columns]

In [None]:
# Preview the first rows of the two-column subset.
fluxnet_sub.head()

In [None]:
# Build one record for this site: the site ID plus, for every column of
# `fluxnet_sub`, the percentage of NaN entries. Each key is the first three
# characters of the column name followed by "_pct_nan".
site_dict = {"site_ID": site_ID}

for col_name, df_col in fluxnet_sub.items():
    new_col_name = f"{col_name[:3]}_pct_nan"
    site_dict[new_col_name] = df_col.isna().mean() * 100

# Wrap the record in a list and show it as the cell output.
results = [site_dict]
results

### Old

#### Xarray Bokeh plot? This doesn't work, so the DataFrame has to be plotted with pandas instead

In [None]:
# ds_NEE = ds[values[0]]
# ds_NEE

In [None]:
# hv.extension('bokeh', inline=True)
# ds_NEE.hvplot.points(x='lon', y='lat',
#                       geo=True,
#                      # crs=proj, 
#                     # project=True
#                      )

# ds_dropped = ds_NEE.drop_indexes("site_id")
# ds_dropped = ds_dropped.drop_vars("site_id")
# ds_dropped