## Define Variables / Import MetaData

In [None]:
import os
import sys
from pathlib import Path

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.dates as mdates
from cartopy import crs as ccrs 
import cartopy.feature as cfeature
import pandas as pd
import hvplot.pandas
import xarray as xr
import hvplot.xarray
import geoviews.feature as gf
from sklearn.metrics import root_mean_squared_error
from adjustText import adjust_text

## Functions

In [None]:
# Put the parent of the current working directory on the import path (once),
# so the local `utils` package imported in the next cell can be resolved.
parent_dir = str(Path.cwd().parent)
if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)

In [None]:
from utils.functions import import_flux_metadata, import_flux_site_data, convert_flux_to_micasa_units, replace_outliers_with_nan, clean_flux_datasets 

## Define variables 

In [None]:
# Data locations are hard-coded relative to the notebook's working directory
# because config.py cannot be used from Jupyter: it relies on $NOBACKUP, which
# is not known inside the notebook session.
amer_filepath = '../../ameriflux-data/'
mic_filepath = '../preprocessing/intermediates/'

In [None]:
# Turn the raw path strings into pathlib objects; the site metadata table
# lives directly inside the AmeriFlux data directory.
MICASA_PREPROCESSED_DATA = Path(mic_filepath)
FLUX_DATA_PATH = Path(amer_filepath)
FLUX_METADATA = FLUX_DATA_PATH / "AmeriFlux-site-search-results-202410071335.tsv"

In [None]:
# NOTE(review): presumably the temporal-resolution code of the flux files to use
# ("DD" looks like daily); it is not referenced in the visible cells — confirm.
timedelta = "DD"

In [None]:
# Map projection used for the cartopy axes (and annotation transform) in the
# static figures below: plate carrée, i.e. plain longitude/latitude.
proj=ccrs.PlateCarree()

In [None]:
# Load the AmeriFlux site metadata table. The cells below read its 'Site ID',
# 'Longitude (degrees)' and 'Latitude (degrees)' columns.
fluxnet_meta = import_flux_metadata(FLUX_METADATA)

# RMSE View

## RMSE plotting

In [None]:
# Create a frame holding only the site identifier and its coordinates.
# 'Site ID' intentionally stays a regular column: the merge cell joins with
# on='Site ID' and the xarray dataset is built from df['Site ID'].
# (A bare `df_meta.set_index('Site ID')` is not in-place and had no effect,
# so it is not called here.)
site_subset = [
    'Site ID',
    'Longitude (degrees)',
    'Latitude (degrees)',
]
# .copy() gives an independent frame instead of a selection tied to fluxnet_meta.
df_meta = fluxnet_meta[site_subset].copy()

In [None]:
# Read the per-site scores (indexed by 'SiteID') and attach them to the site
# coordinates, matching the metadata's 'Site ID' column against that index.
# Coordinate columns are shortened to 'lat' / 'lon' for plotting.
results = pd.read_csv('../analysis/results.csv', index_col='SiteID')
df = (
    df_meta
    .join(results, on='Site ID')
    .rename(columns={'Latitude (degrees)': 'lat', 'Longitude (degrees)': 'lon'})
)
df

In [None]:
# Repackage the merged table as an xarray Dataset along a single 'site_id'
# dimension; lat/lon ride along as coordinates on that same dimension.
ds = xr.Dataset(
    data_vars={
        metric: ("site_id", df[metric].values)
        for metric in ('NEE_RSME', 'NPP_RSME')
    },
    coords={
        'site_id': df['Site ID'].values,
        **{axis: ('site_id', df[axis].values) for axis in ('lat', 'lon')},
    },
)

In [None]:
# Display the dataset summary (dimensions, coordinates, data variables).
ds

In [None]:
# Side-by-side maps of the per-site scores: one panel per column in `values`.
# Marker size and marker colour both encode the value of the panel's column.
fig, axs = plt.subplots(
    1, 2,
    figsize=(12, 10),
    subplot_kw={'projection': proj},
    constrained_layout=True,
)
# y is given in figure coordinates; 0.76 moves the title down from its default
# position near the top edge (presumably to sit just above the map panels).
fig.suptitle('MiCASA, FluxNet Sites Root Mean Squared Error (RMSE)', y=0.76)

# Column names exactly as they are spelled in `df` / `ds`.
values = ['NEE_RSME', 'NPP_RSME']

for ax, val in zip(axs, values):
    # Base map: grey land underneath, thin borders and coastlines on top.
    ax.add_feature(cfeature.BORDERS, zorder=1, linewidth=0.3)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.3, zorder=1)
    ax.add_feature(cfeature.LAND, zorder=0, color='lightgrey')

    # The NPP panel uses a logarithmic colour scale, the NEE panel a linear one.
    norm = colors.LogNorm() if val == 'NPP_RSME' else colors.Normalize()

    plot = ds.plot.scatter(
        x="lon", y="lat", ax=ax,
        markersize=val, edgecolor='none', add_legend=False,
        norm=norm,
        hue=val,
        cmap='autumn_r',
        add_colorbar=False,  # the colorbar is drawn explicitly below
    )

    cbar = fig.colorbar(plot, ax=ax, shrink=0.9, label=val, orientation='horizontal')
plt.show()

In [None]:
# Distribution of each score column, one small histogram per panel.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(6, 2), constrained_layout=True)
for hist_ax, column in zip(axs, values):
    df[column].hist(ax=hist_ax)
    hist_ax.set_title(column)

### Look at NEE Only

In [None]:
# Name of the column / data variable examined in the following cells.
var = "NEE_RSME"

In [None]:
# Single-variable selection from the dataset.
# NOTE(review): ds_NEE is not referenced again in the visible cells.
ds_NEE = ds[var]

In [None]:
# Keep only the sites whose value of `var` is above 4e-8. drop=True removes the
# entries that fail the condition rather than keeping them as NaN.
ds_sel = ds.where(ds[var]>4e-8,drop=True)
ds_sel

In [None]:
# Map of the sites selected in ds_sel (value of `var` above 4e-8), coloured by
# that value and labelled with their site IDs.
fig, ax = plt.subplots(
    1, 1,
    figsize=(10, 8),
    subplot_kw={'projection': proj},
    constrained_layout=True,
)
# Outline-only base map so the coloured markers stand out.
ax.add_feature(cfeature.LAND, edgecolor='lightgrey', facecolor='none', zorder=0)
ax.add_feature(cfeature.STATES, edgecolor='lightgrey', zorder=0)

# View window given as [lon_min, lon_max, lat_min, lat_max] in degrees.
conus_extent = [-125, -70, 25, 50]
ax.set_extent(conus_extent, crs=ccrs.PlateCarree())

plot = ds_sel.plot.scatter(
    x="lon", y="lat",
    ax=ax,
    edgecolor='black',
    linewidths=0.5,
    hue=var,
    cmap='autumn_r',
    add_colorbar=False,  # the colorbar is drawn explicitly below
)

# One text label per site, placed at the site's coordinates; adjust_text then
# repositions the labels to reduce overlaps.
texts = [
    ax.annotate(site, (lon, lat), transform=proj, fontsize=12)
    for site, lon, lat in zip(
        ds_sel.site_id.values, ds_sel.lon.values, ds_sel.lat.values
    )
]
adjust_text(texts, ax=ax)

# The label is written out explicitly: slicing it from the column name
# (var[4:]) yields the misspelling "RSME".
cbar = fig.colorbar(plot, ax=ax, shrink=0.9, label='RMSE', orientation='horizontal')
fig.suptitle('NEE RMSE values Higher Than 4x$10^{-8}$', y=0.78)
plt.show()

### Pandas Holoviews Interactive Plot

In [None]:
# Tabular equivalent of the ds_sel selection: rows whose `var` value exceeds 4e-8.
df_sel = df.loc[df[var].gt(4e-8)]
df_sel

In [None]:
# Interactive map of the selected sites over a semi-transparent tile
# background; colour encodes `var` on a logarithmic scale and hovering a point
# shows its site ID.
df_sel.hvplot.points(
    x="lon",
    y="lat",
    geo=True,
    c=var,
    logz=True,
    cmap='autumn_r',
    clabel=f'{var}',
    size=50,
    tiles='EsriNatGeo',
    tiles_opts={'alpha': 0.4},
    hover_cols=['Site ID'],
    frame_height=400,
)

### Pandas Holoviews

In [None]:
# Show the full merged table (all sites) that the maps below are drawn from.
df

In [None]:
import xyzservices.providers as xyz
from matplotlib.ticker import LogFormatter

In [None]:
# Bounding box of all sites; printed as "min max", longitude first, then latitude.
lons, lats = df["lon"], df["lat"]
min_lon, max_lon = lons.min(), lons.max()
min_lat, max_lat = lats.min(), lats.max()

for low, high in ((min_lon, max_lon), (min_lat, max_lat)):
    print(low, high)

In [None]:
def _rmse_points_map(column):
    """Build the interactive site map of ``df`` coloured by ``column``.

    Parameters
    ----------
    column : str
        Name of the ``df`` column that drives the (log-scaled) point colour
        and the colorbar label.

    Returns
    -------
    The hvplot points object, with a semi-transparent tile background and the
    site ID available on hover.
    """
    return df.hvplot.points(
        x="lon",
        y="lat",
        geo=True,
        # A `projection=` argument is not passed: it doesn't work with tiles.
        crs=ccrs.PlateCarree(),
        # A custom cmap with transparency won't show up in bokeh, so a stock
        # colormap is used.
        c=column,
        logz=True,
        cmap="autumn_r",
        clabel=f'{column}',
        # Fixed marker size: size values don't scale logarithmically.
        size=45,
        tiles=True,
        tiles_opts={'alpha': 0.4},
        hover_cols=['Site ID'],
        xlim=(-170, -20),   # longitude range
        ylim=(-60, 75),     # latitude range
        frame_height=700,
    )


# One map per score column, in the same order as `values`.
plot_list = [_rmse_points_map(value) for value in values]

In [None]:
# Overlay coastlines on the first map (NEE) and set its title.
# Title typo fixed: "RSME" -> "RMSE".
(plot_list[0] * gf.coastline).opts(title="Micasa/Ameriflux Net Ecosystem Exchange (NEE) RMSE")

In [None]:
# Overlay coastlines on the second map (NPP) and set its title.
# Title typo fixed: "RSME" -> "RMSE".
(plot_list[1] * gf.coastline).opts(title="Micasa/Ameriflux Net Primary Productivity (NPP) RMSE")
