# Plotting RMSE

## Define Variables / Import MetaData

In [None]:
import os
import sys
from pathlib import Path

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from cartopy import crs as ccrs 
import cartopy.feature as cfeature
import pandas as pd
import hvplot.pandas
import xarray as xr
import hvplot.xarray
import geoviews.feature as gf
from adjustText import adjust_text

## Functions

In [None]:
# Put the parent of the current working directory on the import path.
# The membership check keeps the entry from being appended again when the
# cell is re-run.
parent_dir = os.path.normpath(os.path.join(os.getcwd(), os.pardir))
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
from utils.functions import import_flux_metadata, import_site_RMSE_data

## Define variables 

In [None]:
# NOTE: the data locations are hard-coded as relative paths here instead of
# being read from config.py, because config.py does not work inside Jupyter
# Notebook: it does not know where $NOBACKUP is.
amer_filepath = '../../ameriflux-data/'
mic_filepath = '../preprocessing/intermediates/'

In [None]:
# Wrap the raw path strings in Path objects.
FLUX_DATA_PATH = Path(amer_filepath)
MICASA_PREPROCESSED_DATA = Path(mic_filepath)
# Site metadata table stored inside the flux data directory
# (the file name suggests an AmeriFlux site-search export).
FLUX_METADATA = FLUX_DATA_PATH / "AmeriFlux-site-search-results-202410071335.tsv"

In [None]:
# Map projection (plate carree) shared by the cartopy map axes and the
# site-label annotations in this notebook.
proj=ccrs.PlateCarree()

In [None]:
# Load the site metadata table from FLUX_METADATA via the project helper.
# It is filtered on its 'Site ID' column further down to describe the
# selected sites.
fluxnet_meta = import_flux_metadata(FLUX_METADATA)

In [None]:
# Load the per-site RMSE results. The rest of the notebook uses the table's
# index as the site identifier and reads its 'lat', 'lon', 'NEE_RMSE' and
# 'NPP_RMSE' columns.
# NOTE(review): "_ANN" in the file name presumably means annual -- confirm.
df = import_site_RMSE_data(FLUX_METADATA, '../analysis/RMSE_results_ANN.csv')
df

## RMSE Viewer

In [None]:
# Repackage the RMSE table as an xarray Dataset with a single `site_id`
# dimension (taken from the DataFrame index); latitude and longitude are
# attached as coordinates along that dimension.
site_coords = {
    'site_id': df.index.values,
    **{axis: ('site_id', df[axis].values) for axis in ('lat', 'lon')},
}
rmse_vars = {
    name: ("site_id", df[name].values)
    for name in ('NEE_RMSE', 'NPP_RMSE')
}
ds = xr.Dataset(data_vars=rmse_vars, coords=site_coords)

In [None]:
# Show the Dataset repr to check its dimensions, coordinates and variables.
ds

In [None]:
# Side-by-side maps of the NEE and NPP RMSE at every site. Marker size and
# marker colour both encode the RMSE value of the panel's variable.
fig, axs = plt.subplots(1, 2, figsize=(12, 10),
                        subplot_kw={'projection': proj},
                        constrained_layout=True)
# y < 1 lowers the title; presumably this keeps it close to the map panels,
# which do not fill the full figure height.
fig.suptitle('MiCASA, FluxNet Sites Root Mean Squared Error (RMSE), Annual', y=0.76)
values = ['NEE_RMSE', 'NPP_RMSE']
for ax, val in zip(axs, values):
    # Background layers: land fill at the bottom, borders/coastlines above it.
    ax.add_feature(cfeature.BORDERS, zorder=1, linewidth=0.3)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.3, zorder=1)
    ax.add_feature(cfeature.LAND, zorder=0, color='lightgrey')

    # NPP_RMSE is drawn on a logarithmic colour scale, everything else on a
    # linear one. A fresh norm instance is created for each panel.
    norm = colors.LogNorm() if val == 'NPP_RMSE' else colors.Normalize()

    plot = ds.plot.scatter(
        x="lon", y="lat", ax=ax,
        hue=val, markersize=val,
        norm=norm,
        cmap='autumn_r', edgecolor='none',
        add_legend=False,
        # The colourbar is added manually below so it can be horizontal.
        add_colorbar=False,
    )
    fig.colorbar(plot, ax=ax, shrink=0.9, label=val, orientation='horizontal')
plt.show()

In [None]:
# Histograms
# fig, axs = plt.subplots(1,2,figsize=(6, 2), constrained_layout=True);
# for ax,val in zip(axs, values):
#     df[val].hist(ax=ax)
#     ax.set_title(val)

### Look at NEE Only

In [None]:
# Name of the RMSE variable examined in the rest of this section.
var = "NEE_RMSE"

In [None]:
# Pull the selected RMSE variable out of the Dataset on its own.
ds_NEE = ds[var]

In [None]:
# Keep only the sites whose RMSE for `var` exceeds 4e-8; with drop=True the
# remaining sites are removed instead of being kept as masked entries.
exceeds_threshold = ds[var] > 4e-8
ds_sel = ds.where(exceeds_threshold, drop=True)
ds_sel

In [None]:
# Map of the selected high-RMSE sites (`ds_sel`), coloured by `var` and
# labelled with their site IDs.
fig, ax = plt.subplots(1, 1, figsize=(10, 8),
                       subplot_kw={'projection': proj},
                       constrained_layout=True)
# Outline-only land and state boundaries as a light background.
ax.add_feature(cfeature.LAND, edgecolor='lightgrey', facecolor='none', zorder=0)
ax.add_feature(cfeature.STATES, edgecolor='lightgrey', zorder=0)

# Limit the view to this lon/lat box: [lon_min, lon_max, lat_min, lat_max].
conus_extent = [-125, -70, 25, 50]
ax.set_extent(conus_extent, crs=ccrs.PlateCarree())

plot = ds_sel.plot.scatter(
    x="lon", y="lat", ax=ax,
    hue=var, cmap='autumn_r',
    edgecolor='black', linewidths=0.5,
    # The colourbar is added manually below so it can be horizontal.
    add_colorbar=False,
)

# Label every point with its site ID, then let adjustText move the labels
# so that they do not overlap.
texts = [
    ax.annotate(site, (lon, lat), transform=proj, fontsize=12)
    for site, lon, lat in zip(ds_sel.site_id.values,
                              ds_sel.lon.values,
                              ds_sel.lat.values)
]
adjust_text(texts, ax=ax)

# var[4:] drops the leading variable prefix (e.g. "NEE_") from the label.
fig.colorbar(plot, ax=ax, shrink=0.9, label=var[4:], orientation='horizontal')
fig.suptitle('NEE RMSE values Higher Than 4x$10^{-8}$ (Annual)', y=0.78)
# Render explicitly, as the two-panel figure cell does; this also keeps the
# suptitle's Text repr out of the cell output.
plt.show()

#### Sites info

In [None]:
# Rows of `df` whose RMSE for `var` is above 4e-8 (the same cut-off that is
# applied to `ds` when building `ds_sel`).
high_rmse = df[var] > 4e-8
df_sel = df.loc[high_rmse]

In [None]:
# Metadata columns to show for the selected sites. 'Site ID' is included
# because it is used to match sites and becomes the table index afterwards.
site_subset = [
    'Site ID',
    'Name',
    'Vegetation Description (IGBP)',
    'Climate Class Description (Koeppen)',
    'Elevation (m)',
    'Years of AmeriFlux FLUXNET Data',
]

In [None]:
# Look up the metadata rows for the selected sites, keep only the columns
# listed in `site_subset`, and index the result by site ID.
sites_sel = df_sel.index.tolist()
is_selected = fluxnet_meta['Site ID'].isin(sites_sel)
sites_sel_info = fluxnet_meta.loc[is_selected, site_subset].set_index('Site ID')
sites_sel_info

#### Pandas Holoviews Interactive Plot

In [None]:
# Interactive map of the selected sites, coloured by `var` on a logarithmic
# scale and drawn over a semi-transparent 'EsriNatGeo' tile layer.
df_sel.hvplot.points(
    # Position
    x="lon",
    y="lat",
    geo=True,
    # Colour
    c=var,
    logz=True,
    cmap='autumn_r',
    clabel=f'{var}',
    # Marker size
    size=50,
    # Background tiles
    tiles='EsriNatGeo',
    tiles_opts={'alpha': 0.4},
    # Extra field shown in the hover tooltip
    hover_cols=['Site ID'],
    # Layout
    frame_height=400,
)

In [None]:
# Lift pandas' column-width limit for this display only, so that long text
# fields in the site table are shown in full instead of being truncated.
with pd.option_context('display.max_colwidth', None):
    display(sites_sel_info)

# Old

### Pandas Holoviews

In [None]:
# Show the full RMSE table again for reference.
df

In [None]:
import xyzservices.providers as xyz
from matplotlib.ticker import LogFormatter

In [None]:
# Bounding box of all sites: longitude range first, then latitude range.
lon_values, lat_values = df["lon"], df["lat"]
min_lon, max_lon = lon_values.min(), lon_values.max()
min_lat, max_lat = lat_values.min(), lat_values.max()

# Print each range on its own line as "min max".
for low, high in ((min_lon, max_lon), (min_lat, max_lat)):
    print(low, high)

In [None]:
# Build one interactive points map per RMSE variable listed in `values`
# (defined in the two-panel matplotlib cell above). Only the colour
# variable and its colourbar label differ between the maps, so every other
# option is defined once here.
#
# Notes kept from earlier experiments:
#   * `projection=ccrs.PlateCarree()` does not work together with tiles.
#   * A custom colormap with transparency does not show up in bokeh.
#   * Size values do not scale logarithmically, so a fixed marker size is
#     used instead of sizing the markers by RMSE.
points_opts = dict(
    x="lon",
    y="lat",
    geo=True,
    crs=ccrs.PlateCarree(),
    logz=True,
    cmap="autumn_r",
    size=45,
    tiles=True,
    tiles_opts={'alpha': 0.4},
    hover_cols=['Site ID'],
    xlim=(-170, -20),   # longitude range
    ylim=(-60, 75),     # latitude range
    frame_height=700,
)
plot_list = [
    df.hvplot.points(c=value, clabel=f'{value}', **points_opts)
    for value in values
]

In [None]:
# Overlay coastlines on the first map (built from values[0], 'NEE_RMSE')
# and set its title.
(plot_list[0] * gf.coastline).opts(title="Micasa/Ameriflux Net Ecosystem Exchange (NEE) RMSE")

In [None]:
# Overlay coastlines on the second map (built from values[1], 'NPP_RMSE')
# and set its title.
(plot_list[1] * gf.coastline).opts(title="Micasa/Ameriflux Net Primary Productivity (NPP) RMSE")


#### Make custom colormap with transparency

In [None]:
# from matplotlib.colors import ListedColormap

In [None]:
# Make transparency colormap:
# cmap = plt.cm.autumn_r
# cmap

In [None]:
# cmap(np.arange(cmap.N)).shape

In [None]:
# cmap(1)

In [None]:
# my_cmap = cmap(np.arange(cmap.N))
# my_cmap[:, -1] = np.linspace(0, 1, cmap.N)
# my_cmap = ListedColormap(my_cmap)
# my_cmap

In [None]:
# my_cmap = ListedColormap(my_cmap)
# my_cmap

In [None]:
# fig, axs = plt.subplots(1,2,figsize=(12, 10), subplot_kw={'projection': proj}, constrained_layout=True);
# fig.suptitle('MiCASA, FluxNet Sites Root Mean Squared Error (RMSE)', y=0.76)
# values =['NEE_RMSE', 'NPP_RMSE']
# for ax,val in zip(axs, values):
#     ax.add_feature(cfeature.COASTLINE,zorder=0)
#     plot = ds.plot.scatter(x="lon", y="lat",ax=ax,
                           
#                            markersize=val, edgecolor='none',add_legend=False,
                           
#                             norm=colors.LogNorm(), 
#                             # norm=colors.LogNorm(vmin=ds[val].min(), vmax=ds[val].max()),
#                            hue=val,
#                            cmap=my_cmap,
#                            add_colorbar=False
#                           )
    
#     cbar = fig.colorbar(plot, ax=ax, shrink=0.9, label=val[4:], orientation='horizontal')
#     ax.set_title(val[:3])
# plt.show()

In [None]:
# Try to scale size by log:
# df_scale = df.copy()
# df_scale["log_NEE_RMSE"] = np.log(df_scale["NEE_RMSE"])
# df_scale.head()

# NOTE: This doesn't work because the logs are negative. It would require a pseudo-log scale, which is complex to set up.