# Compare RNN map & Transformer LFMC predictions to true observations

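**Purpose:** We want to compare how well the RNN maps and the transformer model each predict LFMC by scoring both sets of predictions against the true observations (RMSE, correlation coefficient, and coefficient of determination).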

**Date:** July 29, 2024


In [34]:
from hydroDL import kPath

import pandas as pd
import numpy as np
import os

from sklearn.metrics import r2_score

In [7]:
# Directories holding one CSV per site ({site_id}.csv) for each LFMC source.
# All three live under the shared `inference/` folder of kPath.dirVeg.
inference_dir = f'{kPath.dirVeg}inference/'

true_obs_dir = f'{inference_dir}true_obs_interpolate_semimonthly/'
transformer_dir = f'{inference_dir}transformer_interpolate_semimonthly/'
rnn_maps_dir = f'{inference_dir}rnn_maps/'


In [19]:
# Collect the ID of every site that appears in the daily transformer LFMC table.
# (All unique sites are kept, not a single arbitrary one.)
path = os.path.join(kPath.dirVeg, "transformer_lfmc_daily.csv")
df = pd.read_csv(path)
sites = df["site"].unique()

In [100]:
# Build one long table of aligned (true, RNN, transformer) LFMC values
# across every site that has all three per-site CSV files.
COMPARE_COLUMNS = ["true_lfmc", "rnn_lfmc", "transformer_lfmc"]


def load_site_comparison(site_id):
    """Return the aligned true / RNN / transformer LFMC rows for one site.

    Reads ``{site_id}.csv`` from ``true_obs_dir``, ``rnn_maps_dir`` and
    ``transformer_dir`` (first CSV column used as the index), inner-joins the
    three tables on that index, and renames each source's ``lfmc`` column to
    ``true_lfmc``, ``rnn_lfmc`` and ``transformer_lfmc`` respectively.
    Rows containing any NaN, and rows whose ``rnn_lfmc`` equals 0, are dropped.

    Parameters
    ----------
    site_id : str
        Site identifier; also the CSV file stem in each directory.

    Returns
    -------
    pandas.DataFrame
        The merged, filtered rows for this site (possibly empty).

    Raises
    ------
    FileNotFoundError
        If any of the three per-site CSV files does not exist.
    KeyError
        If the expected ``lfmc`` columns are not present after merging.
    """
    site_obs_df = pd.read_csv(f'{true_obs_dir}{site_id}.csv', index_col=0)
    site_rnn_df = pd.read_csv(f'{rnn_maps_dir}{site_id}.csv', index_col=0)
    site_transformer_df = pd.read_csv(f'{transformer_dir}{site_id}.csv', index_col=0)

    # Both inputs carry an `lfmc` column, so the merge suffixes them _x / _y.
    site_compare_df = pd.merge(
        site_obs_df, site_rnn_df, left_index=True, right_index=True
    ).rename(columns={'lfmc_x': 'true_lfmc', 'lfmc_y': 'rnn_lfmc'}, errors='raise')

    # After the rename there is no `lfmc` column left, so the transformer's
    # `lfmc` comes through the second merge unsuffixed.
    site_compare_df = pd.merge(
        site_compare_df, site_transformer_df, left_index=True, right_index=True
    ).rename(columns={'lfmc': 'transformer_lfmc'}, errors='raise')

    site_compare_df = site_compare_df.dropna()
    # NOTE(review): rnn_lfmc == 0 is presumably a no-data fill value in the
    # RNN maps -- confirm against how those maps were exported.
    return site_compare_df[site_compare_df.rnn_lfmc != 0]


# Collect the per-site frames and concatenate once: each site is included
# exactly once, and the table is not re-copied on every iteration.
site_frames = []
for site_id in sites:
    try:
        site_frames.append(load_site_comparison(site_id))
    except FileNotFoundError:
        # At least one of the three sources has no file for this site.
        print(f"missing {site_id}")
    except (KeyError, pd.errors.EmptyDataError, pd.errors.ParserError) as err:
        # The files exist but are malformed; report it separately so schema
        # problems are not mistaken for missing files.
        print(f"skipping {site_id}: {err!r}")

if site_frames:
    compare_df = pd.concat(site_frames)
else:
    # Keep the expected columns available even when no site could be loaded.
    compare_df = pd.DataFrame(columns=COMPARE_COLUMNS)


missing N0153
missing N0186
missing N0285
missing N0400
missing N0538
missing N0661
missing N0690
missing N0698
missing N0965
missing N1036
missing N1042
missing N1043
missing N1044
missing N1045
missing N1048
missing N1051
missing N1098


In [101]:
# Score the RNN map predictions against the observed LFMC.
rnn_pred = compare_df.rnn_lfmc.astype('float')
rnn_residuals = compare_df.true_lfmc - rnn_pred

rnn_rmse = np.sqrt(np.mean(rnn_residuals ** 2))
# Off-diagonal entry of the 2x2 correlation matrix = correlation of truth vs. prediction.
rnn_corr_coef = np.corrcoef(compare_df.true_lfmc, rnn_pred)[0, 1]
rnn_coef_det = r2_score(y_true=compare_df.true_lfmc, y_pred=rnn_pred)

In [102]:
# Score the transformer predictions against the observed LFMC.
transformer_pred = compare_df.transformer_lfmc
transformer_residuals = compare_df.true_lfmc - transformer_pred

transformer_rmse = np.sqrt(np.mean(transformer_residuals ** 2))
# Off-diagonal entry of the 2x2 correlation matrix = correlation of truth vs. prediction.
transformer_corr_coef = np.corrcoef(compare_df.true_lfmc, transformer_pred)[0, 1]
transformer_coef_det = r2_score(y_true=compare_df.true_lfmc, y_pred=transformer_pred)

In [103]:
# RNN maps vs. true observations, printed in order:
# RMSE, correlation coefficient, coefficient of determination (R^2).
print(rnn_rmse, rnn_corr_coef, rnn_coef_det)

43.111319117759976 0.2755786990525062 -0.2939754602790352


In [104]:
# Transformer vs. true observations, printed in order:
# RMSE, correlation coefficient, coefficient of determination (R^2).
print(transformer_rmse, transformer_corr_coef, transformer_coef_det)

29.26213461330525 0.6584704683663191 0.40385016960814146
