# Interpolate from Daily to Semi-Monthly Observations

**Purpose:** We want to compare metrics from the RNN maps with predictions from the transformer model. Because the RNN produces semi-monthly outputs, we have to resample and interpolate the true LFMC observations and the transformer output data to a semi-monthly frequency to do so.

**Date:** July 29, 2024


In [1]:
from hydroDL import kPath

import pandas as pd
import json
import os

loading package hydroDL


In [2]:
# Function from: https://github.com/fkwai/geolearn/blob/master/app/vegetation/data/wrap/monthly/addRS.py
# TODO: Understand what is happening here
def interpSM(inputDF, var):
    """Resample selected columns to a semi-monthly series and fill short gaps.

    Rows with missing values in the selected column(s) are dropped, the
    remaining observations are averaged into semi-month-start ('SMS') bins,
    and empty bins are then filled with pandas' default interpolation,
    filling at most 8 consecutive bins per gap.

    Args:
        inputDF: DataFrame with a datetime-like index (required by
            ``resample``); it is not modified.
        var: Column label or list of column labels to select from ``inputDF``.

    Returns:
        The selected data at 'SMS' frequency with gaps interpolated.
    """
    selected = inputDF[var]
    observed = selected.dropna()
    # 'SMS' anchors the bins on the 1st and 15th of each month.
    semimonthly = observed.resample('SMS').mean()
    return semimonthly.interpolate(limit=8)

### True observations

In [58]:
# Load true observations data
# TODO: Check with Kuai to make sure this is updated
nfmd_path = f'{kPath.dirVeg}NFMD/NFMD_single_A19.json'
# Use a context manager so the file handle is closed once the JSON is parsed.
with open(nfmd_path) as f:
    nfmd_data = json.load(f)

In [66]:
# Write one semi-monthly LFMC CSV per record in the NFMD JSON.
for site_record in nfmd_data:
    # Each record supplies dates ('t'), values ('v') and an identifier ('siteId').
    site_id = site_record['siteId']

    # Index the values by date so interpSM can resample them.
    df = pd.DataFrame({'lfmc': site_record['v']})
    df = df.set_index(pd.DatetimeIndex(site_record['t']))

    df_interpolate = interpSM(df, ['lfmc'])

    out_path =f'{kPath.dirVeg}inference/true_obs_interpolate_semimonthly/{site_id}.csv'
    df_interpolate.to_csv(out_path)

### Transformer outputs

In [21]:
# Read the transformer output CSV, using its "date" column as the index.
path = os.path.join(kPath.dirVeg, "transformer_lfmc_daily.csv")
df = pd.read_csv(path, index_col="date")
# Convert the index to a DatetimeIndex, which the resample step in interpSM needs.
df.index = pd.DatetimeIndex(df.index)

In [25]:
# Distinct values of the "site" column, in order of first appearance.
sites = df["site"].unique()


In [26]:
# Write one semi-monthly LFMC CSV per site in the transformer output.
for site in sites:
    # Boolean mask selecting the rows that belong to this site.
    site_rows = df["site"] == site
    df_site = df.loc[site_rows]
    df_interpolate = interpSM(df_site, ['lfmc'])

    out_path =f'{kPath.dirVeg}inference/transformer_interpolate_semimonthly/{site}.csv'
    df_interpolate.to_csv(out_path)