# Interpolate from Daily to Semi-Monthly Observations

**Purpose:** We want to compare metrics from the RNN maps with predictions from the transformer model. Because the RNN produces semi-monthly outputs, we first have to interpolate the true LFMC observations and the transformer output data to semi-monthly values so that the two can be compared.

**Date:** July 29, 2024


In [6]:
from hydroDL import kPath

import pandas as pd
import json
import os

In [7]:
# Function from: https://github.com/fkwai/geolearn/blob/master/app/vegetation/data/wrap/monthly/addRS.py
def interpSM(inputDF, var, method, label):
    """Aggregate selected columns of a time-indexed frame onto a coarser grid.

    Args:
        inputDF: DataFrame to aggregate; it is copied, not modified. It is
            passed to ``resample``, so it needs a datetime-like index.
        var: Column selection applied as ``inputDF[var]`` (the callers in
            this notebook pass a list of column names).
        method: Resampling rule handed to ``resample`` (e.g. ``'SMS'``).
        label: Which bin edge (``'left'`` or ``'right'``) labels each bin.

    Returns:
        The per-bin mean of the selected columns. Rows containing NaN are
        dropped before binning, so they do not contribute to the means.
    """
    selected = inputDF.copy()[var].dropna()
    # No gap filling is applied afterwards (the upstream
    # ``interpolate(limit=1)`` step is left disabled).
    return selected.resample(method, label=label).mean()

In [8]:
def shift_date(date):
    """Move a semi-monthly start date to the end of its half-month.

    A date on the 1st is moved to the 15th of the same month; a date on the
    15th is moved to the last day of the same month. Any other day of the
    month yields ``None``.
    """
    day = date.day
    if day == 1:
        return date.replace(day=15)
    if day != 15:
        return None
    # Last day of the month = first day of the following month minus one day.
    first_of_next_month = date.replace(day=1) + pd.DateOffset(months=1)
    return first_of_next_month - pd.DateOffset(days=1)

### True observations

In [9]:
# Load true observations data
# TODO: Check with Kuai to make sure this is updated
nfmd_path = f'{kPath.dirVeg}NFMD/NFMD_single.json'
# Use a context manager so the file handle is closed as soon as the JSON
# has been parsed, instead of staying open for the rest of the session.
with open(nfmd_path) as f:
    nfmd_data = json.load(f)

print("number of sites:", len(nfmd_data))

number of sites: 335


In [10]:
# Aggregate every NFMD site's LFMC record to semi-monthly means and write
# one CSV per site.
for site_record in nfmd_data:
    site_id = site_record['siteId']

    # Date-indexed frame holding this site's LFMC values ('t' = dates,
    # 'v' = values in the NFMD record).
    df = pd.DataFrame(
        {'lfmc': site_record['v']},
        index=pd.DatetimeIndex(site_record['t']),
    )

    df_interpolate = interpSM(df, ['lfmc'], 'SMS', 'right')
    # Relabeling the index with shift_date is currently disabled:
    # df_interpolate.index  = df_interpolate.index.map(shift_date)

    out_path = f'{kPath.dirVeg}predictions/true_obs2_interpolate_semimonthly/{site_id}.csv'
    df_interpolate.to_csv(out_path)

### Transformer outputs

In [None]:
# Read the daily transformer LFMC predictions, using the "date" column as
# the row index.
path = os.path.join(kPath.dirVeg, "transformer_lfmc_daily.csv")
df = pd.read_csv(path, index_col="date")
# Turn the index into a DatetimeIndex; the resampling step downstream
# needs a datetime-like index.
df.index = pd.DatetimeIndex(df.index)

In [None]:
# Distinct values of the "site" column, in order of first appearance.
sites = df["site"].unique()

In [None]:
# Aggregate each site's daily transformer predictions to semi-monthly means
# and write one CSV per site.
for site in sites:
    df_site = df[df.site == site]
    # interpSM has no defaults for the resampling rule and bin label, so
    # calling it without them raises TypeError. Pass the same 'SMS' /
    # 'right' settings used for the true observations so both datasets are
    # binned the same way.
    df_interpolate = interpSM(df_site, ['lfmc'], 'SMS', 'right')

    out_path = f'{kPath.dirVeg}inference/transformer_interpolate_semimonthly/{site}.csv'
    df_interpolate.to_csv(out_path)