# Warm Springs HMS Shift Analysis
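Compute the Nash–Sutcliffe efficiency (NSE) of the HMS inflow predictions against raw hourly observations, before and after shifting the predictions 8 hourly time steps earlier. NSE is computed as $\mathrm{NSE} = 1 - \frac{\sum_t (o_t - p_t)^2}{\sum_t (o_t - \bar{o})^2}$, where $o_t$ are the observations, $p_t$ the predictions and $\bar{o}$ the mean observation.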


In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
def clean_df(df):
    """Turn a raw hourly export into a frame indexed by timestamp.

    The input is the frame exactly as read from the CSV: positional row 0
    holds the real column labels and records start at positional row 3
    (rows 0-2 are discarded).

    Processing steps:
      * take the column labels from row 0 and strip surrounding whitespace;
      * drop the ``Ordinate`` column, rename ``Date`` -> ``Day`` and, when no
        ``Time`` column exists, ``time`` -> ``Time``;
      * roll ``24:00:00`` stamps over to ``00:00:00`` of the following day;
      * build a ``date`` column from ``Day`` + ``Time``, drop rows where it
        is missing and use it as the index;
      * drop every column whose label occurs more than once (all copies).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame. It is left untouched; all work happens on a copy.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by ``date``, without the ``Day``/``Time`` helper
        columns. Remaining values are not converted.

    Raises
    ------
    KeyError
        If the relabelled frame has no ``Time`` or ``Day`` column.
    """
    day_format = '%d-%b-%y'

    # Copy the data rows before relabelling so the caller's frame keeps its
    # own column labels and the assignments below never write into a slice
    # of someone else's data.
    cleaned = df[3:].copy()
    cleaned.columns = df.iloc[0]
    cleaned.columns = cleaned.columns.str.strip()

    # Normalise the handful of column names the rest of the function needs.
    cleaned = cleaned.drop(columns=['Ordinate'], errors='ignore')
    cleaned = cleaned.rename(columns={'Date': 'Day'})
    if 'Time' not in cleaned.columns and 'time' in cleaned.columns:
        cleaned = cleaned.rename(columns={'time': 'Time'})

    # "24:00:00" means midnight at the end of the day: move those rows to
    # 00:00:00 of the next calendar day.
    is_end_of_day = cleaned['Time'] == '24:00:00'
    next_day = pd.to_datetime(cleaned.loc[is_end_of_day, 'Day'], format=day_format) + pd.Timedelta(days=1)
    cleaned.loc[is_end_of_day, 'Day'] = next_day.dt.strftime(day_format)
    cleaned['Time'] = cleaned['Time'].replace('24:00:00', '00:00:00')

    cleaned['date'] = pd.to_datetime(cleaned['Day'], format=day_format) + pd.to_timedelta(cleaned['Time'])
    cleaned = cleaned.dropna(subset=['date'])

    # keep=False flags every occurrence of a repeated label, so ambiguous
    # columns are removed entirely rather than keeping an arbitrary one.
    cleaned = cleaned.loc[:, ~cleaned.columns.duplicated(keep=False)]

    cleaned = cleaned.set_index('date')
    return cleaned.drop(columns=['Day', 'Time'], errors='ignore')

In [3]:
# Read the observed and the HMS tables and run both through clean_df so each
# ends up indexed by its `date` timestamp.
obs_df = clean_df(
    pd.read_csv('../../../russian_river_data/hourly.csv', low_memory=False)
)
hms_df = clean_df(
    pd.read_csv('../../../russian_river_data/WarmSprings_Inflow_hourly.csv', low_memory=False)
)

# Inner join on the index: only timestamps present in both series survive.
df = pd.concat(
    [
        obs_df['LAKE SONOMA FLOW-RES IN CALC-VAL-SHIFT-SMOOTH'].astype(float),
        hms_df['Warm Springs Dam Inflow FLOW'].astype(float),
    ],
    axis=1,
    join='inner',
)
obs, pred = df.iloc[:, 0], df.iloc[:, 1]

# NSE = 1 - (sum of squared errors) / (sum of squared deviations of the
# observations from their mean).
nse = 1 - ((obs - pred) ** 2).sum() / ((obs - obs.mean()) ** 2).sum()
print('NSE', nse)

NSE 0.40521964808462485


In [4]:
# Pull every prediction 8 rows earlier, then score only the rows that still
# have a prediction (the trailing rows have nothing to pull from and are NaN).
# NOTE(review): the shift is positional (rows, not a time offset), so it equals
# 8 hours only if the joined index is a gap-free hourly series — confirm.
shifted_pred = pred.shift(-8)
mask = shifted_pred.notna()
obs_valid = obs[mask]
residual = obs_valid - shifted_pred[mask]
shifted_nse = 1 - (residual ** 2).sum() / ((obs_valid - obs_valid.mean()) ** 2).sum()
print('Shifted NSE', shifted_nse)

Shifted NSE 0.7186403200235634


## Test period NSE
Subset data to the test period and compute NSE before and after shifting HMS predictions.


In [9]:
def _read_config_date(config_text, key):
    """Return the double-quoted value that follows ``key:`` in ``config_text``.

    Raises
    ------
    ValueError
        If ``key`` does not occur with a double-quoted value, so a missing or
        misspelled config entry is reported by name.
    """
    match = re.search(rf'{re.escape(key)}:\s*"(.*?)"', config_text)
    if match is None:
        raise ValueError(f'{key} not found in warm_springs_dam_nlayer.yaml')
    return match.group(1)


# The config is scanned as plain text; only the two test-period entries are used.
with open('warm_springs_dam_nlayer.yaml') as f:
    text = f.read()

start_str = _read_config_date(text, 'test_start_date')
end_str = _read_config_date(text, 'test_end_date')

# NOTE(review): dayfirst=True assumes the config stores day-first dates
# (e.g. 01/10/2019 meaning 1 October) — confirm against the YAML.
start = pd.to_datetime(start_str, dayfirst=True)
end = pd.to_datetime(end_str, dayfirst=True)

# Both bounds are inclusive.
# NOTE(review): if the end date carries no time component the window stops at
# 00:00 of that day, leaving out the rest of the day — confirm this is intended.
mask_period = (df.index >= start) & (df.index <= end)

In [10]:
# Baseline score restricted to the test window.
obs_test = obs[mask_period]
pred_test = pred[mask_period]
nse_test = 1 - ((obs_test - pred_test) ** 2).sum() / ((obs_test - obs_test.mean()) ** 2).sum()
print('Test NSE', nse_test)

# Shift on the full series first and cut the window afterwards, so rows near
# the end of the window can still pull predictions from just past it.
shifted_pred_test = pred.shift(-8)[mask_period]
mask = shifted_pred_test.notna()
obs_test_valid = obs_test[mask]
residual_test = obs_test_valid - shifted_pred_test[mask]
shifted_nse_test = 1 - (residual_test ** 2).sum() / ((obs_test_valid - obs_test_valid.mean()) ** 2).sum()
print('Shifted Test NSE', shifted_nse_test)

Test NSE 0.3303296010156259
Shifted Test NSE 0.6136935521637354
