In [1]:
import sys
import pandas as pn
import holoviews as hv

# Select bokeh as the holoviews rendering backend
hv.extension('bokeh')

# Debug aid: show the first entry of the module search path
#print(sys.path[0])

# Load the new forecasts and keep only the columns used further down
new_forecast_csv = '../../../../sensitive_data_forecast_tools/intermediate_data/forecast_pentad_linreg.csv'
new = pn.read_csv(new_forecast_csv)
kept_columns = ['date', 'code', 'predictor', 'slope', 'intercept', 'delta',
                'forecasted_discharge', 'q_mean']
newmod = new[kept_columns]
print(new.columns)

# Load the old forecasts that the new data is compared against
old_forecast_csv = '../../../../pentad_forecast_data/intermediate_data/forecasts_pentad_kghm_full_hindcast.csv'
old = pn.read_csv(old_forecast_csv)
print(old.columns)

Index(['date', 'code', 'predictor', 'discharge_avg', 'pentad_in_month',
       'pentad_in_year', 'slope', 'intercept', 'forecasted_discharge',
       'q_mean', 'q_std_sigma', 'delta'],
      dtype='object')
Index(['date', 'code', 'predictor', 'slope', 'intercept', 'delta', 'fc_qmin',
       'fc_qmax', 'fc_qexp', 'qnorm', 'perc_norm', 'qdanger'],
      dtype='object')


In [2]:
# Give the new-data columns the names that the old file uses
old_style_names = {'forecasted_discharge': 'fc_qexp', 'q_mean': 'qnorm'}
newmod = newmod.rename(columns=old_style_names)
print(newmod.columns)

Index(['date', 'code', 'predictor', 'slope', 'intercept', 'delta', 'fc_qexp',
       'qnorm'],
      dtype='object')


In [3]:
# Add the columns that exist in the old file but not in the new one:
#   fc_qmax / fc_qmin : expected forecast plus / minus delta
#   perc_norm         : expected forecast as a percentage of qnorm
#   qdanger           : placeholder column, left empty
newmod = newmod.assign(
    fc_qmax=newmod['fc_qexp'] + newmod['delta'],
    fc_qmin=newmod['fc_qexp'] - newmod['delta'],
    perc_norm=newmod['fc_qexp'] / newmod['qnorm'] * 100,
    qdanger=None,
)

# Arrange the columns in the same order as the old data
old_column_order = ['date', 'code', 'predictor', 'slope', 'intercept', 'delta',
                    'fc_qmin', 'fc_qmax', 'fc_qexp', 'qnorm', 'perc_norm', 'qdanger']
newmod = newmod[old_column_order]

print(newmod.columns)

Index(['date', 'code', 'predictor', 'slope', 'intercept', 'delta', 'fc_qmin',
       'fc_qmax', 'fc_qexp', 'qnorm', 'perc_norm', 'qdanger'],
      dtype='object')


In [4]:
# Write the reformatted forecasts to csv, leaving out the row index
output_csv = '../../../../pentad_forecast_data/intermediate_data/forecasts_pentad_kghm.csv'
newmod.to_csv(output_csv, index=False)

In [18]:
# Plot the forecasts of one station, taking the new data where available and
# filling the remaining dates from the old data.
target_code = '15102'  # station to inspect; change to look at another one

# Compare codes as strings: read_csv may have parsed the 'code' column as
# integers, in which case a comparison with the string '15102' never matches
# and no station would be selected at all.
newcode = newmod[newmod['code'].astype(str) == target_code]
oldcode = old[old['code'].astype(str) == target_code]
if newcode.empty:
    raise ValueError(f"Station code {target_code} not found in the new data")
print(target_code)

key_columns = ['date', 'code']


def _indexed_by_key(frame):
    """Return `frame` indexed by (date, code), one row per key.

    Dates are parsed to datetimes and codes cast to str so that both inputs
    share the same key types. NOTE(review): assumes the 'date' column of both
    files is parseable by to_datetime - confirm against the csv files.
    """
    return (
        frame
        .assign(date=pn.to_datetime(frame['date']),
                code=frame['code'].astype(str))
        .drop_duplicates(subset=key_columns)
        .set_index(key_columns)
    )


# combine_first aligns rows on the index. The default index is just the row
# position of each csv file, so the frames are keyed by (date, code) first;
# otherwise values belonging to different dates would be combined.
merged = (
    _indexed_by_key(newcode)
    .combine_first(_indexed_by_key(oldcode))
    .reset_index()
)
print(merged.columns)

# Sort by date
merged = merged.sort_values(by='date')

# Use holoviews to plot the merged data. The result of .opts() is kept so the
# sizing applies regardless of whether .opts() returns a new object.
plot = hv.Curve(merged, 'date', 'fc_qexp', label='merged').opts(width=800, height=400)

plot