### reqs

Import pandas, the Prophet forecasting library, and the supporting utilities (SciPy, scikit-learn, matplotlib, PyTorch).

In [1]:
# pandas
import pandas as pd

# prophet
from neuralprophet import NeuralProphet

# utils
from scipy.signal import savgol_filter
from sklearn.model_selection import train_test_split

# matplotlib
%matplotlib inline
import matplotlib
# Import the pyplot submodule explicitly: `import matplotlib` on its own does
# not guarantee that `matplotlib.pyplot` is available as an attribute, so the
# previous `plt = matplotlib.pyplot` relied on something else having loaded it.
import matplotlib.pyplot as plt

# Notebook-wide figure defaults: large figures on a white background.
matplotlib.rcParams['figure.figsize'] = [15, 8]
matplotlib.rcParams['figure.facecolor'] = 'white'
import torch
import io

### load data

Loads the observations DataFrame from `output/observations.pickle`.

In [3]:
# Read the raw observations and preview them before any processing.
observations = pd.read_pickle('output/observations.pickle')
print(observations.head())

# Prepare the frame for regression in a single pipeline:
#   1. smooth every column with a Savitzky-Golay filter (window 25, order 3),
#   2. swap the opaque sensor column ids for readable names,
#   3. [temp] drop gageheight to keep the problem simple.
sensor_names = {'107337_00065': 'gageheight', '107338_00010': 'watertemp'}
observations = (
    observations
    .apply(lambda column: savgol_filter(column, 25, 3))
    .rename(columns=sensor_names)
    .drop(columns=['gageheight'])
)

# Preview the processed frame and plot its first 4 * 24 rows.
print(observations.head())
observations.head(4 * 24).plot()
plt.show()

KeyError: "['index'] not found in axis"

In [5]:
# Reshape into the ds/y layout: the index becomes the `ds` column and the
# water temperature becomes `y`.
# The index is named 'ds' *before* reset_index() so the resulting column is
# always called 'ds'. Renaming a column called 'index' afterwards only works
# when the index is unnamed; with a named index that rename is a silent no-op
# and the `observations['ds']` lookup below raises KeyError.
observations = observations.rename_axis('ds').reset_index()
observations = observations.rename(columns={'watertemp': 'y'})

# remove timezone
observations['ds'] = observations['ds'].dt.tz_localize(None)

print(observations.head())

# Positional split without shuffling: the first 80% of the rows become the
# training set and the remaining rows become the test set.
cutoff = int(observations.shape[0] * 0.80)
train = observations.iloc[:cutoff, :]
test = observations.iloc[cutoff:, :]

print(train.shape, test.shape)

                   ds   airtemp  cloudcover  precip         y
0 2008-01-01 06:15:00 -7.274530    2.741197     0.0  1.943590
1 2008-01-01 06:30:00 -7.352735    2.464017     0.0  1.918974
2 2008-01-01 06:45:00 -7.365834    2.229257     0.0  1.895998
3 2008-01-01 07:00:00 -7.319631    2.035260     0.0  1.874504
4 2008-01-01 07:15:00 -7.219930    1.880372     0.0  1.854337
(390224, 5) (97556, 5)


### build model

In [6]:
# NOTE(review): `Prophet` is not imported in the visible imports cell (only
# `NeuralProphet` is) — confirm which library is intended and import it there,
# otherwise this cell fails on a fresh kernel.
# Yearly and daily seasonality are enabled; weekly seasonality is turned off.
model = Prophet(
    yearly_seasonality=True,
    daily_seasonality=True,
    weekly_seasonality=False,
)

# Register each weather column as an extra regressor, without standardization.
for regressor_name in ('airtemp', 'cloudcover', 'precip'):
    model.add_regressor(regressor_name, standardize=False)

# Fit on the training split; kept as the last expression so the cell still
# displays the value returned by fit().
model.fit(train)

  components = components.append(new_comp)


Initial log joint probability = -25424.7


<prophet.forecaster.Prophet at 0x11111f850>

    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Error evaluating model log probability: Non-finite gradient.

      99        874952   0.000796825        178992           1           1      139   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199        877682     0.0111749        431030      0.2254           1      266   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        882643    0.00221847        527191           1           1      386   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399        883348   0.000394125        247686           1           1      515   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499        885226     0.0289053        103525           1           1      645   
    Iter      log prob        ||dx||      |

In [1]:
# Evaluation window: the last 3 days of the training split followed by the
# first 28 days of the test split. Rows are 15 minutes apart, so 4 * 24 rows
# make up one day. (The frame keeps the name `week` although it spans 31 days.)
OBS_PER_DAY = 4 * 24
HISTORY_DAYS = 3
HORIZON_DAYS = 28
history_rows = OBS_PER_DAY * HISTORY_DAYS
horizon_rows = OBS_PER_DAY * HORIZON_DAYS

# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported equivalent and keeps the original row index.
week = pd.concat([train.tail(history_rows), test.head(horizon_rows)])

# compare
actual = week

# Predict from the timestamps and weather regressors only (no `y` column).
forecast = model.predict(week[['ds', 'airtemp', 'cloudcover', 'precip']])
predicted = forecast

print(actual, predicted)

# plot weather conditions
plt.plot(week['ds'], week['airtemp'])
plt.plot(week['ds'], week['cloudcover'])
plt.plot(week['ds'], week['precip'])
plt.legend(['airtemp', 'cloudcover', 'precip'])
plt.show()

# plot water temp forecast
# The vertical line marks the first row taken from the test split, i.e. the
# boundary between data the model was fitted on and held-out data.
plt.axvline(x=week['ds'].values[history_rows], color='gray')
plt.plot(actual['ds'], actual['y'], color='black')
plt.plot(predicted['ds'], predicted['yhat'], color='blue')
plt.fill_between(predicted['ds'], predicted['yhat_lower'], predicted['yhat_upper'], color='blue', alpha=0.15)
plt.legend(['horizon', 'actual', 'forecast'])
plt.show()

NameError: name 'train' is not defined

### save model

In [8]:
# Serialize the fitted model with torch.save into an in-memory bytes buffer.
# NOTE(review): nothing in this cell writes `buffer` to disk — confirm that it
# is consumed elsewhere. The earlier JSON export to 'output/model.json' via
# model_to_json(model) is currently disabled.
buffer = io.BytesIO()
torch.save(obj=model, f=buffer)