In [None]:
# Purpose of this program: to predict the water level of Lake Shasta over time, for data that is roughly sinusoidal around a sloped (trending) baseline.

In [None]:
# Imports and file choice
import datetime
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import joblib

from google.colab import files
# Open the Colab upload dialog (the CSV read below is presumably the uploaded file).
uploaded = files.upload()

# Read the CSV with explicit column names, discard the unused third column,
# and index the frame by DATE.
df = (
    pd.read_csv(
        'ShastaAllData_WaterLevel.csv',
        header=0,
        names=['DATE', 'WATERLEVEL', 'UNNECESSARY'],
    )
    .drop(columns='UNNECESSARY')
    .set_index('DATE')
)
df.head()

# Row count used by later cells to locate the end of the observed data.
NUMROWSDATA = 1111

# Draw the observed series, then label and bound the axes in one call.
plt.plot(df.index, df['WATERLEVEL'], color='blue')
plt.gca().set(
    title='Lake Shasta Current Water Level over Time',
    xlabel='Time (Days from January 7th, 2019)',
    ylabel='Water Level',
    xlim=[0, 1700],
    ylim=[600, 1500],
)
plt.suptitle("")
plt.minorticks_on()

In [None]:
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense

# Take the frame's contents as a float32 array and standardise them.
values = df.values.astype('float32')
scaler = StandardScaler().fit(values)
scaled = scaler.transform(values)

# Persist the fitted scaler so the same scaling can be reloaded later.
joblib.dump(scaler, 'scaler.gz')

# Window length (past time steps fed to the model) and number of data columns.
backward_steps, n_features = 10, len(df.columns)

# Convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a time series as a table of lagged inputs and future outputs.

    Each row of the result holds, side by side, the observations at
    t-n_in ... t-1 followed by the observations at t ... t+n_out-1.

    Args:
        data: Observations ordered in time; a list or a 2D array-like with one
            row per time step and one column per variable.
        n_in: Number of lagged steps to include as input columns.
        n_out: Number of steps (starting at t) to include as output columns.
        dropnan: If True, drop the rows that contain NaN introduced by shifting.

    Returns:
        A DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
        and ``var<j>(t+<i>)``.

    Raises:
        ValueError: If ``n_in`` and ``n_out`` together select no columns.
    """
    frame = pd.DataFrame(data)
    # Take the variable count from the frame itself so flat lists, lists of
    # rows and 2D arrays are all named consistently.
    n_vars = frame.shape[1]
    cols, names = [], []

    # Input sequence: (t-n_in, ..., t-1)
    for lag in range(n_in, 0, -1):
        cols.append(frame.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]

    # Forecast sequence: (t, t+1, ..., t+n_out-1)
    for step in range(n_out):
        cols.append(frame.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]

    if not cols:
        raise ValueError('n_in and n_out must select at least one time step')

    # Assemble only after every output step has been collected; doing this
    # inside the loop would return after the first step and drop the rest.
    agg = pd.concat(cols, axis=1)
    agg.columns = names

    if dropnan:
        agg.dropna(inplace=True)
    return agg

# Frame as supervised learning: backward_steps lagged inputs -> one output step
reframed = series_to_supervised(scaled, backward_steps, 1)

# Split into train and test sets, keeping time order (no shuffling)
values = reframed.values
threshold = int(0.9 * len(reframed))

# The first 90% of the rows are for training, the remaining 10% for testing
train = values[:threshold, :]
test = values[threshold:, :]

# Split into inputs (the lagged columns) and output (the column at time t)
n_obs = backward_steps * n_features
train_X, train_y = train[:, :n_obs], train[:, -n_features]
test_X, test_y = test[:, :n_obs], test[:, -n_features]

# Reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], backward_steps, n_features))
test_X = test_X.reshape((test_X.shape[0], backward_steps, n_features))

# Design network: one LSTM layer followed by a single-unit regression head
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.LSTM(80, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(tf.keras.layers.Dense(1))
# `learning_rate` is the supported argument name; `lr` is deprecated.
model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001))
model.summary()

# Train for a fixed 200 epochs (no early stopping is configured); the test
# split is passed as validation data so val_loss is reported per epoch.
history = model.fit(train_X, train_y, epochs=200, batch_size=30, validation_data=(test_X, test_y), verbose=2, shuffle=False)

# Plot history: the loss is MSE and the x axis counts epochs
plt.title('Error of Prediction over Time', fontsize=10, loc='center', fontweight='bold')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.ylabel('Mean squared error')
plt.xlabel('Epoch')
plt.legend()
plt.show()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 80)                26240     
                                                                 
 dense (Dense)               (None, 1)                 81        
                                                                 
Total params: 26,321
Trainable params: 26,321
Non-trainable params: 0
_________________________________________________________________


  super(Adam, self).__init__(name, **kwargs)


Epoch 1/200
33/33 - 3s - loss: 0.7218 - val_loss: 2.7858 - 3s/epoch - 84ms/step
Epoch 2/200
33/33 - 0s - loss: 0.5040 - val_loss: 2.0117 - 220ms/epoch - 7ms/step
Epoch 3/200
33/33 - 0s - loss: 0.3222 - val_loss: 1.3416 - 209ms/epoch - 6ms/step
Epoch 4/200
33/33 - 0s - loss: 0.1720 - val_loss: 0.8041 - 234ms/epoch - 7ms/step
Epoch 5/200
33/33 - 0s - loss: 0.0713 - val_loss: 0.4379 - 218ms/epoch - 7ms/step
Epoch 6/200
33/33 - 0s - loss: 0.0267 - val_loss: 0.2327 - 217ms/epoch - 7ms/step
Epoch 7/200
33/33 - 0s - loss: 0.0140 - val_loss: 0.1284 - 219ms/epoch - 7ms/step
Epoch 8/200
33/33 - 0s - loss: 0.0093 - val_loss: 0.0742 - 226ms/epoch - 7ms/step
Epoch 9/200
33/33 - 0s - loss: 0.0071 - val_loss: 0.0461 - 231ms/epoch - 7ms/step
Epoch 10/200
33/33 - 0s - loss: 0.0061 - val_loss: 0.0321 - 216ms/epoch - 7ms/step
Epoch 11/200
33/33 - 0s - loss: 0.0058 - val_loss: 0.0253 - 218ms/epoch - 7ms/step
Epoch 12/200
33/33 - 0s - loss: 0.0057 - val_loss: 0.0218 - 209ms/epoch - 6ms/step
Epoch 13/200
33

In [None]:
INPUTFINALDAY = 1400
n_steps = 10      # window length; must match the number of time steps the model was trained on
n_features = 1
numRepeat = INPUTFINALDAY - NUMROWSDATA

def getScaler(num, INDEX):
  """Return the scaled WATERLEVEL stored at index label ``num + INDEX`` of ``df``.

  Only ``scaler.transform`` is used: refitting the scaler here would overwrite
  the mean/variance learned from the training data and make every later
  ``transform`` / ``inverse_transform`` call inconsistent with the model.
  """
  level = float(df.loc[num + INDEX, "WATERLEVEL"])
  return scaler.transform([[level]])[0][0]

# Use only the first NUMROWSDATA rows as observations, so re-running this cell
# does not treat predictions appended by an earlier run as real data.
observed = df.iloc[:NUMROWSDATA][["WATERLEVEL"]]
levels = observed["WATERLEVEL"].to_numpy(dtype="float32")
if len(levels) < n_steps:
  raise ValueError("need at least %d observed rows to seed the forecast, got %d" % (n_steps, len(levels)))

# Rolling window of the most recent n_steps values, in scaled units.
window = scaler.transform(levels[-n_steps:].reshape(-1, n_features))

predicted_levels = []
for _ in range(numRepeat):
  # make the input fit the shape that the model takes
  x_input = window.reshape((1, n_steps, n_features))
  yhat = model.predict(x_input, verbose=0)

  # convert prediction from scaled units back to water level
  predicted_levels.append(float(scaler.inverse_transform(yhat)[0, 0]))

  # slide the window: drop the oldest value, append the new (scaled) prediction
  window = np.vstack([window[1:], yhat.reshape(1, n_features)])

# add all predictions to the dataframe in one step, as numbers rather than strings
if predicted_levels:
  df = pd.concat([observed, pd.DataFrame({"WATERLEVEL": predicted_levels})], ignore_index=True)

# plot results once, after the forecast is complete
plt.title('Water Level of Lake Shasta over Time', fontsize=10, loc='center', fontweight='bold')
plt.plot(df.index, df.WATERLEVEL, color = 'blue')
plt.suptitle("")
plt.xlabel('Time (Days from January 7th, 2019)')
plt.ylim([600, 1500])
plt.xlim([0, 1700])
plt.ylabel('Water Level')
plt.minorticks_on()

