# Using LSTMs to predict river flow
Welcome. This notebook will demonstrate using a LSTM built with Keras to predict the flow of the Kenduskeag stream in Bangor Maine. Let's first take a look at our data.

In [5]:
import pandas as pd
import numpy as np
from keras.preprocessing.sequence import pad_sequences
# Get some time series data
df = pd.read_csv("height2.csv")
df = df[['height', 'temp', 'rainfallh']]
df.dropna()


Unnamed: 0,height,temp,rainfallh
0,4.63,45.0,0.00
1,4.66,44.5,0.00
2,4.70,44.2,0.00
3,4.78,44.0,0.00
4,4.82,43.5,0.00
5,4.86,42.8,0.00
6,4.90,41.9,0.00
7,4.94,41.9,0.00
8,4.97,42.7,0.00
9,5.03,43.8,0.00


In [12]:
# Our core functions will be here

# This function reas the CSV and gets the necessary rows
def read_csv(path):
    df = pd.read_csv(path)
    df = df[['height', 'temp', 'rainfallh']]
    df.dropna()
    #X_test, actual = get_split(df)
    # Save it as a list
    return format_data(df)


def format_data(df):

    height2, predictors = get_split(df)
    df['single_input_vector'] = predictors.apply(tuple, axis=1).apply(list)
    # Double-encapsulate list so that you can sum it in the next step and keep time steps as separate elements
    df['single_input_vector'] = df.single_input_vector.apply(lambda x: [list(x)])
    df['cumulative_input_vectors'] = df.single_input_vector.cumsum()
    max_sequence_length = df.cumulative_input_vectors.apply(len).max()
    padded_sequences = pad_sequences(df.cumulative_input_vectors.tolist(), max_sequence_length).tolist()
    df['padded_input_vectors'] = pd.Series(padded_sequences).apply(np.asarray)
    print(len(df))
    X_train_init = np.asarray(df.padded_input_vectors)
    print(X_train_init.shape)
    s = np.hstack(X_train_init)
    fin = s.reshape(len(df),len(df),2)
    y_train = np.hstack(np.asarray(height2))
    return fin, y_train


def get_split(dataset):
    #print(dataset.drop('height',1))
    return dataset['height'], dataset.drop('height',1)

X_train, y_train = read_csv('height.csv')
#print(predictors[rainfallh].head())





3535
(3535,)


In [54]:
from keras.preprocessing.sequence import pad_sequences

max_sequence_length = df.cumulative_input_vectors.apply(len).max()
# Save it as a list   
padded_sequences = pad_sequences(df.cumulative_input_vectors.tolist(), max_sequence_length).tolist()
df['padded_input_vectors'] = pd.Series(padded_sequences).apply(np.asarray)

In [75]:
print(len(df))
print(X_train_init.shape)
s = np.hstack(X_train_init)
#fin = s.reshape(3535,3535,2)
y_train = np.hstack(np.asarray(height))

3535
(3535,)


In [66]:
# Extract your training data
X_train_init = np.asarray(df.padded_input_vectors)
# Use hstack to and reshape to make the inputs a 3d vector
X_train = np.hstack(X_train_init).reshape(len(df),max_sequence_length, 1)
y_train = np.hstack(np.asarray(height)).reshape(len(df),len(height))


ValueError: cannot reshape array of size 24992450 into shape (3535,3535,1)

In [39]:
#df['output_vector'].head()
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
def build_model(layers):
    print(layers)
    model = Sequential()

    model.add(LSTM(
        input_dim=layers[0],
        output_dim=1,
        return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(
        100,
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(
        1))
    model.add(Activation("linear"))

    
    model.compile(loss="mse", optimizer="rmsprop")
    #print("> Compilation Time : ", time.time() - start)
    return model


In [40]:
model = build_model([2, 3535,3535,2])
print(model.summary())
print(y_train)

[2, 3535, 3535, 2]
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lstm_9 (LSTM)                    (None, None, 1)       16          lstm_input_5[0][0]               
____________________________________________________________________________________________________
dropout_9 (Dropout)              (None, None, 1)       0           lstm_9[0][0]                     
____________________________________________________________________________________________________
lstm_10 (LSTM)                   (None, 100)           40800       dropout_9[0][0]                  
____________________________________________________________________________________________________
dropout_10 (Dropout)             (None, 100)           0           lstm_10[0][0]                    
________________________________________________________________________

In [17]:
#load_data("height.csv", 20 ,True)
model = build_model([2, 3535, 3535, 2])
print(model.summary())
model.fit(
    X_train,
    y_train,
    batch_size=512,
    nb_epoch=2,
    validation_split=0.05)



[2, 3535, 3535, 2]
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lstm_5 (LSTM)                    (None, None, 1)       16          lstm_input_3[0][0]               
____________________________________________________________________________________________________
dropout_5 (Dropout)              (None, None, 1)       0           lstm_5[0][0]                     
____________________________________________________________________________________________________
lstm_6 (LSTM)                    (None, 100)           40800       dropout_5[0][0]                  
____________________________________________________________________________________________________
dropout_6 (Dropout)              (None, 100)           0           lstm_6[0][0]                     
________________________________________________________________________

<keras.callbacks.History at 0x1a382e6d898>

In [27]:
from numpy import newaxis
def predict_sequences_multiple(model, data, window_size, prediction_len):
    #Predict sequence of 50 steps before shifting prediction run forward by 50 steps
    prediction_seqs = []
    for i in range(int(len(data)/prediction_len)):
        curr_frame = data[i*prediction_len]
        predicted = []
        for j in range(prediction_len):
            predicted.append(model.predict(curr_frame[newaxis,:,:])[0,0])
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs

In [24]:
model.save('my_model2.h5')


In [25]:
X_test, y =read_csv("height3.csv")


327
(327,)


In [33]:
#predict_sequences_multiple(model, X_test,10,50)
model.predict(X_train)

array([[ 3.54423332],
       [ 3.54414773],
       [ 3.54411197],
       ..., 
       [ 3.54457498],
       [ 3.54457498],
       [ 3.54457498]], dtype=float32)