In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.layers import Activation, Flatten, Dropout
from keras.optimizers import SGD
from tqdm import tqdm_notebook

In [2]:
# Load the raw hourly records (relative path: the CSV must sit next to the notebook).
df = pd.read_csv('Sub0-RAW.csv')
# Preview the first rows to check the column layout.
df.head()

Unnamed: 0,Dates,Year,Month,Day,Hours,Precipitation,Temperature,Outflow
0,10/1/1980,1980,10,1,0:00:00,0.0,102.992,5.0
1,10/1/1980,1980,10,1,1:00:00,0.0,97.79,5.0
2,10/1/1980,1980,10,1,2:00:00,0.0,92.588,5.0
3,10/1/1980,1980,10,1,3:00:00,0.0,87.404,5.0
4,10/1/1980,1980,10,1,4:00:00,0.0,85.172,5.0


In [3]:
# Columns fed to the network as input features.
train_cols = [
    "Precipitation",
    "Temperature",
    "Outflow",
]

In [4]:
# Keep only the feature columns; the date/time bookkeeping columns are dropped.
new_df = df.filter(items=train_cols, axis="columns")

In [5]:
def lag_seq(df, n_seq, column='Outflow'):
    """Return a copy of ``df`` with ``n_seq`` look-ahead columns appended.

    For every step ``k`` in ``1..n_seq`` a column named ``'<column>(t+k)'`` is
    added that holds ``df[column]`` shifted ``k`` rows upward, i.e. the value
    found ``k`` rows later. The last ``k`` rows of that column are NaN because
    no later value exists for them.

    The values are taken from the ``df`` argument itself (not from a
    notebook-level global), and ``df`` is left unmodified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column``.
    n_seq : int
        Number of look-ahead columns to create; ``0`` adds none.
    column : str, default 'Outflow'
        Name of the column to shift.

    Returns
    -------
    pandas.DataFrame
        New frame with the original columns followed by the look-ahead
        columns in increasing step order.

    Raises
    ------
    KeyError
        If ``column`` is not a column of ``df``.
    """
    source = df[column]
    # Negative shift moves later rows up so row t carries the value from t+k.
    lagged = {
        '%s(t+%d)' % (column, step): source.shift(-step)
        for step in range(1, n_seq + 1)
    }
    # assign() builds a new frame, so the caller's frame is never mutated.
    return df.assign(**lagged)

In [6]:
def timeseries(X, Y, time_steps, out_steps):
    """Slice a feature matrix into overlapping windows, one target per window.

    Window ``i`` is ``X[i:i + time_steps]`` and its target is the entry of
    ``Y`` on the window's last row, ``Y[i + time_steps - 1]``. Exactly
    ``X.shape[0] - time_steps`` windows are produced.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_rows, n_features)
        Feature matrix, one row per time step.
    Y : array-like, shape (n_rows,) or (n_rows, 1)
        One target value per row of ``X``.
    time_steps : int
        Number of consecutive rows in each window.
    out_steps : int
        Accepted for call compatibility; not used (one target per window).

    Returns
    -------
    X_values : numpy.ndarray, shape (n_windows, time_steps, n_features)
    Y_values : numpy.ndarray, shape (n_windows,)

    Raises
    ------
    ValueError
        If ``Y`` has more than one target column, or if ``X`` has fewer rows
        than ``time_steps``.
    """
    # tqdm.tqdm_notebook is deprecated; tqdm.notebook.tqdm is its replacement.
    from tqdm.notebook import tqdm as progress_bar

    targets = np.asarray(Y)
    if targets.ndim == 2 and targets.shape[1] == 1:
        # Flatten the single column so each assignment below stores a true
        # scalar; implicit size-1-array -> scalar conversion is deprecated
        # in NumPy.
        targets = targets[:, 0]
    elif targets.ndim != 1:
        raise ValueError(
            "Y must have shape (n,) or (n, 1); got %r" % (targets.shape,))

    input_size_0 = X.shape[0] - time_steps
    if input_size_0 < 0:
        raise ValueError(
            "time_steps (%d) exceeds the number of rows in X (%d)"
            % (time_steps, X.shape[0]))
    input_size_1 = X.shape[1]
    X_values = np.zeros((input_size_0, time_steps, input_size_1))
    Y_values = np.zeros((input_size_0,))

    for i in progress_bar(range(input_size_0)):
        X_values[i] = X[i:time_steps+i]
        # Target aligned with the last row of the window.
        Y_values[i] = targets[time_steps+i-1]
    print("length of time-series i/o",X_values.shape,Y_values.shape)
    return X_values, Y_values

In [7]:
# Add the t+1 and t+2 look-ahead outflow columns, then drop every row that
# contains NaN (at least the final rows, which have no future outflow).
# dropna() returns a new frame instead of mutating in place, so the frame
# handed to lag_seq is not shortened and re-running this cell gives the same
# result every time.
lag_df = lag_seq(new_df, 2).dropna()

In [8]:
# Display the lagged frame to verify the new Outflow(t+k) columns.
lag_df

Unnamed: 0,Precipitation,Temperature,Outflow,Outflow(t+1),Outflow(t+2)
0,0.0,102.992,5.0,5.0,5.0
1,0.0,97.790,5.0,5.0,5.0
2,0.0,92.588,5.0,5.0,5.0
3,0.0,87.404,5.0,5.0,5.0
4,0.0,85.172,5.0,5.0,5.0
...,...,...,...,...,...
185537,0.0,50.054,27.0,29.0,30.0
185538,0.0,52.880,29.0,30.0,29.0
185539,0.0,54.284,30.0,29.0,29.0
185540,0.0,55.670,29.0,29.0,31.0


In [9]:
# Target column(s) to predict. Only the two-step-ahead outflow is used here;
# a multi-step variant would list "Outflow(t+1)", "Outflow(t+2)", "Outflow(t+3)".
label_cols = ["Outflow(t+2)"]

In [10]:
# Min-max scaler that normalizes values into the [0, 1] range.
min_max_scaler = MinMaxScaler(feature_range = (0, 1))

In [11]:
# Ordered 80/20 split (shuffle=False keeps the original row order), then pull
# the feature and target columns out as NumPy arrays.
df_train, df_test = train_test_split(
    lag_df,
    train_size=0.8,
    test_size=0.2,
    shuffle=False,
)
x_train, y_train = df_train[train_cols].to_numpy(), df_train[label_cols].to_numpy()
x_test, y_test = df_test[train_cols].to_numpy(), df_test[label_cols].to_numpy()

In [12]:
# Sanity-check the shapes of the arrays produced by the split.
x_train.shape, y_test.shape

((148433, 3), (37109, 1))

In [13]:
# Normalize features and target with scalers fitted on the TRAINING data only.
# Refitting on the test arrays would scale them by their own min/max, making
# train and test values incomparable and leaking test statistics.
x_scaler = MinMaxScaler(feature_range=(0, 1))
x_train_nor = x_scaler.fit_transform(x_train)
y_train_nor = min_max_scaler.fit_transform(y_train)

# Test data reuses the training fit; values may fall slightly outside [0, 1].
# min_max_scaler stays fitted on the target, so predictions can be mapped back
# with min_max_scaler.inverse_transform(...).
x_test_nor = x_scaler.transform(x_test)
y_test_nor = min_max_scaler.transform(y_test)

In [14]:
# Build the sliding-window samples for training and testing.
# TIME_STEPS is the number of consecutive rows in each input window.
TIME_STEPS = 18
X_Train, Y_Train = timeseries(
    x_train_nor, y_train_nor, time_steps=TIME_STEPS, out_steps=1
)
X_Test, Y_Test = timeseries(
    x_test_nor, y_test_nor, time_steps=TIME_STEPS, out_steps=1
)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  import sys


HBox(children=(FloatProgress(value=0.0, max=148415.0), HTML(value='')))


length of time-series i/o (148415, 18, 3) (148415,)


HBox(children=(FloatProgress(value=0.0, max=37091.0), HTML(value='')))


length of time-series i/o (37091, 18, 3) (37091,)


In [15]:
# Inspect the first input window and its scaled target value.
X_Train[0],Y_Train[0]

(array([[0.        , 0.87690308, 0.00277162],
        [0.        , 0.82324545, 0.00277162],
        [0.        , 0.76958782, 0.00277162],
        [0.        , 0.71611586, 0.00277162],
        [0.        , 0.6930932 , 0.00277162],
        [0.        , 0.67007055, 0.00277162],
        [0.        , 0.6470479 , 0.00277162],
        [0.        , 0.63535091, 0.00277162],
        [0.        , 0.62346825, 0.00277162],
        [0.        , 0.61158559, 0.00277162],
        [0.        , 0.6032306 , 0.00277162],
        [0.        , 0.59468994, 0.00277162],
        [0.        , 0.58633494, 0.00277162],
        [0.        , 0.58206461, 0.00277162],
        [0.        , 0.57797995, 0.00277162],
        [0.        , 0.57370962, 0.00277162],
        [0.        , 0.63943557, 0.00277162],
        [0.        , 0.70497586, 0.00277162]]),
 0.0022172949002217295)

In [16]:
# Confirm the windowed array shapes before building the model.
X_Train.shape, Y_Train.shape

((148415, 18, 3), (148415,))

In [17]:
# Build model: two stacked LSTM layers (each followed by dropout) feeding a
# small dense head with a single linear output for regression.
model = Sequential()
model.add(LSTM(units=24, return_sequences=True,
               input_shape=(X_Train.shape[1],X_Train.shape[2])))
model.add(Dropout(0.2))

model.add(LSTM(units=50))
model.add(Dropout(0.2))

model.add(Dense(20, activation='relu'))

model.add(Dense(1, activation='linear'))

# Mean absolute error is the metric to read for this continuous target.
# NOTE(review): 'accuracy' is a classification metric and carries no meaning
# here; it is kept only so existing history/log keys stay available. Remove it
# once nothing downstream reads it.
model.compile(optimizer = 'adam', loss = 'mean_squared_error',
              metrics=['mean_absolute_error', 'accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 18, 24)            2688      
_________________________________________________________________
dropout (Dropout)            (None, 18, 24)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 50)                15000     
_________________________________________________________________
dropout_1 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense (Dense)                (None, 20)                1020      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 21        
Total params: 18,729
Trainable params: 18,729
Non-trainable params: 0
____________________________________________________

In [18]:
# Train for 5 epochs; the test windows double as the validation set, so the
# per-epoch validation figures are computed on X_Test / Y_Test.
history = model.fit(
    x=X_Train,
    y=Y_Train,
    epochs=5,
    validation_data=(X_Test, Y_Test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [19]:
# Pull the trained parameters out of the model for inspection.
weights = model.get_weights()

In [20]:
# Number of weight arrays returned by get_weights().
len(weights)

10

In [21]:
# Shape of every weight array, in the order get_weights() returned them.
[w.shape for w in weights]

[(3, 96),
 (24, 96),
 (96,),
 (24, 200),
 (50, 200),
 (200,),
 (50, 20),
 (20,),
 (20, 1),
 (1,)]