In [1]:
import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')

In [2]:
pyo.init_notebook_mode(connected=True)

In [3]:
passenger = pd.read_csv('international-airline-passengers.csv')[:-1]
passenger.columns = ['date','passengers']
passenger.index = passenger['date']
passenger = passenger.drop('date',axis=1)
passenger.index = pd.to_datetime(passenger.index)
passenger.head()

Unnamed: 0_level_0,passengers
date,Unnamed: 1_level_1
1949-01-01,112.0
1949-02-01,118.0
1949-03-01,132.0
1949-04-01,129.0
1949-05-01,121.0


In [4]:
data = [go.Scatter(x=passenger.index,
                   y=passenger['passengers'],
                   mode='lines+markers')]
layout = go.Layout(title='Passengers on fligths in function of time',
                   xaxis={'title':'Year'},
                   yaxis={'title':'Number of passengers (in thousands)'})
fig = go.Figure(data=data,layout=layout)
pyo.iplot(fig)

Now we will try to predict the last year in the dataset, based on the previous years, so we separate the data accordingly.

In [5]:
train_data = passenger.head(len(passenger)-12)
test_data = passenger.tail(12)

In [6]:
# rescale the data
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_data)
test_scaled = scaler.transform(test_data)

In [7]:
def next_batch(training_data,batch_size,steps):
    """Feeds batches into the neural network."""
    
    # Grab a random starting point for each batch
    rand_start = np.random.randint(0,len(training_data)-steps) 

    # Create Y data for time series in the batches
    y_batch = np.array(training_data[rand_start:rand_start+steps+1]).reshape(1,steps+1)

    return y_batch[:, :-1].reshape(-1, steps, 1), y_batch[:, 1:].reshape(-1, steps, 1) 

In [8]:
# set parameters of the model
num_inputs = 1
num_time_steps = 12
num_neurons = 200
num_outputs = 1
learning_rate = 0.0006
num_train_iterations = 25000
batch_size = 1

In [9]:
# introduce the placeholders
X = tf.placeholder(tf.float32, [None, num_time_steps, num_inputs])
y = tf.placeholder(tf.float32, [None, num_time_steps, num_outputs])

In [10]:
# set up the neural network
cell = tf.contrib.rnn.OutputProjectionWrapper(
    tf.contrib.rnn.BasicLSTMCell(num_units=num_neurons, activation=tf.nn.relu),
    output_size=num_outputs)
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').


In [11]:
# define loss function and optimizer
loss = tf.reduce_mean(tf.square(outputs - y)) # MSE
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train = optimizer.minimize(loss)

In [12]:
# initialize global variables, saver for the model (and set gpu performance)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)

In [13]:
# train the model
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(init)
    
    for iteration in range(num_train_iterations):
        
        X_batch, y_batch = next_batch(train_scaled,batch_size,num_time_steps)
        sess.run(train, feed_dict={X: X_batch, y: y_batch})
        
        if iteration % 100 == 0:
            
            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
            print(iteration, "\tMSE:", mse)
    
    # Save Model for Later
    saver.save(sess, "./ex_time_series_model")

0 	MSE: 0.008628319
100 	MSE: 0.052229013
200 	MSE: 0.038477506
300 	MSE: 0.0052137165
400 	MSE: 0.0062677357
500 	MSE: 0.020165278
600 	MSE: 0.004263454
700 	MSE: 0.00532247
800 	MSE: 0.003924582
900 	MSE: 0.0037568687
1000 	MSE: 0.0034860678
1100 	MSE: 0.0041833385
1200 	MSE: 0.013707967
1300 	MSE: 0.0043636966
1400 	MSE: 0.0043511516
1500 	MSE: 0.003123848
1600 	MSE: 0.0015432443
1700 	MSE: 0.009320882
1800 	MSE: 0.0012984119
1900 	MSE: 0.00497259
2000 	MSE: 0.0025073222
2100 	MSE: 0.003016578
2200 	MSE: 0.0011447474
2300 	MSE: 0.0054342556
2400 	MSE: 0.00057904416
2500 	MSE: 0.0015887631
2600 	MSE: 0.00409219
2700 	MSE: 0.007130801
2800 	MSE: 0.007444971
2900 	MSE: 0.004518594
3000 	MSE: 0.0042848107
3100 	MSE: 0.0021939555
3200 	MSE: 0.0014153403
3300 	MSE: 0.0013836613
3400 	MSE: 0.0014436269
3500 	MSE: 0.00076268805
3600 	MSE: 0.0029116075
3700 	MSE: 0.0018803971
3800 	MSE: 0.003531579
3900 	MSE: 0.0037715014
4000 	MSE: 0.0040681497
4100 	MSE: 0.0020254783
4200 	MSE: 0.002853206

In [14]:
# restore model and predict values
with tf.Session() as sess:
    
    # Use your Saver instance to restore your saved rnn time series model
    saver.restore(sess, "./ex_time_series_model")

    # Create a numpy array for your genreative seed from the last 12 months of the 
    # training set data. Hint: Just use tail(12) and then pass it to an np.array
    train_seed = list(train_scaled[-12:])
    
    ## Now create a for loop that 
    for iteration in range(12):
        X_batch = np.array(train_seed[-num_time_steps:]).reshape(1, num_time_steps, 1)
        y_pred = sess.run(outputs, feed_dict={X: X_batch})
        train_seed.append(y_pred[0, -1, 0])

INFO:tensorflow:Restoring parameters from ./ex_time_series_model


In [15]:
# scale back the results and save in the dataframe
results = scaler.inverse_transform(np.array(train_seed[12:]).reshape(12,1))
test_data['predicted'] = results

In [16]:
theoretical_trace = go.Scatter( x=passenger.index,
                                y=passenger['passengers'],
                                mode='lines+markers',
                                name='Actual passengers')
predicted_trace = go.Scatter(x=test_data.index,
                             y=test_data['predicted'],
                             mode='lines+markers',
                             name='Predicted passengers')
data = [theoretical_trace,predicted_trace]
layout = go.Layout(title='Passengers on fligths in function of time',
                   xaxis={'title':'Year'},
                   yaxis={'title':'Number of passengers (in thousands)'})
fig = go.Figure(data=data,layout=layout)
pyo.iplot(fig)