# Univariate Time Series

In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import math
from keras.models import Sequential
from keras.layers.recurrent import LSTM
from keras.layers.core import Dense, Activation, Dropout
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle

Using TensorFlow backend.


In [2]:
file=r'C:\Users\salumuri\Downloads\international-airline-passengers.csv'

In [3]:
data = pd.read_csv(file)

In [4]:
data.head()

Unnamed: 0,Month,passengers_in_100_thousands
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121


In [5]:
ts_data=data.iloc[:,1]

In [6]:
scaler = MinMaxScaler(feature_range = (0, 1))
dataset = scaler.fit_transform(pd.DataFrame(ts_data))
# MinMaxScaler needs the data in 2D array , thats why the syntactical jugglery above

In [7]:
train_fraction = 0.60

train_size = int(len(dataset) * train_fraction)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]

In [8]:
def create_dataset(dataset, window_size = 1):
    data_X, data_Y = [], []
    for i in range(len(dataset) - window_size - 1):
        a = dataset[i:(i + window_size), 0]
        data_X.append(a)
        data_Y.append(dataset[i + window_size, 0])
    return(np.array(data_X), np.array(data_Y))

In [9]:
window_size = 10
train_X, train_Y = create_dataset(train, window_size)
test_X, test_Y = create_dataset(test, window_size)
print("Original training data shape:")
print(train_X.shape)

Original training data shape:
(75, 10)


In [10]:
pd.DataFrame(train_X).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.015444,0.027027,0.054054,0.048263,0.032819,0.059846,0.084942,0.084942,0.061776,0.028958
1,0.027027,0.054054,0.048263,0.032819,0.059846,0.084942,0.084942,0.061776,0.028958,0.0
2,0.054054,0.048263,0.032819,0.059846,0.084942,0.084942,0.061776,0.028958,0.0,0.027027
3,0.048263,0.032819,0.059846,0.084942,0.084942,0.061776,0.028958,0.0,0.027027,0.021236
4,0.032819,0.059846,0.084942,0.084942,0.061776,0.028958,0.0,0.027027,0.021236,0.042471


In [11]:
# shape of data should be : (num_obs, feature_dimension, sequence_length)
train_X = np.reshape(train_X, (train_X.shape[0], 1, train_X.shape[1]))
test_X = np.reshape(test_X, (test_X.shape[0], 1, test_X.shape[1]))
print("New training data shape:")
print(train_X.shape)

New training data shape:
(75, 1, 10)


In [12]:
# input shape : (feature_dimension, seq_length)
model = Sequential()
    
model.add(LSTM(4, 
               input_shape = (1, window_size)))
model.add(Dense(1))
model.compile(loss = "mean_squared_error", 
              optimizer = "adam")
model.fit(train_X, 
          train_Y, 
          epochs = 100, 
          batch_size = 1, 
          verbose = 2)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Epoch 1/100
 - 2s - loss: 0.0623
Epoch 2/100
 - 0s - loss: 0.0239
Epoch 3/100
 - 0s - loss: 0.0103
Epoch 4/100
 - 0s - loss: 0.0079
Epoch 5/100
 - 0s - loss: 0.0071
Epoch 6/100
 - 0s - loss: 0.0065
Epoch 7/100
 - 0s - loss: 0.0060
Epoch 8/100
 - 0s - loss: 0.0055
Epoch 9/100
 - 0s - loss: 0.0051
Epoch 10/100
 - 0s - loss: 0.0048
Epoch 11/100
 - 0s - loss: 0.0045
Epoch 12/100
 - 0s - loss: 0.0042
Epoch 13/100
 - 0s - loss: 0.0040
Epoch 14/100
 - 0s - loss: 0.0038
Epoch 15/100
 - 0s - loss: 0.0038
Epoch 16/100
 - 0s - loss: 0.0036
Epoch 17/100
 - 0s - loss: 0.0035
Epoch 18/100
 - 0s - loss: 0.0034
Epoch 19/100
 - 0s - loss: 0.0034
Epoch 20/100
 - 0s - loss: 0.0032
Epoch 21/100
 - 0s - loss: 0.0032
Epoch 22/100
 - 0s - loss: 0.0032
Epoch 23/100
 - 0s - loss: 0.0031
Epoch 24/100
 - 0s - loss: 0.0030
Epoch 25/100
 - 0s - loss: 0.0029
Epoch 26/100
 - 0s - loss: 0.0029
Epoch

<keras.callbacks.History at 0x1b36311bd30>

In [13]:
# Make predictions on the original scale of the data.
pred = scaler.inverse_transform(model.predict(train_X))
# Prepare Y data to also be on the original scale for interpretability.
orig_data = scaler.inverse_transform([train_Y])
# Calculate RMSE.
score_train = np.sqrt(mean_squared_error(orig_data[0], pred[:, 0]))

In [14]:
# Make predictions on the original scale of the data.
pred = scaler.inverse_transform(model.predict(test_X))
# Prepare Y data to also be on the original scale for interpretability.
orig_data = scaler.inverse_transform([test_Y])
# Calculate RMSE.
score_test = np.sqrt(mean_squared_error(orig_data[0], pred[:, 0]))

In [15]:
print("Training data score: ",  score_train)
print("Test data score: ",  score_test)

Training data score:  18.924087727201943
Test data score:  49.73938035091093


# Multiple Time Series

In [16]:
file=r'C:\Users\salumuri\Downloads\all_stocks_2006-01-01_to_2018-01-01.csv'
data=pd.read_csv(file)

In [17]:
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Name
0,2006-01-03,77.76,79.35,77.24,79.11,3117200,MMM
1,2006-01-04,79.49,79.49,78.25,78.71,2558000,MMM
2,2006-01-05,78.41,78.65,77.56,77.99,2529500,MMM
3,2006-01-06,78.64,78.9,77.64,78.63,2479500,MMM
4,2006-01-09,78.5,79.83,78.46,79.02,1845600,MMM


In [18]:
data=data[['Date','Close','Name']]

In [19]:
multi_ts=data.pivot_table(columns='Name',values='Close',index='Date')

In [20]:
multi_ts.head()

Name,AABA,AAPL,AMZN,AXP,BA,CAT,CSCO,CVX,DIS,GE,...,MSFT,NKE,PFE,PG,TRV,UNH,UTX,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-01-03,40.91,10.68,47.58,52.58,70.44,57.8,17.45,59.08,24.4,35.37,...,26.84,10.74,23.78,58.78,45.99,61.73,56.53,30.38,46.23,58.47
2006-01-04,40.97,10.71,47.25,51.95,71.17,59.27,17.85,58.91,23.99,35.32,...,26.97,10.69,24.55,58.89,46.5,61.88,56.19,31.27,46.32,58.57
2006-01-05,41.53,10.63,47.65,52.5,70.33,59.27,18.35,58.19,24.41,35.23,...,26.99,10.76,24.58,58.7,46.95,61.69,55.98,31.63,45.69,58.28
2006-01-06,43.21,10.9,47.87,52.68,69.35,60.45,18.77,59.25,24.74,35.47,...,26.91,10.72,24.85,58.64,47.21,62.9,56.16,31.35,45.88,59.43
2006-01-09,43.42,10.86,47.08,53.99,68.77,61.55,19.06,58.95,25.0,35.38,...,26.86,10.88,24.85,59.08,47.23,61.4,56.8,31.48,45.71,59.4


In [21]:
multi_ts.shape

(3020, 31)

In [22]:
multi_ts.fillna(0,inplace=True)

In [23]:
multi_ts.columns

Index(['AABA', 'AAPL', 'AMZN', 'AXP', 'BA', 'CAT', 'CSCO', 'CVX', 'DIS', 'GE',
       'GOOGL', 'GS', 'HD', 'IBM', 'INTC', 'JNJ', 'JPM', 'KO', 'MCD', 'MMM',
       'MRK', 'MSFT', 'NKE', 'PFE', 'PG', 'TRV', 'UNH', 'UTX', 'VZ', 'WMT',
       'XOM'],
      dtype='object', name='Name')

In [24]:
scaler=MinMaxScaler(feature_range=(0,1))

In [25]:
dataset = scaler.fit_transform(multi_ts)
y_min=min(multi_ts['CSCO'])
y_max=max(multi_ts['CSCO'])

In [26]:
train_fraction = 0.95

train_size = int(len(dataset) * train_fraction)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size, ], dataset[train_size:len(dataset), ]

In [27]:
train.shape

(2869, 31)

In [28]:
test.shape

(151, 31)

In [29]:
def create_dataset(dataset, window_size = 1):
    data_X, data_Y = [], []
    for i in range(len(dataset) - window_size):
        a = dataset[i:(i + window_size),]
        data_X.append(a)
        data_Y.append(dataset[i + window_size, 0])
    return(np.array(data_X), np.array(data_Y))

In [30]:
window_size = 8
train_X, train_Y = create_dataset(train, window_size)
test_X, test_Y = create_dataset(test, window_size)


In [31]:
train_X.shape,train_Y.shape

((2861, 8, 31), (2861,))

In [32]:
train_X = np.reshape(train_X, (train_X.shape[0], window_size, train_X.shape[2]))
test_X = np.reshape(test_X, (test_X.shape[0], window_size, test_X.shape[2]))

In [34]:
model = Sequential()
model.add(LSTM(window_size, input_shape=(window_size,train_X.shape[2])))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(train_X, train_Y, epochs=3, batch_size=1, verbose=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x1b36931f0b8>

In [35]:
testPredict = model.predict(test_X)

In [36]:
def inverse_transform(data,ymax,ymin):
    newdata=ymin+data*(ymax-ymin)
    return(newdata)

In [37]:
testPredict = inverse_transform(testPredict,y_max,y_min)
test_Y = inverse_transform(test_Y,y_max,y_min)

In [38]:
import math

testScore = math.sqrt(mean_squared_error(test_Y, testPredict))
print('Test Score: %.2f RMSE' % (testScore))

Test Score: 4.49 RMSE
