### Load Data

In [None]:
#Connect to Google Drive: mount it at /gdrive so its files can be read through ordinary
#file paths. The google.colab module is provided by the Colab runtime.
from google.colab import drive
drive.mount('/gdrive')

In [None]:
import pandas as pd
import numpy as np

In [None]:
#Read the air traffic data, using the 'Month' column as the row index,
#and order the rows by that index
csv_path = '/gdrive/My Drive/AI-ML/data/International-airline-passengers.csv'
df = pd.read_csv(csv_path, index_col='Month').sort_index()
df.head()

In [None]:
#Number of records: (rows, columns) of the loaded frame
df.shape

In [None]:
#Plot the data with the DataFrame's default plot (values against the row index)
df.plot()

### Data Preprocessing

In [None]:
#Check for null values: number of missing entries in each column
df.isnull().sum()

In [None]:
#Drop every row that contains a missing value
df = df.dropna()

In [None]:
#Check Data Range (minimum and maximum of each column)
#Use the DataFrame's own reductions rather than np.min/np.max: NumPy forwards axis=None
#to DataFrame.min/max, and pandas 2.0 changed axis=None to reduce over the whole frame,
#so the NumPy calls print different things depending on the installed pandas version.
print('Min', df.min())
print('Max', df.max())

Let's normalize the data.

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
#Normalize the data: rescale every column to the [0, 1] range
#NOTE(review): the scaler is fitted on the full series, before the train/test split
#further down, so the test portion's min/max take part in the scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit(df).transform(df)

In [None]:
#Check data now (first few rows only, instead of dumping the whole array)
scaled[:5]

### Split Data

In [None]:
#The first 70% of the examples (the beginning of the series) are used for training
train_size = int(len(scaled) * 0.70)

#The remaining ~30% are used for testing.
#Fix: the original computed len(scaled - train_size), which subtracts train_size from
#every element and then takes the length, so it always equalled len(scaled).
test_size = len(scaled) - train_size

#Split in order (no shuffling): the test rows all come after the training rows
train, test = scaled[:train_size, :], scaled[train_size:, :]
print('train: {}\ntest: {}'.format(len(train), len(test)))

In [None]:
#Training data size: (rows, columns) of the training split
train.shape

### Build Input and Output data
for both Training and Test

In [None]:
#window - how long the sequence will be
def create_dataset(dataset, window=1):
    """Slice a series into sliding-window inputs and next-step targets.

    For every position ``i`` the input is the ``window`` consecutive values
    ``dataset[i:i + window, 0]`` and the target is the value right after them,
    ``dataset[i + window, 0]``. Only column 0 of ``dataset`` is used.

    Args:
        dataset: 2-D NumPy array indexed as ``dataset[row, column]``.
        window: Number of consecutive values in each input sequence.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(len(dataset) - window, window)`` and ``y`` has shape
        ``(len(dataset) - window,)``. When ``dataset`` is too short to yield
        any sample, ``X`` has shape ``(0, window)`` and ``y`` has shape ``(0,)``.
    """
    dataX, dataY = [], []

    for i in range(len(dataset) - window):
        dataX.append(dataset[i:i + window, 0])
        dataY.append(dataset[i + window, 0])

    if not dataX:
        # np.array([]) would be 1-D with shape (0,); keep the window axis so that
        # callers reading X.shape[1] (e.g. to reshape for an LSTM) still work.
        return np.empty((0, window)), np.empty((0,))

    return np.array(dataX), np.array(dataY)

In [None]:
#Create input windows and next-value targets for the training and test splits;
#each input holds window_size consecutive values
window_size = 12
X_train, y_train = create_dataset(train, window=window_size)
X_test, y_test = create_dataset(test, window=window_size)

In [None]:
#Training inputs: (number of samples, window_size)
X_train.shape

In [None]:
#First training sample: the first window_size consecutive scaled values
X_train[0]

In [None]:
#Training targets: one value per training sample
y_train.shape

In [None]:
#Test inputs: (number of samples, window_size)
X_test.shape

In [None]:
#Make the data 3-dimensional, (samples, time steps, 1 feature), as needed for the LSTM
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [None]:
#Check the feature shapes now: training first, then test, one per line
print(X_train.shape, X_test.shape, sep='\n')

### Build the Model

In [None]:
import tensorflow as tf

In [None]:
#Define a model with LSTM layer
#Clear Keras' global state first so that re-running this cell starts from scratch,
#then create an empty Sequential model; its layers are added in the following cells.
tf.keras.backend.clear_session()
model = tf.keras.Sequential()

In [None]:
#LSTM layer with 32 units; every input sample is a sequence of window_size steps
#with 1 feature per step
model.add(tf.keras.layers.LSTM(32, input_shape=(window_size, 1)))

In [None]:
#Output tensor of the model built so far (only the LSTM layer has been added)
model.output

In [None]:
#Normalize the LSTM output, then map it to a single value with a sigmoid, whose (0, 1)
#range matches the targets scaled to [0, 1]
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
#Train with the Adam optimizer on mean squared error
model.compile(optimizer='adam', loss='mse')

In [None]:
#Train for 200 epochs in batches of 32; the test split is passed as validation data,
#so its loss is reported after every epoch
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=200,
    validation_data=(X_test, y_test),
)

### Visualize Prediction
For both training and Test Data

In [None]:
#Get prediction for both Training and Test Data
#(the model was trained on scaled targets, so these values are on the scaled range)
trainPredict = model.predict(X_train)
testPredict = model.predict(X_test)

In [None]:
#First training prediction, still on the scaled range
trainPredict[0]

In [None]:
#Un-normalize the predicted data (map it back to the original value range)
#NOTE: the results overwrite the scaled predictions under the same names, so running
#this cell a second time would apply the inverse transform twice.
trainPredict = scaler.inverse_transform(trainPredict)
testPredict = scaler.inverse_transform(testPredict)

In [None]:
#First training prediction again, after the inverse transform
trainPredict[0]

Display Predicted vs Actual values

In [None]:
import matplotlib.pyplot as plt

In [None]:
#Shift the training predictions by the window size: the first prediction corresponds
#to position window_size, because it needs window_size earlier values as input
trainPredictPlot = np.full_like(scaled, np.nan)
trainPredictPlot[window_size:len(trainPredict) + window_size, :] = trainPredict

#Align the test predictions with the end of the series (the last test prediction
#corresponds to the last value); positions without a prediction stay NaN and are not drawn
testPredictPlot = np.full_like(scaled, np.nan)
testPredictPlot[len(scaled) - len(testPredict):, :] = testPredict

#Plot the original series and both sets of predictions, labelled so the figure
#can be read on its own
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(scaler.inverse_transform(scaled), label='Actual')
ax.plot(trainPredictPlot, label='Prediction (training data)')
ax.plot(testPredictPlot, label='Prediction (test data)')
ax.set(title='Actual vs. predicted values', xlabel='Time step', ylabel='Value (original scale)')
ax.legend()
plt.show()

Try building the model with a different window size to see if it learns better!