In [None]:
try:
  # %tensorflow_version only exists in Colab.
  # Anywhere else the magic fails; the failure is swallowed on purpose so the
  # cell is a no-op and the locally installed TensorFlow is used instead.
  %tensorflow_version 2.x
except Exception:
  pass


In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Show which TensorFlow version this notebook is running against.
print(tf.__version__)

2.1.0


In [None]:
import pandas as pd
import numpy as np
import seaborn as sn
from matplotlib import pyplot as plt
#%matplotlib notebook

from datetime import datetime as dt
from pandas_datareader import data as pdr

In [None]:
def plot_series(time, series, format="-", start=0, end=None):
    """Draw ``series`` against ``time`` on the current matplotlib figure.

    Args:
        time: Indexable sequence of x-values.
        series: Indexable sequence of y-values, aligned with ``time``.
        format: Matplotlib format string passed to ``plt.plot``.
        start: First index (inclusive) of the slice to draw.
        end: Last index (exclusive) of the slice to draw; ``None`` means
            "through the final element".
    """
    # Apply the same slice to both axes so they stay aligned.
    shown = slice(start, end)
    plt.plot(time[shown], series[shown], format)
    plt.grid(True)
    plt.ylabel("Value")
    plt.xlabel("Time")

def load_share(ticker, start, end):
    """Fetch the price history of one ticker between ``start`` and ``end``.

    Thin wrapper around ``pandas_datareader``'s ``DataReader`` using the
    ``'yahoo'`` data source; the reader's result is returned unchanged.
    """
    return pdr.DataReader(ticker, 'yahoo', start, end)

def load_shares(tickers, start, end):
    """Load adjusted close prices and forward log returns for several tickers.

    Args:
        tickers: Iterable of ticker symbols accepted by ``load_share``.
        start: Start of the date range, forwarded to ``load_share``.
        end: End of the date range, forwarded to ``load_share``.

    Returns:
        A ``(prices, log_returns)`` tuple of DataFrames. ``prices`` has one
        column per ticker named after the ticker; ``log_returns`` has one
        column per ticker named ``'<ticker>_Log_Return'``.
    """
    prices = pd.DataFrame()
    log_returns = pd.DataFrame()

    for symbol in tickers:
        adj_close = load_share(symbol, start, end)['Adj Close']
        adj_close.name = symbol

        # Forward-looking log return: log(P[t+1]) - log(P[t]).
        # shift(-1) leaves the last row without a successor, so it is NaN.
        next_log_price = np.log(adj_close.shift(-1))
        log_returns['{}_Log_Return'.format(symbol)] = next_log_price - np.log(adj_close)

        # how='right' makes the joined result follow the index of the series
        # being added.
        # NOTE(review): as a consequence the returned price frame ends up on
        # the dates of the LAST ticker in `tickers` — confirm this is intended.
        prices = prices.join(adj_close, how='right')

    return prices, log_returns

In [None]:
# DAL --> Delta Airlines
# UAL --> United Airlines
# AAL --> American Airlines

tickers = ['DAL', 'UAL', 'AAL', 'LHA.DE']
start = dt(2019,1,1)

# The range ends at midnight of the day the cell is run, so the amount of data
# (and everything derived from its length) changes from run to run.
today = dt.today()
end = dt(today.year, today.month, today.day)

df_shares, df_return = load_shares(tickers, start, end)

# Carry the last known price forward over gaps. `.ffill()` replaces
# `fillna(method='ffill')`, whose `method` argument is deprecated in pandas.
df_shares = df_shares.ffill()

# Drop the exchange suffix so the column has a plain name like the others.
df_shares = df_shares.rename(columns={'LHA.DE':'LHA'})

df_shares.head()

In [None]:
# Work on the UAL price column as a plain NumPy array and show how many
# observations it holds.
series = df_shares.UAL.values
len(series)

In [None]:
# UAL adjusted-close prices and a matching integer time axis.
series = df_shares.UAL.values
time = np.arange(len(series))

# Chronological hold-out: the first `split_time` points are used for
# training, everything after them for validation.
split_time = 230
time_train, time_valid = time[:split_time], time[split_time:]
x_train, x_valid = series[:split_time], series[split_time:]

# Settings for the windowed tf.data pipeline.
window_size = 20
batch_size = 32
shuffle_buffer_size = 230

In [None]:
# Plot the validation part of the series on its own figure.
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid)


In [None]:
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
  """Turn a 1-D series into a shuffled, batched dataset of (window, target) pairs.

  Each example consists of ``window_size`` consecutive values as features and
  the value immediately following them as the label.

  Args:
    series: 1-D sequence of values to slice into windows.
    window_size: Number of consecutive values used as model input.
    batch_size: Number of examples per emitted batch.
    shuffle_buffer: Buffer size handed to ``Dataset.shuffle``.

  Returns:
    A ``tf.data.Dataset`` yielding ``(features, label)`` batches.
  """
  span = window_size + 1  # the inputs plus the single value to predict
  ds = tf.data.Dataset.from_tensor_slices(series)
  # Sliding windows with stride 1; incomplete windows at the end are dropped.
  ds = ds.window(span, shift=1, drop_remainder=True)
  # Each window is itself a dataset; batching it by `span` yields one tensor.
  ds = ds.flat_map(lambda w: w.batch(span))
  ds = ds.shuffle(shuffle_buffer)
  # Everything but the last element is the input, the last element the label.
  ds = ds.map(lambda w: (w[:-1], w[-1]))
  return ds.batch(batch_size).prefetch(1)

In [None]:
dataset = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)


# Small fully connected regressor: `window_size` inputs -> one predicted value.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(10, input_shape=[window_size], activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(1)
])

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
model.compile(loss="mse", optimizer=tf.keras.optimizers.SGD(learning_rate=1e-6, momentum=0.9))
hist = model.fit(dataset,epochs=100,verbose=0)




In [None]:
# Build every input window up front and predict them in one batched call
# instead of calling model.predict once per window.
# The index lives inside the comprehension, so the notebook-level `time`
# array (the x-axis) is no longer overwritten by an integer loop variable.
windows = np.stack([series[i:i + window_size]
                    for i in range(len(series) - window_size)])
forecast = model.predict(windows)  # one row per window, one column (Dense(1))

# Window i predicts series[i + window_size], so dropping the first
# `split_time - window_size` rows leaves exactly the validation forecasts.
forecast = forecast[split_time-window_size:]
results = forecast[:, 0]


plt.figure(figsize=(10, 6))

plot_series(time_valid, x_valid)
plot_series(time_valid, results)

In [None]:
# Mean absolute error between the validation values and the forecast.
tf.keras.metrics.mean_absolute_error(x_valid, results).numpy()

In [None]:
dataset = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)


model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(10, input_shape=[window_size], activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(1)
])

# Learning-rate sweep: start at 1e-8 and multiply by 10 every 20 epochs.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-8 * 10**(epoch / 20))
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-8, momentum=0.9)
model.compile(loss="mse", optimizer=optimizer)
history = model.fit(dataset, epochs=100, callbacks=[lr_schedule], verbose=0)

In [None]:
# Learning rate in effect at each of the 100 sweep epochs (same formula as the
# scheduler lambda), plotted against the recorded loss on a log x-axis.
lrs = 1e-8 * np.power(10.0, np.arange(100) / 20)
plt.semilogx(lrs, history.history["loss"])
# Fixed view: learning rates 1e-8..1e-3, losses 0..300.
plt.axis([1e-8, 1e-3, 0, 300])

In [None]:
# From here on the model looks at 30 past values instead of 20.
window_size = 30
dataset = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)

model = tf.keras.models.Sequential([
  tf.keras.layers.Dense(10, activation="relu", input_shape=[window_size]),
  tf.keras.layers.Dense(10, activation="relu"),
  tf.keras.layers.Dense(1)
])

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-6, momentum=0.9)
model.compile(loss="mse", optimizer=optimizer)
history = model.fit(dataset, epochs=500, verbose=0)

In [None]:
# Training loss per epoch, taken from the most recent `history`.
loss = history.history['loss']
epochs = range(len(loss))
plt.plot(epochs, loss, 'b', label='Training Loss')
plt.show()

In [None]:
# Plot all but the first 10 epochs (presumably so the large initial losses do
# not flatten the rest of the curve).
loss = history.history['loss']
epochs = range(10, len(loss))
plot_loss = loss[10:]
plt.plot(epochs, plot_loss, 'b', label='Training Loss')
plt.show()

In [None]:
# Build every input window up front and predict them in one batched call
# instead of calling model.predict once per window.
# The index lives inside the comprehension, so the notebook-level `time`
# array (the x-axis) is no longer overwritten by an integer loop variable.
windows = np.stack([series[i:i + window_size]
                    for i in range(len(series) - window_size)])
forecast = model.predict(windows)  # one row per window, one column (Dense(1))

# Window i predicts series[i + window_size], so dropping the first
# `split_time - window_size` rows leaves exactly the validation forecasts.
forecast = forecast[split_time-window_size:]
results = forecast[:, 0]


plt.figure(figsize=(10, 6))

plot_series(time_valid, x_valid)
plot_series(time_valid, results)

In [None]:
# Mean absolute error between the validation values and the forecast.
tf.keras.metrics.mean_absolute_error(x_valid, results).numpy()

Prediction

In [None]:
# Re-read the UAL price column as a NumPy array and show its length.
series = df_shares.UAL.values
len(series)

In [None]:
# Integer time axis covering the whole series.
time = np.arange(0,len(series))

# No hold-out in this section: the training arrays span the entire series.
time_train = time[:]
x_train = series[:]

# NOTE: window_size is reassigned to 30 at the top of the training cell that
# follows, so the value set here is not the one the model is built with.
window_size = 20
batch_size = 32
shuffle_buffer_size = 230

plot_series(time, series)

In [None]:
# The model looks at the 30 most recent values.
window_size = 30
dataset = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)

model = tf.keras.models.Sequential([
  tf.keras.layers.Dense(10, activation="relu", input_shape=[window_size]),
  tf.keras.layers.Dense(10, activation="relu"),
  tf.keras.layers.Dense(1)
])

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-6, momentum=0.9)
model.compile(loss="mse", optimizer=optimizer)
history = model.fit(dataset, epochs=500, verbose=0)

In [None]:
# Training loss per epoch, taken from the most recent `history`.
loss = history.history['loss']
epochs = range(len(loss))
plt.plot(epochs, loss, 'b', label='Training Loss')
plt.show()

In [None]:
# Plot all but the first 10 epochs (presumably so the large initial losses do
# not flatten the rest of the curve).
loss = history.history['loss']
epochs = range(10, len(loss))
plot_loss = loss[10:]
plt.plot(epochs, plot_loss, 'b', label='Training Loss')
plt.show()

In [None]:
# Inspect the range of window start indices used by the forecast loop.
range(len(series) - window_size)

In [None]:
# Build every input window up front and predict them in one batched call
# instead of calling model.predict once per window.
# The index lives inside the comprehension, so the notebook-level `time`
# array (the x-axis) is no longer overwritten by an integer loop variable.
windows = np.stack([series[i:i + window_size]
                    for i in range(len(series) - window_size)])
forecast = model.predict(windows)  # one row per window, one column (Dense(1))

# No validation slice here: keep the forecast for every window.
# results[i] is the prediction for series[i + window_size].
results = forecast[:, 0]


plt.figure(figsize=(10, 6))
plt.plot(results)

In [None]:
# `results` holds one forecast per window over the WHOLE series, and
# results[i] predicts series[i + window_size]. The matching targets are
# therefore series[window_size:], not the shorter `x_valid` left over from the
# earlier train/validation split (which has a different length).
# This is an in-sample error: the model above was fitted on the full series.
tf.keras.metrics.mean_absolute_error(series[window_size:], results).numpy()

In [None]:
# Imported here because this cell runs before the later cell that also
# imports it; without it the cell fails on a fresh kernel.
from scipy.stats import norm

# Start from the observed data; forecasts are appended one step at a time.
prediction = series
left_bound_confidence = []
right_bound_confidence = []

# 5% and 95% standard-normal quantiles -> a two-sided 90% band.
z_left = norm.ppf(0.05)
z_right = norm.ppf(0.95)

for i in range(30):
  # Feed the model the most recent `window_size` values, which after the first
  # step include earlier forecasts. Slicing from the end removes the need for
  # an externally defined start offset.
  window = prediction[-window_size:]
  new_value = model.predict(window[np.newaxis])
  prediction = np.append(prediction, new_value)

  # Rough band from the input window's own mean and standard deviation; it
  # does not use the model's error in any way.
  interval_left = z_left*window.std() + window.mean()
  interval_right = z_right*window.std() + window.mean()

  left_bound_confidence.append(interval_left)
  right_bound_confidence.append(interval_right)

# x positions of the 30 appended forecast steps.
x_axis = np.arange(len(prediction)-30, len(prediction))

plt.plot(prediction)
plt.plot(x_axis, np.array(left_bound_confidence), c='r')
plt.plot(x_axis, np.array(right_bound_confidence), c='r')

In [None]:
# Sanity check: number of band values collected (one per forecast step).
len(right_bound_confidence)

In [None]:
from scipy.stats import norm

# 5% and 95% quantiles of the standard normal distribution
z_left, z_right = norm.ppf(0.05), norm.ppf(0.95)

# Band limits: mean of the whole series shifted by the quantile-scaled
# standard deviation of the whole series
interval_left = series.mean() + z_left*series.std()
interval_right = series.mean() + z_right*series.std()

print('90% confidence interval is ', (interval_left, interval_right))
plt.plot(prediction)
# Shade the band across the 30 steps that follow the observed data.
plt.fill_between([len(series), len(series)+30], interval_left, interval_right)

In [None]:
# Integers 0..499 used as a stand-in input.
time_pred = np.arange(0, 500)

# NOTE(review): this feeds the index values 240..269 to the model, not prices.
# It looks like a shape/sanity probe rather than a real forecast — confirm or
# remove.
x = 240
model.predict(time_pred[x:x+30][np.newaxis])

In [None]:
# Predict the next value from the last 30 observed prices.
x = len(series) - 30
model.predict(series[x:x+30][np.newaxis])

In [None]:
# Show the 30-value input slice with a leading batch axis added.
series[x:x+30][np.newaxis]