In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import date, timedelta, datetime
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_pinball_loss
from sklearn.linear_model import QuantileRegressor
from matplotlib import pyplot as plt
from numpy.lib.stride_tricks import sliding_window_view
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow_addons.losses import pinball_loss
from sklearn.metrics import mean_pinball_loss

In [2]:
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]
horizons = [1,2,3,4,5]

# Get data

In [3]:
current_day = datetime.today()
data = yf.download("^GDAXI", start="2000-01-01", end=current_day)["Adj Close"]

[*********************100%***********************]  1 of 1 completed


# Wavenet modeling

## Data preparation

Add all five horizons as a predictor

In [4]:
data_prep = pd.DataFrame(data)
data_prep["log_return"] = 100 * (np.log(data_prep) - np.log(data_prep.shift(1)))
for horizon in range(1,6):
    data_prep[horizon] = 100 * (np.log(data_prep["Adj Close"]) - np.log(data_prep["Adj Close"].shift(horizon)))
    data_prep[horizon] = data_prep[horizon].shift(-horizon)

#Drop CLose
data_prep.drop("Adj Close", axis = 1, inplace = True)

### Create test, val, train and prediction data

Total length of data is 5530:
- Will take a year - 360 as a test set
- Whole year as validation set
- Rest as training set

In [5]:
window_size = 128
max_horizon = 5
test_size = 360
val_size = 360

pred_data = data_prep[-128:]

#Drop Nans
data_prep.dropna(inplace = True)

test_data = data_prep[-test_size:]
val_data = data_prep[-val_size-test_size:-test_size]
train_data = data_prep[:-val_size-test_size]

### Normalize data

In [6]:
train_mean = train_data.mean()
train_sd = train_data.std()

In [7]:
def normalize(data, mean = train_mean, sd = train_sd):
    return (data-mean)/sd

In [8]:
train_data = normalize(train_data)
val_data = normalize(val_data)
test_data = normalize(test_data)
pred_data = normalize(pred_data)

### Apply rolling window and get predictor

In [9]:
def convert_data(data, window_size = window_size):
    new_data = np.zeros(shape = (5, len(data),3))
    for h in horizons:
        new_data[h-1,:,0:2] = np.array(data.loc[:,["log_return", h]])
        new_data[h-1,:,2] = h-1
    window = sliding_window_view(new_data,window_size, axis = 1)
    window = np.swapaxes(window, 2,3)
    window = window.reshape(-1, window.shape[2], window.shape[3])
    features = window[:,:,0:1]
    embedding = window[:,:,2:3]
    X = [features, embedding]
    Y = window[:,:,1:2]
    return X,Y

In [10]:
x_train, y_train = convert_data(train_data)
x_val, y_val = convert_data(val_data)
x_test, y_test = convert_data(test_data)

## Build model

In [11]:
def model(dropout_rate = 0.1, window_size = window_size):
    """
    trainX -- input values; shape: [number of samples, NUM_UNROLLINGS, 1]
    trainY -- output values (inputs shifted by 1); shape: [number of samples, NUM_UNROLLINGS, 1]
    """
    #Define parameters
    filters = 8
    kernel_size = 2
    dilation_rates = [2**i for i in range(7)]
    
    #Define Inputs
    input_features = Input(shape=(window_size, 1))
    input_embedding = Input(shape=(window_size))
    #Extract embedding
    emb = Embedding(input_dim = 5, output_dim = 2)(input_embedding)
    x = tf.concat([input_features, emb], axis = 2)
    
    #Base layers
    for dilation in dilation_rates:
        #Preprocessing layer
        x = Conv1D(8, 1, padding='same', activation='relu')(x) 
        #Dilated convolution
        z = Conv1D(filters, kernel_size, activation="relu", padding="causal", dilation_rate=dilation)(x)
        #Residual connection
        x = Add()([x,z])
    

    #Fully Connected Layer
    out = Conv1D(32, 1, padding = "same")(x)
    out = Dropout(dropout_rate)(out)
    out = Conv1D(1, 1, activation="linear")(out)
    #Extract only last output as prediction
    def output_slice(x):
        return x[:,-1,:]
    #out = Lambda(output_slice)(out)
    cnn = Model([input_features, input_embedding], out)
    return cnn

In [12]:
cnn = model()
cnn.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 128)]        0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            [(None, 128, 1)]     0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 128, 2)       10          input_2[0][0]                    
__________________________________________________________________________________________________
tf.concat (TFOpLambda)          (None, 128, 3)       0           input_1[0][0]                    
                                                                 embedding[0][0]              

In [13]:
BATCH_SIZE = 256
EPOCHS = 100
learning_rate = 0.01
optimizer = tf.keras.optimizers.Adadelta(learning_rate = learning_rate)

#Early stopping
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, min_delta = 1e-5)

In [14]:
def train_model(model, optimizer, x_train, y_train, x_val, y_val, quantile, BATCH_SIZE = BATCH_SIZE, EPOCHS = EPOCHS, callback = callback, verbose = False):
    model.compile(optimizer = optimizer,  loss = lambda true,pred: pinball_loss(true, pred, tau = quantile))
    history = model.fit(x_train, y_train, validation_data = (x_val, y_val), epochs = EPOCHS, batch_size = BATCH_SIZE, shuffle=True,
        callbacks = [callback], verbose = verbose)
    return model

In [15]:
def train_all_models(x_train, y_train, x_val, y_val, quantiles = quantiles, optimizer = optimizer, batch_size = BATCH_SIZE, epochs = EPOCHS, callback = callback):
    models = dict()
    for quantile in quantiles:
        cnn = model()
        cnn = train_model(cnn, optimizer, x_train, y_train, x_val, y_val, quantile)
        print("Training finished for quantile {}".format(quantile))
        models[quantile] = cnn
    return models

In [16]:
models = train_all_models(x_train, y_train, x_val, y_val)

Training finished for quantile 0.025
Training finished for quantile 0.25
Training finished for quantile 0.5
Training finished for quantile 0.75
Training finished for quantile 0.975


## Predict test data

### Evaluate Loss

In [17]:
true_values = np.squeeze(y_test)
for quantile in quantiles:
    pred = np.squeeze(models[quantile].predict(x_test))
    loss = mean_pinball_loss(true_values, pred, alpha = quantile)
    print("Pinball Loss for quantile {}: \t {}".format(quantile, loss))

Pinball Loss for quantile 0.025: 	 0.0503935403370947
Pinball Loss for quantile 0.25: 	 0.2075681811715162
Pinball Loss for quantile 0.5: 	 0.2308423626230055
Pinball Loss for quantile 0.75: 	 0.19171657544535486
Pinball Loss for quantile 0.975: 	 0.036486824936741595


### Test plausability

In [18]:
true_values = np.squeeze(y_test)
for quantile in quantiles:
    pred = np.squeeze(models[quantile].predict(x_test))
    q_smaller = (pred >= true_values).sum()
    emp_quant = q_smaller / true_values.size
    print("Quantile met for quantile = {}: \t {} %".format(quantile, np.round(emp_quant,4)*100))

Quantile met for quantile = 0.025: 	 1.79 %
Quantile met for quantile = 0.25: 	 16.36 %
Quantile met for quantile = 0.5: 	 49.54 %
Quantile met for quantile = 0.75: 	 79.28 %
Quantile met for quantile = 0.975: 	 98.11999999999999 %


## Predict new data

In [19]:
x_train, y_train = convert_data(train_data.append(val_data))
x_pred, y_pred = convert_data(pred_data)

In [20]:
models = train_all_models(x_train, y_train, x_test, y_test)

Training finished for quantile 0.025
Training finished for quantile 0.25
Training finished for quantile 0.5
Training finished for quantile 0.75
Training finished for quantile 0.975


In [21]:
future_pred = np.zeros(shape = (5,5))
for cnt,quantile in enumerate(quantiles):
    new_pred = models[quantile].predict(x_pred)
    for horizon in horizons:
        future_pred[cnt,horizon-1] = new_pred[horizon-1,0]
#Renormalize
for h in range(5):
    future_pred[:,h] = future_pred[:,h]*train_sd[h+1] + train_mean[h+1]



In [22]:
df_horizons = [1,2,5,6,7]
final_prediction = pd.DataFrame(columns = ["forecast_date","target","horizon","q0.025","q0.25","q0.5","q0.75","q0.975"])
final_prediction["forecast_date"] = [(current_day).strftime("%Y-%m-%d") for x in df_horizons]
final_prediction["horizon"] = ["{} day".format(x) for x in df_horizons]
final_prediction["target"] = "DAX"

for cnt, quantile in enumerate(quantiles):
    final_prediction["q{}".format(quantile)] = future_pred[cnt,:]

In [23]:
final_prediction

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2021-11-24,DAX,1 day,-2.467739,-0.504437,0.102366,0.728461,2.509569
1,2021-11-24,DAX,2 day,-3.456577,-0.67689,0.150206,1.02878,3.542781
2,2021-11-24,DAX,5 day,-4.182802,-0.766539,0.188933,1.25382,4.289406
3,2021-11-24,DAX,6 day,-4.779356,-0.908036,0.222214,1.439894,4.897632
4,2021-11-24,DAX,7 day,-5.323389,-1.033752,0.25351,1.610077,5.492815


## Save final prediction

In [24]:
current_date = date.today().strftime("%Y-%m-%d")
final_prediction.to_pickle("../evaluation/predictions/single/DAX_{}".format(current_date))