In [365]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date, datetime, timedelta
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import *
import tensorflow_probability as tfp
from tensorflow.keras.models import Model
import random
from sklearn.model_selection import train_test_split
import tensorflow.keras.backend as K
from tensorflow.math import erf
from scipy.stats import norm
from sklearn.preprocessing import Normalizer,StandardScaler, LabelEncoder
from tensorflow_addons.losses import pinball_loss
from tensorflow_lattice.layers import Linear

# Read and preprocess data

In [2]:
data = pd.read_feather("data/berlin_data/historic_data/icon_eps_wind_10m.feather")
data.head()

Unnamed: 0,init_tm,met_var,location,fcst_hour,obs_tm,obs,ens_1,ens_2,ens_3,ens_4,...,ens_33,ens_34,ens_35,ens_36,ens_37,ens_38,ens_39,ens_40,ens_mean,ens_var
0,2018-12-18 00:00:00+00:00,wind_10m,Berlin,0.0,2018-12-18 00:00:00+00:00,6.48,3.8,6.56,4.54,5.05,...,5.59,5.45,5.3,4.47,5.99,3.48,4.92,5.09,4.58675,0.565448
1,2018-12-18 00:00:00+00:00,wind_10m,Berlin,1.0,2018-12-18 01:00:00+00:00,6.12,3.68,7.03,5.06,5.33,...,4.92,5.18,4.98,4.88,6.39,3.74,5.18,4.85,4.6975,0.663747
2,2018-12-18 00:00:00+00:00,wind_10m,Berlin,2.0,2018-12-18 02:00:00+00:00,4.32,3.28,7.1,5.39,5.44,...,4.91,4.88,5.2,4.8,6.66,4.14,5.05,4.8,4.8165,0.8301
3,2018-12-18 00:00:00+00:00,wind_10m,Berlin,3.0,2018-12-18 03:00:00+00:00,5.04,3.47,7.45,6.11,5.66,...,5.37,4.58,5.3,4.86,6.96,4.41,5.26,4.74,5.01625,1.222111
4,2018-12-18 00:00:00+00:00,wind_10m,Berlin,4.0,2018-12-18 04:00:00+00:00,6.48,3.56,8.02,6.29,5.81,...,5.3,4.05,5.04,5.26,6.67,4.86,4.95,4.56,5.016,1.35505


## Dropna

In [3]:
data.dropna(inplace = True)

In [4]:
# Normalize
hour = data["fcst_hour"]
data = data.iloc[:,3:-2].drop("obs_tm", axis = 1)
data = (data - data.mean())/data.std()
data["fcst_hour"] = hour

## Convert data to numpy

In [5]:
data_np = data.to_numpy()

In [6]:
label_enc = LabelEncoder()
encoding = label_enc.fit_transform(data_np[:,0])
data_np[:,0] = encoding

## Train, val, test split

In [128]:
train_val, test = train_test_split(data_np, test_size = 0.1)
train, val = train_test_split(train_val, test_size = 0.2)

In [297]:
def convert_format(input_data):
    #Extract forecast embedding
    horizon_emb = input_data[:,0]
    # Extract target
    target = np.expand_dims(input_data[:,1],1)
    #Extract features
    features = input_data[:,2:]
    
    return [features, horizon_emb], target

In [298]:
train_data, train_target = convert_format(train)
val_data, val_target = convert_format(val)
test_data, test_target = convert_format(test)

# Create Model

In [299]:
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]

In [395]:
class base_model(tf.keras.Model):    
    def __init__(self, no_features, n_embeddings = 65):
        super(base_model, self).__init__()
        #Embedding layers
        self.embedding = Embedding(input_dim = n_embeddings, output_dim = 4)
        #Create Dense layers
        self.hidden = Dense(25, activation = "relu")
        self.out = Dense(1, activation = "linear")
        #Define monotonicites
        #mono = list(np.append(np.zeros(25),1))
        #self.linear = Linear(num_input_dims = 26, units = 1, monotonicities = mono, use_bias = True)

    def call(self, input_data):
        #Extract data
        features, horizon_emb = input_data
        #Calculate embedding
        emb = self.embedding(horizon_emb)
        emb = tf.squeeze(emb, axis = 1)
        conc = Concatenate(axis = 1)([features, emb])
        #Calculate output
        output = self.hidden(conc)
        output = self.out(output)

        return output

In [396]:
test_model = base_model(40)

In [397]:
learning_rate = 0.01
EPOCHS = 15
BATCH_SIZE = 256

In [407]:
#Define optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)
#Compile model
test_model.compile(optimizer = optimizer, loss = lambda true,pred: pinball_loss(true, pred, tau = 0.975))

In [408]:
test_model.fit(x = train_data, y = train_target, validation_data = (val_data, val_target), epochs = EPOCHS, batch_size = BATCH_SIZE, shuffle = True)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x14e88133310>

In [409]:
test_model.summary()

Model: "base_model_27"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_27 (Embedding)     multiple                  260       
_________________________________________________________________
dense_33 (Dense)             multiple                  1125      
_________________________________________________________________
dense_34 (Dense)             multiple                  26        
Total params: 1,411
Trainable params: 1,411
Non-trainable params: 0
_________________________________________________________________


# Predict test data

In [410]:
pred = test_model.predict(test_data)

## Evaluate pinball loss

In [411]:
pinball_loss(np.squeeze(test_target), np.squeeze(pred), tau = 0.975)

<tf.Tensor: shape=(), dtype=float32, numpy=0.042199276>

In [412]:
for cnt,quantile in enumerate(quantiles):
    break
    pred_filtered = pred[pred[:,1]==quantile]
    test_filtered = test_target[pred[:,1]==quantile,0:1]
    loss = np.mean(pinball_loss(test_filtered,pred_filtered, tau = quantile).numpy())
    print("Pinball loss for quantile {} : \t {}".format(quantile,loss))

## Evaluate pinball loss on naive prediction

In [413]:
naive_pred = np.quantile(test_data[0], quantiles, axis = 1)
for cnt,quantile in enumerate(quantiles):
    loss = pinball_loss(np.squeeze(test_target), naive_pred[cnt], tau = quantile).numpy()
    print("Pinball loss for quantile {} : \t {}".format(quantile,loss))

Pinball loss for quantile 0.025 : 	 0.061658888239079127
Pinball loss for quantile 0.25 : 	 0.19784392141286428
Pinball loss for quantile 0.5 : 	 0.24384083580018143
Pinball loss for quantile 0.75 : 	 0.20890743961319938
Pinball loss for quantile 0.975 : 	 0.07647236849793138


# Predict new data