In [1]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date, datetime, timedelta
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
import random
from sklearn.model_selection import train_test_split
import tensorflow.keras.backend as K
from scipy.stats import norm
from sklearn.preprocessing import Normalizer,StandardScaler, LabelEncoder
from tensorflow_addons.losses import pinball_loss
from sklearn.metrics import mean_pinball_loss
from scipy import stats
import math
import optuna as opt
from sklearn.model_selection import KFold

from tensorflow_addons.utils.types import TensorLike, FloatTensorLike

# Create losses

In [2]:
#Smooth exp quantile loss

@tf.function
def exp_pinball_loss(
    y_true: TensorLike, y_pred: TensorLike, tau: FloatTensorLike = 0.5,
    alpha: FloatTensorLike = 0.001
) -> tf.Tensor:
    """Computes the pinball loss between `y_true` and `y_pred`.
    `loss = maximum(tau * (y_true - y_pred), (tau - 1) * (y_true - y_pred))`
    In the context of regression this loss yields an estimator of the tau
    conditional quantile.
    See: https://en.wikipedia.org/wiki/Quantile_regression
    Usage:
    >>> loss = tfa.losses.pinball_loss([0., 0., 1., 1.],
    ... [1., 1., 1., 0.], tau=.1)
    >>> loss
    <tf.Tensor: shape=(), dtype=float32, numpy=0.475>
    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
      tau: (Optional) Float in [0, 1] or a tensor taking values in [0, 1] and
        shape = `[d0,..., dn]`.  It defines the slope of the pinball loss. In
        the context of quantile regression, the value of tau determines the
        conditional quantile level. When tau = 0.5, this amounts to l1
        regression, an estimator of the conditional median (0.5 quantile).
    Returns:
        pinball_loss: 1-D float `Tensor` with shape [batch_size].
    References:
      - https://en.wikipedia.org/wiki/Quantile_regression
      - https://projecteuclid.org/download/pdfview_1/euclid.bj/1297173840
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    # Broadcast the pinball slope along the batch dimension
    tau = tf.expand_dims(tf.cast(tau, y_pred.dtype), 0)

    delta_y = y_true - y_pred
    #Implement smooth loss
    pinball = tau * delta_y + alpha * tf.math.softplus(-delta_y/alpha)
    return tf.reduce_mean(pinball, axis=-1)

In [3]:
#Smooth sqrt quantile loss

@tf.function
def sqrt_pinball_loss(
    y_true: TensorLike, y_pred: TensorLike, tau: FloatTensorLike = 0.5,
    alpha: FloatTensorLike = 0.001
) -> tf.Tensor:
    """Computes the pinball loss between `y_true` and `y_pred`.
    `loss = maximum(tau * (y_true - y_pred), (tau - 1) * (y_true - y_pred))`
    In the context of regression this loss yields an estimator of the tau
    conditional quantile.
    See: https://en.wikipedia.org/wiki/Quantile_regression
    Usage:
    >>> loss = tfa.losses.pinball_loss([0., 0., 1., 1.],
    ... [1., 1., 1., 0.], tau=.1)
    >>> loss
    <tf.Tensor: shape=(), dtype=float32, numpy=0.475>
    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
      tau: (Optional) Float in [0, 1] or a tensor taking values in [0, 1] and
        shape = `[d0,..., dn]`.  It defines the slope of the pinball loss. In
        the context of quantile regression, the value of tau determines the
        conditional quantile level. When tau = 0.5, this amounts to l1
        regression, an estimator of the conditional median (0.5 quantile).
    Returns:
        pinball_loss: 1-D float `Tensor` with shape [batch_size].
    References:
      - https://en.wikipedia.org/wiki/Quantile_regression
      - https://projecteuclid.org/download/pdfview_1/euclid.bj/1297173840
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    # Broadcast the pinball slope along the batch dimension
    tau = tf.expand_dims(tf.cast(tau, y_pred.dtype), 0)
    one = tf.cast(1, tau.dtype)

    delta_y = y_true - y_pred
    #Implement smooth loss
    pinball = (delta_y*(2*tau - one) + tf.math.sqrt(tf.math.square(delta_y) + alpha))/2
    return tf.reduce_mean(pinball, axis=-1)

In [64]:
#Huber quantile loss

@tf.function
def huber_pinball_loss(
    y_true: TensorLike, y_pred: TensorLike, tau: FloatTensorLike = 0.5,
    alpha: FloatTensorLike = 0.001
) -> tf.Tensor:
    """Computes the pinball loss between `y_true` and `y_pred`.
    `loss = maximum(tau * (y_true - y_pred), (tau - 1) * (y_true - y_pred))`
    In the context of regression this loss yields an estimator of the tau
    conditional quantile.
    See: https://en.wikipedia.org/wiki/Quantile_regression
    Usage:
    >>> loss = tfa.losses.pinball_loss([0., 0., 1., 1.],
    ... [1., 1., 1., 0.], tau=.1)
    >>> loss
    <tf.Tensor: shape=(), dtype=float32, numpy=0.475>
    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
      tau: (Optional) Float in [0, 1] or a tensor taking values in [0, 1] and
        shape = `[d0,..., dn]`.  It defines the slope of the pinball loss. In
        the context of quantile regression, the value of tau determines the
        conditional quantile level. When tau = 0.5, this amounts to l1
        regression, an estimator of the conditional median (0.5 quantile).
    Returns:
        pinball_loss: 1-D float `Tensor` with shape [batch_size].
    References:
      - https://en.wikipedia.org/wiki/Quantile_regression
      - https://projecteuclid.org/download/pdfview_1/euclid.bj/1297173840
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    # Broadcast the pinball slope along the batch dimension
    tau = tf.expand_dims(tf.cast(tau, y_pred.dtype), 0)
    alpha = tf.expand_dims(tf.cast(alpha, y_pred.dtype), 0)
    one = tf.cast(1, tau.dtype)

    error = tf.subtract(y_true,y_pred)
    abs_error = tf.abs(error)
    half = tf.convert_to_tensor(0.5, dtype=abs_error.dtype)
    huber = tf.where(abs_error <= alpha, half * tf.square(error)/alpha,
                         abs_error - half * alpha)
    
    
    #Implement smooth loss
    pinball = tf.where(error >=0, tau * huber, (one - tau) * huber)
    return tf.reduce_mean(pinball, axis=-1)

# Tune Wind model

## Functions

In [5]:
def normalize(dataframe, label_encoder = None,feature_scaler = None, target_scaler = None, learn = False):
    #Drop unused columns
    data = dataframe.copy()
    data.drop(["init_tm", "met_var", "location",  "ens_var", "obs_tm"], axis = 1, inplace = True)
    data = data.to_numpy()
    if learn == True:
        label_encoder = LabelEncoder()
        feature_scaler = StandardScaler()
        target_scaler = StandardScaler()
        #Learn label encoding for horizons
        label = label_encoder.fit_transform(data[:,0])
        #Learn target scaling
        target_scaled = target_scaler.fit_transform(data[:,1].reshape(-1,1))
        #Learn feature scaling
        feature_scaled = feature_scaler.fit_transform(data[:,2:])
        #Append
        data[:,0] = label
        data[:,1] = target_scaled.reshape(-1)
        data[:,2:] = feature_scaled
        
        return data, label_encoder, feature_scaler, target_scaler
    
    else:
        #Learn labels
        label = label_encoder.transform(data[:,0])
        #Scale target
        target_scaled = target_scaler.transform(data[:,1].reshape(-1,1))
        #Scale features
        feature_scaled = feature_scaler.transform(data[:,2:])
        #Append
        data[:,0] = label
        data[:,1] = target_scaled.reshape(-1)
        data[:,2:] = feature_scaled
        
        return data

In [6]:
def convert_format(input_data, predict = False):
    #Extract forecast embedding
    horizon_emb = input_data[:,0]
    
    if predict == False:        
        #Extract features
        features = input_data[:,2:]
        # Extract target
        target = np.expand_dims(input_data[:,1],1)
        return [features, horizon_emb], target
    else:
        #Extract features
        features = input_data[:,1:]
        return [features, horizon_emb]

In [7]:
def train_model(train_data, train_target, validation_data, batch_size, epochs, learning_rate, fine_tuning = True):
    model = base_model()    
    #Define optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)
    #Callbacks
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience = 7, min_delta = 1e-5)
    model.compile(optimizer = optimizer, loss = lambda true,pred: pinball_loss(true, pred, tau = quantiles))
    #model.compile(optimizer = optimizer, loss = lambda true,pred: smooth_pinball_loss(true, pred, tau = quantiles))
    #Normal fit
    history1 = model.fit(x = train_data, y = train_target, validation_data = validation_data, epochs = epochs, batch_size = BATCH_SIZE, callbacks = [callback], shuffle = True, verbose = False)
    
    #Fine tuning
    if fine_tuning == True:
        enc_horizons = label_encoder.transform(horizons)
        train_filtering = np.isin(train_data[1], enc_horizons)
        train_data_fine = [train_data[0][train_filtering], train_data[1][train_filtering]]
        train_target_fine = train_target[train_filtering]
        #Val filtering
        val_data, val_target = validation_data
        val_filtering = np.isin(val_data[1], enc_horizons)
        val_data_fine = [val_data[0][val_filtering], val_data[1][val_filtering]]
        val_target_fine = val_target[val_filtering]
        validation_data_fine = (val_data_fine, val_target_fine)
        
        #New optimizer
        history2 = model.fit(x = train_data_fine, y = train_target_fine, validation_data = validation_data_fine, epochs = epochs, batch_size = 256, callbacks = [callback], shuffle = True, verbose = False)
    return model, [history1, history2]

## Read data and prepare

In [41]:
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]
horizons = [36, 48 ,60, 72, 84]
n_encodings = 65

In [9]:
#Wind data
wind_data = pd.read_feather("data/berlin_data/historic_data/icon_eps_wind_10m.feather")
#Pressure data
pressure_data = pd.read_feather("data/berlin_data/historic_data/icon_eps_mslp.feather")
pressure_data.rename({"ens_mean":"mean_pressure"}, axis = 1, inplace = True)
#Cloud data
cloud_data = pd.read_feather("data/berlin_data/historic_data/icon_eps_clct.feather")
cloud_data.rename({"ens_mean":"cloud_coverage"}, axis = 1, inplace = True)
#Vmax data
max_data = pd.read_feather("data/berlin_data/historic_data/icon_eps_vmax_10m.feather")
max_data.rename({"ens_mean":"vmax"}, axis = 1, inplace = True)


data = wind_data.merge(pressure_data[["init_tm","fcst_hour","mean_pressure"]], on = ["init_tm","fcst_hour"], how = "left")
data = data.merge(cloud_data[["init_tm","fcst_hour","cloud_coverage"]], on = ["init_tm","fcst_hour"], how = "left")
data = data.merge(max_data[["init_tm","fcst_hour","vmax"]], on = ["init_tm","fcst_hour"], how = "left")
#Replace vmax NaNs by mean
vmax_mean = data["vmax"].mean()
data.loc[:,"vmax"].fillna(vmax_mean, inplace = True)
data.dropna(inplace=True)

#Positional encoding
pos_enc = pd.DataFrame(index=pd.DatetimeIndex(data["obs_tm"]))
pos_enc["Dayofyear"] = pos_enc.index.dayofyear
pos_enc["n_days"] = 365
pos_enc.loc[pos_enc.index.year==2020,"n_days"] = 366
#Calculate actual positional encoding
cos_encoding = np.cos(2*math.pi*pos_enc["Dayofyear"]/pos_enc["n_days"])
data["pos_enc_1"] = cos_encoding.to_numpy()

## Create study object

In [42]:
def get_data(train_index, test_index, data):
    #Get split
    train_df = data.loc[train_index]
    test_df = data.loc[test_index]

    #Normalize data
    train, label_encoder, feature_scaler, target_scaler = normalize(train_df, learn = True)
    test = normalize(test_df, label_encoder, feature_scaler, target_scaler)
    n_encodings = len(np.unique(train[:,0]))

    #Convert format
    train_data, train_target = convert_format(train)
    test_data, test_target = convert_format(test)
    
    return train_data, train_target, test_data, test_target

In [56]:
def get_loss(trial, quantiles = quantiles):
    #Sample alpha
    alpha = trial.suggest_float("alpha", 1e-5, 0.01)
    loss = trial.suggest_categorical("loss", ["pinball","exp","abs","huber"])
    losses = {
    "pinball": lambda true,pred: pinball_loss(true, pred, tau = quantiles),
    "exp": lambda true,pred: exp_pinball_loss(true, pred, tau = quantiles, alpha = alpha),
    "abs": lambda true,pred: sqrt_pinball_loss(true, pred, tau = quantiles, alpha = alpha),
    "huber": lambda true,pred: huber_pinball_loss(true, pred, tau = quantiles, alpha = alpha)}


    loss_func = losses[loss]
    return loss_func

In [68]:
def get_optimizer(trial):
    # Copied from optuna tutorial
    kwargs = {}
    optimizer_options = ["RMSprop", "Adam", "SGD"]
    optimizer_selected = trial.suggest_categorical("optimizer", optimizer_options)
    if optimizer_selected == "RMSprop":
        kwargs["learning_rate"] = trial.suggest_float(
            "rmsprop_learning_rate", 1e-5, 1e-1, log=True
        )
        kwargs["decay"] = trial.suggest_float("rmsprop_decay", 0.85, 0.99)
        kwargs["momentum"] = trial.suggest_float("rmsprop_momentum", 1e-5, 1e-1, log=True)
    elif optimizer_selected == "Adam":
        kwargs["learning_rate"] = trial.suggest_float("adam_learning_rate", 1e-5, 1e-1, log=True)
    elif optimizer_selected == "SGD":
        kwargs["learning_rate"] = trial.suggest_float(
            "sgd_opt_learning_rate", 1e-5, 1e-1, log=True
        )
        kwargs["momentum"] = trial.suggest_float("sgd_opt_momentum", 1e-5, 1e-1, log=True)

    optimizer = getattr(tf.optimizers, optimizer_selected)(**kwargs)
    return optimizer

In [80]:
def create_model(trial):
    #Parameters
    dropout_rate = trial.suggest_float("dropout", 0.05, 0.5, log = True)
    embedding_dim = trial.suggest_int("embedding_dim", 2, 6, step = 2)
    n_layers = trial.suggest_int("n_layers",1,2,1)
    n_units_1 = trial.suggest_int("n_units_1",16,128, log =True)
    if n_layers == 2:
        n_units_2 = trial.suggest_int("n_units_2", 16, 128, log = True)
    else:
        n_units_2 = 1
    #Create Model
    class base_model(tf.keras.Model):    
        def __init__(self, n_layers, n_units_1, n_units_2, embedding_dim, dropout_rate, n_embeddings = n_encodings):
            super(base_model, self).__init__()
            #Embedding layers
            self.embedding = Embedding(input_dim = n_embeddings, output_dim = embedding_dim)
            #N_layers
            self.n_layers = n_layers
            #Dropout
            self.dropout = Dropout(dropout_rate)
            #Create Dense layers
            self.hidden = Dense(n_units_1, activation = "relu")
            self.hidden2 = Dense(n_units_2, activation = "relu")
            self.out = Dense(5, activation = "linear")

        def call(self, input_data):
            #Extract data
            features, horizon_emb = input_data
            #Calculate embedding
            emb = self.embedding(horizon_emb)
            emb = tf.squeeze(emb, axis = 1)
            conc = Concatenate(axis = 1)([features, emb])
            #Calculate output
            output = self.hidden(conc)
            if self.n_layers == 2:
                output = self.hidden2(output)
            output = self.dropout(output)
            output = self.out(output)
            return output

    #Train
    model = base_model(n_layers, n_units_1, n_units_2, dropout_rate)    
    return model

In [81]:
def learn(model, loss_func, optimizer, BATCH_SIZE, EPOCHS = 100,  data = data, n_splits = 10):
    #Get data
    data = data.reset_index().drop("index",axis=1)
    fold = KFold(n_splits = n_splits, shuffle = True, random_state = 10)
    split = fold.split(data.index)
    
    #Total loss
    test_loss = 0
    for train_index, test_index in split:
        #Get data
        train_data, train_target, test_data, test_target = get_data(train_index, test_index, data)

        #Compile model
        callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience = 7, min_delta = 1e-5)
        model.compile(optimizer = optimizer, loss = loss_func)
        
        #Normal fit
        history1 = model.fit(x = train_data, y = train_target, validation_split = 0.2, epochs = EPOCHS, batch_size = BATCH_SIZE, callbacks = [callback], shuffle = True, verbose = False)

        #Calculate loss
        pred = model.predict(test_data)
        total_loss = 0
        for cnt,quantile in enumerate(quantiles):
            loss = mean_pinball_loss(test_target.reshape(-1), pred[:,cnt].reshape(-1), alpha = quantile)
            total_loss += loss

        test_loss += total_loss/len(quantiles)

    return test_loss/n_splits

In [82]:
def objective(trial):
    # Create Parameters
    BATCH_SIZE = 2**trial.suggest_int("batch_size",7,11,1)
    EPOCHS = 100
    optimizer = get_optimizer(trial)    
    #Get loss
    loss_func = get_loss(trial)        
    #Get Model
    model = create_model(trial)    
    #Train model
    loss = learn(model = model, loss_func = loss_func, optimizer = optimizer, BATCH_SIZE = BATCH_SIZE, EPOCHS = EPOCHS, n_splits = 10)

    return loss

## Run study

In [79]:
wind_study = opt.create_study(direction='minimize')
wind_study.optimize(objective, n_trials=2)

[32m[I 2022-01-07 10:53:37,667][0m A new study created in memory with name: no-name-660a88f7-9377-468e-8ab4-38ee336575a9[0m


Split finished
Split finished
Split finished
Split finished


[32m[I 2022-01-07 10:58:57,452][0m Trial 0 finished with value: 0.13386053716326937 and parameters: {'batch_size': 7, 'optimizer': 'SGD', 'sgd_opt_learning_rate': 0.003032461510449239, 'sgd_opt_momentum': 0.005137794061527512, 'alpha': 0.00024240558891133236, 'loss': 'huber', 'dropout': 0.2821537424639687, 'n_layers': 2, 'n_units_1': 75, 'n_units_2': 112}. Best is trial 0 with value: 0.13386053716326937.[0m


Split finished
Split finished
Split finished
Split finished
Split finished


[32m[I 2022-01-07 11:00:02,592][0m Trial 1 finished with value: 0.13944635022300297 and parameters: {'batch_size': 10, 'optimizer': 'SGD', 'sgd_opt_learning_rate': 0.02414933525460908, 'sgd_opt_momentum': 0.0006118796446215753, 'alpha': 0.004837564092016341, 'loss': 'huber', 'dropout': 0.5704457896329578, 'n_layers': 1, 'n_units_1': 22}. Best is trial 0 with value: 0.13386053716326937.[0m


Split finished


In [77]:
print(study.best_value)
study.best_params

0.19690207430330064


{'batch_size': 8,
 'optimizer': 'SGD',
 'sgd_opt_learning_rate': 0.0010313188174217675,
 'sgd_opt_momentum': 0.004165680478234925,
 'alpha': 0.004277917581196242,
 'loss': 'exp',
 'dropout': 0.47635102537779317,
 'n_layers': 1,
 'n_units_1': 82}

# Tune temperature model

## Load data

In [83]:
data = pd.read_feather("data/berlin_data/historic_data/icon_eps_t_2m.feather")
data.dropna(inplace=True)
#Positional encoding
pos_enc = pd.DataFrame(index=pd.DatetimeIndex(data["obs_tm"]))
pos_enc["Dayofyear"] = pos_enc.index.dayofyear
pos_enc["n_days"] = 365
pos_enc.loc[pos_enc.index.year==2020,"n_days"] = 366
#Calculate actual positional encoding
cos_encoding = np.cos(2*math.pi*pos_enc["Dayofyear"]/pos_enc["n_days"])
data["pos_enc_1"] = cos_encoding.to_numpy()

## Run study

In [85]:
temp_study = opt.create_study(direction='minimize')
temp_study.optimize(objective, n_trials=2)

[32m[I 2022-01-07 11:10:35,599][0m A new study created in memory with name: no-name-90bc9b1b-e508-4c6a-b560-94d4039ecce3[0m
[33m[W 2022-01-07 11:10:35,603][0m Trial 0 failed because of the following error: TypeError("__init__() missing 1 required positional argument: 'dropout_rate'")[0m
Traceback (most recent call last):
  File "C:\Users\chris\anaconda3\lib\site-packages\optuna\study\_optimize.py", line 213, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\chris\AppData\Local\Temp/ipykernel_9128/3653315619.py", line 9, in objective
    model = create_model(trial)
  File "C:\Users\chris\AppData\Local\Temp/ipykernel_9128/3507869797.py", line 42, in create_model
    model = base_model(n_layers, n_units_1, n_units_2, dropout_rate)
TypeError: __init__() missing 1 required positional argument: 'dropout_rate'


TypeError: __init__() missing 1 required positional argument: 'dropout_rate'