# CNN

## Notebook's Environment

In [None]:
INSTALL_DEPS = False
if INSTALL_DEPS:
  %pip install matplotlib==3.8.3
  %pip installnumpy==1.26.4
  %pip installpandas==2.2.1
  %pip installpandas_market_calendars==4.4.0
  %pip installpytz==2024.1
  %pip installscipy==1.12.0
  %pip installta==0.11.0
  %pip installyfinance==0.2.37

!python --version

## Cloud Environment Setup

In [None]:
import os
import sys
import warnings

# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices from the libraries used below.
warnings.filterwarnings("ignore")

# Detect where the notebook runs and set DATA_PATH / MODEL_PATH accordingly.
IN_KAGGLE = IN_COLAB = False
try:
    # https://www.tensorflow.org/install/pip#windows-wsl2
    import google.colab
    from google.colab import drive
except ImportError:
    # The package is not importable here, so this is not a Colab runtime.
    IN_COLAB = False
else:
    try:
        drive.mount("/content/drive")
    except Exception as mount_error:
        # Keep the best-effort fallback to the Kaggle/local branches below,
        # but say why the Colab paths were not used instead of hiding it.
        print(f"google.colab is importable but Drive could not be mounted: {mount_error!r}")
        IN_COLAB = False
    else:
        DATA_PATH = "/content/drive/MyDrive/EDT dataset"
        MODEL_PATH = "/content/drive/MyDrive/models"
        IN_COLAB = True
        print("Colab!")

if "KAGGLE_KERNEL_RUN_TYPE" in os.environ and not IN_COLAB:
    print("Running in Kaggle...")
    # List every input file that Kaggle attached to the kernel.
    for dirname, _, filenames in os.walk("/kaggle/input"):
        for filename in filenames:
            print(os.path.join(dirname, filename))
    MODEL_PATH = "./models"
    DATA_PATH = "/kaggle/input/"
    IN_KAGGLE = True
    print("Kaggle!")
elif not IN_COLAB:
    IN_KAGGLE = False
    MODEL_PATH = "./models"
    DATA_PATH = "./data/"
    print("running localhost!")

# Instruments

In [None]:
# NOTE(review): the wildcard import hides which names come from `constants`.
# Later cells use TARGET_FUT, INTERVAL, MARKET_FUTS and AGRI_FUTS without defining
# them, so they are presumably provided here — confirm and import them explicitly.
from constants import *

# Echo the target instrument and the bar interval as the cell output.
TARGET_FUT, INTERVAL

## Data Load

In [None]:
import pandas as pd
import numpy as np

# Join the directory and file name with os.path.join so that a DATA_PATH ending in a
# separator (e.g. "./data/") does not yield a doubled separator in the path.
filename = os.path.join(DATA_PATH, f"futures_{INTERVAL}.csv")
print(filename)
# The "Date" column is parsed and used as the index.
futs_df = pd.read_csv(filename, index_col="Date", parse_dates=True)

print(futs_df.shape)
futs_df.head(2)

In [None]:
import matplotlib.pyplot as plt

# Single-panel line chart of the target future's close price.
fig, ax = plt.subplots(figsize=(12, 4))

close_series = futs_df[f'{TARGET_FUT}_Close']
ax.plot(close_series, label=f'{TARGET_FUT} Close', alpha=0.7)
ax.set_title(f'{TARGET_FUT} Price')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)

fig.tight_layout()
plt.show()

# Prepare the Data

In [None]:
from signals import dynamic_support_resistance, kalman_backtest, signal_kf_bollinger_bands, signal_tsmom
from quant_equations import get_ou, modulate_std
from tqdm import tqdm

# Module-level set of column names. It is the default `universe_cols` argument of
# `process_exog` and `process_futures`, which add to it in place.
UNIVERSE_COLS = set()

def augment_ts(df, target_close, target_high, target_low, target_volume, interval):
    """Build the augmented feature frame for one instrument.

    Runs the project signal helpers (time-series momentum, Kalman/Bollinger bands,
    dynamic support/resistance and the Kalman backtest) on `df` and joins their
    outputs column-wise.

    :param df: frame holding the columns named by the `target_*` arguments.
    :param target_close: name of the close-price column.
    :param target_high: name of the high-price column.
    :param target_low: name of the low-price column.
    :param target_volume: name of the volume column.
    :param interval: forwarded to `kalman_backtest` as `period`.
    :return: column-wise concatenation of the helper outputs, back- then forward-filled.
    :raises AssertionError: if the %B spread or the volume series still contains NaN.
    """
    # presumably an OU half-life and a Hurst-like statistic — TODO confirm against get_ou
    ou_hl, ou_h = get_ou(df, target_close)
    lookback = abs(ou_hl)
    std_scale = modulate_std(ou_h)

    mom_df = signal_tsmom(
        df,
        target_close,
        int(lookback * 1.5),
        contra_lookback=lookback // 2,
        std_threshold=std_scale,
    )
    bb_df = signal_kf_bollinger_bands(df, target_close, target_volume, std_factor=std_scale)

    # Inputs of the Kalman backtest: gap-filled %B plus raw volume and close arrays.
    spread = bb_df["%B"].bfill().ffill()
    volumes = df[target_volume].to_numpy()
    prices = df[target_close].to_numpy()

    assert not np.isnan(spread).any() and not np.isnan(volumes).any()

    # NOTE(review): the momentum signal receives abs(hl) while support/resistance
    # receives the signed value — confirm this asymmetry is intended.
    sr_result = dynamic_support_resistance(df, target_close, target_high, target_low, window_size=ou_hl)
    sr_df = sr_result[0] if len(sr_result) == 3 else tuple(sr_result)[0]
    kf_df, _ = kalman_backtest(spread, volumes, prices, period=interval)

    pieces = [sr_df, kf_df, bb_df, mom_df]
    return pd.concat(pieces, axis=1).bfill().ffill()

def process_exog(futures, futs_df, universe_cols=UNIVERSE_COLS):
    """Collect the raw columns of the exogenous futures into one frame.

    :param futures: iterable of ticker prefixes; columns are expected to be named
        ``<ticker>_<field>``.
    :param futs_df: frame holding the columns of every instrument.
    :param universe_cols: set updated in place with every selected column name
        (defaults to the shared module-level set); pass ``None`` to skip tracking.
    :return: column-wise concatenation of the selected columns, or an empty frame
        on `futs_df`'s index when `futures` is empty.
    """
    import re  # local import: only needed to escape tickers for the regex

    futs_exog_ts = []
    for f in tqdm(futures, desc="process_exog"):
        # Anchor and escape the ticker: DataFrame.filter(regex=...) uses re.search, so an
        # unanchored pattern also matches other tickers that merely contain this one,
        # and tickers with regex metacharacters (e.g. "^") would be misread.
        fut_df = futs_df.filter(regex=f"^{re.escape(f)}_")

        if universe_cols is not None:
            universe_cols.update(fut_df.columns.tolist())

        futs_exog_ts.append(fut_df)

    if not futs_exog_ts:
        # pd.concat raises on an empty list; return an empty, index-aligned frame instead.
        return pd.DataFrame(index=futs_df.index)

    return pd.concat(futs_exog_ts, axis=1)

def process_futures(futures, futs_df, futs_exog_df, train_size, interval, universe_cols=UNIVERSE_COLS):
    """Build augmented train/validation frames for every future in `futures`.

    For each ticker the ``<ticker>_`` prefix is stripped from its columns, the
    exogenous columns are appended, the rows are split at `train_size`, and each
    part is augmented independently with `augment_ts`.

    :param futures: iterable of ticker prefixes to process.
    :param futs_df: frame holding ``<ticker>_<field>`` columns for every instrument.
    :param futs_exog_df: exogenous columns appended to every instrument's frame.
    :param train_size: number of leading rows that form the training part.
    :param interval: forwarded to `augment_ts`.
    :param universe_cols: optional set updated in place with the column names seen.
    :return: ``(training_ts, val_ts)``, two lists of frames with a fresh RangeIndex.
    """
    import re  # local import: only needed to escape tickers for the regex

    # After the prefix is stripped every instrument exposes the same column names.
    target_close = "Close"
    target_high = "High"
    target_low = "Low"
    target_volume = "Volume"

    training_ts = []
    val_ts = []
    for f in tqdm(futures, desc="process_futures"):
        prefix = f"{f}_"
        # Anchored + escaped so only columns that start with this ticker are selected
        # (DataFrame.filter(regex=...) uses re.search).
        fut_df = futs_df.filter(regex=f"^{re.escape(prefix)}")
        # Strip the leading prefix only; every selected column starts with it.
        fut_df = fut_df.rename(columns=lambda col: col[len(prefix):])

        fut_df = pd.concat([fut_df, futs_exog_df], axis=1)

        if universe_cols is not None:
            # For utility, we have all futures columns.
            universe_cols.update(fut_df.columns.tolist())

        train_df = augment_ts(fut_df.iloc[:train_size], target_close, target_high, target_low, target_volume, interval)
        test_df = augment_ts(fut_df.iloc[train_size:], target_close, target_high, target_low, target_volume, interval)
        training_ts.append(train_df.reset_index(drop=True))
        val_ts.append(test_df.reset_index(drop=True))

    return training_ts, val_ts

# Despite its name, TEST_SPLIT is the fraction of rows that goes to the training part:
# TRAIN_SIZE leading rows are used for training, the remainder for validation.
TEST_SPLIT = 0.6
TRAIN_SIZE = int(len(futs_df) * TEST_SPLIT)

futs_exog_df = process_exog(MARKET_FUTS, futs_df)
train_agri_ts, val_agri_ts = process_futures(
    AGRI_FUTS, futs_df, futs_exog_df, TRAIN_SIZE, INTERVAL
)

# Stack the per-instrument frames row-wise. `ignore_index=True` already yields a fresh
# RangeIndex, so no per-frame reset is needed. Rows with any NaN are dropped.
train_ts_df = pd.concat(train_agri_ts, axis=0, ignore_index=True).dropna()
test_ts_df = pd.concat(val_agri_ts, axis=0, ignore_index=True).dropna()

train_ts_df.tail(5)

In [None]:
from sklearn.preprocessing import FunctionTransformer, MinMaxScaler, normalize
from sklearn.model_selection import ParameterGrid, TimeSeriesSplit
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    fbeta_score, roc_auc_score, roc_curve, auc
)


# Default number of rows in each label window (`horizon` in `prepare_windows`).
PREDICTION_HORIZON = 1
# Default number of consecutive rows in each input window (`window_size` in `prepare_windows`).
WINDOW  = 10

def create_features_df(data_df):
    """Convert every column to period-over-period percentage changes.

    The first row has no predecessor, so it is back-filled from the second row.
    Any value still missing afterwards (e.g. a single-row frame) becomes 0.

    :param data_df: frame of numeric columns.
    :return: frame of the same shape holding the percentage changes.
    """
    # The original routed this through sklearn's FunctionTransformer, which only
    # called the function on the frame, and carried an unused log-returns helper.
    returns_df = data_df.pct_change().bfill()
    return returns_df.fillna(0)

def prepare_data(ts_df, label_df=None, to_normalize=True):
    """Turn a raw time-series frame into model-ready features.

    :param ts_df: frame of numeric columns.
    :param label_df: optional labels, appended column-wise after the features.
    :param to_normalize: when True, infinities are replaced by interpolated values
        and every row is scaled to unit L2 norm.
    :return: feature frame (plus label columns if given) with rows containing NaN dropped.
    :raises AssertionError: if NaNs remain after interpolation.
    """
    data_df = create_features_df(ts_df)
    if to_normalize:
        data_df = data_df.replace([np.inf, -np.inf], np.nan).interpolate()
        # `.isna().any()` is a per-column Series; reduce it again to one bool, because
        # `not <Series>` raises ValueError on multi-column frames.
        assert not data_df.isna().any().any(), "NaN values remain after interpolation"
        # sklearn's `normalize` returns a bare ndarray; wrap it so the index and column
        # names survive and the concat/dropna below keep working.
        data_df = pd.DataFrame(
            normalize(data_df, norm="l2"),
            index=data_df.index,
            columns=data_df.columns,
        )

    if label_df is not None:
        data_df = pd.concat([data_df, label_df], axis=1)

    return data_df.dropna(axis=0)

def prepare_windows(
    data_df,
    label_df,
    exog_ts,
    window_size=WINDOW,
    horizon=PREDICTION_HORIZON,
):
    """Slice a feature frame into overlapping windows.

    Window ``i`` covers rows ``[i, i + window_size)`` of `data_df`; its label covers
    rows ``[i + window_size, i + window_size + horizon)`` of `label_df`.

    :param data_df: feature frame (more than one row).
    :param label_df: labels positionally aligned with `data_df`, or None for no labels.
    :param exog_ts: column selection of `data_df` used as the exogenous input, or None.
    :param window_size: number of rows per input window.
    :param horizon: number of rows per label window.
    :return: ``(X, Xexog, y)`` arrays; `Xexog` / `y` are empty when `exog_ts` /
        `label_df` is None.
    :raises AssertionError: if `data_df` has fewer than two rows.
    """
    assert len(data_df) > 1
    # Select the exogenous columns once instead of on every iteration.
    exog_df = data_df[exog_ts] if exog_ts is not None else None

    X, Xexog, y = [], [], []
    n_windows = len(data_df) - window_size - horizon + 1
    for start in tqdm(
        range(n_windows), desc=f"Encoding Widows of {window_size} with {horizon} horizon."
    ):
        stop = start + window_size
        X.append(data_df.iloc[start:stop].values)
        if exog_df is not None:
            Xexog.append(exog_df.iloc[start:stop].values)
        if label_df is not None:
            y.append(label_df.iloc[stop : stop + horizon].values)

    return np.array(X), np.array(Xexog), np.array(y)

In [None]:
import shutil

import tensorflow as tf
from tensorflow.keras.layers import (
    SpatialDropout1D,
    Dense,
    Conv1D,
    Layer,
    Add,
    Input,
    Concatenate,
    Flatten,
    MultiHeadAttention,
)
from tensorflow.keras import Model
from tensorboard import program
from tensorboard.plugins.hparams import api as hp
from tensorflow.keras.losses import BinaryCrossentropy, BinaryFocalCrossentropy
from tensorflow.keras.metrics import AUC, BinaryAccuracy, Recall, Precision
from tensorflow.keras.regularizers import L1L2
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard,ModelCheckpoint,ReduceLROnPlateau,LambdaCallback
from tensorflow.keras.optimizers import Adam
from tensorflow.summary import create_file_writer
from tensorflow.math import confusion_matrix

# Activations used by the convolutional blocks, the dense head and the output layer.
NONLINEAR_ACTIVATION = 'tanh'
NONLINEAR_ACTIVATION_DENSE = "tanh"
NONLINEAR_OUTPUT_ACTIVATION = "sigmoid"
MODEL_NAME = "TCN"
MODEL_DIR = f"../models/{MODEL_NAME}"
IMAGES_DIR = f"../images/{MODEL_NAME}/images"
LOG_BASEPATH = f"../logs/{MODEL_NAME}/tb"

# Training schedule.
OOS_SPLIT = 0.1
VAL_SPLIT = 0.20
EPOCHS = 300
PATIENCE_EPOCHS = 15
BATCH_SIZE = 124
GRID_SEARCH_TRAIN = False
CV_MODEL = True
CV_SPLITS = 3

# Architecture and regularisation defaults consumed by `build_tcn`.
MAX_FILTER = 512
MIN_FILTER = 32
FILTERS = [MIN_FILTER, MIN_FILTER*2]
HIDDEN_DENSE = [WINDOW]
BIAS = True
DROPRATE = 0.25
POOL_SIZE = 8
KERNEL_SIZE = 2
DILATION_RATE = 1
REG_WEIGHTS = 0.005
LEARN_RATE = 0.0025

ERROR_ALPHA = 0.8 # 0.5 > gives more weight to positive class errors
ERROR_GAMMA = 2.4 # loss contribution from easy examples. 0 > focus on hard examples.
CLASS_WEIGHTS = None
TARGET_METRIC = "auc"

# The loss and metrics must know whether the model emits logits or probabilities.
# With a "sigmoid" output activation the predictions are already probabilities, so
# `from_logits=True` would apply a second sigmoid to them.
FROM_LOGITS = NONLINEAR_OUTPUT_ACTIVATION in (None, "linear")
LOSS = BinaryFocalCrossentropy(apply_class_balancing=True, from_logits=FROM_LOGITS, alpha=ERROR_ALPHA, gamma=ERROR_GAMMA)
# NOTE(review): `BinaryCrossentropy` below is the loss class imported from
# tensorflow.keras.losses, used here as a callable metric.
METRICS = [AUC(name=TARGET_METRIC, from_logits=FROM_LOGITS, label_weights=CLASS_WEIGHTS),
            AUC(name="pr_auc", curve="PR", from_logits=FROM_LOGITS, label_weights=CLASS_WEIGHTS),
            BinaryCrossentropy(from_logits=FROM_LOGITS),
            BinaryAccuracy(name='accuracy'),
            Precision(name='precision'),
            Recall(name='recall')]

# Reset the registry so re-running this cell can re-register the custom layers below.
tf.keras.saving.get_custom_objects().clear()

@tf.keras.saving.register_keras_serializable()
class TCNBlock(Layer):
    """
    TCN residual block that uses causal padding so the number of time steps of the
    output equals that of the input.

    The block stacks, twice:
        - 1D dilated causal convolution with the `NONLINEAR_ACTIVATION` activation
        - spatial dropout
    and adds the block input back after projecting it with a 1x1 convolution.

    Calling the layer returns ``(residual_output, projected_input)``.

    :param filters: number of output channels of every convolution in the block.
    :param kernel_size: length of the convolution window.
    :param dilation_rate: dilation of the two causal convolutions (must be > 0).
    :param kernel_initializer / bias_initializer: initialisers of the two causal convolutions.
    :param kernel_regularizer / bias_regularizer: regularisers of the two causal convolutions.
    :param use_bias: whether the two causal convolutions use a bias term.
    :param dropout_rate: rate of both spatial dropout layers.
    :param layer_id: suffix used in the names of the sub-layers.
    """

    def __init__(
        self,
        filters=1,
        kernel_size=2,
        dilation_rate=1,
        kernel_initializer="glorot_normal",
        bias_initializer="glorot_normal",
        kernel_regularizer=None,
        bias_regularizer=None,
        use_bias=False,
        dropout_rate=0.0,
        layer_id=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        assert dilation_rate is not None and dilation_rate > 0 and filters > 0 and kernel_size > 0

        self.filters = filters
        self.kernel_size = kernel_size
        self.dilation_rate = dilation_rate
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.kernel_regularizer = kernel_regularizer
        self.bias_regularizer = bias_regularizer
        self.use_bias = use_bias
        self.dropout_rate = dropout_rate
        self.layer_id = str(layer_id)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created on load."""
        config = super().get_config()
        config.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'dilation_rate': self.dilation_rate,
            'kernel_initializer': self.kernel_initializer,
            'bias_initializer': self.bias_initializer,
            'kernel_regularizer': self.kernel_regularizer,
            'bias_regularizer': self.bias_regularizer,
            'use_bias': self.use_bias,
            'dropout_rate': self.dropout_rate,
            # Without this entry a reloaded block falls back to layer_id "None" and its
            # sub-layers are named differently from the ones that were saved.
            'layer_id': self.layer_id,
        })
        return config

    def build(self, inputs):
        """Create the sub-layers. `inputs` is the shape Keras passes to `build`; it is not used."""
        self.conv1 = Conv1D(
            filters=self.filters,
            kernel_size=self.kernel_size,
            use_bias=self.use_bias,
            bias_initializer=self.bias_initializer,
            bias_regularizer=self.bias_regularizer,
            kernel_initializer=self.kernel_initializer,
            kernel_regularizer=self.kernel_regularizer,
            padding="causal",
            dilation_rate=self.dilation_rate,
            activation=NONLINEAR_ACTIVATION,
            name=f"Conv1D_1_{self.layer_id}"
        )
        # SpatialDropout1D drops whole feature channels (across all time steps) rather
        # than individual values, so the block does not lean on single kernels.
        self.dropout1 = SpatialDropout1D(
            self.dropout_rate, trainable=True, name=f"SpatialDropout1D_1_{self.layer_id}"
        )
        # Capture a higher order feature set from the previous convolution
        self.conv2 = Conv1D(
            filters=self.filters,
            kernel_size=self.kernel_size,
            use_bias=self.use_bias,
            bias_initializer=self.bias_initializer,
            bias_regularizer=self.bias_regularizer,
            kernel_initializer=self.kernel_initializer,
            kernel_regularizer=self.kernel_regularizer,
            padding="causal",
            dilation_rate=self.dilation_rate,
            activation=NONLINEAR_ACTIVATION,
            name=f"Conv1D_2_{self.layer_id}"
        )
        self.dropout2 = SpatialDropout1D(
            self.dropout_rate, trainable=True, name=f"SpatialDropout1D_2_{self.layer_id}"
        )
        # The skip connection adds the (projected) block input to the output of the
        # second dropout layer. It gives gradients an alternative path and makes the
        # convolutions learn residuals. The kernel-size-1 convolution only changes the
        # channel count so both operands of the addition have `filters` channels.
        self.skip_out = Conv1D(
            filters=self.filters,
            kernel_size=1,
            activation="linear",
            padding="same",
            name=f"Conv1D_skipconnection_{self.layer_id}",
        )
        # Element-wise add of the projected input and the second dropout's output.
        self.residual_out = Add(name=f"residual_Add_{self.layer_id}")

    def call(self, inputs):
        """Return ``(residual_output, projected_input)`` for `inputs`."""
        x = self.conv1(inputs)
        x = self.dropout1(x)
        x = self.conv2(x)
        x = self.dropout2(x)

        # Residual output by adding the inputs back
        skip_out_x = self.skip_out(inputs)
        x = self.residual_out([x, skip_out_x])
        return x, skip_out_x



@tf.keras.saving.register_keras_serializable()
class ConditionalBlock(Layer):
    """
    TCN conditioning block that conditions a target time series on exogenous time series.

    Each of the two inputs (main and, if present, exogenous) goes through:
        - a causal 1D convolution with the `NONLINEAR_ACTIVATION` activation
        - spatial dropout
        - an addition with a 1x1-convolution projection of that same input
    The two results are then concatenated along the channel axis.

    The layer is called with a list ``[main_input]`` or ``[main_input, cond_input]``.

    :param filters: number of output channels of every convolution in the block.
    :param kernel_size: length of the causal convolution windows.
    :param kernel_initializer / bias_initializer: initialisers of the causal convolutions.
    :param kernel_regularizer / bias_regularizer: regularisers of the causal convolutions.
    :param use_bias: whether the causal convolutions use a bias term.
    :param dropout_rate: rate of both spatial dropout layers.
    :param layer_id: suffix used in the names of the dropout sub-layers.
    """

    def __init__(
        self,
        filters=1,
        kernel_size=2,
        kernel_initializer="glorot_normal",
        bias_initializer="glorot_normal",
        kernel_regularizer=None,
        bias_regularizer=None,
        use_bias=False,
        dropout_rate=0.01,
        layer_id=None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        assert filters > 0 and kernel_size > 0

        self.filters = filters
        self.kernel_size = kernel_size
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.kernel_regularizer = kernel_regularizer
        self.bias_regularizer = bias_regularizer
        self.use_bias = use_bias
        self.dropout_rate = dropout_rate
        self.layer_id = str(layer_id)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created on load."""
        config = super().get_config()
        config.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'kernel_initializer': self.kernel_initializer,
            'bias_initializer': self.bias_initializer,
            'kernel_regularizer': self.kernel_regularizer,
            'bias_regularizer': self.bias_regularizer,
            'use_bias': self.use_bias,
            'dropout_rate': self.dropout_rate,
            # Keyed by the constructor argument name so `cls(**config)` accepts it.
            'layer_id': self.layer_id,
        })
        return config

    def build(self, inputs):
        """Create the sub-layers. `inputs` is the shape Keras passes to `build`; it is not used."""
        self.main_conv = Conv1D(
            filters=self.filters,
            kernel_size=self.kernel_size,
            use_bias=self.use_bias,
            bias_initializer=self.bias_initializer,
            bias_regularizer=self.bias_regularizer,
            kernel_initializer=self.kernel_initializer,
            kernel_regularizer=self.kernel_regularizer,
            padding="causal",
            activation=NONLINEAR_ACTIVATION,
            name="Conv1D_Conditional_1",
        )
        self.dropout1 = SpatialDropout1D(
            self.dropout_rate, trainable=True, name=f"SpatialDropout1D_1_{self.layer_id}"
        )
        self.main_skip_conn = Conv1D(
            filters=self.filters,
            kernel_size=1,
            activation="linear",
            padding="same",
            name="Skip_Conditional_1",
        )
        self.cond_conv = Conv1D(
            filters=self.filters,
            kernel_size=self.kernel_size,
            use_bias=self.use_bias,
            bias_initializer=self.bias_initializer,
            bias_regularizer=self.bias_regularizer,
            kernel_initializer=self.kernel_initializer,
            kernel_regularizer=self.kernel_regularizer,
            padding="causal",
            activation=NONLINEAR_ACTIVATION,
            name="Conv1D_Conditional_2",
        )
        self.cond_skip_conn = Conv1D(
            filters=self.filters,
            kernel_size=1,
            activation="linear",
            padding="same",
            name="Skip_Conditional_2",
        )
        self.dropout2 = SpatialDropout1D(
            self.dropout_rate, trainable=True, name=f"SpatialDropout1D_2_{self.layer_id}"
        )
        # Merge layers are created once here rather than on every `call`, so the block
        # owns a fixed set of sub-layers.
        self.main_add = Add()
        self.cond_add = Add()
        self.merge = Concatenate(axis=-1)

    def call(self, inputs):
        """Return the conditioned representation for ``[main_input]`` or ``[main_input, cond_input]``."""
        main_input = inputs[0]
        cond_input = inputs[1] if len(inputs) > 1 else None

        x = self.main_conv(main_input)
        x = self.dropout1(x)
        skip_out_x = self.main_skip_conn(main_input)
        x = self.main_add([x, skip_out_x])
        if cond_input is not None:
            cond_x = self.cond_conv(cond_input)
            cond_x = self.dropout2(cond_x)
            cond_skip_out_x = self.cond_skip_conn(cond_input)
            cond_x = self.cond_add([cond_x, cond_skip_out_x])

            x = self.merge([x, cond_x])
        return x

def TCN(
    input_shape,
    dense_units=None,
    conditioning_shapes=None,
    output_horizon=1,
    filters=[32],
    kernel_size=2,
    dilation_rate=2,
    kernel_initializer="glorot_normal",
    bias_initializer="glorot_normal",
    kernel_regularizer=None,
    bias_regularizer=None,
    use_bias=False,
    dropout_rate=0.01,
):
    """
    Tensorflow TCN Model builder.
    see: https://www.tensorflow.org/api_docs/python/tf/keras/Model
    see: https://www.tensorflow.org/guide/keras/making_new_layers_and_models_via_subclassing#the_model_class
    see: https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/L2

    :param input_shape: tuple
        shape of one main-input sample (without the batch axis).
    :param dense_units: list of int or None
        sizes of the hidden dense layers of the classifier head. When empty/None a
        1x1 convolution is used as the classifier instead.
    :param conditioning_shapes: tuple or None
        shape of one exogenous-input sample. When given, the model takes a second
        input and starts with a `ConditionalBlock`.
    :param output_horizon: int
        number of units of the output layer.
    :param filters: list of int
        output channels of each `TCNBlock`; one block is created per entry.
        Defaults to a single block with 32 filters. The list is only read.
    :param kernel_size: int or tuple
        the length of the 1D convolution window
    :param dilation_rate: int
        base of the dilation; block ``i`` uses ``dilation_rate ** (i + 1)``. Defaults to 2.
    :param kernel_initializer, bias_initializer, kernel_regularizer, bias_regularizer,
        use_bias, dropout_rate: forwarded to every block.
    :return: an uncompiled `tf.keras.Model` named "TCN_Conditional_Model".
    """
    main_input = Input(shape=input_shape, name="main_input")
    has_exog = conditioning_shapes is not None and len(conditioning_shapes) > 0
    cond_input = Input(shape=conditioning_shapes, name="exog_input") if has_exog else None

    x = main_input
    if cond_input is not None:
        x = ConditionalBlock(
            filters=filters[0],
            kernel_size=kernel_size,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            use_bias=use_bias,
            dropout_rate=dropout_rate,
        )([main_input, cond_input])

    # NOTE(review): the original also concatenated every block's projected input with
    # the final block output and passed it through a 1x1 convolution, but that tensor
    # was never connected to the model output, so it had no effect on the built model.
    # It is omitted here; decide whether the head should consume it.
    for i, n_filters in enumerate(filters):
        x, _ = TCNBlock(
            filters=n_filters,
            kernel_size=kernel_size,
            dilation_rate=dilation_rate ** (i + 1),
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            use_bias=use_bias,
            dropout_rate=dropout_rate,
            layer_id=i,
        )(x)

    if dense_units:
        # Dense head: flatten the (steps, channels) map and stack the hidden layers.
        x = Flatten()(x)
        # First layer
        x = Dense(dense_units[0], activation=NONLINEAR_ACTIVATION_DENSE, name="Dense_0")(x)
        # Remaining layers only: iterating over all of `dense_units` here would add a
        # second layer of size dense_units[0].
        for i, units in enumerate(dense_units[1:], start=1):
            x = Dense(units, activation=NONLINEAR_ACTIVATION_DENSE, name=f"Dense__{i}")(x)
        # Last layer
        x = Dense(output_horizon, activation=NONLINEAR_OUTPUT_ACTIVATION, name="Dense_Classifier")(x)
    else:
        x = Conv1D(
            filters=output_horizon,
            kernel_size=1,
            padding="causal",
            activation=NONLINEAR_OUTPUT_ACTIVATION,
            name="Conv_Classifier",
        )(x)

    model = Model(
        inputs=[main_input, cond_input] if cond_input is not None else [main_input],
        outputs=x,
        name="TCN_Conditional_Model",
    )
    return model


def start_tensorboard(log_base=LOG_BASEPATH, del_logs=True):
    """Launch a TensorBoard server on `log_base`.

    :param log_base: directory TensorBoard reads its event files from.
    :param del_logs: when True and `log_base` exists, the directory is deleted first.
    :return: the value returned by `TensorBoard.launch()` (previously discarded).
    :raises AssertionError: if `log_base` exists but is not a directory.
    """
    if del_logs and os.path.exists(log_base):
        assert os.path.isdir(log_base)
        shutil.rmtree(log_base)

    tb = program.TensorBoard()
    tb.configure(argv=[None, '--logdir', log_base, '--bind_all'])
    url = tb.launch()
    return url

def build_tcn(
    input_shape,
    X, y,
    Xt=None, yt=None,
    conditioning_shapes=None,
    val_split=VAL_SPLIT,
    output_horizon= PREDICTION_HORIZON,
    filters= FILTERS,
    kernel_size= KERNEL_SIZE,
    dilation_rate= DILATION_RATE,
    kernel_regularizer=L1L2(l1= REG_WEIGHTS, l2=REG_WEIGHTS / 10),
    bias_regularizer=L1L2(l1= REG_WEIGHTS, l2=REG_WEIGHTS / 10),
    dropout_rate=DROPRATE,
    dense_units=HIDDEN_DENSE,
    lr=LEARN_RATE,
    patience=PATIENCE_EPOCHS,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    use_bias=BIAS,
    loss=LOSS,
    tb=False,
    classification=True,
    weighted=True,
):
    """Build, compile and fit the TCN model.

    :param input_shape: shape of one main-input sample, forwarded to `TCN`.
    :param X, y: training inputs and targets.
    :param Xt, yt: optional validation inputs and targets. When `Xt` is None the last
        `val_split` fraction of the training data is used for validation instead.
    :param conditioning_shapes: shape of one exogenous-input sample, forwarded to `TCN`.
    :param val_split: validation fraction used when `Xt` is None (must be > 0).
    :param output_horizon, filters, kernel_size, dilation_rate, kernel_regularizer,
        bias_regularizer, dropout_rate, dense_units, use_bias: forwarded to `TCN`.
        The default l2 factor is ``REG_WEIGHTS / 10``; the original floor division
        (``//``) evaluated to 0.0 and silently disabled the l2 term.
    :param lr: Adam learning rate.
    :param patience: early-stopping patience in epochs; the learning-rate schedule
        uses half of it.
    :param epochs, batch_size: forwarded to `Model.fit`.
    :param loss: loss passed to `Model.compile`.
    :param tb: when True, TensorBoard logging and verbose output are enabled.
    :param classification: when True (with `tb` and validation data), a confusion
        matrix image is logged after every epoch.
    :param weighted: when True, balanced class weights are computed from `y`
        (see the NOTE below: they are currently not applied).
    :return: ``(model, history)``. The model is also published as the global `model`,
        and `Xt` / `yt` as the globals `globalXt` / `globalyt`.
    :raises AssertionError: on too-short inputs or inconsistent arguments.
    """
    import io  # stdlib; renders figures into an in-memory PNG for TensorBoard

    def _draw_confusion_matrix(ax, cm, labels, cmap, title):
        """Draw the row-normalised confusion matrix (in percent) on `ax`."""
        counts = np.asarray(cm, dtype=float)
        row_totals = counts.sum(axis=1, keepdims=True)
        # Rows without any sample stay at 0% instead of producing NaN.
        percent = np.divide(counts, row_totals, out=np.zeros_like(counts), where=row_totals > 0) * 100
        ax.imshow(percent, interpolation='nearest', cmap=cmap, vmin=0, vmax=100)
        for row in range(percent.shape[0]):
            for col in range(percent.shape[1]):
                ax.text(col, row, f"{percent[row, col]:.2f}", ha="center", va="center")
        tick_marks = np.arange(len(labels))
        ax.set_xticks(tick_marks)
        ax.set_xticklabels(labels, rotation=45)
        ax.set_yticks(tick_marks)
        ax.set_yticklabels(labels)
        ax.set_xlabel('Predicted Labels')
        ax.set_title(title)

    def _plot_confusion_matrix(cm, labels, cm2=None, labels2=None):
        """Plot one or two confusion matrices side by side.

        Uses matplotlib only: the original referenced `sns`, which this notebook never
        imports. Returns ``(figure, figure_or_None)``; the second item is None when
        `cm2` is not given.
        """
        n_panels = 2 if cm2 is not None else 1
        figure, axes = plt.subplots(1, n_panels, figsize=(4 * n_panels, 4), squeeze=False)
        _draw_confusion_matrix(axes[0, 0], cm, labels, 'Blues', 'Confusion Matrix 1')
        axes[0, 0].set_ylabel('True Labels')
        if cm2 is not None:
            second_labels = labels2 if labels2 is not None else labels
            _draw_confusion_matrix(axes[0, 1], cm2, second_labels, 'Reds', 'Confusion Matrix 2')
        figure.tight_layout()
        return figure, (figure if cm2 is not None else None)

    def _plot_to_image(figure):
        """Converts the matplotlib plot specified by 'figure' to a PNG image and
        returns it. The supplied figure is closed and inaccessible after this call."""
        buf = io.BytesIO()
        figure.savefig(buf, format='png')
        plt.close(figure)
        buf.seek(0)
        image = tf.image.decode_png(buf.getvalue(), channels=4)
        image = tf.expand_dims(image, 0)
        return image

    def _log_confusion_matrix(epoch, logs):
        """Epoch-end callback: log the validation confusion matrix as an image."""
        ypred = model.predict(Xt)
        y_discrete = (ypred.flatten() > 0.5).astype(int)
        # num_classes=2 keeps the matrix 2x2 even if one class is absent in an epoch.
        cm = confusion_matrix(yt.flatten(), y_discrete, num_classes=2)
        # Row/column i of the matrix is class i, so the tick labels are [0, 1].
        figure, _ = _plot_confusion_matrix(cm, labels=[0, 1])
        cm_image = _plot_to_image(figure)

        with file_writer_cm.as_default():
            tf.summary.image("Confusion Matrix", cm_image, step=epoch)

    assert len(X) > 1 and len(y) > 1 and input_shape is not None
    # Kept for backward compatibility: other cells may read these globals.
    global model, globalXt, globalyt
    globalXt = Xt
    globalyt = yt

    model = TCN(
        input_shape=input_shape,
        conditioning_shapes=conditioning_shapes,
        dense_units=dense_units,
        output_horizon=output_horizon,
        filters=filters,
        kernel_size=kernel_size,
        dilation_rate=dilation_rate,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        use_bias=use_bias,
        dropout_rate=dropout_rate,
    )
    if weighted:
        assert classification
        y_flat = y.flatten()
        weight_for_0 = (1 / np.sum(y_flat == 0)) * (len(y_flat) / 2.0)
        weight_for_1 = (1 / np.sum(y_flat == 1)) * (len(y_flat)  / 2.0)
        assert weight_for_0 > 0 and weight_for_1 > 0
        # NOTE(review): these weights were never passed to `fit` in the original (they
        # were bound to a local that shadowed the module-level CLASS_WEIGHTS). Training
        # behaviour is left unchanged here, because the focal loss already applies class
        # balancing; decide whether `class_weight`/`sample_weight` should also be used.
        class_weights = {0: weight_for_0, 1: weight_for_1}

    model.compile(loss=loss, optimizer=Adam(learning_rate=lr), metrics=METRICS)

    monitored = f"val_{TARGET_METRIC}"
    # Be explicit about the direction: ReduceLROnPlateau's "auto" mode only treats
    # monitors containing "acc" as maximised, so "val_auc" would be minimised.
    monitor_mode = "min" if "loss" in TARGET_METRIC else "max"
    callbacks = [EarlyStopping(
                    patience=patience,
                    monitor=monitored,
                    mode=monitor_mode,
                    restore_best_weights=True,
                ),
                ReduceLROnPlateau(
                    monitor=monitored,
                    mode=monitor_mode,
                    factor=0.3,
                    patience=patience//2,
                    verbose=1 if tb else 0,
                    min_delta=0.00001,
                )]
    if tb:
        callbacks.append(TensorBoard(log_dir=LOG_BASEPATH,
                                    histogram_freq=1,
                                    write_graph=True,
                                    write_images=True,
                                    update_freq='epoch',
                                    profile_batch=2,
                                    embeddings_freq=1))
    if tb and classification and Xt is not None:
        # The confusion matrix needs explicit validation data; without it the callback
        # would fail at the end of the first epoch. One writer is shared by all epochs.
        file_writer_cm = create_file_writer(LOG_BASEPATH)
        callbacks.append(LambdaCallback(on_epoch_end=_log_confusion_matrix))

    fit_kwargs = dict(
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks,
        verbose=1 if tb else 0,
    )
    if Xt is not None:
        assert len(Xt) > 1 and len(yt) > 1
        fit_kwargs["validation_data"] = (Xt, yt)
    else:
        assert val_split > 0 and  Xt is None and yt is None
        fit_kwargs["validation_split"] = val_split

    history = model.fit(X, y, **fit_kwargs)
    return model, history

# NOTE(review): `input_shape`, `X` and `y` are not defined in any cell shown above;
# they must be created (e.g. via `prepare_data` / `prepare_windows`) before this cell runs.
# Keep the returned history so the training curves can be inspected afterwards.
model, history = build_tcn(input_shape, X, y, Xt=None, yt=None)