# CNN

## Notebook's Environment

In [None]:
# Set to False to skip installation when the kernel already has these packages.
INSTALL_DEPS = True
if INSTALL_DEPS:
    # Every dependency is pinned to an exact version; %pip targets the running kernel's environment.
    %pip install hurst==0.0.5
    %pip install imbalanced_learn==0.12.3
    %pip install imblearn==0.0
    %pip install protobuf==5.27.0
    %pip install pykalman==0.9.7
    %pip install tqdm==4.66.4
    %pip install shap==0.45.1
    %pip install tensorflow==2.15.1
# Print the interpreter version used for this run.
!python --version

## Cloud Environment Setup

In [None]:
import os
import sys
import warnings

warnings.filterwarnings("ignore")

# Detect the hosting platform (Colab, Kaggle or a local kernel) and set
# DATA_PATH / MODEL_PATH plus the IN_COLAB / IN_KAGGLE flags accordingly.
IN_KAGGLE = IN_COLAB = False
try:
    # https://www.tensorflow.org/install/pip#windows-wsl2
    # Only the import is used as the "are we on Colab?" probe, so that a real
    # failure while mounting Drive is reported instead of silently falling
    # back to the local paths.
    import google.colab
    from google.colab import drive
except ImportError:
    IN_COLAB = False
else:
    drive.mount("/content/drive")
    DATA_PATH = "/content/drive/MyDrive/EDT dataset"
    MODEL_PATH = "/content/drive/MyDrive/models"
    IN_COLAB = True
    print("Colab!")
if "KAGGLE_KERNEL_RUN_TYPE" in os.environ and not IN_COLAB:
    print("Running in Kaggle...")
    # List every file attached to the Kaggle kernel.
    for dirname, _, filenames in os.walk("/kaggle/input"):
        for filename in filenames:
            print(os.path.join(dirname, filename))
    MODEL_PATH = "./models"
    DATA_PATH = "/kaggle/input/intra-day-agriculture-futures-trades-2023-2024"
    IN_KAGGLE = True
    print("Kaggle!")
elif not IN_COLAB:
    # Neither Colab nor Kaggle: use paths relative to the working directory.
    IN_KAGGLE = False
    MODEL_PATH = "./models"
    DATA_PATH = "./data/"
    print("running localhost!")

In [None]:
import tensorflow as tf
from tensorflow.keras import mixed_precision

print(f'Tensorflow version: [{tf.__version__}]')

tf.get_logger().setLevel('INFO')

#tf.config.set_soft_device_placement(True)
#tf.config.experimental.enable_op_determinism()
#tf.random.set_seed(1)

# Pick a distribution strategy: TPU first, then GPU(s), then the default (CPU).
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()

    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
except Exception:
    # Any failure while resolving/initialising a TPU means "no TPU here".
    gpus = tf.config.list_physical_devices('GPU')
    if len(gpus) > 0:
        # Memory growth has to be configured BEFORE the GPUs are initialised
        # (creating the strategy or listing logical devices initialises them);
        # afterwards TensorFlow raises RuntimeError and the setting is lost.
        for gpu in gpus:
            try:
                tf.config.experimental.set_memory_growth(gpu, True)
            except RuntimeError as e:
                # Raised when the cell is re-run on an already initialised GPU.
                print(e)
        strategy = tf.distribute.MirroredStrategy()
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        print("Running on", len(gpus), "GPU(s)")
    else:
        # CPU is final fallback
        strategy = tf.distribute.get_strategy()
        print("Running on CPU")

def is_tpu_strategy(strategy):
    """Tell whether ``strategy`` is an instance of ``tf.distribute.TPUStrategy``."""
    tpu_strategy_type = tf.distribute.TPUStrategy
    return isinstance(strategy, tpu_strategy_type)

# Report how many replicas the selected strategy keeps in sync.
print("Number of accelerators:", strategy.num_replicas_in_sync)
# Last expression of the cell: show the current working directory.
os.getcwd()

# Instruments

## Data Load

In [None]:
import pandas as pd
import numpy as np
from algo_trading_utility_script import *

# Build the CSV path with os.path.join so that a DATA_PATH with a trailing
# separator (e.g. "./data/") does not produce a doubled separator.
filename = os.path.join(DATA_PATH, f"futures_{INTERVAL}.csv")
print(filename)
# The "Date" column becomes a parsed datetime index.
futs_df = pd.read_csv(filename, index_col="Date", parse_dates=True)

print(futs_df.shape)

# get_ou is not defined in this notebook (presumably provided by the
# algo_trading_utility_script star import); its two results are stored as the
# half-life and Hurst values of the target future's close series.
HALF_LIFE, HURST = get_ou(futs_df, f'{TARGET_FUT}_Close')

print("Half-Life:", HALF_LIFE)
print("Hurst:", HURST)

futs_df.head(2)

In [None]:
import matplotlib.pyplot as plt

# Line chart of the target future's close price over the whole loaded period.
fig, ax = plt.subplots(figsize=(12, 4))

ax.plot(futs_df[f'{TARGET_FUT}_Close'], label=f'{TARGET_FUT} Close', alpha=0.7)
ax.set_title(f'{TARGET_FUT} Price')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)

fig.tight_layout()
plt.show()

# Prepare the Data

In [None]:
import pickle
from sklearn.preprocessing import StandardScaler, normalize, FunctionTransformer
from tqdm import tqdm

# Placeholders that are overwritten once the training data has been labelled.
BIAS = 0.
CLASS_WEIGHTS = {0: 1., 1: 1.}
SCALERS = None

# Fraction of rows used for training (the remainder is the validation part).
TEST_SPLIT = 0.8
TRAIN_SIZE = int(len(futs_df) * TEST_SPLIT)

# On-disk cache of the processed time series.
CACHE = True
FUTURES_TMP_FILE = "./tmp/futures.pkl"
os.makedirs("./tmp/", exist_ok=True)

# FEATURES_SELECTED from feature selection GBC notebook.
COLS_TO_SCALE = [
    '10Y_Barcount',
    '10Y_Spread',
    '10Y_Volume',
    '2YY_Spread',
    '2YY_Volume',
    'Filtered_X',
    'KG_X',
    'KG_Z1',
    'RTY_Spread',
    'SD',
    'Spread',
    'VXM_Open',
    'VXM_Spread',
    'Volume',
]  # StockFeat.list + MARKET_COLS + BB_COLS + SR_COLS + KF_COLS # FEATURES_SELECTED
FEATURES = FEATURES_SELECTED  # StockFeat.list + MARKET_COLS + KF_COLS + BB_COLS + MOM_COLS + SR_COLS # FEATURES_SELECTED

print(f"Scaling these features: {COLS_TO_SCALE}")
print(f"Training on these features: {FEATURES}")

def oversample_mean_reversions(train_agri_ts, window, period=INTERVAL, hurst=HURST):
    """
    Extend a list of time series with one additionally processed copy per series.

    For every DataFrame a copy is passed to ``param_search_bbs``; the first
    result row whose "Metric" equals "Sharpe" supplies the window and standard
    factor for ``bollinger_band_backtest``. The back-tested frame, restricted to
    the columns of the first input series and with its index reset, is collected.

    Parameters:
    - train_agri_ts: List of DataFrames, one per time series.
    - window: Base window; the search starts at ``window * 2`` with a minimum of ``window // 2``.
    - period: Forwarded to both helper calls.
    - hurst: Forwarded to ``param_search_bbs``.

    Returns:
    - A new list: the original series followed by the generated ones.
    """
    extra_series = []
    for series_df in tqdm(train_agri_ts, desc="oversample_mean_reversions"):
        working_df = series_df.copy()
        search_df = param_search_bbs(
            working_df,
            StockFeatExt.CLOSE,
            period,
            initial_window=window * 2,
            window_min=window // 2,
            hurst=hurst,
        )
        sharpe_rows = search_df[search_df["Metric"] == "Sharpe"]
        working_df, _ = bollinger_band_backtest(
            working_df,
            StockFeatExt.CLOSE,
            sharpe_rows["Window"].iloc[0],
            period,
            std_factor=sharpe_rows["Standard_Factor"].iloc[0],
        )

        # Keep only the columns the first input series has, with a fresh positional index.
        wanted_columns = train_agri_ts[0].columns
        extra_series.append(working_df[wanted_columns].reset_index(drop=True))
    return train_agri_ts + extra_series

def normalize_and_label_data(ts, meta_label=META_LABEL, cols_to_scale=COLS_TO_SCALE, scalers=None):
    """
    Label every time series, scale the selected columns and gather class statistics.

    Parameters:
    - ts: List of DataFrames, one per time series.
    - meta_label: Name of the label column produced by ``aug_metalabel_mr``.
    - cols_to_scale: Columns to standardise; ``None`` disables scaling.
    - scalers: Optional list of already fitted scalers, paired positionally with
      ``ts``. Entries that are ``None`` (or ``scalers=None``) cause a new
      ``StandardScaler`` to be fitted on that series.

    Returns:
    - dfs: List of processed DataFrames (series without any positive label are skipped).
    - class_weights: ``{0: total / n_negative, 1: total / n_positive}`` (0 when a class is absent).
    - bias: ``log(n_positive / n_negative)``, or ``0.`` when either class is absent.
    - scalers: The newly fitted scalers if any were fitted, otherwise the ``scalers`` argument.
    """
    # Alternative transforms kept for experimentation (see the scaler line below).
    def _get_first_difference(data_df):
        return data_df.diff(1).fillna(0)

    def _get_log_returns(data_df):
        return np.log(data_df / data_df.shift(1)).fillna(0)

    if scalers is not None and len(scalers) != len(ts):
        # zip() below stops at the shorter sequence, so surplus series are not processed.
        print(f"Warning: {len(ts)} series but {len(scalers)} scalers; only the first "
              f"{min(len(ts), len(scalers))} series will be processed.")

    y0 = 0
    y1 = 0
    dfs = []
    new_scalers = []
    for df, scaler in tqdm(zip(ts, scalers or [None] * len(ts)), desc="label_data"):
        df = aug_metalabel_mr(df)
        if (df[meta_label] > 0).sum() == 0:
            print("A DS with no Positive Label was found!")
            continue
        y0 += (df[meta_label] == 0).sum()
        y1 += (df[meta_label] > 0).sum()
        if cols_to_scale is not None:
            if scaler is None:
                scaler = StandardScaler() # FunctionTransformer(_get_first_difference) #
                scaler.fit(df[cols_to_scale])
                new_scalers.append(scaler)
            df[cols_to_scale] = scaler.transform(df[cols_to_scale])
            df = df.iloc[1:] # First data is always nan after a transform
        df = df.loc[:, ~df.columns.duplicated(keep="first")]
        dfs.append(df.dropna())

    # Unless we SMOTE, this dataset is imbalanced.
    total = y0 + y1
    class_weight_0 = total / y0 if y0 != 0 else 0
    class_weight_1 = total / y1 if y1 != 0 else 0
    class_weights = {0: class_weight_0, 1: class_weight_1}

    # the bias will shift activation to be more sensible to the imbalance.
    # Guarded like the class weights: with an absent class the log-odds would be
    # a division by zero or +/-inf, which is unusable as an initial output bias.
    if y0 > 0 and y1 > 0:
        bias = np.log(y1 / y0)
    else:
        bias = 0.

    return dfs, class_weights, bias, new_scalers if len(new_scalers) > 0 else scalers

with strategy.scope():
    if os.path.exists(FUTURES_TMP_FILE):
        # A cached pickle exists: reuse the previously processed series.
        # NOTE: pickle executes arbitrary code on load; only use a cache file you produced yourself.
        with open(FUTURES_TMP_FILE, 'rb') as f:
            train_agri_ts, val_agri_ts = pickle.load(f)
    else:
        futs_exog_df = process_exog(MARKET_FUTS, futs_df)
        train_agri_ts, val_agri_ts = process_futures(FUTS, futs_df, futs_exog_df, TRAIN_SIZE, INTERVAL)
        # Same as SMOTE, but reusing the same TS with different MR algos.
        train_agri_ts = oversample_mean_reversions(train_agri_ts, HALF_LIFE)
        val_agri_ts = oversample_mean_reversions(val_agri_ts, HALF_LIFE)
        if CACHE:
            with open(FUTURES_TMP_FILE, 'wb') as f:
                pickle.dump((train_agri_ts, val_agri_ts), f)

    # Scalers are fitted on the training series and then reused for validation.
    train_agri_ts, CLASS_WEIGHTS, BIAS, SCALERS = normalize_and_label_data(
        train_agri_ts, cols_to_scale=COLS_TO_SCALE
    )
    val_agri_ts, val_weights, _, _ = normalize_and_label_data(
        val_agri_ts, cols_to_scale=COLS_TO_SCALE, scalers=SCALERS
    )

print(f"train weights: {CLASS_WEIGHTS}")
print(f"test weights: {val_weights}")
np.shape(train_agri_ts)

In [None]:
# Look at the first validation series: label distribution first ...
sample = val_agri_ts[0]
print(sample[META_LABEL].value_counts())

# ... then the last ten rows that carry a positive label.
sampled_pattenrs = sample.loc[sample[META_LABEL] > 0]
sampled_pattenrs.loc[:, FEATURES + [META_LABEL, "Ret"]].tail(10)

In [None]:
# Number of time steps per input window (511 = 2**9 - 1).
# Original note relates this to the receptive field of stacked dilated
# convolutions: k + (k - 1) * (d - 1) per layer, summed over dilations 2^i
# — TODO confirm the exact formula against the architecture in use.
WINDOW = 511
WINDOW_TMP_PATH = "./tmp/"
# TPU see: https://github.com/tensorflow/tensorflow/issues/41635
# Global batch size: 8 samples (default) per replica of the active strategy.
BATCH_SIZE = strategy.num_replicas_in_sync * 8
print(f"BATCH_SIZE: {BATCH_SIZE}")

def prepare_windows(data_df, label_df, window_size=WINDOW):
    """
    Slice a feature frame into overlapping windows, each paired with the label
    of the row that immediately follows the window.
    IMPORTANT: There is no padding, incomplete time windows are discarded!

    Parameters:
    - data_df: DataFrame containing the features.
    - label_df: Labels aligned with ``data_df`` (may be ``None`` to skip labels).
    - window_size: Number of consecutive rows per window.

    Returns:
    - X: Array of input windows.
    - y: Array of corresponding labels, each wrapped in a one-element list
      (empty when ``label_df`` is ``None``).
    """
    windows = []
    targets = []
    window_count = len(data_df) - window_size
    for start in range(window_count):
        stop = start + window_size
        window_values = data_df.iloc[start:stop].values
        assert not np.isnan(window_values).any(), "NaN values found in input window"
        windows.append(window_values)
        if label_df is None:
            continue
        # The target is the label of the first row after the window.
        label = label_df.iloc[stop]
        targets.append([label])
        assert not np.isnan(label).any(), "NaN values found in target label"
    return np.array(windows), np.array(targets)

def prepare_windows_with_disjoint_ts(ts_list, window_size=WINDOW):
    """
    Generator over the windows of several independent time series.

    Each series is windowed separately, so no window spans two series.

    Parameters:
    - ts_list: List of DataFrames, each containing a time series.
    - window_size: The size of the input window.

    Yields:
    - (features, labels): One input window and its corresponding label.
    """
    for series_df in ts_list:
        windows, targets = prepare_windows(
            series_df[FEATURES], series_df[META_LABEL], window_size=window_size
        )
        yield from zip(windows, targets)

def create_windowed_dataset_from_generator(ts_list, window_size=WINDOW, batch_size=BATCH_SIZE):
    """
    Wrap the window generator in a batched, prefetching ``tf.data.Dataset``.

    Parameters:
    - ts_list: List of DataFrames, each containing a time series.
    - window_size: The size of the input window.
    - batch_size: The batch size for the dataset.

    Returns:
    - dataset: Dataset of ``(features, label)`` batches; features have shape
      ``(window_size, len(FEATURES))`` and labels shape ``(1,)`` per element.
    """
    def _window_stream():
        return prepare_windows_with_disjoint_ts(ts_list, window_size=window_size)

    feature_spec = tf.TensorSpec(shape=(window_size, len(FEATURES)), dtype=tf.float32)
    # Assuming labels are floats for binary classification
    label_spec = tf.TensorSpec(shape=(1,), dtype=tf.float32)

    windowed = tf.data.Dataset.from_generator(
        _window_stream,
        output_signature=(feature_spec, label_spec),
    )
    return windowed.batch(batch_size).prefetch(tf.data.AUTOTUNE)

def create_dataset_from_generator(ts_list, batch_size):
    """
    Build a batched ``tf.data.Dataset`` that yields single rows (no windowing).

    Parameters:
    - ts_list: List of DataFrames; they are concatenated before iteration.
    - batch_size: The batch size for the dataset.

    Returns:
    - dataset: Dataset of ``(features, label)`` batches; per element the features
      have shape ``(len(FEATURES),)`` and the label is a scalar.
    """
    def _row_stream():
        combined_df = pd.concat(ts_list)
        for _, row in combined_df.iterrows():
            yield row[FEATURES].values, row[META_LABEL]

    row_signature = (
        tf.TensorSpec(shape=(len(FEATURES),), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.float32),
    )

    rows = tf.data.Dataset.from_generator(_row_stream, output_signature=row_signature)
    return rows.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Windowed datasets for training and validation, built under the active strategy.
with strategy.scope():
    train_dataset = create_windowed_dataset_from_generator(
        train_agri_ts,
        batch_size=BATCH_SIZE,
    )
    val_dataset = create_windowed_dataset_from_generator(
        val_agri_ts,
        batch_size=BATCH_SIZE,
    )

In [None]:
# INPUT_SHAPE = (len(FEATURES), ) # The expected shape, where the None shape is BATCH_SIZE

sampled_dataset = val_dataset.shuffle(buffer_size=250).take(1)
# Pull one training batch and derive the per-sample input shape from it.
for features, labels in train_dataset.take(1):
    feature_batch = features.numpy()
    # Drop the leading batch axis; what remains is the model's input shape.
    INPUT_SHAPE = feature_batch.shape[1:]
    print("Features:", feature_batch)
    print("Labels:", labels.numpy())

print("INPUT_SHAPE:", INPUT_SHAPE)

# CNN 

## Architecture

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Conv1D, Add, Multiply, Input, Flatten, Dense, GlobalAveragePooling1D, MaxPooling1D, SpatialDropout1D, Activation, Dropout, ReLU, LeakyReLU, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2, l1, l1_l2
from tensorflow.keras.initializers import Constant, HeNormal

# Used as the `name=` argument of the model builders below and in output paths.
MODEL_NAME = None

# Convolution settings.
CONVOLUTIONS = 12
FILTERS = 32
KERNEL_SIZE = 3
MAX_DILATION = 8

# Regularisation settings.
DROPRATE = 0.4
REG_WEIGHTS = 1e-5

# Dense-layer settings.
DENSE_SIZE = 128
DENSE_DEPTH = 8

def resnet_block(in_x, layer_id, filters, kernel_size, reg_param, stride=1):
    """
    One 1-D residual block: Conv-BN-ReLU-Conv-BN plus a shortcut, then ReLU.

    Parameters:
    - in_x: Input tensor.
    - layer_id: Suffix used in all layer names of this block.
    - filters: Number of filters of both convolutions.
    - kernel_size: Kernel size of both convolutions.
    - reg_param: L2 factor applied to every convolution kernel.
    - stride: Stride of the first convolution (and of the projection shortcut).

    Returns:
    - Output tensor of the block.
    """
    def _conv(width, step, label):
        # Every convolution uses 'same' padding and its own L2 regulariser.
        return Conv1D(filters, width,
                      strides=step,
                      padding='same',
                      kernel_regularizer=l2(reg_param),
                      name=f'{label}_{layer_id}')

    # Main path
    residual = _conv(kernel_size, stride, 'conv1')(in_x)
    residual = BatchNormalization(name=f'bn1_{layer_id}')(residual)
    residual = ReLU(name=f'relu1_{layer_id}')(residual)
    residual = _conv(kernel_size, 1, 'conv2')(residual)
    residual = BatchNormalization(name=f'bn2_{layer_id}')(residual)

    # Shortcut: identity unless the channel count or the length changes, in
    # which case a 1x1 convolution projects the input to the new shape.
    shortcut = in_x
    needs_projection = in_x.shape[-1] != filters or stride != 1
    if needs_projection:
        shortcut = _conv(1, stride, 'conv_shortcut')(shortcut)
        shortcut = BatchNormalization(name=f'bn_shortcut_{layer_id}')(shortcut)

    # Merge and activate
    merged = Add(name=f'add_{layer_id}')([residual, shortcut])
    return ReLU(name=f'relu2_{layer_id}')(merged)

def build_resnet_model(input_shape, reg_param=1e-4):
    """
    Build a 1-D ResNet-34 style binary classifier.

    Layout: 7-wide stride-2 stem convolution, max-pooling, four stages of
    residual blocks (3, 4, 6 and 3 blocks with 64, 128, 256 and 512 filters),
    global average pooling and a single sigmoid unit.

    Parameters:
    - input_shape: Shape of one sample, without the batch axis.
    - reg_param: L2 factor applied to all convolution kernels.

    Returns:
    - An uncompiled Keras ``Model`` named "RESNET".
    """
    # Local name only: the module-level MODEL_NAME is NOT modified by this function.
    model_name = "RESNET"

    inputs = Input(shape=input_shape)
    x = Conv1D(64, 7, strides=2, padding='same', kernel_regularizer=l2(reg_param), name='initial_conv')(inputs)
    x = BatchNormalization(name='initial_bn')(x)
    x = ReLU(name='initial_relu')(x)
    x = MaxPooling1D(pool_size=3, strides=2, padding='same', name='initial_maxpool')(x)

    # Incremental Residual Blocks - same as the paper's 34-layer architecture
    filters = 64
    for i in range(3):
        x = resnet_block(x, layer_id=f'conv2_{i}', filters=filters, kernel_size=3, reg_param=reg_param)
    # From conv3 on, only the FIRST block of each stage downsamples (stride 2);
    # the remaining blocks keep the resolution. Striding every later block
    # instead would halve the sequence ten more times and collapse it to
    # length 1 long before the last stage.
    filters = 128
    for i in range(4):
        stride = 2 if i == 0 else 1
        x = resnet_block(x, layer_id=f'conv3_{i}', filters=filters, stride=stride, kernel_size=3, reg_param=reg_param)
    filters = 256
    for i in range(6):
        stride = 2 if i == 0 else 1
        x = resnet_block(x, layer_id=f'conv4_{i}', filters=filters, stride=stride, kernel_size=3, reg_param=reg_param)
    filters = 512
    for i in range(3):
        stride = 2 if i == 0 else 1
        x = resnet_block(x, layer_id=f'conv5_{i}', filters=filters, stride=stride, kernel_size=3, reg_param=reg_param)

    x = GlobalAveragePooling1D(name='global_avg_pool')(x)
    outputs = Dense(1, activation='sigmoid', name='output_dense')(x)

    model = Model(inputs, outputs, name=model_name)
    return model


def dense_residual_block(in_x, units, reg_param, dropout_rate, layer_id):
    """
    Fully connected residual block: Dense-LeakyReLU-BN-Dropout-Dense-BN plus shortcut.

    Parameters:
    - in_x: Input tensor.
    - units: Width of both dense layers.
    - reg_param: Regularisation factor (L2 on the main path, L1+L2 on the projection).
    - dropout_rate: Dropout rate applied between the two dense layers.
    - layer_id: Identifier used in the names of the two main dense layers.

    Returns:
    - Output tensor of the block.
    """
    hidden = Dense(units, kernel_regularizer=l2(reg_param), name=f'dense_{layer_id}_1')(in_x)
    hidden = LeakyReLU()(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(dropout_rate)(hidden)
    hidden = Dense(units, kernel_regularizer=l2(reg_param), name=f'dense_{layer_id}_2')(hidden)
    hidden = BatchNormalization()(hidden)

    shortcut = in_x
    if shortcut.shape[-1] != units:
        # Original RESNet had a Conv1D
        # Widths differ, so project the input to `units` before adding.
        shortcut = Dense(units, kernel_initializer=HeNormal(), kernel_regularizer=l1_l2(reg_param))(shortcut)

    merged = Add()([shortcut, hidden])
    return LeakyReLU()(merged)

def build_deep_resnet_model(input_shape,
                            reg_param=REG_WEIGHTS,
                            dropout_rate=DROPRATE,
                            output_bias=BIAS,
                            dense_units = DENSE_SIZE,
                            dense_layers = DENSE_DEPTH):
    """
    Stack ``dense_layers`` dense residual blocks and finish with a sigmoid unit.

    Parameters:
    - input_shape: Shape of one sample, without the batch axis.
    - reg_param: Regularisation factor passed to every block.
    - dropout_rate: Dropout rate passed to every block.
    - output_bias: Constant used to initialise the bias of the output unit.
    - dense_units: Width of every block.
    - dense_layers: Number of blocks.

    Returns:
    - An uncompiled Keras ``Model`` named after the module-level ``MODEL_NAME``.
    """
    inputs = Input(shape=input_shape)

    hidden = inputs
    block_index = 0
    while block_index < dense_layers:
        hidden = dense_residual_block(hidden, dense_units, reg_param, dropout_rate, block_index)
        block_index += 1

    outputs = Dense(
        1,
        activation='sigmoid',
        name='output_dense',
        bias_initializer=Constant(output_bias),
    )(hidden)
    return Model(inputs, outputs, name=MODEL_NAME)

def build_baseline_model(input_shape,
                        reg_param=REG_WEIGHTS,
                        dropout_rate=DROPRATE,
                        output_bias=BIAS,
                        dense_size = DENSE_SIZE):
    """
    Small baseline: two Dense+LeakyReLU layers, dropout and a sigmoid unit.

    Parameters:
    - input_shape: Shape of one sample, without the batch axis.
    - reg_param: L1+L2 factor applied to both hidden dense kernels.
    - dropout_rate: Dropout rate applied after the second hidden layer.
    - output_bias: Constant used to initialise the bias of the output unit.
    - dense_size: Width of both hidden layers.

    Returns:
    - An uncompiled Keras ``Model`` named after the module-level ``MODEL_NAME``.
    """
    inputs = Input(shape=input_shape)

    hidden = inputs
    for _ in range(2):
        hidden = Dense(
            dense_size,
            kernel_regularizer=l1_l2(reg_param),
            kernel_initializer=HeNormal(),
        )(hidden)
        hidden = LeakyReLU()(hidden)
    hidden = Dropout(dropout_rate)(hidden)

    outputs = Dense(
        1,
        activation='sigmoid',
        name='output_dense',
        bias_initializer=Constant(output_bias),
    )(hidden)

    model = Model(inputs, outputs, name=MODEL_NAME)
    return model

## Training

In [11]:
import tensorflow as tf

from tensorflow.keras.losses import BinaryCrossentropy, BinaryFocalCrossentropy
from tensorflow.keras.metrics import  AUC, Precision, Recall, TruePositives, TrueNegatives, FalsePositives, FalseNegatives
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

import shutil
import json

# Output locations (all derived from MODEL_NAME / MODEL_PATH).
MODEL_DIR = f"models/{MODEL_NAME}.keras"
MODEL_HISTORY = f"{MODEL_PATH}/history.json"
IMAGES_DIR = f"images/{MODEL_NAME}/images"
LOG_BASEPATH = f"logs/{MODEL_NAME}/tb"

# Metric name monitored (as "val_<name>") by the training callbacks.
TARGET_METRIC = "tp"

# Optimisation schedule.
EPOCHS = 30
PATIENCE_EPOCHS = 5
LEARN_RATE = 1e-3
LEARN_RATE_MIN = 1e-5

# Focal-loss parameters; ALPHA is the share of the positive-class weight.
ALPHA = CLASS_WEIGHTS[1] / (CLASS_WEIGHTS[0] + CLASS_WEIGHTS[1])
GAMMA = 2.

PURGE = True
if PURGE:
    # Remove tensorboard logs and other training artefacts for a fresh loop.
    for stale_dir in (LOG_BASEPATH, MODEL_DIR, IMAGES_DIR):
        shutil.rmtree(stale_dir, ignore_errors=True)
for required_dir in (IMAGES_DIR, LOG_BASEPATH, MODEL_DIR):
    os.makedirs(required_dir, exist_ok=True)
print(f"alpha: {ALPHA}, gamma {GAMMA}, bias: {BIAS}")

def build_cnn(input_shape, train_dataset, test_dataset=None,
                lr=LEARN_RATE,
                lr_min=LEARN_RATE_MIN,
                target_metric=TARGET_METRIC,
                patience=PATIENCE_EPOCHS,
                epochs=EPOCHS,
                class_weight=CLASS_WEIGHTS,
                initial_bias = BIAS,
                conv_layers = CONVOLUTIONS,
                max_dilation = MAX_DILATION,
                filters = FILTERS,
                kernel_size = KERNEL_SIZE,
                reg_param = REG_WEIGHTS,
                dropout_rate = DROPRATE,
                dense_units = DENSE_SIZE,
                dense_layers = DENSE_DEPTH // 4):
    """
    Build, compile and train the ResNet classifier.

    Parameters:
    - input_shape: Shape of one sample, without the batch axis.
    - train_dataset: Already batched training ``tf.data.Dataset``.
    - test_dataset: Optional, already batched validation dataset.
    - lr / lr_min: Initial learning rate and lower bound for ReduceLROnPlateau.
    - target_metric: Metric name; ``val_<target_metric>`` is monitored (maximised)
      by EarlyStopping and ReduceLROnPlateau.
    - patience: EarlyStopping patience in epochs.
    - epochs: Maximum number of epochs.
    - class_weight: Class-weight mapping forwarded to ``fit``.
    - reg_param: L2 factor forwarded to ``build_resnet_model``.
    - initial_bias, conv_layers, max_dilation, filters, kernel_size,
      dropout_rate, dense_units, dense_layers: accepted for interface
      compatibility but not used by the ResNet builder.

    Returns:
    - (model, history): The trained model and the Keras ``History`` object.
    """
    model = build_resnet_model(
        input_shape=input_shape,
        reg_param=reg_param
    )
    optimizer = Adam(learning_rate=lr, clipnorm=1.)
    # NOTE(review): per the Keras documentation `alpha` only takes effect when
    # `apply_class_balancing=True`; class imbalance is currently handled through
    # `class_weight` in fit() — confirm which mechanism is intended.
    loss = BinaryFocalCrossentropy (from_logits=False,
                                    alpha=ALPHA,
                                    gamma=GAMMA,
                                    reduction='sum_over_batch_size',
                                    name='bfce')
    model.compile(
        loss=loss,
        optimizer=optimizer,
        metrics=[
            TruePositives(name=TARGET_METRIC), # Max TP
            TrueNegatives(name='tn'),
            FalsePositives(name='fp'),
            FalseNegatives(name='fn'),
            Precision(name='p'),
            Recall(name='r'),
            AUC(name='auc'),
            AUC(name='prc', curve='PR')
        ],
    )
    callbacks = [
        EarlyStopping(
            patience=patience,
            monitor=f"val_{target_metric}",
            restore_best_weights=True,
            mode="max" # TARGET_METRIC max or min
        ),
        ReduceLROnPlateau(
            monitor=f"val_{target_metric}",
            factor=0.5,
            patience=1,
            verbose=1,
            min_lr=lr_min,
            mode="max" # TARGET_METRIC max or min
        ),
        TensorBoard(
            log_dir=LOG_BASEPATH,
            histogram_freq=1,
            write_images=True
        )
    ]
    # The datasets are batched when they are created, so `batch_size` must not
    # be passed to fit() (Keras: do not specify it for dataset inputs).
    history = model.fit(
        train_dataset,
        validation_data=test_dataset,
        epochs=epochs,
        callbacks=callbacks,
        verbose=1,
        class_weight=class_weight
    )
    return model, history

history_dict = None
with strategy.scope():
    if PURGE or not os.path.exists(MODEL_PATH):
        # Fresh run: train, persist the model, then persist the training history.
        print(f"input_shape: {INPUT_SHAPE}")
        model, history = build_cnn(INPUT_SHAPE, train_dataset=train_dataset, test_dataset=val_dataset)
        history_dict = history.history
        model.save(MODEL_PATH)
        # float32 is not directly serializable to JSON
        history_dict = {
            metric_name: [float(value) for value in values]
            for metric_name, values in history_dict.items()
        }
        with open(MODEL_HISTORY, 'w') as f:
            json.dump(history_dict, f)
    else:
        # Reuse the saved model and, when present, its recorded history.
        print(f"Loading model from: {MODEL_PATH}")
        model = tf.keras.models.load_model(MODEL_PATH)
        if os.path.exists(MODEL_HISTORY):
            with open(MODEL_HISTORY, 'r') as f:
                history_dict = json.load(f)
model.summary()

KeyboardInterrupt: 

# Visualize History

In [None]:
def plot_model_stats(history_dict):
    """
    Draw a 2x3 grid of training-vs-validation curves and save it as a PNG.

    Parameters:
    - history_dict: Mapping of metric name to per-epoch values. Must contain
      'loss', 'auc', 'p', 'r', 'tp', 'prc' and their 'val_' counterparts.

    The figure is written to ``{IMAGES_DIR}/{MODEL_NAME}_stats.png`` and shown.
    """
    # (history key, display name, legend location) — one entry per panel, in grid order.
    panels = (
        ('loss', 'Loss', 'upper right'),
        ('auc', 'AUC', 'lower right'),
        ('p', 'Precision', 'lower right'),
        ('r', 'Recall', 'lower right'),
        ('tp', 'True Positives', 'upper right'),
        ('prc', 'PRC', 'lower right'),
    )

    plt.figure(figsize=(18, 10))
    for position, (key, display_name, legend_loc) in enumerate(panels, start=1):
        plt.subplot(2, 3, position)
        plt.plot(history_dict[key], label=f'Train {display_name}')
        plt.plot(history_dict[f'val_{key}'], label=f'Val {display_name}')
        plt.title(f'Model {display_name}')
        plt.xlabel('Epoch')
        plt.ylabel(display_name)
        plt.legend(loc=legend_loc)

    plt.tight_layout()
    plt.savefig(f'{IMAGES_DIR}/{MODEL_NAME}_stats.png')
    plt.show()

# Nothing to plot when a model was loaded without a stored history.
if history_dict is not None:
    plot_model_stats(history_dict)

# Explain and Interpret

In [None]:
import seaborn as sns
from scipy.stats import norm
from sklearn.metrics import accuracy_score, precision_score, recall_score, fbeta_score, roc_auc_score

def print_metrics_and_distribution(model, data, labels):
    """
    Score the model on ``data`` and plot the distribution of predicted probabilities.

    Parameters:
    - model: Object with a ``predict`` method returning one probability per sample.
    - data: Model input.
    - labels: True binary labels; a Series, a 1-D array or an (N, 1) array.

    Returns:
    - metrics_df: One-column DataFrame indexed by metric name. It is also written
      to ``{MODEL_PATH}/stats.json``; the plot goes to
      ``{IMAGES_DIR}/{MODEL_NAME}_pdf.png``.
    """
    # Work with flat 1-D vectors throughout: predict() returns (N, 1) and the
    # labels may arrive as a column vector or a pandas Series.
    ypred_proba = np.asarray(model.predict(data)).ravel()
    y_true = np.asarray(labels).ravel()
    pred = (ypred_proba > 0.5).astype(int)

    metrics = {
        "Accuracy": accuracy_score(y_true, pred),
        "Precision": precision_score(y_true, pred),
        "Recall": recall_score(y_true, pred),
        "F1b Score": fbeta_score(y_true, pred, average="weighted", beta=0.1),
        "ROC AUC": roc_auc_score(y_true, ypred_proba, average='weighted')
    }

    metrics_df = pd.DataFrame.from_dict(metrics, orient='index')

    plt.figure(figsize=(10, 6))
    # A 1-D vector is plotted as a single density (a 2-D array would be read as wide-form data).
    sns.kdeplot(ypred_proba, color='blue', fill=True, alpha=0.7)

    # Overlay the normal distribution fitted to the predicted probabilities.
    mu, std = norm.fit(ypred_proba)
    xmin, xmax = plt.xlim()
    x = np.linspace(xmin, xmax, 100)
    p = norm.pdf(x, mu, std)
    plt.plot(x, p, 'k', linewidth=2)

    plt.title('PDF')
    plt.xlabel('Predicted Probability')
    plt.ylabel('Density')

    plt.tight_layout()
    plt.savefig(f'{IMAGES_DIR}/{MODEL_NAME}_pdf.png')
    plt.show()

    metrics_df.to_json(f"{MODEL_PATH}/stats.json")

    return metrics_df

# Models trained on windowed input (INPUT_SHAPE = (WINDOW, n_features)) must be
# evaluated on windowed data as well. MODEL_NAME alone is not a reliable
# indicator because it stays None unless it is set explicitly.
if MODEL_NAME == "WAVENET" or len(INPUT_SHAPE) > 1:
    test_data, test_labels = prepare_windows(val_agri_ts[0][FEATURES], val_agri_ts[0][META_LABEL], window_size=WINDOW)
else:
    test_data, test_labels = val_agri_ts[0][FEATURES], val_agri_ts[0][META_LABEL]
metrics_df = print_metrics_and_distribution(model, test_data, test_labels)
metrics_df

In [None]:
import seaborn as sns

from tensorflow.math import confusion_matrix

def plot_confusion_matrix(model, data, labels, label_names=['RW', 'MR']):
    """
    Plot the row-normalised (percentage) confusion matrix of a binary classifier.

    Parameters:
    - model: Object with a ``predict`` method returning one probability per sample.
    - data: Model input.
    - labels: True labels; a Series, a 1-D array or an (N, 1) array.
    - label_names: Display names of the classes, in class-index order.

    The figure is written to ``{IMAGES_DIR}/{MODEL_NAME}_cm.png`` and shown.
    """
    ypred_proba = model.predict(data)
    pred = (ypred_proba > 0.5).astype(int)

    print(labels.shape)
    print(pred.shape)
    # np.asarray(...).ravel() also works for pandas Series, which have no .flatten().
    labels = np.asarray(labels).ravel()
    pred = np.asarray(pred).ravel()
    # Fix the matrix size to the number of display names (so a class missing
    # from this sample still gets its row/column) and convert to numpy before
    # doing arithmetic with numpy/pandas.
    cm = confusion_matrix(labels, pred, num_classes=len(label_names)).numpy()

    # Percentages per true class; rows without any sample stay at 0 instead of dividing by zero.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_pct = np.divide(cm * 100.0, row_totals, out=np.zeros(cm.shape, dtype=float), where=row_totals > 0)

    plt.figure(figsize=(8, 6))
    df_cm = pd.DataFrame(cm_pct, index=list(label_names), columns=list(label_names))
    cm_plot = sns.heatmap(df_cm, annot=True, fmt=".2f", cmap='Blues', xticklabels=label_names, yticklabels=label_names)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')

    plt.tight_layout()
    plt.savefig(f'{IMAGES_DIR}/{MODEL_NAME}_cm.png')
    plt.show()

plot_confusion_matrix(model, test_data, test_labels)

## SHAP

In [None]:
import shap
shap.initjs()

RANDOMIZE_SIZE = 32
SAMPLE_SIZE = 25

if MODEL_NAME != "WAVENET":
    # Shap doesn't work with window encoded data.
    background_features, background_labels = train_agri_ts[0][FEATURES].values,train_agri_ts[0][META_LABEL].values
    test_data, test_labels = val_agri_ts[0][FEATURES].values, val_agri_ts[0][META_LABEL].values
    # Only the first SAMPLE_SIZE validation rows are explained.
    test_features, test_labels = test_data[:SAMPLE_SIZE], test_labels[:SAMPLE_SIZE]

    # Register handlers for ops DeepExplainer has no entry for by reusing existing ones.
    shap.explainers._deep.deep_tf.op_handlers["LeakyRelu"] = shap.explainers._deep.deep_tf.op_handlers["Relu"]
    shap.explainers._deep.deep_tf.op_handlers["AddV2"] = shap.explainers._deep.deep_tf.op_handlers["Add"]
    shap.explainers._deep.deep_tf.op_handlers["BatchToSpaceND"] = shap.explainers._deep.deep_tf.op_handlers["Mean"]
    shap.explainers._deep.deep_tf.op_handlers["SpaceToBatchND"] = shap.explainers._deep.deep_tf.op_handlers["Mean"]
    # this is a hack: https://github.com/shap/shap/issues/1463

    e = shap.DeepExplainer(model, background_features)

    shap_values = e.shap_values(test_features)
    # A list holds one array per model output; the model has a single output.
    if isinstance(shap_values, list):
        shap_values = shap_values[0]
    shap_values = np.squeeze(shap_values)

    print(f"SHAP values shape: {shap_values.shape}")
    print(f"Test features shape: {test_features.shape}")
    assert shap_values.shape == test_features.shape
    # show=False keeps the figure open so that savefig() writes the actual plot
    # rather than the empty figure left behind after an implicit plt.show().
    shap.summary_plot(shap_values, test_features, feature_names=FEATURES, show=False)
    plt.savefig(f'{IMAGES_DIR}/{MODEL_NAME}_shap_sum.png')
    plt.show()

In [None]:
if MODEL_NAME != "WAVENET":
    sample_index = 2
    # `background` was never defined. KernelExplainer evaluates the model on
    # every background row, so a small sample of the training features is used
    # (presumably what RANDOMIZE_SIZE was meant for — confirm).
    background = shap.sample(background_features, RANDOMIZE_SIZE)
    e = shap.KernelExplainer(model, background)

    select = range(SAMPLE_SIZE)
    shap_features = test_features[select]
    # NOTE(review): the decision plot below displays these *training* rows next
    # to SHAP values computed for `shap_features` — confirm this is intended.
    train_features = background_features[select]
    shap_values = e.shap_values(shap_features, nsamples=SAMPLE_SIZE)

    # Normalise to a single array first: a list has no `.shape`.
    if isinstance(shap_values, list):
        shap_values = shap_values[0: SAMPLE_SIZE]
    shap_values = np.squeeze(shap_values)
    print(f"SHAP values shape: {shap_values.shape}")

    # With KernelExplainer's default identity link, base value + SHAP values
    # reconstruct the model output, i.e. a sigmoid probability, so the class
    # boundary is 0.5 (a threshold of 0 would classify every row as positive).
    y_pred = (shap_values.sum(1) + e.expected_value) > 0.5
    misclassified = y_pred != test_labels[select]
    print(f"({shap_values.sum(1)} + {e.expected_value}) > 0.5")
    print(f"Misclassified: {int(np.sum(misclassified))} out of {np.shape(y_pred)[0]}")

    print(f"Explainer expected value: {e.expected_value}")
    # NOTE(review): link='logit' assumes SHAP values in log-odds space — confirm
    # against the identity-link explainer used above.
    shap.decision_plot(e.expected_value, shap_values, train_features, feature_names=FEATURES, link='logit', highlight=misclassified)

In [None]:
if MODEL_NAME != "WAVENET":
    if np.any(misclassified):
        # show=False keeps the figure alive so that savefig() stores the plot
        # instead of an empty canvas.
        shap.decision_plot(
            e.expected_value,
            shap_values[misclassified],
            train_features[misclassified],
            link="logit",
            highlight=0,
            feature_names=FEATURES,
            show=False
        )
        plt.savefig(f'{IMAGES_DIR}/{MODEL_NAME}_shap_force.png')
        plt.show()

        # NOTE(review): without matplotlib=True force_plot builds a JavaScript
        # visualisation, so the savefig() below likely does not capture it —
        # confirm the intended export.
        shap.force_plot(
            e.expected_value,
            shap_values[misclassified],
            train_features[misclassified],
            link="logit",
            feature_names=FEATURES
        )
        plt.savefig(f'{IMAGES_DIR}/{MODEL_NAME}_shap_force_misclassed.png')
    else:
        # highlight=0 and the row selection need at least one misclassified sample.
        print("No misclassified samples to plot.")

# Grid Search and CV

In [None]:
from sklearn.model_selection import ParameterGrid, TimeSeriesSplit
from tensorboard.plugins.hparams import api as hp
from tensorflow.summary import create_file_writer

import json

# Search space for the grid search. Every domain is derived from the
# corresponding module-level default (half / default / double where it varies).
HP_KERNEL_SIZE = hp.HParam("kernel_size", hp.Discrete([int(KERNEL_SIZE * 2), int(KERNEL_SIZE), int(KERNEL_SIZE // 2)]))
HP_BATCH_SIZE = hp.HParam("batch_size", hp.Discrete([int(BATCH_SIZE)]))
HP_EPOCHS = hp.HParam("epochs", hp.Discrete([int(EPOCHS)]))
HP_DILATION_RATE = hp.HParam("dilation_rate", hp.Discrete([int(MAX_DILATION), int(MAX_DILATION * 2)]))
HP_DROPOUT_RATE = hp.HParam("dropout_rate", hp.Discrete([float(DROPRATE), float(DROPRATE * 2)]))
HP_REG_WEIGHTS = hp.HParam("reg_weight", hp.Discrete([float(REG_WEIGHTS), float(REG_WEIGHTS / 2)]))
HP_LEARNING_RATE = hp.HParam("learning_rate", hp.Discrete([float(LEARN_RATE)]))
HP_PATIENCE = hp.HParam("patience", hp.Discrete([int(PATIENCE_EPOCHS)]))
HP_DENSE_DEPTH = hp.HParam("dense_depth", hp.Discrete([int(DENSE_DEPTH), int(DENSE_DEPTH * 2)]))
# Dense widths are derived from DENSE_SIZE (the layer width), not from DENSE_DEPTH (the layer count).
HP_DENSE_UNITS = hp.HParam("dense_units", hp.Discrete([int(DENSE_SIZE // 2), int(DENSE_SIZE), int(DENSE_SIZE * 2)]))
HP_FILTERS = hp.HParam("filters", hp.Discrete([int(FILTERS // 2), int(FILTERS), int(FILTERS * 2)]))
HP_CONVOLUTIONS= hp.HParam("convolutions", hp.Discrete([int(CONVOLUTIONS // 2), int(CONVOLUTIONS), int(CONVOLUTIONS * 2)]))
HPARAMS = [
    HP_FILTERS,
    HP_KERNEL_SIZE,
    HP_BATCH_SIZE,
    HP_EPOCHS,
    HP_DILATION_RATE,
    HP_DROPOUT_RATE,
    HP_REG_WEIGHTS,
    HP_LEARNING_RATE,
    HP_PATIENCE,
    HP_DENSE_UNITS,
    HP_DENSE_DEPTH,
    HP_CONVOLUTIONS]

def grid_search_build_cnn(input_shape, train_dataset, test_dataset, hparams=HPARAMS, file_name=f"best_params.json", checkpoint_file = f"checkpoint.json"):
    """
    Exhaustive, resumable grid search over ``hparams`` using ``build_cnn``.

    After every combination a checkpoint (next index and best result so far) is
    written to ``MODEL_DIR`` so that an interrupted search continues where it
    stopped. The combination with the highest final ``val_<TARGET_METRIC>`` wins.

    Parameters:
    - input_shape: Shape of one sample, without the batch axis.
    - train_dataset / test_dataset: Already batched datasets forwarded to ``build_cnn``.
    - hparams: List of ``hp.HParam`` objects spanning the grid.
    - file_name: JSON file (inside ``MODEL_DIR``) receiving the best parameters.
    - checkpoint_file: JSON file (inside ``MODEL_DIR``) holding the resume state.

    Returns:
    - (best_model, best_history, best_params, best_loss, best_metric).
      ``best_model`` and ``best_history`` are ``None`` when no combination
      evaluated in this call improved on the checkpointed best.
    """
    def _save_best_params(best_params, best_loss, best_metric, other_metrics = None, file_name="best_params.json"):
        # Persist the best combination found so far.
        os.makedirs(MODEL_DIR, exist_ok=True)
        with open(f"{MODEL_DIR}/{file_name}", "w") as file:
            json.dump({"best_params": best_params, "best_loss": best_loss, "best_metric": best_metric, 'other_metrics': other_metrics}, file)

    def _load_checkpoint(file_name):
        # Return the stored resume state, or None when it is absent/unreadable.
        # The local must not be called `json`: that would shadow the module and
        # make json.load fail, so no checkpoint would ever be restored.
        state = None
        try:
            os.makedirs(MODEL_DIR, exist_ok=True)
            with open(f"{MODEL_DIR}/{file_name}", "r") as file:
                state = json.load(file)
        except (OSError, ValueError) as e:
            print(f"File {MODEL_DIR}/{file_name} not found or error {e}")
        return state

    def _save_checkpoint(state, file_name):
        # Persist the resume state.
        os.makedirs(MODEL_DIR, exist_ok=True)
        with open(f"{MODEL_DIR}/{file_name}", "w") as file:
            json.dump(state, file)

    def _last(history_dict, name):
        # Final-epoch value of a metric, or None when it was not recorded.
        values = history_dict.get(name)
        return float(values[-1]) if values else None

    with create_file_writer(f"{LOG_BASEPATH}/hparam_tuning").as_default():
        hp.hparams_config(
            hparams=hparams,
            metrics=[hp.Metric(TARGET_METRIC, display_name=TARGET_METRIC)],
        )

    start_index = 0
    best_loss = np.inf
    best_metric = -np.inf
    best_params = None
    # Initialised so that the return statement is valid even if nothing improves.
    best_model = None
    best_history = None
    checkpoint = _load_checkpoint(checkpoint_file)
    if checkpoint:
        start_index = checkpoint['next_index']
        best_loss = checkpoint['best_loss']
        best_metric = checkpoint['best_metric']
        best_params = checkpoint['best_params']

    grid = list(ParameterGrid({h.name: h.domain.values for h in hparams}))
    for index, hp_values in enumerate(tqdm(grid[start_index:], desc="Grid Search.."), start=start_index):
        # "batch_size" is not forwarded: the datasets are batched when they are created.
        model, history = build_cnn(input_shape,
                                   train_dataset, test_dataset=test_dataset,
                                   lr=hp_values["learning_rate"],
                                   lr_min=LEARN_RATE_MIN,
                                   target_metric=TARGET_METRIC,
                                   patience=hp_values["patience"],
                                   epochs=hp_values["epochs"],
                                   conv_layers=hp_values["convolutions"],
                                   max_dilation=hp_values["dilation_rate"],
                                   filters=hp_values["filters"],
                                   kernel_size=hp_values["kernel_size"],
                                   reg_param=hp_values["reg_weight"],
                                   dropout_rate=hp_values["dropout_rate"],
                                   dense_units=hp_values["dense_units"],
                                   dense_layers=hp_values["dense_depth"])

        history_dict = history.history
        loss = float(history_dict["val_loss"][-1])
        metric = float(history_dict[f"val_{TARGET_METRIC}"][-1])
        if (metric > best_metric):
            best_history = history
            best_loss = loss
            best_metric = metric
            best_model = model
            best_params = hp_values
            # 'ba' is only reported when the model was compiled with such a
            # metric; missing metrics are stored as null instead of raising.
            other_metrics = {
                f"{TARGET_METRIC}": _last(history_dict, f"{TARGET_METRIC}"),
                f"v_{TARGET_METRIC}": _last(history_dict, f"val_{TARGET_METRIC}"),
                'ba': _last(history_dict, "ba"),
                'v_ba': _last(history_dict, "val_ba"),
            }
            _save_best_params(best_params, best_loss, best_metric, other_metrics, file_name)
        checkpoint_state = {
            'next_index': index + 1,
            'best_loss': best_loss,
            'best_metric': best_metric,
            'best_params': best_params
        }
        _save_checkpoint(checkpoint_state, checkpoint_file)
    return best_model, best_history, best_params, best_loss, best_metric

# Opt-in switch: the grid search trains one model per parameter combination.
PARAM_SEARCH = False
if PARAM_SEARCH:
    with strategy.scope():
        search_result = grid_search_build_cnn(INPUT_SHAPE, train_dataset, val_dataset)
        model, history, best_params, best_loss, best_metric = search_result
        print(best_params)
        print(best_metric)