In [1]:
!pip install tensorflow==2.14.0 -q

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.
tensorflow-decision-forests 1.8.1 requires tensorflow~=2.15.0, but you have tensorflow 2.14.0 which is incompatible.
tensorflow-serving-api 2.14.1 requires tensorflow<3,>=2.14.1, but you have tensorflow 2.14.0 which is incompatible.
tensorflow-text 2.15.0 requires tensorflow<2.16,>=2.15.0; platform_machine != "arm64" or platform_system != "Darwin", but you have tensorflow 2.14.0 which is incompatible.
tf-keras 2.15.1 requires tensorflow<2.16,>=2.15, but you have tensorflow 2.14.0 which is incompatible.[0m[31m
[0m

In [2]:
# Experimental data: signal_id values that later cells look up in the test
# metadata and append to the training set (exp_true rows with target 1,
# exp_false rows with target 0).
# The ids come in runs of three consecutive values (presumably the three phases
# of one measurement -- confirm against metadata_test.csv), so only the first id
# of each run is listed and the comprehension expands it back to three ids.
exp_true = [
    first_id + offset
    for first_id in (
        9423, 9597, 10248, 11523, 12036, 12222, 13041,
        14028, 14472, 15540, 17289, 17685, 19458, 22827,
        22938, 23286, 24168, 25143, 26010,
    )
    for offset in range(3)
]
exp_false = [
    first_id + offset
    for first_id in (
        8769, 9810, 11178, 12186, 13968, 14187, 15177,
        16920, 18630, 18828, 19308, 20832, 26202,
    )
    for offset in range(3)
]

In [3]:
import os 
import numpy as np
import pandas as pd
import pyarrow.parquet as pq # Used to read the data
from joblib import Parallel, delayed
from tqdm import tqdm # Processing time measurement

import tensorflow as tf

from keras.layers import * # Keras is the most friendly Neural Network library, this Kernel use a lot of layers classes
from keras.models import Model
from keras import backend as K # The backend give us access to tensorflow operations and allow us to create the Attention class
from keras import optimizers # Allow us to access the Adam class to modify some parameters
from keras.callbacks import * # This object helps the model to train in a smarter way, avoiding overfitting

from sklearn.model_selection import GridSearchCV, StratifiedKFold # Used to use Kfold to train our model
from sklearn.model_selection import train_test_split 
from sklearn.metrics import matthews_corrcoef

2024-05-20 07:14:37.360626: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-20 07:14:37.360684: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-20 07:14:37.360721: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
# Number of cross-validation folds passed to StratifiedKFold in the training loop
N_SPLITS = 5
# Number of samples in one signal; transform_ts cuts this many samples into buckets
sample_size = 800000

# Upper bound on the number of joblib workers started by get_features
MAX_THREADS = 4
# Seed shared by NumPy, TensorFlow, the weight initializers and the fold shuffling
RANDOM_SEED = 2019

In [5]:
# Seed the global NumPy and TensorFlow random generators with the notebook-wide seed
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

In [6]:
# https://www.kaggle.com/suicaokhoailang/lstm-attention-baseline-0-652-lb

class Attention(Layer):
    """Attention-weighted sum over the step axis of a 3-D input.

    For an input of shape (batch, steps, features) the layer scores every step
    with a learned vector ``W`` (plus an optional per-step bias ``b``), squashes
    the scores with tanh, normalises their exponentials over the step axis and
    returns the weighted sum of the steps, i.e. a (batch, features) tensor.

    Args:
        step_dim: number of steps (axis 1) of the input; checked in ``build``.
        W_regularizer: regularizer (or identifier) applied to ``W``.
        b_regularizer: regularizer (or identifier) applied to ``b``.
        W_constraint: constraint (or identifier) applied to ``W``.
        b_constraint: constraint (or identifier) applied to ``b``.
        bias: when True a per-step bias is added to the scores.
        **kwargs: forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        # Initialise the base Layer before assigning any attribute on it.
        super(Attention, self).__init__(**kwargs)
        self.supports_masking = True
        self.init = tf.keras.initializers.glorot_uniform(RANDOM_SEED)

        self.W_regularizer = tf.keras.regularizers.get(W_regularizer)
        self.b_regularizer = tf.keras.regularizers.get(b_regularizer)

        self.W_constraint = tf.keras.constraints.get(W_constraint)
        self.b_constraint = tf.keras.constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        # Filled in by build() once the input shape is known.
        self.features_dim = 0

    def build(self, input_shape):
        """Create ``W`` (one weight per feature) and, optionally, ``b`` (one per step).

        Raises:
            ValueError: if the input's step axis does not match ``step_dim``.
        """
        assert len(input_shape) == 3

        # call() reshapes the scores to (-1, step_dim); a mismatch would either
        # fail with an obscure shape error or silently mix samples together.
        if input_shape[1] is not None and input_shape[1] != self.step_dim:
            raise ValueError(
                'Attention was created with step_dim={} but the input has {} steps'.format(
                    self.step_dim, input_shape[1]))

        self.W = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight(shape=(input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        """The step axis is summed away, so no mask is propagated downstream."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over its step axis."""
        features_dim = self.features_dim
        step_dim = self.step_dim

        # Score of every step: dot product of its feature vector with W,
        # computed on the flattened (batch * steps, features) view.
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # Masked steps get zero weight before normalisation.
        if mask is not None:
            a *= K.cast(mask, K.floatx())

        # Normalise over the step axis; epsilon guards against a zero sum.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Output keeps the batch axis and the feature axis only."""
        return input_shape[0],  self.features_dim

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from a saved model."""
        config = super(Attention, self).get_config()
        config.update({
            'step_dim': self.step_dim,
            'W_regularizer': tf.keras.regularizers.serialize(self.W_regularizer),
            'b_regularizer': tf.keras.regularizers.serialize(self.b_regularizer),
            'W_constraint': tf.keras.constraints.serialize(self.W_constraint),
            'b_constraint': tf.keras.constraints.serialize(self.b_constraint),
            'bias': self.bias,
        })
        return config

In [7]:
# Read the training metadata and key it by (id_measurement, phase);
# the original author notes that the index makes data access much faster.
df_train = (
    pd.read_csv('/kaggle/input/vsb-power-line-fault-detection/metadata_train.csv')
    .set_index(['id_measurement', 'phase'])
)
df_train.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,signal_id,target
id_measurement,phase,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0,0
0,1,1,0
0,2,2,0
1,0,3,1
1,1,4,1


In [8]:
def get_features(dataset='train', split_parts=10):
    """Build, or load from the local .npy cache, the feature array for a dataset.

    Args:
        dataset: 'train' or 'test'.
        split_parts: number of chunks the measurements are split into; the
            chunks are processed by at most MAX_THREADS parallel joblib workers.

    Returns:
        (X, y): X stacks the prep_data output of every chunk along axis 0.
        y holds the phase-0 'target' of every measurement, in the same row
        order as X, or None when the metadata has no 'target' column
        (and always None when test features are loaded from the cache).

    Raises:
        ValueError: if ``dataset`` is neither 'train' nor 'test'.
    """
    if dataset == 'train':
        cache_file = 'X.npy'
        meta_file = '/kaggle/input/vsb-power-line-fault-detection/metadata_train.csv'
    elif dataset == 'test':
        cache_file = 'X_test.npy'
        meta_file = '/kaggle/input/vsb-power-line-fault-detection/metadata_test.csv'
    else:
        # Without this branch an unknown name crashed later on an unbound variable.
        raise ValueError("Unknown dataset")
    label_file = 'y.npy'

    # The train cache is only usable when features AND labels are both on disk.
    cache_ready = os.path.isfile(cache_file) and (
        dataset != 'train' or os.path.isfile(label_file))
    if cache_ready:
        X = np.load(cache_file)
        y = np.load(label_file) if dataset == 'train' else None
        return X, y

    meta_df = pd.read_csv(meta_file)

    # One row per measurement, one column per phase, cells hold the signal ids.
    signal_table = meta_df.pivot(index='id_measurement', columns='phase', values='signal_id')
    chunks = np.array_split(signal_table.values, split_parts, axis=0)
    parts = Parallel(n_jobs=min(split_parts, MAX_THREADS), verbose=1)(
        delayed(prep_data)(chunk, dataset) for chunk in chunks)
    X = np.concatenate(parts, axis=0)

    if 'target' in meta_df.columns:
        # Look the labels up by id_measurement so they follow the pivot's row
        # order (the order of X) instead of relying on the CSV's row order.
        phase0_target = meta_df.loc[meta_df['phase'] == 0].set_index('id_measurement')['target']
        y = phase0_target.loc[signal_table.index].values
    else:
        y = None

    np.save(cache_file, X)
    if dataset == 'train':
        np.save(label_file, y)
    return X, y

In [9]:
# Value range of the raw measurements, used by transform_ts for min-max scaling.
# The original author reports having extracted these bounds from the train data in another notebook.
max_num = 127
min_num = -128

In [10]:
def min_max_transf(ts, min_data, max_data, range_needed=(-1,1)):
    """Linearly rescale ``ts`` from [min_data, max_data] onto ``range_needed``.

    With this notebook's bounds it maps raw values in -128..127 onto -1..1.
    Original author's note: the scaling is expected to help the neural network
    train, but the model was not tested without it.

    Args:
        ts: scalar or array of values to rescale.
        min_data: value that is mapped to ``range_needed[0]``.
        max_data: value that is mapped to ``range_needed[1]``.
        range_needed: (low, high) bounds of the target interval.

    Returns:
        The rescaled value(s), same shape as ``ts``.
    """
    lower = range_needed[0]
    upper = range_needed[1]
    # Position of every value inside the source interval, as a 0..1 fraction
    fraction = (ts - min_data) / (max_data - min_data)
    return fraction * (upper - lower) + lower

In [11]:
# This is one of the most important pieces of code in this kernel.
# Every power line has 3 phases of 800000 measurements each, i.e. 2.4 million values,
# which is practically impossible to feed to a neural network directly.
# The idea is to reduce each phase to a matrix of <n_dim> buckets by n features,
# where each bucket covers 5000 consecutive measurements (800000 / 160).
def transform_ts(ts, n_dim=160, min_max=(-1,1)):
    """Summarise one signal as per-bucket statistics.

    Args:
        ts: 1-D numpy array holding one signal of ``sample_size`` raw values.
        n_dim: number of buckets the signal is cut into.
        min_max: target range the raw values are rescaled to before the
            statistics are computed.

    Returns:
        Array with one row per bucket and 19 columns: mean, std, mean + std,
        mean - std, max - min, the 0/1/25/50/75/99/100th percentiles, and the
        same seven percentiles relative to the mean.
    """
    # Rescale the raw values; min_max was previously accepted but never used.
    ts_std = min_max_transf(ts, min_data=min_num, max_data=max_num, range_needed=min_max)
    # Bucket (chunk) size, 5000 with the default settings (800000 // 160)
    bucket_size = sample_size // n_dim
    new_ts = []
    # No progress bar here: this loop runs once per signal and a bar per signal
    # floods the notebook output; prep_data already reports per-measurement progress.
    for start in range(0, sample_size, bucket_size):
        ts_range = ts_std[start:start + bucket_size]
        mean = ts_range.mean()
        std = ts_range.std()
        # One-standard-deviation band around the mean
        std_top = mean + std
        std_bot = mean - std
        # Percentiles describe the distribution of the values inside the bucket
        percentil_calc = np.percentile(ts_range, [0, 1, 25, 50, 75, 99, 100])
        # Amplitude of the bucket: max (100th percentile) minus min (0th percentile)
        max_range = percentil_calc[-1] - percentil_calc[0]
        # Percentiles relative to the mean; the original author suggests this may expose asymmetry
        relative_percentile = percentil_calc - mean
        new_ts.append(np.concatenate([np.asarray([mean, std, std_top, std_bot, max_range]),percentil_calc, relative_percentile]))
    return np.asarray(new_ts)

In [12]:
def prep_data(signal_ids, dataset="train"):
    """Load the requested signals from parquet and turn them into feature matrices.

    Args:
        signal_ids: 2-D array-like; each row lists the signal ids of one measurement.
        dataset: "train" or "test", selects which parquet file is read.

    Returns:
        Array with one entry per row of ``signal_ids``; each entry is the
        transform_ts output of that row's signals concatenated along axis 1.

    Raises:
        ValueError: if ``dataset`` is neither "train" nor "test".
    """
    # Parquet columns are named after the signal id, as strings
    wanted_columns = [str(i) for i in np.concatenate(signal_ids)]
    if dataset == "train":
        parquet_path = '/kaggle/input/vsb-power-line-fault-detection/train.parquet'
    elif dataset == "test":
        parquet_path = '/kaggle/input/vsb-power-line-fault-detection/test.parquet'
    else:
        raise ValueError("Unknown dataset")
    praq_data = pq.read_pandas(parquet_path, columns=wanted_columns).to_pandas()

    features = []
    for sids in tqdm(signal_ids):
        # Rows of `signals` are the individual signals of this measurement
        signals = praq_data[[str(s) for s in sids]].values.T
        per_signal = [transform_ts(signal) for signal in signals]
        features.append(np.concatenate(per_signal, axis=1))
    return np.asarray(features)

In [13]:
%%time
# Build the training features and labels, or load them from X.npy / y.npy when cached
X, y = get_features("train", split_parts=6)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
  0%|          | 0/484 [00:00<?, ?it/s]
  0%|          | 0/484 [00:00<?, ?it/s][A
  0%|          | 0/160 [00:00<?, ?it/s][A
100%|██████████| 160/160 [00:00<00:00, 1578.84it/s][A

  0%|          | 0/160 [00:00<?, ?it/s][A
100%|██████████| 160/160 [00:00<00:00, 1540.95it/s][A

  0%|          | 0/160 [00:00<?, ?it/s][A
  0%|          | 0/160 [00:00<?, ?it/s][A
100%|██████████| 160/160 [00:00<00:00, 1995.16it/s][A

100%|██████████| 160/160 [00:00<00:00, 1378.93it/s]

100%|██████████| 160/160 [00:00<00:00, 1532.16it/s][A

  0%|          | 0/160 [00:00<?, ?it/s][A
  0%|          | 0/484 [00:00<?, ?it/s][A
100%|██████████| 160/160 [00:00<00:00, 2080.19it/s]
100%|██████████| 160/160 [00:00<00:00, 2149.10it/s]

  0%|          | 1/484 [00:00<02:48,  2.87it/s]
  0%|          | 0/160 [00:00<?, ?it/s][A
100%|██████████| 160/160 [00:00<00:00, 1731.62it/s][A
100%|██████████| 160/160 [00:00<00:00, 1479.38it/s]
  0%

CPU times: user 3.05 s, sys: 2.27 s, total: 5.32 s
Wall time: 3min 55s


In [14]:
# Sanity check: X and y should have the same number of rows (one label per measurement)
print(X.shape, y.shape)

(2904, 160, 57) (2904,)


In [15]:
%%time
# Load the test metadata (signal_id / id_measurement / phase);
# the measurements themselves are read later from the parquet file.
meta_test = pd.read_csv('/kaggle/input/vsb-power-line-fault-detection/metadata_test.csv')

CPU times: user 8.71 ms, sys: 3.27 ms, total: 12 ms
Wall time: 17.8 ms


In [16]:
# Index the test metadata by signal_id and preview it.
# NOTE: meta_test is rebound in place, so re-running this cell fails because
# signal_id is no longer a column after the first run.
meta_test = meta_test.set_index(['signal_id'])
meta_test.head()

Unnamed: 0_level_0,id_measurement,phase
signal_id,Unnamed: 1_level_1,Unnamed: 2_level_1
8712,2904,0
8713,2904,1
8714,2904,2
8715,2905,0
8716,2905,1


In [17]:
# Turn signal_id back into a regular column so it can be matched with isin()
tmp = meta_test.reset_index()

In [18]:
# Flag the metadata rows whose signal_id is listed in exp_true / exp_false,
# then keep every third flag so there is one boolean per group of three rows.
# NOTE(review): assumes the metadata holds exactly three consecutive rows per
# measurement, in the same order as the rows of the test feature array -- confirm.
true_mask = tmp["signal_id"].isin(exp_true).values[::3]
false_mask = tmp["signal_id"].isin(exp_false).values[::3]

In [19]:
%%time
MAX_THREADS = 4
X_test_input, _ = get_features("test")

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
  0%|          | 0/678 [00:00<?, ?it/s]
  0%|          | 0/678 [00:00<?, ?it/s][A
100%|██████████| 160/160 [00:00<00:00, 1954.71it/s]

  0%|          | 0/678 [00:00<?, ?it/s][A
100%|██████████| 160/160 [00:00<00:00, 2177.93it/s]

100%|██████████| 160/160 [00:00<00:00, 2167.60it/s]

100%|██████████| 160/160 [00:00<00:00, 2161.22it/s]

  0%|          | 0/160 [00:00<?, ?it/s][A
100%|██████████| 160/160 [00:00<00:00, 1584.42it/s][A

100%|██████████| 160/160 [00:00<00:00, 2039.08it/s]
  0%|          | 1/678 [00:00<02:50,  3.96it/s]
100%|██████████| 160/160 [00:00<00:00, 2170.49it/s]
  0%|          | 1/678 [00:00<02:42,  4.17it/s]
  0%|          | 0/160 [00:00<?, ?it/s][A
100%|██████████| 160/160 [00:00<00:00, 2064.84it/s][A

100%|██████████| 160/160 [00:00<00:00, 1273.21it/s]

100%|██████████| 160/160 [00:00<00:00, 1949.46it/s]

100%|██████████| 160/160 [00:00<00:00, 1963.15it/s]
  0%|          | 0/678 [00:00<?

CPU times: user 6.34 s, sys: 5.31 s, total: 11.6 s
Wall time: 8min 15s


In [20]:
# Shape of the test feature array
X_test_input.shape

(6779, 160, 57)

In [21]:
# Pick the test feature rows flagged by the masks built from exp_true / exp_false
true_X_test = X_test_input[true_mask]
false_X_test = X_test_input[false_mask]

In [22]:
# The row counts should equal the number of id triples in exp_true and exp_false respectively
print(true_X_test.shape)
print(false_X_test.shape)

(19, 160, 57)
(13, 160, 57)


In [23]:
# Append the selected test rows to the training features: exp_true rows first, then exp_false rows.
# NOTE: X is rebound from itself, so running this cell twice appends the rows twice.
XX = np.append(X, true_X_test, axis=0)
X = np.append(XX, false_X_test, axis=0)

In [24]:
# Labels for the test rows appended to X, in the same order: 1 for every
# exp_true row, then 0 for every exp_false row. The counts are taken from the
# arrays that were actually appended, so X and y stay the same length even if
# an id from the lists is missing from the test metadata.
y = np.append(y, [1] * len(true_X_test) + [0] * len(false_X_test), axis=0)

In [25]:
# After the append, X and y should again have the same number of rows
print(X.shape)
print(y.shape)

(2936, 160, 57)
(2936,)


In [26]:
def mcc(y_true, y_pred):
    """Matthews correlation coefficient of predictions thresholded at 0.5.

    Args:
        y_true: tensor of 0/1 ground-truth labels.
        y_pred: tensor of predicted probabilities.

    Returns:
        Scalar float32 tensor; the small constant under the square root keeps
        the division finite when a row or column of the confusion matrix is empty.
    """
    actual_pos = tf.cast(y_true, tf.float32)
    predicted_pos = tf.cast(tf.math.greater(y_pred, 0.5), tf.float32)
    actual_neg = 1 - actual_pos
    predicted_neg = 1 - predicted_pos

    # Confusion-matrix counts
    tp = tf.reduce_sum(actual_pos * predicted_pos)
    tn = tf.reduce_sum(actual_neg * predicted_neg)
    fp = tf.reduce_sum(actual_neg * predicted_pos)
    fn = tf.reduce_sum(actual_pos * predicted_neg)

    covariance = (tp * tn - fp * fn)
    scale = tf.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) + 1e-15)

    return covariance / scale

In [33]:
# Builds the LSTM + attention classifier
def model_lstm(input_shape):
    """Create and compile the bidirectional-LSTM + attention binary classifier.

    Args:
        input_shape: shape of the training array, indexed as
            (samples, steps, features); only entries 1 and 2 are used.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting the ``mcc`` metric.
    """
    n_steps, n_features = input_shape[1], input_shape[2]
    inp = Input(shape=(n_steps, n_features,))

    init_glorot_uniform = tf.keras.initializers.glorot_uniform(seed=RANDOM_SEED)
    init_orthogonal = tf.keras.initializers.orthogonal(seed=RANDOM_SEED)

    def bi_lstm(units):
        # Bidirectional: the steps are processed both first-to-last and last-to-first.
        # The original author reports having compared the model with and without it.
        return Bidirectional(LSTM(units, return_sequences=True, kernel_initializer=init_glorot_uniform, recurrent_initializer=init_orthogonal))

    # Two stacked recurrent layers with 128 and 64 cells; per the original notes,
    # too many cells can overfit and too few can underfit, and the second layer
    # adds capacity at the cost of a higher overfitting risk.
    x = inp
    for units in (128, 64):
        x = bi_lstm(units)(x)
    # Attention pools the whole sequence instead of leaving the decision to the
    # last recurrent step, which helps with signals located mid-sequence.
    x = Attention(n_steps)(x)
    # An intermediate fully connected layer helps with non-linear outputs
    x = Dense(64, activation="relu", kernel_initializer=init_glorot_uniform)(x)
    # Binary classification: a single sigmoid unit, output shape (1,)
    x = Dense(1, activation="sigmoid", kernel_initializer=init_glorot_uniform)(x)
    model = Model(inputs=inp, outputs=x)
    # The mcc metric is what the checkpoint callback monitors during training
    model.compile(loss='binary_crossentropy', optimizer='adam',metrics=[mcc])

    return model

In [34]:
# Cross-validated training: one model per fold, out-of-fold predictions collected at the end

# Stratified fold indexes (N_SPLITS folds, shuffled with the notebook seed)
splits = list(StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_SEED).split(X, y))
preds_val = []
y_val = []
# tqdm wraps the list itself (not the enumerate object) so the bar knows the total
for idx, (train_idx, val_idx) in enumerate(tqdm(splits)):
    # Reset Keras' global state so every fold starts from a clean session
    K.clear_session()
    print("Beginning fold {}".format(idx+1))
    # Use the fold indexes to extract the train and validation data
    train_X, train_y, val_X, val_y = X[train_idx], y[train_idx], X[val_idx], y[val_idx]
    # Fresh model for this fold
    model = model_lstm(train_X.shape)
    weights_path = '/kaggle/working/weights_{}.h5'.format(idx)
    # Keep only the weights of the epoch with the best validation mcc, which limits overfitting.
    # NOTE(review): Keras averages a function metric over the validation batches, so
    # val_mcc is presumably a mean of per-batch values rather than the MCC of the whole fold.
    ckpt = tf.keras.callbacks.ModelCheckpoint(weights_path, save_best_only=True, save_weights_only=True, verbose=1, monitor='val_mcc', mode='max')
    # validation_data is passed as the documented (x_val, y_val) tuple
    model.fit(train_X, train_y, batch_size=128, epochs=100, validation_data=(val_X, val_y), callbacks=[ckpt])
    # Restore the best weights saved by the checkpoint
    model.load_weights(weights_path)
    # Out-of-fold predictions and their true labels
    preds_val.append(model.predict(val_X, batch_size=512))
    y_val.append(val_y)

# Concatenate the folds (dropping the trailing output axis) and show the shapes
preds_val = np.concatenate(preds_val)[...,0]
y_val = np.concatenate(y_val)
preds_val.shape, y_val.shape

0it [00:00, ?it/s]

Beginning fold 1
Epoch 1/100
Epoch 1: val_mcc improved from -inf to 0.00000, saving model to /kaggle/working/weights_0.h5
Epoch 2/100
Epoch 2: val_mcc improved from 0.00000 to 0.17387, saving model to /kaggle/working/weights_0.h5
Epoch 3/100
Epoch 3: val_mcc improved from 0.17387 to 0.43286, saving model to /kaggle/working/weights_0.h5
Epoch 4/100
Epoch 4: val_mcc improved from 0.43286 to 0.55890, saving model to /kaggle/working/weights_0.h5
Epoch 5/100
Epoch 5: val_mcc improved from 0.55890 to 0.57274, saving model to /kaggle/working/weights_0.h5
Epoch 6/100
Epoch 6: val_mcc improved from 0.57274 to 0.73749, saving model to /kaggle/working/weights_0.h5
Epoch 7/100
Epoch 7: val_mcc did not improve from 0.73749
Epoch 8/100
Epoch 8: val_mcc improved from 0.73749 to 0.74440, saving model to /kaggle/working/weights_0.h5
Epoch 9/100
Epoch 9: val_mcc improved from 0.74440 to 0.75739, saving model to /kaggle/working/weights_0.h5
Epoch 10/100
Epoch 10: val_mcc did not improve from 0.75739
Epoc

1it [22:59, 1379.05s/it]

Beginning fold 2
Epoch 1/100
Epoch 1: val_mcc improved from -inf to 0.00000, saving model to /kaggle/working/weights_1.h5
Epoch 2/100
Epoch 2: val_mcc did not improve from 0.00000
Epoch 3/100
Epoch 3: val_mcc improved from 0.00000 to 0.53739, saving model to /kaggle/working/weights_1.h5
Epoch 4/100
Epoch 4: val_mcc improved from 0.53739 to 0.59808, saving model to /kaggle/working/weights_1.h5
Epoch 5/100
Epoch 5: val_mcc improved from 0.59808 to 0.68286, saving model to /kaggle/working/weights_1.h5
Epoch 6/100
Epoch 6: val_mcc improved from 0.68286 to 0.75547, saving model to /kaggle/working/weights_1.h5
Epoch 7/100
Epoch 7: val_mcc did not improve from 0.75547
Epoch 8/100
Epoch 8: val_mcc did not improve from 0.75547
Epoch 9/100
Epoch 9: val_mcc did not improve from 0.75547
Epoch 10/100
Epoch 10: val_mcc did not improve from 0.75547
Epoch 11/100
Epoch 11: val_mcc did not improve from 0.75547
Epoch 12/100
Epoch 12: val_mcc did not improve from 0.75547
Epoch 13/100
Epoch 13: val_mcc did

2it [46:31, 1398.77s/it]

Beginning fold 3
Epoch 1/100
Epoch 1: val_mcc improved from -inf to 0.00000, saving model to /kaggle/working/weights_2.h5
Epoch 2/100
Epoch 2: val_mcc did not improve from 0.00000
Epoch 3/100
Epoch 3: val_mcc improved from 0.00000 to 0.35032, saving model to /kaggle/working/weights_2.h5
Epoch 4/100
Epoch 4: val_mcc did not improve from 0.35032
Epoch 5/100
Epoch 5: val_mcc improved from 0.35032 to 0.59343, saving model to /kaggle/working/weights_2.h5
Epoch 6/100
Epoch 6: val_mcc improved from 0.59343 to 0.66435, saving model to /kaggle/working/weights_2.h5
Epoch 7/100
Epoch 7: val_mcc did not improve from 0.66435
Epoch 8/100
Epoch 8: val_mcc improved from 0.66435 to 0.66908, saving model to /kaggle/working/weights_2.h5
Epoch 9/100
Epoch 9: val_mcc did not improve from 0.66908
Epoch 10/100
Epoch 10: val_mcc did not improve from 0.66908
Epoch 11/100
Epoch 11: val_mcc improved from 0.66908 to 0.68069, saving model to /kaggle/working/weights_2.h5
Epoch 12/100
Epoch 12: val_mcc did not impro

3it [1:09:17, 1383.78s/it]

Beginning fold 4
Epoch 1/100
Epoch 1: val_mcc improved from -inf to 0.00000, saving model to /kaggle/working/weights_3.h5
Epoch 2/100
Epoch 2: val_mcc did not improve from 0.00000
Epoch 3/100
Epoch 3: val_mcc improved from 0.00000 to 0.42373, saving model to /kaggle/working/weights_3.h5
Epoch 4/100
Epoch 4: val_mcc improved from 0.42373 to 0.56223, saving model to /kaggle/working/weights_3.h5
Epoch 5/100
Epoch 5: val_mcc improved from 0.56223 to 0.59936, saving model to /kaggle/working/weights_3.h5
Epoch 6/100
Epoch 6: val_mcc improved from 0.59936 to 0.60815, saving model to /kaggle/working/weights_3.h5
Epoch 7/100
Epoch 7: val_mcc did not improve from 0.60815
Epoch 8/100
Epoch 8: val_mcc did not improve from 0.60815
Epoch 9/100
Epoch 9: val_mcc did not improve from 0.60815
Epoch 10/100
Epoch 10: val_mcc did not improve from 0.60815
Epoch 11/100
Epoch 11: val_mcc did not improve from 0.60815
Epoch 12/100
Epoch 12: val_mcc improved from 0.60815 to 0.65787, saving model to /kaggle/worki

4it [1:31:45, 1369.76s/it]

Beginning fold 5
Epoch 1/100
Epoch 1: val_mcc improved from -inf to 0.00000, saving model to /kaggle/working/weights_4.h5
Epoch 2/100
Epoch 2: val_mcc did not improve from 0.00000
Epoch 3/100
Epoch 3: val_mcc improved from 0.00000 to 0.43380, saving model to /kaggle/working/weights_4.h5
Epoch 4/100
Epoch 4: val_mcc did not improve from 0.43380
Epoch 5/100
Epoch 5: val_mcc improved from 0.43380 to 0.61365, saving model to /kaggle/working/weights_4.h5
Epoch 6/100
Epoch 6: val_mcc improved from 0.61365 to 0.65547, saving model to /kaggle/working/weights_4.h5
Epoch 7/100
Epoch 7: val_mcc did not improve from 0.65547
Epoch 8/100
Epoch 8: val_mcc did not improve from 0.65547
Epoch 9/100
Epoch 9: val_mcc did not improve from 0.65547
Epoch 10/100
Epoch 10: val_mcc did not improve from 0.65547
Epoch 11/100
Epoch 11: val_mcc did not improve from 0.65547
Epoch 12/100
Epoch 12: val_mcc improved from 0.65547 to 0.74210, saving model to /kaggle/working/weights_4.h5
Epoch 13/100
Epoch 13: val_mcc did

5it [1:54:40, 1376.10s/it]


((2936,), (2936,))

In [35]:
def threshold_search(y_true, y_proba):
    """Find the probability cut-off with the highest Matthews correlation.

    Args:
        y_true: array of 0/1 ground-truth labels.
        y_proba: array of predicted probabilities, same length as ``y_true``.

    Returns:
        (threshold, score): the first of the 101 evenly spaced cut-offs in
        [0, 1] that reaches the maximum score, and that score. A sample is
        classified positive when its probability is strictly above the cut-off.
    """
    thresholds = np.linspace(0.0,1.0,101)
    scores = []
    for cutoff in thresholds:
        hard_labels = (y_proba > cutoff).astype(np.uint8)
        scores.append(matthews_corrcoef(y_true, hard_labels))
    winner = np.argmax(scores)
    return thresholds[winner], scores[winner]

In [36]:
# Pick the probability cut-off that maximises MCC on the out-of-fold predictions
best_threshold, best_score = threshold_search(y_val, preds_val)
print(best_threshold, best_score)

0.5 0.7430623798311649
