In [1]:
import os, json, joblib, numpy as np, pandas as pd
from pathlib import Path
import warnings 
warnings.filterwarnings("ignore")

from scipy.spatial.transform import Rotation as R

from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

from tensorflow.keras.utils import Sequence, to_categorical, pad_sequences
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Conv1D, BatchNormalization, LayerNormalization, Activation, add, MaxPooling1D, Dropout,
    Bidirectional, LSTM, GlobalAveragePooling1D, Dense, Multiply, Reshape,
    Lambda, Concatenate, GRU, GaussianNoise
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K
import tensorflow as tf
import polars as pl

import matplotlib.pyplot as plt

2025-08-09 09:02:28.305120: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-09 09:02:28.700433: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754719348.832930    1479 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754719348.874463    1479 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-08-09 09:02:29.242297: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
import keras
print(tf.__version__)
print(keras.__version__)

2.18.0
3.8.0


In [3]:
print(tf.config.list_physical_devices("GPU"))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
print(tf.sysconfig.get_build_info())

OrderedDict([('cpu_compiler', '/usr/lib/llvm-18/bin/clang'), ('cuda_compute_capabilities', ['sm_60', 'sm_70', 'sm_80', 'sm_89', 'compute_90']), ('cuda_version', '12.5.1'), ('cudnn_version', '9'), ('is_cuda_build', True), ('is_rocm_build', False), ('is_tensorrt_build', False)])


In [5]:
print("GPU sayısı:", len(tf.config.list_physical_devices('GPU')))

GPU sayısı: 1


In [6]:
state_num = 1
import random
def seed_everything(seed):
    """Seed every random number generator this notebook touches.

    Exports the hash-seed and TensorFlow determinism environment variables and
    seeds, in order, Python's ``random``, NumPy's legacy global generator,
    TensorFlow, and ``tf.experimental.numpy``.

    Args:
        seed: Integer seed shared by all generators.
    """
    # Environment variables only accept strings.
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_CUDNN_DETERMINISTIC': '1',
        'TF_DETERMINISTIC_OPS': '1',
    })
    seeders = (
        random.seed,
        np.random.seed,
        tf.random.set_seed,
        tf.experimental.numpy.random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
seed_everything(seed=state_num)

In [7]:
# (Competition metric will only be imported when TRAINing)
# Run-mode switch; the note above ties the competition-metric import to it.
TRAIN = True                
# Path("") is the current working directory.
# presumably the raw competition files are read from here — TODO confirm
RAW_DIR = Path("")
# NOTE(review): the directory name mentions 10 folds while N_SPLITS is 5 — confirm it is still the right source.
PRETRAINED_DIR = Path("new_model_10_fold")
# NOTE(review): the name says "gru"; confirm it matches the model that is actually exported.
EXPORT_DIR = Path("imu_only_gru_5folds")
# presumably the mini-batch size handed to the training generator — TODO confirm
BATCH_SIZE = 64
# presumably the percentile of sequence lengths used as the padding length — TODO confirm
PAD_PERCENTILE = 95 
# presumably the initial learning rate — TODO confirm against the optimizer setup
LR_INIT = 5e-4
# Same value as the default `wd` of build_model (3e-3); presumably the L2 weight decay passed to it.
WD = 3e-3
# presumably the Beta(alpha, alpha) parameter given to MixupGenerator — TODO confirm
MIXUP_ALPHA = 0.4 
# presumably the maximum number of training epochs — TODO confirm
EPOCHS = 160
# presumably the EarlyStopping patience in epochs — TODO confirm
PATIENCE = 40
# presumably the number of cross-validation folds — TODO confirm
N_SPLITS = 5
# NOTE(review): not used in the visible part of the notebook — confirm it is still needed.
MASKING_PROB = 0.25 
# NOTE(review): not used in the visible part of the notebook — confirm it is still needed.
GATE_LOSS_WEIGHT = 0.20 # 0.20 

print("▶ imports ready · tensorflow", tf.__version__)

▶ imports ready · tensorflow 2.18.0


In [8]:
def remove_gravity_from_acc(acc_data, rot_data):
    """Subtract the gravity component from raw accelerometer readings.

    For each sample, the world-frame gravity vector ``[0, 0, 9.81]`` is rotated
    into the sensor frame with the inverse of that sample's orientation and
    subtracted from the measured acceleration.

    Args:
        acc_data: Table indexable with ``[['acc_x', 'acc_y', 'acc_z']]`` and
            exposing ``.values`` (e.g. a pandas DataFrame).
        rot_data: Table indexable with
            ``[['rot_x', 'rot_y', 'rot_z', 'rot_w']]``; the columns are passed
            to ``Rotation.from_quat`` in that (scalar-last) order.

    Returns:
        ``(n_samples, 3)`` float64 array of gravity-compensated acceleration.
        Rows whose quaternion cannot be used (any non-finite component, or all
        components close to zero) keep the raw acceleration.
    """
    # Force float: an integer accelerometer table must not truncate the result.
    acc_values = np.asarray(acc_data[['acc_x', 'acc_y', 'acc_z']].values, dtype=float)
    quat_values = np.asarray(
        rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values, dtype=float
    )
    gravity_world = np.array([0.0, 0.0, 9.81])

    # Start from the raw signal so unusable rows fall back to it automatically.
    linear_accel = acc_values.copy()

    # A quaternion is usable only if every component is finite and it is not
    # (numerically) the zero quaternion. A partially-NaN quaternion would
    # otherwise turn the whole output row into NaN.
    usable = np.isfinite(quat_values).all(axis=1) & ~np.isclose(quat_values, 0).all(axis=1)

    if usable.any():
        # One batched Rotation instead of one Rotation object per sample.
        # Zero-norm rows (the ValueError case of from_quat) are masked out above.
        rotation = R.from_quat(quat_values[usable])
        gravity_sensor_frame = rotation.apply(gravity_world, inverse=True)
        linear_accel[usable] = acc_values[usable] - gravity_sensor_frame

    return linear_accel

def _consecutive_rotvecs(quat_values):
    """Rotation vector from each sample's orientation to the next one.

    Args:
        quat_values: ``(n, 4)`` array-like of quaternions in the order expected
            by ``Rotation.from_quat``.

    Returns:
        ``(n, 3)`` float array. Row ``i`` holds the rotation vector of
        ``q_i^-1 * q_(i+1)``. The last row, and every row where either
        quaternion of the pair has a non-finite component or zero norm, is zero.
    """
    quat_values = np.asarray(quat_values, dtype=float)
    rotvecs = np.zeros((len(quat_values), 3))
    if len(quat_values) < 2:
        return rotvecs

    # Usable = fully finite and non-zero norm (from_quat rejects zero norm and
    # silently propagates NaN, so both are filtered up front).
    usable = np.isfinite(quat_values).all(axis=1) & (np.linalg.norm(quat_values, axis=1) > 0)
    pair_rows = np.flatnonzero(usable[:-1] & usable[1:])
    if pair_rows.size:
        current = R.from_quat(quat_values[pair_rows])
        following = R.from_quat(quat_values[pair_rows + 1])
        rotvecs[pair_rows] = (current.inv() * following).as_rotvec()
    return rotvecs


def calculate_angular_velocity_from_quat(rot_data, time_delta=1/200):
    """Estimate angular velocity from consecutive orientation quaternions.

    Args:
        rot_data: Table indexable with
            ``[['rot_x', 'rot_y', 'rot_z', 'rot_w']]`` and exposing ``.values``.
        time_delta: Time between consecutive samples; the relative rotation
            vector is divided by it. Defaults to ``1/200``.

    Returns:
        ``(n_samples, 3)`` array. Row ``i`` is the rotation vector from sample
        ``i`` to ``i + 1`` divided by ``time_delta``. The last row and rows
        with an unusable quaternion pair are zero.
    """
    quat_values = rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
    return _consecutive_rotvecs(quat_values) / time_delta


def calculate_angular_distance(rot_data):
    """Angle of the rotation between each sample and the next.

    Args:
        rot_data: Table indexable with
            ``[['rot_x', 'rot_y', 'rot_z', 'rot_w']]`` and exposing ``.values``.

    Returns:
        ``(n_samples,)`` array. Entry ``i`` is the norm of the rotation vector
        from sample ``i`` to ``i + 1``. The last entry and entries with an
        unusable quaternion pair are zero.
    """
    quat_values = rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
    return np.linalg.norm(_consecutive_rotvecs(quat_values), axis=1)

In [9]:
class MixupGenerator(Sequence):
    """Batch generator that applies mixup and optional class weighting.

    Each batch is mixed with a shuffled copy of itself using a single
    ``lam ~ Beta(alpha, alpha)`` drawn per batch. Inputs, one-hot labels and
    sample weights are all blended with the same ``lam``.

    Args:
        X: Array of inputs, indexed along axis 0.
        y: One-hot label array aligned with ``X``.
        batch_size: Number of samples per batch.
        class_weight: Optional mapping ``class index -> weight``. When given,
            each sample's weight is looked up from its arg-max label.
        alpha: Beta distribution parameter; ``alpha <= 0`` disables mixup.
        **kwargs: Forwarded to the Keras ``Sequence`` base class
            (e.g. ``workers``, ``use_multiprocessing``).
    """

    def __init__(self, X, y, batch_size, class_weight=None, alpha=0.2, **kwargs):
        # Keras 3 expects dataset subclasses to initialise the base class.
        super().__init__(**kwargs)
        self.X, self.y = X, y
        self.batch = batch_size
        self.class_weight = class_weight
        self.alpha = alpha
        # Visiting order; reshuffled in place at the end of every epoch.
        self.indices = np.arange(len(X))

    def __len__(self):
        """Number of batches per epoch (the last one may be smaller)."""
        return int(np.ceil(len(self.X) / self.batch))

    def __getitem__(self, i):
        """Return batch ``i`` as ``(inputs, labels, sample_weights)``."""
        idx = self.indices[i * self.batch:(i + 1) * self.batch]
        Xb, yb = self.X[idx].copy(), self.y[idx].copy()

        sample_weights = np.ones(len(Xb), dtype='float32')
        if self.class_weight:
            labels = yb.argmax(axis=1)
            # Keep float32 so both code paths return the same dtype.
            sample_weights = np.asarray(
                [self.class_weight[label] for label in labels], dtype='float32'
            )

        if self.alpha > 0:
            lam = np.random.beta(self.alpha, self.alpha)
            perm = np.random.permutation(len(Xb))
            X_mix = lam * Xb + (1 - lam) * Xb[perm]
            y_mix = lam * yb + (1 - lam) * yb[perm]
            sample_weights_mix = lam * sample_weights + (1 - lam) * sample_weights[perm]
            return X_mix, y_mix, sample_weights_mix

        return Xb, yb, sample_weights

    def on_epoch_end(self):
        """Reshuffle the sample order for the next epoch."""
        np.random.shuffle(self.indices)




from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K
from tensorflow.keras import layers
import tensorflow as tf

# --- Helper Functions ---
def time_sum(x):
    """Sum ``x`` over axis 1 with the Keras backend ``sum``.

    Axis 1 is presumably the time axis of a (batch, time, channels) tensor,
    going by the function name — confirm against callers.
    """
    summed = K.sum(x, axis=1)
    return summed

def squeeze_last_axis(x):
    """Drop the trailing axis of ``x`` (``tf.squeeze`` on axis -1)."""
    squeezed = tf.squeeze(x, axis=-1)
    return squeezed

def expand_last_axis(x):
    """Append a new trailing axis of size 1 to ``x``."""
    expanded = tf.expand_dims(x, axis=-1)
    return expanded

# --- SE Block ---
def se_block(input_tensor, ratio=8):
    """Squeeze-and-excitation gate over the channel axis.

    The input is average-pooled over axis 1, passed through a two-layer
    bottleneck (ReLU, then sigmoid), and the resulting per-channel weights are
    multiplied back onto the input.

    Args:
        input_tensor: Keras tensor whose last dimension is the channel count.
        ratio: Bottleneck reduction factor for the hidden Dense layer.

    Returns:
        Tensor with the same shape as ``input_tensor``.
    """
    filters = input_tensor.shape[-1]
    # Never request a zero-unit Dense layer when the input has fewer channels
    # than `ratio`.
    hidden_units = max(1, filters // ratio)
    se = GlobalAveragePooling1D()(input_tensor)
    se = Dense(hidden_units, activation='relu')(se)
    se = Dense(filters, activation='sigmoid')(se)
    # (batch, filters) -> (batch, 1, filters) so the gate broadcasts over axis 1.
    se = Reshape((1, filters))(se)
    return Multiply()([input_tensor, se])

# --- Residual + SE Block ---
def residual_se_block(x, filters, kernel_size, drop_rate=0.3, weight_decay=1e-4):
    """Two-convolution residual block with a squeeze-and-excitation gate.

    Main path: Conv-BN-ReLU, Conv-BN, then ``se_block``. The shortcut is the
    input itself, or a 1x1 Conv-BN projection when the channel count differs
    from ``filters``. The sum goes through ReLU and dropout.

    Args:
        x: Input Keras tensor; the last dimension is the channel count.
        filters: Output channels of both convolutions.
        kernel_size: Kernel size of the two main-path convolutions.
        drop_rate: Dropout rate applied to the block output.
        weight_decay: L2 factor applied to every convolution kernel.

    Returns:
        Output tensor with ``filters`` channels.
    """
    def conv_bn(tensor, size):
        # Bias-free convolution followed by batch norm.
        tensor = Conv1D(filters, size, padding='same', use_bias=False,
                        kernel_regularizer=l2(weight_decay))(tensor)
        return BatchNormalization()(tensor)

    residual = x

    main = conv_bn(x, kernel_size)
    main = Activation('relu')(main)
    main = conv_bn(main, kernel_size)
    main = se_block(main)

    # Project the shortcut only when the channel counts disagree.
    if residual.shape[-1] != filters:
        residual = conv_bn(residual, 1)

    merged = Add()([main, residual])
    merged = Activation('relu')(merged)
    return Dropout(drop_rate)(merged)

# --- Keras-compatible lightweight multi-head attention layer ---
class MultiHeadAttentionLayer(Layer):
    """Lightweight multi-head self-attention built from per-head Dense layers.

    Every head projects the input to queries, keys and values of width
    ``proj_dim``, applies scaled dot-product attention, and the head outputs
    are concatenated on the last axis (output width ``heads * proj_dim``).

    Args:
        heads: Number of attention heads.
        proj_dim: Projection width of each head.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, heads=4, proj_dim=64, **kwargs):
        # Accept base-layer kwargs so the layer can be rebuilt from its config.
        super().__init__(**kwargs)
        self.heads = heads
        self.proj_dim = proj_dim
        self.query_layers = [Dense(proj_dim) for _ in range(heads)]
        self.key_layers = [Dense(proj_dim) for _ in range(heads)]
        self.value_layers = [Dense(proj_dim) for _ in range(heads)]

    def call(self, x):
        """Apply self-attention to ``x`` and return the concatenated heads."""
        heads_output = []
        for i in range(self.heads):
            q = self.query_layers[i](x)
            k = self.key_layers[i](x)
            v = self.value_layers[i](x)

            score = tf.matmul(q, k, transpose_b=True)
            # Scale in the score's own dtype so mixed-precision runs do not
            # mix float16 scores with a float32 scalar.
            score = score / tf.math.sqrt(tf.cast(self.proj_dim, score.dtype))
            weights = tf.nn.softmax(score, axis=-1)
            heads_output.append(tf.matmul(weights, v))

        return tf.concat(heads_output, axis=-1)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"heads": self.heads, "proj_dim": self.proj_dim})
        return config

# --- Model Definition ---
def build_model(pad_len, imu_dim, n_classes, wd=3e-3):
    """Build the CNN + parallel BiGRU/BiLSTM + attention classifier.

    The returned model is not compiled.

    Args:
        pad_len: Length of axis 1 of the input.
        imu_dim: Number of input features per step.
        n_classes: Size of the softmax output.
        wd: L2 factor used for convolution, recurrent and dense kernels.

    Returns:
        A Keras ``Model`` mapping ``imu_input`` to ``main_output``.
    """
    def stacked_birnn(cell_cls, sequence):
        # Two bidirectional layers of the same cell type, 128 units per direction.
        for _ in range(2):
            sequence = Bidirectional(
                cell_cls(128, return_sequences=True, kernel_regularizer=l2(wd))
            )(sequence)
        return sequence

    inp = Input(shape=(pad_len, imu_dim), name='imu_input')

    # --- CNN encoder: three residual SE stages, each followed by 2x pooling ---
    x = inp
    for n_filters, kernel, rate in ((64, 3, 0.20), (128, 5, 0.25), (256, 7, 0.30)):
        x = residual_se_block(x, filters=n_filters, kernel_size=kernel,
                              drop_rate=rate, weight_decay=wd)
        x = MaxPooling1D(pool_size=2)(x)

    # --- Parallel GRU and LSTM paths over the same encoder output ---
    gru_branch = stacked_birnn(GRU, x)
    lstm_branch = stacked_birnn(LSTM, x)

    x = Concatenate()([gru_branch, lstm_branch])
    x = Dropout(0.3)(x)

    # --- Multi-head attention, then average over axis 1 ---
    x = MultiHeadAttentionLayer(heads=4, proj_dim=64)(x)
    x = GlobalAveragePooling1D()(x)

    # --- Fully connected head ---
    for units, drop in ((256, 0.5), (128, 0.4)):
        x = Dense(units, use_bias=False, kernel_regularizer=l2(wd))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dropout(drop)(x)

    output = Dense(n_classes, activation='softmax',
                   kernel_regularizer=l2(wd), name='main_output')(x)

    return Model(inputs=inp, outputs=output)


# GPT 5 - CV: 7740 - LB 0.797
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, backend as K

# -------------------------
# Custom Layers (serializable)
# -------------------------
class SEBlock1D(layers.Layer):
    """Serializable squeeze-and-excitation gate for (batch, time, channel) inputs.

    Args:
        reduction: Bottleneck factor; the hidden width is
            ``max(4, channels // reduction)``.
        wd: L2 factor applied to both Dense kernels.
    """

    def __init__(self, reduction=8, wd=2e-4, name=None, **kwargs):
        super().__init__(name=name, **kwargs)
        self.wd = wd
        self.reduction = reduction

    def build(self, input_shape):
        """Create the pooling, bottleneck and reshape sub-layers."""
        channels = int(input_shape[-1])
        bottleneck = max(4, channels // self.reduction)
        self.gap = layers.GlobalAveragePooling1D()
        self.fc1 = layers.Dense(
            bottleneck,
            activation="relu",
            kernel_regularizer=regularizers.l2(self.wd),
        )
        self.fc2 = layers.Dense(
            channels,
            activation="sigmoid",
            kernel_regularizer=regularizers.l2(self.wd),
        )
        # (batch, channels) -> (batch, 1, channels) so the gate broadcasts over axis 1.
        self.reshape = layers.Reshape((1, channels))
        super().build(input_shape)

    def call(self, x):
        """Scale ``x`` channel-wise by the learned sigmoid gate."""
        gate = self.reshape(self.fc2(self.fc1(self.gap(x))))
        return x * gate

    def get_config(self):
        """Constructor arguments merged with the base layer config."""
        config = {"reduction": self.reduction, "wd": self.wd}
        config.update(super().get_config())
        return config


class MultiScaleResBlock(layers.Layer):
    """Residual block with parallel dilated separable convolutions and an SE gate.

    Each dilation rate gets a ``SeparableConv1D`` branch of
    ``filters // len(dilation_rates)`` channels. The branches are concatenated,
    projected to exactly ``filters`` channels, batch-normalised, activated with
    GELU and gated by ``SEBlock1D``. The input (1x1-projected when its channel
    count differs from ``filters``) is added back, followed by GELU and
    spatial dropout.

    Args:
        filters: Output channel count.
        wd: L2 factor for all convolution kernels.
        dilation_rates: One dilation rate per branch.
        dropout: Rate of the final ``SpatialDropout1D``.
    """

    def __init__(self, filters, wd=2e-4, dilation_rates=(1,2,4), dropout=0.12, name=None, **kwargs):
        super().__init__(name=name, **kwargs)
        self.filters = filters
        self.wd = wd
        self.dilation_rates = tuple(dilation_rates)
        self.spatial_dropout_rate = dropout

    def build(self, input_shape):
        """Create all sub-layers once the input channel count is known."""
        in_ch = int(input_shape[-1])
        branch_filters = self.filters // len(self.dilation_rates)
        self.branches = [
            layers.SeparableConv1D(branch_filters,
                                   kernel_size=3,
                                   padding="same",
                                   dilation_rate=rate,
                                   depthwise_regularizer=regularizers.l2(self.wd),
                                   pointwise_regularizer=regularizers.l2(self.wd))
            for rate in self.dilation_rates
        ]
        # The branch widths may not add up to `filters` (integer division), so
        # project to exactly `filters` channels before the residual add.
        self.project_after_concat = layers.Conv1D(self.filters, 1, padding="same",
                                                  kernel_regularizer=regularizers.l2(self.wd))
        self.bn = layers.BatchNormalization()
        self.act = layers.Activation(tf.nn.gelu)
        self.se = SEBlock1D(reduction=8, wd=self.wd)
        # Shortcut projection, only needed when the input channels != filters.
        if in_ch != self.filters:
            self.proj = layers.Conv1D(self.filters, 1, padding="same",
                                      kernel_regularizer=regularizers.l2(self.wd))
            self.proj_bn = layers.BatchNormalization()
        else:
            self.proj = None
            self.proj_bn = None
        self.sdrop = layers.SpatialDropout1D(self.spatial_dropout_rate)
        super().build(input_shape)

    def call(self, x):
        """Run the multi-scale branches and add the (projected) shortcut."""
        # Plain tensor ops instead of instantiating new Concatenate/Add layers
        # on every call.
        out = tf.concat([conv(x) for conv in self.branches], axis=-1)
        out = self.project_after_concat(out)
        out = self.bn(out)
        out = self.act(out)
        out = self.se(out)
        shortcut = x if self.proj is None else self.proj_bn(self.proj(x))
        out = self.act(shortcut + out)
        return self.sdrop(out)

    def get_config(self):
        """Constructor arguments merged with the base layer config."""
        return {"filters": self.filters, "wd": self.wd,
                "dilation_rates": self.dilation_rates,
                "dropout": self.spatial_dropout_rate, **super().get_config()}


class AttentionPooling(layers.Layer):
    """Learned pooling over axis 1: Dense(1) scores, softmax, weighted sum."""

    def __init__(self, name=None, **kwargs):
        super().__init__(name=name, **kwargs)

    def build(self, input_shape):
        """Create the bias-free scoring layer."""
        self.dense = layers.Dense(1, use_bias=False)
        super().build(input_shape)

    def call(self, x):
        """Pool ``x`` of shape (B, T, C) down to (B, C)."""
        scores = self.dense(x)                     # (B, T, 1)
        attention = tf.nn.softmax(scores, axis=1)  # normalised over T
        # The attention weights broadcast across the channel axis.
        return tf.reduce_sum(x * attention, axis=1)

    def get_config(self):
        """Base layer config; the layer adds no arguments of its own."""
        return dict(super().get_config())


class StdPooling(layers.Layer):
    """Standard deviation over axis 1 of a (B, T, C) tensor.

    Args:
        eps: Lower bound applied to the variance before the square root.
    """

    def __init__(self, eps=1e-6, name=None, **kwargs):
        super().__init__(name=name, **kwargs)
        self.eps = eps

    def call(self, x):
        """Return the per-channel standard deviation, shape (B, C)."""
        centered = x - tf.reduce_mean(x, axis=1, keepdims=True)
        variance = tf.reduce_mean(tf.square(centered), axis=1)
        # Clamp the variance (not the std) so the sqrt gradient stays finite.
        return tf.sqrt(tf.maximum(variance, self.eps))

    def get_config(self):
        """Constructor arguments merged with the base layer config."""
        config = {"eps": self.eps}
        config.update(super().get_config())
        return config


# -------------------------
# Model Builder
# -------------------------
def build_imu_only_model(pad_len, num_features, num_classes, wd=2e-4):
    """Build and compile the serial multi-scale CNN + BiGRU + BiLSTM classifier.

    Args:
        pad_len: Length of axis 1 of the input.
        num_features: Number of input features per step (e.g. 7).
        num_classes: Size of the softmax output (e.g. 18).
        wd: L2 factor applied through ``kernel_regularizer``.

    Returns:
        A compiled Keras model (AdamW, label-smoothed categorical
        cross-entropy, accuracy metric).
    """
    def conv_bn_gelu(tensor, n_filters, prefix):
        # Conv1D(kernel 3) -> BatchNorm -> GELU, named "<prefix>_conv/_bn/_act".
        tensor = layers.Conv1D(n_filters, 3, padding="same",
                               kernel_regularizer=regularizers.l2(wd),
                               name=f"{prefix}_conv")(tensor)
        tensor = layers.BatchNormalization(name=f"{prefix}_bn")(tensor)
        return layers.Activation(tf.nn.gelu, name=f"{prefix}_act")(tensor)

    inp = layers.Input(shape=(pad_len, num_features), name="imu_input")
    x = layers.LayerNormalization(name="input_ln")(inp)

    # stem
    x = conv_bn_gelu(x, 64, "stem")

    # stacked multi-scale residual blocks
    for idx, width in enumerate((128, 192, 256), start=1):
        x = MultiScaleResBlock(filters=width, wd=wd, dilation_rates=(1,2,4),
                               name=f"msrb_{idx}")(x)

    # mid conv
    x = conv_bn_gelu(x, 256, "mid")

    # hybrid RNN encoder
    x = layers.Bidirectional(layers.GRU(180, return_sequences=True, dropout=0.12), name="bigru1")(x)
    x = layers.Bidirectional(layers.LSTM(160, return_sequences=True, dropout=0.12), name="bilstm1")(x)

    x = layers.Conv1D(256, 1, padding="same", kernel_regularizer=regularizers.l2(wd), name="proj_conv")(x)
    x = layers.LayerNormalization(name="proj_ln")(x)

    # four pooled views of the sequence, each (B, C)
    att = AttentionPooling(name="att_pool")(x)
    avg = layers.GlobalAveragePooling1D(name="avg_pool")(x)
    mx = layers.GlobalMaxPooling1D(name="max_pool")(x)
    std = StdPooling(name="std_pool")(x)

    feat = layers.Concatenate(name="final_concat")([att, avg, mx, std])  # (B, 4*C)

    # head
    h = feat
    for idx, (units, rate) in enumerate(((384, 0.45), (192, 0.35)), start=1):
        h = layers.Dense(units, activation=tf.nn.gelu,
                         kernel_regularizer=regularizers.l2(wd), name=f"head_fc{idx}")(h)
        h = layers.BatchNormalization(name=f"head_bn{idx}")(h)
        h = layers.Dropout(rate, name=f"head_drop{idx}")(h)

    out = layers.Dense(num_classes, activation="softmax", name="main_output",
                       kernel_regularizer=regularizers.l2(wd))(h)

    model = models.Model(inputs=inp, outputs=out, name="IMU_Only_Strong_Serial")

    # Use the AdamW that ships with Keras. The previous tensorflow_addons
    # import sat behind a blanket `except Exception`, which silently dropped
    # the weight decay (plain Adam) whenever the add-on was unavailable.
    optimizer = tf.keras.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-5)

    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05),
                  metrics=['accuracy'])
    return model


In [10]:
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, backend as K

# -------------------------
# Custom Layers (serializable)
# -------------------------
class SEBlock1D(layers.Layer):
    """Serializable squeeze-and-excitation gate for (batch, time, channel) inputs.

    Args:
        reduction: Bottleneck factor; the hidden width is
            ``max(4, channels // reduction)``.
        wd: L2 factor applied to both Dense kernels.
    """

    def __init__(self, reduction=8, wd=2e-4, name=None, **kwargs):
        super().__init__(name=name, **kwargs)
        self.wd = wd
        self.reduction = reduction

    def build(self, input_shape):
        """Create the pooling, bottleneck and reshape sub-layers."""
        channels = int(input_shape[-1])
        bottleneck = max(4, channels // self.reduction)
        self.gap = layers.GlobalAveragePooling1D()
        self.fc1 = layers.Dense(
            bottleneck,
            activation="relu",
            kernel_regularizer=regularizers.l2(self.wd),
        )
        self.fc2 = layers.Dense(
            channels,
            activation="sigmoid",
            kernel_regularizer=regularizers.l2(self.wd),
        )
        # (batch, channels) -> (batch, 1, channels) so the gate broadcasts over axis 1.
        self.reshape = layers.Reshape((1, channels))
        super().build(input_shape)

    def call(self, x):
        """Scale ``x`` channel-wise by the learned sigmoid gate."""
        gate = self.reshape(self.fc2(self.fc1(self.gap(x))))
        return x * gate

    def get_config(self):
        """Constructor arguments merged with the base layer config."""
        config = {"reduction": self.reduction, "wd": self.wd}
        config.update(super().get_config())
        return config


class MultiScaleResBlock(layers.Layer):
    """Residual block with parallel dilated separable convolutions and an SE gate.

    Each dilation rate gets a ``SeparableConv1D`` branch of
    ``filters // len(dilation_rates)`` channels. The branches are concatenated,
    projected to exactly ``filters`` channels, batch-normalised, activated with
    GELU and gated by ``SEBlock1D``. The input (1x1-projected when its channel
    count differs from ``filters``) is added back, followed by GELU and
    spatial dropout.

    Args:
        filters: Output channel count.
        wd: L2 factor for all convolution kernels.
        dilation_rates: One dilation rate per branch.
        dropout: Rate of the final ``SpatialDropout1D``.
    """

    def __init__(self, filters, wd=2e-4, dilation_rates=(1,2,4), dropout=0.12, name=None, **kwargs):
        super().__init__(name=name, **kwargs)
        self.filters = filters
        self.wd = wd
        self.dilation_rates = tuple(dilation_rates)
        self.spatial_dropout_rate = dropout

    def build(self, input_shape):
        """Create all sub-layers once the input channel count is known."""
        in_ch = int(input_shape[-1])
        branch_filters = self.filters // len(self.dilation_rates)
        self.branches = [
            layers.SeparableConv1D(branch_filters,
                                   kernel_size=3,
                                   padding="same",
                                   dilation_rate=rate,
                                   depthwise_regularizer=regularizers.l2(self.wd),
                                   pointwise_regularizer=regularizers.l2(self.wd))
            for rate in self.dilation_rates
        ]
        # The branch widths may not add up to `filters` (integer division), so
        # project to exactly `filters` channels before the residual add.
        self.project_after_concat = layers.Conv1D(self.filters, 1, padding="same",
                                                  kernel_regularizer=regularizers.l2(self.wd))
        self.bn = layers.BatchNormalization()
        self.act = layers.Activation(tf.nn.gelu)
        self.se = SEBlock1D(reduction=8, wd=self.wd)
        # Shortcut projection, only needed when the input channels != filters.
        if in_ch != self.filters:
            self.proj = layers.Conv1D(self.filters, 1, padding="same",
                                      kernel_regularizer=regularizers.l2(self.wd))
            self.proj_bn = layers.BatchNormalization()
        else:
            self.proj = None
            self.proj_bn = None
        self.sdrop = layers.SpatialDropout1D(self.spatial_dropout_rate)
        super().build(input_shape)

    def call(self, x):
        """Run the multi-scale branches and add the (projected) shortcut."""
        # Plain tensor ops instead of instantiating new Concatenate/Add layers
        # on every call.
        out = tf.concat([conv(x) for conv in self.branches], axis=-1)
        out = self.project_after_concat(out)
        out = self.bn(out)
        out = self.act(out)
        out = self.se(out)
        shortcut = x if self.proj is None else self.proj_bn(self.proj(x))
        out = self.act(shortcut + out)
        return self.sdrop(out)

    def get_config(self):
        """Constructor arguments merged with the base layer config."""
        return {"filters": self.filters, "wd": self.wd,
                "dilation_rates": self.dilation_rates,
                "dropout": self.spatial_dropout_rate, **super().get_config()}


class AttentionPooling(layers.Layer):
    """Learned pooling over axis 1: Dense(1) scores, softmax, weighted sum."""

    def __init__(self, name=None, **kwargs):
        super().__init__(name=name, **kwargs)

    def build(self, input_shape):
        """Create the bias-free scoring layer."""
        self.dense = layers.Dense(1, use_bias=False)
        super().build(input_shape)

    def call(self, x):
        """Pool ``x`` of shape (B, T, C) down to (B, C)."""
        scores = self.dense(x)                     # (B, T, 1)
        attention = tf.nn.softmax(scores, axis=1)  # normalised over T
        # The attention weights broadcast across the channel axis.
        return tf.reduce_sum(x * attention, axis=1)

    def get_config(self):
        """Base layer config; the layer adds no arguments of its own."""
        return dict(super().get_config())


class StdPooling(layers.Layer):
    """Standard deviation over axis 1 of a (B, T, C) tensor.

    Args:
        eps: Lower bound applied to the variance before the square root.
    """

    def __init__(self, eps=1e-6, name=None, **kwargs):
        super().__init__(name=name, **kwargs)
        self.eps = eps

    def call(self, x):
        """Return the per-channel standard deviation, shape (B, C)."""
        centered = x - tf.reduce_mean(x, axis=1, keepdims=True)
        variance = tf.reduce_mean(tf.square(centered), axis=1)
        # Clamp the variance (not the std) so the sqrt gradient stays finite.
        return tf.sqrt(tf.maximum(variance, self.eps))

    def get_config(self):
        """Constructor arguments merged with the base layer config."""
        config = {"eps": self.eps}
        config.update(super().get_config())
        return config


# -------------------------
# Model Builder
# -------------------------
def build_imu_only_model(pad_len, num_features, num_classes, wd=2e-4):
    """Build the serial multi-scale CNN + BiGRU + BiLSTM classifier.

    The returned model is NOT compiled; the caller must call ``compile``.

    Args:
        pad_len: Length of axis 1 of the input.
        num_features: Number of input features per step (e.g. 7).
        num_classes: Size of the softmax output (e.g. 18).
        wd: L2 factor applied through ``kernel_regularizer``.

    Returns:
        An uncompiled Keras model named ``IMU_Only_Strong_Serial``.
    """
    def conv_bn_gelu(tensor, n_filters, prefix):
        # Conv1D(kernel 3) -> BatchNorm -> GELU, named "<prefix>_conv/_bn/_act".
        tensor = layers.Conv1D(n_filters, 3, padding="same",
                               kernel_regularizer=regularizers.l2(wd),
                               name=f"{prefix}_conv")(tensor)
        tensor = layers.BatchNormalization(name=f"{prefix}_bn")(tensor)
        return layers.Activation(tf.nn.gelu, name=f"{prefix}_act")(tensor)

    inp = layers.Input(shape=(pad_len, num_features), name="imu_input")
    x = layers.LayerNormalization(name="input_ln")(inp)

    # stem
    x = conv_bn_gelu(x, 64, "stem")

    # stacked multi-scale residual blocks
    for idx, width in enumerate((128, 192, 256), start=1):
        x = MultiScaleResBlock(filters=width, wd=wd, dilation_rates=(1,2,4),
                               name=f"msrb_{idx}")(x)

    # mid conv
    x = conv_bn_gelu(x, 256, "mid")

    # hybrid RNN encoder
    x = layers.Bidirectional(layers.GRU(180, return_sequences=True, dropout=0.12), name="bigru1")(x)
    x = layers.Bidirectional(layers.LSTM(160, return_sequences=True, dropout=0.12), name="bilstm1")(x)

    x = layers.Conv1D(256, 1, padding="same", kernel_regularizer=regularizers.l2(wd), name="proj_conv")(x)
    x = layers.LayerNormalization(name="proj_ln")(x)

    # four pooled views of the sequence, each (B, C)
    att = AttentionPooling(name="att_pool")(x)
    avg = layers.GlobalAveragePooling1D(name="avg_pool")(x)
    mx = layers.GlobalMaxPooling1D(name="max_pool")(x)
    std = StdPooling(name="std_pool")(x)

    feat = layers.Concatenate(name="final_concat")([att, avg, mx, std])  # (B, 4*C)

    # head
    h = feat
    for idx, (units, rate) in enumerate(((384, 0.45), (192, 0.35)), start=1):
        h = layers.Dense(units, activation=tf.nn.gelu,
                         kernel_regularizer=regularizers.l2(wd), name=f"head_fc{idx}")(h)
        h = layers.BatchNormalization(name=f"head_bn{idx}")(h)
        h = layers.Dropout(rate, name=f"head_drop{idx}")(h)

    out = layers.Dense(num_classes, activation="softmax", name="main_output",
                       kernel_regularizer=regularizers.l2(wd))(h)

    return models.Model(inputs=inp, outputs=out, name="IMU_Only_Strong_Serial")


# GPT5 - 1
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, backend as K

def se_block(x, reduction=8, name=None):
    """Squeeze-and-excitation gate with optional layer-name prefix.

    Args:
        x: Input Keras tensor; the last dimension is the channel count.
        reduction: Bottleneck factor; the hidden width is
            ``max(4, channels // reduction)``.
        name: Optional prefix. When given, sub-layers are named
            ``<name>_gap``, ``<name>_fc1``, ``<name>_fc2`` and ``<name>_scale``.

    Returns:
        ``x`` multiplied channel-wise by the learned sigmoid gate.
    """
    def tag(suffix):
        # Prefix the suffix, or fall back to Keras auto-naming.
        return None if name is None else name + suffix

    channels = int(x.shape[-1])
    squeezed = layers.GlobalAveragePooling1D(name=tag("_gap"))(x)
    hidden = layers.Dense(max(4, channels//reduction), activation="relu", name=tag("_fc1"))(squeezed)
    gate = layers.Dense(channels, activation="sigmoid", name=tag("_fc2"))(hidden)
    gate = layers.Reshape((1, channels))(gate)
    return layers.Multiply(name=tag("_scale"))([x, gate])

def residual_inception_block(x, filters, wd, name=None):
    """Inception-style residual block with kernel sizes 3, 5 and 7.

    Three ``SeparableConv1D`` branches (widths ``filters//3``, ``filters//3``
    and the remainder) are concatenated, batch-normalised, GELU-activated and
    SE-gated. The input is added back (through a 1x1 Conv + BN projection when
    its channel count differs from ``filters``), followed by GELU and
    ``SpatialDropout1D(0.1)``.

    Args:
        x: Input Keras tensor; the last dimension is the channel count.
        filters: Output channel count.
        wd: L2 factor for the convolution kernels.
        name: Optional prefix for the named sub-layers.

    Returns:
        Output tensor with ``filters`` channels.
    """
    def tag(suffix):
        # Prefix the suffix, or fall back to Keras auto-naming.
        return None if name is None else name + suffix

    third = filters // 3
    # (width, kernel size, name suffix); the last branch absorbs the remainder.
    branch_specs = (
        (third, 3, "_b1"),
        (third, 5, "_b2"),
        (filters - 2 * third, 7, "_b3"),
    )
    branches = [
        layers.SeparableConv1D(width, kernel, padding="same",
                               depthwise_regularizer=regularizers.l2(wd),
                               pointwise_regularizer=regularizers.l2(wd),
                               name=tag(suffix))(x)
        for width, kernel, suffix in branch_specs
    ]

    out = layers.Concatenate(name=tag("_concat"))(branches)
    out = layers.BatchNormalization(name=tag("_bn"))(out)
    out = layers.Activation("gelu", name=tag("_act"))(out)

    # squeeze-excite
    out = se_block(out, reduction=8, name=tag("_se"))

    # Shortcut: identity unless the channel counts disagree.
    shortcut = x
    if int(x.shape[-1]) != filters:
        shortcut = layers.Conv1D(filters, 1, padding="same",
                                 kernel_regularizer=regularizers.l2(wd),
                                 name=tag("_proj"))(x)
        shortcut = layers.BatchNormalization()(shortcut)

    out = layers.Add(name=tag("_resadd"))([shortcut, out])
    out = layers.Activation("gelu", name=tag("_resact"))(out)
    return layers.SpatialDropout1D(0.1, name=tag("_drop"))(out)

def transformer_encoder_block(x, head=4, ff_dim=None, dropout=0.1, wd=2e-4, name=None):
    """Post-norm transformer encoder block (self-attention + feed-forward).

    Args:
        x: Input Keras tensor; the last dimension is the model width.
        head: Number of attention heads; ``key_dim`` is ``d_model // head``.
        ff_dim: Hidden width of the feed-forward part; defaults to
            ``2 * d_model``.
        dropout: Dropout rate after attention and after the feed-forward part.
        wd: L2 factor for the feed-forward Dense kernels.
        name: Optional prefix; only the attention layer is named
            (``<name>_mha``).

    Returns:
        Tensor with the same last dimension as ``x``.
    """
    d_model = int(x.shape[-1])
    hidden_dim = d_model * 2 if ff_dim is None else ff_dim
    mha_name = None if name is None else name + "_mha"

    # Self-attention sub-layer with residual connection and layer norm.
    attended = layers.MultiHeadAttention(num_heads=head, key_dim=d_model//head, name=mha_name)(x, x)
    attended = layers.Dropout(dropout)(attended)
    residual = layers.Add()([x, attended])
    normed = layers.LayerNormalization()(residual)

    # Position-wise feed-forward sub-layer, again residual + layer norm.
    hidden = layers.Dense(hidden_dim, activation="gelu",
                          kernel_regularizer=regularizers.l2(wd))(normed)
    projected = layers.Dense(d_model, kernel_regularizer=regularizers.l2(wd))(hidden)
    projected = layers.Dropout(dropout)(projected)
    residual = layers.Add()([normed, projected])
    return layers.LayerNormalization()(residual)

class AttentionPooling(layers.Layer):
    """Attention pooling layer (learned temporal pooling)."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the scoring, softmax and multiply sub-layers."""
        prefix = self.name
        self.dense = layers.Dense(1, use_bias=False, name=prefix + "_attn_dense")
        self.softmax = layers.Softmax(axis=1, name=prefix + "_attn_softmax")
        self.multiply = layers.Multiply()
        super().build(input_shape)

    def call(self, inputs):
        """Pool ``inputs`` of shape (B, T, C) down to (B, C)."""
        weights = self.softmax(self.dense(inputs))   # (B, T, 1), normalised over T
        weighted = self.multiply([inputs, weights])  # (B, T, C)
        return K.sum(weighted, axis=1)

    # Required so the model can be saved and loaded correctly.
    def get_config(self):
        """Base layer config; the layer adds no arguments of its own."""
        return super().get_config()

    def compute_output_shape(self, input_shape):
        """Output keeps the batch and channel dimensions: (B, C)."""
        batch, channels = input_shape[0], input_shape[-1]
        return (batch, channels)

def build_imu_only_model(pad_len, num_features, num_classes, wd=2e-4):
    """
    Build the IMU-only classifier and return it UNCOMPILED.

    Pipeline: input LayerNorm -> conv stem -> three residual inception blocks
    -> BiGRU -> 1x1 conv projection -> two transformer encoder blocks
    -> attention pooling -> dense classification head.

    pad_len: int, time dimension after pad/truncate
    num_features: int, number of input channels per time step
    num_classes: int, size of the softmax output
    wd: float, L2 weight decay applied via kernel_regularizer

    The caller is responsible for calling ``model.compile(...)``; the output
    layer is named "main_output" so losses/metrics can be keyed by that name.
    """
    def weight_reg():
        # Fresh regularizer instance per layer, same factor everywhere.
        return regularizers.l2(wd)

    inputs = layers.Input(shape=(pad_len, num_features), name="imu_input")
    feat = layers.LayerNormalization(name="input_ln")(inputs)

    # Convolutional stem.
    feat = layers.Conv1D(64, 3, padding="same", kernel_regularizer=weight_reg(), name="stem_conv")(feat)
    feat = layers.BatchNormalization()(feat)
    feat = layers.Activation("gelu")(feat)

    # Stacked multi-scale residual blocks.
    for width, block_name in ((128, "res_inc_1"), (192, "res_inc_2"), (256, "res_inc_3")):
        feat = residual_inception_block(feat, width, wd, name=block_name)

    # Bidirectional GRU: 160 units per direction.
    recurrent_cell = layers.GRU(160, return_sequences=True, dropout=0.15)
    feat = layers.Bidirectional(recurrent_cell, name="bigru")(feat)

    # Point-wise projection to 256 channels before the transformer stack.
    feat = layers.Conv1D(256, 1, padding="same", kernel_regularizer=weight_reg(), name="proj")(feat)
    feat = layers.LayerNormalization(name="proj_ln")(feat)

    # Two transformer encoder blocks with identical hyper-parameters.
    for block_name in ("trans_enc1", "trans_enc2"):
        feat = transformer_encoder_block(feat, head=4, ff_dim=512, dropout=0.12, wd=wd, name=block_name)

    # Learned temporal pooling.
    pooled = AttentionPooling(name="attn_pool")(feat)

    # Classification head.
    hidden = layers.Dense(256, activation="gelu", kernel_regularizer=weight_reg(), name="head_fc1")(pooled)
    hidden = layers.BatchNormalization(name="head_bn")(hidden)
    hidden = layers.Dropout(0.45, name="head_drop")(hidden)
    outputs = layers.Dense(num_classes, activation="softmax", name="main_output",
                           kernel_regularizer=weight_reg())(hidden)

    return models.Model(inputs=inputs, outputs=outputs)


# GRU 2 | 0.7740 CV - 0.80 LB   (wd: 3e-3 was used)
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
import tensorflow.keras.backend as K
import tensorflow as tf

# --- Helper functions ---
def time_sum(x):
    """Sum ``x`` over axis 1 (the time axis for a (batch, time, ...) tensor)."""
    reduced = K.sum(x, axis=1)
    return reduced

def squeeze_last_axis(x):
    """Remove the trailing size-1 axis of ``x``."""
    squeezed = tf.squeeze(x, axis=-1)
    return squeezed

def expand_last_axis(x):
    """Append a new size-1 axis at the end of ``x``."""
    expanded = tf.expand_dims(x, axis=-1)
    return expanded

# --- SE Blok ---
def se_block(input_tensor, ratio=8):
    """Squeeze-and-excitation gate: rescale channels by a learned sigmoid weight.

    The input is averaged over time, passed through a bottleneck of
    ``channels // ratio`` ReLU units and a sigmoid layer of ``channels`` units,
    then multiplied back onto the input.
    """
    n_channels = input_tensor.shape[-1]
    descriptor = GlobalAveragePooling1D()(input_tensor)
    bottleneck = Dense(n_channels // ratio, activation='relu')(descriptor)
    gate = Dense(n_channels, activation='sigmoid')(bottleneck)
    # Reshape to (1, channels) so the gate broadcasts across axis 1 of the input.
    gate = Reshape((1, n_channels))(gate)
    return Multiply()([input_tensor, gate])

# --- Residual CNN Blok ---
def residual_cnn_block(x, filters, kernel_size, drop_rate=0.3, weight_decay=1e-4):
    """Two Conv1D+BatchNorm layers with a residual shortcut, ReLU and dropout.

    When the input channel count differs from ``filters`` the shortcut is
    projected with a 1x1 convolution followed by batch normalisation.
    """
    def conv(width, size):
        # Bias-free 'same' convolution with L2 on the kernel.
        return Conv1D(width, size, padding='same', use_bias=False,
                      kernel_regularizer=l2(weight_decay))

    main = conv(filters, kernel_size)(x)
    main = BatchNormalization()(main)
    main = Activation('relu')(main)

    main = conv(filters, kernel_size)(main)
    main = BatchNormalization()(main)

    skip = x
    if skip.shape[-1] != filters:
        skip = conv(filters, 1)(skip)
        skip = BatchNormalization()(skip)

    merged = Add()([main, skip])
    merged = Activation('relu')(merged)
    return Dropout(drop_rate)(merged)

# --- Attention layer ---
def attention_block(inputs):
    """Softmax-weighted sum of ``inputs`` over axis 1.

    A tanh Dense(1) produces one score per step; the scores are squeezed,
    softmax-normalised over their last axis, expanded again and used to weight
    the inputs before summing over axis 1.
    Assumes a rank-3 (batch, time, channels) input -- TODO confirm with callers.
    """
    logits = Dense(1, activation='tanh')(inputs)
    logits = Lambda(squeeze_last_axis)(logits)
    alpha = Activation('softmax')(logits)
    alpha = Lambda(expand_last_axis)(alpha)
    context = Multiply()([inputs, alpha])
    context = Lambda(time_sum)(context)
    return context

# --- Feature Aggregation Blok ---
def feature_aggregation_block(x):
    """Concatenate the global average-pooled and global max-pooled views of ``x``."""
    pooled_views = [
        GlobalAveragePooling1D()(x),
        GlobalMaxPooling1D()(x),
    ]
    return Concatenate()(pooled_views)

# --- Model definition ---
def build_model(pad_len, imu_dim, n_classes, wd=3e-3):
    """Residual CNN encoder -> parallel BiGRU/BiLSTM -> attention -> dense head.

    Args:
        pad_len: length of the (padded) time axis.
        imu_dim: number of input channels per time step.
        n_classes: size of the softmax output layer (named 'main_output').
        wd: L2 factor used for every regularised kernel.

    Returns:
        An uncompiled Keras ``Model``.
    """
    inp = Input(shape=(pad_len, imu_dim), name='imu_input')

    # CNN encoder: (filters, kernel_size, drop_rate, apply_se); each stage is
    # followed by a stride-2 max pool.
    encoder_stages = [
        (64, 3, 0.20, False),
        (128, 5, 0.25, True),
        (256, 7, 0.30, False),
    ]
    feat = inp
    for n_filters, k_size, p_drop, use_se in encoder_stages:
        feat = residual_cnn_block(feat, filters=n_filters, kernel_size=k_size,
                                  drop_rate=p_drop, weight_decay=wd)
        if use_se:
            feat = se_block(feat)
        feat = MaxPooling1D(pool_size=2)(feat)

    # GRU and LSTM branches run in parallel on the same encoder output.
    gru_branch = Bidirectional(GRU(128, return_sequences=True, kernel_regularizer=l2(wd)))(feat)
    lstm_branch = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(wd)))(feat)
    feat = Concatenate()([gru_branch, lstm_branch])
    feat = Dropout(0.3)(feat)

    # Temporal attention pooling.
    # (feature_aggregation_block(feat) is an optional alternative that is not used here.)
    feat = attention_block(feat)

    # Fully connected head: Dense -> BatchNorm -> ReLU -> Dropout, twice.
    head_spec = ((256, 0.5), (128, 0.4))
    for width, p_drop in head_spec:
        feat = Dense(width, use_bias=False, kernel_regularizer=l2(wd))(feat)
        feat = BatchNormalization()(feat)
        feat = Activation('relu')(feat)
        feat = Dropout(p_drop)(feat)

    output = Dense(n_classes, activation='softmax', kernel_regularizer=l2(wd), name='main_output')(feat)

    return Model(inputs=inp, outputs=output)

# GRU 3 | 0.7755 CV - 0.80 LB    (ls1 - wd: 2e-4 was used.)
###  BASED ON A REVIEW OF THE COMPETITION ANALYSES
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
import tensorflow.keras.backend as K
import tensorflow as tf

def time_sum(x):
    """Sum ``x`` over axis 1."""
    return K.sum(x, axis=1)


def squeeze_last_axis(x):
    """Drop the trailing size-1 axis of ``x``."""
    return tf.squeeze(x, axis=-1)


def expand_last_axis(x):
    """Append a size-1 axis at the end of ``x``."""
    return tf.expand_dims(x, axis=-1)

def se_block(input_tensor, ratio=8):
    """Squeeze-and-excitation: gate each channel with a learned sigmoid weight."""
    n_channels = input_tensor.shape[-1]
    # Squeeze: one average per channel.
    squeezed = GlobalAveragePooling1D()(input_tensor)
    # Excite: bottleneck of n_channels // ratio units, then back to n_channels.
    excitation = Dense(n_channels // ratio, activation='relu')(squeezed)
    excitation = Dense(n_channels, activation='sigmoid')(excitation)
    excitation = Reshape((1, n_channels))(excitation)
    rescaled = Multiply()([input_tensor, excitation])
    return rescaled

def residual_cnn_block(x, filters, kernel_size, drop_rate=0.25, weight_decay=2e-4, dilation=1):
    """Dilated residual block: (Conv1D -> BN) x2 plus shortcut, then ReLU and dropout.

    The two main convolutions use ``dilation``; the optional 1x1 shortcut
    projection (used when channel counts differ) is not dilated.
    """
    common = dict(padding='same', use_bias=False)

    branch = Conv1D(filters, kernel_size, dilation_rate=dilation,
                    kernel_regularizer=l2(weight_decay), **common)(x)
    branch = BatchNormalization()(branch)
    branch = Activation('relu')(branch)

    branch = Conv1D(filters, kernel_size, dilation_rate=dilation,
                    kernel_regularizer=l2(weight_decay), **common)(branch)
    branch = BatchNormalization()(branch)

    identity = x
    if identity.shape[-1] != filters:
        identity = Conv1D(filters, 1, kernel_regularizer=l2(weight_decay), **common)(identity)
        identity = BatchNormalization()(identity)

    out = Add()([branch, identity])
    out = Activation('relu')(out)
    out = Dropout(drop_rate)(out)
    return out

def attention_block(inputs):
    """Pool ``inputs`` over axis 1 with learned softmax weights.

    Assumes a rank-3 (batch, time, channels) input -- TODO confirm with callers.
    """
    step_scores = Dense(1, activation='tanh')(inputs)
    step_scores = Lambda(squeeze_last_axis)(step_scores)
    step_weights = Activation('softmax')(step_scores)
    step_weights = Lambda(expand_last_axis)(step_weights)
    weighted = Multiply()([inputs, step_weights])
    return Lambda(time_sum)(weighted)

def build_model(pad_len, imu_dim, n_classes, wd=2e-4):
    """Dilated residual CNN -> parallel BiGRU/BiLSTM -> attention -> dense head.

    Args:
        pad_len: length of the (padded) time axis.
        imu_dim: number of input channels per time step.
        n_classes: size of the softmax output layer (named 'main_output').
        wd: L2 factor used for every regularised kernel.

    Returns:
        An uncompiled Keras ``Model``.
    """
    inp = Input(shape=(pad_len, imu_dim), name='imu_input')

    # Encoder stages: (filters, kernel, drop_rate, dilation, extra step before pooling).
    stages = (
        (64, 3, 0.2, 1, None),
        (128, 5, 0.25, 2, se_block),
        (256, 7, 0.3, 2, lambda t: LayerNormalization()(t)),
    )
    feat = inp
    for n_filters, k_size, p_drop, rate, extra in stages:
        feat = residual_cnn_block(feat, n_filters, k_size, drop_rate=p_drop,
                                  weight_decay=wd, dilation=rate)
        if extra is not None:
            feat = extra(feat)
        feat = MaxPooling1D(2)(feat)

    # Two recurrent branches over the same features, concatenated on channels.
    gru_branch = Bidirectional(GRU(128, return_sequences=True, kernel_regularizer=l2(wd)))(feat)
    lstm_branch = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(wd)))(feat)
    feat = Concatenate()([gru_branch, lstm_branch])
    feat = Dropout(0.25)(feat)
    feat = attention_block(feat)

    # Dense head: Dense -> BatchNorm -> ReLU -> Dropout, twice.
    for width, p_drop in ((256, 0.4), (128, 0.3)):
        feat = Dense(width, use_bias=False, kernel_regularizer=l2(wd))(feat)
        feat = BatchNormalization()(feat)
        feat = Activation('relu')(feat)
        feat = Dropout(p_drop)(feat)

    output = Dense(n_classes, activation='softmax', name='main_output', kernel_regularizer=l2(wd))(feat)
    model = Model(inputs=inp, outputs=output)
    return model

# BUILT FROM PAST COMPETITION SOLUTIONS - TO BE TRIED

from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
import tensorflow.keras.backend as K
import tensorflow as tf

def time_sum(x):
    """Reduce ``x`` by summing along axis 1."""
    return K.sum(x, axis=1)


def squeeze_last_axis(x):
    """Squeeze away the final size-1 axis of ``x``."""
    return tf.squeeze(x, axis=-1)


def expand_last_axis(x):
    """Add a size-1 axis after the last axis of ``x``."""
    return tf.expand_dims(x, axis=-1)

def se_block(inp, ratio=8):
    """Squeeze-and-excitation channel gating applied to ``inp``."""
    width = inp.shape[-1]
    pooled = GlobalAveragePooling1D()(inp)
    reduced = Dense(width // ratio, activation='relu')(pooled)
    channel_gate = Dense(width, activation='sigmoid')(reduced)
    # (1, width) so the gate broadcasts over axis 1 of the input.
    channel_gate = Reshape((1, width))(channel_gate)
    gated = Multiply()([inp, channel_gate])
    return gated
    
class SegmentAttention(Layer):
    """Attention over ``n_segments`` equal-length chunks of the time axis.

    The input (assumed (batch, time, channels)) is cut into ``n_segments``
    consecutive chunks of ``time // n_segments`` steps; any remainder steps at
    the end are ignored. Each chunk is mean-pooled into one representative
    vector, a Dense(1) scores each representative, the scores are
    softmax-normalised ACROSS SEGMENTS, and the weighted representatives are
    summed. Output shape: (batch, channels).

    Args:
        n_segments: number of chunks the time axis is divided into.
    """
    def __init__(self, n_segments=4, **kwargs):
        super().__init__(**kwargs)
        self.n_segments = n_segments
        # Produces one scalar score per segment representative.
        self.dense_weights = Dense(1)
        # The scores have shape (batch, n_segments, 1). A softmax over the last
        # axis (size 1) would yield all-ones weights, so normalise over axis 1
        # (the segment axis) instead.
        self.softmax_activation = Softmax(axis=1)

    def build(self, input_shape):
        # Create the scoring kernel up front; it sees (batch, n_segments, channels).
        self.dense_weights.build((input_shape[0], self.n_segments, input_shape[-1]))
        super().build(input_shape)

    def call(self, x):
        seg_len = tf.shape(x)[1] // self.n_segments

        # Mean of each segment gives one representative vector per segment.
        segs = [tf.reduce_mean(x[:, i*seg_len:(i+1)*seg_len, :], axis=1, keepdims=True)
                for i in range(self.n_segments)]
        # Plain tensor op instead of instantiating a Concatenate layer on every call.
        stacked = tf.concat(segs, axis=1)  # (batch, n_segments, channels)

        # Score every segment representative and normalise across segments.
        scores = self.dense_weights(stacked)  # (batch, n_segments, 1)
        weights = self.softmax_activation(scores)

        # Weighted sum of the segment representatives (weights broadcast over channels).
        weighted_segments = stacked * weights

        return tf.reduce_sum(weighted_segments, axis=1)

    def get_config(self):
        config = super().get_config()
        config.update({'n_segments': self.n_segments})
        return config
def residual_cnn_block(x, filters, kernel_size, drop_rate=0.2, weight_decay=2e-4, dilation=1):
    """Residual block of two dilated Conv1D+BN layers with ReLU and dropout.

    A bias-free 1x1 Conv1D + BN projects the shortcut when the input channel
    count differs from ``filters``.
    """
    def dilated_conv():
        return Conv1D(filters, kernel_size, dilation_rate=dilation, padding='same',
                      use_bias=False, kernel_regularizer=l2(weight_decay))

    body = dilated_conv()(x)
    body = BatchNormalization()(body)
    body = Activation('relu')(body)
    body = dilated_conv()(body)
    body = BatchNormalization()(body)

    shortcut = x
    needs_projection = shortcut.shape[-1] != filters
    if needs_projection:
        shortcut = Conv1D(filters, 1, padding='same', use_bias=False,
                          kernel_regularizer=l2(weight_decay))(shortcut)
        shortcut = BatchNormalization()(shortcut)

    result = Add()([body, shortcut])
    result = Activation('relu')(result)
    result = Dropout(drop_rate)(result)
    return result

def attention_block(inputs):
    """Attention pooling: weight each step by a softmax score and sum over axis 1.

    Assumes a rank-3 (batch, time, channels) input -- TODO confirm with callers.
    """
    raw = Dense(1, activation='tanh')(inputs)
    flat = Lambda(squeeze_last_axis)(raw)
    probs = Activation('softmax')(flat)
    probs = Lambda(expand_last_axis)(probs)
    scaled = Multiply()([inputs, probs])
    pooled = Lambda(time_sum)(scaled)
    return pooled


def build_model(pad_len, imu_dim, n_classes, wd=2e-4):
    """Dilated residual CNN -> BiGRU/BiLSTM -> segment + temporal attention -> dense head.

    Args:
        pad_len: length of the (padded) time axis.
        imu_dim: number of input channels per time step.
        n_classes: size of the softmax output layer (named 'main_output').
        wd: L2 factor for every regularised kernel. Defaults to 2e-4, the value
            that was previously hard-coded, so existing three-argument calls
            build the same network.

    Returns:
        An uncompiled Keras ``Model``.
    """
    inp = Input(shape=(pad_len, imu_dim), name='imu_input')
    x = residual_cnn_block(inp, 64, 3, drop_rate=0.2, weight_decay=wd, dilation=1)
    x = MaxPooling1D(2)(x)
    x = residual_cnn_block(x, 128, 5, drop_rate=0.25, weight_decay=wd, dilation=2)
    x = se_block(x)
    x = MaxPooling1D(2)(x)
    x = residual_cnn_block(x, 256, 7, drop_rate=0.3, weight_decay=wd, dilation=2)
    x = LayerNormalization()(x)
    x = MaxPooling1D(2)(x)

    # Parallel dual RNN branches over the same encoder output.
    gru = Bidirectional(GRU(128, return_sequences=True, kernel_regularizer=l2(wd)))(x)
    lstm = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(wd)))(x)
    x = Concatenate()([gru, lstm])
    x = Dropout(0.25)(x)

    # Segment attention + temporal attention, concatenated on the feature axis.
    seg_att = SegmentAttention(n_segments=4)(x)
    temp_att = attention_block(x)
    x = Concatenate()([seg_att, temp_att])

    # Dense head: Dense -> BatchNorm -> ReLU -> Dropout, twice.
    for units, drop in [(256, 0.4), (128, 0.3)]:
        x = Dense(units, use_bias=False, kernel_regularizer=l2(wd))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dropout(drop)(x)

    output = Dense(n_classes, activation='softmax', name='main_output',
                   kernel_regularizer=l2(wd))(x)
    return Model(inputs=inp, outputs=output)


from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
import tensorflow.keras.backend as K
import tensorflow as tf

# -- Helper functions --
def time_sum(x):
    """Sum over axis 1 of ``x``."""
    return K.sum(x, axis=1)


def squeeze_last_axis(x):
    """Remove the last axis of ``x`` (which must have size 1)."""
    return tf.squeeze(x, axis=-1)


def expand_last_axis(x):
    """Insert a new size-1 axis at the end of ``x``."""
    return tf.expand_dims(x, axis=-1)

# -- SE Blok --
def se_block(x, ratio=8):
    """Squeeze-and-excitation: multiply ``x`` by per-channel sigmoid gates."""
    channels = x.shape[-1]
    summary = GlobalAveragePooling1D()(x)
    hidden = Dense(channels // ratio, activation='relu')(summary)
    gates = Dense(channels, activation='sigmoid')(hidden)
    gates = Reshape((1, channels))(gates)  # broadcastable over axis 1
    out = Multiply()([x, gates])
    return out

# -- Residual CNN Blok --
def residual_cnn_block(x, filters, kernel_size, dilation=1, drop_rate=0.3, wd=1e-4):
    """Residual unit: two dilated Conv1D+BN layers, shortcut add, ReLU, dropout.

    The shortcut is passed through a bias-free 1x1 Conv1D + BN only when the
    input channel count differs from ``filters``.
    """
    residual = x

    # First conv stage (with activation).
    y = Conv1D(filters, kernel_size, padding='same', dilation_rate=dilation,
               use_bias=False, kernel_regularizer=l2(wd))(x)
    y = BatchNormalization()(y)
    y = Activation('relu')(y)

    # Second conv stage (activation is applied after the residual add).
    y = Conv1D(filters, kernel_size, padding='same', dilation_rate=dilation,
               use_bias=False, kernel_regularizer=l2(wd))(y)
    y = BatchNormalization()(y)

    channels_match = residual.shape[-1] == filters
    if not channels_match:
        residual = Conv1D(filters, 1, padding='same', use_bias=False,
                          kernel_regularizer=l2(wd))(residual)
        residual = BatchNormalization()(residual)

    y = Add()([y, residual])
    y = Activation('relu')(y)
    return Dropout(drop_rate)(y)

# -- Attention --
def attention_block(x):
    """Learned softmax-weighted sum of ``x`` over axis 1.

    Assumes a rank-3 (batch, time, channels) input -- TODO confirm with callers.
    """
    energies = Dense(1, activation='tanh')(x)
    energies = Lambda(squeeze_last_axis)(energies)
    attn = Activation('softmax')(energies)
    attn = Lambda(expand_last_axis)(attn)
    attended = Multiply()([x, attn])
    return Lambda(time_sum)(attended)

# -- Model --
def build_model(pad_len, imu_dim, n_classes, wd=1e-4):
    """Residual CNN + SE encoder -> parallel BiGRU/BiLSTM -> attention -> dense head.

    Args:
        pad_len: length of the (padded) time axis.
        imu_dim: number of input channels per time step.
        n_classes: size of the softmax output layer (named 'main_output').
        wd: L2 factor used for every regularised kernel.

    Returns:
        An uncompiled Keras ``Model``.
    """
    def downsample(t):
        # Halve the time axis.
        return MaxPooling1D(pool_size=2)(t)

    inp = Input(shape=(pad_len, imu_dim), name='imu_input')

    # Encoder: three residual stages, each followed by pooling.
    feat = downsample(residual_cnn_block(inp, 64, 3, dilation=1, drop_rate=0.2, wd=wd))

    feat = residual_cnn_block(feat, 128, 5, dilation=2, drop_rate=0.25, wd=wd)
    feat = downsample(se_block(feat))

    feat = residual_cnn_block(feat, 256, 7, dilation=2, drop_rate=0.3, wd=wd)
    feat = downsample(LayerNormalization()(feat))

    # GRU and LSTM branches in parallel (GRU first), concatenated on channels.
    recurrent_outputs = [
        Bidirectional(cell(128, return_sequences=True, kernel_regularizer=l2(wd)))(feat)
        for cell in (GRU, LSTM)
    ]
    feat = Concatenate()(recurrent_outputs)
    feat = Dropout(0.3)(feat)

    # Temporal attention pooling.
    feat = attention_block(feat)

    # Fully connected head: Dense -> BatchNorm -> ReLU -> Dropout, twice.
    for width, p_drop in ((256, 0.4), (128, 0.3)):
        feat = Dense(width, use_bias=False, kernel_regularizer=l2(wd))(feat)
        feat = BatchNormalization()(feat)
        feat = Activation('relu')(feat)
        feat = Dropout(p_drop)(feat)

    out = Dense(n_classes, activation='softmax', kernel_regularizer=l2(wd), name='main_output')(feat)
    model = Model(inputs=inp, outputs=out)
    return model


In [11]:
from scipy.ndimage import sobel

# Spatial-gradient (Sobel) based features for the ToF sensors
def calculate_spatial_tof_features(seq_df, sensor_id):
    """Per-frame Sobel gradient-magnitude statistics for one ToF sensor.

    Each row of ``seq_df`` holds 64 pixel readings (columns
    ``tof_<sensor_id>_v0`` .. ``_v63``) which are reshaped to an 8x8 image.
    Readings equal to -1 are treated as missing and filled forward, then
    backward, along the rows of ``seq_df``; anything still missing becomes 0.

    The Sobel operator is applied to the two spatial axes only, so the result
    for a frame depends on that frame alone. (Calling ``scipy.ndimage.sobel``
    directly on the (N, 8, 8) stack would also apply its [1, 2, 1] smoothing
    along axis 0 and blend neighbouring frames together.)

    Args:
        seq_df: DataFrame containing the 64 pixel columns of the sensor.
        sensor_id: sensor identifier used in the column names.

    Returns:
        DataFrame indexed like ``seq_df`` with columns
        ``tof_<id>_grad_mean``, ``tof_<id>_grad_std`` and ``tof_<id>_grad_max``.
    """
    # Local import: the enclosing cell only imports `sobel`.
    from scipy.ndimage import correlate1d

    pixel_cols = [f"tof_{sensor_id}_v{p}" for p in range(64)]
    tof_data = seq_df[pixel_cols].replace(-1, np.nan).ffill().bfill().fillna(0).values

    # (N, 64) -> (N, 8, 8); float so that integer inputs cannot overflow or truncate.
    n_frames = len(seq_df)
    frames = tof_data.astype(np.float64, copy=False).reshape(n_frames, 8, 8)

    if n_frames == 0:
        # Nothing to filter; return an empty, correctly-labelled frame.
        grad_mean = grad_std = grad_max = np.zeros(0, dtype=np.float64)
    else:
        # 2-D Sobel per frame: derivative along one spatial axis, smoothing along
        # the other. Axis 0 (frames) is deliberately left untouched.
        sobel_x = correlate1d(correlate1d(frames, [-1, 0, 1], axis=1), [1, 2, 1], axis=2)
        sobel_y = correlate1d(correlate1d(frames, [-1, 0, 1], axis=2), [1, 2, 1], axis=1)
        grad_mag = np.sqrt(sobel_x ** 2 + sobel_y ** 2)

        # Summary statistics over the 8x8 grid of each frame.
        grad_mean = grad_mag.mean(axis=(1, 2))
        grad_std = grad_mag.std(axis=(1, 2))
        grad_max = grad_mag.max(axis=(1, 2))

    return pd.DataFrame({
        f'tof_{sensor_id}_grad_mean': grad_mean,
        f'tof_{sensor_id}_grad_std': grad_std,
        f'tof_{sensor_id}_grad_max': grad_max
    }, index=seq_df.index)

In [12]:
from scipy.signal import find_peaks

def count_peaks(series):
    """Count local maxima of ``series`` whose height is at least the series mean."""
    threshold = np.mean(series)
    peak_indices = find_peaks(series, height=threshold)[0]
    return len(peak_indices)

In [13]:
if TRAIN:
    # Training pipeline: load train.csv, engineer IMU features, build padded
    # per-sequence tensors, run subject-grouped stratified K-fold training and
    # report the out-of-fold hierarchical F1. Artifacts written to EXPORT_DIR:
    # gesture_classes.npy, feature_cols.npy, scaler.pkl, sequence_maxlen.npy and
    # one gesture_model_fold_<k>.h5 per fold.
    print("▶ TRAIN MODE – loading dataset ...")
    df = pd.read_csv(RAW_DIR / "train.csv")
    
    #train_dem_df = pd.read_csv(RAW_DIR / "train_demographics.csv")
    #df = pd.merge(df, train_dem_df[['subject', 'handedness']], on='subject', how='left')
 
    
    # Encode gesture names as integers and persist the class order for inference.
    le = LabelEncoder()
    df['gesture_int'] = le.fit_transform(df['gesture'])
    np.save(EXPORT_DIR / "gesture_classes.npy", le.classes_)

    # Subjects whose mean acc_y over all their rows is negative.
    acc_y_neg_subjects = (
        df.groupby('subject')['acc_y']
        .mean()
        .loc[lambda x: x < 0]
        .index
        .tolist()
    )

    print("acc_y ortalaması negatif olan subject'ler:", acc_y_neg_subjects)

    # Drop these subjects entirely.
    df = df[~df['subject'].isin(acc_y_neg_subjects)].reset_index(drop=True)

    # --- [Important change] Advanced physical and statistical features ---
    print("  Removing gravity and calculating linear acceleration features...")
    # Computed per sequence; each partial frame keeps the group's index so the
    # column-wise concat below aligns rows back onto df.
    linear_accel_list = [pd.DataFrame(remove_gravity_from_acc(group[['acc_x', 'acc_y', 'acc_z']], group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]), columns=['linear_acc_x', 'linear_acc_y', 'linear_acc_z'], index=group.index) for _, group in df.groupby('sequence_id')]
    df = pd.concat([df, pd.concat(linear_accel_list)], axis=1)
    
    # Linear-acceleration features
    df['linear_acc_mag'] = np.sqrt(df['linear_acc_x']**2 + df['linear_acc_y']**2 + df['linear_acc_z']**2)
    # Jerk proxy: first difference of the magnitude within each sequence (first step -> 0).
    # A smoother derivative or higher-order jerk could be considered if needed.
    df['linear_acc_mag_jerk'] = df.groupby('sequence_id')['linear_acc_mag'].diff().fillna(0) # Keep current for now
    
  
    
    print("  Calculating angular velocity and distance from quaternions...")
    angular_vel_list = [pd.DataFrame(calculate_angular_velocity_from_quat(group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]), columns=['angular_vel_x', 'angular_vel_y', 'angular_vel_z'], index=group.index) for _, group in df.groupby('sequence_id')]
    df = pd.concat([df, pd.concat(angular_vel_list)], axis=1)
    angular_dist_list = [pd.DataFrame(calculate_angular_distance(group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]), columns=['angular_distance'], index=group.index) for _, group in df.groupby('sequence_id')]
    df = pd.concat([df, pd.concat(angular_dist_list)], axis=1)

  
    #- HUSEYIN GUR (GMN) 1st FE round, author note: 80>81
    # Per-step difference features for acceleration and angular velocity (in addition to the raw sensor readings)
    for col in ['acc_x', 'acc_y', 'acc_z',  'linear_acc_x', 'linear_acc_y', 'linear_acc_z', 'angular_vel_x', 'angular_vel_y', 'angular_vel_z']:  # rot_w / rot_x / rot_y / rot_z are not included
        if col in df.columns:
            df[f'{col}_diff'] = df.groupby('sequence_id')[col].diff().fillna(0)
            df[f'{col}_abs_diff'] = np.abs(df.groupby('sequence_id')[col].diff()).fillna(0) # absolute difference
    #- HUSEYIN GUR (GMN) 1st FE round, author note: 80>81

    # --- [Important change] Feature list reflecting the physical and new statistical FE ---
    imu_cols_base = ['acc_x', 'acc_y', 'acc_z',] + [c for c in df.columns if c.startswith('rot_')]  #+ ['handedness', 'height_cm']

    imu_engineered = [
    'linear_acc_mag', 'linear_acc_mag_jerk',
    'angular_vel_x', 'angular_vel_y', 'angular_vel_z', 'angular_distance'

    ]
    #- HUSEYIN GUR (GMN) 1st FE round, author note: 80>81
    # Newly added difference and absolute-difference features
    for col in ['acc_x', 'acc_y', 'acc_z', 'linear_acc_x', 'linear_acc_y', 'linear_acc_z', 'angular_vel_x', 'angular_vel_y', 'angular_vel_z']:
        if col in df.columns:
            imu_engineered.append(f'{col}_diff')
            imu_engineered.append(f'{col}_abs_diff')
     #- HUSEYIN GUR (GMN) 1st FE round, author note: 80>81

    # De-duplicate while preserving order.
    imu_cols = list(dict.fromkeys(imu_cols_base + imu_engineered))
    
    # HUSEYIN GUR - THM - CGPT 3rd FE round: marginally unsuccessful (81>81) although CV was 83
    #df = extract_temporal_thm_features(df)
    #df = extract_spatial_thm_features(df)
    # HUSEYIN GUR - THM - CGPT 3rd FE round
    
    final_feature_cols = imu_cols
    imu_dim_final = len(imu_cols)
    
    print(f"  IMU (phys-based + enhanced) {imu_dim_final}")
    np.save(EXPORT_DIR / "feature_cols.npy", np.array(final_feature_cols))

    print("  Building sequences...")
    seq_gp = df.groupby('sequence_id') 
    X_list_unscaled, y_list_int, groups_list, lens = [], [], [], [] 
    for seq_id, seq_df in seq_gp:
        seq_df_copy = seq_df.copy()
        # Use only the final selected feature columns; gaps are filled forward,
        # then backward, then with 0.
        X_list_unscaled.append(seq_df_copy[final_feature_cols].ffill().bfill().fillna(0).values.astype('float32'))
        # Label and subject are taken from the first row of the sequence.
        y_list_int.append(seq_df_copy['gesture_int'].iloc[0])
        groups_list.append(seq_df_copy['subject'].iloc[0])
        lens.append(len(seq_df_copy))

    print("  Fitting StandardScaler...")
    # NOTE(review): the scaler is fitted on every time step of every sequence
    # before the CV split below, so validation folds contribute to its statistics.
    all_steps_concatenated = np.concatenate(X_list_unscaled, axis=0)
    scaler = StandardScaler().fit(all_steps_concatenated)
    joblib.dump(scaler, EXPORT_DIR / "scaler.pkl")
    
    print("  Scaling and padding sequences...")
    X_scaled_list = [scaler.transform(x_seq) for x_seq in X_list_unscaled]
    # Pad length is the PAD_PERCENTILE-th percentile of sequence lengths; longer
    # sequences are truncated at the end, shorter ones zero-padded at the end.
    pad_len = int(np.percentile(lens, PAD_PERCENTILE)); np.save(EXPORT_DIR / "sequence_maxlen.npy", pad_len)
    X = pad_sequences(X_scaled_list, maxlen=pad_len, padding='post', truncating='post', dtype='float32')

     # --- CHANGE START --- distribute left-handers evenly across folds
     # (author's intent; the sign of the subject's mean acc_x is used as the proxy)
    subject_acc_x_mean_global = df.groupby('subject')['acc_x'].mean()
    subject_is_acc_x_mean_negative = (subject_acc_x_mean_global < 0).astype(str) # yields the strings 'True' / 'False'
    
    # Stratification key: "<gesture_int>_<True|False>" per sequence.
    y_stratify = np.array([f"{gesture_label}_{subject_is_acc_x_mean_negative.loc[sub_id]}" 
                           for gesture_label, sub_id in zip(y_list_int, groups_list)])
    # --- CHANGE END ---

    groups, y = np.array(groups_list), to_categorical(y_list_int, num_classes=len(le.classes_))
    print("  Starting training with Stratified Group K-Fold CV...")
    # Grouping by subject keeps all sequences of a subject in the same fold.
    sgkf = StratifiedGroupKFold(n_splits=N_SPLITS, shuffle=True, random_state=state_num)
    # Out-of-fold class probabilities, filled fold by fold.
    oof_preds = np.zeros_like(y, dtype='float32')
    


    for fold, (train_idx, val_idx) in enumerate(sgkf.split(X, y_stratify, groups)):
        print(f"\n===== FOLD {fold+1}/{N_SPLITS} =====")
        X_tr, X_val, y_tr, y_val = X[train_idx], X[val_idx], y[train_idx], y[val_idx]
        
        # --- [Important change] Model compilation and callbacks ---
        # A fresh model is built for every fold.
        model = build_imu_only_model(pad_len, imu_dim_final, len(le.classes_))
        
        # Learning-rate scheduler (author note: on its own this gave 80>81).
        # Halves the learning rate when val_accuracy stops improving.
        lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_accuracy',
            mode='max',
            factor=0.5,
            patience=16,
            cooldown=2,
            min_lr=3e-6,
            verbose=1
        )
        
        model.compile(optimizer=Adam(LR_INIT),
                      loss={'main_output': tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)},
                      loss_weights={'main_output': 1.0},
                      metrics={'main_output': 'accuracy'})
        
        # 'balanced' class weights computed from this fold's training labels only.
        class_weight_dict = dict(enumerate(compute_class_weight('balanced', classes=np.arange(len(le.classes_)), y=y_tr.argmax(1))))
        
        # Training batches use mixup (alpha=MIXUP_ALPHA) and class weights.
        # NOTE(review): the validation generator relies on MixupGenerator's default
        # alpha -- confirm that default disables mixing.
        train_gen = MixupGenerator(X_tr, y_tr, batch_size=BATCH_SIZE, class_weight=class_weight_dict, alpha=MIXUP_ALPHA)
        val_gen = MixupGenerator(X_val, y_val, batch_size=BATCH_SIZE)

        # Use EarlyStopping together with the LR scheduler.
        cb = [
            EarlyStopping(patience=PATIENCE, restore_best_weights=True, verbose=1, monitor='val_accuracy', mode='max'),
            lr_scheduler
        ]
        
        model.fit(train_gen, epochs=EPOCHS, validation_data=val_gen, callbacks=cb, verbose=1)
        model.save(EXPORT_DIR / f"gesture_model_fold_{fold}.h5")
        preds_val = model.predict(X_val) # single-output model: class probabilities for the validation fold
        oof_preds[val_idx] = preds_val

    print("\n✔ Training done.")

    from metric import CompetitionMetric
    # Score the out-of-fold predictions on gesture names.
    true_oof_int = y.argmax(1)
    pred_oof_int = oof_preds.argmax(1)
        
    h_f1_oof = CompetitionMetric().calculate_hierarchical_f1(
        pd.DataFrame({'gesture': le.classes_[true_oof_int]}),
        pd.DataFrame({'gesture': le.classes_[pred_oof_int]}))
    print(f"Overall OOF H‑F1 Score = {h_f1_oof:.4f}")
  

▶ TRAIN MODE – loading dataset ...
acc_y ortalaması negatif olan subject'ler: ['SUBJ_019262', 'SUBJ_045235']
  Removing gravity and calculating linear acceleration features...
  Calculating angular velocity and distance from quaternions...
  IMU (phys-based + enhanced) 31
  Building sequences...
  Fitting StandardScaler...
  Scaling and padding sequences...
  Starting training with Stratified Group K-Fold CV...

===== FOLD 1/5 =====


I0000 00:00:1754719432.249455    1479 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 21770 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:01:00.0, compute capability: 8.6


Epoch 1/160


I0000 00:00:1754719442.342705    2374 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 134ms/step - accuracy: 0.0886 - loss: 4.3596 - val_accuracy: 0.0769 - val_loss: 4.3522 - learning_rate: 5.0000e-04
Epoch 2/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 122ms/step - accuracy: 0.1587 - loss: 3.6735 - val_accuracy: 0.1065 - val_loss: 3.9532 - learning_rate: 5.0000e-04
Epoch 3/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 125ms/step - accuracy: 0.2264 - loss: 3.3686 - val_accuracy: 0.1378 - val_loss: 3.6887 - learning_rate: 5.0000e-04
Epoch 4/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 122ms/step - accuracy: 0.2568 - loss: 3.2158 - val_accuracy: 0.2634 - val_loss: 3.1678 - learning_rate: 5.0000e-04
Epoch 5/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 123ms/step - accuracy: 0.2966 - loss: 3.1529 - val_accuracy: 0.3735 - val_loss: 2.8606 - learning_rate: 5.0000e-04
Epoch 6/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━

2025-08-09 09:31:48.439299: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step

===== FOLD 2/5 =====
Epoch 1/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 138ms/step - accuracy: 0.0689 - loss: 4.4614 - val_accuracy: 0.0784 - val_loss: 4.1413 - learning_rate: 5.0000e-04
Epoch 2/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 126ms/step - accuracy: 0.1685 - loss: 3.7231 - val_accuracy: 0.0925 - val_loss: 4.0718 - learning_rate: 5.0000e-04
Epoch 3/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 129ms/step - accuracy: 0.2051 - loss: 3.4867 - val_accuracy: 0.1740 - val_loss: 3.4552 - learning_rate: 5.0000e-04
Epoch 4/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 127ms/step - accuracy: 0.2637 - loss: 3.2711 - val_accuracy: 0.3009 - val_loss: 3.0267 - learning_rate: 5.0000e-04
Epoch 5/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 127ms/step - accuracy: 0.2901 - loss: 3.1366 - val_accuracy



[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step

===== FOLD 3/5 =====
Epoch 1/160
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 132ms/step - accuracy: 0.0929 - loss: 4.3076 - val_accuracy: 0.0808 - val_loss: 4.1968 - learning_rate: 5.0000e-04
Epoch 2/160
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 123ms/step - accuracy: 0.1691 - loss: 3.6800 - val_accuracy: 0.0915 - val_loss: 4.4070 - learning_rate: 5.0000e-04
Epoch 3/160
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 123ms/step - accuracy: 0.2121 - loss: 3.4034 - val_accuracy: 0.1654 - val_loss: 3.6724 - learning_rate: 5.0000e-04
Epoch 4/160
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 123ms/step - accuracy: 0.2773 - loss: 3.1930 - val_accuracy: 0.3390 - val_loss: 2.9556 - learning_rate: 5.0000e-04
Epoch 5/160
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 123ms/step - accuracy: 0.3160 - loss: 3.0333 - va

2025-08-09 10:28:08.145953: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step

===== FOLD 4/5 =====
Epoch 1/160
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 132ms/step - accuracy: 0.0919 - loss: 4.3231 - val_accuracy: 0.0886 - val_loss: 4.1793 - learning_rate: 5.0000e-04
Epoch 2/160
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 123ms/step - accuracy: 0.1895 - loss: 3.5847 - val_accuracy: 0.0852 - val_loss: 4.3096 - learning_rate: 5.0000e-04
Epoch 3/160
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 122ms/step - accuracy: 0.2271 - loss: 3.4001 - val_accuracy: 0.1988 - val_loss: 3.4814 - learning_rate: 5.0000e-04
Epoch 4/160
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 122ms/step - accuracy: 0.2737 - loss: 3.2717 - val_accuracy: 0.3232 - val_loss: 3.0062 - learning_rate: 5.0000e-04
Epoch 5/160
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 122ms/step - accuracy: 0.3206 - loss: 2.9744 - va

2025-08-09 11:01:46.153089: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step

===== FOLD 5/5 =====
Epoch 1/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 133ms/step - accuracy: 0.0713 - loss: 4.4921 - val_accuracy: 0.0799 - val_loss: 4.3185 - learning_rate: 5.0000e-04
Epoch 2/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 123ms/step - accuracy: 0.1699 - loss: 3.6941 - val_accuracy: 0.1119 - val_loss: 4.0976 - learning_rate: 5.0000e-04
Epoch 3/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 124ms/step - accuracy: 0.2295 - loss: 3.3598 - val_accuracy: 0.1715 - val_loss: 3.6045 - learning_rate: 5.0000e-04
Epoch 4/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 124ms/step - accuracy: 0.2715 - loss: 3.2229 - val_accuracy: 0.2545 - val_loss: 3.1936 - learning_rate: 5.0000e-04
Epoch 5/160
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 123ms/step - accuracy: 0.3262 - loss: 3.0198 - val_accuracy

2025-08-09 11:34:30.580806: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step

✔ Training done.
Overall OOF H‑F1 Score = 0.7740
