# Importing the Necessary Libraries

In [None]:
import os, json, joblib, numpy as np, pandas as pd
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

from keras.utils import Sequence, to_categorical, pad_sequences
from keras.models import Model, load_model
from keras.layers import (
    Input, Conv1D, BatchNormalization, Activation, add, MaxPooling1D, Dropout,
    Bidirectional, LSTM, GlobalAveragePooling1D, Dense, Multiply, Reshape,
    Lambda, Concatenate, GRU, GaussianNoise
)
from keras.regularizers import l2
from keras.optimizers import Adam
from keras import backend as K
import tensorflow as tf
import polars as pl
from sklearn.model_selection import StratifiedGroupKFold
from scipy.spatial.transform import Rotation as R

2025-09-02 17:28:42.131965: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-02 17:28:42.891226: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-09-02 17:28:45.015132: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


# Fix Seed

In [3]:
import random
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Applies ``seed`` to Python's ``random`` module, NumPy's global RNG,
    TensorFlow's global RNG and ``tf.experimental.numpy``'s RNG, and exports
    the hash-seed and TensorFlow determinism environment variables.

    Args:
        seed: Value handed to each seeding function; its ``str()`` form is
            written to ``PYTHONHASHSEED``.
    """
    # NOTE(review): PYTHONHASHSEED is presumably only read at interpreter
    # start-up, so this likely affects child processes only — confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same seed for every library-level generator, in a fixed order.
    seeders = (
        random.seed,
        np.random.seed,
        tf.random.set_seed,
        tf.experimental.numpy.random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Request deterministic TensorFlow / cuDNN kernels via environment flags.
    os.environ.update(TF_CUDNN_DETERMINISTIC='1', TF_DETERMINISTIC_OPS='1')


seed_everything(seed=42)

# Configuration

In [None]:
import sys
from pathlib import Path

# Make the project checkout importable so the `config` module imported below
# can be found. The checkout is expected at ~/kaggle-cmi-detect.
PROJECT_ROOT = Path.home() / "kaggle-cmi-detect"
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

from config import RAW_DIR, EXTERNAL_DIR  # resolved through the sys.path entry above


# NOTE(review): presumably switches between training and inference in later
# cells — confirm against the cells that read it.
TRAIN = False

# Directory settings; the values imported from `config` are normalised to Path.
RAW_DIR = Path(RAW_DIR)
PROCESSED_DIR = Path(EXTERNAL_DIR)  # note: built from EXTERNAL_DIR
EXPORT_DIR = Path("./")

# Hyperparameters; they are consumed by cells outside this section.
BATCH_SIZE = 64
PAD_PERCENTILE = 95
LR_INIT = 5e-4
WD = 3e-3
MIXUP_ALPHA = 0.4
EPOCHS = 160
PATIENCE = 40

print("▶ imports ready · tensorflow", tf.__version__)

▶ imports ready · tensorflow 2.20.0


# Utility Functions

In [None]:
# Tensor Manipulations
def time_sum(x): # (B, T, C) → (B, C)
    """Sum ``x`` over axis 1, dropping that axis.

    Uses ``tf.reduce_sum`` directly, like the sibling helpers that call
    ``tf.squeeze`` / ``tf.expand_dims``, instead of ``keras.backend.sum``:
    the public ``keras.backend`` namespace of Keras 3 does not provide
    ``sum``, while ``tf.reduce_sum`` yields the same result on either
    Keras generation.

    Args:
        x: Tensor with at least two axes; per the signature comment it is
            expected to be shaped (B, T, C).

    Returns:
        Tensor with axis 1 summed out, i.e. (B, C) for a (B, T, C) input.
    """
    return tf.reduce_sum(x, axis=1)

def squeeze_last_axis(x):
    """Drop the trailing axis of ``x``: (B, T, 1) → (B, T).

    Thin wrapper around ``tf.squeeze`` with ``axis=-1``.
    """
    squeezed = tf.squeeze(x, axis=-1)
    return squeezed

def expand_last_axis(x):
    """Append a new trailing axis to ``x``: (B, T) → (B, T, 1).

    Thin wrapper around ``tf.expand_dims`` with ``axis=-1``.
    """
    expanded = tf.expand_dims(x, axis=-1)
    return expanded

def se_block(x, reduction=8):
    """Squeeze-and-excitation gate: rescale each channel of ``x`` by a learned weight.

    The input is averaged over time, passed through a two-layer bottleneck
    (ReLU then sigmoid) and the resulting per-channel gate is multiplied
    back onto ``x``.

    Args:
        x: Keras tensor shaped (B, T, C). The channel count C is read from
            ``x.shape[-1]`` and is assumed to be statically known.
        reduction: Divisor for the bottleneck width. The width is
            ``C // reduction``, clamped to at least 1 so that inputs with
            fewer channels than ``reduction`` do not request a zero-unit
            ``Dense`` layer.

    Returns:
        Keras tensor with the same (B, T, C) shape as ``x``.
    """
    ch = x.shape[-1]                       # channel count C (assumed static)
    bottleneck = max(ch // reduction, 1)   # never ask for a 0-unit Dense layer
    se = GlobalAveragePooling1D()(x)       # (B, T, C) → (B, C): mean over time
    se = Dense(bottleneck, activation='relu')(se)       # (B, C/r)
    se = Dense(ch, activation='sigmoid')(se)            # (B, C): per-channel gate
    se = Reshape((1, ch))(se)                           # (B, 1, C): broadcastable over time
    return Multiply()([x, se])                          # (B, T, C) × (B, 1, C) → (B, T, C)

In [None]:
def residual_se_cnn_block(x, filters, kernel_size, pool_size=2, drop=0.3, wd=1e-4):
    shortcut =  x
    for _ in range(2):
        x = Conv1D(filters, kernel_size, padding='same', use_bias=False, kernel_regularizer=l2(wd))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)