# Подгрузка модулей, выбор фич

In [1]:
from utils.support import *

from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Concatenate, Dense, BatchNormalization,\
                                    Flatten, MaxPooling1D, Reshape, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import Callback, ModelCheckpoint

from sklearn.metrics import roc_curve, recall_score, make_scorer
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.model_selection import train_test_split

2025-08-10 17:51:31.972662: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-10 17:51:31.977115: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-10 17:51:32.041270: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-08-10 17:51:32.041377: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-08-10 17:51:32.049274: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [2]:
# Load the prepared dataset and keep only the columns whose dtype is not 'object'.
data = open_data('data/bank_data_final_2.csv')
non_object_mask = data.dtypes != 'object'
data = data.loc[:, non_object_mask]
data.head(2)

Unnamed: 0,fraud_bool,income,name_email_similarity,current_address_months_count,customer_age,days_since_request,intended_balcon_amount,zip_count_4w,velocity_6h,velocity_24h,...,zip_peak,bank_branch_count_8w,bank_branch_peak,zip_transaction_ratio,max_zip_sum,zip_sum_ratio,vel_1,vel_2,vel_3,vel_4
634102,0,7,0.546775,375,3,0.027343,-0.654816,1455,3871.735367,3169.891947,...,1,14,1,194.0,291000.0,7.275,-2114.180477,-1776.742716,107.040705,-0.011601
173005,0,8,0.715285,379,4,0.007504,18.068853,1086,9501.114204,6294.918432,...,1,748,0,2.899866,217200.0,5.43,5708.831358,1203.137209,-235.72693,-0.124511


In [3]:
# Target column and the remaining columns as the feature matrix.
y = data['fraud_bool']
X = data.drop(columns='fraud_bool')

In [4]:
# Recall (TPR) at the ROC operating point closest to a target FPR.
def get_recall(y_true, y_pred, target_fpr=0.05, is_probabilities_in=True):
    """Return recall at the ROC point whose FPR is closest to ``target_fpr``.

    Parameters
    ----------
    y_true : array-like
        Binary ground-truth labels.
    y_pred : array-like
        Scores/probabilities of the positive class, or hard labels when
        ``is_probabilities_in`` is False. A column vector of shape (n, 1),
        as returned by Keras ``predict``, is flattened to 1-D.
    target_fpr : float, default 0.05
        False-positive rate at which recall is read off the ROC curve.
    is_probabilities_in : bool, default True
        If True, ``y_pred`` is binarised at the selected ROC threshold before
        recall is computed; if False, ``y_pred`` is scored as given.

    Returns
    -------
    float
        Recall at the selected point. 0.0 is returned when ``y_true`` holds
        fewer than two classes, when the ROC curve is empty, or when the
        computation raises (the error is printed rather than hidden).
    """
    classes = np.unique(y_true)
    if len(classes) < 2:
        return 0.0

    try:
        y_pred = np.asarray(y_pred)
        # Keras models return an (n, 1) column; work on a flat vector.
        if y_pred.ndim == 2 and y_pred.shape[1] == 1:
            y_pred = y_pred.ravel()

        fpr, _, thresholds = roc_curve(y_true, y_pred)
        if len(fpr) == 0:
            print('Empty FPR.')
            return 0.0

        # Index of the ROC point closest to the requested FPR (may lie on either side of it).
        idx = np.abs(fpr - target_fpr).argmin()
        if is_probabilities_in:
            y_pred = (y_pred >= thresholds[idx]).astype('uint8')

        return recall_score(y_true, y_pred)
    except Exception as exc:
        # Still best-effort (score 0.0), but say what actually went wrong.
        print(f'Exception! {type(exc).__name__}: {exc}')
        return 0.0

# get_recall needs continuous scores to build the ROC curve. With the default
# response_method="predict" the scorer would receive hard 0/1 labels and the
# "recall at 5% FPR" would degenerate. Requires scikit-learn >= 1.4
# (on older versions use needs_threshold=True instead).
recall_at_5_fpr = make_scorer(get_recall, response_method=('predict_proba', 'decision_function'))

Итак, в классических моделях экспериментировали с фичами: какие брать, какие нет. Оказалось, что фичи с предположительно линейной зависимостью от целевой переменной (напомним: бинарной fraud_bool с дисбалансом классов) в целом показывают себя лучше. Поэтому решил воспользоваться SelectKBest из scikit-learn (с функцией оценки mutual_info_classif):

In [12]:
def mutual_info_seeded(X, y):
    """mutual_info_classif with the notebook-wide seed `rs`.

    mutual_info_classif is randomised when random_state is left at None, so
    an unseeded call can select different features from run to run.
    """
    return mutual_info_classif(X, y, random_state=rs)

# Keep the 32 features with the highest mutual information with the target.
kbest = SelectKBest(mutual_info_seeded, k=32)
kbest.fit(X, y)

0,1,2
,score_func,<function mut...x7c00790fd750>
,k,32


In [15]:
# Names of the columns kept by the fitted selector, as a plain Python list.
best_k_features = kbest.get_feature_names_out().tolist()

In [20]:
# Display the selected feature names.
best_k_features

['income',
 'customer_age',
 'email_is_free',
 'phone_home_valid',
 'phone_mobile_valid',
 'has_other_cards',
 'proposed_credit_limit',
 'source',
 'keep_alive_session',
 'device_distinct_emails_8w',
 'only_one_valid',
 'payment_type_AA',
 'payment_type_AC',
 'device_os_windows',
 'device_os_linux',
 'device_os_other',
 'n_e_cat_similarity',
 'danger_spike',
 'cur_address_months_binned_first_spike',
 'cur_address_months_binned_danger_spike',
 'cur_address_fraud_zone',
 'is_recent_request',
 'bank_months_count_qt',
 'bank_months_0_2',
 'bank_months_strong_peak',
 'cur_address_months_binned_end_spike',
 'births_danger_zone',
 'is_credit_limit_high',
 'intended_balcon_peak_1',
 'has_entered_website_recently',
 'zip_peak',
 'bank_branch_peak']

In [13]:
# Persist the selected feature names with the project helper save_list.
# NOTE(review): presumably this writes logs/best_k_features.txt (the path read back in the next cell) — confirm in utils.support.
save_list(best_k_features, 'best_k_features')

In [7]:
# Read the feature list from disk with the project helper open_list; this rebinds best_k_features.
# NOTE(review): presumably the same list saved above, so SelectKBest need not be re-run — confirm.
best_k_features = open_list('logs/best_k_features.txt')

---

In [28]:
# Stratified 75/25 train/validation split on the selected features.
X_train, X_val, y_train, y_val = train_test_split(
    X[best_k_features],
    y,
    test_size=0.25,
    random_state=rs,
    stratify=y,
)

Не будем использовать оверсемплинг — как показали эксперименты, он плохо работает с нашими данными. Но оставлять дисбаланс классов как есть, конечно, нельзя. В методе fit в Keras API можно указать веса классов (параметр class_weight). Вычислим их и будем использовать в дальнейшем:

In [29]:
from sklearn.utils import class_weight

def compute_class_weight(y_train):
    """Return a {class label: weight} dict with sklearn's 'balanced' class weights.

    The result is meant to be passed as `class_weight` to Keras `fit`.
    """
    labels = np.unique(y_train)
    weights = class_weight.compute_class_weight('balanced', classes=labels, y=y_train)
    return {label: float(weight) for label, weight in zip(labels, weights)}

class_weight_dict = compute_class_weight(y_train)
class_weight_dict

{0: 0.5055761680831787, 1: 45.33365570599613}

In [None]:
# Number of input neurons: taken from the training matrix so it always
# matches the selected feature set (32 with the current SelectKBest setting).
input_dims = X_train.shape[1]

Ну, теперь к самим нейронкам.

# 6. Нейронные сети

Как по мне, моя задача не требует каких-то сложных нейронных сетей, но попытаться стоит. Тем более, оказалось, что вычисления проходят даже быстрее, чем с классическими моделями.

## Dense Neural Network (DNN)

Начнем с обычной полносвязной сети:

In [None]:
# Simple fully connected network with L2 regularisation:
# two hidden Dense layers followed by a sigmoid output unit.
model_dnn = Sequential(
    name='DNN_Model',
    layers=[
        # Explicit Input layer instead of the legacy `input_dim=` kwarg on the first Dense.
        Input(shape=(input_dims,)),
        Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.2),
        # The second hidden layer is as wide as the input.
        Dense(input_dims, activation='relu'),
        BatchNormalization(),
        Dense(1, activation='sigmoid')
    ]
)

К сожалению, передать обычную функцию get_recall (из models.ipynb) в качестве метрики в compile/fit не получится: метрики Keras считаются по батчам на тензорах, а нашей метрике нужна ROC-кривая по всей валидационной выборке. Поэтому нужно написать класс-наследник Callback, который считает её в конце эпохи. Заодно напишем и функцию, которая проводит весь этап компиляции и обучения модели:

In [12]:
class RecallAtFPR(Callback):
    """Keras callback that logs validation recall at a target false-positive rate.

    Every ``every_n_epochs`` epochs the model scores the validation set, the
    ROC point closest to the target FPR is selected, and the recall at that
    threshold is written to ``logs['val_recall_at_fpr']`` (so that callbacks
    listed after this one, e.g. ModelCheckpoint, can monitor it).
    On the other epochs the key is not written.

    Parameters
    ----------
    X_val, y_val :
        Validation inputs (anything ``model.predict`` accepts) and binary labels.
    fpr : float, default 0.05
        Target false-positive rate.
    batch_size : int or None
        Batch size forwarded to ``model.predict``.
    every_n_epochs : int, default 4
        Compute the metric only every N-th epoch to save time.
    """
    def __init__(self, X_val, y_val, fpr=0.05, batch_size=None, every_n_epochs=4):
        super().__init__()
        self.X_val = X_val
        self.y_val = y_val
        self.batch_size = batch_size
        self.target_fpr = fpr
        self.every_n_epochs = every_n_epochs

    def on_epoch_end(self, epoch, logs=None):
        """Compute recall at the target FPR and store it in ``logs``."""
        if (epoch + 1) % self.every_n_epochs != 0:
            return

        # `logs or {}` would replace an empty (falsy) dict passed by Keras with
        # a fresh one and lose the metric; only substitute when it is None.
        if logs is None:
            logs = {}

        # roc_curve does not raise on single-class labels (it only warns and
        # returns NaNs), so check for that case explicitly, as get_recall does.
        y_true = np.asarray(self.y_val).ravel()
        if np.unique(y_true).size < 2:
            logs['val_recall_at_fpr'] = 0.0
            print(f'\nEpoch {epoch+1}: no valid ROC (single-class in val)')
            return

        # Score the validation set.
        y_prob = self.model.predict(self.X_val, batch_size=self.batch_size, verbose=0)
        # Reduce the prediction to a flat vector of positive-class scores.
        if y_prob.ndim == 1:
            pass
        elif y_prob.ndim == 2 and y_prob.shape[1] == 2:
            y_prob = y_prob[:, 1]
        else:
            y_prob = y_prob.ravel()

        # Values outside [0, 1] are treated as logits and squashed with a sigmoid.
        if y_prob.min() < 0 or y_prob.max() > 1:
            y_prob = 1/(1+np.exp(-y_prob))

        # ROC curve; a ValueError here means invalid inputs (e.g. NaN scores).
        try:
            fpr, _, thresholds = roc_curve(y_true, y_prob)
        except ValueError as exc:
            logs['val_recall_at_fpr'] = 0.0
            print(f'\nEpoch {epoch+1}: no valid ROC ({exc})')
            return

        if fpr.size == 0:
            logs['val_recall_at_fpr'] = 0.0
            print(f"\nEpoch {epoch+1}: empty fpr.")
            return

        # Recall at the threshold whose FPR is closest to the target.
        idx = np.argmin(np.abs(fpr - self.target_fpr))
        y_pred = (y_prob >= thresholds[idx]).astype('uint8')
        score = recall_score(y_true, y_pred)

        logs['val_recall_at_fpr'] = score
        # Label follows the configured FPR (prints "recall@5" for the default 0.05).
        print(f"\nEpoch {epoch+1}: recall@{self.target_fpr * 100:g} = {score:.4f}")

In [20]:
def optimize_model(model, filepath_to_save, 
                   X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, 
                   optimizer=None, loss='binary_crossentropy', metrics=['AUC', "Recall", "Precision"], 
                   fpr=0.05, batch_size=1024, epochs=20, every_n_epochs=4,
                   class_weight=class_weight_dict):
    """Compile and train a Keras model, checkpointing the best recall-at-FPR version.

    Note: the data and class-weight defaults are bound to the notebook globals
    that exist when this cell is executed; re-run the cell (or pass the
    arguments explicitly) after those globals change.

    Parameters
    ----------
    model : Keras model to compile and fit.
    filepath_to_save : str, path where the best model is saved.
    X_train, y_train : training inputs and labels.
    X_val, y_val : validation inputs and labels.
    optimizer : Keras optimizer; Adam(0.001) when None.
    loss : loss passed to ``compile``.
    metrics : metrics passed to ``compile``.
    fpr : float in [0, 1], FPR at which validation recall is measured.
    batch_size : int, batch size for ``fit`` and for the callback's ``predict``.
    epochs : int, number of training epochs.
    every_n_epochs : int, the recall-at-FPR metric is computed every N-th epoch.
    class_weight : dict of class weights passed to ``fit``.

    Returns
    -------
    The History object returned by ``model.fit``.
    """

    if optimizer is None:
        optimizer = Adam(0.001)

    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=metrics
    )

    # Callback that writes 'val_recall_at_fpr' into the epoch logs.
    recall_callback = RecallAtFPR(X_val, y_val, fpr=fpr, batch_size=batch_size,
                                  every_n_epochs=every_n_epochs)
    # Keep only the best full model according to that metric.
    checkpoint = ModelCheckpoint(
        filepath=filepath_to_save,
        monitor='val_recall_at_fpr',
        save_best_only=True,
        # Was misspelled `save_weight_only`, an unknown kwarg that Keras 3 rejects.
        save_weights_only=False,
        mode='max',
        verbose=1
    )

    # recall_callback must come before checkpoint so the metric is already in
    # the logs when ModelCheckpoint reads it.
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        class_weight=class_weight,
        callbacks=[recall_callback, checkpoint]
    )

    return history

Итак, оптимизируем DNN:

In [None]:
# Compile and train the baseline DNN with the default settings of optimize_model;
# the best model by val_recall_at_fpr is checkpointed to the given path.
optimize_model(model_dnn, 'logs/keras/test.keras')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 4: recall@5 = 0.3612

Epoch 4: val_recall_at_fpr improved from -inf to 0.36122, saving model to data/keras/test.keras
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 8: recall@5 = 0.3960

Epoch 8: val_recall_at_fpr improved from 0.36122 to 0.39603, saving model to data/keras/test.keras
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 12: recall@5 = 0.3569

Epoch 12: val_recall_at_fpr did not improve from 0.39603
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 16: recall@5 = 0.3090

Epoch 16: val_recall_at_fpr did not improve from 0.39603
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 20: recall@5 = 0.3288

Epoch 20: val_recall_at_fpr did not improve from 0.39603


<keras.src.callbacks.History at 0x756264bf3be0>

In [None]:
# Score the validation set with the in-memory model_dnn.
# NOTE(review): after fit() the model holds the last-epoch weights, not the best
# checkpoint written by ModelCheckpoint — confirm this is the model you want to evaluate.
y_pred = model_dnn.predict(X_val)
y_pred



array([[0.6275351 ],
       [0.29558596],
       [0.1217978 ],
       ...,
       [0.07900803],
       [0.07967044],
       [0.2555716 ]], dtype=float32)

In [40]:
# Recall at the ROC point closest to 5% FPR (get_recall's default target_fpr) on the validation predictions.
get_recall(y_val, y_pred)

0.38636363636363635

Получили показатель 38%, что пока что проигрывает лучшей модели - Catboost. Попробуем добавить больше слоев:

## Deep Block Neural Network

In [14]:
# Deeper network: four Dense -> BatchNorm -> Dropout blocks narrowing 256 -> 32, then a sigmoid output.
model_deep_block = Sequential(
    name='Deep_Block',
    layers=[
        # Explicit Input layer instead of the legacy `input_dim=` kwarg on the first Dense.
        Input(shape=(input_dims,)),

        Dense(256, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.3),
        
        Dense(128, activation='relu', kernel_regularizer=l2(0.0001)),
        BatchNormalization(),
        Dropout(0.3),
        
        Dense(64, activation='relu', kernel_regularizer=l2(0.0001)),
        BatchNormalization(),
        Dropout(0.2),
        
        Dense(32, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.15),
        
        Dense(1, activation='sigmoid')
    ]
)

# Train with the default settings; the best model by val_recall_at_fpr is saved to this path.
optimize_model(model_deep_block, 'logs/keras/model_deep_block_1.keras')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 4: recall@5 = 0.3133

Epoch 4: val_recall_at_fpr improved from -inf to 0.31335, saving model to logs/keras/model_deep_block_1.keras
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 8: recall@5 = 0.3748

Epoch 8: val_recall_at_fpr improved from 0.31335 to 0.37476, saving model to logs/keras/model_deep_block_1.keras
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 12: recall@5 = 0.3825

Epoch 12: val_recall_at_fpr improved from 0.37476 to 0.38250, saving model to logs/keras/model_deep_block_1.keras
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 16: recall@5 = 0.4033

Epoch 16: val_recall_at_fpr improved from 0.38250 to 0.40329, saving model to logs/keras/model_deep_block_1.keras
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 20: recall@5 = 0.3946

Epoch 20: val_recall_at_fpr did not improve from 0.40329


<keras.src.callbacks.History at 0x749df7ec2e60>

Уже лучше! 40% (лучший результат — 0.4033 на 16-й эпохе). Попробуем "пирамидальную" структуру:

In [21]:
# "Pyramid" variant: widths 64 -> 128 -> 64 -> 32 with heavier dropout in the widest block.
model_deep_block_pyramide = Sequential(
    name='Deep_Block_Pyramid',
    layers=[
        # Explicit Input layer instead of the legacy `input_dim=` kwarg on the first Dense.
        Input(shape=(input_dims,)),

        Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.3),
        
        Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.4),
        
        Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.3),
        
        Dense(32, activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.15),
        
        Dense(1, activation='sigmoid')
    ]
)

# 30 epochs, with the validation metric evaluated every 3rd epoch.
optimize_model(model_deep_block_pyramide, 'logs/keras/model_deep_block_pyramid.keras', 
               every_n_epochs=3, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 3: recall@5 = 0.3897

Epoch 3: val_recall_at_fpr improved from -inf to 0.38975, saving model to logs/keras/model_deep_block_pyramid.keras
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 6: recall@5 = 0.3767

Epoch 6: val_recall_at_fpr did not improve from 0.38975
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 9: recall@5 = 0.3985

Epoch 9: val_recall_at_fpr improved from 0.38975 to 0.39845, saving model to logs/keras/model_deep_block_pyramid.keras
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 12: recall@5 = 0.3912

Epoch 12: val_recall_at_fpr did not improve from 0.39845
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 15: recall@5 = 0.3946

Epoch 15: val_recall_at_fpr did not improve from 0.39845
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 18: recall@5 = 0.3854

Epoch 18: val_recall_at_fpr did not improve from 0.39845
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 21: recall@5 = 0.4057

Epoch 21: val_recall_at_fpr improved from 0.39845 to 0.40571, saving model to logs/keras/model_

<keras.src.callbacks.History at 0x749db87dba30>

## Wide & Deep Neural Network

Финальная архитектура - Wide & Deep (с объединением признаков). Состоит из линейной части и нелинейной. Другими словами: можем использовать вместе те самые признаки, которые мы разделили по информативности для разных типов моделей!

In [42]:
# Load a saved list of feature names with the project helper open_list and show its length.
# NOTE(review): presumably the categorical features selected in an earlier notebook — confirm.
cat_features = open_list('logs/top_cat_features_list.txt')
len(cat_features)

17

In [43]:
lin_features = open_list('logs/top_linear_features.txt')
tree_features = open_list('logs/top_non_linear_features.txt')

# Union of both lists without duplicates. dict.fromkeys keeps first-seen order,
# whereas list(set(...)) orders strings differently in every kernel session
# (hash randomisation), which would reshuffle the model's input columns.
informative_features = list(dict.fromkeys(tree_features + lin_features))
len(informative_features)

42

In [44]:
# Informative features that are not in the categorical list.
linear_features = [
    name
    for name in informative_features
    if name not in cat_features
]
len(linear_features)

25

In [45]:
# Input widths follow the feature lists instead of hard-coded numbers
# (17 and 25 with the current lists), so they cannot drift out of sync.
wide_input_dims = len(cat_features)
deep_input_dims = len(linear_features)

In [46]:
# One stratified split applied to both feature groups and the target, so rows stay aligned.
(X_train_cat, X_val_cat,
 X_train_lin, X_val_lin,
 y_train_cat, y_val_cat) = train_test_split(
    X[cat_features],
    X[linear_features],
    y,
    test_size=0.25,
    random_state=rs,
    stratify=y,
)

In [48]:
# Recompute class weights from the labels of the split made for the wide & deep
# inputs (was y_train from the earlier split). With the same stratified split
# the values stay the same. optimize_model's default class_weight was bound when
# the function was defined, so pass this dict explicitly to use it.
class_weight_dict = compute_class_weight(y_train_cat)
class_weight_dict

{0: 0.5055761680831787, 1: 45.33365570599613}

Строим сеть и обучаем:

In [50]:
# Input layers: one per branch.
deep_inp = Input(shape=(deep_input_dims,))
wide_inp = Input(shape=(wide_input_dims,))

# Wide (linear) branch: a single linear unit.
wide = Dense(1, activation='linear')(wide_inp)

# Deep branch: three Dense -> BatchNorm -> Dropout blocks.
deep = Dense(128, activation='relu', kernel_regularizer=l2(0.001))(deep_inp)
deep = BatchNormalization()(deep)
deep = Dropout(0.1)(deep)

deep = Dense(64, activation='relu', kernel_regularizer=l2(0.0007))(deep)
deep = BatchNormalization()(deep)
deep = Dropout(0.15)(deep)

deep = Dense(32, activation='relu', kernel_regularizer=l2(0.001))(deep)
deep = BatchNormalization()(deep)
deep = Dropout(0.1)(deep)

# Merge both branches and map to a single sigmoid output.
concat = Concatenate()([wide, deep])
out = Dense(1, activation='sigmoid')(concat)

# Input order is [wide, deep]; the data lists below must follow the same order.
model_wide_deep = Model(inputs=[wide_inp, deep_inp], outputs=out)

# Train. Labels come from the same train_test_split call as X_*_cat / X_*_lin
# (previously y_train / y_val from the earlier split were used, which matched
# only because both splits share random_state and stratification). Class
# weights are passed explicitly because optimize_model's default is bound at
# definition time.
optimize_model(model_wide_deep, 'logs/keras/model_wide_deep.keras',
               X_train=[X_train_cat, X_train_lin], y_train=y_train_cat,
               X_val=[X_val_cat, X_val_lin], y_val=y_val_cat,
               every_n_epochs=3, epochs=21,
               class_weight=class_weight_dict)

Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 3: recall@5 = 0.3085

Epoch 3: val_recall_at_fpr improved from -inf to 0.30851, saving model to logs/keras/model_wide_deep.keras
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 6: recall@5 = 0.3699

Epoch 6: val_recall_at_fpr improved from 0.30851 to 0.36992, saving model to logs/keras/model_wide_deep.keras
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 9: recall@5 = 0.3743

Epoch 9: val_recall_at_fpr improved from 0.36992 to 0.37427, saving model to logs/keras/model_wide_deep.keras
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 12: recall@5 = 0.3694

Epoch 12: val_recall_at_fpr did not improve from 0.37427
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 15: recall@5 = 0.3772

Epoch 15: val_recall_at_fpr improved from 0.37427 to 0.37718, saving model to logs/keras/model_wide_deep.keras
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 18: recall@5 = 0.3868

Epoch 18: val_recall_at_fpr improved from 0.37718 to 0.38685, saving model to logs/keras/model_wide_deep.keras
Epoch 19/21
Epoc

<keras.src.callbacks.History at 0x749db943c310>

В первой версии получили слабые результаты — что-то в районе 17%. Тут мне показалось, что модель сама плохо отличает, какие признаки отнести к wide-части, а какие — к deep. Поэтому я разделил данные на непрерывные и категориальные фичи: в wide-часть идут категориальные фичи, а в deep — непрерывные, где сложно уловить связь с бинарной целевой переменной.

Update: после такого разделения стало лучше (recall@5 ≈ 0.39 на валидации, см. лог выше), но опять же, на уровне остальных вариантов.

---

Итого, лучшая версия - CatBoost!