<a href="https://colab.research.google.com/github/YuriySamorodov/MIPT/blob/main/%D0%A1%D0%B5%D0%BC%D0%B5%D1%81%D1%82%D1%80%202/Deep%20Learning/mipt_bio_2025_yuriysamorodov_tensor_ensemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Импортируем библиотеки
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout, GlobalAveragePooling2D, Input, Add, AveragePooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split, StratifiedKFold
import pandas as pd
import numpy as np
import math
from sklearn.metrics import accuracy_score

In [7]:
# Проверяем GPU
print("GPU device:", tf.test.gpu_device_name())

GPU device: /device:GPU:0


In [8]:
# Загружаем датасеты
df_train = pd.read_csv('fmnist_train.csv')
df_test = pd.read_csv('fmnist_test.csv')

# Удаляем пустые значение
df_train.dropna(inplace=True)

# Подготовка фичей и заголовков
X_train_raw = df_train.drop(['label', 'Id'], axis=1).values
y_train_raw = df_train['label'].values
X_test_raw = df_test.drop('Id', axis=1).values
ids_test = df_test['Id']

# Нормализация изображений
X_train = X_train_raw.reshape(-1, 28, 28, 1).astype('float32') / 255.0
X_test = X_test_raw.reshape(-1, 28, 28, 1).astype('float32') / 255.0

# Преобразование меток
y_train = to_categorical(y_train_raw, num_classes=10)

print(f"Training samples: {X_train.shape[0]}")
print(f"Test samples: {X_test.shape[0]}")

Training samples: 17039
Test samples: 10000


In [9]:
# Модель 1: Глубокая сверточная сеть с ResNet-связями
def create_model_1():
    model = Sequential()
    model.add(Input(shape=(28, 28, 1)))
    model.add(Conv2D(32, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(32, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2,2))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2,2))
    model.add(Dropout(0.25))

    model.add(Conv2D(128, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(128, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(GlobalAveragePooling2D())

    model.add(Dense(512, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(256, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(Dropout(0.3))
    model.add(Dense(10, activation='softmax'))
    return model

In [10]:
# Модель 2: Широкая сверточная сеть с широкими связями
def create_model_2():
    model = Sequential()
    model.add(Input(shape=(28, 28, 1)))
    model.add(Conv2D(64, (5,5), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2,2))
    model.add(Dropout(0.3))

    model.add(Conv2D(128, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(128, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(AveragePooling2D(2,2))
    model.add(Dropout(0.3))

    model.add(Conv2D(256, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(GlobalAveragePooling2D())

    model.add(Dense(1024, activation='relu', kernel_regularizer=l2(0.01)))
    model.add(BatchNormalization())
    model.add(Dropout(0.6))
    model.add(Dense(512, activation='relu', kernel_regularizer=l2(0.01)))
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='softmax'))
    return model

In [11]:
# Модель 3: ResNet модель с пропусками
def create_model_3():
    input_layer = Input(shape=(28, 28, 1))

    # Первый  блок
    x = Conv2D(32, (3,3), activation='relu', padding='same')(input_layer)
    x = BatchNormalization()(x)
    x = Conv2D(32, (3,3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(2,2)(x)
    x = Dropout(0.25)(x)

    # Второй блок с пропуском
    shortcut = Conv2D(64, (1,1), padding='same')(x)
    x = Conv2D(64, (3,3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Conv2D(64, (3,3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Add()([x, shortcut])
    x = MaxPooling2D(2,2)(x)
    x = Dropout(0.25)(x)

    # Третий блок
    x = Conv2D(128, (3,3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = GlobalAveragePooling2D()(x)

    x = Dense(256, activation='relu', kernel_regularizer=l2(0.001))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    output = Dense(10, activation='softmax')(x)

    model = Model(inputs=input_layer, outputs=output)
    return model


In [12]:
# Модель 4: Компактная модель
def create_model_4():
    model = Sequential()
    model.add(Input(shape=(28, 28, 1)))
    model.add(Conv2D(48, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Conv2D(48, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Conv2D(96, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2,2))
    model.add(Conv2D(96, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Conv2D(192, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2,2))
    model.add(Conv2D(192, (3,3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(GlobalAveragePooling2D())
    model.add(Dense(384, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    return model


In [13]:
# Повышаем разнообразие с использованием разных стратегий аугментации

def get_augmentation_generator(strategy='standard'):
    if strategy == 'standard':
        return ImageDataGenerator(
            rotation_range=15,
            width_shift_range=0.1,
            height_shift_range=0.1,
            shear_range=0.1,
            zoom_range=0.1,
            horizontal_flip=True,
            fill_mode='nearest'
        )
    elif strategy == 'aggressive':
        return ImageDataGenerator(
            rotation_range=25,
            width_shift_range=0.15,
            height_shift_range=0.15,
            shear_range=0.15,
            zoom_range=0.15,
            horizontal_flip=True,
            fill_mode='nearest'
        )
    elif strategy == 'minimal':
        return ImageDataGenerator(
            rotation_range=10,
            width_shift_range=0.08,
            height_shift_range=0.08,
            horizontal_flip=True
        )

In [14]:
# Тренируем модели
def train_model(model_func, model_name, X_train, y_train, X_val, y_val,
                optimizer_type='adam', augmentation='standard'):
    print(f"\n{'='*50}")
    print(f"Training {model_name}")
    print(f"{'='*50}")

    # Очищаем память
    tf.keras.backend.clear_session()

    model = model_func()

    # Применяем разные оптимизации для разных стратегий
    if optimizer_type == 'adam':
        optimizer = Adam(learning_rate=0.001)
    elif optimizer_type == 'sgd':
        optimizer = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
    elif optimizer_type == 'rmsprop':
        optimizer = RMSprop(learning_rate=0.001)

    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    print(f"Model parameters: {model.count_params():,}")

    # Расширяем (аугментируем) данные
    datagen = get_augmentation_generator(augmentation)
    datagen.fit(X_train)

    # Callbacks
    early_stopping = EarlyStopping(
        monitor='val_accuracy',        # Следить за метрикой точности на валидации
        patience=15,                  # Остановить обучение, если улучшений не будет в течение 15 эпох
        restore_best_weights=True,    # Восстановить веса модели с лучшей эпохи после остановки
        mode='max',                   # Ожидается максимизация метрики (чем больше, тем лучше)
        verbose=1                    # Выводить подробную информацию о процессе остановки
    )


    reduce_lr = ReduceLROnPlateau(
        monitor='val_accuracy',
        factor=0.3,
        patience=5,
        verbose=1,
        mode='max',
        min_lr=1e-7
    )

    # Применяем оптимизаци для Nvidia Р100
    batch_size = 128  # Larger batch size for P100 GPU
    steps_per_epoch = len(X_train) // batch_size

    # Тренировка
    history = model.fit(
        datagen.flow(X_train, y_train, batch_size=batch_size),
        steps_per_epoch=steps_per_epoch,
        epochs=100,  # Increased epochs
        validation_data=(X_val, y_val),
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )

    # Оценка
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    print(f"{model_name} - Validation Accuracy: {val_acc:.4f} ({val_acc*100:.2f}%)")
    print(f"Best epoch: {len(history.history['val_accuracy']) - early_stopping.patience}")

    return model, history, val_acc

In [15]:
# Тренируем и проверяем ансамбль моделей
def train_ensemble_cv(n_splits=5):
    models = []
    val_accuracies = []

    # Конфиги моделей
    model_configs = [
        (create_model_1, "Deep CNN", 'adam', 'standard'),
        (create_model_2, "Wide CNN", 'rmsprop', 'aggressive'),
        (create_model_3, "ResNet-inspired", 'adam', 'minimal'),
        (create_model_4, "Compact Deep", 'sgd', 'standard')
    ]

    # Разбивка данных для тренировки ансамбля
    X_train_ens, X_val_ens, y_train_ens, y_val_ens = train_test_split(
        X_train, y_train, test_size=0.15, random_state=42, stratify=y_train_raw
    )

    print(f"Ensemble training set: {X_train_ens.shape[0]} семплов")
    print(f"Ensemble validation set: {X_val_ens.shape[0]} семплов")

    # Тренируем каждую мрдель
    for model_func, model_name, optimizer, augmentation in model_configs:
        model, history, val_acc = train_model(
            model_func, model_name, X_train_ens, y_train_ens,
            X_val_ens, y_val_ens, optimizer, augmentation
        )
        models.append((model, model_name, val_acc))
        val_accuracies.append(val_acc)

    # Тренируем модели с разными сидами для повышения разнообразия
    for i in range(2):
        tf.random.set_seed(42 + i * 10)
        model, history, val_acc = train_model(
            create_model_1, f"Deep CNN v{i+2}", X_train_ens, y_train_ens,
            X_val_ens, y_val_ens, 'adam', 'standard'
        )
        models.append((model, f"Deep CNN v{i+2}", val_acc))
        val_accuracies.append(val_acc)

    return models, val_accuracies

In [16]:
# Test Time Augmentation for ensemble
def predict_with_tta_ensemble(models, X_test, num_augmentations=8):
    """Apply TTA and ensemble predictions"""
    all_predictions = []

    test_datagen = ImageDataGenerator(
        rotation_range=12,
        width_shift_range=0.08,
        height_shift_range=0.08,
        horizontal_flip=True
    )

    for model, model_name, val_acc in models:
        model_predictions = []

        # Оригинальное предсказание
        pred_original = model.predict(X_test, verbose=0)
        model_predictions.append(pred_original)

        # Аугментированное предсказание
        for i in range(num_augmentations):
            X_test_aug = np.array([test_datagen.random_transform(x) for x in X_test])
            pred_aug = model.predict(X_test_aug, verbose=0)
            model_predictions.append(pred_aug)

        # Среднее время теста
        avg_pred = np.mean(model_predictions, axis=0)

        # Вес в зависимости от validation accuracy
        weighted_pred = avg_pred * val_acc
        all_predictions.append(weighted_pred)

        print(f"Generated predictions for {model_name} (weight: {val_acc:.4f})")

    # Ensemble all model predictions
    ensemble_pred = np.mean(all_predictions, axis=0)
    return ensemble_pred

In [17]:
# Тренируем модели
print("Starting ensemble training...")
models, val_accuracies = train_ensemble_cv()

print(f"\n{'='*60}")
print("ENSEMBLE SUMMARY")
print(f"{'='*60}")
for i, (model, name, acc) in enumerate(models):
    print(f"Model {i+1}: {name} - Validation Accuracy: {acc:.4f} ({acc*100:.2f}%)")

avg_val_acc = np.mean(val_accuracies)
print(f"\nAverage Individual Model Accuracy: {avg_val_acc:.4f} ({avg_val_acc*100:.2f}%)")

# Гененрируем предсказания
print(f"\n{'='*60}")
print("GENERATING ENSEMBLE PREDICTIONS")
print(f"{'='*60}")

ensemble_predictions = predict_with_tta_ensemble(models, X_test, num_augmentations=6)
y_pred_ensemble = np.argmax(ensemble_predictions, axis=1)

# Создаем файл для Kaggle
submission_df = pd.DataFrame({'Id': ids_test, 'label': y_pred_ensemble})
submission_df.to_csv('submission_ensemble_ultimate.csv', index=False)

print(f"\nEnsemble submission file 'submission_ensemble_ultimate.csv' created successfully!")
print(f"Expected ensemble accuracy: {avg_val_acc + 0.01:.4f}+ ({(avg_val_acc + 0.01)*100:.2f}%+)")

# Сохраняем предсказания каждой модели
individual_preds = {}
for i, (model, name, acc) in enumerate(models):
    pred = model.predict(X_test, verbose=0)
    individual_preds[f"{name}_pred"] = np.argmax(pred, axis=1)

# Выгружаем файл с анализом по каждой модели
pred_df = pd.DataFrame(individual_preds)
pred_df['ensemble_pred'] = y_pred_ensemble
pred_df['Id'] = ids_test
pred_df.to_csv('detailed_predictions_analysis.csv', index=False)

print("\nDetailed predictions analysis saved to 'detailed_predictions_analysis.csv'")
print("This file shows individual model predictions vs ensemble prediction for each test sample.")

Starting ensemble training...
Ensemble training set: 14483 семплов
Ensemble validation set: 2556 семплов

Training Deep CNN
Model parameters: 490,218
Epoch 1/100


  self._warn_if_super_not_called()


[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 127ms/step - accuracy: 0.4989 - loss: 2.1318 - val_accuracy: 0.1037 - val_loss: 3.1681 - learning_rate: 0.0010
Epoch 2/100
[1m  1/113[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 14ms/step - accuracy: 0.6406 - loss: 1.5481



[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6406 - loss: 1.5481 - val_accuracy: 0.1264 - val_loss: 3.1614 - learning_rate: 0.0010
Epoch 3/100
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 43ms/step - accuracy: 0.7189 - loss: 1.2710 - val_accuracy: 0.1416 - val_loss: 4.1157 - learning_rate: 0.0010
Epoch 4/100
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7188 - loss: 1.1169 - val_accuracy: 0.1506 - val_loss: 4.1015 - learning_rate: 0.0010
Epoch 5/100
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 50ms/step - accuracy: 0.7527 - loss: 1.1349 - val_accuracy: 0.1072 - val_loss: 5.5358 - learning_rate: 0.0010
Epoch 6/100
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7500 - loss: 1.1229 - val_accuracy: 0.1013 - val_loss: 5.9301 - learning_rate: 0.0010