# ===============================================================================
# ФИНАЛЬНЫЕ PRODUCTION МОДЕЛИ + SHAP ANALYSIS
# ===============================================================================

**Цель:** Воспроизводимый notebook с лучшими моделями для каждого сегмента

**Содержание:**
1. Загрузка подготовленных данных
2. Загрузка результатов экспериментов → выбор лучших моделей
3. Обучение лучших моделей с оптимальными параметрами
4. SHAP Analysis (Feature Importance + Explainability)
5. Полные метрики и визуализации
6. Сохранение финальных моделей

**Reproducibility:** Random seed зафиксирован (42); повторный запуск Run All должен давать те же результаты.

**Дата:** 2025-01-13

# ===============================================================================

---
# 1. ИМПОРТ БИБЛИОТЕК И КОНФИГУРАЦИЯ

In [None]:
# ====================================================================================
# ИМПОРТ БИБЛИОТЕК
# ====================================================================================

import os
import warnings
from datetime import datetime
from pathlib import Path
import pickle
import time

# Данные
import numpy as np
import pandas as pd

# Визуализация
import matplotlib.pyplot as plt
import seaborn as sns

# ML Models
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

# Balancing
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.utils.class_weight import compute_class_weight

# Metrics
from sklearn.metrics import (
    roc_auc_score, roc_curve, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix,
    classification_report
)

# SHAP for explainability
import shap

# Global settings: silence all warnings and configure how pandas prints frames
# (no column limit, up to 100 rows, floats with 4 decimal places).
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.float_format', lambda x: '%.4f' % x)

# Plot styling. The %matplotlib line is an IPython magic, so this cell only
# runs inside IPython/Jupyter, not as a plain Python script.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

# Start-up banner with the launch timestamp (naive local time from datetime.now()).
print("="*80)
print("ФИНАЛЬНЫЕ PRODUCTION МОДЕЛИ + SHAP ANALYSIS")
print("="*80)
print(f"✓ Библиотеки импортированы")
print(f"  Дата запуска: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("="*80)

In [None]:
# ====================================================================================
# КОНФИГУРАЦИЯ
# ====================================================================================

class Config:
    """Single place for every constant the notebook relies on.

    All values are class attributes, so they can be read either from the
    class itself or from an instance.
    """

    # Reproducibility: seed handed to numpy, the samplers and the models
    RANDOM_SEED = 42

    # Directories (relative to the working directory)
    OUTPUT_DIR = Path("output")
    MODELS_DIR = Path("models")
    FIGURES_DIR = Path("figures")

    # Name of the target column in every prepared data set
    TARGET_COLUMN = 'target_churn_3m'

    # Segments: display name plus the parquet file of each data split
    SEGMENTS = {
        'Segment 1': dict(
            name='Small Business',
            train='seg1_train.parquet',
            val='seg1_val.parquet',
            test='seg1_test.parquet',
        ),
        'Segment 2': dict(
            name='Middle + Large Business',
            train='seg2_train.parquet',
            val='seg2_val.parquet',
            test='seg2_test.parquet',
        ),
    }

    # SHAP parameters
    SHAP_SAMPLE_SIZE = 100  # maximum number of rows explained per segment
    SHAP_TOP_FEATURES = 20  # number of features shown on the SHAP plots

    # Plot defaults
    FIGURE_SIZE = (12, 8)
    FIGURE_DPI = 100

    @classmethod
    def create_directories(cls):
        """Create the output, models and figures directories if they are missing."""
        for folder in (cls.OUTPUT_DIR, cls.MODELS_DIR, cls.FIGURES_DIR):
            folder.mkdir(parents=True, exist_ok=True)

# Instantiate the configuration, make sure the working directories exist and
# seed numpy's global random generator.
config = Config()
config.create_directories()
np.random.seed(config.RANDOM_SEED)

# Short confirmation of the active settings
print(
    "\n✓ Конфигурация инициализирована",
    f"  Random seed: {config.RANDOM_SEED}",
    f"  Сегментов: {len(config.SEGMENTS)}",
    sep="\n",
)

---
# 2. ЗАГРУЗКА ДАННЫХ

In [None]:
# ====================================================================================
# ЗАГРУЗКА ПОДГОТОВЛЕННЫХ ДАННЫХ
# ====================================================================================

rule = "=" * 80
print("\n" + rule)
print("ЗАГРУЗКА ДАННЫХ")
print(rule)

# data[segment id][split name] -> DataFrame read from the prepared parquet file
data = {}

for seg_id, seg_info in config.SEGMENTS.items():
    print(f"\n{seg_id}: {seg_info['name']}")
    print("-" * 80)

    data[seg_id] = segment_frames = {}

    for split in ('train', 'val', 'test'):
        parquet_path = config.OUTPUT_DIR / seg_info[split]

        # Stop early with a hint when the preparation notebook has not been run
        if not parquet_path.exists():
            raise FileNotFoundError(
                f"Файл не найден: {parquet_path}\n"
                f"Сначала запустите notebook 01_data_preparation_eda.ipynb"
            )

        frame = pd.read_parquet(parquet_path)
        segment_frames[split] = frame

        # Mean of the target column, reported as a percentage
        churn_share = frame[config.TARGET_COLUMN].mean()
        print(f"  {split.upper():5s}: {frame.shape} | Churn: {churn_share*100:.2f}%")

print("\n" + rule)
print("✓ Все данные загружены успешно")
print(rule)

---
# 3. ВЫБОР ЛУЧШИХ МОДЕЛЕЙ ИЗ ЭКСПЕРИМЕНТОВ

In [None]:
# ====================================================================================
# ЗАГРУЗКА РЕЗУЛЬТАТОВ ЭКСПЕРИМЕНТОВ
# ====================================================================================

print("\n" + "="*80)
print("ВЫБОР ЛУЧШИХ МОДЕЛЕЙ")
print("="*80)

# Defaults used when the experiments file is missing or holds no rows for a
# segment.
default_models_config = {
    'Segment 1': {
        'algorithm': 'LightGBM',
        'balancing_method': 'SMOTE',
        'threshold': 0.5
    },
    'Segment 2': {
        'algorithm': 'CatBoost',
        'balancing_method': 'Class weights',
        'threshold': 0.5
    }
}

# Try to load the results of the experiments notebook
experiments_file = config.OUTPUT_DIR / 'experiments_all.csv'

if experiments_file.exists():
    experiments_df = pd.read_csv(experiments_file)
    print(f"\n✓ Загружены результаты: {experiments_file}")
    print(f"  Всего экспериментов: {len(experiments_df)}")

    # Pick the experiment with the highest roc_auc for every segment
    best_models_config = {}

    for seg_id in config.SEGMENTS.keys():
        seg_experiments = experiments_df[experiments_df['segment_group'] == seg_id]

        print(f"\n{seg_id}: {config.SEGMENTS[seg_id]['name']}")
        print("-" * 80)

        # Without this guard `.iloc[0]` below fails with a bare IndexError when
        # the file has no rows for the segment.
        if seg_experiments.empty:
            if seg_id not in default_models_config:
                raise ValueError(
                    f"Нет экспериментов и параметров по умолчанию для сегмента: {seg_id}"
                )
            best_models_config[seg_id] = dict(default_models_config[seg_id])
            print("  ⚠️  Эксперименты для сегмента не найдены — используем параметры по умолчанию")
            print(f"  Лучшая модель: {best_models_config[seg_id]['algorithm']}")
            print(f"  Балансировка: {best_models_config[seg_id]['balancing_method']}")
            continue

        best_exp = seg_experiments.sort_values('roc_auc', ascending=False).iloc[0]

        best_models_config[seg_id] = {
            'algorithm': best_exp['algorithm'],
            'balancing_method': best_exp['balancing_method'],
            'roc_auc': best_exp['roc_auc'],
            'threshold': best_exp['threshold']
        }

        print(f"  Лучшая модель: {best_exp['algorithm']}")
        print(f"  Балансировка: {best_exp['balancing_method']}")
        print(f"  ROC-AUC (test): {best_exp['roc_auc']:.4f}")
        print(f"  Gini: {best_exp['gini']:.4f}")
        print(f"  F1: {best_exp['f1']:.4f}")
        print(f"  Optimal Threshold: {best_exp['threshold']:.4f}")
else:
    print(f"\n⚠️  Результаты экспериментов не найдены: {experiments_file}")
    print("\nИспользуем параметры по умолчанию для каждого сегмента:")

    # Fallback: copy the defaults so later edits cannot mutate the template
    best_models_config = {
        seg_id: dict(defaults) for seg_id, defaults in default_models_config.items()
    }
    for seg_id, defaults in best_models_config.items():
        print(f"  {seg_id}: {defaults['algorithm']} | {defaults['balancing_method']}")

print("\n" + "="*80)

---
# 4. ВСПОМОГАТЕЛЬНЫЕ ФУНКЦИИ

In [None]:
# ====================================================================================
# ВСПОМОГАТЕЛЬНЫЕ ФУНКЦИИ
# ====================================================================================

def prepare_data(df, target_col):
    """Split a frame into features and target.

    Args:
        df: DataFrame that contains the column ``target_col``.
        target_col: Name of the target column.

    Returns:
        Tuple ``(X, y)``: ``df`` without the target column, and the target
        column itself. ``df`` is not modified.
    """
    return df.drop(columns=[target_col]), df[target_col]


def apply_balancing(X_train, y_train, method, random_seed=42):
    """Apply a class-balancing strategy to the training data.

    Args:
        X_train: Training features.
        y_train: Training labels.
        method: One of ``'No balancing'``, ``'Class weights'``, ``'SMOTE'``,
            ``'Random Undersampling'`` or ``'SMOTE + Undersampling'``.
        random_seed: Seed passed to the resamplers.

    Returns:
        Tuple ``(X, y, sample_weights)``. ``sample_weights`` is an array with
        one weight per training row for ``'Class weights'`` and ``None`` for
        every other method. The resampling methods return the resampled data;
        the other two return copies of the inputs.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if method == 'No balancing':
        return X_train.copy(), y_train.copy(), None

    if method == 'Class weights':
        # `inverse` holds, for every row, the position of its label inside
        # `classes`. compute_class_weight returns weights in the order of
        # `classes`, so indexing by position stays correct even when the labels
        # are not exactly 0..k-1 (the label value itself is not a safe index).
        classes, inverse = np.unique(y_train, return_inverse=True)
        class_weights = compute_class_weight(
            'balanced',
            classes=classes,
            y=y_train
        )
        sample_weights = np.asarray(class_weights)[inverse]
        return X_train.copy(), y_train.copy(), sample_weights

    if method == 'SMOTE':
        smote = SMOTE(random_state=random_seed, k_neighbors=5)
        X_res, y_res = smote.fit_resample(X_train, y_train)
        return X_res, y_res, None

    if method == 'Random Undersampling':
        rus = RandomUnderSampler(random_state=random_seed)
        X_res, y_res = rus.fit_resample(X_train, y_train)
        return X_res, y_res, None

    if method == 'SMOTE + Undersampling':
        # Oversample first, then undersample the result
        smote = SMOTE(random_state=random_seed, k_neighbors=5)
        X_smote, y_smote = smote.fit_resample(X_train, y_train)
        rus = RandomUnderSampler(random_state=random_seed)
        X_res, y_res = rus.fit_resample(X_smote, y_smote)
        return X_res, y_res, None

    raise ValueError(f"Unknown balancing method: {method}")


def train_model(algorithm, X_train, y_train, X_val, y_val, sample_weights=None, random_seed=42):
    """Fit one of the supported tree-based classifiers with fixed hyperparameters.

    Args:
        algorithm: ``'CatBoost'``, ``'LightGBM'``, ``'XGBoost'`` or
            ``'RandomForest'``.
        X_train, y_train: Training features and labels.
        X_val, y_val: Validation data, passed as the evaluation set to the
            boosting models; not used by RandomForest.
        sample_weights: Optional per-row training weights.
        random_seed: Seed passed to the model.

    Returns:
        The fitted model.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names.
    """
    if algorithm == 'CatBoost':
        from catboost import Pool

        catboost_params = dict(
            iterations=300,
            depth=6,
            learning_rate=0.05,
            loss_function='Logloss',
            eval_metric='AUC',
            early_stopping_rounds=50,
            use_best_model=True,
            random_seed=random_seed,
            task_type='CPU',
            verbose=False,
            allow_writing_files=False,
        )
        fitted = CatBoostClassifier(**catboost_params)
        fitted.fit(
            Pool(X_train, y_train, weight=sample_weights),
            eval_set=Pool(X_val, y_val),
        )
        return fitted

    if algorithm == 'LightGBM':
        lightgbm_params = dict(
            n_estimators=300,
            max_depth=6,
            learning_rate=0.05,
            objective='binary',
            metric='auc',
            random_state=random_seed,
            verbose=-1,
            n_jobs=-1,
        )
        fitted = LGBMClassifier(**lightgbm_params)
        # NOTE(review): an eval_set is supplied but the callbacks list is empty,
        # so no early stopping is configured here, unlike the CatBoost and
        # XGBoost branches. Confirm this is intentional.
        fitted.fit(
            X_train, y_train,
            sample_weight=sample_weights,
            eval_set=[(X_val, y_val)],
            callbacks=[],
        )
        return fitted

    if algorithm == 'XGBoost':
        xgboost_params = dict(
            n_estimators=300,
            max_depth=6,
            learning_rate=0.05,
            objective='binary:logistic',
            eval_metric='auc',
            early_stopping_rounds=50,
            random_state=random_seed,
            n_jobs=-1,
            verbosity=0,
        )
        fitted = XGBClassifier(**xgboost_params)
        fitted.fit(
            X_train, y_train,
            sample_weight=sample_weights,
            eval_set=[(X_val, y_val)],
            verbose=False,
        )
        return fitted

    if algorithm == 'RandomForest':
        forest_params = dict(
            n_estimators=200,
            max_depth=10,
            min_samples_split=100,
            random_state=random_seed,
            n_jobs=-1,
            verbose=0,
        )
        fitted = RandomForestClassifier(**forest_params)
        fitted.fit(X_train, y_train, sample_weight=sample_weights)
        return fitted

    raise ValueError(f"Unknown algorithm: {algorithm}")


def calculate_metrics(y_true, y_pred_proba, threshold):
    """Compute ranking and classification metrics for predicted probabilities.

    Args:
        y_true: True labels.
        y_pred_proba: Predicted scores; must support ``>=`` and ``.astype``
            (for example a numpy array).
        threshold: Scores greater than or equal to this value are labelled 1.

    Returns:
        Dict with keys ``threshold``, ``roc_auc``, ``pr_auc``, ``precision``,
        ``recall``, ``f1``, ``gini`` (``2 * roc_auc - 1``) and the confusion
        matrix cells ``tn``, ``fp``, ``fn``, ``tp``, in that order.
    """
    hard_labels = (y_pred_proba >= threshold).astype(int)
    auc = roc_auc_score(y_true, y_pred_proba)

    metrics = dict(
        threshold=threshold,
        roc_auc=auc,
        pr_auc=average_precision_score(y_true, y_pred_proba),
        precision=precision_score(y_true, hard_labels, zero_division=0),
        recall=recall_score(y_true, hard_labels, zero_division=0),
        f1=f1_score(y_true, hard_labels, zero_division=0),
        gini=2 * auc - 1,
    )

    # Rows of the matrix are actual classes, columns are predicted classes
    matrix = confusion_matrix(y_true, hard_labels)
    cell_positions = (('tn', (0, 0)), ('fp', (0, 1)), ('fn', (1, 0)), ('tp', (1, 1)))
    for cell_name, (row, col) in cell_positions:
        metrics[cell_name] = matrix[row, col]

    return metrics


# Confirmation that the helper definitions of this cell have been executed
print("✓ Вспомогательные функции определены")

---
# 5. ОБУЧЕНИЕ ЛУЧШИХ МОДЕЛЕЙ

In [None]:
# ====================================================================================
# ОБУЧЕНИЕ ЛУЧШИХ МОДЕЛЕЙ ДЛЯ КАЖДОГО СЕГМЕНТА
# ====================================================================================

print("\n" + "="*80)
print("ОБУЧЕНИЕ ЛУЧШИХ МОДЕЛЕЙ")
print("="*80)

# final_models keeps the fitted model plus the data needed by later cells;
# final_results keeps the configuration, timing and test metrics per segment.
final_models = {}
final_results = {}

for seg_id, model_config in best_models_config.items():
    print(f"\n{seg_id}: {config.SEGMENTS[seg_id]['name']}")
    print("-" * 80)
    algo_name = model_config['algorithm']
    print(f"  Алгоритм: {algo_name}")
    balancing_name = model_config['balancing_method']
    print(f"  Балансировка: {balancing_name}")

    # Split every data set of the segment into features and target
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = (
        prepare_data(data[seg_id][part], config.TARGET_COLUMN)
        for part in ('train', 'val', 'test')
    )

    # Balance the training data only; validation and test stay untouched
    X_train_balanced, y_train_balanced, sample_weights = apply_balancing(
        X_train,
        y_train,
        balancing_name,
        config.RANDOM_SEED,
    )

    print(f"\n  Размер после балансировки: {X_train_balanced.shape}")
    print(f"  Churn rate: {y_train_balanced.mean()*100:.2f}%")

    # Fit the model and measure the wall-clock time
    print(f"\n  Обучение модели...")
    started_at = time.time()
    model = train_model(
        algo_name,
        X_train_balanced,
        y_train_balanced,
        X_val,
        y_val,
        sample_weights,
        config.RANDOM_SEED,
    )
    train_time = time.time() - started_at
    print(f"  ✓ Обучение завершено за {train_time:.2f} сек")

    # Probability of the positive class on the test set
    y_test_proba = model.predict_proba(X_test)[:, 1]

    # Metrics at the configured threshold (0.5 when none is configured)
    threshold = model_config.get('threshold', 0.5)
    metrics = calculate_metrics(y_test, y_test_proba, threshold)

    print(f"\n  МЕТРИКИ НА TEST:")
    report_rows = (
        ("ROC-AUC:", metrics['roc_auc']),
        ("Gini:", metrics['gini']),
        ("F1-Score:", metrics['f1']),
        ("Precision:", metrics['precision']),
        ("Recall:", metrics['recall']),
        ("Threshold:", threshold),
    )
    for caption, value in report_rows:
        print(f"    {caption:<11}{value:.4f}")

    # Keep everything later cells need
    final_models[seg_id] = dict(
        model=model,
        X_train=X_train,  # unbalanced training features, kept for SHAP
        X_test=X_test,
        y_test=y_test,
        y_test_proba=y_test_proba,
        feature_names=X_train.columns.tolist(),
    )

    final_results[seg_id] = dict(
        algorithm=algo_name,
        balancing_method=balancing_name,
        train_time=train_time,
        **metrics,
    )

print("\n" + "="*80)
print("✓ Все модели обучены")
print("="*80)

---
# 6. SHAP ANALYSIS - EXPLAINABILITY

In [None]:
# ====================================================================================
# SHAP ANALYSIS ДЛЯ ЛУЧШИХ МОДЕЛЕЙ
# ====================================================================================

print("\n" + "="*80)
print("SHAP ANALYSIS - EXPLAINABILITY")
print("="*80)

# Per segment: dict with the explainer, the SHAP values and the explained rows,
# or None when the calculation failed.
shap_values_storage = {}

for seg_id, model_data in final_models.items():
    print(f"\n{seg_id}: {config.SEGMENTS[seg_id]['name']}")
    print("-" * 80)

    model = model_data['model']
    X_test = model_data['X_test']

    # Explain a fixed-seed sample when the test set is larger than the limit
    if len(X_test) > config.SHAP_SAMPLE_SIZE:
        print(f"  Используем sample {config.SHAP_SAMPLE_SIZE} из {len(X_test)} для SHAP")
        X_shap = X_test.sample(n=config.SHAP_SAMPLE_SIZE, random_state=config.RANDOM_SEED)
    else:
        X_shap = X_test

    print(f"  Расчет SHAP values...")
    start_time = time.time()

    try:
        # Tree SHAP for tree-based models
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X_shap)

        # For binary classification keep the values of class 1. Depending on
        # the SHAP version and the model, per-class values arrive either as a
        # list of 2-D arrays or as a single 3-D array
        # (samples, features, classes); both are reduced to 2-D here so the
        # plots and the importance table downstream get one value per
        # (sample, feature).
        if isinstance(shap_values, list):
            shap_values = shap_values[1]
        elif np.ndim(shap_values) == 3:
            shap_values = shap_values[:, :, 1]

        elapsed = time.time() - start_time
        print(f"  ✓ SHAP values рассчитаны за {elapsed:.2f} сек")

        shap_values_storage[seg_id] = {
            'explainer': explainer,
            'shap_values': shap_values,
            'X_shap': X_shap
        }

    except Exception as e:
        # Best effort: a SHAP failure for one segment must not stop the
        # notebook; later cells skip segments stored as None.
        print(f"  ❌ Ошибка при расчете SHAP: {str(e)}")
        shap_values_storage[seg_id] = None

print("\n" + "="*80)
print("✓ SHAP analysis завершен")
print("="*80)

In [None]:
# ====================================================================================
# SHAP SUMMARY PLOTS
# ====================================================================================

print("\n" + "="*80)
print("SHAP SUMMARY PLOTS")
print("="*80)

# Draw and save two SHAP summary figures (bar and beeswarm) per segment.
for seg_id, shap_data in shap_values_storage.items():
    # Segments whose SHAP calculation failed are stored as None and skipped
    if shap_data is None:
        continue
    
    print(f"\n{seg_id}: {config.SEGMENTS[seg_id]['name']}")
    print("-" * 80)
    
    shap_values = shap_data['shap_values']
    X_shap = shap_data['X_shap']
    
    # 1. Summary Plot (Bar) - Feature Importance
    # NOTE(review): presumably shap.summary_plot draws on the figure created
    # here; show=False leaves it open so the title and savefig below apply to
    # it. Confirm against the SHAP version in use.
    fig, ax = plt.subplots(figsize=(12, 8))
    shap.summary_plot(
        shap_values, 
        X_shap,
        plot_type="bar",
        max_display=config.SHAP_TOP_FEATURES,
        show=False
    )
    plt.title(f'SHAP Feature Importance: {seg_id}', fontsize=14, fontweight='bold', pad=20)
    plt.tight_layout()
    
    # The file suffix is the second word of the segment id ("Segment 1" -> "1")
    seg_num = seg_id.split()[1]
    bar_path = config.FIGURES_DIR / f'shap_importance_seg{seg_num}.png'
    plt.savefig(bar_path, dpi=config.FIGURE_DPI, bbox_inches='tight')
    plt.show()
    print(f"  ✓ Сохранено: {bar_path}")
    
    # 2. Summary Plot (Beeswarm) - Feature Impact
    fig, ax = plt.subplots(figsize=(12, 10))
    shap.summary_plot(
        shap_values,
        X_shap,
        max_display=config.SHAP_TOP_FEATURES,
        show=False
    )
    plt.title(f'SHAP Feature Impact: {seg_id}', fontsize=14, fontweight='bold', pad=20)
    plt.tight_layout()
    
    beeswarm_path = config.FIGURES_DIR / f'shap_beeswarm_seg{seg_num}.png'
    plt.savefig(beeswarm_path, dpi=config.FIGURE_DPI, bbox_inches='tight')
    plt.show()
    print(f"  ✓ Сохранено: {beeswarm_path}")

print("\n" + "="*80)

In [None]:
# ====================================================================================
# SHAP FEATURE IMPORTANCE TABLE
# ====================================================================================

print("\n" + "="*80)
print("SHAP FEATURE IMPORTANCE (ТОП-20)")
print("="*80)

for seg_id, shap_data in shap_values_storage.items():
    # Segments whose SHAP calculation failed are stored as None
    if shap_data is None:
        continue

    print(f"\n{seg_id}: {config.SEGMENTS[seg_id]['name']}")
    print("-" * 80)

    explained_rows = shap_data['X_shap']

    # Importance of a feature = mean absolute SHAP value over the explained rows
    mean_abs_shap = np.mean(np.abs(shap_data['shap_values']), axis=0)

    unsorted_df = pd.DataFrame({
        'Feature': explained_rows.columns,
        'SHAP_Importance': mean_abs_shap
    })
    importance_df = unsorted_df.sort_values('SHAP_Importance', ascending=False)
    importance_df = importance_df.reset_index(drop=True)

    # Print only the 20 most important features
    print("\nТОП-20 признаков по SHAP importance:")
    print(importance_df.head(20).to_string(index=False))

    # The full table goes to disk; the suffix is the second word of the
    # segment id
    seg_num = seg_id.split()[1]
    importance_file = config.OUTPUT_DIR / f'shap_importance_seg{seg_num}.csv'
    importance_df.to_csv(importance_file, index=False)
    print(f"\n✓ Сохранено: {importance_file}")

print("\n" + "="*80)

---
# 7. ВИЗУАЛИЗАЦИЯ РЕЗУЛЬТАТОВ

In [None]:
# ====================================================================================
# ROC CURVES
# ====================================================================================

print("\n" + "="*80)
print("ROC CURVES")
print("="*80)

fig, ax = plt.subplots(figsize=(10, 8))

# Fixed colours for the first segments; any further segment falls back to
# matplotlib's colour cycle instead of failing with an IndexError.
colors = ['#2E86AB', '#A23B72']

# One ROC curve per segment, built from the stored test labels and scores
for idx, (seg_id, model_data) in enumerate(final_models.items()):
    y_test = model_data['y_test']
    y_test_proba = model_data['y_test_proba']

    fpr, tpr, _ = roc_curve(y_test, y_test_proba)
    roc_auc = final_results[seg_id]['roc_auc']
    algorithm = final_results[seg_id]['algorithm']

    label = f"{seg_id} | {algorithm} (AUC = {roc_auc:.4f})"
    # color=None lets matplotlib pick the next colour of the active cycle
    curve_color = colors[idx] if idx < len(colors) else None
    ax.plot(fpr, tpr, color=curve_color, lw=2, label=label)

# Diagonal: reference line of a random classifier
ax.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--', label='Random (AUC = 0.5000)')

ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate', fontsize=12, fontweight='bold')
ax.set_ylabel('True Positive Rate', fontsize=12, fontweight='bold')
ax.set_title('ROC CURVES - ФИНАЛЬНЫЕ МОДЕЛИ', fontsize=14, fontweight='bold', pad=20)
ax.legend(loc='lower right', fontsize=10)
ax.grid(alpha=0.3)

plt.tight_layout()
roc_path = config.FIGURES_DIR / 'final_roc_curves.png'
plt.savefig(roc_path, dpi=config.FIGURE_DPI, bbox_inches='tight')
plt.show()

print(f"\n✓ Сохранено: {roc_path}")
print("="*80)

In [None]:
# ====================================================================================
# CONFUSION MATRICES
# ====================================================================================

print("\n" + "="*80)
print("CONFUSION MATRICES")
print("="*80)

# One panel per trained segment. The panel count follows final_results rather
# than being fixed at two; with two segments the figure is 16x6 as before.
# squeeze=False always returns a 2-D array of axes, even for a single panel.
n_panels = max(len(final_results), 1)
fig, axes = plt.subplots(1, n_panels, figsize=(8 * n_panels, 6), squeeze=False)

for panel, (seg_id, result) in zip(axes[0], final_results.items()):
    # Rebuild the 2x2 matrix from the stored cells: rows = actual, cols = predicted
    cm = np.array([[result['tn'], result['fp']],
                   [result['fn'], result['tp']]])

    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        ax=panel,
        cbar=True,
        square=True
    )

    panel.set_xlabel('Predicted', fontsize=12, fontweight='bold')
    panel.set_ylabel('Actual', fontsize=12, fontweight='bold')
    panel.set_title(
        f'{seg_id}\n{result["algorithm"]}',
        fontsize=12, fontweight='bold'
    )
    panel.set_xticklabels(['No Churn', 'Churn'])
    panel.set_yticklabels(['No Churn', 'Churn'])

plt.tight_layout()
cm_path = config.FIGURES_DIR / 'final_confusion_matrices.png'
plt.savefig(cm_path, dpi=config.FIGURE_DPI, bbox_inches='tight')
plt.show()

print(f"\n✓ Сохранено: {cm_path}")
print("="*80)

---
# 8. СОХРАНЕНИЕ ФИНАЛЬНЫХ МОДЕЛЕЙ

In [None]:
# ====================================================================================
# СОХРАНЕНИЕ ФИНАЛЬНЫХ МОДЕЛЕЙ
# ====================================================================================

print("\n" + "="*80)
print("СОХРАНЕНИЕ ФИНАЛЬНЫХ МОДЕЛЕЙ")
print("="*80)

# Pickle the fitted model of every segment into the models directory
for seg_id, model_data in final_models.items():
    # "Segment 1" -> "1"
    seg_num = seg_id.split()[1]
    seg_result = final_results[seg_id]
    algorithm = seg_result['algorithm']

    # File name: segment number plus the lower-cased algorithm name
    algo_name = algorithm.lower().replace(' ', '_')
    model_filename = f"final_model_seg{seg_num}_{algo_name}.pkl"
    model_path = config.MODELS_DIR / model_filename

    with open(model_path, 'wb') as sink:
        pickle.dump(model_data['model'], sink)

    # Size on disk in kilobytes
    size_kb = model_path.stat().st_size / 1024

    print(f"\n✓ {model_filename}")
    print(f"  Сегмент: {seg_id}")
    print(f"  Алгоритм: {algorithm}")
    print(f"  ROC-AUC: {seg_result['roc_auc']:.4f}")
    print(f"  Размер: {size_kb:.2f} KB")

print("\n" + "="*80)
print("✓ Все модели сохранены")
print("="*80)

In [None]:
# ====================================================================================
# СОХРАНЕНИЕ РЕЗУЛЬТАТОВ
# ====================================================================================

print("\n" + "="*80)
print("СОХРАНЕНИЕ РЕЗУЛЬТАТОВ")
print("="*80)

# One row per segment: id, display name, then every stored result field
results_list = [
    {
        'segment': seg_id,
        'segment_name': config.SEGMENTS[seg_id]['name'],
        **result,
    }
    for seg_id, result in final_results.items()
]
results_df = pd.DataFrame(results_list)

# Write the table to the output directory
results_file = config.OUTPUT_DIR / 'final_production_models_results.csv'
results_df.to_csv(results_file, index=False)

print(f"\n✓ Результаты сохранены: {results_file}")
print(f"\nИтоговая таблица:")
print(results_df.to_string(index=False))

print("\n" + "="*80)

---
# 9. ФИНАЛЬНАЯ СВОДКА

In [None]:
# ====================================================================================
# ФИНАЛЬНАЯ СВОДКА
# ====================================================================================

print("\n\n" + "="*80)
print("✓✓✓ ФИНАЛЬНЫЕ PRODUCTION МОДЕЛИ ГОТОВЫ ✓✓✓")
print("="*80)

print(f"\nДата: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Random seed: {config.RANDOM_SEED}")

print(f"\n{'='*80}")
print("ФИНАЛЬНЫЕ МОДЕЛИ")
print(f"{'='*80}")

# Per-segment recap of the configuration and test metrics
for seg_id, result in final_results.items():
    print(f"\n{seg_id}: {config.SEGMENTS[seg_id]['name']}")
    print("-" * 80)
    print(f"  Алгоритм: {result['algorithm']}")
    print(f"  Балансировка: {result['balancing_method']}")
    print(f"  ROC-AUC: {result['roc_auc']:.4f}")
    print(f"  Gini: {result['gini']:.4f}")
    print(f"  F1-Score: {result['f1']:.4f}")
    print(f"  Precision: {result['precision']:.4f}")
    print(f"  Recall: {result['recall']:.4f}")
    print(f"  Threshold: {result['threshold']:.4f}")
    print(f"  Время обучения: {result['train_time']:.2f} сек")

print(f"\n{'='*80}")
print("СОХРАНЕННЫЕ ФАЙЛЫ")
print(f"{'='*80}")

# SHAP files exist only for segments whose SHAP calculation succeeded; failed
# segments are stored as None and the plot/table cells skip them. Listing them
# unconditionally would report files that were never written.
shap_ready = [
    seg_id for seg_id in final_results
    if shap_values_storage.get(seg_id) is not None
]
shap_missing = [seg_id for seg_id in final_results if seg_id not in shap_ready]

print(f"\nМОДЕЛИ (models/):")
for seg_id, result in final_results.items():
    seg_num = seg_id.split()[1]
    algo_name = result['algorithm'].lower().replace(' ', '_')
    print(f"  • final_model_seg{seg_num}_{algo_name}.pkl")

print(f"\nРЕЗУЛЬТАТЫ (output/):")
print(f"  • final_production_models_results.csv")
for seg_id in shap_ready:
    seg_num = seg_id.split()[1]
    print(f"  • shap_importance_seg{seg_num}.csv")

print(f"\nВИЗУАЛИЗАЦИИ (figures/):")
print(f"  • final_roc_curves.png")
print(f"  • final_confusion_matrices.png")
for seg_id in shap_ready:
    seg_num = seg_id.split()[1]
    print(f"  • shap_importance_seg{seg_num}.png")
    print(f"  • shap_beeswarm_seg{seg_num}.png")

for seg_id in shap_missing:
    print(f"\n⚠️  SHAP-файлы не созданы для {seg_id}: расчет SHAP завершился ошибкой")

print(f"\n{'='*80}")
print("REPRODUCIBILITY")
print(f"{'='*80}")
print(f"\n✓ Random seed зафиксирован: {config.RANDOM_SEED}")
print(f"✓ Все параметры моделей зафиксированы")
print(f"✓ Балансировка использует фиксированный seed")
print(f"✓ Run All должен давать идентичные результаты")

print(f"\n{'='*80}")
print("✓ ГОТОВО К ВАЛИДАЦИИ")
print(f"{'='*80}")