In [1]:
%pip install pandas numpy scikit-learn gensim lazypredict lightautoml "flaml[blendsearch]" matplotlib seaborn -q

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder
import warnings
import re
import matplotlib.pyplot as plt
import seaborn as sns

from lazypredict.Supervised import LazyClassifier, LazyRegressor
from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task
from flaml import AutoML

warnings.filterwarnings("ignore")
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)
print("Библиотеки импортированы.")

'nlp' extra dependency package 'gensim' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.
'nlp' extra dependency package 'gensim' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.
Библиотеки импортированы.


In [3]:
def preprocess_churn(train_path, test_path, target_column='Churn'):
    """Предобработка данных для задачи Churn."""
    print(f"Preprocessing Churn data: {train_path}, {test_path}...")
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    df_train.columns = ["".join (c if c.isalnum() else "_" for c in str(x)) for x in df_train.columns]
    df_test.columns = ["".join (c if c.isalnum() else "_" for c in str(x)) for x in df_test.columns]
    target_column = "".join (c if c.isalnum() else "_" for c in str(target_column))

    df_train['TotalSpent'] = pd.to_numeric(df_train['TotalSpent'], errors='coerce')
    df_train['TotalSpent'] = df_train['TotalSpent'].fillna(0)

    df_test['TotalSpent'] = pd.to_numeric(df_test['TotalSpent'], errors='coerce')
    df_test['TotalSpent'] = df_test['TotalSpent'].fillna(0)

    X_train = df_train.drop(target_column, axis=1)
    y_train = df_train[target_column]
    X_test = df_test.drop(target_column, axis=1)
    y_test = df_test[target_column]

    categorical_cols_train = X_train.select_dtypes(include=['object', 'category']).columns
    categorical_cols_test = X_test.select_dtypes(include=['object', 'category']).columns
    common_categorical_cols = list(set(categorical_cols_train) & set(categorical_cols_test))

    for col in common_categorical_cols:
        X_train[col] = X_train[col].astype(str)
        X_test[col] = X_test[col].astype(str)

        unique_vals_train = set(X_train[col].unique())
        map_dict = {'Yes': 1, 'No': 0, 'No phone service': 0, 'No internet service': 0}
        if unique_vals_train.issubset(set(map_dict.keys()) | {np.nan, 'nan', 'None'}):
            X_train[col] = X_train[col].map(map_dict).fillna(0).astype(int)
            X_test[col] = X_test[col].map(map_dict).fillna(0).astype(int)
            common_categorical_cols.remove(col)

    if common_categorical_cols:
        X_train = pd.get_dummies(X_train, columns=common_categorical_cols, dummy_na=False, drop_first=False)
        X_test = pd.get_dummies(X_test, columns=common_categorical_cols, dummy_na=False, drop_first=False)

        train_cols = set(X_train.columns)
        test_cols = set(X_test.columns)

        missing_in_test = list(train_cols - test_cols)
        for c in missing_in_test:
            if c != target_column:
                X_test[c] = 0

        missing_in_train = list(test_cols - train_cols)
        for c in missing_in_train:
             if c != target_column:
                X_train[c] = 0

        final_train_cols = [col for col in X_train.columns if col != target_column]
        X_test = X_test[final_train_cols]
    else:
         print("No remaining categorical columns for One-Hot Encoding.")


    print("Churn preprocessing finished.")
    return X_train, y_train, X_test, y_test


def preprocess_price(train_path, test_path, target_column='price'):
    """Предобработка данных для задачи Price."""
    print(f"Preprocessing Price data: {train_path}, {test_path}...")
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)

    df_train.columns = ["".join (c if c.isalnum() else "_" for c in str(x)) for x in df_train.columns]
    df_test.columns = ["".join (c if c.isalnum() else "_" for c in str(x)) for x in df_test.columns]
    target_column = "".join (c if c.isalnum() else "_" for c in str(target_column))

    X_train = df_train.drop(target_column, axis=1)
    y_train = df_train[target_column]
    X_test = df_test.drop(target_column, axis=1)
    y_test = df_test[target_column]

    numeric_cols_train = X_train.select_dtypes(include=np.number).columns
    numeric_cols_test = X_test.select_dtypes(include=np.number).columns
    common_numeric_cols = list(set(numeric_cols_train) & set(numeric_cols_test))

    for col in common_numeric_cols:
        median_val = X_train[col].median()
        X_train[col].fillna(median_val, inplace=True)
        X_test[col].fillna(median_val, inplace=True)

    categorical_cols_train = X_train.select_dtypes(include=['object', 'category']).columns
    categorical_cols_test = X_test.select_dtypes(include=['object', 'category']).columns
    common_categorical_cols = list(set(categorical_cols_train) & set(categorical_cols_test))

    for col in common_categorical_cols:
        X_train[col] = X_train[col].astype(str)
        X_test[col] = X_test[col].astype(str)

        mode_val = X_train[col].mode()[0] if not X_train[col].mode().empty else "Missing"
        X_train[col].fillna(mode_val, inplace=True)
        X_test[col].fillna(mode_val, inplace=True)

        train_categories = set(X_train[col].unique())
        test_categories = set(X_test[col].unique())
        new_categories_in_test = list(test_categories - train_categories)
        if new_categories_in_test:
             print(f"Warning: New categories found in test column '{col}': {new_categories_in_test}. Replacing with mode '{mode_val}'.")
             X_test[col] = X_test[col].apply(lambda x: mode_val if x in new_categories_in_test else x)

    if common_categorical_cols:
        X_train = pd.get_dummies(X_train, columns=common_categorical_cols, dummy_na=False, drop_first=False) # Используем общее подмножество колонок
        X_test = pd.get_dummies(X_test, columns=common_categorical_cols, dummy_na=False, drop_first=False)

        train_cols = set(X_train.columns)
        test_cols = set(X_test.columns)

        missing_in_test = list(train_cols - test_cols)
        for c in missing_in_test:
             if c != target_column:
                X_test[c] = 0

        missing_in_train = list(test_cols - train_cols)
        for c in missing_in_train:
            if c != target_column:
                X_train[c] = 0

        final_train_cols = [col for col in X_train.columns if col != target_column]
        X_test = X_test[final_train_cols]
    else:
        print("No categorical columns found for One-Hot Encoding.")

    non_numeric_train_cols = X_train.select_dtypes(exclude=np.number).columns
    if len(non_numeric_train_cols) > 0:
        print(f"Warning: Dropping remaining non-numeric columns from train/test: {list(non_numeric_train_cols)}")
        X_train = X_train.drop(columns=non_numeric_train_cols)
        cols_to_drop_in_test = [col for col in non_numeric_train_cols if col in X_test.columns]
        X_test = X_test.drop(columns=cols_to_drop_in_test)

    print("Price preprocessing finished.")
    return X_train, y_train, X_test, y_test


print("Preprocessing functions defined.")

Preprocessing functions defined.


In [4]:
def run_lazypredict(X_train, y_train, X_test, y_test, task_type):
    """Запускает LazyPredict и возвращает метрику и время."""
    print(f"\n--- Running LazyPredict for {task_type} ---")
    start_time = time.time()
    best_model_metric = np.nan
    models = None

    try:
        if task_type == 'classification':
            le = LabelEncoder()
            y_train_enc = le.fit_transform(y_train)
            y_test_enc = le.transform(y_test)

            clf = LazyClassifier(verbose=1, ignore_warnings=True, custom_metric=roc_auc_score, predictions=False)
            models, _ = clf.fit(X_train, X_test, y_train_enc, y_test_enc)
            metric_name = 'ROC AUC'
            if models is not None and not models.empty and metric_name in models.columns:
                best_model_metric = models[metric_name].max()
            else:
                print("LazyClassifier did not return models or the metric column.")
                best_model_metric = 'N/A'

        elif task_type == 'regression':
            reg = LazyRegressor(verbose=1, ignore_warnings=True, custom_metric=lambda y_true, y_pred: mean_squared_error(y_true, y_pred, squared=False), predictions=False)
            y_train_clean = y_train.fillna(y_train.median())
            y_test_clean = y_test.fillna(y_train.median())
            models, _ = reg.fit(X_train, X_test, y_train_clean, y_test_clean)
            metric_name = 'RMSE'
            if models is not None and not models.empty and metric_name in models.columns:
                best_model_metric = models[metric_name].min()
            else:
                print("LazyRegressor did not return models or the metric column.")
                best_model_metric = 'N/A'

        else:
            raise ValueError("task_type must be 'classification' or 'regression'")

    except Exception as e:
        print(f"LazyPredict failed: {e}")
        best_model_metric = 'Error'

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"LazyPredict finished in {elapsed_time:.2f} seconds. Best internal {metric_name if models is not None else 'Metric'}: {best_model_metric if isinstance(best_model_metric, (int, float)) else best_model_metric}")
    return best_model_metric, elapsed_time


def run_lightautoml(train_df_orig, test_df_orig, target_col, roles, task: Task):
    """Запускает LightAutoML и возвращает метрику на тесте и время."""
    print(f"\n--- Running LightAutoML for task: {task.name} ---")
    automl = TabularAutoML(task=task, timeout=300, cpu_limit=4, memory_limit=16)
    score = np.nan
    elapsed_time = np.nan
    metric_name = 'Unknown'

    try:
        start_time = time.time()
        _ = automl.fit_predict(train_df_orig, roles=roles, verbose=1)
        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f"LightAutoML fit_predict completed in {elapsed_time:.2f} seconds.")

        test_preds = automl.predict(test_df_orig).data
        print(f"Prediction on test data completed. Shape: {test_preds.shape}")

        y_test_true = test_df_orig[target_col]

        print("Calculating metric on test set...")
        if task.name == 'binary':
            metric_name = 'ROC AUC'

            if test_preds.ndim == 1 or test_preds.shape[1] == 1:
                test_preds_proba = test_preds.flatten()
            elif test_preds.shape[1] == 2:
                le_test_internal = LabelEncoder().fit(y_test_true)
                if 1 in le_test_internal.classes_:
                    pos_idx = list(le_test_internal.classes_).index(1)
                    test_preds_proba = test_preds[:, pos_idx]
                    print(f"Using column {pos_idx} for positive class probabilities.")
                else:
                    print("Warning: Class '1' not found. Using column 1 for probabilities.")
                    test_preds_proba = test_preds[:, 1]
            else:
                raise ValueError(f"Unexpected prediction array shape {test_preds.shape} for binary task.")

            if not pd.api.types.is_numeric_dtype(y_test_true):
                le = LabelEncoder()
                y_test_true_enc = le.fit_transform(y_test_true)
                print(f"Encoded target classes: {le.classes_}")
            else:
                y_test_true_enc = y_test_true

            score = roc_auc_score(y_test_true_enc, test_preds_proba)

        elif task.name == 'reg':
            metric_name = 'RMSE' # Имя метрики для вывода
            y_test_true_clean = y_test_true.fillna(y_test_true.median())
            score = mean_squared_error(y_test_true_clean, test_preds[:, 0], squared=False)
        else:
            raise ValueError("Unsupported task type for metric calculation")

        print("Metric calculation successful.")
        print(f"LightAutoML finished. Test {metric_name}: {score:.4f}. Time: {elapsed_time:.2f} seconds.")

    except Exception as e:
        print(f"LightAutoML failed during execution: {e}")
        import traceback
        traceback.print_exc()
        score = 'Error'
        if 'start_time' in locals() and np.isnan(elapsed_time):
            elapsed_time = time.time() - start_time
        elif np.isnan(elapsed_time):
            elapsed_time = 0
        print(f"LightAutoML failed after {elapsed_time:.2f} seconds.")

    return score, elapsed_time

def run_flaml(X_train, y_train, X_test, y_test, task_type, metric_name, time_budget=120):
    """Запускает FLAML и возвращает метрику на тесте и время."""
    print(f"\n--- Running FLAML for {task_type} ---")
    automl = AutoML()
    settings = {
        "time_budget": time_budget,
        "metric": metric_name,
        "task": task_type,
        "log_file_name": f"flaml_{task_type}.log",
        "n_jobs": -1,
        "seed": 42,
        "eval_method": "auto",
        "early_stop": True
    }
    score = np.nan
    elapsed_time = np.nan

    try:
        start_time = time.time()
        
        if task_type == 'classification' and not pd.api.types.is_numeric_dtype(y_train):
            le_flaml = LabelEncoder()
            y_train_enc = le_flaml.fit_transform(y_train)
            y_test_enc = le_flaml.transform(y_test)
        else:
            y_train_enc = y_train
            y_test_enc = y_test

        if not isinstance(X_train, pd.DataFrame):
             X_train_df = pd.DataFrame(X_train)
             X_test_df = pd.DataFrame(X_test)
        else:
             X_train_df = X_train
             X_test_df = X_test
            
        X_train_df.columns = X_train_df.columns.astype(str)
        X_test_df.columns = X_test_df.columns.astype(str)

        automl.fit(X_train_df, y_train_enc, **settings)
        end_time = time.time()
        elapsed_time = end_time - start_time

        if task_type == 'classification':
            test_preds_proba = automl.predict_proba(X_test_df)[:, 1]
            score = roc_auc_score(y_test_enc, test_preds_proba)
            metric_display_name = 'ROC AUC'
        elif task_type == 'regression':
            y_test_clean = pd.Series(y_test_enc).fillna(pd.Series(y_train_enc).median()).values
            test_preds = automl.predict(X_test_df)
            score = mean_squared_error(y_test_clean, test_preds, squared=False)
            metric_display_name = 'RMSE'
        else:
            raise ValueError("task_type must be 'classification' or 'regression'")

        print(f"FLAML finished in {elapsed_time:.2f} seconds. Best model: {automl.best_estimator}. Test {metric_display_name}: {score:.4f}")

    except Exception as e:
        print(f"FLAML failed: {e}")
        score = 'Error'
        elapsed_time = time.time() - start_time
        print(f"FLAML failed after {elapsed_time:.2f} seconds.")


    return score, elapsed_time

print("AutoML runner functions defined.")

AutoML runner functions defined.


In [5]:
results = {'flat_price': {}, 'user_churn': {}}

print("\n=== Starting User Churn Task (Classification) ===")
churn_train_path = 'train_churn.csv' 
churn_test_path = 'test_churn.csv'
churn_target = 'Churn'

X_train_c, y_train_c, X_test_c, y_test_c = preprocess_churn(churn_train_path, churn_test_path, churn_target)

train_df_c_orig = pd.read_csv(churn_train_path)
test_df_c_orig = pd.read_csv(churn_test_path)
train_df_c_orig.columns = ["".join (c if c.isalnum() else "_" for c in str(x)) for x in train_df_c_orig.columns]
test_df_c_orig.columns = ["".join (c if c.isalnum() else "_" for c in str(x)) for x in test_df_c_orig.columns]
churn_target_clean = "".join (c if c.isalnum() else "_" for c in str(churn_target))

score_lp_c, time_lp_c = run_lazypredict(X_train_c, y_train_c, X_test_c, y_test_c, 'classification')
results['user_churn']['lazypredict'] = {'Metric': score_lp_c, 'Time': time_lp_c}

churn_roles = {'target': churn_target_clean}
churn_task = Task('binary')
score_lama_c, time_lama_c = run_lightautoml(train_df_c_orig, test_df_c_orig, churn_target_clean, churn_roles, churn_task)
results['user_churn']['LightAutoML'] = {'Metric': score_lama_c, 'Time': time_lama_c}

score_flaml_c, time_flaml_c = run_flaml(X_train_c, y_train_c, X_test_c, y_test_c, 'classification', 'roc_auc', time_budget=120) # Увеличим бюджет
results['user_churn']['FLAML'] = {'Metric': score_flaml_c, 'Time': time_flaml_c}

print("\n=== User Churn Task Finished ===")


=== Starting User Churn Task (Classification) ===
Preprocessing Churn data: train_churn.csv, test_churn.csv...
Churn preprocessing finished.

--- Running LazyPredict for classification ---


  7%|███                                         | 2/29 [00:00<00:05,  5.39it/s]

{'Model': 'AdaBoostClassifier', 'Accuracy': 0.7730138713745272, 'Balanced Accuracy': 0.6610627177700349, 'ROC AUC': 0.6610627177700349, 'F1 Score': 0.7607812536178268, 'roc_auc_score': 0.6610627177700349, 'Time taken': 0.23195409774780273}
{'Model': 'BaggingClassifier', 'Accuracy': 0.7843631778058008, 'Balanced Accuracy': 0.6734818317570931, 'ROC AUC': 0.6734818317570932, 'F1 Score': 0.771937712151358, 'roc_auc_score': 0.6734818317570932, 'Time taken': 0.15196657180786133}
{'Model': 'BernoulliNB', 'Accuracy': 0.755359394703657, 'Balanced Accuracy': 0.645980587356894, 'ROC AUC': 0.645980587356894, 'F1 Score': 0.7450657736064885, 'roc_auc_score': 0.645980587356894, 'Time taken': 0.023845672607421875}


 17%|███████▌                                    | 5/29 [00:00<00:01, 12.91it/s]

{'Model': 'CalibratedClassifierCV', 'Accuracy': 0.7755359394703657, 'Balanced Accuracy': 0.6659407665505227, 'ROC AUC': 0.6659407665505226, 'F1 Score': 0.763986576721334, 'roc_auc_score': 0.6659407665505226, 'Time taken': 0.07070541381835938}
{'Model': 'DecisionTreeClassifier', 'Accuracy': 0.7288776796973518, 'Balanced Accuracy': 0.6583084453293513, 'ROC AUC': 0.6583084453293513, 'F1 Score': 0.7319107550544387, 'roc_auc_score': 0.6583084453293513, 'Time taken': 0.038869619369506836}
{'Model': 'DummyClassifier', 'Accuracy': 0.7414880201765448, 'Balanced Accuracy': 0.5, 'ROC AUC': 0.5, 'F1 Score': 0.6314191974856022, 'roc_auc_score': 0.5, 'Time taken': 0.016569137573242188}
{'Model': 'ExtraTreeClassifier', 'Accuracy': 0.7187894073139974, 'Balanced Accuracy': 0.6229094076655052, 'ROC AUC': 0.6229094076655052, 'F1 Score': 0.7157225197634501, 'roc_auc_score': 0.6229094076655052, 'Time taken': 0.01956462860107422}


 31%|█████████████▋                              | 9/29 [00:01<00:02,  9.39it/s]

{'Model': 'ExtraTreesClassifier', 'Accuracy': 0.7654476670870114, 'Balanced Accuracy': 0.6623154139704662, 'ROC AUC': 0.6623154139704662, 'F1 Score': 0.7566269312386655, 'roc_auc_score': 0.6623154139704662, 'Time taken': 0.43546009063720703}
{'Model': 'GaussianNB', 'Accuracy': 0.7389659520807061, 'Balanced Accuracy': 0.6889414302306288, 'ROC AUC': 0.6889414302306289, 'F1 Score': 0.7455295766767602, 'roc_auc_score': 0.6889414302306289, 'Time taken': 0.01595759391784668}
{'Model': 'KNeighborsClassifier', 'Accuracy': 0.7490542244640606, 'Balanced Accuracy': 0.6433175709308113, 'ROC AUC': 0.6433175709308113, 'F1 Score': 0.7404351373363875, 'roc_auc_score': 0.6433175709308113, 'Time taken': 0.0767824649810791}


 41%|█████████████████▊                         | 12/29 [00:02<00:03,  4.88it/s]

{'Model': 'LabelPropagation', 'Accuracy': 0.7402269861286255, 'Balanced Accuracy': 0.6500746640119462, 'ROC AUC': 0.6500746640119462, 'F1 Score': 0.737163115465912, 'roc_auc_score': 0.6500746640119462, 'Time taken': 0.9977490901947021}


 45%|███████████████████▎                       | 13/29 [00:03<00:07,  2.18it/s]

{'Model': 'LabelSpreading', 'Accuracy': 0.7440100882723834, 'Balanced Accuracy': 0.654214368674299, 'ROC AUC': 0.6542143686742989, 'F1 Score': 0.7407615474801252, 'roc_auc_score': 0.6542143686742989, 'Time taken': 1.8592805862426758}
{'Model': 'LinearDiscriminantAnalysis', 'Accuracy': 0.7679697351828499, 'Balanced Accuracy': 0.6656047784967646, 'ROC AUC': 0.6656047784967645, 'F1 Score': 0.7592438459565293, 'roc_auc_score': 0.6656047784967645, 'Time taken': 0.049002647399902344}


 52%|██████████████████████▏                    | 15/29 [00:04<00:04,  2.81it/s]

{'Model': 'LinearSVC', 'Accuracy': 0.776796973518285, 'Balanced Accuracy': 0.6652024224323876, 'ROC AUC': 0.6652024224323876, 'F1 Score': 0.76449285592708, 'roc_auc_score': 0.6652024224323876, 'Time taken': 0.1863563060760498}
{'Model': 'LogisticRegression', 'Accuracy': 0.7742749054224464, 'Balanced Accuracy': 0.6619130579060892, 'ROC AUC': 0.6619130579060892, 'F1 Score': 0.7618317582539398, 'roc_auc_score': 0.6619130579060892, 'Time taken': 0.02362680435180664}
{'Model': 'NearestCentroid', 'Accuracy': 0.639344262295082, 'Balanced Accuracy': 0.6678364028538244, 'ROC AUC': 0.6678364028538244, 'F1 Score': 0.6617576658943509, 'roc_auc_score': 0.6678364028538244, 'Time taken': 0.019501924514770508}


 62%|██████████████████████████▋                | 18/29 [00:04<00:03,  3.35it/s]

{'Model': 'NuSVC', 'Accuracy': 0.7730138713745272, 'Balanced Accuracy': 0.6261116641778662, 'ROC AUC': 0.6261116641778663, 'F1 Score': 0.7460956445824037, 'roc_auc_score': 0.6261116641778663, 'Time taken': 0.5914409160614014}
{'Model': 'PassiveAggressiveClassifier', 'Accuracy': 0.7074401008827238, 'Balanced Accuracy': 0.534233449477352, 'ROC AUC': 0.5342334494773519, 'F1 Score': 0.6685295021809501, 'roc_auc_score': 0.5342334494773519, 'Time taken': 0.02708721160888672}
{'Model': 'Perceptron', 'Accuracy': 0.7087011349306431, 'Balanced Accuracy': 0.6097519495603119, 'ROC AUC': 0.6097519495603119, 'F1 Score': 0.7055242245083272, 'roc_auc_score': 0.6097519495603119, 'Time taken': 0.01757073402404785}
{'Model': 'QuadraticDiscriminantAnalysis', 'Accuracy': 0.7326607818411097, 'Balanced Accuracy': 0.6942218350754936, 'ROC AUC': 0.6942218350754936, 'F1 Score': 0.7417838620661751, 'roc_auc_score': 0.6942218350754936, 'Time taken': 0.02181839942932129}


 86%|█████████████████████████████████████      | 25/29 [00:05<00:00,  5.72it/s]

{'Model': 'RandomForestClassifier', 'Accuracy': 0.7805800756620429, 'Balanced Accuracy': 0.6661647585863614, 'ROC AUC': 0.6661647585863614, 'F1 Score': 0.7670983798948147, 'roc_auc_score': 0.6661647585863614, 'Time taken': 0.5813310146331787}
{'Model': 'RidgeClassifier', 'Accuracy': 0.7793190416141236, 'Balanced Accuracy': 0.6669031027044965, 'ROC AUC': 0.6669031027044964, 'F1 Score': 0.7666029217923256, 'roc_auc_score': 0.6669031027044964, 'Time taken': 0.024072885513305664}
{'Model': 'RidgeClassifierCV', 'Accuracy': 0.7793190416141236, 'Balanced Accuracy': 0.6653144184503069, 'ROC AUC': 0.6653144184503069, 'F1 Score': 0.766043108115186, 'roc_auc_score': 0.6653144184503069, 'Time taken': 0.037425994873046875}
{'Model': 'SGDClassifier', 'Accuracy': 0.7641866330390921, 'Balanced Accuracy': 0.6598763895802223, 'ROC AUC': 0.6598763895802223, 'F1 Score': 0.7550580958719757, 'roc_auc_score': 0.6598763895802223, 'Time taken': 0.03771519660949707}


 93%|████████████████████████████████████████   | 27/29 [00:06<00:00,  5.21it/s]

{'Model': 'SVC', 'Accuracy': 0.7654476670870114, 'Balanced Accuracy': 0.6241869918699187, 'ROC AUC': 0.6241869918699187, 'F1 Score': 0.7415618341437337, 'roc_auc_score': 0.6241869918699187, 'Time taken': 0.4957454204559326}
{'Model': 'XGBClassifier', 'Accuracy': 0.78562421185372, 'Balanced Accuracy': 0.6949850671976108, 'ROC AUC': 0.6949850671976109, 'F1 Score': 0.7793895188314903, 'roc_auc_score': 0.6949850671976109, 'Time taken': 0.16057848930358887}


100%|███████████████████████████████████████████| 29/29 [00:06<00:00,  4.57it/s]

[LightGBM] [Info] Number of positive: 1179, number of negative: 3310
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000719 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 608
[LightGBM] [Info] Number of data points in the train set: 4489, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.262642 -> initscore=-1.032282
[LightGBM] [Info] Start training from score -1.032282
{'Model': 'LGBMClassifier', 'Accuracy': 0.7818411097099621, 'Balanced Accuracy': 0.6781358885017422, 'ROC AUC': 0.6781358885017422, 'F1 Score': 0.7719098086533955, 'roc_auc_score': 0.6781358885017422, 'Time taken': 0.11159753799438477}
LazyPredict finished in 6.35 seconds. Best internal ROC AUC: 0.6949850671976109

--- Running LightAutoML for task: binary ---
[20:17:00] Stdout logging level is INFO.
[20:17:00] Copying TaskTimer may affect the




[20:17:06] Layer [1m1[0m train process start. Time left 294.36 secs
[20:17:06] Start fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m ...
[20:17:08] Fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m finished. score = [1m0.8500000000000001[0m
[20:17:08] [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m fitting and predicting completed
[20:17:08] Time left 292.38 secs

[20:17:08] [1mSelector_LightGBM[0m fitting and predicting completed
[20:17:08] Start fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m ...
[20:17:10] Fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m finished. score = [1m0.8062412715983898[0m
[20:17:10] [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m fitting and predicting completed
[20:17:10] Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m ... Time budget is 57.87 secs


Optimization Progress: 100%|█| 101/101 [00:24<00:00,  4.09it/s, best_trial=81, b

[20:17:35] Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m completed
[20:17:35] Start fitting [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m ...





[20:17:35] Fitting [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m finished. score = [1m0.816558786825847[0m
[20:17:35] [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m fitting and predicting completed
[20:17:35] Start fitting [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m ...
[20:17:36] Fitting [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m finished. score = [1m0.8246747589359614[0m
[20:17:36] [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m fitting and predicting completed
[20:17:36] Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ... Time budget is 196.57 secs


Optimization Progress: 100%|█| 101/101 [00:17<00:00,  5.62it/s, best_trial=94, b

[20:17:54] Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m completed
[20:17:54] Start fitting [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ...





[20:17:55] Fitting [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m finished. score = [1m0.8246842400621142[0m
[20:17:55] [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m fitting and predicting completed
[20:17:55] Time left 245.43 secs

[20:17:55] [1mLayer 1 training completed.[0m

[20:17:55] Blending: optimization starts with equal weights. Score = [1m0.8364838[0m
[20:17:55] Blending: iteration [1m0[0m: score = [1m0.8510616[0m, weights = [1m[0.76711875 0.         0.16106412 0.07181717 0.        ][0m
[20:17:55] Blending: iteration [1m1[0m: score = [1m0.8511275[0m, weights = [1m[0.8123261  0.         0.11660836 0.07106554 0.        ][0m
[20:17:55] Blending: iteration [1m2[0m: score = [1m0.8511329[0m, weights = [1m[0.82596105 0.         0.09596662 0.07807232 0.        ][0m
[20:17:55] Blending: no improvements for score. Terminated.

[20:17:55] Blending: best score = [1m0.8511329[0m, best weights = [1m[0.82596105 0.         0.09596662 0.07807232 0.        ][0m
[20:17:55] [

In [6]:
print("\n=== Starting Flat Price Task (Regression) ===")
price_train_path = 'train_price.csv'
price_test_path = 'test_price.csv'
price_target = 'price'

X_train_p, y_train_p, X_test_p, y_test_p = preprocess_price(price_train_path, price_test_path, price_target)

train_df_p_orig = pd.read_csv(price_train_path)
test_df_p_orig = pd.read_csv(price_test_path)
train_df_p_orig.columns = ["".join (c if c.isalnum() else "_" for c in str(x)) for x in train_df_p_orig.columns]
test_df_p_orig.columns = ["".join (c if c.isalnum() else "_" for c in str(x)) for x in test_df_p_orig.columns]
price_target_clean = "".join (c if c.isalnum() else "_" for c in str(price_target))

score_lp_p, time_lp_p = run_lazypredict(X_train_p, y_train_p, X_test_p, y_test_p, 'regression')
results['flat_price']['lazypredict'] = {'Metric': score_lp_p, 'Time': time_lp_p}

price_roles = {'target': price_target_clean}
price_task = Task('reg', metric='mse') # Используем 'mse' для инициализации
score_lama_p, time_lama_p = run_lightautoml(train_df_p_orig, test_df_p_orig, price_target_clean, price_roles, price_task)
results['flat_price']['LightAutoML'] = {'Metric': score_lama_p, 'Time': time_lama_p}

score_flaml_p, time_flaml_p = run_flaml(X_train_p, y_train_p, X_test_p, y_test_p, 'regression', 'rmse', time_budget=120) # Увеличим бюджет
results['flat_price']['FLAML'] = {'Metric': score_flaml_p, 'Time': time_flaml_p}

print("\n=== Flat Price Task Finished ===")


=== Starting Flat Price Task (Regression) ===
Preprocessing Price data: train_price.csv, test_price.csv...
Price preprocessing finished.

--- Running LazyPredict for regression ---


  2%|█                                           | 1/42 [00:00<00:29,  1.38it/s]

{'Model': 'AdaBoostRegressor', 'R-Squared': 0.39198463581711596, 'Adjusted R-Squared': 0.39151241707204987, 'RMSE': 23026676.384219553, 'Time taken': 0.7232785224914551, '<lambda>': 23026676.384219553}


  5%|██                                          | 2/42 [00:02<01:00,  1.51s/it]

{'Model': 'BaggingRegressor', 'R-Squared': 0.6401551341553663, 'Adjusted R-Squared': 0.6398756585023193, 'RMSE': 17714611.64975341, 'Time taken': 2.0618667602539062, '<lambda>': 17714611.64975341}
{'Model': 'BayesianRidge', 'R-Squared': 0.5079262241455897, 'Adjusted R-Squared': 0.5075440521239565, 'RMSE': 20715198.105511684, 'Time taken': 0.0720055103302002, '<lambda>': 20715198.105511684}


 10%|████▏                                       | 4/42 [00:03<00:25,  1.47it/s]

{'Model': 'DecisionTreeRegressor', 'R-Squared': 0.4791009278241928, 'Adjusted R-Squared': 0.4786963684649084, 'RMSE': 21313303.656561922, 'Time taken': 0.2981078624725342, '<lambda>': 21313303.656561922}
{'Model': 'DummyRegressor', 'R-Squared': -7.918606745871415e-07, 'Adjusted R-Squared': -0.0007774484170959273, 'RMSE': 29530727.936655443, 'Time taken': 0.014713525772094727, '<lambda>': 29530727.936655443}
{'Model': 'ElasticNet', 'R-Squared': 0.4972553337023715, 'Adjusted R-Squared': 0.49686487407027524, 'RMSE': 20938603.66494874, 'Time taken': 0.03621387481689453, '<lambda>': 20938603.66494874}


 19%|████████▍                                   | 8/42 [00:03<00:11,  3.05it/s]

{'Model': 'ElasticNetCV', 'R-Squared': 9.055184609696454e-05, 'Adjusted R-Squared': -0.0006860337676917183, 'RMSE': 29529379.183846045, 'Time taken': 0.4169352054595947, '<lambda>': 29529379.183846045}
{'Model': 'ExtraTreeRegressor', 'R-Squared': 0.40694652763166816, 'Adjusted R-Squared': 0.4064859291287748, 'RMSE': 22741594.271753512, 'Time taken': 0.13814902305603027, '<lambda>': 22741594.271753512}


 26%|███████████▎                               | 11/42 [00:11<00:40,  1.30s/it]

{'Model': 'ExtraTreesRegressor', 'R-Squared': 0.6731887159473058, 'Adjusted R-Squared': 0.6729348960218238, 'RMSE': 16881945.823670313, 'Time taken': 8.03565526008606, '<lambda>': 16881945.823670313}
{'Model': 'GammaRegressor', 'R-Squared': 0.012676653225419665, 'Adjusted R-Squared': 0.011909842682046534, 'RMSE': 29342943.940832965, 'Time taken': 0.09694218635559082, '<lambda>': 29342943.940832965}


 31%|█████████████▎                             | 13/42 [00:15<00:40,  1.40s/it]

{'Model': 'GradientBoostingRegressor', 'R-Squared': 0.6235708733185918, 'Adjusted R-Squared': 0.623278517400832, 'RMSE': 18118222.581376746, 'Time taken': 3.5636003017425537, '<lambda>': 18118222.581376746}
{'Model': 'HistGradientBoostingRegressor', 'R-Squared': 0.6803874233861078, 'Adjusted R-Squared': 0.6801391943795287, 'RMSE': 16694980.291530738, 'Time taken': 0.13448452949523926, '<lambda>': 16694980.291530738}


 33%|██████████████▎                            | 14/42 [00:15<00:30,  1.09s/it]

{'Model': 'HuberRegressor', 'R-Squared': 0.479814894132634, 'Adjusted R-Squared': 0.479410889279525, 'RMSE': 21298692.190174878, 'Time taken': 0.16869091987609863, '<lambda>': 21298692.190174878}


 43%|██████████████████▍                        | 18/42 [00:16<00:10,  2.30it/s]

{'Model': 'KNeighborsRegressor', 'R-Squared': 0.51148469247734, 'Adjusted R-Squared': 0.5111052841612789, 'RMSE': 20640160.446646035, 'Time taken': 0.33377671241760254, '<lambda>': 20640160.446646035}
{'Model': 'Lars', 'R-Squared': 0.5078736042873483, 'Adjusted R-Squared': 0.5074913913981894, 'RMSE': 20716305.66468494, 'Time taken': 0.024597883224487305, '<lambda>': 20716305.66468494}
{'Model': 'LarsCV', 'R-Squared': 0.5078736042873483, 'Adjusted R-Squared': 0.5074913913981894, 'RMSE': 20716305.66468494, 'Time taken': 0.08811593055725098, '<lambda>': 20716305.66468494}
{'Model': 'Lasso', 'R-Squared': 0.5078736411641306, 'Adjusted R-Squared': 0.5074914283036123, 'RMSE': 20716304.88851167, 'Time taken': 0.10339188575744629, '<lambda>': 20716304.88851167}


 52%|██████████████████████▌                    | 22/42 [00:16<00:05,  3.62it/s]

{'Model': 'LassoCV', 'R-Squared': 0.5088017469988524, 'Adjusted R-Squared': 0.508420254957245, 'RMSE': 20696761.13037564, 'Time taken': 0.38382720947265625, '<lambda>': 20696761.13037564}
{'Model': 'LassoLars', 'R-Squared': 0.5078736408784263, 'Adjusted R-Squared': 0.5074914280176862, 'RMSE': 20716304.8945251, 'Time taken': 0.07264566421508789, '<lambda>': 20716304.8945251}
{'Model': 'LassoLarsCV', 'R-Squared': 0.5078736042873483, 'Adjusted R-Squared': 0.5074913913981894, 'RMSE': 20716305.66468494, 'Time taken': 0.08023524284362793, '<lambda>': 20716305.66468494}
{'Model': 'LassoLarsIC', 'R-Squared': 0.5078736042873483, 'Adjusted R-Squared': 0.5074913913981894, 'RMSE': 20716305.66468494, 'Time taken': 0.040950775146484375, '<lambda>': 20716305.66468494}


 60%|█████████████████████████▌                 | 25/42 [00:17<00:03,  5.59it/s]

{'Model': 'LinearRegression', 'R-Squared': 0.5078736042873477, 'Adjusted R-Squared': 0.5074913913981889, 'RMSE': 20716305.66468495, 'Time taken': 0.03076338768005371, '<lambda>': 20716305.66468495}
{'Model': 'LinearSVR', 'R-Squared': -0.9032539791528584, 'Adjusted R-Squared': -0.9047321526637948, 'RMSE': 40740124.68218284, 'Time taken': 0.04586648941040039, '<lambda>': 40740124.68218284}


 62%|██████████████████████████▌                | 26/42 [00:51<01:32,  5.75s/it]

{'Model': 'MLPRegressor', 'R-Squared': -0.8677482803603318, 'Adjusted R-Squared': -0.8691988781593469, 'RMSE': 40358326.84520656, 'Time taken': 34.25646615028381, '<lambda>': 40358326.84520656}


 69%|█████████████████████████████▋             | 29/42 [02:34<03:24, 15.75s/it]

{'Model': 'NuSVR', 'R-Squared': -0.03368662693046787, 'Adjusted R-Squared': -0.03448944579083779, 'RMSE': 30023991.56772709, 'Time taken': 103.33929967880249, '<lambda>': 30023991.56772709}
{'Model': 'OrthogonalMatchingPursuit', 'R-Squared': 0.49900167702222686, 'Adjusted R-Squared': 0.4986125736980458, 'RMSE': 20902205.66568626, 'Time taken': 0.031076431274414062, '<lambda>': 20902205.66568626}
{'Model': 'OrthogonalMatchingPursuitCV', 'R-Squared': 0.5066290380529329, 'Adjusted R-Squared': 0.5062458585640136, 'RMSE': 20742484.441552278, 'Time taken': 0.08064556121826172, '<lambda>': 20742484.441552278}
{'Model': 'PassiveAggressiveRegressor', 'R-Squared': 0.36122661193199257, 'Adjusted R-Squared': 0.36073050478492985, 'RMSE': 23601922.932198774, 'Time taken': 3.8767025470733643, '<lambda>': 23601922.932198774}


 74%|███████████████████████████████▋           | 31/42 [02:38<02:02, 11.18s/it]

{'Model': 'PoissonRegressor', 'R-Squared': 0.44880491109198273, 'Adjusted R-Squared': 0.4483768221513019, 'RMSE': 21924346.150195993, 'Time taken': 0.27907323837280273, '<lambda>': 21924346.150195993}


 79%|█████████████████████████████████▊         | 33/42 [03:38<02:25, 16.14s/it]

{'Model': 'QuantileRegressor', 'R-Squared': -0.08914456547463234, 'Adjusted R-Squared': -0.08999045607247136, 'RMSE': 30818872.41800783, 'Time taken': 59.7262008190155, '<lambda>': 30818872.41800783}
{'Model': 'RANSACRegressor', 'R-Squared': 0.21988632320809387, 'Adjusted R-Squared': 0.21928044328603202, 'RMSE': 26082722.8417927, 'Time taken': 0.19401097297668457, '<lambda>': 26082722.8417927}


 86%|████████████████████████████████████▊      | 36/42 [03:55<00:58,  9.69s/it]

{'Model': 'RandomForestRegressor', 'R-Squared': 0.7096060814710462, 'Adjusted R-Squared': 0.7093805453088691, 'RMSE': 15913574.407070009, 'Time taken': 16.283119678497314, '<lambda>': 15913574.407070009}
{'Model': 'Ridge', 'R-Squared': 0.5078880410139006, 'Adjusted R-Squared': 0.5075058393371112, 'RMSE': 20716001.8018602, 'Time taken': 0.02303028106689453, '<lambda>': 20716001.8018602}
{'Model': 'RidgeCV', 'R-Squared': 0.5080176695104901, 'Adjusted R-Squared': 0.5076355685104428, 'RMSE': 20713273.194086913, 'Time taken': 0.08390498161315918, '<lambda>': 20713273.194086913}
{'Model': 'SGDRegressor', 'R-Squared': 0.4872388902218505, 'Adjusted R-Squared': 0.4868406512594132, 'RMSE': 21146160.29474951, 'Time taken': 0.10325908660888672, '<lambda>': 21146160.29474951}


 90%|██████████████████████████████████████▉    | 38/42 [05:30<01:34, 23.60s/it]

{'Model': 'SVR', 'R-Squared': -0.08898282374711086, 'Adjusted R-Squared': -0.08982858872727606, 'RMSE': 30816583.9785765, 'Time taken': 95.4829089641571, '<lambda>': 30816583.9785765}
{'Model': 'TransformedTargetRegressor', 'R-Squared': 0.5078736042873477, 'Adjusted R-Squared': 0.5074913913981889, 'RMSE': 20716305.66468495, 'Time taken': 0.02391529083251953, '<lambda>': 20716305.66468495}


 95%|████████████████████████████████████████▉  | 40/42 [05:31<00:31, 15.56s/it]

{'Model': 'TweedieRegressor', 'R-Squared': 0.45401770722700663, 'Adjusted R-Squared': 0.45359366683541547, 'RMSE': 21820427.73240636, 'Time taken': 0.242889404296875, '<lambda>': 21820427.73240636}


100%|███████████████████████████████████████████| 42/42 [05:31<00:00,  7.89s/it]

{'Model': 'XGBRegressor', 'R-Squared': 0.5825558028807093, 'Adjusted R-Squared': 0.5822315923648061, 'RMSE': 19079772.618173122, 'Time taken': 0.283435583114624, '<lambda>': 19079772.618173122}
{'Model': 'LGBMRegressor', 'R-Squared': 0.667303065247423, 'Adjusted R-Squared': 0.6670446741963558, 'RMSE': 17033283.717123944, 'Time taken': 0.12380313873291016, '<lambda>': 17033283.717123944}
LazyPredict finished in 331.48 seconds. Best internal RMSE: 15913574.407070009

--- Running LightAutoML for task: reg ---
[20:25:28] Stdout logging level is INFO.
[20:25:28] Task: reg

[20:25:28] Start automl preset with listed constraints:
[20:25:28] - time: 300.00 seconds
[20:25:28] - CPU: 4 cores
[20:25:28] - memory: 16 GB

[20:25:28] [1mTrain data shape: (51115, 20)[0m




  target = dataset.target.astype(np.int32)
  target = dataset.target.astype(np.int32)
  target = dataset.target.astype(np.int32)
  target = dataset.target.astype(np.int32)
  target = dataset.target.astype(np.int32)
  target = dataset.target.astype(np.int32)
  target = dataset.target.astype(np.int32)
  target = dataset.target.astype(np.int32)
  target = dataset.target.astype(np.int32)
  target = dataset.target.astype(np.int32)
  target = dataset.target.astype(np.int32)
  target = dataset.target.astype(np.int32)


[20:25:31] Layer [1m1[0m train process start. Time left 296.71 secs
[20:25:33] Start fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m ...
[20:25:37] Fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m finished. score = [1m-1880150874579500.5[0m
[20:25:37] [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m fitting and predicting completed
[20:25:37] Time left 291.21 secs

[20:25:37] [1mSelector_LightGBM[0m fitting and predicting completed
[20:25:39] Start fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m ...
[20:25:43] Fitting [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m finished. score = [1m-803180271998590.2[0m
[20:25:43] [1mLvl_0_Pipe_1_Mod_0_LightGBM[0m fitting and predicting completed
[20:25:43] Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m ... Time budget is 48.39 secs


Optimization Progress:  23%|▏| 23/101 [00:51<02:54,  2.23s/it, best_trial=5, bes

[20:26:34] Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m completed
[20:26:34] Start fitting [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m ...





[20:26:38] Fitting [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m finished. score = [1m-763833348839860.2[0m
[20:26:38] [1mLvl_0_Pipe_1_Mod_1_Tuned_LightGBM[0m fitting and predicting completed
[20:26:38] Start fitting [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m ...
[20:26:42] Fitting [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m finished. score = [1m-968330622198421.4[0m
[20:26:42] [1mLvl_0_Pipe_1_Mod_2_CatBoost[0m fitting and predicting completed
[20:26:42] Start hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ... Time budget is 155.78 secs


Optimization Progress:  53%|▌| 54/101 [02:38<02:17,  2.93s/it, best_trial=24, be

[20:29:20] Hyperparameters optimization for [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m completed
[20:29:20] Start fitting [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m ...





[20:29:30] Fitting [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m finished. score = [1m-901757478303153.8[0m
[20:29:30] [1mLvl_0_Pipe_1_Mod_3_Tuned_CatBoost[0m fitting and predicting completed
[20:29:30] Time left 57.74 secs

[20:29:30] [1mLayer 1 training completed.[0m

[20:29:30] Blending: optimization starts with equal weights. Score = [1m-851591981656305.5000000[0m
[20:29:30] Blending: iteration [1m0[0m: score = [1m-763833348839860.2500000[0m, weights = [1m[0. 0. 1. 0. 0.][0m
[20:29:30] Blending: no improvements for score. Terminated.

[20:29:30] Blending: best score = [1m-763833348839860.2500000[0m, best weights = [1m[0. 0. 1. 0. 0.][0m
[20:29:30] [1mAutoml preset training completed in 242.40 seconds[0m

[20:29:30] Model description:
Final prediction for new objects (level 0) = 
	 1.00000 * (5 averaged models Lvl_0_Pipe_1_Mod_1_Tuned_LightGBM) 

LightAutoML fit_predict completed in 242.40 seconds.
Prediction on test data completed. Shape: (9021, 1)
Calculating metri

In [7]:
flat_price_results_list = []
for framework, data in results['flat_price'].items():
    metric_val = data['Metric'] if isinstance(data['Metric'], (int, float)) else 'N/A' if data['Metric'] != 'Error' else 'Error'
    time_val = data['Time'] if isinstance(data['Time'], (int, float)) else 'N/A' if data['Time'] != 'Error' else 'Error'
    flat_price_results_list.append({'Framework': framework, 'RMSE': metric_val, 'Время выполнения, сек.': time_val})

df_price = pd.DataFrame(flat_price_results_list)
df_price = df_price.set_index('Framework')

if 'Время выполнения, сек.' in df_price.columns:
     df_price['Время выполнения, сек.'] = pd.to_numeric(df_price['Время выполнения, сек.'], errors='coerce').round(2)

print("\n\nПоказатели метрик, полученных на датасете flat_price:")
print(df_price.to_markdown(floatfmt=".4f"))

user_churn_results_list = []
for framework, data in results['user_churn'].items():
    metric_val = data['Metric'] if isinstance(data['Metric'], (int, float)) else 'N/A' if data['Metric'] != 'Error' else 'Error'
    time_val = data['Time'] if isinstance(data['Time'], (int, float)) else 'N/A' if data['Time'] != 'Error' else 'Error'
    user_churn_results_list.append({'Framework': framework, 'ROC AUC': metric_val, 'Время выполнения, сек.': time_val})

df_churn = pd.DataFrame(user_churn_results_list)
df_churn = df_churn.set_index('Framework')

if 'Время выполнения, сек.' in df_churn.columns:
     df_churn['Время выполнения, сек.'] = pd.to_numeric(df_churn['Время выполнения, сек.'], errors='coerce').round(2)


print("\n\nПоказатели метрик, полученных на датасете user_churn:")
print(df_churn.to_markdown(floatfmt=".4f"))



Показатели метрик, полученных на датасете flat_price:
| Framework   |          RMSE |   Время выполнения, сек. |
|:------------|--------------:|-------------------------:|
| lazypredict | 15913574.4071 |                 331.4800 |
| LightAutoML | 15366090.8499 |                 242.4000 |
| FLAML       | 14825147.9141 |                 125.6500 |


Показатели метрик, полученных на датасете user_churn:
| Framework   |   ROC AUC |   Время выполнения, сек. |
|:------------|----------:|-------------------------:|
| lazypredict |    0.6950 |                   6.3500 |
| LightAutoML |    0.8193 |                  54.8400 |
| FLAML       |    0.8204 |                 120.6600 |
