# README BEFORE THE EXECUTION
* Per la corretta esecuzione del codice è necessario caricare solo il dataset di partenza (Train.csv) nella root del progetto Colab (/content/Train.csv).
* Verranno create 3 cartelle, le quali contengono i dataset utilizzati negli esperimenti e un file .txt che riassume i risultati ottenuti con i dataset.

## Dipendenze del progetto
Dichiaro tutte le dipendenze in una sezione sola

In [None]:
!pip install h2o

Collecting h2o
  Downloading h2o-3.46.0.3.tar.gz (265.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.3/265.3 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: h2o
  Building wheel for h2o (setup.py) ... [?25l[?25hdone
  Created wheel for h2o: filename=h2o-3.46.0.3-py2.py3-none-any.whl size=265365897 sha256=c22adafac00f487b2cc38c238f5dfce0b5a82f25bab00abc60d055fbbe948dbe
  Stored in directory: /root/.cache/pip/wheels/c4/63/41/baa115b5255e1db3e2383bce4e2e6181746aac0b42264c242f
Successfully built h2o
Installing collected packages: h2o
Successfully installed h2o-3.46.0.3


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import h2o
from h2o.automl import H2OAutoML

## Metodi utili alla fase di trasformazione del dataset
* Trasformazione delle feature categoriche in numeriche
* Contare il numero di outliers attraverso il metodo IQR
* Contare il numero di valori nulli
* Trovare gli indici di tutti i valori non outlier
* Generare un numero di outlier/valori nulli a partire da una percentuale

In [None]:
# Shared encoder instance. It is re-fitted for every encoded column, so after
# transform_features() it only holds the classes of the last column it saw.
le = LabelEncoder()


def transform_features(dataframe):
    """
    Label-encode every object-dtype column of the dataframe.

    The dataframe is modified in place; it is also returned so the result
    can be assigned or chained.

    Args:
        dataframe: given dataframe

    Returns: the same dataframe object, with its object-dtype columns
        replaced by the codes produced by the label encoder
    """
    for feature in dataframe.columns:
        if dataframe[feature].dtype == 'object':
            dataframe[feature] = le.fit_transform(dataframe[feature])
    return dataframe


def count_non_outliers(dataframe):
    """
    Report, for each non-object column, how many rows fall inside the
    IQR fences [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] (both ends included).

    Args:
        dataframe: given dataframe

    Returns:
        A dictionary mapping each inspected feature to its number of
        non-outlier rows
    """
    inside_counts = {}

    for name in dataframe.columns:
        column = dataframe[name]
        if column.dtype == 'object':
            # Quantiles are only computed for non-object columns
            continue

        first_quartile = column.quantile(0.25)
        third_quartile = column.quantile(0.75)
        spread = third_quartile - first_quartile

        # Missing values compare False and are therefore not counted
        within = column.between(first_quartile - 1.5 * spread,
                                third_quartile + 1.5 * spread)
        inside_counts[name] = int(within.sum())

    return inside_counts


def count_outliers(dataframe):
    """
    Tally, for each non-object column, the rows lying strictly outside the
    IQR fences (below Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR).

    Args:
        dataframe: given dataframe

    Returns:
        A dictionary with one entry per inspected feature (its outlier
        count) plus a 'Total' entry holding the sum over all features.
    """
    tally = {}

    for name in dataframe.columns:
        column = dataframe[name]
        if column.dtype == 'object':
            continue

        q1 = column.quantile(0.25)
        q3 = column.quantile(0.75)
        iqr = q3 - q1

        too_low = column < q1 - 1.5 * iqr
        too_high = column > q3 + 1.5 * iqr
        tally[name] = int((too_low | too_high).sum())

    # Summed before the key is written so 'Total' never counts itself
    tally['Total'] = sum(tally.values())
    return tally


def get_non_outliers_index_list(dataframe, feature):
    """
    Collect the index labels of the rows whose value for the given feature
    lies inside the IQR fences (bounds included).

    Args:
        dataframe: given dataframe
        feature: feature to check for outliers

    Returns: List of index labels of the non-outlier rows
    """
    values = dataframe[feature]
    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    spread = third_quartile - first_quartile

    # Rows with a missing value fail the comparison and are left out
    keep = values.between(first_quartile - 1.5 * spread,
                          third_quartile + 1.5 * spread)
    return dataframe[keep].index.tolist()


def generate_new_outliers(dataframe, feature, percentage):
    """
    Overwrite a random share of the non-outlier values of a feature with
    extreme values. The dataframe is modified in place.

    Each selected row receives, with equal probability, a value drawn
    uniformly from up to 30 value-ranges below the current minimum or up to
    30 value-ranges above the current maximum, cast to the column dtype.

    NOTE: the draw intervals start at the observed min/max, so a drawn value
    can land close to the existing data and need not be flagged as an
    outlier when the IQR fences are recomputed afterwards. For a constant
    column (max == min) the written value equals the existing one.

    Args:
        dataframe: given dataframe
        feature: feature to insert outliers
        percentage: fraction (0-1) of the non-outlier rows to overwrite

    Returns: the same dataframe, with the new outliers written into it
    """
    non_outliers = get_non_outliers_index_list(dataframe, feature)
    # Clamp so a fraction above 1 cannot request more rows than exist
    num_new_outliers = min(int(len(non_outliers) * percentage), len(non_outliers))

    if num_new_outliers <= 0:
        return dataframe

    lower_bound, upper_bound = dataframe[feature].min(), dataframe[feature].max()
    value_range = upper_bound - lower_bound
    to_column_dtype = dataframe[feature].dtype.type

    # One draw without replacement replaces the former pick-and-remove loop,
    # which rebuilt an array and scanned the list on every iteration.
    chosen = np.random.choice(non_outliers, size=num_new_outliers, replace=False)
    go_below = np.random.choice([True, False], size=num_new_outliers)
    below_values = np.random.uniform(low=lower_bound - 30 * value_range,
                                     high=lower_bound,
                                     size=num_new_outliers)
    above_values = np.random.uniform(low=upper_bound,
                                     high=upper_bound + 30 * value_range,
                                     size=num_new_outliers)

    for index, below, low_value, high_value in zip(chosen, go_below, below_values, above_values):
        dataframe.at[index, feature] = to_column_dtype(low_value if below else high_value)

    return dataframe


def insert_outliers_with_percentage(dataframe, percentage, excluded_features=None):
    """
    Return a copy of the dataframe in which every eligible numeric feature
    has had outliers injected by generate_new_outliers.

    Args:
        dataframe: given dataframe (left untouched)
        percentage: fraction of non-outlier rows to overwrite per feature
        excluded_features: column names that must never be altered; a single
            name may be given as a string. Defaults to the identifier and
            target columns ('ID', 'Reached.on.Time_Y.N').

    Returns: new dataframe with new outliers
    """
    if excluded_features is None:
        excluded_features = ('ID', 'Reached.on.Time_Y.N')
    elif isinstance(excluded_features, str):
        # A bare string would otherwise be treated as a collection of characters
        excluded_features = (excluded_features,)
    skipped = set(excluded_features)

    new_df = dataframe.copy()
    for feature in new_df.columns:
        if feature in skipped:
            continue
        if np.issubdtype(new_df[feature].dtype, np.number):
            new_df = generate_new_outliers(new_df, feature, percentage)
    return new_df


def generate_null_values(dataframe, feature, percentage):
    """
    Blank out a random share of the non-outlier values of a feature.
    The dataframe is modified in place.

    Only rows returned by get_non_outliers_index_list are eligible, so
    existing outliers and already-missing values are never selected.

    Args:
        dataframe: given dataframe
        feature: feature to insert null values
        percentage: fraction (0-1) of the non-outlier rows to set to NaN

    Returns: the same dataframe, with the new null values written into it
    """
    non_outliers = get_non_outliers_index_list(dataframe, feature)
    # Clamp so a fraction above 1 cannot request more rows than exist
    num_new_null = min(int(len(non_outliers) * percentage), len(non_outliers))

    if num_new_null <= 0:
        return dataframe

    # NaN cannot live in a NumPy integer column; cast explicitly rather than
    # relying on pandas' deprecated implicit upcast during assignment.
    column_dtype = dataframe[feature].dtype
    if isinstance(column_dtype, np.dtype) and column_dtype.kind in 'iu':
        dataframe[feature] = dataframe[feature].astype('float64')

    # One draw without replacement replaces the former pick-and-remove loop
    chosen = np.random.choice(non_outliers, size=num_new_null, replace=False)
    dataframe.loc[chosen, feature] = np.nan

    return dataframe


def insert_null_values_with_percentage(dataframe, percentage, excluded_features=None):
    """
    Return a copy of the dataframe in which every eligible numeric feature
    has had null values injected by generate_null_values.

    Args:
        dataframe: given dataframe (left untouched)
        percentage: fraction of non-outlier rows to blank per feature
        excluded_features: column names that must never be altered; a single
            name may be given as a string. Defaults to the target column
            ('Reached.on.Time_Y.N').

    Returns: new dataframe with new null values
    """
    if excluded_features is None:
        excluded_features = ('Reached.on.Time_Y.N',)
    elif isinstance(excluded_features, str):
        # A bare string would otherwise be treated as a collection of characters
        excluded_features = (excluded_features,)
    skipped = set(excluded_features)

    new_df = dataframe.copy()
    for feature in new_df.columns:
        if feature in skipped:
            continue
        if np.issubdtype(new_df[feature].dtype, np.number):
            new_df = generate_null_values(new_df, feature, percentage)
    return new_df


def count_null_values(dataframe):
    """
    Count the number of null values for each feature in the dataframe.
    Args:
        dataframe: given dataframe

    Returns: A dictionary with features as keys and counts of null values as values
    """
    null_values_count = {}
    total_null = 0

    for feature in dataframe.columns:
        count = dataframe[feature].isnull().sum()
        null_values_count[feature] = count
        total_null += count
    null_values_count['Total'] = total_null

    return null_values_count

Leggo il dataset di partenza, divido in train e test e lo esporto

In [None]:
# Load the raw dataset and hold out 20% of the rows
df = pd.read_csv('/content/Train.csv')
train_df, test_df = train_test_split(df, test_size=0.2)

# The hold-out rows are exported exactly as read (no encoding, ID kept)
test_df.to_csv('/content/Test.csv', index=False)

# Encode the object columns of the training split, then discard the identifier
transform_features(train_df)
train_df = train_df.drop(columns='ID')

## Metodi per simulare gli esperimenti
* Dataset con una % variabile di outliers
* Dataset con una % variabile di valori nulli
* Dataset con una % variabile di valori misti (nulli/outliers)

In [None]:
def create_dataset_with_outliers(percentage, datasets_path):
    """
    Write one outlier-perturbed copy of the global train_df per percentage.

    For each value p, the perturbed dataset is saved as
    <datasets_path>/Train_<p>.csv and its per-feature outlier counts are
    printed.

    Args:
        percentage: iterable of fractions of outliers to inject
        datasets_path: directory receiving the CSV files (created if missing)
    """
    import os  # kept local so the block does not depend on the import cell

    os.makedirs(datasets_path, exist_ok=True)

    for p in percentage:
        print(f"Percentage of outliers: {p}")
        # insert_outliers_with_percentage works on its own copy of train_df
        out_df = insert_outliers_with_percentage(train_df, p)
        print(count_outliers(out_df))
        out_df.to_csv(f'{datasets_path}/Train_{p}.csv', index=False)

def create_dataset_with_null(percentage, datasets_path):
    """
    Write one null-perturbed copy of the global train_df per percentage.

    For each value p, the perturbed dataset is saved as
    <datasets_path>/Train_<p>.csv and its per-feature null counts are
    printed.

    Args:
        percentage: iterable of fractions of null values to inject
        datasets_path: directory receiving the CSV files (created if missing)
    """
    import os  # kept local so the block does not depend on the import cell

    os.makedirs(datasets_path, exist_ok=True)

    for p in percentage:
        print(f"Percentage of null values: {p}")
        # insert_null_values_with_percentage works on its own copy of train_df
        null_df = insert_null_values_with_percentage(train_df, p)
        print(count_null_values(null_df))
        null_df.to_csv(f'{datasets_path}/Train_{p}.csv', index=False)

def create_mixed_values(percentage, input_path, datasets_path):
    """
    Add null values on top of previously generated outlier datasets.

    For each value p, <input_path>/Train_<p>.csv is read, null values are
    injected with the same fraction p, and the result is saved as
    <datasets_path>/Train_<p>.csv. Outlier and null counts of the result are
    printed.

    Args:
        percentage: iterable of fractions; one input file must exist per value
        input_path: directory holding the outlier-only CSV files
        datasets_path: directory receiving the mixed CSV files (created if missing)
    """
    import os  # kept local so the block does not depend on the import cell

    os.makedirs(datasets_path, exist_ok=True)

    for p in percentage:
        out_df = pd.read_csv(f'{input_path}/Train_{p}.csv')
        out_df = insert_null_values_with_percentage(out_df, p)
        out_df.to_csv(f'{datasets_path}/Train_{p}.csv', index=False)
        print(count_outliers(out_df))
        print(count_null_values(out_df))

In [None]:
def _train_and_report(train_path, target, report_path, header):
    """Run AutoML on one CSV, print its leaderboard and append it to the report file."""
    train = h2o.import_file(train_path)
    predictors = [column for column in train.columns if column != target]
    # asfactor() turns the target into a categorical column
    train[target] = train[target].asfactor()

    aml = H2OAutoML(max_models=2, seed=1, include_algos=["GLM", "DeepLearning"])
    aml.train(x=predictors, y=target, training_frame=train)

    lb = aml.leaderboard
    print(lb.head(rows=lb.nrows))

    # Append mode: results of successive runs accumulate in the same file
    with open(report_path, "a") as file:
        file.write(header)
        file.write(lb.as_data_frame().to_string())


def autoML_with_percentage(percentage, datasets_path):
    """
    Train AutoML on the baseline dataset and on each perturbed dataset, and
    append every leaderboard to <datasets_path>/leaderboard.txt.

    The baseline is /content/Train.csv; the perturbed datasets are
    <datasets_path>/Train_<p>.csv for each p in percentage. Only a training
    frame is handed to AutoML, so /content/Test.csv is not loaded here.

    NOTE(review): the per-dataset header written to the report always reads
    "Percentage of outliers", whichever kind of perturbation the directory
    holds - confirm whether that label should vary per experiment.

    Args:
        percentage: iterable of fractions identifying the perturbed CSV files
        datasets_path: directory holding the perturbed CSVs and the report
    """
    h2o.init(max_mem_size="16g")

    target = "Reached.on.Time_Y.N"
    report_path = f"{datasets_path}/leaderboard.txt"

    _train_and_report("/content/Train.csv", target, report_path, "Normal train\n")

    for p in percentage:
        _train_and_report(
            f"{datasets_path}/Train_{p}.csv",
            target,
            report_path,
            f"\n\nPercentage of outliers: {p}\n",
        )


Creo le cartelle dove andrò a riportare i **dataset** e i **risultati** della funzione di autoML

In [None]:
!mkdir -p only_outliers
!mkdir -p only_null_values
!mkdir -p mixed_values

## Esperimento 1: Allenamento con soli outliers


In [None]:
# Experiment 1: training sets perturbed with outliers only.
# Fractions handed to the dataset generator and to the AutoML runner.
percentage = [0.05, 0.10, 0.15, 0.20]
datasets_path = "/content/only_outliers"

# Write one perturbed CSV per fraction, then train on the baseline and on each CSV.
create_dataset_with_outliers(percentage, datasets_path)
autoML_with_percentage(percentage, datasets_path)

Percentage of outliers: 0.05
Warehouse_block        int64
Mode_of_Shipment       int64
Customer_care_calls    int64
Customer_rating        int64
Cost_of_the_Product    int64
Prior_purchases        int64
Product_importance     int64
Gender                 int64
Discount_offered       int64
Weight_in_gms          int64
Reached.on.Time_Y.N    int64
dtype: object
{'Warehouse_block': 417, 'Mode_of_Shipment': 431, 'Customer_care_calls': 435, 'Customer_rating': 427, 'Cost_of_the_Product': 436, 'Prior_purchases': 1204, 'Product_importance': 432, 'Gender': 400, 'Discount_offered': 1921, 'Weight_in_gms': 432, 'Reached.on.Time_Y.N': 0, 'Total': 6535}
Percentage of outliers: 0.1
Warehouse_block        int64
Mode_of_Shipment       int64
Customer_care_calls    int64
Customer_rating        int64
Cost_of_the_Product    int64
Prior_purchases        int64
Product_importance     int64
Gender                 int64
Discount_offered       int64
Weight_in_gms          int64
Reached.on.Time_Y.N    int64
dtype

0,1
H2O_cluster_uptime:,03 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.3
H2O_cluster_version_age:,4 days
H2O_cluster_name:,H2O_from_python_unknownUser_6qqshh
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,16 Gb
H2O_cluster_total_cores:,2
H2O_cluster_allowed_cores:,2


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                     auc    logloss     aucpr    mean_per_class_error      rmse       mse
DeepLearning_1_AutoML_1_20240616_81534  0.747392   0.512462  0.856259                0.499887  0.429638  0.184589
GLM_1_AutoML_1_20240616_81534           0.739519   0.53302   0.851551                0.49929   0.435129  0.189337
[2 rows x 7 columns]

Parse progress: |




████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                     auc    logloss     aucpr    mean_per_class_error      rmse       mse
DeepLearning_1_AutoML_2_20240616_81601  0.662407   0.641073  0.753854                0.499719  0.475241  0.225854
GLM_1_AutoML_2_20240616_81601           0.542064   0.674529  0.617493                0.49986   0.490653  0.24074
[2 rows x 7 columns]

Parse progress: |




████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                     auc    logloss     aucpr    mean_per_class_error      rmse       mse
DeepLearning_1_AutoML_3_20240616_81614  0.600103   0.665114  0.676253                     0.5  0.485849  0.236049
GLM_1_AutoML_3_20240616_81614           0.511549   0.674821  0.598548                     0.5  0.490808  0.240893
[2 rows x 7 columns]

Parse progress: |




████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                     auc    logloss     aucpr    mean_per_class_error      rmse       mse
DeepLearning_1_AutoML_4_20240616_81628  0.526857   0.681733  0.613221                0.5       0.493824  0.243862
GLM_1_AutoML_4_20240616_81628           0.485521   0.675176  0.576673                0.499719  0.490983  0.241064
[2 rows x 7 columns]

Parse progress: |




████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                     auc    logloss     aucpr    mean_per_class_error      rmse       mse
DeepLearning_1_AutoML_5_20240616_81640  0.524586   0.681696  0.612851                0.499253  0.493991  0.244027
GLM_1_AutoML_5_20240616_81640           0.484905   0.67509   0.57582                 0.499719  0.490941  0.241023
[2 rows x 7 columns]






## Esperimento 2: Allenamento con soli valori nulli


In [None]:
# Experiment 2: training sets perturbed with null values only.
# Fractions handed to the dataset generator and to the AutoML runner.
percentage = [0.05, 0.10, 0.15, 0.20]
datasets_path = "/content/only_null_values"

# Write one perturbed CSV per fraction, then train on the baseline and on each CSV.
create_dataset_with_null(percentage, datasets_path)
autoML_with_percentage(percentage, datasets_path)

Percentage of null values: 0.05
{'Warehouse_block': 439, 'Mode_of_Shipment': 439, 'Customer_care_calls': 439, 'Customer_rating': 439, 'Cost_of_the_Product': 439, 'Prior_purchases': 399, 'Product_importance': 439, 'Gender': 439, 'Discount_offered': 351, 'Weight_in_gms': 439, 'Reached.on.Time_Y.N': 0, 'Total': 4262}
Percentage of null values: 0.1
{'Warehouse_block': 879, 'Mode_of_Shipment': 879, 'Customer_care_calls': 879, 'Customer_rating': 879, 'Cost_of_the_Product': 879, 'Prior_purchases': 799, 'Product_importance': 879, 'Gender': 879, 'Discount_offered': 702, 'Weight_in_gms': 879, 'Reached.on.Time_Y.N': 0, 'Total': 8533}
Percentage of null values: 0.15
{'Warehouse_block': 1319, 'Mode_of_Shipment': 1319, 'Customer_care_calls': 1319, 'Customer_rating': 1319, 'Cost_of_the_Product': 1319, 'Prior_purchases': 1199, 'Product_importance': 1319, 'Gender': 1319, 'Discount_offered': 1053, 'Weight_in_gms': 1319, 'Reached.on.Time_Y.N': 0, 'Total': 12804}
Percentage of null values: 0.2
{'Warehouse

0,1
H2O_cluster_uptime:,2 mins 03 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.3
H2O_cluster_version_age:,4 days
H2O_cluster_name:,H2O_from_python_unknownUser_6qqshh
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,16.00 Gb
H2O_cluster_total_cores:,2
H2O_cluster_allowed_cores:,2


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                     auc    logloss     aucpr    mean_per_class_error      rmse       mse
GLM_1_AutoML_6_20240616_81732           0.739519   0.53302   0.851551                0.49929   0.435129  0.189337
DeepLearning_1_AutoML_6_20240616_81732  0.737621   0.514101  0.851402                0.498398  0.430449  0.185287
[2 rows x 7 columns]

Parse progress: |




████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                     auc    logloss     aucpr    mean_per_class_error      rmse       mse
GLM_1_AutoML_7_20240616_81748           0.72324     0.54941  0.837204                 0.49763  0.442235  0.195572
DeepLearning_1_AutoML_7_20240616_81748  0.720219    0.5442   0.835995                 0.5      0.441905  0.19528
[2 rows x 7 columns]

Parse progress: |




████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                    auc    logloss     aucpr    mean_per_class_error      rmse       mse
GLM_1_AutoML_8_20240616_81802           0.72119   0.554243  0.83596                 0.495136  0.44389   0.197038
DeepLearning_1_AutoML_8_20240616_81802  0.71843   0.550427  0.834239                0.49986   0.444858  0.197899
[2 rows x 7 columns]

Parse progress: |




████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                     auc    logloss     aucpr    mean_per_class_error      rmse       mse
GLM_1_AutoML_9_20240616_81815           0.721166   0.557702  0.834108                0.496883  0.445091  0.198106
DeepLearning_1_AutoML_9_20240616_81815  0.711024   0.549838  0.83055                 0.498197  0.444038  0.19717
[2 rows x 7 columns]

Parse progress: |




████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                      auc    logloss    aucpr    mean_per_class_error      rmse       mse
DeepLearning_1_AutoML_10_20240616_81830  0.71888    0.551335  0.83286                0.496849  0.444492  0.197573
GLM_1_AutoML_10_20240616_81830           0.711843   0.565461  0.82902                0.497439  0.448251  0.200929
[2 rows x 7 columns]






## Esperimento 3: Allenamento con valori misti


In [None]:
# Experiment 3: training sets containing both outliers and null values.
percentage = [0.05, 0.10, 0.15, 0.20]
datasets_path = "/content/mixed_values"
# The outlier-only CSVs are read back as the starting point, so the files
# /content/only_outliers/Train_<p>.csv must already exist.
input_path = "/content/only_outliers"

# Add null values to each outlier dataset, then train on the baseline and on each CSV.
create_mixed_values(percentage, input_path, datasets_path)
autoML_with_percentage(percentage, datasets_path)

{'Warehouse_block': 417, 'Mode_of_Shipment': 431, 'Customer_care_calls': 435, 'Customer_rating': 427, 'Cost_of_the_Product': 436, 'Prior_purchases': 1204, 'Product_importance': 432, 'Gender': 400, 'Discount_offered': 1862, 'Weight_in_gms': 432, 'Reached.on.Time_Y.N': 0, 'Total': 6476}
{'Warehouse_block': 419, 'Mode_of_Shipment': 418, 'Customer_care_calls': 418, 'Customer_rating': 418, 'Cost_of_the_Product': 418, 'Prior_purchases': 379, 'Product_importance': 418, 'Gender': 419, 'Discount_offered': 343, 'Weight_in_gms': 418, 'Reached.on.Time_Y.N': 0, 'Total': 4068}
{'Warehouse_block': 844, 'Mode_of_Shipment': 858, 'Customer_care_calls': 865, 'Customer_rating': 853, 'Cost_of_the_Product': 868, 'Prior_purchases': 929, 'Product_importance': 864, 'Gender': 833, 'Discount_offered': 1536, 'Weight_in_gms': 860, 'Reached.on.Time_Y.N': 0, 'Total': 9310}
{'Warehouse_block': 795, 'Mode_of_Shipment': 794, 'Customer_care_calls': 793, 'Customer_rating': 794, 'Cost_of_the_Product': 793, 'Prior_purchase

0,1
H2O_cluster_uptime:,3 mins 46 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.3
H2O_cluster_version_age:,4 days
H2O_cluster_name:,H2O_from_python_unknownUser_6qqshh
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,15.99 Gb
H2O_cluster_total_cores:,2
H2O_cluster_allowed_cores:,2


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                      auc    logloss     aucpr    mean_per_class_error      rmse       mse
DeepLearning_1_AutoML_11_20240616_81914  0.742274   0.508256  0.854005                0.498843  0.427795  0.183008
GLM_1_AutoML_11_20240616_81914           0.739519   0.53302   0.851551                0.49929   0.435129  0.189337
[2 rows x 7 columns]






Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                      auc    logloss     aucpr    mean_per_class_error      rmse       mse
DeepLearning_1_AutoML_12_20240616_81930  0.674205   0.622406  0.775167                 0.49986  0.467395  0.218458
GLM_1_AutoML_12_20240616_81930           0.541179   0.674545  0.616831                 0.49986  0.490661  0.240748
[2 rows x 7 columns]

Parse progress: |




████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                      auc    logloss     aucpr    mean_per_class_error      rmse      mse
DeepLearning_1_AutoML_13_20240616_81942  0.575236   0.673592  0.649398                     0.5  0.489684  0.23979
GLM_1_AutoML_13_20240616_81942           0.510974   0.674835  0.598282                     0.5  0.490815  0.2409
[2 rows x 7 columns]

Parse progress: |




████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                      auc    logloss     aucpr    mean_per_class_error      rmse       mse
DeepLearning_1_AutoML_14_20240616_81953  0.538563   0.682245  0.620026                0.499534  0.493906  0.243943
GLM_1_AutoML_14_20240616_81953           0.484286   0.675189  0.576182                0.49986   0.490989  0.24107
[2 rows x 7 columns]

Parse progress: |




████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
model_id                                      auc    logloss     aucpr    mean_per_class_error      rmse       mse
DeepLearning_1_AutoML_15_20240616_82002  0.523509   0.681676  0.609854                0.49986   0.494037  0.244073
GLM_1_AutoML_15_20240616_82002           0.482891   0.675108  0.575069                0.499438  0.49095   0.241032
[2 rows x 7 columns]




