In [14]:
from evaluate import evaluate_autoML, evaluate_auto_scikit, evaluate_tpot
import pandas as pd
import os
import sys

def rename_index_if_exists(df, index):
    """Return a row label derived from ``index`` that is not yet in ``df.index``.

    If ``index`` itself is unused it is returned unchanged. Otherwise the
    candidates ``f'{index}_0'``, ``f'{index}_1'``, ... are tried in order and
    the first one that is absent from ``df.index`` is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index is checked for collisions. It is not modified.
    index : str
        Desired row label.

    Returns
    -------
    str
        ``index`` or the first free ``f'{index}_{n}'`` label.
    """
    if index not in df.index:
        return index

    # Search without an upper bound: the index is finite, so a free suffix is
    # always reached. A capped search would fall through and hand back a label
    # that is already taken, letting the caller overwrite an existing row.
    num = 0
    while True:
        new_index = f'{index}_{num}'
        if new_index not in df.index:
            return new_index
        num += 1


def load_data(drop_categorical, target, data_dir_path="data"):
    """Load ``train.csv`` / ``test.csv`` and split them into features and target.

    The two files are concatenated before preprocessing so that both splits
    end up with the same set of columns (in particular the same dummy columns
    when one-hot encoding), and are then separated again by row position.

    Parameters
    ----------
    drop_categorical : bool
        If true, columns with ``object`` dtype are dropped. Otherwise the
        combined frame is passed through ``pd.get_dummies``.
    target : str
        Name of the column to predict. It must still be present after the
        preprocessing step above.
    data_dir_path : str, optional
        Directory containing ``train.csv`` and ``test.csv``. Defaults to
        ``"data"`` (relative to the current working directory).

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``.

    Raises
    ------
    KeyError
        If ``target`` is not a column of the preprocessed data.
    """
    train = pd.read_csv(os.path.join(data_dir_path, 'train.csv'))
    test = pd.read_csv(os.path.join(data_dir_path, 'test.csv'))
    n_train = len(train)
    combined = pd.concat([train, test], axis=0)

    if drop_categorical:
        string_cols = combined.select_dtypes(include=['object']).columns
        combined = combined.drop(columns=string_cols)
    else:
        combined = pd.get_dummies(combined)

    if target not in combined.columns:
        raise KeyError(
            f"target column {target!r} not found after preprocessing "
            f"(it may be missing from the CSV files or may have been "
            f"dropped/encoded as a categorical column)"
        )

    # Split by position: pd.concat keeps the original row labels, which may
    # repeat between the two files, so label-based selection is not usable.
    train = combined.iloc[:n_train, :]
    test = combined.iloc[n_train:, :]

    X_train = train.drop(columns=[target])
    y_train = train[target]
    X_test = test.drop(columns=[target])
    y_test = test[target]

    return X_train, y_train, X_test, y_test



In [None]:
# Driver cell: run one evaluation backend on the dataset and append its result
# as a new row of the results CSV.

# Key into func_dicts below; selects which evaluation function is run.
mode = 'autoML'
        
rel_data_dir_path = 'data'
rel_save_path = 'result/DVM-CAR.csv'

# Base directory that the relative paths above are joined onto (empty string
# leaves them relative to the current working directory).
py_dir_path = ""
# NOTE(review): data_dir_path is computed here but is not passed to load_data
# below, so it has no effect on which files are read in this cell.
data_dir_path = os.path.join(py_dir_path, rel_data_dir_path)
save_path = os.path.join(py_dir_path, rel_save_path)

# drop_categorical=False makes load_data one-hot encode via pd.get_dummies
# instead of dropping object-dtype columns.
drop_categorical = False
target = 'Annual_revenue'
X_train, y_train, X_test, y_test = load_data(drop_categorical, target)


# Dispatch table: mode name -> evaluation function and the keyword arguments
# forwarded to it.
func_dicts = {'autoML': {'func': evaluate_autoML, 'args': {'n_generation': 6}},
            'auto-scikitlearn': {'func': evaluate_auto_scikit, 'args': {'target_time': 80}},
            'tpot': {'func': evaluate_tpot, 'args': {'generations': 3}}}

func = func_dicts[mode]['func']
args = func_dicts[mode]['args']
# result is used as a mapping below (item assignment, .keys()); its exact
# contents are defined by the evaluate module — TODO confirm.
result = func(X_train, y_train, X_test, y_test, **args)
# Record the run configuration alongside the result so each CSV row is
# self-describing.
result['args'] = args
result['drop_categorical'] = drop_categorical
result['target'] = target

# Reuse the existing results file if there is one; otherwise start an empty
# frame whose columns are the keys of this result.
if os.path.exists(save_path):
    df = pd.read_csv(save_path, index_col='func')
else:
    df = pd.DataFrame(columns=result.keys())
    df.index.name = 'func'

# Pick a row label that does not collide with earlier runs of the same mode
# (e.g. 'autoML', then 'autoML_0', 'autoML_1', ...).
index = rename_index_if_exists(df, mode)
df.loc[index] = result

# Make sure the output directory exists before writing the CSV.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
df.to_csv(save_path)


[2025-02-07 05:34:06] AutoML.__init__() - n_population: 30, n_generation: 6, n_parent: 5, prob_mutations: [0.2, 0.5], use_joblib: True, n_jobs: -1
[2025-02-07 05:34:06] AutoML.fit() - use_kfold: True, kfold: 5, valid_size: 0.2000, seed: 42, max_n_try: 1000, timeout: 30
Structure-3 - valid r2: 0.5471±0.1100
Structure-6 - valid r2: -0.0389±0.2309
Structure-2 - valid r2: 0.5748±0.0682
Structure-5 - valid r2: 0.8090±0.1178
Structure-0 - valid r2: 0.5333±0.0995
Structure-10 - valid r2: 0.4788±0.0959
Structure-9 - valid r2: 0.7659±0.0571
Structure-13 - valid r2: 0.8290±0.0227
Structure-8 - valid r2: 0.7768±0.0467
Structure-15 - valid r2: 0.5480±0.1134
Structure-11 - valid r2: 0.7783±0.0496
Structure-17 - valid r2: 0.4571±0.0871
Structure-16 - valid r2: 0.8303±0.0482
Structure-4 - valid r2: 0.8742±0.0082
Structure-1 - valid r2: 0.8830±0.0179
Structure-20 - valid r2: 0.8358±0.0486
Structure-19 - valid r2: 0.7733±0.0404
Structure-23 - valid r2: 0.8087±0.0449
Structure-24 - valid r2: 0.5334±0.09