# Setup

In [None]:
import os
os.environ['PYTHONHASHSEED'] = str(42)

import sys
import shutil
import copy
import pickle
import random as rnd

import numpy as np
from numpy import array, nan, random as np_rnd, where, dot
import pandas as pd
from pandas import DataFrame as dataframe, Series as series, isna, read_csv

from sklearn.model_selection import train_test_split as tts, KFold, StratifiedKFold, GroupKFold, GroupShuffleSplit, StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler, RobustScaler, KBinsDiscretizer, MultiLabelBinarizer
from sklearn import metrics
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import PolynomialFeatures

import h2o

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)
pd.set_option('display.width', 1000)
pd.set_option('max_colwidth', 200)

## Define utility functions

In [None]:
# ===== utility functions =====
# fix the seeds of every available random number generator (the label encoder that skips NaN values is MyLabelEncoder, defined further below)
def seed_everything(seed=42):
    """Seed every random number generator that is reachable in this session.

    Python's ``random`` module and NumPy's global generator are always seeded.
    TensorFlow (``tf_rnd``), CuPy/RAPIDS (``cp``) and PyTorch (``torch``) are
    seeded on a best-effort basis: if the alias is not defined, or seeding
    fails, that backend is skipped silently.

    Args:
        seed: Integer seed applied to every backend.
    """
    # python random module
    rnd.seed(seed)
    # numpy random
    np_rnd.seed(seed)
    # The optional backends catch `Exception` rather than using a bare
    # `except`, so KeyboardInterrupt / SystemExit are never swallowed.
    # tf random
    try:
        tf_rnd.set_seed(seed)
    except Exception:
        pass
    # RAPIDS random
    try:
        cp.random.seed(seed)
    except Exception:
        pass
    # pytorch random
    try:
        torch.manual_seed(seed)
    except Exception:
        pass
def which(bool_list):
    """Return the positions at which ``bool_list`` is truthy (like R's ``which``).

    Only the first axis' indices are returned, as given by ``numpy.where``.
    """
    hits = where(bool_list)
    return hits[0]
def easyIO(x=None, path=None, op="r"):
    """Read or write a pickle file.

    Args:
        x: Object to serialise; only used when ``op == "w"``.
        path: File path to read from or write to.
        op: ``"r"`` loads and returns the pickled object, ``"w"`` dumps ``x``.
            Any other value prints a message and returns ``None``.

    Returns:
        The unpickled object for ``op == "r"``, otherwise ``None``.
    """
    if op not in ("r", "w"):
        print("Unknown operation type")
        return None
    if op == "w":
        with open(path, "wb") as handle:
            pickle.dump(x, handle)
        return None
    # NOTE: unpickling executes arbitrary code; only load files you trust.
    with open(path, "rb") as handle:
        return pickle.load(handle)
def diff(first, second):
    """Return the items of ``first`` that are not in ``second``.

    The order and duplicates of ``first`` are preserved.
    """
    excluded = set(second)
    kept = []
    for element in first:
        if element in excluded:
            continue
        kept.append(element)
    return kept
def findIdx(data_x, col_names):
    """Return the integer positions of the items of ``data_x`` found in ``col_names``.

    Iterating a DataFrame yields its column labels, so for a DataFrame this
    gives the positions of the requested columns.
    """
    positions = []
    for position, label in enumerate(data_x):
        if label in col_names:
            positions.append(int(position))
    return positions
def orderElems(for_order, using_ref):
    """Return the elements of ``using_ref`` that occur in ``for_order``, in ``using_ref`` order."""
    ordered = []
    for candidate in using_ref:
        if candidate in for_order:
            ordered.append(candidate)
    return ordered
# concatenate by row
def cbr(df1, df2):
    """Concatenate two objects row-wise; the container type is decided by ``df1``.

    * Series: both inputs are wrapped in DataFrames, stacked, and the first
      column of the result is returned as a Series.
    * DataFrame: stacked with a fresh 0..n-1 index.
    * numpy array: concatenated along axis 0.
    * anything else: a message is printed and ``df1`` is returned unchanged.
    """
    kind = type(df1)
    if kind == series:
        stacked = pd.concat([dataframe(df1), dataframe(df2)], axis=0, ignore_index=True)
        return series(stacked.iloc[:, 0])
    if kind == dataframe:
        # ignore_index=True already yields a clean RangeIndex
        return pd.concat([df1, df2], axis=0, ignore_index=True)
    if kind == np.ndarray:
        return np.concatenate([df1, df2], axis=0)
    print("Unknown Type: return 1st argument")
    return df1
def change_width(ax, new_value):
    """Set every patch of ``ax`` to width ``new_value`` and keep it centred.

    Each patch is shifted by half of the width that was removed, so its
    centre stays where it was.
    """
    for bar in ax.patches:
        shrink = bar.get_width() - new_value
        # resize first, then move the left edge to re-centre the bar
        bar.set_width(new_value)
        bar.set_x(bar.get_x() + shrink * .5)
def week_of_month(date):
    """Return the 1-based week of the month for ``date``.

    Weeks are counted in blocks of seven days from the first of the month:
    days 1-7 are week 1, days 8-14 are week 2, and so on.  This is the closed
    form of stepping back seven days at a time until the month changes; it
    needs no ``timedelta``, which this file never imports.

    Args:
        date: Any date-like object exposing an integer ``day`` attribute.

    Returns:
        int: Week number in the range 1..5.
    """
    return (date.day - 1) // 7 + 1
def getSeason(date):
    """Map ``date.month`` to a season name.

    March-May is "Spring", June-August "Summer", September-November "Fall",
    and every other month "Winter".
    """
    season_by_month = {
        3: "Spring", 4: "Spring", 5: "Spring",
        6: "Summer", 7: "Summer", 8: "Summer",
        9: "Fall", 10: "Fall", 11: "Fall",
    }
    return season_by_month.get(date.month, "Winter")
def createFolder(directory):
    """Create ``directory`` (including parents) if it does not exist yet.

    An ``OSError`` raised while checking or creating is reported with a
    printed message instead of being propagated.
    """
    try:
        if os.path.exists(directory):
            return
        os.makedirs(directory)
    except OSError:
        print('Error: Creating directory. ' + directory)
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of a scalar or array."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def dispPerformance(result_dic):
    """Print and return a table of the ``"performance"`` entry of each result.

    Args:
        result_dic: Mapping ``name -> {"performance": {...}, ...}``.

    Returns:
        DataFrame with one row per name (used as index), sorted in ascending
        order by its first column.
    """
    perf_table = dataframe()
    for entry in result_dic.values():
        row = series(entry["performance"]).to_frame().T
        perf_table = pd.concat([perf_table, row], ignore_index=True, axis=0)
    perf_table.index = list(result_dic.keys())
    first_metric = perf_table.columns[0]
    perf_table = perf_table.sort_values(first_metric)
    print(perf_table)
    return perf_table
def powspace(start, stop, power, num):
    """Return ``num`` points from ``start`` to ``stop`` spaced on a power scale.

    The end points are mapped through the ``power``-th root, spaced linearly,
    and raised back to ``power``.
    """
    root = 1 / float(power)
    lower = np.power(start, root)
    upper = np.power(stop, root)
    grid = np.linspace(lower, upper, num=num)
    return np.power(grid, power)
def xgb_custom_lossfunction(alpha = 1):
    """Build an asymmetric squared-error objective.

    Args:
        alpha: Weight applied where ``label - pred`` is positive, i.e. where
            the prediction is below the label.

    Returns:
        Callable ``(label, pred) -> (grad, hess)`` returning the first and
        second derivative arrays of the weighted squared error.
    """
    def support_under_mse(label, pred):
        # grad: first derivative, hess: second derivative
        residual = (label - pred).astype("float")
        under_predicted = residual > 0
        weighted_grad = -2 * alpha * residual
        plain_grad = -2 * residual
        grad = np.where(under_predicted, weighted_grad, plain_grad)
        hess = np.where(under_predicted, 2 * alpha, 2.0)
        return grad, hess
    return support_under_mse
def pd_flatten(df):
    """Flatten a DataFrame into a Series whose labels join both index levels.

    ``df.unstack()`` yields a two-level index; each pair is joined with an
    underscore as ``"<first>_<second>"``.
    """
    flat = df.unstack()
    flat.index = ["_".join((str(outer), str(inner))) for outer, inner in flat.index]
    return flat
def tf_losses_rmse(y_true, y_pred, sample_weight=None):
    """Root-mean-squared error as a TensorFlow expression.

    When ``sample_weight`` is given, the squared errors are multiplied by it
    before the mean is taken (the mean is not renormalised by the weights).
    Requires ``tf`` to be in scope.
    """
    squared_error = (y_true - y_pred) ** 2
    if sample_weight is not None:
        squared_error = squared_error * sample_weight
    return tf.sqrt(tf.reduce_mean(squared_error))
def tf_loss_nmae(y_true, y_pred, sample_weight=False):
    """Normalised MAE: mean absolute error divided by the mean absolute target.

    ``sample_weight`` is accepted but not used.  Requires ``tf`` to be in
    scope.
    """
    mean_abs_error = tf.reduce_mean(tf.math.abs(y_true - y_pred))
    mean_abs_target = tf.reduce_mean(tf.math.abs(y_true))
    return tf.math.divide(mean_abs_error, mean_abs_target)
def text_extractor(string, lang="eng", spacing=True):
    """Strip every character that is not a letter of the selected language.

    Args:
        string: Text to clean.
        lang: ``"eng"`` keeps A-Z / a-z, ``"kor"`` keeps Hangul jamo and
            syllables; any other value keeps both.
        spacing: When true, space characters are kept as well.

    Returns:
        The filtered string.
    """
    # `re` is not imported at the top of this file, so import it locally.
    import re

    # Related patterns kept for reference:
    #   remove parentheses and the text inside them: re.sub(r'\([^)]*\)', '', remove_text)
    #   remove angle brackets and the text inside them: re.sub(r'\<[^)]*\>', '', remove_text)
    if lang == "eng":
        allowed = "A-Za-z"
    elif lang == "kor":
        # NOTE(review): the trailing '+' sits inside the character class, so
        # literal plus signs are kept - confirm this is intended.
        allowed = "ㄱ-ㅣ가-힣+"
    # default : kor + eng
    else:
        allowed = "A-Za-zㄱ-ㅣ가-힣+"
    if spacing:
        allowed = " " + allowed
    text_finder = re.compile("[^" + allowed + "]")
    return text_finder.sub('', string)
def memory_usage(message='debug'):
    """Print and return the resident memory (RSS) of the current process in MB.

    Requires ``psutil`` to be in scope.

    Args:
        message: Tag printed in front of the measurement.
    """
    bytes_per_mb = 2 ** 20
    # current process RAM usage
    rss = psutil.Process().memory_info().rss / bytes_per_mb
    print(f"[{message}] memory usage: {rss: 10.3f} MB")
    return rss
def cos_sim(a, b):
    """Return the cosine similarity of two 1-D vectors.

    The Euclidean norm comes from ``np.linalg.norm``; the bare name ``norm``
    is not defined anywhere in this file.  A zero vector gives a
    division by zero (NaN with a NumPy warning).
    """
    return dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
class MyLabelEncoder:
    """Label-encode DataFrame columns while leaving missing values as NaN.

    ``dic_cat`` has the form ``{"col_name": {"value": code}}``.  Codes are
    assigned 0..k-1 in sorted order of the non-missing values seen when a
    column is first fitted.  Values without an entry in the mapping (unseen
    or missing) are encoded as NaN.
    """

    def __init__(self, preset=None):
        # dic_cat format -> {"col_name": {"value": replace}}
        # A fresh dict per instance: a mutable default would be shared (and
        # filled) across every encoder created without a preset.
        self.dic_cat = {} if preset is None else preset

    @staticmethod
    def _encode(column, mapping):
        """Map ``column`` through ``mapping``; anything unmapped becomes NaN."""
        # A strict lookup (rather than replace + post-check) keeps an unseen
        # raw value that happens to equal an existing code from slipping
        # through as a valid label.
        return column.astype("object").map(mapping).astype("float")

    def fit_transform(self, data_x, col_names):
        """Learn mappings for ``col_names`` (if not already known) and encode them.

        Args:
            data_x: Input DataFrame; it is not modified.
            col_names: Columns to encode.

        Returns:
            A deep copy of ``data_x`` with the encoded columns as floats.
        """
        tmp_x = copy.deepcopy(data_x)
        for i in col_names:
            # if key is not in dic, update dic
            if i not in self.dic_cat:
                categories = sorted(tmp_x[i].dropna().unique().tolist())
                self.dic_cat[i] = {value: code for code, value in enumerate(categories)}
            tmp_x[i] = self._encode(tmp_x[i], self.dic_cat[i])
        return tmp_x

    def transform(self, data_x):
        """Encode every column that has a stored mapping.

        Returns:
            A deep copy of ``data_x`` with the encoded columns as floats.
        """
        tmp_x = copy.deepcopy(data_x)
        for i, mapping in self.dic_cat.items():
            tmp_x[i] = self._encode(tmp_x[i], mapping)
        return tmp_x

    def clear(self):
        """Forget every learned mapping."""
        self.dic_cat = {}
class MyOneHotEncoder:
    """One-hot encode selected DataFrame columns with one scikit-learn encoder each.

    ``dic_cat`` maps a column name to its fitted ``OneHotEncoder``.
    ``label_preset`` may map a column name to ``{label: code}``; when a column
    has a preset, each output column is named after the label(s) whose code
    equals the encoder category instead of the raw category value.
    Columns that are not encoded are passed through in their original order.
    """

    def __init__(self, label_preset=None):
        self.dic_cat = {}
        # a fresh dict per instance instead of a shared mutable default
        self.label_preset = {} if label_preset is None else label_preset

    @staticmethod
    def _new_encoder():
        """Create a dense OneHotEncoder on both old and new scikit-learn."""
        # `sparse` was renamed to `sparse_output` (scikit-learn 1.2) and later
        # removed; fall back for releases that only know the old name.
        try:
            return OneHotEncoder(sparse_output=False, handle_unknown="ignore")
        except TypeError:
            return OneHotEncoder(sparse=False, handle_unknown="ignore")

    def _column_names(self, col):
        """Build the output column names for the fitted encoder of ``col``."""
        names = []
        for category in self.dic_cat[col].categories_[0]:
            if col in self.label_preset:
                names.extend(
                    str(col) + "_" + str(k)
                    for k, v in self.label_preset[col].items()
                    if v == category
                )
            else:
                names.append(str(col) + "_" + str(category))
        return names

    def _encode(self, data_x, encoded_cols, fit):
        """Encode ``encoded_cols`` of ``data_x``; fit new encoders when ``fit`` is true."""
        pieces = []
        for i in data_x:
            if i not in encoded_cols:
                pieces.append(dataframe(data_x[i]))
                continue
            if not ((data_x[i].dtype.name == "object") or (data_x[i].dtype.name == "category")):
                print(F"WARNING : {i} is not object or category")
            if fit:
                self.dic_cat[i] = self._new_encoder()
                conv = self.dic_cat[i].fit_transform(dataframe(data_x[i]))
            else:
                conv = self.dic_cat[i].transform(dataframe(data_x[i]))
            # Reuse the input index so the encoded block stays row-aligned with
            # the pass-through columns when they are joined column-wise.
            pieces.append(dataframe(conv.astype("int"), columns=self._column_names(i), index=data_x.index))
        if not pieces:
            return dataframe()
        return pd.concat(pieces, axis=1)

    def fit_transform(self, data_x, col_names):
        """Fit one encoder per column in ``col_names`` and return the encoded frame."""
        return self._encode(data_x, col_names, fit=True)

    def transform(self, data_x):
        """Encode ``data_x`` with the encoders fitted earlier."""
        return self._encode(data_x, self.dic_cat, fit=False)

    def clear(self):
        """Drop all fitted encoders and the label preset."""
        self.dic_cat = {}
        self.label_preset = {}
class MyKNNImputer:
    """KNN imputation that keeps imputed categorical codes inside the observed range.

    Categorical columns are expected to be numerically encoded already.  After
    imputation every categorical column is rounded to integers, and the rows
    that were missing are clamped to ``[min, max]`` of the categories seen at
    fit time.  ``dic_cat`` maps each categorical column to its sorted list of
    observed categories.
    """

    def __init__(self, k=5):
        # KNNImputer is not imported at the top of this file
        from sklearn.impute import KNNImputer
        self.imputer = KNNImputer(n_neighbors=k)
        self.dic_cat = {}

    def _impute(self, x, cat_cols, fit):
        """Run the imputer on ``x`` and post-process the columns in ``cat_cols``."""
        # positions of the originally-missing rows, recorded before imputation
        na_rows = {i: np.flatnonzero(x[i].isna().to_numpy()) for i in cat_cols}
        values = self.imputer.fit_transform(x) if fit else self.imputer.transform(x)
        x_imp = dataframe(values, columns=x.columns)

        for i in cat_cols:
            rounded = x_imp[i].round(0).astype("int").to_numpy().copy()
            rows = na_rows[i]
            cats = self.dic_cat[i]
            if len(rows) > 0 and len(cats) > 0:
                # Clamp each imputed row on its own: below the minimum ->
                # minimum, above the maximum -> maximum.  Values inside the
                # range are left as rounded.
                rounded[rows] = np.clip(rounded[rows], cats[0], cats[-1])
            x_imp[i] = rounded
        return x_imp

    def fit_transform(self, x, cat_vars=None):
        """Fit the imputer on ``x`` and return the imputed frame.

        Args:
            x: DataFrame with missing values.
            cat_vars: Optional list of categorical (encoded) column names.

        Returns:
            Imputed DataFrame with the columns of ``x`` and a fresh 0..n-1 index.
        """
        cat_cols = [] if cat_vars is None else list(cat_vars)
        for i in cat_cols:
            # dropna() so NaN never ends up in the category list
            self.dic_cat[i] = sorted(x[i].dropna().unique().tolist())
        return self._impute(x, cat_cols, fit=True)

    def transform(self, x):
        """Impute ``x`` with the fitted imputer, post-processing all known categorical columns."""
        return self._impute(x, list(self.dic_cat), fit=False)

    def clear(self):
        """Drop the fitted imputer and the stored categories."""
        self.imputer = None
        self.dic_cat = {}
def remove_outlier(df, std=3, mode="remove"):
    """Remove or interpolate values whose absolute z-score exceeds ``std``.

    Args:
        df: Numeric DataFrame; it is not modified.
        std: Z-score threshold.
        mode: ``"remove"`` drops every row with an outlier in at least one
            column; ``"interpolate"`` replaces outliers column by column with
            a linear interpolation (leading gaps are back-filled).  Any other
            value returns an unchanged copy.

    Returns:
        The cleaned copy of ``df``.
    """
    # `stats` is not imported at the top of this file
    from scipy import stats

    tmp_df = df.copy()
    if mode == "remove":
        # any(): one outlying column is enough to drop the row.  all() would
        # only drop rows in which every column is an outlier.
        outlier_mask = np.asarray(np.abs(stats.zscore(tmp_df)) > std).any(axis=1)
        print("found outlier :", outlier_mask.sum())
        tmp_df = tmp_df[~outlier_mask]
    elif mode == "interpolate":
        tmp_outlier = []
        for i in tmp_df:
            outlier_mask = np.asarray(np.abs(stats.zscore(tmp_df[i])) > std)
            tmp_outlier.append(outlier_mask.sum())
            if tmp_outlier[-1] == 0:
                continue
            # mask() blanks the outliers without chained assignment
            tmp_df[i] = tmp_df[i].mask(outlier_mask).interpolate(method='linear').bfill()
        # report the total over all columns, not just the last column's count
        print("found outlier :", np.sum(tmp_outlier))
    return tmp_df
def convert_sparse_matrix_to_sparse_tensor(X, sorted=True):
    """Convert a SciPy-style sparse matrix into a ``tf.SparseTensor``.

    Args:
        X: Sparse matrix exposing ``tocoo()``.
        sorted: When true, the result is passed through ``tf.sparse.reorder``.

    Returns:
        The sparse tensor.  Requires ``tf`` to be in scope.
    """
    coo = X.tocoo()
    # (nnz, 2) array of [row, col] pairs.  np.mat no longer exists in NumPy 2,
    # so a plain column stack is used instead.
    # NOTE(review): cast to int64 on the assumption that tf.SparseTensor
    # expects int64 indices - confirm against the TensorFlow docs.
    indices = np.column_stack((coo.row, coo.col)).astype(np.int64)
    tensor = tf.SparseTensor(indices, coo.data, coo.shape)
    return tf.sparse.reorder(tensor) if sorted else tensor
def tfds_to_df(x):
    """Materialise a dataset-like object into a pandas container, row by row.

    ``x`` must expose ``element_spec``, ``cardinality().numpy()`` and
    ``as_numpy_iterator()``.  A tuple spec gives a DataFrame with integer
    column labels, a dict spec a DataFrame with the dict keys as columns, and
    anything else (a single tensor) a Series.
    """
    metadata = x.element_spec
    n_rows = x.cardinality().numpy()
    if type(metadata) is tuple:
        tmp = dataframe(columns=range(len(metadata)), index=range(n_rows))
    elif type(metadata) is dict:
        tmp = dataframe(columns=list(metadata.keys()), index=range(n_rows))
    # if single tensor
    else:
        tmp = series(index=range(n_rows))
    # every container is filled the same way, one element per row
    for idx, value in enumerate(x.as_numpy_iterator()):
        tmp.iloc[idx] = value
    return tmp

# Data loading

In [None]:
# Root folder of the competition data and the tag used in output paths.
folder_path = "./dacon/LG_selfdriving_antenna_performance_prediction/"
feature_version = 10

# Target column names Y_01 ... Y_14.
allTarget = [f"Y_{i:02d}" for i in range(1, 15)]

# Raw train / test tables and the feature description sheet.
df_full = read_csv(folder_path + "rawdata/train.csv")
df_test_x = read_csv(folder_path + "rawdata/test.csv")
df_full_info = read_csv(folder_path + "rawdata/meta/x_feature_info_v2.csv")

# Feature names grouped by the "타입" (type) column of the description sheet.
num_vars = df_full_info.loc[df_full_info["타입"] == "numeric", "Feature"].to_list()
bin_vars = df_full_info.loc[df_full_info["타입"] == "binary", "Feature"].to_list()
feature_info = dict(num_vars=num_vars, bin_vars=bin_vars)

# Split the training table into inputs (columns containing "X") and targets
# (columns containing "Y"), then release the combined frame.
df_full_x = df_full.filter(regex="X")
df_full_y = df_full.filter(regex="Y")
del df_full

# Preprocessing

In [None]:
class GetSequenceInteraction():
    """Products of standardised, consecutive columns (sliding-window interactions).

    For a window size ``process_interaction`` = N, every run of N adjacent
    columns is multiplied row-wise after scaling, giving one feature named by
    joining the member column names with underscores.

    Args:
        idx_var: Stored on the instance; not used by this class.
        process_interaction: Window size N.
        feature_scale_vars: Columns handed to the ``StandardScaler``.
        drop_vars: Columns removed before building interactions.  When the
            list is non-empty, zero-variance columns are removed as well.
    """

    def __init__(self, idx_var=None, process_interaction=3, feature_scale_vars=None, drop_vars=None):
        self.idx_var = idx_var
        self.process_interaction = process_interaction
        # fresh lists per instance instead of shared mutable defaults
        self.feature_scale_vars = [] if feature_scale_vars is None else feature_scale_vars
        self.drop_vars = [] if drop_vars is None else drop_vars
        self.scaler = None

    def fit(self, x):
        """Fit the scaler on ``x`` (after the optional column drop) and return ``self``."""
        x = x.reset_index(drop=True)
        # remove features whose values are all identical, i.e. zero variance
        # (original note: related to inspection pass/fail flags)
        if len(self.drop_vars) > 0:
            x = self.drop_var_zero(x)
        self.scaler = ColumnTransformer(
            [("numerical_std_scaler", StandardScaler(), self.feature_scale_vars)]
        )
        self.scaler.fit(x)
        # The interaction features are only built in transform(); computing
        # them here would be discarded work.
        return self

    def transform(self, x):
        """Return the interaction features for ``x`` with a fresh 0..n-1 index."""
        x = x.reset_index(drop=True)
        if len(self.drop_vars) > 0:
            x = self.drop_var_zero(x)
        x = self.feature_sequence_interaction(x)
        return x

    def drop_var_zero(self, x):
        """Return a copy of ``x`` without ``drop_vars`` and without zero-variance columns."""
        x_copy = copy.deepcopy(x)
        x_copy = x_copy.drop(list(set(list(x_copy.columns[x_copy.var() == 0]) + self.drop_vars)), axis=1)
        return x_copy

    def feature_sequence_interaction(self, x):
        """Multiply each window of N adjacent scaled columns of ``x`` row-wise.

        NOTE(review): the scaler output is labelled with the columns of ``x``,
        which presumably requires ``feature_scale_vars`` to cover exactly
        those columns in the same order - confirm against the callers.
        """
        cols = list(x.columns)
        # scale once; the result does not depend on the window position
        scaled = dataframe(self.scaler.transform(x), columns=cols)
        pieces = [dataframe(index=x.index.values)]
        for i in range(max(self.process_interaction - 1, 0), x.shape[1]):
            window = scaled.iloc[:, (i + 1 - self.process_interaction):(i + 1)]
            product = window.prod(axis=1)
            product.name = "_".join(list(window.columns))
            pieces.append(product.to_frame())
        # a single concat instead of growing the frame one column at a time
        return pd.concat(pieces, axis=1)


class feature_engineering_v1():
    """Feature set v1: the raw features, unchanged."""

    def __init__(self):
        pass

    def fit(self, x):
        """Nothing to learn; returns ``self``."""
        return self

    def transform(self, x):
        """Return a deep copy of ``x``."""
        return copy.deepcopy(x)


class feature_engineering_v2():
    """Feature set v2: kept raw features plus 2- and 3-column sequence interactions."""

    def __init__(self):
        self.feature_scale_vars = None
        self.drop_vars = ["X_04", "X_23", "X_47", "X_48"] + ["X_10", "X_11"] + ["X_02"]
        # fitted GetSequenceInteraction objects: [window of 2, window of 3]
        self.fe = []

    def fit(self, x):
        """Fit the two interaction builders on ``x`` and return ``self``."""
        x_copy = copy.deepcopy(x)
        # every column that is not dropped gets scaled before the products
        self.feature_scale_vars = diff(x_copy.columns, self.drop_vars)
        present_drop_vars = [i for i in x_copy.columns if i in self.drop_vars]
        # Rebuild the list on every fit: appending would leave the builders of
        # an earlier fit at positions 0 and 1, which transform() keeps using.
        self.fe = [
            GetSequenceInteraction(
                process_interaction=window,
                feature_scale_vars=self.feature_scale_vars,
                drop_vars=list(present_drop_vars),
            ).fit(x_copy)
            for window in (2, 3)
        ]
        return self

    def transform(self, x):
        """Return the kept raw columns followed by both interaction blocks."""
        x_copy = copy.deepcopy(x)
        x_copy = pd.concat(
            [x_copy.drop([i for i in x_copy.columns if i in self.drop_vars], axis=1), self.fe[0].transform(x_copy), self.fe[1].transform(x_copy)],
            axis=1
        )
        return x_copy


class feature_engineering_v3():
    """Feature set v3: the raw features minus a fixed list of columns."""

    def __init__(self):
        self.drop_vars = ["X_04", "X_23", "X_47", "X_48"] + ["X_10", "X_11"] + ["X_02"]

    def fit(self, x):
        """Nothing to learn; returns ``self``."""
        return self

    def transform(self, x):
        """Return a copy of ``x`` without the columns in ``drop_vars``."""
        return copy.deepcopy(x).drop(columns=self.drop_vars)


class feature_engineering_v4():
    """Feature set v4: standardised features expanded with ``PolynomialFeatures``.

    The expansion uses scikit-learn's defaults apart from
    ``interaction_only=False`` and ``include_bias=False``.
    """

    def __init__(self):
        self.drop_vars = ["X_04", "X_23", "X_47", "X_48"] + ["X_10", "X_11"] + ["X_02"]
        self.scaler = StandardScaler()
        self.pf = PolynomialFeatures(interaction_only=False, include_bias=False)

    def fit(self, x):
        """Fit the scaler and the polynomial expansion on ``x``; returns ``self``."""
        x_copy = copy.deepcopy(x)
        x_copy = x_copy.drop(self.drop_vars, axis=1)
        x_copy = self.scaler.fit_transform(x_copy)
        self.pf.fit(x_copy)
        return self

    def transform(self, x):
        """Return the expanded features as a DataFrame with integer column labels."""
        x_copy = copy.deepcopy(x)
        x_copy = x_copy.drop(self.drop_vars, axis=1)
        x_copy = self.scaler.transform(x_copy)
        x_copy = self.pf.transform(x_copy)
        # Keep the rows labelled like the input so column-wise concatenation
        # with other feature blocks stays aligned for any index.
        return dataframe(x_copy, index=x.index)


class feature_engineering_v5():
    """Feature set v5: raw features plus per-group row mean and standard deviation.

    ``consider_as_process`` lists the column groups that are summarised.
    """

    def __init__(self):
        self.drop_vars = ["X_04", "X_23", "X_47", "X_48"] + ["X_10", "X_11"]
        self.consider_as_process = [
            ["X_01", "X_02", "X_05", "X_06"],
            ["X_14", "X_15", "X_16", "X_17", "X_18"],
            ["X_19", "X_20", "X_21", "X_22"],
            ["X_24", "X_25", "X_26", "X_27", "X_28", "X_29"],
            ["X_30", "X_31", "X_32", "X_33"],
            ["X_34", "X_35", "X_36", "X_37"],
            ["X_38", "X_39", "X_40"],
            ["X_41", "X_42", "X_43", "X_44"],
            ["X_50", "X_51", "X_52", "X_53", "X_54", "X_55", "X_56"],
        ]

    def fit(self, x):
        """Nothing to learn; returns ``self``."""
        return self

    def transform(self, x):
        """Drop ``drop_vars`` and append ``process_<k>_mean`` / ``process_<k>_std`` per group."""
        x_copy = copy.deepcopy(x).drop(columns=self.drop_vars)

        for idx, members in enumerate(self.consider_as_process):
            group = x_copy[members]
            x_copy[f"process_{idx}_mean"] = group.mean(axis=1)
            x_copy[f"process_{idx}_std"] = group.std(axis=1)

        return x_copy


class feature_engineering_v6():
    """Feature set v6: the raw features minus a fixed list of columns."""

    def __init__(self):
        self.drop_vars = ["X_04", "X_23", "X_47", "X_48"] + ["X_10", "X_11"] + ["X_02"]

    def fit(self, x):
        """Nothing to learn; returns ``self``."""
        return self

    def transform(self, x):
        """Return a copy of ``x`` without the columns in ``drop_vars``."""
        snapshot = copy.deepcopy(x)
        kept = snapshot.drop(labels=self.drop_vars, axis="columns")
        return kept


class feature_engineering_v7():
    """Feature set v7: per-group row mean/std, computed before ``drop_vars`` is removed.

    ``consider_as_process`` lists the column groups that are summarised.
    """

    def __init__(self):
        self.drop_vars = ["X_04", "X_23", "X_47", "X_48"] + ["X_10", "X_11"] + ["X_02"]
        self.consider_as_process = [
            ["X_01", "X_05", "X_06"],
            ["X_14", "X_15", "X_16", "X_17", "X_18"],
            ["X_19", "X_20", "X_21", "X_22"],
            ["X_24", "X_25", "X_26", "X_27", "X_28", "X_29"],
            ["X_30", "X_31", "X_32", "X_33"],
            ["X_34", "X_35", "X_36", "X_37"],
            ["X_38", "X_39", "X_40"],
            ["X_41", "X_42", "X_43", "X_44"],
            ["X_50", "X_51", "X_52", "X_53", "X_54", "X_55", "X_56"],
        ]

    def fit(self, x):
        """Nothing to learn; returns ``self``."""
        return self

    def transform(self, x):
        """Append ``process_<k>_mean`` / ``process_<k>_std`` per group, then drop ``drop_vars``."""
        x_copy = copy.deepcopy(x)

        for idx, members in enumerate(self.consider_as_process):
            group = x_copy[members]
            x_copy[f"process_{idx}_mean"] = group.mean(axis=1)
            x_copy[f"process_{idx}_std"] = group.std(axis=1)

        return x_copy.drop(columns=self.drop_vars)


class feature_engineering_v8():
    """Feature set v8: the v4 block followed by the v7 block, side by side."""

    def __init__(self):
        self.fe4 = feature_engineering_v4()
        self.fe7 = feature_engineering_v7()

    def fit(self, x):
        """Fit both sub-transformers on a copy of ``x``; returns ``self``."""
        snapshot = copy.deepcopy(x)
        for step in (self.fe4, self.fe7):
            step.fit(snapshot)
        return self

    def transform(self, x):
        """Concatenate the v4 and v7 outputs column-wise with integer column labels."""
        snapshot = copy.deepcopy(x)
        parts = [step.transform(snapshot) for step in (self.fe4, self.fe7)]
        return pd.concat(parts, axis=1, ignore_index=True)


class feature_engineering_v9():
    """Feature set v9: v7 features, part of the v4 block, and v2 interactions of the v7 mean columns."""

    def __init__(self):
        self.fe4 = feature_engineering_v4()
        self.fe7 = feature_engineering_v7()
        self.fe2 = feature_engineering_v2()

    def fit(self, x):
        """Fit v4 and v7 on ``x``, then v2 on the v7 columns whose name matches "mean"."""
        raw = copy.deepcopy(x)
        for step in (self.fe4, self.fe7):
            step.fit(raw)
        process_means = self.fe7.transform(raw).filter(regex="mean")
        self.fe2.fit(process_means)
        return self

    def transform(self, x):
        """Return the combined feature block with integer column labels."""
        raw = copy.deepcopy(x)

        fe4_x = self.fe4.transform(raw)
        fe7_x = self.fe7.transform(raw)
        fe2_x = self.fe2.transform(fe7_x.filter(regex="mean"))

        # NOTE(review): 49 presumably skips the first-degree columns of the v4
        # output, and 9 the pass-through mean columns of the v2 output; both
        # offsets are hard-coded - confirm if the column sets change.
        blocks = [fe7_x, fe4_x.iloc[:, 49:], fe2_x.iloc[:, 9:]]
        return pd.concat(blocks, axis=1, ignore_index=True)


class feature_engineering_v10():
    """Feature set v10: a thin wrapper that delegates to ``feature_engineering_v7``."""

    def __init__(self):
        self.fe7 = feature_engineering_v7()

    def fit(self, x):
        """Fit the wrapped v7 transformer on a copy of ``x``; returns ``self``."""
        self.fe7.fit(copy.deepcopy(x))
        return self

    def transform(self, x):
        """Return the v7 features computed from a copy of ``x``."""
        return self.fe7.transform(copy.deepcopy(x))

# Fit the feature engineering on the training inputs and apply it to train and test.
# NOTE(review): feature_version is set to 10 earlier in the file while the v3
# class is applied here - confirm which of the two is intended.
fe = feature_engineering_v3().fit(df_full_x)
df_full_x = fe.transform(df_full_x)
df_test_x = fe.transform(df_test_x.drop(columns="ID"))

# Shuffle inputs and targets with the same seeded permutation so rows stay paired.
np_rnd.seed(42)
shuffled_idx = np_rnd.permutation(len(df_full_x))
df_full_x = df_full_x.iloc[shuffled_idx].reset_index(drop=True)
df_full_y = df_full_y.iloc[shuffled_idx].reset_index(drop=True)

# Training & Inference - H2O AutoML

In [None]:
# Start a local H2O cluster using all cores.
h2o.init(nthreads=-1, max_mem_size=8)

seed_everything()

# Everything below is identical for every target, so it is prepared once
# instead of being rebuilt (and, for the test frame, re-uploaded) per target.
# NOTE(review): output paths are labelled with feature_version - confirm it
# matches the feature-engineering class applied earlier in the file.
model_tag = "featureV" + str(feature_version) + "+" + "H2OAutoML"
inference_root_path = folder_path + "inference/" + model_tag + "/"
submission_path = inference_root_path + "submission_" + model_tag + ".csv"
createFolder(inference_root_path)

feature_names = list(df_full_x.columns)
train_features = df_full_x.astype("float32")
test_frame = h2o.H2OFrame(df_test_x.astype("float32"))
# the 24 hour budget is split evenly over the targets
runtime_per_target = int(3600 * 24 / len(allTarget))

for value in allTarget:
    # one AutoML run per target column
    tmp_df = h2o.H2OFrame(pd.concat([train_features, df_full_y[[value]].astype("float32")], axis=1))
    target_name = value

    aml = h2o.automl.H2OAutoML(max_runtime_secs=runtime_per_target, seed=42, project_name="LG_selfDriving_automl_" + value + "_try1",
                               nfolds=5, stopping_metric="MAE", stopping_rounds=100)
    aml.train(training_frame=tmp_df, x=feature_names, y=target_name)
    print(aml.leaderboard.head())

    # Reload the submission written by earlier targets (or the sample file on
    # the first pass) so each target adds its own column to the same CSV.
    if os.path.exists(submission_path):
        submission = pd.read_csv(submission_path)
    else:
        submission = pd.read_csv(folder_path + 'rawdata/sample_submission.csv')
    submission[value] = aml.predict(test_frame).as_data_frame().to_numpy().flatten()
    submission.to_csv(submission_path, index=False)

h2o.cluster().shutdown()