In [1]:
import joblib

import pyarrow.parquet as pq

import pandas as pd
import numpy as np
import polars as pl

import lightgbm as lgb
import xgboost as xgb
import catboost as cbt

from sklearn.preprocessing import StandardScaler

import gc

from utils import reduce_memory, config

In [2]:
CONFIG = config.CONFIG

In [3]:
# Custom R2 metric for XGBoost
def r2_xgb(y_true, y_pred, sample_weight):
    r2 = 1 - np.average((y_pred - y_true) ** 2, weights=sample_weight) / (
        np.average((y_true) ** 2, weights=sample_weight) + 1e-38
    )
    return -r2


# Custom R2 metric for LightGBM
def r2_lgb(y_true, y_pred, sample_weight):
    r2 = 1 - np.average((y_pred - y_true) ** 2, weights=sample_weight) / (
        np.average((y_true) ** 2, weights=sample_weight) + 1e-38
    )
    return "r2", r2, True


# Custom R2 metric for CatBoost
class r2_cbt(object):
    def get_final_error(self, error, weight):
        return 1 - error / (weight + 1e-38)

    def is_max_optimal(self):
        return True

    def evaluate(self, approxes, target, weight):
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])

        approx = approxes[0]

        error_sum = 0.0
        weight_sum = 0.0

        for i in range(len(approx)):
            w = 1.0 if weight is None else weight[i]
            weight_sum += w * (target[i] ** 2)
            error_sum += w * ((approx[i] - target[i]) ** 2)

        return error_sum, weight_sum


LGB_Params = {
    "n_estimators": 500,
    "device": "gpu",
    "gpu_use_dp": True,
    "objective": "l2",
    "n_jobs": 2,
}

XGB_Params = {
    "learning_rate": 0.05,
    "max_depth": 6,
    "n_estimators": 200,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "reg_alpha": 1,
    "reg_lambda": 5,
    "eval_metric": r2_xgb,
    "disable_default_eval_metric": True,
    "device": "cuda",
    "early_stopping_rounds": 100,
}

CBT_Params = {
    "iterations": 1000,
    "learning_rate": 0.05,
    "task_type": "GPU",
    "loss_function": "RMSE",
    "eval_metric": r2_cbt(),
}

In [4]:
X_valid = pd.read_parquet(f"{CONFIG.main}/data/training_data_impt/X_valid.parquet")
y_valid = (
    pd.read_parquet(f"{CONFIG.main}/data/training_data_impt/y_valid.parquet")
    .to_numpy()
    .flatten()
)
w_valid = (
    pd.read_parquet(f"{CONFIG.main}/data/training_data_impt/w_valid.parquet")
    .to_numpy()
    .flatten()
)

In [5]:
model_dict = {
    # "XGB": xgb.XGBRegressor(**XGB_Params),
    # "LGB": lgb.LGBMRegressor(**LGB_Params),
    "CBT": cbt.CatBoostRegressor(**CBT_Params),
}

In [6]:
def training_models(model_dict, fold):
    for name, model in model_dict.items():
        if name != "CBT":
            X_train = reduce_memory.reduce_mem_usage(
                pd.read_parquet(
                    f"{CONFIG.main}/data/training_data_impt/X_train_{fold}.parquet"
                )
            ).fillna(0)
            y_train = (
                reduce_memory.reduce_mem_usage(
                    pd.read_parquet(
                        f"{CONFIG.main}/data/training_data_impt/y_train_{fold}.parquet"
                    )
                )
                .to_numpy()
                .flatten()
            )
            w_train = (
                reduce_memory.reduce_mem_usage(
                    pd.read_parquet(
                        f"{CONFIG.main}/data/training_data_impt/w_train_{fold}.parquet"
                    )
                )
                .to_numpy()
                .flatten()
            )

        if name == "XGB":
            model.fit(
                X_train,
                y_train,
                sample_weight=w_train,
                eval_set=[(X_valid, y_valid)],
                sample_weight_eval_set=[w_valid],
                verbose=10,
            )

        if name == "LGB":
            model.fit(
                X_train,
                y_train,
                w_train,
                eval_metric=[r2_lgb],
                eval_set=[(X_valid, y_valid, w_valid)],
                callbacks=[lgb.early_stopping(100), lgb.log_evaluation(10)],
            )

        if name == "CBT":
            CBT_list = []

            train_features_file_path = (
                f"{CONFIG.main}/data/training_data_impt/X_train_{fold}.parquet"
            )
            train_labels_file_path = (
                f"{CONFIG.main}/data/training_data_impt/y_train_{fold}.parquet"
            )
            train_weights_file_path = (
                f"{CONFIG.main}/data/training_data_impt/w_train_{fold}.parquet"
            )
            train_features_file = pq.ParquetFile(train_features_file_path)
            train_labels_file = pq.ParquetFile(train_labels_file_path)
            train_weights_file = pq.ParquetFile(train_weights_file_path)

            train_row_group = train_features_file.num_row_groups

            step = int(train_row_group / 2)
            for i in range(0, train_row_group, step):
                print(i)
                batch_end = min(i + step, train_row_group - 1)
                X_train = reduce_memory.reduce_mem_usage(
                    train_features_file.read_row_groups(
                        [i for i in range(i, batch_end)]
                    )
                    .to_pandas()
                    .fillna(0)
                ).values
                y_train = reduce_memory.reduce_mem_usage(
                    train_labels_file.read_row_groups([i for i in range(i, batch_end)])
                    .to_pandas()
                    .fillna(0)
                ).values.squeeze()
                w_train = reduce_memory.reduce_mem_usage(
                    train_weights_file.read_row_groups([i for i in range(i, batch_end)])
                    .to_pandas()
                    .fillna(0)
                ).values.squeeze()

                evalset = cbt.Pool(X_valid, y_valid, weight=w_valid)

                CBT_list.append(
                    model.fit(
                        X_train,
                        y_train,
                        sample_weight=w_train,
                        eval_set=[evalset],
                        verbose=10,
                        early_stopping_rounds=100,
                    )
                )
            model = cbt.sum_models(
                CBT_list, weights=[1.0 / len(CBT_list)] * len(CBT_list)
            )

        model_filename = f"{CONFIG.main}/Models_impt/{name}/{name}_{fold+1}.pkl"
        joblib.dump(model, model_filename)

        gc.collect()

    del (
        X_train,
        y_train,
        w_train,
    )

In [7]:
for fold in range(CONFIG.N_fold):
    training_models(model_dict=model_dict, fold=fold)

0
Memory usage of dataframe is 6120.00 MB
Memory usage after optimization is: 5712.00 MB
Decreased by 6.67%
Memory usage of dataframe is 34.00 MB
Memory usage after optimization is: 34.00 MB
Decreased by 0.00%
Memory usage of dataframe is 34.00 MB
Memory usage after optimization is: 34.00 MB
Decreased by 0.00%


Default metric period is 5 because PythonUserDefinedPerObject is/are not implemented for GPU


0:	learn: 0.0005675	test: 0.0002167	best: 0.0002167 (0)	total: 1.12s	remaining: 18m 35s
10:	learn: 0.0043166	test: 0.0012520	best: 0.0012520 (10)	total: 3.71s	remaining: 5m 34s
20:	learn: 0.0064801	test: 0.0016179	best: 0.0016179 (20)	total: 6.24s	remaining: 4m 50s
30:	learn: 0.0079577	test: 0.0018387	best: 0.0018387 (30)	total: 8.77s	remaining: 4m 34s
40:	learn: 0.0092179	test: 0.0014201	best: 0.0019324 (33)	total: 11.1s	remaining: 4m 18s
50:	learn: 0.0102420	test: 0.0015305	best: 0.0019324 (33)	total: 13.5s	remaining: 4m 10s
60:	learn: 0.0110805	test: 0.0016154	best: 0.0019324 (33)	total: 15.8s	remaining: 4m 2s
70:	learn: 0.0119759	test: 0.0004450	best: 0.0019324 (33)	total: 18s	remaining: 3m 56s
80:	learn: 0.0126686	test: 0.0003550	best: 0.0019324 (33)	total: 20.4s	remaining: 3m 51s
90:	learn: 0.0133861	test: -0.0009274	best: 0.0019324 (33)	total: 22.6s	remaining: 3m 45s
100:	learn: 0.0140355	test: -0.0017595	best: 0.0019324 (33)	total: 24.9s	remaining: 3m 42s
110:	learn: 0.0146655	

Default metric period is 5 because PythonUserDefinedPerObject is/are not implemented for GPU


0:	learn: 0.0005734	test: 0.0002332	best: 0.0002332 (0)	total: 1.11s	remaining: 18m 31s
10:	learn: 0.0044228	test: 0.0019829	best: 0.0019829 (10)	total: 3.63s	remaining: 5m 26s
20:	learn: 0.0065953	test: 0.0028946	best: 0.0028946 (20)	total: 5.94s	remaining: 4m 36s
30:	learn: 0.0082070	test: 0.0035715	best: 0.0035715 (30)	total: 8.18s	remaining: 4m 15s
40:	learn: 0.0094633	test: 0.0039916	best: 0.0039916 (40)	total: 10.4s	remaining: 4m 2s
50:	learn: 0.0105988	test: 0.0043229	best: 0.0043229 (50)	total: 12.6s	remaining: 3m 53s
60:	learn: 0.0116588	test: 0.0045770	best: 0.0045770 (60)	total: 14.8s	remaining: 3m 48s
70:	learn: 0.0128133	test: 0.0047482	best: 0.0047537 (69)	total: 16.9s	remaining: 3m 41s
80:	learn: 0.0136341	test: 0.0049458	best: 0.0049458 (80)	total: 19.2s	remaining: 3m 37s
90:	learn: 0.0146938	test: 0.0050136	best: 0.0050515 (88)	total: 21.3s	remaining: 3m 32s
100:	learn: 0.0157150	test: 0.0051802	best: 0.0051802 (100)	total: 23.4s	remaining: 3m 28s
110:	learn: 0.0164827

Default metric period is 5 because PythonUserDefinedPerObject is/are not implemented for GPU


0:	learn: 0.0005658	test: 0.0002270	best: 0.0002270 (0)	total: 523ms	remaining: 8m 42s
10:	learn: 0.0043262	test: 0.0012795	best: 0.0012795 (10)	total: 2.98s	remaining: 4m 28s
20:	learn: 0.0065608	test: 0.0016281	best: 0.0016281 (20)	total: 5.41s	remaining: 4m 11s
30:	learn: 0.0080760	test: 0.0017777	best: 0.0017777 (30)	total: 7.62s	remaining: 3m 58s
40:	learn: 0.0093597	test: 0.0015372	best: 0.0018119 (33)	total: 9.88s	remaining: 3m 51s
50:	learn: 0.0104133	test: 0.0011380	best: 0.0018119 (33)	total: 12.1s	remaining: 3m 44s
60:	learn: 0.0113779	test: 0.0006137	best: 0.0018119 (33)	total: 14.4s	remaining: 3m 40s
70:	learn: 0.0121777	test: 0.0001766	best: 0.0018119 (33)	total: 16.5s	remaining: 3m 35s
80:	learn: 0.0129749	test: 0.0000460	best: 0.0018119 (33)	total: 18.6s	remaining: 3m 31s
90:	learn: 0.0137041	test: -0.0012376	best: 0.0018119 (33)	total: 20.8s	remaining: 3m 28s
100:	learn: 0.0143416	test: -0.0011568	best: 0.0018119 (33)	total: 23.1s	remaining: 3m 25s
110:	learn: 0.015002

Default metric period is 5 because PythonUserDefinedPerObject is/are not implemented for GPU


0:	learn: 0.0005381	test: 0.0002955	best: 0.0002955 (0)	total: 783ms	remaining: 13m 2s
10:	learn: 0.0042977	test: 0.0020798	best: 0.0020798 (10)	total: 3.23s	remaining: 4m 50s
20:	learn: 0.0065274	test: 0.0029681	best: 0.0029681 (20)	total: 5.72s	remaining: 4m 26s
30:	learn: 0.0081146	test: 0.0035642	best: 0.0035642 (30)	total: 8s	remaining: 4m 9s
40:	learn: 0.0094872	test: 0.0040172	best: 0.0040172 (40)	total: 10.3s	remaining: 4m
50:	learn: 0.0106628	test: 0.0043105	best: 0.0043105 (50)	total: 12.4s	remaining: 3m 51s
60:	learn: 0.0117549	test: 0.0045522	best: 0.0045522 (60)	total: 14.5s	remaining: 3m 43s
70:	learn: 0.0126168	test: 0.0047436	best: 0.0047436 (70)	total: 16.5s	remaining: 3m 36s
80:	learn: 0.0134786	test: 0.0049428	best: 0.0049428 (80)	total: 18.7s	remaining: 3m 31s
90:	learn: 0.0141439	test: 0.0051275	best: 0.0051275 (90)	total: 20.9s	remaining: 3m 28s
100:	learn: 0.0150053	test: 0.0052672	best: 0.0052696 (99)	total: 23s	remaining: 3m 25s
110:	learn: 0.0156610	test: 0.00

Default metric period is 5 because PythonUserDefinedPerObject is/are not implemented for GPU


0:	learn: 0.0005800	test: 0.0002000	best: 0.0002000 (0)	total: 476ms	remaining: 7m 55s
10:	learn: 0.0044908	test: 0.0011535	best: 0.0011535 (10)	total: 2.95s	remaining: 4m 25s
20:	learn: 0.0067777	test: 0.0014731	best: 0.0014731 (20)	total: 5.29s	remaining: 4m 6s
30:	learn: 0.0083348	test: 0.0016171	best: 0.0016171 (30)	total: 8.78s	remaining: 4m 34s
40:	learn: 0.0096220	test: 0.0013511	best: 0.0017210 (36)	total: 12s	remaining: 4m 39s
50:	learn: 0.0106816	test: 0.0013482	best: 0.0017210 (36)	total: 15.3s	remaining: 4m 45s
60:	learn: 0.0116043	test: 0.0001114	best: 0.0017210 (36)	total: 18.7s	remaining: 4m 47s
70:	learn: 0.0124492	test: -0.0001424	best: 0.0017210 (36)	total: 21.6s	remaining: 4m 42s
80:	learn: 0.0132140	test: -0.0012332	best: 0.0017210 (36)	total: 25s	remaining: 4m 43s
90:	learn: 0.0138998	test: -0.0016243	best: 0.0017210 (36)	total: 27.3s	remaining: 4m 33s
100:	learn: 0.0146007	test: -0.0017683	best: 0.0017210 (36)	total: 29.7s	remaining: 4m 24s
110:	learn: 0.0151939	t

Default metric period is 5 because PythonUserDefinedPerObject is/are not implemented for GPU


0:	learn: 0.0005311	test: 0.0002799	best: 0.0002799 (0)	total: 854ms	remaining: 14m 13s
10:	learn: 0.0039339	test: 0.0020173	best: 0.0020173 (10)	total: 3.22s	remaining: 4m 49s
20:	learn: 0.0059488	test: 0.0029824	best: 0.0029824 (20)	total: 5.55s	remaining: 4m 18s
30:	learn: 0.0074430	test: 0.0036267	best: 0.0036267 (30)	total: 7.71s	remaining: 4m
40:	learn: 0.0085906	test: 0.0040610	best: 0.0040610 (40)	total: 9.87s	remaining: 3m 50s
50:	learn: 0.0095967	test: 0.0044804	best: 0.0044804 (50)	total: 12s	remaining: 3m 43s
60:	learn: 0.0107567	test: 0.0047603	best: 0.0047603 (60)	total: 14.1s	remaining: 3m 36s
70:	learn: 0.0116327	test: 0.0050369	best: 0.0050369 (70)	total: 16.3s	remaining: 3m 32s
80:	learn: 0.0124708	test: 0.0052662	best: 0.0052662 (80)	total: 18.4s	remaining: 3m 29s
90:	learn: 0.0130791	test: 0.0054563	best: 0.0054563 (90)	total: 20.6s	remaining: 3m 26s
100:	learn: 0.0138302	test: 0.0056006	best: 0.0056006 (100)	total: 22.8s	remaining: 3m 23s
110:	learn: 0.0149000	test

Default metric period is 5 because PythonUserDefinedPerObject is/are not implemented for GPU


0:	learn: 0.0005747	test: 0.0002066	best: 0.0002066 (0)	total: 503ms	remaining: 8m 22s
10:	learn: 0.0043789	test: 0.0012454	best: 0.0012454 (10)	total: 3.16s	remaining: 4m 44s
20:	learn: 0.0065314	test: 0.0016326	best: 0.0016326 (20)	total: 5.67s	remaining: 4m 24s
30:	learn: 0.0080062	test: 0.0018037	best: 0.0018037 (30)	total: 8.16s	remaining: 4m 15s
40:	learn: 0.0092692	test: 0.0019675	best: 0.0019675 (40)	total: 10.7s	remaining: 4m 10s
50:	learn: 0.0102326	test: 0.0021372	best: 0.0021372 (50)	total: 13.1s	remaining: 4m 4s
60:	learn: 0.0110424	test: 0.0023237	best: 0.0023237 (60)	total: 15.5s	remaining: 3m 57s
70:	learn: 0.0117791	test: 0.0024227	best: 0.0024227 (70)	total: 18s	remaining: 3m 55s
80:	learn: 0.0124117	test: 0.0025920	best: 0.0025952 (78)	total: 20.3s	remaining: 3m 49s
90:	learn: 0.0130341	test: 0.0027462	best: 0.0027558 (89)	total: 22.6s	remaining: 3m 45s
100:	learn: 0.0135742	test: 0.0028262	best: 0.0028262 (100)	total: 24.9s	remaining: 3m 41s
110:	learn: 0.0141695	te

Default metric period is 5 because PythonUserDefinedPerObject is/are not implemented for GPU


0:	learn: 0.0005282	test: 0.0002848	best: 0.0002848 (0)	total: 1.11s	remaining: 18m 32s
10:	learn: 0.0040532	test: 0.0020671	best: 0.0020671 (10)	total: 3.58s	remaining: 5m 22s
20:	learn: 0.0061027	test: 0.0030389	best: 0.0030389 (20)	total: 5.94s	remaining: 4m 36s
30:	learn: 0.0077488	test: 0.0036875	best: 0.0036875 (30)	total: 8.21s	remaining: 4m 16s
40:	learn: 0.0090359	test: 0.0041737	best: 0.0041737 (40)	total: 10.6s	remaining: 4m 7s
50:	learn: 0.0100730	test: 0.0045268	best: 0.0045268 (50)	total: 12.9s	remaining: 3m 59s
60:	learn: 0.0109852	test: 0.0048073	best: 0.0048073 (60)	total: 14.9s	remaining: 3m 49s
70:	learn: 0.0117745	test: 0.0050225	best: 0.0050225 (70)	total: 17.1s	remaining: 3m 43s
80:	learn: 0.0125413	test: 0.0052622	best: 0.0052622 (80)	total: 19.4s	remaining: 3m 40s
90:	learn: 0.0137159	test: 0.0053971	best: 0.0053971 (90)	total: 21.6s	remaining: 3m 35s
100:	learn: 0.0145595	test: 0.0055022	best: 0.0055036 (99)	total: 23.7s	remaining: 3m 31s
110:	learn: 0.0152424	

Default metric period is 5 because PythonUserDefinedPerObject is/are not implemented for GPU


0:	learn: 0.0005712	test: 0.0002083	best: 0.0002083 (0)	total: 513ms	remaining: 8m 32s
10:	learn: 0.0043785	test: 0.0012018	best: 0.0012018 (10)	total: 3.12s	remaining: 4m 40s
20:	learn: 0.0066471	test: 0.0016713	best: 0.0016713 (20)	total: 5.65s	remaining: 4m 23s
30:	learn: 0.0081633	test: 0.0018889	best: 0.0018889 (30)	total: 8.23s	remaining: 4m 17s
40:	learn: 0.0093580	test: 0.0020898	best: 0.0020898 (40)	total: 10.8s	remaining: 4m 11s
50:	learn: 0.0103805	test: 0.0014939	best: 0.0020981 (42)	total: 13s	remaining: 4m 1s
60:	learn: 0.0113328	test: 0.0005869	best: 0.0020981 (42)	total: 15.3s	remaining: 3m 55s
70:	learn: 0.0121310	test: -0.0001863	best: 0.0020981 (42)	total: 17.6s	remaining: 3m 49s
80:	learn: 0.0128383	test: -0.0019501	best: 0.0020981 (42)	total: 19.9s	remaining: 3m 45s
90:	learn: 0.0135211	test: -0.0030214	best: 0.0020981 (42)	total: 22.1s	remaining: 3m 41s
100:	learn: 0.0141135	test: -0.0031720	best: 0.0020981 (42)	total: 24.4s	remaining: 3m 37s
110:	learn: 0.0147308

Default metric period is 5 because PythonUserDefinedPerObject is/are not implemented for GPU


0:	learn: 0.0005754	test: 0.0003097	best: 0.0003097 (0)	total: 948ms	remaining: 15m 47s
10:	learn: 0.0044891	test: 0.0020442	best: 0.0020442 (10)	total: 3.33s	remaining: 4m 59s
20:	learn: 0.0068155	test: 0.0030343	best: 0.0030343 (20)	total: 5.61s	remaining: 4m 21s
30:	learn: 0.0085049	test: 0.0036983	best: 0.0036983 (30)	total: 7.88s	remaining: 4m 6s
40:	learn: 0.0098170	test: 0.0041344	best: 0.0041344 (40)	total: 10s	remaining: 3m 54s
50:	learn: 0.0108809	test: 0.0044869	best: 0.0044869 (50)	total: 12.2s	remaining: 3m 47s
60:	learn: 0.0118233	test: 0.0047949	best: 0.0047949 (60)	total: 14.6s	remaining: 3m 44s
70:	learn: 0.0127851	test: 0.0049882	best: 0.0049882 (70)	total: 16.8s	remaining: 3m 39s
80:	learn: 0.0135521	test: 0.0052164	best: 0.0052164 (80)	total: 19.1s	remaining: 3m 36s
90:	learn: 0.0142711	test: 0.0053482	best: 0.0053482 (90)	total: 21.3s	remaining: 3m 32s
100:	learn: 0.0149024	test: 0.0055259	best: 0.0055259 (100)	total: 23.4s	remaining: 3m 28s
110:	learn: 0.0156105	t