# Module

In [4]:
# !pip install -qq pytorch_tabnet
# !pip install -q lifelines

In [5]:
# !git clone https://github.com/muhammadabdullah0303/AbdML /teamspace/studios/this_studio/AbdML/

In [2]:
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
%%time

import random
import pandas as pd
import numpy as np
import polars as pl
from tqdm import tqdm
import category_encoders as ce
from IPython.display import clear_output

from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer  
from sklearn.base import clone
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, PowerTransformer, OneHotEncoder, LabelEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import *
from sklearn.metrics import *
from sklearn.ensemble import HistGradientBoostingRegressor

import optuna
import lightgbm as lgb
from lightgbm import early_stopping  
import xgboost as xgb
from catboost import CatBoostRegressor, CatBoostClassifier, Pool

import warnings

warnings.filterwarnings('ignore')

CPU times: user 830 ms, sys: 750 ms, total: 1.58 s
Wall time: 1.23 s


In [4]:
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 50)

In [5]:
np.random.seed(2024)
random.seed(2024)

# Func

In [6]:
%%time

def load_data():    
    train = pd.read_csv('data/train.csv')
    test = pd.read_csv('data/test.csv')    
    all_df = pd.concat([train, test], sort=False).reset_index(drop=True)
    return train, test, all_df

def skewed(df, all_df):
    pt = PowerTransformer(method='yeo-johnson')
    pt.fit(df[['Annual Income']])
    all_df['transformed_Annual_Income'] = pt.transform(all_df[['Annual Income']])

    return all_df
    
def date(df):
    df['Policy Start Date'] = pd.to_datetime(df['Policy Start Date'])
    df['Year'] = df['Policy Start Date'].dt.year
    df['Day'] = df['Policy Start Date'].dt.day
    df['Month'] = df['Policy Start Date'].dt.month
    df['Month_name'] = df['Policy Start Date'].dt.month_name()
    df['Day_of_week'] = df['Policy Start Date'].dt.day_name()
    df['Week'] = df['Policy Start Date'].dt.isocalendar().week
    df['Year_sin'] = np.sin(2 * np.pi * df['Year'])
    df['Year_cos'] = np.cos(2 * np.pi * df['Year'])
    min_year = df['Year'].min()
    max_year = df['Year'].max()
    df['Year_sin'] = np.sin(2 * np.pi * (df['Year'] - min_year) / (max_year - min_year))
    df['Year_cos'] = np.cos(2 * np.pi * (df['Year'] - min_year) / (max_year - min_year))
    df['Month_sin'] = np.sin(2 * np.pi * df['Month'] / 12) 
    df['Month_cos'] = np.cos(2 * np.pi * df['Month'] / 12)
    df['Day_sin'] = np.sin(2 * np.pi * df['Day'] / 31)  
    df['Day_cos'] = np.cos(2 * np.pi * df['Day'] / 31)
    df['Group']=(df['Year']-2020)*48+df['Month']*4+df['Day']//7    
    df.drop('Policy Start Date', axis=1, inplace=True)
    return df

def fe(df):
    df['contract length'] = pd.cut(
        df["Insurance Duration"].fillna(99),  
        bins=[-float('inf'), 1, 3, float('inf')],  
        labels=[0, 1, 2]  
    ).astype(int)

    return df

def get_nan_cols(df):
    nan_cols = ['Marital Status', 'Customer Feedback', 'Health Score', 'Previous Claims', 'Vehicle Age', 'Credit Score', 'Insurance Duration']

    for col in nan_cols:
        col_name = col + '_NA'
        df[col_name] = df[col].isnull().astype(int)
    return df

def get_encoding(df):
    def encode_ordinal(df):
        educ = {"High School":0, "Bachelor's":1, "Master's":2, "PhD":3}
        policy = {'Basic':0, 'Comprehensive':1, 'Premium':2}
        exerc = {'Rarely':0, 'Daily':1, 'Weekly':2, 'Monthly': 3}
        feedback = {'Poor':0, 'Average':1, 'Good':2, "Unknown": 0}

        df['Education Level'] = df['Education Level'].map(educ)
        df['Policy Type'] = df['Policy Type'].map(policy)
        df['Exercise Frequency'] = df['Exercise Frequency'].map(exerc)
        df['Customer Feedback'] = df['Customer Feedback'].map(feedback)
        return df

    def encode_binary(df):
        df['Gender'] = df['Gender'].map({'Male':0, 'Female':1})
        df['Smoking Status'] = df['Smoking Status'].map({'Yes':1, 'No':0})
        return df

    def one_hot_dummies(df, categorical):
        oh = pd.get_dummies(df[categorical])
        df = df.drop(categorical, axis=1)
        return pd.concat([df, oh], axis=1)
        return df

    df = encode_binary(df)
    df = encode_ordinal(df)
    
    categorical_features = df.select_dtypes(include='object').columns
    df = one_hot_dummies(df, categorical_features)
    return df

def add_new_features(df):
    df['Income to Dependents Ratio'] = df['Annual Income'] / (df['Number of Dependents'].fillna(0) + 1)
    df['Income_per_Dependent'] = df['Annual Income'] / (df['Number of Dependents'] + 1)
    df['CreditScore_InsuranceDuration'] = df['Credit Score'] * df['Insurance Duration']
    df['Health_Risk_Score'] = df['Smoking Status'].apply(lambda x: 1 if x == 'Smoker' else 0) + \
                                df['Exercise Frequency'].apply(lambda x: 1 if x == 'Low' else (0.5 if x == 'Medium' else 0)) + \
                                (100 - df['Health Score']) / 20
    df['Credit_Health_Score'] = df['Credit Score'] * df['Health Score']
    df['Health_Age_Interaction'] = df['Health Score'] * df['Age']
    df['contract length'] = pd.cut(
        df["Insurance Duration"].fillna(99),  
        bins=[-float('inf'), 1, 3, float('inf')],  
        labels=[0, 1, 2]  
    ).astype(int)

    return df

def prep():
    train, test, all_df = load_data()

    all_df = skewed(train, all_df)
    all_df = date(all_df)
    all_df = fe(all_df)
    all_df = get_nan_cols(all_df)
    all_df = get_encoding(all_df)
    all_df = add_new_features(all_df)

    del all_df['Annual Income']
    
    train = all_df[~all_df['Premium Amount'].isnull()]
    test = all_df[all_df['Premium Amount'].isnull()]

    test.drop('Premium Amount', axis=1, inplace=True)

    return train, test, all_df

train, test, all_df = prep()

CPU times: user 8.22 s, sys: 2.69 s, total: 10.9 s
Wall time: 11 s


In [7]:
all_df.head()

Unnamed: 0,id,Age,Gender,Number of Dependents,Education Level,Health Score,Policy Type,Previous Claims,Vehicle Age,Credit Score,Insurance Duration,Customer Feedback,Smoking Status,Exercise Frequency,Premium Amount,transformed_Annual_Income,Year,Day,Month,Week,Year_sin,Year_cos,Month_sin,Month_cos,Day_sin,...,Month_name_April,Month_name_August,Month_name_December,Month_name_February,Month_name_January,Month_name_July,Month_name_June,Month_name_March,Month_name_May,Month_name_November,Month_name_October,Month_name_September,Day_of_week_Friday,Day_of_week_Monday,Day_of_week_Saturday,Day_of_week_Sunday,Day_of_week_Thursday,Day_of_week_Tuesday,Day_of_week_Wednesday,Income to Dependents Ratio,Income_per_Dependent,CreditScore_InsuranceDuration,Health_Risk_Score,Credit_Health_Score,Health_Age_Interaction
0,0,19.0,1,1.0,1,22.598761,2,2.0,17.0,372.0,5.0,0.0,0,2,2869.0,-0.596487,2023,23,12,51,-0.9510565,0.309017,-2.449294e-16,1.0,-0.998717,...,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,5024.5,5024.5,1860.0,3.870062,8406.73897,429.376453
1,1,39.0,1,3.0,2,15.569731,1,1.0,12.0,694.0,2.0,1.0,1,3,1483.0,0.336563,2023,12,6,24,-0.9510565,0.309017,1.224647e-16,-1.0,0.651372,...,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,7919.5,7919.5,1388.0,4.221513,10805.393307,607.219509
2,2,23.0,0,3.0,0,47.177549,2,1.0,14.0,,3.0,2.0,1,2,567.0,0.140781,2023,30,9,39,-0.9510565,0.309017,-1.0,-1.83697e-16,-0.201299,...,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,6400.5,6400.5,,2.641123,,1085.083634
3,3,21.0,0,2.0,1,10.938144,0,1.0,0.0,367.0,1.0,0.0,1,1,765.0,2.088459,2024,12,6,24,-2.449294e-16,1.0,1.224647e-16,-1.0,0.651372,...,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,47285.0,47285.0,367.0,4.453093,4014.298906,229.701027
4,4,21.0,0,1.0,1,20.376094,2,0.0,8.0,598.0,4.0,0.0,1,2,2022.0,0.555622,2021,1,12,48,0.5877853,-0.809017,-2.449294e-16,1.0,0.201299,...,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,19825.5,19825.5,2392.0,3.981195,12184.903989,427.897966


In [8]:
train.shape, test.shape

((1200000, 72), (800000, 71))

# AbdML

In [9]:
import os
import sys

sys.path.append(os.path.abspath("../AbdML"))

from main import AbdBase

In [10]:
train = train.drop('id', axis=1)
test = test.drop('id', axis=1)

train.shape, test.shape

((1200000, 71), (800000, 70))

## LGBM

In [11]:
SEED = 2024
n_splits = 10

base = AbdBase(train_data=train, test_data=test, target_column='Premium Amount', gpu=False,
                 problem_type="regression", metric="rmsle", seed=SEED,
                 n_splits=n_splits, early_stop=True, num_classes=0, #cat_features = cat_c,
                 fold_type='RKF')

[31m*** AbdBase ['V_1.3'] ***

[31m *** Available Settings *** 

[31mAvailable Models: [36mLGBM, [36mCAT, [36mXGB, [36mVoting, [36mTABNET
[31mAvailable Metrics: [36mroc_auc, [36maccuracy, [36mf1, [36mprecision, [36mrecall, [36mrmse, [36mwmae, [36mrmsle, [36mmae, [36mr2, [36mmse
[31mAvailable Problem Types: [36mclassification, [36mregression
[31mAvailable Fold Types: [36mSKF, [36mKF, [36mGKF, [36mGSKF, [36mRKF
[31m
 *** Configuration *** 

[31mProblem Type Selected: [36mREGRESSION
[31mMetric Selected: [36mRMSLE
[31mFold Type Selected: [36mRKF
[31mCalculate Train Probabilities: [36mFalse
[31mCalculate Test Probabilities: [36mFalse
[31mEarly Stopping: [36mTrue
[31mGPU: [36mFalse


In [16]:
%%time

param_space = {
    'estimators': 300,
    'boosting_type': 'gbdt',
    'num_leaves': (10, 300),                        
    'learning_rate': (1e-4, 1e-1),                  
    'feature_fraction': (0.6, 1.0),                 
    'bagging_fraction': (0.6, 1.0),                 
    'bagging_freq': (5, 12),                        
    'min_data_in_leaf': (10, 100),                  
    'max_depth': (-1, 12),                          
    'lambda_l1': (1e-4, 10.0),                      
    'lambda_l2': (1e-4, 10.0),                      
    'min_gain_to_split': (0.001, 0.1),
    'n_jobs': -1
}

lgb_study = base.RUN_OPTUNA(
    MODEL_NAME="LGBM",
    PARAMS=param_space,
    DIRECTION='minimize',
    TRIALS=5,
    ENABLE_PRUNER=True,              # Early termination of ineffective attempts
    PRUNER_PARAMS={'n_startup_trials': 3, 'n_warmup_steps': 3, 'interval_steps': 3},
    y_log=True
)

lgb_study.best_params

[I 2024-12-27 03:59:17,820] A new study created in memory with name: no-name-dbd777b8-fe87-476c-8ffc-cb88de464c58
Training Folds: 100%|██████████| 10/10 [02:08<00:00, 12.81s/it]
[I 2024-12-27 04:01:25,998] Trial 0 finished with value: 1.0458 and parameters: {'num_leaves': 118, 'learning_rate': 0.07114476009343425, 'feature_fraction': 0.8720536237417198, 'bagging_fraction': 0.8146346649119967, 'bagging_freq': 6, 'min_data_in_leaf': 24, 'max_depth': -1, 'lambda_l1': 2.1423021757741068, 'lambda_l2': 0.10129197956845731, 'min_gain_to_split': 0.02607024758370768}. Best is trial 0 with value: 1.0458.
Training Folds: 100%|██████████| 10/10 [01:21<00:00,  8.16s/it]
[I 2024-12-27 04:02:47,663] Trial 1 finished with value: 1.0529 and parameters: {'num_leaves': 15, 'learning_rate': 0.0812324508558869, 'feature_fraction': 0.9179681421265244, 'bagging_fraction': 0.6687417180293094, 'bagging_freq': 6, 'min_data_in_leaf': 26, 'max_depth': 3, 'lambda_l1': 0.042051564509138675, 'lambda_l2': 0.014445251

CPU times: user 23min 54s, sys: 25.6 s, total: 24min 20s
Wall time: 7min 21s


{'num_leaves': 118,
 'learning_rate': 0.07114476009343425,
 'feature_fraction': 0.8720536237417198,
 'bagging_fraction': 0.8146346649119967,
 'bagging_freq': 6,
 'min_data_in_leaf': 24,
 'max_depth': -1,
 'lambda_l1': 2.1423021757741068,
 'lambda_l2': 0.10129197956845731,
 'min_gain_to_split': 0.02607024758370768}

In [12]:
%%time

# Params = lgb_study.best_params
# Params['estimators'] = 1000

Params = {
 'estimators': 300,
 'boosting_type': 'gbdt',
 'num_leaves': 118,
 'learning_rate': 0.07114476009343425,
 'feature_fraction': 0.8720536237417198,
 'bagging_fraction': 0.8146346649119967,
 'bagging_freq': 6,
 'min_data_in_leaf': 24,
 'max_depth': -1,
 'lambda_l1': 2.1423021757741068,
 'lambda_l2': 0.10129197956845731,
 'min_gain_to_split': 0.02607024758370768,
 'n_jobs': -1
}

results_lgb = base.Train_ML(Params,'LGBM', e_stop=200, y_log=True)

Training Folds: 100%|██████████| 10/10 [02:03<00:00, 12.33s/it]

Overall Train RMSLE: 1.0373
Overall OOF RMSLE: 1.0458 
CPU times: user 7min 1s, sys: 5.78 s, total: 7min 7s
Wall time: 2min 3s





In [13]:
results_lgb

# 1. OOF 에측결과, 2. test 예측결과, 3. 최종학습모델 4.fold별 모델 리스트, 5. OOF 스코어, 6. 학습데이터 스코어

(array([966.55530898, 734.93968814, 808.78972131, ..., 186.60993404,
        759.51377062, 272.23902899]),
 array([827.78980521, 790.25433658, 793.97637455, ..., 805.35630968,
        814.12190218, 787.19724619]),
 LGBMRegressor(bagging_fraction=0.8146346649119967, bagging_freq=6, device='cpu',
               estimators=300, feature_fraction=0.8720536237417198,
               lambda_l1=2.1423021757741068, lambda_l2=0.10129197956845731,
               learning_rate=0.07114476009343425, min_data_in_leaf=24,
               min_gain_to_split=0.02607024758370768, n_jobs=-1, num_leaves=118,
               random_state=2024, verbose=-1),
 [LGBMRegressor(bagging_fraction=0.8146346649119967, bagging_freq=6, device='cpu',
                estimators=300, feature_fraction=0.8720536237417198,
                lambda_l1=2.1423021757741068, lambda_l2=0.10129197956845731,
                learning_rate=0.07114476009343425, min_data_in_leaf=24,
                min_gain_to_split=0.02607024758370768, n_job

-> Public Score : 1.04476

## CatBoost

In [32]:
%%time

param_space = {
    "iterations":300,
    "learning_rate": (1e-4, 1e-1),
    "depth": (3, 12),
    "l2_leaf_reg": (1e-4, 10.0),
    "bagging_temperature": (1e-3, 1.0),
    "random_strength": (1e-3, 10.0),
    "border_count": (32, 255),
    "colsample_bylevel": (0.6, 1.0),
}

cat_study = base.RUN_OPTUNA(
    MODEL_NAME="CAT",
    PARAMS=param_space,
    DIRECTION='minimize',
    TRIALS=5,
    ENABLE_PRUNER=True,              # Early termination of ineffective attempts
    PRUNER_PARAMS={'n_startup_trials': 3, 'n_warmup_steps': 3, 'interval_steps': 3},
    y_log=True
)

cat_study.best_params

[I 2024-12-27 05:31:08,488] A new study created in memory with name: no-name-e5a97b98-2a66-46c7-924d-bb5e0f409fbf
Training Folds: 100%|██████████| 10/10 [09:58<00:00, 59.86s/it]
[I 2024-12-27 05:41:07,157] Trial 0 finished with value: 1.0703 and parameters: {'learning_rate': 0.0013292918943162175, 'depth': 12, 'l2_leaf_reg': 0.4570563099801455, 'bagging_temperature': 0.06251373574521749, 'random_strength': 0.004207988669606638, 'border_count': 66, 'colsample_bylevel': 0.6180690932801379}. Best is trial 0 with value: 1.0703.
Training Folds: 100%|██████████| 10/10 [05:16<00:00, 31.66s/it]
[I 2024-12-27 05:46:23,778] Trial 1 finished with value: 1.0523 and parameters: {'learning_rate': 0.0396760507705299, 'depth': 9, 'l2_leaf_reg': 0.3470266988650412, 'bagging_temperature': 0.00115279871282324, 'random_strength': 7.579479953348009, 'border_count': 218, 'colsample_bylevel': 0.6687417180293094}. Best is trial 1 with value: 1.0523.
Training Folds: 100%|██████████| 10/10 [03:02<00:00, 18.29s/

CPU times: user 1h 20min 15s, sys: 1min 32s, total: 1h 21min 47s
Wall time: 24min 18s


{'learning_rate': 0.0396760507705299,
 'depth': 9,
 'l2_leaf_reg': 0.3470266988650412,
 'bagging_temperature': 0.00115279871282324,
 'random_strength': 7.579479953348009,
 'border_count': 218,
 'colsample_bylevel': 0.6687417180293094}

In [34]:
%%time

Params = cat_study.best_params
Params['iterations'] = 1000

results_cat = base.Train_ML(Params,'CAT', e_stop=200, y_log=True)

Training Folds: 100%|██████████| 10/10 [17:39<00:00, 105.95s/it]

Overall Train RMSLE: 1.0331
Overall OOF RMSLE: 1.0470 
CPU times: user 56min 24s, sys: 2min 26s, total: 58min 50s
Wall time: 17min 39s





In [40]:
results_cat

(array([948.54578956, 750.34836391, 804.01920793, ..., 183.65217835,
        737.65949109, 290.38160347]),
 array([791.53121895, 777.52688274, 762.57050624, ..., 819.85472178,
        799.67671511, 763.55549787]),
 <catboost.core.CatBoostRegressor at 0x7fe0ce627940>,
 [<catboost.core.CatBoostRegressor at 0x7fe0cd3fe770>,
  <catboost.core.CatBoostRegressor at 0x7fe0ce208f40>,
  <catboost.core.CatBoostRegressor at 0x7fe0cc8d7640>,
  <catboost.core.CatBoostRegressor at 0x7fe0cc8d64a0>,
  <catboost.core.CatBoostRegressor at 0x7fe0bed8c820>,
  <catboost.core.CatBoostRegressor at 0x7fe0bedd87f0>,
  <catboost.core.CatBoostRegressor at 0x7fe0ceb32200>,
  <catboost.core.CatBoostRegressor at 0x7fe0ce9b84c0>,
  <catboost.core.CatBoostRegressor at 0x7fe0bed8c760>,
  <catboost.core.CatBoostRegressor at 0x7fe0ce627940>],
 '1.0470',
 '1.0331')

# Ensemble

In [35]:
def objective(trial):
    global results_lgb, results_cat, train
    
    w1 = trial.suggest_float('w1', 0.0, 1.0)
    w2 = 1.0 - w1
    
    ensemble_vote = (w1 * results_lgb[0]) + (w2 * results_cat[0])
    rmsle = root_mean_squared_log_error(train['Premium Amount'], ensemble_vote)
    
    return rmsle

study_vote = optuna.create_study(direction='minimize')
study_vote.optimize(objective, n_trials=100)

print(f"Best Weights: {study_vote.best_params}")
print(f"Best RMSLE: {study_vote.best_value:.4f}")

[I 2024-12-27 06:13:50,513] A new study created in memory with name: no-name-f9b1792d-9712-40d4-b200-e6d54bff001d
[I 2024-12-27 06:13:50,530] Trial 0 finished with value: 1.0457973797810431 and parameters: {'w1': 0.5738049154531849}. Best is trial 0 with value: 1.0457973797810431.
[I 2024-12-27 06:13:50,543] Trial 1 finished with value: 1.0462403546306638 and parameters: {'w1': 0.2812729924615003}. Best is trial 0 with value: 1.0457973797810431.
[I 2024-12-27 06:13:50,555] Trial 2 finished with value: 1.0457072389813378 and parameters: {'w1': 0.7136440114053322}. Best is trial 2 with value: 1.0457072389813378.
[I 2024-12-27 06:13:50,567] Trial 3 finished with value: 1.0465784933914017 and parameters: {'w1': 0.14006825934310196}. Best is trial 2 with value: 1.0457072389813378.
[I 2024-12-27 06:13:50,580] Trial 4 finished with value: 1.045693008461401 and parameters: {'w1': 0.7744819362300569}. Best is trial 4 with value: 1.045693008461401.
[I 2024-12-27 06:13:50,591] Trial 5 finished wi

Best Weights: {'w1': 0.7993802602017795}
Best RMSLE: 1.0457


In [38]:
best_weights = study_vote.best_params
best_weights['w2'] = 1 - best_weights['w1']

In [39]:
ensemble_vote = (best_weights['w1'] * results_lgb[0]) + (best_weights['w2'] * results_cat[0])

rmse = root_mean_squared_log_error(train['Premium Amount'], ensemble_vote)
mae = mean_absolute_error(train['Premium Amount'], ensemble_vote)
r2 = r2_score(train['Premium Amount'], ensemble_vote)

print(f"\nPerformance Metrics:\n{'-'*25}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R²: {r2:.4f}")


Performance Metrics:
-------------------------
RMSE: 1.0457
MAE: 621.1012
R²: -0.1383


# Submission

In [41]:
submission = pd.read_csv('./data/sample_submission.csv')
submission.head()

Unnamed: 0,id,Premium Amount
0,1200000,1102.545
1,1200001,1102.545
2,1200002,1102.545
3,1200003,1102.545
4,1200004,1102.545


In [42]:
preds = (best_weights['w1'] * results_lgb[1]) + (best_weights['w2'] * results_cat[1])
submission['Premium Amount'] = preds
submission.head()

Unnamed: 0,id,Premium Amount
0,1200000,820.515617
1,1200001,787.700958
2,1200002,787.675737
3,1200003,803.375572
4,1200004,757.157465


In [43]:
submission.to_csv('./data/04_01_AbdML.csv', index=False)
!kaggle competitions submit -c playground-series-s4e12 -f "./data/04_01_AbdML.csv" -m "04_01_AbdML_ensemble"

100%|██████████████████████████████████████| 19.8M/19.8M [00:00<00:00, 37.4MB/s]
Successfully submitted to Regression with an Insurance Dataset

> **Public Score Comparison**

- **Baseline Model:**
  - **Public Score:** 1.04849  
  - **Rank:** 498 / 1653 (30.12%)  

- **Second Model (Feature Engineering + PowerTransformer):**
  - **Public Score:** 1.04506  
  - **Rank:** 334 / 1693 (19.72%)

- **NaN (NA col + No imputer):**
  - **Public Score:** 1.04496  
  - **Rank:** 378 / 1895 (19.94%)

- **Ensemble(lgbm + xgb + catboost):**
  - **Public Score:** 1.04475  
  - **Rank:** 346 / 1906 (18.15%)

- **AbdML(RKF) + Ensemble(lgbm + catboost):**
  - **Public Score:** 1.04473  
  - **Rank:** 357 / 1951 (18.29%)