In [32]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, root_mean_squared_error
import lightgbm

In [33]:
train_df_final = pd.read_csv('./input/processed/train_df_final_v5.csv')
test_df_final = pd.read_csv('./input/processed/test_df_final_v5.csv')

In [34]:
features = [col for col in train_df_final.columns if col not in ['id', 'target']]

X = train_df_final[features].values
y = train_df_final['target'].values
X_test = test_df_final[features].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X_test)

X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [35]:
lgbm = lightgbm.LGBMRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

lgbm.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    eval_metric='rmse',
    callbacks=[lightgbm.log_evaluation(period=100),
               lightgbm.early_stopping(stopping_rounds=100, verbose=False)]
)

y_pred = lgbm.predict(X_val)
r2 = r2_score(y_val, y_pred)
rmse = root_mean_squared_error(y_val, y_pred)
print(f"\n✅ LightGBM R²: {r2:.4f} | RMSE: {rmse:.4f}")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000320 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2687
[LightGBM] [Info] Number of data points in the train set: 10505, number of used features: 18
[LightGBM] [Info] Start training from score 0.247066
[100]	valid_0's rmse: 2.3545	valid_0's l2: 5.54369
[200]	valid_0's rmse: 2.24605	valid_0's l2: 5.04472
[300]	valid_0's rmse: 2.17247	valid_0's l2: 4.71964
[400]	valid_0's rmse: 2.1164	valid_0's l2: 4.47914
[500]	valid_0's rmse: 2.07847	valid_0's l2: 4.32005
[600]	valid_0's rmse: 2.04621	valid_0's l2: 4.18698
[700]	valid_0's rmse: 2.02088	valid_0's l2: 4.08395
[800]	valid_0's rmse: 1.99718	valid_0's l2: 3.98871
[900]	valid_0's rmse: 1.97669	valid_0's l2: 3.90731
[1000]	valid_0's rmse: 1.95945	valid_0's l2: 3.83945

✅ LightGBM R²: 0.5718 | RMSE: 1.9595


