In [8]:
import os
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
from ngboost import NGBRegressor
from ngboost.distns import Normal
from sklearn.metrics import r2_score, root_mean_squared_error
from sklearn.model_selection import train_test_split

In [9]:
warnings.filterwarnings('ignore')

In [10]:
# Load the train and test tables from the processed-input directory,
# train first, then test.
train_df_final, test_df_final = (
    pd.read_csv(csv_path)
    for csv_path in (
        './input/processed/train_df_imputed.csv',
        './input/processed/test_df_imputed.csv',
    )
)

In [11]:
# Preview the first five rows of the training table as a quick sanity check.
train_df_final.head(n=5)

Unnamed: 0,id,station,station_name,date,cloud_cover_0,cloud_cover_1,cloud_cover_10,cloud_cover_11,cloud_cover_12,cloud_cover_13,...,wind_speed_23,wind_speed_3,wind_speed_4,wind_speed_5,wind_speed_6,wind_speed_7,wind_speed_8,wind_speed_9,climatology_temp,target
0,0,98,동두천,01-01,0.0,0.0,9.0,0.0,3.0,3.0,...,2.3,0.6,0.3,0.7,0.6,0.7,0.8,0.1,-2.707143,-3.992857
1,1,98,동두천,01-02,0.0,0.0,0.0,0.0,0.0,0.0,...,0.7,0.2,0.0,1.1,1.3,0.5,0.9,0.4,-3.646429,-1.653571
2,2,98,동두천,01-03,0.0,0.0,0.0,0.0,0.0,0.0,...,0.4,1.5,0.8,0.8,0.9,1.0,1.1,0.1,-2.694643,-0.005357
3,3,98,동두천,01-04,0.0,0.0,2.0,0.0,0.0,1.0,...,0.9,0.3,0.5,0.2,0.5,1.3,0.5,0.2,-2.501786,-0.898214
4,4,98,동두천,01-05,0.0,0.0,0.0,0.0,0.0,0.0,...,1.4,1.1,1.6,1.4,1.8,0.5,1.1,0.6,-2.625,-1.775


# Dataset Splitting

In [14]:
# Columns that must never reach the model: identifiers, the label itself and
# the station/date descriptors. 'Unnamed: 0' is the row index that was written
# into the CSV (it shows up as a column in the preview of train_df_final);
# leaving it in would let the model train on row position.
non_feature_columns = {'Unnamed: 0', 'id', 'target', 'station', 'date', 'station_name'}
features = [col for col in train_df_final.columns if col not in non_feature_columns]

# Fail early with a readable message if the test table does not carry every
# feature column, instead of a bare pandas KeyError from the indexing below.
missing_in_test = [col for col in features if col not in test_df_final.columns]
if missing_in_test:
    raise KeyError(f"test_df_final is missing feature columns: {missing_in_test}")

# Plain NumPy arrays in the same column order for train and test.
X = train_df_final[features].values
y = train_df_final['target'].values
X_test = test_df_final[features].values

# 80/20 hold-out split; the fixed seed keeps the split identical across runs.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
# Fit an NGBoost regressor with library defaults on the training split.
# The seed is pinned (same value as the train/validation split) so that a
# Restart & Run All reproduces the same model and metrics.
# NOTE(review): the training log prints val_loss=0.0000 because no validation
# data is passed to fit(); the hold-out set is only used for the metrics below.
regressor = NGBRegressor(random_state=42)

regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_val)

# Hold-out quality: coefficient of determination and root-mean-squared error.
r2 = r2_score(y_val, y_pred)
rmse = root_mean_squared_error(y_val, y_pred)
print(f"\n✅ NGB R²: {r2:.4f} | RMSE: {rmse:.4f}")

[iter 0] loss=2.5012 val_loss=0.0000 scale=1.0000 norm=2.3967
[iter 100] loss=2.3205 val_loss=0.0000 scale=1.0000 norm=2.1060
[iter 200] loss=2.2442 val_loss=0.0000 scale=1.0000 norm=1.9844
[iter 300] loss=2.1857 val_loss=0.0000 scale=1.0000 norm=1.8995
[iter 400] loss=2.1342 val_loss=0.0000 scale=1.0000 norm=1.8275

✅ NGB R²: 0.4727 | RMSE: 2.1743


In [170]:
# Predict the target for every row of the test table with the fitted model.
y_test_pred = regressor.predict(X_test)

# Save submission: one row per test id with its predicted target.
submission = pd.DataFrame({
    "id": test_df_final["id"],
    "target": y_test_pred
})

# Make sure the output directory exists; to_csv does not create missing
# parent directories and would otherwise fail on a fresh checkout.
output_dir = './output'
os.makedirs(output_dir, exist_ok=True)

# Timestamp the file name so repeated runs never overwrite earlier submissions.
now = datetime.now().strftime("%Y%m%d_%H%M%S")
submission.to_csv(f'{output_dir}/submission_ngb_{now}.csv', index=False)