In [1]:
import warnings
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, root_mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tabpfn import TabPFNRegressor

In [2]:
# Silence every warning category for the rest of the session so library
# chatter does not flood the cell outputs.
# NOTE(review): this blanket filter also hides deprecation and convergence
# warnings; narrow it with `category=` if those should stay visible.
warnings.filterwarnings(action='ignore')

In [3]:
# Load the pre-processed train and test tables from disk in one pass.
# Paths are relative to the notebook's working directory.
train_df_final, test_df_final = (
    pd.read_csv(csv_path)
    for csv_path in (
        './input/processed/train_df_final_v5.csv',
        './input/processed/test_df_final_v5.csv',
    )
)

In [4]:
# Preview the first five training rows to sanity-check the loaded columns.
train_df_final.head(n=5)

Unnamed: 0,id,climatology_temp,target,elev,lat,lon,doy_sin,doy_cos,day_of_year,day_length,...,humid_evening_x_slp_std,humid_cloud_stress_alt,humidity_cloud_dew_combo,dew_evening_x_doy,climatology_temp_x_doy_sin,climatology_temp_x_doy_cos,station_month_anomaly_mean,station_month_anomaly_centered,station_norm_anomaly_x_humidity,dp_morning_x_climatology
0,0,-2.707143,-3.992857,115.62,37.9019,127.0607,0.017213,0.999852,1,9.426812,...,65.252515,0.0,-0.0,-17.183333,-0.046599,-2.706742,2.505358,-2.674406,-2.435355,45.209286
1,1,-3.646429,-1.653571,115.62,37.9019,127.0607,0.034422,0.999407,2,9.437325,...,48.571459,0.0,-0.0,-34.066667,-0.125516,-3.644268,2.505358,-0.975001,17.277093,60.530714
2,2,-2.694643,-0.005357,115.62,37.9019,127.0607,0.05162,0.998667,3,9.448699,...,32.524114,0.0,-0.0,-55.65,-0.139097,-2.69105,2.505358,-2.436906,0.8129,42.260982
3,3,-2.501786,-0.898214,115.62,37.9019,127.0607,0.068802,0.99763,4,9.460926,...,212.122251,13.090675,-637.777778,-22.4,-0.172129,-2.495857,2.505358,-1.42262,29.573705,36.77625
4,4,-2.625,-1.775,115.62,37.9019,127.0607,0.085965,0.996298,5,9.473997,...,23.072707,0.0,-0.0,-100.333333,-0.225658,-2.615283,2.505358,-0.647025,19.704225,38.15


# Dataset Splitting

In [5]:
# Model inputs are every training column apart from the row identifier and
# the label; the original column order is kept.
features = [
    name
    for name in train_df_final.columns
    if name != 'id' and name != 'target'
]

# Pull out plain arrays: the label vector, the training matrix, and the test
# matrix built from the same feature columns in the same order.
y = train_df_final['target'].values
X = train_df_final[features].values
X_test = test_df_final[features].values

# Hold out 20% of the training rows for validation; the fixed seed keeps the
# partition repeatable between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Configure the TabPFN regressor to run on the CPU.
# NOTE(review): `ignore_pretraining_limits=True` presumably lifts TabPFN's
# dataset-size guard and `n_jobs=-1` presumably uses all cores; confirm
# against the installed TabPFN version.
regressor = TabPFNRegressor(
    device="cpu",
    ignore_pretraining_limits=True,
    n_jobs=-1,
)

# Fit on the training split, then predict the held-out validation rows.
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_val)

# Score the validation predictions with RMSE and R², then report both.
rmse = root_mean_squared_error(y_val, y_pred)
r2 = r2_score(y_val, y_pred)
print(f"\n✅ TabPFN R²: {r2:.4f} | RMSE: {rmse:.4f}")

In [170]:
# Predict the test set with the regressor fitted in the training cell.
y_test_pred = regressor.predict(X_test)

# Assemble the submission table: one row per test id with its predicted target.
submission = pd.DataFrame({
    "id": test_df_final["id"],
    "target": y_test_pred
})

# `DataFrame.to_csv` does not create missing parent directories and raises
# OSError if `./output` is absent. Create it up front so a (potentially long)
# prediction run is never lost at the final write.
output_dir = Path('./output')
output_dir.mkdir(parents=True, exist_ok=True)

# Timestamp the file name so repeated runs never overwrite an earlier submission.
now = datetime.now().strftime("%Y%m%d_%H%M%S")
submission.to_csv(output_dir / f'submission_tabpfn_{now}.csv', index=False)