# In [None]:
import warnings, json, joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor

# Suppress all warnings for the remainder of the run.
warnings.filterwarnings("ignore")

# --- Configuration ---
CSV_PATH = "9_makale_data_guncel.csv"
TARGET = "band_gap"  # column used as the regression target
ID_COLS = ["material_id", "formula_pretty"]  # identifier columns, never used as features
RANDOM_STATE = 42

# --- Data Loading ---
df = pd.read_csv(CSV_PATH)

# Identifier columns may or may not be present in the CSV; drop only those
# that actually exist so the call cannot fail on a missing label.
present_id_cols = [col for col in ID_COLS if col in df.columns]
df = df.drop(columns=present_id_cols)

# Target as float; features are every remaining numeric column.
y_all = df[TARGET].astype(float)
non_target = df.drop(columns=[TARGET])
X_all = non_target.select_dtypes(include="number").copy()

# --- Loading Predefined Index Splits ---
# Each split file provides an "idx" column, used below as positional
# (iloc) row indices into X_all / y_all.
idx_tr, idx_va, idx_te = [
    pd.read_csv(split_path)["idx"].values
    for split_path in ("split_train.csv", "split_val.csv", "split_test.csv")
]

X_train, X_val, X_test = [X_all.iloc[rows] for rows in (idx_tr, idx_va, idx_te)]
y_train, y_val, y_test = [y_all.iloc[rows] for rows in (idx_tr, idx_va, idx_te)]

# --- Model Training with Optimized Parameters ---
# Hyperparameters are fixed here; the search that produced them is not part
# of this file. The same dict is later written to disk, so key order matters
# for the saved JSON and is kept as-is.
best_params = dict(
    n_estimators=222,
    max_depth=16,
    min_samples_split=5,
    min_samples_leaf=2,
    max_features="sqrt",
    bootstrap=True,
    random_state=RANDOM_STATE,
    n_jobs=-1,
)

# Build the forest, then fit it on the training split only.
model = RandomForestRegressor(**best_params)
model.fit(X_train, y_train)

# --- Metrics Evaluation ---
def print_metrics(set_name: str, y_true, y_pred) -> None:
    """Print R-squared, MAE and MSE for one data split on a single line.

    Args:
        set_name: Label for the split (e.g. "Train"); padded to a minimum
            width of 5 characters in the output.
        y_true: Ground-truth target values.
        y_pred: Predicted values, passed to the metric functions together
            with ``y_true``.

    Returns:
        None. The metrics are written to standard output with six decimals.
    """
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    # The label previously read "RÂ²" (a mis-decoded superscript two). The
    # character is written as an escape so the source stays ASCII-only and
    # cannot be garbled again by a wrong file encoding.
    print(f"{set_name:5} -> R\u00b2: {r2:.6f} | MAE: {mae:.6f} | MSE: {mse:.6f}")

print("\n=== Final 70/15/15 Metrics ===")
# Report the same three metrics for every split, in train/val/test order.
for split_label, X_part, y_part in (
    ("Train", X_train, y_train),
    ("Val", X_val, y_val),
    ("Test", X_test, y_test),
):
    print_metrics(split_label, y_part, model.predict(X_part))

# --- Model Persistence ---
# Store the fitted estimator and the hyperparameters used to build it.
joblib.dump(model, "rf_model.joblib")
params_text = json.dumps(best_params, indent=2)
with open("rf_best_params.json", "w") as params_file:
    params_file.write(params_text)

print("\nSaved: rf_model.joblib and rf_best_params.json")