In [6]:
import os, joblib, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_absolute_error

# Input table, name of the regression target column, and identifier columns
# that must not be used as features.
CSV_PATH = "9_makale_data_guncel.csv"
TARGET = "band_gap"
ID_COLS = ["material_id", "formula_pretty"]

# Read the table first and drop the identifier columns in a second step.
# The previous one-liner inspected `df.columns` inside the very expression
# that defines `df`: on a fresh kernel that raises NameError, and on a re-run
# it filtered against the stale (already stripped) frame so nothing was
# dropped. `errors="ignore"` already tolerates identifier columns that are
# absent, so no manual membership filter is needed.
df = pd.read_csv(CSV_PATH)
df = df.drop(columns=ID_COLS, errors="ignore")

# Target as float; features are every remaining numeric column.
y_all = df[TARGET].astype(float)
X_all = df.drop(columns=[TARGET]).select_dtypes(include="number")

# Each split file carries an "idx" column; its values are used below as
# positional (iloc) row selectors into X_all / y_all.
idx_tr, idx_va, idx_te = (
    pd.read_csv(split_file)["idx"].values
    for split_file in ("split_train.csv", "split_val.csv", "split_test.csv")
)

# Materialise the feature matrix and target vector of every partition.
X_train = X_all.iloc[idx_tr]
y_train = y_all.iloc[idx_tr]

X_val = X_all.iloc[idx_va]
y_val = y_all.iloc[idx_va]

X_test = X_all.iloc[idx_te]
y_test = y_all.iloc[idx_te]

# Hyperparameters passed verbatim to MLPRegressor.
# NOTE(review): the high-precision alpha / learning_rate_init values look like
# the output of a hyperparameter search -- confirm their provenance.
mlp_params = {
    'hidden_layer_sizes': (320, 160, 80),
    'alpha': 0.004168070141514902,
    'learning_rate_init': 0.0009845018911181563,
    'batch_size': 128,
    'max_iter': 500,
    # With early_stopping=True scikit-learn holds out `validation_fraction`
    # of the data given to fit() and stops after `n_iter_no_change` epochs
    # without improvement; X_val / y_val are not involved in that decision.
    'early_stopping': True,
    'validation_fraction': 0.1,
    'n_iter_no_change': 10,
    # Fixed seed: without it the weight initialisation, batch shuffling and
    # the internal early-stopping hold-out differ on every run, so neither
    # the saved model nor the reported metrics could be reproduced.
    'random_state': 42,
}

# Standardisation statistics come from the training partition only and are
# then applied unchanged to the validation and test partitions.
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_val_s = scaler.transform(X_val)
X_test_s = scaler.transform(X_test)

model = MLPRegressor(**mlp_params).fit(X_train_s, y_train)

def pr(n, yt, yp):
    """Print the R² and mean absolute error of predictions for one split.

    Args:
        n: Label printed in front of the metrics (e.g. the split name).
        yt: Ground-truth target values.
        yp: Predicted target values, aligned with ``yt``.
    """
    r2 = r2_score(yt, yp)
    mae = mean_absolute_error(yt, yp)
    print(f"{n} -> R²: {r2:.4f} | MAE: {mae:.4f}")
# Report the fitted model's metrics on every partition, in the same order
# as before: train, validation, test.
for split_label, split_targets, split_features in (
    ("Train", y_train, X_train_s),
    ("Validation", y_val, X_val_s),
    ("Test", y_test, X_test_s),
):
    pr(split_label, split_targets, model.predict(split_features))

# Persist the fitted scaler together with the model: inference must apply
# the same standardisation that was used for training.
os.makedirs("mlp_artifacts", exist_ok=True)
for fitted_object, destination in (
    (scaler, "mlp_artifacts/mlp_scaler.joblib"),
    (model, "mlp_artifacts/mlp_trained.joblib"),
):
    joblib.dump(fitted_object, destination)
print("saved: mlp_artifacts/mlp_scaler.joblib, mlp_artifacts/mlp_trained.joblib")


Train -> R²: 0.9461 | MAE: 0.1728
Validation -> R²: 0.9027 | MAE: 0.2258
Test -> R²: 0.8982 | MAE: 0.2290
saved: mlp_artifacts/mlp_scaler.joblib, mlp_artifacts/mlp_trained.joblib
