In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import math

# Load the raw table from CSV (the path below is a placeholder to be replaced).
data = pd.read_csv('PATH_to_cvs')

# Drop the 'Eb' and 'Ec' columns and rename 'Unnamed: 0' to 'Label'.
# Chained (no inplace=True) so re-running the cell never mutates an existing frame.
data_cleaned = (
    data
    .drop(columns=['Eb', 'Ec'])
    .rename(columns={'Unnamed: 0': 'Label'})
)

# Features are every remaining column except 'Label' and the target 'Eform'.
X = data_cleaned.drop(columns=['Label', 'Eform']).values
y = data_cleaned['Eform'].values

# Split BEFORE scaling. Fitting the scaler on the full matrix would let the
# min/max of the held-out rows shape the training features (data leakage).
# The row partition depends only on the sample count and random_state, so
# y_train / y_test are the same rows as when the scaled matrix was split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the [0, 1] scaling from the training rows only, then apply it unchanged
# to the test rows (test values may therefore fall slightly outside [0, 1]).
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full matrix under the training-fitted scaling, kept for any code that still
# refers to X_scaled.
X_scaled = scaler.transform(X)

In [None]:
import keras as K
from keras.models import Sequential
from keras.layers import Dense,Dropout,Activation
from keras.callbacks import EarlyStopping


# Fully connected network: four tanh hidden layers of 100 units each
# ("full_1" .. "full_4") followed by a single linear output unit.
n_features = X_train.shape[1]

model = Sequential()
# Only the first layer is told the input width.
model.add(Dense(100, input_dim=n_features, activation="tanh", name="full_1"))
for layer_idx in (2, 3, 4):
    model.add(Dense(100, activation="tanh", name=f"full_{layer_idx}"))
model.add(Dense(1, activation="linear"))

model.summary()

In [None]:

# Mean squared error loss with the Adam optimizer.
model.compile(loss='mean_squared_error', optimizer='adam')

# Stop when val_loss has not improved for 10 consecutive epochs.
# restore_best_weights=True asks the callback to put back the weights from the
# best val_loss epoch; without it the model keeps the weights of the last
# epoch run, i.e. after `patience` epochs without improvement.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# validation_split=0.2 holds out part of the training data to compute val_loss.
model.fit(
    X_train,
    y_train,
    batch_size=512,
    epochs=200,
    verbose=0,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Predictions for both splits, used by the metric, plotting and export cells.
y_pred_train_model = model.predict(X_train)
y_pred_test_model = model.predict(X_test)

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# RMSE is taken as sqrt(MSE) rather than via `squared=False`: that keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, where passing it raises a
# TypeError. The explicit square root gives the same value on every version.
rmse_tr = np.sqrt(mean_squared_error(y_train, y_pred_train_model))
rmse_te = np.sqrt(mean_squared_error(y_test, y_pred_test_model))
mae_tr = mean_absolute_error(y_train, y_pred_train_model)
mae_te = mean_absolute_error(y_test, y_pred_test_model)
r2_tr = r2_score(y_train, y_pred_train_model)
r2_te = r2_score(y_test, y_pred_test_model)

# Calculate Standard Error
# NOTE(review): each value below is the metric itself divided by sqrt(n).
# The usual standard error is the standard deviation of the per-sample
# quantity divided by sqrt(n), so these numbers may not be what is intended
# (particularly for R²). Left numerically unchanged; confirm the definition.
sqrt_n_train = math.sqrt(len(y_train))
sqrt_n_test = math.sqrt(len(y_test))
train_se_rmse = rmse_tr / sqrt_n_train
test_se_rmse = rmse_te / sqrt_n_test
train_se_mae = mae_tr / sqrt_n_train
test_se_mae = mae_te / sqrt_n_test
train_se_r2 = r2_tr / sqrt_n_train
test_se_r2 = r2_te / sqrt_n_test

# Print results
print('RMSE (training) = %.3f' % rmse_tr)
print('RMSE (test) = %.3f' % rmse_te)
print('MAE (training) = %.3f' % mae_tr)
print('MAE (test) = %.3f' % mae_te)
print('R² (training) = %.3f' % r2_tr)
print('R² (test) = %.3f' % r2_te)

print(f'Training Standard Error_RMSE: {train_se_rmse}')
print(f'Test Standard Error_RMSE: {test_se_rmse}')
print(f'Training Standard Error_MAE: {train_se_mae}')
print(f'Test Standard Error_MAE: {test_se_mae}')
print(f'Training Standard Error_R²: {train_se_r2}')
print(f'Test Standard Error_R²: {test_se_r2}')

In [None]:
import matplotlib.pyplot as plt

# Scatter of predicted against actual target values for both splits, drawn on
# an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(y_train, y_pred_train_model, alpha=0.5, color='blue', label='Training')
ax.scatter(y_test, y_pred_test_model, alpha=0.5, color='r', label='Test')

# Dashed y = x reference line over the fixed range [-5, 10].
ax.plot([-5, 10], [-5, 10], color='k', linewidth=2, linestyle='--', alpha=0.4)

ax.legend()
ax.set_xlabel('Eform_DFT')
ax.set_ylabel('Eform_ML_MLP')

fig.savefig('./MLP_Eform.jpg', dpi=500, bbox_inches='tight')
plt.show()

In [None]:
# Save results to CSV for use in other software

# Output column name -> 1-D array of values. Predictions are flattened to 1-D
# before being stored.
d = {
    'y_train': y_train,
    'y_train_pred': y_pred_train_model.ravel(),
    'y_test': y_test,
    'y_test_pred': y_pred_test_model.ravel(),
}

# Build a frame with one ROW per array, then transpose so that each array
# becomes a column. Presumably done this way because the train and test arrays
# differ in length and pandas fills the shorter rows with missing values.
rows_frame = pd.DataFrame.from_dict(d, orient='index')
results_df = pd.DataFrame(rows_frame.to_numpy().T, columns=list(d))

results_df.to_csv('./MLP_Eform_results.csv', index=False)