In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error, mean_squared_error


import warnings
warnings.filterwarnings('ignore')

#### ALL 4435

In [None]:
# Read the 'G_all_4435' table from CSV into df1.
# The commented-out line below points at the sibling 'B_all_4435' file.
# NOTE(review): the following cells read a 'GVRH' column as the target, so
# switching to the B file presumably needs a different target column -- confirm.
df1 = pd.read_csv('../../../data/all/G_all_4435.csv')
# df1 = pd.read_csv('../../../data/all/B_all_4435.csv')
# Bare expression: show the loaded frame as the cell output.
df1

In [3]:
# Build the modelling inputs from df1: discard the 'material_id' column,
# then separate the 'GVRH' column (target) from the remaining columns (features).
data_set = df1.drop(columns='material_id')
y = data_set['GVRH']
X = data_set.drop(columns='GVRH')

In [None]:

# 10-fold cross-validation of an MLP regressor on the current X / y.
#
# Imputation and scaling live inside the pipeline only, so both are re-fit on
# the training rows of each fold and merely applied to the held-out rows.
# The previous version additionally median-imputed every fold by hand before
# calling the pipeline, which left the pipeline's own 'mean' imputer with
# nothing to do; the pipeline now declares the median strategy that was
# effectively being applied.
#
# 'random_state' pins the MLP weight initialisation / SGD shuffling so the
# reported metrics are repeatable after "Restart Kernel -> Run All".
mlp_params = {
    'hidden_layer_sizes': (128, 64, 32),
    'activation': 'tanh',
    'solver': 'sgd',
    'max_iter': 3000,
    'random_state': 99,
}
mlp_pipeline = Pipeline([
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='median')),
    ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)),
    ('ann', MLPRegressor(**mlp_params)),
])
kf = KFold(n_splits=10, shuffle=True, random_state=99)

# Per-fold metrics, stored as plain Python floats so the printed lists stay
# readable regardless of the NumPy scalar repr.
scores = []
mae_scores = []
rmse_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Every pipeline step is fitted on this fold's training rows only.
    mlp_pipeline.fit(X_train, y_train)
    y_pred = mlp_pipeline.predict(X_test)

    score = float(mlp_pipeline.score(X_test, y_test))  # R^2 on the held-out fold
    mae = float(mean_absolute_error(y_test, y_pred))
    rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))

    scores.append(score)
    mae_scores.append(mae)
    rmse_scores.append(rmse)

print("R^2 scores:", scores)
print("Mean R^2 score:", sum(scores) / len(scores))
print("MAE scores:", mae_scores)
print("Mean MAE:", sum(mae_scores) / len(mae_scores))
print("RMSE scores:", rmse_scores)
print("Mean RMSE:", sum(rmse_scores) / len(rmse_scores))


#### FP 343

In [None]:
# Read the 'G_FP_343' table from CSV into df2.
# The commented-out line below points at the sibling 'B_FP_343' file.
# NOTE(review): the following cells read a 'GVRH' column as the target, so
# switching to the B file presumably needs a different target column -- confirm.
df2 = pd.read_csv('../../../data/FP/G_FP_343.csv')
# df2 = pd.read_csv('../../../data/FP/B_FP_343.csv')
# Bare expression: show the loaded frame as the cell output.
df2

In [7]:
# Build the modelling inputs from df2: discard the 'material_id' column,
# then separate the 'GVRH' column (target) from the remaining columns (features).
# Note that this rebinds data_set, X and y used by the previous section.
data_set = df2.drop(columns='material_id')
y = data_set['GVRH']
X = data_set.drop(columns='GVRH')

In [None]:
# 10-fold cross-validation of an MLP regressor on the current X / y.
#
# Imputation and scaling live inside the pipeline only, so both are re-fit on
# the training rows of each fold and merely applied to the held-out rows.
# The previous version additionally median-imputed every fold by hand before
# calling the pipeline, which left the pipeline's own 'mean' imputer with
# nothing to do; the pipeline now declares the median strategy that was
# effectively being applied.
#
# mlp_params is the hyper-parameter dict defined in an earlier cell. The seed
# is merged in here (overriding any existing 'random_state' entry) so the MLP
# weight initialisation / SGD shuffling is repeatable across runs.
mlp_pipeline = Pipeline([
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='median')),
    ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)),
    ('ann', MLPRegressor(**{**mlp_params, 'random_state': 99})),
])
kf = KFold(n_splits=10, shuffle=True, random_state=99)

# Per-fold metrics, stored as plain Python floats so the printed lists stay
# readable regardless of the NumPy scalar repr.
scores = []
mae_scores = []
rmse_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Every pipeline step is fitted on this fold's training rows only.
    mlp_pipeline.fit(X_train, y_train)
    y_pred = mlp_pipeline.predict(X_test)

    score = float(mlp_pipeline.score(X_test, y_test))  # R^2 on the held-out fold
    mae = float(mean_absolute_error(y_test, y_pred))
    rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))

    scores.append(score)
    mae_scores.append(mae)
    rmse_scores.append(rmse)

print("R^2 scores:", scores)
print("Mean R^2 score:", sum(scores) / len(scores))
print("MAE scores:", mae_scores)
print("Mean MAE:", sum(mae_scores) / len(mae_scores))
print("RMSE scores:", rmse_scores)
print("Mean RMSE:", sum(rmse_scores) / len(rmse_scores))


#### IFS-HC 34

In [None]:
# Read the 'G_IFS-HC_34' table from CSV into df3.
# The commented-out line below points at the sibling 'B_IFS-HC_34' file.
# NOTE(review): the following cells read a 'GVRH' column as the target, so
# switching to the B file presumably needs a different target column -- confirm.
df3 = pd.read_csv('../../../data/IFS-HC/G_IFS-HC_34.csv')
# df3 = pd.read_csv('../../../data/IFS-HC/B_IFS-HC_34.csv')
# Bare expression: show the loaded frame as the cell output.
df3

In [None]:
# Separate the 'GVRH' column (target) from the remaining df3 columns (features).
# Unlike the two earlier sections, no 'material_id' column is dropped here.
# NOTE(review): presumably this file does not contain that column -- confirm.
y = df3['GVRH']
X = df3.drop(columns='GVRH')

In [None]:
# 10-fold cross-validation of an MLP regressor on the current X / y.
#
# Imputation and scaling live inside the pipeline only, so both are re-fit on
# the training rows of each fold and merely applied to the held-out rows.
# The previous version additionally median-imputed every fold by hand before
# calling the pipeline, which left the pipeline's own 'mean' imputer with
# nothing to do; the pipeline now declares the median strategy that was
# effectively being applied.
#
# mlp_params is the hyper-parameter dict defined in an earlier cell. The seed
# is merged in here (overriding any existing 'random_state' entry) so the MLP
# weight initialisation / SGD shuffling is repeatable across runs.
mlp_pipeline = Pipeline([
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='median')),
    ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)),
    ('ann', MLPRegressor(**{**mlp_params, 'random_state': 99})),
])
kf = KFold(n_splits=10, shuffle=True, random_state=99)

# Per-fold metrics, stored as plain Python floats so the printed lists stay
# readable regardless of the NumPy scalar repr.
scores = []
mae_scores = []
rmse_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Every pipeline step is fitted on this fold's training rows only.
    mlp_pipeline.fit(X_train, y_train)
    y_pred = mlp_pipeline.predict(X_test)

    score = float(mlp_pipeline.score(X_test, y_test))  # R^2 on the held-out fold
    mae = float(mean_absolute_error(y_test, y_pred))
    rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))

    scores.append(score)
    mae_scores.append(mae)
    rmse_scores.append(rmse)

print("R^2 scores:", scores)
print("Mean R^2 score:", sum(scores) / len(scores))
print("MAE scores:", mae_scores)
print("Mean MAE:", sum(mae_scores) / len(mae_scores))
print("RMSE scores:", rmse_scores)
print("Mean RMSE:", sum(rmse_scores) / len(rmse_scores))
