In [3]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import Pipeline
from sklearn.svm import SVR

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

import matplotlib.pyplot as plt

import warnings
# Silence every warning for the rest of the session. This also hides warnings
# raised by scikit-learn and pandas, so remove it temporarily when debugging.
warnings.filterwarnings('ignore')

In [None]:
# Load the "all" dataset from CSV and show the frame as the cell output.
all_csv_path = '../../../data/all/G_all_4435.csv'
df1 = pd.read_csv(all_csv_path)
df1

In [5]:
# Remove the identifier column, then separate the GVRH target from the
# remaining feature columns.
data_set = df1.drop(columns='material_id')
y = data_set['GVRH']
X = data_set.drop(columns='GVRH')

In [None]:
# SVR hyper-parameters; the later dataset sections reuse this dict.
svr_params = {'C': 300, 'epsilon': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}

# All preprocessing lives inside the pipeline so it is re-fitted on the
# training part of every fold and the fitted pipeline can be applied directly
# to raw features.
# The imputer uses the median: previously each fold was median-imputed by hand
# *before* the pipeline, which turned the pipeline's declared mean imputer
# into a no-op. Median is what actually produced the reported metrics, so it
# is kept here and the redundant manual step is removed.
# NOTE(review): if mean imputation was the real intent, change the strategy
# here (the metrics will then differ from earlier runs).
svr_pipeline = Pipeline([
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='median')),
    ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)),
    ('svr', SVR(**svr_params)),
])

# Shuffled 10-fold split with a fixed seed so the folds are reproducible.
kf = KFold(n_splits=10, shuffle=True, random_state=12)

# Per-fold metrics, one entry per fold.
scores = []
mae_scores = []
rmse_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Fit imputer + scaler + SVR on the training fold only (no test leakage).
    svr_pipeline.fit(X_train, y_train)
    y_pred = svr_pipeline.predict(X_test)

    # Pipeline.score delegates to the final estimator's score (R^2 for SVR).
    score = svr_pipeline.score(X_test, y_test)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))

    scores.append(score)
    mae_scores.append(mae)
    rmse_scores.append(rmse)

print("R^2 scores:", scores)
print("Mean R^2 score:", sum(scores) / len(scores))
print("MAE scores:", mae_scores)
print("Mean MAE:", sum(mae_scores) / len(mae_scores))
print("RMSE scores:", rmse_scores)
print("Mean RMSE:", sum(rmse_scores) / len(rmse_scores))


#### FP 343

In [None]:
# Load the FP dataset from CSV and show the frame as the cell output.
fp_csv_path = '../../../data/FP/G_FP_343.csv'
df2 = pd.read_csv(fp_csv_path)
df2

In [9]:
# Remove the identifier column, then separate the GVRH target from the
# remaining feature columns.
data_set = df2.drop(columns='material_id')
y = data_set['GVRH']
X = data_set.drop(columns='GVRH')

In [None]:
# Fresh pipeline for this dataset; `svr_params` comes from the first SVR cell.
# All preprocessing lives inside the pipeline so it is re-fitted on the
# training part of every fold and the fitted pipeline can be applied directly
# to raw features.
# The imputer uses the median: previously each fold was median-imputed by hand
# *before* the pipeline, which turned the pipeline's declared mean imputer
# into a no-op. Median is what actually produced the reported metrics, so it
# is kept here and the redundant manual step is removed.
# NOTE(review): if mean imputation was the real intent, change the strategy
# here (the metrics will then differ from earlier runs).
svr_pipeline = Pipeline([
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='median')),
    ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)),
    ('svr', SVR(**svr_params)),
])

# Shuffled 10-fold split with a fixed seed so the folds are reproducible.
kf = KFold(n_splits=10, shuffle=True, random_state=12)

# Per-fold metrics, one entry per fold.
scores = []
mae_scores = []
rmse_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Fit imputer + scaler + SVR on the training fold only (no test leakage).
    svr_pipeline.fit(X_train, y_train)
    y_pred = svr_pipeline.predict(X_test)

    # Pipeline.score delegates to the final estimator's score (R^2 for SVR).
    score = svr_pipeline.score(X_test, y_test)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))

    scores.append(score)
    mae_scores.append(mae)
    rmse_scores.append(rmse)

print("R^2 scores:", scores)
print("Mean R^2 score:", sum(scores) / len(scores))
print("MAE scores:", mae_scores)
print("Mean MAE:", sum(mae_scores) / len(mae_scores))
print("RMSE scores:", rmse_scores)
print("Mean RMSE:", sum(rmse_scores) / len(rmse_scores))


#### IFS-HC 34

In [None]:
# Load the IFS-HC dataset from CSV and show the frame as the cell output.
ifs_hc_csv_path = '../../../data/IFS-HC/G_IFS-HC_34.csv'
df3 = pd.read_csv(ifs_hc_csv_path)
df3

In [13]:
# Separate the GVRH target from the feature columns.
# NOTE(review): unlike the two earlier sections, no 'material_id' column is
# dropped here -- presumably this file does not contain one; confirm.
y = df3['GVRH']
X = df3.drop(columns='GVRH')

In [None]:
# Fresh pipeline for this dataset; `svr_params` comes from the first SVR cell.
# All preprocessing lives inside the pipeline so it is re-fitted on the
# training part of every fold and the fitted pipeline can be applied directly
# to raw features.
# The imputer uses the median: previously each fold was median-imputed by hand
# *before* the pipeline, which turned the pipeline's declared mean imputer
# into a no-op. Median is what actually produced the reported metrics, so it
# is kept here and the redundant manual step is removed.
# NOTE(review): if mean imputation was the real intent, change the strategy
# here (the metrics will then differ from earlier runs).
svr_pipeline = Pipeline([
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='median')),
    ('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)),
    ('svr', SVR(**svr_params)),
])

# Shuffled 10-fold split with a fixed seed so the folds are reproducible.
kf = KFold(n_splits=10, shuffle=True, random_state=12)

# Per-fold metrics, one entry per fold.
scores = []
mae_scores = []
rmse_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Fit imputer + scaler + SVR on the training fold only (no test leakage).
    svr_pipeline.fit(X_train, y_train)
    y_pred = svr_pipeline.predict(X_test)

    # Pipeline.score delegates to the final estimator's score (R^2 for SVR).
    score = svr_pipeline.score(X_test, y_test)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))

    scores.append(score)
    mae_scores.append(mae)
    rmse_scores.append(rmse)

print("R^2 scores:", scores)
print("Mean R^2 score:", sum(scores) / len(scores))
print("MAE scores:", mae_scores)
print("Mean MAE:", sum(mae_scores) / len(mae_scores))
print("RMSE scores:", rmse_scores)
print("Mean RMSE:", sum(rmse_scores) / len(rmse_scores))
