In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestRegressor

In [2]:
def relative_deviation(obs, pred):
    """Mean absolute relative deviation of predictions from observations, in percent.

    Computes ``100 * sum(|(obs - pred) / obs|) / len(obs)``.

    Parameters
    ----------
    obs : array-like supporting elementwise arithmetic (e.g. numpy array)
        Observed (reference) values. They form the denominator, so a zero
        entry produces ``inf``/``nan`` in the result.
    pred : same kind and shape as ``obs``
        Predicted values.

    Returns
    -------
    float
        Average relative deviation expressed as a percentage.
    """
    # Take the absolute value of the whole ratio, not just the numerator:
    # otherwise a negative observation contributes a negative term and
    # cancels out genuine error from the other samples.
    return np.sum(np.abs((obs - pred) / obs)) / len(obs) * 100


def R2(target, pred):
    """Squared Pearson correlation between ``target`` and ``pred``.

    Note that this is the square of the correlation coefficient, not
    ``1 - SS_res / SS_tot``; it is insensitive to any linear rescaling or
    offset of the predictions.
    """
    corr_matrix = np.corrcoef(target, pred)
    # Off-diagonal entry holds the correlation between the two inputs.
    pearson_r = corr_matrix[0, 1]
    return pearson_r ** 2


def RRMSE(target, pred):
    """Relative root mean squared error.

    Square root of the ratio between the residual sum of squares of the
    predictions and the sum of squared deviations of ``target`` from its own
    mean. A value of 1.0 matches a model that always predicts the mean of
    ``target``; 0.0 is a perfect fit.
    """
    residual_ss = np.sum((target - pred) ** 2)
    centered = target - np.mean(target)
    total_ss = np.sum(centered ** 2)
    return np.sqrt(residual_ss / total_ss)


def RMSE(target, pred):
    """Root mean squared error between ``target`` and ``pred``.

    The squared differences are summed and divided by ``len(target)`` before
    taking the square root.
    """
    squared_errors = (target - pred) ** 2
    mean_squared_error = np.sum(squared_errors) / len(target)
    return np.sqrt(mean_squared_error)

In [3]:
# Read the dataset and discard its first column (presumably a row index or
# identifier -- confirm against the CSV).
data = pd.read_csv('ICMC_oxides_TEC.csv').iloc[:, 1:]

# Every remaining column except the last is a feature; the last column is
# the regression target.
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

In [4]:
# Per-fold scores, one list per evaluation metric.
rds = []
r2s = []
rrmses = []
rmses = []

# 10-fold cross-validation with a fixed shuffle so the splits are repeatable.
kf = KFold(n_splits=10, random_state=1, shuffle=True)
for fold, (train_index, test_index) in enumerate(kf.split(X), start=1):
    print('Fold {:02d}'.format(fold))
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # The forest is fitted on the log of the target (requires y > 0);
    # predictions are mapped back with exp below, so every metric is
    # computed on the original scale.
    y_train = np.log(y_train)

    # Seed the forest as well as the splitter: without random_state the
    # bootstrap/feature sampling differs on every run and the reported
    # scores cannot be reproduced.
    rf = RandomForestRegressor(n_estimators=100, random_state=1)
    rf.fit(X_train, y_train)

    preds = np.exp(rf.predict(X_test))

    rds.append(relative_deviation(y_test, preds))
    r2s.append(R2(y_test, preds))
    rrmses.append(RRMSE(y_test, preds))
    rmses.append(RMSE(y_test, preds))

Fold 01
Fold 02
Fold 03
Fold 04
Fold 05
Fold 06
Fold 07
Fold 08
Fold 09
Fold 10


In [5]:
# Report the mean of each metric across the cross-validation folds.
for label, fold_scores in (
    ('RD:', rds),
    ('R2:', r2s),
    ('RRMSE:', rrmses),
    ('RMSE:', rmses),
):
    print(label, np.mean(fold_scores))

RD: 7.435317881392902
R2: 0.853601763520107
RRMSE: 0.3849104091414469
RMSE: 1.5672568771966914e-06
