In [76]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, ElasticNet, SGDRegressor, BayesianRidge
from sklearn.svm import SVR
from sklearn.model_selection import RepeatedStratifiedKFold, KFold, StratifiedKFold
from sklearn.metrics import mean_absolute_error
from catboost import CatBoostRegressor
import matplotlib.pyplot as plt

import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

def cross_evaluate(model, dataset, n_folds, features, target):

    result = []

    for i in range(0, n_folds):
        dataset_train = dataset[dataset.Fold == i]
        dataset_test = dataset[dataset.Fold != i]

        dataset_X_train = dataset_train[features]
        dataset_y_train = dataset_train[target]

        dataset_X_test = dataset_test[features]
        dataset_y_test = dataset_test[target]

        model.fit(dataset_X_train, dataset_y_train.values.ravel())

        y_pred = model.predict(dataset_X_test)

        result.append(mean_absolute_error(dataset_y_test, y_pred))

    return result

def create_folds(df, n_s=5, n_grp=None):
    df['Fold'] = -1
    
    if n_grp is None:
        skf = KFold(n_splits=n_s)
        target = df.Age
    else:
        skf = StratifiedKFold(n_splits=n_s)
        df['grp'] = pd.cut(df.Age, n_grp, labels=False)
        target = df.grp
    
    for fold_no, (t, v) in enumerate(skf.split(target, target)):
        df.loc[v, 'Fold'] = fold_no
    return df

path = "../datasets/media-1.xlsx"

SEED = 43

immunology = pd.read_excel(path, skiprows=1)

immunology['Fold'] = -1
features = ['CXCL9', 'CCL22', 'IL6', 'CSF1', 'PDGFB', 'CCL4', 'CXCL10', 'PDGFA', 'CCL2', 'IL1RA']
target = ['Age']
immunology = create_folds(immunology, n_s=5, n_grp=10)

model1 = LinearRegression()
model2 = RandomForestRegressor()
model3 = ElasticNet()
model4 = SGDRegressor()
model5 = SVR()
model6 = BayesianRidge()
model7 = CatBoostRegressor(learning_rate=0.1, silent=True, loss_function='Quantile')

print(cross_evaluate(model1, immunology, 5, features, target))
print(cross_evaluate(model2, immunology, 5, features, target))
print(cross_evaluate(model3, immunology, 5, features, target))
print(cross_evaluate(model4, immunology, 5, features, target))
print(cross_evaluate(model5, immunology, 5, features, target))
print(cross_evaluate(model6, immunology, 5, features, target))
print(cross_evaluate(model7, immunology, 5, features, target))

[21.94489375935786, 22.899315175693193, 13.166823890484242, 14.249454344016083, 16.847231926024804]
[11.463999867466558, 10.848874793346486, 10.31540327933633, 10.471823720143185, 11.047140317343947]
[21.680873327419256, 20.376737480420505, 13.052715268907194, 14.057094146243696, 14.063475491948822]
[1.5880469775453907e+17, 3.594339012128989e+17, 6.234787191438153e+16, 1.6097354517478198e+17, 1.597952356402194e+17]
[15.272305500068011, 15.596083764947021, 15.661102918908542, 15.89615454234385, 16.01736705699762]
[15.392750173499556, 15.14244204505552, 12.522466635698608, 12.569679335258902, 12.534408516321482]
[12.033501968034216, 11.599699347354798, 11.583872915875885, 11.397660916592901, 10.564594947962291]
