In [87]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [116]:
# This notebook uses the wine quality dataset located at https://archive-beta.ics.uci.edu/dataset/186/wine+quality
Data = pd.read_csv("./winequality-red.csv",delimiter=";")

In [70]:

def RidgeNestedResample(alpha):
    inner = KFold(5, shuffle = True, random_state=42)
    outer = KFold(5, shuffle = True, random_state=42) 
    X = Data.loc[:,set.columns != "quality"]
    Y = Data["quality"]

    InnerMSE = []
    OuterMSE = []

    for i, (outtrain_index, outtest_index) in enumerate(outer.split(Data)):
        avgMSE = 0
        for v, (intrain_index, intest_index) in enumerate(inner.split(outtrain_index)):
            model = linear_model.Ridge(alpha = alpha)
            model.fit(X.iloc[intrain_index],Y.iloc[intrain_index])
            predictions = model.predict(X.iloc[intest_index])
            avgMSE += mean_squared_error(Y.iloc[intest_index],predictions)
        InnerMSE.append(avgMSE/5)

        model = linear_model.Ridge(alpha = alpha)
        model.fit(X.iloc[outtrain_index],Y.iloc[outtrain_index])
        predictions = model.predict(X.iloc[outtest_index])
        OuterMSE.append(mean_squared_error(Y.iloc[outtest_index],predictions))
    return InnerMSE,OuterMSE
            
alphas = [x/1000 for x in range(2000)]
Runs = []
for a in alphas:
    Runs.append(RidgeNestedResample(a))
     
    

In [85]:
for run in range(5):
    Inner1 = [Runs[x][0][run] for x in range(len(Runs))]
    best = Inner1.index(min((Inner1)))
    print(f"best inner {best} with {Runs[best][0][run]}")
    print(f"OuterMSE of this run {Runs[best][1][run]}")
    

best inner 455 with 0.4291921469066774
OuterMSE of this run 0.39166712505669404
best inner 455 with 0.4291921469066774
OuterMSE of this run 0.4651694389310969
best inner 455 with 0.4291921469066774
OuterMSE of this run 0.48078449868954304
best inner 455 with 0.4291921469066774
OuterMSE of this run 0.4568551885005114
best inner 305 with 0.4297739669045072
OuterMSE of this run 0.3476732568775663


In [95]:
def TreeNestedResample(estimators,criterion,max_depth,min_samples_split,min_samples_leaf):
    inner = KFold(5, shuffle = True, random_state=42)
    outer = KFold(5, shuffle = True, random_state=42) 
    X = Data.loc[:,set.columns != "quality"]
    Y = Data["quality"]

    InnerMSE = []
    OuterMSE = []

    for i, (outtrain_index, outtest_index) in enumerate(outer.split(Data)):
        avgMSE = 0
        for v, (intrain_index, intest_index) in enumerate(inner.split(outtrain_index)):
            model = RandomForestClassifier(n_estimators=estimators,criterion=criterion,max_depth = max_depth,min_samples_split = min_samples_split, min_samples_leaf = min_samples_leaf)
            model.fit(X.iloc[intrain_index],Y.iloc[intrain_index])
            predictions = model.predict(X.iloc[intest_index])
            avgMSE += mean_squared_error(Y.iloc[intest_index],predictions)
        InnerMSE.append(avgMSE/5)

        model = RandomForestClassifier(n_estimators=estimators,criterion=criterion,max_depth = max_depth,min_samples_split = min_samples_split, min_samples_leaf = min_samples_leaf)
        model.fit(X.iloc[outtrain_index],Y.iloc[outtrain_index])
        predictions = model.predict(X.iloc[outtest_index])
        OuterMSE.append(mean_squared_error(Y.iloc[outtest_index],predictions))
    return InnerMSE,OuterMSE


In [99]:
TreeRuns = []
for a in range(100):
    estimators = np.random.randint(1,200)
    criterion = np.random.choice(["gini","entropy","log_loss"])
    max_depth = np.random.choice([None,np.random.randint(1,100)])
    min_samples_split = np.random.randint(2,3)
    min_samples_leaf = np.random.randint(1,3)

    print(f"Params: estimators = {estimators}, criterion = {criterion}, max_depth = {max_depth}, minsamplessplit = {min_samples_split}, minsamplesleaf = {min_samples_leaf}")
    TreeRuns.append((TreeNestedResample(estimators,criterion,max_depth,min_samples_split,min_samples_leaf),(estimators,criterion,max_depth,min_samples_split,min_samples_leaf)))
     

Params: estimators = 99, criterion = entropy, max_depth = 26, minsamplessplit = 2, minsamplesleaf = 2
Params: estimators = 98, criterion = entropy, max_depth = None, minsamplessplit = 2, minsamplesleaf = 2
Params: estimators = 17, criterion = log_loss, max_depth = None, minsamplessplit = 2, minsamplesleaf = 1
Params: estimators = 164, criterion = entropy, max_depth = 34, minsamplessplit = 2, minsamplesleaf = 1
Params: estimators = 167, criterion = log_loss, max_depth = 43, minsamplessplit = 2, minsamplesleaf = 2
Params: estimators = 138, criterion = entropy, max_depth = None, minsamplessplit = 2, minsamplesleaf = 1
Params: estimators = 148, criterion = gini, max_depth = None, minsamplessplit = 2, minsamplesleaf = 1
Params: estimators = 189, criterion = log_loss, max_depth = 15, minsamplessplit = 2, minsamplesleaf = 1
Params: estimators = 9, criterion = log_loss, max_depth = None, minsamplessplit = 2, minsamplesleaf = 1
Params: estimators = 60, criterion = gini, max_depth = 20, minsampl

In [106]:
print(TreeRuns[0])
for run in range(5):
    Inner1 = [TreeRuns[x][0][0][run] for x in range(len(TreeRuns))]
    best = Inner1.index(min((Inner1)))
    print(f"best inner {best} with {TreeRuns[best][0][0][run]}, with parameters {TreeRuns[best][1]}")
    print(f"OuterMSE of this run {TreeRuns[best][0][1][run]}")
    

(([0.41586703431372546, 0.42370710784313725, 0.4221262254901961, 0.4182322303921569, 0.40078125], [0.38125, 0.384375, 0.43125, 0.440625, 0.29153605015673983]), (99, 'entropy', 26, 2, 2))
best inner 1 with 0.39551776960784313, with parameters (98, 'entropy', None, 2, 2)
OuterMSE of this run 0.38125
best inner 33 with 0.3986458333333333, with parameters (183, 'entropy', None, 2, 1)
OuterMSE of this run 0.415625
best inner 5 with 0.3885018382352941, with parameters (138, 'entropy', None, 2, 1)
OuterMSE of this run 0.434375
best inner 14 with 0.40019607843137256, with parameters (194, 'log_loss', None, 2, 2)
OuterMSE of this run 0.428125
best inner 5 with 0.37421875, with parameters (138, 'entropy', None, 2, 1)
OuterMSE of this run 0.34169278996865204


In [107]:
def SVMNestedResample(regparam,kernel, degree ,gamma,coef0,shrinking):
    inner = KFold(5, shuffle = True, random_state=42)
    outer = KFold(5, shuffle = True, random_state=42) 
    X = Data.loc[:,set.columns != "quality"]
    Y = Data["quality"]

    InnerMSE = []
    OuterMSE = []

    for i, (outtrain_index, outtest_index) in enumerate(outer.split(Data)):
        avgMSE = 0
        for v, (intrain_index, intest_index) in enumerate(inner.split(outtrain_index)):
            model = SVC(C=regparam,kernel=kernel,degree=degree,gamma=gamma,coef0=coef0,shrinking=shrinking)
            model.fit(X.iloc[intrain_index],Y.iloc[intrain_index])
            predictions = model.predict(X.iloc[intest_index])
            avgMSE += mean_squared_error(Y.iloc[intest_index],predictions)
        InnerMSE.append(avgMSE/5)

        model = SVC(C=regparam,kernel=kernel,degree=degree,gamma=gamma,coef0=coef0,shrinking=shrinking)
        model.fit(X.iloc[outtrain_index],Y.iloc[outtrain_index])
        predictions = model.predict(X.iloc[outtest_index])
        OuterMSE.append(mean_squared_error(Y.iloc[outtest_index],predictions))
    return InnerMSE,OuterMSE

In [114]:
SVMRuns = []
for a in range(100):
    regparam = abs(np.random.normal(1,0.5))
    kernel = np.random.choice(["rbf","sigmoid"])
    degree = np.random.randint(1,3)
    gamma = np.random.choice(["scale","auto"])
    coef0 = np.random.random()*5
    shrinking = np.random.choice([True,False])

    print(f"Params: regparam = {regparam}, kernel = {kernel}, degree = {degree}, gamma = {gamma}, coef0 = {coef0}, shrinking = {shrinking}")
    SVMRuns.append((SVMNestedResample(regparam,kernel, degree ,gamma,coef0,shrinking),(regparam,kernel,degree,gamma,coef0,shrinking)))

Params: regparam = 1.3468714702287508, kernel = sigmoid, degree = 2, gamma = auto, coef0 = 2.219734824185645, shrinking = True
Params: regparam = 0.48299169527411645, kernel = rbf, degree = 2, gamma = scale, coef0 = 0.174538268074666, shrinking = True
Params: regparam = 1.3222698357254232, kernel = rbf, degree = 2, gamma = scale, coef0 = 2.6073155576943057, shrinking = True
Params: regparam = 1.2075163222335759, kernel = sigmoid, degree = 1, gamma = auto, coef0 = 2.002469810943654, shrinking = True
Params: regparam = 1.0173168078000454, kernel = sigmoid, degree = 1, gamma = scale, coef0 = 3.6363113015963506, shrinking = True
Params: regparam = 0.8459007932664172, kernel = sigmoid, degree = 1, gamma = auto, coef0 = 0.34046798934666833, shrinking = False
Params: regparam = 1.5621790282217904, kernel = sigmoid, degree = 1, gamma = auto, coef0 = 0.20334529652524025, shrinking = False
Params: regparam = 1.7997765413929554, kernel = rbf, degree = 2, gamma = scale, coef0 = 3.3563569619049223,

In [115]:
print(SVMRuns[0])
for run in range(5):
    Inner1 = [SVMRuns[x][0][0][run] for x in range(len(SVMRuns))]
    best = Inner1.index(min((Inner1)))
    print(f"best inner {best} with {SVMRuns[best][0][0][run]}, with parameters {SVMRuns[best][1]}")
    print(f"OuterMSE of this run {SVMRuns[best][0][1][run]}")
    

(([1.1000643382352941, 1.1000643382352941, 1.1000643382352941, 1.1000643382352941, 1.10234375], [1.121875, 0.9875, 1.059375, 1.096875, 1.0156739811912225]), (1.3468714702287508, 'sigmoid', 2, 'auto', 2.219734824185645, True))
best inner 69 with 0.6059129901960784, with parameters (1.5410804972490766, 'rbf', 2, 'auto', 3.255986620443151, False)
OuterMSE of this run 0.671875
best inner 69 with 0.6059129901960784, with parameters (1.5410804972490766, 'rbf', 2, 'auto', 3.255986620443151, False)
OuterMSE of this run 0.634375
best inner 69 with 0.6059129901960784, with parameters (1.5410804972490766, 'rbf', 2, 'auto', 3.255986620443151, False)
OuterMSE of this run 0.590625
best inner 69 with 0.6059129901960784, with parameters (1.5410804972490766, 'rbf', 2, 'auto', 3.255986620443151, False)
OuterMSE of this run 0.609375
best inner 46 with 0.6328125, with parameters (1.4776920010952042, 'rbf', 2, 'auto', 4.08422835921956, True)
OuterMSE of this run 0.5924764890282131
