# Tuning BaggingRegressor

In [1]:
import numpy
import csv
import time
import pandas
import signal
import warnings
warnings.filterwarnings("ignore")

In [2]:
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.pipeline import Pipeline

In [3]:
from sklearn.multioutput import ClassifierChain
from sklearn.multioutput import MultiOutputRegressor
from sklearn.multioutput import MultiOutputClassifier

from sklearn.ensemble.weight_boosting import AdaBoostClassifier
from sklearn.ensemble.weight_boosting import AdaBoostRegressor
from sklearn.ensemble.bagging import BaggingClassifier
from sklearn.ensemble.bagging import BaggingRegressor
from sklearn.ensemble.forest import ExtraTreesClassifier
from sklearn.ensemble.forest import ExtraTreesRegressor
from sklearn.ensemble.gradient_boosting import GradientBoostingClassifier
from sklearn.ensemble.gradient_boosting import GradientBoostingRegressor
from sklearn.ensemble.forest import RandomForestClassifier
from sklearn.ensemble.forest import RandomForestRegressor

from sklearn.linear_model.bayes import ARDRegression
from sklearn.linear_model.bayes import BayesianRidge
from sklearn.naive_bayes import BernoulliNB
from sklearn.cross_decomposition.cca_ import CCA
from sklearn.tree.tree import DecisionTreeClassifier
from sklearn.tree.tree import DecisionTreeRegressor
from sklearn.linear_model.coordinate_descent import ElasticNet
from sklearn.tree.tree import ExtraTreeClassifier
from sklearn.tree.tree import ExtraTreeRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.gaussian_process.gpc import GaussianProcessClassifier
from sklearn.gaussian_process.gpr import GaussianProcessRegressor
from sklearn.linear_model.huber import HuberRegressor
from sklearn.neighbors.classification import KNeighborsClassifier
from sklearn.neighbors.regression import KNeighborsRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.semi_supervised.label_propagation import LabelPropagation
from sklearn.semi_supervised.label_propagation import LabelSpreading
from sklearn.linear_model.least_angle import Lars
from sklearn.linear_model.coordinate_descent import Lasso
from sklearn.linear_model.least_angle import LassoLars
from sklearn.linear_model.least_angle import LassoLarsIC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model.base import LinearRegression
from sklearn.svm.classes import LinearSVC
from sklearn.svm.classes import LinearSVR
from sklearn.linear_model.logistic import LogisticRegression
from sklearn.neural_network.multilayer_perceptron import MLPClassifier
from sklearn.neural_network.multilayer_perceptron import MLPRegressor
from sklearn.linear_model.coordinate_descent import MultiTaskElasticNet
from sklearn.linear_model.coordinate_descent import MultiTaskLasso
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors.nearest_centroid import NearestCentroid
from sklearn.svm.classes import NuSVC
from sklearn.svm.classes import NuSVR
from sklearn.linear_model.omp import OrthogonalMatchingPursuit
from sklearn.cross_decomposition.pls_ import PLSCanonical
from sklearn.cross_decomposition.pls_ import PLSRegression
from sklearn.linear_model.passive_aggressive import PassiveAggressiveClassifier
from sklearn.linear_model.passive_aggressive import PassiveAggressiveRegressor
from sklearn.linear_model.perceptron import Perceptron
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.linear_model.ransac import RANSACRegressor
from sklearn.neighbors.classification import RadiusNeighborsClassifier
from sklearn.neighbors.regression import RadiusNeighborsRegressor
from sklearn.linear_model.ridge import Ridge
from sklearn.linear_model.ridge import RidgeClassifier
from sklearn.linear_model.stochastic_gradient import SGDClassifier
from sklearn.linear_model.stochastic_gradient import SGDRegressor
from sklearn.svm.classes import SVC
from sklearn.svm.classes import SVR
from sklearn.linear_model.theil_sen import TheilSenRegressor

## Problem Type

In [None]:
# Problem Type task: read the 'PT TFIDF' CSV (latin-1 encoded) as the feature table
# and replace missing cells with the empty string.
# NOTE(review): fillna('') would put strings into what is presumably a numeric
# matrix if any NaN were present — confirm the file has no missing values.
tfidf = pandas.read_csv('07 NM PT TFIDF.csv', delimiter=',', encoding='latin-1').fillna('')
# Preview the first three rows.
tfidf[:3]

In [None]:
# Problem Type task: read the 'PT OHE' CSV (latin-1 encoded) as the target table
# (presumably one-hot encoded labels, going by the file name — confirm) and
# replace missing cells with the empty string.
ohe = pandas.read_csv('07 NM PT OHE.csv', delimiter=',', encoding='latin-1').fillna('')
# Preview the first three rows.
ohe[:3]

In [6]:
def lowest_correct(trues, preds):
    """Return the 1-based rank of the worst-placed correct option.

    Options are ranked by descending ``preds`` (the reverse of
    ``numpy.argsort``); an option counts as correct when its entry in
    ``trues`` equals 1.

    Parameters:
        trues: sequence of labels, one per option.
        preds: sequence of predicted scores, one per option.

    Returns:
        Position (starting at 1) in the descending ranking of the correct
        option that is ranked last.

    Raises:
        ValueError: if no entry of ``trues`` equals 1.
    """
    # Option indices ordered from highest to lowest predicted score.
    ranking = list(numpy.argsort(preds))[::-1]
    # Zero-based ranking position of every correct option.
    positions = [ranking.index(option) for option, label in enumerate(trues) if label == 1]
    return max(positions) + 1
def average_lowest_correct(list_of_trues, list_of_preds):
    """Average ``lowest_correct`` over all rows.

    Parameters:
        list_of_trues: row container supporting ``len`` and ``.iloc[i]``
            (e.g. a pandas DataFrame); each row holds the labels of one sample.
        list_of_preds: container indexable by integer position; each entry
            holds the predicted scores of the matching sample.

    Returns:
        ``numpy.mean`` of the per-row ``lowest_correct`` values.
    """
    per_row = []
    for row in range(len(list_of_trues)):
        truth = list(list_of_trues.iloc[row])
        scores = list(list_of_preds[row])
        per_row.append(lowest_correct(truth, scores))
    return numpy.mean(per_row)

## Default

In [7]:
def deadline(timeout, *args):
    """Decorator factory that aborts the wrapped call after ``timeout`` seconds.

    The limit is enforced with ``signal.alarm``/``SIGALRM``, so it only works
    where that signal exists and when the wrapped function is called from the
    main thread.

    Parameters:
        timeout: whole number of seconds passed to ``signal.alarm``.
        *args: accepted for backward compatibility; unused.

    Returns:
        A decorator. The decorated function returns whatever the original
        returns and raises ``TimeoutError`` (an ``Exception`` subclass) when
        the alarm fires before it finishes.
    """
    def decorate(f):
        def handler(signum, frame):
            raise TimeoutError('%s exceeded the %s second deadline' % (f.__name__, timeout))

        def new_f(*args, **kwargs):
            signal.signal(signal.SIGALRM, handler)
            signal.alarm(timeout)
            try:
                return f(*args, **kwargs)
            finally:
                # Always cancel the pending alarm, on success and on error alike;
                # otherwise it would fire later inside unrelated code.
                signal.alarm(0)

        new_f.__name__ = f.__name__
        return new_f
    return decorate

In [8]:
@deadline(500)
def cv_ensemble(model, xs, ys):
    """Return the mean 2-fold cross-validated ``average_lowest_correct`` of ``model``.

    The call is wrapped in ``deadline(500)``, so it is subject to that
    decorator's 500 second limit.

    Parameters:
        model: estimator passed to ``cross_validate``.
        xs: feature table.
        ys: target table.

    Returns:
        Mean of the per-fold test scores, rounded to two decimals.
    """
    # Only the held-out scores are used below, so train scores are not requested.
    temp = cross_validate(model, xs, ys, scoring=make_scorer(average_lowest_correct), n_jobs=1, cv=2)
    return numpy.mean(temp['test_score']).round(2)

In [9]:
# Base estimator classes (not instances) for the default-parameter baseline;
# the loop that consumes this list instantiates each one with bs().
# NOTE(review): later cells rebind `bss` to a list of configured instances.
bss = [Ridge, LinearSVR, PassiveAggressiveRegressor]

In [27]:
# Baseline: for each base estimator class, build it with default arguments,
# wrap it in BaggingRegressor inside MultiOutputRegressor, and print the
# cv_ensemble score on the currently loaded tfidf/ohe tables.
for bs in bss:
    print(cv_ensemble(MultiOutputRegressor(BaggingRegressor(bs())), tfidf, ohe))

2.97
3.92
3.43


## Grid Search

In [10]:
def prn_rsl(gsc):
    """Print every searched parameter set with its mean test score, lowest first.

    The mean is the unweighted average of all ``split<k>_test_score`` arrays
    found in ``gsc.cv_results_`` (k = 0, 1, 2, ...), so it works for any number
    of folds; for two folds it equals ``(split0 + split1) / 2``.

    Parameters:
        gsc: fitted search object exposing ``cv_results_`` with a ``'params'``
            list and per-split test-score arrays.

    Raises:
        KeyError: if ``cv_results_`` has no ``'params'`` or no
            ``'split0_test_score'`` entry.
    """
    results = gsc.cv_results_
    params = results['params']
    # The first split is mandatory; further splits are collected while present.
    split_scores = [results['split0_test_score']]
    fold = 1
    while 'split%d_test_score' % fold in results:
        split_scores.append(results['split%d_test_score' % fold])
        fold += 1
    mean_scores = numpy.mean(numpy.asarray(split_scores, dtype=float), axis=0)
    for i in numpy.argsort(mean_scores):
        print(params[i], mean_scores[i])

In [11]:
# Configured base estimators used by the grid searches below:
# bss[0] = Ridge, bss[1] = LinearSVR, bss[2] = PassiveAggressiveRegressor.
# NOTE(review): this rebinds `bss`, which earlier held estimator classes.
# NOTE(review): Ridge's `normalize` argument is presumably unavailable in
# recent scikit-learn releases — confirm against the installed version.
bss = [Ridge(alpha=10, fit_intercept=True, max_iter=3000, normalize=False, solver='saga'), 
       LinearSVR(C=0.2, dual=False, epsilon=0.2, fit_intercept=True, loss='squared_epsilon_insensitive', max_iter=1000, tol=0.01),
       PassiveAggressiveRegressor(C=0.1, average=True, epsilon=0.1, fit_intercept=True, max_iter=4, loss='squared_epsilon_insensitive', tol=0.00001)]

In [12]:
# Problem Type, bagged Ridge (bss[0]): 2-fold grid search over the
# BaggingRegressor's n_estimators in {2, 4, 6, 8, 10}.
# 'me__estimator__...' addresses the pipeline step 'me', then its wrapped
# `estimator` (the BaggingRegressor).
# NOTE(review): make_scorer is called without greater_is_better=False while
# prn_rsl lists the lowest score first; GridSearchCV's own ranking/refit would
# presumably favour the highest value — confirm before relying on best_params_.
# NOTE(review): error_score=0 presumably scores a failed fit as 0, which
# prn_rsl would then list first — confirm.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[0])))])
grd = {
    'me__estimator__n_estimators': [i for i in range(2, 12, 2)]
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 5 candidates, totalling 10 fits
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=4 ...................................
[CV] me__estimator__n_estimators=4 ...................................
[CV] .................... me__estimator__n_estimators=2, total= 2.2min
[CV] me__estimator__n_estimators=6 ...................................
[CV] .................... me__estimator__n_estimators=2, total= 2.3min
[CV] me__estimator__n_estimators=6 ...................................
[CV] .................... me__estimator__n_estimators=4, total= 4.7min
[CV] me__estimator__n_estimators=8 ...................................
[CV] .................... me__estimator__n_estimators=4, total= 7.4min
[CV] me__estimator__n_estimators=8 ...................................
[CV] .................... me__estimator__n_estimators=6, total=11.1min
[CV] me__estimato

[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed: 24.2min finished


{'me__estimator__n_estimators': 2} 3.423590641120118
{'me__estimator__n_estimators': 4} 5.777840623146467
{'me__estimator__n_estimators': 8} 5.842315478009239
{'me__estimator__n_estimators': 6} 6.573059519744055
{'me__estimator__n_estimators': 10} 8.739986991570358


In [100]:
# Problem Type, bagged Ridge (bss[0]): with n_estimators fixed at 2, search
# max_samples over five evenly spaced fractions from 0.2 to 1.0 (2-fold CV).
# The max_features search is left disabled in this cell.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[0])))])
grd = {
    'me__estimator__n_estimators': [2],
    'me__estimator__max_samples': numpy.linspace(0.2, 1.0, 5),
#     'me__estimator__max_features': numpy.linspace(0.2, 1.0, 5),
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 5 candidates, totalling 10 fits
[CV] me__estimator__max_samples=0.2, me__estimator__n_estimators=2 ...
[CV] me__estimator__max_samples=0.2, me__estimator__n_estimators=2 ...
[CV] me__estimator__max_samples=0.4, me__estimator__n_estimators=2 ...
[CV] me__estimator__max_samples=0.4, me__estimator__n_estimators=2 ...
[CV]  me__estimator__max_samples=0.2, me__estimator__n_estimators=2, total=  17.7s
[CV] me__estimator__max_samples=0.6000000000000001, me__estimator__n_estimators=2 
[CV]  me__estimator__max_samples=0.2, me__estimator__n_estimators=2, total=  18.5s
[CV] me__estimator__max_samples=0.6000000000000001, me__estimator__n_estimators=2 
[CV]  me__estimator__max_samples=0.4, me__estimator__n_estimators=2, total=  21.4s
[CV]  me__estimator__max_samples=0.4, me__estimator__n_estimators=2, total=  21.3s
[CV] me__estimator__max_samples=0.8, me__estimator__n_estimators=2 ...
[CV] me__estimator__max_samples=0.8, me__estimator__n_estimators=2 ...
[CV]  me__estima

[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  3.0min finished


{'me__estimator__max_samples': 0.6000000000000001, 'me__estimator__n_estimators': 2} 3.3286560652084045
{'me__estimator__max_samples': 0.8, 'me__estimator__n_estimators': 2} 3.605194722037493
{'me__estimator__max_samples': 0.4, 'me__estimator__n_estimators': 2} 3.6063691877707282
{'me__estimator__max_samples': 1.0, 'me__estimator__n_estimators': 2} 3.623963706705969
{'me__estimator__max_samples': 0.2, 'me__estimator__n_estimators': 2} 4.111383836295991


In [12]:
# Problem Type, bagged Ridge (bss[0]): with n_estimators=2 and max_samples=0.6
# fixed, search max_features over five evenly spaced fractions from 0.2 to 1.0
# (2-fold CV).
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[0])))])
grd = {
    'me__estimator__n_estimators': [2],
    'me__estimator__max_samples': [0.6],
    'me__estimator__max_features': numpy.linspace(0.2, 1.0, 5),
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 5 candidates, totalling 10 fits
[CV] me__estimator__max_features=0.2, me__estimator__max_samples=0.6, me__estimator__n_estimators=2 
[CV] me__estimator__max_features=0.2, me__estimator__max_samples=0.6, me__estimator__n_estimators=2 
[CV] me__estimator__max_features=0.4, me__estimator__max_samples=0.6, me__estimator__n_estimators=2 
[CV] me__estimator__max_features=0.4, me__estimator__max_samples=0.6, me__estimator__n_estimators=2 
[CV]  me__estimator__max_features=0.2, me__estimator__max_samples=0.6, me__estimator__n_estimators=2, total=   3.9s
[CV] me__estimator__max_features=0.6000000000000001, me__estimator__max_samples=0.6, me__estimator__n_estimators=2 
[CV]  me__estimator__max_features=0.2, me__estimator__max_samples=0.6, me__estimator__n_estimators=2, total=   4.0s
[CV] me__estimator__max_features=0.6000000000000001, me__estimator__max_samples=0.6, me__estimator__n_estimators=2 
[CV]  me__estimator__max_features=0.4, me__estimator__max_samples=0.6, m

[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:   38.4s finished


{'me__estimator__max_features': 1.0, 'me__estimator__max_samples': 0.6, 'me__estimator__n_estimators': 2} 3.3440196047594224
{'me__estimator__max_features': 0.8, 'me__estimator__max_samples': 0.6, 'me__estimator__n_estimators': 2} 3.470153068102162
{'me__estimator__max_features': 0.6000000000000001, 'me__estimator__max_samples': 0.6, 'me__estimator__n_estimators': 2} 3.705960062548304
{'me__estimator__max_features': 0.4, 'me__estimator__max_samples': 0.6, 'me__estimator__n_estimators': 2} 4.001191316030699
{'me__estimator__max_features': 0.2, 'me__estimator__max_samples': 0.6, 'me__estimator__n_estimators': 2} 4.452137853406905


In [None]:
# Problem Type, bagged Ridge (bss[0]): single-candidate run with
# n_estimators=2 (2-fold CV); the max_samples / max_features entries are
# disabled.
# NOTE(review): the disabled max_features entry uses the integer 1, which
# would presumably select a single feature rather than all of them (the
# search above used the float 1.0) — confirm before re-enabling.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[0])))])
grd = {
    'me__estimator__n_estimators': [2],
#     'me__estimator__max_samples': [0.6],
#     'me__estimator__max_features': [1],
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

In [19]:
# Problem Type, bagged LinearSVR (bss[1]): 2-fold grid search over
# n_estimators in {2, 4} with max_samples fixed at 0.6.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[1])))])
grd = {
    'me__estimator__n_estimators': list(range(2, 6, 2)),
    # This key used to appear twice in the literal; a dict keeps only the last
    # duplicate, so a single entry gives the same grid.
    # NOTE(review): the second copy was probably meant to be
    # 'me__estimator__max_features' — confirm and add it if so.
    'me__estimator__max_samples': [0.6],
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 2 candidates, totalling 4 fits
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=4 ...................................
[CV] me__estimator__n_estimators=4 ...................................
[CV] .................... me__estimator__n_estimators=2, total=   2.8s
[CV] .................... me__estimator__n_estimators=2, total=   2.9s
[CV] .................... me__estimator__n_estimators=4, total=   4.6s
[CV] .................... me__estimator__n_estimators=4, total=   4.6s


[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    5.7s finished


{'me__estimator__n_estimators': 2} 2.76318648111868
{'me__estimator__n_estimators': 4} 2.76318648111868


In [20]:
# Problem Type, bagged PassiveAggressiveRegressor (bss[2]): 2-fold grid search
# over n_estimators in {2, 4}.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[2])))])
grd = {
    'me__estimator__n_estimators': [i for i in range(2, 6, 2)]
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 2 candidates, totalling 4 fits
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=4 ...................................
[CV] me__estimator__n_estimators=4 ...................................
[CV] .................... me__estimator__n_estimators=2, total=   6.6s
[CV] .................... me__estimator__n_estimators=2, total=   6.7s
[CV] .................... me__estimator__n_estimators=4, total=  10.9s
[CV] .................... me__estimator__n_estimators=4, total=  11.0s


[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:   12.1s finished


{'me__estimator__n_estimators': 4} 2.8235919891439147
{'me__estimator__n_estimators': 2} 2.874427651562809


## Process Step

In [21]:
# Process Step task: read the 'PS TFIDF' CSV (latin-1 encoded) as the feature
# table and replace missing cells with the empty string.
# This rebinds `tfidf`, so later cells see the Process Step features.
tfidf = pandas.read_csv('07 NM PS TFIDF.csv', delimiter=',', encoding='latin-1').fillna('')
# Preview the first three rows.
tfidf[:3]

Unnamed: 0,_____________________,_____________________ follow,able,acceptable,accessory,accessory require,accidentally,accompany,accord,account,...,wrong ct,wrong info,wrong info mri,wrong patient,wrong pt,wrong set,wrong tattoo,xrt,xrt date,xrt pt
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
# Process Step task: read the 'PS OHE' CSV (latin-1 encoded) as the target
# table and replace missing cells with the empty string.
# This rebinds `ohe`, so later cells see the Process Step targets.
ohe = pandas.read_csv('07 NM PS OHE.csv', delimiter=',', encoding='latin-1').fillna('')
# Preview the first three rows.
ohe[:3]

Unnamed: 0,Treatment delivery,Radiation treatment prescription scheduling,Pre-treatment quality assurance,Post-treatment completion,Interventional procedure for planning and/or delivery,Contouring and planning,Imaging for treatment planning,Patient medical consultation and physician assessment,On-treatment quality assurance
0,1,0,0,0,0,0,0,0,0
1,0,0,0,1,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0


In [23]:
# Configured base estimators for the searches that follow:
# bss[0] = Ridge, bss[1] = LinearSVR, bss[2] = PassiveAggressiveRegressor.
# Same settings as the list defined in the Grid Search section.
bss = [Ridge(alpha=10, fit_intercept=True, max_iter=3000, normalize=False, solver='saga'), 
       LinearSVR(C=0.2, dual=False, epsilon=0.2, fit_intercept=True, loss='squared_epsilon_insensitive', max_iter=1000, tol=0.01),
       PassiveAggressiveRegressor(C=0.1, average=True, epsilon=0.1, fit_intercept=True, max_iter=4, loss='squared_epsilon_insensitive', tol=0.00001)]

In [24]:
# Process Step, bagged Ridge (bss[0]): single candidate n_estimators=2,
# evaluated with 2-fold CV; prn_rsl prints the mean of the two fold scores.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[0])))])
grd = {
    'me__estimator__n_estimators': [2]
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 1 candidates, totalling 2 fits
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=2 ...................................
[CV] .................... me__estimator__n_estimators=2, total=  17.3s
[CV] .................... me__estimator__n_estimators=2, total=  17.3s


[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:   18.0s finished


{'me__estimator__n_estimators': 2} 1.7919303797468356


In [25]:
# Process Step, bagged LinearSVR (bss[1]): single candidate n_estimators=2,
# evaluated with 2-fold CV; prn_rsl prints the mean of the two fold scores.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[1])))])
grd = {
    'me__estimator__n_estimators': [2]
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 1 candidates, totalling 2 fits
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=2 ...................................
[CV] .................... me__estimator__n_estimators=2, total=   1.4s
[CV] .................... me__estimator__n_estimators=2, total=   1.4s


[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    2.1s finished


{'me__estimator__n_estimators': 2} 1.6689082278481013


In [26]:
# Process Step, bagged PassiveAggressiveRegressor (bss[2]): single candidate
# n_estimators=2, evaluated with 2-fold CV; prn_rsl prints the mean of the two
# fold scores.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[2])))])
grd = {
    'me__estimator__n_estimators': [2]
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 1 candidates, totalling 2 fits
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=2 ...................................
[CV] .................... me__estimator__n_estimators=2, total=   3.7s
[CV] .................... me__estimator__n_estimators=2, total=   3.8s


[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    4.5s finished


{'me__estimator__n_estimators': 2} 1.6839398734177216


## Contributing Factors

In [28]:
# Contributing Factors task: read the 'CF TFIDF' CSV (latin-1 encoded) as the
# feature table and replace missing cells with the empty string.
# This rebinds `tfidf`, so later cells see the Contributing Factors features.
tfidf = pandas.read_csv('07 NM CF TFIDF.csv', delimiter=',', encoding='latin-1').fillna('')
# Preview the first three rows.
tfidf[:3]

Unnamed: 0,____________________,_____________________,_____________________ follow,able,acceptable,access,accessory,accessory require,accidentally,accommodate,...,wrong ct,wrong info,wrong info mri,wrong patient,wrong pt,wrong set,wrong tattoo,xrt,xrt date,xrt pt
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
# Contributing Factors task: read the 'CF OHE' CSV (latin-1 encoded) as the
# target table and replace missing cells with the empty string.
# NOTE(review): the preview below shows an 'Unnamed: 1' column, i.e. a column
# whose header is presumably blank in the CSV — confirm it is a real label.
ohe = pandas.read_csv('07 NM CF OHE.csv', delimiter=',', encoding='latin-1').fillna('')
# Preview the first three rows.
ohe[:3]

Unnamed: 0,Organizational and/or workspace resources inadequate (excluding human resources),Unnamed: 1,Patient or family member medical condition preference or behaviour,Equipment quality assurance and/or maintenance inadequate,Handoffs inadequate,Communication or documentation inadequate (patient specific),Policies and/or procedures non-existent or inadequate,"Patient or family member medical condition, preference or behaviour",Policies and/or procedures not followed,Unfamiliar treatment approach or radiation treatment technique,...,"Equipment software or hardware design, including 'human factors' design, inadequate",Failure to identify potential risks,"Equipment software or hardware commissioning, calibration or acceptance testing inadequate",Distraction or diversions involving staff,Change management,Human resources inadequate,Expectation bias involving staff,Staff behaviour,Staff education or training inadequate,Patient education inadequate
0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,1,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
# Contributing Factors, bagged Ridge (bss[0]): single candidate
# n_estimators=2, evaluated with 2-fold CV; prn_rsl prints the mean of the two
# fold scores.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[0])))])
grd = {
    'me__estimator__n_estimators': [2]
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 1 candidates, totalling 2 fits
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=2 ...................................
[CV] .................... me__estimator__n_estimators=2, total= 1.0min
[CV] .................... me__estimator__n_estimators=2, total= 5.4min


[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:  5.4min finished


{'me__estimator__n_estimators': 2} 6.593836746117822


In [31]:
# Contributing Factors, bagged LinearSVR (bss[1]): single candidate
# n_estimators=2, evaluated with 2-fold CV; prn_rsl prints the mean of the two
# fold scores.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[1])))])
grd = {
    'me__estimator__n_estimators': [2]
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 1 candidates, totalling 2 fits
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=2 ...................................
[CV] .................... me__estimator__n_estimators=2, total=   3.9s
[CV] .................... me__estimator__n_estimators=2, total=   4.0s


[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    5.2s finished


{'me__estimator__n_estimators': 2} 5.721465806348429


In [32]:
# Contributing Factors, bagged PassiveAggressiveRegressor (bss[2]): single
# candidate n_estimators=2, evaluated with 2-fold CV; prn_rsl prints the mean
# of the two fold scores.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[2])))])
grd = {
    'me__estimator__n_estimators': [2]
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 1 candidates, totalling 2 fits
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=2 ...................................
[CV] .................... me__estimator__n_estimators=2, total=   8.5s
[CV] .................... me__estimator__n_estimators=2, total=   8.6s


[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    9.9s finished


{'me__estimator__n_estimators': 2} 5.762122340452117


## Overall Severity

In [85]:
# Overall Severity task: read the 'OS TFIDF' CSV (latin-1 encoded) as the
# feature table and replace missing cells with the empty string.
# This rebinds `tfidf`, so later cells see the Overall Severity features.
tfidf = pandas.read_csv('07 NM OS TFIDF.csv', delimiter=',', encoding='latin-1').fillna('')
# Preview the first three rows.
tfidf[:3]

Unnamed: 0,____________________,_____________________,_____________________ follow,able,acceptable,access,accessory,accessory require,accidentally,accommodate,...,wrong ct,wrong info,wrong info mri,wrong patient,wrong pt,wrong set,wrong tattoo,xrt,xrt date,xrt pt
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [92]:
# Overall Severity task: read the 'OS OHE' CSV (latin-1 encoded) as the target
# table and replace missing cells with the empty string.
# This rebinds `ohe`, so later cells see the Overall Severity targets.
ohe = pandas.read_csv('07 NM OS OHE.csv', delimiter=',', encoding='latin-1').fillna('')
# Preview the first three rows.
ohe[:3]

Unnamed: 0,Severe,Moderate,None,Mild
0,0,0,1,0
1,0,0,0,1
2,0,0,1,0


In [94]:
# Overall Severity, bagged Ridge (bss[0]): single candidate n_estimators=2,
# evaluated with 2-fold CV; prn_rsl prints the mean of the two fold scores.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[0])))])
grd = {
    'me__estimator__n_estimators': [2]
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 1 candidates, totalling 2 fits
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=2 ...................................
[CV] .................... me__estimator__n_estimators=2, total=  12.3s
[CV] .................... me__estimator__n_estimators=2, total=  13.2s


[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:   13.8s finished


{'me__estimator__n_estimators': 2} 1.1109987954878275


In [95]:
# Overall Severity, bagged LinearSVR (bss[1]): single candidate
# n_estimators=2, evaluated with 2-fold CV; prn_rsl prints the mean of the two
# fold scores.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[1])))])
grd = {
    'me__estimator__n_estimators': [2]
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 1 candidates, totalling 2 fits
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=2 ...................................
[CV] .................... me__estimator__n_estimators=2, total=   1.1s
[CV] .................... me__estimator__n_estimators=2, total=   1.1s


[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    1.8s finished


{'me__estimator__n_estimators': 2} 1.1120588661592055


In [96]:
# Overall Severity, bagged PassiveAggressiveRegressor (bss[2]): single
# candidate n_estimators=2, evaluated with 2-fold CV; prn_rsl prints the mean
# of the two fold scores.
ppl = Pipeline([('me', MultiOutputRegressor(BaggingRegressor(bss[2])))])
grd = {
    'me__estimator__n_estimators': [2]
}
gsc = GridSearchCV(estimator=ppl, param_grid=grd, scoring=make_scorer(average_lowest_correct), n_jobs=-1, cv=2, verbose=2, error_score=0)
gsc.fit(tfidf, ohe)
prn_rsl(gsc)

Fitting 2 folds for each of 1 candidates, totalling 2 fits
[CV] me__estimator__n_estimators=2 ...................................
[CV] me__estimator__n_estimators=2 ...................................
[CV] .................... me__estimator__n_estimators=2, total=   2.2s
[CV] .................... me__estimator__n_estimators=2, total=   2.2s


[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    2.8s finished


{'me__estimator__n_estimators': 2} 1.110644938799786


Process Step: 1.69

Problem Type: 2.76

Contributing Factors: 5.72

Overall Severity: 1.11