In [1]:
import pandas as pd
import numpy as np
from time import time
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression, LinearRegression, Lasso
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from diagnostics.evaluation import fit_and_eval, rev_weighted_quad_kappa, rmse, error_rate
from imputers.zero_filler import ZeroFiller
from tools.datasets import get_boston, get_heart, get_houses, get_prudential
from config import *
from transformers import BoxCoxTransformer, LabelsClipper, OutliersClipper, CustomOneHotEncoder, CustomBinner, CustomBinaryBinner, FeatureDropper
import warnings
warnings.filterwarnings('ignore')

In [3]:
# cv = KFold(3, shuffle=True, random_state=0)
# scorer = mean_squared_error
# data, labels, continuous, discrete, dummy, categorical, target = get_boston()
# data, labels, continuous, discrete, dummy, categorical, target = get_houses()
cv = 3
# scorer = accuracy_score
# data, labels, continuous, discrete, dummy, categorical, target = get_heart()
scorer = rev_weighted_quad_kappa
data, labels, continuous, discrete, dummy, categorical, target = get_prudential()

train = data.drop(target, axis=1)

In [4]:
# BINNER_CONFIG = { col: {'values': [train[col].max()]} for col in continuous + discrete }  # houses
# BINNER_CONFIG = { col: {'bins': 3} for col in continuous + discrete }                               # heart
BINNER_CONFIG = { col: {'bins': 5} for col in continuous }                                          # prudential
# BINNER_CONFIG = { col: {'bins': 5} for col in continuous + discrete }                               # boston (or none)

In [5]:
BOX_COX = BOX_COX_P
model = Pipeline([
        ('onehot',       CustomOneHotEncoder(columns=categorical)),
        ('clipper', OutliersClipper(continuous)),
        ('binner', CustomBinaryBinner({})),
        ('binner2', CustomBinaryBinner({})),
        ('zero_filler',  ZeroFiller()),
        ('boxcox', BoxCoxTransformer(BOX_COX)),
        ('scaler', StandardScaler()),
        # ('classifier',   LogisticRegression()),                # for Heart Disease
        ('classifier',   LabelsClipper(regressor=LinearRegression())), # for Prudential
        # ('regressor',    LinearRegression()),                # for Boston and Houses
])


In [6]:
from sklearn.preprocessing import RobustScaler

from config import BINARY_BINNER_CONFIG_PRUD
param_grid = {
    'clipper': [OutliersClipper(continuous), OutliersClipper(continuous), None],
    'binner': [CustomBinaryBinner(BINNER_CONFIG), CustomBinner(BINNER_CONFIG), None],
    'binner2': [CustomBinaryBinner(BINARY_BINNER_CONFIG_PRUD), CustomBinner(BINNER_CONFIG_PRUD), None],
    'boxcox': [ BoxCoxTransformer(BOX_COX), None],
    'scaler': [StandardScaler(), RobustScaler(), None]
}

In [8]:
start = time()
grid = GridSearchCV(model, cv=cv, n_jobs=-1, param_grid=param_grid, scoring=make_scorer(scorer))
grid.fit(train.copy(), labels)
end = time()

(29690, 841)


(29690, 850)


(29690, 855)


(29690, 850)


(29690, 855)


(29690, 841)


(29690, 850)


(29690, 855)


(14845, 850)


(14845, 855)


(14845, 841)


(14845, 850)


(29690, 850)


(14845, 855)


(29690, 841)


(29690, 855)


(14845, 841)


(14845, 850)


(29690, 850)


(14845, 855)


(29690, 855)


(29690, 841)


(29690, 850)


(29690, 855)


(29690, 855)


(29690, 850)


(29690, 841)


(29690, 841)


(29690, 850)


(29690, 855)


(29690, 841)


(29690, 850)


(14845, 850)


(14845, 855)


(14845, 841)


(14845, 841)


(29690, 850)


(29690, 855)


(29690, 841)


(29690, 841)


(14845, 850)


(14845, 855)


(14845, 841)


(14845, 850)


(29690, 850)


(29690, 841)


(29690, 855)


(29690, 850)


(29690, 855)


(29690, 850)


(29690, 841)


(29690, 855)


(29690, 850)


(29690, 855)


(29690, 841)


(29690, 841)


(14845, 855)


(14845, 850)


(14845, 841)


(29690, 855)


(14845, 855)


(29690, 850)


(29690, 841)


(29690, 855)


(14845, 850)


(14845, 855)


(14845, 841)


(29690, 850)


(29690, 855)


(14845, 841)


(29690, 841)


(29690, 841)


(29690, 855)


(29690, 850)


(29690, 841)


(29690, 850)


(29690, 855)


(29690, 841)


(29690, 850)


(29690, 855)


(14845, 841)


(14845, 855)


(14845, 850)


(14845, 850)


(29690, 850)


(29690, 850)


(29690, 841)


(29690, 855)


(14845, 841)


(14845, 855)


(29690, 841)


(14845, 855)


(29690, 855)


(14845, 850)


(29690, 855)


(29690, 850)


(29690, 850)


(29690, 855)


(29690, 841)


(29690, 841)


(29690, 847)


(29690, 852)


(29690, 838)


(29690, 847)


(14845, 855)


(14845, 841)


(14845, 850)


(29690, 855)


(29690, 841)


(29690, 850)


(14845, 841)


(29690, 841)


(14845, 847)


(14845, 852)


(14845, 838)


(29690, 847)


(29690, 852)


(14845, 847)


(29690, 838)


(29690, 847)


(29690, 852)


(29690, 838)


(29690, 847)


(29690, 852)


(29690, 852)


(29690, 838)


(29690, 847)


(29690, 838)


(14845, 852)


(29690, 852)


(14845, 838)


(14845, 847)


(29690, 838)


(29690, 847)


(14845, 852)


(29690, 852)


(14845, 852)


(14845, 838)


(29690, 852)


(14845, 847)


(29690, 838)


(29690, 847)


(14845, 838)


(29690, 838)


(29690, 847)


(29690, 852)


(29690, 838)


(29690, 847)


(29690, 838)


(29690, 852)


(29690, 847)


(14845, 847)


(14845, 852)


(29690, 852)


(29690, 847)


(29690, 852)


(14845, 838)


(29690, 838)


(14845, 847)


(29690, 847)


(14845, 838)


(14845, 852)


(29690, 838)


(14845, 847)


(29690, 852)


(29690, 847)


(14845, 852)


(29690, 852)


(29690, 838)


(29690, 847)


(29690, 852)


(29690, 838)


(29690, 852)


(29690, 847)


(14845, 838)


(29690, 838)


(14845, 847)


(29690, 838)


(29690, 847)


(29690, 847)


(14845, 852)


(29690, 852)


(14845, 838)


(29690, 838)


(14845, 852)


(14845, 847)


(29690, 852)


(14845, 838)


(29690, 847)


(29690, 838)


(14845, 847)


(29690, 847)


(29690, 852)


(29690, 838)


(29690, 847)


(29690, 852)


(29690, 838)


(14845, 852)


(14845, 838)


(29690, 847)


(29690, 852)


(29690, 852)


(29690, 838)


(29690, 838)


(14845, 847)


(29690, 847)


(14845, 852)


(29690, 852)


(14845, 838)


(14845, 847)


(14845, 852)


(29690, 838)


(14845, 838)


(29690, 847)


(29690, 852)


(29690, 838)


(29690, 832)


(29690, 827)


(29690, 818)


(29690, 827)


(29690, 827)


(14845, 832)


(29690, 818)


(29690, 832)


(14845, 827)


(29690, 832)


(29690, 832)


(29690, 827)


(14845, 818)


(14845, 827)


(29690, 818)


(29690, 827)


(14845, 818)


(14845, 832)


(14845, 827)


(29690, 818)


(29690, 832)


(14845, 832)


(29690, 827)


(29690, 832)


(29690, 827)


(29690, 818)


(29690, 832)


(29690, 818)


(14845, 827)


(14845, 818)


(29690, 832)


(29690, 827)


(29690, 818)


(29690, 827)


(29690, 818)


(29690, 827)


(14845, 832)


(14845, 818)


(29690, 832)


(29690, 818)


(14845, 832)


(14845, 818)


(14845, 827)


(29690, 832)


(29690, 818)


(29690, 827)


(14845, 827)


(29690, 827)


(29690, 832)


(29690, 818)


(29690, 827)


(29690, 832)


(14845, 832)


(29690, 832)


(14845, 818)


(29690, 827)


(29690, 832)


(29690, 818)


(29690, 818)


(29690, 818)


(14845, 827)


(29690, 827)


(14845, 832)


(29690, 832)


(14845, 827)


(14845, 818)


(29690, 827)


(14845, 832)


(29690, 832)


(29690, 818)


(14845, 818)


(29690, 818)


(29690, 827)


(29690, 832)


(29690, 818)


(29690, 827)


(29690, 832)


(14845, 827)


(14845, 832)


(29690, 832)


(29690, 827)


(29690, 827)


(29690, 818)


(29690, 832)


(14845, 818)


(14845, 827)


(29690, 818)


(29690, 827)


(14845, 832)


(29690, 832)


(14845, 827)


(14845, 818)


(14845, 832)


(29690, 827)


(29690, 818)


(29690, 832)


(29690, 827)


(29690, 818)


(29690, 818)


(29690, 832)


(29690, 811)


(14845, 827)


(14845, 818)


(29690, 827)


(29690, 802)


(29690, 818)


(29690, 816)


(14845, 818)


(29690, 811)


(29690, 818)


(14845, 832)


(29690, 832)


(14845, 811)


(29690, 811)


(14845, 802)


(14845, 816)


(29690, 802)


(14845, 811)


(29690, 816)


(29690, 811)


(29690, 802)


(29690, 816)


(29690, 816)


(29690, 811)


(14845, 802)


(29690, 802)


(14845, 816)


(29690, 802)


(29690, 816)


(29690, 811)


(29690, 816)


(29690, 802)


(14845, 811)


(14845, 816)


(29690, 811)


(29690, 816)


(14845, 802)


(14845, 811)


(29690, 802)


(29690, 811)


(14845, 802)


(14845, 816)


(29690, 802)


(29690, 816)


(29690, 811)


(29690, 816)


(29690, 811)


(29690, 802)


(14845, 816)


(14845, 811)


(29690, 816)


(29690, 802)


(29690, 811)


(29690, 816)


(29690, 811)


(14845, 811)


(29690, 816)


(29690, 811)


(14845, 802)


(29690, 802)


(14845, 816)


(14845, 802)


(29690, 816)


(29690, 802)


(14845, 811)


(14845, 816)


(29690, 811)


(29690, 816)


(29690, 811)


(29690, 802)


(29690, 816)


(29690, 802)


(14845, 811)


(29690, 811)


(29690, 811)


(29690, 816)


(14845, 816)


(14845, 802)


(29690, 816)


(29690, 811)


(14845, 802)


(29690, 802)


(29690, 802)


(29690, 802)


(14845, 816)


(14845, 811)


(29690, 816)


(29690, 811)


(14845, 811)


(29690, 811)


(14845, 802)


(29690, 802)


(29690, 816)


(29690, 802)


(29690, 811)


(29690, 816)


(14845, 816)


(29690, 802)


(14845, 802)


(29690, 811)


(29690, 816)


(14845, 811)


(29690, 802)


(29690, 816)


(29690, 811)


(14845, 816)


(29690, 802)


(29690, 816)


(14845, 802)


(14845, 811)


(29690, 802)


(29690, 811)


(14845, 816)


(29690, 816)


(14845, 802)


(29690, 802)


(29690, 808)


(29690, 813)


(29690, 799)


(29690, 808)


(14845, 808)


(29690, 799)


(14845, 813)


(29690, 813)


(29690, 808)


(14845, 799)


(29690, 813)


(14845, 808)


(29690, 799)


(29690, 808)


(29690, 808)


(29690, 813)


(14845, 799)


(14845, 813)


(29690, 799)


(29690, 813)


(14845, 808)


(29690, 808)


(14845, 813)


(29690, 813)


(29690, 808)


(29690, 799)


(29690, 813)


(29690, 799)


(14845, 808)


(29690, 808)


(14845, 799)


(29690, 808)


(29690, 813)


(14845, 813)


(29690, 799)


(14845, 799)


(29690, 813)


(29690, 799)


(29690, 799)


(29690, 808)


(14845, 813)


(14845, 808)


(29690, 813)


(29690, 808)


(14845, 799)


(14845, 808)


(29690, 799)


(29690, 808)


(29690, 813)


(29690, 808)


(29690, 799)


(29690, 813)


(14845, 813)


(14845, 799)


(14845, 808)


(29690, 813)


(29690, 799)


(29690, 808)


(29690, 808)


(29690, 799)


(14845, 813)


(29690, 813)


(29690, 813)


(29690, 799)


(14845, 808)


(14845, 799)


(29690, 808)


(29690, 799)


(14845, 813)


(14845, 799)


(29690, 813)


(29690, 799)


(29690, 808)


(29690, 813)


(29690, 799)


(29690, 808)


(14845, 808)


(14845, 813)


(29690, 808)


(14845, 799)


(29690, 813)


(29690, 799)


(14845, 808)


(29690, 799)


(29690, 813)


(29690, 808)


(29690, 808)


(29690, 813)


(14845, 813)


(14845, 799)


(29690, 813)


(29690, 799)


(14845, 808)


(14845, 813)


(29690, 808)


(29690, 799)


(29690, 813)


(29690, 813)


(29690, 808)


(29690, 799)


(14845, 799)


(14845, 813)


(14845, 808)


(29690, 813)


(29690, 799)


(14845, 799)


(29690, 808)


(29690, 788)


(29690, 799)


(29690, 793)


(29690, 788)


(29690, 779)


(14845, 788)


(14845, 793)


(29690, 788)


(29690, 793)


(14845, 788)


(29690, 788)


(29690, 793)


(14845, 779)


(29690, 779)


(29690, 788)


(29690, 779)


(29690, 793)


(14845, 793)


(14845, 779)


(14845, 788)


(29690, 793)


(29690, 779)


(14845, 793)


(29690, 788)


(29690, 793)


(29690, 788)


(29690, 779)


(29690, 793)


(29690, 779)


(14845, 788)


(14845, 779)


(29690, 788)


(29690, 779)


(14845, 793)


(14845, 779)


(29690, 788)


(29690, 793)


(29690, 779)


(29690, 793)


(29690, 779)


(29690, 788)


(14845, 788)


(29690, 788)


(14845, 793)


(14845, 788)


(14845, 779)


(29690, 788)


(29690, 793)


(29690, 779)


(29690, 793)


(29690, 779)


(29690, 788)


(29690, 793)


(14845, 793)


(14845, 779)


(29690, 793)


(29690, 779)


(14845, 788)


(29690, 779)


(29690, 788)


(14845, 793)


(29690, 793)


(29690, 793)


(29690, 779)


(29690, 788)


(14845, 779)


(14845, 793)


(29690, 779)


(14845, 779)


(29690, 793)


(14845, 788)


(29690, 779)


(29690, 788)


(29690, 788)


(29690, 793)


(29690, 779)


(29690, 788)


(14845, 788)


(29690, 788)


(14845, 793)


(29690, 793)


(14845, 779)


(29690, 779)


(29690, 779)


(29690, 793)


(14845, 788)


(29690, 788)


(29690, 788)


(29690, 793)


(14845, 779)


(14845, 793)


(29690, 779)


(29690, 793)


(14845, 793)


(14845, 788)


(29690, 793)


(29690, 788)


(29690, 779)


(29690, 788)


(29690, 793)


(29690, 779)


(14845, 779)


(14845, 788)


(29690, 779)


(29690, 788)


(14845, 793)


(29690, 793)


(29690, 798)


(14845, 779)


(29690, 779)


(29690, 798)


(29690, 789)


(29690, 803)


(14845, 798)


(29690, 798)


(14845, 798)


(29690, 798)


(14845, 789)


(14845, 803)


(29690, 789)


(29690, 803)


(29690, 789)


(29690, 803)


(29690, 798)


(29690, 803)


(14845, 803)


(14845, 789)


(29690, 803)


(29690, 789)


(14845, 798)


(29690, 789)


(14845, 803)


(29690, 798)


(29690, 798)


(29690, 803)


(29690, 803)


(29690, 789)


(14845, 789)


(29690, 789)


(14845, 798)


(29690, 798)


(14845, 789)


(14845, 803)


(29690, 803)


(29690, 789)


(29690, 803)


(29690, 798)


(29690, 798)


(29690, 789)


(14845, 803)


(14845, 798)


(29690, 803)


(14845, 798)


(29690, 803)


(14845, 789)


(29690, 798)


(29690, 798)


(29690, 789)


(29690, 789)


(29690, 803)


(29690, 798)


(14845, 803)


(29690, 803)


(14845, 789)


(29690, 789)


(14845, 803)


(14845, 798)


(29690, 803)


(29690, 798)


(29690, 789)


(29690, 789)


(29690, 798)


(29690, 803)


(29690, 798)


(14845, 789)


(14845, 789)


(14845, 798)


(14845, 803)


(29690, 789)


(29690, 798)


(29690, 803)


(29690, 803)


(29690, 789)


(29690, 798)


(29690, 789)


(14845, 798)


(29690, 798)


(14845, 803)


(29690, 803)


(14845, 798)


(14845, 789)


(29690, 798)


(29690, 789)


(29690, 803)


(29690, 789)


(29690, 798)


(29690, 803)


(14845, 789)


(14845, 803)


(29690, 789)


(29690, 803)


(29690, 789)


(14845, 798)


(14845, 803)


(29690, 798)


(29690, 798)


(29690, 803)


(29690, 803)


(29690, 789)


(14845, 789)


(29690, 789)


(14845, 798)


(29690, 798)


(14845, 803)


(14845, 789)


(29690, 803)


(29690, 789)


(29690, 795)


(29690, 800)


(29690, 786)


(29690, 795)


(14845, 795)


(29690, 800)


(14845, 800)


(29690, 795)


(29690, 786)


(29690, 800)


(14845, 786)


(14845, 795)


(29690, 786)


(29690, 795)


(29690, 800)


(29690, 795)


(14845, 800)


(29690, 800)


(14845, 786)


(29690, 786)


(14845, 800)


(14845, 795)


(29690, 795)


(29690, 800)


(29690, 786)


(29690, 795)


(29690, 800)


(29690, 786)


(14845, 786)


(29690, 795)


(14845, 795)


(29690, 786)


(29690, 795)


(29690, 800)


(14845, 800)


(29690, 800)


(14845, 786)


(29690, 786)


(29690, 786)


(29690, 795)


(14845, 795)


(29690, 795)


(14845, 800)


(29690, 800)


(14845, 786)


(29690, 800)


(14845, 795)


(29690, 786)


(29690, 795)


(29690, 786)


(29690, 795)


(29690, 800)


(14845, 800)


(14845, 786)


(29690, 800)


(29690, 786)


(29690, 786)


(14845, 795)


(29690, 795)


(29690, 795)


(14845, 800)


(29690, 800)


(29690, 800)


(29690, 786)


(14845, 786)


(29690, 786)


(14845, 795)


(29690, 795)


(14845, 800)


(14845, 786)


(29690, 795)


(29690, 800)


(29690, 786)


(29690, 800)


(29690, 786)


(29690, 795)


(14845, 795)


(29690, 800)


(29690, 795)


(14845, 800)


(14845, 786)


(29690, 800)


(29690, 786)


(29690, 786)


(14845, 795)


(29690, 795)


(29690, 800)


(29690, 795)


(14845, 800)


(29690, 800)


(14845, 786)


(29690, 786)


(14845, 795)


(14845, 800)


(29690, 786)


(29690, 800)


(29690, 795)


(29690, 800)


(29690, 795)


(29690, 786)


(29690, 775)


(14845, 786)


(14845, 800)


(14845, 795)


(29690, 786)


(29690, 800)


(29690, 795)


(29690, 780)


(14845, 786)


(29690, 786)


(14845, 775)


(29690, 766)


(29690, 775)


(29690, 775)


(14845, 780)


(29690, 780)


(14845, 766)


(29690, 766)


(14845, 775)


(29690, 766)


(29690, 780)


(29690, 775)


(29690, 775)


(29690, 780)


(29690, 766)


(14845, 766)


(14845, 780)


(29690, 766)


(29690, 780)


(14845, 775)


(29690, 775)


(29690, 775)


(14845, 780)


(29690, 780)


(29690, 766)


(14845, 766)


(29690, 780)


(29690, 766)


(14845, 775)


(29690, 775)


(14845, 766)


(29690, 766)


(14845, 780)


(29690, 780)


(29690, 780)


(29690, 775)


(29690, 766)


(29690, 775)


(29690, 780)


(14845, 775)


(14845, 780)


(29690, 775)


(29690, 780)


(29690, 766)


(14845, 766)


(14845, 775)


(29690, 775)


(29690, 766)


(14845, 780)


(29690, 780)


(29690, 775)


(29690, 780)


(14845, 766)


(29690, 766)


(29690, 775)


(29690, 766)


(14845, 775)


(29690, 775)


(14845, 780)


(29690, 780)


(29690, 780)


(29690, 766)


(29690, 775)


(14845, 775)


(14845, 766)


(29690, 775)


(29690, 766)


(29690, 780)


(14845, 766)


(14845, 780)


(29690, 766)


(29690, 780)


(14845, 775)


(29690, 775)


(29690, 766)


(29690, 775)


(14845, 780)


(29690, 780)


(29690, 766)


(29690, 780)


(14845, 766)


(14845, 775)


(29690, 766)


(29690, 775)


(29690, 775)


(14845, 766)


(29690, 780)


(29690, 766)


(14845, 780)


(29690, 766)


(29690, 780)


(29690, 775)


(14845, 775)


(14845, 766)


(29690, 775)


(14845, 780)


(29690, 766)


(29690, 780)


(29690, 780)


(29690, 766)


(14845, 775)


(29690, 775)


(14845, 780)


(29690, 780)


(14845, 766)


(29690, 766)


(44535, 917)


In [1]:
import numpy
numpy.set_printoptions(threshold=numpy.nan)
print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
      % (end - start, len(grid.cv_results_['params'])))
print(grid.best_params_)


NameError: name 'end' is not defined