In [273]:
import pandas as pd
import numpy as np
import calendar

from bokeh.charts import output_notebook, Scatter, Bar, show, output_file, Line, BoxPlot, Scatter
from bokeh.plotting import figure
from bokeh.layouts import row, column, gridplot
from bokeh.charts import output_notebook, Scatter, Bar, show, output_file, Line, BoxPlot, Scatter
from bokeh.io import hplot

output_notebook() 

In [274]:
INPUT="data/device_failure.csv" 
dataset = pd.read_csv(INPUT,index_col=[0,1],parse_dates=[0])

## per device and time model

 - Set up first model:
     - windowing group by
     - per device test/train split strategy
 - Precision/recall, ROC
 - Calibration
 - feature engineering
     - last n values ?
 - Test other models
 - TPOT

## Build Training set

In [53]:
# fitering methods
suspicious_positives = set(["S1F0GPFZ", "S1F136J0", "W1F0KCP2", "W1F0M35B", "W1F11ZG9"])
def filter_devices(df):
    return df.filter(axis="index",items=(set(df.index) - suspicious_positives)) 

In [54]:
# feature preprocessing
def build_deriv(df,c,n=1):
    def per_device(per_device):
        clean_index = per_device.reset_index(level=1,drop=True)
        resampled=clean_index.resample('1D').pad()
        
        raw_diff = np.diff(resampled,n=n)
        #fill the series start with zeros
        while (len(raw_diff) < len(resampled)):
            raw_diff = np.insert(raw_diff,0,0)
        d = pd.Series(data=raw_diff,index = resampled.index )
        #print d
        return d.dropna()[d>0]
    some_d= df[c].groupby(level="device").apply(per_device)
    return some_d.swaplevel()

In [55]:
from scipy.fftpack import fft
#fft_df = feature_dset[[attribute]].copy()

def to_fft(df):
    try:
        resampled =  df.resample("1D",level="date").mean().fillna(method='pad')
    except:
        #if we cannot resample, this (usually) means that we are using a rolling aggregation, outputing
        #an nd.array rather than a df. the good news is, in this case I shoudl already have resampled.
        resampled = df.copy()
    n = len(resampled)
    return np.abs(fft(resampled))[n//2:]

def fft_line(df):
    print df
    return df.groupby(level="device",sort=True).transform(to_fft)

def peaks(line):
    sorted_by_used = sorted(enumerate(line),key = lambda  t: t[1], reverse=True )
    boundaries = set()
    peaks = []
    for i,value in sorted_by_used:
        if i not in boundaries:
            peaks.append((i,value))
        # in any case, i neighbors cannot be peaks now.
        boundaries.add(i+1)
        boundaries.add(i-1)
    return peaks
            
    
def fft_peak(df,p=0,index_no_value=True):
    fft = to_fft(df)
    #fft = fft_line(df)
    all_peaks = peaks(fft.tolist())
    if (len(all_peaks) > p):
        return all_peaks[p][0 if index_no_value else 1]
    else:
        return 0

In [56]:
def pre_filter(df):
    res = df.copy()
    del res["attribute1"]
    del res["attribute3"]
    #del res["attribute5"]
    dt_list = ["attribute2"]#,"attribute8"]
    for c in dt_list:
        deriv = build_deriv(res,c)
        res["dt_%s" % c] = deriv
        res["dt2_%s" % c] =  build_deriv(res,c,2)
    return res.fillna(0)

def post_filter(df):
    res = df.copy()
    res = filter_devices(res)
    for col in res.columns:
        if "min" in col:
            del res[col]
        if "std" in col:
            del res[col]
    return res

In [330]:
pre_dataset = pre_filter(dataset)
#print feature_set.columns

features = [f for f in pre_dataset.columns if "att" in f]
def f_to_dict(feature):
    indexes = dict( ("avg_over%i_%s" % (i,feature), lambda df: df[:(i+1)].mean()) for i in [0,1,2,3,4,7,14])
    d = {
            "min_%s" % feature:np.min,
            "max_%s" % feature:np.max,
            "mean_%s" % feature :np.mean,
            "std_%s" % feature:np.std
        }
    d.update(indexes)
    dft_list = ["attribute4","attribute5", "attribute6","attribute7","attribute9"]
    if feature in dft_list:
        d["dft_p0_ind%s" % feature] = lambda r : fft_peak(r,p=0,index_no_value=True)
        d["dft_p0_val%s" % feature] = lambda r : fft_peak(r,p=0,index_no_value=False)
        #d["dft_p1_ind%s" % feature] = lambda r : fft_peak(r,p=1,index_no_value=True)
        #d["dft_p1_val%s" % feature] = lambda r : fft_peak(r,p=1,index_no_value=False)
    return d

agg_dict = dict( (f,f_to_dict(f)) for f in features )
#print agg_dict

In [331]:
# bugfix: rolling aggregation after group by does not handle multiple aggregation per column..
# we fix this by flattening the aggregation dict and repeating the data within the dataframe
final_columns = [k for c in agg_dict for k in agg_dict[c]]
input_columns = [c for c in agg_dict for k in agg_dict[c]]
flat_agg_dict = dict( (k,agg_dict[c][k]) for c in agg_dict for k in agg_dict[c])
dup_dataset = pre_dataset[input_columns].sort_index(level="date").sort_index(level="device")
dup_dataset.columns = final_columns

In [332]:
def resample_per_device(df):
    if dataset.index.names == ["device","date"]:
        df = df.swaplevel().sort_index()
    groups = df.groupby(level="device")
    sampled = (
        groups.get_group(g).reset_index(level="device").resample("1D").pad().reset_index()
        for g in groups.groups)
    return pd.concat(sampled).set_index(["date","device"])

In [None]:
#
# This is where magic happens  !!!
# instead of simply grouping by, we roll over the dataset...
# using the "hacky" flat version, to avoid issues.
# The hack increase the memory consumption quite a lot, but it should still be better than
# explicitely building the windowed lines before aggregating over it. 
#

feature_set = resample_per_device(dup_dataset) \
    .groupby(level="device",as_index=False) \
    .rolling(window=360,min_periods=1) \
    .agg(flat_agg_dict) \
    .reset_index(level=0,drop=True) 
feature_set = post_filter(feature_set).sortlevel(level="device")

In [360]:
# feature filtering
# feature filtering
feat_filtering_thres = 5e-3
try : 
    kept_features = feature_imp[feature_imp.importance > feat_filtering_thres]
    print "threshold : %g, kept: %i features" % (feat_filtering_thres,kept_features.size)
    filtered = feature_set.filter(items=feature_imp.index.sort_values())
    print "filtering devices"
    feature_set = filtered
except:
    print "no feature filtering"

threshold : 0.005, kept: 43 features
filtering devices


In [361]:
#
# use label_window to expand label to neighboring days.
# basically, a mainrtenance x days before failure is still OK
#
label_window = 7

label_set =  resample_per_device(dataset[["failure"]]) \
    .sortlevel(level="date",ascending=False) \
    .groupby(level="device",as_index=False) \
    .rolling(window=label_window, min_periods=1) \
    .sum() \
    .reset_index(level=0,drop=True) 
label_set = filter_devices(label_set).sortlevel(level="device")


In [362]:
negative_subsampling_fraction = 5e-2
idx = pd.IndexSlice

def subsample_negatives(frac,label_set,feature_set):
    set_size = label_set.shape[0]
    pos_size = label_set[label_set["failure"]>0].shape[0]
    print "%i, with %i positives" % (set_size,pos_size)
    sub_label = label_set[label_set["failure"]>0].append(label_set[label_set["failure"]==0].sample(frac=frac))
    sub_feature = feature_set.loc[idx[sub_label_set.index]]
    sub_size = sub_label.shape[0]
    sub_size_0 = sub_feature.shape[0]
    print "new size %i" % sub_size_0 
    return sub_label, sub_feature
sub_label_set , sub_feature_set = subsample_negatives(negative_subsampling_fraction,label_set,feature_set)


131479, with 740 positives
new size 7277


In [364]:
# compute groups (devices), to be used when splitting the training set (train/test)
# This is useful to avoid the bias selection generated by a temporal model
# (Basically, a device used in train cannot also be in test, because its attributes will be very similar)
# this is why the cross-val strategy must split by device, and not merely at random
devices = sub_label_set.index.get_level_values("device")
device_index = dict((device,i) for i,device in enumerate(devices.unique()))
n_dev = len(device_index)
device_groups = np.array(devices.to_series().map(device_index).tolist())

## Run model

In [429]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import Normalizer
from sklearn.metrics import roc_curve, auc
from sklearn.svm import SVC, NuSVC
from sklearn.model_selection import LeavePGroupsOut, GroupShuffleSplit

feature_mat = sub_feature_set.as_matrix()
label_mat = sub_label_set.as_matrix().ravel()

#splitting_strategy = LeavePGroupsOut(n_groups=n_dev/4)
splitting_strategy = GroupShuffleSplit(n_splits=4,test_size=0.33)
                                     
#model = GradientBoostingClassifier()
pca = PCA()#n_components="mle",svd_solver="full")
norm = Normalizer()
#model=GradientBoostingClassifier()
model = RandomForestClassifier()
#model = SVC(probability=True)
pipeline= Pipeline([('normalize', norm),('reduce_dim', pca),("model",model)])
pipeline = pipeline_optimizer._fitted_pipeline
#pipeline= Pipeline([("model",model)])
try:
    # use best parameters if available
    #
    pipeline.set_params(**grid_result.best_params_)
    print "using last optimized model"
except:
    print "no optim result, or bad ones: let's keep the default ones"
    pass
scores = cross_val_score(
    pipeline,
    feature_mat,
    label_mat,
    cv=splitting_strategy,
    groups = device_groups,
    verbose=10,
    scoring="f1",
    n_jobs=6)                          
print "accurracy: %g, std(%g))" % (scores.mean(), scores.std())

no optim result, or bad ones: let's keep the default ones
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV] ................................. , score=0.519481, total=   0.1s
[CV] ................................. , score=0.509240, total=   0.1s
[CV] ................................. , score=0.614876, total=   0.2s
[CV] ................................. , score=0.522727, total=   0.2s
accurracy: 0.541581, std(0.0426086))


[Parallel(n_jobs=6)]: Done   1 tasks      | elapsed:    0.2s
[Parallel(n_jobs=6)]: Batch computation too fast (0.1604s.) Setting batch_size=2.
[Parallel(n_jobs=6)]: Done   2 out of   4 | elapsed:    0.2s remaining:    0.2s
[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:    0.2s finished


In [430]:
pipeline.get_params()

{'binarizer': Binarizer(copy=True, threshold=0.3),
 'binarizer__copy': True,
 'binarizer__threshold': 0.3,
 'gaussiannb': GaussianNB(priors=None),
 'gaussiannb__priors': None,
 'robustscaler': RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
        with_scaling=True),
 'robustscaler__copy': True,
 'robustscaler__quantile_range': (25.0, 75.0),
 'robustscaler__with_centering': True,
 'robustscaler__with_scaling': True,
 'selectfwe': SelectFwe(alpha=0.05, score_func=<function f_classif at 0x7fefd403c230>),
 'selectfwe__alpha': 0.05,
 'selectfwe__score_func': <function sklearn.feature_selection.univariate_selection.f_classif>,
 'steps': [('robustscaler',
   RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
          with_scaling=True)),
  ('binarizer', Binarizer(copy=True, threshold=0.3)),
  ('selectfwe',
   SelectFwe(alpha=0.05, score_func=<function f_classif at 0x7fefd403c230>)),
  ('gaussiannb', GaussianNB(priors=None))]}

### Eval Model

In [431]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, accuracy_score,precision_recall_curve, auc

X_train, X_test, Y_train, Y_test = train_test_split(feature_mat,label_mat,test_size=0.3)
# calculate the fpr and tpr for all thresholds of the classification

fitted = pipeline.fit(X_train,Y_train)
probs = fitted.predict_proba(X_test)
preds = probs[:,1]
preds_train = fitted.predict_proba(X_train)[:,1]
fpr, tpr, threshold = roc_curve(Y_test, preds)
fpr_train, tpr_train, threshold_train = roc_curve(Y_train, preds_train)
roc_auc = auc(fpr, tpr)
roc_auc_train = auc(fpr_train, tpr_train)
precision, recall, ths = precision_recall_curve(Y_test, preds)
precision_train, recall_train, ths_train = precision_recall_curve(Y_train, preds_train)

In [432]:
from bokeh.models.ranges import Range1d
#print "auc: %.2g, on train: %.2g" %(roc_auc, roc_auc_train)
roc_df = pd.DataFrame({"fpr":fpr,"tpr":tpr}).set_index("fpr")
pr_df = pd.DataFrame({"precision": precision, "recall":recall}).set_index("recall")
roc_df["diag"] = roc_df.index
pr_df["random"] = pr_df.precision.iloc[0]

# roc curve
roc_f = figure(width=400,height=400,title="roc, auc: %.2g, on train: %.2g"  %(roc_auc, roc_auc_train) )
roc_f.xaxis.axis_label = "tpr"
auc_range= Range1d(0,1)
roc_f.x_range = auc_range 
roc_f.y_range = auc_range 
roc_f.yaxis.axis_label = "fpr"
roc_f.cross(fpr,tpr,size=5)
roc_f.line(fpr,tpr,legend="roc")
roc_f.circle(fpr_train,tpr_train,size=5,color="red", line_width=1)
roc_f.line(fpr_train,tpr_train,color="red",legend="roc on train")
roc_f.line([0,1],[0,1], color="grey")

# pr curve
pr_f = figure(width=400,height=400,title="PR curve")
pr_f.xaxis.axis_label = "recall"
pr_f.yaxis.axis_label = "precision"
pr_f.cross(recall,precision,size=5)
pr_f.line(recall,precision,legend="PR")
pr_f.circle(recall_train,precision_train,size=5,color="red", line_width=1)
pr_f.line(recall_train,precision_train,color="red",legend="PR on train")

show(row(
    pr_f,
    roc_f
))

### Feature Importance

In [370]:
feature_imp = pd.DataFrame({"importance":model.feature_importances_}).set_index(feature_set.columns)
feature_imp.sort_values(by="importance",ascending=False)

Unnamed: 0,importance
avg_over2_attribute7,0.075044
avg_over2_attribute2,0.052664
avg_over1_dt2_attribute2,0.047126
avg_over1_attribute7,0.041062
avg_over0_dt2_attribute2,0.040053
avg_over1_attribute5,0.037641
avg_over14_attribute9,0.037396
avg_over1_attribute4,0.035768
avg_over2_attribute5,0.035039
avg_over0_attribute7,0.034961


### Hyperparameter optimisation




In [371]:
# model : GradientBoostingClassifier, parameters: 
#loss : {‘deviance’, ‘exponential’},
#learning_rate : float, optional (default=0.1)
#n_estimators : int (default=100)
#max_depth : integer, optional (default=3)
#min_samples_split : int, float, optional (default=2)
grids=dict()
XDB_param_grid = {
    #"model__loss":  ["deviance", 'exponential'],
    "model__learning_rate" : [1e-3,0.01, 0.1],
    "model__n_estimators" : [10, 50, 100, 150],
    "model__max_depth" : [5,10,15],
    "model__min_samples_split" : [5,10,20]
}
grids[GradientBoostingClassifier] = XDB_param_grid

In [372]:
# C :penalty
# kernel : ‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’ 
SVC_param_grid = {
   'model__C': [1e-7,1e-6,1e-5,0.1],
    "model__kernel": ["rbf","linear"],
    #"model_degree" : [1,3,5], # polynomial degrees
    "model__gamma" : ["auto"], # kernel coef (rbf)
    #"coef0" # for poly, signmoid
    "model__tol" : [1e-7,1e-6,1e-5, 1e-4,1e-3]
}
grids[SVC] = SVC_param_grid

In [373]:
# model : RandomForestClassifier, parameters: 
# n_estimators : int (default=100)
# criterion : "gini","entropy"
# max_features : auto , fraction
# max_depth : integer, optional (default=3)
# min_samples_split : int, float, optional (default=2)

RF_param_grid = {
    "model__criterion":  ["entropy"],
    "model__n_estimators" : [100,200,300,400],
    "model__max_features" : ["auto"],
    "model__max_depth" : [20,30,40],
    "model__min_samples_split" : [10,20,30]
}
grids[RandomForestClassifier] = RF_param_grid

In [374]:
m  = type(dict(pipeline.steps)["model"])
param_grid=grids[m]

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold

splits = splitting_strategy.split(feature_mat,label_mat,device_groups)
#kfold = StratifiedKFold(n_splits=6, shuffle=True)
grid_search = GridSearchCV(
    pipeline, 
    param_grid,
    scoring="f1",
    n_jobs=-1,
    verbose=1,
    cv=splits
)
grid_result = grid_search.fit(feature_mat,label_mat)


# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
#for mean, stdev, param in zip(means, stds, params):
#    print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 4 folds for each of 36 candidates, totalling 144 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 144 out of 144 | elapsed:  5.5min finished


Best: 0.380537 using {'model__n_estimators': 100, 'model__max_features': 'auto', 'model__max_depth': 40, 'model__min_samples_split': 30, 'model__criterion': 'entropy'}


In [None]:
pipeline.get_params()

In [485]:
from tpot import TPOTClassifier
pipeline_optimizer = TPOTClassifier(
    generations=10, # the more generatiion, the more optimized you get
    population_size=200,
    num_cv_folds=4,
    scoring="f1",
    random_state=42,
    verbosity=3)

pipeline_optimizer.fit(feature_mat, label_mat)
print pipeline_optimizer.score(feature_mat, label_mat)
pipeline_optimizer.export('tpot_longrun_exported_pipeline.py')

Optimization Progress:   0%|          | 6/2200 [04:23<22:12:02, 36.43s/pipeline]n Progress:   0%|          | 1/2200 [00:40<24:32:02, 40.16s/pipeline]          Optimization Progress:   0%|          | 5/2200 [04:23<31:41:28, 51.98s/pipeline]

Timeout during evaluation of pipeline #5. Skipping to the next pipeline.


Optimization Progress:   1%|          | 17/2200 [08:09<4:17:38,  7.08s/pipeline] ptimization Progress:   0%|          | 8/2200 [06:23<32:43:34, 53.75s/pipeline]          Optimization Progress:   1%|          | 18/2200 [08:09<14:28:24, 23.88s/pipeline]

Timeout during evaluation of pipeline #18. Skipping to the next pipeline.


Optimization Progress:   1%|          | 23/2200 [13:02<30:10:12, 49.89s/pipeline]Optimization Progress:   1%|          | 20/2200 [09:42<18:55:08, 31.24s/pipeline]          Optimization Progress:   1%|          | 24/2200 [13:02<36:25:49, 60.27s/pipeline]

Timeout during evaluation of pipeline #24. Skipping to the next pipeline.


Optimization Progress:   1%|▏         | 28/2200 [15:56<18:58:37, 31.45s/pipeline]Optimization Progress:   1%|          | 26/2200 [13:08<18:59:39, 31.45s/pipeline]          Optimization Progress:   1%|▏         | 29/2200 [15:56<29:49:14, 49.45s/pipeline]

Timeout during evaluation of pipeline #29. Skipping to the next pipeline.


Optimization Progress:   2%|▏         | 49/2200 [26:22<15:39:37, 26.21s/pipeline]Optimization Progress:   1%|▏         | 31/2200 [16:18<18:31:10, 30.74s/pipeline]          Optimization Progress:   2%|▏         | 48/2200 [26:22<22:20:37, 37.38s/pipeline]

Timeout during evaluation of pipeline #48. Skipping to the next pipeline.


Optimization Progress:   3%|▎         | 61/2200 [28:18<3:27:48,  5.83s/pipeline]]Optimization Progress:   2%|▏         | 51/2200 [26:23<7:42:10, 12.90s/pipeline]           Optimization Progress:   3%|▎         | 62/2200 [28:18<11:09:37, 18.79s/pipeline]

Timeout during evaluation of pipeline #62. Skipping to the next pipeline.


Optimization Progress:   4%|▎         | 77/2200 [35:35<14:11:21, 24.06s/pipeline]Optimization Progress:   3%|▎         | 65/2200 [28:41<8:17:13, 13.97s/pipeline]           Optimization Progress:   4%|▎         | 78/2200 [35:35<19:21:34, 32.84s/pipeline]

Timeout during evaluation of pipeline #78. Skipping to the next pipeline.


Optimization Progress:   4%|▎         | 80/2200 [36:27<15:54:15, 27.01s/pipeline]Optimization Progress:   4%|▎         | 79/2200 [36:27<22:41:51, 38.52s/pipeline]

Timeout during evaluation of pipeline #79. Skipping to the next pipeline.


Optimization Progress:   4%|▍         | 95/2200 [40:05<6:05:02, 10.40s/pipeline] Optimization Progress:   4%|▍         | 83/2200 [37:18<10:56:23, 18.60s/pipeline]          Optimization Progress:   4%|▍         | 96/2200 [40:05<12:50:25, 21.97s/pipeline]

Timeout during evaluation of pipeline #96. Skipping to the next pipeline.


Optimization Progress:   5%|▍         | 107/2200 [58:27<10:14:03, 17.60s/pipeline]ptimization Progress:   4%|▍         | 98/2200 [43:53<45:21:44, 77.69s/pipeline]          Optimization Progress:   5%|▍         | 108/2200 [58:27<117:24:05, 202.03s/pipeline]

Timeout during evaluation of pipeline #108. Skipping to the next pipeline.


                    Optimization Progress:   5%|▍         | 108/2200 [59:15<117:24:05, 202.03s/pipeline]Optimization Progress:   5%|▍         | 109/2200 [59:15<90:30:58, 155.84s/pipeline] 

Timeout during evaluation of pipeline #109. Skipping to the next pipeline.


Optimization Progress:   5%|▌         | 116/2200 [1:02:26<15:31:12, 26.81s/pipeline] timization Progress:   5%|▌         | 111/2200 [1:00:53<60:51:57, 104.89s/pipeline]          Optimization Progress:   5%|▌         | 117/2200 [1:02:26<19:23:13, 33.51s/pipeline]

Timeout during evaluation of pipeline #117. Skipping to the next pipeline.


Optimization Progress:   6%|▌         | 126/2200 [1:08:25<15:49:40, 27.47s/pipeline]Optimization Progress:   5%|▌         | 119/2200 [1:02:44<12:38:32, 21.87s/pipeline]          Optimization Progress:   6%|▌         | 127/2200 [1:08:25<19:37:52, 34.09s/pipeline]

Timeout during evaluation of pipeline #127. Skipping to the next pipeline.


Optimization Progress:   6%|▌         | 130/2200 [1:14:00<39:13:30, 68.22s/pipeline]         Optimization Progress:   6%|▌         | 129/2200 [1:14:00<17:08:02, 29.78s/pipeline]

Timeout during evaluation of pipeline #130. Skipping to the next pipeline.


Optimization Progress:   6%|▌         | 134/2200 [1:15:44<14:57:13, 26.06s/pipeline]Optimization Progress:   6%|▌         | 132/2200 [1:14:39<24:54:06, 43.35s/pipeline]          Optimization Progress:   6%|▌         | 135/2200 [1:15:44<18:55:11, 32.98s/pipeline]

Timeout during evaluation of pipeline #135. Skipping to the next pipeline.


Optimization Progress:   7%|▋         | 145/2200 [1:18:13<9:16:58, 16.26s/pipeline] Optimization Progress:   6%|▌         | 137/2200 [1:16:59<22:02:28, 38.46s/pipeline]          Optimization Progress:   7%|▋         | 144/2200 [1:18:12<13:14:04, 23.17s/pipeline]

Timeout during evaluation of pipeline #144. Skipping to the next pipeline.


Optimization Progress:   7%|▋         | 153/2200 [1:19:41<2:39:58,  4.69s/pipeline]Optimization Progress:   7%|▋         | 147/2200 [1:18:42<8:56:51, 15.69s/pipeline]          Optimization Progress:   7%|▋         | 154/2200 [1:19:41<6:04:43, 10.70s/pipeline]

Timeout during evaluation of pipeline #154. Skipping to the next pipeline.


Optimization Progress:   8%|▊         | 165/2200 [1:22:10<7:54:16, 13.98s/pipeline] ptimization Progress:   7%|▋         | 156/2200 [1:19:43<3:08:56,  5.55s/pipeline]          Optimization Progress:   8%|▊         | 166/2200 [1:22:10<9:44:21, 17.24s/pipeline]

Timeout during evaluation of pipeline #166. Skipping to the next pipeline.


Optimization Progress:   8%|▊         | 174/2200 [1:24:26<5:39:32, 10.06s/pipeline] Optimization Progress:   8%|▊         | 169/2200 [1:23:11<11:59:02, 21.24s/pipeline]          Optimization Progress:   8%|▊         | 175/2200 [1:24:26<12:36:55, 22.43s/pipeline]

Timeout during evaluation of pipeline #175. Skipping to the next pipeline.


                    Optimization Progress:   8%|▊         | 175/2200 [1:25:17<12:36:55, 22.43s/pipeline]Optimization Progress:   8%|▊         | 176/2200 [1:25:17<17:26:44, 31.03s/pipeline]

Timeout during evaluation of pipeline #176. Skipping to the next pipeline.


Optimization Progress:   9%|▉         | 201/2200 [1:29:43<14:50:37, 26.73s/pipeline]timization Progress:   8%|▊         | 178/2200 [1:25:51<13:14:25, 23.57s/pipeline]                    Optimization Progress:   9%|▉         | 200/2200 [1:29:43<21:10:47, 38.12s/pipeline]                    Optimization Progress:   9%|▉         | 200/2200 [1:29:43<21:10:47, 38.12s/pipeline]

Generation 1 - Current Pareto front scores:
1	0.546564102675	BernoulliNB(input_matrix, 0.77000000000000002, 11.0)
2	0.54704102246	BernoulliNB(DecisionTreeClassifier(input_matrix), 0.85999999999999999, 0.84999999999999998)



Optimization Progress:  12%|█▏        | 260/2200 [1:50:11<23:30:18, 43.62s/pipeline]Optimization Progress:   9%|▉         | 204/2200 [1:30:26<12:04:43, 21.79s/pipeline]          Optimization Progress:  12%|█▏        | 261/2200 [1:50:11<24:38:27, 45.75s/pipeline]

Timeout during evaluation of pipeline #261. Skipping to the next pipeline.


Optimization Progress:  12%|█▏        | 269/2200 [1:53:20<10:59:46, 20.50s/pipeline]Optimization Progress:  12%|█▏        | 263/2200 [1:50:13<12:18:51, 22.89s/pipeline]          Optimization Progress:  12%|█▏        | 270/2200 [1:53:20<16:02:32, 29.92s/pipeline]

Timeout during evaluation of pipeline #270. Skipping to the next pipeline.


Optimization Progress:  16%|█▋        | 360/2200 [2:21:23<8:28:49, 16.59s/pipeline] Optimization Progress:  12%|█▏        | 272/2200 [1:54:41<20:47:54, 38.84s/pipeline]Optimization Progress:  13%|█▎        | 286/2200 [1:54:56<1:44:49,  3.29s/pipeline]Optimization Progress:  16%|█▋        | 359/2200 [2:19:07<50:36,  1.65s/pipeline]          Optimization Progress:  16%|█▋        | 361/2200 [2:21:23<14:03:30, 27.52s/pipeline]

Timeout during evaluation of pipeline #361. Skipping to the next pipeline.


Optimization Progress:  18%|█▊        | 401/2200 [2:29:16<7:15:51, 14.54s/pipeline] timization Progress:  17%|█▋        | 364/2200 [2:21:28<7:20:02, 14.38s/pipeline]                     Optimization Progress:  17%|█▋        | 383/2200 [2:29:16<10:28:47, 20.76s/pipeline]                    Optimization Progress:  17%|█▋        | 383/2200 [2:29:16<10:28:47, 20.76s/pipeline]

Generation 2 - Current Pareto front scores:
1	0.546564102675	BernoulliNB(input_matrix, 0.77000000000000002, 11.0)
2	0.551495349613	BernoulliNB(DecisionTreeClassifier(input_matrix), 0.85999999999999999, 0.5)



Optimization Progress:  19%|█▉        | 416/2200 [2:30:43<2:11:04,  4.41s/pipeline]Optimization Progress:  19%|█▊        | 408/2200 [2:29:32<4:14:33,  8.52s/pipeline]          Optimization Progress:  19%|█▉        | 417/2200 [2:30:43<5:27:14, 11.01s/pipeline]

Timeout during evaluation of pipeline #417. Skipping to the next pipeline.


Optimization Progress:  22%|██▏       | 492/2200 [2:33:19<5:40:39, 11.97s/pipeline]Optimization Progress:  19%|█▉        | 420/2200 [2:30:44<2:43:13,  5.50s/pipeline]Optimization Progress:  20%|█▉        | 429/2200 [2:30:58<40:25,  1.37s/pipeline]          Optimization Progress:  22%|██▏       | 490/2200 [2:33:19<8:06:23, 17.07s/pipeline]

Timeout during evaluation of pipeline #490. Skipping to the next pipeline.


Optimization Progress:  26%|██▌       | 577/2200 [2:34:34<05:21,  5.05pipeline/s] Optimization Progress:  22%|██▏       | 494/2200 [2:33:30<3:48:37,  8.04s/pipeline]                    Optimization Progress:  26%|██▌       | 577/2200 [2:34:34<05:21,  5.05pipeline/s]          

Generation 3 - Current Pareto front scores:
1	0.551891875346	BernoulliNB(CombineDFs(input_matrix, input_matrix), 0.83999999999999997, 13.0)



Optimization Progress:  32%|███▏      | 693/2200 [2:39:16<1:38:08,  3.91s/pipeline] imization Progress:  28%|██▊       | 605/2200 [2:34:35<04:06,  6.47pipeline/s]Optimization Progress:  31%|███       | 682/2200 [2:37:18<04:21,  5.82pipeline/s]Optimization Progress:  32%|███▏      | 693/2200 [2:38:28<1:38:08,  3.91s/pipeline]          Optimization Progress:  32%|███▏      | 694/2200 [2:39:16<4:54:39, 11.74s/pipeline]

Timeout during evaluation of pipeline #694. Skipping to the next pipeline.


Optimization Progress:  33%|███▎      | 720/2200 [2:40:35<3:35:59,  8.76s/pipeline]Optimization Progress:  32%|███▏      | 698/2200 [2:39:16<2:25:04,  5.80s/pipeline]          Optimization Progress:  33%|███▎      | 718/2200 [2:40:35<5:08:18, 12.48s/pipeline]

Timeout during evaluation of pipeline #718. Skipping to the next pipeline.


Optimization Progress:  34%|███▍      | 750/2200 [2:43:17<2:41:32,  6.68s/pipeline]Optimization Progress:  33%|███▎      | 723/2200 [2:40:36<1:49:16,  4.44s/pipeline]          Optimization Progress:  34%|███▍      | 751/2200 [2:43:17<8:10:51, 20.33s/pipeline]

Timeout during evaluation of pipeline #751. Skipping to the next pipeline.


Optimization Progress:  34%|███▍      | 754/2200 [2:54:08<58:51:26, 146.53s/pipeline]timization Progress:  34%|███▍      | 752/2200 [2:54:08<84:11:13, 209.30s/pipeline]

Timeout during evaluation of pipeline #752. Skipping to the next pipeline.


          n Progress:  36%|███▌      | 789/2200 [2:56:27<1:00:29,  2.57s/pipeline]]Optimization Progress:  34%|███▍      | 758/2200 [2:54:08<28:46:23, 71.83s/pipeline] Optimization Progress:  34%|███▍      | 759/2200 [2:54:19<28:45:12, 71.83s/pipeline]Optimization Progress:  35%|███▍      | 766/2200 [2:54:39<7:18:22, 18.34s/pipeline]                    Optimization Progress:  36%|███▌      | 789/2200 [2:56:27<1:00:29,  2.57s/pipeline]                    Optimization Progress:  36%|███▌      | 789/2200 [2:56:27<1:00:29,  2.57s/pipeline]

Generation 4 - Current Pareto front scores:
1	0.551891875346	BernoulliNB(CombineDFs(input_matrix, input_matrix), 0.83999999999999997, 13.0)
2	0.555308354395	BernoulliNB(DecisionTreeClassifier(input_matrix), 0.96999999999999997, 0.070000000000000007)



Optimization Progress:  42%|████▏     | 929/2200 [3:00:42<3:56:17, 11.15s/pipeline]Optimization Progress:  37%|███▋      | 804/2200 [2:56:27<29:58,  1.29s/pipeline]Optimization Progress:  39%|███▉      | 853/2200 [2:57:00<05:28,  4.10pipeline/s]Optimization Progress:  40%|███▉      | 879/2200 [2:57:40<08:37,  2.55pipeline/s]Optimization Progress:  41%|████      | 907/2200 [2:58:10<03:44,  5.75pipeline/s]          Optimization Progress:  42%|████▏     | 930/2200 [3:00:42<8:17:57, 23.53s/pipeline]

Timeout during evaluation of pipeline #930. Skipping to the next pipeline.


Optimization Progress:  44%|████▍     | 976/2200 [3:02:26<04:55,  4.14pipeline/s]  Optimization Progress:  42%|████▏     | 933/2200 [3:00:43<4:07:37, 11.73s/pipeline]Optimization Progress:  44%|████▍     | 976/2200 [3:01:40<04:55,  4.14pipeline/s]          Optimization Progress:  44%|████▍     | 977/2200 [3:02:26<6:45:59, 19.92s/pipeline]

Timeout during evaluation of pipeline #977. Skipping to the next pipeline.


Optimization Progress:  44%|████▍     | 978/2200 [3:02:28<4:52:03, 14.34s/pipeline]        Optimization Progress:  44%|████▍     | 978/2200 [3:02:28<4:52:03, 14.34s/pipeline]                    Optimization Progress:  44%|████▍     | 978/2200 [3:02:28<4:52:03, 14.34s/pipeline]          

Generation 5 - Current Pareto front scores:
1	0.551891875346	BernoulliNB(CombineDFs(input_matrix, input_matrix), 0.83999999999999997, 13.0)
2	0.562297456421	BernoulliNB(DecisionTreeClassifier(input_matrix), 0.20000000000000001, 0.070000000000000007)



Optimization Progress:  47%|████▋     | 1026/2200 [3:05:52<4:50:11, 14.83s/pipeline]Optimization Progress:  46%|████▌     | 1003/2200 [3:02:29<2:21:05,  7.07s/pipeline]          Optimization Progress:  47%|████▋     | 1027/2200 [3:05:52<12:22:26, 37.98s/pipeline]

Timeout during evaluation of pipeline #1027. Skipping to the next pipeline.


Optimization Progress:  49%|████▉     | 1073/2200 [3:07:47<3:44:21, 11.94s/pipeline] Optimization Progress:  47%|████▋     | 1029/2200 [3:05:52<6:05:28, 18.73s/pipeline]Optimization Progress:  48%|████▊     | 1059/2200 [3:06:50<04:15,  4.47pipeline/s]          Optimization Progress:  49%|████▊     | 1071/2200 [3:07:47<5:20:33, 17.04s/pipeline]

Timeout during evaluation of pipeline #1071. Skipping to the next pipeline.


Optimization Progress:  49%|████▉     | 1083/2200 [3:09:04<44:18,  2.38s/pipeline]  Optimization Progress:  49%|████▉     | 1077/2200 [3:08:09<2:50:40,  9.12s/pipeline]          Optimization Progress:  49%|████▉     | 1084/2200 [3:09:04<5:31:39, 17.83s/pipeline]

Timeout during evaluation of pipeline #1084. Skipping to the next pipeline.


Optimization Progress:  51%|█████     | 1114/2200 [3:11:14<3:32:47, 11.76s/pipeline]Optimization Progress:  49%|████▉     | 1087/2200 [3:09:52<5:47:29, 18.73s/pipeline]Optimization Progress:  51%|█████     | 1112/2200 [3:10:30<14:23,  1.26pipeline/s]          Optimization Progress:  51%|█████     | 1113/2200 [3:11:14<5:03:06, 16.73s/pipeline]

Timeout during evaluation of pipeline #1113. Skipping to the next pipeline.


Optimization Progress:  51%|█████     | 1119/2200 [3:12:10<2:58:12,  9.89s/pipeline]         Optimization Progress:  51%|█████     | 1116/2200 [3:12:10<2:31:05,  8.36s/pipeline]

Timeout during evaluation of pipeline #1117. Skipping to the next pipeline.


          n Progress:  54%|█████▍    | 1183/2200 [3:16:58<4:02:39, 14.32s/pipeline]ptimization Progress:  51%|█████     | 1123/2200 [3:12:11<1:29:15,  4.97s/pipeline]Optimization Progress:  52%|█████▏    | 1149/2200 [3:12:50<03:34,  4.89pipeline/s]Optimization Progress:  54%|█████▎    | 1181/2200 [3:16:11<05:17,  3.21pipeline/s]          Optimization Progress:  54%|█████▎    | 1182/2200 [3:16:58<5:46:16, 20.41s/pipeline]                    Optimization Progress:  54%|█████▍    | 1183/2200 [3:16:58<4:02:39, 14.32s/pipeline]                    Optimization Progress:  54%|█████▍    | 1183/2200 [3:16:58<4:02:39, 14.32s/pipeline]

Timeout during evaluation of pipeline #1182. Skipping to the next pipeline.
Generation 6 - Current Pareto front scores:
1	0.551891875346	BernoulliNB(CombineDFs(input_matrix, input_matrix), 0.83999999999999997, 13.0)
2	0.562297456421	BernoulliNB(DecisionTreeClassifier(input_matrix), 0.20000000000000001, 0.070000000000000007)



Optimization Progress:  58%|█████▊    | 1287/2200 [3:21:29<2:39:23, 10.47s/pipeline]Optimization Progress:  55%|█████▍    | 1205/2200 [3:17:00<1:57:56,  7.11s/pipeline]Optimization Progress:  56%|█████▌    | 1226/2200 [3:17:21<10:26,  1.55pipeline/s]          Optimization Progress:  58%|█████▊    | 1285/2200 [3:21:28<3:47:36, 14.93s/pipeline]

Timeout during evaluation of pipeline #1285. Skipping to the next pipeline.


Optimization Progress:  59%|█████▉    | 1301/2200 [3:24:14<3:45:14, 15.03s/pipeline]Optimization Progress:  59%|█████▊    | 1290/2200 [3:23:06<6:50:23, 27.06s/pipeline]          Optimization Progress:  59%|█████▉    | 1299/2200 [3:24:14<5:22:04, 21.45s/pipeline]

Timeout during evaluation of pipeline #1299. Skipping to the next pipeline.


Optimization Progress:  60%|█████▉    | 1318/2200 [3:26:19<1:29:13,  6.07s/pipeline]Optimization Progress:  59%|█████▉    | 1304/2200 [3:24:35<2:51:58, 11.52s/pipeline]          Optimization Progress:  60%|█████▉    | 1319/2200 [3:26:19<5:04:02, 20.71s/pipeline]

Timeout during evaluation of pipeline #1319. Skipping to the next pipeline.


Optimization Progress:  61%|██████    | 1341/2200 [3:31:02<1:09:40,  4.87s/pipeline] Optimization Progress:  60%|██████    | 1322/2200 [3:28:49<10:09:49, 41.67s/pipeline]Optimization Progress:  60%|██████    | 1331/2200 [3:29:11<1:48:38,  7.50s/pipeline]Optimization Progress:  61%|██████    | 1341/2200 [3:30:21<1:09:40,  4.87s/pipeline]          Optimization Progress:  61%|██████    | 1342/2200 [3:31:02<4:51:28, 20.38s/pipeline]

Timeout during evaluation of pipeline #1342. Skipping to the next pipeline.


Optimization Progress:  64%|██████▎   | 1401/2200 [3:33:35<04:43,  2.82pipeline/s]Optimization Progress:  61%|██████    | 1344/2200 [3:31:03<2:25:56, 10.23s/pipeline]                    Optimization Progress:  63%|██████▎   | 1381/2200 [3:33:35<06:52,  1.98pipeline/s]                    Optimization Progress:  63%|██████▎   | 1381/2200 [3:33:35<06:52,  1.98pipeline/s]

Generation 7 - Current Pareto front scores:
1	0.553092147034	BernoulliNB(CombineDFs(input_matrix, input_matrix), 0.83999999999999997, 6.0)
2	0.562297456421	BernoulliNB(DecisionTreeClassifier(input_matrix), 0.20000000000000001, 0.070000000000000007)



Optimization Progress:  66%|██████▌   | 1444/2200 [3:37:02<2:29:20, 11.85s/pipeline]                  Optimization Progress:  66%|██████▌   | 1442/2200 [3:37:02<3:33:31, 16.90s/pipeline]

Timeout during evaluation of pipeline #1442. Skipping to the next pipeline.


Optimization Progress:  72%|███████▏  | 1587/2200 [3:43:34<18:29,  1.81s/pipeline]Optimization Progress:  66%|██████▌   | 1447/2200 [3:37:04<1:17:04,  6.14s/pipeline]Optimization Progress:  70%|███████   | 1549/2200 [3:41:12<02:17,  4.72pipeline/s]Optimization Progress:  72%|███████▏  | 1581/2200 [3:43:32<08:39,  1.19pipeline/s]                    Optimization Progress:  72%|███████▏  | 1587/2200 [3:43:34<18:29,  1.81s/pipeline]                    Optimization Progress:  72%|███████▏  | 1587/2200 [3:43:34<18:29,  1.81s/pipeline]          Optimization Progress:  73%|███████▎  | 1601/2200 [3:43:34<12:41,  1.27s/pipeline]

Generation 8 - Current Pareto front scores:
1	0.556803458605	BernoulliNB(input_matrix, 0.83999999999999997, 6.0)
2	0.563730889172	BernoulliNB(DecisionTreeClassifier(input_matrix), 0.12, 0.17000000000000001)
5	0.57105250506	BernoulliNB(DecisionTreeClassifier(SelectPercentile(CombineDFs(Normalizer(input_matrix, 20), LinearSVC(input_matrix, 36.0, 36, False)), 16)), 0.48999999999999999, 0.050000000000000003)



Optimization Progress:  73%|███████▎  | 1609/2200 [3:45:11<20:32,  2.08s/pipeline]                    Optimization Progress:  73%|███████▎  | 1610/2200 [3:45:11<3:46:48, 23.07s/pipeline]

Timeout during evaluation of pipeline #1610. Skipping to the next pipeline.


Optimization Progress:  74%|███████▍  | 1623/2200 [3:46:11<12:19,  1.28s/pipeline]  Optimization Progress:  73%|███████▎  | 1613/2200 [3:45:12<1:52:13, 11.47s/pipeline]          Optimization Progress:  74%|███████▍  | 1624/2200 [3:46:11<2:51:07, 17.82s/pipeline]

Timeout during evaluation of pipeline #1624. Skipping to the next pipeline.


Optimization Progress:  74%|███████▍  | 1627/2200 [3:47:17<1:14:22,  7.79s/pipeline]Optimization Progress:  74%|███████▍  | 1626/2200 [3:46:18<1:44:28, 10.92s/pipeline]          Optimization Progress:  74%|███████▍  | 1628/2200 [3:47:17<3:39:54, 23.07s/pipeline]

Timeout during evaluation of pipeline #1628. Skipping to the next pipeline.


Optimization Progress:  80%|████████  | 1771/2200 [3:55:02<02:26,  2.94pipeline/s]  Optimization Progress:  74%|███████▍  | 1630/2200 [3:48:05<3:23:35, 21.43s/pipeline]          Optimization Progress:  81%|████████  | 1772/2200 [3:55:02<2:25:38, 20.42s/pipeline]

Timeout during evaluation of pipeline #1772. Skipping to the next pipeline.


Optimization Progress:  82%|████████▏ | 1801/2200 [3:56:30<24:45,  3.72s/pipeline]ptimization Progress:  81%|████████  | 1775/2200 [3:55:40<1:39:55, 14.11s/pipeline]                    Optimization Progress:  81%|████████▏ | 1791/2200 [3:56:30<36:13,  5.32s/pipeline]                    Optimization Progress:  81%|████████▏ | 1791/2200 [3:56:30<36:13,  5.32s/pipeline]                    Optimization Progress:  81%|████████▏ | 1791/2200 [3:56:30<36:13,  5.32s/pipeline]

Generation 9 - Current Pareto front scores:
1	0.556803458605	BernoulliNB(input_matrix, 0.83999999999999997, 6.0)
2	0.564612431998	BernoulliNB(DecisionTreeClassifier(input_matrix), 0.10000000000000001, 0.070000000000000007)
3	0.568044093439	BernoulliNB(DecisionTreeClassifier(ZeroCount(input_matrix)), 0.48999999999999999, 0.050000000000000003)
5	0.57105250506	BernoulliNB(DecisionTreeClassifier(SelectPercentile(CombineDFs(Normalizer(input_matrix, 20), LinearSVC(input_matrix, 36.0, 36, False)), 16)), 0.48999999999999999, 0.050000000000000003)



Optimization Progress:  84%|████████▎ | 1842/2200 [3:59:38<11:41,  1.96s/pipeline]                    Optimization Progress:  84%|████████▍ | 1843/2200 [3:59:38<1:58:47, 19.97s/pipeline]

Timeout during evaluation of pipeline #1843. Skipping to the next pipeline.


Optimization Progress:  84%|████████▍ | 1856/2200 [4:01:06<38:05,  6.64s/pipeline]e]Optimization Progress:  84%|████████▍ | 1846/2200 [3:59:40<59:41, 10.12s/pipeline]            Optimization Progress:  84%|████████▍ | 1857/2200 [4:01:06<2:06:56, 22.21s/pipeline]

Timeout during evaluation of pipeline #1857. Skipping to the next pipeline.


                    Optimization Progress:  84%|████████▍ | 1857/2200 [4:02:05<2:06:56, 22.21s/pipeline]Optimization Progress:  84%|████████▍ | 1858/2200 [4:02:05<3:09:21, 33.22s/pipeline]

Timeout during evaluation of pipeline #1858. Skipping to the next pipeline.


Optimization Progress:  85%|████████▍ | 1861/2200 [4:03:03<2:40:13, 28.36s/pipeline]Optimization Progress:  84%|████████▍ | 1859/2200 [4:03:03<3:50:06, 40.49s/pipeline]

Timeout during evaluation of pipeline #1859. Skipping to the next pipeline.


Optimization Progress:  85%|████████▌ | 1876/2200 [4:07:37<2:03:57, 22.96s/pipeline]Optimization Progress:  85%|████████▍ | 1863/2200 [4:03:42<2:22:24, 25.35s/pipeline]          Optimization Progress:  85%|████████▌ | 1874/2200 [4:07:37<2:58:01, 32.77s/pipeline]

Timeout during evaluation of pipeline #1874. Skipping to the next pipeline.


Optimization Progress:  90%|█████████ | 1984/2200 [4:14:05<09:36,  2.67s/pipeline]Optimization Progress:  85%|████████▌ | 1879/2200 [4:07:57<1:22:46, 15.47s/pipeline]                    Optimization Progress:  90%|█████████ | 1984/2200 [4:14:05<09:36,  2.67s/pipeline]                    Optimization Progress:  90%|█████████ | 1984/2200 [4:14:05<09:36,  2.67s/pipeline]          

Generation 10 - Current Pareto front scores:
1	0.556803458605	BernoulliNB(input_matrix, 0.83999999999999997, 6.0)
2	0.564612431998	BernoulliNB(DecisionTreeClassifier(input_matrix), 0.10000000000000001, 0.070000000000000007)
3	0.576280240377	BernoulliNB(DecisionTreeClassifier(LogisticRegression(input_matrix, 0.78000000000000003, 25, False)), 0.20000000000000001, 0.070000000000000007)



Optimization Progress:  97%|█████████▋| 2143/2200 [4:28:51<14:14, 15.00s/pipeline]Optimization Progress:  91%|█████████ | 2003/2200 [4:14:05<04:22,  1.33s/pipeline]Optimization Progress:  91%|█████████▏| 2012/2200 [4:14:24<01:21,  2.31pipeline/s]Optimization Progress:  94%|█████████▎| 2057/2200 [4:14:44<00:12, 11.47pipeline/s]Optimization Progress:  94%|█████████▍| 2065/2200 [4:15:04<03:48,  1.69s/pipeline]Optimization Progress:  96%|█████████▋| 2121/2200 [4:17:04<00:15,  5.19pipeline/s]          Optimization Progress:  97%|█████████▋| 2144/2200 [4:28:51<2:19:15, 149.20s/pipeline]

Timeout during evaluation of pipeline #2144. Skipping to the next pipeline.


                                                                                    ]Optimization Progress:  98%|█████████▊| 2147/2200 [4:28:51<1:04:40, 73.22s/pipeline] Optimization Progress:  98%|█████████▊| 2159/2200 [4:29:06<05:55,  8.67s/pipeline]

0.679319371728


In [488]:
pipeline_optimizer.export("device_time_export.py")
#with file("device_time-export.feat","w+ as feature_mat

In [486]:
model_name = "device_time"

from sklearn.externals import joblib
import pickle
joblib.dump(pipeline_optimizer._fitted_pipeline, 'model_%s.pkl' % model_name) 
features = feature_set.columns
with open('model_%s.feat' % model_name, "w+") as f: 
    pickle.dump(features,f)

PicklingError: Can't pickle <function <lambda> at 0x7fefd9f3b578>: it's not found as __main__.<lambda>

In [397]:
# mixing model_per_device and model_per_device_and_time
model_name = "device"

from sklearn.externals import joblib
import pickle

device_model = joblib.load("model.pkl")
device_model.named_steps
with open('model_%s.feat' % model_name, "r") as f: 
    device_model_features = pickle.load(f)

In [462]:
device_mat = sub_feature_set[device_model_features]
predicted_device = device_model.predict_proba(device_mat)

In [481]:
device_prediction = predicted_device[:,1]

device_time_prediction = device_time_model.predict_proba(feature_mat)[:,1]
#device_time_predictions = fitted.predict_proba(feature_mat)[:,1]
# basic (two models) vote
confidence_0 = np.abs(device_prediction -0.5)
confidence_1 = np.abs(device_time_prediction -0.5)
vote_0 = confidence_0 > confidence_1
vote_1 =  np.invert(vote_0)

#prediction = vote_0*device_prediction +vote_1*device_time_predictions
prediction= device_time_predictions*device_prediction

In [482]:
total_prec, total_recall, ths = precision_recall_curve(label_mat, prediction)
dev_prec, dev_recall, ths = precision_recall_curve(label_mat, device_prediction)
dev_t_prec, dev_t_recall, ths = precision_recall_curve(label_mat, device_time_prediction)

In [483]:
# pr curve
tpr_f = figure(width=400,height=400,title="PR curve")
tpr_f.xaxis.axis_label = "recall"
tpr_f.yaxis.axis_label = "precision"

tpr_f.cross(dev_recall,dev_prec,size=5,color="green")
tpr_f.line(dev_recall,dev_prec,legend="dev PR",color="green")

tpr_f.cross(dev_t_recall,dev_t_prec,size=5,color="red")
tpr_f.line(dev_t_recall,dev_t_prec,legend="dev time PR",color="red")

tpr_f.cross(total_recall,total_prec,size=5)
tpr_f.line(total_recall,total_prec,legend="PR")

show(tpr_f)