In [17]:
import pandas as pd
from tpot import TPOTClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
train_data = pd.read_csv('data/train.csv')
test_data = pd.read_csv('data/test.csv')

In [3]:
combined_set = pd.concat([train_data, test_data])
combined_set['combined_var'] = (combined_set.hair_length * .40) + (combined_set.has_soul * .40)

# Replace categorical variables with numbers
def label_encoding(df, col):
    label_map = { key: float(n) for n, key in enumerate(df[col].unique()) }
    label_reverse_map = { label_map[key]: key for key in label_map }
    df[col] = df[col].apply(lambda x: label_map[x])
    return df, label_map, label_reverse_map

combined_set = pd.get_dummies(combined_set, columns=['color'])
combined_set

train_set = combined_set[:len(train_data.index)]
test_set = combined_set[len(train_data.index):]

In [4]:
train_cols = ['combined_var', 'rotting_flesh', 'bone_length', 'has_soul', 'hair_length']
target_var = ['type']
selected_cols = train_cols + target_var

In [5]:
train_set, type_label_map, type_label_reverse_map = label_encoding(train_set, 'type')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [6]:
p_train,val = train_test_split(train_set, train_size=.75, test_size=.25)

In [7]:
p_train.shape, val.shape

((278, 13), (93, 13))

In [8]:
p_train[train_cols].head().values

array([[ 0.48459981,  0.60417429,  0.50259908,  0.59354486,  0.61795467],
       [ 0.53332275,  0.38980477,  0.62661456,  0.71254883,  0.62075804],
       [ 0.32187598,  0.24625769,  0.25077   ,  0.25003552,  0.55465444],
       [ 0.49072729,  0.41651319,  0.44400926,  0.50805655,  0.71876167],
       [ 0.3488198 ,  0.68268741,  0.40399456,  0.49952043,  0.37252906]])

In [9]:
tpot = TPOTClassifier(verbosity=3)

In [10]:
help(tpot.fit)

Help on method fit in module tpot.base:

fit(features, classes) method of tpot.tpot.TPOTClassifier instance
    Fits a machine learning pipeline that maximizes classification score
    on the provided data
    
    Uses genetic programming to optimize a machine learning pipeline that
    maximizes classification score on the provided features and classes.
    Performs an internal stratified training/testing cross-validaton split
    to avoid overfitting on the provided data.
    
    Parameters
    ----------
    features: array-like {n_samples, n_features}
        Feature matrix
    classes: array-like {n_samples}
        List of class labels for prediction
    
    Returns
    -------
    None



In [11]:
tpot.fit(pd.np.array(p_train[train_cols]), pd.np.array(p_train[target_var]).ravel())

Optimization Progress:   1%|          | 102/10100 [01:06<2:58:34,  1.07s/pipeline]

Generation 1 - Current Pareto front scores:
1	0.8090703466224024	LinearSVC(input_matrix, 0.80000000000000004, 100, True)
2	0.819869459623558	LinearSVC(LogisticRegression(input_matrix, 0.34000000000000002, 12, True), 20.0, 24, False)



Optimization Progress:   2%|▏         | 202/10100 [01:33<1:15:43,  2.18pipeline/s]

Generation 2 - Current Pareto front scores:
1	0.8222591907084239	LogisticRegression(input_matrix, 10.0, 90, False)



Optimization Progress:   3%|▎         | 301/10100 [01:53<11:38, 14.04pipeline/s]

Generation 3 - Current Pareto front scores:
1	0.8222591907084239	LogisticRegression(input_matrix, 10.0, 90, False)



Optimization Progress:   4%|▍         | 406/10100 [02:04<18:10,  8.89pipeline/s]

Generation 4 - Current Pareto front scores:
1	0.8222591907084239	LogisticRegression(input_matrix, 10.0, 90, False)



Optimization Progress:   5%|▍         | 502/10100 [02:37<50:32,  3.17pipeline/s]  

Generation 5 - Current Pareto front scores:
1	0.8222591907084239	LogisticRegression(input_matrix, 10.0, 90, False)
4	0.8250070999079461	LogisticRegression(BernoulliNB(PCA(SelectFwe(input_matrix, 26.0), 48), 0.80000000000000004, 16.0), 10.0, 90, False)



Optimization Progress:   6%|▌         | 603/10100 [02:54<09:27, 16.72pipeline/s]

Generation 6 - Current Pareto front scores:
1	0.8222591907084239	LogisticRegression(input_matrix, 10.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:   7%|▋         | 701/10100 [03:06<37:05,  4.22pipeline/s]

Generation 7 - Current Pareto front scores:
1	0.8222591907084239	LogisticRegression(input_matrix, 10.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:   8%|▊         | 796/10100 [03:25<32:39,  4.75pipeline/s]

Generation 8 - Current Pareto front scores:
1	0.8222591907084239	LogisticRegression(input_matrix, 10.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:   9%|▉         | 901/10100 [03:35<05:13, 29.31pipeline/s]

Generation 9 - Current Pareto front scores:
1	0.8222591907084239	LogisticRegression(input_matrix, 10.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  10%|▉         | 1001/10100 [03:42<05:09, 29.44pipeline/s]

Generation 10 - Current Pareto front scores:
1	0.8222591907084239	LogisticRegression(input_matrix, 10.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  11%|█         | 1101/10100 [03:57<09:55, 15.11pipeline/s]

Generation 11 - Current Pareto front scores:
1	0.8222591907084239	LogisticRegression(input_matrix, 10.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  12%|█▏        | 1201/10100 [04:11<15:10,  9.77pipeline/s]

Generation 12 - Current Pareto front scores:
1	0.8222591907084239	LogisticRegression(input_matrix, 10.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  13%|█▎        | 1300/10100 [04:23<05:55, 24.78pipeline/s]

Generation 13 - Current Pareto front scores:
1	0.8222591907084239	LogisticRegression(input_matrix, 10.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  14%|█▍        | 1402/10100 [04:41<24:20,  5.96pipeline/s]

Generation 14 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  15%|█▍        | 1502/10100 [05:02<10:49, 13.24pipeline/s]

Generation 15 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  16%|█▌        | 1601/10100 [05:08<05:23, 26.24pipeline/s]

Generation 16 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  17%|█▋        | 1692/10100 [05:24<07:42, 18.19pipeline/s]

Generation 17 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  18%|█▊        | 1801/10100 [05:50<17:22,  7.96pipeline/s]

Generation 18 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  19%|█▉        | 1901/10100 [05:58<07:04, 19.34pipeline/s]

Generation 19 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  20%|█▉        | 2002/10100 [06:14<09:38, 13.99pipeline/s]

Generation 20 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  21%|██        | 2102/10100 [06:37<12:39, 10.53pipeline/s]

Generation 21 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  22%|██▏       | 2200/10100 [06:49<38:32,  3.42pipeline/s]

Generation 22 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)



Optimization Progress:  23%|██▎       | 2301/10100 [07:06<55:21,  2.35pipeline/s]  

Generation 23 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8251751106606342	LogisticRegression(FastICA(MultinomialNB(input_matrix, 39.0), 44.0), 15.0, 90, False)



Optimization Progress:  24%|██▍       | 2401/10100 [07:18<06:40, 19.23pipeline/s]

Generation 24 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8251751106606342	LogisticRegression(FastICA(MultinomialNB(input_matrix, 39.0), 44.0), 15.0, 90, False)



Optimization Progress:  25%|██▍       | 2501/10100 [07:34<27:32,  4.60pipeline/s]

Generation 25 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8251751106606342	LogisticRegression(FastICA(MultinomialNB(input_matrix, 39.0), 44.0), 15.0, 90, False)



Optimization Progress:  26%|██▌       | 2601/10100 [07:53<53:08,  2.35pipeline/s]  

Generation 26 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8251751106606342	LogisticRegression(FastICA(MultinomialNB(input_matrix, 39.0), 44.0), 15.0, 90, False)



Optimization Progress:  27%|██▋       | 2701/10100 [08:11<19:26,  6.34pipeline/s]

Generation 27 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8251751106606342	LogisticRegression(FastICA(MultinomialNB(input_matrix, 39.0), 44.0), 15.0, 90, False)



Optimization Progress:  28%|██▊       | 2801/10100 [08:33<23:40,  5.14pipeline/s]

Generation 28 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8251751106606342	LogisticRegression(FastICA(MultinomialNB(input_matrix, 39.0), 44.0), 15.0, 90, False)



Optimization Progress:  29%|██▊       | 2901/10100 [08:55<35:17,  3.40pipeline/s]

Generation 29 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.82518980012535	LogisticRegression(FastICA(MultinomialNB(input_matrix, 39.0), 44.0), 17.0, 90, False)



Optimization Progress:  30%|██▉       | 3001/10100 [09:13<22:19,  5.30pipeline/s]

Generation 30 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.82518980012535	LogisticRegression(FastICA(MultinomialNB(input_matrix, 39.0), 44.0), 17.0, 90, False)



Optimization Progress:  31%|███       | 3102/10100 [09:35<06:16, 18.58pipeline/s]

Generation 31 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.82518980012535	LogisticRegression(FastICA(MultinomialNB(input_matrix, 39.0), 44.0), 17.0, 90, False)



Optimization Progress:  32%|███▏      | 3202/10100 [09:48<09:04, 12.66pipeline/s]

Generation 32 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8299139931840883	LogisticRegression(MultinomialNB(PolynomialFeatures(input_matrix), 0.17000000000000001), 15.0, 12, False)



Optimization Progress:  33%|███▎      | 3289/10100 [10:14<25:21,  4.48pipeline/s]

Generation 33 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8299139931840883	LogisticRegression(MultinomialNB(PolynomialFeatures(input_matrix), 0.17000000000000001), 15.0, 12, False)



Optimization Progress:  34%|███▎      | 3392/10100 [10:25<06:17, 17.79pipeline/s]

Generation 34 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  35%|███▍      | 3502/10100 [10:39<16:10,  6.80pipeline/s]

Generation 35 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  36%|███▌      | 3591/10100 [11:05<43:30,  2.49pipeline/s]

Generation 36 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  37%|███▋      | 3702/10100 [11:20<09:30, 11.21pipeline/s]

Generation 37 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  38%|███▊      | 3801/10100 [11:48<08:01, 13.09pipeline/s]

Generation 38 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  39%|███▊      | 3902/10100 [12:29<10:50,  9.53pipeline/s]

Generation 39 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  40%|███▉      | 4001/10100 [12:50<08:21, 12.17pipeline/s]

Generation 40 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  41%|████      | 4102/10100 [13:09<04:23, 22.74pipeline/s]

Generation 41 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  42%|████▏     | 4194/10100 [13:19<06:15, 15.73pipeline/s]

Generation 42 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  43%|████▎     | 4300/10100 [13:36<03:37, 26.72pipeline/s]

Generation 43 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  44%|████▎     | 4402/10100 [13:55<05:52, 16.18pipeline/s]

Generation 44 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  45%|████▍     | 4500/10100 [14:09<06:50, 13.63pipeline/s]

Generation 45 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  46%|████▌     | 4601/10100 [14:27<03:53, 23.54pipeline/s]

Generation 46 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  47%|████▋     | 4702/10100 [14:52<05:48, 15.48pipeline/s]

Generation 47 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  48%|████▊     | 4801/10100 [14:56<03:16, 26.96pipeline/s]

Generation 48 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  49%|████▊     | 4901/10100 [15:02<03:42, 23.34pipeline/s]

Generation 49 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  50%|████▉     | 5000/10100 [15:16<11:30,  7.38pipeline/s]

Generation 50 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  51%|█████     | 5101/10100 [15:27<03:48, 21.85pipeline/s]

Generation 51 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  52%|█████▏    | 5202/10100 [15:42<04:46, 17.11pipeline/s]

Generation 52 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  53%|█████▎    | 5303/10100 [15:54<20:46,  3.85pipeline/s]

Generation 53 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  53%|█████▎    | 5385/10100 [16:04<04:58, 15.77pipeline/s]

Generation 54 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  54%|█████▍    | 5503/10100 [16:20<03:29, 21.99pipeline/s]

Generation 55 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  55%|█████▌    | 5604/10100 [16:29<09:50,  7.61pipeline/s]

Generation 56 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  56%|█████▋    | 5701/10100 [16:36<08:35,  8.54pipeline/s]

Generation 57 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  57%|█████▋    | 5797/10100 [16:45<03:35, 19.92pipeline/s]

Generation 58 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  58%|█████▊    | 5902/10100 [17:08<18:08,  3.86pipeline/s]

Generation 59 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  59%|█████▉    | 6001/10100 [17:29<22:37,  3.02pipeline/s]

Generation 60 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  60%|██████    | 6101/10100 [17:38<05:53, 11.30pipeline/s]

Generation 61 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  61%|██████▏   | 6202/10100 [17:52<16:34,  3.92pipeline/s]

Generation 62 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  62%|██████▏   | 6301/10100 [17:59<03:04, 20.56pipeline/s]

Generation 63 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  63%|██████▎   | 6402/10100 [18:15<11:06,  5.54pipeline/s]

Generation 64 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  64%|██████▍   | 6501/10100 [18:34<04:25, 13.54pipeline/s]

Generation 65 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  65%|██████▌   | 6602/10100 [18:59<09:33,  6.09pipeline/s]

Generation 66 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  66%|██████▌   | 6691/10100 [19:29<21:07,  2.69pipeline/s]

Generation 67 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  67%|██████▋   | 6789/10100 [19:39<02:29, 22.08pipeline/s]

Generation 68 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  68%|██████▊   | 6901/10100 [19:59<06:07,  8.71pipeline/s]

Generation 69 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  69%|██████▉   | 7001/10100 [20:10<02:06, 24.50pipeline/s]

Generation 70 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  70%|███████   | 7102/10100 [20:20<01:50, 27.07pipeline/s]

Generation 71 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  71%|███████▏  | 7203/10100 [20:32<06:21,  7.59pipeline/s]

Generation 72 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  72%|███████▏  | 7301/10100 [20:51<03:17, 14.21pipeline/s]

Generation 73 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  73%|███████▎  | 7400/10100 [21:03<06:59,  6.44pipeline/s]

Generation 74 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  74%|███████▍  | 7501/10100 [21:19<04:46,  9.06pipeline/s]

Generation 75 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  75%|███████▌  | 7601/10100 [21:29<08:43,  4.78pipeline/s]

Generation 76 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  76%|███████▌  | 7701/10100 [21:44<01:44, 22.92pipeline/s]

Generation 77 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  77%|███████▋  | 7801/10100 [22:00<10:12,  3.75pipeline/s]

Generation 78 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  78%|███████▊  | 7901/10100 [22:25<16:22,  2.24pipeline/s]

Generation 79 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  79%|███████▉  | 8001/10100 [22:39<04:04,  8.58pipeline/s]

Generation 80 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  80%|████████  | 8088/10100 [22:59<08:17,  4.04pipeline/s]

Generation 81 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  81%|████████  | 8200/10100 [23:17<09:31,  3.32pipeline/s]

Generation 82 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  82%|████████▏ | 8287/10100 [23:35<04:15,  7.09pipeline/s]

Generation 83 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  83%|████████▎ | 8401/10100 [23:48<01:21, 20.85pipeline/s]

Generation 84 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  84%|████████▍ | 8487/10100 [23:57<01:30, 17.79pipeline/s]

Generation 85 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  85%|████████▌ | 8600/10100 [24:12<01:53, 13.18pipeline/s]

Generation 86 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  86%|████████▌ | 8700/10100 [24:30<01:15, 18.58pipeline/s]

Generation 87 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  87%|████████▋ | 8801/10100 [24:39<00:49, 26.22pipeline/s]

Generation 88 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  88%|████████▊ | 8884/10100 [24:49<01:05, 18.59pipeline/s]

Generation 89 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  89%|████████▉ | 9001/10100 [25:15<07:55,  2.31pipeline/s]

Generation 90 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  90%|████████▉ | 9088/10100 [25:35<03:01,  5.58pipeline/s]

Generation 91 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  91%|█████████ | 9201/10100 [25:55<01:32,  9.71pipeline/s]

Generation 92 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  92%|█████████▏| 9301/10100 [26:06<00:36, 22.13pipeline/s]

Generation 93 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  93%|█████████▎| 9401/10100 [26:17<00:43, 16.06pipeline/s]

Generation 94 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  94%|█████████▍| 9502/10100 [26:38<03:59,  2.50pipeline/s]

Generation 95 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8250313681277787	LogisticRegression(MultinomialNB(input_matrix, 39.0), 10.0, 10, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  95%|█████████▌| 9601/10100 [26:50<00:48, 10.36pipeline/s]

Generation 96 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8275293728088217	LogisticRegression(MultinomialNB(input_matrix, 4.0), 41.0, 34, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  96%|█████████▌| 9702/10100 [27:05<00:26, 15.04pipeline/s]

Generation 97 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8275293728088217	LogisticRegression(MultinomialNB(input_matrix, 4.0), 41.0, 34, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  97%|█████████▋| 9800/10100 [27:13<00:14, 21.18pipeline/s]

Generation 98 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8275293728088217	LogisticRegression(MultinomialNB(input_matrix, 4.0), 41.0, 34, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  98%|█████████▊| 9902/10100 [27:30<00:26,  7.36pipeline/s]

Generation 99 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8275293728088217	LogisticRegression(MultinomialNB(input_matrix, 4.0), 41.0, 34, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)



Optimization Progress:  99%|█████████▉| 10001/10100 [27:46<00:22,  4.34pipeline/s]

Generation 100 - Current Pareto front scores:
1	0.8224029332412793	LogisticRegression(input_matrix, 15.0, 90, False)
2	0.8275293728088217	LogisticRegression(MultinomialNB(input_matrix, 4.0), 41.0, 34, False)
3	0.8326875477407603	LogisticRegression(MultinomialNB(CombineDFs(PolynomialFeatures(input_matrix), input_matrix), 0.17000000000000001), 15.0, 12, True)





In [19]:
print(classification_report(val[target_var], tpot.predict(val[train_cols])))

             precision    recall  f1-score   support

        0.0       0.79      0.77      0.78        39
        1.0       0.63      0.55      0.59        31
        2.0       0.79      0.96      0.86        23

avg / total       0.74      0.74      0.74        93



In [21]:
predictions = tpot.predict(test_set[train_cols])
sub = pd.DataFrame({'id': test_set.id, 'type': predictions})
sub['type'] = sub.type.apply(lambda x: type_label_reverse_map[x])
sub.to_csv('submission7_tpot.csv', index=False)