In [1]:
import pandas as pd
import numpy as np
from tpot import TPOTRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer, OneHotEncoder, Imputer

In [3]:
train_df = pd.read_csv('/var/data/train.csv')
test_df = pd.read_csv('/var/data/test.csv')

Use IDs as index

In [4]:
train_df.set_index('Id', inplace=True, verify_integrity=True)
test_df.set_index('Id', inplace=True, verify_integrity=True)

### Cleaning

The target variable has to be called **class** (tpot internal constraint)

In [5]:
train_df.rename(columns={'SalePrice': 'class'}, inplace=True)

#### Missing values imputation

In [6]:
train_df.LotFrontage.fillna(value=0.0, inplace=True)
train_df.Alley.fillna(value='No alley', inplace=True)
train_df[['BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2']] = train_df[['BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2']].fillna(value='No basement')
train_df.FireplaceQu.fillna(value='No fireplace', inplace=True)
train_df[['GarageType', 'GarageFinish', 'GarageQual', 'GarageCond']] = train_df[['GarageType', 'GarageFinish', 'GarageQual', 'GarageCond']].fillna(value='No garage')
train_df.PoolQC.fillna(value='No pool', inplace=True)
train_df.Fence.fillna(value='No fence', inplace=True)
train_df.MiscFeature.fillna(value='No extra', inplace=True)

train_df.MasVnrType.fillna(value='Unknown', inplace=True)
train_df.drop(['MasVnrArea', 'GarageYrBlt', 'MiscVal'], axis=1, inplace=True)

Fill missing value missing value for variable _Electrical_ with a possible value

In [8]:
print(train_df.loc[train_df.Electrical.isnull(), 'Electrical'])

Id
1380    NaN
Name: Electrical, dtype: object


In [9]:
train_df.Electrical.describe()

count      1459
unique        5
top       SBrkr
freq       1334
Name: Electrical, dtype: object

In [10]:
train_df.Electrical.fillna(value='SBrkr', inplace=True)

In [9]:
# # train_df.drop(train_df.loc[train_df.Electrical.isnull()].index, inplace=True)
# imputer = Imputer(strategy='most_frequent', axis=0, copy=True)
# impute_model = imputer.fit(train_df)
# train_df_ = impute_model.transform(train_df)
# print train_df_.loc[train_df.Electrical.isnull(), 'Electrical']

### Transform categorical variables into numerical

#### Label Binariser
Transform categorical variables into one-hot-encoded variables

In [11]:
lb_var_list = ['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 
               'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl',
               'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 
               'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 
               'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 
               'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition']
lb_transfo_list = ['lb_' + var for var in lb_var_list]
lb_model_list = ['lb_' + var + '_model' for var in lb_var_list]
lb_train_list = [var + '_train' for var in lb_var_list]

lb_transfo_dict = {}
for transfo in lb_transfo_list:
    lb_transfo_dict[transfo] = LabelBinarizer()

lb_model_dict = {}
lb_train_dict = {}
for i in range(len(lb_var_list)):
    lb_model_dict[lb_model_list[i]] = lb_transfo_dict[lb_transfo_list[i]].fit(train_df[lb_var_list[i]])
    lb_train_dict[lb_train_list[i]] = lb_model_dict[lb_model_list[i]].transform(train_df[lb_var_list[i]])

In [12]:
len(lb_train_dict)

43

In [13]:
lb_train_dict['MSZoning_train'].shape

(1460, 5)

#### One Hot Encoder
Transform numerical variables into one-hot-encoded variables

In [14]:
ohe_var_list = ['MSSubClass', 'MoSold']
ohe_transfo_list = ['ohe_' + var for var in ohe_var_list]
ohe_model_list = ['ohe_' + var + '_model' for var in ohe_var_list]
ohe_train_list = [var + '_train' for var in ohe_var_list]

ohe_transfo_dict = {}
for transfo in ohe_transfo_list:
    ohe_transfo_dict[transfo] = OneHotEncoder(sparse=False)

ohe_model_dict = {}
ohe_train_dict = {}
for i in range(len(ohe_var_list)):
    ohe_model_dict[ohe_model_list[i]] = ohe_transfo_dict[ohe_transfo_list[i]].fit(train_df[ohe_var_list[i]].values.reshape(-1, 1))
    ohe_train_dict[ohe_train_list[i]] = ohe_model_dict[ohe_model_list[i]].transform(train_df[ohe_var_list[i]].values.reshape(-1, 1))

In [15]:
len(ohe_train_dict)

2

In [16]:
ohe_train_dict['MSSubClass_train'].shape

(1460, 15)

In [17]:
ohe_train_dict['MoSold_train'].shape

(1460, 12)

#### Concatenate transformed features

In [18]:
# Untouched variables
unt_var_list = list(set(train_df.columns) - set(lb_var_list) - set(ohe_var_list) - {'class'})

In [36]:
tuple([value[1] for value in lb_train_dict.items()])

(array([[0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
        ..., 
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0]]), array([[1],
        [1],
        [1],
        ..., 
        [1],
        [1],
        [1]]), array([[0, 1, 0],
        [0, 1, 0],
        [0, 1, 0],
        ..., 
        [0, 1, 0],
        [0, 1, 0],
        [0, 1, 0]]), array([[0, 0, 0, 1],
        [0, 0, 0, 1],
        [1, 0, 0, 0],
        ..., 
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 1]]), array([[0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        ..., 
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 1]]), array([[0],
        [0],
        [0],
        ..., 
        [0],
        [0],
        [0]]), array([[0, 0, 0, 0, 1],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 1],
        ..., 
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1]]), array([[1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
    

In [37]:
array_unt = train_df.as_matrix(columns=unt_var_list)
array_lb = np.hstack(tuple([value[1] for value in lb_train_dict.items()]))
array_ohe = np.hstack(tuple([value[1] for value in ohe_train_dict.items()]))

In [38]:
train_array = np.hstack((array_unt, array_lb, array_ohe))

In [39]:
train_array.shape

(1460, 322)

### tpot

In [41]:
X_train, X_test, y_train, y_test = train_test_split(train_array, train_df['class'], train_size=0.95, test_size=0.05)

tpot = TPOTRegressor(generations=100, population_size=10, verbosity=2, n_jobs=-1, random_state=26)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))

# About the warning
# https://stackoverflow.com/questions/41238769/warning-messages-when-using-python
# https://github.com/rhiever/tpot/issues/284

Optimization Progress:   2%|▏         | 20/1010 [00:41<3:10:30, 11.55s/pipeline]

Generation 1 - Current best internal CV score: 904943648.044514


Optimization Progress:   3%|▎         | 30/1010 [01:47<4:12:41, 15.47s/pipeline]

Generation 2 - Current best internal CV score: 904943648.044514


Optimization Progress:   4%|▍         | 40/1010 [01:57<2:27:07,  9.10s/pipeline]

Generation 3 - Current best internal CV score: 904943648.044514


Optimization Progress:   5%|▌         | 51/1010 [07:03<17:57:02, 67.39s/pipeline]

Generation 4 - Current best internal CV score: 904943648.044514


Optimization Progress:   6%|▌         | 62/1010 [12:27<34:12:33, 129.91s/pipeline]

Generation 5 - Current best internal CV score: 827054686.5795491


Optimization Progress:   7%|▋         | 72/1010 [12:48<17:23:42, 66.76s/pipeline] 

Generation 6 - Current best internal CV score: 827054686.5795491


Optimization Progress:   8%|▊         | 82/1010 [13:15<9:28:08, 36.73s/pipeline] 

Generation 7 - Current best internal CV score: 827054686.5795491


Optimization Progress:   9%|▉         | 92/1010 [13:34<5:59:57, 23.53s/pipeline]

Generation 8 - Current best internal CV score: 827054686.5795491


Optimization Progress:  10%|█         | 102/1010 [13:55<4:27:06, 17.65s/pipeline]

Generation 9 - Current best internal CV score: 820864616.2623087


Optimization Progress:  11%|█         | 112/1010 [14:22<4:06:43, 16.48s/pipeline]

Generation 10 - Current best internal CV score: 820864616.2623087


Optimization Progress:  12%|█▏        | 122/1010 [14:48<3:53:55, 15.81s/pipeline]

Generation 11 - Current best internal CV score: 820864616.2623087


Optimization Progress:  13%|█▎        | 132/1010 [15:13<3:42:23, 15.20s/pipeline]

Generation 12 - Current best internal CV score: 730175538.8972764


Optimization Progress:  14%|█▍        | 143/1010 [20:23<24:09:12, 100.29s/pipeline]

Generation 13 - Current best internal CV score: 730175538.8972764


Optimization Progress:  15%|█▌        | 154/1010 [25:25<33:10:49, 139.54s/pipeline]

Generation 14 - Current best internal CV score: 730175538.8972764


Optimization Progress:  16%|█▌        | 164/1010 [25:44<16:29:43, 70.19s/pipeline] 

Generation 15 - Current best internal CV score: 730175538.8972764


Optimization Progress:  17%|█▋        | 174/1010 [26:05<8:41:47, 37.45s/pipeline] 

Generation 16 - Current best internal CV score: 730175538.8972764


Optimization Progress:  18%|█▊        | 184/1010 [26:25<5:33:33, 24.23s/pipeline]

Generation 17 - Current best internal CV score: 718172308.145935


Optimization Progress:  19%|█▉        | 194/1010 [27:02<5:09:06, 22.73s/pipeline]

Generation 18 - Current best internal CV score: 718172308.145935


Optimization Progress:  20%|██        | 204/1010 [27:23<3:10:22, 14.17s/pipeline]

Generation 19 - Current best internal CV score: 718172308.145935


Optimization Progress:  21%|██        | 214/1010 [27:51<2:01:09,  9.13s/pipeline]

Generation 20 - Current best internal CV score: 718172308.145935


Optimization Progress:  22%|██▏       | 224/1010 [28:20<3:18:08, 15.13s/pipeline]

Generation 21 - Current best internal CV score: 718172308.145935


Optimization Progress:  23%|██▎       | 234/1010 [28:43<3:05:57, 14.38s/pipeline]

Generation 22 - Current best internal CV score: 718172308.145935


Optimization Progress:  24%|██▍       | 244/1010 [29:58<6:16:55, 29.52s/pipeline]

Generation 23 - Current best internal CV score: 718172308.145935


Optimization Progress:  25%|██▌       | 254/1010 [30:39<3:55:07, 18.66s/pipeline]

Generation 24 - Current best internal CV score: 718172308.145935


Optimization Progress:  26%|██▌       | 264/1010 [31:03<4:11:47, 20.25s/pipeline]

Generation 25 - Current best internal CV score: 718172308.145935


Optimization Progress:  27%|██▋       | 274/1010 [31:23<1:50:24,  9.00s/pipeline]

Generation 26 - Current best internal CV score: 662852609.0646234


Optimization Progress:  28%|██▊       | 284/1010 [31:41<1:08:28,  5.66s/pipeline]

Generation 27 - Current best internal CV score: 630836638.6102053


Optimization Progress:  29%|██▉       | 294/1010 [32:03<2:05:05, 10.48s/pipeline]

Generation 28 - Current best internal CV score: 630836638.6102053


Optimization Progress:  30%|███       | 304/1010 [32:33<1:07:11,  5.71s/pipeline]

Generation 29 - Current best internal CV score: 630836638.6102053


Optimization Progress:  31%|███       | 314/1010 [32:59<1:34:42,  8.16s/pipeline]

Generation 30 - Current best internal CV score: 628989663.7367141


Optimization Progress:  32%|███▏      | 324/1010 [33:37<3:16:03, 17.15s/pipeline]

Generation 31 - Current best internal CV score: 628989663.7367141


Optimization Progress:  33%|███▎      | 334/1010 [34:01<2:54:47, 15.51s/pipeline]

Generation 32 - Current best internal CV score: 612615947.6897339


Optimization Progress:  34%|███▍      | 344/1010 [34:29<2:55:27, 15.81s/pipeline]

Generation 33 - Current best internal CV score: 612615947.6897339


Optimization Progress:  35%|███▌      | 354/1010 [34:59<3:01:27, 16.60s/pipeline]

Generation 34 - Current best internal CV score: 612615947.6897339


Optimization Progress:  36%|███▌      | 365/1010 [40:01<17:41:23, 98.73s/pipeline]

Generation 35 - Current best internal CV score: 612615947.6897339


Optimization Progress:  37%|███▋      | 375/1010 [40:29<9:59:08, 56.61s/pipeline] 

Generation 36 - Current best internal CV score: 612615947.6897339


Optimization Progress:  38%|███▊      | 385/1010 [42:08<9:58:49, 57.49s/pipeline] 

Generation 37 - Current best internal CV score: 612522052.9311179


Optimization Progress:  39%|███▉      | 395/1010 [42:32<6:02:13, 35.34s/pipeline]

Generation 38 - Current best internal CV score: 575141459.8670925


Optimization Progress:  40%|████      | 405/1010 [43:03<3:07:03, 18.55s/pipeline]

Generation 39 - Current best internal CV score: 575141459.8670925


Optimization Progress:  41%|████      | 415/1010 [43:26<3:16:07, 19.78s/pipeline]

Generation 40 - Current best internal CV score: 575141459.8670925


Optimization Progress:  42%|████▏     | 425/1010 [43:54<2:02:07, 12.53s/pipeline]

Generation 41 - Current best internal CV score: 575141459.8670925


Optimization Progress:  43%|████▎     | 435/1010 [44:19<1:59:57, 12.52s/pipeline]

Generation 42 - Current best internal CV score: 575141459.8670925


Optimization Progress:  44%|████▍     | 445/1010 [44:42<2:02:47, 13.04s/pipeline]

Generation 43 - Current best internal CV score: 575141459.8670925


Optimization Progress:  45%|████▌     | 456/1010 [49:45<14:58:03, 97.26s/pipeline]

Generation 44 - Current best internal CV score: 575141459.8670925


Optimization Progress:  46%|████▌     | 467/1010 [54:46<20:46:33, 137.74s/pipeline]

Generation 45 - Current best internal CV score: 575141459.8670925


Optimization Progress:  47%|████▋     | 478/1010 [59:47<23:17:51, 157.65s/pipeline]

Generation 46 - Current best internal CV score: 575141459.8670925


Optimization Progress:  48%|████▊     | 488/1010 [1:00:15<12:24:27, 85.57s/pipeline]

Generation 47 - Current best internal CV score: 575141459.8670925


Optimization Progress:  49%|████▉     | 498/1010 [1:00:45<7:14:22, 50.90s/pipeline] 

Generation 48 - Current best internal CV score: 575141459.8670925


Optimization Progress:  50%|█████     | 508/1010 [1:01:04<2:42:20, 19.40s/pipeline]

Generation 49 - Current best internal CV score: 575141459.8670925


Optimization Progress:  51%|█████▏    | 518/1010 [1:01:27<2:48:34, 20.56s/pipeline]

Generation 50 - Current best internal CV score: 575141459.8670925


Optimization Progress:  52%|█████▏    | 529/1010 [1:06:28<13:23:53, 100.28s/pipeline]

Generation 51 - Current best internal CV score: 575141459.8670925


Optimization Progress:  53%|█████▎    | 539/1010 [1:07:00<7:39:42, 58.56s/pipeline]  

Generation 52 - Current best internal CV score: 575141459.8670925


Optimization Progress:  54%|█████▍    | 549/1010 [1:07:37<5:03:39, 39.52s/pipeline]

Generation 53 - Current best internal CV score: 575141459.8670925


Optimization Progress:  55%|█████▌    | 559/1010 [1:08:01<3:18:22, 26.39s/pipeline]

Generation 54 - Current best internal CV score: 575141459.8670925


Optimization Progress:  56%|█████▋    | 569/1010 [1:08:17<2:10:16, 17.72s/pipeline]

Generation 55 - Current best internal CV score: 575141459.8670925


Optimization Progress:  57%|█████▋    | 579/1010 [1:08:41<1:53:45, 15.84s/pipeline]

Generation 56 - Current best internal CV score: 575141459.8670925


Optimization Progress:  58%|█████▊    | 589/1010 [1:09:08<1:22:25, 11.75s/pipeline]

Generation 57 - Current best internal CV score: 575141459.8670925


Optimization Progress:  60%|█████▉    | 601/1010 [1:14:13<10:50:00, 95.36s/pipeline]

Generation 58 - Current best internal CV score: 575141459.8670925


Optimization Progress:  60%|██████    | 611/1010 [1:14:41<6:05:47, 55.01s/pipeline] 

Generation 59 - Current best internal CV score: 575141459.8670925


Optimization Progress:  61%|██████▏   | 621/1010 [1:15:06<3:18:23, 30.60s/pipeline]

Generation 60 - Current best internal CV score: 575141459.8670925


Optimization Progress:  62%|██████▏   | 631/1010 [1:15:25<1:46:43, 16.89s/pipeline]

Generation 61 - Current best internal CV score: 575141459.8670925


Optimization Progress:  63%|██████▎   | 641/1010 [1:15:48<1:32:05, 14.97s/pipeline]

Generation 62 - Current best internal CV score: 575141459.8670925


Optimization Progress:  64%|██████▍   | 651/1010 [1:16:12<1:25:53, 14.36s/pipeline]

Generation 63 - Current best internal CV score: 575141459.8670925


Optimization Progress:  65%|██████▌   | 661/1010 [1:16:38<1:25:40, 14.73s/pipeline]

Generation 64 - Current best internal CV score: 575141459.8670925


Optimization Progress:  67%|██████▋   | 672/1010 [1:21:40<9:09:56, 97.62s/pipeline]

Generation 65 - Current best internal CV score: 575141459.8670925


Optimization Progress:  68%|██████▊   | 682/1010 [1:22:03<4:40:17, 51.27s/pipeline]

Generation 66 - Current best internal CV score: 563346955.9640961


Optimization Progress:  69%|██████▊   | 693/1010 [1:27:15<10:26:57, 118.67s/pipeline]

Generation 67 - Current best internal CV score: 563346955.9640961


Optimization Progress:  70%|██████▉   | 703/1010 [1:27:38<5:32:32, 64.99s/pipeline]  

Generation 68 - Current best internal CV score: 563346955.9640961


Optimization Progress:  71%|███████   | 713/1010 [1:28:05<3:16:18, 39.66s/pipeline]

Generation 69 - Current best internal CV score: 563346955.9640961


Optimization Progress:  72%|███████▏  | 723/1010 [1:28:32<2:12:07, 27.62s/pipeline]

Generation 70 - Current best internal CV score: 563346955.9640961


Optimization Progress:  73%|███████▎  | 733/1010 [1:28:54<1:17:27, 16.78s/pipeline]

Generation 71 - Current best internal CV score: 563346955.9640961


Optimization Progress:  74%|███████▎  | 743/1010 [1:29:27<1:19:41, 17.91s/pipeline]

Generation 72 - Current best internal CV score: 563346955.9640961


Optimization Progress:  75%|███████▍  | 754/1010 [1:34:37<7:13:31, 101.61s/pipeline]

Generation 73 - Current best internal CV score: 563346955.9640961


Optimization Progress:  76%|███████▌  | 764/1010 [1:35:12<4:06:40, 60.17s/pipeline] 

Generation 74 - Current best internal CV score: 542885880.9505274


Optimization Progress:  77%|███████▋  | 774/1010 [1:35:34<2:21:23, 35.95s/pipeline]

Generation 75 - Current best internal CV score: 542885880.9505274


Optimization Progress:  78%|███████▊  | 784/1010 [1:36:11<1:48:06, 28.70s/pipeline]

Generation 76 - Current best internal CV score: 542885880.9505274


Optimization Progress:  79%|███████▊  | 794/1010 [1:36:35<1:03:19, 17.59s/pipeline]

Generation 77 - Current best internal CV score: 542885880.9505274


Optimization Progress:  80%|███████▉  | 804/1010 [1:36:57<40:43, 11.86s/pipeline]  

Generation 78 - Current best internal CV score: 542885880.9505274


Optimization Progress:  81%|████████  | 814/1010 [1:37:30<51:24, 15.74s/pipeline]

Generation 79 - Current best internal CV score: 542885880.9505274


Optimization Progress:  82%|████████▏ | 824/1010 [1:38:06<56:56, 18.37s/pipeline]

Generation 80 - Current best internal CV score: 542885880.9505274


Optimization Progress:  83%|████████▎ | 834/1010 [1:38:41<39:34, 13.49s/pipeline]

Generation 81 - Current best internal CV score: 542885880.9505274


Optimization Progress:  84%|████████▎ | 844/1010 [1:39:17<56:17, 20.35s/pipeline]

Generation 82 - Current best internal CV score: 542885880.9505274


Optimization Progress:  85%|████████▍ | 854/1010 [1:39:56<56:19, 21.66s/pipeline]

Generation 83 - Current best internal CV score: 542885880.9505274


Optimization Progress:  86%|████████▌ | 865/1010 [1:44:58<1:34:28, 39.09s/pipeline]

Generation 84 - Current best internal CV score: 527850570.6127537


Optimization Progress:  87%|████████▋ | 875/1010 [1:45:29<1:22:39, 36.74s/pipeline]

Generation 85 - Current best internal CV score: 527850570.6127537


Optimization Progress:  88%|████████▊ | 885/1010 [1:45:57<46:06, 22.13s/pipeline]  

Generation 86 - Current best internal CV score: 527850570.6127537


Optimization Progress:  89%|████████▊ | 895/1010 [1:46:20<23:36, 12.32s/pipeline]

Generation 87 - Current best internal CV score: 527850570.6127537


Optimization Progress:  90%|████████▉ | 905/1010 [1:46:57<24:51, 14.21s/pipeline]

Generation 88 - Current best internal CV score: 527850570.6127537


Optimization Progress:  91%|█████████ | 915/1010 [1:47:23<16:16, 10.28s/pipeline]

Generation 89 - Current best internal CV score: 527850570.6127537


Optimization Progress:  92%|█████████▏| 925/1010 [1:48:03<18:52, 13.32s/pipeline]

Generation 90 - Current best internal CV score: 527850570.6127537


Optimization Progress:  93%|█████████▎| 935/1010 [1:48:37<17:04, 13.66s/pipeline]

Generation 91 - Current best internal CV score: 527850570.6127537


Optimization Progress:  94%|█████████▎| 945/1010 [1:49:19<16:40, 15.39s/pipeline]

Generation 92 - Current best internal CV score: 527850570.6127537


Optimization Progress:  95%|█████████▍| 955/1010 [1:49:55<19:50, 21.64s/pipeline]

Generation 93 - Current best internal CV score: 527850570.6127537


Optimization Progress:  96%|█████████▌| 965/1010 [1:50:28<15:26, 20.59s/pipeline]

Generation 94 - Current best internal CV score: 527850570.6127537


Optimization Progress:  97%|█████████▋| 976/1010 [23:25:45<220:17:28, 23324.95s/pipeline]

Generation 95 - Current best internal CV score: 527850570.6127537


Optimization Progress:  98%|█████████▊| 986/1010 [23:26:20<76:15:52, 11439.67s/pipeline] 

Generation 96 - Current best internal CV score: 527850570.6127537


Optimization Progress:  99%|█████████▊| 996/1010 [23:27:09<21:51:19, 5619.97s/pipeline] 

Generation 97 - Current best internal CV score: 527850570.6127537


Optimization Progress: 100%|█████████▉| 1006/1010 [23:27:37<2:08:54, 1933.57s/pipeline]

Generation 98 - Current best internal CV score: 527850570.6127537


Optimization Progress: 1016pipeline [23:28:13, 951.21s/pipeline]                       

Generation 99 - Current best internal CV score: 527850570.6127537


                                                                

Generation 100 - Current best internal CV score: 527850570.6127537

Best pipeline: XGBRegressor(LinearSVR(CombineDFs(LinearSVR(CombineDFs(input_matrix, input_matrix), C=0.1, dual=False, epsilon=0.1, loss=squared_epsilon_insensitive, tol=1e-05), MaxAbsScaler(input_matrix)), C=0.1, dual=False, epsilon=0.1, loss=squared_epsilon_insensitive, tol=0.1), learning_rate=0.1, max_depth=5, min_child_weight=2, n_estimators=100, nthread=1, subsample=0.95)
487708836.924


In [42]:
tpot.export('tpot_exported_pipeline.py')

True

In [43]:
tpot.evaluated_individuals_

{'DecisionTreeRegressor(DecisionTreeRegressor(input_matrix, DecisionTreeRegressor__max_depth=3, DecisionTreeRegressor__min_samples_leaf=9, DecisionTreeRegressor__min_samples_split=13), DecisionTreeRegressor__max_depth=9, DecisionTreeRegressor__min_samples_leaf=9, DecisionTreeRegressor__min_samples_split=7)': (2,
  -1656784604.539309),
 'DecisionTreeRegressor(LinearSVR(input_matrix, LinearSVR__C=0.1, LinearSVR__dual=True, LinearSVR__epsilon=0.001, LinearSVR__loss=squared_epsilon_insensitive, LinearSVR__tol=0.01), DecisionTreeRegressor__max_depth=6, DecisionTreeRegressor__min_samples_leaf=4, DecisionTreeRegressor__min_samples_split=11)': (2,
  -1206903849.5031352),
 'DecisionTreeRegressor(LinearSVR(input_matrix, LinearSVR__C=0.5, LinearSVR__dual=True, LinearSVR__epsilon=0.001, LinearSVR__loss=squared_epsilon_insensitive, LinearSVR__tol=1e-05), DecisionTreeRegressor__max_depth=10, DecisionTreeRegressor__min_samples_leaf=16, DecisionTreeRegressor__min_samples_split=3)': (2,
  -1216302454.1