In [1]:
import pandas as pd
import numpy as np
import sklearn.metrics
import sklearn.model_selection
import time

In [2]:
import autosklearn.regression

In [3]:
def compute(x_path, y_path, comp_time,
            per_run_time_limit=1800,
            tmp_folder='/home/hx152/tmp/autosklearn_regression_example_tmp'):
    """Fit an auto-sklearn regressor on one dataset and report R2 and timings.

    The feature and target CSV files are loaded, split 80/20 into train and
    test partitions (fixed ``random_state=7``), and an ``AutoSklearnRegressor``
    optimising R2 is fitted on the training partition. Training time, the
    leaderboard, train/test R2 and prediction time are printed.

    Parameters
    ----------
    x_path : str
        Path to the CSV file holding the features.
    y_path : str
        Path to the CSV file holding the target.
    comp_time : int
        Value forwarded as ``time_left_for_this_task`` (overall search budget).
    per_run_time_limit : int, optional
        Value forwarded as ``per_run_time_limit``. Defaults to 1800.
        NOTE(review): this default can exceed ``comp_time``; confirm how
        auto-sklearn treats a per-run limit larger than the total budget.
    tmp_folder : str, optional
        Working directory handed to auto-sklearn. Defaults to the path that
        was previously hard-coded here.

    Returns
    -------
    dict
        ``{"comp_time", "train_time", "predict_time", "train_r2", "test_r2"}``
        so callers can collect results instead of parsing printed output.
    """
    X = pd.read_csv(x_path)
    y = pd.read_csv(y_path)
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X, y, test_size=0.2, random_state=7)
    # The full frames are no longer needed once the split exists.
    del X, y

    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments during a multi-hour search.
    fit_start = time.perf_counter()

    reg = autosklearn.regression.AutoSklearnRegressor(
        time_left_for_this_task=comp_time,
        per_run_time_limit=per_run_time_limit,
        n_jobs=4,
        memory_limit=40*1024,
        tmp_folder=tmp_folder,
        metric=autosklearn.metrics.r2)
    reg.fit(X_train, y_train, dataset_name='QSAR')

    train_time = time.perf_counter() - fit_start
    print("training time cost:", train_time)

    print(reg.leaderboard())

    # Start the prediction clock only now, so building/printing the
    # leaderboard and computing the scores are not counted as prediction time.
    predict_start = time.perf_counter()
    train_predictions = reg.predict(X_train)
    test_predictions = reg.predict(X_test)
    predict_time = time.perf_counter() - predict_start

    train_r2 = sklearn.metrics.r2_score(y_train, train_predictions)
    test_r2 = sklearn.metrics.r2_score(y_test, test_predictions)
    print("Train R2 score:", train_r2)
    print("Test R2 score:", test_r2)
    print("prediction time cost", predict_time)

    return {
        "comp_time": comp_time,
        "train_time": train_time,
        "predict_time": predict_time,
        "train_r2": train_r2,
        "test_r2": test_r2,
    }

In [5]:
# Sweep several total time budgets (values passed to compute as comp_time)
# over a single QSAR dataset.
i = 13

# The file paths depend only on the dataset index, not on the time budget,
# so they are built once outside the loop.
x_path = 'dataset/QSAR_{}_train_x.csv'.format(i)
y_path = 'dataset/QSAR_{}_train_y.csv'.format(i)

for comp_time in [600, 1200, 1800, 2400, 3000, 7200]:
    # Announce the run before it starts: each compute() call can take hours,
    # so a message printed afterwards would misreport what is in progress.
    print("now is processing dataset %s" % i)
    compute(x_path, y_path, comp_time)



  self.metafeatures = self.metafeatures.append(metafeatures)
  self.algorithm_runs[metric].append(runs)


training time cost: 606.0470721721649
          rank  ensemble_weight               type      cost    duration
model_id                                                                
4            1             0.68        extra_trees  0.458721  165.498317
22           2             0.20  gradient_boosting  0.487001  128.601851
21           3             0.08     ard_regression  0.577180   87.531581
7            4             0.04      liblinear_svr  0.673353    7.454353
Train R2 score: 0.7682426978968384
Test R2 score: 0.4947638509558939
prediction time cost 9.940997123718262
now is processing dataset 13


  self.metafeatures = self.metafeatures.append(metafeatures)
  self.algorithm_runs[metric].append(runs)


training time cost: 1203.2657797336578
          rank  ensemble_weight               type      cost    duration
model_id                                                                
4            1             0.58        extra_trees  0.458721  158.176299
22           2             0.20  gradient_boosting  0.487001  126.655718
32           3             0.12        extra_trees  0.491117  144.152162
21           4             0.06     ard_regression  0.577180   88.615530
7            5             0.04      liblinear_svr  0.673353    6.400471
Train R2 score: 0.7752031720312023
Test R2 score: 0.49501679847926683
prediction time cost 13.384210586547852
now is processing dataset 13


  self.metafeatures = self.metafeatures.append(metafeatures)
  self.algorithm_runs[metric].append(runs)


training time cost: 1802.4991173744202
          rank  ensemble_weight               type      cost    duration
model_id                                                                
4            1             0.66        extra_trees  0.458721  170.368645
22           2             0.20  gradient_boosting  0.487001  131.628007
21           3             0.08     ard_regression  0.577180   77.793598
7            4             0.04      liblinear_svr  0.673353    7.839689
30           5             0.02  gradient_boosting  0.689549   80.553014
Train R2 score: 0.7711578841250634
Test R2 score: 0.49547082296133393
prediction time cost 10.43220043182373
now is processing dataset 13


  self.metafeatures = self.metafeatures.append(metafeatures)
  self.algorithm_runs[metric].append(runs)


training time cost: 2404.110934972763
          rank  ensemble_weight               type      cost    duration
model_id                                                                
4            1             0.66        extra_trees  0.458721  177.668751
22           2             0.20  gradient_boosting  0.487001  145.507342
21           3             0.08     ard_regression  0.577180   83.399717
7            4             0.04      liblinear_svr  0.673353    6.488607
30           5             0.02  gradient_boosting  0.689549   68.303834
Train R2 score: 0.7711578841250634
Test R2 score: 0.49547082296133393
prediction time cost 9.934521913528442
now is processing dataset 13


  self.metafeatures = self.metafeatures.append(metafeatures)
  self.algorithm_runs[metric].append(runs)


training time cost: 2998.0951583385468
          rank  ensemble_weight                 type      cost    duration
model_id                                                                  
4            1             0.58          extra_trees  0.458721  166.189289
22           2             0.20    gradient_boosting  0.487001  140.770126
21           3             0.10       ard_regression  0.577180   79.405410
7            4             0.02        liblinear_svr  0.673353    6.529928
30           5             0.02    gradient_boosting  0.689549   63.762416
31           6             0.08  k_nearest_neighbors  0.774752   76.515829
Train R2 score: 0.7824217338977466
Test R2 score: 0.4945401054945836
prediction time cost 110.77276396751404
now is processing dataset 13


  self.metafeatures = self.metafeatures.append(metafeatures)
  self.algorithm_runs[metric].append(runs)


training time cost: 7334.099231004715
          rank  ensemble_weight                 type      cost    duration
model_id                                                                  
50           1             0.12          extra_trees  0.453518  115.588275
85           2             0.42          extra_trees  0.454196  672.431728
22           3             0.08    gradient_boosting  0.487001  127.949448
48           4             0.16           libsvm_svr  0.531114  201.531819
21           5             0.06       ard_regression  0.577180   86.432356
53           6             0.10       ard_regression  0.705401   48.974086
31           7             0.06  k_nearest_neighbors  0.774752   91.285148
Train R2 score: 0.7976489946877341
Test R2 score: 0.4985760581062625
prediction time cost 254.81248688697815
now is processing dataset 13
