In [1]:
# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

# Import libraries
from pmlb import dataset_names, classification_dataset_names, regression_dataset_names, fetch_data
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sb

# Import SK-learn and AutoSK-Learn
import autosklearn.classification
import autosklearn.regression
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import time
import shutil
from multiprocessing import Process, current_process, Queue

  from collections import Mapping, defaultdict


In [18]:
# Benchmark dataset: the entry at index 8 of the classification dataset name list imported from pmlb
dataset = classification_dataset_names[8]

In [19]:
# Fetch the selected dataset through pmlb; return_X_y=True is requested so the result unpacks into X and y
X, y = fetch_data(dataset, return_X_y=True)

In [20]:
# Split into train and test partitions (no explicit test size given) with a fixed seed for repeatability
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X,
    y,
    random_state=1,
)

In [21]:
# Set the tmp folders where the models will take data out of
tmp_folder = '/tmp/autosklearn_parallel_example_tmp'
output_folder = '/tmp/autosklearn_parallel_example_out'

# Clear the folders if there are contents from previous runs.
# A missing folder is the normal case on a first run, so it is ignored silently;
# any other filesystem error is reported together with the path and the cause.
for folder in (tmp_folder, output_folder):
    try:
        shutil.rmtree(folder)
    except FileNotFoundError:
        # Nothing left over from a previous run
        pass
    except OSError as e:
        print(f'Could not remove {folder}: {e}')

In [15]:
# Entry point for the training process: fits the main auto-sklearn model
def run_main_model(X_train, y_train, return_queue):
    """Fit the main AutoSklearnClassifier and hand it back through a queue.

    Parameters
    ----------
    X_train, y_train
        Training features and labels passed straight to ``fit``.
    return_queue
        Queue on which the fitted estimator is placed once training and
        scoring are finished.

    Notes
    -----
    ``tmp_folder``, ``output_folder``, ``X_test`` and ``y_test`` are not
    parameters; they are read from the notebook's global namespace.
    """
    settings = dict(
        time_left_for_this_task=150,  # sec., how long should this seed fit process run
        per_run_time_limit=15,
        shared_mode=True,  # tmp folder will be shared between seeds
        tmp_folder=tmp_folder,
        output_folder=output_folder,
        # Keep both folders on disk so snapshot runs can read from them
        delete_tmp_folder_after_terminate=False,
        delete_output_folder_after_terminate=False,
        seed=1,
    )
    automl = autosklearn.classification.AutoSklearnClassifier(**settings)

    automl.fit(X_train, y_train)
    print("Done fitting")

    # Score on the global test split and report it before returning the model
    print(f"Final score: {automl.score(X_test, y_test)}")
    return_queue.put(automl)

In [16]:
# A function that is launched periodically in its own process to take snapshots of the main model
def snapshot_model_and_score(X_test, y_test, seed, curr_snap_time):
    """Score whatever the main training run has produced so far.

    A second AutoSklearnClassifier is created with ``shared_mode=True`` and
    the same ``tmp_folder`` / ``output_folder`` globals as the main run, but
    with a time budget of 0, and is then scored on the given data.

    Parameters
    ----------
    X_test, y_test
        Data passed to both ``fit`` and ``score`` of the snapshot estimator.
    seed
        Seed given to the snapshot estimator.
    curr_snap_time
        Value included in the printed report to label this snapshot.

    Notes
    -----
    Prints the score; nothing is returned.
    """
    snapshot = autosklearn.classification.AutoSklearnClassifier(
            time_left_for_this_task=0,
            shared_mode=True, # tmp folder will be shared between seeds
            tmp_folder=tmp_folder,
            output_folder=output_folder,
            delete_tmp_folder_after_terminate=False,
            delete_output_folder_after_terminate=False,
            seed=seed,)

    # Run the snapshot model to retrieve the model information from the temp folder.
    # This solution is not ideal even though it works: with a time cap of 0 the fit
    # is expected to log an error. Failures stay best-effort, but only ordinary
    # exceptions are caught (not KeyboardInterrupt/SystemExit) and they are reported
    # instead of being silently discarded.
    try:
        snapshot.fit(X_test, y_test)
    except Exception as exc:
        print(f"Snapshot fit at time {curr_snap_time} raised {type(exc).__name__}: {exc}")

    current_score = snapshot.score(X_test, y_test)
    print(f"Current snapshot score at time {curr_snap_time}: {current_score}")

In [22]:
# Main script to run the processes for the automl training and the snapshotting
snap_times = 5 # How many snaps to take
snap_interval = 15 # Seconds between snaps
return_queue = Queue()

# Start the training run in its own process; its fitted model comes back via return_queue.
# It is deliberately not joined here: the queue must be read before joining a process
# that has put data on it.
base_process = Process(target = run_main_model, args = (X_train, y_train, return_queue))
base_process.start()

snapshot_processes = []
for snap_time in range(snap_times):
    time.sleep(snap_interval)
    print(f'Current time is {time.perf_counter()}')
    # Snapshot seeds start at 2 (the main run uses seed=1); the last argument is the
    # nominal elapsed time used to label the snapshot's report.
    process = Process(target = snapshot_model_and_score, args = (X_test, y_test, snap_time+2, (snap_time+1)*snap_interval))
    # Process.start() returns None, so its result is not kept
    process.start()
    snapshot_processes.append(process)

# Wait for every snapshot to finish so that all of their reports are printed
# before this cell completes and no child process is left unreaped.
for process in snapshot_processes:
    process.join()
    

Current time is 3237241.026639331
[ERROR] [2019-03-15 23:25:30,665:AutoML(2):af3670a46ea6c9dd172de477f812b1ff] Error creating dummy predictions: {'error': 'Timeout', 'configuration_origin': 'DUMMY'} 
Current snapshot score at time 15: 0.8613545164196217
Current time is 3237256.051771911
[ERROR] [2019-03-15 23:25:45,682:AutoML(3):af3670a46ea6c9dd172de477f812b1ff] Error creating dummy predictions: {'error': 'Timeout', 'configuration_origin': 'DUMMY'} 
Current snapshot score at time 30: 0.860289902546884
Current time is 3237271.076934277
[ERROR] [2019-03-15 23:26:00,685:AutoML(4):af3670a46ea6c9dd172de477f812b1ff] Error creating dummy predictions: {'error': 'Timeout', 'configuration_origin': 'DUMMY'} 
Current snapshot score at time 45: 0.860289902546884
Current time is 3237286.101172735
[ERROR] [2019-03-15 23:26:15,696:AutoML(5):af3670a46ea6c9dd172de477f812b1ff] Error creating dummy predictions: {'error': 'Timeout', 'configuration_origin': 'DUMMY'} 
Current snapshot score at time 60: 0.860

In [23]:
# Return the final model (blocks until the training process has put it on the queue)
automl = return_queue.get()
# The result has been received, so the training process can now be joined safely
base_process.join()

In [24]:
# Re-score the model received from the training process on the held-out test split
print(f"Final score confirmation: {automl.score(X_test, y_test)}")

Final score confirmation: 0.8700352141511751
