# Run missing models

## Import modules

In [16]:
import openml
from openml import tasks, flows, runs
import sklearn
from sklearn import feature_selection
from sklearn.linear_model import LogisticRegression
import pandas as pd
import pprint
from collections import OrderedDict, Counter
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer, make_column_transformer
import re
import random
import numpy as np
from datetime import datetime
import sys
import json
from itertools import combinations
import signal
import importlib
import utils.functions_analyze_runs
importlib.reload(utils.functions_analyze_runs)
importlib.reload(sklearn)
from utils.functions_analyze_runs import get_run_info_lr, get_run_info

# set api key
openml.config.apikey = open('.key', 'r').readline().strip('\n')

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
missing = pd.read_csv('lr_missing.csv').iloc[:,[1,2]]

In [18]:
missing

Unnamed: 0,setup_id,task_id
0,8275573,3021
1,8275574,3021
2,8275575,3021
3,8275614,3021
4,8275615,3021
...,...,...
77,8275917,167125
78,8275918,167125
79,8275919,167125
80,8275920,167125


## Iterate through missing runs in random order

In [19]:
# infinite loop
for i in missing.task_id.unique():

    # get task
    task = openml.tasks.get_task(i)
    
    # get dataset object
    data = openml.datasets.get_dataset(task.dataset_id)

    # get relevant info from dataset object
    X, y, categorical_indicator, attribute_names = data.get_data(dataset_format='array',
                                                                target=data.default_target_attribute)

    # mask with feature types
    cat = categorical_indicator
    num = [not k for k in categorical_indicator]

    # create column transformers
    numeric_transformer = make_pipeline(#SimpleImputer(strategy='median'), 
                                        StandardScaler())

    categorical_transformer = make_pipeline(#SimpleImputer(strategy='most_frequent'),
                                            OneHotEncoder(handle_unknown='ignore'))

    preprocessor = ColumnTransformer(
    transformers=[
    ('num', numeric_transformer, num),
    ('cat', categorical_transformer, cat)])
    
    # loop over runs in random order
    for k in missing[missing['task_id'] == i].setup_id:
        
        print('Run', k, 'on task', i)
        print(datetime.now())
        
        try:
            # get params
            params = openml.setups.initialize_model(k).steps[2][1].get_params()

            # define classifier
            clf = LogisticRegression(**params)

            # pick pipeline according to feature types
            if not any(categorical_indicator):
                pipe = make_pipeline(SimpleImputer(strategy='median'), StandardScaler(), clf)
            elif all(categorical_indicator):
                pipe = make_pipeline(SimpleImputer(strategy='most_frequent'), OneHotEncoder(handle_unknown='ignore'), clf)
            else:
                pipe = make_pipeline(SimpleImputer(strategy='most_frequent'), preprocessor, clf)
                
            # run best model on the task
            run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False)

            # print feedbackack
            print('Publish openml run...')

            # push tag
            # run.push_tag('best_models')
            # publish the run
            
            run.publish()
            # print feedback
            print('View run online: https://www.openml.org/r/' + str(run.run_id))
            print('Setup', openml.runs.get_run(run.run_id).setup_id)
            print('Flow', openml.runs.get_run(run.run_id).flow_id)
            print()

        except Exception as e:
            print(e)


Run 8275573 on task 3021
2020-08-11 17:29:37.952516
boolean index did not match indexed array along dimension 0; dimension is 28 but corresponding boolean dimension is 29
Run 8275574 on task 3021
2020-08-11 17:29:38.060078
boolean index did not match indexed array along dimension 0; dimension is 28 but corresponding boolean dimension is 29
Run 8275575 on task 3021
2020-08-11 17:29:38.163648
boolean index did not match indexed array along dimension 0; dimension is 28 but corresponding boolean dimension is 29
Run 8275614 on task 3021
2020-08-11 17:29:38.272440
boolean index did not match indexed array along dimension 0; dimension is 28 but corresponding boolean dimension is 29
Run 8275615 on task 3021
2020-08-11 17:29:38.378873
boolean index did not match indexed array along dimension 0; dimension is 28 but corresponding boolean dimension is 29
Run 8275616 on task 3021
2020-08-11 17:29:38.495579
boolean index did not match indexed array along dimension 0; dimension is 28 but correspondin

Publish openml run...
Could not validate run xml by xsd - XML does not correspond to XSD schema. Error Element '{http://openml.org/openml}value': [facet 'maxLength'] The value has a length of '20490'; this exceeds the allowed maximum length of '2048'.
 on line 71 column 0. Error Element '{http://openml.org/openml}value': '[{&quot;oml-python:serialized_object&quot;: &quot;component_reference&quot;, &quot;value&quot;: {&quot;key&quot;: &quot;num&quot;, &quot;step_name&quot;: &quot;num&quot;, &quot;argument_1&quot;: [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 

Publish openml run...
Could not validate run xml by xsd - XML does not correspond to XSD schema. Error Element '{http://openml.org/openml}value': [facet 'maxLength'] The value has a length of '20490'; this exceeds the allowed maximum length of '2048'.
 on line 71 column 0. Error Element '{http://openml.org/openml}value': '[{&quot;oml-python:serialized_object&quot;: &quot;component_reference&quot;, &quot;value&quot;: {&quot;key&quot;: &quot;num&quot;, &quot;step_name&quot;: &quot;num&quot;, &quot;argument_1&quot;: [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 

Publish openml run...
Could not validate run xml by xsd - XML does not correspond to XSD schema. Error Element '{http://openml.org/openml}value': [facet 'maxLength'] The value has a length of '20490'; this exceeds the allowed maximum length of '2048'.
 on line 71 column 0. Error Element '{http://openml.org/openml}value': '[{&quot;oml-python:serialized_object&quot;: &quot;component_reference&quot;, &quot;value&quot;: {&quot;key&quot;: &quot;num&quot;, &quot;step_name&quot;: &quot;num&quot;, &quot;argument_1&quot;: [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 

Publish openml run...
Could not validate run xml by xsd - XML does not correspond to XSD schema. Error Element '{http://openml.org/openml}value': [facet 'maxLength'] The value has a length of '20490'; this exceeds the allowed maximum length of '2048'.
 on line 71 column 0. Error Element '{http://openml.org/openml}value': '[{&quot;oml-python:serialized_object&quot;: &quot;component_reference&quot;, &quot;value&quot;: {&quot;key&quot;: &quot;num&quot;, &quot;step_name&quot;: &quot;num&quot;, &quot;argument_1&quot;: [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 

Publish openml run...
Could not validate run xml by xsd - XML does not correspond to XSD schema. Error Element '{http://openml.org/openml}value': [facet 'maxLength'] The value has a length of '20490'; this exceeds the allowed maximum length of '2048'.
 on line 71 column 0. Error Element '{http://openml.org/openml}value': '[{&quot;oml-python:serialized_object&quot;: &quot;component_reference&quot;, &quot;value&quot;: {&quot;key&quot;: &quot;num&quot;, &quot;step_name&quot;: &quot;num&quot;, &quot;argument_1&quot;: [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 



Publish openml run...
Could not validate run xml by xsd - XML does not correspond to XSD schema. Error Element '{http://openml.org/openml}value': [facet 'maxLength'] The value has a length of '20490'; this exceeds the allowed maximum length of '2048'.
 on line 71 column 0. Error Element '{http://openml.org/openml}value': '[{&quot;oml-python:serialized_object&quot;: &quot;component_reference&quot;, &quot;value&quot;: {&quot;key&quot;: &quot;num&quot;, &quot;step_name&quot;: &quot;num&quot;, &quot;argument_1&quot;: [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 

Publish openml run...
Could not validate run xml by xsd - XML does not correspond to XSD schema. Error Element '{http://openml.org/openml}value': [facet 'maxLength'] The value has a length of '20490'; this exceeds the allowed maximum length of '2048'.
 on line 71 column 0. Error Element '{http://openml.org/openml}value': '[{&quot;oml-python:serialized_object&quot;: &quot;component_reference&quot;, &quot;value&quot;: {&quot;key&quot;: &quot;num&quot;, &quot;step_name&quot;: &quot;num&quot;, &quot;argument_1&quot;: [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 

Publish openml run...
Could not validate run xml by xsd - XML does not correspond to XSD schema. Error Element '{http://openml.org/openml}value': [facet 'maxLength'] The value has a length of '20490'; this exceeds the allowed maximum length of '2048'.
 on line 71 column 0. Error Element '{http://openml.org/openml}value': '[{&quot;oml-python:serialized_object&quot;: &quot;component_reference&quot;, &quot;value&quot;: {&quot;key&quot;: &quot;num&quot;, &quot;step_name&quot;: &quot;num&quot;, &quot;argument_1&quot;: [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 

Publish openml run...
Could not validate run xml by xsd - XML does not correspond to XSD schema. Error Element '{http://openml.org/openml}value': [facet 'maxLength'] The value has a length of '20490'; this exceeds the allowed maximum length of '2048'.
 on line 71 column 0. Error Element '{http://openml.org/openml}value': '[{&quot;oml-python:serialized_object&quot;: &quot;component_reference&quot;, &quot;value&quot;: {&quot;key&quot;: &quot;num&quot;, &quot;step_name&quot;: &quot;num&quot;, &quot;argument_1&quot;: [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 

Publish openml run...
Could not validate run xml by xsd - XML does not correspond to XSD schema. Error Element '{http://openml.org/openml}value': [facet 'maxLength'] The value has a length of '20490'; this exceeds the allowed maximum length of '2048'.
 on line 71 column 0. Error Element '{http://openml.org/openml}value': '[{&quot;oml-python:serialized_object&quot;: &quot;component_reference&quot;, &quot;value&quot;: {&quot;key&quot;: &quot;num&quot;, &quot;step_name&quot;: &quot;num&quot;, &quot;argument_1&quot;: [true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 

KeyboardInterrupt: 