# CLI Config Generator

In [1]:
from aaai20.io import (
    build_filesystem,
    default_prefix_exp_fn_suffix,
    filename_config,
    filename_cli_commands,
    filename_script,
    filename_logs,
)

from aaai20.cmd_gen import (
    all_fnames_in_dir,
    generate_config_and_log_fname,
    generate_df_commands,
    extract_idx_from_fnames,
    default_start_idx,
    prepare_fs,
)


import json
import os
import pandas as pd
import datetime

# Functions

In [2]:
def generate_predict_config(
    dataset="nltcs",
    model_keyword="default",
    exp_keyword="test-exp",
    script="run_mercs",
    exp_idx=0,
    **predict_config
):
    """Assemble the configuration dictionary for a prediction experiment.

    Extra keyword arguments are treated as prediction settings and are layered
    on top of the built-in default (``prediction_algorithm="mi"``).

    Parameters
    ----------
    dataset, model_keyword, exp_keyword, script, exp_idx
        Stored unchanged under the keys of the same name.
    **predict_config
        Prediction settings; a key given here overrides the default of the
        same name.

    Returns
    -------
    dict
        Keys ``dataset``, ``model_keyword``, ``exp_keyword``, ``exp_idx``,
        ``predict_config`` and ``script`` (in that order).
    """
    # Start from the defaults, then let caller-supplied settings win.
    merged_settings = {"prediction_algorithm": "mi"}
    merged_settings.update(predict_config)

    return {
        "dataset": dataset,
        "model_keyword": model_keyword,
        "exp_keyword": exp_keyword,
        "exp_idx": exp_idx,
        "predict_config": merged_settings,
        "script": script,
    }

def generate_fit_config(
    dataset="nltcs",
    model_keyword="default",
    exp_keyword="test-exp",
    script="run_mercs",
    exp_idx=0,
    **fit_config
):
    """Assemble the configuration dictionary for a fit experiment.

    Extra keyword arguments are collected as the fit settings; there are no
    built-in defaults for them.

    Parameters
    ----------
    dataset, model_keyword, exp_keyword, script, exp_idx
        Stored unchanged under the keys of the same name.
    **fit_config
        Fit settings, stored as a fresh dict under ``fit_config``.

    Returns
    -------
    dict
        Keys ``dataset``, ``model_keyword``, ``exp_keyword``, ``exp_idx``,
        ``fit_config`` and ``script`` (in that order).
    """
    # No defaults to merge: the settings are just a copy of what was passed in.
    merged_settings = {}
    merged_settings.update(fit_config)

    return {
        "dataset": dataset,
        "model_keyword": model_keyword,
        "exp_keyword": exp_keyword,
        "exp_idx": exp_idx,
        "fit_config": merged_settings,
        "script": script,
    }

In [3]:
def default_mercs_models(
    max_depth=4,
    random_state=42,
    selection_algorithm='base',
    nb_iterations=1,
    fraction_missing=0,
    datasets=None,
):
    """Write one fit config per dataset plus a CSV of the commands to run them.

    For every dataset a config is built with ``generate_fit_config``, dumped as
    JSON to the path given by ``generate_config_and_log_fname``, and the
    resulting config/log filenames are turned into a command table with
    ``generate_df_commands``. That table is saved as CSV to the path returned
    by ``filename_cli_commands(exp_keyword)``.

    Parameters
    ----------
    max_depth, random_state, selection_algorithm, nb_iterations, fraction_missing
        Forwarded as fit settings into every generated config. ``max_depth``
        and ``selection_algorithm`` also form the model keyword
        ``'md{max_depth}-{selection_algorithm}'``.
    datasets : iterable of str, optional
        Dataset names to generate configs for. When omitted, the notebook-level
        ``starai_ds`` collection is used, so that variable must be defined
        before the call.

    Returns
    -------
    None
        The function is used for its side effects (files written to disk).
    """
    # Settings shared by every generated config.
    fit_config = dict(
        random_state=random_state,
        max_depth=max_depth,
        selection_algorithm=selection_algorithm,
        nb_iterations=nb_iterations,
        fraction_missing=fraction_missing,
    )

    # Fixed parameters
    model_keyword = 'md{}-{}'.format(max_depth, selection_algorithm)
    exp_keyword = model_keyword + "-fit"
    script = "fit_mercs"

    fs = build_filesystem()
    prepare_fs(exp_keyword, script, fs)

    # total_seconds() rather than .seconds: the latter silently drops whole
    # days, which would bite as soon as the timeout is raised past 24h.
    timeout = int(datetime.timedelta(hours=1).total_seconds())
    start_idx = default_start_idx(fs, script=script)

    # Grid definition. Sorting makes the dataset -> exp_idx assignment
    # reproducible; iterating a set of strings directly can yield a different
    # order in every kernel session.
    if datasets is None:
        datasets = starai_ds
    phase_space = sorted(datasets)

    fn_script = filename_script(script, kind='fit')
    fn_cfg = []
    fn_log = []

    for p_idx, p in enumerate(phase_space):
        exp_idx = start_idx + p_idx

        # Generate config
        config = generate_fit_config(
            dataset=p,
            script=script,
            model_keyword=model_keyword,
            exp_keyword=exp_keyword,
            exp_idx=exp_idx,
            **fit_config
        )

        # Save config
        single_fn_cfg, single_fn_log = generate_config_and_log_fname(config, kind='fit')
        with open(single_fn_cfg, 'w') as f:
            json.dump(config, f, indent=4)

        fn_cfg.append(single_fn_cfg)
        fn_log.append(single_fn_log)

    df_commands = generate_df_commands(fn_script, fn_cfg, fn_log, timeout)

    fn_commands = filename_cli_commands(exp_keyword)
    df_commands.to_csv(fn_commands, index=False)

# FIT MERCS - Basic

Generate fit configurations for the `base` selection algorithm (just one model per attribute) at maximum depths 2, 4 and 8.

In [4]:
# Resolve the project's directory layout, then collect the name of every entry
# found in the StarAI dataset folder.
fs = build_filesystem()

starai_ds = {*os.listdir(fs["datasets-starai"])}
starai_ds

{'accidents',
 'ad',
 'baudio',
 'bbc',
 'bnetflix',
 'book',
 'c20ng',
 'cr52',
 'cwebkb',
 'dna',
 'jester',
 'kdd',
 'kosarek',
 'msnbc',
 'msweb',
 'nltcs',
 'plants',
 'pumsb_star',
 'tmovie',
 'tretail',
 'voting'}

In [5]:
# Same 'base' selection setup at three tree depths, shallowest first.
for depth in (2, 4, 8):
    default_mercs_models(max_depth=depth, selection_algorithm='base')

Nothing found, so index is 0
Nothing found, so index is 0
Nothing found, so index is 0


# FIT MERCS - Random

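Build the default MERCS models with the `random` selection algorithm and `fraction_missing=[0, 0.3]`, at maximum depths 2, 4 and 8.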

In [6]:
# Same 'random' selection setup at three tree depths, shallowest first.
for depth in (2, 4, 8):
    default_mercs_models(
        max_depth=depth,
        selection_algorithm='random',
        fraction_missing=[0, 0.3],
    )

Nothing found, so index is 0
Nothing found, so index is 0
Nothing found, so index is 0
