# Final MERCS predict commands


In [1]:
from aaai20.io import (
    build_filesystem,
    default_prefix_exp_fn_suffix,
    filename_config,
    filename_cli_commands,
    filename_script,
    filename_logs,
    filename_query
)

from aaai20.cmd_gen import (
    all_fnames_in_dir,
    generate_config_and_log_fname,
    generate_df_commands,
    extract_idx_from_fnames,
    default_start_idx,
    prepare_fs,
)


import json
import os
import pandas as pd
import datetime
import numpy as np

from itertools import product

# Functions

In [2]:
def generate_mercs_prediction_config(
    model_keyword="md16-basic",
    kind="SL",
    random_state=42,
    script="run_mercs",
    exp_fn_fields=["prediction_algorithm"],
    timeout=1,
    datasets=None,
    start_idx=None,
    parallel=False,
):
    default_predict_config = dict(prediction_algorithm="mi", random_state=random_state)

    exp_keyword = model_keyword + "-final"
    nb_queries = 100

    # Fixed parameters
    fs = build_filesystem()
    prepare_fs(exp_keyword, script, fs)

    timeout = datetime.timedelta(hours=timeout).seconds
    
    if start_idx is None:
        start_idx = default_start_idx(fs, script=script)

    # Grid definition
    if kind in {"SL"}:
        phase_space = product(datasets, ["mi", "mrai"])
        predict_config = dict()
    elif kind in {"it"}:
        phase_space = product(datasets, ["it"])
        predict_config = dict(max_steps=8)
    elif kind in {"rw"}:
        phase_space = product(datasets, ["rw"])
        predict_config = dict(max_steps=4, nb_walks=5)
    else:
        raise NotImplementedError("I do not know.")

    # Update default
    predict_config = {**default_predict_config, **predict_config}

    # Inits
    df_commands = pd.DataFrame()
    fn_cfg = []
    fn_log = []
    fn_script = filename_script(script, kind="predict")
    q_idx = None

    if not parallel:
        for p_idx, (ds, prediction_algorithm) in enumerate(phase_space):
            # Init
            exp_idx = start_idx + p_idx

            # Generate config
            predict_config["prediction_algorithm"] = prediction_algorithm

            config = dict(
                dataset=ds,
                script=script,
                model_keyword=model_keyword,
                exp_keyword=exp_keyword,
                exp_idx=exp_idx,
                exp_fn_fields=exp_fn_fields,
                predict_config=predict_config,
            )

            # Save config
            single_fn_cfg, single_fn_log = generate_config_and_log_fname(
                config, kind="predict", exp_fn_fields=exp_fn_fields
            )
            with open(single_fn_cfg, "w") as f:
                json.dump(config, f, indent=4)

            fn_cfg.append(single_fn_cfg)
            fn_log.append(single_fn_log)
    else:
        q_idx = []
        for p_idx, (ds, prediction_algorithm) in enumerate(phase_space):
            for single_q_idx in range(nb_queries):
                # Init
                exp_idx = start_idx + p_idx

                # Generate config
                predict_config["prediction_algorithm"] = prediction_algorithm

                config = dict(
                    dataset=ds,
                    script=script,
                    model_keyword=model_keyword,
                    exp_keyword=exp_keyword,
                    exp_idx=exp_idx,
                    exp_fn_fields=exp_fn_fields,
                    predict_config=predict_config,
                )

                # Save config
                single_fn_cfg, single_fn_log = generate_config_and_log_fname(
                    config,
                    kind="predict",
                    exp_fn_fields=exp_fn_fields,
                    qry_idx=single_q_idx,
                )
                with open(single_fn_cfg, "w") as f:
                    json.dump(config, f, indent=4)

                fn_cfg.append(single_fn_cfg)
                fn_log.append(single_fn_log)
                q_idx.append(single_q_idx)

    df_commands = generate_df_commands(fn_script, fn_cfg, fn_log, timeout, q_idx=q_idx)

    return df_commands

In [3]:
def default_mercs_predict_combo(
    datasets=None,
    model_keyword="md4-base",
    parallel=False,
    timeout=0.3,
    script="run_mercs",
):

    exp_keyword = model_keyword + "-predict"

    start_idx_SL = default_start_idx(fs, script=script)

    df_SL = generate_mercs_prediction_config(
        model_keyword=model_keyword,
        kind="SL",
        random_state=42,
        script=script,
        timeout=timeout,
        datasets=datasets,
        parallel=parallel,
        start_idx=start_idx_SL,
    )

    start_idx_it = start_idx_SL + len(df_SL)
    df_it = generate_mercs_prediction_config(
        model_keyword=model_keyword,
        kind="it",
        random_state=42,
        script=script,
        timeout=timeout,
        datasets=datasets,
        parallel=parallel,
        start_idx=start_idx_it,
    )

    """
    start_idx_rw = start_idx_it + len(df_it)
    df_rw = generate_mercs_prediction_config(
        model_keyword=model_keyword,
        kind="rw",
        random_state=42,
        script=script,
        timeout=timeout,
        datasets=datasets,
        parallel=parallel,
        start_idx=start_idx_rw,
    )
    """

    #df_commands = pd.concat([df_SL, df_it, df_rw])
    df_commands = pd.concat([df_SL, df_it])

    fn_commands = filename_cli_commands(exp_keyword)
    df_commands.to_csv(fn_commands, index=False)
    
    
    return fn_commands

# General Parameters

Some things that remain constant, such as typically the datasets.

In [4]:
fs = build_filesystem()
starai_ds = os.listdir(fs['datasets-starai'])
starai_ds

['tretail',
 'pumsb_star',
 'bnetflix',
 'kdd',
 'bbc',
 'nltcs',
 'plants',
 'kosarek',
 'baudio',
 'tmovie',
 'voting',
 'book',
 'dna',
 'cwebkb',
 'jester',
 'msnbc',
 'msweb',
 'cr52',
 'c20ng',
 'accidents',
 'ad']

# MD 8

## Base

In [5]:
default_mercs_predict_combo(datasets=starai_ds, model_keyword="md8-base")

'/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/cli/cli-config/md8-base-predict.csv'

## Random

In [6]:
default_mercs_predict_combo(datasets=starai_ds, model_keyword="md8-random")

'/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/cli/cli-config/md8-random-predict.csv'

# MD16

Mercs Prediction with models of max depth 16

## Base

default_mercs_predict_combo(datasets=starai_ds, model_keyword="md16-base")

## Random