# Final MERCS predict commands


In [1]:
from aaai20.io import (
    build_filesystem,
    default_prefix_exp_fn_suffix,
    filename_config,
    filename_cli_commands,
    filename_script,
    filename_logs,
    filename_query
)

from aaai20.cmd_gen import (
    all_fnames_in_dir,
    generate_config_and_log_fname,
    generate_df_commands,
    extract_idx_from_fnames,
    default_start_idx,
    prepare_fs,
)


import json
import os
import pandas as pd
import datetime
import numpy as np

from itertools import product

# Functions

In [2]:
def generate_mercs_prediction_config(
    model_keyword="md16-basic",
    kind="SL",
    random_state=42,
    script="run_mercs",
    exp_fn_fields=["prediction_algorithm"],
    timeout=1,
    datasets=None,
    start_idx=None,
    parallel=False,
):
    default_predict_config = dict(prediction_algorithm="mi", random_state=random_state)

    exp_keyword = model_keyword + "-final"
    nb_queries = 100

    # Fixed parameters
    fs = build_filesystem()
    prepare_fs(exp_keyword, script, fs)

    timeout = datetime.timedelta(hours=timeout).seconds
    
    if start_idx is None:
        start_idx = default_start_idx(fs, script=script)

    # Grid definition
    if kind in {"SL"}:
        phase_space = product(datasets, ["mi", "mrai"])
        predict_config = dict()
    elif kind in {"it"}:
        phase_space = product(datasets, ["it"])
        predict_config = dict(max_steps=8)
    elif kind in {"rw"}:
        phase_space = product(datasets, ["rw"])
        predict_config = dict(max_steps=8, nb_walks=20)
    else:
        raise NotImplementedError("I do not know.")

    # Update default
    predict_config = {**default_predict_config, **predict_config}

    # Inits
    df_commands = pd.DataFrame()
    fn_cfg = []
    fn_log = []
    fn_script = filename_script(script, kind="predict")
    q_idx = None

    if not parallel:
        for p_idx, (ds, prediction_algorithm) in enumerate(phase_space):
            # Init
            exp_idx = start_idx + p_idx

            # Generate config
            predict_config["prediction_algorithm"] = prediction_algorithm

            config = dict(
                dataset=ds,
                script=script,
                model_keyword=model_keyword,
                exp_keyword=exp_keyword,
                exp_idx=exp_idx,
                exp_fn_fields=exp_fn_fields,
                predict_config=predict_config,
            )

            # Save config
            single_fn_cfg, single_fn_log = generate_config_and_log_fname(
                config, kind="predict", exp_fn_fields=exp_fn_fields
            )
            with open(single_fn_cfg, "w") as f:
                json.dump(config, f, indent=4)

            fn_cfg.append(single_fn_cfg)
            fn_log.append(single_fn_log)
    else:
        q_idx = []
        for p_idx, (ds, prediction_algorithm) in enumerate(phase_space):
            for single_q_idx in range(nb_queries):
                # Init
                exp_idx = start_idx + p_idx

                # Generate config
                predict_config["prediction_algorithm"] = prediction_algorithm

                config = dict(
                    dataset=ds,
                    script=script,
                    model_keyword=model_keyword,
                    exp_keyword=exp_keyword,
                    exp_idx=exp_idx,
                    exp_fn_fields=exp_fn_fields,
                    predict_config=predict_config,
                )

                # Save config
                single_fn_cfg, single_fn_log = generate_config_and_log_fname(
                    config,
                    kind="predict",
                    exp_fn_fields=exp_fn_fields,
                    qry_idx=single_q_idx,
                )
                with open(single_fn_cfg, "w") as f:
                    json.dump(config, f, indent=4)

                fn_cfg.append(single_fn_cfg)
                fn_log.append(single_fn_log)
                q_idx.append(single_q_idx)

    df_commands = generate_df_commands(fn_script, fn_cfg, fn_log, timeout, q_idx=q_idx)

    return df_commands

In [3]:
def default_mercs_predict_combo(
    datasets=None,
    model_keyword="md4-base",
    parallel=False,
    timeout=0.3,
    script="run_mercs",
):

    exp_keyword = model_keyword + "-predict"

    start_idx_SL = default_start_idx(fs, script=script)
    
    print(start_idx_SL)

    df_SL = generate_mercs_prediction_config(
        model_keyword=model_keyword,
        kind="SL",
        random_state=42,
        script=script,
        timeout=timeout,
        datasets=datasets,
        parallel=parallel,
        start_idx=start_idx_SL,
    )

    start_idx_it = start_idx_SL + len(df_SL)
    
    print(start_idx_it)
    
    df_it = generate_mercs_prediction_config(
        model_keyword=model_keyword,
        kind="it",
        random_state=42,
        script=script,
        timeout=timeout,
        datasets=datasets,
        parallel=parallel,
        start_idx=start_idx_it,
    )

    start_idx_rw = start_idx_it + len(df_it)
    
    print(start_idx_rw)
    
    df_rw = generate_mercs_prediction_config(
        model_keyword=model_keyword,
        kind="rw",
        random_state=42,
        script=script,
        timeout=timeout,
        datasets=datasets,
        parallel=parallel,
        start_idx=start_idx_rw,
    )
    

    df_commands = pd.concat([df_SL, df_it, df_rw])
    #df_commands = pd.concat([df_SL, df_it])

    fn_commands = filename_cli_commands(exp_keyword)
    df_commands.to_csv(fn_commands, index=False)
    
    
    return fn_commands

# General Parameters

Some things that remain constant, such as typically the datasets.

In [4]:
fs = build_filesystem()
starai_ds = os.listdir(fs['datasets-starai'])
starai_ds

['tretail',
 'pumsb_star',
 'bnetflix',
 'kdd',
 'bbc',
 'nltcs',
 'plants',
 'kosarek',
 'baudio',
 'tmovie',
 'voting',
 'book',
 'dna',
 'cwebkb',
 'jester',
 'msnbc',
 'msweb',
 'cr52',
 'c20ng',
 'accidents',
 'ad']

In [5]:
fs

{'root': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20',
 'config': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/config',
 'data': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/data',
 'out': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/out',
 'cli': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/cli',
 'query': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/config/query',
 'model': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/config/model',
 'fit': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/cli/fit',
 'predict': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/cli/predict',
 'exe': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/cli/exe',
 'cli-config': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/cli/cli-config',
 'run_mercs': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/out/run_mercs',
 'run_pxs': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/out/run_pxs',
 'fit_pxs': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/out/fit_pxs',
 'fit_mercs': '/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/out/fit_mercs',
 'raw': '/cw/dtaijupiter

# MD 8

## Base

In [6]:
#default_mercs_predict_combo(datasets=starai_ds, model_keyword="md8-base")

['0041-msweb-mrai', '0043-cr52-mrai', '0056-plants-it', '0034-cwebkb-mi', '0010-pumsb_star-mi', '0013-bnetflix-mrai', '0019-nltcs-mrai', '0045-c20ng-mrai', '0057-kosarek-it', '0024-baudio-mi', '0064-jester-it', '0015-kdd-mrai', '0042-cr52-mi', '0033-dna-mrai', '0032-dna-mi', '0059-tmovie-it', '0026-tmovie-mi', '0070-ad-it', '0036-jester-mi', '0060-voting-it', '0055-nltcs-it', '0017-bbc-mrai', '0063-cwebkb-it', '0061-book-it', '0035-cwebkb-mrai', '0031-book-mrai', '0011-pumsb_star-mrai', '0023-kosarek-mrai', '0044-c20ng-mi', '0065-msnbc-it', '0051-pumsb_star-it', '0054-bbc-it', '0014-kdd-mi', '0038-msnbc-mi', '0029-voting-mrai', '0027-tmovie-mrai', '0053-kdd-it', '0018-nltcs-mi', '0040-msweb-mi', '0039-msnbc-mrai', '0050-tretail-it', '0020-plants-mi', '0049-ad-mrai', '0025-baudio-mrai', '0058-baudio-it', '0021-plants-mrai', '0066-msweb-it', '0047-accidents-mrai', '0008-tretail-mi', '0012-bnetflix-mi', '0009-tretail-mrai', '0030-book-mi', '0052-bnetflix-it', '0016-bbc-mi', '0046-accident

'/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/cli/cli-config/md8-base-predict.csv'

## Random

In [7]:
default_mercs_predict_combo(datasets=starai_ds, model_keyword="md8-random")

['0129-msweb-it', '0092-voting-mrai', '0087-baudio-mi', '0127-jester-it', '0091-voting-mi', '0098-cwebkb-mrai', '0101-msnbc-mi', '0107-c20ng-mi', '0133-ad-it', '0124-book-it', '0142-baudio-rw', '0134-tretail-rw', '0082-nltcs-mrai', '0111-ad-mi', '0097-cwebkb-mi', '0079-bbc-mi', '0072-tretail-mrai', '0074-pumsb_star-mrai', '0090-tmovie-mrai', '0102-msnbc-mrai', '0112-ad-mrai', '0128-msnbc-it', '0084-plants-mrai', '0113-tretail-it', '0077-kdd-mi', '0152-c20ng-rw', '0103-msweb-mi', '0151-cr52-rw', '0105-cr52-mi', '0093-book-mi', '0141-kosarek-rw', '0110-accidents-mrai', '0104-msweb-mrai', '0121-baudio-it', '0109-accidents-mi', '0123-voting-it', '0080-bbc-mrai', '0120-kosarek-it', '0117-bbc-it', '0078-kdd-mrai', '0146-dna-rw', '0086-kosarek-mrai', '0132-accidents-it', '0115-bnetflix-it', '0076-bnetflix-mrai', '0118-nltcs-it', '0130-cr52-it', '0119-plants-it', '0125-dna-it', '0095-dna-mi', '0108-c20ng-mrai', '0116-kdd-it', '0096-dna-mrai', '0071-tretail-mi', '0088-baudio-mrai', '0073-pumsb_

'/cw/dtaijupiter/NoCsBack/dtai/elia/aaai20/cli/cli-config/md8-random-predict.csv'

# MD16

Mercs Prediction with models of max depth 16

## Base

## Random