# Generate CLI configs

Generate config files suitable for the CLI for the IJCAI 2019 submission.

## Preliminaries

### Imports

In [1]:
# Imports
import os
import numpy as np
import pandas as pd
import json
import sys
import pickle as pkl
import warnings

from os.path import dirname

In [2]:
# Custom Imports
# The project root is taken to be two directory levels above the current
# working directory; its `src` subfolder is appended to `sys.path` so that the
# project-local import below can be resolved.
root_dir = dirname(dirname(os.getcwd()))
src_dir = os.path.join(root_dir, 'src')
sys.path.append(src_dir)

# Must come after the `sys.path` modification above.
from exp.utils.filesystem import ensure_dir

### Methods

In [3]:
def save_parameters(d, fname, folder=None, fresh=False):
    """
    Save the parameters as a json file.

    Parameters
    ----------
    d:
        JSON-serializable object to write (a dict everywhere in this file).
    fname: str
        Name of the output file without extension; '.json' is appended.
    folder: str, optional
        Directory in which the file is written. When given, it is first
        passed to `ensure_dir(folder, empty=fresh)`. When None, `fname` is
        used as the path as-is.
    fresh: bool, optional
        Forwarded to `ensure_dir` as its `empty` argument.

    Returns
    -------
    str
        Path of the file that was written.
    """

    ext = "json"

    # Only join with the folder when one is given: `os.path.join(None, ...)`
    # raises a TypeError, which made the default `folder=None` unusable.
    if folder is not None:
        ensure_dir(folder, empty=fresh)
        fname = os.path.join(folder, fname)

    fname = fname + '.' + ext

    with open(fname, 'w') as f:
        # Sorted keys and indentation keep the output stable and readable.
        json.dump(d, f, indent=4, sort_keys=True)
    return fname

### Global Variables

For all dataset-related information, see https://github.com/UCLA-StarAI/Density-Estimation-Datasets, which is the source of these datasets.

Beyond that, this section defines some lists and variables that recur throughout the notebook.

In [4]:
# Tiny datasets: fewer than 20 variables.
tiny_datasets = ["nltcs", "msnbc"]

In [5]:
# Small datasets: fewer than 101 variables.
small_datasets = [
    "jester",
    "kdd",
    "plants",
    "baudio",
    "bnetflix",
]

# Tiny and small datasets combined, tiny ones first.
tiny_small_datasets = [*tiny_datasets, *small_datasets]

In [6]:
# Medium datasets: more than 100 and fewer than 501 variables.
medium_datasets = [
    "accidents",
    "tretail",
    "pumsb_star",
    "dna",
    "kosarek",
    "msweb",
    "book",
    "tmovie",
]

In [7]:
# Large datasets: more than 500 variables.
large_datasets = [
    "cwebkb",
    "cr52",
    "c20ng",
    "bbc",
    "voting",
    "ad",
]

In [8]:
# Every dataset, ordered from the tiny/small ones up to the large ones.
all_datasets = [*tiny_small_datasets, *medium_datasets, *large_datasets]

In [9]:
# One hour expressed in seconds (60 minutes of 60 seconds each).
hr_in_s = 60 * 60

## Local Demo - MERCS

### Fixed Parameters

Defining some fixed parameters.

In [10]:
# Folder in which the local demo config is saved.
cfg_folder = "ijcai-local"

In [11]:
# One fold only.
folds = [0]

# Locally, I invest a max of 1 hr (expressed via `hr_in_s`).
to_hrs = 1 * hr_in_s

In [12]:
# Run-level settings: folds, timeout and query keyword.
meta_parameters = {
    'RunMercs.folds': folds,
    'RunMercs.timeout': to_hrs,
    'RunMercs.qry.keyword': 'it-S',
}

# Datasets to explore: the tiny and small ones.
fixed_exploration = {'explore.dataset': tiny_small_datasets}

# Fixed `ind.*` and `sel.*` settings.
ind_sel_parameters = {
    'RunMercs.Mercs.ind.type': 'DT',
    'RunMercs.Mercs.ind.max_depth': 4,
    'RunMercs.Mercs.sel.its': 4,
    'RunMercs.Mercs.sel.param': 2,
}

In [13]:
# Merge the three fixed groups into a single flat dict.
fixed_parameters = {**ind_sel_parameters, **meta_parameters, **fixed_exploration}

### Exploration

Here we define the grid that has to be searched.

In [14]:
# Index of this experiment: stored under the 'idx' key of the exploration
# parameters and used as the numeric prefix of the saved config's filename.
idx = 70

In [15]:
# Grid to explore: three `pred.type` values with a single `pred.param` and
# `pred.its` value each.
exp_parameters = {
    'idx': idx,
    'explore.Mercs.pred.type': ['MI', 'MA', 'MAFI'],
    'explore.Mercs.pred.param': [0.95],
    'explore.Mercs.pred.its': [0.1],
}

In [16]:
# Filename is "<idx>_local-mercs"; the fixed and explored parameters are
# merged into one dict and written to `cfg_folder`.
fname = '{}_{}'.format(exp_parameters['idx'], 'local-mercs')
save_parameters(
    {**fixed_parameters, **exp_parameters},
    fname,
    folder=cfg_folder,
)

'ijcai-local/70_local-mercs.json'

## Remote Lite

Making the settings for a selection of `lightweight` remote experiments. This means:

1. One fold.
2. Tiny and small datasets only.

### Fixed Params

In [17]:
# Folder in which the remote-lite configs are saved.
cfg_folder = "ijcai-mercs-A"

# First experiment index; subsequent configs count up from here.
start_idx = 1

# One fold only.
folds = [0]

# I invest a max of 1 hr (expressed via `hr_in_s`).
to_hrs = 1 * hr_in_s

In [18]:
# Run-level settings: folds, timeout and query keyword.
meta_parameters = {
    'RunMercs.folds': folds,
    'RunMercs.timeout': to_hrs,
    'RunMercs.qry.keyword': 'it-S',
}

# Datasets to explore: the tiny and small ones.
fixed_exploration = {'explore.dataset': tiny_small_datasets}

# Fixed `ind.*` and `sel.*` settings.
ind_sel_parameters = {
    'RunMercs.Mercs.ind.type': 'DT',
    'RunMercs.Mercs.ind.max_depth': 8,
    'RunMercs.Mercs.sel.its': 4,
    'RunMercs.Mercs.sel.param': 2,
}

In [19]:
# Merge the three fixed groups into a single flat dict.
fixed_parameters = {
    **ind_sel_parameters,
    **meta_parameters,
    **fixed_exploration,
}

### Exploration: mercs-sl

Single-layer MERCS exploration.

In [20]:
# Grid for the first config: three `pred.type` values with a single
# `pred.param` and `pred.its` value each.
exp_parameters = {
    'idx': start_idx + 0,
    'explore.Mercs.pred.type': ['MI', 'MA', 'MAFI'],
    'explore.Mercs.pred.param': [0.95],
    'explore.Mercs.pred.its': [0.1],
}

In [21]:
# Filename is "<idx>_mercs-sl"; the fixed and explored parameters are merged
# into one dict and written to `cfg_folder`.
fname = '{}_{}'.format(exp_parameters['idx'], 'mercs-sl')
save_parameters(
    {**fixed_parameters, **exp_parameters},
    fname,
    folder=cfg_folder,
)

'ijcai-mercs-A/1_mercs-sl.json'

### Exploration: mercs-ml

Multi-layer MERCS exploration.

In [22]:
# Grid for the second config: `pred.type` 'IT' only.
exp_parameters = {
    'idx': start_idx + 1,
    'explore.Mercs.pred.type': ['IT'],
    'explore.Mercs.pred.param': [0.1],
    'explore.Mercs.pred.its': [8],
}

In [23]:
# Filename is "<idx>_mercs-it"; the fixed and explored parameters are merged
# into one dict and written to `cfg_folder`.
fname = '{}_{}'.format(exp_parameters['idx'], 'mercs-it')
save_parameters(
    {**fixed_parameters, **exp_parameters},
    fname,
    folder=cfg_folder,
)

'ijcai-mercs-A/2_mercs-it.json'

In [24]:
# Grid for the third config: `pred.type` 'RW' only.
exp_parameters = {
    'idx': start_idx + 2,
    'explore.Mercs.pred.type': ['RW'],
    'explore.Mercs.pred.param': [2],
    'explore.Mercs.pred.its': [16],
}

In [25]:
# Filename is "<idx>_mercs-rw"; the fixed and explored parameters are merged
# into one dict and written to `cfg_folder`.
fname = '{}_{}'.format(exp_parameters['idx'], 'mercs-rw')
save_parameters(
    {**fixed_parameters, **exp_parameters},
    fname,
    folder=cfg_folder,
)

'ijcai-mercs-A/3_mercs-rw.json'