# Generate Modulo Configuration

Modulo inference configuration.

## Preliminaries

### Imports

In [1]:
# Imports
import os
import numpy as np
import pandas as pd
import json
import sys
import dill as pkl
import warnings

from datetime import datetime
from os.path import dirname

In [2]:
# Project-local imports
# Resolve the project layout relative to the current working directory:
# the root is taken to be two levels above it.
root_dir = dirname(dirname(os.getcwd()))
src_dir, libs_dir, cli_dir = (
    os.path.join(root_dir, sub) for sub in ('src', 'libs', 'cli')
)
cli_cfg_dir = os.path.join(cli_dir, 'config')

# Make the project's own packages importable before importing from them.
sys.path.extend([src_dir, libs_dir])

from exp.utils.filesystem import ensure_dir

### Methods

In [None]:
def save_parameters(d, fname, folder=None, fresh=False):
    """
    Function that saves the parameters as a json file.

    Parameters
    ----------
    d : dict
        JSON-serialisable parameters to write.
    fname : str
        File name without extension; '.json' is appended.
    folder : str, optional
        Directory to write into. When given, it is first passed to
        `ensure_dir` (project helper; presumably creates the directory --
        confirm against exp.utils.filesystem). When None, `fname` is used
        as the path as-is, i.e. relative to the working directory unless
        it is already absolute.
    fresh : bool, optional
        Forwarded to `ensure_dir` as its `empty` argument; only relevant
        when `folder` is given.

    Returns
    -------
    str
        Path of the file that was written.
    """

    ext = "json"

    if folder is not None:
        ensure_dir(folder, empty=fresh)
        # Only join when a folder is given: os.path.join(None, ...) raises
        # TypeError, which made the default folder=None unusable.
        fname = os.path.join(folder, fname)

    fname = fname + '.' + ext

    with open(fname, 'w') as f:
        json.dump(d, f, indent=4, sort_keys=True)
    return fname

### Global Variables

For all dataset-related information, see https://github.com/UCLA-StarAI/Density-Estimation-Datasets, which is the source of the datasets used here.

Beyond that, this section defines some lists and variables that recur in the cells below.

In [None]:
# Time helpers
# Number of seconds in one hour.
hr_in_s = 60 * 60
# Today's local date as a compact yymmdd string.
datecode = datetime.now().strftime("%y%m%d")

In [None]:
# Tiny datasets (< 20 variables).
# 'msnbc' is currently left out of this group.
tiny_datasets = ['nltcs']

# Small datasets (< 101 variables).
small_datasets = [
    'jester', 'kdd', 'plants', 'baudio', 'bnetflix',
]

# Medium datasets (100 < x < 501 variables).
medium_datasets = [
    'accidents', 'tretail', 'pumsb_star', 'dna',
    'kosarek', 'msweb', 'book', 'tmovie',
]

# Large datasets (500 < x variables).
large_datasets = [
    'cwebkb', 'cr52', 'c20ng', 'bbc', 'voting', 'ad',
]

# Convenience groupings; order is always tiny, small, medium, large.
tiny_small_datasets = tiny_datasets + small_datasets
medium_large_datasets = medium_datasets + large_datasets
all_datasets = tiny_small_datasets + medium_large_datasets

## Local Demo

### Fixed Parameters

Defining some fixed parameters.

In [None]:
# Output folder for the generated config: <cli config dir>/<yymmdd>-modulo-test
cfg_dir = os.path.join(cli_cfg_dir, datecode + '-modulo-test')
# Name used as the "child" value and as the prefix of the child-specific keys.
child = 'RunModulo'

In [None]:
# Timeout value: one hour expressed in seconds (hr_in_s is 60*60), despite
# the "hrs" in the name.
to_hrs =  int(1 * hr_in_s) 
# Fold indices to include; only fold 0 here.
folds = [0]                  

In [None]:
# General settings: which child to run, on which folds, and its timeout.
gen_parameters = {
    "child": child,
    child + ".folds": folds,
    child + ".timeout": to_hrs,
}

# Query settings.
# NOTE(review): "nb_queries" sits directly under the child prefix while
# "qry.keyword" has a ".qry." segment -- confirm this is the intended key.
qry_parameters = {
    child + ".qry.keyword": "it-S",
    child + ".nb_queries": 100,
}

# Model settings, each key prefixed with "<child>.mod.".
mod_parameters = {
    child + ".mod." + key: value
    for key, value in (("keyword", "default"), ("type", "Modulo"))
}

# No fixed prediction settings; these are varied in the exploration grids.
predict_parameters = {}

In [None]:
# Merge all fixed parameter groups into one flat dict; if a key were to
# appear in more than one group, the right-most group's value would win.
fixed_parameters = {**gen_parameters, **qry_parameters, **mod_parameters, **predict_parameters }

### Exploration - SL

Here we define the grid that has to be searched.

In [None]:
# Identifier of this exploration: stored under the "idx" key and used as the
# prefix of the saved config file name.
idx = 2

In [None]:
# Model name.
# NOTE(review): not referenced by any visible cell -- the exploration keys
# hard-code "Modulo" instead; confirm whether this variable is still needed.
model = "Modulo"

In [None]:
# Search grid: each "explore.*" key maps to the list of values to try.
exp_parameters = {
    "idx": idx,
    "explore.dataset": tiny_datasets,
    "explore.Modulo.prediction_algorithm": ["mi", "mrai"],
}

In [None]:
# File name is "<idx>_modulo"; save_parameters appends the ".json" extension.
fname = "_".join([str(exp_parameters['idx']), "modulo"])
# Write fixed + exploration parameters as one config; exploration values
# override fixed ones on key clashes. fresh=False is passed through to
# ensure_dir's `empty` argument.
save_parameters(
    {**fixed_parameters, **exp_parameters},
    fname,
    folder=cfg_dir,
    fresh=False,
)

### Exploration - ML

Here we define the grid that has to be searched.

In [None]:
# Identifier for the next exploration config (stored under its "idx" key).
idx = 13

In [None]:
# Search grid over all datasets for the "IT" prediction algorithm.
# NOTE(review): these keys use the "explore.Mercs.predict.*" prefix rather
# than "explore.Modulo.*", and no save_parameters call follows in the visible
# cells before exp_parameters is reassigned -- confirm this is intended.
exp_parameters = {
    "idx": idx,
    "explore.dataset": all_datasets,
    "explore.Mercs.predict.algo": ["IT"],
    "explore.Mercs.predict.its": [2, 10],
    "explore.Mercs.predict.param": [0.1],
}

In [None]:
# Identifier for the next exploration config (stored under its "idx" key).
idx = 14

In [None]:
# Search grid over all datasets for the "RW" prediction algorithm.
# NOTE(review): these keys use the "explore.Mercs.predict.*" prefix rather
# than "explore.Modulo.*", and no save_parameters call is visible after this
# cell -- confirm whether this config is meant to be written out.
exp_parameters = {
    "idx": idx,
    "explore.dataset": all_datasets,
    "explore.Mercs.predict.algo": ["RW"],
    "explore.Mercs.predict.its": [16, 32],
    "explore.Mercs.predict.param": [1, 2, 4],
}