# Info

- Yaml files:
    - Pros:
        - Hierarchical view of configuration
    - Cons:
        - Not Python: parameters that need special types require dedicated handling code

- Argparse:
    - Pros:
        - Python: defaults can be built with other libraries, so no dedicated code is needed to handle special parameters
    - Cons:
        - Not hierarchical?


Goals
- Handle experiments configurations

    - Save with unique experiment ID
        - exp ID = alphanumeric string, 16 characters long by default
        - a prefix can be used when running simultaneously on different machines, so that experiments do not get mixed up
    - Check if one configuration has already been run

Questions:

- Should we separate keys that need to be swept?
    - e.g. dataset name: [dataset_name1, dataset_name2, ...]

    - Simplest solution: Give a warning if calling a cfg containing a "to-sweep" key

# Test

In [2]:
import sys
from copy import deepcopy

In [3]:
project_folder = "../"
sys.path.insert(0, project_folder)
print(sys.path) # view the path and verify

['../', '/Users/federicosiciliano/Desktop/Py_utils/exp_utils/ntb', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python38.zip', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/lib-dynload', '', '/Users/federicosiciliano/Library/Python/3.8/lib/python/site-packages', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/site-packages']


In [4]:
import exp_utils

## Configuration

### Load

In [5]:
cfg = exp_utils.cfg.load_configuration()

In [6]:
cfg

{'app1': {'foo_str': 'string_value',
  'foo_int': 123,
  'foo_float': 1.23,
  'foo_none': None,
  'foo_list1': [123, '456', 789.0],
  'foo_list2': [123, '456'],
  'foo_ref1': 'string_value',
  'foo_ref2': 'string_value',
  'foo_parse1': ['a', 'b', 'c'],
  'foo_parse2': array([1, 2, 3]),
  'foo_parse3':    0  1  2
  0  1  2  3
  1  4  5  6,
  'foo_parse4': 8,
  'app1_2': {'foo_str1_2': 'app2_foo'},
  'app_glob': 'glob',
  'app_glob2': 'glob2',
  'app_glob1_2': 'glob1_2'},
 '__exp__': {'name': 'prova',
  '__imports__': ['numpy as np',
   {'name': 'pandas', 'fromlist': ['DataFrame']},
   'multiprocessing'],
  '__nosave__': {'app1.foo_parse4': None,
   'app1.app1_2': None,
   'app2.foo_str': None,
   'app2.foo_int2': None,
   'app2.app3.foo_int1': None,
   'app1.app_glob1_2': None},
  'project_folder': '../',
  'key_len': 16,
  'key_prefix': ''},
 'app2': {'app3': {'foo_int1': 123}, 'foo_str': 'app2_foo', 'foo_int2': 123}}

In [7]:
type(cfg)

exp_utils.cfg.ConfigObject

In [8]:
type(cfg["app1"]), type(cfg["__exp__"])

(dict, dict)

In [9]:
cfg["__exp__"]

{'name': 'prova',
 '__imports__': ['numpy as np',
  {'name': 'pandas', 'fromlist': ['DataFrame']},
  'multiprocessing'],
 '__nosave__': {'app1.foo_parse4': None,
  'app1.app1_2': None,
  'app2.foo_str': None,
  'app2.foo_int2': None,
  'app2.app3.foo_int1': None,
  'app1.app_glob1_2': None},
 'project_folder': '../',
 'key_len': 16,
 'key_prefix': ''}

In [10]:
cfg["app1"]["foo_parse2"]

array([1, 2, 3])

In [11]:
cfg["app1.foo_parse2"]

array([1, 2, 3])

In [12]:
#cfg.app1.foo_parse2 #This doesn't work and gives AttributeError: 'dict' object has no attribute 'foo_parse2'
# unless each dict inside cfg is converted to ConfigObject

In [13]:
cfg["app3.foo_str"] = "abc"

In [14]:
cfg["app3.foo_str"],cfg["app3"]["foo_str"]

('abc', 'abc')

## Experiment

In [14]:
cfg, exp_cfg = exp_utils.exp.separate_exp_cfg(cfg) #drop the __exp__ key because it doesn't define an experiment's parameters

### Experiment ids

In [15]:
print(exp_utils.exp.generate_random_id())
print(exp_utils.exp.generate_random_id(key_prefix="B-"))
print(exp_utils.exp.generate_random_id(key_len=8))
print(exp_utils.exp.generate_random_id(key_len=4,key_prefix="FS"))

eNhVVnmZQ7jDMnAp
B-0BDI2sx6Z6BMgfdk
zJsQ2euT
FSajX3


In [16]:
import string
import numpy as np

# Alphanumeric alphabet: upper/lower-case ASCII letters plus the ten digits.
alphabet = string.ascii_letters + string.digits
poss_chars = len(alphabet)  # 62
default_k = 16
# Number of distinct strings of length default_k over that alphabet,
# reported below as a power of ten.
default_poss_keys = pow(poss_chars, default_k)
exponent = np.log10(float(default_poss_keys))
print("Default number of possible keys: 10", "^", exponent)

Default number of possible keys: 10 ^ 28.678267031972062


### Hashing

In [17]:
#json.dumps(cfg, sort_keys=True).encode()
#this gives TypeError: Object of type ndarray is not JSON serializable
#or TypeError: Object of type DataFrame is not JSON serializable

#The only solution is to save each numpy array / pandas DF / other non-serializable object
#found in the configuration as its own experiment, with its own experiment ID,
#and to keep only that ID inside the configuration
numpy_key = "app1.foo_parse2"
np_to_save_elsewhere = cfg[numpy_key]
np_exp_id = 123 #placeholder value; e.g. exp_utils.exp.generate_random_id() #get numpy object experiment ID somehow
cfg[numpy_key] = np_exp_id #now cfg doesn't have the numpy array anymore

pandas_key = "app1.foo_parse3"
pd_to_save_elsewhere = cfg[pandas_key]
pd_exp_id = 42 #placeholder value; e.g. exp_utils.exp.generate_random_id() #get pandas object experiment ID somehow
cfg[pandas_key] = pd_exp_id #now cfg doesn't have the pandas DF anymore

In [18]:
exp_utils.exp.hash_config(cfg)

'7d5beb1aeca81e09a51095e098c7172b'

In [19]:
cfg

{'app1': {'foo_str': 'string_value',
  'foo_int': 123,
  'foo_float': 1.23,
  'foo_none': None,
  'foo_list1': [123, '456', 789.0],
  'foo_list2': [123, '456'],
  'foo_ref1': 'string_value',
  'foo_ref2': 'string_value',
  'foo_parse1': ['a', 'b', 'c'],
  'foo_parse2': 123,
  'foo_parse3': 42,
  'foo_parse4': 8,
  'app1_2': {'foo_str1_2': 'app2_foo'},
  'app_glob': 'glob',
  'app_glob2': 'glob2',
  'app_glob1_2': 'glob1_2'},
 'app2': {'app3': {'foo_int1': 123}, 'foo_str': 'app2_foo', 'foo_int2': 123},
 'app3': {'foo_str': 'abc'}}

In [20]:
#cfg, exp_cfg = exp_utils.exp.separate_exp_cfg(cfg) #drop __exp__ key cause it doesn't define an experiment's parameters
exp_utils.exp.get_experiment_id(cfg, exp_cfg)

(True, 'b8rTUqbVFHHqBrJP')

### Save experiments

In [21]:
cfg = exp_utils.exp.combine_exp_cfg(cfg, exp_cfg)

In [22]:
cfg["__exp__"]

{'name': 'prova',
 '__imports__': ['numpy as np',
  {'name': 'pandas', 'fromlist': ['DataFrame']},
  'multiprocessing'],
 '__nosave__': {'app1.foo_parse4': None,
  'app1.app1_2': None,
  'app2.foo_str': None,
  'app2.foo_int2': None,
  'app2.app3.foo_int1': None,
  'app1.app_glob1_2': None},
 'project_folder': '../',
 'key_len': 16,
 'key_prefix': '',
 'hash': '691c996f5bad5a61d0b524a155f2908b'}

In [23]:
exp_utils.exp.save_experiment(cfg)
#save_experiment can be called directly; it sets the experiment id itself

In [24]:
exp_utils.exp.get_experiment_id(cfg), exp_utils.exp.get_experiment_id(cfg)
#If the cfg exists, we get
# exp_found True
# always the same experiment_id

((True, 'b8rTUqbVFHHqBrJP'), (True, 'b8rTUqbVFHHqBrJP'))

In [25]:
cfg, exp_cfg = exp_utils.exp.separate_exp_cfg(cfg) #drop the __exp__ key because it doesn't define an experiment's parameters

In [26]:
exp_utils.exp.get_experiment_id(cfg,exp_cfg), exp_utils.exp.get_experiment_id(cfg,exp_cfg) #Even if the cfg is divided, the result is the same

((True, 'b8rTUqbVFHHqBrJP'), (True, 'b8rTUqbVFHHqBrJP'))

In [27]:
cfg["app5"] = 5

In [28]:
exp_utils.exp.get_experiment_id(cfg,exp_cfg), exp_utils.exp.get_experiment_id(cfg,exp_cfg) #If the cfg doesn't exist, we get a different experiment_id. Also, exp_found is False
#NOTE(review): the recorded output below shows exp_found == True with a stable id, which contradicts the comment above;
# presumably this cell was re-run after the modified cfg had already been saved -- confirm

((True, '0sMmmIRL2vbiGUyH'), (True, '0sMmmIRL2vbiGUyH'))

In [29]:
exp_utils.exp.get_set_experiment_id(cfg,exp_cfg)

(True, '0sMmmIRL2vbiGUyH')

In [30]:
exp_cfg["experiment_id"] #now the experiment_id is saved inside exp_cfg

'0sMmmIRL2vbiGUyH'

In [31]:
exp_utils.exp.get_experiment_id(cfg,exp_cfg), exp_cfg["experiment_id"]
# get_experiment_id is meant to search for the experiment in the file,
# so, if it doesn't find the cfg in the saved configurations,
# it will not set the exp_id inside exp_cfg

((True, '0sMmmIRL2vbiGUyH'), '0sMmmIRL2vbiGUyH')

In [32]:
exp_utils.exp.get_set_experiment_id(cfg,exp_cfg), exp_cfg["experiment_id"]
# in the same way, get_set_experiment_id is meant to search for the experiment in the file,
# so, if it doesn't find the cfg in the saved configurations,
# it does not return the exp_id already stored inside exp_cfg:
# it overwrites the stored value with the id it returns

((True, '0sMmmIRL2vbiGUyH'), '0sMmmIRL2vbiGUyH')

In [33]:
prev_cfg = deepcopy(cfg)

In [34]:
exp_utils.exp.save_experiment(cfg,exp_cfg)

In [35]:
prev_cfg == cfg #check if cfg has been restored

True

In [36]:
#Now that the experiment is saved, we will always get the same experiment_id
exp_utils.exp.get_experiment_id(cfg,exp_cfg), exp_cfg["experiment_id"], exp_utils.exp.get_set_experiment_id(cfg,exp_cfg), exp_cfg["experiment_id"]

((True, '0sMmmIRL2vbiGUyH'),
 '0sMmmIRL2vbiGUyH',
 (True, '0sMmmIRL2vbiGUyH'),
 '0sMmmIRL2vbiGUyH')

In [37]:
#If we modify cfg so that it is new, we get the same results as before:
# get_experiment_id leaves exp_cfg["experiment_id"] untouched, while get_set_experiment_id updates it
cfg["new_key"] = "kmasdkasm"
cfg["new_key2"] = {"abc":123}
exp_utils.exp.get_experiment_id(cfg,exp_cfg), exp_cfg["experiment_id"], exp_utils.exp.get_set_experiment_id(cfg,exp_cfg), exp_cfg["experiment_id"]

((True, 'DO2T84byfPBTAviv'),
 '0sMmmIRL2vbiGUyH',
 (True, 'DO2T84byfPBTAviv'),
 'DO2T84byfPBTAviv')

In [38]:
exp_utils.exp.save_experiment(cfg,exp_cfg)

In [39]:
exp_cfg["__nosave__"] #check nosave keys

{'app1.foo_parse4': None,
 'app1.app1_2': None,
 'app2.foo_str': None,
 'app2.foo_int2': None,
 'app2.app3.foo_int1': None,
 'app1.app_glob1_2': None}

In [40]:
cfg["app2.foo_str"] = "modified" #modify a nosave key

In [41]:
#If a nosave key is modified, the cfg will still match a saved one
exp_utils.exp.get_experiment_id(cfg,exp_cfg), exp_cfg["experiment_id"], exp_utils.exp.get_set_experiment_id(cfg,exp_cfg), exp_cfg["experiment_id"]

((True, 'DO2T84byfPBTAviv'),
 'DO2T84byfPBTAviv',
 (True, 'DO2T84byfPBTAviv'),
 'DO2T84byfPBTAviv')

### Load experiments

In [42]:
all_exps = exp_utils.exp.get_experiments(**exp_cfg)
all_exps

{'0sMmmIRL2vbiGUyH': {'app1': {'app_glob': 'glob',
   'app_glob2': 'glob2',
   'foo_float': 1.23,
   'foo_int': 123,
   'foo_list1': [123, '456', 789.0],
   'foo_list2': [123, '456'],
   'foo_none': None,
   'foo_parse1': ['a', 'b', 'c'],
   'foo_parse2': 123,
   'foo_parse3': 42,
   'foo_ref1': 'string_value',
   'foo_ref2': 'string_value',
   'foo_str': 'string_value'},
  'app2': {'app3': {}},
  'app3': {'foo_str': 'abc'},
  'app5': 5},
 'HhN3ZNkPkoxHkDzz': {'app1': {'app_glob': 'glob',
   'app_glob2': 'glob2',
   'foo_float': 1.23,
   'foo_int': 123,
   'foo_list1': 123,
   'foo_list2': [123, '456'],
   'foo_none': None,
   'foo_parse1': ['a', 'b', 'c'],
   'foo_parse2': 123,
   'foo_parse3': 42,
   'foo_ref1': 'string_value',
   'foo_ref2': 'string_value',
   'foo_str': 'string_value'},
  'app2': {'app3': {}},
  'app3': {'foo_str': 'abc'},
  'app5': 5,
  'new_key': 'kmasdkasm',
  'new_key2': {'abc': 123}},
 'mTzSztICfKt0Voj3': {'app1': {'app_glob': 'glob',
   'app_glob2': 'glob2',


In [43]:
all_exps = exp_utils.exp.get_experiments(**exp_cfg, sub_cfg={"abc":123}) #if not setting check_type, nothing will be checked
all_exps

{'0sMmmIRL2vbiGUyH': {'app1': {'app_glob': 'glob',
   'app_glob2': 'glob2',
   'foo_float': 1.23,
   'foo_int': 123,
   'foo_list1': [123, '456', 789.0],
   'foo_list2': [123, '456'],
   'foo_none': None,
   'foo_parse1': ['a', 'b', 'c'],
   'foo_parse2': 123,
   'foo_parse3': 42,
   'foo_ref1': 'string_value',
   'foo_ref2': 'string_value',
   'foo_str': 'string_value'},
  'app2': {'app3': {}},
  'app3': {'foo_str': 'abc'},
  'app5': 5},
 'HhN3ZNkPkoxHkDzz': {'app1': {'app_glob': 'glob',
   'app_glob2': 'glob2',
   'foo_float': 1.23,
   'foo_int': 123,
   'foo_list1': 123,
   'foo_list2': [123, '456'],
   'foo_none': None,
   'foo_parse1': ['a', 'b', 'c'],
   'foo_parse2': 123,
   'foo_parse3': 42,
   'foo_ref1': 'string_value',
   'foo_ref2': 'string_value',
   'foo_str': 'string_value'},
  'app2': {'app3': {}},
  'app3': {'foo_str': 'abc'},
  'app5': 5,
  'new_key': 'kmasdkasm',
  'new_key2': {'abc': 123}},
 'mTzSztICfKt0Voj3': {'app1': {'app_glob': 'glob',
   'app_glob2': 'glob2',


In [44]:
#with check_type="contain", sub_cfg must be a "subset" of the experiment's dict
to_check = {'new_key': 'kmasdkasm','new_key2': {'abc': 123}}
all_exps = exp_utils.exp.get_experiments(**exp_cfg, sub_cfg = to_check, check_type="contain") #check_type is set here, so sub_cfg is actually checked
all_exps

{'HhN3ZNkPkoxHkDzz': {'app1': {'app_glob': 'glob',
   'app_glob2': 'glob2',
   'foo_float': 1.23,
   'foo_int': 123,
   'foo_list1': 123,
   'foo_list2': [123, '456'],
   'foo_none': None,
   'foo_parse1': ['a', 'b', 'c'],
   'foo_parse2': 123,
   'foo_parse3': 42,
   'foo_ref1': 'string_value',
   'foo_ref2': 'string_value',
   'foo_str': 'string_value'},
  'app2': {'app3': {}},
  'app3': {'foo_str': 'abc'},
  'app5': 5,
  'new_key': 'kmasdkasm',
  'new_key2': {'abc': 123}},
 'mTzSztICfKt0Voj3': {'app1': {'app_glob': 'glob',
   'app_glob2': 'glob2',
   'foo_float': 1.23,
   'foo_int': 123,
   'foo_list1': '456',
   'foo_list2': [123, '456'],
   'foo_none': None,
   'foo_parse1': ['a', 'b', 'c'],
   'foo_parse2': 123,
   'foo_parse3': 42,
   'foo_ref1': 'string_value',
   'foo_ref2': 'string_value',
   'foo_str': 'string_value'},
  'app2': {'app3': {}},
  'app3': {'foo_str': 'abc'},
  'app5': 5,
  'new_key': 'kmasdkasm',
  'new_key2': {'abc': 123}},
 '8WNMLIBK5jkrJaIY': {'app1': {'app_

In [45]:
#with match, sub_cfg must use relative keys
to_check = {'new_key2.abc': 123}
all_exps = exp_utils.exp.get_experiments(**exp_cfg, sub_cfg = to_check, check_type="match")
all_exps

{'HhN3ZNkPkoxHkDzz': {'app1': {'app_glob': 'glob',
   'app_glob2': 'glob2',
   'foo_float': 1.23,
   'foo_int': 123,
   'foo_list1': 123,
   'foo_list2': [123, '456'],
   'foo_none': None,
   'foo_parse1': ['a', 'b', 'c'],
   'foo_parse2': 123,
   'foo_parse3': 42,
   'foo_ref1': 'string_value',
   'foo_ref2': 'string_value',
   'foo_str': 'string_value'},
  'app2': {'app3': {}},
  'app3': {'foo_str': 'abc'},
  'app5': 5,
  'new_key': 'kmasdkasm',
  'new_key2': {'abc': 123}},
 'mTzSztICfKt0Voj3': {'app1': {'app_glob': 'glob',
   'app_glob2': 'glob2',
   'foo_float': 1.23,
   'foo_int': 123,
   'foo_list1': '456',
   'foo_list2': [123, '456'],
   'foo_none': None,
   'foo_parse1': ['a', 'b', 'c'],
   'foo_parse2': 123,
   'foo_parse3': 42,
   'foo_ref1': 'string_value',
   'foo_ref2': 'string_value',
   'foo_str': 'string_value'},
  'app2': {'app3': {}},
  'app3': {'foo_str': 'abc'},
  'app5': 5,
  'new_key': 'kmasdkasm',
  'new_key2': {'abc': 123}},
 '8WNMLIBK5jkrJaIY': {'app1': {'app_

### Sweep parameter

In [48]:
#If one parameter is an iterable of values to sweep, then use sweep. It will:
#1) cycle over every value in the iterable
#2) set the value to the key
#3) return the value
#4) At the end of the cycle, restore the iterable as value to the key
print(cfg['app1.foo_list1'])
for param_value in cfg.sweep('app1.foo_list1'):
    print(param_value,"___",cfg['app1.foo_list1'])
    #for each swept value: get/set the experiment id, then save the experiment
    exp_utils.exp.get_set_experiment_id(cfg,exp_cfg),exp_utils.exp.save_experiment(cfg,exp_cfg)
print(cfg['app1.foo_list1'])

[123, '456', 789.0]
123 ___ 123
456 ___ 456
789.0 ___ 789.0
[123, '456', 789.0]


## Sweep additions

In [22]:
cfg = exp_utils.cfg.load_configuration("config2")

In [23]:
cfg

{'app1': {'+/app1_2': ['app1_2_file']},
 '+app2': ['app2_file', 'app2bis_file'],
 '__exp__': {'name': 'prova',
  'project_folder': '../',
  'key_len': 16,
  'key_prefix': '',
  '__nosave__': {}}}

In [24]:
for value1 in cfg.sweep_additions("app2"):
    print(asdsadas)
    for value2 in cfg.sweep_additions("app1./app1_2"):
        print(value2)
        print(cfg)
        print()

NameError: name 'asdsadas' is not defined

In [26]:
cfg["app2"]

{'app3': {'foo_int1': 123},
 'foo_str': 'app2_foo',
 '__nosave__': ['foo_str', 'foo_int2', 'app3.foo_int1'],
 'foo_int2': 123}

In [25]:
cfg

{'app1': {'+/app1_2': ['app1_2_file']},
 '__exp__': {'name': 'prova',
  'project_folder': '../',
  'key_len': 16,
  'key_prefix': '',
  '__nosave__': {}},
 '__global__': {'app1': {'app_glob': 'glob', 'app_glob2': 'glob2'}},
 'app2': {'app3': {'foo_int1': 123},
  'foo_str': 'app2_foo',
  '__nosave__': ['foo_str', 'foo_int2', 'app3.foo_int1'],
  'foo_int2': 123}}

# Yaml loading times

In [None]:
import yaml, os, time

In [None]:
for i in range(10):
    with open(os.path.join("../out/exp",str(i)+".yaml"), 'w') as f:
        yaml.safe_dump(cfg["__exp__"],f)

In [None]:
# Time repeated YAML reads, cycling over the files "0.yaml" ... "9.yaml"
# in ../out/exp.
# Counting starts at 1 so that, at every checkpoint, `cont` equals the number
# of files read so far; starting at 0 made each average divide by one read
# too few.
start_time = time.perf_counter()  # monotonic clock intended for measuring intervals

for cont in range(1, 10001):
    with open(os.path.join("../out/exp",str(cont%10)+".yaml"), 'r') as f:
        cfg2 = yaml.safe_load(f)

        # The cost of the comparison is included in the measured time.
        # NOTE(review): the files were dumped from cfg["__exp__"], while this
        # compares against the whole cfg -- confirm which one is intended.
        app = cfg==cfg2

    # Report the average and the cumulative time at powers of ten.
    if cont in (1,10,100,1000,10000):
        end_time = time.perf_counter()
        print(cont,"Time/read:",(end_time-start_time)/cont)
        print(cont,"Tot time:",(end_time-start_time))
        print()
        print()

1 Time/read: 0.0011250972747802734
1 Tot time: 0.0011250972747802734

10 Time/read: 0.0002203702926635742
10 Tot time: 0.002203702926635742

100 Time/read: 6.832838058471679e-05
100 Tot time: 0.00683283805847168

1000 Time/read: 5.3598880767822266e-05
1000 Tot time: 0.053598880767822266

10000 Time/read: 5.807058811187744e-05
10000 Tot time: 0.5807058811187744



## Argparse

In [None]:
import argparse, numpy as np

In [None]:
parser = argparse.ArgumentParser(
                    prog = 'ProgramName',
                    description = 'What the program does',
                    epilog = 'Text at the bottom of help')

In [None]:
parser.add_argument(
    "--foo.foo", #name or flags - Either a name or a list of option strings, e.g. foo or -f, --foo.
    #optional arguments will be identified by the - prefix; remaining arguments will be assumed to be positional
    # action - The basic type of action to be taken when this argument is encountered at the command line.
# nargs - The number of command-line arguments that should be consumed.
# const - A constant value required by some action and nargs selections.
    default = [1,2,3], #np.ones((2,3))# default - The value produced if the argument is absent from the command line and if it is absent from the namespace object.
# type - The type to which the command-line argument should be converted.
# choices - A sequence of the allowable values for the argument.
# required - Whether or not the command-line option may be omitted (optionals only).
# help - A brief description of what the argument does.
# metavar - A name for the argument in usage messages.
# dest - The name of the attribute to be added to the object returned by parse_args().
    )

_StoreAction(option_strings=['--foo.foo'], dest='foo.foo', nargs=None, const=None, default=[1, 2, 3], type=None, choices=None, help=None, metavar=None)

In [None]:
#"--foo" is not declared as an option: argparse accepts it as an unambiguous prefix abbreviation of "--foo.foo" (allow_abbrev defaults to True)
#the parsed value is stored under dest "foo.foo", so it has to be read with getattr(args, "foo.foo")
args = parser.parse_args(["--foo","1"])

In [None]:
eval("np.ones((1,2))")

array([[1., 1.]])

In [None]:
parse_argument()

NameError: name 'parse_argument' is not defined