# Generate configs for evaluation.py

In [15]:
import os, yaml, itertools, hashlib

In [18]:
def md5(txt):
    """Return the hexadecimal MD5 digest of *txt*.

    Args:
        txt: Data to hash. Text (``unicode`` on Python 2, ``str`` on
            Python 3) is encoded as UTF-8 first; byte strings and other
            buffer objects are handed to ``hashlib.md5`` unchanged.

    Returns:
        The 32-character lowercase hex digest.
    """
    # hashlib only accepts bytes-like input; type(u'') names the text type
    # on both Python 2 and Python 3 without needing a version check.
    if isinstance(txt, type(u'')):
        txt = txt.encode('utf-8')
    return hashlib.md5(txt).hexdigest()

def make_configs(out_dir, **params):
    """Write one YAML config file per combination of parameter values.

    Each keyword argument maps a parameter name to an iterable of candidate
    values. The cartesian product of all candidates is enumerated and every
    combination is dumped to ``<out_dir>/<digest>.yaml``, where ``digest``
    is the MD5 hex digest of the ``name-value`` pairs joined with ``_``
    (in the dict's iteration order).

    If a combination contains a ``ctor`` key, the file holds
    ``{'ctor': <ctor>, 'kwargs': <remaining parameters>}``; otherwise the
    flat parameter dict is dumped as is.

    Args:
        out_dir: Directory that receives the files; created if missing.
        **params: Parameter name -> iterable of values to combine.

    Returns:
        List of generated file names (hex digests without the ``.yaml``
        suffix), in generation order.
    """
    # Create the target directory up front so a fresh checkout does not
    # fail on the first open(). An empty out_dir means "current directory".
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    param_names = list(params)
    result = []
    for values in itertools.product(*params.values()):
        conf = dict(zip(param_names, values))
        # The name is computed before 'ctor' is split off, so it identifies
        # the full parameter combination.
        fname = md5('_'.join('%s-%s' % (k, str(v)) for k, v in conf.items()))

        if 'ctor' in conf:
            ctor = conf.pop('ctor')
            conf = dict(ctor=ctor, kwargs=conf)

        with open(os.path.join(out_dir, fname + '.yaml'), 'w') as f:
            yaml.dump(conf, f)
        result.append(fname)
    return result

## Name refresher

In [3]:
import dlpf

Using Theano backend.
Using gpu device 0: GeForce GTX 560 Ti (CNMeM is disabled, cuDNN not available)


In [4]:
# Display the path policies known to dlpf.gym_environ together with the default one.
dlpf.gym_environ.get_available_path_policies(), dlpf.gym_environ.DEFAULT_PATH_POLICY

(['go_straight', 'random_start_and_finish_straight', 'follow_gold'],
 'follow_gold')

In [5]:
# Display the task policies known to dlpf.gym_environ together with the default one.
dlpf.gym_environ.get_available_task_policies(), dlpf.gym_environ.DEFAULT_TASK_POLICY

(['random', 'sequential'], 'random')

In [6]:
# Display the action policies known to dlpf.agents.policies together with the default one.
dlpf.agents.policies.get_available_action_policies(), dlpf.agents.policies.DEFAULT_ACTION_POLICY

(['annealed_epsilon_greedy', 'epsilon_greedy'], 'epsilon_greedy')

In [7]:
# Display the optimizer names reported by dlpf.keras_utils.
dlpf.keras_utils.get_available_optimizers()

['rmsprop', 'adagrad', 'nadam', 'adadelta']

## Environment

In [8]:
# Vision range for this section; same value as the vision_range entry of
# the environment config generated below.
VISION_RANGE = 10

In [13]:
# Environment config: every parameter has exactly one candidate value, so a
# single YAML file is written to configs/env/.
make_configs('configs/env/',
             ctor = ['PathFindingByPixelWithDistanceMapEnv-v1'],
             maps_dir = ['data/current/imported/maps'],
             map_shape = [(501, 501)],
             path_policy = ['go_straight'],
             task_policy = ['sequential'],
             obstacle_punishment = [dlpf.gym_environ.DEFAULT_OBSTACLE_PUNISHMENT],
             local_goal_reward = [dlpf.gym_environ.DEFAULT_GOAL_REWARD],
             done_reward = [dlpf.gym_environ.DEFAULT_DONE_REWARD],
             stop_game_after_invalid_action = [False],
             # Use the constant defined in the cell above instead of repeating
             # the literal, so the two cannot drift apart.
             vision_range = [VISION_RANGE],
             target_on_border_reward = [5])

## Agent

In [14]:
# One-layer agent: each parameter lists a single value, so exactly one
# config file is written to configs/agent/.
make_configs(
    'configs/agent/',
    ctor=['dlpf.agents.our_dqn.OneLayerAgent'],
    action_policy=[dict(ctor='epsilon_greedy')],
    max_memory_size=[250],
    loss=['mean_squared_error'],
    optimizer=['rmsprop'],
    epoch_number=[100],
    passes_over_train_data=[2],
    validation_part=[0.1],
    batch_size=[32]
)

In [21]:
# Two-layer agent grid search: 5 action policies x 4 optimizers x
# 5 hidden sizes x 2 activations = 200 config files in
# configs/agent/two_layer.
# NOTE(review): 'adam' does not appear in the optimizer list printed by
# dlpf.keras_utils.get_available_optimizers() earlier in this notebook --
# confirm that it is actually supported before running these configs.
make_configs(
    'configs/agent/two_layer',
    ctor=['dlpf.agents.our_dqn.TwoLayerAgent'],
    action_policy=[
        dict(ctor='epsilon_greedy', eps=0.05),
        dict(ctor='epsilon_greedy', eps=0.1),
        dict(ctor='annealed_epsilon_greedy', eps=0.05, decrease_coef=0.99),
        dict(ctor='annealed_epsilon_greedy', eps=0.3, decrease_coef=0.99),
        dict(ctor='annealed_epsilon_greedy', eps=0.6, decrease_coef=0.99),
    ],
    max_memory_size=[250],
    loss=['mean_squared_error'],
    optimizer=[
        dict(ctor='rmsprop', lr=0.01),
        dict(ctor='rmsprop', lr=0.1),
        dict(ctor='adam', lr=0.01),
        dict(ctor='adam', lr=0.1),
    ],
    epoch_number=[20],
    passes_over_train_data=[10],
    validation_part=[0.1],
    batch_size=[64],
    hidden_size=[8, 16, 32, 64, 128],
    hidden_activation=['relu', 'tanh']
)

## Apply_agent kwargs

In [15]:
# Run settings: no 'ctor' key is given, so make_configs dumps the flat
# parameter dict; one value per parameter means a single file in
# configs/apply/.
make_configs(
    'configs/apply/',
    episodes_number=[3000],
    max_steps=[100],
    train_each_episodes=[10]
)