In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from helpers.replay_buffer import ReplayBuffer
from helpers.shedules import LinearSchedule
from helpers.create_empty_directory import create_empty_directory
from helpers.plots import plot_q_func_and_visitations

In [3]:
from tabular_environments.flipping_chain_environment import FlippingChain

In [4]:
from dqn import train

In [5]:
# Seeds iterated by the training loop (one train() call per seed and dim).
seed_range = [10, 42, 51, 38, 50]

# Values handed to the FlippingChain(...) constructor by the training loop.
dim_range = [5]

# Exploration settings forwarded to train() as `eps_params` by the
# experiments that reference this dict.
eps_params = dict(
    exploration_fraction=0.25,
    exploration_final_eps=0.001,
)

In [6]:
# Keyword arguments shared by every train() call in this notebook.
common_params = {
    'gamma': 0.99,
    'write_logs': None,
    'do_pretraining': True,
    'print_freq': None,
    'plot_freq': None,
    'target_type': 'double_q_learning',
}


def _experiment(name, eps, act_type, reward_shaping_type=None):
    """Build one experiment entry: its name plus the train() kwargs specific to it."""
    return {
        'name': name,
        'params': dict(eps_params=eps,
                       act_type=act_type,
                       reward_shaping_type=reward_shaping_type),
        'iterate_seeds': True,
    }


# Reward-shaping variants, in the order they are scheduled.
_shaping_types = [
    'count_based_state_action',
    'count_based_next_state_action',
    'count_based_state',
    'count_based_next_state',
]

# Action-selection baselines without reward shaping.
experiments = [
    _experiment('eps_greedy', eps_params, 'epsilon_greedy'),
    _experiment('ucb-1', None, 'ucb-1'),
    _experiment('ucb-2', None, 'ucb-2'),
]

# Reward shaping with eps_params=None (act_type stays 'epsilon_greedy').
experiments += [
    _experiment(shaping, None, 'epsilon_greedy', shaping)
    for shaping in _shaping_types
]

# Reward shaping combined with the eps_params exploration settings.
experiments += [
    _experiment('eps_greedy_' + shaping, eps_params, 'epsilon_greedy', shaping)
    for shaping in _shaping_types
]


In [7]:
# Rebinds eps_params to a new dict. When the cells run top to bottom, entries
# already stored in `experiments` keep referencing the earlier dict object and
# are therefore not affected by these values.
eps_params = dict(exploration_fraction=0.5,
                  exploration_final_eps=0.05)

# NOTE(review): tau_params and alpha_params are not read by any visible cell;
# confirm whether they are still needed.
tau_params = dict(fraction=0.95,
                  final_tau=0.05)

alpha_params = dict(fraction=0.95,
                    initial_alpha=10,
                    final_alpha=1)

In [11]:
# Scratch arithmetic (evaluates to 28000).
# NOTE(review): not referenced by any other visible cell; confirm whether this cell can be removed.
2000*14

28000

In [12]:
%%time

# Train every experiment configuration for each (seed, dim) pair and store one
# result matrix per experiment under `folder`.
# NOTE(review): the recorded run of this cell stopped with
# "OSError: [Errno 24] Too many open files" on a TensorBoard events file under
# logs/tensorboard_logs/; presumably train() leaves log writers open between
# calls. Confirm in dqn.train.
folder = 'results/dqn/flipping_chain_1/'
create_empty_directory(folder)

results_shape = (len(seed_range), len(dim_range))

for experiment in experiments:
    name = experiment['name']
    print(name)

    # Rows follow seed_range, columns follow dim_range.
    results = np.zeros(results_shape)

    for seed_idx, seed in enumerate(seed_range):
        for dim_idx, dim in enumerate(dim_range):
            env = FlippingChain(dim)
            rews, num_episodes = train(
                env,
                seed=seed,
                learning_starts_in_steps=3 * (dim + 9),
                max_steps=30 * 1000,
                train_freq_in_steps=10,
                update_freq_in_steps=60,
                **common_params,
                **experiment['params']
            )
            results[seed_idx, dim_idx] = rews

    # np.save appends the ".npy" extension to the given path.
    np.save(folder + name, results)

eps_greedy
ucb-1
ucb-2
count_based_state_action
count_based_next_state_action
count_based_state
count_based_next_state


OSError: [Errno 24] Too many open files: 'logs/tensorboard_logs/dqn/q_values/state_5/action_right/events.out.tfevents.1522738488.liza'

In [31]:
# Sample 1 - base**x on [0, 1) in steps of 0.01 for the bases 0.9 and 0.99.
x = np.arange(0, 1, 0.01)
y1, y2 = (1 - np.power(base, x) for base in (0.9, 0.99))

In [34]:
# Echo the whole y1 array (one value per element of x).
y1

array([0.        , 0.00105305, 0.00210499, 0.00315583, 0.00420555,
       0.00525417, 0.00630169, 0.00734811, 0.00839342, 0.00943763,
       0.01048074, 0.01152276, 0.01256367, 0.01360349, 0.01464222,
       0.01567985, 0.01671639, 0.01775183, 0.01878619, 0.01981946,
       0.02085164, 0.02188273, 0.02291274, 0.02394166, 0.0249695 ,
       0.02599625, 0.02702193, 0.02804652, 0.02907004, 0.03009248,
       0.03111384, 0.03213412, 0.03315334, 0.03417147, 0.03518854,
       0.03620454, 0.03721946, 0.03823332, 0.03924611, 0.04025783,
       0.04126848, 0.04227808, 0.04328661, 0.04429407, 0.04530048,
       0.04630583, 0.04731011, 0.04831335, 0.04931552, 0.05031664,
       0.0513167 , 0.05231571, 0.05331367, 0.05431058, 0.05530644,
       0.05630125, 0.05729501, 0.05828773, 0.0592794 , 0.06027002,
       0.06125961, 0.06224815, 0.06323565, 0.06422211, 0.06520753,
       0.06619191, 0.06717526, 0.06815757, 0.06913885, 0.07011909,
       0.0710983 , 0.07207648, 0.07305363, 0.07402975, 0.07500

In [33]:
# Draw y1 against its sample index on the current axes.
# NOTE(review): the recorded run shows "TypeError: First argument must be a
# path or file object reading bytes" during rendering; possibly environmental,
# so confirm on a fresh kernel.
plt.gca().plot(y1)

[<matplotlib.lines.Line2D at 0x7f6ee8076f98>]

TypeError: First argument must be a path or file object reading bytes

<matplotlib.figure.Figure at 0x7f6ee81723c8>

In [25]:
# Scratch check: 0.9**10000 is below the float64 range and underflows to 0.0, so this displays 1.0.
1 - 0.9**10000

1.0

In [26]:
# Scratch check: 0.99**10000 is far smaller than float64 epsilon, so the difference rounds to 1.0.
1 - 0.99**10000

1.0

In [None]:
# NOTE(review): bare literal with no recorded output or execution count; looks like leftover scratch. Confirm and remove.
0.01

In [13]:
import os
def print_results(experiments, folder, to_print=True):
    """Load each experiment's saved result matrix and summarise it per column.

    Parameters
    ----------
    experiments : iterable of dict
        Each entry must provide a 'name' key; its results are read from
        ``<folder>/<name>.npy``.
    folder : str
        Directory containing the ``.npy`` files. A trailing path separator is
        optional.
    to_print : bool, optional
        When true, print the experiment index and name followed by the first
        five columns of its statistics.

    Returns
    -------
    list of numpy.ndarray
        One ``(3, n_columns)`` array per experiment, in input order. Rows are
        the minimum, mean and maximum taken over axis 0 of the loaded array.

    Raises
    ------
    OSError
        Propagated from ``np.load`` when a result file cannot be opened.
    """
    all_stats = []
    for i, experiment in enumerate(experiments):
        name = experiment['name']

        # Join path components instead of concatenating strings, so a folder
        # given without a trailing separator still resolves to the right file.
        arr = np.load(os.path.join(folder, name + '.npy'))

        # Row 0: min, row 1: mean, row 2: max, each reduced over axis 0.
        stats = np.zeros((3, arr.shape[1]))
        stats[0] = arr.min(axis=0)
        stats[1] = arr.mean(axis=0)
        stats[2] = arr.max(axis=0)
        all_stats.append(stats)

        if to_print:
            print(i, name)
            # Only the first five columns are shown to keep the output short.
            print(stats[:,:5])
            print('\n')
    return all_stats

In [15]:
# Summarise the saved result matrices of every experiment (prints them as well).
all_stats = print_results(
    experiments,
    folder='results/dqn/flipping_chain_1/',
)

OSError: [Errno 24] Too many open files

In [19]:
# Scratch check: the result is 0.01 up to floating-point rounding error (displays 0.010000000000000009).
1 - 0.99**1

0.010000000000000009