In [1]:
import gym
import rl
import numpy as np
import os
import sys
import random
from __future__ import division
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint
from keras.layers import Conv1D, Dense, Dropout, BatchNormalization, MaxPooling1D
from keras.layers import Embedding, Flatten, Input
from keras.models import Model, load_model
from keras.optimizers import Adam

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Project-local helper modules used throughout the notebook.
import utilities as util
import agent as local_agent
# Re-import both modules so that edits made to their source files are picked up
# without restarting the kernel. NOTE: the bare `reload` builtin is Python 2 only.
reload(util)
reload(local_agent)

<module 'agent' from 'agent.pyc'>

In [3]:
# Logging: obtain this run's log id from the utilities module and create the
# matching directory under ../logs (os.makedirs raises if it already exists).
log_id = util.get_log_id("../logs")
run_log_dir = "../logs/log_{}".format(log_id)
os.makedirs(run_log_dir)

# Real documents: default (training) split first, then the train=False split.
real_docs_dir = "../data/real/"
real_recs_train = util.get_real_docs(data_dir=real_docs_dir)
real_recs_val = util.get_real_docs(data_dir=real_docs_dir, train=False)

# Generated documents are looked up by the log ids listed here.
gen_doc_log_ids = [0, 38, 40, 42, 48, 68]
generated_docs_query = dict(data_dir="../data/generated/", log_ids=gen_doc_log_ids)
gen_recs_train = util.get_generated_docs(**generated_docs_query)
gen_recs_val = util.get_generated_docs(train=False, **generated_docs_query)

# The current run's id joins the list only after the documents above were loaded.
gen_doc_log_ids.append(log_id)

In [4]:
import evaluators as ev
import text_env as te
import processor as proc

# Refresh the project modules (same order as before: te, proc, ev) so that
# source edits take effect without a kernel restart. `reload` is Python 2 only.
for _module in (te, proc, ev):
    reload(_module)

# Text processor built from the default character dictionary; its max_len is
# reused below as the environment's length argument.
tp = proc.TextProcessor(te.DEFAULT_CHAR_DICT, 50)
env = te.Environment([], te.DEFAULT_CHAR_DICT, tp.max_len, log_id)

# Hand the environment its real vs. generated documents for both the training
# and the validation/dev split.
for _attr_name, _documents in (
        ("real_docs_train", real_recs_train),
        ("real_docs_val", real_recs_val),
        ("gen_docs_train", gen_recs_train),
        ("gen_docs_val", gen_recs_val),
):
    setattr(env, _attr_name, _documents)

In [5]:
# Evaluator source selection. While `discrim_path` is a path template, the
# evaluators are loaded from disk using `discrim_log_id`; assign
# `discrim_path = None` instead to have new ones trained from scratch.
discrim_log_id = 74
discrim_path = "../models/discriminators/discrim_{dtype}_{logid}"
kl_path = "../models/discriminators/{dtype}_{logid}"

# Disabled whole-document discriminator, kept for reference:
#whole_disc = util.Discriminator("global_{}".format(log_id), 600, True, text_proc)

# The two active discriminators differ only in their name and in the integer
# passed as second constructor argument (20 for "local", 8 for "word").
local_disc, word_disc = [
    ev.Discriminator(name_template.format(log_id), size, False, env.char_dict)
    for name_template, size in (("local_{}", 20), ("word_{}", 8))
]

In [6]:

# Topic-model based evaluator, configured with batch-learning LDA arguments
# (30 components, single job).
klcalc = ev.KLCalculator("topic_model_{}".format(log_id), True,
                         lda_args={'learning_method': 'batch',
                                   'n_components': 30, 'n_jobs': 1})


def _fit_discriminator(disc, num_per, epochs):
    """Fit `disc` to tell real documents from generated ones.

    Documents are passed as real followed by generated, for the training and
    the validation split; the label arrays mirror that order with 1 for every
    real document and 0 for every generated one. Fresh lists and arrays are
    built on each call so the two discriminators never share inputs.

    disc    -- discriminator exposing fit(processor, train, val, y_train, y_val, ...)
    num_per -- forwarded unchanged to disc.fit
    epochs  -- forwarded unchanged to disc.fit
    """
    disc.fit(tp,
             real_recs_train + gen_recs_train,
             real_recs_val + gen_recs_val,
             np.append(np.ones(len(real_recs_train)), np.zeros(len(gen_recs_train))),
             np.append(np.ones(len(real_recs_val)), np.zeros(len(gen_recs_val))),
             num_per=num_per, epochs=epochs)


if discrim_path is None:
    # Train new evaluators. The text processor created earlier in the notebook
    # is `tp`; this branch previously referenced an undefined `text_proc` and
    # raised NameError as soon as it was enabled.
#    whole_disc.get_model(text_processor=tp)
    local_disc.get_model(text_processor=tp)
    word_disc.get_model(text_processor=tp)

    _fit_discriminator(local_disc, num_per=5, epochs=2)
#    whole_disc.fit(tp, real_recs_train+gen_recs_train, real_recs_val+gen_recs_val,
#                   np.append(np.ones(len(real_recs_train)),np.zeros(len(gen_recs_train))),
#                   np.append(np.ones(len(real_recs_val)),np.zeros(len(gen_recs_val))),
#                   whole_doc=True,
#                   epochs=1)
    _fit_discriminator(word_disc, num_per=20, epochs=3)
    klcalc.fit(env)
else:
    # Load previously saved evaluators identified by `discrim_log_id`.
    #whole_disc.get_model(path=discrim_path.format(dtype='global', logid=discrim_log_id))
    local_disc.get_model(path=discrim_path.format(dtype='local', logid=discrim_log_id))
    # Kept for interactive inspection; not used by any visible cell.
    cd = local_disc.char_dict
    word_disc.get_model(path=discrim_path.format(dtype='word', logid=discrim_log_id))

    klcalc.get_model(model_path=kl_path.format(dtype='topic_model', logid=discrim_log_id))


# Register the evaluators with the environment, in this fixed order.
# NOTE: re-running this cell appends them again.
for _evaluator in (local_disc, word_disc, klcalc):
    env.evaluators.append(_evaluator)

loading from ../models/discriminators/discrim_local_74
loading from ../models/discriminators/discrim_word_74


In [12]:
WINDOW_LENGTH = 1

# Network hyper-parameters.
filter_size = 64
embedding_size = 16
num_blocks = 2


def _tanh_conv(tensor, dilation, layer_name):
    """Apply a width-5, same-padded, tanh Conv1D with `filter_size` filters."""
    return Conv1D(filter_size, 5, padding="same", dilation_rate=dilation,
                  activation='tanh', name=layer_name)(tensor)


def _normalize_and_drop(tensor):
    """Apply BatchNormalization followed by Dropout(0.25)."""
    normalized = BatchNormalization()(tensor)
    return Dropout(0.25)(normalized)


# Input is a sequence of env.max_len character ids, embedded per character.
char_inp = Input(shape=(env.max_len,))
emb = Embedding(len(env.char_dict), embedding_size)(char_inp)

# Each block stacks four convolutions (dilation 1, 1, 2, 4) named
# conv_<block><a-d>; the 'b' and 'c' outputs are normalized and dropped out,
# and the max-pooled 'd' output feeds the next block.
block_input = emb
for block_idx in range(num_blocks):
    name_template = 'conv_{}{}'
    stage_a = _tanh_conv(block_input, 1, name_template.format(block_idx, 'a'))
    stage_b = _normalize_and_drop(
        _tanh_conv(stage_a, 1, name_template.format(block_idx, 'b')))
    stage_c = _normalize_and_drop(
        _tanh_conv(stage_b, 2, name_template.format(block_idx, 'c')))
    last_conv = _tanh_conv(stage_c, 4, name_template.format(block_idx, 'd'))
    block_input = MaxPooling1D(pool_size=5)(last_conv)

# NOTE(review): the head flattens the last block's *un-pooled* 'd' convolution,
# so the final MaxPooling1D output is never used. Confirm this is intended
# rather than a leftover (flattening the pooled tensor would shrink the Dense input).
flat = Dropout(0.25)(Flatten()(last_conv))
action_scores = Dense(env.num_actions)(flat)
model = Model(char_inp, action_scores)


memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

# Epsilon-greedy exploration whose `eps` attribute is annealed between
# value_max=1.0 and value_min=0.1 over nb_steps=1,000,000; value_test=0.05.
eps_greedy = EpsGreedyQPolicy()
policy = LinearAnnealedPolicy(eps_greedy, attr='eps', value_max=1., value_min=.1,
                              value_test=.05, nb_steps=1000000)

agent_settings = dict(
    model=model,
    nb_actions=env.num_actions,
    policy=policy,
    memory=memory,
    processor=tp,
    nb_steps_warmup=500,
    gamma=.99,
    target_model_update=100,
    train_interval=4,
    delta_clip=1.,
)
dqn = DQNAgent(**agent_settings)

optimizer = Adam(lr=.00025)
dqn.compile(optimizer, metrics=['mae'])

In [13]:
# Output locations for this run, all keyed by log_id.
weights_filename = '../models/dqns/dqn_{}/weights.h5f'.format(log_id)
checkpoint_weights_filename = '../models/log_' + str(log_id) + '/dqn_weights_{step}.h5f'
log_filename = '../logs/log_{}/dqn_log.json'.format(log_id)

# Make sure every output directory exists *before* training starts. Only the
# ../logs/log_<id> directory is created elsewhere in the notebook, so without
# this the weight/checkpoint writes can fail after the training time has
# already been spent. (Python 2 compatible: os.makedirs has no exist_ok there.)
for _output_path in (weights_filename, checkpoint_weights_filename, log_filename):
    _output_dir = os.path.dirname(_output_path)
    if not os.path.isdir(_output_dir):
        os.makedirs(_output_dir)

# NOTE(review): the checkpoint interval (250000) is larger than nb_steps (5000)
# below, so presumably no intermediate checkpoint is written in this run.
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000),
    FileLogger(log_filename, interval=100),
]
dqn.fit(env, callbacks=callbacks, nb_steps=5000, log_interval=1000)

# After training is done, we save the final weights one more time.
dqn.save_weights(weights_filename, overwrite=True)

# Finally, evaluate our algorithm for 10 episodes.
dqn.test(env, nb_episodes=10, visualize=False)

Training for 5000 steps ...
Interval 1 (0 steps performed)

AssertionError: 

In [11]:
# Sanity check: push an all-zeros batch of 32 sequences of length 50 through
# the agent's network and display the shape of what comes back.
probe_batch = np.zeros((32, 50))
dqn.model.predict_on_batch(probe_batch).shape

(32, 51)

In [None]:
# Debugging aid: look at the observation the agent saw most recently.
observation = dqn.recent_observation
# Ask the agent's memory for the recent state built from that observation.
# `state` is not used further in this cell; it is kept for interactive inspection.
state = dqn.memory.get_recent_state(observation)
print(observation)