In [None]:
import gym
import torch
import numpy as np
from torch import nn
import random
import torch.nn.functional as F
import collections
from torch.optim.lr_scheduler import StepLR
from banana_env import BananaEnv
from music_env import MusicEnv
import music_utils

"""
Implementation of Double DQN for gym environments with discrete action space.
"""

# Select the compute device used for all networks and tensors in this file,
# then report whether CUDA was picked (prints True/False).
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device.type == "cuda")

"""
The Q-Network has as input a state s and outputs the state-action values q(s,a_1), ..., q(s,a_n) for all n actions.
"""
class QNetwork(nn.Module):
    """Fully connected Q-network with two leaky-ReLU hidden layers.

    Maps a state vector of size ``state_dim`` to ``action_dim`` state-action
    values, one per discrete action.
    """

    def __init__(self, action_dim, state_dim, hidden_dim):
        super().__init__()

        # The attribute names double as state_dict keys, so they stay fixed.
        self.fc_1 = nn.Linear(state_dim, hidden_dim)
        self.fc_2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc_3 = nn.Linear(hidden_dim, action_dim)

    def forward(self, inp):
        """Return the Q-values of every action for the given state(s)."""
        hidden = inp
        for layer in (self.fc_1, self.fc_2):
            hidden = F.leaky_relu(layer(hidden))
        # The output layer is linear: Q-values are unbounded.
        return self.fc_3(hidden)


"""
If the observations are images we use CNNs.
"""
class QNetworkCNN(nn.Module):
    """Convolutional Q-network for 3 x 210 x 160 image observations.

    Three convolutions followed by two fully connected layers produce one
    Q-value per discrete action. Accepts a single observation or a batch.
    """

    def __init__(self, action_dim):
        super(QNetworkCNN, self).__init__()

        self.conv_1 = nn.Conv2d(3, 32, kernel_size=8, stride=4)
        self.conv_2 = nn.Conv2d(32, 64, kernel_size=4, stride=3)
        self.conv_3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
        # 8960 = 64 channels * 14 * 10, the conv_3 output for a 210 x 160 input.
        self.fc_1 = nn.Linear(8960, 512)
        self.fc_2 = nn.Linear(512, action_dim)

    def forward(self, inp):
        """Return Q-values of shape ``(batch, action_dim)``.

        :param inp: tensor holding ``batch * 3 * 210 * 160`` elements; a single
        observation yields a batch of one.
        """
        # Infer the batch dimension instead of hard-coding 1, so the network
        # also works on the mini-batches drawn from the replay memory.
        # NOTE(review): this only reinterprets the memory layout. If frames
        # arrive as height x width x channel, a permute is needed - confirm
        # against the environment's observation layout.
        inp = inp.reshape(-1, 3, 210, 160)
        x1 = F.relu(self.conv_1(inp))
        x1 = F.relu(self.conv_2(x1))
        x1 = F.relu(self.conv_3(x1))
        x1 = torch.flatten(x1, 1)
        x1 = F.leaky_relu(self.fc_1(x1))
        x1 = self.fc_2(x1)

        return x1


"""
Replay memory that stores the state, action, reward, done sequence of the played episodes (bounded by a maximum length).
"""
class Memory:
    """Bounded replay memory of (state, action, reward, done) sequences.

    Alignment contract: the caller appends the initial observation of every
    episode directly to ``self.state``; ``update`` then appends each following
    observation except the terminal one. After a finished episode all four
    deques therefore have the same length, ``state[i]`` is the state in which
    ``action[i]`` was taken, and ``state[i + 1]`` is its successor (for a
    terminal transition it is the first state of the next episode, which the
    ``is_done`` flag is meant to mask out).
    """

    def __init__(self, len):
        # NOTE: the parameter name shadows the builtin ``len`` inside this
        # method only; it is kept so keyword callers (``Memory(len=...)``)
        # keep working.
        self.rewards = collections.deque(maxlen=len)
        self.state = collections.deque(maxlen=len)
        self.action = collections.deque(maxlen=len)
        self.is_done = collections.deque(maxlen=len)

    def update(self, state, action, reward, done):
        """Record one environment step; ``state`` is the observation after it."""
        # If the episode is finished we do not save the new state. Otherwise we
        # would have more states per episode than rewards and actions, which
        # leads to a mismatch when we sample from memory.
        if not done:
            self.state.append(state)
        self.action.append(action)
        self.rewards.append(reward)
        self.is_done.append(done)

    def sample(self, batch_size):
        """
        Sample up to "batch_size" many (state, action, next state, reward, is_done) datapoints.

        :param batch_size: requested number of transitions; capped at the number
        of transitions that have a stored successor state
        :return: tuple ``(states, actions, next_states, rewards, is_done)`` of
        tensors on ``device``; actions are int64, everything else float32
        :raises ValueError: if fewer than two transitions are stored
        """
        n = len(self.is_done)
        if n < 2:
            raise ValueError("cannot sample: the memory holds fewer than two transitions")
        # The newest transition is excluded because its successor state may not
        # be stored yet.
        batch_size = min(n - 1, batch_size)
        idx = random.sample(range(n - 1), batch_size)

        def to_tensor(values, dtype):
            # Going through one numpy array avoids the slow tensor-from-list-of-
            # ndarrays path, and only the sampled entries are converted instead
            # of the whole memory.
            return torch.as_tensor(np.array(values), dtype=dtype).to(device)

        states = to_tensor([self.state[i] for i in idx], torch.float32)
        actions = to_tensor([self.action[i] for i in idx], torch.long)
        next_states = to_tensor([self.state[i + 1] for i in idx], torch.float32)
        rewards = to_tensor([self.rewards[i] for i in idx], torch.float32)
        is_done = to_tensor([self.is_done[i] for i in idx], torch.float32)
        return states, actions, next_states, rewards, is_done

    def reset(self):
        """Drop every stored transition."""
        self.rewards.clear()
        self.state.clear()
        self.action.clear()
        self.is_done.clear()


def select_action(model, env, state, eps):
    """Pick an action epsilon-greedily with respect to ``model``.

    :param model: Q-network mapping a state tensor to one value per action
    :param env: environment; its ``chose_random_action()`` method is used for
    exploration when present, otherwise ``env.action_space.sample()``
    :param state: current observation (anything ``torch.Tensor`` accepts)
    :param eps: probability of taking a random action
    :return: the selected action
    """
    # select a random action with probability eps
    if random.random() <= eps:
        # Standard gym environments have no ``chose_random_action``; fall back
        # to sampling from the action space so they work too.
        chooser = getattr(env, "chose_random_action", None)
        if chooser is not None:
            return chooser()
        return env.action_space.sample()

    # Greedy branch: the forward pass is only needed (and only run) here.
    state = torch.Tensor(state).to(device)
    with torch.no_grad():
        values = model(state)
    return np.argmax(values.cpu().numpy())


def train(batch_size, current, target, optim, memory, gamma):
    """Run one Double-DQN gradient step on a batch sampled from ``memory``.

    The online network ``current`` chooses the best next action and the
    ``target`` network evaluates it; only ``current`` is updated.

    :param batch_size: number of transitions requested from the memory
    :param current: online Q-network that is being optimized
    :param target: target Q-network used to evaluate the chosen next action
    :param optim: optimizer over the parameters of ``current``
    :param memory: object whose ``sample(batch_size)`` returns
    ``(states, actions, next_states, rewards, is_done)`` tensors
    :param gamma: reward discount factor
    :return: the mean squared TD error of the batch as a Python float
    """
    states, actions, next_states, rewards, is_done = memory.sample(batch_size)

    # Q(s, a) of the actions that were actually taken.
    q_value = current(states).gather(1, actions.unsqueeze(1)).squeeze(1)

    # The bootstrap target is a constant for the optimization, so it is built
    # without recording an autograd graph.
    with torch.no_grad():
        best_next_action = current(next_states).argmax(dim=1, keepdim=True)
        next_q_value = target(next_states).gather(1, best_next_action).squeeze(1)
        # (1 - is_done) removes the bootstrap term for terminal transitions.
        expected_q_value = rewards + gamma * next_q_value * (1 - is_done)

    loss = (q_value - expected_q_value).pow(2).mean()

    optim.zero_grad()
    loss.backward()
    optim.step()

    return loss.item()


def evaluate(Qmodel, env, repeats):
    """
    Runs a greedy policy with respect to the current Q-Network for "repeats" many episodes. Returns the average
    episode reward.

    :param Qmodel: Q-network to evaluate; switched to eval mode while playing and
    back to train mode afterwards (also when an error occurs)
    :param env: environment whose ``step`` returns
    ``(state, reward, terminated, truncated, info)``
    :param repeats: number of episodes to play (must be non-zero)
    :return: total collected reward divided by ``repeats``
    """
    Qmodel.eval()
    perform = 0
    try:
        for _ in range(repeats):
            state = env.reset()
            # reset() may return either the observation or (observation, info).
            if isinstance(state, tuple):
                state = state[0]
            done = False
            while not done:
                state = torch.Tensor(state).to(device)
                with torch.no_grad():
                    values = Qmodel(state)
                action = np.argmax(values.cpu().numpy())
                state, reward, terminated, truncated, _ = env.step(action)
                # A truncated episode (e.g. time limit) is over as well;
                # ignoring the flag could loop forever.
                done = terminated or truncated
                perform += reward
    finally:
        Qmodel.train()
    return perform/repeats


def compose(Qmodel,env,fn="Qmodel.mid",N=200):
    """Let the greedy policy of ``Qmodel`` play in ``env`` and write the result to MIDI files.

    Two rollouts are performed on deep copies of ``env`` (the caller's
    environment is not modified): one with ``random_init`` set to False and one
    with True. The ``info["keyPressed"]`` entries of each rollout are written
    with ``music_utils.writePitches`` to ``fn + "-False.mid"`` and
    ``fn + "-True.mid"`` respectively.

    :param Qmodel: Q-network whose greedy policy is played; its train/eval mode
    is restored before returning
    :param env: environment to copy; the copies get ``is_composing = True`` and
    ``nr_of_notes_to_compose = N``
    :param fn: prefix of the output file names
    :param N: value assigned to ``nr_of_notes_to_compose`` on the copies
    """
    import copy
    # Remember the mode so a call in the middle of training does not leave the
    # network stuck in eval mode.
    was_training = Qmodel.training
    Qmodel.eval()
    env0 = copy.deepcopy(env)
    try:
        for random_init in [False,True]:
            env = copy.deepcopy(env0)
            env.random_init = random_init
            env.is_composing = True
            env.nr_of_notes_to_compose = N
            state = env.reset()[0]
            iinds = [[]]
            done = False
            while not done:
                state = torch.Tensor(state).to(device)
                with torch.no_grad():
                    values = Qmodel(state)
                action = np.argmax(values.cpu().numpy())
                state,reward,done,truncated, info = env.step(action)
                # A truncated rollout is finished as well.
                done = done or truncated
                print("compose ",action,reward,info)
                iinds[0].append(info["keyPressed"])
            music_utils.writePitches(fn+"-"+str(random_init)+".mid",iinds,tempo=70,instrument=[0],add21=False,start_at= [0],durationsInQuarterNotes=False)
    finally:
        Qmodel.train(was_training)
    
def update_parameters(current_model, target_model):
    """Copy all weights of ``current_model`` into ``target_model`` (hard update)."""
    weights = current_model.state_dict()
    target_model.load_state_dict(weights)


def main(gamma=0.99, lr=1e-3, min_episodes=20, eps=1, eps_decay=0.995, eps_min=0.01, update_step=10, batch_size=64, update_repeats=50,
         num_episodes=3000, seed=42, max_memory_size=50000, lr_gamma=0.9, lr_step=100, measure_step=100,
         measure_repeats=100, hidden_dim=64, env=None, cnn=False, horizon=np.inf, render=True, render_step=50):
    """
    Train a Double DQN agent and periodically evaluate it.

    :param gamma: reward discount factor
    :param lr: learning rate for the Q-Network
    :param min_episodes: we wait "min_episodes" many episodes in order to aggregate enough data before starting to train
    :param eps: probability to take a random action during training
    :param eps_decay: after every episode "eps" is multiplied by "eps_decay" to reduce exploration over time
    :param eps_min: minimal value of "eps"
    :param update_step: after "update_step" many episodes the Q-Network is trained "update_repeats" many times with a
    batch of size "batch_size" from the memory.
    :param batch_size: see above
    :param update_repeats: see above
    :param num_episodes: the number of episodes played in total
    :param seed: random seed for reproducibility (seeds "random", numpy, torch and the action space)
    :param max_memory_size: size of the replay memory
    :param lr_gamma: learning rate decay for the Q-Network
    :param lr_step: every "lr_step" episodes we decay the learning rate
    :param measure_step: every "measure_step" episode the performance is measured
    :param measure_repeats: the amount of episodes played to assess performance
    :param hidden_dim: hidden dimensions for the Q_network
    :param env: the environment to train on; if None a fresh gym 'CartPole-v1' environment is created
    :param cnn: set to "True" when using environments with image observations like "Pong-v0"
    :param horizon: number of steps taken in the environment before terminating the episode (prevents very long episodes)
    :param render: if "True" renders the environment every "render_step" episodes
    :param render_step: see above
    :return: the trained Q-Network and the measured performances
    """
    # Create the default environment per call instead of once at definition
    # time, which would build it on import and share it between calls.
    if env is None:
        env = gym.make('CartPole-v1')

    # Exploration uses "random" and replay sampling uses "random"/numpy, so all
    # generators are seeded, not only torch.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    env.action_space.seed(seed)

    if cnn:
        Q_1 = QNetworkCNN(action_dim=env.action_space.n).to(device)
        Q_2 = QNetworkCNN(action_dim=env.action_space.n).to(device)
    else:
        print(env.observation_space.shape)
        Q_1 = QNetwork(action_dim=env.action_space.n, state_dim=env.observation_space.shape[0],
                                        hidden_dim=hidden_dim).to(device)
        Q_2 = QNetwork(action_dim=env.action_space.n, state_dim=env.observation_space.shape[0],
                                        hidden_dim=hidden_dim).to(device)
    # transfer parameters from Q_1 to Q_2
    update_parameters(Q_1, Q_2)

    # we only train Q_1
    for param in Q_2.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Q_1.parameters(), lr=lr)
    scheduler = StepLR(optimizer, step_size=lr_step, gamma=lr_gamma)

    memory = Memory(max_memory_size)
    performance = []

    maxRewards = -np.inf

    for episode in range(num_episodes):
        # display the performance
        if episode % measure_step == 0:
            performance.append([episode, evaluate(Q_1, env, measure_repeats)])
            print("Episode: ", episode)
            print("rewards: ", performance[-1][1])
            print("lr: ", scheduler.get_last_lr()[0])
            print("eps: ", eps)
            # Export a composition whenever the evaluation score improves.
            if performance[-1][1] > maxRewards:
                maxRewards = performance[-1][1]
                compose(Q_1,env,fn="Qmodel"+str(episode)+"-"+str(np.round(performance[-1][1],3))+".mid",N=200)

        state = env.reset()
        # reset() may return either the observation or (observation, info).
        if isinstance(state, tuple):
            state = state[0]
        # The first state of the episode is stored here; memory.update() only
        # appends the states that follow it.
        memory.state.append(state)

        done = False
        i = 0
        while not done:
            i += 1
            # Q_2 is synchronized with Q_1 right after every training phase, so
            # acting with Q_2 is acting with the latest trained weights.
            action = select_action(Q_2, env, state, eps)
            state, reward, terminated, truncated, info = env.step(action)
            # A truncated episode (e.g. time limit) has ended as well.
            done = terminated or truncated
            if i%100==0:
                print(reward,info)

            # Stop after exactly "horizon" steps.
            if i >= horizon:
                done = True

            # render the environment if render == True
            if render and episode % render_step == 0:
                env.render()

            # save state, action, reward sequence
            memory.update(state, action, reward, done)

        if episode >= min_episodes and episode % update_step == 0:
            for _ in range(update_repeats):
                train(batch_size, Q_1, Q_2, optimizer, memory, gamma)

            # transfer new parameter from Q_1 to Q_2
            update_parameters(Q_1, Q_2)

        # update learning rate and eps (once per episode, independent of
        # whether a training phase took place)
        scheduler.step()
        eps = max(eps*eps_decay, eps_min)

    return Q_1, performance


# Guarded entry point: importing this module must not start a training run.
if __name__ == '__main__':
    env = MusicEnv(file="for_elise_by_beethoven.musicxml",maxNotes = 5,max_wrong_notes = 2,help_factor = 0.5)
    # Alternative score:
    #env = MusicEnv(file="reinforcement_learning.musicxml",maxNotes = 10)
    horizon = np.inf
    #horizon = 100
    gamma = 0.99
    #gamma = 0.05

    # Train on the music environment, then export a composition of the final network.
    Q_1,performance = main(gamma=gamma, lr=1e-3, min_episodes=20, eps=1, eps_decay=0.995, eps_min=0.01, update_step=10, batch_size=64, update_repeats=50,
             num_episodes=30000, seed=42, max_memory_size=50000, lr_gamma=0.9, lr_step=100, measure_step=10,
             measure_repeats=100, hidden_dim=640, env=env, cnn=False, horizon=horizon, render=True, render_step=50)
    compose(Q_1,env,N=200)

[0.00390625, 0.0078125, 0.015625, 0.03125, 0.0625, 0.125, 0.25, 0.5, 1, 2, 4, 8, 0.005859375, 0.01171875, 0.0234375, 0.046875, 0.09375, 0.1875, 0.375, 0.75, 1.5, 3, 6, 12]


  return torch._C._cuda_getDeviceCount() > 0


False
dimObs =  238
#Act =  97
computing vectors..
print computing knn..
..done
(238, 1)
Episode:  0
rewards:  -2.756809750418039
lr:  0.001
eps:  1
compose  7 -1.3784048752090226 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.3784048752090226 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.3784048752090226 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.3784048752090226 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.3784048752090226 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.3784048752090226 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.3784048752090226 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.3784048752090226 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.3784048752090226 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.3784048752090226 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.3784048752090226 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.3784048752090226 {'keyPressed': (68, 0.0625, 76, False)}

compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424004376894 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.396424



Episode:  10
rewards:  -2.756809750418039
lr:  0.001
eps:  0.9511101304657719
Episode:  20
rewards:  -2.756809750418039
lr:  0.001
eps:  0.9046104802746175


  return torch.Tensor(self.state)[idx].to(device), torch.LongTensor(self.action)[idx].to(device), \


Episode:  30
rewards:  -1.1965217359806384
lr:  0.001
eps:  0.8603841919146962
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  19 -1.4941406026043456 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4941406026043456 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4941406026043456 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4941406026043456 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4941406026043456 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4941406026043456 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4941406026043456 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4941406026043456 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4941406026043456 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4941406026043456 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4941406026043456 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4941406026043456 {'keyPressed': (52, 0.0625,

compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, 

Episode:  40
rewards:  0.16201483390407034
lr:  0.001
eps:  0.8183201210226743
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  11 1.3862943611198906 {'keyPressed': (69, 0.0625, 76, False)}
compose  11 -1.5080194982219375 {'keyPressed': (69, 0.0625, 76, False)}
compose  11 -1.5080194982219375 {'keyPressed': (69, 0.0625, 76, False)}
compose  11 -1.5080194982219375 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 1.3862943611198906 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -0.9999999999999997 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -0.9999999999999997 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -0.9999999999999997 {'keyPressed': (40, 0.0625, 64, False)}
compose  11 -1.4941406026043456 {'keyPressed': (69, 0.0625, 76, False)}
compose  11 -1.4941406026043456 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 -0.9999999999999997 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -0.9999999999999997 {'keyPressed': (40, 0.0625, 6

compose  11 -1.3662601021279466 {'keyPressed': (69, 0.0625, 76, False)}
compose  11 -1.3662601021279466 {'keyPressed': (69, 0.0625, 76, False)}
compose  11 -1.3662601021279466 {'keyPressed': (69, 0.0625, 76, False)}
compose  11 -1.3662601021279466 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 -1.5135354220117248 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -1.5135354220117248 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -1.5135354220117248 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -1.5135354220117248 {'keyPressed': (40, 0.0625, 64, False)}
compose  11 -1.3662601021279466 {'keyPressed': (69, 0.0625, 76, False)}
compose  11 -1.3662601021279466 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 -1.5135354220117248 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -1.5135354220117248 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -1.5135354220117248 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -1.5135354220117248 {'keyPressed': (40, 0.0625, 64, 

Episode:  50
rewards:  -1.026210337675091
lr:  0.001
eps:  0.778312557068642
Episode:  60
rewards:  -1.0924013265457824
lr:  0.001
eps:  0.7402609576967045
Episode:  70
rewards:  -1.1919915668685932
lr:  0.001
eps:  0.7040696960536299
Episode:  80
rewards:  -1.1919915668685932
lr:  0.001
eps:  0.6696478204705644
Episode:  90
rewards:  -1.0851032875132665
lr:  0.001
eps:  0.6369088258938781
Episode:  100
rewards:  -2.120497255779897
lr:  0.0009000000000000001
eps:  0.6057704364907278
Episode:  110
rewards:  -1.0702664297665465
lr:  0.0009000000000000001
eps:  0.5761543988830038
Episode:  120
rewards:  -1.054627308305805
lr:  0.0009000000000000001
eps:  0.547986285490042
Episode:  130
rewards:  -0.5645560905547948
lr:  0.0009000000000000001
eps:  0.5211953074858876
Episode:  140
rewards:  0.7557664321508978
lr:  0.0009000000000000001
eps:  0.49571413690105054
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  74 -1.35400640077266 {'keyPressed': (64, 0.0625, 76

compose  19 -1.488552811855935 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.488552811855935 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.488552811855935 {'keyPressed': (52, 0.0625, 64, False)}
compose  15 -0.3973597071195133 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 -1.3964240043768943 {'keyPressed': (64, 0.0625, 76, False)}
compose  29 -1.3416407864998736 {'keyPressed': (40, 0.0625, 76, False)}
compose  7 -0.9999999999999998 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -0.9999999999999998 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -0.9999999999999998 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -0.9999999999999998 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -0.9999999999999998 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -0.9999999999999998 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -0.9999999999999998 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -0.9999999999999998 {'keyPressed': (68, 0.0625, 76, False)}
com

Episode:  150
rewards:  -1.177514334896421
lr:  0.0009000000000000001
eps:  0.47147873742168567
Episode:  160
rewards:  2.742194364071993
lr:  0.0009000000000000001
eps:  0.4484282034609769
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  11 1.3862943611198906 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 1.3862943611198906 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 0.6931471805599453 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.4392458342578491 {'keyPressed': (52, 0.0625, 64, False)}
compose  7 -1.0760551736979407 {'keyPressed': (68, 0.0625, 76, False)}
compose  39 -1.5099402897846192 {'keyPressed': (71, 0.0625, 83, False)}
compose  39 -1.5099402897846192 {'keyPressed': (71, 0.0625, 83, False)}
compose  39 -1.5099402897846192 {'keyPressed': (71, 0.0625, 83, False)}
compose  29 -1.399247918291146 {'keyPressed': (40, 0.0625, 76, False)}
compose  7 -1.0760551736979407 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.076055173697

compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compose  7 -1.4345364187925331 {'keyPressed': (68, 0.0625, 76, False)}
compos

Episode:  170
rewards:  -0.620465858876837
lr:  0.0009000000000000001
eps:  0.42650460709830135
Episode:  180
rewards:  -0.620465858876837
lr:  0.0009000000000000001
eps:  0.40565285250151817
Episode:  190
rewards:  -0.9919469882820199
lr:  0.0009000000000000001
eps:  0.3858205374665315
Episode:  200
rewards:  6.277319003966652
lr:  0.0008100000000000001
eps:  0.3669578217261671
compose  89 -0.3973597071195133 {'keyPressed': (64, 0.0625, 70, False)}
compose  74 1.0986122886681098 {'keyPressed': (64, 0.0625, 76, False)}
compose  11 1.3862943611198906 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 1.3862943611198906 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -0.9999999999999997 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 1.0986122886681098 {'keyPressed': (52, 0.0625, 64, False)}
compose  15 1.6094379124341003 {'keyPressed': (56, 0.0625, 70, False)}
compose  15 -1.4518590623204806 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 1.791759469228055 {'keyPressed': (6

compose  46 -2.465661610904298 {'keyPressed': (64, 0.0625, 78, False)}
compose  3 -2.439750182371333 {'keyPressed': (69, 0.0625, 70, False)}
compose  3 -2.439750182371333 {'keyPressed': (69, 0.0625, 70, False)}
compose  3 -2.439750182371333 {'keyPressed': (69, 0.0625, 70, False)}
compose  29 -2.4764681982295076 {'keyPressed': (40, 0.0625, 76, False)}
compose  24 -2.4612790855248625 {'keyPressed': (56, 0.0625, 76, False)}
compose  74 -2.461279085524863 {'keyPressed': (64, 0.0625, 76, False)}
compose  84 -2.4174076299049787 {'keyPressed': (45, 0.0625, 57, False)}
compose  84 -2.4174076299049787 {'keyPressed': (45, 0.0625, 57, False)}
compose  21 -2.478150130677203 {'keyPressed': (71, 0.0625, 76, False)}
compose  21 -2.478150130677203 {'keyPressed': (71, 0.0625, 76, False)}
compose  85 -2.4714155330178906 {'keyPressed': (52, 0.0625, 76, False)}
compose  84 -2.4174076299049787 {'keyPressed': (45, 0.0625, 57, False)}
compose  84 -2.4174076299049787 {'keyPressed': (45, 0.0625, 57, False)}
co

Episode:  210
rewards:  5.836121584247569
lr:  0.0008100000000000001
eps:  0.34901730169741024
Episode:  220
rewards:  1.9616768593140597
lr:  0.0008100000000000001
eps:  0.33195389135223546
Episode:  230
rewards:  2.9315954533099515
lr:  0.0008100000000000001
eps:  0.3157247089126454
Episode:  240
rewards:  3.6195356837405406
lr:  0.0008100000000000001
eps:  0.30028896908517405
Episode:  250
rewards:  3.759560627009073
lr:  0.0008100000000000001
eps:  0.285607880564032
Episode:  260
rewards:  4.7823419890781995
lr:  0.0008100000000000001
eps:  0.27164454854530906
Episode:  270
rewards:  2.5883516824520076
lr:  0.0008100000000000001
eps:  0.2583638820072446
Episode:  280
rewards:  0.20178880354946202
lr:  0.0008100000000000001
eps:  0.2457325055235537
Episode:  290
rewards:  4.100383941691146
lr:  0.0008100000000000001
eps:  0.23371867538818816
Episode:  300
rewards:  3.349675573326792
lr:  0.000729
eps:  0.22229219984074702
Episode:  310
rewards:  6.2828872494986046
lr:  0.000729
eps:

compose  21 -1.3904435743076138 {'keyPressed': (71, 0.0625, 76, False)}
compose  15 -1.0760551736979407 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 -1.3784048752090226 {'keyPressed': (64, 0.0625, 76, False)}
compose  7 1.0986122886681098 {'keyPressed': (68, 0.0625, 76, False)}
compose  74 -1.2909944487358052 {'keyPressed': (64, 0.0625, 76, False)}
compose  38 -1.3743685418725535 {'keyPressed': (45, 0.0625, 76, False)}
compose  38 -1.3743685418725535 {'keyPressed': (45, 0.0625, 76, False)}
compose  85 -1.1547005383792512 {'keyPressed': (52, 0.0625, 76, False)}
compose  85 -1.1547005383792512 {'keyPressed': (52, 0.0625, 76, False)}
compose  14 -1.570562531918633 {'keyPressed': (56, 0.0625, 57, False)}
compose  21 1.6094379124341003 {'keyPressed': (71, 0.0625, 76, False)}
compose  15 -1.4083086782851744 {'keyPressed': (56, 0.0625, 70, False)}
compose  35 -1.2266873704256946 {'keyPressed': (64, 0.0625, 64, False)}
compose  7 -1.4575875285926305 {'keyPressed': (68, 0.0625, 76, False

Episode:  320
rewards:  4.842585682416651
lr:  0.000729
eps:  0.2010878536592394
Episode:  330
rewards:  5.100105186717921
lr:  0.000729
eps:  0.1912566947289212
Episode:  340
rewards:  8.32808093324483
lr:  0.000729
eps:  0.18190617987607657
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  3 -0.3973597071195133 {'keyPressed': (69, 0.0625, 70, False)}
compose  11 1.3862943611198906 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 1.3862943611198906 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -0.9999999999999997 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 1.0986122886681098 {'keyPressed': (52, 0.0625, 64, False)}
compose  15 1.6094379124341003 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  7 1.0986122886681098 {'keyPressed': (68, 0.0625, 76, False)}
compose  21 1.6094379124341003 {'keyPressed': (71, 0.0625, 76, False)}
compose  21 -1.4848159355870003 {'keyPressed': (7

compose  67 -1.5101992432741989 {'keyPressed': (40, 0.0625, 64, False)}
compose  14 -1.5165750888103096 {'keyPressed': (56, 0.0625, 57, False)}
compose  15 -1.3992479182911457 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 -1.3964240043768938 {'keyPressed': (64, 0.0625, 76, False)}
compose  74 -1.3964240043768938 {'keyPressed': (64, 0.0625, 76, False)}
compose  21 -1.411611511296054 {'keyPressed': (71, 0.0625, 76, False)}
compose  21 -1.411611511296054 {'keyPressed': (71, 0.0625, 76, False)}
compose  78 -1.5131006854173739 {'keyPressed': (45, 0.0625, 64, False)}
compose  78 -1.5131006854173739 {'keyPressed': (45, 0.0625, 64, False)}
compose  11 -1.3904435743076142 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 -1.5101992432741989 {'keyPressed': (40, 0.0625, 64, False)}
compose  15 -1.3992479182911457 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 -1.3964240043768938 {'keyPressed': (64, 0.0625, 76, False)}
compose  19 -1.5135354220117245 {'keyPressed': (52, 0.0625, 64, Fa

Episode:  350
rewards:  6.083972420303301
lr:  0.000729
eps:  0.1730128104744653
Episode:  360
rewards:  9.0086475882555
lr:  0.000729
eps:  0.16455423674261854
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  3 -0.3973597071195133 {'keyPressed': (69, 0.0625, 70, False)}
compose  11 1.3862943611198906 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 1.3862943611198906 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 1.6094379124341003 {'keyPressed': (52, 0.0625, 64, False)}
compose  24 -0.3973597071195133 {'keyPressed': (56, 0.0625, 76, False)}
compose  15 1.6094379124341003 {'keyPressed': (56, 0.0625, 70, False)}
compose  35 -0.5619514869490165 {'keyPressed': (64, 0.0625, 64, False)}
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  7 1.0986122886681098 {'keyPressed': (68, 0.0625, 76, False)}
compose  21 1.6094379124341003 {'keyPressed': (71, 0.0625, 76, False)}
compose  38 -0.5619514869490165 {'keyPressed': (45, 0.0625,

compose  11 -1.420545218551215 {'keyPressed': (69, 0.0625, 76, False)}
compose  19 -1.5327003743086027 {'keyPressed': (52, 0.0625, 64, False)}
compose  11 -1.420545218551215 {'keyPressed': (69, 0.0625, 76, False)}
compose  19 -1.5327003743086027 {'keyPressed': (52, 0.0625, 64, False)}
compose  11 -1.420545218551215 {'keyPressed': (69, 0.0625, 76, False)}
compose  59 -1.4107430944120802 {'keyPressed': (52, 0.0625, 78, False)}
compose  11 -1.420545218551215 {'keyPressed': (69, 0.0625, 76, False)}
compose  19 -1.5327003743086027 {'keyPressed': (52, 0.0625, 64, False)}
compose  11 -1.420545218551215 {'keyPressed': (69, 0.0625, 76, False)}
compose  19 -1.5327003743086027 {'keyPressed': (52, 0.0625, 64, False)}
compose  11 -1.420545218551215 {'keyPressed': (69, 0.0625, 76, False)}
compose  59 -1.4107430944120802 {'keyPressed': (52, 0.0625, 78, False)}
compose  11 -1.420545218551215 {'keyPressed': (69, 0.0625, 76, False)}
compose  19 -1.5327003743086027 {'keyPressed': (52, 0.0625, 64, False)}

Episode:  370
rewards:  4.897949705161963
lr:  0.000729
eps:  0.15650920157696743
Episode:  380
rewards:  7.093075979369383
lr:  0.000729
eps:  0.14885748713096328
Episode:  390
rewards:  7.735331881814585
lr:  0.000729
eps:  0.14157986400593744
Episode:  400
rewards:  4.440759742652955
lr:  0.0006561000000000001
eps:  0.1346580429260134
Episode:  410
rewards:  7.735331881814585
lr:  0.0006561000000000001
eps:  0.12807462877562611
Episode:  420
rewards:  8.80610848179619
lr:  0.0006561000000000001
eps:  0.12181307688414106
Episode:  430
rewards:  11.091772519990641
lr:  0.0006561000000000001
eps:  0.11585765144771248
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  3 -0.3973597071195133 {'keyPressed': (69, 0.0625, 70, False)}
compose  11 1.3862943611198906 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 1.3862943611198906 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 1.6094379124341003 {'keyPressed': (52, 0.0625, 64, False)}
compose  14 -0.609449

compose  3 -1.4111088087654469 {'keyPressed': (69, 0.0625, 70, False)}
compose  3 -1.4111088087654469 {'keyPressed': (69, 0.0625, 70, False)}
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  67 -2.230470802319546 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 -2.224859546128698 {'keyPressed': (52, 0.0625, 64, False)}
compose  11 -2.2415533461215618 {'keyPressed': (69, 0.0625, 76, False)}
compose  14 -2.185679066664669 {'keyPressed': (56, 0.0625, 57, False)}
compose  15 -2.232871309707365 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 -2.248976375341036 {'keyPressed': (64, 0.0625, 76, False)}
compose  74 -2.248976375341036 {'keyPressed': (64, 0.0625, 76, False)}
compose  19 -2.224859546128698 {'keyPressed': (52, 0.0625, 64, False)}
compose  7 -2.2600652063252737 {'keyPressed': (68, 0.0625, 76, False)}
compose  21 -2.2674276328419913 {'keyPressed': (71, 0.0625, 76, False)}
compose  26 -2.2721840842723355 {'keyPressed': (71, 0.0625, 78, False)}
comp

Episode:  440
rewards:  4.334728012653267
lr:  0.0006561000000000001
eps:  0.11019338598389174
Episode:  450
rewards:  8.463002487414819
lr:  0.0006561000000000001
eps:  0.10480604571960442
Episode:  460
rewards:  2.417269522115025
lr:  0.0006561000000000001
eps:  0.0996820918179746
Episode:  470
rewards:  10.149313920209288
lr:  0.0006561000000000001
eps:  0.09480864735409487
Episode:  480
rewards:  8.196681110501709
lr:  0.0006561000000000001
eps:  0.09017346495423652
Episode:  490
rewards:  11.736614022593262
lr:  0.0006561000000000001
eps:  0.08576489601717459
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  3 -0.3973597071195133 {'keyPressed': (69, 0.0625, 70, False)}
compose  11 1.3862943611198906 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 1.3862943611198906 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -0.9999999999999997 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 1.0986122886681098 {'keyPressed': (52, 0.0625, 64, False)}
com

compose  67 -0.9999999999999997 {'keyPressed': (40, 0.0625, 64, False)}
compose  67 -0.9999999999999997 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 1.0986122886681098 {'keyPressed': (52, 0.0625, 64, False)}
compose  15 -0.6094494002200436 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 -1.5652475842498528 {'keyPressed': (64, 0.0625, 76, False)}
compose  7 -1.2247448713915885 {'keyPressed': (68, 0.0625, 76, False)}
compose  21 -1.570562531918633 {'keyPressed': (71, 0.0625, 76, False)}
compose  78 -1.4839418227590102 {'keyPressed': (45, 0.0625, 64, False)}
compose  78 -1.4839418227590102 {'keyPressed': (45, 0.0625, 64, False)}
compose  5 -1.5071259308460496 {'keyPressed': (52, 0.0625, 70, False)}
compose  96 -1.5788119136944492 {'keyPressed': (57, 0.0625, 76, False)}
compose  49 -1.4735767952260141 {'keyPressed': (40, 0.0625, 70, False)}
compose  74 -1.5652475842498528 {'keyPressed': (64, 0.0625, 76, False)}
compose  11 -1.5764815627361641 {'keyPressed': (69, 0.0625, 76, Fals

Episode:  500
rewards:  13.884508825283268
lr:  0.00059049
eps:  0.08157186144027828
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  30 -0.41070025419420075 {'keyPressed': (69, 0.0625, 83, False)}
compose  11 1.3862943611198906 {'keyPressed': (69, 0.0625, 76, False)}
compose  49 -0.4140393356054125 {'keyPressed': (40, 0.0625, 70, False)}
compose  67 1.3862943611198906 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 1.3862943611198906 {'keyPressed': (52, 0.0625, 64, False)}
compose  15 1.6094379124341003 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  7 1.0986122886681098 {'keyPressed': (68, 0.0625, 76, False)}
compose  21 1.6094379124341003 {'keyPressed': (71, 0.0625, 76, False)}
compose  78 1.9459101490553132 {'keyPressed': (45, 0.0625, 64, False)}
compose  78 -1.3557637102737479 {'keyPressed': (45, 0.0625, 64, False)}
compose  5 1.6094379124341003 {'keyPressed': (52, 0.0625, 70, 

compose  74 -1.2909944487358052 {'keyPressed': (64, 0.0625, 76, False)}
compose  7 -1.3904435743076138 {'keyPressed': (68, 0.0625, 76, False)}
compose  21 1.6094379124341003 {'keyPressed': (71, 0.0625, 76, False)}
compose  78 -0.4140393356054125 {'keyPressed': (45, 0.0625, 64, False)}
compose  78 -0.4140393356054125 {'keyPressed': (45, 0.0625, 64, False)}
compose  5 -1.290994448735806 {'keyPressed': (52, 0.0625, 70, False)}
compose  96 -1.076055173697941 {'keyPressed': (57, 0.0625, 76, False)}
compose  74 -1.2211584951084111 {'keyPressed': (64, 0.0625, 76, False)}
compose  11 -1.2875926129184283 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 -1.4158961490031343 {'keyPressed': (40, 0.0625, 64, False)}
compose  14 -1.534523347740889 {'keyPressed': (56, 0.0625, 57, False)}
compose  19 -1.3557637102737479 {'keyPressed': (52, 0.0625, 64, False)}
compose  15 -1.4083086782851744 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 -1.2211584951084111 {'keyPressed': (64, 0.0625, 76, False)

Episode:  510
rewards:  11.944243748559884
lr:  0.00059049
eps:  0.07758382377679894
Episode:  520
rewards:  13.10796665877934
lr:  0.00059049
eps:  0.07379076075438468
Episode:  530
rewards:  10.938652310037794
lr:  0.00059049
eps:  0.07018314008827135
Episode:  540
rewards:  15.173860261590576
lr:  0.00059049
eps:  0.0667518955258533
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  30 -0.41070025419420075 {'keyPressed': (69, 0.0625, 83, False)}
compose  11 1.3862943611198906 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 1.3862943611198906 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 1.6094379124341003 {'keyPressed': (52, 0.0625, 64, False)}
compose  24 -0.3973597071195133 {'keyPressed': (56, 0.0625, 76, False)}
compose  15 1.6094379124341003 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  7 1.0986122886681098 {'keyPressed': (68, 0.0625, 76, False)}
compose  21 1.609437912

compose  73 -1.4234753427025184 {'keyPressed': (45, 0.0625, 78, False)}
compose  5 -1.463963406130482 {'keyPressed': (52, 0.0625, 70, False)}
compose  39 -1.4693096260579503 {'keyPressed': (71, 0.0625, 83, False)}
compose  74 -1.4116115112960541 {'keyPressed': (64, 0.0625, 76, False)}
compose  3 -1.4345364187925325 {'keyPressed': (69, 0.0625, 70, False)}
compose  11 -1.3784048752090223 {'keyPressed': (69, 0.0625, 76, False)}
compose  19 -1.5169322962252696 {'keyPressed': (52, 0.0625, 64, False)}
compose  19 -1.5169322962252696 {'keyPressed': (52, 0.0625, 64, False)}
compose  11 -1.3784048752090223 {'keyPressed': (69, 0.0625, 76, False)}
compose  73 -1.4234753427025184 {'keyPressed': (45, 0.0625, 78, False)}
compose  73 -1.4234753427025184 {'keyPressed': (45, 0.0625, 78, False)}
compose  96 -1.3964240043768945 {'keyPressed': (57, 0.0625, 76, False)}
compose  74 -1.4116115112960541 {'keyPressed': (64, 0.0625, 76, False)}
compose  5 -1.463963406130482 {'keyPressed': (52, 0.0625, 70, False

Episode:  550
rewards:  11.593310877745704
lr:  0.00059049
eps:  0.06348840406243188
Episode:  560
rewards:  9.500133794792008
lr:  0.00059049
eps:  0.06038446427088321
Episode:  570
rewards:  10.619027029075399
lr:  0.00059049
eps:  0.05743227569078546
Episode:  580
rewards:  11.118696615190393
lr:  0.00059049
eps:  0.05462441922520914
Episode:  590
rewards:  11.870362719947927
lr:  0.00059049
eps:  0.05195383849590569
Episode:  600
rewards:  12.816972210043557
lr:  0.000531441
eps:  0.0494138221100385
Episode:  610
rewards:  12.677603027174085
lr:  0.000531441
eps:  0.046997986793891174
Episode:  620
rewards:  12.47441897125163
lr:  0.000531441
eps:  0.04470026135116646
Episode:  630
rewards:  5.084759892772386
lr:  0.000531441
eps:  0.04251487140556204
Episode:  640
rewards:  11.856132855808546
lr:  0.000531441
eps:  0.04043632488927963
Episode:  650
rewards:  11.995667596901773
lr:  0.000531441
eps:  0.03845939824099909
Episode:  660
rewards:  13.86328402359086
lr:  0.000531441
eps

compose  95 -1.4461079040567606 {'keyPressed': (68, 0.0625, 70, False)}
compose  21 1.6094379124341003 {'keyPressed': (71, 0.0625, 76, False)}
compose  78 -0.4140393356054125 {'keyPressed': (45, 0.0625, 64, False)}
compose  78 -0.4140393356054125 {'keyPressed': (45, 0.0625, 64, False)}
compose  5 -1.290994448735806 {'keyPressed': (52, 0.0625, 70, False)}
compose  96 -1.076055173697941 {'keyPressed': (57, 0.0625, 76, False)}
compose  74 -1.2211584951084111 {'keyPressed': (64, 0.0625, 76, False)}
compose  18 -1.350763267011941 {'keyPressed': (76, 0.0625, 76, False)}
compose  18 -1.350763267011941 {'keyPressed': (76, 0.0625, 76, False)}
compose  84 -0.6094494002200436 {'keyPressed': (45, 0.0625, 57, False)}
compose  78 -0.4140393356054125 {'keyPressed': (45, 0.0625, 64, False)}
compose  15 -1.4083086782851744 {'keyPressed': (56, 0.0625, 70, False)}
compose  19 -1.3557637102737479 {'keyPressed': (52, 0.0625, 64, False)}
compose  96 -1.076055173697941 {'keyPressed': (57, 0.0625, 76, False)}

Episode:  740
rewards:  9.94560581350547
lr:  0.0004782969
eps:  0.02449513017825978
Episode:  750
rewards:  13.486651063522533
lr:  0.0004782969
eps:  0.023297566459620722
Episode:  760
rewards:  15.43409359043649
lr:  0.0004782969
eps:  0.022158551474944856
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  30 -0.41070025419420075 {'keyPressed': (69, 0.0625, 83, False)}
compose  11 1.3862943611198906 {'keyPressed': (69, 0.0625, 76, False)}
compose  49 -0.4140393356054125 {'keyPressed': (40, 0.0625, 70, False)}
compose  67 1.3862943611198906 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 1.3862943611198906 {'keyPressed': (52, 0.0625, 64, False)}
compose  15 1.6094379124341003 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  7 1.0986122886681098 {'keyPressed': (68, 0.0625, 76, False)}
compose  21 1.6094379124341003 {'keyPressed': (71, 0.0625, 76, False)}
compose  78 1.9459101490553132

compose  73 -2.2685135763653355 {'keyPressed': (45, 0.0625, 78, False)}
compose  58 -2.279382006008924 {'keyPressed': (45, 0.0625, 83, False)}
compose  67 -2.230470802319546 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 -2.224859546128698 {'keyPressed': (52, 0.0625, 64, False)}
compose  89 -2.232871309707365 {'keyPressed': (64, 0.0625, 70, False)}
compose  74 -2.248976375341036 {'keyPressed': (64, 0.0625, 76, False)}
compose  11 -2.2415533461215618 {'keyPressed': (69, 0.0625, 76, False)}
compose  15 -2.232871309707365 {'keyPressed': (56, 0.0625, 70, False)}
compose  7 -2.2600652063252737 {'keyPressed': (68, 0.0625, 76, False)}
compose  85 -2.2600652063252737 {'keyPressed': (52, 0.0625, 76, False)}
compose  95 -2.2440397246292862 {'keyPressed': (68, 0.0625, 70, False)}
compose  95 -2.2440397246292862 {'keyPressed': (68, 0.0625, 70, False)}
compose  21 -2.2674276328419913 {'keyPressed': (71, 0.0625, 76, False)}
compose  78 -2.2286019533929036 {'keyPressed': (45, 0.0625, 64, False)}

Episode:  770
rewards:  14.433075700070047
lr:  0.0004782969
eps:  0.021075222784267326
Episode:  780
rewards:  15.746098160783376
lr:  0.0004782969
eps:  0.020044857891939702
compose  3 -1.4111088087654469 {'keyPressed': (69, 0.0625, 70, False)}
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  11 1.3862943611198906 {'keyPressed': (69, 0.0625, 76, False)}
compose  49 -0.4140393356054125 {'keyPressed': (40, 0.0625, 70, False)}
compose  67 1.3862943611198906 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 1.0986122886681098 {'keyPressed': (52, 0.0625, 64, False)}
compose  15 1.6094379124341003 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  7 1.0986122886681098 {'keyPressed': (68, 0.0625, 76, False)}
compose  21 1.6094379124341003 {'keyPressed': (71, 0.0625, 76, False)}
compose  78 1.9459101490553132 {'keyPressed': (45, 0.0625, 64, False)}
compose  78 -1.3557637102737479 {'keyPressed'

compose  94 -1.2255643729027517 {'keyPressed': (64, 0.0625, 83, False)}
compose  74 -1.1547005383792517 {'keyPressed': (64, 0.0625, 76, False)}
compose  3 -1.2875926129184283 {'keyPressed': (69, 0.0625, 70, False)}
compose  11 -1.224744871391589 {'keyPressed': (69, 0.0625, 76, False)}
compose  67 -1.4659886790209349 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 -1.4079972089286537 {'keyPressed': (52, 0.0625, 64, False)}
compose  24 -1.4083086782851744 {'keyPressed': (56, 0.0625, 76, False)}
compose  15 -1.4632935693754139 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 -1.1547005383792517 {'keyPressed': (64, 0.0625, 76, False)}
compose  7 -1.402378931197509 {'keyPressed': (68, 0.0625, 76, False)}
compose  21 -1.3743685418725535 {'keyPressed': (71, 0.0625, 76, False)}
compose  78 -0.5619514869490165 {'keyPressed': (45, 0.0625, 64, False)}
compose  78 -0.5619514869490165 {'keyPressed': (45, 0.0625, 64, False)}
compose  5 -1.3507632670119414 {'keyPressed': (52, 0.0625, 70, False

Episode:  790
rewards:  17.147119795682666
lr:  0.0004782969
eps:  0.019064867404770626
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  30 -0.41070025419420075 {'keyPressed': (69, 0.0625, 83, False)}
compose  11 1.3862943611198906 {'keyPressed': (69, 0.0625, 76, False)}
compose  49 -0.4140393356054125 {'keyPressed': (40, 0.0625, 70, False)}
compose  67 1.3862943611198906 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 1.3862943611198906 {'keyPressed': (52, 0.0625, 64, False)}
compose  15 1.6094379124341003 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 1.791759469228055 {'keyPressed': (64, 0.0625, 76, False)}
compose  7 1.0986122886681098 {'keyPressed': (68, 0.0625, 76, False)}
compose  21 1.6094379124341003 {'keyPressed': (71, 0.0625, 76, False)}
compose  78 1.9459101490553132 {'keyPressed': (45, 0.0625, 64, False)}
compose  78 -1.3557637102737479 {'keyPressed': (45, 0.0625, 64, False)}
compose  5 1.6094379124341003 {'keyPressed': (52, 0.0625, 7

compose  78 -1.1470786693528088 {'keyPressed': (45, 0.0625, 64, False)}
compose  5 -1.4111088087654469 {'keyPressed': (52, 0.0625, 70, False)}
compose  96 1.3862943611198906 {'keyPressed': (57, 0.0625, 76, False)}
compose  89 -1.4345364187925325 {'keyPressed': (64, 0.0625, 70, False)}
compose  74 -1.378404875209022 {'keyPressed': (64, 0.0625, 76, False)}
compose  11 -1.3662601021279466 {'keyPressed': (69, 0.0625, 76, False)}
compose  74 -1.378404875209022 {'keyPressed': (64, 0.0625, 76, False)}
compose  67 -1.5135354220117248 {'keyPressed': (40, 0.0625, 64, False)}
compose  19 -1.5052539565416228 {'keyPressed': (52, 0.0625, 64, False)}
compose  14 -1.5491933384829661 {'keyPressed': (56, 0.0625, 57, False)}
compose  15 -1.4345364187925322 {'keyPressed': (56, 0.0625, 70, False)}
compose  74 -1.378404875209022 {'keyPressed': (64, 0.0625, 76, False)}
compose  74 -1.378404875209022 {'keyPressed': (64, 0.0625, 76, False)}
compose  7 -1.3964240043768936 {'keyPressed': (68, 0.0625, 76, False)}

Episode:  800
rewards:  15.362118811544706
lr:  0.00043046721
eps:  0.018132788524664028
