In [101]:
import gym
import numpy as np

In [102]:
import flappy_bird_gymnasium
import gymnasium as gym

# env = gym.make("FlappyBird-v0", render_mode="human", use_lidar=True)
env = gym.make("FlappyBird-v0",use_lidar=False)
obs, info = env.reset()


In [103]:
from torch import nn
import torch

In [104]:
# class PolicyNet(nn.Module):
    
#     def __init__(self,input_dim,output_dim):
#         super().__init__()
#         self.linear1 = nn.Linear(input_dim,200)
#         self.relu = nn.ReLU()
#         self.linear2 = nn.Linear(200,output_dim)
#         self.softmax = nn.Softmax(dim=-1)
    
#     def forward(self,state):
#         ### n
#         x = self.linear1(state)
#         x = self.relu(x)
#         x = self.linear2(x) # n
#         x = self.softmax(x) # n
#         return x

In [105]:
class PolicyNet(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(PolicyNet, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim),
            nn.Softmax(dim=-1)
        )
    
    def forward(self, x):
        return self.network(x)

In [106]:
# class ValueNet(nn.Module):
#     ### 用来学习值
    
#     def __init__(self,input_dim):
#         super().__init__()
#         self.linear1 = nn.Linear(input_dim,200)
#         self.relu = nn.ReLU()
#         self.linear2 = nn.Linear(200,1)
    
#     def forward(self,state):
#         ### n
#         x = self.linear1(state)
#         x = self.relu(x)
#         x = self.linear2(x) # n
#         return x

In [114]:
class ValueNet(nn.Module):
    def __init__(self, input_dim):
        super(ValueNet, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
    
    def forward(self, x):
        return self.network(x)

In [115]:
from torch.distributions import Categorical
import numpy as np
np.bool8 = np.bool_

from torch.optim import AdamW

In [116]:
class Agent:
    
    def __init__(self):
        self.policy_net = PolicyNet(12,2)
        self.value_net = ValueNet(12)
        self.optimizer = AdamW(self.policy_net.parameters(),lr=1e-3)
        self.value_optimizer = AdamW(self.value_net.parameters(),lr=1e-3)
    
    def sample_action(self,state):
        probs = self.policy_net(state) # 4
        if np.random.uniform() < 0.0:
            action = np.random.randint(0,2)
            return action, torch.log(probs[action]+1e-8).detach()
        dist = Categorical(probs)
        action = dist.sample()
        log_prob = dist.log_prob(action).detach()
        return action.item(),log_prob
    
    def update(self,rewards,log_probs,xs,old_actions):
        ### 一次游戏时间
        ret = []
        adding = 0
        for r in rewards[::-1]:
            adding = adding * 0.99 + r
            ret.insert(0,adding)
        ret = torch.FloatTensor(ret)
        ret = ret - ret.mean()
        ret = ret / (ret.std()+1e-8)
        
        
        
        for _ in range(4):
            values = self.value_net(xs) ## B,48 ==> B,1
#             print(values.shape)

            new_probs = self.policy_net(xs) ## B,4
            dist = Categorical(new_probs) # B,4
#             actions = dist.sample() # B,1
            new_logprobs = dist.log_prob(old_actions.squeeze(-1)) # B,1
#             print('new log probs',new_logprobs.shape)
            advantages = ret - values.squeeze(-1).detach() # B
#             print('adv shape',advantages.shape,'ret shape',ret.shape,'value shape',values.shape,'log',log_probs.shape)
#             print('new_probs',new_probs.shape,'new_logprobs shape',new_logprobs.shape,'old_actions',old_actions.shape)
            ratio = torch.exp(new_logprobs - log_probs.squeeze(-1)) ## B
#             print('shape',ratio.shape)
            surr1 = ratio * advantages.squeeze(-1)
            surr2 = torch.clamp(ratio,0.8,1.2) * advantages
            loss = -torch.min(surr1,surr2).mean()


            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()


            value_loss = (values.squeeze(0) - ret).pow(2).mean()
            self.value_optimizer.zero_grad()
            value_loss.backward()
            self.value_optimizer.step()


    #         r_log_probs = []
    #         for r,log_prob in zip(ret,log_probs):
    #             r_log_probs.append(-r*log_prob)
    #         r_log_probs = torch.vstack(r_log_probs)

    #         loss = r_log_probs.sum()



            return loss

In [117]:
def convert2tensor(state):
    state_arr = torch.FloatTensor(state)
    return state_arr

In [118]:
def train(agent,env):
    success_count = []
    max_size = 1000
    for epoch in range(20000000):
        rewards = []
        xs = []
        log_probs = []
        old_actions = []
        terminated = False
        truncated = False
        success = True
        state,_ = env.reset()
        while not terminated and not truncated:
            state_arr = convert2tensor(state)
            xs.append(state_arr)
            action, log_prob = agent.sample_action(state_arr)
            next_state, reward, terminated, truncated, _ = env.step(action)
            state = next_state
            rewards.append(reward)
            log_probs.append(log_prob)
            old_actions.append(action)
        xs = torch.vstack(xs)
        log_probs = torch.vstack(log_probs)
        old_actions = torch.LongTensor(old_actions)
        loss = agent.update(rewards,log_probs,xs,old_actions) 
        success_count.append(success)
        
        
        if (epoch+1) % 10 == 0:
            print(f'epoch: {epoch}, loss: {loss}, rewards: {sum(rewards)}, count: {len(rewards)}')

In [119]:
agent = Agent()



In [None]:
env = gym.make('FlappyBird-v0',use_lidar=False)
train(agent,env)

epoch: 9, loss: 0.04407816752791405, rewards: -6.899999999999999, count: 50
epoch: 19, loss: -0.023937908932566643, rewards: -8.099999999999998, count: 50
epoch: 29, loss: 0.013372253626585007, rewards: -8.099999999999998, count: 50
epoch: 39, loss: -0.004789469297975302, rewards: -3.899999999999998, count: 50
epoch: 49, loss: 0.00654315110296011, rewards: -7.499999999999998, count: 50
epoch: 59, loss: -0.004368507768958807, rewards: -8.099999999999998, count: 50
epoch: 69, loss: 0.0017002713866531849, rewards: -6.899999999999999, count: 50
epoch: 79, loss: -0.0033569764345884323, rewards: -7.499999999999998, count: 50
epoch: 89, loss: -0.0008155059767886996, rewards: -7.499999999999998, count: 50
epoch: 99, loss: 0.0015263987006619573, rewards: -6.899999999999999, count: 50
epoch: 109, loss: 0.00032965780701488256, rewards: -6.299999999999999, count: 50
epoch: 119, loss: -0.002542436122894287, rewards: -6.899999999999999, count: 50
epoch: 129, loss: -0.0016233313363045454, rewards: -5

epoch: 1039, loss: 0.0006546787335537374, rewards: 10.89999999999998, count: 102
epoch: 1049, loss: 0.0003180730273015797, rewards: 3.8999999999999986, count: 50
epoch: 1059, loss: 0.000244314840529114, rewards: 6.299999999999994, count: 65
epoch: 1069, loss: -0.0006813764339312911, rewards: 6.299999999999994, count: 65
epoch: 1079, loss: 0.0005248566740192473, rewards: 4.899999999999995, count: 60
epoch: 1089, loss: -2.9611587706313003e-06, rewards: 3.8999999999999986, count: 50
epoch: 1099, loss: -0.0002592706587165594, rewards: 4.1999999999999975, count: 53
epoch: 1109, loss: 0.00016945242532528937, rewards: 3.8999999999999986, count: 50
epoch: 1119, loss: 5.808320929645561e-05, rewards: 4.799999999999995, count: 59
epoch: 1129, loss: -0.00026944393175654113, rewards: 4.099999999999998, count: 52
epoch: 1139, loss: 0.00014848416321910918, rewards: 6.099999999999994, count: 63
epoch: 1149, loss: -0.00046703219413757324, rewards: 4.4999999999999964, count: 56
epoch: 1159, loss: 0.0003

epoch: 2069, loss: -9.579181642038748e-05, rewards: 3.8999999999999986, count: 50
epoch: 2079, loss: 0.000301290798233822, rewards: 4.1999999999999975, count: 53
epoch: 2089, loss: 0.0004834085702896118, rewards: 4.099999999999998, count: 52
epoch: 2099, loss: -0.00027454039081931114, rewards: 8.399999999999986, count: 86
epoch: 2109, loss: 1.8975733837578446e-05, rewards: 3.8999999999999986, count: 50
epoch: 2119, loss: -0.00033499597338959575, rewards: 3.8999999999999986, count: 50
epoch: 2129, loss: 9.423017763765529e-05, rewards: 3.8999999999999986, count: 50
epoch: 2139, loss: -1.4817377632425632e-05, rewards: 4.1999999999999975, count: 53
epoch: 2149, loss: 0.0008175015682354569, rewards: 3.8999999999999986, count: 50
epoch: 2159, loss: -8.798171620583162e-05, rewards: 6.5999999999999925, count: 68
epoch: 2169, loss: -9.927630162565038e-05, rewards: 3.8999999999999986, count: 50
epoch: 2179, loss: -4.310282747610472e-05, rewards: 4.399999999999997, count: 55
epoch: 2189, loss: -0

epoch: 3079, loss: 0.0008471186156384647, rewards: 4.1999999999999975, count: 53
epoch: 3089, loss: -0.0005863721598871052, rewards: 4.099999999999998, count: 52
epoch: 3099, loss: 0.0004789524246007204, rewards: 8.399999999999986, count: 86
epoch: 3109, loss: -0.00031126581598073244, rewards: 6.699999999999992, count: 69
epoch: 3119, loss: 0.00011244897177675739, rewards: 9.099999999999984, count: 93
epoch: 3129, loss: 0.000183765179826878, rewards: 4.399999999999997, count: 55
epoch: 3139, loss: 0.0001834249560488388, rewards: 3.8999999999999986, count: 50
epoch: 3149, loss: -5.03420815221034e-05, rewards: 6.799999999999992, count: 70
epoch: 3159, loss: -0.0004531191079877317, rewards: 4.099999999999998, count: 52
epoch: 3169, loss: -0.0004030728305224329, rewards: 3.8999999999999986, count: 50
epoch: 3179, loss: 0.0004765653284266591, rewards: 8.899999999999984, count: 91
epoch: 3189, loss: -0.0002178460854338482, rewards: 6.499999999999993, count: 67
epoch: 3199, loss: 0.0002202794

epoch: 4089, loss: -0.00036562563036568463, rewards: 3.8999999999999986, count: 50
epoch: 4099, loss: -0.0003627717378549278, rewards: 3.8999999999999986, count: 50
epoch: 4109, loss: -0.00016996980411931872, rewards: 3.8999999999999986, count: 50
epoch: 4119, loss: 9.167194366455078e-05, rewards: 4.799999999999995, count: 59
epoch: 4129, loss: 0.0002106142055708915, rewards: 3.8999999999999986, count: 50
epoch: 4139, loss: -8.28874108265154e-05, rewards: 3.8999999999999986, count: 50
epoch: 4149, loss: -8.602023444836959e-05, rewards: 3.8999999999999986, count: 50
epoch: 4159, loss: 1.9886494555976242e-05, rewards: 3.8999999999999986, count: 50
epoch: 4169, loss: -0.0001983833353733644, rewards: 3.8999999999999986, count: 50
epoch: 4179, loss: 0.00030039786361157894, rewards: 3.8999999999999986, count: 50
epoch: 4189, loss: -4.32133674621582e-05, rewards: 3.8999999999999986, count: 50
epoch: 4199, loss: -0.00022426366922445595, rewards: 3.8999999999999986, count: 50
epoch: 4209, loss:

epoch: 5099, loss: -0.000544307695236057, rewards: 13.199999999999973, count: 125
epoch: 5109, loss: 0.0002627781650517136, rewards: 8.399999999999986, count: 86
epoch: 5119, loss: -0.00125465530436486, rewards: 12.899999999999974, count: 122
epoch: 5129, loss: 0.0010586678981781006, rewards: 8.399999999999986, count: 86
epoch: 5139, loss: -0.00017726827354636043, rewards: 4.399999999999997, count: 55
epoch: 5149, loss: -0.0002263604837935418, rewards: 18.200000000000014, count: 166
epoch: 5159, loss: 4.531323156697908e-06, rewards: 8.499999999999986, count: 87
epoch: 5169, loss: -0.0005876487120985985, rewards: 4.399999999999997, count: 55
epoch: 5179, loss: 0.0001611078914720565, rewards: 8.399999999999986, count: 86
epoch: 5189, loss: -0.0004534526087809354, rewards: 12.899999999999974, count: 122
epoch: 5199, loss: -0.000459887960460037, rewards: 8.399999999999986, count: 86
epoch: 5209, loss: 0.0003003306337632239, rewards: 13.89999999999997, count: 132
epoch: 5219, loss: 0.000139

epoch: 6109, loss: 0.00023256908752955496, rewards: 9.199999999999983, count: 94
epoch: 6119, loss: -0.0010546633275225759, rewards: 8.399999999999986, count: 86
epoch: 6129, loss: -0.00027128486544825137, rewards: 4.399999999999997, count: 55
epoch: 6139, loss: 2.200728158641141e-05, rewards: 8.399999999999986, count: 86
epoch: 6149, loss: 0.0006648939452134073, rewards: 8.399999999999986, count: 86
epoch: 6159, loss: -0.0008189391810446978, rewards: 3.8999999999999986, count: 50
epoch: 6169, loss: 0.0005269509274512529, rewards: 4.099999999999998, count: 52
epoch: 6179, loss: 0.000130326792714186, rewards: 9.399999999999983, count: 96
epoch: 6189, loss: 0.0012574732536450028, rewards: 3.8999999999999986, count: 50
epoch: 6199, loss: -0.001759423641487956, rewards: 8.899999999999984, count: 91
epoch: 6209, loss: 0.002412711502984166, rewards: 12.899999999999974, count: 122
epoch: 6219, loss: -0.0003516878350637853, rewards: 11.19999999999998, count: 105
epoch: 6229, loss: 2.8071344786

epoch: 7129, loss: 0.0006052079843357205, rewards: 17.90000000000001, count: 163
epoch: 7139, loss: -0.0016712748911231756, rewards: 3.8999999999999986, count: 50
epoch: 7149, loss: 0.001493496703915298, rewards: 8.999999999999984, count: 92
epoch: 7159, loss: 0.0011705593205988407, rewards: 8.599999999999985, count: 88
epoch: 7169, loss: -0.0015666827093809843, rewards: 23.400000000000073, count: 209
epoch: 7179, loss: 0.0017468652222305536, rewards: 4.699999999999996, count: 58
epoch: 7189, loss: -0.0022023566998541355, rewards: 4.1999999999999975, count: 53
epoch: 7199, loss: 0.0015998402377590537, rewards: 4.099999999999998, count: 52
epoch: 7209, loss: 0.001426446484401822, rewards: 3.8999999999999986, count: 50
epoch: 7219, loss: 0.002415167400613427, rewards: 4.4999999999999964, count: 56
epoch: 7229, loss: 0.0005848759901709855, rewards: 4.399999999999997, count: 55
epoch: 7239, loss: 0.0005226926878094673, rewards: 12.899999999999974, count: 122
epoch: 7249, loss: -0.001180353

epoch: 8149, loss: 1.7568750990903936e-05, rewards: 4.1999999999999975, count: 53
epoch: 8159, loss: 7.58965834393166e-05, rewards: 8.799999999999985, count: 90
epoch: 8169, loss: 0.00011530876508913934, rewards: 3.8999999999999986, count: 50
epoch: 8179, loss: -0.000243724585743621, rewards: 3.8999999999999986, count: 50
epoch: 8189, loss: -0.000372833019355312, rewards: 3.8999999999999986, count: 50
epoch: 8199, loss: -6.266832315304782e-06, rewards: 3.8999999999999986, count: 50
epoch: 8209, loss: 2.0005703845527023e-05, rewards: 3.8999999999999986, count: 50
epoch: 8219, loss: -0.00019153833272866905, rewards: 3.8999999999999986, count: 50
epoch: 8229, loss: -0.00010914298036368564, rewards: 4.099999999999998, count: 52
epoch: 8239, loss: 0.00020639176364056766, rewards: 4.1999999999999975, count: 53
epoch: 8249, loss: -0.00017405033577233553, rewards: 3.8999999999999986, count: 50
epoch: 8259, loss: -0.00022012353292666376, rewards: 3.8999999999999986, count: 50
epoch: 8269, loss:

epoch: 9159, loss: 2.946340828202665e-05, rewards: 8.399999999999986, count: 86
epoch: 9169, loss: 5.5486740166088566e-05, rewards: 8.999999999999984, count: 92
epoch: 9179, loss: -0.00010971139272442088, rewards: 8.699999999999985, count: 89
epoch: 9189, loss: -0.00013803703768644482, rewards: 17.90000000000001, count: 163
epoch: 9199, loss: 0.00020543976279441267, rewards: 18.00000000000001, count: 164
epoch: 9209, loss: 8.465562132187188e-06, rewards: 6.799999999999992, count: 70
epoch: 9219, loss: 0.00011709862883435562, rewards: 17.90000000000001, count: 163
epoch: 9229, loss: 0.00021332170581445098, rewards: 13.299999999999972, count: 126
epoch: 9239, loss: 3.2448864658363163e-05, rewards: 12.899999999999974, count: 122
epoch: 9249, loss: -4.738911684398772e-06, rewards: 8.499999999999986, count: 87
epoch: 9259, loss: -4.1267699998570606e-05, rewards: 4.4999999999999964, count: 56
epoch: 9269, loss: -7.232671487145126e-05, rewards: 9.099999999999984, count: 93
epoch: 9279, loss: 

epoch: 10169, loss: -8.077068196143955e-05, rewards: 4.4999999999999964, count: 56
epoch: 10179, loss: -0.0002761912764981389, rewards: 8.399999999999986, count: 86
epoch: 10189, loss: 0.00010734727402450517, rewards: 8.399999999999986, count: 86
epoch: 10199, loss: -0.00014686945360153913, rewards: 6.399999999999993, count: 66
epoch: 10209, loss: -2.9430389986373484e-05, rewards: 3.8999999999999986, count: 50
epoch: 10219, loss: -4.31454463978298e-05, rewards: 8.399999999999986, count: 86
epoch: 10229, loss: 6.540350295836106e-05, rewards: 15.79999999999998, count: 142
epoch: 10239, loss: 0.00010373333498137072, rewards: 13.599999999999971, count: 129
epoch: 10249, loss: -6.459592896135291e-07, rewards: 12.999999999999973, count: 123
epoch: 10259, loss: -0.00010453139839228243, rewards: 12.899999999999974, count: 122
epoch: 10269, loss: -5.633359251078218e-05, rewards: 8.899999999999984, count: 91
epoch: 10279, loss: -2.8783180823666044e-05, rewards: 12.899999999999974, count: 122
epo

epoch: 11169, loss: -2.2452237317338586e-05, rewards: 8.399999999999986, count: 86
epoch: 11179, loss: 9.92557397694327e-05, rewards: 18.200000000000014, count: 166
epoch: 11189, loss: -3.0321734811877832e-05, rewards: 8.899999999999984, count: 91
epoch: 11199, loss: 0.00013412801490630955, rewards: 13.599999999999971, count: 129
epoch: 11209, loss: -0.00013001050683669746, rewards: 4.4999999999999964, count: 56
epoch: 11219, loss: -6.54180403216742e-05, rewards: 8.999999999999984, count: 92
epoch: 11229, loss: 0.00016285997116938233, rewards: 8.999999999999984, count: 92
epoch: 11239, loss: -7.787694630678743e-05, rewards: 27.400000000000116, count: 240
epoch: 11249, loss: 2.6730604076874442e-05, rewards: 8.399999999999986, count: 86
epoch: 11259, loss: 4.682360668084584e-05, rewards: 8.399999999999986, count: 86
epoch: 11269, loss: -0.00023266326752491295, rewards: 8.399999999999986, count: 86
epoch: 11279, loss: 0.00035467982525005937, rewards: 17.90000000000001, count: 163
epoch: 1

epoch: 12179, loss: 0.000520906294696033, rewards: 4.4999999999999964, count: 56
epoch: 12189, loss: 0.0005909846513532102, rewards: 6.299999999999994, count: 65
epoch: 12199, loss: 0.00020369362027850002, rewards: 3.9999999999999982, count: 51
epoch: 12209, loss: -2.2232532501220703e-05, rewards: 4.099999999999998, count: 52
epoch: 12219, loss: -0.0001237034739460796, rewards: 6.799999999999992, count: 70
epoch: 12229, loss: -2.6299509045202285e-05, rewards: 8.399999999999986, count: 86
epoch: 12239, loss: -0.00028066232334822416, rewards: 8.399999999999986, count: 86
epoch: 12249, loss: -0.00019803951727226377, rewards: 8.499999999999986, count: 87
epoch: 12259, loss: 1.5929710571072064e-05, rewards: 27.60000000000012, count: 242
epoch: 12269, loss: 8.718623575987294e-05, rewards: 9.499999999999982, count: 97
epoch: 12279, loss: -0.0002978661214001477, rewards: 32.30000000000017, count: 280
epoch: 12289, loss: -0.00030647183302789927, rewards: 8.399999999999986, count: 86
epoch: 1229

epoch: 13189, loss: 7.473054574802518e-05, rewards: 12.899999999999974, count: 122
epoch: 13199, loss: -0.00012837715621571988, rewards: 4.1999999999999975, count: 53
epoch: 13209, loss: -0.00018366107542533427, rewards: 4.4999999999999964, count: 56
epoch: 13219, loss: 0.0001769198279362172, rewards: 8.799999999999985, count: 90
epoch: 13229, loss: -6.477264105342329e-05, rewards: 8.599999999999985, count: 88
epoch: 13239, loss: 1.3431365005089901e-05, rewards: 8.599999999999985, count: 88
epoch: 13249, loss: 8.321614586748183e-06, rewards: 23.20000000000007, count: 207
epoch: 13259, loss: 6.357899837894365e-05, rewards: 4.4999999999999964, count: 56
epoch: 13269, loss: -4.496715700952336e-05, rewards: 4.799999999999995, count: 59
epoch: 13279, loss: 4.1686042095534503e-05, rewards: 18.100000000000012, count: 165
epoch: 13289, loss: 4.7293942770920694e-05, rewards: 8.699999999999985, count: 89
epoch: 13299, loss: -7.882075442466885e-05, rewards: 4.4999999999999964, count: 56
epoch: 13

epoch: 14189, loss: 3.819465564447455e-05, rewards: 3.8999999999999986, count: 50
epoch: 14199, loss: 0.00023888821306172758, rewards: 8.899999999999984, count: 91
epoch: 14209, loss: 0.0003520054742693901, rewards: 13.499999999999972, count: 128
epoch: 14219, loss: 0.00011082513083238155, rewards: 8.399999999999986, count: 86
epoch: 14229, loss: -7.075710163917392e-05, rewards: 4.4999999999999964, count: 56
epoch: 14239, loss: 0.000177664594957605, rewards: 8.999999999999984, count: 92
epoch: 14249, loss: 0.00012152574345236644, rewards: 8.599999999999985, count: 88
epoch: 14259, loss: 0.00012970190437044948, rewards: 17.90000000000001, count: 163
epoch: 14269, loss: 6.740797107340768e-05, rewards: 12.899999999999974, count: 122
epoch: 14279, loss: -0.00018036499386653304, rewards: 8.699999999999985, count: 89
epoch: 14289, loss: 8.680964674567804e-05, rewards: 6.399999999999993, count: 66
epoch: 14299, loss: 0.00013375219714362174, rewards: 9.299999999999983, count: 95
epoch: 14309, 

epoch: 15199, loss: -0.00012074410915374756, rewards: 8.999999999999984, count: 92
epoch: 15209, loss: -5.870095992577262e-05, rewards: 9.099999999999984, count: 93
epoch: 15219, loss: 2.3471558961318806e-05, rewards: 13.399999999999972, count: 127
epoch: 15229, loss: 2.7924226742470637e-05, rewards: 17.90000000000001, count: 163
epoch: 15239, loss: 4.552304744720459e-05, rewards: 13.499999999999972, count: 128
epoch: 15249, loss: 2.6171095669269562e-05, rewards: 13.499999999999972, count: 128
epoch: 15259, loss: 0.0001036306275636889, rewards: 32.40000000000017, count: 281
epoch: 15269, loss: 0.00018635857850313187, rewards: 13.499999999999972, count: 128
epoch: 15279, loss: 6.325700087472796e-05, rewards: 12.999999999999973, count: 123
epoch: 15289, loss: -7.862136612857284e-07, rewards: 18.400000000000016, count: 168
epoch: 15299, loss: -0.00014821549120824784, rewards: 50.50000000000037, count: 426
epoch: 15309, loss: -0.0005092413048259914, rewards: 8.399999999999986, count: 86
ep

epoch: 16199, loss: -0.00022524256200995296, rewards: 12.999999999999973, count: 123
epoch: 16209, loss: 3.4691984183155e-05, rewards: 8.399999999999986, count: 86
epoch: 16219, loss: -0.00021338254737202078, rewards: 8.399999999999986, count: 86
epoch: 16229, loss: -8.850319136399776e-05, rewards: 8.399999999999986, count: 86
epoch: 16239, loss: 0.0001701354922261089, rewards: 11.19999999999998, count: 105
epoch: 16249, loss: -0.0001853815047070384, rewards: 9.499999999999982, count: 97
epoch: 16259, loss: 8.656201680423692e-05, rewards: 8.699999999999985, count: 89
epoch: 16269, loss: 4.677301330957562e-05, rewards: 8.399999999999986, count: 86
epoch: 16279, loss: -0.0001672265207162127, rewards: 8.699999999999985, count: 89
epoch: 16289, loss: -0.0006412924267351627, rewards: 8.699999999999985, count: 89
epoch: 16299, loss: 0.0001227835164172575, rewards: 8.499999999999986, count: 87
epoch: 16309, loss: -0.000306136003928259, rewards: 8.899999999999984, count: 91
epoch: 16319, loss:

epoch: 17199, loss: 1.990199052670505e-05, rewards: 3.8999999999999986, count: 50
epoch: 17209, loss: -2.850626742656459e-06, rewards: 8.399999999999986, count: 86
epoch: 17219, loss: 2.672711525519844e-05, rewards: 8.899999999999984, count: 91
epoch: 17229, loss: -6.031708835507743e-05, rewards: 4.1999999999999975, count: 53
epoch: 17239, loss: 0.00012521410826593637, rewards: 8.399999999999986, count: 86
epoch: 17249, loss: -0.00021300898515619338, rewards: 8.399999999999986, count: 86
epoch: 17259, loss: 0.0005537867546081543, rewards: 4.4999999999999964, count: 56
epoch: 17269, loss: -0.0003194728633388877, rewards: 17.90000000000001, count: 163
epoch: 17279, loss: 0.0005036821239627898, rewards: 8.399999999999986, count: 86
epoch: 17289, loss: -0.0001400321489199996, rewards: 12.999999999999973, count: 123
epoch: 17299, loss: -0.0001192661002278328, rewards: 13.499999999999972, count: 128
epoch: 17309, loss: -9.408877667738125e-05, rewards: 27.500000000000117, count: 241
epoch: 17

epoch: 18199, loss: 3.0889776098774746e-05, rewards: 8.799999999999985, count: 90
epoch: 18209, loss: 2.2941550923860632e-05, rewards: 8.399999999999986, count: 86
epoch: 18219, loss: 1.6387390132877044e-05, rewards: 8.999999999999984, count: 92
epoch: 18229, loss: -4.5161094021750614e-05, rewards: 9.099999999999984, count: 93
epoch: 18239, loss: -2.654868694662582e-05, rewards: 8.999999999999984, count: 92
epoch: 18249, loss: 4.202372292638756e-05, rewards: 25.300000000000086, count: 219
epoch: 18259, loss: 4.283019279682776e-06, rewards: 13.299999999999972, count: 126
epoch: 18269, loss: -3.9646702134632505e-06, rewards: 9.099999999999984, count: 93
epoch: 18279, loss: 5.099128429719713e-06, rewards: 12.899999999999974, count: 122
epoch: 18289, loss: -6.293154001468793e-05, rewards: 8.499999999999986, count: 87
epoch: 18299, loss: -5.472329576150514e-05, rewards: 4.399999999999997, count: 55
epoch: 18309, loss: 1.9664947103592567e-05, rewards: 13.69999999999997, count: 130
epoch: 183

epoch: 19199, loss: -2.4413700884906575e-05, rewards: 18.100000000000012, count: 165
epoch: 19209, loss: 4.896722748526372e-05, rewards: 17.90000000000001, count: 163
epoch: 19219, loss: 5.531057468033396e-05, rewards: 26.90000000000011, count: 235
epoch: 19229, loss: -8.50825454108417e-05, rewards: 4.699999999999996, count: 58
epoch: 19239, loss: 4.075223841937259e-05, rewards: 18.100000000000012, count: 165
epoch: 19249, loss: 9.04113312572008e-06, rewards: 13.399999999999972, count: 127
epoch: 19259, loss: 6.0842736274935305e-05, rewards: 8.999999999999984, count: 92
epoch: 19269, loss: -8.42515000840649e-05, rewards: 8.699999999999985, count: 89
epoch: 19279, loss: 7.372687832685187e-05, rewards: 8.599999999999985, count: 88
epoch: 19289, loss: -4.409187022247352e-05, rewards: 14.99999999999997, count: 134
epoch: 19299, loss: 1.763820728228893e-05, rewards: 3.8999999999999986, count: 50
epoch: 19309, loss: -9.993712410505395e-06, rewards: 8.799999999999985, count: 90
epoch: 19319, 

epoch: 20199, loss: 2.948443125205813e-06, rewards: 8.799999999999985, count: 90
epoch: 20209, loss: 1.3943790690973401e-05, rewards: 13.599999999999971, count: 129
epoch: 20219, loss: -3.84389677492436e-05, rewards: 8.699999999999985, count: 89
epoch: 20229, loss: 4.700341014540754e-05, rewards: 4.399999999999997, count: 55
epoch: 20239, loss: 6.43570747342892e-05, rewards: 8.399999999999986, count: 86
epoch: 20249, loss: -4.313019962864928e-05, rewards: 8.399999999999986, count: 86
epoch: 20259, loss: 6.236309218365932e-06, rewards: 8.399999999999986, count: 86
epoch: 20269, loss: 0.00016327874618582428, rewards: 13.099999999999973, count: 124
epoch: 20279, loss: -2.293392753927037e-06, rewards: 8.399999999999986, count: 86
epoch: 20289, loss: -4.019640255137347e-05, rewards: 8.399999999999986, count: 86
epoch: 20299, loss: 0.00013959042553324252, rewards: 17.90000000000001, count: 163
epoch: 20309, loss: 7.367345824604854e-05, rewards: 18.500000000000018, count: 169
epoch: 20319, lo

epoch: 21199, loss: 5.1156472181901336e-05, rewards: 9.299999999999983, count: 95
epoch: 21209, loss: -6.789913459215313e-05, rewards: 22.700000000000063, count: 202
epoch: 21219, loss: 9.134532774623949e-06, rewards: 12.999999999999973, count: 123
epoch: 21229, loss: 6.505746569018811e-05, rewards: 8.699999999999985, count: 89
epoch: 21239, loss: 2.066578235826455e-05, rewards: 4.4999999999999964, count: 56
epoch: 21249, loss: 8.314331353176385e-05, rewards: 13.399999999999972, count: 127
epoch: 21259, loss: 1.7892109099193476e-05, rewards: 8.699999999999985, count: 89
epoch: 21269, loss: 2.911416049755644e-05, rewards: 6.399999999999993, count: 66
epoch: 21279, loss: 1.829723987611942e-05, rewards: 8.399999999999986, count: 86
epoch: 21289, loss: -2.8547594411065802e-05, rewards: 4.799999999999995, count: 59
epoch: 21299, loss: 4.5790922740707174e-05, rewards: 8.399999999999986, count: 86
epoch: 21309, loss: 4.6907902287784964e-05, rewards: 13.199999999999973, count: 125
epoch: 21319

epoch: 22209, loss: 4.660528793465346e-05, rewards: 8.399999999999986, count: 86
epoch: 22219, loss: 5.281099674903089e-06, rewards: 9.099999999999984, count: 93
epoch: 22229, loss: -7.69088183005806e-06, rewards: 9.299999999999983, count: 95
epoch: 22239, loss: -6.613070581806824e-05, rewards: 8.999999999999984, count: 92
epoch: 22249, loss: 2.9571678169304505e-05, rewards: 8.999999999999984, count: 92
epoch: 22259, loss: -3.5042564832110656e-06, rewards: 27.400000000000116, count: 240
epoch: 22269, loss: 1.4898718291078694e-05, rewards: 12.899999999999974, count: 122
epoch: 22279, loss: 5.5691889428999275e-06, rewards: 13.099999999999973, count: 124
epoch: 22289, loss: 2.8136213586549275e-05, rewards: 18.500000000000018, count: 169
epoch: 22299, loss: 2.2789179638493806e-05, rewards: 4.799999999999995, count: 59
epoch: 22309, loss: -3.135204315185547e-05, rewards: 8.799999999999985, count: 90
epoch: 22319, loss: 3.451585871516727e-05, rewards: 3.8999999999999986, count: 50
epoch: 223

epoch: 23219, loss: 1.3264112567412667e-05, rewards: 8.399999999999986, count: 86
epoch: 23229, loss: 8.599091415817384e-06, rewards: 9.099999999999984, count: 93
epoch: 23239, loss: -3.546232255757786e-05, rewards: 12.899999999999974, count: 122
epoch: 23249, loss: -3.236670818296261e-05, rewards: 8.399999999999986, count: 86
epoch: 23259, loss: 8.947469723352697e-06, rewards: 8.599999999999985, count: 88
epoch: 23269, loss: 3.480326267890632e-05, rewards: 17.90000000000001, count: 163
epoch: 23279, loss: 2.5618206564104185e-05, rewards: 8.899999999999984, count: 91
epoch: 23289, loss: 1.25952929010964e-05, rewards: 8.399999999999986, count: 86
epoch: 23299, loss: 3.1733234209241346e-05, rewards: 8.399999999999986, count: 86
epoch: 23309, loss: -4.186186743027065e-06, rewards: 8.399999999999986, count: 86
epoch: 23319, loss: 2.4077344278339297e-05, rewards: 12.899999999999974, count: 122
epoch: 23329, loss: 2.528345794416964e-05, rewards: 8.399999999999986, count: 86
epoch: 23339, los

epoch: 24239, loss: 1.411438006471144e-05, rewards: 3.8999999999999986, count: 50
epoch: 24249, loss: 2.7671265343087725e-05, rewards: 26.90000000000011, count: 235
epoch: 24259, loss: 5.12886508658994e-05, rewards: 4.099999999999998, count: 52
epoch: 24269, loss: 8.971885108621791e-06, rewards: 8.399999999999986, count: 86
epoch: 24279, loss: 3.560383993317373e-05, rewards: 4.599999999999996, count: 57
epoch: 24289, loss: 0.00014166461187414825, rewards: 12.899999999999974, count: 122
epoch: 24299, loss: 1.5539484593318775e-05, rewards: 8.399999999999986, count: 86
epoch: 24309, loss: -5.7569864111428615e-06, rewards: 4.699999999999996, count: 58
epoch: 24319, loss: -5.7370680224266835e-06, rewards: 12.999999999999973, count: 123
epoch: 24329, loss: -5.581161985901417e-06, rewards: 8.599999999999985, count: 88
epoch: 24339, loss: 4.205972800264135e-05, rewards: 9.099999999999984, count: 93
epoch: 24349, loss: 2.8927479434059933e-05, rewards: 9.099999999999984, count: 93
epoch: 24359, 

epoch: 25249, loss: -0.00010023797949543223, rewards: 4.4999999999999964, count: 56
epoch: 25259, loss: -6.887787458254024e-05, rewards: 4.599999999999996, count: 57
epoch: 25269, loss: 1.8457340047461912e-05, rewards: 8.399999999999986, count: 86
epoch: 25279, loss: 0.00010188236046815291, rewards: 8.399999999999986, count: 86
epoch: 25289, loss: -7.368161459453404e-05, rewards: 8.499999999999986, count: 87
epoch: 25299, loss: 2.7540811061044224e-05, rewards: 8.399999999999986, count: 86
epoch: 25309, loss: -2.970603782159742e-05, rewards: 13.69999999999997, count: 130
epoch: 25319, loss: -7.352926331805065e-05, rewards: 8.399999999999986, count: 86
epoch: 25329, loss: 2.886875518015586e-05, rewards: 18.200000000000014, count: 166
epoch: 25339, loss: 3.4822973248083144e-05, rewards: 8.399999999999986, count: 86
epoch: 25349, loss: 7.329014624701813e-05, rewards: 8.399999999999986, count: 86
epoch: 25359, loss: -9.430124191567302e-05, rewards: 8.699999999999985, count: 89
epoch: 25369,

epoch: 26259, loss: -1.3281901374284644e-05, rewards: 4.299999999999997, count: 54
epoch: 26269, loss: -2.66377319348976e-05, rewards: 8.399999999999986, count: 86
epoch: 26279, loss: -2.1673911760444753e-05, rewards: 13.599999999999971, count: 129
epoch: 26289, loss: 9.409216545464005e-06, rewards: 8.399999999999986, count: 86
epoch: 26299, loss: 1.9612933101598173e-05, rewards: 27.200000000000113, count: 238
epoch: 26309, loss: -5.481662356032757e-06, rewards: 8.899999999999984, count: 91
epoch: 26319, loss: -7.005500083323568e-05, rewards: 12.899999999999974, count: 122
epoch: 26329, loss: 3.9709466364001855e-05, rewards: 4.4999999999999964, count: 56
epoch: 26339, loss: -2.5639479645178653e-05, rewards: 8.599999999999985, count: 88
epoch: 26349, loss: 5.780529318144545e-05, rewards: 22.700000000000063, count: 202
epoch: 26359, loss: 1.5395364243886434e-05, rewards: 13.399999999999972, count: 127
epoch: 26369, loss: -4.365347194834612e-05, rewards: 8.399999999999986, count: 86
epoch

epoch: 27259, loss: -2.4289398425025865e-05, rewards: 4.799999999999995, count: 59
epoch: 27269, loss: 2.34737108257832e-05, rewards: 6.5999999999999925, count: 68
epoch: 27279, loss: 0.0001033337612170726, rewards: 27.60000000000012, count: 242
epoch: 27289, loss: -3.2446212571812794e-05, rewards: 4.4999999999999964, count: 56
epoch: 27299, loss: -1.659681038290728e-05, rewards: 8.499999999999986, count: 87
epoch: 27309, loss: 5.8757847000379115e-05, rewards: 8.499999999999986, count: 87
epoch: 27319, loss: -6.305320130195469e-05, rewards: 4.4999999999999964, count: 56
epoch: 27329, loss: -7.32260596123524e-05, rewards: 6.5999999999999925, count: 68
epoch: 27339, loss: -3.166808528476395e-05, rewards: 8.399999999999986, count: 86
epoch: 27349, loss: -1.0674712029867806e-05, rewards: 8.499999999999986, count: 87
epoch: 27359, loss: -6.966757064219564e-05, rewards: 12.899999999999974, count: 122
epoch: 27369, loss: -5.253791641735006e-06, rewards: 13.199999999999973, count: 125
epoch: 2

epoch: 28269, loss: 9.802884960663505e-06, rewards: 8.399999999999986, count: 86
epoch: 28279, loss: 2.6640684609446907e-06, rewards: 8.999999999999984, count: 92
epoch: 28289, loss: -9.668267011875287e-06, rewards: 27.60000000000012, count: 242
epoch: 28299, loss: 3.5920234040531795e-06, rewards: 4.1999999999999975, count: 53
epoch: 28309, loss: -1.746525776979979e-05, rewards: 17.90000000000001, count: 163
epoch: 28319, loss: -1.216649980051443e-05, rewards: 3.8999999999999986, count: 50
epoch: 28329, loss: -7.1057252171158325e-06, rewards: 18.400000000000016, count: 168
epoch: 28339, loss: -1.6011548723326996e-05, rewards: 8.699999999999985, count: 89
epoch: 28349, loss: -8.020629138627555e-06, rewards: 9.199999999999983, count: 94
epoch: 28359, loss: -1.464666911488166e-05, rewards: 8.699999999999985, count: 89
epoch: 28369, loss: -2.6496609280002303e-05, rewards: 8.699999999999985, count: 89
epoch: 28379, loss: -1.8694372556637973e-05, rewards: 8.399999999999986, count: 86
epoch: 

epoch: 29269, loss: 3.858776835841127e-05, rewards: 8.399999999999986, count: 86
epoch: 29279, loss: -0.00013967585982754827, rewards: 8.399999999999986, count: 86
epoch: 29289, loss: -0.00010433632269268855, rewards: 18.300000000000015, count: 167
epoch: 29299, loss: 0.00016668390890117735, rewards: 13.299999999999972, count: 126
epoch: 29309, loss: -5.563751619774848e-05, rewards: 12.899999999999974, count: 122
epoch: 29319, loss: -5.585276448982768e-05, rewards: 18.300000000000015, count: 167
epoch: 29329, loss: -2.3126602172851562e-05, rewards: 8.499999999999986, count: 87
epoch: 29339, loss: 2.085608139168471e-05, rewards: 8.399999999999986, count: 86
epoch: 29349, loss: 3.969953104387969e-05, rewards: 18.400000000000016, count: 168
epoch: 29359, loss: 1.859390795289073e-05, rewards: 8.499999999999986, count: 87
epoch: 29369, loss: 4.076056575286202e-05, rewards: 8.399999999999986, count: 86
epoch: 29379, loss: 4.321549567976035e-05, rewards: 18.400000000000016, count: 168
epoch: 

epoch: 30259, loss: -5.112244707561331e-06, rewards: 23.30000000000007, count: 208
epoch: 30269, loss: -4.3936724978266284e-05, rewards: 23.10000000000007, count: 206
epoch: 30279, loss: -8.475584763800725e-06, rewards: 27.80000000000012, count: 244
epoch: 30289, loss: 2.3846625936130295e-06, rewards: 13.199999999999973, count: 125
epoch: 30299, loss: -6.898891115270089e-06, rewards: 8.399999999999986, count: 86
epoch: 30309, loss: 1.832991802075412e-05, rewards: 27.400000000000116, count: 240
epoch: 30319, loss: 5.2723728003911674e-05, rewards: 12.899999999999974, count: 122
epoch: 30329, loss: 4.59088232673821e-06, rewards: 8.799999999999985, count: 90
epoch: 30339, loss: -0.0001217208118760027, rewards: 8.599999999999985, count: 88
epoch: 30349, loss: 7.36462461645715e-05, rewards: 9.299999999999983, count: 95
epoch: 30359, loss: -5.288202373776585e-06, rewards: 12.899999999999974, count: 122
epoch: 30369, loss: 3.3417686040593253e-07, rewards: 12.899999999999974, count: 122
epoch: 

epoch: 31259, loss: 0.0004471713618841022, rewards: 8.399999999999986, count: 86
epoch: 31269, loss: 0.00021222066425252706, rewards: 8.699999999999985, count: 89
epoch: 31279, loss: -4.8601436901662964e-06, rewards: 13.299999999999972, count: 126
epoch: 31289, loss: -4.8322734073735774e-05, rewards: 8.399999999999986, count: 86
epoch: 31299, loss: 0.00013545174442697316, rewards: 17.90000000000001, count: 163
epoch: 31309, loss: 0.00011258447921136394, rewards: 9.399999999999983, count: 96
epoch: 31319, loss: 0.00030003246502019465, rewards: 8.399999999999986, count: 86
epoch: 31329, loss: 0.0001942664384841919, rewards: 13.499999999999972, count: 128
epoch: 31339, loss: 0.00014938562526367605, rewards: 27.400000000000116, count: 240
epoch: 31349, loss: 0.00012001118011539802, rewards: 8.399999999999986, count: 86
epoch: 31359, loss: 4.083671956323087e-05, rewards: 8.499999999999986, count: 87
epoch: 31369, loss: 1.3449034668155946e-05, rewards: 8.899999999999984, count: 91
epoch: 313

epoch: 32279, loss: 0.00019925355445593596, rewards: 3.8999999999999986, count: 50
epoch: 32289, loss: 0.00010989427391905338, rewards: 3.8999999999999986, count: 50
epoch: 32299, loss: 1.1174678547831718e-05, rewards: 3.8999999999999986, count: 50
epoch: 32309, loss: -3.5152435884810984e-05, rewards: 3.8999999999999986, count: 50
epoch: 32319, loss: -8.403539686696604e-05, rewards: 3.8999999999999986, count: 50
epoch: 32329, loss: -3.6692617868538946e-05, rewards: 3.8999999999999986, count: 50
epoch: 32339, loss: -7.385850040009245e-05, rewards: 3.8999999999999986, count: 50
epoch: 32349, loss: -1.4423131688090507e-05, rewards: 3.8999999999999986, count: 50
epoch: 32359, loss: 3.566503437468782e-05, rewards: 3.8999999999999986, count: 50
epoch: 32369, loss: -7.011890374997165e-06, rewards: 3.8999999999999986, count: 50
epoch: 32379, loss: -1.5960931705194525e-05, rewards: 3.8999999999999986, count: 50
epoch: 32389, loss: -5.904674617340788e-05, rewards: 3.8999999999999986, count: 50
e

epoch: 33269, loss: 6.259679594222689e-06, rewards: 3.8999999999999986, count: 50
epoch: 33279, loss: 1.3029575711698271e-05, rewards: 3.8999999999999986, count: 50
epoch: 33289, loss: -1.1720657312253024e-05, rewards: 3.8999999999999986, count: 50
epoch: 33299, loss: 2.332687290618196e-05, rewards: 2.6999999999999993, count: 50
epoch: 33309, loss: -1.2017488188575953e-05, rewards: 3.8999999999999986, count: 50
epoch: 33319, loss: 1.7845630964075099e-06, rewards: 3.8999999999999986, count: 50
epoch: 33329, loss: 1.0660886800906155e-05, rewards: 3.8999999999999986, count: 50
epoch: 33339, loss: -8.652210453874432e-06, rewards: 3.8999999999999986, count: 50
epoch: 33349, loss: 7.004737653915072e-06, rewards: 3.8999999999999986, count: 50
epoch: 33359, loss: -1.597166010469664e-05, rewards: 3.8999999999999986, count: 50
epoch: 33369, loss: 1.29008294607047e-05, rewards: 3.8999999999999986, count: 50
epoch: 33379, loss: 1.16944311230327e-05, rewards: 3.8999999999999986, count: 50
epoch: 33

epoch: 34259, loss: 5.7421922974754125e-05, rewards: 3.8999999999999986, count: 50
epoch: 34269, loss: -3.7770270864712074e-05, rewards: 3.8999999999999986, count: 50
epoch: 34279, loss: 7.757425191812217e-05, rewards: 3.299999999999999, count: 50
epoch: 34289, loss: -1.8239020960209018e-07, rewards: 3.8999999999999986, count: 50
epoch: 34299, loss: -3.961682159570046e-05, rewards: 3.8999999999999986, count: 50
epoch: 34309, loss: 5.453467383631505e-05, rewards: 1.5, count: 50
epoch: 34319, loss: 2.8307438697083853e-05, rewards: 3.299999999999999, count: 50
epoch: 34329, loss: -5.2624942327383906e-05, rewards: -3.299999999999998, count: 50
epoch: 34339, loss: 8.671283467265312e-06, rewards: -3.299999999999998, count: 50
epoch: 34349, loss: -0.00014049767924007028, rewards: 2.0999999999999996, count: 50
epoch: 34359, loss: 1.8575192370917648e-05, rewards: 1.5, count: 50
epoch: 34369, loss: 7.778167491778731e-05, rewards: 3.8999999999999986, count: 50
epoch: 34379, loss: -2.7581452741287

epoch: 35279, loss: -1.2151002920290921e-05, rewards: 3.8999999999999986, count: 50
epoch: 35289, loss: -6.531477083626669e-06, rewards: 3.8999999999999986, count: 50
epoch: 35299, loss: -3.199100319761783e-05, rewards: 3.8999999999999986, count: 50
epoch: 35309, loss: -1.0340213520976249e-05, rewards: 3.8999999999999986, count: 50
epoch: 35319, loss: 1.0087490409205202e-05, rewards: 3.8999999999999986, count: 50
epoch: 35329, loss: 6.111860329838237e-06, rewards: 3.8999999999999986, count: 50
epoch: 35339, loss: -1.6793012036941946e-05, rewards: 3.8999999999999986, count: 50
epoch: 35349, loss: -5.372762643673923e-06, rewards: 3.8999999999999986, count: 50
epoch: 35359, loss: 1.2154579053458292e-05, rewards: 3.8999999999999986, count: 50
epoch: 35369, loss: 4.278421329217963e-06, rewards: 3.8999999999999986, count: 50
epoch: 35379, loss: -3.249645260439138e-06, rewards: 3.8999999999999986, count: 50
epoch: 35389, loss: 3.211498324162676e-06, rewards: 3.8999999999999986, count: 50
epoc

epoch: 36279, loss: -7.801789615768939e-05, rewards: 8.899999999999984, count: 91
epoch: 36289, loss: 2.2306106984615326e-05, rewards: 13.499999999999972, count: 128
epoch: 36299, loss: 4.130308661842719e-05, rewards: 20.800000000000036, count: 183
epoch: 36309, loss: -5.940365099377232e-06, rewards: 8.399999999999986, count: 86
epoch: 36319, loss: -3.047589234483894e-05, rewards: 17.90000000000001, count: 163
epoch: 36329, loss: 2.226368997071404e-05, rewards: 8.599999999999985, count: 88
epoch: 36339, loss: 2.7583817427512258e-05, rewards: 4.799999999999995, count: 59
epoch: 36349, loss: -7.298422133317217e-05, rewards: 13.199999999999973, count: 125
epoch: 36359, loss: 9.985164069803432e-06, rewards: 8.399999999999986, count: 86
epoch: 36369, loss: 3.701517925946973e-05, rewards: 8.399999999999986, count: 86
epoch: 36379, loss: 2.1006379029131494e-05, rewards: 4.4999999999999964, count: 56
epoch: 36389, loss: -4.074519165442325e-05, rewards: 8.599999999999985, count: 88
epoch: 36399

epoch: 37279, loss: -2.4195227524614893e-05, rewards: 4.4999999999999964, count: 56
epoch: 37289, loss: 2.475099972798489e-05, rewards: 8.499999999999986, count: 87
epoch: 37299, loss: 4.784129032486817e-06, rewards: 8.499999999999986, count: 87
epoch: 37309, loss: -9.934748050000053e-06, rewards: 9.099999999999984, count: 93
epoch: 37319, loss: 2.5588205971871503e-05, rewards: 8.499999999999986, count: 87
epoch: 37329, loss: 4.506800905801356e-05, rewards: 27.60000000000012, count: 242
epoch: 37339, loss: 9.607295396563131e-06, rewards: 27.900000000000123, count: 245
epoch: 37349, loss: -2.862465953512583e-05, rewards: 9.299999999999983, count: 95
epoch: 37359, loss: -4.7529698349535465e-05, rewards: 13.199999999999973, count: 125
epoch: 37369, loss: -6.8203785303921904e-06, rewards: 8.699999999999985, count: 89
epoch: 37379, loss: 1.8402934074401855e-05, rewards: 13.499999999999972, count: 128
epoch: 37389, loss: 1.2748183507937938e-05, rewards: 13.099999999999973, count: 124
epoch: 

epoch: 38279, loss: -2.427503204671666e-05, rewards: 8.399999999999986, count: 86
epoch: 38289, loss: 3.4265045542269945e-05, rewards: 8.399999999999986, count: 86
epoch: 38299, loss: -1.915611937874928e-05, rewards: 8.599999999999985, count: 88
epoch: 38309, loss: 1.4017987268744037e-05, rewards: 9.099999999999984, count: 93
epoch: 38319, loss: -7.415800064336509e-05, rewards: 8.899999999999984, count: 91
epoch: 38329, loss: -5.244955173111521e-05, rewards: 9.199999999999983, count: 94
epoch: 38339, loss: 0.00013562088133767247, rewards: 8.599999999999985, count: 88
epoch: 38349, loss: -0.00010801897587953135, rewards: 17.90000000000001, count: 163
epoch: 38359, loss: 7.291651127161458e-05, rewards: 12.899999999999974, count: 122
epoch: 38369, loss: -0.00018241512589156628, rewards: 12.899999999999974, count: 122
epoch: 38379, loss: 3.6800294765271246e-05, rewards: 12.999999999999973, count: 123
epoch: 38389, loss: -4.724496102426201e-05, rewards: 4.299999999999997, count: 54
epoch: 3

epoch: 39289, loss: -5.344152305042371e-05, rewards: 8.799999999999985, count: 90
epoch: 39299, loss: 5.773639713879675e-05, rewards: 13.199999999999973, count: 125
epoch: 39309, loss: 2.4242455765488558e-05, rewards: 8.399999999999986, count: 86
epoch: 39319, loss: -1.2614006550393242e-07, rewards: 8.399999999999986, count: 86
epoch: 39329, loss: -2.807202690746635e-06, rewards: 27.10000000000011, count: 237
epoch: 39339, loss: 2.198649053752888e-05, rewards: 8.399999999999986, count: 86
epoch: 39349, loss: 2.3218088074372645e-07, rewards: 8.399999999999986, count: 86
epoch: 39359, loss: 3.3219192118849605e-05, rewards: 8.399999999999986, count: 86
epoch: 39369, loss: 3.4516779123805463e-06, rewards: 22.40000000000006, count: 199
epoch: 39379, loss: 7.310003638849594e-06, rewards: 4.1999999999999975, count: 53
epoch: 39389, loss: -1.5988069890227052e-06, rewards: 27.200000000000113, count: 238
epoch: 39399, loss: -3.060490416828543e-05, rewards: 8.399999999999986, count: 86
epoch: 394

epoch: 40289, loss: 7.827366971469019e-06, rewards: 4.4999999999999964, count: 56
epoch: 40299, loss: 1.627206802368164e-05, rewards: 36.40000000000021, count: 312
epoch: 40309, loss: -2.1434221707750112e-05, rewards: 12.899999999999974, count: 122
epoch: 40319, loss: 1.4610068319598213e-06, rewards: 13.599999999999971, count: 129
epoch: 40329, loss: 1.2398459148244001e-05, rewards: 8.399999999999986, count: 86
epoch: 40339, loss: 2.2273172362474725e-05, rewards: 8.599999999999985, count: 88
epoch: 40349, loss: 1.749501097947359e-05, rewards: 22.40000000000006, count: 199
epoch: 40359, loss: 6.74222392262891e-05, rewards: 13.399999999999972, count: 127
epoch: 40369, loss: 4.911214637104422e-05, rewards: 8.399999999999986, count: 86
epoch: 40379, loss: 2.819576911861077e-05, rewards: 8.399999999999986, count: 86
epoch: 40389, loss: 6.705810665152967e-05, rewards: 13.099999999999973, count: 124
epoch: 40399, loss: 9.863520972430706e-05, rewards: 8.899999999999984, count: 91
epoch: 40409,

epoch: 41299, loss: 8.657574653625488e-06, rewards: 4.4999999999999964, count: 56
epoch: 41309, loss: 2.118827796948608e-05, rewards: 13.399999999999972, count: 127
epoch: 41319, loss: -2.535969770178781e-06, rewards: 8.399999999999986, count: 86
epoch: 41329, loss: 3.331058542244136e-05, rewards: 17.90000000000001, count: 163
epoch: 41339, loss: 1.604265889909584e-05, rewards: 8.399999999999986, count: 86
epoch: 41349, loss: 1.0330786608392373e-05, rewards: 8.499999999999986, count: 87
epoch: 41359, loss: 9.101490832108539e-06, rewards: 8.399999999999986, count: 86
epoch: 41369, loss: -3.372103674337268e-05, rewards: 3.9999999999999982, count: 51
epoch: 41379, loss: -0.0001569223968544975, rewards: 8.399999999999986, count: 86
epoch: 41389, loss: -0.0001429028488928452, rewards: 13.199999999999973, count: 125
epoch: 41399, loss: -3.181086140102707e-05, rewards: 8.399999999999986, count: 86
epoch: 41409, loss: 9.165469964500517e-05, rewards: 26.90000000000011, count: 235
epoch: 41419, 

epoch: 42309, loss: -7.4231625148968305e-06, rewards: 3.8999999999999986, count: 50
epoch: 42319, loss: 4.919290586258285e-05, rewards: 3.8999999999999986, count: 50
epoch: 42329, loss: -8.922219421947375e-05, rewards: 3.8999999999999986, count: 50
epoch: 42339, loss: -6.511211267934414e-06, rewards: 3.8999999999999986, count: 50
epoch: 42349, loss: -8.628130308352411e-05, rewards: 3.8999999999999986, count: 50
epoch: 42359, loss: -0.00015045642794575542, rewards: 3.8999999999999986, count: 50
epoch: 42369, loss: 0.0001561176759423688, rewards: 3.8999999999999986, count: 50
epoch: 42379, loss: -4.176974471192807e-05, rewards: 3.8999999999999986, count: 50
epoch: 42389, loss: -4.755497138830833e-05, rewards: 3.8999999999999986, count: 50
epoch: 42399, loss: -2.1414756702142768e-05, rewards: 3.8999999999999986, count: 50
epoch: 42409, loss: -2.88856026600115e-05, rewards: 3.8999999999999986, count: 50
epoch: 42419, loss: -9.052753739524633e-05, rewards: 3.8999999999999986, count: 50
epoc

epoch: 43319, loss: 5.295276423566975e-06, rewards: 3.8999999999999986, count: 50
epoch: 43329, loss: -1.975536360987462e-05, rewards: -3.299999999999998, count: 50
epoch: 43339, loss: 5.078315552964341e-06, rewards: 0.9000000000000004, count: 50
epoch: 43349, loss: 1.3022422535868827e-05, rewards: 2.700000000000001, count: 50
epoch: 43359, loss: -6.823539933975553e-06, rewards: 3.8999999999999986, count: 50
epoch: 43369, loss: -6.607770956179593e-06, rewards: 3.8999999999999986, count: 50
epoch: 43379, loss: 3.3533572150190594e-06, rewards: 3.8999999999999986, count: 50
epoch: 43389, loss: 8.630752290628152e-07, rewards: 3.8999999999999986, count: 50
epoch: 43399, loss: -2.441406195430318e-06, rewards: 3.8999999999999986, count: 50
epoch: 43409, loss: 1.4948844864193234e-06, rewards: 3.8999999999999986, count: 50
epoch: 43419, loss: -6.2680246628588066e-06, rewards: 3.8999999999999986, count: 50
epoch: 43429, loss: -1.2540817806439009e-06, rewards: 3.8999999999999986, count: 50
epoch:

epoch: 44319, loss: -1.1588251709326869e-06, rewards: 8.399999999999986, count: 86
epoch: 44329, loss: 2.4651124022057047e-06, rewards: 12.999999999999973, count: 123
epoch: 44339, loss: -6.858556389488513e-06, rewards: 17.90000000000001, count: 163
epoch: 44349, loss: -3.286587343609426e-06, rewards: 9.099999999999984, count: 93
epoch: 44359, loss: 1.5499979781452566e-05, rewards: 8.399999999999986, count: 86
epoch: 44369, loss: -2.086231870634947e-05, rewards: 8.399999999999986, count: 86
epoch: 44379, loss: 2.2377385903382674e-05, rewards: 8.399999999999986, count: 86
epoch: 44389, loss: -5.374120974011021e-06, rewards: 8.399999999999986, count: 86
epoch: 44399, loss: 1.0922672117885668e-06, rewards: 12.999999999999973, count: 123
epoch: 44409, loss: -1.2160040569142438e-05, rewards: 8.399999999999986, count: 86
epoch: 44419, loss: 3.710228838826879e-06, rewards: 9.099999999999984, count: 93
epoch: 44429, loss: 1.230004181707045e-05, rewards: 8.399999999999986, count: 86
epoch: 4443

epoch: 45319, loss: 2.713039066293277e-06, rewards: 4.699999999999996, count: 58
epoch: 45329, loss: 5.4921219998504966e-05, rewards: 13.399999999999972, count: 127
epoch: 45339, loss: -2.0827565094805323e-05, rewards: 8.899999999999984, count: 91
epoch: 45349, loss: -1.8275753973284736e-05, rewards: 8.399999999999986, count: 86
epoch: 45359, loss: -2.6374191293143667e-05, rewards: 31.900000000000162, count: 276
epoch: 45369, loss: 2.069112815661356e-05, rewards: 8.399999999999986, count: 86
epoch: 45379, loss: -9.902062174660387e-07, rewards: 9.099999999999984, count: 93
epoch: 45389, loss: -7.435565748892259e-06, rewards: 13.79999999999997, count: 131
epoch: 45399, loss: -5.100355792819755e-06, rewards: 8.399999999999986, count: 86
epoch: 45409, loss: 9.498623512627091e-06, rewards: 8.399999999999986, count: 86
epoch: 45419, loss: -5.986690666759387e-06, rewards: 22.50000000000006, count: 200
epoch: 45429, loss: -2.0014686015201733e-05, rewards: 8.399999999999986, count: 86
epoch: 45

epoch: 46329, loss: 1.0755173207144253e-05, rewards: 8.399999999999986, count: 86
epoch: 46339, loss: -6.682908860966563e-06, rewards: 22.40000000000006, count: 199
epoch: 46349, loss: 2.097421202051919e-05, rewards: 8.799999999999985, count: 90
epoch: 46359, loss: -6.444925475079799e-06, rewards: 8.399999999999986, count: 86
epoch: 46369, loss: -4.15973445342388e-05, rewards: 8.699999999999985, count: 89
epoch: 46379, loss: 1.2571035767905414e-05, rewards: 8.399999999999986, count: 86
epoch: 46389, loss: -1.109266304410994e-05, rewards: 13.199999999999973, count: 125
epoch: 46399, loss: -1.1657949471555185e-05, rewards: 22.40000000000006, count: 199
epoch: 46409, loss: 4.944625743519282e-06, rewards: 17.90000000000001, count: 163
epoch: 46419, loss: 2.959133780677803e-05, rewards: 27.300000000000114, count: 239
epoch: 46429, loss: 5.344076726032654e-06, rewards: 6.099999999999994, count: 63
epoch: 46439, loss: -3.9117280721256975e-06, rewards: 8.399999999999986, count: 86
epoch: 46449

epoch: 47329, loss: 4.1743369365576655e-05, rewards: 27.10000000000011, count: 237
epoch: 47339, loss: -2.8298345569055527e-06, rewards: 8.399999999999986, count: 86
epoch: 47349, loss: -2.4568127628299408e-05, rewards: 8.999999999999984, count: 92
epoch: 47359, loss: -1.913338383019436e-05, rewards: 22.800000000000065, count: 203
epoch: 47369, loss: -0.000104712882603053, rewards: 8.399999999999986, count: 86
epoch: 47379, loss: 6.633673183387145e-05, rewards: 13.599999999999971, count: 129
epoch: 47389, loss: -6.749989552190527e-06, rewards: 13.299999999999972, count: 126
epoch: 47399, loss: -1.1735184671124443e-05, rewards: 8.399999999999986, count: 86
epoch: 47409, loss: 7.505845132982358e-05, rewards: 23.10000000000007, count: 206
epoch: 47419, loss: 3.7614788652717834e-06, rewards: 4.4999999999999964, count: 56
epoch: 47429, loss: -2.2582469682674855e-05, rewards: 9.099999999999984, count: 93
epoch: 47439, loss: 2.279669752169866e-05, rewards: 8.399999999999986, count: 86
epoch: 

epoch: 48329, loss: 2.8481670142355142e-06, rewards: 3.9999999999999982, count: 51
epoch: 48339, loss: -6.037950697646011e-06, rewards: 3.8999999999999986, count: 50
epoch: 48349, loss: -8.1050393418991e-06, rewards: 3.8999999999999986, count: 50
epoch: 48359, loss: 1.7617940102354623e-05, rewards: 3.8999999999999986, count: 50
epoch: 48369, loss: -5.1691531552933156e-05, rewards: 3.8999999999999986, count: 50
epoch: 48379, loss: 1.8870830899686553e-05, rewards: 3.8999999999999986, count: 50
epoch: 48389, loss: 9.784221765585244e-05, rewards: 3.8999999999999986, count: 50
epoch: 48399, loss: -0.00010078310879180208, rewards: 3.8999999999999986, count: 50
epoch: 48409, loss: -2.359509380767122e-05, rewards: 3.8999999999999986, count: 50
epoch: 48419, loss: -7.820605969754979e-05, rewards: 3.8999999999999986, count: 50
epoch: 48429, loss: 3.75211238861084e-05, rewards: 4.099999999999998, count: 52
epoch: 48439, loss: 4.6394619857892394e-05, rewards: 3.9999999999999982, count: 51
epoch: 4

epoch: 49329, loss: -2.987856078107143e-06, rewards: 18.800000000000022, count: 172
epoch: 49339, loss: 2.9142140192561783e-05, rewards: 18.70000000000002, count: 171
epoch: 49349, loss: 3.1966152164386585e-05, rewards: 6.399999999999993, count: 66
epoch: 49359, loss: 5.6415796279907227e-05, rewards: 4.099999999999998, count: 52
epoch: 49369, loss: 4.153155896347016e-05, rewards: 22.40000000000006, count: 199
epoch: 49379, loss: 5.753416644438403e-06, rewards: 9.299999999999983, count: 95
epoch: 49389, loss: -3.608693077694625e-05, rewards: 8.699999999999985, count: 89
epoch: 49399, loss: -9.639777999836951e-06, rewards: 22.40000000000006, count: 199
epoch: 49409, loss: 4.014996648038505e-06, rewards: 8.399999999999986, count: 86
epoch: 49419, loss: -1.3141255294613075e-05, rewards: 9.299999999999983, count: 95
epoch: 49429, loss: 1.1009693480446003e-05, rewards: 13.199999999999973, count: 125
epoch: 49439, loss: -5.185604095458984e-06, rewards: 8.399999999999986, count: 86
epoch: 4944

epoch: 50339, loss: 1.4977399587223772e-06, rewards: 8.399999999999986, count: 86
epoch: 50349, loss: -2.426394166832324e-05, rewards: 8.399999999999986, count: 86
epoch: 50359, loss: 2.9274786356836557e-05, rewards: 8.499999999999986, count: 87
epoch: 50369, loss: 1.533572140033357e-05, rewards: 8.399999999999986, count: 86
epoch: 50379, loss: 3.569347882148577e-06, rewards: 8.399999999999986, count: 86
epoch: 50389, loss: 6.2866415646567475e-06, rewards: 26.90000000000011, count: 235
epoch: 50399, loss: -4.200116109132068e-06, rewards: 17.90000000000001, count: 163
epoch: 50409, loss: 3.4862427128246054e-05, rewards: 22.40000000000006, count: 199
epoch: 50419, loss: 4.8703091124480125e-06, rewards: 41.30000000000027, count: 352
epoch: 50429, loss: -4.5926947223051684e-07, rewards: 9.299999999999983, count: 95
epoch: 50439, loss: 5.441095254354877e-06, rewards: 22.40000000000006, count: 199
epoch: 50449, loss: -1.3767979908152483e-05, rewards: 8.399999999999986, count: 86
epoch: 50459

epoch: 51349, loss: -2.630713424878195e-05, rewards: 8.399999999999986, count: 86
epoch: 51359, loss: -8.704471838427708e-05, rewards: 13.199999999999973, count: 125
epoch: 51369, loss: -2.343450887565268e-06, rewards: 22.40000000000006, count: 199
epoch: 51379, loss: -1.7920805476023816e-05, rewards: 27.900000000000123, count: 245
epoch: 51389, loss: 1.4276005458668806e-05, rewards: 8.399999999999986, count: 86
epoch: 51399, loss: 3.2766929507488385e-05, rewards: 23.10000000000007, count: 206
epoch: 51409, loss: 3.180531621183036e-06, rewards: 8.399999999999986, count: 86
epoch: 51419, loss: 1.1116042514913715e-05, rewards: 13.99999999999997, count: 133
epoch: 51429, loss: -4.924746008327929e-06, rewards: 18.60000000000002, count: 170
epoch: 51439, loss: 1.6137713828356937e-05, rewards: 12.899999999999974, count: 122
epoch: 51449, loss: 2.1881141947233118e-05, rewards: 8.399999999999986, count: 86
epoch: 51459, loss: -1.9402352336328477e-05, rewards: 36.700000000000216, count: 315
epo

epoch: 52349, loss: 1.955638435902074e-05, rewards: 27.00000000000011, count: 236
epoch: 52359, loss: -8.780023563303985e-06, rewards: 6.699999999999992, count: 69
epoch: 52369, loss: -4.015289960079826e-06, rewards: 4.099999999999998, count: 52
epoch: 52379, loss: 1.0458861652296036e-05, rewards: 3.9999999999999982, count: 51
epoch: 52389, loss: 8.794068889983464e-06, rewards: 3.8999999999999986, count: 50
epoch: 52399, loss: -4.472732598514995e-06, rewards: 3.8999999999999986, count: 50
epoch: 52409, loss: -2.8260947146918625e-05, rewards: 3.8999999999999986, count: 50
epoch: 52419, loss: -1.366734522889601e-05, rewards: 3.8999999999999986, count: 50
epoch: 52429, loss: -2.3996828986128094e-06, rewards: 3.8999999999999986, count: 50
epoch: 52439, loss: -8.249282927863533e-07, rewards: 3.8999999999999986, count: 50
epoch: 52449, loss: 1.7524957002024166e-05, rewards: 3.8999999999999986, count: 50
epoch: 52459, loss: 2.6179552151006646e-05, rewards: 3.8999999999999986, count: 50
epoch:

epoch: 53359, loss: -4.113674003747292e-05, rewards: -9.299999999999999, count: 50
epoch: 53369, loss: 3.770470721065067e-05, rewards: -9.299999999999999, count: 50
epoch: 53379, loss: 6.214380391611485e-06, rewards: -9.299999999999999, count: 50
epoch: 53389, loss: 4.653811629395932e-05, rewards: -9.299999999999999, count: 50
epoch: 53399, loss: -7.282614387804642e-05, rewards: -9.299999999999999, count: 50
epoch: 53409, loss: -3.4334658266743645e-05, rewards: -9.299999999999999, count: 50
epoch: 53419, loss: -4.2812825995497406e-05, rewards: -9.299999999999999, count: 50
epoch: 53429, loss: -9.104132914217189e-05, rewards: -9.299999999999999, count: 50
epoch: 53439, loss: -0.00015230417193379253, rewards: -9.299999999999999, count: 50
epoch: 53449, loss: -0.0001515162002760917, rewards: -9.299999999999999, count: 50
epoch: 53459, loss: 4.8228503146674484e-05, rewards: -9.299999999999999, count: 50
epoch: 53469, loss: -5.002021680411417e-06, rewards: -9.299999999999999, count: 50
epoc

epoch: 54349, loss: 2.17318529394106e-06, rewards: -9.299999999999999, count: 50
epoch: 54359, loss: -2.3334026991506107e-05, rewards: -9.299999999999999, count: 50
epoch: 54369, loss: -7.425546755257528e-06, rewards: -9.299999999999999, count: 50
epoch: 54379, loss: 2.1343230400816537e-05, rewards: -9.299999999999999, count: 50
epoch: 54389, loss: -1.2278557051104144e-06, rewards: -9.299999999999999, count: 50
epoch: 54399, loss: -4.4500829972093925e-06, rewards: -9.299999999999999, count: 50
epoch: 54409, loss: -1.1341571735101752e-05, rewards: -9.299999999999999, count: 50
epoch: 54419, loss: 1.3976096852275077e-05, rewards: -9.299999999999999, count: 50
epoch: 54429, loss: 2.049923023150768e-05, rewards: -9.299999999999999, count: 50
epoch: 54439, loss: -3.772020500036888e-05, rewards: -9.299999999999999, count: 50
epoch: 54449, loss: -0.00015465736214537174, rewards: -9.299999999999999, count: 50
epoch: 54459, loss: 8.655428973725066e-05, rewards: -9.299999999999999, count: 50
epo

epoch: 55339, loss: 1.527071071905084e-05, rewards: -9.299999999999999, count: 50
epoch: 55349, loss: 2.0964145733159967e-05, rewards: -9.299999999999999, count: 50
epoch: 55359, loss: 2.804994619509671e-05, rewards: -9.299999999999999, count: 50
epoch: 55369, loss: 5.164504182175733e-05, rewards: -9.299999999999999, count: 50
epoch: 55379, loss: 6.234287866391242e-05, rewards: -9.299999999999999, count: 50
epoch: 55389, loss: 3.38017925969325e-05, rewards: -9.299999999999999, count: 50
epoch: 55399, loss: 4.637241545424331e-06, rewards: -9.299999999999999, count: 50
epoch: 55409, loss: 6.422996648325352e-06, rewards: -9.299999999999999, count: 50
epoch: 55419, loss: 1.2069940567016602e-05, rewards: -9.299999999999999, count: 50
epoch: 55429, loss: 2.6488303319638362e-06, rewards: -9.299999999999999, count: 50
epoch: 55439, loss: -4.3606760300463066e-06, rewards: -9.299999999999999, count: 50
epoch: 55449, loss: 6.464719717769185e-06, rewards: -9.299999999999999, count: 50
epoch: 55459

epoch: 56359, loss: -1.130938562710071e-05, rewards: -9.299999999999999, count: 50
epoch: 56369, loss: -9.527206202619709e-06, rewards: -9.299999999999999, count: 50
epoch: 56379, loss: 1.4997720427345484e-05, rewards: -9.299999999999999, count: 50
epoch: 56389, loss: 1.4102459317655303e-05, rewards: -9.299999999999999, count: 50
epoch: 56399, loss: -2.637386387505103e-05, rewards: -9.299999999999999, count: 50
epoch: 56409, loss: 9.632110504753655e-07, rewards: -9.299999999999999, count: 50
epoch: 56419, loss: 5.911469634156674e-05, rewards: -9.299999999999999, count: 50
epoch: 56429, loss: -5.812763993162662e-05, rewards: -9.299999999999999, count: 50
epoch: 56439, loss: 3.24296961480286e-05, rewards: -9.299999999999999, count: 50
epoch: 56449, loss: 1.6355514844690333e-06, rewards: -9.299999999999999, count: 50
epoch: 56459, loss: -1.3659000615007244e-05, rewards: -9.299999999999999, count: 50
epoch: 56469, loss: -1.012802113109501e-05, rewards: -9.299999999999999, count: 50
epoch: 

epoch: 57379, loss: -6.2549115682486445e-06, rewards: -9.299999999999999, count: 50
epoch: 57389, loss: -2.9724837077083066e-05, rewards: -9.299999999999999, count: 50
epoch: 57399, loss: -1.5342235428761342e-06, rewards: -9.299999999999999, count: 50
epoch: 57409, loss: 2.182841308240313e-05, rewards: -9.299999999999999, count: 50
epoch: 57419, loss: 4.201412230031565e-05, rewards: -9.299999999999999, count: 50
epoch: 57429, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 57439, loss: -4.3032170651713386e-05, rewards: -9.299999999999999, count: 50
epoch: 57449, loss: -1.2018680536129978e-05, rewards: -9.299999999999999, count: 50
epoch: 57459, loss: 1.6261339624179527e-05, rewards: -9.299999999999999, count: 50
epoch: 57469, loss: 1.811623587855138e-05, rewards: -9.299999999999999, count: 50
epoch: 57479, loss: 3.482103238638956e-06, rewards: -9.299999999999999, count: 50
epoch: 57489, loss: -1.0910033779509831e-05, rewards: -9.299999999999999, count: 50
ep

epoch: 58389, loss: 4.1724444599822164e-05, rewards: -9.299999999999999, count: 50
epoch: 58399, loss: -1.1999607522739097e-05, rewards: -9.299999999999999, count: 50
epoch: 58409, loss: 3.28803071170114e-05, rewards: -9.299999999999999, count: 50
epoch: 58419, loss: -1.855134905781597e-05, rewards: -9.299999999999999, count: 50
epoch: 58429, loss: 2.2624730263487436e-05, rewards: -9.299999999999999, count: 50
epoch: 58439, loss: -1.1068582352891099e-05, rewards: -9.299999999999999, count: 50
epoch: 58449, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 58459, loss: -4.7445297468584613e-07, rewards: -9.299999999999999, count: 50
epoch: 58469, loss: 1.875758243841119e-05, rewards: -9.299999999999999, count: 50
epoch: 58479, loss: 2.7668475013342686e-06, rewards: -9.299999999999999, count: 50
epoch: 58489, loss: 2.014279380091466e-05, rewards: -9.299999999999999, count: 50
epoch: 58499, loss: -2.1204949007369578e-05, rewards: -9.299999999999999, count: 50
epoch

epoch: 59379, loss: -2.300024061696604e-05, rewards: -9.299999999999999, count: 50
epoch: 59389, loss: -1.3012886483920738e-05, rewards: -9.299999999999999, count: 50
epoch: 59399, loss: -0.00012759446690324694, rewards: -9.299999999999999, count: 50
epoch: 59409, loss: 0.00013356089766602963, rewards: -9.299999999999999, count: 50
epoch: 59419, loss: 5.851030437042937e-05, rewards: -9.299999999999999, count: 50
epoch: 59429, loss: 2.664089151949156e-05, rewards: -9.299999999999999, count: 50
epoch: 59439, loss: 3.493189797154628e-05, rewards: -9.299999999999999, count: 50
epoch: 59449, loss: 3.018975257873535e-05, rewards: -9.299999999999999, count: 50
epoch: 59459, loss: -1.4404058674699627e-05, rewards: -9.299999999999999, count: 50
epoch: 59469, loss: -0.00011271596304140985, rewards: -9.299999999999999, count: 50
epoch: 59479, loss: -7.801532774465159e-05, rewards: -9.299999999999999, count: 50
epoch: 59489, loss: -7.823705527698621e-05, rewards: -9.299999999999999, count: 50
epoc

epoch: 60369, loss: 3.981470945291221e-05, rewards: -9.299999999999999, count: 50
epoch: 60379, loss: -9.491563105257228e-05, rewards: -9.299999999999999, count: 50
epoch: 60389, loss: -2.0376442989800125e-05, rewards: -9.299999999999999, count: 50
epoch: 60399, loss: -1.5413761502713896e-06, rewards: -9.299999999999999, count: 50
epoch: 60409, loss: 1.3905763807997573e-05, rewards: -9.299999999999999, count: 50
epoch: 60419, loss: -9.553432391840033e-06, rewards: -9.299999999999999, count: 50
epoch: 60429, loss: -2.4925469915615395e-05, rewards: -9.299999999999999, count: 50
epoch: 60439, loss: 1.1320114026602823e-05, rewards: -9.299999999999999, count: 50
epoch: 60449, loss: 5.575418526859721e-06, rewards: -9.299999999999999, count: 50
epoch: 60459, loss: -2.3093223717296496e-05, rewards: -9.299999999999999, count: 50
epoch: 60469, loss: -7.913112312962767e-06, rewards: -9.299999999999999, count: 50
epoch: 60479, loss: -1.6180276361410506e-05, rewards: -9.299999999999999, count: 50
e

epoch: 61359, loss: 1.5127658343772055e-06, rewards: -9.299999999999999, count: 50
epoch: 61369, loss: 2.45952614932321e-05, rewards: -9.299999999999999, count: 50
epoch: 61379, loss: -6.637573278567288e-06, rewards: -9.299999999999999, count: 50
epoch: 61389, loss: 2.9691458621528e-05, rewards: -9.299999999999999, count: 50
epoch: 61399, loss: 1.9590854208217934e-05, rewards: -9.299999999999999, count: 50
epoch: 61409, loss: -3.296017530374229e-05, rewards: -9.299999999999999, count: 50
epoch: 61419, loss: -1.6360283552785404e-05, rewards: -9.299999999999999, count: 50
epoch: 61429, loss: 2.817511631292291e-05, rewards: -9.299999999999999, count: 50
epoch: 61439, loss: 1.1067390005337074e-05, rewards: -9.299999999999999, count: 50
epoch: 61449, loss: 4.1961669694501325e-07, rewards: -9.299999999999999, count: 50
epoch: 61459, loss: -9.684562428446952e-06, rewards: -9.299999999999999, count: 50
epoch: 61469, loss: 3.218412530259229e-05, rewards: -9.299999999999999, count: 50
epoch: 614

epoch: 62349, loss: 9.648800187278539e-06, rewards: -9.299999999999999, count: 50
epoch: 62359, loss: -8.263587915280368e-06, rewards: -9.299999999999999, count: 50
epoch: 62369, loss: 2.847909854608588e-06, rewards: -9.299999999999999, count: 50
epoch: 62379, loss: -4.96911998197902e-05, rewards: -9.299999999999999, count: 50
epoch: 62389, loss: 3.7238598451949656e-05, rewards: -9.299999999999999, count: 50
epoch: 62399, loss: -5.696654261555523e-05, rewards: -9.299999999999999, count: 50
epoch: 62409, loss: -2.2912024633114925e-06, rewards: -9.299999999999999, count: 50
epoch: 62419, loss: 1.3946294529887382e-05, rewards: -9.299999999999999, count: 50
epoch: 62429, loss: 2.3843049348215573e-05, rewards: -9.299999999999999, count: 50
epoch: 62439, loss: -2.6628971681930125e-05, rewards: -9.299999999999999, count: 50
epoch: 62449, loss: 1.3855696124664973e-05, rewards: -9.299999999999999, count: 50
epoch: 62459, loss: -1.9037723177461885e-05, rewards: -9.299999999999999, count: 50
epoc

epoch: 63339, loss: 8.064627763815224e-05, rewards: -9.299999999999999, count: 50
epoch: 63349, loss: -5.903124838368967e-05, rewards: -9.299999999999999, count: 50
epoch: 63359, loss: 2.829432560247369e-05, rewards: -9.299999999999999, count: 50
epoch: 63369, loss: 6.827116067142924e-06, rewards: -9.299999999999999, count: 50
epoch: 63379, loss: -6.201267297001323e-06, rewards: -9.299999999999999, count: 50
epoch: 63389, loss: 3.361701885751245e-07, rewards: -9.299999999999999, count: 50
epoch: 63399, loss: 8.682012776262127e-06, rewards: -9.299999999999999, count: 50
epoch: 63409, loss: -1.3141631825419609e-05, rewards: -9.299999999999999, count: 50
epoch: 63419, loss: 4.463195637072204e-06, rewards: -9.299999999999999, count: 50
epoch: 63429, loss: 1.2922287169203628e-06, rewards: -9.299999999999999, count: 50
epoch: 63439, loss: 9.608268555894028e-06, rewards: -9.299999999999999, count: 50
epoch: 63449, loss: 1.9482373318169266e-05, rewards: -9.299999999999999, count: 50
epoch: 634

epoch: 64329, loss: -1.4222860045265406e-05, rewards: -9.299999999999999, count: 50
epoch: 64339, loss: 1.9732713553821668e-05, rewards: -9.299999999999999, count: 50
epoch: 64349, loss: 2.03549861907959e-05, rewards: -9.299999999999999, count: 50
epoch: 64359, loss: -3.587722676456906e-05, rewards: -9.299999999999999, count: 50
epoch: 64369, loss: 3.66926201422757e-06, rewards: -9.299999999999999, count: 50
epoch: 64379, loss: -9.121179755311459e-05, rewards: -9.299999999999999, count: 50
epoch: 64389, loss: 1.7380714325554436e-06, rewards: -9.299999999999999, count: 50
epoch: 64399, loss: -1.4078617596169352e-06, rewards: -9.299999999999999, count: 50
epoch: 64409, loss: 1.6468762623844668e-05, rewards: -9.299999999999999, count: 50
epoch: 64419, loss: 2.1810532416566275e-05, rewards: -9.299999999999999, count: 50
epoch: 64429, loss: -1.856684684753418e-05, rewards: -9.299999999999999, count: 50
epoch: 64439, loss: 7.345676294789882e-06, rewards: -9.299999999999999, count: 50
epoch: 

epoch: 65319, loss: -2.585291804280132e-05, rewards: -9.299999999999999, count: 50
epoch: 65329, loss: 6.134867726359516e-05, rewards: -9.299999999999999, count: 50
epoch: 65339, loss: -5.960583803243935e-05, rewards: -9.299999999999999, count: 50
epoch: 65349, loss: -5.240082828095183e-05, rewards: -9.299999999999999, count: 50
epoch: 65359, loss: 6.798744288971648e-05, rewards: -9.299999999999999, count: 50
epoch: 65369, loss: 3.818631012109108e-05, rewards: -9.299999999999999, count: 50
epoch: 65379, loss: 1.580595926498063e-05, rewards: -9.299999999999999, count: 50
epoch: 65389, loss: 1.3458728744808468e-06, rewards: -9.299999999999999, count: 50
epoch: 65399, loss: -2.2554397219209932e-05, rewards: -9.299999999999999, count: 50
epoch: 65409, loss: -1.92737570614554e-05, rewards: -9.299999999999999, count: 50
epoch: 65419, loss: 1.5656947653042153e-05, rewards: -9.299999999999999, count: 50
epoch: 65429, loss: 4.1651724131952506e-06, rewards: -9.299999999999999, count: 50
epoch: 6

epoch: 66339, loss: -1.1770724995585624e-05, rewards: -9.299999999999999, count: 50
epoch: 66349, loss: -5.02467173646437e-06, rewards: -9.299999999999999, count: 50
epoch: 66359, loss: 8.032321784412488e-05, rewards: -9.299999999999999, count: 50
epoch: 66369, loss: -8.987188630271703e-05, rewards: -9.299999999999999, count: 50
epoch: 66379, loss: 7.641673437319696e-05, rewards: -9.299999999999999, count: 50
epoch: 66389, loss: 6.62314923829399e-05, rewards: -9.299999999999999, count: 50
epoch: 66399, loss: 3.6129949876340106e-05, rewards: -9.299999999999999, count: 50
epoch: 66409, loss: 1.8889904822572134e-05, rewards: -9.299999999999999, count: 50
epoch: 66419, loss: 2.395749106653966e-05, rewards: -9.299999999999999, count: 50
epoch: 66429, loss: -7.0774553933006246e-06, rewards: -9.299999999999999, count: 50
epoch: 66439, loss: -4.2788982682395726e-05, rewards: -9.299999999999999, count: 50
epoch: 66449, loss: -9.047269850270823e-05, rewards: -9.299999999999999, count: 50
epoch: 

epoch: 67329, loss: -3.498673322610557e-05, rewards: -9.299999999999999, count: 50
epoch: 67339, loss: -1.9332170268171467e-05, rewards: -9.299999999999999, count: 50
epoch: 67349, loss: 4.5838354708394036e-05, rewards: -9.299999999999999, count: 50
epoch: 67359, loss: 3.7932397390250117e-05, rewards: -9.299999999999999, count: 50
epoch: 67369, loss: -3.2247306080535054e-05, rewards: -9.299999999999999, count: 50
epoch: 67379, loss: 5.477666945807869e-06, rewards: -9.299999999999999, count: 50
epoch: 67389, loss: 1.8752813048195094e-05, rewards: -9.299999999999999, count: 50
epoch: 67399, loss: 9.368657629238442e-06, rewards: -9.299999999999999, count: 50
epoch: 67409, loss: 1.0479689080966637e-05, rewards: -9.299999999999999, count: 50
epoch: 67419, loss: 1.5255212929332629e-05, rewards: -9.299999999999999, count: 50
epoch: 67429, loss: 3.48079192917794e-05, rewards: -9.299999999999999, count: 50
epoch: 67439, loss: 9.207963739754632e-05, rewards: -9.299999999999999, count: 50
epoch: 

epoch: 68319, loss: 6.370782648446038e-05, rewards: -9.299999999999999, count: 50
epoch: 68329, loss: -3.647804260253906e-05, rewards: -9.299999999999999, count: 50
epoch: 68339, loss: 1.132845864049159e-05, rewards: -9.299999999999999, count: 50
epoch: 68349, loss: 1.1689662642311305e-05, rewards: -9.299999999999999, count: 50
epoch: 68359, loss: -1.4722347714268835e-06, rewards: -9.299999999999999, count: 50
epoch: 68369, loss: -5.700588189938571e-06, rewards: -9.299999999999999, count: 50
epoch: 68379, loss: -1.5282630556612276e-05, rewards: -9.299999999999999, count: 50
epoch: 68389, loss: 2.3559332475997508e-05, rewards: -9.299999999999999, count: 50
epoch: 68399, loss: 1.849532054620795e-05, rewards: -9.299999999999999, count: 50
epoch: 68409, loss: 3.242850289097987e-05, rewards: -9.299999999999999, count: 50
epoch: 68419, loss: -1.4296770132204983e-05, rewards: -9.299999999999999, count: 50
epoch: 68429, loss: 4.5621395656780805e-06, rewards: -9.299999999999999, count: 50
epoch

epoch: 69309, loss: -3.130316690658219e-05, rewards: -9.299999999999999, count: 50
epoch: 69319, loss: -1.3566017287303112e-06, rewards: -9.299999999999999, count: 50
epoch: 69329, loss: 3.5475492040859535e-05, rewards: -9.299999999999999, count: 50
epoch: 69339, loss: 6.308555384748615e-06, rewards: -9.299999999999999, count: 50
epoch: 69349, loss: 1.3256072634248994e-05, rewards: -9.299999999999999, count: 50
epoch: 69359, loss: 7.841586921131238e-06, rewards: -9.299999999999999, count: 50
epoch: 69369, loss: -2.446413054713048e-05, rewards: -9.299999999999999, count: 50
epoch: 69379, loss: -9.294152550864965e-05, rewards: -9.299999999999999, count: 50
epoch: 69389, loss: 4.709601489594206e-05, rewards: -9.299999999999999, count: 50
epoch: 69399, loss: -3.173112781951204e-05, rewards: -9.299999999999999, count: 50
epoch: 69409, loss: -3.390312258488848e-06, rewards: -9.299999999999999, count: 50
epoch: 69419, loss: 5.989432247588411e-05, rewards: -9.299999999999999, count: 50
epoch: 

epoch: 70299, loss: -9.2363361545722e-06, rewards: -9.299999999999999, count: 50
epoch: 70309, loss: -3.038644763364573e-06, rewards: -9.299999999999999, count: 50
epoch: 70319, loss: 1.5079974673426477e-06, rewards: -9.299999999999999, count: 50
epoch: 70329, loss: -4.5945645979372784e-05, rewards: -9.299999999999999, count: 50
epoch: 70339, loss: -9.24241539905779e-05, rewards: -9.299999999999999, count: 50
epoch: 70349, loss: 5.219340164330788e-05, rewards: -9.299999999999999, count: 50
epoch: 70359, loss: -2.0525454601738602e-05, rewards: -9.299999999999999, count: 50
epoch: 70369, loss: 9.804964065551758e-06, rewards: -9.299999999999999, count: 50
epoch: 70379, loss: 3.159046286782541e-07, rewards: -9.299999999999999, count: 50
epoch: 70389, loss: 2.7692317416949663e-06, rewards: -9.299999999999999, count: 50
epoch: 70399, loss: 7.643699973414186e-06, rewards: -9.299999999999999, count: 50
epoch: 70409, loss: 2.0610093997674994e-05, rewards: -9.299999999999999, count: 50
epoch: 70

epoch: 71319, loss: -3.170728814438917e-05, rewards: -9.299999999999999, count: 50
epoch: 71329, loss: -9.517550643067807e-05, rewards: -9.299999999999999, count: 50
epoch: 71339, loss: 4.525661643128842e-05, rewards: -9.299999999999999, count: 50
epoch: 71349, loss: -3.318190647405572e-05, rewards: -9.299999999999999, count: 50
epoch: 71359, loss: 2.0681620299001224e-05, rewards: -9.299999999999999, count: 50
epoch: 71369, loss: -9.876489457383286e-06, rewards: -9.299999999999999, count: 50
epoch: 71379, loss: 3.565549832273973e-06, rewards: -9.299999999999999, count: 50
epoch: 71389, loss: 1.938343075380544e-06, rewards: -9.299999999999999, count: 50
epoch: 71399, loss: -5.16533873451408e-06, rewards: -9.299999999999999, count: 50
epoch: 71409, loss: 2.1290779841365293e-06, rewards: -9.299999999999999, count: 50
epoch: 71419, loss: 6.753206434950698e-06, rewards: -9.299999999999999, count: 50
epoch: 71429, loss: 9.119510650634766e-06, rewards: -9.299999999999999, count: 50
epoch: 714

epoch: 72319, loss: 4.237294342601672e-05, rewards: -9.299999999999999, count: 50
epoch: 72329, loss: 1.9459725081105717e-05, rewards: -9.299999999999999, count: 50
epoch: 72339, loss: -1.579999843670521e-05, rewards: -9.299999999999999, count: 50
epoch: 72349, loss: -2.2258758690441027e-05, rewards: -9.299999999999999, count: 50
epoch: 72359, loss: -9.182691428577527e-06, rewards: -9.299999999999999, count: 50
epoch: 72369, loss: -1.7094612303480972e-06, rewards: -9.299999999999999, count: 50
epoch: 72379, loss: -1.505255659139948e-05, rewards: -9.299999999999999, count: 50
epoch: 72389, loss: -4.744529724121094e-05, rewards: -9.299999999999999, count: 50
epoch: 72399, loss: 1.559972770337481e-05, rewards: -9.299999999999999, count: 50
epoch: 72409, loss: -1.0982751518895384e-05, rewards: -9.299999999999999, count: 50
epoch: 72419, loss: -1.0079145795316435e-05, rewards: -9.299999999999999, count: 50
epoch: 72429, loss: -4.154443558945786e-06, rewards: -9.299999999999999, count: 50
ep

epoch: 73309, loss: -7.774830010021105e-06, rewards: -9.299999999999999, count: 50
epoch: 73319, loss: 2.270340883114841e-05, rewards: -9.299999999999999, count: 50
epoch: 73329, loss: 1.999259075091686e-05, rewards: -9.299999999999999, count: 50
epoch: 73339, loss: -9.959936505765654e-06, rewards: -9.299999999999999, count: 50
epoch: 73349, loss: -5.877017770217208e-07, rewards: -9.299999999999999, count: 50
epoch: 73359, loss: -8.453131158603355e-06, rewards: -9.299999999999999, count: 50
epoch: 73369, loss: 3.056883724639192e-05, rewards: -9.299999999999999, count: 50
epoch: 73379, loss: 8.94653785508126e-05, rewards: -9.299999999999999, count: 50
epoch: 73389, loss: -5.317449540598318e-05, rewards: -9.299999999999999, count: 50
epoch: 73399, loss: 4.3264626583550125e-05, rewards: -9.299999999999999, count: 50
epoch: 73409, loss: -3.1672716431785375e-05, rewards: -9.299999999999999, count: 50
epoch: 73419, loss: 2.2039414034225047e-05, rewards: -9.299999999999999, count: 50
epoch: 7

epoch: 74299, loss: 3.212690444343025e-06, rewards: -9.299999999999999, count: 50
epoch: 74309, loss: 2.771615982055664e-06, rewards: -9.299999999999999, count: 50
epoch: 74319, loss: 2.082109494949691e-05, rewards: -9.299999999999999, count: 50
epoch: 74329, loss: 7.76052502260427e-07, rewards: -9.299999999999999, count: 50
epoch: 74339, loss: -4.2676924749684986e-07, rewards: -9.299999999999999, count: 50
epoch: 74349, loss: 3.722429391928017e-05, rewards: -9.299999999999999, count: 50
epoch: 74359, loss: -2.7086734917247668e-05, rewards: -9.299999999999999, count: 50
epoch: 74369, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 74379, loss: 4.628896931535564e-06, rewards: -9.299999999999999, count: 50
epoch: 74389, loss: -5.004405920772115e-06, rewards: -9.299999999999999, count: 50
epoch: 74399, loss: -4.434585662238533e-06, rewards: -9.299999999999999, count: 50
epoch: 74409, loss: 2.0384788967930945e-06, rewards: -9.299999999999999, count: 50
epoch: 74

epoch: 75289, loss: -2.0873547327937558e-05, rewards: -9.299999999999999, count: 50
epoch: 75299, loss: 3.184676097589545e-05, rewards: -9.299999999999999, count: 50
epoch: 75309, loss: -8.959770639194176e-06, rewards: -9.299999999999999, count: 50
epoch: 75319, loss: -3.6072731290914817e-06, rewards: -9.299999999999999, count: 50
epoch: 75329, loss: 1.9763707314268686e-05, rewards: -9.299999999999999, count: 50
epoch: 75339, loss: 1.8849372281692922e-05, rewards: -9.299999999999999, count: 50
epoch: 75349, loss: 4.729986176243983e-05, rewards: -9.299999999999999, count: 50
epoch: 75359, loss: 3.7367342883953825e-05, rewards: -9.299999999999999, count: 50
epoch: 75369, loss: -1.1814832760137506e-05, rewards: -9.299999999999999, count: 50
epoch: 75379, loss: 4.726648512587417e-06, rewards: -9.299999999999999, count: 50
epoch: 75389, loss: 1.0836124602064956e-05, rewards: -9.299999999999999, count: 50
epoch: 75399, loss: 7.630586878804024e-06, rewards: -9.299999999999999, count: 50
epoch

epoch: 76279, loss: 2.1270512661430985e-05, rewards: -9.299999999999999, count: 50
epoch: 76289, loss: 1.2252330634510145e-05, rewards: -9.299999999999999, count: 50
epoch: 76299, loss: -1.2259482900844887e-05, rewards: -9.299999999999999, count: 50
epoch: 76309, loss: -1.1140107744722627e-05, rewards: -9.299999999999999, count: 50
epoch: 76319, loss: -8.847713615978137e-06, rewards: -9.299999999999999, count: 50
epoch: 76329, loss: -6.713867151120212e-06, rewards: -9.299999999999999, count: 50
epoch: 76339, loss: -1.9359587895451114e-05, rewards: -9.299999999999999, count: 50
epoch: 76349, loss: -8.018135849852115e-05, rewards: -9.299999999999999, count: 50
epoch: 76359, loss: 6.136894171504537e-06, rewards: -9.299999999999999, count: 50
epoch: 76369, loss: -1.5188456018222496e-05, rewards: -9.299999999999999, count: 50
epoch: 76379, loss: 1.5612840797984973e-05, rewards: -9.299999999999999, count: 50
epoch: 76389, loss: -1.5923977116472088e-05, rewards: -9.299999999999999, count: 50


epoch: 77269, loss: -1.685023380559869e-05, rewards: -9.299999999999999, count: 50
epoch: 77279, loss: -0.00010240912524750456, rewards: -9.299999999999999, count: 50
epoch: 77289, loss: 4.460215495782904e-05, rewards: -9.299999999999999, count: 50
epoch: 77299, loss: -2.6817322577699088e-05, rewards: -9.299999999999999, count: 50
epoch: 77309, loss: 7.2407724474032875e-06, rewards: -9.299999999999999, count: 50
epoch: 77319, loss: 1.782298022590112e-05, rewards: -9.299999999999999, count: 50
epoch: 77329, loss: -1.6814470654935576e-05, rewards: -9.299999999999999, count: 50
epoch: 77339, loss: 1.1262894076935481e-05, rewards: -9.299999999999999, count: 50
epoch: 77349, loss: -1.8602609998197295e-05, rewards: -9.299999999999999, count: 50
epoch: 77359, loss: 5.729198619519593e-06, rewards: -9.299999999999999, count: 50
epoch: 77369, loss: -5.0171613111160696e-05, rewards: -9.299999999999999, count: 50
epoch: 77379, loss: 4.082083614775911e-05, rewards: -9.299999999999999, count: 50
epo

epoch: 78279, loss: -4.7206879116856726e-07, rewards: -9.299999999999999, count: 50
epoch: 78289, loss: -1.2379884537949692e-05, rewards: -9.299999999999999, count: 50
epoch: 78299, loss: -4.908800110570155e-05, rewards: -9.299999999999999, count: 50
epoch: 78309, loss: 3.181099964422174e-05, rewards: -9.299999999999999, count: 50
epoch: 78319, loss: 2.8562544684973545e-06, rewards: -9.299999999999999, count: 50
epoch: 78329, loss: -2.066135493805632e-05, rewards: -9.299999999999999, count: 50
epoch: 78339, loss: 3.2770633424661355e-06, rewards: -9.299999999999999, count: 50
epoch: 78349, loss: 3.1856299756327644e-05, rewards: -9.299999999999999, count: 50
epoch: 78359, loss: 6.263494287850335e-05, rewards: -9.299999999999999, count: 50
epoch: 78369, loss: 3.223776730010286e-05, rewards: -9.299999999999999, count: 50
epoch: 78379, loss: -4.292726316634798e-06, rewards: -9.299999999999999, count: 50
epoch: 78389, loss: -1.140952099376591e-05, rewards: -9.299999999999999, count: 50
epoch

epoch: 79279, loss: -4.4596195039048325e-06, rewards: -9.299999999999999, count: 50
epoch: 79289, loss: 2.18868262891192e-06, rewards: -9.299999999999999, count: 50
epoch: 79299, loss: -8.670092029205989e-06, rewards: -9.299999999999999, count: 50
epoch: 79309, loss: 2.4247169676527847e-06, rewards: -9.299999999999999, count: 50
epoch: 79319, loss: 1.62100786837982e-05, rewards: -9.299999999999999, count: 50
epoch: 79329, loss: 3.0953884561313316e-05, rewards: -9.299999999999999, count: 50
epoch: 79339, loss: 7.367014768533409e-05, rewards: -9.299999999999999, count: 50
epoch: 79349, loss: -2.249717726954259e-05, rewards: -9.299999999999999, count: 50
epoch: 79359, loss: -2.2137164705782197e-06, rewards: -9.299999999999999, count: 50
epoch: 79369, loss: 1.9066334061790258e-05, rewards: -9.299999999999999, count: 50
epoch: 79379, loss: -1.849532054620795e-05, rewards: -9.299999999999999, count: 50
epoch: 79389, loss: 1.155972495325841e-05, rewards: -9.299999999999999, count: 50
epoch: 7

epoch: 80289, loss: 4.6801565076748375e-06, rewards: -9.299999999999999, count: 50
epoch: 80299, loss: 7.184743935795268e-06, rewards: -9.299999999999999, count: 50
epoch: 80309, loss: 1.665353738644626e-05, rewards: -9.299999999999999, count: 50
epoch: 80319, loss: 5.001783210900612e-05, rewards: -9.299999999999999, count: 50
epoch: 80329, loss: 3.937959627364762e-05, rewards: -9.299999999999999, count: 50
epoch: 80339, loss: -2.6944875571643934e-05, rewards: -9.299999999999999, count: 50
epoch: 80349, loss: 2.9349328087846516e-06, rewards: -9.299999999999999, count: 50
epoch: 80359, loss: 8.397102646995336e-06, rewards: -9.299999999999999, count: 50
epoch: 80369, loss: -1.6808509428756224e-07, rewards: -9.299999999999999, count: 50
epoch: 80379, loss: -2.453327169860131e-06, rewards: -9.299999999999999, count: 50
epoch: 80389, loss: 1.6295909972541267e-06, rewards: -9.299999999999999, count: 50
epoch: 80399, loss: 1.9199847884010524e-05, rewards: -9.299999999999999, count: 50
epoch: 

epoch: 81279, loss: 4.028081821161322e-05, rewards: -9.299999999999999, count: 50
epoch: 81289, loss: 9.239077917300165e-05, rewards: -9.299999999999999, count: 50
epoch: 81299, loss: -4.1321516619063914e-05, rewards: -9.299999999999999, count: 50
epoch: 81309, loss: 1.9824505216092803e-05, rewards: -9.299999999999999, count: 50
epoch: 81319, loss: 5.10215784288448e-07, rewards: -9.299999999999999, count: 50
epoch: 81329, loss: -5.608796982414788e-06, rewards: -9.299999999999999, count: 50
epoch: 81339, loss: 2.918243353633443e-06, rewards: -9.299999999999999, count: 50
epoch: 81349, loss: 2.3162365323514678e-06, rewards: -9.299999999999999, count: 50
epoch: 81359, loss: -1.3258457329357043e-05, rewards: -9.299999999999999, count: 50
epoch: 81369, loss: 3.0350684028235264e-06, rewards: -9.299999999999999, count: 50
epoch: 81379, loss: -4.0531158447265625e-06, rewards: -9.299999999999999, count: 50
epoch: 81389, loss: -4.911422820441658e-06, rewards: -9.299999999999999, count: 50
epoch:

epoch: 82279, loss: -1.6586780475336127e-05, rewards: -9.299999999999999, count: 50
epoch: 82289, loss: -6.989479152252898e-05, rewards: -9.299999999999999, count: 50
epoch: 82299, loss: 2.347230974919512e-06, rewards: -9.299999999999999, count: 50
epoch: 82309, loss: -1.4593601008527912e-05, rewards: -9.299999999999999, count: 50
epoch: 82319, loss: 1.7712116459733807e-05, rewards: -9.299999999999999, count: 50
epoch: 82329, loss: -1.5254020581778605e-05, rewards: -9.299999999999999, count: 50
epoch: 82339, loss: 7.692575309192762e-06, rewards: -9.299999999999999, count: 50
epoch: 82349, loss: -5.927086021983996e-06, rewards: -9.299999999999999, count: 50
epoch: 82359, loss: -2.2697449821862392e-06, rewards: -9.299999999999999, count: 50
epoch: 82369, loss: 4.650354185287142e-06, rewards: -9.299999999999999, count: 50
epoch: 82379, loss: 6.594657861569431e-06, rewards: -9.299999999999999, count: 50
epoch: 82389, loss: 6.022453362675151e-06, rewards: -9.299999999999999, count: 50
epoch

epoch: 83289, loss: 3.782510702876607e-06, rewards: -9.299999999999999, count: 50
epoch: 83299, loss: 5.711317044188036e-06, rewards: -9.299999999999999, count: 50
epoch: 83309, loss: 1.4070272300159559e-05, rewards: -9.299999999999999, count: 50
epoch: 83319, loss: 3.834962990367785e-05, rewards: -9.299999999999999, count: 50
epoch: 83329, loss: 6.763935380149633e-05, rewards: -9.299999999999999, count: 50
epoch: 83339, loss: -3.5359858884476125e-05, rewards: -9.299999999999999, count: 50
epoch: 83349, loss: 1.5275478290277533e-05, rewards: -9.299999999999999, count: 50
epoch: 83359, loss: -2.2411346378703456e-07, rewards: -9.299999999999999, count: 50
epoch: 83369, loss: -9.788274837774225e-06, rewards: -9.299999999999999, count: 50
epoch: 83379, loss: 1.5628337450834806e-06, rewards: -9.299999999999999, count: 50
epoch: 83389, loss: 6.257295808609342e-06, rewards: -9.299999999999999, count: 50
epoch: 83399, loss: 9.976625733543187e-06, rewards: -9.299999999999999, count: 50
epoch: 8

epoch: 84299, loss: 1.35779384891066e-06, rewards: -9.299999999999999, count: 50
epoch: 84309, loss: 6.10351571594947e-06, rewards: -9.299999999999999, count: 50
epoch: 84319, loss: -5.580187007581117e-06, rewards: -9.299999999999999, count: 50
epoch: 84329, loss: -6.1845780692237895e-06, rewards: -9.299999999999999, count: 50
epoch: 84339, loss: -8.383989552385174e-06, rewards: -9.299999999999999, count: 50
epoch: 84349, loss: -8.53300116432365e-06, rewards: -9.299999999999999, count: 50
epoch: 84359, loss: -3.999829277745448e-05, rewards: -9.299999999999999, count: 50
epoch: 84369, loss: -8.206486381823197e-05, rewards: -9.299999999999999, count: 50
epoch: 84379, loss: 2.9871463993913494e-05, rewards: -9.299999999999999, count: 50
epoch: 84389, loss: 3.6096573694521794e-06, rewards: -9.299999999999999, count: 50
epoch: 84399, loss: -1.320481260336237e-05, rewards: -9.299999999999999, count: 50
epoch: 84409, loss: 1.2068748219462577e-05, rewards: -9.299999999999999, count: 50
epoch: 8

epoch: 85289, loss: -8.803605851426255e-06, rewards: -9.299999999999999, count: 50
epoch: 85299, loss: 9.889602551993448e-06, rewards: -9.299999999999999, count: 50
epoch: 85309, loss: 1.3685225894732866e-06, rewards: -9.299999999999999, count: 50
epoch: 85319, loss: 3.273486981925089e-06, rewards: -9.299999999999999, count: 50
epoch: 85329, loss: -1.1801719210779993e-06, rewards: -9.299999999999999, count: 50
epoch: 85339, loss: -2.188801772717852e-05, rewards: -9.299999999999999, count: 50
epoch: 85349, loss: -8.101225103018805e-05, rewards: -9.299999999999999, count: 50
epoch: 85359, loss: 3.1859875889495015e-05, rewards: -9.299999999999999, count: 50
epoch: 85369, loss: -1.8769502275972627e-05, rewards: -9.299999999999999, count: 50
epoch: 85379, loss: 1.3810396012559067e-05, rewards: -9.299999999999999, count: 50
epoch: 85389, loss: -2.259016127936775e-06, rewards: -9.299999999999999, count: 50
epoch: 85399, loss: -4.0018558138399385e-06, rewards: -9.299999999999999, count: 50
epo

epoch: 86279, loss: 5.421638434199849e-06, rewards: -9.299999999999999, count: 50
epoch: 86289, loss: -4.771947715198621e-06, rewards: -9.299999999999999, count: 50
epoch: 86299, loss: 3.8385392144846264e-06, rewards: -9.299999999999999, count: 50
epoch: 86309, loss: -2.536773763495148e-06, rewards: -9.299999999999999, count: 50
epoch: 86319, loss: 2.0956993012077874e-06, rewards: -9.299999999999999, count: 50
epoch: 86329, loss: -6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 86339, loss: 3.218650945768786e-08, rewards: -9.299999999999999, count: 50
epoch: 86349, loss: 4.887580828949467e-08, rewards: -9.299999999999999, count: 50
epoch: 86359, loss: 6.370544269884704e-06, rewards: -9.299999999999999, count: 50
epoch: 86369, loss: 2.2459029423771426e-05, rewards: -9.299999999999999, count: 50
epoch: 86379, loss: 8.091688505373895e-05, rewards: -9.299999999999999, count: 50
epoch: 86389, loss: -3.0488967240671627e-05, rewards: -9.299999999999999, count: 50
epoch: 86

epoch: 87299, loss: 2.04801563086221e-06, rewards: -9.299999999999999, count: 50
epoch: 87309, loss: 2.2089482172304997e-06, rewards: -9.299999999999999, count: 50
epoch: 87319, loss: -1.184940288112557e-06, rewards: -9.299999999999999, count: 50
epoch: 87329, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 87339, loss: 2.5475026177446125e-06, rewards: -9.299999999999999, count: 50
epoch: 87349, loss: 6.307363491941942e-06, rewards: -9.299999999999999, count: 50
epoch: 87359, loss: 1.4744997315574437e-05, rewards: -9.299999999999999, count: 50
epoch: 87369, loss: 5.743145811720751e-05, rewards: -9.299999999999999, count: 50
epoch: 87379, loss: 2.685904473764822e-05, rewards: -9.299999999999999, count: 50
epoch: 87389, loss: -6.089210728532635e-06, rewards: -9.299999999999999, count: 50
epoch: 87399, loss: -4.723071924672695e-06, rewards: -9.299999999999999, count: 50
epoch: 87409, loss: 5.495548066392075e-06, rewards: -9.299999999999999, count: 50
epoch: 874

epoch: 88309, loss: -6.71148313813319e-07, rewards: -9.299999999999999, count: 50
epoch: 88319, loss: -7.939338502183091e-07, rewards: -9.299999999999999, count: 50
epoch: 88329, loss: 5.877017770217208e-07, rewards: -9.299999999999999, count: 50
epoch: 88339, loss: 3.7908553167653736e-06, rewards: -9.299999999999999, count: 50
epoch: 88349, loss: 3.298759475001134e-05, rewards: -9.299999999999999, count: 50
epoch: 88359, loss: 0.00011335611634422094, rewards: -9.299999999999999, count: 50
epoch: 88369, loss: 3.927707803086378e-05, rewards: -9.299999999999999, count: 50
epoch: 88379, loss: -1.698136293271091e-05, rewards: -9.299999999999999, count: 50
epoch: 88389, loss: -2.5415420168428682e-05, rewards: -9.299999999999999, count: 50
epoch: 88399, loss: -1.0223388926533516e-05, rewards: -9.299999999999999, count: 50
epoch: 88409, loss: 3.1256674901669612e-06, rewards: -9.299999999999999, count: 50
epoch: 88419, loss: 6.150007266114699e-06, rewards: -9.299999999999999, count: 50
epoch: 

epoch: 89309, loss: -6.151199158921372e-07, rewards: -9.299999999999999, count: 50
epoch: 89319, loss: 3.26633454506009e-07, rewards: -9.299999999999999, count: 50
epoch: 89329, loss: 4.127025476918789e-06, rewards: -9.299999999999999, count: 50
epoch: 89339, loss: 1.172542579297442e-05, rewards: -9.299999999999999, count: 50
epoch: 89349, loss: 5.648732258123346e-05, rewards: -9.299999999999999, count: 50
epoch: 89359, loss: 3.1412841053679585e-05, rewards: -9.299999999999999, count: 50
epoch: 89369, loss: -5.486011559696635e-06, rewards: -9.299999999999999, count: 50
epoch: 89379, loss: -5.487203452503309e-06, rewards: -9.299999999999999, count: 50
epoch: 89389, loss: 5.875825991097372e-06, rewards: -9.299999999999999, count: 50
epoch: 89399, loss: -3.4809113458322827e-06, rewards: -9.299999999999999, count: 50
epoch: 89409, loss: 1.5771388461871538e-06, rewards: -9.299999999999999, count: 50
epoch: 89419, loss: 1.7952919506569742e-06, rewards: -9.299999999999999, count: 50
epoch: 89

epoch: 90299, loss: 7.667541467526462e-06, rewards: -9.299999999999999, count: 50
epoch: 90309, loss: 3.539681347319856e-05, rewards: -9.299999999999999, count: 50
epoch: 90319, loss: 8.883118425728753e-05, rewards: -9.299999999999999, count: 50
epoch: 90329, loss: -4.333377000875771e-05, rewards: -9.299999999999999, count: 50
epoch: 90339, loss: 1.0538101378188003e-05, rewards: -9.299999999999999, count: 50
epoch: 90349, loss: 3.9184092202049214e-06, rewards: -9.299999999999999, count: 50
epoch: 90359, loss: -6.310939625109313e-06, rewards: -9.299999999999999, count: 50
epoch: 90369, loss: 4.531144895736361e-06, rewards: -9.299999999999999, count: 50
epoch: 90379, loss: -2.121925263054436e-06, rewards: -9.299999999999999, count: 50
epoch: 90389, loss: -5.125999535948722e-08, rewards: -9.299999999999999, count: 50
epoch: 90399, loss: 1.4829635119895102e-06, rewards: -9.299999999999999, count: 50
epoch: 90409, loss: 1.5497207073167374e-08, rewards: -9.299999999999999, count: 50
epoch: 9

epoch: 91309, loss: 1.4317035947897239e-06, rewards: -9.299999999999999, count: 50
epoch: 91319, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 91329, loss: 2.813339108342916e-07, rewards: -9.299999999999999, count: 50
epoch: 91339, loss: 5.0067900048134106e-08, rewards: -9.299999999999999, count: 50
epoch: 91349, loss: -3.3140182154056674e-07, rewards: -9.299999999999999, count: 50
epoch: 91359, loss: 1.7166138377433526e-07, rewards: -9.299999999999999, count: 50
epoch: 91369, loss: 5.805492264698842e-07, rewards: -9.299999999999999, count: 50
epoch: 91379, loss: 1.1909007753274636e-06, rewards: -9.299999999999999, count: 50
epoch: 91389, loss: 5.776882062491495e-06, rewards: -9.299999999999999, count: 50
epoch: 91399, loss: 3.997325984528288e-05, rewards: -9.299999999999999, count: 50
epoch: 91409, loss: 8.256554428953677e-05, rewards: -9.299999999999999, count: 50
epoch: 91419, loss: 6.127357664809097e-06, rewards: -9.299999999999999, count: 50
epoch: 91

epoch: 92299, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 92309, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 92319, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 92329, loss: 5.125999678057269e-07, rewards: -9.299999999999999, count: 50
epoch: 92339, loss: 2.8014181907565217e-07, rewards: -9.299999999999999, count: 50
epoch: 92349, loss: 6.997585160206654e-07, rewards: -9.299999999999999, count: 50
epoch: 92359, loss: 1.519918441772461e-06, rewards: -9.299999999999999, count: 50
epoch: 92369, loss: 3.983974238508381e-06, rewards: -9.299999999999999, count: 50
epoch: 92379, loss: 1.8541812096373178e-05, rewards: -9.299999999999999, count: 50
epoch: 92389, loss: 9.749770106282085e-05, rewards: -9.299999999999999, count: 50
epoch: 92399, loss: -6.120443140389398e-05, rewards: -9.299999999999999, count: 50
epoch: 92409, loss: 3.6065579479327425e-05, rewards: -9.299999999999999, count: 50
epoch: 92

epoch: 93289, loss: 2.0598172341124155e-05, rewards: -9.299999999999999, count: 50
epoch: 93299, loss: 0.00012078404688509181, rewards: -9.299999999999999, count: 50
epoch: 93309, loss: -6.719350494677201e-05, rewards: -9.299999999999999, count: 50
epoch: 93319, loss: -1.3962983757664915e-05, rewards: -9.299999999999999, count: 50
epoch: 93329, loss: 2.4446249881293625e-05, rewards: -9.299999999999999, count: 50
epoch: 93339, loss: 2.874136043828912e-06, rewards: -9.299999999999999, count: 50
epoch: 93349, loss: -9.720325579110067e-06, rewards: -9.299999999999999, count: 50
epoch: 93359, loss: 3.502368826957536e-06, rewards: -9.299999999999999, count: 50
epoch: 93369, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 93379, loss: -1.3685225894732866e-06, rewards: -9.299999999999999, count: 50
epoch: 93389, loss: 9.739399047248298e-07, rewards: -9.299999999999999, count: 50
epoch: 93399, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch

epoch: 94289, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 94299, loss: 5.372762643673923e-06, rewards: -9.299999999999999, count: 50
epoch: 94309, loss: 3.664970427053049e-05, rewards: -9.299999999999999, count: 50
epoch: 94319, loss: 9.455800318391994e-05, rewards: -9.299999999999999, count: 50
epoch: 94329, loss: 1.1515617188706528e-05, rewards: -9.299999999999999, count: 50
epoch: 94339, loss: -3.655195177998394e-05, rewards: -9.299999999999999, count: 50
epoch: 94349, loss: -4.903078206552891e-06, rewards: -9.299999999999999, count: 50
epoch: 94359, loss: 1.3422965821519028e-05, rewards: -9.299999999999999, count: 50
epoch: 94369, loss: -1.6140937759701046e-06, rewards: -9.299999999999999, count: 50
epoch: 94379, loss: -3.687143362185452e-06, rewards: -9.299999999999999, count: 50
epoch: 94389, loss: 2.796649823721964e-06, rewards: -9.299999999999999, count: 50
epoch: 94399, loss: -1.4579295566363726e-06, rewards: -9.299999999999999, count: 50
epoch:

epoch: 95279, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 95289, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 95299, loss: 5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 95309, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 95319, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 95329, loss: 1.2958049637745717e-06, rewards: -9.299999999999999, count: 50
epoch: 95339, loss: 4.509687641984783e-06, rewards: -9.299999999999999, count: 50
epoch: 95349, loss: 2.6565790903987363e-05, rewards: -9.299999999999999, count: 50
epoch: 95359, loss: 0.00011529326729942113, rewards: -9.299999999999999, count: 50
epoch: 95369, loss: -4.9054622650146484e-05, rewards: -9.299999999999999, count: 50
epoch: 95379, loss: -1.6347170458175242e-05, rewards: -9.299999999999999, count: 50
epoch: 95389, loss: 2.2753476514481008e-05, rewards: -9.299999999999999, count: 50
epoch:

epoch: 96289, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 96299, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 96309, loss: 1.6307831174344756e-06, rewards: -9.299999999999999, count: 50
epoch: 96319, loss: 5.202293323236518e-06, rewards: -9.299999999999999, count: 50
epoch: 96329, loss: 2.832651080098003e-05, rewards: -9.299999999999999, count: 50
epoch: 96339, loss: 0.00010817527800099924, rewards: -9.299999999999999, count: 50
epoch: 96349, loss: -5.175709884497337e-05, rewards: -9.299999999999999, count: 50
epoch: 96359, loss: -1.394748665006773e-06, rewards: -9.299999999999999, count: 50
epoch: 96369, loss: 1.9046068700845353e-05, rewards: -9.299999999999999, count: 50
epoch: 96379, loss: -1.2022256669297349e-05, rewards: -9.299999999999999, count: 50
epoch: 96389, loss: 3.951788130507339e-06, rewards: -9.299999999999999, count: 50
epoch: 96399, loss: -6.806850478824344e-07, rewards: -9.299999999999999, count: 50
epoch: 9

epoch: 97279, loss: 2.6869774956139736e-06, rewards: -9.299999999999999, count: 50
epoch: 97289, loss: -2.0015240806969814e-06, rewards: -9.299999999999999, count: 50
epoch: 97299, loss: 6.949901489861077e-07, rewards: -9.299999999999999, count: 50
epoch: 97309, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 97319, loss: -9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 97329, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 97339, loss: -9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 97349, loss: -1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 97359, loss: -1.0907649993896484e-05, rewards: -9.299999999999999, count: 50
epoch: 97369, loss: -8.299946784973145e-05, rewards: -9.299999999999999, count: 50
epoch: 97379, loss: 5.5052041716407984e-05, rewards: -9.299999999999999, count: 50
epoch: 97389, loss: -4.246354001224972e-05, rewards: -9.299999999999999, count: 50
epoch

epoch: 98269, loss: 1.2658834748435766e-05, rewards: -9.299999999999999, count: 50
epoch: 98279, loss: 9.664297067502048e-06, rewards: -9.299999999999999, count: 50
epoch: 98289, loss: -8.418560355494265e-06, rewards: -9.299999999999999, count: 50
epoch: 98299, loss: 1.817941665649414e-06, rewards: -9.299999999999999, count: 50
epoch: 98309, loss: 1.0979175613101688e-06, rewards: -9.299999999999999, count: 50
epoch: 98319, loss: -1.4948844864193234e-06, rewards: -9.299999999999999, count: 50
epoch: 98329, loss: 8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 98339, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 98349, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 98359, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 98369, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 98379, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 

epoch: 99259, loss: 4.380941390991211e-06, rewards: -9.299999999999999, count: 50
epoch: 99269, loss: 4.470348358154297e-07, rewards: -9.299999999999999, count: 50
epoch: 99279, loss: -1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 99289, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 99299, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 99309, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 99319, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 99329, loss: 1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 99339, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 99349, loss: 3.2424927098873013e-07, rewards: -9.299999999999999, count: 50
epoch: 99359, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 99369, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 9937

epoch: 100259, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 100269, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 100279, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 100289, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 100299, loss: 5.388259864957945e-07, rewards: -9.299999999999999, count: 50
epoch: 100309, loss: 2.7298926852381555e-07, rewards: -9.299999999999999, count: 50
epoch: 100319, loss: -5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 100329, loss: -1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 100339, loss: -3.55362885784416e-06, rewards: -9.299999999999999, count: 50
epoch: 100349, loss: -1.472711574024288e-05, rewards: -9.299999999999999, count: 50
epoch: 100359, loss: -8.919835090637207e-05, rewards: -9.299999999999999, count: 50
epoch: 100369, loss: 5.440950553747825e-05, rewards: -9.299999999999999, co

epoch: 101259, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 101269, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 101279, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 101289, loss: 7.355213256232673e-06, rewards: -9.299999999999999, count: 50
epoch: 101299, loss: 6.571412086486816e-05, rewards: -9.299999999999999, count: 50
epoch: 101309, loss: -2.770662285911385e-05, rewards: -9.299999999999999, count: 50
epoch: 101319, loss: 5.238533049123362e-05, rewards: -9.299999999999999, count: 50
epoch: 101329, loss: 3.133654536213726e-05, rewards: -9.299999999999999, count: 50
epoch: 101339, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 101349, loss: -1.1793375051638577e-05, rewards: -9.299999999999999, count: 50
epoch: 101359, loss: -4.355907549324911e-06, rewards: -9.299999999999999, count: 50
epoch: 101369, loss: 4.122257450944744e-06, rewards: -9.299999999999999, count: 50

epoch: 102249, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 102259, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 102269, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 102279, loss: 2.100467781929183e-06, rewards: -9.299999999999999, count: 50
epoch: 102289, loss: 1.3250112715468276e-05, rewards: -9.299999999999999, count: 50
epoch: 102299, loss: 0.00010305643081665039, rewards: -9.299999999999999, count: 50
epoch: 102309, loss: -8.123159204842523e-05, rewards: -9.299999999999999, count: 50
epoch: 102319, loss: 9.496212442172691e-06, rewards: -9.299999999999999, count: 50
epoch: 102329, loss: 2.9397011530818418e-05, rewards: -9.299999999999999, count: 50
epoch: 102339, loss: 4.494190307013923e-06, rewards: -9.299999999999999, count: 50
epoch: 102349, loss: -1.0362863577029202e-05, rewards: -9.299999999999999, count: 50
epoch: 102359, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count

epoch: 103239, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 103249, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 103259, loss: 1.920461727422662e-06, rewards: -9.299999999999999, count: 50
epoch: 103269, loss: 7.971525519678835e-06, rewards: -9.299999999999999, count: 50
epoch: 103279, loss: 4.7876834287308156e-05, rewards: -9.299999999999999, count: 50
epoch: 103289, loss: 5.44869908480905e-05, rewards: -9.299999999999999, count: 50
epoch: 103299, loss: 1.1856555829581339e-05, rewards: -9.299999999999999, count: 50
epoch: 103309, loss: -3.0907391192158684e-05, rewards: -9.299999999999999, count: 50
epoch: 103319, loss: 1.3092756489641033e-05, rewards: -9.299999999999999, count: 50
epoch: 103329, loss: 1.3804435639030999e-06, rewards: -9.299999999999999, count: 50
epoch: 103339, loss: -4.755258487421088e-06, rewards: -9.299999999999999, count: 50
epoch: 103349, loss: 3.6454200653679436e-06, rewards: -9.299999999999999, count: 

epoch: 104229, loss: 3.2973289307847153e-06, rewards: -9.299999999999999, count: 50
epoch: 104239, loss: -1.7726421219776967e-06, rewards: -9.299999999999999, count: 50
epoch: 104249, loss: 1.4853477523502079e-06, rewards: -9.299999999999999, count: 50
epoch: 104259, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 104269, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 104279, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 104289, loss: 1.4007091522216797e-06, rewards: -9.299999999999999, count: 50
epoch: 104299, loss: 2.874136043828912e-06, rewards: -9.299999999999999, count: 50
epoch: 104309, loss: 1.2228489140397869e-05, rewards: -9.299999999999999, count: 50
epoch: 104319, loss: 6.816029781475663e-05, rewards: -9.299999999999999, count: 50
epoch: 104329, loss: -7.193088549684035e-06, rewards: -9.299999999999999, count: 50
epoch: 104339, loss: 3.604054290917702e-05, rewards: -9.299999999999999, coun

epoch: 105219, loss: 1.3167858014639933e-05, rewards: -9.299999999999999, count: 50
epoch: 105229, loss: -1.6239881006185897e-05, rewards: -9.299999999999999, count: 50
epoch: 105239, loss: 1.7380714325554436e-06, rewards: -9.299999999999999, count: 50
epoch: 105249, loss: 4.34637058788212e-06, rewards: -9.299999999999999, count: 50
epoch: 105259, loss: -3.650188546089339e-06, rewards: -9.299999999999999, count: 50
epoch: 105269, loss: 1.8668174561753403e-06, rewards: -9.299999999999999, count: 50
epoch: 105279, loss: -1.0180473282161984e-06, rewards: -9.299999999999999, count: 50
epoch: 105289, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 105299, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 105309, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 105319, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 105329, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count

epoch: 106199, loss: -8.623599569546059e-06, rewards: -9.299999999999999, count: 50
epoch: 106209, loss: -9.233951459464151e-06, rewards: -9.299999999999999, count: 50
epoch: 106219, loss: 1.6307831174344756e-06, rewards: -9.299999999999999, count: 50
epoch: 106229, loss: 3.185272134942352e-06, rewards: -9.299999999999999, count: 50
epoch: 106239, loss: -1.4710426512465347e-06, rewards: -9.299999999999999, count: 50
epoch: 106249, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 106259, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 106269, loss: -5.686283088834898e-07, rewards: -9.299999999999999, count: 50
epoch: 106279, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 106289, loss: -1.8835068260614207e-07, rewards: -9.299999999999999, count: 50
epoch: 106299, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 106309, loss: 5.960464477539063e-08, rewards: -9.299999999999999, coun

epoch: 107179, loss: -4.5657156988454517e-07, rewards: -9.299999999999999, count: 50
epoch: 107189, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 107199, loss: 3.2424927098873013e-07, rewards: -9.299999999999999, count: 50
epoch: 107209, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 107219, loss: -1.4173984936860506e-06, rewards: -9.299999999999999, count: 50
epoch: 107229, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 107239, loss: 1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 107249, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 107259, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 107269, loss: 3.908872713509481e-06, rewards: -9.299999999999999, count: 50
epoch: 107279, loss: 2.868175579351373e-05, rewards: -9.299999999999999, count: 50
epoch: 107289, loss: 0.00012059926666552201, rewards: -9.299999999999999, count

epoch: 108179, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 108189, loss: 1.962184796866495e-06, rewards: -9.299999999999999, count: 50
epoch: 108199, loss: 5.458593477669638e-06, rewards: -9.299999999999999, count: 50
epoch: 108209, loss: 2.4596452931291424e-05, rewards: -9.299999999999999, count: 50
epoch: 108219, loss: 0.00010173082409892231, rewards: -9.299999999999999, count: 50
epoch: 108229, loss: -6.006837065797299e-05, rewards: -9.299999999999999, count: 50
epoch: 108239, loss: 2.714991569519043e-05, rewards: -9.299999999999999, count: 50
epoch: 108249, loss: -2.2470951535069617e-06, rewards: -9.299999999999999, count: 50
epoch: 108259, loss: -6.372928510245401e-06, rewards: -9.299999999999999, count: 50
epoch: 108269, loss: 6.608962848986266e-06, rewards: -9.299999999999999, count: 50
epoch: 108279, loss: -4.32133674621582e-06, rewards: -9.299999999999999, count: 50
epoch: 108289, loss: 1.8644333295014803e-06, rewards: -9.299999999999999, count: 

epoch: 109179, loss: -7.812976946297567e-06, rewards: -9.299999999999999, count: 50
epoch: 109189, loss: 1.994371359614888e-06, rewards: -9.299999999999999, count: 50
epoch: 109199, loss: 1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 109209, loss: -1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 109219, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 109229, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 109239, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 109249, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 109259, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 109269, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 109279, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 109289, loss: -5.686283088834898e-07, rewards: -9.299999999999999, coun

epoch: 110169, loss: 1.4345645467983559e-05, rewards: -9.299999999999999, count: 50
epoch: 110179, loss: -7.177591214713175e-06, rewards: -9.299999999999999, count: 50
epoch: 110189, loss: 2.2494793938676594e-06, rewards: -9.299999999999999, count: 50
epoch: 110199, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 110209, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 110219, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 110229, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 110239, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 110249, loss: -9.179115068036481e-07, rewards: -9.299999999999999, count: 50
epoch: 110259, loss: -9.834766387939453e-07, rewards: -9.299999999999999, count: 50
epoch: 110269, loss: -1.4710426512465347e-06, rewards: -9.299999999999999, count: 50
epoch: 110279, loss: -5.985498319205362e-06, rewards: -9.299999999999999, cou

epoch: 111159, loss: 9.80913609964773e-05, rewards: -9.299999999999999, count: 50
epoch: 111169, loss: -6.503343320218846e-05, rewards: -9.299999999999999, count: 50
epoch: 111179, loss: 3.419518543523736e-05, rewards: -9.299999999999999, count: 50
epoch: 111189, loss: 7.766485396132339e-06, rewards: -9.299999999999999, count: 50
epoch: 111199, loss: -1.5573501514154486e-05, rewards: -9.299999999999999, count: 50
epoch: 111209, loss: 7.015466508164536e-06, rewards: -9.299999999999999, count: 50
epoch: 111219, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 111229, loss: -5.197525183575635e-07, rewards: -9.299999999999999, count: 50
epoch: 111239, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 111249, loss: -1.668930025289228e-07, rewards: -9.299999999999999, count: 50
epoch: 111259, loss: 3.5762788286319847e-08, rewards: -9.299999999999999, count: 50
epoch: 111269, loss: 1.146793351836095e-06, rewards: -9.299999999999999, count: 

epoch: 112149, loss: -7.796287718520034e-06, rewards: -9.299999999999999, count: 50
epoch: 112159, loss: 1.043081283569336e-06, rewards: -9.299999999999999, count: 50
epoch: 112169, loss: 3.7229060581012163e-06, rewards: -9.299999999999999, count: 50
epoch: 112179, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 112189, loss: -1.8846989178200602e-06, rewards: -9.299999999999999, count: 50
epoch: 112199, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 112209, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 112219, loss: -7.915496667010302e-07, rewards: -9.299999999999999, count: 50
epoch: 112229, loss: 5.388259864957945e-07, rewards: -9.299999999999999, count: 50
epoch: 112239, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 112249, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 112259, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count:

epoch: 113139, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 113149, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 113159, loss: 3.598928515202715e-06, rewards: -9.299999999999999, count: 50
epoch: 113169, loss: 1.3564825167122763e-05, rewards: -9.299999999999999, count: 50
epoch: 113179, loss: 7.179140811786056e-05, rewards: -9.299999999999999, count: 50
epoch: 113189, loss: -1.5096664355951361e-05, rewards: -9.299999999999999, count: 50
epoch: 113199, loss: 3.4421682357788086e-05, rewards: -9.299999999999999, count: 50
epoch: 113209, loss: -2.577424129412975e-05, rewards: -9.299999999999999, count: 50
epoch: 113219, loss: 1.0557174391578883e-05, rewards: -9.299999999999999, count: 50
epoch: 113229, loss: -2.123117383234785e-06, rewards: -9.299999999999999, count: 50
epoch: 113239, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 113249, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, cou

epoch: 114129, loss: 1.2469291732486454e-06, rewards: -9.299999999999999, count: 50
epoch: 114139, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 114149, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 114159, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 114169, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 114179, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 114189, loss: -4.148483299104555e-07, rewards: -9.299999999999999, count: 50
epoch: 114199, loss: -1.817941665649414e-06, rewards: -9.299999999999999, count: 50
epoch: 114209, loss: -7.216930498543661e-06, rewards: -9.299999999999999, count: 50
epoch: 114219, loss: -3.8408041291404516e-05, rewards: -9.299999999999999, count: 50
epoch: 114229, loss: -8.442521357210353e-05, rewards: -9.299999999999999, count: 50
epoch: 114239, loss: 2.7139187295688316e-05, rewards: -9.299999999999999, co

epoch: 115119, loss: -2.0161867723800242e-05, rewards: -9.299999999999999, count: 50
epoch: 115129, loss: -2.3062229956849478e-05, rewards: -9.299999999999999, count: 50
epoch: 115139, loss: 1.1007785360561684e-05, rewards: -9.299999999999999, count: 50
epoch: 115149, loss: 4.611015356204007e-06, rewards: -9.299999999999999, count: 50
epoch: 115159, loss: -5.896091352042276e-06, rewards: -9.299999999999999, count: 50
epoch: 115169, loss: 2.473592758178711e-06, rewards: -9.299999999999999, count: 50
epoch: 115179, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 115189, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 115199, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 115209, loss: 1.5735626845980732e-07, rewards: -9.299999999999999, count: 50
epoch: 115219, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 115229, loss: 6.34193440873787e-07, rewards: -9.299999999999999, cou

epoch: 116109, loss: 9.797811799217016e-06, rewards: -9.299999999999999, count: 50
epoch: 116119, loss: -4.545450337900547e-06, rewards: -9.299999999999999, count: 50
epoch: 116129, loss: 1.8060206912196008e-06, rewards: -9.299999999999999, count: 50
epoch: 116139, loss: -7.891654831837513e-07, rewards: -9.299999999999999, count: 50
epoch: 116149, loss: 6.949901489861077e-07, rewards: -9.299999999999999, count: 50
epoch: 116159, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 116169, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 116179, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 116189, loss: 8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 116199, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 116209, loss: 1.2469291732486454e-06, rewards: -9.299999999999999, count: 50
epoch: 116219, loss: 6.893873433000408e-06, rewards: -9.299999999999999, count: 5

epoch: 117099, loss: -7.35700159566477e-05, rewards: -9.299999999999999, count: 50
epoch: 117109, loss: 2.0366907847346738e-05, rewards: -9.299999999999999, count: 50
epoch: 117119, loss: -3.8564205169677734e-05, rewards: -9.299999999999999, count: 50
epoch: 117129, loss: 2.451181353535503e-05, rewards: -9.299999999999999, count: 50
epoch: 117139, loss: -6.288290023803711e-06, rewards: -9.299999999999999, count: 50
epoch: 117149, loss: -1.871585823209898e-06, rewards: -9.299999999999999, count: 50
epoch: 117159, loss: 2.9015541258559097e-06, rewards: -9.299999999999999, count: 50
epoch: 117169, loss: -1.8477439880371094e-06, rewards: -9.299999999999999, count: 50
epoch: 117179, loss: 7.867812996664725e-07, rewards: -9.299999999999999, count: 50
epoch: 117189, loss: 6.902217819515499e-07, rewards: -9.299999999999999, count: 50
epoch: 117199, loss: -1.817941665649414e-06, rewards: -9.299999999999999, count: 50
epoch: 117209, loss: -4.220008804622921e-07, rewards: -9.299999999999999, coun

epoch: 118079, loss: 5.0195456424262375e-05, rewards: -9.299999999999999, count: 50
epoch: 118089, loss: 5.110502115712734e-06, rewards: -9.299999999999999, count: 50
epoch: 118099, loss: -1.0182857295149006e-05, rewards: -9.299999999999999, count: 50
epoch: 118109, loss: -1.366376909572864e-05, rewards: -9.299999999999999, count: 50
epoch: 118119, loss: -1.0197162737313192e-05, rewards: -9.299999999999999, count: 50
epoch: 118129, loss: -3.983974238508381e-06, rewards: -9.299999999999999, count: 50
epoch: 118139, loss: 1.4054775192562374e-06, rewards: -9.299999999999999, count: 50
epoch: 118149, loss: 2.404451379334205e-06, rewards: -9.299999999999999, count: 50
epoch: 118159, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 118169, loss: -8.094310715023312e-07, rewards: -9.299999999999999, count: 50
epoch: 118179, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 118189, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, co

epoch: 119069, loss: 9.0726614871528e-05, rewards: -9.299999999999999, count: 50
epoch: 119079, loss: -4.9852133088279516e-05, rewards: -9.299999999999999, count: 50
epoch: 119089, loss: 3.62646569556091e-05, rewards: -9.299999999999999, count: 50
epoch: 119099, loss: -1.973390499188099e-05, rewards: -9.299999999999999, count: 50
epoch: 119109, loss: 9.119510650634766e-06, rewards: -9.299999999999999, count: 50
epoch: 119119, loss: -5.087852514407132e-06, rewards: -9.299999999999999, count: 50
epoch: 119129, loss: 4.075765446032165e-06, rewards: -9.299999999999999, count: 50
epoch: 119139, loss: -3.0648707252112217e-06, rewards: -9.299999999999999, count: 50
epoch: 119149, loss: 1.3661384627994266e-06, rewards: -9.299999999999999, count: 50
epoch: 119159, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 119169, loss: -1.3756751968685421e-06, rewards: -9.299999999999999, count: 50
epoch: 119179, loss: -2.5534629912726814e-06, rewards: -9.299999999999999, count:

epoch: 120059, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 120069, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 120079, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 120089, loss: 2.282857849422726e-06, rewards: -9.299999999999999, count: 50
epoch: 120099, loss: 8.876323590811808e-06, rewards: -9.299999999999999, count: 50
epoch: 120109, loss: 6.084680717322044e-05, rewards: -9.299999999999999, count: 50
epoch: 120119, loss: 5.620718184218276e-06, rewards: -9.299999999999999, count: 50
epoch: 120129, loss: 4.739999712910503e-05, rewards: -9.299999999999999, count: 50
epoch: 120139, loss: -1.7812251826399006e-05, rewards: -9.299999999999999, count: 50
epoch: 120149, loss: -1.2730360140267294e-05, rewards: -9.299999999999999, count: 50
epoch: 120159, loss: 1.0708570698625408e-05, rewards: -9.299999999999999, count: 50
epoch: 120169, loss: -2.008676574405399e-06, rewards: -9.299999999999999, count:

epoch: 121039, loss: 2.2901296688360162e-05, rewards: -9.299999999999999, count: 50
epoch: 121049, loss: -2.889394818339497e-05, rewards: -9.299999999999999, count: 50
epoch: 121059, loss: 1.245737075805664e-05, rewards: -9.299999999999999, count: 50
epoch: 121069, loss: -1.2123584838263923e-06, rewards: -9.299999999999999, count: 50
epoch: 121079, loss: -1.871585823209898e-06, rewards: -9.299999999999999, count: 50
epoch: 121089, loss: 1.8799305507855024e-06, rewards: -9.299999999999999, count: 50
epoch: 121099, loss: -6.783008643651556e-07, rewards: -9.299999999999999, count: 50
epoch: 121109, loss: -7.402896926578251e-07, rewards: -9.299999999999999, count: 50
epoch: 121119, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 121129, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 121139, loss: 6.747245606675278e-07, rewards: -9.299999999999999, count: 50
epoch: 121149, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count:

epoch: 122039, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 122049, loss: 6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 122059, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 122069, loss: 3.874301910400391e-06, rewards: -9.299999999999999, count: 50
epoch: 122079, loss: 2.7711392249329947e-05, rewards: -9.299999999999999, count: 50
epoch: 122089, loss: 0.00012133479322073981, rewards: -9.299999999999999, count: 50
epoch: 122099, loss: -2.676248550415039e-05, rewards: -9.299999999999999, count: 50
epoch: 122109, loss: -3.773450953303836e-05, rewards: -9.299999999999999, count: 50
epoch: 122119, loss: 6.936788395250915e-06, rewards: -9.299999999999999, count: 50
epoch: 122129, loss: 1.356720895273611e-05, rewards: -9.299999999999999, count: 50
epoch: 122139, loss: -4.708766937255859e-06, rewards: -9.299999999999999, count: 50
epoch: 122149, loss: -2.7263165520707844e-06, rewards: -9.299999999999999, count:

epoch: 123029, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 123039, loss: 4.72784040539409e-06, rewards: -9.299999999999999, count: 50
epoch: 123049, loss: 3.346562516526319e-05, rewards: -9.299999999999999, count: 50
epoch: 123059, loss: 0.00010695576929720119, rewards: -9.299999999999999, count: 50
epoch: 123069, loss: 5.784034783573588e-06, rewards: -9.299999999999999, count: 50
epoch: 123079, loss: -3.8092137401690707e-05, rewards: -9.299999999999999, count: 50
epoch: 123089, loss: -7.833242307242472e-06, rewards: -9.299999999999999, count: 50
epoch: 123099, loss: 1.3167858014639933e-05, rewards: -9.299999999999999, count: 50
epoch: 123109, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 123119, loss: -5.346536454453599e-06, rewards: -9.299999999999999, count: 50
epoch: 123129, loss: 2.4437904357910156e-06, rewards: -9.299999999999999, count: 50
epoch: 123139, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count

epoch: 124019, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 124029, loss: -4.2915345943583816e-08, rewards: -9.299999999999999, count: 50
epoch: 124039, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 124049, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 124059, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 124069, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 124079, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 124089, loss: 2.7298926852381555e-07, rewards: -9.299999999999999, count: 50
epoch: 124099, loss: 1.0406970432086382e-06, rewards: -9.299999999999999, count: 50
epoch: 124109, loss: 2.8204917725815903e-06, rewards: -9.299999999999999, count: 50
epoch: 124119, loss: 1.1878013538080268e-05, rewards: -9.299999999999999, count: 50
epoch: 124129, loss: 7.230519986478612e-05, rewards: -9.299999999999999, count

epoch: 125009, loss: -2.782583214866463e-05, rewards: -9.299999999999999, count: 50
epoch: 125019, loss: 5.108117875352036e-06, rewards: -9.299999999999999, count: 50
epoch: 125029, loss: 1.0141134225705173e-05, rewards: -9.299999999999999, count: 50
epoch: 125039, loss: -3.869533429678995e-06, rewards: -9.299999999999999, count: 50
epoch: 125049, loss: -1.7380714325554436e-06, rewards: -9.299999999999999, count: 50
epoch: 125059, loss: 2.2006033759680577e-06, rewards: -9.299999999999999, count: 50
epoch: 125069, loss: -1.3029575711698271e-06, rewards: -9.299999999999999, count: 50
epoch: 125079, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 125089, loss: -5.412101700130734e-07, rewards: -9.299999999999999, count: 50
epoch: 125099, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 125109, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 125119, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, co

epoch: 125999, loss: -2.9277800877025584e-06, rewards: -9.299999999999999, count: 50
epoch: 126009, loss: -9.40203699428821e-06, rewards: -9.299999999999999, count: 50
epoch: 126019, loss: -4.715204340755008e-05, rewards: -9.299999999999999, count: 50
epoch: 126029, loss: -5.7656765420688316e-05, rewards: -9.299999999999999, count: 50
epoch: 126039, loss: 1.130938562710071e-05, rewards: -9.299999999999999, count: 50
epoch: 126049, loss: 1.3438463611237239e-05, rewards: -9.299999999999999, count: 50
epoch: 126059, loss: -1.564621925354004e-05, rewards: -9.299999999999999, count: 50
epoch: 126069, loss: 1.046538363880245e-05, rewards: -9.299999999999999, count: 50
epoch: 126079, loss: -6.265640422498109e-06, rewards: -9.299999999999999, count: 50
epoch: 126089, loss: 3.2949446904240176e-06, rewards: -9.299999999999999, count: 50
epoch: 126099, loss: -1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 126109, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, co

epoch: 126979, loss: -5.8343412092654034e-05, rewards: -9.299999999999999, count: 50
epoch: 126989, loss: -1.4102459317655303e-05, rewards: -9.299999999999999, count: 50
epoch: 126999, loss: -4.584074122249149e-05, rewards: -9.299999999999999, count: 50
epoch: 127009, loss: 2.052426316367928e-05, rewards: -9.299999999999999, count: 50
epoch: 127019, loss: 1.1819601240858901e-05, rewards: -9.299999999999999, count: 50
epoch: 127029, loss: -1.094698927772697e-05, rewards: -9.299999999999999, count: 50
epoch: 127039, loss: 2.3412703740177676e-06, rewards: -9.299999999999999, count: 50
epoch: 127049, loss: 1.592636067471176e-06, rewards: -9.299999999999999, count: 50
epoch: 127059, loss: -1.871585823209898e-06, rewards: -9.299999999999999, count: 50
epoch: 127069, loss: 1.5175342014117632e-06, rewards: -9.299999999999999, count: 50
epoch: 127079, loss: -9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 127089, loss: 5.435943535303522e-07, rewards: -9.299999999999999, cou

epoch: 127969, loss: -1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 127979, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 127989, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 127999, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 128009, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 128019, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 128029, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 128039, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 128049, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 128059, loss: -1.1062621751989354e-06, rewards: -9.299999999999999, count: 50
epoch: 128069, loss: -4.606246875482611e-06, rewards: -9.299999999999999, count: 50
epoch: 128079, loss: -3.1861065508564934e-05, rewards: -9.299999999999999, c

epoch: 128959, loss: -1.1610984984145034e-05, rewards: -9.299999999999999, count: 50
epoch: 128969, loss: -1.3625622159452178e-05, rewards: -9.299999999999999, count: 50
epoch: 128979, loss: -2.4437904357910156e-06, rewards: -9.299999999999999, count: 50
epoch: 128989, loss: 4.701614216173766e-06, rewards: -9.299999999999999, count: 50
epoch: 128999, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 129009, loss: -1.996755599975586e-06, rewards: -9.299999999999999, count: 50
epoch: 129019, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 129029, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 129039, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 129049, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 129059, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 129069, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count:

epoch: 129949, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 129959, loss: -9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 129969, loss: -2.2149085907585686e-06, rewards: -9.299999999999999, count: 50
epoch: 129979, loss: -1.2671947843045928e-05, rewards: -9.299999999999999, count: 50
epoch: 129989, loss: -8.227944636018947e-05, rewards: -9.299999999999999, count: 50
epoch: 129999, loss: 4.5721531932940707e-05, rewards: -9.299999999999999, count: 50
epoch: 130009, loss: -4.6193599700927734e-05, rewards: -9.299999999999999, count: 50
epoch: 130019, loss: 1.4853477523502079e-06, rewards: -9.299999999999999, count: 50
epoch: 130029, loss: 1.542091376904864e-05, rewards: -9.299999999999999, count: 50
epoch: 130039, loss: -8.131265531119425e-06, rewards: -9.299999999999999, count: 50
epoch: 130049, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 130059, loss: 1.6534328324269154e-06, rewards: -9.299999999999999, c

epoch: 130949, loss: -3.2973289307847153e-06, rewards: -9.299999999999999, count: 50
epoch: 130959, loss: 1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 130969, loss: 6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 130979, loss: 2.2649764730431343e-08, rewards: -9.299999999999999, count: 50
epoch: 130989, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 130999, loss: 1.6331672441083356e-06, rewards: -9.299999999999999, count: 50
epoch: 131009, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 131019, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 131029, loss: -2.448558916512411e-06, rewards: -9.299999999999999, count: 50
epoch: 131039, loss: -7.766485396132339e-06, rewards: -9.299999999999999, count: 50
epoch: 131049, loss: -4.1269064240623266e-05, rewards: -9.299999999999999, count: 50
epoch: 131059, loss: -7.716059917584062e-05, rewards: -9.299999999999999, cou

epoch: 131929, loss: 2.8312206268310547e-06, rewards: -9.299999999999999, count: 50
epoch: 131939, loss: 1.9925832020817325e-05, rewards: -9.299999999999999, count: 50
epoch: 131949, loss: 0.0001253533409908414, rewards: -9.299999999999999, count: 50
epoch: 131959, loss: -6.454229151131585e-05, rewards: -9.299999999999999, count: 50
epoch: 131969, loss: -2.5327206458314322e-05, rewards: -9.299999999999999, count: 50
epoch: 131979, loss: 1.9160508600180037e-05, rewards: -9.299999999999999, count: 50
epoch: 131989, loss: 1.0832548468897585e-05, rewards: -9.299999999999999, count: 50
epoch: 131999, loss: -7.656813068024348e-06, rewards: -9.299999999999999, count: 50
epoch: 132009, loss: -1.5318394162022742e-06, rewards: -9.299999999999999, count: 50
epoch: 132019, loss: 3.3974647521972656e-06, rewards: -9.299999999999999, count: 50
epoch: 132029, loss: -1.9478798094496597e-06, rewards: -9.299999999999999, count: 50
epoch: 132039, loss: 7.498264267269406e-07, rewards: -9.299999999999999, c

epoch: 132929, loss: -7.355213256232673e-07, rewards: -9.299999999999999, count: 50
epoch: 132939, loss: 1.62363051003922e-06, rewards: -9.299999999999999, count: 50
epoch: 132949, loss: 1.5950203078318737e-06, rewards: -9.299999999999999, count: 50
epoch: 132959, loss: 1.920461727422662e-06, rewards: -9.299999999999999, count: 50
epoch: 132969, loss: 5.307197625370463e-06, rewards: -9.299999999999999, count: 50
epoch: 132979, loss: 2.5103092411882244e-05, rewards: -9.299999999999999, count: 50
epoch: 132989, loss: 0.00010723352170316502, rewards: -9.299999999999999, count: 50
epoch: 132999, loss: -6.107806984800845e-05, rewards: -9.299999999999999, count: 50
epoch: 133009, loss: 1.4469623238255735e-05, rewards: -9.299999999999999, count: 50
epoch: 133019, loss: 1.1934042049688287e-05, rewards: -9.299999999999999, count: 50
epoch: 133029, loss: -1.3284683518577367e-05, rewards: -9.299999999999999, count: 50
epoch: 133039, loss: 7.936954716569744e-06, rewards: -9.299999999999999, count:

epoch: 133919, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 133929, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count: 50
epoch: 133939, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 133949, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 133959, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 133969, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 133979, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 133989, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 133999, loss: 1.9478798094496597e-06, rewards: -9.299999999999999, count: 50
epoch: 134009, loss: 9.750127901497763e-06, rewards: -9.299999999999999, count: 50
epoch: 134019, loss: 6.41691658529453e-05, rewards: -9.299999999999999, count: 50
epoch: 134029, loss: -1.7797947293729521e-06, rewards: -9.299999999999999, count: 5

epoch: 134899, loss: -1.816987969505135e-05, rewards: -9.299999999999999, count: 50
epoch: 134909, loss: 3.6799908684770344e-06, rewards: -9.299999999999999, count: 50
epoch: 134919, loss: 2.4437904357910156e-06, rewards: -9.299999999999999, count: 50
epoch: 134929, loss: -2.9981135867274133e-06, rewards: -9.299999999999999, count: 50
epoch: 134939, loss: 1.8775463104248047e-06, rewards: -9.299999999999999, count: 50
epoch: 134949, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 134959, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 134969, loss: 9.739399047248298e-07, rewards: -9.299999999999999, count: 50
epoch: 134979, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 134989, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 134999, loss: 8.618831657258852e-07, rewards: -9.299999999999999, count: 50
epoch: 135009, loss: 3.869533429678995e-06, rewards: -9.299999999999999, count

epoch: 135889, loss: -5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 135899, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 135909, loss: -2.88605679088505e-06, rewards: -9.299999999999999, count: 50
epoch: 135919, loss: -2.0787716493941844e-05, rewards: -9.299999999999999, count: 50
epoch: 135929, loss: -0.00012179255281807855, rewards: -9.299999999999999, count: 50
epoch: 135939, loss: 6.534457497764379e-05, rewards: -9.299999999999999, count: 50
epoch: 135949, loss: 1.6435384168289602e-05, rewards: -9.299999999999999, count: 50
epoch: 135959, loss: -2.346634937566705e-05, rewards: -9.299999999999999, count: 50
epoch: 135969, loss: -4.34637058788212e-06, rewards: -9.299999999999999, count: 50
epoch: 135979, loss: 9.121894436248112e-06, rewards: -9.299999999999999, count: 50
epoch: 135989, loss: -2.297163064213237e-06, rewards: -9.299999999999999, count: 50
epoch: 135999, loss: -1.5246868088070187e-06, rewards: -9.299999999999999, count

epoch: 136879, loss: -8.60810268932255e-06, rewards: -9.299999999999999, count: 50
epoch: 136889, loss: 1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 136899, loss: 1.3756751968685421e-06, rewards: -9.299999999999999, count: 50
epoch: 136909, loss: -1.9502640498103574e-06, rewards: -9.299999999999999, count: 50
epoch: 136919, loss: 1.592636067471176e-06, rewards: -9.299999999999999, count: 50
epoch: 136929, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 136939, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 136949, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 136959, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 136969, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 136979, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 136989, loss: 7.796287491146359e-07, rewards: -9.299999999999999, count:

epoch: 137869, loss: -4.748106221086346e-06, rewards: -9.299999999999999, count: 50
epoch: 137879, loss: 2.3174286525318166e-06, rewards: -9.299999999999999, count: 50
epoch: 137889, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 137899, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 137909, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 137919, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 137929, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 137939, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 137949, loss: -7.891654831837513e-07, rewards: -9.299999999999999, count: 50
epoch: 137959, loss: -1.4710426512465347e-06, rewards: -9.299999999999999, count: 50
epoch: 137969, loss: -4.075765446032165e-06, rewards: -9.299999999999999, count: 50
epoch: 137979, loss: -1.9809007426374592e-05, rewards: -9.299999999999999, coun

epoch: 138859, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 138869, loss: -4.410743770222325e-07, rewards: -9.299999999999999, count: 50
epoch: 138879, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 138889, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 138899, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 138909, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 138919, loss: -1.4054775192562374e-06, rewards: -9.299999999999999, count: 50
epoch: 138929, loss: -8.841752787702717e-06, rewards: -9.299999999999999, count: 50
epoch: 138939, loss: -8.765101665630937e-05, rewards: -9.299999999999999, count: 50
epoch: 138949, loss: 8.000731759238988e-05, rewards: -9.299999999999999, count: 50
epoch: 138959, loss: -1.035571131069446e-05, rewards: -9.299999999999999, count: 50
epoch: 138969, loss: -2.9985903893248178e-05, rewards: -9.299999999999999, c

epoch: 139849, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 139859, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 139869, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 139879, loss: 2.7239323117100867e-06, rewards: -9.299999999999999, count: 50
epoch: 139889, loss: 1.1061430086556356e-05, rewards: -9.299999999999999, count: 50
epoch: 139899, loss: 6.350874900817871e-05, rewards: -9.299999999999999, count: 50
epoch: 139909, loss: 4.804134277947014e-06, rewards: -9.299999999999999, count: 50
epoch: 139919, loss: 3.3209322282345966e-05, rewards: -9.299999999999999, count: 50
epoch: 139929, loss: -2.787232369882986e-05, rewards: -9.299999999999999, count: 50
epoch: 139939, loss: 7.718801498413086e-06, rewards: -9.299999999999999, count: 50
epoch: 139949, loss: 2.771615982055664e-06, rewards: -9.299999999999999, count: 50
epoch: 139959, loss: -4.392862138047349e-06, rewards: -9.299999999999999, count: 5

epoch: 140829, loss: -8.380413419217803e-06, rewards: -9.299999999999999, count: 50
epoch: 140839, loss: -2.638101477714372e-06, rewards: -9.299999999999999, count: 50
epoch: 140849, loss: 4.378557150630513e-06, rewards: -9.299999999999999, count: 50
epoch: 140859, loss: -3.727674538822612e-06, rewards: -9.299999999999999, count: 50
epoch: 140869, loss: 2.3567677089886274e-06, rewards: -9.299999999999999, count: 50
epoch: 140879, loss: -1.21712685086095e-06, rewards: -9.299999999999999, count: 50
epoch: 140889, loss: 9.238719940185547e-07, rewards: -9.299999999999999, count: 50
epoch: 140899, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 140909, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 140919, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 140929, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 140939, loss: -3.983974238508381e-06, rewards: -9.299999999999999, coun

epoch: 141829, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 141839, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 141849, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, count: 50
epoch: 141859, loss: -4.2915345943583816e-08, rewards: -9.299999999999999, count: 50
epoch: 141869, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 141879, loss: 1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 141889, loss: 6.139278411865234e-06, rewards: -9.299999999999999, count: 50
epoch: 141899, loss: 5.601286829914898e-05, rewards: -9.299999999999999, count: 50
epoch: 141909, loss: 4.484653345571132e-06, rewards: -9.299999999999999, count: 50
epoch: 141919, loss: 6.157398456707597e-05, rewards: -9.299999999999999, count: 50
epoch: 141929, loss: 2.6280880774720572e-05, rewards: -9.299999999999999, count: 50
epoch: 141939, loss: -5.027055522077717e-06, rewards: -9.299999999999999, coun

epoch: 142819, loss: -6.163120360724861e-06, rewards: -9.299999999999999, count: 50
epoch: 142829, loss: -2.771615982055664e-05, rewards: -9.299999999999999, count: 50
epoch: 142839, loss: -9.796738595468923e-05, rewards: -9.299999999999999, count: 50
epoch: 142849, loss: 5.632638931274414e-05, rewards: -9.299999999999999, count: 50
epoch: 142859, loss: -2.5682449631858617e-05, rewards: -9.299999999999999, count: 50
epoch: 142869, loss: 5.438327661977382e-06, rewards: -9.299999999999999, count: 50
epoch: 142879, loss: 1.8060206912196008e-06, rewards: -9.299999999999999, count: 50
epoch: 142889, loss: -2.43902195506962e-06, rewards: -9.299999999999999, count: 50
epoch: 142899, loss: 9.262561775358336e-07, rewards: -9.299999999999999, count: 50
epoch: 142909, loss: 1.1110305422334932e-06, rewards: -9.299999999999999, count: 50
epoch: 142919, loss: -1.5997886748664314e-06, rewards: -9.299999999999999, count: 50
epoch: 142929, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count

epoch: 143809, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 143819, loss: 1.8668174561753403e-06, rewards: -9.299999999999999, count: 50
epoch: 143829, loss: -1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 143839, loss: -1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 143849, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 143859, loss: -1.3434887478069868e-06, rewards: -9.299999999999999, count: 50
epoch: 143869, loss: -6.859302629891317e-06, rewards: -9.299999999999999, count: 50
epoch: 143879, loss: -5.1667691877810284e-05, rewards: -9.299999999999999, count: 50
epoch: 143889, loss: -3.427267074584961e-05, rewards: -9.299999999999999, count: 50
epoch: 143899, loss: -4.927158443024382e-05, rewards: -9.299999999999999, count: 50
epoch: 143909, loss: 1.5685558537370525e-05, rewards: -9.299999999999999, count: 50
epoch: 143919, loss: 1.8899439965025522e-05, rewards: -9.299999999999999

epoch: 144799, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 144809, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 144819, loss: -6.263256182137411e-06, rewards: -9.299999999999999, count: 50
epoch: 144829, loss: -4.011392593383789e-05, rewards: -9.299999999999999, count: 50
epoch: 144839, loss: -8.159875869750977e-05, rewards: -9.299999999999999, count: 50
epoch: 144849, loss: -7.289648237929214e-06, rewards: -9.299999999999999, count: 50
epoch: 144859, loss: 3.487586945993826e-05, rewards: -9.299999999999999, count: 50
epoch: 144869, loss: -6.452798970713047e-06, rewards: -9.299999999999999, count: 50
epoch: 144879, loss: -9.710788617667276e-06, rewards: -9.299999999999999, count: 50
epoch: 144889, loss: 7.188320068962639e-06, rewards: -9.299999999999999, count: 50
epoch: 144899, loss: -2.2149085907585686e-06, rewards: -9.299999999999999, count: 50
epoch: 144909, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, c

epoch: 145789, loss: 1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 145799, loss: -4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 145809, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 145819, loss: 6.210804031070438e-07, rewards: -9.299999999999999, count: 50
epoch: 145829, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 145839, loss: 1.1563300859052106e-06, rewards: -9.299999999999999, count: 50
epoch: 145849, loss: 2.419948486931389e-06, rewards: -9.299999999999999, count: 50
epoch: 145859, loss: 8.860826710588299e-06, rewards: -9.299999999999999, count: 50
epoch: 145869, loss: 4.9680471420288086e-05, rewards: -9.299999999999999, count: 50
epoch: 145879, loss: 4.890322816208936e-05, rewards: -9.299999999999999, count: 50
epoch: 145889, loss: 9.630918611946981e-06, rewards: -9.299999999999999, count: 50
epoch: 145899, loss: -2.819776454998646e-05, rewards: -9.299999999999999, count: 50

epoch: 146779, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 146789, loss: -7.152557657263969e-08, rewards: -9.299999999999999, count: 50
epoch: 146799, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 146809, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 146819, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 146829, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 146839, loss: -1.8668174561753403e-06, rewards: -9.299999999999999, count: 50
epoch: 146849, loss: -1.535296360088978e-05, rewards: -9.299999999999999, count: 50
epoch: 146859, loss: -0.0001365792704746127, rewards: -9.299999999999999, count: 50
epoch: 146869, loss: 6.671428855042905e-05, rewards: -9.299999999999999, count: 50
epoch: 146879, loss: 5.063891512691043e-05, rewards: -9.299999999999999, count: 50
epoch: 146889, loss: 1.8055439795716666e-05, rewards: -9.299999999999999, co

epoch: 147769, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 147779, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 147789, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 147799, loss: 6.902217819515499e-07, rewards: -9.299999999999999, count: 50
epoch: 147809, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 147819, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 147829, loss: 6.902217819515499e-07, rewards: -9.299999999999999, count: 50
epoch: 147839, loss: 1.8799305507855024e-06, rewards: -9.299999999999999, count: 50
epoch: 147849, loss: 1.2961626453034114e-05, rewards: -9.299999999999999, count: 50
epoch: 147859, loss: 9.73296191659756e-05, rewards: -9.299999999999999, count: 50
epoch: 147869, loss: -7.442713103955612e-05, rewards: -9.299999999999999, count: 50
epoch: 147879, loss: 2.355813921894878e-05, rewards: -9.299999999999999, count: 50


epoch: 148749, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 148759, loss: -1.8835068260614207e-07, rewards: -9.299999999999999, count: 50
epoch: 148769, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 148779, loss: -1.2087822142348159e-05, rewards: -9.299999999999999, count: 50
epoch: 148789, loss: -0.00012918234278913587, rewards: -9.299999999999999, count: 50
epoch: 148799, loss: 8.422612881986424e-05, rewards: -9.299999999999999, count: 50
epoch: 148809, loss: 5.3623913117917255e-05, rewards: -9.299999999999999, count: 50
epoch: 148819, loss: 2.3198128474177793e-05, rewards: -9.299999999999999, count: 50
epoch: 148829, loss: 3.725290298461914e-06, rewards: -9.299999999999999, count: 50
epoch: 148839, loss: -6.034374109731289e-06, rewards: -9.299999999999999, count: 50
epoch: 148849, loss: -6.959438451303868e-06, rewards: -9.299999999999999, count: 50
epoch: 148859, loss: -2.4509429294994334e-06, rewards: -9.299999999999999, c

epoch: 149739, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 149749, loss: -1.1205672763026087e-06, rewards: -9.299999999999999, count: 50
epoch: 149759, loss: -4.869699296250474e-06, rewards: -9.299999999999999, count: 50
epoch: 149769, loss: -3.589749394450337e-05, rewards: -9.299999999999999, count: 50
epoch: 149779, loss: -9.749054879648611e-05, rewards: -9.299999999999999, count: 50
epoch: 149789, loss: -1.806020736694336e-05, rewards: -9.299999999999999, count: 50
epoch: 149799, loss: 3.497123543638736e-05, rewards: -9.299999999999999, count: 50
epoch: 149809, loss: 1.237273227161495e-05, rewards: -9.299999999999999, count: 50
epoch: 149819, loss: -1.1403561074985191e-05, rewards: -9.299999999999999, count: 50
epoch: 149829, loss: -3.650188546089339e-06, rewards: -9.299999999999999, count: 50
epoch: 149839, loss: 5.304813385009766e-06, rewards: -9.299999999999999, count: 50
epoch: 149849, loss: -1.5223026821331587e-06, rewards: -9.299999999999999, c

epoch: 150729, loss: 2.2494793938676594e-06, rewards: -9.299999999999999, count: 50
epoch: 150739, loss: 1.1531114978424739e-05, rewards: -9.299999999999999, count: 50
epoch: 150749, loss: -6.368160029524006e-06, rewards: -9.299999999999999, count: 50
epoch: 150759, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 150769, loss: 1.0979175613101688e-06, rewards: -9.299999999999999, count: 50
epoch: 150779, loss: -1.0550022579991492e-06, rewards: -9.299999999999999, count: 50
epoch: 150789, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 150799, loss: 2.0265579436795633e-08, rewards: -9.299999999999999, count: 50
epoch: 150809, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 150819, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 150829, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, count: 50
epoch: 150839, loss: -1.0883808272410533e-06, rewards: -9.299999999999999

epoch: 151719, loss: 4.6943427150836214e-05, rewards: -9.299999999999999, count: 50
epoch: 151729, loss: -9.372234671900515e-06, rewards: -9.299999999999999, count: 50
epoch: 151739, loss: -1.2235641406732611e-05, rewards: -9.299999999999999, count: 50
epoch: 151749, loss: 9.95874415821163e-06, rewards: -9.299999999999999, count: 50
epoch: 151759, loss: -3.8397311072913e-06, rewards: -9.299999999999999, count: 50
epoch: 151769, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 151779, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 151789, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 151799, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 151809, loss: 7.700920150455204e-07, rewards: -9.299999999999999, count: 50
epoch: 151819, loss: 6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 151829, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
ep

epoch: 152709, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 152719, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 152729, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 152739, loss: 6.747245606675278e-07, rewards: -9.299999999999999, count: 50
epoch: 152749, loss: 1.7499924069852568e-06, rewards: -9.299999999999999, count: 50
epoch: 152759, loss: 4.224777057970641e-06, rewards: -9.299999999999999, count: 50
epoch: 152769, loss: 2.4483204470016062e-05, rewards: -9.299999999999999, count: 50
epoch: 152779, loss: 0.00011678219016175717, rewards: -9.299999999999999, count: 50
epoch: 152789, loss: -5.6959390349220484e-05, rewards: -9.299999999999999, count: 50
epoch: 152799, loss: -1.0401010513305664e-05, rewards: -9.299999999999999, count: 50
epoch: 152809, loss: 2.3642778614885174e-05, rewards: -9.299999999999999, count: 50
epoch: 152819, loss: -5.627870450553019e-06, rewards: -9.299999999999999, coun

epoch: 153699, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 153709, loss: -1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 153719, loss: 1.3875961712983553e-06, rewards: -9.299999999999999, count: 50
epoch: 153729, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count: 50
epoch: 153739, loss: -1.971721758309286e-06, rewards: -9.299999999999999, count: 50
epoch: 153749, loss: -3.727674538822612e-06, rewards: -9.299999999999999, count: 50
epoch: 153759, loss: -1.4160871614876669e-05, rewards: -9.299999999999999, count: 50
epoch: 153769, loss: -7.937073678476736e-05, rewards: -9.299999999999999, count: 50
epoch: 153779, loss: 3.284692866145633e-05, rewards: -9.299999999999999, count: 50
epoch: 153789, loss: -4.1673181840451434e-05, rewards: -9.299999999999999, count: 50
epoch: 153799, loss: 2.0598172341124155e-05, rewards: -9.299999999999999, count: 50
epoch: 153809, loss: -1.9741057712963084e-06, rewards: -9.299999999999999, co

epoch: 154689, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 154699, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 154709, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 154719, loss: -1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 154729, loss: -4.197358975943644e-06, rewards: -9.299999999999999, count: 50
epoch: 154739, loss: -2.4948119971668348e-05, rewards: -9.299999999999999, count: 50
epoch: 154749, loss: -0.0001169991519418545, rewards: -9.299999999999999, count: 50
epoch: 154759, loss: 5.514621807378717e-05, rewards: -9.299999999999999, count: 50
epoch: 154769, loss: 1.2658834748435766e-05, rewards: -9.299999999999999, count: 50
epoch: 154779, loss: -2.3351907657342963e-05, rewards: -9.299999999999999, count: 50
epoch: 154789, loss: 3.874301910400391e-06, rewards: -9.299999999999999, count: 50
epoch: 154799, loss: 5.884170604986139e-06, rewards: -9.299999999999999, cou

epoch: 155679, loss: -5.0449370974092744e-06, rewards: -9.299999999999999, count: 50
epoch: 155689, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 155699, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 155709, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 155719, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 155729, loss: 6.771087441848067e-07, rewards: -9.299999999999999, count: 50
epoch: 155739, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 155749, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 155759, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 155769, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 155779, loss: -2.9957293463667156e-06, rewards: -9.299999999999999, count: 50
epoch: 155789, loss: -1.7498730812803842e-05, rewards: -9.299999999999999

epoch: 156669, loss: -2.1064281554572517e-06, rewards: -9.299999999999999, count: 50
epoch: 156679, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 156689, loss: 8.82148754044465e-07, rewards: -9.299999999999999, count: 50
epoch: 156699, loss: -9.179115068036481e-07, rewards: -9.299999999999999, count: 50
epoch: 156709, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 156719, loss: -1.5735626845980732e-07, rewards: -9.299999999999999, count: 50
epoch: 156729, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 156739, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 156749, loss: 1.146793351836095e-06, rewards: -9.299999999999999, count: 50
epoch: 156759, loss: 2.8336048671917524e-06, rewards: -9.299999999999999, count: 50
epoch: 156769, loss: 1.0584592928353231e-05, rewards: -9.299999999999999, count: 50
epoch: 156779, loss: 5.3304433095036075e-05, rewards: -9.299999999999999, coun

epoch: 157649, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 157659, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 157669, loss: -2.7894972731701273e-07, rewards: -9.299999999999999, count: 50
epoch: 157679, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 157689, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 157699, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 157709, loss: -4.410743770222325e-07, rewards: -9.299999999999999, count: 50
epoch: 157719, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 157729, loss: -2.733469045779202e-06, rewards: -9.299999999999999, count: 50
epoch: 157739, loss: -1.8537044525146484e-05, rewards: -9.299999999999999, count: 50
epoch: 157749, loss: -0.0001218700417666696, rewards: -9.299999999999999, count: 50
epoch: 157759, loss: 7.181883120210841e-05, rewards: -9.299999999999999

epoch: 158639, loss: 2.703666723391507e-06, rewards: -9.299999999999999, count: 50
epoch: 158649, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 158659, loss: -9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 158669, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 158679, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 158689, loss: -2.0146370616203058e-07, rewards: -9.299999999999999, count: 50
epoch: 158699, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 158709, loss: 6.19888282926695e-08, rewards: -9.299999999999999, count: 50
epoch: 158719, loss: -1.1420249848015374e-06, rewards: -9.299999999999999, count: 50
epoch: 158729, loss: -2.5534629912726814e-06, rewards: -9.299999999999999, count: 50
epoch: 158739, loss: -1.1034011549782008e-05, rewards: -9.299999999999999, count: 50
epoch: 158749, loss: -6.428360939025879e-05, rewards: -9.299999999999999, cou

epoch: 159629, loss: 6.25371922069462e-06, rewards: -9.299999999999999, count: 50
epoch: 159639, loss: 4.6453475079033524e-05, rewards: -9.299999999999999, count: 50
epoch: 159649, loss: 5.5223703384399414e-05, rewards: -9.299999999999999, count: 50
epoch: 159659, loss: 4.1787625377764925e-05, rewards: -9.299999999999999, count: 50
epoch: 159669, loss: -2.3022890673018992e-05, rewards: -9.299999999999999, count: 50
epoch: 159679, loss: -1.7153024600702338e-05, rewards: -9.299999999999999, count: 50
epoch: 159689, loss: 7.936954716569744e-06, rewards: -9.299999999999999, count: 50
epoch: 159699, loss: 4.419088327267673e-06, rewards: -9.299999999999999, count: 50
epoch: 159709, loss: -4.458427611098159e-06, rewards: -9.299999999999999, count: 50
epoch: 159719, loss: 1.8429756210025516e-06, rewards: -9.299999999999999, count: 50
epoch: 159729, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 159739, loss: -3.4213064736832166e-07, rewards: -9.299999999999999, coun

epoch: 160619, loss: -5.158186013431987e-06, rewards: -9.299999999999999, count: 50
epoch: 160629, loss: -2.3819207854103297e-05, rewards: -9.299999999999999, count: 50
epoch: 160639, loss: -0.000102721453004051, rewards: -9.299999999999999, count: 50
epoch: 160649, loss: 6.155729352030903e-05, rewards: -9.299999999999999, count: 50
epoch: 160659, loss: -2.657532604644075e-05, rewards: -9.299999999999999, count: 50
epoch: 160669, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 160679, loss: 8.081197847786825e-06, rewards: -9.299999999999999, count: 50
epoch: 160689, loss: -7.41362555345404e-06, rewards: -9.299999999999999, count: 50
epoch: 160699, loss: 4.932880528940586e-06, rewards: -9.299999999999999, count: 50
epoch: 160709, loss: -2.825260253302986e-06, rewards: -9.299999999999999, count: 50
epoch: 160719, loss: 1.6438960983578e-06, rewards: -9.299999999999999, count: 50
epoch: 160729, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50


epoch: 161609, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 161619, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 161629, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 161639, loss: -1.8835068260614207e-07, rewards: -9.299999999999999, count: 50
epoch: 161649, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 161659, loss: -4.2915345943583816e-08, rewards: -9.299999999999999, count: 50
epoch: 161669, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 161679, loss: 1.3363361404117313e-06, rewards: -9.299999999999999, count: 50
epoch: 161689, loss: 5.655288532580016e-06, rewards: -9.299999999999999, count: 50
epoch: 161699, loss: 4.7266483306884766e-05, rewards: -9.299999999999999, count: 50
epoch: 161709, loss: 4.670500857173465e-05, rewards: -9.299999999999999, count: 50
epoch: 161719, loss: 5.785465327790007e-05, rewards: -9.299999999999999, coun

epoch: 162599, loss: 4.250526399118826e-05, rewards: -9.299999999999999, count: 50
epoch: 162609, loss: 7.387995719909668e-05, rewards: -9.299999999999999, count: 50
epoch: 162619, loss: -6.223916898306925e-06, rewards: -9.299999999999999, count: 50
epoch: 162629, loss: -2.602815584396012e-05, rewards: -9.299999999999999, count: 50
epoch: 162639, loss: 1.83975698746508e-05, rewards: -9.299999999999999, count: 50
epoch: 162649, loss: -4.271269062883221e-06, rewards: -9.299999999999999, count: 50
epoch: 162659, loss: -1.4853477523502079e-06, rewards: -9.299999999999999, count: 50
epoch: 162669, loss: 2.471208517818013e-06, rewards: -9.299999999999999, count: 50
epoch: 162679, loss: -1.5676021121180383e-06, rewards: -9.299999999999999, count: 50
epoch: 162689, loss: 5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 162699, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 162709, loss: -9.942054930434097e-07, rewards: -9.299999999999999, count: 

epoch: 163589, loss: -9.087323996936902e-05, rewards: -9.299999999999999, count: 50
epoch: 163599, loss: 5.5698157666483894e-05, rewards: -9.299999999999999, count: 50
epoch: 163609, loss: -4.1284562030341476e-05, rewards: -9.299999999999999, count: 50
epoch: 163619, loss: 3.5381317502469756e-06, rewards: -9.299999999999999, count: 50
epoch: 163629, loss: 1.2048482858517673e-05, rewards: -9.299999999999999, count: 50
epoch: 163639, loss: -9.638070878281724e-06, rewards: -9.299999999999999, count: 50
epoch: 163649, loss: 4.631280717148911e-06, rewards: -9.299999999999999, count: 50
epoch: 163659, loss: -2.219677071479964e-06, rewards: -9.299999999999999, count: 50
epoch: 163669, loss: 1.2314319519646233e-06, rewards: -9.299999999999999, count: 50
epoch: 163679, loss: -9.965896197172697e-07, rewards: -9.299999999999999, count: 50
epoch: 163689, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 163699, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, co

epoch: 164589, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 164599, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 164609, loss: 1.146793351836095e-06, rewards: -9.299999999999999, count: 50
epoch: 164619, loss: 1.994371359614888e-06, rewards: -9.299999999999999, count: 50
epoch: 164629, loss: 5.776882062491495e-06, rewards: -9.299999999999999, count: 50
epoch: 164639, loss: 3.7163496017456055e-05, rewards: -9.299999999999999, count: 50
epoch: 164649, loss: 9.146332740783691e-05, rewards: -9.299999999999999, count: 50
epoch: 164659, loss: -4.748106221086346e-06, rewards: -9.299999999999999, count: 50
epoch: 164669, loss: -3.341913179610856e-05, rewards: -9.299999999999999, count: 50
epoch: 164679, loss: 1.195669210574124e-05, rewards: -9.299999999999999, count: 50
epoch: 164689, loss: 6.762742941646138e-06, rewards: -9.299999999999999, count: 50
epoch: 164699, loss: -7.978677786013577e-06, rewards: -9.299999999999999, count: 5

epoch: 165579, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 165589, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 165599, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 165609, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 165619, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 165629, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 165639, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 165649, loss: 7.015466508164536e-06, rewards: -9.299999999999999, count: 50
epoch: 165659, loss: 6.343960558297113e-05, rewards: -9.299999999999999, count: 50
epoch: 165669, loss: -1.79922571987845e-05, rewards: -9.299999999999999, count: 50
epoch: 165679, loss: 5.613207758869976e-05, rewards: -9.299999999999999, count: 50
epoch: 165689, loss: 2.5137662305496633e-05, rewards: -9.299999999999999, count: 50

epoch: 166569, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 166579, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 166589, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 166599, loss: -1.2278557051104144e-06, rewards: -9.299999999999999, count: 50
epoch: 166609, loss: -1.9478798094496597e-06, rewards: -9.299999999999999, count: 50
epoch: 166619, loss: -8.857250577420928e-06, rewards: -9.299999999999999, count: 50
epoch: 166629, loss: -6.0255526477703825e-05, rewards: -9.299999999999999, count: 50
epoch: 166639, loss: -9.263753781851847e-06, rewards: -9.299999999999999, count: 50
epoch: 166649, loss: -4.5212505938252434e-05, rewards: -9.299999999999999, count: 50
epoch: 166659, loss: 2.1145344362594187e-05, rewards: -9.299999999999999, count: 50
epoch: 166669, loss: 9.829997907218058e-06, rewards: -9.299999999999999, count: 50
epoch: 166679, loss: -1.1307000931992661e-05, rewards: -9.299999999999999

epoch: 167549, loss: -5.662441253662109e-06, rewards: -9.299999999999999, count: 50
epoch: 167559, loss: -3.677129643619992e-05, rewards: -9.299999999999999, count: 50
epoch: 167569, loss: -9.400844282936305e-05, rewards: -9.299999999999999, count: 50
epoch: 167579, loss: 3.5762788286319847e-08, rewards: -9.299999999999999, count: 50
epoch: 167589, loss: 3.566145824152045e-05, rewards: -9.299999999999999, count: 50
epoch: 167599, loss: -6.586313247680664e-06, rewards: -9.299999999999999, count: 50
epoch: 167609, loss: -1.070618600351736e-05, rewards: -9.299999999999999, count: 50
epoch: 167619, loss: 7.017850748525234e-06, rewards: -9.299999999999999, count: 50
epoch: 167629, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 167639, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 167649, loss: 1.0406970432086382e-06, rewards: -9.299999999999999, count: 50
epoch: 167659, loss: -8.940696716308594e-07, rewards: -9.299999999999999, cou

epoch: 168549, loss: -2.6988982426701114e-06, rewards: -9.299999999999999, count: 50
epoch: 168559, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 168569, loss: 1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 168579, loss: -9.965896197172697e-07, rewards: -9.299999999999999, count: 50
epoch: 168589, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 168599, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 168609, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 168619, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 168629, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 168639, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 168649, loss: 1.2958049637745717e-06, rewards: -9.299999999999999, count: 50
epoch: 168659, loss: 5.891322871320881e-06, rewards: -9.299999999999999, count

epoch: 169529, loss: -0.00012868642807006836, rewards: -9.299999999999999, count: 50
epoch: 169539, loss: 7.13729823473841e-05, rewards: -9.299999999999999, count: 50
epoch: 169549, loss: 3.5834313166560605e-05, rewards: -9.299999999999999, count: 50
epoch: 169559, loss: -7.406473287119297e-06, rewards: -9.299999999999999, count: 50
epoch: 169569, loss: -1.67906291608233e-05, rewards: -9.299999999999999, count: 50
epoch: 169579, loss: -3.139972704957472e-06, rewards: -9.299999999999999, count: 50
epoch: 169589, loss: 5.694627816410502e-06, rewards: -9.299999999999999, count: 50
epoch: 169599, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 169609, loss: -2.0766258330695564e-06, rewards: -9.299999999999999, count: 50
epoch: 169619, loss: 1.1718273071892327e-06, rewards: -9.299999999999999, count: 50
epoch: 169629, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 169639, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, coun

epoch: 170509, loss: 4.584789166983683e-06, rewards: -9.299999999999999, count: 50
epoch: 170519, loss: -2.6583670660329517e-06, rewards: -9.299999999999999, count: 50
epoch: 170529, loss: 1.1539459592313506e-06, rewards: -9.299999999999999, count: 50
epoch: 170539, loss: -7.915496667010302e-07, rewards: -9.299999999999999, count: 50
epoch: 170549, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 170559, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 170569, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 170579, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 170589, loss: -9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 170599, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 170609, loss: -1.8131732986148563e-06, rewards: -9.299999999999999, count: 50
epoch: 170619, loss: -7.287263997568516e-06, rewards: -9.299999999999999, co

epoch: 171499, loss: 6.803274118283298e-06, rewards: -9.299999999999999, count: 50
epoch: 171509, loss: -5.594491994997952e-06, rewards: -9.299999999999999, count: 50
epoch: 171519, loss: 3.266334488216671e-06, rewards: -9.299999999999999, count: 50
epoch: 171529, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 171539, loss: -1.0180473282161984e-06, rewards: -9.299999999999999, count: 50
epoch: 171549, loss: 1.6486644653923577e-06, rewards: -9.299999999999999, count: 50
epoch: 171559, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 171569, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 171579, loss: 1.5950203078318737e-06, rewards: -9.299999999999999, count: 50
epoch: 171589, loss: 7.127523531380575e-06, rewards: -9.299999999999999, count: 50
epoch: 171599, loss: 4.608392555383034e-05, rewards: -9.299999999999999, count: 50
epoch: 171609, loss: 5.887389124836773e-05, rewards: -9.299999999999999, count: 50

epoch: 172489, loss: 1.5203952898446005e-05, rewards: -9.299999999999999, count: 50
epoch: 172499, loss: -2.7010441044694744e-05, rewards: -9.299999999999999, count: 50
epoch: 172509, loss: 1.5566349247819744e-05, rewards: -9.299999999999999, count: 50
epoch: 172519, loss: -5.052089818491368e-06, rewards: -9.299999999999999, count: 50
epoch: 172529, loss: 9.107589562518115e-07, rewards: -9.299999999999999, count: 50
epoch: 172539, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 172549, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 172559, loss: -1.8942356518891756e-06, rewards: -9.299999999999999, count: 50
epoch: 172569, loss: 1.2266635849300656e-06, rewards: -9.299999999999999, count: 50
epoch: 172579, loss: 1.3291835330164758e-06, rewards: -9.299999999999999, count: 50
epoch: 172589, loss: 5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 172599, loss: -9.965896197172697e-07, rewards: -9.299999999999999, c

epoch: 173469, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, count: 50
epoch: 173479, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 173489, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 173499, loss: -2.3436546143784653e-06, rewards: -9.299999999999999, count: 50
epoch: 173509, loss: -5.158186013431987e-06, rewards: -9.299999999999999, count: 50
epoch: 173519, loss: -2.8192996978759766e-05, rewards: -9.299999999999999, count: 50
epoch: 173529, loss: -0.0001106810595956631, rewards: -9.299999999999999, count: 50
epoch: 173539, loss: 4.894494850304909e-05, rewards: -9.299999999999999, count: 50
epoch: 173549, loss: 9.337663868791424e-06, rewards: -9.299999999999999, count: 50
epoch: 173559, loss: -2.189397855545394e-05, rewards: -9.299999999999999, count: 50
epoch: 173569, loss: 9.073019100469537e-06, rewards: -9.299999999999999, count: 50
epoch: 173579, loss: 3.397464638510428e-07, rewards: -9.299999999999999, co

epoch: 174459, loss: -0.000144582983921282, rewards: -9.299999999999999, count: 50
epoch: 174469, loss: 1.3440847396850586e-05, rewards: -9.299999999999999, count: 50
epoch: 174479, loss: 4.1629074985394254e-05, rewards: -9.299999999999999, count: 50
epoch: 174489, loss: 2.6792287826538086e-05, rewards: -9.299999999999999, count: 50
epoch: 174499, loss: 5.662441253662109e-06, rewards: -9.299999999999999, count: 50
epoch: 174509, loss: -6.548166311404202e-06, rewards: -9.299999999999999, count: 50
epoch: 174519, loss: -5.627870450553019e-06, rewards: -9.299999999999999, count: 50
epoch: 174529, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 174539, loss: 2.0694733393611386e-06, rewards: -9.299999999999999, count: 50
epoch: 174549, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 174559, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 174569, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count

epoch: 175459, loss: 1.5997886748664314e-06, rewards: -9.299999999999999, count: 50
epoch: 175469, loss: 6.978511919442099e-06, rewards: -9.299999999999999, count: 50
epoch: 175479, loss: 4.872441422776319e-05, rewards: -9.299999999999999, count: 50
epoch: 175489, loss: 4.933714808430523e-05, rewards: -9.299999999999999, count: 50
epoch: 175499, loss: 3.2638312404742464e-05, rewards: -9.299999999999999, count: 50
epoch: 175509, loss: -2.977967233164236e-05, rewards: -9.299999999999999, count: 50
epoch: 175519, loss: -6.036758350091986e-06, rewards: -9.299999999999999, count: 50
epoch: 175529, loss: 1.2143850653956179e-05, rewards: -9.299999999999999, count: 50
epoch: 175539, loss: -4.163980520388577e-06, rewards: -9.299999999999999, count: 50
epoch: 175549, loss: -6.651878265984124e-07, rewards: -9.299999999999999, count: 50
epoch: 175559, loss: 1.4054775192562374e-06, rewards: -9.299999999999999, count: 50
epoch: 175569, loss: -9.131431397690903e-07, rewards: -9.299999999999999, count

epoch: 176439, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 176449, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 176459, loss: -2.5582312446204014e-06, rewards: -9.299999999999999, count: 50
epoch: 176469, loss: -1.3133287211530842e-05, rewards: -9.299999999999999, count: 50
epoch: 176479, loss: -9.968400263460353e-05, rewards: -9.299999999999999, count: 50
epoch: 176489, loss: 7.706403994234279e-05, rewards: -9.299999999999999, count: 50
epoch: 176499, loss: -1.8380880646873266e-05, rewards: -9.299999999999999, count: 50
epoch: 176509, loss: -2.8523207220132463e-05, rewards: -9.299999999999999, count: 50
epoch: 176519, loss: 3.083944420723128e-06, rewards: -9.299999999999999, count: 50
epoch: 176529, loss: 1.0291338185197674e-05, rewards: -9.299999999999999, count: 50
epoch: 176539, loss: -3.3807755244197324e-06, rewards: -9.299999999999999, count: 50
epoch: 176549, loss: -2.191066641898942e-06, rewards: -9.299999999999999

epoch: 177439, loss: 2.7917623810935766e-05, rewards: -9.299999999999999, count: 50
epoch: 177449, loss: -3.180503881594632e-06, rewards: -9.299999999999999, count: 50
epoch: 177459, loss: -9.740590940054972e-06, rewards: -9.299999999999999, count: 50
epoch: 177469, loss: 3.743171646419796e-06, rewards: -9.299999999999999, count: 50
epoch: 177479, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 177489, loss: -1.9478798094496597e-06, rewards: -9.299999999999999, count: 50
epoch: 177499, loss: 1.3875961712983553e-06, rewards: -9.299999999999999, count: 50
epoch: 177509, loss: -1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 177519, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 177529, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 177539, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 177549, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, 

epoch: 178439, loss: 1.7770529666449875e-05, rewards: -9.299999999999999, count: 50
epoch: 178449, loss: -1.080274614650989e-05, rewards: -9.299999999999999, count: 50
epoch: 178459, loss: 5.9223175412626006e-06, rewards: -9.299999999999999, count: 50
epoch: 178469, loss: -4.129409717279486e-06, rewards: -9.299999999999999, count: 50
epoch: 178479, loss: 2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 178489, loss: -1.4102458862907952e-06, rewards: -9.299999999999999, count: 50
epoch: 178499, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 178509, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 178519, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 178529, loss: -1.0550022579991492e-06, rewards: -9.299999999999999, count: 50
epoch: 178539, loss: -5.100965609017294e-06, rewards: -9.299999999999999, count: 50
epoch: 178549, loss: -3.3727883419487625e-05, rewards: -9.29999999999999

epoch: 179419, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 179429, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 179439, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 179449, loss: 2.473592758178711e-06, rewards: -9.299999999999999, count: 50
epoch: 179459, loss: 1.0337829735362902e-05, rewards: -9.299999999999999, count: 50
epoch: 179469, loss: 6.243586540222168e-05, rewards: -9.299999999999999, count: 50
epoch: 179479, loss: 4.631280717148911e-06, rewards: -9.299999999999999, count: 50
epoch: 179489, loss: 4.189491301076487e-05, rewards: -9.299999999999999, count: 50
epoch: 179499, loss: -2.4030208805925213e-05, rewards: -9.299999999999999, count: 50
epoch: 179509, loss: -4.127025476918789e-06, rewards: -9.299999999999999, count: 50
epoch: 179519, loss: 1.0306835065421183e-05, rewards: -9.299999999999999, count: 50
epoch: 179529, loss: -6.239414233277785e-06, rewards: -9.299999999999999, count: 5

epoch: 180409, loss: 1.6534328324269154e-06, rewards: -9.299999999999999, count: 50
epoch: 180419, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 180429, loss: -3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 180439, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 180449, loss: 1.6331672441083356e-06, rewards: -9.299999999999999, count: 50
epoch: 180459, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 180469, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 180479, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 180489, loss: 2.9230118343548384e-06, rewards: -9.299999999999999, count: 50
epoch: 180499, loss: 1.313567190663889e-05, rewards: -9.299999999999999, count: 50
epoch: 180509, loss: 7.751345401629806e-05, rewards: -9.299999999999999, count: 50
epoch: 180519, loss: -3.108382225036621e-05, rewards: -9.299999999999999, count:

epoch: 181399, loss: -3.935456334147602e-05, rewards: -9.299999999999999, count: 50
epoch: 181409, loss: 3.1387805847771233e-06, rewards: -9.299999999999999, count: 50
epoch: 181419, loss: 1.4598369489249308e-05, rewards: -9.299999999999999, count: 50
epoch: 181429, loss: -2.84075736090017e-06, rewards: -9.299999999999999, count: 50
epoch: 181439, loss: -4.197358975943644e-06, rewards: -9.299999999999999, count: 50
epoch: 181449, loss: 3.6776066281163367e-06, rewards: -9.299999999999999, count: 50
epoch: 181459, loss: -1.4054775192562374e-06, rewards: -9.299999999999999, count: 50
epoch: 181469, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 181479, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 181489, loss: -1.5258788721439487e-07, rewards: -9.299999999999999, count: 50
epoch: 181499, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, count: 50
epoch: 181509, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, c

epoch: 182389, loss: 1.9788741667525755e-07, rewards: -9.299999999999999, count: 50
epoch: 182399, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 182409, loss: 3.099441414633475e-07, rewards: -9.299999999999999, count: 50
epoch: 182419, loss: 1.7333030655208859e-06, rewards: -9.299999999999999, count: 50
epoch: 182429, loss: 9.664297067502048e-06, rewards: -9.299999999999999, count: 50
epoch: 182439, loss: 7.118344365153462e-05, rewards: -9.299999999999999, count: 50
epoch: 182449, loss: -2.8302669306867756e-05, rewards: -9.299999999999999, count: 50
epoch: 182459, loss: 5.2034854888916016e-05, rewards: -9.299999999999999, count: 50
epoch: 182469, loss: 5.50627692064154e-06, rewards: -9.299999999999999, count: 50
epoch: 182479, loss: -1.8936396372737363e-05, rewards: -9.299999999999999, count: 50
epoch: 182489, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 182499, loss: 6.412267794075888e-06, rewards: -9.299999999999999, count: 

epoch: 183379, loss: -7.454395381500944e-05, rewards: -9.299999999999999, count: 50
epoch: 183389, loss: -2.5916098820744082e-05, rewards: -9.299999999999999, count: 50
epoch: 183399, loss: 1.723766399663873e-05, rewards: -9.299999999999999, count: 50
epoch: 183409, loss: 1.52337552208337e-05, rewards: -9.299999999999999, count: 50
epoch: 183419, loss: -3.55362885784416e-06, rewards: -9.299999999999999, count: 50
epoch: 183429, loss: -5.528926976694493e-06, rewards: -9.299999999999999, count: 50
epoch: 183439, loss: 2.708434976739227e-06, rewards: -9.299999999999999, count: 50
epoch: 183449, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 183459, loss: -8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 183469, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 183479, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 183489, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50

epoch: 184369, loss: 1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 184379, loss: 8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 184389, loss: 2.8336048671917524e-06, rewards: -9.299999999999999, count: 50
epoch: 184399, loss: 1.4449357877310831e-05, rewards: -9.299999999999999, count: 50
epoch: 184409, loss: 9.696245251689106e-05, rewards: -9.299999999999999, count: 50
epoch: 184419, loss: -6.911635136930272e-05, rewards: -9.299999999999999, count: 50
epoch: 184429, loss: 3.085732532781549e-05, rewards: -9.299999999999999, count: 50
epoch: 184439, loss: 1.8984079360961914e-05, rewards: -9.299999999999999, count: 50
epoch: 184449, loss: -1.4654398000857327e-05, rewards: -9.299999999999999, count: 50
epoch: 184459, loss: -1.7631053879085812e-06, rewards: -9.299999999999999, count: 50
epoch: 184469, loss: 5.781650543212891e-06, rewards: -9.299999999999999, count: 50
epoch: 184479, loss: -3.727674538822612e-06, rewards: -9.299999999999999, coun

epoch: 185369, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 185379, loss: 5.125999678057269e-07, rewards: -9.299999999999999, count: 50
epoch: 185389, loss: -1.5735626845980732e-07, rewards: -9.299999999999999, count: 50
epoch: 185399, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 185409, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 185419, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 185429, loss: -2.915859113272745e-06, rewards: -9.299999999999999, count: 50
epoch: 185439, loss: -1.4474391718977131e-05, rewards: -9.299999999999999, count: 50
epoch: 185449, loss: -9.480357402935624e-05, rewards: -9.299999999999999, count: 50
epoch: 185459, loss: 6.490945816040039e-05, rewards: -9.299999999999999, count: 50
epoch: 185469, loss: -3.569602995412424e-05, rewards: -9.299999999999999, count: 50
epoch: 185479, loss: -1.157879796664929e-05, rewards: -9.299999999999999, c

epoch: 186359, loss: -2.2900103431311436e-06, rewards: -9.299999999999999, count: 50
epoch: 186369, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 186379, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 186389, loss: -8.094310715023312e-07, rewards: -9.299999999999999, count: 50
epoch: 186399, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 186409, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 186419, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 186429, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 186439, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 186449, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 186459, loss: -3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 186469, loss: 9.942054930434097e-07, rewards: -9.299999999999999, cou

epoch: 187349, loss: -1.1833905773528386e-05, rewards: -9.299999999999999, count: 50
epoch: 187359, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 187369, loss: 2.7942658107349416e-06, rewards: -9.299999999999999, count: 50
epoch: 187379, loss: -2.32934962696163e-06, rewards: -9.299999999999999, count: 50
epoch: 187389, loss: 1.363754222438729e-06, rewards: -9.299999999999999, count: 50
epoch: 187399, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 187409, loss: 6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 187419, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 187429, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 187439, loss: -5.0067900048134106e-08, rewards: -9.299999999999999, count: 50
epoch: 187449, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 187459, loss: 1.962184796866495e-06, rewards: -9.299999999999999, count

epoch: 188339, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 188349, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 188359, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 188369, loss: 1.8060206912196008e-06, rewards: -9.299999999999999, count: 50
epoch: 188379, loss: 3.0231476557673886e-06, rewards: -9.299999999999999, count: 50
epoch: 188389, loss: 9.701251656224485e-06, rewards: -9.299999999999999, count: 50
epoch: 188399, loss: 5.136132313054986e-05, rewards: -9.299999999999999, count: 50
epoch: 188409, loss: 4.453897417988628e-05, rewards: -9.299999999999999, count: 50
epoch: 188419, loss: 5.389451871451456e-06, rewards: -9.299999999999999, count: 50
epoch: 188429, loss: -2.4224520529969595e-05, rewards: -9.299999999999999, count: 50
epoch: 188439, loss: 1.761317253112793e-05, rewards: -9.299999999999999, count: 50
epoch: 188449, loss: -8.07046853879001e-06, rewards: -9.299999999999999, count: 5

epoch: 189329, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 189339, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 189349, loss: 7.700920150455204e-07, rewards: -9.299999999999999, count: 50
epoch: 189359, loss: 3.2007694699132117e-06, rewards: -9.299999999999999, count: 50
epoch: 189369, loss: 2.400398261670489e-05, rewards: -9.299999999999999, count: 50
epoch: 189379, loss: 0.0001303410535911098, rewards: -9.299999999999999, count: 50
epoch: 189389, loss: -3.4722088457783684e-05, rewards: -9.299999999999999, count: 50
epoch: 189399, loss: -4.08053383580409e-05, rewards: -9.299999999999999, count: 50
epoch: 189409, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 189419, loss: 1.570582389831543e-05, rewards: -9.299999999999999, count: 50
epoch: 189429, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count: 50
epoch: 189439, loss: -5.764961315435357e-06, rewards: -9.299999999999999, count: 5

epoch: 190319, loss: 8.268356396001764e-06, rewards: -9.299999999999999, count: 50
epoch: 190329, loss: 8.0484154750593e-05, rewards: -9.299999999999999, count: 50
epoch: 190339, loss: -6.647944246651605e-05, rewards: -9.299999999999999, count: 50
epoch: 190349, loss: 2.6313065973226912e-05, rewards: -9.299999999999999, count: 50
epoch: 190359, loss: 3.391742575331591e-05, rewards: -9.299999999999999, count: 50
epoch: 190369, loss: 1.3659000615007244e-05, rewards: -9.299999999999999, count: 50
epoch: 190379, loss: -4.681348855228862e-06, rewards: -9.299999999999999, count: 50
epoch: 190389, loss: -7.253885087266099e-06, rewards: -9.299999999999999, count: 50
epoch: 190399, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 190409, loss: 2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 190419, loss: -7.355213256232673e-07, rewards: -9.299999999999999, count: 50
epoch: 190429, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 

epoch: 191309, loss: 2.2006033759680577e-06, rewards: -9.299999999999999, count: 50
epoch: 191319, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 191329, loss: -6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 191339, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 191349, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 191359, loss: -3.5262107758171624e-06, rewards: -9.299999999999999, count: 50
epoch: 191369, loss: -1.9128323401673697e-05, rewards: -9.299999999999999, count: 50
epoch: 191379, loss: -0.0001073825333151035, rewards: -9.299999999999999, count: 50
epoch: 191389, loss: 6.98411458870396e-05, rewards: -9.299999999999999, count: 50
epoch: 191399, loss: -1.981139212148264e-05, rewards: -9.299999999999999, count: 50
epoch: 191409, loss: -1.6486645108670928e-05, rewards: -9.299999999999999, count: 50
epoch: 191419, loss: 1.4756918062630575e-05, rewards: -9.299999999999999, c

epoch: 192309, loss: 3.2436846595373936e-06, rewards: -9.299999999999999, count: 50
epoch: 192319, loss: -4.460811396711506e-06, rewards: -9.299999999999999, count: 50
epoch: 192329, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 192339, loss: 1.5580653780489229e-06, rewards: -9.299999999999999, count: 50
epoch: 192349, loss: -1.363754222438729e-06, rewards: -9.299999999999999, count: 50
epoch: 192359, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 192369, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 192379, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 192389, loss: -4.148483299104555e-07, rewards: -9.299999999999999, count: 50
epoch: 192399, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 192409, loss: -5.0067900048134106e-08, rewards: -9.299999999999999, count: 50
epoch: 192419, loss: -1.01327898960335e-07, rewards: -9.299999999999999, co

epoch: 193299, loss: -1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 193309, loss: 1.8060206912196008e-06, rewards: -9.299999999999999, count: 50
epoch: 193319, loss: -4.5657156988454517e-07, rewards: -9.299999999999999, count: 50
epoch: 193329, loss: -1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 193339, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 193349, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 193359, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 193369, loss: -1.8477439880371094e-06, rewards: -9.299999999999999, count: 50
epoch: 193379, loss: -5.224943379289471e-06, rewards: -9.299999999999999, count: 50
epoch: 193389, loss: -2.5110244678216986e-05, rewards: -9.299999999999999, count: 50
epoch: 193399, loss: -0.00010826707148225978, rewards: -9.299999999999999, count: 50
epoch: 193409, loss: 6.0865877458127216e-05, rewards: -9.29999999999

epoch: 194299, loss: 1.3515948921849485e-05, rewards: -9.299999999999999, count: 50
epoch: 194309, loss: 0.00011357426410540938, rewards: -9.299999999999999, count: 50
epoch: 194319, loss: -8.688568777870387e-05, rewards: -9.299999999999999, count: 50
epoch: 194329, loss: -1.782655635906849e-05, rewards: -9.299999999999999, count: 50
epoch: 194339, loss: 2.0093917555641383e-05, rewards: -9.299999999999999, count: 50
epoch: 194349, loss: 1.7473697880632244e-05, rewards: -9.299999999999999, count: 50
epoch: 194359, loss: -4.148483299104555e-07, rewards: -9.299999999999999, count: 50
epoch: 194369, loss: -6.849765668448526e-06, rewards: -9.299999999999999, count: 50
epoch: 194379, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 194389, loss: 2.548694510551286e-06, rewards: -9.299999999999999, count: 50
epoch: 194399, loss: -1.394748665006773e-06, rewards: -9.299999999999999, count: 50
epoch: 194409, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, cou

epoch: 195289, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 195299, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 195309, loss: 3.312826265755575e-06, rewards: -9.299999999999999, count: 50
epoch: 195319, loss: 1.743912616802845e-05, rewards: -9.299999999999999, count: 50
epoch: 195329, loss: 0.00010664462752174586, rewards: -9.299999999999999, count: 50
epoch: 195339, loss: -7.228612957987934e-05, rewards: -9.299999999999999, count: 50
epoch: 195349, loss: 1.9712448192876764e-05, rewards: -9.299999999999999, count: 50
epoch: 195359, loss: 2.0040273739141412e-05, rewards: -9.299999999999999, count: 50
epoch: 195369, loss: -1.348614659946179e-05, rewards: -9.299999999999999, count: 50
epoch: 195379, loss: -6.294250738392293e-07, rewards: -9.299999999999999, count: 50
epoch: 195389, loss: 4.599094609147869e-06, rewards: -9.299999999999999, count: 50
epoch: 195399, loss: -3.6144256227998994e-06, rewards: -9.299999999999999, count:

epoch: 196289, loss: -2.16841704059334e-06, rewards: -9.299999999999999, count: 50
epoch: 196299, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 196309, loss: 1.7380714325554436e-06, rewards: -9.299999999999999, count: 50
epoch: 196319, loss: -1.4173984936860506e-06, rewards: -9.299999999999999, count: 50
epoch: 196329, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 196339, loss: 1.146793351836095e-06, rewards: -9.299999999999999, count: 50
epoch: 196349, loss: 2.59995454143791e-06, rewards: -9.299999999999999, count: 50
epoch: 196359, loss: 8.670092029205989e-06, rewards: -9.299999999999999, count: 50
epoch: 196369, loss: 5.012988913222216e-05, rewards: -9.299999999999999, count: 50
epoch: 196379, loss: 4.722475932794623e-05, rewards: -9.299999999999999, count: 50
epoch: 196389, loss: 9.260177648684476e-06, rewards: -9.299999999999999, count: 50
epoch: 196399, loss: -2.759456583589781e-05, rewards: -9.299999999999999, count: 5

epoch: 197269, loss: -1.6595125998719595e-05, rewards: -9.299999999999999, count: 50
epoch: 197279, loss: -8.205890480894595e-05, rewards: -9.299999999999999, count: 50
epoch: 197289, loss: 3.603577715693973e-05, rewards: -9.299999999999999, count: 50
epoch: 197299, loss: -3.7525893276324496e-05, rewards: -9.299999999999999, count: 50
epoch: 197309, loss: 2.2751093638362363e-05, rewards: -9.299999999999999, count: 50
epoch: 197319, loss: -9.447336196899414e-06, rewards: -9.299999999999999, count: 50
epoch: 197329, loss: 3.3974647521972656e-06, rewards: -9.299999999999999, count: 50
epoch: 197339, loss: -2.1815299078298267e-06, rewards: -9.299999999999999, count: 50
epoch: 197349, loss: 2.5463104975642636e-06, rewards: -9.299999999999999, count: 50
epoch: 197359, loss: -2.2220610844669864e-06, rewards: -9.299999999999999, count: 50
epoch: 197369, loss: 1.115798909268051e-06, rewards: -9.299999999999999, count: 50
epoch: 197379, loss: 5.638599418489321e-07, rewards: -9.299999999999999, c

epoch: 198259, loss: 5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 198269, loss: 1.1563300859052106e-06, rewards: -9.299999999999999, count: 50
epoch: 198279, loss: 1.1539459592313506e-06, rewards: -9.299999999999999, count: 50
epoch: 198289, loss: 1.9693375179485884e-06, rewards: -9.299999999999999, count: 50
epoch: 198299, loss: 1.2961626453034114e-05, rewards: -9.299999999999999, count: 50
epoch: 198309, loss: 9.911060624290258e-05, rewards: -9.299999999999999, count: 50
epoch: 198319, loss: -7.639527029823512e-05, rewards: -9.299999999999999, count: 50
epoch: 198329, loss: 1.9509792764438316e-05, rewards: -9.299999999999999, count: 50
epoch: 198339, loss: 2.8297901735641062e-05, rewards: -9.299999999999999, count: 50
epoch: 198349, loss: -3.210306203982327e-06, rewards: -9.299999999999999, count: 50
epoch: 198359, loss: -1.0321140507585369e-05, rewards: -9.299999999999999, count: 50
epoch: 198369, loss: 3.7896634239587e-06, rewards: -9.299999999999999, count:

epoch: 199249, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 199259, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 199269, loss: -5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 199279, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 199289, loss: -2.6583670660329517e-06, rewards: -9.299999999999999, count: 50
epoch: 199299, loss: -2.244114875793457e-05, rewards: -9.299999999999999, count: 50
epoch: 199309, loss: -0.00013640045654028654, rewards: -9.299999999999999, count: 50
epoch: 199319, loss: 3.203749656677246e-05, rewards: -9.299999999999999, count: 50
epoch: 199329, loss: 4.488229751586914e-05, rewards: -9.299999999999999, count: 50
epoch: 199339, loss: 1.069068912329385e-05, rewards: -9.299999999999999, count: 50
epoch: 199349, loss: -1.1627674211922567e-05, rewards: -9.299999999999999, count: 50
epoch: 199359, loss: -8.046627044677734e-06, rewards: -9.299999999999999, co

epoch: 200239, loss: -4.078149686392862e-06, rewards: -9.299999999999999, count: 50
epoch: 200249, loss: 2.1100045159982983e-06, rewards: -9.299999999999999, count: 50
epoch: 200259, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 200269, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 200279, loss: -1.2278557051104144e-06, rewards: -9.299999999999999, count: 50
epoch: 200289, loss: -2.16841704059334e-06, rewards: -9.299999999999999, count: 50
epoch: 200299, loss: -7.116794677131111e-06, rewards: -9.299999999999999, count: 50
epoch: 200309, loss: -4.129171429667622e-05, rewards: -9.299999999999999, count: 50
epoch: 200319, loss: -7.660865958314389e-05, rewards: -9.299999999999999, count: 50
epoch: 200329, loss: 1.0579824447631836e-05, rewards: -9.299999999999999, count: 50
epoch: 200339, loss: 2.390146255493164e-05, rewards: -9.299999999999999, count: 50
epoch: 200349, loss: -1.9327402696944773e-05, rewards: -9.299999999999999, co

epoch: 201229, loss: 1.1895895113411825e-05, rewards: -9.299999999999999, count: 50
epoch: 201239, loss: -1.1250972420384642e-05, rewards: -9.299999999999999, count: 50
epoch: 201249, loss: 7.321834345930256e-06, rewards: -9.299999999999999, count: 50
epoch: 201259, loss: -4.640817678591702e-06, rewards: -9.299999999999999, count: 50
epoch: 201269, loss: 2.2172928311192663e-06, rewards: -9.299999999999999, count: 50
epoch: 201279, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 201289, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 201299, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 201309, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 201319, loss: 2.511739694455173e-06, rewards: -9.299999999999999, count: 50
epoch: 201329, loss: 8.890629032975994e-06, rewards: -9.299999999999999, count: 50
epoch: 201339, loss: 4.9540994950803e-05, rewards: -9.299999999999999, count: 5

epoch: 202219, loss: 2.51710407610517e-05, rewards: -9.299999999999999, count: 50
epoch: 202229, loss: -1.9899605831597e-05, rewards: -9.299999999999999, count: 50
epoch: 202239, loss: -4.905462446913589e-06, rewards: -9.299999999999999, count: 50
epoch: 202249, loss: 8.64267349243164e-06, rewards: -9.299999999999999, count: 50
epoch: 202259, loss: -3.5214425224694423e-06, rewards: -9.299999999999999, count: 50
epoch: 202269, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 202279, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 202289, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 202299, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 202309, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 202319, loss: 3.2424927098873013e-07, rewards: -9.299999999999999, count: 50
epoch: 202329, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count: 50

epoch: 203209, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 203219, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 203229, loss: -2.4139881134033203e-06, rewards: -9.299999999999999, count: 50
epoch: 203239, loss: -1.4086961527937092e-05, rewards: -9.299999999999999, count: 50
epoch: 203249, loss: -0.00010243058204650879, rewards: -9.299999999999999, count: 50
epoch: 203259, loss: 7.697343971813098e-05, rewards: -9.299999999999999, count: 50
epoch: 203269, loss: -1.733541466819588e-05, rewards: -9.299999999999999, count: 50
epoch: 203279, loss: -2.7251244318904355e-05, rewards: -9.299999999999999, count: 50
epoch: 203289, loss: 4.781484676641412e-06, rewards: -9.299999999999999, count: 50
epoch: 203299, loss: 9.353160749014933e-06, rewards: -9.299999999999999, count: 50
epoch: 203309, loss: -4.869699296250474e-06, rewards: -9.299999999999999, count: 50
epoch: 203319, loss: -6.34193440873787e-07, rewards: -9.299999999999999, coun

epoch: 204199, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 204209, loss: -8.094310715023312e-07, rewards: -9.299999999999999, count: 50
epoch: 204219, loss: -1.497268726780021e-06, rewards: -9.299999999999999, count: 50
epoch: 204229, loss: -5.031824002799112e-06, rewards: -9.299999999999999, count: 50
epoch: 204239, loss: -2.7108191716251895e-05, rewards: -9.299999999999999, count: 50
epoch: 204249, loss: -0.00010928750270977616, rewards: -9.299999999999999, count: 50
epoch: 204259, loss: 5.4795742471469566e-05, rewards: -9.299999999999999, count: 50
epoch: 204269, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 204279, loss: -1.901388168334961e-05, rewards: -9.299999999999999, count: 50
epoch: 204289, loss: 1.2670755495491903e-05, rewards: -9.299999999999999, count: 50
epoch: 204299, loss: -4.465579877432901e-06, rewards: -9.299999999999999, count: 50
epoch: 204309, loss: 8.189678055714467e-07, rewards: -9.299999999999999, 

epoch: 205189, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 205199, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 205209, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 205219, loss: 3.6776066281163367e-06, rewards: -9.299999999999999, count: 50
epoch: 205229, loss: 2.7849673642776906e-05, rewards: -9.299999999999999, count: 50
epoch: 205239, loss: 0.00012632131983991712, rewards: -9.299999999999999, count: 50
epoch: 205249, loss: -3.250837380619487e-06, rewards: -9.299999999999999, count: 50
epoch: 205259, loss: -4.066824840265326e-05, rewards: -9.299999999999999, count: 50
epoch: 205269, loss: -1.5571116819046438e-05, rewards: -9.299999999999999, count: 50
epoch: 205279, loss: 8.988380614027847e-06, rewards: -9.299999999999999, count: 50
epoch: 205289, loss: 8.065700967563316e-06, rewards: -9.299999999999999, count: 50
epoch: 205299, loss: -3.3676624298095703e-06, rewards: -9.299999999999999, coun

epoch: 206179, loss: -7.817745427018963e-06, rewards: -9.299999999999999, count: 50
epoch: 206189, loss: 2.988576852658298e-06, rewards: -9.299999999999999, count: 50
epoch: 206199, loss: -5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 206209, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count: 50
epoch: 206219, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 206229, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 206239, loss: -5.412101700130734e-07, rewards: -9.299999999999999, count: 50
epoch: 206249, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 206259, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 206269, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 206279, loss: -3.0648707252112217e-06, rewards: -9.299999999999999, count: 50
epoch: 206289, loss: -1.348614659946179e-05, rewards: -9.299999999999999, co

epoch: 207169, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 207179, loss: -5.412101700130734e-07, rewards: -9.299999999999999, count: 50
epoch: 207189, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 207199, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 207209, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 207219, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 207229, loss: -1.4603137969970703e-06, rewards: -9.299999999999999, count: 50
epoch: 207239, loss: -9.046792911249213e-06, rewards: -9.299999999999999, count: 50
epoch: 207249, loss: -7.580399687867612e-05, rewards: -9.299999999999999, count: 50
epoch: 207259, loss: 4.716396506410092e-05, rewards: -9.299999999999999, count: 50
epoch: 207269, loss: -4.532456296146847e-05, rewards: -9.299999999999999, count: 50
epoch: 207279, loss: -2.6538371457718313e-05, rewards: -9.299999999999999, c

epoch: 208159, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 208169, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 208179, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 208189, loss: -5.197525183575635e-07, rewards: -9.299999999999999, count: 50
epoch: 208199, loss: -2.8884410312457476e-06, rewards: -9.299999999999999, count: 50
epoch: 208209, loss: -2.463340752001386e-05, rewards: -9.299999999999999, count: 50
epoch: 208219, loss: -0.00014223218022380024, rewards: -9.299999999999999, count: 50
epoch: 208229, loss: -1.4348030163091607e-05, rewards: -9.299999999999999, count: 50
epoch: 208239, loss: 2.8073787689208984e-05, rewards: -9.299999999999999, count: 50
epoch: 208249, loss: 2.7192831112188287e-05, rewards: -9.299999999999999, count: 50
epoch: 208259, loss: 1.3182163456804119e-05, rewards: -9.299999999999999, count: 50
epoch: 208269, loss: -3.8981437455731793e-07, rewards: -9.2999999999999

epoch: 209139, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 209149, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 209159, loss: 2.2494793938676594e-06, rewards: -9.299999999999999, count: 50
epoch: 209169, loss: 6.999969627941027e-06, rewards: -9.299999999999999, count: 50
epoch: 209179, loss: 4.361033279565163e-05, rewards: -9.299999999999999, count: 50
epoch: 209189, loss: 6.88648215145804e-05, rewards: -9.299999999999999, count: 50
epoch: 209199, loss: 1.7427206330467016e-05, rewards: -9.299999999999999, count: 50
epoch: 209209, loss: -3.3795833587646484e-05, rewards: -9.299999999999999, count: 50
epoch: 209219, loss: 2.100467781929183e-06, rewards: -9.299999999999999, count: 50
epoch: 209229, loss: 1.0917186955339275e-05, rewards: -9.299999999999999, count: 50
epoch: 209239, loss: -6.954669970582472e-06, rewards: -9.299999999999999, count: 50
epoch: 209249, loss: 1.994371359614888e-06, rewards: -9.299999999999999, count: 5

epoch: 210129, loss: 2.171874075429514e-05, rewards: -9.299999999999999, count: 50
epoch: 210139, loss: -4.8364399845013395e-05, rewards: -9.299999999999999, count: 50
epoch: 210149, loss: 1.3077258699922822e-05, rewards: -9.299999999999999, count: 50
epoch: 210159, loss: 1.218199759023264e-05, rewards: -9.299999999999999, count: 50
epoch: 210169, loss: -1.0517836017243098e-05, rewards: -9.299999999999999, count: 50
epoch: 210179, loss: 3.2973289307847153e-06, rewards: -9.299999999999999, count: 50
epoch: 210189, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 210199, loss: -1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 210209, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 210219, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 210229, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 210239, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, c

epoch: 211119, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 211129, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 211139, loss: -3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 211149, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 211159, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 211169, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 211179, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 211189, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 211199, loss: 2.8836727778980276e-06, rewards: -9.299999999999999, count: 50
epoch: 211209, loss: 1.436471939086914e-05, rewards: -9.299999999999999, count: 50
epoch: 211219, loss: 9.055852569872513e-05, rewards: -9.299999999999999, count: 50
epoch: 211229, loss: -5.795836477773264e-05, rewards: -9.299999999999999, co

epoch: 212109, loss: -4.475116838875692e-06, rewards: -9.299999999999999, count: 50
epoch: 212119, loss: -2.2023916244506836e-05, rewards: -9.299999999999999, count: 50
epoch: 212129, loss: -0.00011163949966430664, rewards: -9.299999999999999, count: 50
epoch: 212139, loss: 6.574868893949315e-05, rewards: -9.299999999999999, count: 50
epoch: 212149, loss: -9.149312973022461e-06, rewards: -9.299999999999999, count: 50
epoch: 212159, loss: -1.9140243239235133e-05, rewards: -9.299999999999999, count: 50
epoch: 212169, loss: 1.2934207916259766e-05, rewards: -9.299999999999999, count: 50
epoch: 212179, loss: -2.774000222416362e-06, rewards: -9.299999999999999, count: 50
epoch: 212189, loss: -1.21712685086095e-06, rewards: -9.299999999999999, count: 50
epoch: 212199, loss: 1.7702579953038367e-06, rewards: -9.299999999999999, count: 50
epoch: 212209, loss: -9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 212219, loss: -1.668930025289228e-07, rewards: -9.299999999999999, c

epoch: 213099, loss: -2.1111964088049717e-06, rewards: -9.299999999999999, count: 50
epoch: 213109, loss: -5.7470797401038e-06, rewards: -9.299999999999999, count: 50
epoch: 213119, loss: -3.521442340570502e-05, rewards: -9.299999999999999, count: 50
epoch: 213129, loss: -9.73296191659756e-05, rewards: -9.299999999999999, count: 50
epoch: 213139, loss: 1.0708570698625408e-05, rewards: -9.299999999999999, count: 50
epoch: 213149, loss: 3.324151111883111e-05, rewards: -9.299999999999999, count: 50
epoch: 213159, loss: -1.3090371794532984e-05, rewards: -9.299999999999999, count: 50
epoch: 213169, loss: -6.620884050789755e-06, rewards: -9.299999999999999, count: 50
epoch: 213179, loss: 7.652044587302953e-06, rewards: -9.299999999999999, count: 50
epoch: 213189, loss: -3.439188049014774e-06, rewards: -9.299999999999999, count: 50
epoch: 213199, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 213209, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 

epoch: 214079, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 214089, loss: 1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 214099, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 214109, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 214119, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 214129, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 214139, loss: 2.2649764730431343e-08, rewards: -9.299999999999999, count: 50
epoch: 214149, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 214159, loss: 3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 214169, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 214179, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 214189, loss: -3.4213064736832166e-07, rewards: -9.299999999999999, c

epoch: 215069, loss: -5.557536951528164e-06, rewards: -9.299999999999999, count: 50
epoch: 215079, loss: -2.5883913622237742e-05, rewards: -9.299999999999999, count: 50
epoch: 215089, loss: -0.00010413885320303962, rewards: -9.299999999999999, count: 50
epoch: 215099, loss: 5.976676766294986e-05, rewards: -9.299999999999999, count: 50
epoch: 215109, loss: -1.961469570233021e-05, rewards: -9.299999999999999, count: 50
epoch: 215119, loss: -5.346536454453599e-06, rewards: -9.299999999999999, count: 50
epoch: 215129, loss: 1.055002212524414e-05, rewards: -9.299999999999999, count: 50
epoch: 215139, loss: -7.892846952017862e-06, rewards: -9.299999999999999, count: 50
epoch: 215149, loss: 4.937648554914631e-06, rewards: -9.299999999999999, count: 50
epoch: 215159, loss: -2.962350890811649e-06, rewards: -9.299999999999999, count: 50
epoch: 215169, loss: 1.8799305507855024e-06, rewards: -9.299999999999999, count: 50
epoch: 215179, loss: -2.837181227732799e-07, rewards: -9.299999999999999, cou

epoch: 216059, loss: 4.2676924749684986e-07, rewards: -9.299999999999999, count: 50
epoch: 216069, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 216079, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 216089, loss: 2.2149085907585686e-06, rewards: -9.299999999999999, count: 50
epoch: 216099, loss: 9.353160749014933e-06, rewards: -9.299999999999999, count: 50
epoch: 216109, loss: 5.629181760014035e-05, rewards: -9.299999999999999, count: 50
epoch: 216119, loss: 2.6285648345947266e-05, rewards: -9.299999999999999, count: 50
epoch: 216129, loss: 2.4884939193725586e-05, rewards: -9.299999999999999, count: 50
epoch: 216139, loss: -2.9591321435873397e-05, rewards: -9.299999999999999, count: 50
epoch: 216149, loss: 1.055002212524414e-05, rewards: -9.299999999999999, count: 50
epoch: 216159, loss: 1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 216169, loss: -3.967285010730848e-06, rewards: -9.299999999999999, count

epoch: 217039, loss: -4.475116838875692e-06, rewards: -9.299999999999999, count: 50
epoch: 217049, loss: -6.005764134897618e-06, rewards: -9.299999999999999, count: 50
epoch: 217059, loss: 5.735158993047662e-06, rewards: -9.299999999999999, count: 50
epoch: 217069, loss: -2.8884410312457476e-06, rewards: -9.299999999999999, count: 50
epoch: 217079, loss: 1.2934208371007117e-06, rewards: -9.299999999999999, count: 50
epoch: 217089, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 217099, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 217109, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 217119, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 217129, loss: 1.592636067471176e-06, rewards: -9.299999999999999, count: 50
epoch: 217139, loss: 1.7702579953038367e-06, rewards: -9.299999999999999, count: 50
epoch: 217149, loss: 3.362893949088175e-06, rewards: -9.299999999999999, count:

epoch: 218029, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 218039, loss: 3.942251169064548e-06, rewards: -9.299999999999999, count: 50
epoch: 218049, loss: -3.2973289307847153e-06, rewards: -9.299999999999999, count: 50
epoch: 218059, loss: 2.2006033759680577e-06, rewards: -9.299999999999999, count: 50
epoch: 218069, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 218079, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 218089, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 218099, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 218109, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 218119, loss: -5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 218129, loss: -1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 218139, loss: -6.071329153201077e-06, rewards: -9.299999999999999, coun

epoch: 219019, loss: 1.046538363880245e-05, rewards: -9.299999999999999, count: 50
epoch: 219029, loss: -1.1299848665657919e-05, rewards: -9.299999999999999, count: 50
epoch: 219039, loss: 2.034902536252048e-06, rewards: -9.299999999999999, count: 50
epoch: 219049, loss: 1.994371359614888e-06, rewards: -9.299999999999999, count: 50
epoch: 219059, loss: -2.363920202697045e-06, rewards: -9.299999999999999, count: 50
epoch: 219069, loss: 1.3291835330164758e-06, rewards: -9.299999999999999, count: 50
epoch: 219079, loss: -1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 219089, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 219099, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 219109, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 219119, loss: 4.971027465217048e-07, rewards: -9.299999999999999, count: 50
epoch: 219129, loss: 1.0979175613101688e-06, rewards: -9.299999999999999, count: 

epoch: 220009, loss: -1.5325545973610133e-05, rewards: -9.299999999999999, count: 50
epoch: 220019, loss: -9.820341801969334e-05, rewards: -9.299999999999999, count: 50
epoch: 220029, loss: 6.725907587679103e-05, rewards: -9.299999999999999, count: 50
epoch: 220039, loss: -3.178358019795269e-05, rewards: -9.299999999999999, count: 50
epoch: 220049, loss: -1.3997554560774006e-05, rewards: -9.299999999999999, count: 50
epoch: 220059, loss: 1.5561579857603647e-05, rewards: -9.299999999999999, count: 50
epoch: 220069, loss: -3.076791699641035e-06, rewards: -9.299999999999999, count: 50
epoch: 220079, loss: -2.925396074715536e-06, rewards: -9.299999999999999, count: 50
epoch: 220089, loss: 3.874301910400391e-06, rewards: -9.299999999999999, count: 50
epoch: 220099, loss: -2.297163064213237e-06, rewards: -9.299999999999999, count: 50
epoch: 220109, loss: 1.1539459592313506e-06, rewards: -9.299999999999999, count: 50
epoch: 220119, loss: -9.131431397690903e-07, rewards: -9.299999999999999, co

epoch: 220999, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 221009, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 221019, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 221029, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 221039, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 221049, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 221059, loss: 2.607107262520003e-06, rewards: -9.299999999999999, count: 50
epoch: 221069, loss: 1.4203787031874526e-05, rewards: -9.299999999999999, count: 50
epoch: 221079, loss: 9.318590309703723e-05, rewards: -9.299999999999999, count: 50
epoch: 221089, loss: -6.265402043936774e-05, rewards: -9.299999999999999, count: 50
epoch: 221099, loss: 3.760337858693674e-05, rewards: -9.299999999999999, count: 50
epoch: 221109, loss: 1.0026693416875787e-05, rewards: -9.299999999999999, count: 50


epoch: 221989, loss: -6.506443241960369e-06, rewards: -9.299999999999999, count: 50
epoch: 221999, loss: 2.5868416742014233e-06, rewards: -9.299999999999999, count: 50
epoch: 222009, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 222019, loss: -6.651878265984124e-07, rewards: -9.299999999999999, count: 50
epoch: 222029, loss: 5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 222039, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 222049, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 222059, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 222069, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count: 50
epoch: 222079, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 222089, loss: -1.2958049637745717e-06, rewards: -9.299999999999999, count: 50
epoch: 222099, loss: -1.5997886748664314e-06, rewards: -9.299999999999999, cou

epoch: 222969, loss: -8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 222979, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 222989, loss: -1.4078617596169352e-06, rewards: -9.299999999999999, count: 50
epoch: 222999, loss: -5.382299605116714e-06, rewards: -9.299999999999999, count: 50
epoch: 223009, loss: -4.186749356449582e-05, rewards: -9.299999999999999, count: 50
epoch: 223019, loss: -7.218599057523534e-05, rewards: -9.299999999999999, count: 50
epoch: 223029, loss: -3.9201975596370175e-05, rewards: -9.299999999999999, count: 50
epoch: 223039, loss: 2.4263857994810678e-05, rewards: -9.299999999999999, count: 50
epoch: 223049, loss: 1.9413233530940488e-05, rewards: -9.299999999999999, count: 50
epoch: 223059, loss: -6.139278411865234e-06, rewards: -9.299999999999999, count: 50
epoch: 223069, loss: -6.939172635611612e-06, rewards: -9.299999999999999, count: 50
epoch: 223079, loss: 4.190206709608901e-06, rewards: -9.299999999999999, c

epoch: 223959, loss: -9.126662916969508e-06, rewards: -9.299999999999999, count: 50
epoch: 223969, loss: 1.8256901967106387e-05, rewards: -9.299999999999999, count: 50
epoch: 223979, loss: 5.548000444832724e-06, rewards: -9.299999999999999, count: 50
epoch: 223989, loss: -7.241964340209961e-06, rewards: -9.299999999999999, count: 50
epoch: 223999, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 224009, loss: 1.8477439880371094e-06, rewards: -9.299999999999999, count: 50
epoch: 224019, loss: -1.8477439880371094e-06, rewards: -9.299999999999999, count: 50
epoch: 224029, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 224039, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 224049, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 224059, loss: 9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 224069, loss: -4.0531159584134e-07, rewards: -9.299999999999999, count: 5

epoch: 224949, loss: 2.2149085907585686e-06, rewards: -9.299999999999999, count: 50
epoch: 224959, loss: -1.5997886748664314e-06, rewards: -9.299999999999999, count: 50
epoch: 224969, loss: 4.470348358154297e-07, rewards: -9.299999999999999, count: 50
epoch: 224979, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 224989, loss: -5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 224999, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 225009, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 225019, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 225029, loss: -1.745224039950699e-06, rewards: -9.299999999999999, count: 50
epoch: 225039, loss: -1.0579824447631836e-05, rewards: -9.299999999999999, count: 50
epoch: 225049, loss: -9.402632713317871e-05, rewards: -9.299999999999999, count: 50
epoch: 225059, loss: 8.03744769655168e-05, rewards: -9.299999999999999, coun

epoch: 225939, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 225949, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 225959, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 225969, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 225979, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, count: 50
epoch: 225989, loss: 4.971027465217048e-07, rewards: -9.299999999999999, count: 50
epoch: 225999, loss: -5.412101700130734e-07, rewards: -9.299999999999999, count: 50
epoch: 226009, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 226019, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 226029, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 226039, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count: 50
epoch: 226049, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, c

epoch: 226929, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 226939, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 226949, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 226959, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 226969, loss: -2.915859113272745e-06, rewards: -9.299999999999999, count: 50
epoch: 226979, loss: -1.4848708815407008e-05, rewards: -9.299999999999999, count: 50
epoch: 226989, loss: -8.766651444602758e-05, rewards: -9.299999999999999, count: 50
epoch: 226999, loss: 5.096674067317508e-05, rewards: -9.299999999999999, count: 50
epoch: 227009, loss: -4.322767199482769e-05, rewards: -9.299999999999999, count: 50
epoch: 227019, loss: 6.538629349961411e-06, rewards: -9.299999999999999, count: 50
epoch: 227029, loss: 1.0865926924452651e-05, rewards: -9.299999999999999, count: 50
epoch: 227039, loss: -9.926557140715886e-06, rewards: -9.299999999999999, co

epoch: 227919, loss: 3.465414010861423e-06, rewards: -9.299999999999999, count: 50
epoch: 227929, loss: -2.16841704059334e-06, rewards: -9.299999999999999, count: 50
epoch: 227939, loss: 1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 227949, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 227959, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 227969, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 227979, loss: 5.388259864957945e-07, rewards: -9.299999999999999, count: 50
epoch: 227989, loss: 6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 227999, loss: 2.2101403374108486e-06, rewards: -9.299999999999999, count: 50
epoch: 228009, loss: 9.481907000008505e-06, rewards: -9.299999999999999, count: 50
epoch: 228019, loss: 5.5143831559689716e-05, rewards: -9.299999999999999, count: 50
epoch: 228029, loss: 3.1007526558823884e-05, rewards: -9.299999999999999, count: 

epoch: 228909, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 228919, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 228929, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 228939, loss: -1.1277198836978641e-06, rewards: -9.299999999999999, count: 50
epoch: 228949, loss: -7.065534646244487e-06, rewards: -9.299999999999999, count: 50
epoch: 228959, loss: -4.855036604567431e-05, rewards: -9.299999999999999, count: 50
epoch: 228969, loss: -4.849314791499637e-05, rewards: -9.299999999999999, count: 50
epoch: 228979, loss: -3.818750337813981e-05, rewards: -9.299999999999999, count: 50
epoch: 228989, loss: 2.6545523724053055e-05, rewards: -9.299999999999999, count: 50
epoch: 228999, loss: 1.186609279102413e-05, rewards: -9.299999999999999, count: 50
epoch: 229009, loss: -1.1357068615325261e-05, rewards: -9.299999999999999, count: 50
epoch: 229019, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count

epoch: 229889, loss: 1.4030933925823774e-06, rewards: -9.299999999999999, count: 50
epoch: 229899, loss: 1.7333030655208859e-06, rewards: -9.299999999999999, count: 50
epoch: 229909, loss: -1.7070769899873994e-06, rewards: -9.299999999999999, count: 50
epoch: 229919, loss: 1.4710426512465347e-06, rewards: -9.299999999999999, count: 50
epoch: 229929, loss: -6.783008643651556e-07, rewards: -9.299999999999999, count: 50
epoch: 229939, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 229949, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 229959, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 229969, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 229979, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 229989, loss: 1.9323824744788e-06, rewards: -9.299999999999999, count: 50
epoch: 229999, loss: 9.146929187409114e-06, rewards: -9.299999999999999, count: 50


epoch: 230879, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 230889, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, count: 50
epoch: 230899, loss: 4.971027465217048e-07, rewards: -9.299999999999999, count: 50
epoch: 230909, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 230919, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 230929, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 230939, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 230949, loss: 4.804134277947014e-06, rewards: -9.299999999999999, count: 50
epoch: 230959, loss: 3.4525393857620656e-05, rewards: -9.299999999999999, count: 50
epoch: 230969, loss: 0.00010082602238981053, rewards: -9.299999999999999, count: 50
epoch: 230979, loss: -3.2782554626464844e-06, rewards: -9.299999999999999, count: 50
epoch: 230989, loss: -3.6675930459750816e-05, rewards: -9.299999999999999, coun

epoch: 231869, loss: -6.14404689258663e-06, rewards: -9.299999999999999, count: 50
epoch: 231879, loss: -3.41320046572946e-05, rewards: -9.299999999999999, count: 50
epoch: 231889, loss: -9.901761950459331e-05, rewards: -9.299999999999999, count: 50
epoch: 231899, loss: 2.5762319637578912e-05, rewards: -9.299999999999999, count: 50
epoch: 231909, loss: 2.2975205865805037e-05, rewards: -9.299999999999999, count: 50
epoch: 231919, loss: -2.0409823264344595e-05, rewards: -9.299999999999999, count: 50
epoch: 231929, loss: 3.972053491452243e-06, rewards: -9.299999999999999, count: 50
epoch: 231939, loss: 3.598928515202715e-06, rewards: -9.299999999999999, count: 50
epoch: 231949, loss: -4.231929779052734e-06, rewards: -9.299999999999999, count: 50
epoch: 231959, loss: 3.2007694699132117e-06, rewards: -9.299999999999999, count: 50
epoch: 231969, loss: -2.0766258330695564e-06, rewards: -9.299999999999999, count: 50
epoch: 231979, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count

epoch: 232859, loss: -1.6629695892333984e-05, rewards: -9.299999999999999, count: 50
epoch: 232869, loss: -4.2130948713747784e-05, rewards: -9.299999999999999, count: 50
epoch: 232879, loss: -2.748727820289787e-05, rewards: -9.299999999999999, count: 50
epoch: 232889, loss: -7.616281436639838e-06, rewards: -9.299999999999999, count: 50
epoch: 232899, loss: 5.178451374376891e-06, rewards: -9.299999999999999, count: 50
epoch: 232909, loss: 5.816221346321981e-06, rewards: -9.299999999999999, count: 50
epoch: 232919, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 232929, loss: -2.359151949349325e-06, rewards: -9.299999999999999, count: 50
epoch: 232939, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 232949, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 232959, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 232969, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count:

epoch: 233849, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 233859, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 233869, loss: 2.0265579436795633e-08, rewards: -9.299999999999999, count: 50
epoch: 233879, loss: 1.167058940154675e-06, rewards: -9.299999999999999, count: 50
epoch: 233889, loss: 2.915859113272745e-06, rewards: -9.299999999999999, count: 50
epoch: 233899, loss: 1.2134313692513388e-05, rewards: -9.299999999999999, count: 50
epoch: 233909, loss: 6.602168286917731e-05, rewards: -9.299999999999999, count: 50
epoch: 233919, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 233929, loss: 2.8078555260435678e-05, rewards: -9.299999999999999, count: 50
epoch: 233939, loss: -2.657532604644075e-05, rewards: -9.299999999999999, count: 50
epoch: 233949, loss: 1.384854294883553e-05, rewards: -9.299999999999999, count: 50
epoch: 233959, loss: -4.98414055982721e-06, rewards: -9.299999999999999, count:

epoch: 234839, loss: 3.312826265755575e-06, rewards: -9.299999999999999, count: 50
epoch: 234849, loss: -1.1062621751989354e-06, rewards: -9.299999999999999, count: 50
epoch: 234859, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 234869, loss: -1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 234879, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 234889, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 234899, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 234909, loss: -3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 234919, loss: 1.5175342014117632e-06, rewards: -9.299999999999999, count: 50
epoch: 234929, loss: 4.674196134146769e-06, rewards: -9.299999999999999, count: 50
epoch: 234939, loss: 2.5135279429377988e-05, rewards: -9.299999999999999, count: 50
epoch: 234949, loss: 0.000111001732875593, rewards: -9.299999999999999, cou

epoch: 235829, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 235839, loss: 1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 235849, loss: 4.994869300389837e-07, rewards: -9.299999999999999, count: 50
epoch: 235859, loss: 5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 235869, loss: 2.796649823721964e-06, rewards: -9.299999999999999, count: 50
epoch: 235879, loss: 1.9563436580938287e-05, rewards: -9.299999999999999, count: 50
epoch: 235889, loss: 0.00012384056753944606, rewards: -9.299999999999999, count: 50
epoch: 235899, loss: -6.775141082471237e-05, rewards: -9.299999999999999, count: 50
epoch: 235909, loss: -2.155900074285455e-05, rewards: -9.299999999999999, count: 50
epoch: 235919, loss: 2.1001100321882404e-05, rewards: -9.299999999999999, count: 50
epoch: 235929, loss: 9.739398592500947e-06, rewards: -9.299999999999999, count: 50
epoch: 235939, loss: -8.455514944216702e-06, rewards: -9.299999999999999, count: 

epoch: 236819, loss: -1.3138055692252237e-05, rewards: -9.299999999999999, count: 50
epoch: 236829, loss: -6.253242463571951e-05, rewards: -9.299999999999999, count: 50
epoch: 236839, loss: -1.1613368769758381e-05, rewards: -9.299999999999999, count: 50
epoch: 236849, loss: -1.2975930985703599e-05, rewards: -9.299999999999999, count: 50
epoch: 236859, loss: 1.9925832020817325e-05, rewards: -9.299999999999999, count: 50
epoch: 236869, loss: -1.5301704479497857e-05, rewards: -9.299999999999999, count: 50
epoch: 236879, loss: 9.664297067502048e-06, rewards: -9.299999999999999, count: 50
epoch: 236889, loss: -5.891322871320881e-06, rewards: -9.299999999999999, count: 50
epoch: 236899, loss: 3.3974647521972656e-06, rewards: -9.299999999999999, count: 50
epoch: 236909, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 236919, loss: -1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 236929, loss: 3.0279159091151087e-07, rewards: -9.299999999999999

epoch: 237809, loss: -2.9221773729659617e-05, rewards: -9.299999999999999, count: 50
epoch: 237819, loss: -0.00011424779950175434, rewards: -9.299999999999999, count: 50
epoch: 237829, loss: 3.096461296081543e-05, rewards: -9.299999999999999, count: 50
epoch: 237839, loss: 3.0100345611572266e-05, rewards: -9.299999999999999, count: 50
epoch: 237849, loss: -1.6416312064393423e-05, rewards: -9.299999999999999, count: 50
epoch: 237859, loss: -6.356239282467868e-06, rewards: -9.299999999999999, count: 50
epoch: 237869, loss: 8.518695722159464e-06, rewards: -9.299999999999999, count: 50
epoch: 237879, loss: -3.5071373076789314e-06, rewards: -9.299999999999999, count: 50
epoch: 237889, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 237899, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 237909, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 237919, loss: -2.539158003855846e-07, rewards: -9.299999999999999,

epoch: 238799, loss: 8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 238809, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 238819, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 238829, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 238839, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 238849, loss: 1.4007091522216797e-06, rewards: -9.299999999999999, count: 50
epoch: 238859, loss: 5.155801773071289e-06, rewards: -9.299999999999999, count: 50
epoch: 238869, loss: 2.953886905743275e-05, rewards: -9.299999999999999, count: 50
epoch: 238879, loss: 0.0001108264914364554, rewards: -9.299999999999999, count: 50
epoch: 238889, loss: -3.974676292273216e-05, rewards: -9.299999999999999, count: 50
epoch: 238899, loss: -1.924276330100838e-05, rewards: -9.299999999999999, count: 50
epoch: 238909, loss: 2.136826515197754e-05, rewards: -9.299999999999999, count: 5

epoch: 239779, loss: -1.2862682297054562e-06, rewards: -9.299999999999999, count: 50
epoch: 239789, loss: -6.372928510245401e-06, rewards: -9.299999999999999, count: 50
epoch: 239799, loss: -5.1667691877810284e-05, rewards: -9.299999999999999, count: 50
epoch: 239809, loss: -2.8520822525024414e-05, rewards: -9.299999999999999, count: 50
epoch: 239819, loss: -5.8637859183363616e-05, rewards: -9.299999999999999, count: 50
epoch: 239829, loss: -1.8668174561753403e-06, rewards: -9.299999999999999, count: 50
epoch: 239839, loss: 2.0329951439634897e-05, rewards: -9.299999999999999, count: 50
epoch: 239849, loss: 6.506443241960369e-06, rewards: -9.299999999999999, count: 50
epoch: 239859, loss: -6.742477580701234e-06, rewards: -9.299999999999999, count: 50
epoch: 239869, loss: -1.4173984936860506e-06, rewards: -9.299999999999999, count: 50
epoch: 239879, loss: 2.2470951535069617e-06, rewards: -9.299999999999999, count: 50
epoch: 239889, loss: -8.594989822086063e-07, rewards: -9.29999999999999

epoch: 240769, loss: -7.195472790044732e-06, rewards: -9.299999999999999, count: 50
epoch: 240779, loss: 7.791519237798639e-06, rewards: -9.299999999999999, count: 50
epoch: 240789, loss: -3.881454631482484e-06, rewards: -9.299999999999999, count: 50
epoch: 240799, loss: 1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 240809, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 240819, loss: 1.115798909268051e-06, rewards: -9.299999999999999, count: 50
epoch: 240829, loss: -1.0251999356114538e-06, rewards: -9.299999999999999, count: 50
epoch: 240839, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 240849, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 240859, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 240869, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 240879, loss: -1.21712685086095e-06, rewards: -9.299999999999999, count:

epoch: 241759, loss: -4.2915345943583816e-08, rewards: -9.299999999999999, count: 50
epoch: 241769, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 241779, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 241789, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 241799, loss: -7.152557657263969e-08, rewards: -9.299999999999999, count: 50
epoch: 241809, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 241819, loss: 3.5762788286319847e-08, rewards: -9.299999999999999, count: 50
epoch: 241829, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 241839, loss: 5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 241849, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 241859, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 241869, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 5

epoch: 242739, loss: 8.890629032975994e-06, rewards: -9.299999999999999, count: 50
epoch: 242749, loss: -1.6279220290016383e-05, rewards: -9.299999999999999, count: 50
epoch: 242759, loss: 5.438327661977382e-06, rewards: -9.299999999999999, count: 50
epoch: 242769, loss: 1.043081283569336e-06, rewards: -9.299999999999999, count: 50
epoch: 242779, loss: -2.294778823852539e-06, rewards: -9.299999999999999, count: 50
epoch: 242789, loss: 1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 242799, loss: -1.1551379657248617e-06, rewards: -9.299999999999999, count: 50
epoch: 242809, loss: 1.0478496506038937e-06, rewards: -9.299999999999999, count: 50
epoch: 242819, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 242829, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 242839, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count: 50
epoch: 242849, loss: 1.9478798094496597e-06, rewards: -9.299999999999999, cou

epoch: 243729, loss: -1.6891956420295173e-06, rewards: -9.299999999999999, count: 50
epoch: 243739, loss: -5.594491994997952e-06, rewards: -9.299999999999999, count: 50
epoch: 243749, loss: -3.165960151818581e-05, rewards: -9.299999999999999, count: 50
epoch: 243759, loss: -0.0001068365600076504, rewards: -9.299999999999999, count: 50
epoch: 243769, loss: 2.9214621463324875e-05, rewards: -9.299999999999999, count: 50
epoch: 243779, loss: 2.606868656584993e-05, rewards: -9.299999999999999, count: 50
epoch: 243789, loss: -1.9605160559876822e-05, rewards: -9.299999999999999, count: 50
epoch: 243799, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 243809, loss: 6.762742941646138e-06, rewards: -9.299999999999999, count: 50
epoch: 243819, loss: -4.732608886115486e-06, rewards: -9.299999999999999, count: 50
epoch: 243829, loss: 2.759695007625851e-06, rewards: -9.299999999999999, count: 50
epoch: 243839, loss: -1.5997886748664314e-06, rewards: -9.299999999999999, co

epoch: 244719, loss: 5.533695002668537e-06, rewards: -9.299999999999999, count: 50
epoch: 244729, loss: 1.4710426512465347e-06, rewards: -9.299999999999999, count: 50
epoch: 244739, loss: -2.0766258330695564e-06, rewards: -9.299999999999999, count: 50
epoch: 244749, loss: -5.197525183575635e-07, rewards: -9.299999999999999, count: 50
epoch: 244759, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 244769, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 244779, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 244789, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 244799, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 244809, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 244819, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 244829, loss: 1.9788741667525755e-07, rewards: -9.299999999999999,

epoch: 245709, loss: -7.406473287119297e-06, rewards: -9.299999999999999, count: 50
epoch: 245719, loss: 3.750324140128214e-06, rewards: -9.299999999999999, count: 50
epoch: 245729, loss: -1.7702579953038367e-06, rewards: -9.299999999999999, count: 50
epoch: 245739, loss: 8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 245749, loss: -7.915496667010302e-07, rewards: -9.299999999999999, count: 50
epoch: 245759, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 245769, loss: 2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 245779, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 245789, loss: -8.594989822086063e-07, rewards: -9.299999999999999, count: 50
epoch: 245799, loss: -2.0909310478600673e-06, rewards: -9.299999999999999, count: 50
epoch: 245809, loss: -5.76257707507466e-06, rewards: -9.299999999999999, count: 50
epoch: 245819, loss: -2.8543472581077367e-05, rewards: -9.299999999999999, cou

epoch: 246699, loss: 1.8380880646873266e-05, rewards: -9.299999999999999, count: 50
epoch: 246709, loss: 8.874177729012445e-05, rewards: -9.299999999999999, count: 50
epoch: 246719, loss: -4.7247409383999184e-05, rewards: -9.299999999999999, count: 50
epoch: 246729, loss: 3.8710833905497566e-05, rewards: -9.299999999999999, count: 50
epoch: 246739, loss: -1.9226074073230848e-05, rewards: -9.299999999999999, count: 50
epoch: 246749, loss: 5.884170604986139e-06, rewards: -9.299999999999999, count: 50
epoch: 246759, loss: -9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 246769, loss: 4.2676924749684986e-07, rewards: -9.299999999999999, count: 50
epoch: 246779, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 246789, loss: 1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 246799, loss: -1.5652179854441783e-06, rewards: -9.299999999999999, count: 50
epoch: 246809, loss: -9.942054930434097e-07, rewards: -9.299999999999999, c

epoch: 247689, loss: -8.704543142812327e-05, rewards: -9.299999999999999, count: 50
epoch: 247699, loss: -2.7920008506043814e-05, rewards: -9.299999999999999, count: 50
epoch: 247709, loss: 3.08930866594892e-05, rewards: -9.299999999999999, count: 50
epoch: 247719, loss: 1.5900135622359812e-05, rewards: -9.299999999999999, count: 50
epoch: 247729, loss: -9.47713851928711e-06, rewards: -9.299999999999999, count: 50
epoch: 247739, loss: -4.860162789555034e-06, rewards: -9.299999999999999, count: 50
epoch: 247749, loss: 4.701614216173766e-06, rewards: -9.299999999999999, count: 50
epoch: 247759, loss: -9.810923984332476e-07, rewards: -9.299999999999999, count: 50
epoch: 247769, loss: -8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 247779, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 247789, loss: -8.82148754044465e-07, rewards: -9.299999999999999, count: 50
epoch: 247799, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 5

epoch: 248679, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 248689, loss: 2.528428922232706e-06, rewards: -9.299999999999999, count: 50
epoch: 248699, loss: -1.693964009064075e-06, rewards: -9.299999999999999, count: 50
epoch: 248709, loss: 7.700920150455204e-07, rewards: -9.299999999999999, count: 50
epoch: 248719, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 248729, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 248739, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 248749, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 248759, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 248769, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 248779, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 248789, loss: -2.191066641898942e-06, rewards: -9.299999999999999, coun

epoch: 249659, loss: 4.1135550418403e-05, rewards: -9.299999999999999, count: 50
epoch: 249669, loss: 1.7805099560064264e-05, rewards: -9.299999999999999, count: 50
epoch: 249679, loss: -7.085800007189391e-06, rewards: -9.299999999999999, count: 50
epoch: 249689, loss: -8.649825758766383e-06, rewards: -9.299999999999999, count: 50
epoch: 249699, loss: 1.8668174561753403e-06, rewards: -9.299999999999999, count: 50
epoch: 249709, loss: 2.759695007625851e-06, rewards: -9.299999999999999, count: 50
epoch: 249719, loss: -1.7428397995900013e-06, rewards: -9.299999999999999, count: 50
epoch: 249729, loss: 6.902217819515499e-07, rewards: -9.299999999999999, count: 50
epoch: 249739, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 249749, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 249759, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 249769, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count:

epoch: 250649, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 250659, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 250669, loss: -4.318952505855123e-06, rewards: -9.299999999999999, count: 50
epoch: 250679, loss: -1.795053503883537e-05, rewards: -9.299999999999999, count: 50
epoch: 250689, loss: -8.8388922449667e-05, rewards: -9.299999999999999, count: 50
epoch: 250699, loss: 4.7003031795611605e-05, rewards: -9.299999999999999, count: 50
epoch: 250709, loss: -3.9045811718096957e-05, rewards: -9.299999999999999, count: 50
epoch: 250719, loss: 1.897931178973522e-05, rewards: -9.299999999999999, count: 50
epoch: 250729, loss: -5.042552857048577e-06, rewards: -9.299999999999999, count: 50
epoch: 250739, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 250749, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 250759, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count

epoch: 251639, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 251649, loss: 1.7380714325554436e-06, rewards: -9.299999999999999, count: 50
epoch: 251659, loss: 7.41362555345404e-06, rewards: -9.299999999999999, count: 50
epoch: 251669, loss: 5.1915645599365234e-05, rewards: -9.299999999999999, count: 50
epoch: 251679, loss: 3.630638093454763e-05, rewards: -9.299999999999999, count: 50
epoch: 251689, loss: 4.0746926970314234e-05, rewards: -9.299999999999999, count: 50
epoch: 251699, loss: -2.5058985556825064e-05, rewards: -9.299999999999999, count: 50
epoch: 251709, loss: -1.102209080272587e-05, rewards: -9.299999999999999, count: 50
epoch: 251719, loss: 1.1961459676967934e-05, rewards: -9.299999999999999, count: 50
epoch: 251729, loss: -1.8870830444939202e-06, rewards: -9.299999999999999, count: 50
epoch: 251739, loss: -2.371072696405463e-06, rewards: -9.299999999999999, count: 50
epoch: 251749, loss: 2.511739694455173e-06, rewards: -9.299999999999999, coun

epoch: 252629, loss: -1.5313626136048697e-05, rewards: -9.299999999999999, count: 50
epoch: 252639, loss: -0.00010200261749560013, rewards: -9.299999999999999, count: 50
epoch: 252649, loss: 7.304549217224121e-05, rewards: -9.299999999999999, count: 50
epoch: 252659, loss: -2.381682315899525e-05, rewards: -9.299999999999999, count: 50
epoch: 252669, loss: -2.2317171897157095e-05, rewards: -9.299999999999999, count: 50
epoch: 252679, loss: 1.2410879207891412e-05, rewards: -9.299999999999999, count: 50
epoch: 252689, loss: 3.6311148505774327e-06, rewards: -9.299999999999999, count: 50
epoch: 252699, loss: -6.139278411865234e-06, rewards: -9.299999999999999, count: 50
epoch: 252709, loss: 3.005266080435831e-06, rewards: -9.299999999999999, count: 50
epoch: 252719, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 252729, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 252739, loss: -2.837181227732799e-07, rewards: -9.299999999999999, c

epoch: 253619, loss: 6.949901489861077e-07, rewards: -9.299999999999999, count: 50
epoch: 253629, loss: 1.6915797687033773e-06, rewards: -9.299999999999999, count: 50
epoch: 253639, loss: 8.596181942266412e-06, rewards: -9.299999999999999, count: 50
epoch: 253649, loss: 5.7119130360661075e-05, rewards: -9.299999999999999, count: 50
epoch: 253659, loss: 1.9890070689143613e-05, rewards: -9.299999999999999, count: 50
epoch: 253669, loss: 3.9999486034503207e-05, rewards: -9.299999999999999, count: 50
epoch: 253679, loss: -2.5813578758970834e-05, rewards: -9.299999999999999, count: 50
epoch: 253689, loss: -5.209446044318611e-06, rewards: -9.299999999999999, count: 50
epoch: 253699, loss: 1.1377334885764867e-05, rewards: -9.299999999999999, count: 50
epoch: 253709, loss: -5.528926976694493e-06, rewards: -9.299999999999999, count: 50
epoch: 253719, loss: 1.3291835330164758e-06, rewards: -9.299999999999999, count: 50
epoch: 253729, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, cou

epoch: 254609, loss: -4.220009031996597e-06, rewards: -9.299999999999999, count: 50
epoch: 254619, loss: -7.177591214713175e-06, rewards: -9.299999999999999, count: 50
epoch: 254629, loss: 5.30004490428837e-06, rewards: -9.299999999999999, count: 50
epoch: 254639, loss: -1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 254649, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count: 50
epoch: 254659, loss: -4.410743770222325e-07, rewards: -9.299999999999999, count: 50
epoch: 254669, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 254679, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 254689, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 254699, loss: 6.210804031070438e-07, rewards: -9.299999999999999, count: 50
epoch: 254709, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 254719, loss: -1.6140937759701046e-06, rewards: -9.299999999999999, count:

epoch: 255599, loss: -9.324431448476389e-05, rewards: -9.299999999999999, count: 50
epoch: 255609, loss: 6.537675653817132e-05, rewards: -9.299999999999999, count: 50
epoch: 255619, loss: -3.542303966241889e-05, rewards: -9.299999999999999, count: 50
epoch: 255629, loss: -1.6313792002620175e-05, rewards: -9.299999999999999, count: 50
epoch: 255639, loss: 1.535296360088978e-05, rewards: -9.299999999999999, count: 50
epoch: 255649, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 255659, loss: -5.042552857048577e-06, rewards: -9.299999999999999, count: 50
epoch: 255669, loss: 3.7550926208496094e-06, rewards: -9.299999999999999, count: 50
epoch: 255679, loss: -1.7702579953038367e-06, rewards: -9.299999999999999, count: 50
epoch: 255689, loss: 1.3756751968685421e-06, rewards: -9.299999999999999, count: 50
epoch: 255699, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 255709, loss: 5.316734359439579e-07, rewards: -9.299999999999999, coun

epoch: 256589, loss: -7.798671504133381e-06, rewards: -9.299999999999999, count: 50
epoch: 256599, loss: 1.9478798094496597e-06, rewards: -9.299999999999999, count: 50
epoch: 256609, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 256619, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 256629, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 256639, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 256649, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 256659, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 256669, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 256679, loss: -1.5735626845980732e-07, rewards: -9.299999999999999, count: 50
epoch: 256689, loss: -1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 256699, loss: -5.50627692064154e-06, rewards: -9.299999999999999, count:

epoch: 257579, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 257589, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 257599, loss: 9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 257609, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 257619, loss: -3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 257629, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 257639, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 257649, loss: -7.355213256232673e-07, rewards: -9.299999999999999, count: 50
epoch: 257659, loss: -2.219677071479964e-06, rewards: -9.299999999999999, count: 50
epoch: 257669, loss: -1.1718273526639678e-05, rewards: -9.299999999999999, count: 50
epoch: 257679, loss: -8.276939479401335e-05, rewards: -9.299999999999999, count: 50
epoch: 257689, loss: 5.081057679490186e-05, rewards: -9.299999999999999, co

epoch: 258569, loss: -1.6744137610658072e-05, rewards: -9.299999999999999, count: 50
epoch: 258579, loss: -0.00010064005618914962, rewards: -9.299999999999999, count: 50
epoch: 258589, loss: 6.820797716500238e-05, rewards: -9.299999999999999, count: 50
epoch: 258599, loss: -2.9978751626913436e-05, rewards: -9.299999999999999, count: 50
epoch: 258609, loss: -1.2555122339108493e-05, rewards: -9.299999999999999, count: 50
epoch: 258619, loss: 1.5758276276756078e-05, rewards: -9.299999999999999, count: 50
epoch: 258629, loss: -4.839896973862778e-06, rewards: -9.299999999999999, count: 50
epoch: 258639, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 258649, loss: 1.996755599975586e-06, rewards: -9.299999999999999, count: 50
epoch: 258659, loss: -1.7631053879085812e-06, rewards: -9.299999999999999, count: 50
epoch: 258669, loss: 1.115798909268051e-06, rewards: -9.299999999999999, count: 50
epoch: 258679, loss: -4.3630600998767477e-07, rewards: -9.299999999999999,

epoch: 259559, loss: 6.747245606675278e-07, rewards: -9.299999999999999, count: 50
epoch: 259569, loss: -9.834766387939453e-07, rewards: -9.299999999999999, count: 50
epoch: 259579, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 259589, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 259599, loss: 1.3387202670855913e-06, rewards: -9.299999999999999, count: 50
epoch: 259609, loss: 3.6454200653679436e-06, rewards: -9.299999999999999, count: 50
epoch: 259619, loss: 1.732826240186114e-05, rewards: -9.299999999999999, count: 50
epoch: 259629, loss: 0.00010258198017254472, rewards: -9.299999999999999, count: 50
epoch: 259639, loss: -6.923079490661621e-05, rewards: -9.299999999999999, count: 50
epoch: 259649, loss: 2.721548116824124e-05, rewards: -9.299999999999999, count: 50
epoch: 259659, loss: 1.3768672943115234e-05, rewards: -9.299999999999999, count: 50
epoch: 259669, loss: -1.5432835425599478e-05, rewards: -9.299999999999999, cou

epoch: 260549, loss: -1.8556118448032066e-05, rewards: -9.299999999999999, count: 50
epoch: 260559, loss: -1.5283823813661e-05, rewards: -9.299999999999999, count: 50
epoch: 260569, loss: 1.8461942090652883e-05, rewards: -9.299999999999999, count: 50
epoch: 260579, loss: -1.0647773706295993e-05, rewards: -9.299999999999999, count: 50
epoch: 260589, loss: 5.075931767350994e-06, rewards: -9.299999999999999, count: 50
epoch: 260599, loss: -3.12209135699959e-06, rewards: -9.299999999999999, count: 50
epoch: 260609, loss: 2.4247169676527847e-06, rewards: -9.299999999999999, count: 50
epoch: 260619, loss: -1.9323824744788e-06, rewards: -9.299999999999999, count: 50
epoch: 260629, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 260639, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 260649, loss: 9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 260659, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50


epoch: 261539, loss: 7.723569979134481e-06, rewards: -9.299999999999999, count: 50
epoch: 261549, loss: 3.7872791835980024e-06, rewards: -9.299999999999999, count: 50
epoch: 261559, loss: -4.631280717148911e-06, rewards: -9.299999999999999, count: 50
epoch: 261569, loss: 1.7404556729161413e-06, rewards: -9.299999999999999, count: 50
epoch: 261579, loss: -3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 261589, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 261599, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 261609, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 261619, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 261629, loss: -5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 261639, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 261649, loss: -8.940696716308594e-07, rewards: -9.299999999999999, cou

epoch: 262529, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 262539, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 262549, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 262559, loss: 1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 262569, loss: 2.471208517818013e-06, rewards: -9.299999999999999, count: 50
epoch: 262579, loss: 1.2083053661626764e-05, rewards: -9.299999999999999, count: 50
epoch: 262589, loss: 8.932590571930632e-05, rewards: -9.299999999999999, count: 50
epoch: 262599, loss: -6.403088627848774e-05, rewards: -9.299999999999999, count: 50
epoch: 262609, loss: 3.6257504689274356e-05, rewards: -9.299999999999999, count: 50
epoch: 262619, loss: 2.125859282386955e-05, rewards: -9.299999999999999, count: 50
epoch: 262629, loss: -1.2861490176874213e-05, rewards: -9.299999999999999, count: 50
epoch: 262639, loss: -5.749463980464498e-06, rewards: -9.299999999999999, count

epoch: 263519, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 263529, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 263539, loss: 1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 263549, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 263559, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 263569, loss: -5.382299605116714e-06, rewards: -9.299999999999999, count: 50
epoch: 263579, loss: -4.5503376895794645e-05, rewards: -9.299999999999999, count: 50
epoch: 263589, loss: -5.5776836234144866e-05, rewards: -9.299999999999999, count: 50
epoch: 263599, loss: -5.264043647912331e-05, rewards: -9.299999999999999, count: 50
epoch: 263609, loss: 9.706020136945881e-06, rewards: -9.299999999999999, count: 50
epoch: 263619, loss: 2.232193946838379e-05, rewards: -9.299999999999999, count: 50
epoch: 263629, loss: 3.1387805847771233e-06, rewards: -9.299999999999999, co

epoch: 264509, loss: -5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 264519, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 264529, loss: 4.470348358154297e-07, rewards: -9.299999999999999, count: 50
epoch: 264539, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 264549, loss: 4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 264559, loss: 3.185272134942352e-06, rewards: -9.299999999999999, count: 50
epoch: 264569, loss: 1.7787218894227408e-05, rewards: -9.299999999999999, count: 50
epoch: 264579, loss: 0.00010268211190123111, rewards: -9.299999999999999, count: 50
epoch: 264589, loss: -6.821275019319728e-05, rewards: -9.299999999999999, count: 50
epoch: 264599, loss: 2.804398536682129e-05, rewards: -9.299999999999999, count: 50
epoch: 264609, loss: 1.1705160432029516e-05, rewards: -9.299999999999999, count: 50
epoch: 264619, loss: -1.5360117686213925e-05, rewards: -9.299999999999999, count: 5

epoch: 265509, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 265519, loss: 3.759861101571005e-06, rewards: -9.299999999999999, count: 50
epoch: 265529, loss: 2.5212764739990234e-05, rewards: -9.299999999999999, count: 50
epoch: 265539, loss: 0.00012629508273676038, rewards: -9.299999999999999, count: 50
epoch: 265549, loss: -3.615259993239306e-05, rewards: -9.299999999999999, count: 50
epoch: 265559, loss: -3.744721470866352e-05, rewards: -9.299999999999999, count: 50
epoch: 265569, loss: 7.494687906728359e-06, rewards: -9.299999999999999, count: 50
epoch: 265579, loss: 1.443982091586804e-05, rewards: -9.299999999999999, count: 50
epoch: 265589, loss: -3.983974238508381e-06, rewards: -9.299999999999999, count: 50
epoch: 265599, loss: -3.725290298461914e-06, rewards: -9.299999999999999, count: 50
epoch: 265609, loss: 3.482103238638956e-06, rewards: -9.299999999999999, count: 50
epoch: 265619, loss: -1.5223026821331587e-06, rewards: -9.299999999999999, count

epoch: 266499, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 266509, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 266519, loss: -1.497268726780021e-06, rewards: -9.299999999999999, count: 50
epoch: 266529, loss: -4.0912627810030244e-06, rewards: -9.299999999999999, count: 50
epoch: 266539, loss: -1.993298610614147e-05, rewards: -9.299999999999999, count: 50
epoch: 266549, loss: -0.00010775208647828549, rewards: -9.299999999999999, count: 50
epoch: 266559, loss: 6.849527562735602e-05, rewards: -9.299999999999999, count: 50
epoch: 266569, loss: -1.9899605831597e-05, rewards: -9.299999999999999, count: 50
epoch: 266579, loss: -1.5393496141768992e-05, rewards: -9.299999999999999, count: 50
epoch: 266589, loss: 1.4995336641732138e-05, rewards: -9.299999999999999, count: 50
epoch: 266599, loss: -5.272626822261373e-06, rewards: -9.299999999999999, count: 50
epoch: 266609, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, co

epoch: 267489, loss: -1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 267499, loss: 1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 267509, loss: -1.0550022579991492e-06, rewards: -9.299999999999999, count: 50
epoch: 267519, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 267529, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 267539, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 267549, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 267559, loss: 1.1765956742237904e-06, rewards: -9.299999999999999, count: 50
epoch: 267569, loss: 4.363059815659653e-06, rewards: -9.299999999999999, count: 50
epoch: 267579, loss: 2.1498202841030434e-05, rewards: -9.299999999999999, count: 50
epoch: 267589, loss: 0.00011078357783844694, rewards: -9.299999999999999, count: 50
epoch: 267599, loss: -6.680011574644595e-05, rewards: -9.299999999999999, c

epoch: 268479, loss: -2.1370649847085588e-05, rewards: -9.299999999999999, count: 50
epoch: 268489, loss: 4.322290260461159e-05, rewards: -9.299999999999999, count: 50
epoch: 268499, loss: -2.090930865961127e-05, rewards: -9.299999999999999, count: 50
epoch: 268509, loss: -1.9478798094496597e-06, rewards: -9.299999999999999, count: 50
epoch: 268519, loss: 7.756948434689548e-06, rewards: -9.299999999999999, count: 50
epoch: 268529, loss: -6.223916898306925e-06, rewards: -9.299999999999999, count: 50
epoch: 268539, loss: 3.827810360235162e-06, rewards: -9.299999999999999, count: 50
epoch: 268549, loss: -2.1839141481905244e-06, rewards: -9.299999999999999, count: 50
epoch: 268559, loss: 1.5175342014117632e-06, rewards: -9.299999999999999, count: 50
epoch: 268569, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 268579, loss: -7.402896926578251e-07, rewards: -9.299999999999999, count: 50
epoch: 268589, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, 

epoch: 269469, loss: -3.4768581826938316e-05, rewards: -9.299999999999999, count: 50
epoch: 269479, loss: -1.3803243746224325e-05, rewards: -9.299999999999999, count: 50
epoch: 269489, loss: 2.0668507204391062e-05, rewards: -9.299999999999999, count: 50
epoch: 269499, loss: -9.591579328116495e-06, rewards: -9.299999999999999, count: 50
epoch: 269509, loss: 1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 269519, loss: 6.747245606675278e-07, rewards: -9.299999999999999, count: 50
epoch: 269529, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 269539, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 269549, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 269559, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 269569, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 269579, loss: -5.316734359439579e-07, rewards: -9.299999999999999, 

epoch: 270459, loss: 3.054142098335433e-06, rewards: -9.299999999999999, count: 50
epoch: 270469, loss: 4.633664957509609e-06, rewards: -9.299999999999999, count: 50
epoch: 270479, loss: -4.088878540642327e-06, rewards: -9.299999999999999, count: 50
epoch: 270489, loss: 2.3174286525318166e-06, rewards: -9.299999999999999, count: 50
epoch: 270499, loss: -1.3685225894732866e-06, rewards: -9.299999999999999, count: 50
epoch: 270509, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 270519, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 270529, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 270539, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 270549, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, count: 50
epoch: 270559, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 270569, loss: -3.402233232918661e-06, rewards: -9.299999999999999, co

epoch: 271449, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 271459, loss: 1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 271469, loss: -1.3756751968685421e-06, rewards: -9.299999999999999, count: 50
epoch: 271479, loss: -2.297163064213237e-06, rewards: -9.299999999999999, count: 50
epoch: 271489, loss: -4.72784040539409e-06, rewards: -9.299999999999999, count: 50
epoch: 271499, loss: -2.0416975530679338e-05, rewards: -9.299999999999999, count: 50
epoch: 271509, loss: -9.992242121370509e-05, rewards: -9.299999999999999, count: 50
epoch: 271519, loss: 6.189942359924316e-05, rewards: -9.299999999999999, count: 50
epoch: 271529, loss: -3.292679684818722e-05, rewards: -9.299999999999999, count: 50
epoch: 271539, loss: 3.083944420723128e-06, rewards: -9.299999999999999, count: 50
epoch: 271549, loss: 8.268356396001764e-06, rewards: -9.299999999999999, count: 50
epoch: 271559, loss: -7.669925253139809e-06, rewards: -9.299999999999999, count

epoch: 272439, loss: 4.504919161263388e-06, rewards: -9.299999999999999, count: 50
epoch: 272449, loss: -7.6043606895837e-06, rewards: -9.299999999999999, count: 50
epoch: 272459, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 272469, loss: 2.3174286525318166e-06, rewards: -9.299999999999999, count: 50
epoch: 272479, loss: -1.6295909972541267e-06, rewards: -9.299999999999999, count: 50
epoch: 272489, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 272499, loss: -3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 272509, loss: 4.994869300389837e-07, rewards: -9.299999999999999, count: 50
epoch: 272519, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 272529, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 272539, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 272549, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 5

epoch: 273429, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 273439, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 273449, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 273459, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 273469, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 273479, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 273489, loss: 2.83598910755245e-06, rewards: -9.299999999999999, count: 50
epoch: 273499, loss: 1.9853114281431772e-05, rewards: -9.299999999999999, count: 50
epoch: 273509, loss: 0.0001283240271732211, rewards: -9.299999999999999, count: 50
epoch: 273519, loss: -6.204843521118164e-05, rewards: -9.299999999999999, count: 50
epoch: 273529, loss: -3.1861065508564934e-05, rewards: -9.299999999999999, count: 50
epoch: 273539, loss: 1.3564825167122763e-05, rewards: -9.299999999999999, cou

epoch: 274419, loss: -4.603862635121914e-06, rewards: -9.299999999999999, count: 50
epoch: 274429, loss: -3.580570046324283e-05, rewards: -9.299999999999999, count: 50
epoch: 274439, loss: -9.768962627276778e-05, rewards: -9.299999999999999, count: 50
epoch: 274449, loss: -2.3351907657342963e-05, rewards: -9.299999999999999, count: 50
epoch: 274459, loss: 3.238439603592269e-05, rewards: -9.299999999999999, count: 50
epoch: 274469, loss: 1.6776322809164412e-05, rewards: -9.299999999999999, count: 50
epoch: 274479, loss: -8.562803486711346e-06, rewards: -9.299999999999999, count: 50
epoch: 274489, loss: -6.482601293100743e-06, rewards: -9.299999999999999, count: 50
epoch: 274499, loss: 4.494190307013923e-06, rewards: -9.299999999999999, count: 50
epoch: 274509, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 274519, loss: -1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 274529, loss: 1.1181831496287487e-06, rewards: -9.299999999999999, co

epoch: 275409, loss: -5.414485713117756e-06, rewards: -9.299999999999999, count: 50
epoch: 275419, loss: -8.752345820539631e-06, rewards: -9.299999999999999, count: 50
epoch: 275429, loss: 5.793571290269028e-06, rewards: -9.299999999999999, count: 50
epoch: 275439, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 275449, loss: -1.21712685086095e-06, rewards: -9.299999999999999, count: 50
epoch: 275459, loss: 1.2314319519646233e-06, rewards: -9.299999999999999, count: 50
epoch: 275469, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 275479, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 275489, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 275499, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 275509, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 275519, loss: -4.410743770222325e-07, rewards: -9.299999999999999, cou

epoch: 276399, loss: -3.2782554626464844e-06, rewards: -9.299999999999999, count: 50
epoch: 276409, loss: 1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 276419, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 276429, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 276439, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 276449, loss: -1.5258788721439487e-07, rewards: -9.299999999999999, count: 50
epoch: 276459, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 276469, loss: -5.197525183575635e-07, rewards: -9.299999999999999, count: 50
epoch: 276479, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 276489, loss: 1.4007091522216797e-06, rewards: -9.299999999999999, count: 50
epoch: 276499, loss: 3.273486981925089e-06, rewards: -9.299999999999999, count: 50
epoch: 276509, loss: 1.3654232134285849e-05, rewards: -9.299999999999999, c

epoch: 277389, loss: -4.541873863672663e-07, rewards: -9.299999999999999, count: 50
epoch: 277399, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 277409, loss: -5.749463980464498e-06, rewards: -9.299999999999999, count: 50
epoch: 277419, loss: -3.8839578337501734e-05, rewards: -9.299999999999999, count: 50
epoch: 277429, loss: -8.66270056576468e-05, rewards: -9.299999999999999, count: 50
epoch: 277439, loss: -6.457566996687092e-06, rewards: -9.299999999999999, count: 50
epoch: 277449, loss: 3.5375356674194336e-05, rewards: -9.299999999999999, count: 50
epoch: 277459, loss: -4.426241048349766e-06, rewards: -9.299999999999999, count: 50
epoch: 277469, loss: -1.0877847671508789e-05, rewards: -9.299999999999999, count: 50
epoch: 277479, loss: 6.999969627941027e-06, rewards: -9.299999999999999, count: 50
epoch: 277489, loss: -1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 277499, loss: -8.666515327604429e-07, rewards: -9.299999999999999, 

epoch: 278379, loss: -2.0444392703211633e-06, rewards: -9.299999999999999, count: 50
epoch: 278389, loss: -4.251003247190965e-06, rewards: -9.299999999999999, count: 50
epoch: 278399, loss: -1.7602444131625816e-05, rewards: -9.299999999999999, count: 50
epoch: 278409, loss: -8.633256220491603e-05, rewards: -9.299999999999999, count: 50
epoch: 278419, loss: 4.396319491206668e-05, rewards: -9.299999999999999, count: 50
epoch: 278429, loss: -3.926753925043158e-05, rewards: -9.299999999999999, count: 50
epoch: 278439, loss: 2.034664066741243e-05, rewards: -9.299999999999999, count: 50
epoch: 278449, loss: -5.7470797401038e-06, rewards: -9.299999999999999, count: 50
epoch: 278459, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 278469, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 278479, loss: 4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 278489, loss: -1.7797947293729521e-06, rewards: -9.299999999999999, count: 

epoch: 279369, loss: -2.6088953745784238e-05, rewards: -9.299999999999999, count: 50
epoch: 279379, loss: -0.00011839032231364399, rewards: -9.299999999999999, count: 50
epoch: 279389, loss: 4.894494850304909e-05, rewards: -9.299999999999999, count: 50
epoch: 279399, loss: 2.11811056942679e-05, rewards: -9.299999999999999, count: 50
epoch: 279409, loss: -2.172112544940319e-05, rewards: -9.299999999999999, count: 50
epoch: 279419, loss: -1.5997886748664314e-06, rewards: -9.299999999999999, count: 50
epoch: 279429, loss: 8.399486432608683e-06, rewards: -9.299999999999999, count: 50
epoch: 279439, loss: -5.022287496103672e-06, rewards: -9.299999999999999, count: 50
epoch: 279449, loss: 1.7237663314517704e-06, rewards: -9.299999999999999, count: 50
epoch: 279459, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 279469, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 279479, loss: 2.0265579436795633e-08, rewards: -9.299999999999999, co

epoch: 280359, loss: 3.942251169064548e-06, rewards: -9.299999999999999, count: 50
epoch: 280369, loss: -1.957416543518775e-06, rewards: -9.299999999999999, count: 50
epoch: 280379, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 280389, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 280399, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 280409, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 280419, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 280429, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, count: 50
epoch: 280439, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 280449, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 280459, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 280469, loss: 2.8312206268310547e-06, rewards: -9.299999999999999, count:

epoch: 281339, loss: 1.8683671441976912e-05, rewards: -9.299999999999999, count: 50
epoch: 281349, loss: -9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 281359, loss: -5.083084033685736e-06, rewards: -9.299999999999999, count: 50
epoch: 281369, loss: 4.220009031996597e-06, rewards: -9.299999999999999, count: 50
epoch: 281379, loss: -3.0350684028235264e-06, rewards: -9.299999999999999, count: 50
epoch: 281389, loss: 1.8060206912196008e-06, rewards: -9.299999999999999, count: 50
epoch: 281399, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 281409, loss: -4.148483299104555e-07, rewards: -9.299999999999999, count: 50
epoch: 281419, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 281429, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 281439, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 281449, loss: -9.787082717593876e-07, rewards: -9.299999999999999, 

epoch: 282329, loss: -3.710627424879931e-05, rewards: -9.299999999999999, count: 50
epoch: 282339, loss: -9.292840695707127e-05, rewards: -9.299999999999999, count: 50
epoch: 282349, loss: -4.755258487421088e-06, rewards: -9.299999999999999, count: 50
epoch: 282359, loss: 3.6401746910996735e-05, rewards: -9.299999999999999, count: 50
epoch: 282369, loss: -2.2220610844669864e-06, rewards: -9.299999999999999, count: 50
epoch: 282379, loss: -1.2438296835171059e-05, rewards: -9.299999999999999, count: 50
epoch: 282389, loss: 5.538463483389933e-06, rewards: -9.299999999999999, count: 50
epoch: 282399, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 282409, loss: -2.256631887576077e-06, rewards: -9.299999999999999, count: 50
epoch: 282419, loss: 1.8668174561753403e-06, rewards: -9.299999999999999, count: 50
epoch: 282429, loss: -1.0478496506038937e-06, rewards: -9.299999999999999, count: 50
epoch: 282439, loss: 7.867812996664725e-07, rewards: -9.299999999999999, co

epoch: 283319, loss: -1.4044046110939234e-05, rewards: -9.299999999999999, count: 50
epoch: 283329, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 283339, loss: 2.43902195506962e-06, rewards: -9.299999999999999, count: 50
epoch: 283349, loss: -1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 283359, loss: 7.700920150455204e-07, rewards: -9.299999999999999, count: 50
epoch: 283369, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 283379, loss: -1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 283389, loss: -7.152557657263969e-08, rewards: -9.299999999999999, count: 50
epoch: 283399, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 283409, loss: 1.9693375179485884e-06, rewards: -9.299999999999999, count: 50
epoch: 283419, loss: 6.260871941776713e-06, rewards: -9.299999999999999, count: 50
epoch: 283429, loss: 3.3029318728949875e-05, rewards: -9.299999999999999, count

epoch: 284309, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 284319, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 284329, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 284339, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 284349, loss: -3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 284359, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 284369, loss: -5.594491994997952e-06, rewards: -9.299999999999999, count: 50
epoch: 284379, loss: -0.00010077953629661351, rewards: -9.299999999999999, count: 50
epoch: 284389, loss: 0.00012641192006412894, rewards: -9.299999999999999, count: 50
epoch: 284399, loss: 6.948351801838726e-05, rewards: -9.299999999999999, count: 50
epoch: 284409, loss: 2.7601718102232553e-05, rewards: -9.299999999999999, count: 50
epoch: 284419, loss: 2.9206275939941406e-06, rewards: -9.299999999999999, coun

epoch: 285299, loss: -8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 285309, loss: -5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 285319, loss: -8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 285329, loss: -1.394748665006773e-06, rewards: -9.299999999999999, count: 50
epoch: 285339, loss: -2.9981135867274133e-06, rewards: -9.299999999999999, count: 50
epoch: 285349, loss: -1.2534856978163589e-05, rewards: -9.299999999999999, count: 50
epoch: 285359, loss: -7.500052743125707e-05, rewards: -9.299999999999999, count: 50
epoch: 285369, loss: 2.570867582107894e-05, rewards: -9.299999999999999, count: 50
epoch: 285379, loss: -4.354953853180632e-05, rewards: -9.299999999999999, count: 50
epoch: 285389, loss: 2.008676528930664e-05, rewards: -9.299999999999999, count: 50
epoch: 285399, loss: 2.319812665518839e-06, rewards: -9.299999999999999, count: 50
epoch: 285409, loss: -8.258819434558973e-06, rewards: -9.299999999999999, count

epoch: 286289, loss: -1.2302398317842744e-06, rewards: -9.299999999999999, count: 50
epoch: 286299, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 286309, loss: -7.402896926578251e-07, rewards: -9.299999999999999, count: 50
epoch: 286319, loss: 9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 286329, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 286339, loss: -4.148483299104555e-07, rewards: -9.299999999999999, count: 50
epoch: 286349, loss: -1.7797947293729521e-06, rewards: -9.299999999999999, count: 50
epoch: 286359, loss: -3.591775794120622e-06, rewards: -9.299999999999999, count: 50
epoch: 286369, loss: -1.611828884051647e-05, rewards: -9.299999999999999, count: 50
epoch: 286379, loss: -8.84687906363979e-05, rewards: -9.299999999999999, count: 50
epoch: 286389, loss: 5.06722935824655e-05, rewards: -9.299999999999999, count: 50
epoch: 286399, loss: -4.175901267444715e-05, rewards: -9.299999999999999, count:

epoch: 287279, loss: 5.690813122782856e-05, rewards: -9.299999999999999, count: 50
epoch: 287289, loss: 3.055333945667371e-05, rewards: -9.299999999999999, count: 50
epoch: 287299, loss: 1.2887716366094537e-05, rewards: -9.299999999999999, count: 50
epoch: 287309, loss: 1.817941665649414e-06, rewards: -9.299999999999999, count: 50
epoch: 287319, loss: -3.988742719229776e-06, rewards: -9.299999999999999, count: 50
epoch: 287329, loss: -4.606246875482611e-06, rewards: -9.299999999999999, count: 50
epoch: 287339, loss: -1.5652179854441783e-06, rewards: -9.299999999999999, count: 50
epoch: 287349, loss: 1.2314319519646233e-06, rewards: -9.299999999999999, count: 50
epoch: 287359, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count: 50
epoch: 287369, loss: -4.148483299104555e-07, rewards: -9.299999999999999, count: 50
epoch: 287379, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 287389, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count:

epoch: 288269, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 288279, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 288289, loss: -7.355213256232673e-07, rewards: -9.299999999999999, count: 50
epoch: 288299, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 288309, loss: -1.4054775192562374e-06, rewards: -9.299999999999999, count: 50
epoch: 288319, loss: -8.836984306981321e-06, rewards: -9.299999999999999, count: 50
epoch: 288329, loss: -7.950425060698763e-05, rewards: -9.299999999999999, count: 50
epoch: 288339, loss: 5.9026478993473575e-05, rewards: -9.299999999999999, count: 50
epoch: 288349, loss: -3.5961867979494855e-05, rewards: -9.299999999999999, count: 50
epoch: 288359, loss: -3.2479762012371793e-05, rewards: -9.299999999999999, count: 50
epoch: 288369, loss: -3.687143362185452e-06, rewards: -9.299999999999999, count: 50
epoch: 288379, loss: 1.0797977665788494e-05, rewards: -9.299999999999999, 

epoch: 289259, loss: 3.0258894184953533e-05, rewards: -9.299999999999999, count: 50
epoch: 289269, loss: -1.5909672583802603e-05, rewards: -9.299999999999999, count: 50
epoch: 289279, loss: -1.2012719707854558e-05, rewards: -9.299999999999999, count: 50
epoch: 289289, loss: 7.355213256232673e-06, rewards: -9.299999999999999, count: 50
epoch: 289299, loss: 1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 289309, loss: -3.427267074584961e-06, rewards: -9.299999999999999, count: 50
epoch: 289319, loss: 2.0015240806969814e-06, rewards: -9.299999999999999, count: 50
epoch: 289329, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 289339, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 289349, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 289359, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 289369, loss: 8.940696716308594e-08, rewards: -9.299999999999999, co

epoch: 290249, loss: 1.466274284211977e-06, rewards: -9.299999999999999, count: 50
epoch: 290259, loss: -1.4555454299625126e-06, rewards: -9.299999999999999, count: 50
epoch: 290269, loss: 1.1539459592313506e-06, rewards: -9.299999999999999, count: 50
epoch: 290279, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 290289, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 290299, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 290309, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 290319, loss: 1.592636067471176e-06, rewards: -9.299999999999999, count: 50
epoch: 290329, loss: 4.701614216173766e-06, rewards: -9.299999999999999, count: 50
epoch: 290339, loss: 1.7982722056331113e-05, rewards: -9.299999999999999, count: 50
epoch: 290349, loss: 7.993936742423102e-05, rewards: -9.299999999999999, count: 50
epoch: 290359, loss: -2.962350845336914e-05, rewards: -9.299999999999999, count: 5

epoch: 291239, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 291249, loss: -9.11474216991337e-06, rewards: -9.299999999999999, count: 50
epoch: 291259, loss: -2.0909310478600673e-06, rewards: -9.299999999999999, count: 50
epoch: 291269, loss: 3.350973202032037e-06, rewards: -9.299999999999999, count: 50
epoch: 291279, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 291289, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 291299, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 291309, loss: -5.412101700130734e-07, rewards: -9.299999999999999, count: 50
epoch: 291319, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 291329, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, count: 50
epoch: 291339, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 291349, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, c

epoch: 292229, loss: 3.3080577850341797e-06, rewards: -9.299999999999999, count: 50
epoch: 292239, loss: -2.5188921881635906e-06, rewards: -9.299999999999999, count: 50
epoch: 292249, loss: 1.146793351836095e-06, rewards: -9.299999999999999, count: 50
epoch: 292259, loss: 1.5175342014117632e-06, rewards: -9.299999999999999, count: 50
epoch: 292269, loss: 5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 292279, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 292289, loss: 3.2949446904240176e-06, rewards: -9.299999999999999, count: 50
epoch: 292299, loss: 1.757860263751354e-05, rewards: -9.299999999999999, count: 50
epoch: 292309, loss: 0.0001012897482723929, rewards: -9.299999999999999, count: 50
epoch: 292319, loss: -6.714701885357499e-05, rewards: -9.299999999999999, count: 50
epoch: 292329, loss: 3.0313729439512827e-05, rewards: -9.299999999999999, count: 50
epoch: 292339, loss: 9.22441449802136e-06, rewards: -9.299999999999999, count: 

epoch: 293219, loss: 1.0797977665788494e-05, rewards: -9.299999999999999, count: 50
epoch: 293229, loss: -3.5071373076789314e-06, rewards: -9.299999999999999, count: 50
epoch: 293239, loss: -5.311966106091859e-06, rewards: -9.299999999999999, count: 50
epoch: 293249, loss: 1.1110305422334932e-06, rewards: -9.299999999999999, count: 50
epoch: 293259, loss: 1.8131732986148563e-06, rewards: -9.299999999999999, count: 50
epoch: 293269, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 293279, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 293289, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, count: 50
epoch: 293299, loss: -1.5735626845980732e-07, rewards: -9.299999999999999, count: 50
epoch: 293309, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 293319, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 293329, loss: -2.539158003855846e-07, rewards: -9.299999999999999, 

epoch: 294209, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 294219, loss: -1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 294229, loss: -1.4919042769179214e-05, rewards: -9.299999999999999, count: 50
epoch: 294239, loss: -0.00012467027409002185, rewards: -9.299999999999999, count: 50
epoch: 294249, loss: 8.031129982555285e-05, rewards: -9.299999999999999, count: 50
epoch: 294259, loss: 3.496646968415007e-05, rewards: -9.299999999999999, count: 50
epoch: 294269, loss: -7.058381925162394e-06, rewards: -9.299999999999999, count: 50
epoch: 294279, loss: -1.7555952581460588e-05, rewards: -9.299999999999999, count: 50
epoch: 294289, loss: -6.064176432118984e-06, rewards: -9.299999999999999, count: 50
epoch: 294299, loss: 4.862547029915731e-06, rewards: -9.299999999999999, count: 50
epoch: 294309, loss: 2.9277800877025584e-06, rewards: -9.299999999999999, count: 50
epoch: 294319, loss: -2.43902195506962e-06, rewards: -9.299999999999999, co

epoch: 295199, loss: -8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 295209, loss: -5.108117875352036e-06, rewards: -9.299999999999999, count: 50
epoch: 295219, loss: -4.266500582161825e-06, rewards: -9.299999999999999, count: 50
epoch: 295229, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 295239, loss: 1.5950203078318737e-06, rewards: -9.299999999999999, count: 50
epoch: 295249, loss: 4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 295259, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 295269, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 295279, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 295289, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 295299, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 295309, loss: -1.668930025289228e-07, rewards: -9.299999999999999, count: 5

epoch: 296189, loss: -9.104013588512316e-05, rewards: -9.299999999999999, count: 50
epoch: 296199, loss: -1.0769366781460121e-05, rewards: -9.299999999999999, count: 50
epoch: 296209, loss: 1.627206802368164e-05, rewards: -9.299999999999999, count: 50
epoch: 296219, loss: 1.9381046513444744e-05, rewards: -9.299999999999999, count: 50
epoch: 296229, loss: 1.1746883501473349e-05, rewards: -9.299999999999999, count: 50
epoch: 296239, loss: 1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 296249, loss: -3.55362885784416e-06, rewards: -9.299999999999999, count: 50
epoch: 296259, loss: -2.161264319511247e-06, rewards: -9.299999999999999, count: 50
epoch: 296269, loss: 1.0406970432086382e-06, rewards: -9.299999999999999, count: 50
epoch: 296279, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 296289, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 296299, loss: 3.5762788286319847e-08, rewards: -9.299999999999999, coun

epoch: 297179, loss: -2.9277800877025584e-06, rewards: -9.299999999999999, count: 50
epoch: 297189, loss: 2.471208517818013e-06, rewards: -9.299999999999999, count: 50
epoch: 297199, loss: -1.4173984936860506e-06, rewards: -9.299999999999999, count: 50
epoch: 297209, loss: 9.238719940185547e-07, rewards: -9.299999999999999, count: 50
epoch: 297219, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 297229, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 297239, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 297249, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 297259, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 297269, loss: -2.8908252716064453e-06, rewards: -9.299999999999999, count: 50
epoch: 297279, loss: -1.6369820514228195e-05, rewards: -9.299999999999999, count: 50
epoch: 297289, loss: -0.00010313391976524144, rewards: -9.299999999999999

epoch: 298169, loss: 3.424882834224263e-06, rewards: -9.299999999999999, count: 50
epoch: 298179, loss: 2.338528611289803e-05, rewards: -9.299999999999999, count: 50
epoch: 298189, loss: 0.00012666940165217966, rewards: -9.299999999999999, count: 50
epoch: 298199, loss: -4.832983177038841e-05, rewards: -9.299999999999999, count: 50
epoch: 298209, loss: -3.2757521694293246e-05, rewards: -9.299999999999999, count: 50
epoch: 298219, loss: 1.3691186723008286e-05, rewards: -9.299999999999999, count: 50
epoch: 298229, loss: 1.3018846402701456e-05, rewards: -9.299999999999999, count: 50
epoch: 298239, loss: -6.335973921522964e-06, rewards: -9.299999999999999, count: 50
epoch: 298249, loss: -2.312660171810421e-06, rewards: -9.299999999999999, count: 50
epoch: 298259, loss: 3.598928515202715e-06, rewards: -9.299999999999999, count: 50
epoch: 298269, loss: -1.8358230136072962e-06, rewards: -9.299999999999999, count: 50
epoch: 298279, loss: 6.997585160206654e-07, rewards: -9.299999999999999, coun

epoch: 299159, loss: -8.594989822086063e-07, rewards: -9.299999999999999, count: 50
epoch: 299169, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 299179, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count: 50
epoch: 299189, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 299199, loss: -1.4734267779203947e-06, rewards: -9.299999999999999, count: 50
epoch: 299209, loss: -1.8131732986148563e-06, rewards: -9.299999999999999, count: 50
epoch: 299219, loss: -5.220174898568075e-06, rewards: -9.299999999999999, count: 50
epoch: 299229, loss: -2.5838613510131836e-05, rewards: -9.299999999999999, count: 50
epoch: 299239, loss: -0.00010540842777118087, rewards: -9.299999999999999, count: 50
epoch: 299249, loss: 5.9341193264117464e-05, rewards: -9.299999999999999, count: 50
epoch: 299259, loss: -1.6239881006185897e-05, rewards: -9.299999999999999, count: 50
epoch: 299269, loss: -8.782148142927326e-06, rewards: -9.299999999999999

epoch: 300149, loss: -4.527926284936257e-05, rewards: -9.299999999999999, count: 50
epoch: 300159, loss: 4.7351120883831754e-05, rewards: -9.299999999999999, count: 50
epoch: 300169, loss: 1.7656087948125787e-05, rewards: -9.299999999999999, count: 50
epoch: 300179, loss: -1.4955997357901651e-05, rewards: -9.299999999999999, count: 50
epoch: 300189, loss: -6.548166311404202e-06, rewards: -9.299999999999999, count: 50
epoch: 300199, loss: 6.840229161753086e-06, rewards: -9.299999999999999, count: 50
epoch: 300209, loss: -7.307529585887096e-07, rewards: -9.299999999999999, count: 50
epoch: 300219, loss: -2.1088123958179494e-06, rewards: -9.299999999999999, count: 50
epoch: 300229, loss: 1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 300239, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 300249, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 300259, loss: -3.826618240054813e-07, rewards: -9.299999999999999, c

epoch: 301139, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 301149, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 301159, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 301169, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 301179, loss: -7.915496667010302e-07, rewards: -9.299999999999999, count: 50
epoch: 301189, loss: -2.915859113272745e-06, rewards: -9.299999999999999, count: 50
epoch: 301199, loss: -1.816034273360856e-05, rewards: -9.299999999999999, count: 50
epoch: 301209, loss: -0.00011040806566597894, rewards: -9.299999999999999, count: 50
epoch: 301219, loss: 7.315635593840852e-05, rewards: -9.299999999999999, count: 50
epoch: 301229, loss: -1.1357068615325261e-05, rewards: -9.299999999999999, count: 50
epoch: 301239, loss: -2.3534297724836506e-05, rewards: -9.299999999999999, count: 50
epoch: 301249, loss: 1.0389089766249526e-05, rewards: -9.299999999999999, c

epoch: 302129, loss: -2.638101477714372e-06, rewards: -9.299999999999999, count: 50
epoch: 302139, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 302149, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count: 50
epoch: 302159, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 302169, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 302179, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 302189, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 302199, loss: -2.5677682060631923e-06, rewards: -9.299999999999999, count: 50
epoch: 302209, loss: -5.632638931274414e-06, rewards: -9.299999999999999, count: 50
epoch: 302219, loss: -1.939296635100618e-05, rewards: -9.299999999999999, count: 50
epoch: 302229, loss: -8.207559585571289e-05, rewards: -9.299999999999999, count: 50
epoch: 302239, loss: 3.4158230846514925e-05, rewards: -9.299999999999999, c

epoch: 303119, loss: -2.541542016842868e-06, rewards: -9.299999999999999, count: 50
epoch: 303129, loss: -1.4002323041495401e-05, rewards: -9.299999999999999, count: 50
epoch: 303139, loss: -0.00010200261749560013, rewards: -9.299999999999999, count: 50
epoch: 303149, loss: 7.76600863900967e-05, rewards: -9.299999999999999, count: 50
epoch: 303159, loss: -1.675367275311146e-05, rewards: -9.299999999999999, count: 50
epoch: 303169, loss: -2.8039217795594595e-05, rewards: -9.299999999999999, count: 50
epoch: 303179, loss: 3.6776066281163367e-06, rewards: -9.299999999999999, count: 50
epoch: 303189, loss: 9.623766345612239e-06, rewards: -9.299999999999999, count: 50
epoch: 303199, loss: -3.591775794120622e-06, rewards: -9.299999999999999, count: 50
epoch: 303209, loss: -1.4853477523502079e-06, rewards: -9.299999999999999, count: 50
epoch: 303219, loss: 2.2101403374108486e-06, rewards: -9.299999999999999, count: 50
epoch: 303229, loss: -1.5997886748664314e-06, rewards: -9.299999999999999, 

epoch: 304109, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 304119, loss: -6.139278411865234e-06, rewards: -9.299999999999999, count: 50
epoch: 304129, loss: -3.820657730102539e-05, rewards: -9.299999999999999, count: 50
epoch: 304139, loss: -8.84985929587856e-05, rewards: -9.299999999999999, count: 50
epoch: 304149, loss: 2.9015541258559097e-06, rewards: -9.299999999999999, count: 50
epoch: 304159, loss: 3.3596752473386005e-05, rewards: -9.299999999999999, count: 50
epoch: 304169, loss: -1.1702775736921467e-05, rewards: -9.299999999999999, count: 50
epoch: 304179, loss: -6.990432666498236e-06, rewards: -9.299999999999999, count: 50
epoch: 304189, loss: 7.700919923081528e-06, rewards: -9.299999999999999, count: 50
epoch: 304199, loss: -3.6120413824392017e-06, rewards: -9.299999999999999, count: 50
epoch: 304209, loss: 1.7547607740198146e-06, rewards: -9.299999999999999, count: 50
epoch: 304219, loss: -7.605552809764049e-07, rewards: -9.299999999999999, c

epoch: 305099, loss: 1.313567190663889e-05, rewards: -9.299999999999999, count: 50
epoch: 305109, loss: -1.69658669619821e-05, rewards: -9.299999999999999, count: 50
epoch: 305119, loss: -3.38792801812815e-06, rewards: -9.299999999999999, count: 50
epoch: 305129, loss: 6.819963346060831e-06, rewards: -9.299999999999999, count: 50
epoch: 305139, loss: -2.348423095099861e-06, rewards: -9.299999999999999, count: 50
epoch: 305149, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 305159, loss: 1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 305169, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 305179, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 305189, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 305199, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 305209, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 

epoch: 306089, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 306099, loss: 4.2676924749684986e-07, rewards: -9.299999999999999, count: 50
epoch: 306109, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 306119, loss: 1.0226965059700888e-05, rewards: -9.299999999999999, count: 50
epoch: 306129, loss: 9.510397649137303e-05, rewards: -9.299999999999999, count: 50
epoch: 306139, loss: -8.54253739817068e-05, rewards: -9.299999999999999, count: 50
epoch: 306149, loss: 2.1100045159982983e-06, rewards: -9.299999999999999, count: 50
epoch: 306159, loss: 2.7567148208618164e-05, rewards: -9.299999999999999, count: 50
epoch: 306169, loss: 1.817941665649414e-05, rewards: -9.299999999999999, count: 50
epoch: 306179, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 306189, loss: -7.112026196409715e-06, rewards: -9.299999999999999, count: 50
epoch: 306199, loss: -1.1205672763026087e-06, rewards: -9.299999999999999, count

epoch: 307079, loss: -0.00010327100608265027, rewards: -9.299999999999999, count: 50
epoch: 307089, loss: 5.851626337971538e-05, rewards: -9.299999999999999, count: 50
epoch: 307099, loss: -1.8984079360961914e-05, rewards: -9.299999999999999, count: 50
epoch: 307109, loss: -4.565715698845452e-06, rewards: -9.299999999999999, count: 50
epoch: 307119, loss: 9.946823411155492e-06, rewards: -9.299999999999999, count: 50
epoch: 307129, loss: -7.501840627810452e-06, rewards: -9.299999999999999, count: 50
epoch: 307139, loss: 4.74333774036495e-06, rewards: -9.299999999999999, count: 50
epoch: 307149, loss: -2.6583670660329517e-06, rewards: -9.299999999999999, count: 50
epoch: 307159, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 307169, loss: 4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 307179, loss: -7.891654831837513e-07, rewards: -9.299999999999999, count: 50
epoch: 307189, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count:

epoch: 308069, loss: 1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 308079, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 308089, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 308099, loss: 7.796287491146359e-07, rewards: -9.299999999999999, count: 50
epoch: 308109, loss: 2.6023387817986077e-06, rewards: -9.299999999999999, count: 50
epoch: 308119, loss: 7.375478617177578e-06, rewards: -9.299999999999999, count: 50
epoch: 308129, loss: 3.463506800471805e-05, rewards: -9.299999999999999, count: 50
epoch: 308139, loss: 9.145140938926488e-05, rewards: -9.299999999999999, count: 50
epoch: 308149, loss: -4.277110201655887e-05, rewards: -9.299999999999999, count: 50
epoch: 308159, loss: 6.574392500624526e-06, rewards: -9.299999999999999, count: 50
epoch: 308169, loss: 8.516311936546117e-06, rewards: -9.299999999999999, count: 50
epoch: 308179, loss: -9.5617770057288e-06, rewards: -9.299999999999999, count: 50

epoch: 309059, loss: 1.8131732986148563e-06, rewards: -9.299999999999999, count: 50
epoch: 309069, loss: 6.265640422498109e-06, rewards: -9.299999999999999, count: 50
epoch: 309079, loss: 3.154277874273248e-05, rewards: -9.299999999999999, count: 50
epoch: 309089, loss: 0.00010187387670157477, rewards: -9.299999999999999, count: 50
epoch: 309099, loss: -4.4218300899956375e-05, rewards: -9.299999999999999, count: 50
epoch: 309109, loss: -3.6799908684770344e-06, rewards: -9.299999999999999, count: 50
epoch: 309119, loss: 1.849532054620795e-05, rewards: -9.299999999999999, count: 50
epoch: 309129, loss: -1.233816146850586e-05, rewards: -9.299999999999999, count: 50
epoch: 309139, loss: 5.735158993047662e-06, rewards: -9.299999999999999, count: 50
epoch: 309149, loss: -2.5582312446204014e-06, rewards: -9.299999999999999, count: 50
epoch: 309159, loss: 1.466274284211977e-06, rewards: -9.299999999999999, count: 50
epoch: 309169, loss: -1.268386881747574e-06, rewards: -9.299999999999999, coun

epoch: 310049, loss: -0.00011279463797109202, rewards: -9.299999999999999, count: 50
epoch: 310059, loss: 4.429102045833133e-05, rewards: -9.299999999999999, count: 50
epoch: 310069, loss: 1.767873800417874e-05, rewards: -9.299999999999999, count: 50
epoch: 310079, loss: -2.2580623408430256e-05, rewards: -9.299999999999999, count: 50
epoch: 310089, loss: 3.874301910400391e-06, rewards: -9.299999999999999, count: 50
epoch: 310099, loss: 4.827976226806641e-06, rewards: -9.299999999999999, count: 50
epoch: 310109, loss: -5.270242581900675e-06, rewards: -9.299999999999999, count: 50
epoch: 310119, loss: 3.236532165828976e-06, rewards: -9.299999999999999, count: 50
epoch: 310129, loss: -1.9478798094496597e-06, rewards: -9.299999999999999, count: 50
epoch: 310139, loss: 9.238719940185547e-07, rewards: -9.299999999999999, count: 50
epoch: 310149, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 310159, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count:

epoch: 311039, loss: 7.700920150455204e-07, rewards: -9.299999999999999, count: 50
epoch: 311049, loss: 2.607107262520003e-06, rewards: -9.299999999999999, count: 50
epoch: 311059, loss: 1.0435581316414755e-05, rewards: -9.299999999999999, count: 50
epoch: 311069, loss: 5.9980153309879825e-05, rewards: -9.299999999999999, count: 50
epoch: 311079, loss: 1.5685558537370525e-05, rewards: -9.299999999999999, count: 50
epoch: 311089, loss: 2.7010441044694744e-05, rewards: -9.299999999999999, count: 50
epoch: 311099, loss: -2.8407574063749053e-05, rewards: -9.299999999999999, count: 50
epoch: 311109, loss: 1.1038780030503403e-05, rewards: -9.299999999999999, count: 50
epoch: 311119, loss: -4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 311129, loss: -2.0444392703211633e-06, rewards: -9.299999999999999, count: 50
epoch: 311139, loss: 1.6534328324269154e-06, rewards: -9.299999999999999, count: 50
epoch: 311149, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, 

epoch: 312029, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 312039, loss: 5.372762643673923e-06, rewards: -9.299999999999999, count: 50
epoch: 312049, loss: 5.899310053791851e-05, rewards: -9.299999999999999, count: 50
epoch: 312059, loss: -2.0341873096185736e-05, rewards: -9.299999999999999, count: 50
epoch: 312069, loss: 4.8855541535886005e-05, rewards: -9.299999999999999, count: 50
epoch: 312079, loss: 3.923773692804389e-05, rewards: -9.299999999999999, count: 50
epoch: 312089, loss: 2.034664066741243e-05, rewards: -9.299999999999999, count: 50
epoch: 312099, loss: 4.351139068603516e-06, rewards: -9.299999999999999, count: 50
epoch: 312109, loss: -4.640817678591702e-06, rewards: -9.299999999999999, count: 50
epoch: 312119, loss: -4.951953997078817e-06, rewards: -9.299999999999999, count: 50
epoch: 312129, loss: -1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 312139, loss: 1.7726421219776967e-06, rewards: -9.299999999999999, count:

epoch: 313019, loss: -2.4040937205427326e-05, rewards: -9.299999999999999, count: 50
epoch: 313029, loss: -9.970187966246158e-05, rewards: -9.299999999999999, count: 50
epoch: 313039, loss: 5.943060023128055e-05, rewards: -9.299999999999999, count: 50
epoch: 313049, loss: -3.0691622669110075e-05, rewards: -9.299999999999999, count: 50
epoch: 313059, loss: 6.65068637317745e-06, rewards: -9.299999999999999, count: 50
epoch: 313069, loss: 2.703666723391507e-06, rewards: -9.299999999999999, count: 50
epoch: 313079, loss: -3.5071373076789314e-06, rewards: -9.299999999999999, count: 50
epoch: 313089, loss: 2.3543834686279297e-06, rewards: -9.299999999999999, count: 50
epoch: 313099, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 313109, loss: -7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 313119, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 313129, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, coun

epoch: 314009, loss: 4.017353148810798e-06, rewards: -9.299999999999999, count: 50
epoch: 314019, loss: -2.4509429294994334e-06, rewards: -9.299999999999999, count: 50
epoch: 314029, loss: 1.2314319519646233e-06, rewards: -9.299999999999999, count: 50
epoch: 314039, loss: -5.197525183575635e-07, rewards: -9.299999999999999, count: 50
epoch: 314049, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 314059, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 314069, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 314079, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 314089, loss: 1.146793351836095e-06, rewards: -9.299999999999999, count: 50
epoch: 314099, loss: 7.309913598874118e-06, rewards: -9.299999999999999, count: 50
epoch: 314109, loss: 6.237626075744629e-05, rewards: -9.299999999999999, count: 50
epoch: 314119, loss: -1.093149148800876e-05, rewards: -9.299999999999999, count: 5

epoch: 314999, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 315009, loss: 9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 315019, loss: 6.210804031070438e-07, rewards: -9.299999999999999, count: 50
epoch: 315029, loss: 1.2469291732486454e-06, rewards: -9.299999999999999, count: 50
epoch: 315039, loss: 3.7479401271411916e-06, rewards: -9.299999999999999, count: 50
epoch: 315049, loss: 1.970768062165007e-05, rewards: -9.299999999999999, count: 50
epoch: 315059, loss: 0.00010197162919212133, rewards: -9.299999999999999, count: 50
epoch: 315069, loss: -6.470560765592381e-05, rewards: -9.299999999999999, count: 50
epoch: 315079, loss: 3.030180960195139e-05, rewards: -9.299999999999999, count: 50
epoch: 315089, loss: 3.834962626569904e-06, rewards: -9.299999999999999, count: 50
epoch: 315099, loss: -1.2567043086164631e-05, rewards: -9.299999999999999, count: 50
epoch: 315109, loss: 8.844137482810766e-06, rewards: -9.299999999999999, count: 

epoch: 315989, loss: 1.8477439880371094e-06, rewards: -9.299999999999999, count: 50
epoch: 315999, loss: -1.4710426512465347e-06, rewards: -9.299999999999999, count: 50
epoch: 316009, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 316019, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 316029, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 316039, loss: 2.7298926852381555e-07, rewards: -9.299999999999999, count: 50
epoch: 316049, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 316059, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 316069, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 316079, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 316089, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 316099, loss: -1.0550022579991492e-06, rewards: -9.299999999999999, 

epoch: 316979, loss: 5.409717687143711e-06, rewards: -9.299999999999999, count: 50
epoch: 316989, loss: -2.9277800877025584e-06, rewards: -9.299999999999999, count: 50
epoch: 316999, loss: 2.2470951535069617e-06, rewards: -9.299999999999999, count: 50
epoch: 317009, loss: -1.7499924069852568e-06, rewards: -9.299999999999999, count: 50
epoch: 317019, loss: 1.9478798094496597e-06, rewards: -9.299999999999999, count: 50
epoch: 317029, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 317039, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 317049, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 317059, loss: -2.0813940864172764e-06, rewards: -9.299999999999999, count: 50
epoch: 317069, loss: -1.1017322322004475e-05, rewards: -9.299999999999999, count: 50
epoch: 317079, loss: -8.45694521558471e-05, rewards: -9.299999999999999, count: 50
epoch: 317089, loss: 5.8084726333618164e-05, rewards: -9.299999999999999, 

epoch: 317969, loss: 9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 317979, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 317989, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 317999, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 318009, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 318019, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 318029, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 318039, loss: -1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 318049, loss: -5.080699793325039e-06, rewards: -9.299999999999999, count: 50
epoch: 318059, loss: -2.84099587588571e-05, rewards: -9.299999999999999, count: 50
epoch: 318069, loss: -0.00011018991790479049, rewards: -9.299999999999999, count: 50
epoch: 318079, loss: 4.8607587814331055e-05, rewards: -9.299999999999999,

epoch: 318959, loss: -2.9366015951381996e-05, rewards: -9.299999999999999, count: 50
epoch: 318969, loss: -0.00011704087228281423, rewards: -9.299999999999999, count: 50
epoch: 318979, loss: 1.853227695391979e-05, rewards: -9.299999999999999, count: 50
epoch: 318989, loss: 3.7872792745474726e-05, rewards: -9.299999999999999, count: 50
epoch: 318999, loss: -5.117654836794827e-06, rewards: -9.299999999999999, count: 50
epoch: 319009, loss: -1.329183578491211e-05, rewards: -9.299999999999999, count: 50
epoch: 319019, loss: 4.678964614868164e-06, rewards: -9.299999999999999, count: 50
epoch: 319029, loss: 2.6619434265739983e-06, rewards: -9.299999999999999, count: 50
epoch: 319039, loss: -3.076791699641035e-06, rewards: -9.299999999999999, count: 50
epoch: 319049, loss: 1.8131732986148563e-06, rewards: -9.299999999999999, count: 50
epoch: 319059, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 319069, loss: 7.700920150455204e-07, rewards: -9.299999999999999, cou

epoch: 319949, loss: 1.7702579498291016e-05, rewards: -9.299999999999999, count: 50
epoch: 319959, loss: 5.776882062491495e-06, rewards: -9.299999999999999, count: 50
epoch: 319969, loss: -6.959438451303868e-06, rewards: -9.299999999999999, count: 50
epoch: 319979, loss: 7.700920150455204e-07, rewards: -9.299999999999999, count: 50
epoch: 319989, loss: 1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 319999, loss: -1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 320009, loss: 1.1539459592313506e-06, rewards: -9.299999999999999, count: 50
epoch: 320019, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 320029, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 320039, loss: -1.0180473282161984e-06, rewards: -9.299999999999999, count: 50
epoch: 320049, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 320059, loss: -3.194809039541724e-07, rewards: -9.299999999999999, coun

epoch: 320939, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 320949, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 320959, loss: 6.356239282467868e-06, rewards: -9.299999999999999, count: 50
epoch: 320969, loss: 4.0918588638305664e-05, rewards: -9.299999999999999, count: 50
epoch: 320979, loss: 7.949471182655543e-05, rewards: -9.299999999999999, count: 50
epoch: 320989, loss: 1.3296604265633505e-05, rewards: -9.299999999999999, count: 50
epoch: 320999, loss: -3.534555435180664e-05, rewards: -9.299999999999999, count: 50
epoch: 321009, loss: 9.107589562518115e-07, rewards: -9.299999999999999, count: 50
epoch: 321019, loss: 1.2139082173234783e-05, rewards: -9.299999999999999, count: 50
epoch: 321029, loss: -6.185769962030463e-06, rewards: -9.299999999999999, count: 50
epoch: 321039, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 321049, loss: 1.341104507446289e-06, rewards: -9.299999999999999, count:

epoch: 321929, loss: -5.412101700130734e-07, rewards: -9.299999999999999, count: 50
epoch: 321939, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 321949, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 321959, loss: -1.5997886748664314e-06, rewards: -9.299999999999999, count: 50
epoch: 321969, loss: -5.93781487623346e-06, rewards: -9.299999999999999, count: 50
epoch: 321979, loss: -3.5340784961590543e-05, rewards: -9.299999999999999, count: 50
epoch: 321989, loss: -9.623408550396562e-05, rewards: -9.299999999999999, count: 50
epoch: 321999, loss: 2.0517110897344537e-05, rewards: -9.299999999999999, count: 50
epoch: 322009, loss: 2.5312900106655434e-05, rewards: -9.299999999999999, count: 50
epoch: 322019, loss: -1.9140243239235133e-05, rewards: -9.299999999999999, count: 50
epoch: 322029, loss: 1.962184796866495e-06, rewards: -9.299999999999999, count: 50
epoch: 322039, loss: 4.460811396711506e-06, rewards: -9.299999999999999, co

epoch: 322929, loss: 4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 322939, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 322949, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 322959, loss: 6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 322969, loss: 2.1255016235954827e-06, rewards: -9.299999999999999, count: 50
epoch: 322979, loss: 1.1242627806495875e-05, rewards: -9.299999999999999, count: 50
epoch: 322989, loss: 8.984804298961535e-05, rewards: -9.299999999999999, count: 50
epoch: 322999, loss: -7.005452789599076e-05, rewards: -9.299999999999999, count: 50
epoch: 323009, loss: 2.8567314075189643e-05, rewards: -9.299999999999999, count: 50
epoch: 323019, loss: 2.939462683571037e-05, rewards: -9.299999999999999, count: 50
epoch: 323029, loss: -2.5534629912726814e-06, rewards: -9.299999999999999, count: 50
epoch: 323039, loss: -1.1225938578718342e-05, rewards: -9.299999999999999, count:

epoch: 323919, loss: 8.603334572399035e-05, rewards: -9.299999999999999, count: 50
epoch: 323929, loss: 3.273248512414284e-05, rewards: -9.299999999999999, count: 50
epoch: 323939, loss: -2.769708589767106e-05, rewards: -9.299999999999999, count: 50
epoch: 323949, loss: -1.8984079360961914e-05, rewards: -9.299999999999999, count: 50
epoch: 323959, loss: 6.620884050789755e-06, rewards: -9.299999999999999, count: 50
epoch: 323969, loss: 6.936788395250915e-06, rewards: -9.299999999999999, count: 50
epoch: 323979, loss: -3.8397311072913e-06, rewards: -9.299999999999999, count: 50
epoch: 323989, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 323999, loss: 1.7237663314517704e-06, rewards: -9.299999999999999, count: 50
epoch: 324009, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 324019, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 324029, loss: -5.412101700130734e-07, rewards: -9.299999999999999, count: 5

epoch: 324909, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 324919, loss: 4.470348358154297e-07, rewards: -9.299999999999999, count: 50
epoch: 324929, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 324939, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 324949, loss: -1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 324959, loss: -2.4271012080134824e-06, rewards: -9.299999999999999, count: 50
epoch: 324969, loss: -5.823373612656724e-06, rewards: -9.299999999999999, count: 50
epoch: 324979, loss: -2.8665066565736197e-05, rewards: -9.299999999999999, count: 50
epoch: 324989, loss: -0.00010521650256123394, rewards: -9.299999999999999, count: 50
epoch: 324999, loss: 5.2282808610470966e-05, rewards: -9.299999999999999, count: 50
epoch: 325009, loss: -4.640817678591702e-06, rewards: -9.299999999999999, count: 50
epoch: 325019, loss: -1.5171765880950261e-05, rewards: -9.299999999999999,

epoch: 325899, loss: -7.09772120899288e-06, rewards: -9.299999999999999, count: 50
epoch: 325909, loss: 4.227161298331339e-06, rewards: -9.299999999999999, count: 50
epoch: 325919, loss: -2.5725364594109124e-06, rewards: -9.299999999999999, count: 50
epoch: 325929, loss: 1.6915797687033773e-06, rewards: -9.299999999999999, count: 50
epoch: 325939, loss: -1.714229597382655e-06, rewards: -9.299999999999999, count: 50
epoch: 325949, loss: 8.618831657258852e-07, rewards: -9.299999999999999, count: 50
epoch: 325959, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 325969, loss: -1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 325979, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 325989, loss: -1.5997886748664314e-06, rewards: -9.299999999999999, count: 50
epoch: 325999, loss: -6.765127182006836e-06, rewards: -9.299999999999999, count: 50
epoch: 326009, loss: -4.3084623030154034e-05, rewards: -9.299999999999999, c

epoch: 326889, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 326899, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 326909, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 326919, loss: -8.094310715023312e-07, rewards: -9.299999999999999, count: 50
epoch: 326929, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 326939, loss: -1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 326949, loss: -5.402564966061618e-06, rewards: -9.299999999999999, count: 50
epoch: 326959, loss: -3.260851008235477e-05, rewards: -9.299999999999999, count: 50
epoch: 326969, loss: -0.00010458469478180632, rewards: -9.299999999999999, count: 50
epoch: 326979, loss: 2.8260947146918625e-05, rewards: -9.299999999999999, count: 50
epoch: 326989, loss: 2.4808645321172662e-05, rewards: -9.299999999999999, count: 50
epoch: 326999, loss: -2.000212589337025e-05, rewards: -9.299999999999999, count

epoch: 327879, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 327889, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 327899, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 327909, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 327919, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 327929, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 327939, loss: -7.891654831837513e-07, rewards: -9.299999999999999, count: 50
epoch: 327949, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 327959, loss: -1.9741057712963084e-06, rewards: -9.299999999999999, count: 50
epoch: 327969, loss: -9.102821422857232e-06, rewards: -9.299999999999999, count: 50
epoch: 327979, loss: -5.422234607976861e-05, rewards: -9.299999999999999, count: 50
epoch: 327989, loss: -3.336548979859799e-05, rewards: -9.299999999999999, 

epoch: 328869, loss: -1.3756751968685421e-06, rewards: -9.299999999999999, count: 50
epoch: 328879, loss: -4.475116838875692e-06, rewards: -9.299999999999999, count: 50
epoch: 328889, loss: -2.3220776711241342e-05, rewards: -9.299999999999999, count: 50
epoch: 328899, loss: -0.00011040806566597894, rewards: -9.299999999999999, count: 50
epoch: 328909, loss: 6.431341171264648e-05, rewards: -9.299999999999999, count: 50
epoch: 328919, loss: -1.1101960808446165e-05, rewards: -9.299999999999999, count: 50
epoch: 328929, loss: -1.6963482266874053e-05, rewards: -9.299999999999999, count: 50
epoch: 328939, loss: 1.3654232134285849e-05, rewards: -9.299999999999999, count: 50
epoch: 328949, loss: -4.713535417977255e-06, rewards: -9.299999999999999, count: 50
epoch: 328959, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 328969, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 328979, loss: -1.7404556729161413e-07, rewards: -9.29999999999999

epoch: 329859, loss: -6.639957518927986e-06, rewards: -9.299999999999999, count: 50
epoch: 329869, loss: 2.3436546143784653e-06, rewards: -9.299999999999999, count: 50
epoch: 329879, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 329889, loss: 1.9788741667525755e-07, rewards: -9.299999999999999, count: 50
epoch: 329899, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 329909, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 329919, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 329929, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 329939, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 329949, loss: -8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 329959, loss: -3.173351387886214e-06, rewards: -9.299999999999999, count: 50
epoch: 329969, loss: -1.748561771819368e-05, rewards: -9.299999999999999, co

epoch: 330849, loss: -3.879070391121786e-06, rewards: -9.299999999999999, count: 50
epoch: 330859, loss: -1.638174035178963e-05, rewards: -9.299999999999999, count: 50
epoch: 330869, loss: -9.158015018329024e-05, rewards: -9.299999999999999, count: 50
epoch: 330879, loss: 5.561590296565555e-05, rewards: -9.299999999999999, count: 50
epoch: 330889, loss: -4.0403603634331375e-05, rewards: -9.299999999999999, count: 50
epoch: 330899, loss: 5.76257707507466e-06, rewards: -9.299999999999999, count: 50
epoch: 330909, loss: 9.95874415821163e-06, rewards: -9.299999999999999, count: 50
epoch: 330919, loss: -9.149312973022461e-06, rewards: -9.299999999999999, count: 50
epoch: 330929, loss: 5.204677563597215e-06, rewards: -9.299999999999999, count: 50
epoch: 330939, loss: -2.962350890811649e-06, rewards: -9.299999999999999, count: 50
epoch: 330949, loss: 2.4247169676527847e-06, rewards: -9.299999999999999, count: 50
epoch: 330959, loss: -1.2123584838263923e-06, rewards: -9.299999999999999, count:

epoch: 331839, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 331849, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 331859, loss: 5.388259864957945e-07, rewards: -9.299999999999999, count: 50
epoch: 331869, loss: 6.771087441848067e-07, rewards: -9.299999999999999, count: 50
epoch: 331879, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 331889, loss: 2.3567677089886274e-06, rewards: -9.299999999999999, count: 50
epoch: 331899, loss: 1.2590885489771608e-05, rewards: -9.299999999999999, count: 50
epoch: 331909, loss: 8.995294774649665e-05, rewards: -9.299999999999999, count: 50
epoch: 331919, loss: -6.263494287850335e-05, rewards: -9.299999999999999, count: 50
epoch: 331929, loss: 3.775954246520996e-05, rewards: -9.299999999999999, count: 50
epoch: 331939, loss: 1.7389058484695852e-05, rewards: -9.299999999999999, count: 50
epoch: 331949, loss: -1.4955997357901651e-05, rewards: -9.299999999999999, count: 

epoch: 332829, loss: 1.9741057712963084e-06, rewards: -9.299999999999999, count: 50
epoch: 332839, loss: 1.0708570698625408e-05, rewards: -9.299999999999999, count: 50
epoch: 332849, loss: 9.164214134216309e-05, rewards: -9.299999999999999, count: 50
epoch: 332859, loss: -7.480859494535252e-05, rewards: -9.299999999999999, count: 50
epoch: 332869, loss: 2.1660327547579072e-05, rewards: -9.299999999999999, count: 50
epoch: 332879, loss: 3.144621950923465e-05, rewards: -9.299999999999999, count: 50
epoch: 332889, loss: 4.351139068603516e-06, rewards: -9.299999999999999, count: 50
epoch: 332899, loss: -1.0900497727561742e-05, rewards: -9.299999999999999, count: 50
epoch: 332909, loss: -2.191066641898942e-06, rewards: -9.299999999999999, count: 50
epoch: 332919, loss: 4.266500582161825e-06, rewards: -9.299999999999999, count: 50
epoch: 332929, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 332939, loss: -2.157688072657038e-07, rewards: -9.299999999999999, coun

epoch: 333819, loss: -1.9323824744788e-06, rewards: -9.299999999999999, count: 50
epoch: 333829, loss: 1.6307831174344756e-06, rewards: -9.299999999999999, count: 50
epoch: 333839, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 333849, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 333859, loss: 1.0478496506038937e-06, rewards: -9.299999999999999, count: 50
epoch: 333869, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 333879, loss: 2.1100045159982983e-06, rewards: -9.299999999999999, count: 50
epoch: 333889, loss: 9.912252608046401e-06, rewards: -9.299999999999999, count: 50
epoch: 333899, loss: 6.753682828275487e-05, rewards: -9.299999999999999, count: 50
epoch: 333909, loss: -1.3452768143906724e-05, rewards: -9.299999999999999, count: 50
epoch: 333919, loss: 5.0103662943001837e-05, rewards: -9.299999999999999, count: 50
epoch: 333929, loss: -1.1068582352891099e-05, rewards: -9.299999999999999, cou

epoch: 334809, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 334819, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 334829, loss: 1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 334839, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 334849, loss: 1.7726421219776967e-06, rewards: -9.299999999999999, count: 50
epoch: 334859, loss: 6.8759918576688506e-06, rewards: -9.299999999999999, count: 50
epoch: 334869, loss: 3.658175410237163e-05, rewards: -9.299999999999999, count: 50
epoch: 334879, loss: 9.148836397798732e-05, rewards: -9.299999999999999, count: 50
epoch: 334889, loss: -2.7745962142944336e-05, rewards: -9.299999999999999, count: 50
epoch: 334899, loss: -1.5045404325064737e-05, rewards: -9.299999999999999, count: 50
epoch: 334909, loss: 1.9609928131103516e-05, rewards: -9.299999999999999, count: 50
epoch: 334919, loss: -9.844303349382244e-06, rewards: -9.299999999999999, coun

epoch: 335799, loss: 0.00010249495244352147, rewards: -9.299999999999999, count: 50
epoch: 335809, loss: -1.1038780030503403e-05, rewards: -9.299999999999999, count: 50
epoch: 335819, loss: -3.5196542739868164e-05, rewards: -9.299999999999999, count: 50
epoch: 335829, loss: 1.0693072908907197e-05, rewards: -9.299999999999999, count: 50
epoch: 335839, loss: 9.353160749014933e-06, rewards: -9.299999999999999, count: 50
epoch: 335849, loss: -7.803439984854776e-06, rewards: -9.299999999999999, count: 50
epoch: 335859, loss: 2.548694510551286e-06, rewards: -9.299999999999999, count: 50
epoch: 335869, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 335879, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 335889, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 335899, loss: 9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 335909, loss: -4.220008804622921e-07, rewards: -9.299999999999999, coun

epoch: 336789, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 336799, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 336809, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 336819, loss: -1.7797947293729521e-06, rewards: -9.299999999999999, count: 50
epoch: 336829, loss: -1.4474391718977131e-05, rewards: -9.299999999999999, count: 50
epoch: 336839, loss: -0.00012770533794537187, rewards: -9.299999999999999, count: 50
epoch: 336849, loss: 8.032679761527106e-05, rewards: -9.299999999999999, count: 50
epoch: 336859, loss: 4.366517168818973e-05, rewards: -9.299999999999999, count: 50
epoch: 336869, loss: 4.932880528940586e-06, rewards: -9.299999999999999, count: 50
epoch: 336879, loss: -1.245737075805664e-05, rewards: -9.299999999999999, count: 50
epoch: 336889, loss: -1.0744332939793821e-05, rewards: -9.299999999999999, count: 50
epoch: 336899, loss: -5.042552970735414e-07, rewards: -9.299999999999999, c

epoch: 337779, loss: -1.0659694453352131e-05, rewards: -9.299999999999999, count: 50
epoch: 337789, loss: -1.2487173080444336e-05, rewards: -9.299999999999999, count: 50
epoch: 337799, loss: 1.0321140507585369e-05, rewards: -9.299999999999999, count: 50
epoch: 337809, loss: -3.874301910400391e-06, rewards: -9.299999999999999, count: 50
epoch: 337819, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 337829, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 337839, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 337849, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 337859, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 337869, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 337879, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 337889, loss: 6.949901489861077e-07, rewards: -9.299999999999999, coun

epoch: 338769, loss: -1.3363361404117313e-06, rewards: -9.299999999999999, count: 50
epoch: 338779, loss: 8.618831657258852e-07, rewards: -9.299999999999999, count: 50
epoch: 338789, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 338799, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 338809, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 338819, loss: 4.470348358154297e-07, rewards: -9.299999999999999, count: 50
epoch: 338829, loss: 8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 338839, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 338849, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 338859, loss: 3.3974647521972656e-06, rewards: -9.299999999999999, count: 50
epoch: 338869, loss: 1.7074346033041365e-05, rewards: -9.299999999999999, count: 50
epoch: 338879, loss: 0.00010411500988993794, rewards: -9.299999999999999, count:

epoch: 339759, loss: -4.550218363874592e-06, rewards: -9.299999999999999, count: 50
epoch: 339769, loss: 8.399486432608683e-06, rewards: -9.299999999999999, count: 50
epoch: 339779, loss: 8.776188224146608e-06, rewards: -9.299999999999999, count: 50
epoch: 339789, loss: 2.32934962696163e-06, rewards: -9.299999999999999, count: 50
epoch: 339799, loss: -2.0420550299604656e-06, rewards: -9.299999999999999, count: 50
epoch: 339809, loss: -1.5318394162022742e-06, rewards: -9.299999999999999, count: 50
epoch: 339819, loss: 6.949901489861077e-07, rewards: -9.299999999999999, count: 50
epoch: 339829, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 339839, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 339849, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 339859, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 339869, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count

epoch: 340749, loss: 2.6360750780440867e-05, rewards: -9.299999999999999, count: 50
epoch: 340759, loss: 6.127357664809097e-06, rewards: -9.299999999999999, count: 50
epoch: 340769, loss: -1.2094974408682901e-05, rewards: -9.299999999999999, count: 50
epoch: 340779, loss: 5.332231467036763e-06, rewards: -9.299999999999999, count: 50
epoch: 340789, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 340799, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 340809, loss: 6.902217819515499e-07, rewards: -9.299999999999999, count: 50
epoch: 340819, loss: -9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 340829, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 340839, loss: 2.2649764730431343e-08, rewards: -9.299999999999999, count: 50
epoch: 340849, loss: 5.125999678057269e-07, rewards: -9.299999999999999, count: 50
epoch: 340859, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count

epoch: 341739, loss: -3.317594519103295e-06, rewards: -9.299999999999999, count: 50
epoch: 341749, loss: -1.7303227650700137e-05, rewards: -9.299999999999999, count: 50
epoch: 341759, loss: -0.00010373473196523264, rewards: -9.299999999999999, count: 50
epoch: 341769, loss: 6.97457799105905e-05, rewards: -9.299999999999999, count: 50
epoch: 341779, loss: -2.5584697141312063e-05, rewards: -9.299999999999999, count: 50
epoch: 341789, loss: -1.483678806835087e-05, rewards: -9.299999999999999, count: 50
epoch: 341799, loss: 1.5461444490938447e-05, rewards: -9.299999999999999, count: 50
epoch: 341809, loss: -3.949403890146641e-06, rewards: -9.299999999999999, count: 50
epoch: 341819, loss: -1.4173984936860506e-06, rewards: -9.299999999999999, count: 50
epoch: 341829, loss: 2.404451379334205e-06, rewards: -9.299999999999999, count: 50
epoch: 341839, loss: -1.4579295566363726e-06, rewards: -9.299999999999999, count: 50
epoch: 341849, loss: 1.0085105941470829e-06, rewards: -9.299999999999999, 

epoch: 342729, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 342739, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 342749, loss: 1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 342759, loss: 9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 342769, loss: 7.700920150455204e-07, rewards: -9.299999999999999, count: 50
epoch: 342779, loss: 2.3245811462402344e-06, rewards: -9.299999999999999, count: 50
epoch: 342789, loss: 5.173683348402847e-06, rewards: -9.299999999999999, count: 50
epoch: 342799, loss: 1.832008274504915e-05, rewards: -9.299999999999999, count: 50
epoch: 342809, loss: 8.537173562217504e-05, rewards: -9.299999999999999, count: 50
epoch: 342819, loss: -4.098296267329715e-05, rewards: -9.299999999999999, count: 50
epoch: 342829, loss: 3.690957964863628e-05, rewards: -9.299999999999999, count: 50
epoch: 342839, loss: -2.2267102394835092e-05, rewards: -9.299999999999999, count: 

epoch: 343719, loss: -3.4046172459056834e-06, rewards: -9.299999999999999, count: 50
epoch: 343729, loss: -3.4725665045698406e-06, rewards: -9.299999999999999, count: 50
epoch: 343739, loss: 3.6799908684770344e-06, rewards: -9.299999999999999, count: 50
epoch: 343749, loss: -2.5212764285242883e-06, rewards: -9.299999999999999, count: 50
epoch: 343759, loss: 1.9323824744788e-06, rewards: -9.299999999999999, count: 50
epoch: 343769, loss: -1.7046928633135394e-06, rewards: -9.299999999999999, count: 50
epoch: 343779, loss: 3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 343789, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 343799, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 343809, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 343819, loss: -9.334087280876702e-07, rewards: -9.299999999999999, count: 50
epoch: 343829, loss: -3.2782554626464844e-06, rewards: -9.299999999999999,

epoch: 344709, loss: -2.256631887576077e-06, rewards: -9.299999999999999, count: 50
epoch: 344719, loss: -1.2302398317842744e-06, rewards: -9.299999999999999, count: 50
epoch: 344729, loss: 1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 344739, loss: -1.23739243917953e-06, rewards: -9.299999999999999, count: 50
epoch: 344749, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 344759, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 344769, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 344779, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 344789, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 344799, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 344809, loss: -1.9323824744788e-06, rewards: -9.299999999999999, count: 50
epoch: 344819, loss: -8.016824722290039e-06, rewards: -9.299999999999999, cou

epoch: 345699, loss: -8.609175711171702e-05, rewards: -9.299999999999999, count: 50
epoch: 345709, loss: 5.701661211787723e-05, rewards: -9.299999999999999, count: 50
epoch: 345719, loss: -4.174590139882639e-05, rewards: -9.299999999999999, count: 50
epoch: 345729, loss: -1.4002323041495401e-05, rewards: -9.299999999999999, count: 50
epoch: 345739, loss: 1.6622543625999242e-05, rewards: -9.299999999999999, count: 50
epoch: 345749, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 345759, loss: -5.905628313485067e-06, rewards: -9.299999999999999, count: 50
epoch: 345769, loss: 3.6156177429802483e-06, rewards: -9.299999999999999, count: 50
epoch: 345779, loss: -1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 345789, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 345799, loss: -4.148483299104555e-07, rewards: -9.299999999999999, count: 50
epoch: 345809, loss: -7.152557657263969e-08, rewards: -9.299999999999999, count:

epoch: 346689, loss: 2.9981135867274133e-06, rewards: -9.299999999999999, count: 50
epoch: 346699, loss: -8.647441973153036e-06, rewards: -9.299999999999999, count: 50
epoch: 346709, loss: -9.046792911249213e-06, rewards: -9.299999999999999, count: 50
epoch: 346719, loss: -3.4725665045698406e-06, rewards: -9.299999999999999, count: 50
epoch: 346729, loss: 1.6915797687033773e-06, rewards: -9.299999999999999, count: 50
epoch: 346739, loss: 1.7380714325554436e-06, rewards: -9.299999999999999, count: 50
epoch: 346749, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 346759, loss: -9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 346769, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 346779, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 346789, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 346799, loss: -6.437301891537572e-08, rewards: -9.299999999999999, co

epoch: 347679, loss: 1.1539459592313506e-06, rewards: -9.299999999999999, count: 50
epoch: 347689, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 347699, loss: -9.834766387939453e-07, rewards: -9.299999999999999, count: 50
epoch: 347709, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 347719, loss: -2.5880335670080967e-06, rewards: -9.299999999999999, count: 50
epoch: 347729, loss: -1.5425681340275332e-05, rewards: -9.299999999999999, count: 50
epoch: 347739, loss: -0.00010649204341461882, rewards: -9.299999999999999, count: 50
epoch: 347749, loss: 7.64942160458304e-05, rewards: -9.299999999999999, count: 50
epoch: 347759, loss: -1.424551010131836e-05, rewards: -9.299999999999999, count: 50
epoch: 347769, loss: -2.6185512979282066e-05, rewards: -9.299999999999999, count: 50
epoch: 347779, loss: 7.015466508164536e-06, rewards: -9.299999999999999, count: 50
epoch: 347789, loss: 7.460117558366619e-06, rewards: -9.299999999999999, c

epoch: 348669, loss: 1.3287067304190714e-05, rewards: -9.299999999999999, count: 50
epoch: 348679, loss: -4.134178198000882e-06, rewards: -9.299999999999999, count: 50
epoch: 348689, loss: 6.19888282926695e-08, rewards: -9.299999999999999, count: 50
epoch: 348699, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 348709, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 348719, loss: -5.269050689094001e-07, rewards: -9.299999999999999, count: 50
epoch: 348729, loss: 8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 348739, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 348749, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 348759, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 348769, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 348779, loss: 5.332231467036763e-06, rewards: -9.299999999999999, count: 5

epoch: 349659, loss: -1.3853311429556925e-05, rewards: -9.299999999999999, count: 50
epoch: 349669, loss: -0.00011912226909771562, rewards: -9.299999999999999, count: 50
epoch: 349679, loss: 8.565664029447362e-05, rewards: -9.299999999999999, count: 50
epoch: 349689, loss: 2.853870319086127e-05, rewards: -9.299999999999999, count: 50
epoch: 349699, loss: -1.2567043086164631e-05, rewards: -9.299999999999999, count: 50
epoch: 349709, loss: -1.8221140635432675e-05, rewards: -9.299999999999999, count: 50
epoch: 349719, loss: -4.23669825977413e-06, rewards: -9.299999999999999, count: 50
epoch: 349729, loss: 5.304813385009766e-06, rewards: -9.299999999999999, count: 50
epoch: 349739, loss: 2.2149085907585686e-06, rewards: -9.299999999999999, count: 50
epoch: 349749, loss: -2.064704858639743e-06, rewards: -9.299999999999999, count: 50
epoch: 349759, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 349769, loss: 5.638599418489321e-07, rewards: -9.299999999999999, coun

epoch: 350649, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 350659, loss: -2.312660171810421e-06, rewards: -9.299999999999999, count: 50
epoch: 350669, loss: -1.3555288205679972e-05, rewards: -9.299999999999999, count: 50
epoch: 350679, loss: -9.189486445393413e-05, rewards: -9.299999999999999, count: 50
epoch: 350689, loss: 6.243586540222168e-05, rewards: -9.299999999999999, count: 50
epoch: 350699, loss: -3.780484257731587e-05, rewards: -9.299999999999999, count: 50
epoch: 350709, loss: -1.2326240721449722e-05, rewards: -9.299999999999999, count: 50
epoch: 350719, loss: 1.625299410079606e-05, rewards: -9.299999999999999, count: 50
epoch: 350729, loss: -2.4437904357910156e-06, rewards: -9.299999999999999, count: 50
epoch: 350739, loss: -3.916024979844224e-06, rewards: -9.299999999999999, count: 50
epoch: 350749, loss: 4.03046624342096e-06, rewards: -9.299999999999999, count: 50
epoch: 350759, loss: -2.59995454143791e-06, rewards: -9.299999999999999, coun

epoch: 351639, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, count: 50
epoch: 351649, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 351659, loss: -1.4734267779203947e-06, rewards: -9.299999999999999, count: 50
epoch: 351669, loss: -7.539987564086914e-06, rewards: -9.299999999999999, count: 50
epoch: 351679, loss: -5.606651393463835e-05, rewards: -9.299999999999999, count: 50
epoch: 351689, loss: -1.9103288650512695e-05, rewards: -9.299999999999999, count: 50
epoch: 351699, loss: -4.987478314433247e-05, rewards: -9.299999999999999, count: 50
epoch: 351709, loss: 1.4754533367522527e-05, rewards: -9.299999999999999, count: 50
epoch: 351719, loss: 1.761317253112793e-05, rewards: -9.299999999999999, count: 50
epoch: 351729, loss: -7.494687906728359e-06, rewards: -9.299999999999999, count: 50
epoch: 351739, loss: -3.39269627147587e-06, rewards: -9.299999999999999, count: 50
epoch: 351749, loss: 4.032850483781658e-06, rewards: -9.299999999999999, cou

epoch: 352629, loss: -9.670853614807129e-05, rewards: -9.299999999999999, count: 50
epoch: 352639, loss: 6.584406219189987e-05, rewards: -9.299999999999999, count: 50
epoch: 352649, loss: -3.426551847951487e-05, rewards: -9.299999999999999, count: 50
epoch: 352659, loss: -1.1101960808446165e-05, rewards: -9.299999999999999, count: 50
epoch: 352669, loss: 1.6157626305357553e-05, rewards: -9.299999999999999, count: 50
epoch: 352679, loss: -4.860162789555034e-06, rewards: -9.299999999999999, count: 50
epoch: 352689, loss: -1.5223026821331587e-06, rewards: -9.299999999999999, count: 50
epoch: 352699, loss: 2.5534629912726814e-06, rewards: -9.299999999999999, count: 50
epoch: 352709, loss: -1.6915797687033773e-06, rewards: -9.299999999999999, count: 50
epoch: 352719, loss: 1.0979175613101688e-06, rewards: -9.299999999999999, count: 50
epoch: 352729, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 352739, loss: -3.194809039541724e-07, rewards: -9.299999999999999, 

epoch: 353619, loss: -1.6140937759701046e-06, rewards: -9.299999999999999, count: 50
epoch: 353629, loss: 1.9741057712963084e-06, rewards: -9.299999999999999, count: 50
epoch: 353639, loss: -1.3434887478069868e-06, rewards: -9.299999999999999, count: 50
epoch: 353649, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 353659, loss: 2.1100045159982983e-06, rewards: -9.299999999999999, count: 50
epoch: 353669, loss: 9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 353679, loss: 7.796287491146359e-07, rewards: -9.299999999999999, count: 50
epoch: 353689, loss: 4.419088327267673e-06, rewards: -9.299999999999999, count: 50
epoch: 353699, loss: 2.7679205231834203e-05, rewards: -9.299999999999999, count: 50
epoch: 353709, loss: 0.00011452793842181563, rewards: -9.299999999999999, count: 50
epoch: 353719, loss: -4.3779611587524414e-05, rewards: -9.299999999999999, count: 50
epoch: 353729, loss: -2.0093917555641383e-05, rewards: -9.299999999999999, co

epoch: 354609, loss: -3.255963383708149e-05, rewards: -9.299999999999999, count: 50
epoch: 354619, loss: 1.3391971151577309e-05, rewards: -9.299999999999999, count: 50
epoch: 354629, loss: 1.4612674931413494e-05, rewards: -9.299999999999999, count: 50
epoch: 354639, loss: -4.316568265494425e-06, rewards: -9.299999999999999, count: 50
epoch: 354649, loss: -4.03046624342096e-06, rewards: -9.299999999999999, count: 50
epoch: 354659, loss: 2.874136043828912e-06, rewards: -9.299999999999999, count: 50
epoch: 354669, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 354679, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 354689, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 354699, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 354709, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 354719, loss: -5.018711135562626e-07, rewards: -9.299999999999999, cou

epoch: 355599, loss: 2.7263165520707844e-06, rewards: -9.299999999999999, count: 50
epoch: 355609, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 355619, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 355629, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 355639, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 355649, loss: -5.686283088834898e-07, rewards: -9.299999999999999, count: 50
epoch: 355659, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 355669, loss: -2.4342537017219e-06, rewards: -9.299999999999999, count: 50
epoch: 355679, loss: -1.2834072549594566e-05, rewards: -9.299999999999999, count: 50
epoch: 355689, loss: -8.624792098999023e-05, rewards: -9.299999999999999, count: 50
epoch: 355699, loss: 5.405187766882591e-05, rewards: -9.299999999999999, count: 50
epoch: 355709, loss: -4.2945146560668945e-05, rewards: -9.299999999999999, cou

epoch: 356589, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 356599, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 356609, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 356619, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 356629, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 356639, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 356649, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 356659, loss: 1.5580653780489229e-06, rewards: -9.299999999999999, count: 50
epoch: 356669, loss: 8.394717951887287e-06, rewards: -9.299999999999999, count: 50
epoch: 356679, loss: 6.391406350303441e-05, rewards: -9.299999999999999, count: 50
epoch: 356689, loss: -7.195472790044732e-06, rewards: -9.299999999999999, count: 50
epoch: 356699, loss: 5.34307946509216e-05, rewards: -9.299999999999999, count: 

epoch: 357579, loss: -1.5258788721439487e-07, rewards: -9.299999999999999, count: 50
epoch: 357589, loss: -1.8835068260614207e-07, rewards: -9.299999999999999, count: 50
epoch: 357599, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 357609, loss: -6.806850478824344e-07, rewards: -9.299999999999999, count: 50
epoch: 357619, loss: -1.693964009064075e-06, rewards: -9.299999999999999, count: 50
epoch: 357629, loss: -5.382299605116714e-06, rewards: -9.299999999999999, count: 50
epoch: 357639, loss: -3.0083656383794732e-05, rewards: -9.299999999999999, count: 50
epoch: 357649, loss: -0.00010592460603220388, rewards: -9.299999999999999, count: 50
epoch: 357659, loss: 4.5386554120341316e-05, rewards: -9.299999999999999, count: 50
epoch: 357669, loss: 7.239580099849263e-06, rewards: -9.299999999999999, count: 50
epoch: 357679, loss: -2.0463467080844566e-05, rewards: -9.299999999999999, count: 50
epoch: 357689, loss: 1.1169910067110322e-05, rewards: -9.29999999999999

epoch: 358569, loss: -1.5878677004366182e-06, rewards: -9.299999999999999, count: 50
epoch: 358579, loss: 1.3661384627994266e-06, rewards: -9.299999999999999, count: 50
epoch: 358589, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 358599, loss: -9.179115068036481e-08, rewards: -9.299999999999999, count: 50
epoch: 358609, loss: 1.8131732986148563e-06, rewards: -9.299999999999999, count: 50
epoch: 358619, loss: 2.645254198796465e-06, rewards: -9.299999999999999, count: 50
epoch: 358629, loss: 7.460117558366619e-06, rewards: -9.299999999999999, count: 50
epoch: 358639, loss: 3.477931022644043e-05, rewards: -9.299999999999999, count: 50
epoch: 358649, loss: 8.950829214882106e-05, rewards: -9.299999999999999, count: 50
epoch: 358659, loss: -4.3596028262982145e-05, rewards: -9.299999999999999, count: 50
epoch: 358669, loss: 9.729862540552858e-06, rewards: -9.299999999999999, count: 50
epoch: 358679, loss: 5.108117875352036e-06, rewards: -9.299999999999999, count:

epoch: 359559, loss: -5.3144693083595484e-05, rewards: -9.299999999999999, count: 50
epoch: 359569, loss: -8.364915629499592e-06, rewards: -9.299999999999999, count: 50
epoch: 359579, loss: 1.8569231542642228e-05, rewards: -9.299999999999999, count: 50
epoch: 359589, loss: 2.282857849422726e-06, rewards: -9.299999999999999, count: 50
epoch: 359599, loss: -7.017850748525234e-06, rewards: -9.299999999999999, count: 50
epoch: 359609, loss: 2.5844574338407256e-06, rewards: -9.299999999999999, count: 50
epoch: 359619, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 359629, loss: -1.4173984936860506e-06, rewards: -9.299999999999999, count: 50
epoch: 359639, loss: 1.3291835330164758e-06, rewards: -9.299999999999999, count: 50
epoch: 359649, loss: -1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 359659, loss: 1.0979175613101688e-06, rewards: -9.299999999999999, count: 50
epoch: 359669, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, 

epoch: 360539, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 360549, loss: -1.21712685086095e-06, rewards: -9.299999999999999, count: 50
epoch: 360559, loss: -1.1575222060855594e-06, rewards: -9.299999999999999, count: 50
epoch: 360569, loss: -1.9323824744788e-06, rewards: -9.299999999999999, count: 50
epoch: 360579, loss: -9.953975677490234e-06, rewards: -9.299999999999999, count: 50
epoch: 360589, loss: -7.183790148701519e-05, rewards: -9.299999999999999, count: 50
epoch: 360599, loss: 2.6601552235661075e-05, rewards: -9.299999999999999, count: 50
epoch: 360609, loss: -5.105137825012207e-05, rewards: -9.299999999999999, count: 50
epoch: 360619, loss: 2.43902195506962e-06, rewards: -9.299999999999999, count: 50
epoch: 360629, loss: 1.7702579498291016e-05, rewards: -9.299999999999999, count: 50
epoch: 360639, loss: -6.036758350091986e-06, rewards: -9.299999999999999, count: 50
epoch: 360649, loss: -2.297163064213237e-06, rewards: -9.299999999999999, count:

epoch: 361529, loss: 7.907152394182049e-06, rewards: -9.299999999999999, count: 50
epoch: 361539, loss: -2.4116038730426226e-06, rewards: -9.299999999999999, count: 50
epoch: 361549, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 361559, loss: 6.210804031070438e-07, rewards: -9.299999999999999, count: 50
epoch: 361569, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 361579, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 361589, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 361599, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 361609, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 361619, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 361629, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 361639, loss: -2.157688072657038e-07, rewards: -9.299999999999999, cou

epoch: 362519, loss: -7.307529585887096e-07, rewards: -9.299999999999999, count: 50
epoch: 362529, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 362539, loss: -1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 362549, loss: -8.012056241568644e-06, rewards: -9.299999999999999, count: 50
epoch: 362559, loss: -8.282541966764256e-05, rewards: -9.299999999999999, count: 50
epoch: 362569, loss: 7.525086402893066e-05, rewards: -9.299999999999999, count: 50
epoch: 362579, loss: -1.4140606253931765e-05, rewards: -9.299999999999999, count: 50
epoch: 362589, loss: -3.0686856916872784e-05, rewards: -9.299999999999999, count: 50
epoch: 362599, loss: -1.981139212148264e-05, rewards: -9.299999999999999, count: 50
epoch: 362609, loss: -3.362893949088175e-06, rewards: -9.299999999999999, count: 50
epoch: 362619, loss: 5.735158993047662e-06, rewards: -9.299999999999999, count: 50
epoch: 362629, loss: 3.55362885784416e-06, rewards: -9.299999999999999, cou

epoch: 363509, loss: 2.3578404579893686e-05, rewards: -9.299999999999999, count: 50
epoch: 363519, loss: 2.319812665518839e-06, rewards: -9.299999999999999, count: 50
epoch: 363529, loss: -8.866786629369017e-06, rewards: -9.299999999999999, count: 50
epoch: 363539, loss: 4.122257450944744e-06, rewards: -9.299999999999999, count: 50
epoch: 363549, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 363559, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 363569, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 363579, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, count: 50
epoch: 363589, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 363599, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 363609, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 363619, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count:

epoch: 364499, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 364509, loss: -1.4853477523502079e-06, rewards: -9.299999999999999, count: 50
epoch: 364519, loss: -9.392500032845419e-06, rewards: -9.299999999999999, count: 50
epoch: 364529, loss: -7.88521792856045e-05, rewards: -9.299999999999999, count: 50
epoch: 364539, loss: 5.4923297284403816e-05, rewards: -9.299999999999999, count: 50
epoch: 364549, loss: -4.031300704809837e-05, rewards: -9.299999999999999, count: 50
epoch: 364559, loss: -2.9112101401551627e-05, rewards: -9.299999999999999, count: 50
epoch: 364569, loss: 3.1638146538170986e-06, rewards: -9.299999999999999, count: 50
epoch: 364579, loss: 1.1854172043967992e-05, rewards: -9.299999999999999, count: 50
epoch: 364589, loss: 2.7298926852381555e-07, rewards: -9.299999999999999, count: 50
epoch: 364599, loss: -4.073381205671467e-06, rewards: -9.299999999999999, count: 50
epoch: 364609, loss: 1.3291835330164758e-06, rewards: -9.299999999999999, c

epoch: 365489, loss: 9.680986295279581e-06, rewards: -9.299999999999999, count: 50
epoch: 365499, loss: 2.0570754713844508e-05, rewards: -9.299999999999999, count: 50
epoch: 365509, loss: -2.4437904357910156e-06, rewards: -9.299999999999999, count: 50
epoch: 365519, loss: -6.810426839365391e-06, rewards: -9.299999999999999, count: 50
epoch: 365529, loss: 3.6132335026195506e-06, rewards: -9.299999999999999, count: 50
epoch: 365539, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 365549, loss: -1.2302398317842744e-06, rewards: -9.299999999999999, count: 50
epoch: 365559, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count: 50
epoch: 365569, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 365579, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 365589, loss: 4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 365599, loss: 1.370906801412275e-07, rewards: -9.299999999999999, count:

epoch: 366479, loss: 1.8358230136072962e-06, rewards: -9.299999999999999, count: 50
epoch: 366489, loss: -9.179115068036481e-07, rewards: -9.299999999999999, count: 50
epoch: 366499, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 366509, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 366519, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 366529, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 366539, loss: -9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 366549, loss: -2.6476382117834873e-06, rewards: -9.299999999999999, count: 50
epoch: 366559, loss: -9.037255949806422e-06, rewards: -9.299999999999999, count: 50
epoch: 366569, loss: -4.500627619563602e-05, rewards: -9.299999999999999, count: 50
epoch: 366579, loss: -6.526708602905273e-05, rewards: -9.299999999999999, count: 50
epoch: 366589, loss: 1.2670755495491903e-05, rewards: -9.299999999999999, c

epoch: 367469, loss: -8.52346420288086e-06, rewards: -9.299999999999999, count: 50
epoch: 367479, loss: 7.051229658827651e-06, rewards: -9.299999999999999, count: 50
epoch: 367489, loss: -5.027055522077717e-06, rewards: -9.299999999999999, count: 50
epoch: 367499, loss: 2.88128853753733e-06, rewards: -9.299999999999999, count: 50
epoch: 367509, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 367519, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 367529, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, count: 50
epoch: 367539, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 367549, loss: -1.8203259060101118e-06, rewards: -9.299999999999999, count: 50
epoch: 367559, loss: -6.988048426137539e-06, rewards: -9.299999999999999, count: 50
epoch: 367569, loss: -3.6021472624270245e-05, rewards: -9.299999999999999, count: 50
epoch: 367579, loss: -8.979797712527215e-05, rewards: -9.299999999999999, cou

epoch: 368469, loss: -2.059578946500551e-05, rewards: -9.299999999999999, count: 50
epoch: 368479, loss: 4.327297119743889e-06, rewards: -9.299999999999999, count: 50
epoch: 368489, loss: 7.425546755257528e-06, rewards: -9.299999999999999, count: 50
epoch: 368499, loss: -2.962350890811649e-06, rewards: -9.299999999999999, count: 50
epoch: 368509, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 368519, loss: 1.920461727422662e-06, rewards: -9.299999999999999, count: 50
epoch: 368529, loss: -1.1420249848015374e-06, rewards: -9.299999999999999, count: 50
epoch: 368539, loss: 4.971027465217048e-07, rewards: -9.299999999999999, count: 50
epoch: 368549, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 368559, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 368569, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 368579, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, co

epoch: 369459, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 369469, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 369479, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 369489, loss: 7.498264267269406e-07, rewards: -9.299999999999999, count: 50
epoch: 369499, loss: 4.930496288579889e-06, rewards: -9.299999999999999, count: 50
epoch: 369509, loss: 3.7406683986773714e-05, rewards: -9.299999999999999, count: 50
epoch: 369519, loss: 9.113073610933498e-05, rewards: -9.299999999999999, count: 50
epoch: 369529, loss: 2.11811056942679e-05, rewards: -9.299999999999999, count: 50
epoch: 369539, loss: -3.402471702429466e-05, rewards: -9.299999999999999, count: 50
epoch: 369549, loss: -1.2058019819960464e-05, rewards: -9.299999999999999, count: 50
epoch: 369559, loss: 1.194119431602303e-05, rewards: -9.299999999999999, count: 50
epoch: 369569, loss: 2.7239323117100867e-06, rewards: -9.299999999999999, coun

epoch: 370449, loss: 3.424882834224263e-06, rewards: -9.299999999999999, count: 50
epoch: 370459, loss: 2.7564763513510115e-05, rewards: -9.299999999999999, count: 50
epoch: 370469, loss: -1.7583370208740234e-05, rewards: -9.299999999999999, count: 50
epoch: 370479, loss: 3.28302394336788e-06, rewards: -9.299999999999999, count: 50
epoch: 370489, loss: 2.3174286525318166e-06, rewards: -9.299999999999999, count: 50
epoch: 370499, loss: -2.8204917725815903e-06, rewards: -9.299999999999999, count: 50
epoch: 370509, loss: 2.2494793938676594e-06, rewards: -9.299999999999999, count: 50
epoch: 370519, loss: -1.1372566177669796e-06, rewards: -9.299999999999999, count: 50
epoch: 370529, loss: 2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 370539, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 370549, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 370559, loss: -1.3685225894732866e-06, rewards: -9.299999999999999, co

epoch: 371439, loss: -2.5883913622237742e-05, rewards: -9.299999999999999, count: 50
epoch: 371449, loss: 5.267858341539977e-06, rewards: -9.299999999999999, count: 50
epoch: 371459, loss: 7.317066319956211e-06, rewards: -9.299999999999999, count: 50
epoch: 371469, loss: -5.749463980464498e-06, rewards: -9.299999999999999, count: 50
epoch: 371479, loss: 2.3174286525318166e-06, rewards: -9.299999999999999, count: 50
epoch: 371489, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 371499, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, count: 50
epoch: 371509, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 371519, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 371529, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 371539, loss: 4.971027465217048e-07, rewards: -9.299999999999999, count: 50
epoch: 371549, loss: 4.220008804622921e-07, rewards: -9.299999999999999, coun

epoch: 372429, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 372439, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 372449, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 372459, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 372469, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 372479, loss: 2.2149085907585686e-06, rewards: -9.299999999999999, count: 50
epoch: 372489, loss: 1.504302053945139e-05, rewards: -9.299999999999999, count: 50
epoch: 372499, loss: 0.000120183227409143, rewards: -9.299999999999999, count: 50
epoch: 372509, loss: -8.175492257578298e-05, rewards: -9.299999999999999, count: 50
epoch: 372519, loss: -2.401113488303963e-05, rewards: -9.299999999999999, count: 50
epoch: 372529, loss: 1.761317253112793e-05, rewards: -9.299999999999999, count: 50
epoch: 372539, loss: 1.699686072242912e-05, rewards: -9.299999999999999, count: 50
ep

epoch: 373419, loss: 5.332231467036763e-06, rewards: -9.299999999999999, count: 50
epoch: 373429, loss: -6.139278411865234e-06, rewards: -9.299999999999999, count: 50
epoch: 373439, loss: 4.122257450944744e-06, rewards: -9.299999999999999, count: 50
epoch: 373449, loss: -2.806186785164755e-06, rewards: -9.299999999999999, count: 50
epoch: 373459, loss: 1.1765956742237904e-06, rewards: -9.299999999999999, count: 50
epoch: 373469, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 373479, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 373489, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 373499, loss: 3.099441414633475e-07, rewards: -9.299999999999999, count: 50
epoch: 373509, loss: 3.850460075227602e-07, rewards: -9.299999999999999, count: 50
epoch: 373519, loss: 2.473592758178711e-06, rewards: -9.299999999999999, count: 50
epoch: 373529, loss: 1.4926195035513956e-05, rewards: -9.299999999999999, count: 

epoch: 374409, loss: 9.824633889365941e-05, rewards: -9.299999999999999, count: 50
epoch: 374419, loss: -2.0288229279685766e-05, rewards: -9.299999999999999, count: 50
epoch: 374429, loss: -2.687215783225838e-05, rewards: -9.299999999999999, count: 50
epoch: 374439, loss: 1.8683671441976912e-05, rewards: -9.299999999999999, count: 50
epoch: 374449, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 374459, loss: -5.627870450553019e-06, rewards: -9.299999999999999, count: 50
epoch: 374469, loss: 4.930496288579889e-06, rewards: -9.299999999999999, count: 50
epoch: 374479, loss: -3.2031537102739094e-06, rewards: -9.299999999999999, count: 50
epoch: 374489, loss: 1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 374499, loss: -1.5878677004366182e-06, rewards: -9.299999999999999, count: 50
epoch: 374509, loss: 3.850460075227602e-07, rewards: -9.299999999999999, count: 50
epoch: 374519, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, co

epoch: 375399, loss: 2.1100045159982983e-06, rewards: -9.299999999999999, count: 50
epoch: 375409, loss: 1.0236502021143679e-05, rewards: -9.299999999999999, count: 50
epoch: 375419, loss: 7.693529187235981e-05, rewards: -9.299999999999999, count: 50
epoch: 375429, loss: -4.2787789425347e-05, rewards: -9.299999999999999, count: 50
epoch: 375439, loss: 4.8363210225943476e-05, rewards: -9.299999999999999, count: 50
epoch: 375449, loss: 1.3644695172843058e-05, rewards: -9.299999999999999, count: 50
epoch: 375459, loss: -1.71911724464735e-05, rewards: -9.299999999999999, count: 50
epoch: 375469, loss: -3.250837380619487e-06, rewards: -9.299999999999999, count: 50
epoch: 375479, loss: 7.165670467657037e-06, rewards: -9.299999999999999, count: 50
epoch: 375489, loss: -2.536773763495148e-06, rewards: -9.299999999999999, count: 50
epoch: 375499, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 375509, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 

epoch: 376389, loss: -3.210306203982327e-06, rewards: -9.299999999999999, count: 50
epoch: 376399, loss: 1.920461727422662e-06, rewards: -9.299999999999999, count: 50
epoch: 376409, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 376419, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 376429, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 376439, loss: 1.0478496506038937e-06, rewards: -9.299999999999999, count: 50
epoch: 376449, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 376459, loss: 2.43902195506962e-06, rewards: -9.299999999999999, count: 50
epoch: 376469, loss: 1.2847184734710027e-05, rewards: -9.299999999999999, count: 50
epoch: 376479, loss: 8.599281136412174e-05, rewards: -9.299999999999999, count: 50
epoch: 376489, loss: -5.34307946509216e-05, rewards: -9.299999999999999, count: 50
epoch: 376499, loss: 4.365801942185499e-05, rewards: -9.299999999999999, count: 50

epoch: 377379, loss: -1.7046928633135394e-06, rewards: -9.299999999999999, count: 50
epoch: 377389, loss: 1.303911176364636e-05, rewards: -9.299999999999999, count: 50
epoch: 377399, loss: -1.0907649993896484e-05, rewards: -9.299999999999999, count: 50
epoch: 377409, loss: 7.241964340209961e-06, rewards: -9.299999999999999, count: 50
epoch: 377419, loss: -4.458427611098159e-06, rewards: -9.299999999999999, count: 50
epoch: 377429, loss: 2.0694733393611386e-06, rewards: -9.299999999999999, count: 50
epoch: 377439, loss: -4.148483299104555e-07, rewards: -9.299999999999999, count: 50
epoch: 377449, loss: -8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 377459, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 377469, loss: 1.920461727422662e-06, rewards: -9.299999999999999, count: 50
epoch: 377479, loss: 4.266500582161825e-06, rewards: -9.299999999999999, count: 50
epoch: 377489, loss: 1.755356788635254e-05, rewards: -9.299999999999999, count:

epoch: 378369, loss: 2.8833150281570852e-05, rewards: -9.299999999999999, count: 50
epoch: 378379, loss: 7.535219083365519e-06, rewards: -9.299999999999999, count: 50
epoch: 378389, loss: -2.0719766325782984e-05, rewards: -9.299999999999999, count: 50
epoch: 378399, loss: 1.6520023564225994e-05, rewards: -9.299999999999999, count: 50
epoch: 378409, loss: -9.953975677490234e-06, rewards: -9.299999999999999, count: 50
epoch: 378419, loss: 5.735158993047662e-06, rewards: -9.299999999999999, count: 50
epoch: 378429, loss: -3.801584171014838e-06, rewards: -9.299999999999999, count: 50
epoch: 378439, loss: 2.3174286525318166e-06, rewards: -9.299999999999999, count: 50
epoch: 378449, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 378459, loss: -1.8131732986148563e-06, rewards: -9.299999999999999, count: 50
epoch: 378469, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 378479, loss: 5.638599418489321e-07, rewards: -9.299999999999999, cou

epoch: 379359, loss: -6.157159805297852e-05, rewards: -9.299999999999999, count: 50
epoch: 379369, loss: 4.227161298331339e-06, rewards: -9.299999999999999, count: 50
epoch: 379379, loss: 2.2573471142095514e-05, rewards: -9.299999999999999, count: 50
epoch: 379389, loss: -1.814484676287975e-05, rewards: -9.299999999999999, count: 50
epoch: 379399, loss: 8.44836267788196e-06, rewards: -9.299999999999999, count: 50
epoch: 379409, loss: -3.1352042242360767e-06, rewards: -9.299999999999999, count: 50
epoch: 379419, loss: 1.3291835330164758e-06, rewards: -9.299999999999999, count: 50
epoch: 379429, loss: -1.3434887478069868e-06, rewards: -9.299999999999999, count: 50
epoch: 379439, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 379449, loss: -1.2302398317842744e-06, rewards: -9.299999999999999, count: 50
epoch: 379459, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 379469, loss: 7.843971161491936e-07, rewards: -9.299999999999999, coun

epoch: 380349, loss: -2.2220610844669864e-06, rewards: -9.299999999999999, count: 50
epoch: 380359, loss: -9.442567716178019e-06, rewards: -9.299999999999999, count: 50
epoch: 380369, loss: -6.277799548115581e-05, rewards: -9.299999999999999, count: 50
epoch: 380379, loss: -2.703666723391507e-06, rewards: -9.299999999999999, count: 50
epoch: 380389, loss: -4.374146374175325e-05, rewards: -9.299999999999999, count: 50
epoch: 380399, loss: 2.187490463256836e-05, rewards: -9.299999999999999, count: 50
epoch: 380409, loss: 6.506443241960369e-06, rewards: -9.299999999999999, count: 50
epoch: 380419, loss: -1.0877847671508789e-05, rewards: -9.299999999999999, count: 50
epoch: 380429, loss: 5.569458153331652e-06, rewards: -9.299999999999999, count: 50
epoch: 380439, loss: -1.8131732986148563e-06, rewards: -9.299999999999999, count: 50
epoch: 380449, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 380459, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, c

epoch: 381339, loss: -7.96318033735588e-07, rewards: -9.299999999999999, count: 50
epoch: 381349, loss: 3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 381359, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 381369, loss: -8.094310715023312e-07, rewards: -9.299999999999999, count: 50
epoch: 381379, loss: -8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 381389, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 381399, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 381409, loss: -3.972053491452243e-06, rewards: -9.299999999999999, count: 50
epoch: 381419, loss: -3.0210017939680256e-05, rewards: -9.299999999999999, count: 50
epoch: 381429, loss: -0.00011513352365000173, rewards: -9.299999999999999, count: 50
epoch: 381439, loss: 1.504302053945139e-05, rewards: -9.299999999999999, count: 50
epoch: 381449, loss: 3.8365124055417255e-05, rewards: -9.299999999999999, coun

epoch: 382329, loss: 5.075931767350994e-06, rewards: -9.299999999999999, count: 50
epoch: 382339, loss: 2.425432285235729e-05, rewards: -9.299999999999999, count: 50
epoch: 382349, loss: 0.00011225700291106477, rewards: -9.299999999999999, count: 50
epoch: 382359, loss: -6.120920443208888e-05, rewards: -9.299999999999999, count: 50
epoch: 382369, loss: 3.0231476557673886e-06, rewards: -9.299999999999999, count: 50
epoch: 382379, loss: 2.069711626973003e-05, rewards: -9.299999999999999, count: 50
epoch: 382389, loss: -1.2335777682892513e-05, rewards: -9.299999999999999, count: 50
epoch: 382399, loss: 2.2494793938676594e-06, rewards: -9.299999999999999, count: 50
epoch: 382409, loss: 1.5580653780489229e-06, rewards: -9.299999999999999, count: 50
epoch: 382419, loss: -2.297163064213237e-06, rewards: -9.299999999999999, count: 50
epoch: 382429, loss: 1.3756751968685421e-06, rewards: -9.299999999999999, count: 50
epoch: 382439, loss: -8.261203561232833e-07, rewards: -9.299999999999999, coun

epoch: 383319, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 383329, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 383339, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 383349, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 383359, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 383369, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 383379, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 383389, loss: -1.4102458862907952e-06, rewards: -9.299999999999999, count: 50
epoch: 383399, loss: -4.492997959459899e-06, rewards: -9.299999999999999, count: 50
epoch: 383409, loss: -2.305746056663338e-05, rewards: -9.299999999999999, count: 50
epoch: 383419, loss: -0.00011124015145469457, rewards: -9.299999999999999, count: 50
epoch: 383429, loss: 6.395817035809159e-05, rewards: -9.299999999999999, cou

epoch: 384309, loss: -0.00012557745503727347, rewards: -9.299999999999999, count: 50
epoch: 384319, loss: 7.403731433441862e-05, rewards: -9.299999999999999, count: 50
epoch: 384329, loss: 2.962350845336914e-05, rewards: -9.299999999999999, count: 50
epoch: 384339, loss: -1.4011860002938192e-05, rewards: -9.299999999999999, count: 50
epoch: 384349, loss: -1.6524791135452688e-05, rewards: -9.299999999999999, count: 50
epoch: 384359, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 384369, loss: 6.221532657946227e-06, rewards: -9.299999999999999, count: 50
epoch: 384379, loss: -1.745224039950699e-06, rewards: -9.299999999999999, count: 50
epoch: 384389, loss: -1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 384399, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 384409, loss: -7.355213256232673e-07, rewards: -9.299999999999999, count: 50
epoch: 384419, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, co

epoch: 385299, loss: 2.1803380150231533e-06, rewards: -9.299999999999999, count: 50
epoch: 385309, loss: 5.68151472180034e-06, rewards: -9.299999999999999, count: 50
epoch: 385319, loss: 2.4359225790249184e-05, rewards: -9.299999999999999, count: 50
epoch: 385329, loss: 0.00010305643081665039, rewards: -9.299999999999999, count: 50
epoch: 385339, loss: -6.072759788366966e-05, rewards: -9.299999999999999, count: 50
epoch: 385349, loss: 2.495765693311114e-05, rewards: -9.299999999999999, count: 50
epoch: 385359, loss: 1.0406970432086382e-06, rewards: -9.299999999999999, count: 50
epoch: 385369, loss: -8.623599569546059e-06, rewards: -9.299999999999999, count: 50
epoch: 385379, loss: 7.494687906728359e-06, rewards: -9.299999999999999, count: 50
epoch: 385389, loss: -5.100965609017294e-06, rewards: -9.299999999999999, count: 50
epoch: 385399, loss: 2.8014183044433594e-06, rewards: -9.299999999999999, count: 50
epoch: 385409, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count

epoch: 386289, loss: 2.604246219561901e-05, rewards: -9.299999999999999, count: 50
epoch: 386299, loss: 0.00012241244257893413, rewards: -9.299999999999999, count: 50
epoch: 386309, loss: -3.780722545343451e-05, rewards: -9.299999999999999, count: 50
epoch: 386319, loss: -3.335356814204715e-05, rewards: -9.299999999999999, count: 50
epoch: 386329, loss: 1.3693571418116335e-05, rewards: -9.299999999999999, count: 50
epoch: 386339, loss: 1.1274814823991619e-05, rewards: -9.299999999999999, count: 50
epoch: 386349, loss: -7.669925253139809e-06, rewards: -9.299999999999999, count: 50
epoch: 386359, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 386369, loss: 2.950429916381836e-06, rewards: -9.299999999999999, count: 50
epoch: 386379, loss: -2.473592758178711e-06, rewards: -9.299999999999999, count: 50
epoch: 386389, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 386399, loss: -7.96318033735588e-07, rewards: -9.299999999999999, coun

epoch: 387279, loss: 1.7931461115949787e-05, rewards: -9.299999999999999, count: 50
epoch: 387289, loss: -1.433014858776005e-05, rewards: -9.299999999999999, count: 50
epoch: 387299, loss: 1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 387309, loss: 2.9277800877025584e-06, rewards: -9.299999999999999, count: 50
epoch: 387319, loss: -2.8014183044433594e-06, rewards: -9.299999999999999, count: 50
epoch: 387329, loss: 2.1207333702477627e-06, rewards: -9.299999999999999, count: 50
epoch: 387339, loss: -1.1277198836978641e-06, rewards: -9.299999999999999, count: 50
epoch: 387349, loss: 5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 387359, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 387369, loss: -6.294250738392293e-07, rewards: -9.299999999999999, count: 50
epoch: 387379, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 387389, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, 

epoch: 388269, loss: -1.0774135262181517e-05, rewards: -9.299999999999999, count: 50
epoch: 388279, loss: -6.949901489861077e-06, rewards: -9.299999999999999, count: 50
epoch: 388289, loss: 6.14404689258663e-06, rewards: -9.299999999999999, count: 50
epoch: 388299, loss: -1.5020370938145788e-06, rewards: -9.299999999999999, count: 50
epoch: 388309, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 388319, loss: 6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 388329, loss: -3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 388339, loss: 2.2649764730431343e-08, rewards: -9.299999999999999, count: 50
epoch: 388349, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 388359, loss: 2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 388369, loss: -1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 388379, loss: -1.6891956420295173e-06, rewards: -9.299999999999999, co

epoch: 389259, loss: -8.986115426523611e-05, rewards: -9.299999999999999, count: 50
epoch: 389269, loss: -4.748106221086346e-06, rewards: -9.299999999999999, count: 50
epoch: 389279, loss: 3.5691260563908145e-05, rewards: -9.299999999999999, count: 50
epoch: 389289, loss: -4.575252660288243e-06, rewards: -9.299999999999999, count: 50
epoch: 389299, loss: -1.1372566405043472e-05, rewards: -9.299999999999999, count: 50
epoch: 389309, loss: 6.501674761238974e-06, rewards: -9.299999999999999, count: 50
epoch: 389319, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 389329, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 389339, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 389349, loss: -1.1277198836978641e-06, rewards: -9.299999999999999, count: 50
epoch: 389359, loss: 7.522106102442194e-07, rewards: -9.299999999999999, count: 50
epoch: 389369, loss: -4.2915345943583816e-08, rewards: -9.299999999999999, 

epoch: 390249, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 390259, loss: 1.519918441772461e-06, rewards: -9.299999999999999, count: 50
epoch: 390269, loss: -1.1277198836978641e-06, rewards: -9.299999999999999, count: 50
epoch: 390279, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, count: 50
epoch: 390289, loss: 4.971027465217048e-07, rewards: -9.299999999999999, count: 50
epoch: 390299, loss: 1.3291835330164758e-06, rewards: -9.299999999999999, count: 50
epoch: 390309, loss: 4.713535417977255e-06, rewards: -9.299999999999999, count: 50
epoch: 390319, loss: 2.4341345124412328e-05, rewards: -9.299999999999999, count: 50
epoch: 390329, loss: 0.00011402368545532227, rewards: -9.299999999999999, count: 50
epoch: 390339, loss: -6.021618901286274e-05, rewards: -9.299999999999999, count: 50
epoch: 390349, loss: -1.745224039950699e-06, rewards: -9.299999999999999, count: 50
epoch: 390359, loss: 2.2276639356277883e-05, rewards: -9.299999999999999, cou

epoch: 391239, loss: -3.39269627147587e-06, rewards: -9.299999999999999, count: 50
epoch: 391249, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 391259, loss: 1.6915797687033773e-06, rewards: -9.299999999999999, count: 50
epoch: 391269, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 391279, loss: -9.179115068036481e-08, rewards: -9.299999999999999, count: 50
epoch: 391289, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 391299, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 391309, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 391319, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 391329, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 391339, loss: 6.19888282926695e-08, rewards: -9.299999999999999, count: 50
epoch: 391349, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50


epoch: 392229, loss: -1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 392239, loss: -1.0180473282161984e-06, rewards: -9.299999999999999, count: 50
epoch: 392249, loss: -1.1575222060855594e-06, rewards: -9.299999999999999, count: 50
epoch: 392259, loss: -4.32133674621582e-06, rewards: -9.299999999999999, count: 50
epoch: 392269, loss: -3.34060205204878e-05, rewards: -9.299999999999999, count: 50
epoch: 392279, loss: -0.00010699987615225837, rewards: -9.299999999999999, count: 50
epoch: 392289, loss: -1.6125441106851213e-05, rewards: -9.299999999999999, count: 50
epoch: 392299, loss: 3.5116670915158466e-05, rewards: -9.299999999999999, count: 50
epoch: 392309, loss: 1.604318640602287e-05, rewards: -9.299999999999999, count: 50
epoch: 392319, loss: -9.196996870741714e-06, rewards: -9.299999999999999, count: 50
epoch: 392329, loss: -6.335973921522964e-06, rewards: -9.299999999999999, count: 50
epoch: 392339, loss: 4.514455667958828e-06, rewards: -9.299999999999999, c

epoch: 393219, loss: -5.919933391851373e-05, rewards: -9.299999999999999, count: 50
epoch: 393229, loss: -2.6003122911788523e-05, rewards: -9.299999999999999, count: 50
epoch: 393239, loss: 6.024837603035849e-06, rewards: -9.299999999999999, count: 50
epoch: 393249, loss: 1.3036727978033014e-05, rewards: -9.299999999999999, count: 50
epoch: 393259, loss: 1.9478798094496597e-06, rewards: -9.299999999999999, count: 50
epoch: 393269, loss: -4.640817678591702e-06, rewards: -9.299999999999999, count: 50
epoch: 393279, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 393289, loss: 1.3875961712983553e-06, rewards: -9.299999999999999, count: 50
epoch: 393299, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 393309, loss: 6.210804031070438e-07, rewards: -9.299999999999999, count: 50
epoch: 393319, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 393329, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, cou

epoch: 394209, loss: 4.470348358154297e-07, rewards: -9.299999999999999, count: 50
epoch: 394219, loss: -1.668930025289228e-07, rewards: -9.299999999999999, count: 50
epoch: 394229, loss: -1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 394239, loss: -5.686283088834898e-07, rewards: -9.299999999999999, count: 50
epoch: 394249, loss: -1.1527538390510017e-06, rewards: -9.299999999999999, count: 50
epoch: 394259, loss: -4.363059815659653e-06, rewards: -9.299999999999999, count: 50
epoch: 394269, loss: -3.072142499149777e-05, rewards: -9.299999999999999, count: 50
epoch: 394279, loss: -0.0001161479958682321, rewards: -9.299999999999999, count: 50
epoch: 394289, loss: 5.19037257618038e-06, rewards: -9.299999999999999, count: 50
epoch: 394299, loss: 3.991723133367486e-05, rewards: -9.299999999999999, count: 50
epoch: 394309, loss: 5.468129984365078e-06, rewards: -9.299999999999999, count: 50
epoch: 394319, loss: -1.3768672943115234e-05, rewards: -9.299999999999999, coun

epoch: 395199, loss: -2.191066641898942e-06, rewards: -9.299999999999999, count: 50
epoch: 395209, loss: 2.577304940132308e-06, rewards: -9.299999999999999, count: 50
epoch: 395219, loss: -1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 395229, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 395239, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 395249, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 395259, loss: 1.1563300859052106e-06, rewards: -9.299999999999999, count: 50
epoch: 395269, loss: 1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 395279, loss: 1.0107755770150106e-05, rewards: -9.299999999999999, count: 50
epoch: 395289, loss: 0.00010581731476122513, rewards: -9.299999999999999, count: 50
epoch: 395299, loss: -9.708166180644184e-05, rewards: -9.299999999999999, count: 50
epoch: 395309, loss: -2.9366015951381996e-05, rewards: -9.299999999999999, co

epoch: 396189, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 396199, loss: 9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 396209, loss: 5.332231467036763e-06, rewards: -9.299999999999999, count: 50
epoch: 396219, loss: 5.259632962406613e-05, rewards: -9.299999999999999, count: 50
epoch: 396229, loss: 1.4264584024203941e-05, rewards: -9.299999999999999, count: 50
epoch: 396239, loss: 6.398319965228438e-05, rewards: -9.299999999999999, count: 50
epoch: 396249, loss: 3.0674935260321945e-05, rewards: -9.299999999999999, count: 50
epoch: 396259, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 396269, loss: -1.1171102414664347e-05, rewards: -9.299999999999999, count: 50
epoch: 396279, loss: -7.253885087266099e-06, rewards: -9.299999999999999, count: 50
epoch: 396289, loss: 1.0979175613101688e-06, rewards: -9.299999999999999, count: 50
epoch: 396299, loss: 2.83598910755245e-06, rewards: -9.299999999999999, count: 

epoch: 397179, loss: -7.723569979134481e-06, rewards: -9.299999999999999, count: 50
epoch: 397189, loss: 4.156827799306484e-06, rewards: -9.299999999999999, count: 50
epoch: 397199, loss: -1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 397209, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 397219, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 397229, loss: 6.747245606675278e-07, rewards: -9.299999999999999, count: 50
epoch: 397239, loss: -9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 397249, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 397259, loss: 4.971027465217048e-07, rewards: -9.299999999999999, count: 50
epoch: 397269, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 397279, loss: 1.5652179854441783e-06, rewards: -9.299999999999999, count: 50
epoch: 397289, loss: 7.015466508164536e-06, rewards: -9.299999999999999, count:

epoch: 398169, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 398179, loss: -9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 398189, loss: -1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 398199, loss: -5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 398209, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 398219, loss: 2.43902195506962e-06, rewards: -9.299999999999999, count: 50
epoch: 398229, loss: 1.9415616407059133e-05, rewards: -9.299999999999999, count: 50
epoch: 398239, loss: 0.0001378238230245188, rewards: -9.299999999999999, count: 50
epoch: 398249, loss: -4.953145980834961e-05, rewards: -9.299999999999999, count: 50
epoch: 398259, loss: -4.630803960026242e-05, rewards: -9.299999999999999, count: 50
epoch: 398269, loss: -1.0197162737313192e-05, rewards: -9.299999999999999, count: 50
epoch: 398279, loss: 1.1054277820221614e-05, rewards: -9.299999999999999, coun

epoch: 399159, loss: -1.2862682297054562e-06, rewards: -9.299999999999999, count: 50
epoch: 399169, loss: 9.262561775358336e-07, rewards: -9.299999999999999, count: 50
epoch: 399179, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 399189, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 399199, loss: -7.891654831837513e-07, rewards: -9.299999999999999, count: 50
epoch: 399209, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 399219, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 399229, loss: -2.3174286525318166e-06, rewards: -9.299999999999999, count: 50
epoch: 399239, loss: -1.0435581316414755e-05, rewards: -9.299999999999999, count: 50
epoch: 399249, loss: -6.192922592163086e-05, rewards: -9.299999999999999, count: 50
epoch: 399259, loss: -8.720159712538589e-06, rewards: -9.299999999999999, count: 50
epoch: 399269, loss: -3.3842326956801116e-05, rewards: -9.299999999999999

epoch: 400149, loss: -7.672309948247857e-06, rewards: -9.299999999999999, count: 50
epoch: 400159, loss: 2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 400169, loss: 1.2958049637745717e-06, rewards: -9.299999999999999, count: 50
epoch: 400179, loss: -1.5318394162022742e-06, rewards: -9.299999999999999, count: 50
epoch: 400189, loss: 8.82148754044465e-07, rewards: -9.299999999999999, count: 50
epoch: 400199, loss: -6.806850478824344e-07, rewards: -9.299999999999999, count: 50
epoch: 400209, loss: 6.902217819515499e-07, rewards: -9.299999999999999, count: 50
epoch: 400219, loss: -5.197525183575635e-07, rewards: -9.299999999999999, count: 50
epoch: 400229, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 400239, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 400249, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 400259, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, co

epoch: 401139, loss: -7.557869139418472e-07, rewards: -9.299999999999999, count: 50
epoch: 401149, loss: 2.4461746761517134e-06, rewards: -9.299999999999999, count: 50
epoch: 401159, loss: -1.714229597382655e-06, rewards: -9.299999999999999, count: 50
epoch: 401169, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 401179, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 401189, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 401199, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 401209, loss: -9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 401219, loss: -1.5652179854441783e-06, rewards: -9.299999999999999, count: 50
epoch: 401229, loss: -6.139278411865234e-06, rewards: -9.299999999999999, count: 50
epoch: 401239, loss: -3.446698246989399e-05, rewards: -9.299999999999999, count: 50
epoch: 401249, loss: -9.689211583463475e-05, rewards: -9.299999999999999, cou

epoch: 402129, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 402139, loss: -5.412101700130734e-07, rewards: -9.299999999999999, count: 50
epoch: 402149, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 402159, loss: -2.3245811462402344e-06, rewards: -9.299999999999999, count: 50
epoch: 402169, loss: -1.379251443722751e-05, rewards: -9.299999999999999, count: 50
epoch: 402179, loss: -0.00010258674592478201, rewards: -9.299999999999999, count: 50
epoch: 402189, loss: 7.908821135060862e-05, rewards: -9.299999999999999, count: 50
epoch: 402199, loss: -1.356720895273611e-05, rewards: -9.299999999999999, count: 50
epoch: 402209, loss: -2.915143886639271e-05, rewards: -9.299999999999999, count: 50
epoch: 402219, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 402229, loss: 1.0389089766249526e-05, rewards: -9.299999999999999, count: 50
epoch: 402239, loss: -2.191066641898942e-06, rewards: -9.299999999999999, c

epoch: 403119, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 403129, loss: -2.6476382117834873e-06, rewards: -9.299999999999999, count: 50
epoch: 403139, loss: -8.380413419217803e-06, rewards: -9.299999999999999, count: 50
epoch: 403149, loss: -4.3463707697810605e-05, rewards: -9.299999999999999, count: 50
epoch: 403159, loss: -6.968498200876638e-05, rewards: -9.299999999999999, count: 50
epoch: 403169, loss: 1.4756918062630575e-05, rewards: -9.299999999999999, count: 50
epoch: 403179, loss: 1.6617774235783145e-05, rewards: -9.299999999999999, count: 50
epoch: 403189, loss: -1.8434524463373236e-05, rewards: -9.299999999999999, count: 50
epoch: 403199, loss: 1.0797977665788494e-05, rewards: -9.299999999999999, count: 50
epoch: 403209, loss: -5.158186013431987e-06, rewards: -9.299999999999999, count: 50
epoch: 403219, loss: 3.0744076866540127e-06, rewards: -9.299999999999999, count: 50
epoch: 403229, loss: -2.1815299078298267e-06, rewards: -9.29999999999999

epoch: 404109, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 404119, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 404129, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 404139, loss: -6.294250738392293e-07, rewards: -9.299999999999999, count: 50
epoch: 404149, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 404159, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 404169, loss: 2.2649764730431343e-08, rewards: -9.299999999999999, count: 50
epoch: 404179, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 404189, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 404199, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 404209, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 404219, loss: -1.0132789611816406e-06, rewards: -9.299999999999999, co

epoch: 405099, loss: 5.125999678057269e-07, rewards: -9.299999999999999, count: 50
epoch: 405109, loss: 8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 405119, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 405129, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 405139, loss: 2.0015240806969814e-06, rewards: -9.299999999999999, count: 50
epoch: 405149, loss: 1.2484789294830989e-05, rewards: -9.299999999999999, count: 50
epoch: 405159, loss: 0.00010782480239868164, rewards: -9.299999999999999, count: 50
epoch: 405169, loss: -8.793711458565667e-05, rewards: -9.299999999999999, count: 50
epoch: 405179, loss: -9.813308679440524e-06, rewards: -9.299999999999999, count: 50
epoch: 405189, loss: 2.3812055587768555e-05, rewards: -9.299999999999999, count: 50
epoch: 405199, loss: 1.658797191339545e-05, rewards: -9.299999999999999, count: 50
epoch: 405209, loss: -1.9502640498103574e-06, rewards: -9.299999999999999, count:

epoch: 406089, loss: -9.233951459464151e-06, rewards: -9.299999999999999, count: 50
epoch: 406099, loss: 3.721833127201535e-05, rewards: -9.299999999999999, count: 50
epoch: 406109, loss: 1.928687015606556e-05, rewards: -9.299999999999999, count: 50
epoch: 406119, loss: -6.223916898306925e-06, rewards: -9.299999999999999, count: 50
epoch: 406129, loss: -8.866786629369017e-06, rewards: -9.299999999999999, count: 50
epoch: 406139, loss: 2.1207333702477627e-06, rewards: -9.299999999999999, count: 50
epoch: 406149, loss: 2.6619434265739983e-06, rewards: -9.299999999999999, count: 50
epoch: 406159, loss: -2.008676574405399e-06, rewards: -9.299999999999999, count: 50
epoch: 406169, loss: 6.210804031070438e-07, rewards: -9.299999999999999, count: 50
epoch: 406179, loss: -5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 406189, loss: -3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 406199, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count:

epoch: 407079, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, count: 50
epoch: 407089, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 407099, loss: -3.180503881594632e-06, rewards: -9.299999999999999, count: 50
epoch: 407109, loss: -4.9027203203877434e-05, rewards: -9.299999999999999, count: 50
epoch: 407119, loss: 1.197576511913212e-05, rewards: -9.299999999999999, count: 50
epoch: 407129, loss: -2.0673274775617756e-05, rewards: -9.299999999999999, count: 50
epoch: 407139, loss: -5.204677563597215e-06, rewards: -9.299999999999999, count: 50
epoch: 407149, loss: 3.1638146538170986e-06, rewards: -9.299999999999999, count: 50
epoch: 407159, loss: 4.154443558945786e-06, rewards: -9.299999999999999, count: 50
epoch: 407169, loss: 2.771615982055664e-06, rewards: -9.299999999999999, count: 50
epoch: 407179, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 407189, loss: -8.988380386654171e-07, rewards: -9.299999999999999, co

epoch: 408069, loss: 3.9118527638493106e-05, rewards: -9.299999999999999, count: 50
epoch: 408079, loss: 7.883548823883757e-05, rewards: -9.299999999999999, count: 50
epoch: 408089, loss: -3.5123826819472015e-05, rewards: -9.299999999999999, count: 50
epoch: 408099, loss: 5.6719782151049e-06, rewards: -9.299999999999999, count: 50
epoch: 408109, loss: 5.372762643673923e-06, rewards: -9.299999999999999, count: 50
epoch: 408119, loss: -5.823373612656724e-06, rewards: -9.299999999999999, count: 50
epoch: 408129, loss: 3.598928515202715e-06, rewards: -9.299999999999999, count: 50
epoch: 408139, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 408149, loss: -7.307529585887096e-07, rewards: -9.299999999999999, count: 50
epoch: 408159, loss: 1.0979175613101688e-06, rewards: -9.299999999999999, count: 50
epoch: 408169, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 408179, loss: -1.8846989178200602e-06, rewards: -9.299999999999999, count:

epoch: 409059, loss: 3.1732321076560766e-05, rewards: -9.299999999999999, count: 50
epoch: 409069, loss: 1.3513565136236139e-05, rewards: -9.299999999999999, count: 50
epoch: 409079, loss: -2.6131869162782095e-05, rewards: -9.299999999999999, count: 50
epoch: 409089, loss: 1.665949821472168e-05, rewards: -9.299999999999999, count: 50
epoch: 409099, loss: -6.777047929062974e-06, rewards: -9.299999999999999, count: 50
epoch: 409109, loss: 2.4783612389001064e-06, rewards: -9.299999999999999, count: 50
epoch: 409119, loss: -1.4805793853156501e-06, rewards: -9.299999999999999, count: 50
epoch: 409129, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 409139, loss: -2.1111964088049717e-06, rewards: -9.299999999999999, count: 50
epoch: 409149, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 409159, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 409169, loss: -6.926059654688288e-07, rewards: -9.299999999999999, c

epoch: 410049, loss: -4.681348855228862e-06, rewards: -9.299999999999999, count: 50
epoch: 410059, loss: -2.771615982055664e-05, rewards: -9.299999999999999, count: 50
epoch: 410069, loss: -0.00010998487414326519, rewards: -9.299999999999999, count: 50
epoch: 410079, loss: 5.155801773071289e-05, rewards: -9.299999999999999, count: 50
epoch: 410089, loss: 4.781484676641412e-06, rewards: -9.299999999999999, count: 50
epoch: 410099, loss: -2.1295547412591986e-05, rewards: -9.299999999999999, count: 50
epoch: 410109, loss: 1.1307000931992661e-05, rewards: -9.299999999999999, count: 50
epoch: 410119, loss: -1.7797947293729521e-06, rewards: -9.299999999999999, count: 50
epoch: 410129, loss: -1.5652179854441783e-06, rewards: -9.299999999999999, count: 50
epoch: 410139, loss: 1.7547607740198146e-06, rewards: -9.299999999999999, count: 50
epoch: 410149, loss: -1.1420249848015374e-06, rewards: -9.299999999999999, count: 50
epoch: 410159, loss: 6.389618079083448e-07, rewards: -9.299999999999999, 

epoch: 411039, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 411049, loss: -9.834766387939453e-07, rewards: -9.299999999999999, count: 50
epoch: 411059, loss: 3.2424927098873013e-07, rewards: -9.299999999999999, count: 50
epoch: 411069, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 411079, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 411089, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 411099, loss: 1.5580653780489229e-06, rewards: -9.299999999999999, count: 50
epoch: 411109, loss: 8.44836267788196e-06, rewards: -9.299999999999999, count: 50
epoch: 411119, loss: 6.576299347216263e-05, rewards: -9.299999999999999, count: 50
epoch: 411129, loss: -1.6267300452454947e-05, rewards: -9.299999999999999, count: 50
epoch: 411139, loss: 5.537271499633789e-05, rewards: -9.299999999999999, count: 50
epoch: 411149, loss: 7.528066817030776e-06, rewards: -9.299999999999999, count: 50

epoch: 412029, loss: 4.190206527709961e-05, rewards: -9.299999999999999, count: 50
epoch: 412039, loss: -2.393603244854603e-05, rewards: -9.299999999999999, count: 50
epoch: 412049, loss: -7.20739353710087e-06, rewards: -9.299999999999999, count: 50
epoch: 412059, loss: 1.1538267244759481e-05, rewards: -9.299999999999999, count: 50
epoch: 412069, loss: -4.565715698845452e-06, rewards: -9.299999999999999, count: 50
epoch: 412079, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 412089, loss: 1.363754222438729e-06, rewards: -9.299999999999999, count: 50
epoch: 412099, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 412109, loss: 7.152557657263969e-08, rewards: -9.299999999999999, count: 50
epoch: 412119, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 412129, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 412139, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 

epoch: 413019, loss: 4.7023295337567106e-05, rewards: -9.299999999999999, count: 50
epoch: 413029, loss: 5.195021731196903e-05, rewards: -9.299999999999999, count: 50
epoch: 413039, loss: 4.368543523014523e-05, rewards: -9.299999999999999, count: 50
epoch: 413049, loss: -2.1498202841030434e-05, rewards: -9.299999999999999, count: 50
epoch: 413059, loss: -1.7588137779966928e-05, rewards: -9.299999999999999, count: 50
epoch: 413069, loss: 7.586479114252143e-06, rewards: -9.299999999999999, count: 50
epoch: 413079, loss: 4.703998456534464e-06, rewards: -9.299999999999999, count: 50
epoch: 413089, loss: -4.611015356204007e-06, rewards: -9.299999999999999, count: 50
epoch: 413099, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 413109, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 413119, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 413129, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count:

epoch: 414009, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 414019, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 414029, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 414039, loss: -1.2934208371007117e-06, rewards: -9.299999999999999, count: 50
epoch: 414049, loss: -3.250837380619487e-06, rewards: -9.299999999999999, count: 50
epoch: 414059, loss: -1.433014858776005e-05, rewards: -9.299999999999999, count: 50
epoch: 414069, loss: -8.556961984140798e-05, rewards: -9.299999999999999, count: 50
epoch: 414079, loss: 4.8288107791449875e-05, rewards: -9.299999999999999, count: 50
epoch: 414089, loss: -4.441261262400076e-05, rewards: -9.299999999999999, count: 50
epoch: 414099, loss: 6.844997187727131e-06, rewards: -9.299999999999999, count: 50
epoch: 414109, loss: 1.1606216503423639e-05, rewards: -9.299999999999999, count: 50
epoch: 414119, loss: -9.47713851928711e-06, rewards: -9.299999999999999, coun

epoch: 414999, loss: 3.6756991903530434e-05, rewards: -9.299999999999999, count: 50
epoch: 415009, loss: -8.046627044677734e-06, rewards: -9.299999999999999, count: 50
epoch: 415019, loss: -1.2357235391391441e-05, rewards: -9.299999999999999, count: 50
epoch: 415029, loss: 6.293058504525106e-06, rewards: -9.299999999999999, count: 50
epoch: 415039, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 415049, loss: -2.8014183044433594e-06, rewards: -9.299999999999999, count: 50
epoch: 415059, loss: 2.1076202756376006e-06, rewards: -9.299999999999999, count: 50
epoch: 415069, loss: -1.3029575711698271e-06, rewards: -9.299999999999999, count: 50
epoch: 415079, loss: 1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 415089, loss: -5.197525183575635e-07, rewards: -9.299999999999999, count: 50
epoch: 415099, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 415109, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, cou

epoch: 415989, loss: -9.834766387939453e-07, rewards: -9.299999999999999, count: 50
epoch: 415999, loss: 1.2934208371007117e-06, rewards: -9.299999999999999, count: 50
epoch: 416009, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 416019, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 416029, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 416039, loss: 3.850460075227602e-07, rewards: -9.299999999999999, count: 50
epoch: 416049, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 416059, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 416069, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 416079, loss: 1.2958049637745717e-06, rewards: -9.299999999999999, count: 50
epoch: 416089, loss: 5.68151472180034e-06, rewards: -9.299999999999999, count: 50
epoch: 416099, loss: 5.011797111365013e-05, rewards: -9.299999999999999, count: 

epoch: 416979, loss: 1.920104114105925e-05, rewards: -9.299999999999999, count: 50
epoch: 416989, loss: 0.00010200261749560013, rewards: -9.299999999999999, count: 50
epoch: 416999, loss: -6.511330866487697e-05, rewards: -9.299999999999999, count: 50
epoch: 417009, loss: 3.0360220989678055e-05, rewards: -9.299999999999999, count: 50
epoch: 417019, loss: 4.017353148810798e-06, rewards: -9.299999999999999, count: 50
epoch: 417029, loss: -1.2975930985703599e-05, rewards: -9.299999999999999, count: 50
epoch: 417039, loss: 8.844137482810766e-06, rewards: -9.299999999999999, count: 50
epoch: 417049, loss: -4.640817678591702e-06, rewards: -9.299999999999999, count: 50
epoch: 417059, loss: 2.359151949349325e-06, rewards: -9.299999999999999, count: 50
epoch: 417069, loss: -1.2075901167918346e-06, rewards: -9.299999999999999, count: 50
epoch: 417079, loss: 1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 417089, loss: -6.508827254947391e-07, rewards: -9.299999999999999, coun

epoch: 417969, loss: 6.181955541251227e-05, rewards: -9.299999999999999, count: 50
epoch: 417979, loss: 2.2470951535069617e-06, rewards: -9.299999999999999, count: 50
epoch: 417989, loss: 5.0127506256103516e-05, rewards: -9.299999999999999, count: 50
epoch: 417999, loss: -1.3700723684451077e-05, rewards: -9.299999999999999, count: 50
epoch: 418009, loss: -1.583695484441705e-05, rewards: -9.299999999999999, count: 50
epoch: 418019, loss: 9.275674528907984e-06, rewards: -9.299999999999999, count: 50
epoch: 418029, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 418039, loss: -3.5417078834143467e-06, rewards: -9.299999999999999, count: 50
epoch: 418049, loss: 2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 418059, loss: -1.9347667148394976e-06, rewards: -9.299999999999999, count: 50
epoch: 418069, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 418079, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, co

epoch: 418959, loss: 1.3031959497311618e-05, rewards: -9.299999999999999, count: 50
epoch: 418969, loss: -3.193616976204794e-06, rewards: -9.299999999999999, count: 50
epoch: 418979, loss: -2.191066641898942e-06, rewards: -9.299999999999999, count: 50
epoch: 418989, loss: 2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 418999, loss: -1.9323824744788e-06, rewards: -9.299999999999999, count: 50
epoch: 419009, loss: 1.146793351836095e-06, rewards: -9.299999999999999, count: 50
epoch: 419019, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 419029, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 419039, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 419049, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 419059, loss: -1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 419069, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, c

epoch: 419949, loss: 1.313567190663889e-05, rewards: -9.299999999999999, count: 50
epoch: 419959, loss: 8.938670362113044e-05, rewards: -9.299999999999999, count: 50
epoch: 419969, loss: -6.0522557760123163e-05, rewards: -9.299999999999999, count: 50
epoch: 419979, loss: 3.953457053285092e-05, rewards: -9.299999999999999, count: 50
epoch: 419989, loss: 1.3036727978033014e-05, rewards: -9.299999999999999, count: 50
epoch: 419999, loss: -1.6664265785948373e-05, rewards: -9.299999999999999, count: 50
epoch: 420009, loss: 1.1563300859052106e-06, rewards: -9.299999999999999, count: 50
epoch: 420019, loss: 4.986524800187908e-06, rewards: -9.299999999999999, count: 50
epoch: 420029, loss: -3.6144256227998994e-06, rewards: -9.299999999999999, count: 50
epoch: 420039, loss: 2.1255016235954827e-06, rewards: -9.299999999999999, count: 50
epoch: 420049, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 420059, loss: 9.763240313986898e-07, rewards: -9.299999999999999, cou

epoch: 420939, loss: -1.070618600351736e-05, rewards: -9.299999999999999, count: 50
epoch: 420949, loss: 5.21540641784668e-06, rewards: -9.299999999999999, count: 50
epoch: 420959, loss: -1.4948844864193234e-06, rewards: -9.299999999999999, count: 50
epoch: 420969, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 420979, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 420989, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 420999, loss: -3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 421009, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 421019, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 421029, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 421039, loss: 1.8477439880371094e-06, rewards: -9.299999999999999, count: 50
epoch: 421049, loss: 6.730556378897745e-06, rewards: -9.299999999999999, count: 5

epoch: 421929, loss: -4.0518047171644866e-05, rewards: -9.299999999999999, count: 50
epoch: 421939, loss: 4.908442497253418e-05, rewards: -9.299999999999999, count: 50
epoch: 421949, loss: 7.088184247550089e-06, rewards: -9.299999999999999, count: 50
epoch: 421959, loss: -1.8303393517271616e-05, rewards: -9.299999999999999, count: 50
epoch: 421969, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 421979, loss: 5.316734132065903e-06, rewards: -9.299999999999999, count: 50
epoch: 421989, loss: -3.881454631482484e-06, rewards: -9.299999999999999, count: 50
epoch: 421999, loss: 1.7404556729161413e-06, rewards: -9.299999999999999, count: 50
epoch: 422009, loss: -9.834766387939453e-07, rewards: -9.299999999999999, count: 50
epoch: 422019, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 422029, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count: 50
epoch: 422039, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, coun

epoch: 422919, loss: 1.5283823813661e-05, rewards: -9.299999999999999, count: 50
epoch: 422929, loss: 0.00010814070992637426, rewards: -9.299999999999999, count: 50
epoch: 422939, loss: -7.837056909920648e-05, rewards: -9.299999999999999, count: 50
epoch: 422949, loss: 8.796453585091513e-06, rewards: -9.299999999999999, count: 50
epoch: 422959, loss: 2.7521848096512258e-05, rewards: -9.299999999999999, count: 50
epoch: 422969, loss: -2.219677071479964e-06, rewards: -9.299999999999999, count: 50
epoch: 422979, loss: -9.474754733673763e-06, rewards: -9.299999999999999, count: 50
epoch: 422989, loss: 3.976821972173639e-06, rewards: -9.299999999999999, count: 50
epoch: 422999, loss: 6.949901489861077e-07, rewards: -9.299999999999999, count: 50
epoch: 423009, loss: -2.0766258330695564e-06, rewards: -9.299999999999999, count: 50
epoch: 423019, loss: 1.4054775192562374e-06, rewards: -9.299999999999999, count: 50
epoch: 423029, loss: -8.988380386654171e-07, rewards: -9.299999999999999, count: 

epoch: 423909, loss: 2.1958351226203376e-06, rewards: -9.299999999999999, count: 50
epoch: 423919, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 423929, loss: 7.796287491146359e-07, rewards: -9.299999999999999, count: 50
epoch: 423939, loss: -4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 423949, loss: -7.915496667010302e-07, rewards: -9.299999999999999, count: 50
epoch: 423959, loss: 2.0265579436795633e-08, rewards: -9.299999999999999, count: 50
epoch: 423969, loss: 7.700920150455204e-07, rewards: -9.299999999999999, count: 50
epoch: 423979, loss: 1.4710426512465347e-06, rewards: -9.299999999999999, count: 50
epoch: 423989, loss: 4.138946678722277e-06, rewards: -9.299999999999999, count: 50
epoch: 423999, loss: 2.10058697120985e-05, rewards: -9.299999999999999, count: 50
epoch: 424009, loss: 0.000109975335362833, rewards: -9.299999999999999, count: 50
epoch: 424019, loss: -6.788015161873773e-05, rewards: -9.299999999999999, count: 50
e

epoch: 424899, loss: 4.611611439031549e-05, rewards: -9.299999999999999, count: 50
epoch: 424909, loss: 1.6617774235783145e-05, rewards: -9.299999999999999, count: 50
epoch: 424919, loss: -7.193088549684035e-06, rewards: -9.299999999999999, count: 50
epoch: 424929, loss: -9.84907183010364e-06, rewards: -9.299999999999999, count: 50
epoch: 424939, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 424949, loss: 3.713369324032101e-06, rewards: -9.299999999999999, count: 50
epoch: 424959, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 424969, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 424979, loss: 7.796287491146359e-07, rewards: -9.299999999999999, count: 50
epoch: 424989, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 424999, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 425009, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, coun

epoch: 425889, loss: -1.4173984936860506e-06, rewards: -9.299999999999999, count: 50
epoch: 425899, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 425909, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 425919, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 425929, loss: -5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 425939, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 425949, loss: -1.5258788721439487e-07, rewards: -9.299999999999999, count: 50
epoch: 425959, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 425969, loss: 6.19888282926695e-08, rewards: -9.299999999999999, count: 50
epoch: 425979, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 425989, loss: 1.2779236158166896e-06, rewards: -9.299999999999999, count: 50
epoch: 425999, loss: 1.8060206912196008e-06, rewards: -9.299999999999999, c

epoch: 426879, loss: -1.9323824744788e-06, rewards: -9.299999999999999, count: 50
epoch: 426889, loss: -1.714229597382655e-06, rewards: -9.299999999999999, count: 50
epoch: 426899, loss: 1.5175342014117632e-06, rewards: -9.299999999999999, count: 50
epoch: 426909, loss: -1.1372566177669796e-06, rewards: -9.299999999999999, count: 50
epoch: 426919, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 426929, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 426939, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 426949, loss: 5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 426959, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 426969, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 426979, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 426989, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, cou

epoch: 427869, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 427879, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 427889, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 427899, loss: 6.210804031070438e-07, rewards: -9.299999999999999, count: 50
epoch: 427909, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 427919, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 427929, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 427939, loss: 1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 427949, loss: 8.776188224146608e-06, rewards: -9.299999999999999, count: 50
epoch: 427959, loss: 7.693290535826236e-05, rewards: -9.299999999999999, count: 50
epoch: 427969, loss: -5.201816384214908e-05, rewards: -9.299999999999999, count: 50
epoch: 427979, loss: 4.182696284260601e-05, rewards: -9.299999999999999, count: 50

epoch: 428859, loss: 2.3583173970109783e-05, rewards: -9.299999999999999, count: 50
epoch: 428869, loss: 3.236532165828976e-06, rewards: -9.299999999999999, count: 50
epoch: 428879, loss: -1.0197162737313192e-05, rewards: -9.299999999999999, count: 50
epoch: 428889, loss: 8.243322554335464e-06, rewards: -9.299999999999999, count: 50
epoch: 428899, loss: -4.798173904418945e-06, rewards: -9.299999999999999, count: 50
epoch: 428909, loss: 2.100467781929183e-06, rewards: -9.299999999999999, count: 50
epoch: 428919, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 428929, loss: -2.2149085907585686e-06, rewards: -9.299999999999999, count: 50
epoch: 428939, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 428949, loss: -6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 428959, loss: -1.7046928633135394e-06, rewards: -9.299999999999999, count: 50
epoch: 428969, loss: -5.594491994997952e-06, rewards: -9.299999999999999, count

epoch: 429849, loss: -3.4725665045698406e-06, rewards: -9.299999999999999, count: 50
epoch: 429859, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 429869, loss: 1.043081283569336e-06, rewards: -9.299999999999999, count: 50
epoch: 429879, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 429889, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 429899, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 429909, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 429919, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 429929, loss: 1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 429939, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 429949, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 429959, loss: 1.0406970432086382e-06, rewards: -9.299999999999999, count: 50


epoch: 430839, loss: -5.19037257618038e-06, rewards: -9.299999999999999, count: 50
epoch: 430849, loss: 3.2007694699132117e-06, rewards: -9.299999999999999, count: 50
epoch: 430859, loss: -2.008676574405399e-06, rewards: -9.299999999999999, count: 50
epoch: 430869, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 430879, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 430889, loss: -7.355213256232673e-07, rewards: -9.299999999999999, count: 50
epoch: 430899, loss: -5.0067900048134106e-08, rewards: -9.299999999999999, count: 50
epoch: 430909, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 430919, loss: 1.4531611896018148e-06, rewards: -9.299999999999999, count: 50
epoch: 430929, loss: 7.314682079595514e-06, rewards: -9.299999999999999, count: 50
epoch: 430939, loss: 4.880189953837544e-05, rewards: -9.299999999999999, count: 50
epoch: 430949, loss: 4.908442497253418e-05, rewards: -9.299999999999999, count

epoch: 431829, loss: -3.083229239564389e-05, rewards: -9.299999999999999, count: 50
epoch: 431839, loss: -5.414485713117756e-06, rewards: -9.299999999999999, count: 50
epoch: 431849, loss: 1.0157823453482706e-05, rewards: -9.299999999999999, count: 50
epoch: 431859, loss: 2.33411788030935e-06, rewards: -9.299999999999999, count: 50
epoch: 431869, loss: -3.857612682622857e-06, rewards: -9.299999999999999, count: 50
epoch: 431879, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 431889, loss: 6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 431899, loss: -6.783008643651556e-07, rewards: -9.299999999999999, count: 50
epoch: 431909, loss: 6.949901489861077e-07, rewards: -9.299999999999999, count: 50
epoch: 431919, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 431929, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 431939, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 

epoch: 432819, loss: 1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 432829, loss: 3.39269627147587e-06, rewards: -9.299999999999999, count: 50
epoch: 432839, loss: 1.9823311959044077e-05, rewards: -9.299999999999999, count: 50
epoch: 432849, loss: 0.00011353730951668695, rewards: -9.299999999999999, count: 50
epoch: 432859, loss: -7.055878813844174e-05, rewards: -9.299999999999999, count: 50
epoch: 432869, loss: 4.9686432248563506e-06, rewards: -9.299999999999999, count: 50
epoch: 432879, loss: 2.4102926545310766e-05, rewards: -9.299999999999999, count: 50
epoch: 432889, loss: -9.149312973022461e-06, rewards: -9.299999999999999, count: 50
epoch: 432899, loss: -3.5965442748420173e-06, rewards: -9.299999999999999, count: 50
epoch: 432909, loss: 5.195141056901775e-06, rewards: -9.299999999999999, count: 50
epoch: 432919, loss: -3.250837380619487e-06, rewards: -9.299999999999999, count: 50
epoch: 432929, loss: 1.6915797687033773e-06, rewards: -9.299999999999999, coun

epoch: 433809, loss: 9.388208127347752e-05, rewards: -9.299999999999999, count: 50
epoch: 433819, loss: -7.005452789599076e-05, rewards: -9.299999999999999, count: 50
epoch: 433829, loss: 2.9499531592591666e-05, rewards: -9.299999999999999, count: 50
epoch: 433839, loss: 2.5013685444719158e-05, rewards: -9.299999999999999, count: 50
epoch: 433849, loss: -9.47713851928711e-06, rewards: -9.299999999999999, count: 50
epoch: 433859, loss: -8.133650226227473e-06, rewards: -9.299999999999999, count: 50
epoch: 433869, loss: 6.029606083757244e-06, rewards: -9.299999999999999, count: 50
epoch: 433879, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 433889, loss: -1.3589858554041712e-06, rewards: -9.299999999999999, count: 50
epoch: 433899, loss: 1.2469291732486454e-06, rewards: -9.299999999999999, count: 50
epoch: 433909, loss: -8.094310715023312e-07, rewards: -9.299999999999999, count: 50
epoch: 433919, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count

epoch: 434799, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 434809, loss: -1.4102458862907952e-06, rewards: -9.299999999999999, count: 50
epoch: 434819, loss: -3.5965442748420173e-06, rewards: -9.299999999999999, count: 50
epoch: 434829, loss: -1.7340184058411978e-05, rewards: -9.299999999999999, count: 50
epoch: 434839, loss: -0.00010048508556792513, rewards: -9.299999999999999, count: 50
epoch: 434849, loss: 6.737828516634181e-05, rewards: -9.299999999999999, count: 50
epoch: 434859, loss: -3.0686856916872784e-05, rewards: -9.299999999999999, count: 50
epoch: 434869, loss: -1.0880231457122136e-05, rewards: -9.299999999999999, count: 50
epoch: 434879, loss: 1.5900135622359812e-05, rewards: -9.299999999999999, count: 50
epoch: 434889, loss: -5.921125193708576e-06, rewards: -9.299999999999999, count: 50
epoch: 434899, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 434909, loss: 1.146793351836095e-06, rewards: -9.299999999999999,

epoch: 435789, loss: -1.0550022579991492e-06, rewards: -9.299999999999999, count: 50
epoch: 435799, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 435809, loss: -1.9323824744788e-06, rewards: -9.299999999999999, count: 50
epoch: 435819, loss: -9.946823411155492e-06, rewards: -9.299999999999999, count: 50
epoch: 435829, loss: -7.883548823883757e-05, rewards: -9.299999999999999, count: 50
epoch: 435839, loss: 4.89401827508118e-05, rewards: -9.299999999999999, count: 50
epoch: 435849, loss: -4.568934309645556e-05, rewards: -9.299999999999999, count: 50
epoch: 435859, loss: -1.9137858544127084e-05, rewards: -9.299999999999999, count: 50
epoch: 435869, loss: 1.4041662325325888e-05, rewards: -9.299999999999999, count: 50
epoch: 435879, loss: 7.063150405883789e-06, rewards: -9.299999999999999, count: 50
epoch: 435889, loss: -6.513595508295111e-06, rewards: -9.299999999999999, count: 50
epoch: 435899, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count

epoch: 436779, loss: -1.2440681530279107e-05, rewards: -9.299999999999999, count: 50
epoch: 436789, loss: 5.50627692064154e-06, rewards: -9.299999999999999, count: 50
epoch: 436799, loss: -3.173351387886214e-06, rewards: -9.299999999999999, count: 50
epoch: 436809, loss: 2.708434976739227e-06, rewards: -9.299999999999999, count: 50
epoch: 436819, loss: -2.806186785164755e-06, rewards: -9.299999999999999, count: 50
epoch: 436829, loss: 1.7333030655208859e-06, rewards: -9.299999999999999, count: 50
epoch: 436839, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 436849, loss: -1.0550022579991492e-06, rewards: -9.299999999999999, count: 50
epoch: 436859, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 436869, loss: 1.2731552487821318e-06, rewards: -9.299999999999999, count: 50
epoch: 436879, loss: 5.258321834844537e-06, rewards: -9.299999999999999, count: 50
epoch: 436889, loss: 4.270434510544874e-05, rewards: -9.299999999999999, count: 

epoch: 437769, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 437779, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 437789, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 437799, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 437809, loss: -1.043081283569336e-06, rewards: -9.299999999999999, count: 50
epoch: 437819, loss: -1.9323824744788e-06, rewards: -9.299999999999999, count: 50
epoch: 437829, loss: -5.151033292349894e-06, rewards: -9.299999999999999, count: 50
epoch: 437839, loss: -2.9275417546159588e-05, rewards: -9.299999999999999, count: 50
epoch: 437849, loss: -0.00010952830052701756, rewards: -9.299999999999999, count: 50
epoch: 437859, loss: 4.479765993892215e-05, rewards: -9.299999999999999, count: 50
epoch: 437869, loss: 1.281976710743038e-05, rewards: -9.299999999999999, count: 50
epoch: 437879, loss: -2.1845102310180664e-05, rewards: -9.299999999999999, co

epoch: 438759, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 438769, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 438779, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 438789, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 438799, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 438809, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 438819, loss: -3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 438829, loss: -3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 438839, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 438849, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 438859, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 438869, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, coun

epoch: 439749, loss: -1.1533498764038086e-05, rewards: -9.299999999999999, count: 50
epoch: 439759, loss: -6.279825902311131e-05, rewards: -9.299999999999999, count: 50
epoch: 439769, loss: -2.4534463591407984e-05, rewards: -9.299999999999999, count: 50
epoch: 439779, loss: 6.029606083757244e-06, rewards: -9.299999999999999, count: 50
epoch: 439789, loss: 1.3310909707797691e-05, rewards: -9.299999999999999, count: 50
epoch: 439799, loss: 3.713369324032101e-06, rewards: -9.299999999999999, count: 50
epoch: 439809, loss: -4.32133674621582e-06, rewards: -9.299999999999999, count: 50
epoch: 439819, loss: -1.4710426512465347e-06, rewards: -9.299999999999999, count: 50
epoch: 439829, loss: 1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 439839, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 439849, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count: 50
epoch: 439859, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, co

epoch: 440739, loss: 1.542091376904864e-05, rewards: -9.299999999999999, count: 50
epoch: 440749, loss: 3.1633375328965485e-05, rewards: -9.299999999999999, count: 50
epoch: 440759, loss: -2.8747319447575137e-05, rewards: -9.299999999999999, count: 50
epoch: 440769, loss: 6.999969627941027e-06, rewards: -9.299999999999999, count: 50
epoch: 440779, loss: 4.122257450944744e-06, rewards: -9.299999999999999, count: 50
epoch: 440789, loss: -5.76257707507466e-06, rewards: -9.299999999999999, count: 50
epoch: 440799, loss: 4.156827799306484e-06, rewards: -9.299999999999999, count: 50
epoch: 440809, loss: -2.4437904357910156e-06, rewards: -9.299999999999999, count: 50
epoch: 440819, loss: 1.4531611896018148e-06, rewards: -9.299999999999999, count: 50
epoch: 440829, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 440839, loss: -7.96318033735588e-07, rewards: -9.299999999999999, count: 50
epoch: 440849, loss: 8.893013045963016e-07, rewards: -9.299999999999999, count: 5

epoch: 441729, loss: 8.081197847786825e-06, rewards: -9.299999999999999, count: 50
epoch: 441739, loss: -5.0449370974092744e-06, rewards: -9.299999999999999, count: 50
epoch: 441749, loss: 3.2973289307847153e-06, rewards: -9.299999999999999, count: 50
epoch: 441759, loss: -1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 441769, loss: 8.010864007701457e-07, rewards: -9.299999999999999, count: 50
epoch: 441779, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 441789, loss: -1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 441799, loss: -2.3221969058795366e-06, rewards: -9.299999999999999, count: 50
epoch: 441809, loss: -4.34637058788212e-06, rewards: -9.299999999999999, count: 50
epoch: 441819, loss: -1.7193555322592147e-05, rewards: -9.299999999999999, count: 50
epoch: 441829, loss: -8.58056519064121e-05, rewards: -9.299999999999999, count: 50
epoch: 441839, loss: 4.226922828820534e-05, rewards: -9.299999999999999, cou

epoch: 442719, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 442729, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 442739, loss: 3.2424927098873013e-07, rewards: -9.299999999999999, count: 50
epoch: 442749, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 442759, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 442769, loss: -6.294250738392293e-07, rewards: -9.299999999999999, count: 50
epoch: 442779, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 442789, loss: -7.41362555345404e-06, rewards: -9.299999999999999, count: 50
epoch: 442799, loss: -5.009293454349972e-05, rewards: -9.299999999999999, count: 50
epoch: 442809, loss: -4.476308822631836e-05, rewards: -9.299999999999999, count: 50
epoch: 442819, loss: -3.168702096445486e-05, rewards: -9.299999999999999, count: 50
epoch: 442829, loss: 3.0015706215635873e-05, rewards: -9.299999999999999, c

epoch: 443709, loss: 1.3661384627994266e-06, rewards: -9.299999999999999, count: 50
epoch: 443719, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 443729, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 443739, loss: 4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 443749, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 443759, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 443769, loss: 5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 443779, loss: 2.3543834686279297e-06, rewards: -9.299999999999999, count: 50
epoch: 443789, loss: 1.4997720427345484e-05, rewards: -9.299999999999999, count: 50
epoch: 443799, loss: 0.00010892152931774035, rewards: -9.299999999999999, count: 50
epoch: 443809, loss: -7.946968253236264e-05, rewards: -9.299999999999999, count: 50
epoch: 443819, loss: 5.657672772940714e-06, rewards: -9.299999999999999, count: 5

epoch: 444699, loss: -7.402896926578251e-07, rewards: -9.299999999999999, count: 50
epoch: 444709, loss: -7.402896926578251e-07, rewards: -9.299999999999999, count: 50
epoch: 444719, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 444729, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 444739, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 444749, loss: -3.0303001494758064e-06, rewards: -9.299999999999999, count: 50
epoch: 444759, loss: -2.0719766325782984e-05, rewards: -9.299999999999999, count: 50
epoch: 444769, loss: -0.00013151764869689941, rewards: -9.299999999999999, count: 50
epoch: 444779, loss: 5.469918323797174e-05, rewards: -9.299999999999999, count: 50
epoch: 444789, loss: 3.8839578337501734e-05, rewards: -9.299999999999999, count: 50
epoch: 444799, loss: -5.662441253662109e-06, rewards: -9.299999999999999, count: 50
epoch: 444809, loss: -1.6466379747726023e-05, rewards: -9.299999999999999,

epoch: 445689, loss: 6.465077603934333e-05, rewards: -9.299999999999999, count: 50
epoch: 445699, loss: -3.114938635917497e-06, rewards: -9.299999999999999, count: 50
epoch: 445709, loss: -2.1879672203795053e-05, rewards: -9.299999999999999, count: 50
epoch: 445719, loss: 1.0797977665788494e-05, rewards: -9.299999999999999, count: 50
epoch: 445729, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 445739, loss: -3.2973289307847153e-06, rewards: -9.299999999999999, count: 50
epoch: 445749, loss: 2.9206275939941406e-06, rewards: -9.299999999999999, count: 50
epoch: 445759, loss: -1.7702579953038367e-06, rewards: -9.299999999999999, count: 50
epoch: 445769, loss: 1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 445779, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 445789, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 445799, loss: 8.940696716308594e-07, rewards: -9.299999999999999, co

epoch: 446679, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 446689, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 446699, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 446709, loss: -4.5657156988454517e-07, rewards: -9.299999999999999, count: 50
epoch: 446719, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 446729, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 446739, loss: -1.3029575711698271e-06, rewards: -9.299999999999999, count: 50
epoch: 446749, loss: -8.019208507903386e-06, rewards: -9.299999999999999, count: 50
epoch: 446759, loss: -6.971955008339137e-05, rewards: -9.299999999999999, count: 50
epoch: 446769, loss: 3.3895968954311684e-05, rewards: -9.299999999999999, count: 50
epoch: 446779, loss: -5.108833283884451e-05, rewards: -9.299999999999999, count: 50
epoch: 446789, loss: -2.5135279429377988e-05, rewards: -9.299999999999999

epoch: 447669, loss: -2.064704858639743e-06, rewards: -9.299999999999999, count: 50
epoch: 447679, loss: -3.0446053642663173e-06, rewards: -9.299999999999999, count: 50
epoch: 447689, loss: -7.616281436639838e-06, rewards: -9.299999999999999, count: 50
epoch: 447699, loss: -3.680586814880371e-05, rewards: -9.299999999999999, count: 50
epoch: 447709, loss: -8.69214563863352e-05, rewards: -9.299999999999999, count: 50
epoch: 447719, loss: 3.6656856536865234e-05, rewards: -9.299999999999999, count: 50
epoch: 447729, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 447739, loss: -1.0744332939793821e-05, rewards: -9.299999999999999, count: 50
epoch: 447749, loss: 1.0435581316414755e-05, rewards: -9.299999999999999, count: 50
epoch: 447759, loss: -7.289648237929214e-06, rewards: -9.299999999999999, count: 50
epoch: 447769, loss: 4.34637058788212e-06, rewards: -9.299999999999999, count: 50
epoch: 447779, loss: -1.8358230136072962e-06, rewards: -9.299999999999999, c

epoch: 448659, loss: -3.727674538822612e-06, rewards: -9.299999999999999, count: 50
epoch: 448669, loss: -1.5187263670668472e-05, rewards: -9.299999999999999, count: 50
epoch: 448679, loss: -8.407593122683465e-05, rewards: -9.299999999999999, count: 50
epoch: 448689, loss: 4.237413304508664e-05, rewards: -9.299999999999999, count: 50
epoch: 448699, loss: -4.2753221350722015e-05, rewards: -9.299999999999999, count: 50
epoch: 448709, loss: 1.6369820514228195e-05, rewards: -9.299999999999999, count: 50
epoch: 448719, loss: 1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 448729, loss: -6.104707608756144e-06, rewards: -9.299999999999999, count: 50
epoch: 448739, loss: 5.078315552964341e-06, rewards: -9.299999999999999, count: 50
epoch: 448749, loss: -3.362893949088175e-06, rewards: -9.299999999999999, count: 50
epoch: 448759, loss: 2.2923945834918413e-06, rewards: -9.299999999999999, count: 50
epoch: 448769, loss: -3.492832263418677e-07, rewards: -9.299999999999999, co

epoch: 449639, loss: -1.6844272749949596e-06, rewards: -9.299999999999999, count: 50
epoch: 449649, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 449659, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 449669, loss: -4.2915345943583816e-08, rewards: -9.299999999999999, count: 50
epoch: 449679, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 449689, loss: -9.179115068036481e-08, rewards: -9.299999999999999, count: 50
epoch: 449699, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 449709, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 449719, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 449729, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 449739, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 449749, loss: -1.6927718604620168e-07, rewards: -9.299999999999999,

epoch: 450629, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 450639, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 450649, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 450659, loss: -7.152557657263969e-08, rewards: -9.299999999999999, count: 50
epoch: 450669, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 450679, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, count: 50
epoch: 450689, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 450699, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 450709, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 450719, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 450729, loss: -9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 450739, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, 

epoch: 451619, loss: -2.5212764285242883e-06, rewards: -9.299999999999999, count: 50
epoch: 451629, loss: -1.8621683921082877e-05, rewards: -9.299999999999999, count: 50
epoch: 451639, loss: -0.0001274168462259695, rewards: -9.299999999999999, count: 50
epoch: 451649, loss: 6.703018880216405e-05, rewards: -9.299999999999999, count: 50
epoch: 451659, loss: 3.030180960195139e-05, rewards: -9.299999999999999, count: 50
epoch: 451669, loss: -1.452088326914236e-05, rewards: -9.299999999999999, count: 50
epoch: 451679, loss: -1.5286206689779647e-05, rewards: -9.299999999999999, count: 50
epoch: 451689, loss: 3.3080577850341797e-06, rewards: -9.299999999999999, count: 50
epoch: 451699, loss: 4.932880528940586e-06, rewards: -9.299999999999999, count: 50
epoch: 451709, loss: -2.88605679088505e-06, rewards: -9.299999999999999, count: 50
epoch: 451719, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count: 50
epoch: 451729, loss: 1.043081283569336e-06, rewards: -9.299999999999999, coun

epoch: 452609, loss: -8.52346420288086e-06, rewards: -9.299999999999999, count: 50
epoch: 452619, loss: -5.431890531326644e-05, rewards: -9.299999999999999, count: 50
epoch: 452629, loss: -3.1934978323988616e-05, rewards: -9.299999999999999, count: 50
epoch: 452639, loss: -2.7996302378596738e-05, rewards: -9.299999999999999, count: 50
epoch: 452649, loss: 3.019809810211882e-05, rewards: -9.299999999999999, count: 50
epoch: 452659, loss: -6.005764134897618e-06, rewards: -9.299999999999999, count: 50
epoch: 452669, loss: -6.185769962030463e-06, rewards: -9.299999999999999, count: 50
epoch: 452679, loss: 6.368160029524006e-06, rewards: -9.299999999999999, count: 50
epoch: 452689, loss: -4.332065600465285e-06, rewards: -9.299999999999999, count: 50
epoch: 452699, loss: 2.9206275939941406e-06, rewards: -9.299999999999999, count: 50
epoch: 452709, loss: -1.9371509552001953e-06, rewards: -9.299999999999999, count: 50
epoch: 452719, loss: 8.165836220541678e-07, rewards: -9.299999999999999, cou

epoch: 453599, loss: -4.277110201655887e-05, rewards: -9.299999999999999, count: 50
epoch: 453609, loss: -2.3245811462402344e-06, rewards: -9.299999999999999, count: 50
epoch: 453619, loss: 1.5000105122453533e-05, rewards: -9.299999999999999, count: 50
epoch: 453629, loss: 4.701614216173766e-06, rewards: -9.299999999999999, count: 50
epoch: 453639, loss: -5.117654836794827e-06, rewards: -9.299999999999999, count: 50
epoch: 453649, loss: -6.783008643651556e-07, rewards: -9.299999999999999, count: 50
epoch: 453659, loss: 2.3412703740177676e-06, rewards: -9.299999999999999, count: 50
epoch: 453669, loss: -1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 453679, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 453689, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 453699, loss: 2.0265579436795633e-08, rewards: -9.299999999999999, count: 50
epoch: 453709, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, co

epoch: 454589, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 454599, loss: 1.363754222438729e-06, rewards: -9.299999999999999, count: 50
epoch: 454609, loss: 4.117488970223349e-06, rewards: -9.299999999999999, count: 50
epoch: 454619, loss: 2.470135768817272e-05, rewards: -9.299999999999999, count: 50
epoch: 454629, loss: 0.00011764168448280543, rewards: -9.299999999999999, count: 50
epoch: 454639, loss: -5.514860094990581e-05, rewards: -9.299999999999999, count: 50
epoch: 454649, loss: -1.4142990039545111e-05, rewards: -9.299999999999999, count: 50
epoch: 454659, loss: 2.3704767954768613e-05, rewards: -9.299999999999999, count: 50
epoch: 454669, loss: -2.911090859925025e-06, rewards: -9.299999999999999, count: 50
epoch: 454679, loss: -6.270408448472153e-06, rewards: -9.299999999999999, count: 50
epoch: 454689, loss: 5.407333446783014e-06, rewards: -9.299999999999999, count: 50
epoch: 454699, loss: -2.9981135867274133e-06, rewards: -9.299999999999999, count

epoch: 455579, loss: 1.1181831496287487e-06, rewards: -9.299999999999999, count: 50
epoch: 455589, loss: 5.388259864957945e-07, rewards: -9.299999999999999, count: 50
epoch: 455599, loss: -1.7428397995900013e-06, rewards: -9.299999999999999, count: 50
epoch: 455609, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 455619, loss: 1.5175342014117632e-06, rewards: -9.299999999999999, count: 50
epoch: 455629, loss: 3.076791699641035e-06, rewards: -9.299999999999999, count: 50
epoch: 455639, loss: 1.0389089766249526e-05, rewards: -9.299999999999999, count: 50
epoch: 455649, loss: 5.630731538985856e-05, rewards: -9.299999999999999, count: 50
epoch: 455659, loss: 2.832651080098003e-05, rewards: -9.299999999999999, count: 50
epoch: 455669, loss: 1.2209415217512287e-05, rewards: -9.299999999999999, count: 50
epoch: 455679, loss: -2.4455785023747012e-05, rewards: -9.299999999999999, count: 50
epoch: 455689, loss: 1.729369250824675e-05, rewards: -9.299999999999999, count:

epoch: 456569, loss: -1.1349916348990519e-05, rewards: -9.299999999999999, count: 50
epoch: 456579, loss: 5.587339273915859e-06, rewards: -9.299999999999999, count: 50
epoch: 456589, loss: -3.4046172459056834e-06, rewards: -9.299999999999999, count: 50
epoch: 456599, loss: 2.528428922232706e-06, rewards: -9.299999999999999, count: 50
epoch: 456609, loss: -2.348423095099861e-06, rewards: -9.299999999999999, count: 50
epoch: 456619, loss: 1.2958049637745717e-06, rewards: -9.299999999999999, count: 50
epoch: 456629, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 456639, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 456649, loss: 1.0406970432086382e-06, rewards: -9.299999999999999, count: 50
epoch: 456659, loss: 3.983974238508381e-06, rewards: -9.299999999999999, count: 50
epoch: 456669, loss: 2.5942325009964406e-05, rewards: -9.299999999999999, count: 50
epoch: 456679, loss: 0.0001221299171447754, rewards: -9.299999999999999, coun

epoch: 457559, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 457569, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 457579, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 457589, loss: 1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 457599, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 457609, loss: 1.0406970432086382e-06, rewards: -9.299999999999999, count: 50
epoch: 457619, loss: 2.7263165520707844e-06, rewards: -9.299999999999999, count: 50
epoch: 457629, loss: 1.5953779438859783e-05, rewards: -9.299999999999999, count: 50
epoch: 457639, loss: 0.00010344624752178788, rewards: -9.299999999999999, count: 50
epoch: 457649, loss: -7.286191248567775e-05, rewards: -9.299999999999999, count: 50
epoch: 457659, loss: 2.2735595848644152e-05, rewards: -9.299999999999999, count: 50
epoch: 457669, loss: 2.1145344362594187e-05, rewards: -9.299999999999999, 

epoch: 458549, loss: 2.2274256480159238e-05, rewards: -9.299999999999999, count: 50
epoch: 458559, loss: 9.447336196899414e-05, rewards: -9.299999999999999, count: 50
epoch: 458569, loss: -5.461812179419212e-05, rewards: -9.299999999999999, count: 50
epoch: 458579, loss: 3.605604069889523e-05, rewards: -9.299999999999999, count: 50
epoch: 458589, loss: -1.5870333299972117e-05, rewards: -9.299999999999999, count: 50
epoch: 458599, loss: 5.21540641784668e-06, rewards: -9.299999999999999, count: 50
epoch: 458609, loss: -1.693964009064075e-06, rewards: -9.299999999999999, count: 50
epoch: 458619, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 458629, loss: -2.0742415927088587e-06, rewards: -9.299999999999999, count: 50
epoch: 458639, loss: 2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 458649, loss: -7.307529585887096e-07, rewards: -9.299999999999999, count: 50
epoch: 458659, loss: -1.978874252017704e-06, rewards: -9.299999999999999, coun

epoch: 459539, loss: 5.050420804764144e-05, rewards: -9.299999999999999, count: 50
epoch: 459549, loss: 2.1797419321956113e-05, rewards: -9.299999999999999, count: 50
epoch: 459559, loss: 6.574392318725586e-05, rewards: -9.299999999999999, count: 50
epoch: 459569, loss: 3.192305666743778e-05, rewards: -9.299999999999999, count: 50
epoch: 459579, loss: 3.273486981925089e-06, rewards: -9.299999999999999, count: 50
epoch: 459589, loss: -9.84907183010364e-06, rewards: -9.299999999999999, count: 50
epoch: 459599, loss: -8.165836334228516e-06, rewards: -9.299999999999999, count: 50
epoch: 459609, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 459619, loss: 3.2031537102739094e-06, rewards: -9.299999999999999, count: 50
epoch: 459629, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 459639, loss: -1.1372566177669796e-06, rewards: -9.299999999999999, count: 50
epoch: 459649, loss: 4.470348358154297e-07, rewards: -9.299999999999999, count: 50

epoch: 460529, loss: 1.2452602277335245e-05, rewards: -9.299999999999999, count: 50
epoch: 460539, loss: 8.086323941824958e-05, rewards: -9.299999999999999, count: 50
epoch: 460549, loss: -4.311919110477902e-05, rewards: -9.299999999999999, count: 50
epoch: 460559, loss: 4.678011100622825e-05, rewards: -9.299999999999999, count: 50
epoch: 460569, loss: -2.541542016842868e-06, rewards: -9.299999999999999, count: 50
epoch: 460579, loss: -1.5522242392762564e-05, rewards: -9.299999999999999, count: 50
epoch: 460589, loss: 8.44836267788196e-06, rewards: -9.299999999999999, count: 50
epoch: 460599, loss: -7.307529585887096e-07, rewards: -9.299999999999999, count: 50
epoch: 460609, loss: -1.817941665649414e-06, rewards: -9.299999999999999, count: 50
epoch: 460619, loss: 1.5246868088070187e-06, rewards: -9.299999999999999, count: 50
epoch: 460629, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 460639, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count

epoch: 461519, loss: -6.294250738392293e-07, rewards: -9.299999999999999, count: 50
epoch: 461529, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 461539, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 461549, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 461559, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 461569, loss: -2.2220610844669864e-06, rewards: -9.299999999999999, count: 50
epoch: 461579, loss: -1.71411029441515e-05, rewards: -9.299999999999999, count: 50
epoch: 461589, loss: -0.00013624787970911711, rewards: -9.299999999999999, count: 50
epoch: 461599, loss: 6.125331128714606e-05, rewards: -9.299999999999999, count: 50
epoch: 461609, loss: 4.717707633972168e-05, rewards: -9.299999999999999, count: 50
epoch: 461619, loss: 1.0435581316414755e-05, rewards: -9.299999999999999, count: 50
epoch: 461629, loss: -1.0435581316414755e-05, rewards: -9.299999999999999, c

epoch: 462509, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 462519, loss: 2.9206275939941406e-06, rewards: -9.299999999999999, count: 50
epoch: 462529, loss: 2.214789310528431e-05, rewards: -9.299999999999999, count: 50
epoch: 462539, loss: 0.00013118029164616019, rewards: -9.299999999999999, count: 50
epoch: 462549, loss: -4.638791142497212e-05, rewards: -9.299999999999999, count: 50
epoch: 462559, loss: -3.901362288161181e-05, rewards: -9.299999999999999, count: 50
epoch: 462569, loss: 4.986524800187908e-06, rewards: -9.299999999999999, count: 50
epoch: 462579, loss: 1.5958547010086477e-05, rewards: -9.299999999999999, count: 50
epoch: 462589, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 462599, loss: -6.109476089477539e-06, rewards: -9.299999999999999, count: 50
epoch: 462609, loss: 2.2470951535069617e-06, rewards: -9.299999999999999, count: 50
epoch: 462619, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 

epoch: 463499, loss: 1.5950203078318737e-06, rewards: -9.299999999999999, count: 50
epoch: 463509, loss: 2.1288395146257244e-05, rewards: -9.299999999999999, count: 50
epoch: 463519, loss: 5.856752522959141e-06, rewards: -9.299999999999999, count: 50
epoch: 463529, loss: -7.326602826651651e-06, rewards: -9.299999999999999, count: 50
epoch: 463539, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 463549, loss: 3.0612945920438506e-06, rewards: -9.299999999999999, count: 50
epoch: 463559, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 463569, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 463579, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 463589, loss: -5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 463599, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 463609, loss: 3.099441414633475e-07, rewards: -9.299999999999999, count

epoch: 464489, loss: -1.1527538390510017e-06, rewards: -9.299999999999999, count: 50
epoch: 464499, loss: 8.594989822086063e-07, rewards: -9.299999999999999, count: 50
epoch: 464509, loss: -5.197525183575635e-07, rewards: -9.299999999999999, count: 50
epoch: 464519, loss: 4.994869300389837e-07, rewards: -9.299999999999999, count: 50
epoch: 464529, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 464539, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 464549, loss: -8.594989822086063e-07, rewards: -9.299999999999999, count: 50
epoch: 464559, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 464569, loss: -9.334087280876702e-07, rewards: -9.299999999999999, count: 50
epoch: 464579, loss: -1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 464589, loss: -1.0102987289428711e-05, rewards: -9.299999999999999, count: 50
epoch: 464599, loss: -9.652256994741037e-05, rewards: -9.299999999999999, c

epoch: 465479, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 465489, loss: 6.210804031070438e-07, rewards: -9.299999999999999, count: 50
epoch: 465499, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 465509, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 465519, loss: -3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 465529, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 465539, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 465549, loss: -6.72578789817635e-06, rewards: -9.299999999999999, count: 50
epoch: 465559, loss: -7.559895311715081e-05, rewards: -9.299999999999999, count: 50
epoch: 465569, loss: 7.264137093443424e-05, rewards: -9.299999999999999, count: 50
epoch: 465579, loss: -7.718801498413086e-06, rewards: -9.299999999999999, count: 50
epoch: 465589, loss: -2.294778823852539e-05, rewards: -9.299999999999999, count

epoch: 466469, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 466479, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 466489, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 466499, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 466509, loss: 1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 466519, loss: 8.131265531119425e-06, rewards: -9.299999999999999, count: 50
epoch: 466529, loss: 5.630731538985856e-05, rewards: -9.299999999999999, count: 50
epoch: 466539, loss: 2.1322965039871633e-05, rewards: -9.299999999999999, count: 50
epoch: 466549, loss: 4.468798579182476e-05, rewards: -9.299999999999999, count: 50
epoch: 466559, loss: -2.1741390810348094e-05, rewards: -9.299999999999999, count: 50
epoch: 466569, loss: -1.197576511913212e-05, rewards: -9.299999999999999, count: 50
epoch: 466579, loss: 1.1291504051769152e-05, rewards: -9.299999999999999, count

epoch: 467459, loss: -9.179115068036481e-08, rewards: -9.299999999999999, count: 50
epoch: 467469, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 467479, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 467489, loss: -1.8835068260614207e-07, rewards: -9.299999999999999, count: 50
epoch: 467499, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 467509, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 467519, loss: 9.262561775358336e-07, rewards: -9.299999999999999, count: 50
epoch: 467529, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 467539, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 467549, loss: 7.350444775511278e-06, rewards: -9.299999999999999, count: 50
epoch: 467559, loss: 6.48248169454746e-05, rewards: -9.299999999999999, count: 50
epoch: 467569, loss: -2.2548436390934512e-05, rewards: -9.299999999999999, cou

epoch: 468449, loss: -6.034374109731289e-06, rewards: -9.299999999999999, count: 50
epoch: 468459, loss: 1.2469291732486454e-06, rewards: -9.299999999999999, count: 50
epoch: 468469, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 468479, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 468489, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 468499, loss: 9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 468509, loss: -1.0550022579991492e-06, rewards: -9.299999999999999, count: 50
epoch: 468519, loss: 1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 468529, loss: 1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 468539, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 468549, loss: 1.840591380641854e-06, rewards: -9.299999999999999, count: 50
epoch: 468559, loss: 1.0832548468897585e-05, rewards: -9.299999999999999, count

epoch: 469439, loss: 6.997585160206654e-07, rewards: -9.299999999999999, count: 50
epoch: 469449, loss: -3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 469459, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 469469, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 469479, loss: -4.073381205671467e-06, rewards: -9.299999999999999, count: 50
epoch: 469489, loss: -2.0846127881668508e-05, rewards: -9.299999999999999, count: 50
epoch: 469499, loss: -0.00010807752551045269, rewards: -9.299999999999999, count: 50
epoch: 469509, loss: 6.739139644196257e-05, rewards: -9.299999999999999, count: 50
epoch: 469519, loss: -1.924276330100838e-05, rewards: -9.299999999999999, count: 50
epoch: 469529, loss: -1.3899802979722153e-05, rewards: -9.299999999999999, count: 50
epoch: 469539, loss: 1.4318227840703912e-05, rewards: -9.299999999999999, count: 50
epoch: 469549, loss: -6.226301138667623e-06, rewards: -9.299999999999999, 

epoch: 470429, loss: 8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 470439, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 470449, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 470459, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 470469, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 470479, loss: -5.412101700130734e-07, rewards: -9.299999999999999, count: 50
epoch: 470489, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 470499, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 470509, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 470519, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 470529, loss: -2.8884410312457476e-06, rewards: -9.299999999999999, count: 50
epoch: 470539, loss: -2.081871025438886e-05, rewards: -9.299999999999999, cou

epoch: 471419, loss: -4.755258487421088e-06, rewards: -9.299999999999999, count: 50
epoch: 471429, loss: 5.3236482926877216e-05, rewards: -9.299999999999999, count: 50
epoch: 471439, loss: -5.346536454453599e-06, rewards: -9.299999999999999, count: 50
epoch: 471449, loss: -1.8870830899686553e-05, rewards: -9.299999999999999, count: 50
epoch: 471459, loss: 4.895925485470798e-06, rewards: -9.299999999999999, count: 50
epoch: 471469, loss: 4.827976226806641e-06, rewards: -9.299999999999999, count: 50
epoch: 471479, loss: -4.088878540642327e-06, rewards: -9.299999999999999, count: 50
epoch: 471489, loss: 1.6331672441083356e-06, rewards: -9.299999999999999, count: 50
epoch: 471499, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 471509, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 471519, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 471529, loss: 5.4836274188119205e-08, rewards: -9.299999999999999, cou

epoch: 472409, loss: 6.620884050789755e-06, rewards: -9.299999999999999, count: 50
epoch: 472419, loss: -1.9738674382097088e-05, rewards: -9.299999999999999, count: 50
epoch: 472429, loss: 1.1180639376107138e-05, rewards: -9.299999999999999, count: 50
epoch: 472439, loss: -3.3783912840590347e-06, rewards: -9.299999999999999, count: 50
epoch: 472449, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 472459, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 472469, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 472479, loss: -1.6295909972541267e-06, rewards: -9.299999999999999, count: 50
epoch: 472489, loss: 9.89437126008852e-07, rewards: -9.299999999999999, count: 50
epoch: 472499, loss: 5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 472509, loss: -1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 472519, loss: -2.485513732608524e-06, rewards: -9.299999999999999, co

epoch: 473399, loss: -1.2862682297054562e-06, rewards: -9.299999999999999, count: 50
epoch: 473409, loss: 3.099441414633475e-07, rewards: -9.299999999999999, count: 50
epoch: 473419, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 473429, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 473439, loss: -1.1420249848015374e-06, rewards: -9.299999999999999, count: 50
epoch: 473449, loss: -1.7797947293729521e-06, rewards: -9.299999999999999, count: 50
epoch: 473459, loss: -6.462335477408487e-06, rewards: -9.299999999999999, count: 50
epoch: 473469, loss: -3.637671397882514e-05, rewards: -9.299999999999999, count: 50
epoch: 473479, loss: -9.379982657264918e-05, rewards: -9.299999999999999, count: 50
epoch: 473489, loss: 1.436471939086914e-05, rewards: -9.299999999999999, count: 50
epoch: 473499, loss: 2.89857380266767e-05, rewards: -9.299999999999999, count: 50
epoch: 473509, loss: -1.6947984477155842e-05, rewards: -9.299999999999999, co

epoch: 474389, loss: -1.5360117686213925e-05, rewards: -9.299999999999999, count: 50
epoch: 474399, loss: 1.046538363880245e-05, rewards: -9.299999999999999, count: 50
epoch: 474409, loss: 1.4371871657203883e-05, rewards: -9.299999999999999, count: 50
epoch: 474419, loss: 6.14404689258663e-06, rewards: -9.299999999999999, count: 50
epoch: 474429, loss: -2.638101477714372e-06, rewards: -9.299999999999999, count: 50
epoch: 474439, loss: -2.7942658107349416e-06, rewards: -9.299999999999999, count: 50
epoch: 474449, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 474459, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 474469, loss: -8.094310715023312e-07, rewards: -9.299999999999999, count: 50
epoch: 474479, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 474489, loss: -7.152557657263969e-08, rewards: -9.299999999999999, count: 50
epoch: 474499, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, cou

epoch: 475379, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 475389, loss: -1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 475399, loss: -1.0454655239300337e-06, rewards: -9.299999999999999, count: 50
epoch: 475409, loss: -4.003048161393963e-06, rewards: -9.299999999999999, count: 50
epoch: 475419, loss: -2.5137662305496633e-05, rewards: -9.299999999999999, count: 50
epoch: 475429, loss: -0.00012069463991792873, rewards: -9.299999999999999, count: 50
epoch: 475439, loss: 4.9201251385966316e-05, rewards: -9.299999999999999, count: 50
epoch: 475449, loss: 2.3740529286442325e-05, rewards: -9.299999999999999, count: 50
epoch: 475459, loss: -2.0490884708124213e-05, rewards: -9.299999999999999, count: 50
epoch: 475469, loss: -4.640817678591702e-06, rewards: -9.299999999999999, count: 50
epoch: 475479, loss: 8.925199836085085e-06, rewards: -9.299999999999999, count: 50
epoch: 475489, loss: -3.409385726627079e-06, rewards: -9.299999999999999

epoch: 476369, loss: -2.703666723391507e-06, rewards: -9.299999999999999, count: 50
epoch: 476379, loss: -1.0744332939793821e-05, rewards: -9.299999999999999, count: 50
epoch: 476389, loss: -6.75177579978481e-05, rewards: -9.299999999999999, count: 50
epoch: 476399, loss: 9.435415449843276e-06, rewards: -9.299999999999999, count: 50
epoch: 476409, loss: -4.437088864506222e-05, rewards: -9.299999999999999, count: 50
epoch: 476419, loss: 2.105236126226373e-05, rewards: -9.299999999999999, count: 50
epoch: 476429, loss: 5.533695002668537e-06, rewards: -9.299999999999999, count: 50
epoch: 476439, loss: -1.0545253644522745e-05, rewards: -9.299999999999999, count: 50
epoch: 476449, loss: 6.326436960080173e-06, rewards: -9.299999999999999, count: 50
epoch: 476459, loss: -3.2782554626464844e-06, rewards: -9.299999999999999, count: 50
epoch: 476469, loss: 1.7082691101677483e-06, rewards: -9.299999999999999, count: 50
epoch: 476479, loss: -1.4853477523502079e-06, rewards: -9.299999999999999, cou

epoch: 477359, loss: 2.435445821902249e-05, rewards: -9.299999999999999, count: 50
epoch: 477369, loss: 0.00012590289406944066, rewards: -9.299999999999999, count: 50
epoch: 477379, loss: -4.277825428289361e-05, rewards: -9.299999999999999, count: 50
epoch: 477389, loss: -3.4506319934735075e-05, rewards: -9.299999999999999, count: 50
epoch: 477399, loss: 1.1945962796744425e-05, rewards: -9.299999999999999, count: 50
epoch: 477409, loss: 1.3250112715468276e-05, rewards: -9.299999999999999, count: 50
epoch: 477419, loss: -6.036758350091986e-06, rewards: -9.299999999999999, count: 50
epoch: 477429, loss: -2.7668475013342686e-06, rewards: -9.299999999999999, count: 50
epoch: 477439, loss: 3.834962626569904e-06, rewards: -9.299999999999999, count: 50
epoch: 477449, loss: -2.191066641898942e-06, rewards: -9.299999999999999, count: 50
epoch: 477459, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 477469, loss: -6.34193440873787e-07, rewards: -9.299999999999999, coun

epoch: 478349, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 478359, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 478369, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 478379, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 478389, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 478399, loss: 1.8775463104248047e-06, rewards: -9.299999999999999, count: 50
epoch: 478409, loss: 1.1136531611555256e-05, rewards: -9.299999999999999, count: 50
epoch: 478419, loss: 8.757114119362086e-05, rewards: -9.299999999999999, count: 50
epoch: 478429, loss: -6.375193333951756e-05, rewards: -9.299999999999999, count: 50
epoch: 478439, loss: 3.6065579479327425e-05, rewards: -9.299999999999999, count: 50
epoch: 478449, loss: 2.388238863204606e-05, rewards: -9.299999999999999, count: 50
epoch: 478459, loss: -1.0530948202358559e-05, rewards: -9.299999999999999, count

epoch: 479339, loss: 5.884408892598003e-05, rewards: -9.299999999999999, count: 50
epoch: 479349, loss: 1.4919042769179214e-05, rewards: -9.299999999999999, count: 50
epoch: 479359, loss: -1.5313626136048697e-05, rewards: -9.299999999999999, count: 50
epoch: 479369, loss: -1.043081283569336e-05, rewards: -9.299999999999999, count: 50
epoch: 479379, loss: 4.378557150630513e-06, rewards: -9.299999999999999, count: 50
epoch: 479389, loss: 3.1685829071648186e-06, rewards: -9.299999999999999, count: 50
epoch: 479399, loss: -2.8014183044433594e-06, rewards: -9.299999999999999, count: 50
epoch: 479409, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 479419, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 479429, loss: -8.82148754044465e-07, rewards: -9.299999999999999, count: 50
epoch: 479439, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 479449, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count:

epoch: 480329, loss: 4.054903911310248e-05, rewards: -9.299999999999999, count: 50
epoch: 480339, loss: 2.0369290723465383e-05, rewards: -9.299999999999999, count: 50
epoch: 480349, loss: -4.075765446032165e-06, rewards: -9.299999999999999, count: 50
epoch: 480359, loss: -9.608268555894028e-06, rewards: -9.299999999999999, count: 50
epoch: 480369, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 480379, loss: 3.273486981925089e-06, rewards: -9.299999999999999, count: 50
epoch: 480389, loss: -9.965896197172697e-07, rewards: -9.299999999999999, count: 50
epoch: 480399, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 480409, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 480419, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 480429, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 480439, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count

epoch: 481319, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 481329, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 481339, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 481349, loss: -7.915496667010302e-07, rewards: -9.299999999999999, count: 50
epoch: 481359, loss: -2.5856495540210744e-06, rewards: -9.299999999999999, count: 50
epoch: 481369, loss: -1.4647245734522585e-05, rewards: -9.299999999999999, count: 50
epoch: 481379, loss: -9.788632451090962e-05, rewards: -9.299999999999999, count: 50
epoch: 481389, loss: 6.9040062953718e-05, rewards: -9.299999999999999, count: 50
epoch: 481399, loss: -3.0462741051451303e-05, rewards: -9.299999999999999, count: 50
epoch: 481409, loss: -1.7709731764625758e-05, rewards: -9.299999999999999, count: 50
epoch: 481419, loss: 1.4871358871459961e-05, rewards: -9.299999999999999, count: 50
epoch: 481429, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, c

epoch: 482309, loss: -1.7428397995900013e-06, rewards: -9.299999999999999, count: 50
epoch: 482319, loss: 1.6534328324269154e-06, rewards: -9.299999999999999, count: 50
epoch: 482329, loss: -1.7702579953038367e-06, rewards: -9.299999999999999, count: 50
epoch: 482339, loss: 7.796287491146359e-07, rewards: -9.299999999999999, count: 50
epoch: 482349, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 482359, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 482369, loss: 2.0265579436795633e-08, rewards: -9.299999999999999, count: 50
epoch: 482379, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 482389, loss: 2.511739694455173e-06, rewards: -9.299999999999999, count: 50
epoch: 482399, loss: 9.073019100469537e-06, rewards: -9.299999999999999, count: 50
epoch: 482409, loss: 4.740476651932113e-05, rewards: -9.299999999999999, count: 50
epoch: 482419, loss: 5.6165456044254825e-05, rewards: -9.299999999999999, count:

epoch: 483299, loss: 3.1297207897296175e-05, rewards: -9.299999999999999, count: 50
epoch: 483309, loss: 0.00011786579852923751, rewards: -9.299999999999999, count: 50
epoch: 483319, loss: 2.8555392418638803e-05, rewards: -9.299999999999999, count: 50
epoch: 483329, loss: -2.5439261662540957e-05, rewards: -9.299999999999999, count: 50
epoch: 483339, loss: -2.5297404135926627e-05, rewards: -9.299999999999999, count: 50
epoch: 483349, loss: -5.272626822261373e-06, rewards: -9.299999999999999, count: 50
epoch: 483359, loss: 7.296800504263956e-06, rewards: -9.299999999999999, count: 50
epoch: 483369, loss: 3.958940396842081e-06, rewards: -9.299999999999999, count: 50
epoch: 483379, loss: -2.667903800102067e-06, rewards: -9.299999999999999, count: 50
epoch: 483389, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 483399, loss: 1.146793351836095e-06, rewards: -9.299999999999999, count: 50
epoch: 483409, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, co

epoch: 484289, loss: -6.314396887319162e-05, rewards: -9.299999999999999, count: 50
epoch: 484299, loss: -1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 484309, loss: -4.4604541471926495e-05, rewards: -9.299999999999999, count: 50
epoch: 484319, loss: 2.1947622371953912e-05, rewards: -9.299999999999999, count: 50
epoch: 484329, loss: 6.983280400163494e-06, rewards: -9.299999999999999, count: 50
epoch: 484339, loss: -1.0994673175446223e-05, rewards: -9.299999999999999, count: 50
epoch: 484349, loss: 5.460977718030335e-06, rewards: -9.299999999999999, count: 50
epoch: 484359, loss: -1.7046928633135394e-06, rewards: -9.299999999999999, count: 50
epoch: 484369, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 484379, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 484389, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 484399, loss: -5.173683348402847e-07, rewards: -9.299999999999999, c

epoch: 485279, loss: 6.210804031070438e-07, rewards: -9.299999999999999, count: 50
epoch: 485289, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 485299, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 485309, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 485319, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 485329, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 485339, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 485349, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 485359, loss: -2.645254198796465e-06, rewards: -9.299999999999999, count: 50
epoch: 485369, loss: -1.2941360182594508e-05, rewards: -9.299999999999999, count: 50
epoch: 485379, loss: -8.395075565204024e-05, rewards: -9.299999999999999, count: 50
epoch: 485389, loss: 4.866242306889035e-05, rewards: -9.299999999999999, coun

epoch: 486269, loss: -3.2131672924151644e-05, rewards: -9.299999999999999, count: 50
epoch: 486279, loss: -9.587407112121582e-05, rewards: -9.299999999999999, count: 50
epoch: 486289, loss: 4.88400473841466e-05, rewards: -9.299999999999999, count: 50
epoch: 486299, loss: -1.2712478564935736e-05, rewards: -9.299999999999999, count: 50
epoch: 486309, loss: -4.935264769301284e-06, rewards: -9.299999999999999, count: 50
epoch: 486319, loss: 8.14199484011624e-06, rewards: -9.299999999999999, count: 50
epoch: 486329, loss: -6.036758350091986e-06, rewards: -9.299999999999999, count: 50
epoch: 486339, loss: 3.949403890146641e-06, rewards: -9.299999999999999, count: 50
epoch: 486349, loss: -1.8203259060101118e-06, rewards: -9.299999999999999, count: 50
epoch: 486359, loss: -1.4102458862907952e-06, rewards: -9.299999999999999, count: 50
epoch: 486369, loss: 2.130270104316878e-06, rewards: -9.299999999999999, count: 50
epoch: 486379, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, cou

epoch: 487259, loss: -6.993055285420269e-05, rewards: -9.299999999999999, count: 50
epoch: 487269, loss: -3.3221243938896805e-05, rewards: -9.299999999999999, count: 50
epoch: 487279, loss: 2.8632879548240453e-05, rewards: -9.299999999999999, count: 50
epoch: 487289, loss: 1.4138221558823716e-05, rewards: -9.299999999999999, count: 50
epoch: 487299, loss: -1.0749101420515217e-05, rewards: -9.299999999999999, count: 50
epoch: 487309, loss: -2.667903800102067e-06, rewards: -9.299999999999999, count: 50
epoch: 487319, loss: 4.971027465217048e-06, rewards: -9.299999999999999, count: 50
epoch: 487329, loss: -2.541542016842868e-06, rewards: -9.299999999999999, count: 50
epoch: 487339, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 487349, loss: 1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 487359, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 487369, loss: -1.99079508433897e-07, rewards: -9.299999999999999, cou

epoch: 488249, loss: 3.913640830432996e-05, rewards: -9.299999999999999, count: 50
epoch: 488259, loss: 3.788352114497684e-05, rewards: -9.299999999999999, count: 50
epoch: 488269, loss: -2.7204752768739127e-05, rewards: -9.299999999999999, count: 50
epoch: 488279, loss: -8.38279720483115e-06, rewards: -9.299999999999999, count: 50
epoch: 488289, loss: 1.218199759023264e-05, rewards: -9.299999999999999, count: 50
epoch: 488299, loss: -3.2782554626464844e-06, rewards: -9.299999999999999, count: 50
epoch: 488309, loss: -1.693964009064075e-06, rewards: -9.299999999999999, count: 50
epoch: 488319, loss: 2.0444392703211633e-06, rewards: -9.299999999999999, count: 50
epoch: 488329, loss: -1.3756751968685421e-06, rewards: -9.299999999999999, count: 50
epoch: 488339, loss: 8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 488349, loss: -4.410743770222325e-07, rewards: -9.299999999999999, count: 50
epoch: 488359, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, cou

epoch: 489239, loss: -9.523630069452338e-06, rewards: -9.299999999999999, count: 50
epoch: 489249, loss: 2.7942658107349416e-06, rewards: -9.299999999999999, count: 50
epoch: 489259, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 489269, loss: -1.5318394162022742e-06, rewards: -9.299999999999999, count: 50
epoch: 489279, loss: 1.1539459592313506e-06, rewards: -9.299999999999999, count: 50
epoch: 489289, loss: -1.0180473282161984e-06, rewards: -9.299999999999999, count: 50
epoch: 489299, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 489309, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 489319, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 489329, loss: 1.146793351836095e-06, rewards: -9.299999999999999, count: 50
epoch: 489339, loss: 1.2469291732486454e-06, rewards: -9.299999999999999, count: 50
epoch: 489349, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, cou

epoch: 490229, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 490239, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, count: 50
epoch: 490249, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 490259, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 490269, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 490279, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 490289, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 490299, loss: -4.2915345943583816e-08, rewards: -9.299999999999999, count: 50
epoch: 490309, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 490319, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 490329, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 490339, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, coun

epoch: 491219, loss: -2.7166604922967963e-05, rewards: -9.299999999999999, count: 50
epoch: 491229, loss: -5.0008296966552734e-05, rewards: -9.299999999999999, count: 50
epoch: 491239, loss: 1.5089512089616619e-05, rewards: -9.299999999999999, count: 50
epoch: 491249, loss: 1.817941665649414e-05, rewards: -9.299999999999999, count: 50
epoch: 491259, loss: -6.988048426137539e-06, rewards: -9.299999999999999, count: 50
epoch: 491269, loss: -4.444122168933973e-06, rewards: -9.299999999999999, count: 50
epoch: 491279, loss: 4.428625288710464e-06, rewards: -9.299999999999999, count: 50
epoch: 491289, loss: -1.971721758309286e-06, rewards: -9.299999999999999, count: 50
epoch: 491299, loss: 5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 491309, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 491319, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 491329, loss: -5.781650429526053e-07, rewards: -9.299999999999999, coun

epoch: 492209, loss: -5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 492219, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 492229, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 492239, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 492249, loss: 1.2469291732486454e-06, rewards: -9.299999999999999, count: 50
epoch: 492259, loss: 5.68151472180034e-06, rewards: -9.299999999999999, count: 50
epoch: 492269, loss: 3.5563705750973895e-05, rewards: -9.299999999999999, count: 50
epoch: 492279, loss: 9.59384415182285e-05, rewards: -9.299999999999999, count: 50
epoch: 492289, loss: -1.3310909707797691e-05, rewards: -9.299999999999999, count: 50
epoch: 492299, loss: -3.080129681620747e-05, rewards: -9.299999999999999, count: 50
epoch: 492309, loss: 1.5566349247819744e-05, rewards: -9.299999999999999, count: 50
epoch: 492319, loss: 3.764629354918725e-06, rewards: -9.299999999999999, count: 5

epoch: 493199, loss: 9.763240313986898e-07, rewards: -9.299999999999999, count: 50
epoch: 493209, loss: 1.920461727422662e-06, rewards: -9.299999999999999, count: 50
epoch: 493219, loss: 5.811452865600586e-06, rewards: -9.299999999999999, count: 50
epoch: 493229, loss: 3.7091969716129825e-05, rewards: -9.299999999999999, count: 50
epoch: 493239, loss: 9.182572102872655e-05, rewards: -9.299999999999999, count: 50
epoch: 493249, loss: -6.588697488041362e-06, rewards: -9.299999999999999, count: 50
epoch: 493259, loss: -3.273725451435894e-05, rewards: -9.299999999999999, count: 50
epoch: 493269, loss: 1.3087988008919638e-05, rewards: -9.299999999999999, count: 50
epoch: 493279, loss: 5.781650543212891e-06, rewards: -9.299999999999999, count: 50
epoch: 493289, loss: -7.6544283729163e-06, rewards: -9.299999999999999, count: 50
epoch: 493299, loss: 4.117488970223349e-06, rewards: -9.299999999999999, count: 50
epoch: 493309, loss: -1.5997886748664314e-06, rewards: -9.299999999999999, count: 50

epoch: 494189, loss: -5.632638931274414e-06, rewards: -9.299999999999999, count: 50
epoch: 494199, loss: 3.2949446904240176e-06, rewards: -9.299999999999999, count: 50
epoch: 494209, loss: -1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 494219, loss: -8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 494229, loss: 9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 494239, loss: -1.668930025289228e-07, rewards: -9.299999999999999, count: 50
epoch: 494249, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 494259, loss: -2.2220610844669864e-06, rewards: -9.299999999999999, count: 50
epoch: 494269, loss: -9.481907000008505e-06, rewards: -9.299999999999999, count: 50
epoch: 494279, loss: -6.332516932161525e-05, rewards: -9.299999999999999, count: 50
epoch: 494289, loss: 1.5950203078318737e-06, rewards: -9.299999999999999, count: 50
epoch: 494299, loss: -4.843711940338835e-05, rewards: -9.299999999999999, c

epoch: 495179, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 495189, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 495199, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 495209, loss: 2.8848648980783764e-07, rewards: -9.299999999999999, count: 50
epoch: 495219, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 495229, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 495239, loss: 1.3804435639030999e-06, rewards: -9.299999999999999, count: 50
epoch: 495249, loss: 6.506443241960369e-06, rewards: -9.299999999999999, count: 50
epoch: 495259, loss: 4.706978870672174e-05, rewards: -9.299999999999999, count: 50
epoch: 495269, loss: 5.390286605688743e-05, rewards: -9.299999999999999, count: 50
epoch: 495279, loss: 3.5890341678168625e-05, rewards: -9.299999999999999, count: 50
epoch: 495289, loss: -2.7620792025118135e-05, rewards: -9.299999999999999, count

epoch: 496169, loss: 4.266500582161825e-06, rewards: -9.299999999999999, count: 50
epoch: 496179, loss: 2.7711392249329947e-05, rewards: -9.299999999999999, count: 50
epoch: 496189, loss: 0.00011866569548146799, rewards: -9.299999999999999, count: 50
epoch: 496199, loss: -3.3882857678690925e-05, rewards: -9.299999999999999, count: 50
epoch: 496209, loss: -3.1907558877719566e-05, rewards: -9.299999999999999, count: 50
epoch: 496219, loss: 1.5084743608895224e-05, rewards: -9.299999999999999, count: 50
epoch: 496229, loss: 9.00506984180538e-06, rewards: -9.299999999999999, count: 50
epoch: 496239, loss: -8.647441973153036e-06, rewards: -9.299999999999999, count: 50
epoch: 496249, loss: 1.8358230136072962e-06, rewards: -9.299999999999999, count: 50
epoch: 496259, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 496269, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 496279, loss: 6.699561936329701e-07, rewards: -9.299999999999999, coun

epoch: 497159, loss: 6.907463102834299e-05, rewards: -9.299999999999999, count: 50
epoch: 497169, loss: -2.927064815594349e-05, rewards: -9.299999999999999, count: 50
epoch: 497179, loss: 3.725290298461914e-06, rewards: -9.299999999999999, count: 50
epoch: 497189, loss: 4.79340542369755e-06, rewards: -9.299999999999999, count: 50
epoch: 497199, loss: -5.117654836794827e-06, rewards: -9.299999999999999, count: 50
epoch: 497209, loss: 2.8181075322208926e-06, rewards: -9.299999999999999, count: 50
epoch: 497219, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 497229, loss: -2.191066641898942e-06, rewards: -9.299999999999999, count: 50
epoch: 497239, loss: 1.363754222438729e-06, rewards: -9.299999999999999, count: 50
epoch: 497249, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 497259, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 497269, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 5

epoch: 498149, loss: 5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 498159, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 498169, loss: 1.5747547195132938e-06, rewards: -9.299999999999999, count: 50
epoch: 498179, loss: 5.655288532580016e-06, rewards: -9.299999999999999, count: 50
epoch: 498189, loss: 3.181338252034038e-05, rewards: -9.299999999999999, count: 50
epoch: 498199, loss: 0.00010392546391813084, rewards: -9.299999999999999, count: 50
epoch: 498209, loss: -3.589987682062201e-05, rewards: -9.299999999999999, count: 50
epoch: 498219, loss: -1.671910285949707e-05, rewards: -9.299999999999999, count: 50
epoch: 498229, loss: 2.1288395146257244e-05, rewards: -9.299999999999999, count: 50
epoch: 498239, loss: -7.127523531380575e-06, rewards: -9.299999999999999, count: 50
epoch: 498249, loss: -1.1527538390510017e-06, rewards: -9.299999999999999, count: 50
epoch: 498259, loss: 2.9206275939941406e-06, rewards: -9.299999999999999, cou

epoch: 499139, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 499149, loss: 1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 499159, loss: 6.645917892456055e-06, rewards: -9.299999999999999, count: 50
epoch: 499169, loss: 4.0571689169155434e-05, rewards: -9.299999999999999, count: 50
epoch: 499179, loss: 7.971882587298751e-05, rewards: -9.299999999999999, count: 50
epoch: 499189, loss: 3.979206212534336e-06, rewards: -9.299999999999999, count: 50
epoch: 499199, loss: -3.369808109709993e-05, rewards: -9.299999999999999, count: 50
epoch: 499209, loss: 9.549856258672662e-06, rewards: -9.299999999999999, count: 50
epoch: 499219, loss: 7.246732820931356e-06, rewards: -9.299999999999999, count: 50
epoch: 499229, loss: -7.139444278436713e-06, rewards: -9.299999999999999, count: 50
epoch: 499239, loss: 3.6406518120202236e-06, rewards: -9.299999999999999, count: 50
epoch: 499249, loss: -1.2934208371007117e-06, rewards: -9.299999999999999, count: 5

epoch: 500129, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 500139, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 500149, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 500159, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 500169, loss: 6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 500179, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 500189, loss: -1.978874252017704e-06, rewards: -9.299999999999999, count: 50
epoch: 500199, loss: -7.888078471296467e-06, rewards: -9.299999999999999, count: 50
epoch: 500209, loss: -4.824995994567871e-05, rewards: -9.299999999999999, count: 50
epoch: 500219, loss: -5.2857398259220645e-05, rewards: -9.299999999999999, count: 50
epoch: 500229, loss: -1.4970302800065838e-05, rewards: -9.299999999999999, count: 50
epoch: 500239, loss: 3.153085708618164e-05, rewards: -9.299999999999999, co

epoch: 501119, loss: 3.4242868423461914e-05, rewards: -9.299999999999999, count: 50
epoch: 501129, loss: -9.953975677490234e-06, rewards: -9.299999999999999, count: 50
epoch: 501139, loss: -1.6349553334293887e-05, rewards: -9.299999999999999, count: 50
epoch: 501149, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count: 50
epoch: 501159, loss: 6.104707608756144e-06, rewards: -9.299999999999999, count: 50
epoch: 501169, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 501179, loss: -1.745224039950699e-06, rewards: -9.299999999999999, count: 50
epoch: 501189, loss: 1.4007091522216797e-06, rewards: -9.299999999999999, count: 50
epoch: 501199, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 501209, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 501219, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 501229, loss: 5.173683348402847e-07, rewards: -9.299999999999999, c

epoch: 502109, loss: 1.817941665649414e-06, rewards: -9.299999999999999, count: 50
epoch: 502119, loss: -9.334087280876702e-07, rewards: -9.299999999999999, count: 50
epoch: 502129, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 502139, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 502149, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 502159, loss: -1.2123584838263923e-06, rewards: -9.299999999999999, count: 50
epoch: 502169, loss: -1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 502179, loss: -1.5652179854441783e-06, rewards: -9.299999999999999, count: 50
epoch: 502189, loss: -8.511543455824722e-06, rewards: -9.299999999999999, count: 50
epoch: 502199, loss: -6.196379399625584e-05, rewards: -9.299999999999999, count: 50
epoch: 502209, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 502219, loss: -5.2481889724731445e-05, rewards: -9.299999999999999, c

epoch: 503099, loss: -1.2104511370125692e-05, rewards: -9.299999999999999, count: 50
epoch: 503109, loss: -0.00010254740482196212, rewards: -9.299999999999999, count: 50
epoch: 503119, loss: 8.482217526761815e-05, rewards: -9.299999999999999, count: 50
epoch: 503129, loss: -2.0766258330695564e-06, rewards: -9.299999999999999, count: 50
epoch: 503139, loss: -2.8640031814575195e-05, rewards: -9.299999999999999, count: 50
epoch: 503149, loss: -1.2487173080444336e-05, rewards: -9.299999999999999, count: 50
epoch: 503159, loss: 6.611347089346964e-06, rewards: -9.299999999999999, count: 50
epoch: 503169, loss: 5.548000444832724e-06, rewards: -9.299999999999999, count: 50
epoch: 503179, loss: -2.83598910755245e-06, rewards: -9.299999999999999, count: 50
epoch: 503189, loss: -1.0454655239300337e-06, rewards: -9.299999999999999, count: 50
epoch: 503199, loss: 1.2469291732486454e-06, rewards: -9.299999999999999, count: 50
epoch: 503209, loss: -6.461143584601814e-07, rewards: -9.299999999999999, 

epoch: 504089, loss: -3.5119055610266514e-06, rewards: -9.299999999999999, count: 50
epoch: 504099, loss: 2.948045676021138e-06, rewards: -9.299999999999999, count: 50
epoch: 504109, loss: -1.5318394162022742e-06, rewards: -9.299999999999999, count: 50
epoch: 504119, loss: 1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 504129, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 504139, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 504149, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 504159, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 504169, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 504179, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 504189, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 504199, loss: -4.220009031996597e-06, rewards: -9.299999999999999, cou

epoch: 505079, loss: 6.210804031070438e-07, rewards: -9.299999999999999, count: 50
epoch: 505089, loss: -9.965896197172697e-07, rewards: -9.299999999999999, count: 50
epoch: 505099, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 505109, loss: -8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 505119, loss: -1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 505129, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 505139, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 505149, loss: -2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 505159, loss: -1.8621683921082877e-05, rewards: -9.299999999999999, count: 50
epoch: 505169, loss: -0.00011986017489107326, rewards: -9.299999999999999, count: 50
epoch: 505179, loss: 7.24244091543369e-05, rewards: -9.299999999999999, count: 50
epoch: 505189, loss: 1.245737075805664e-05, rewards: -9.299999999999999, coun

epoch: 506069, loss: -2.221822796855122e-05, rewards: -9.299999999999999, count: 50
epoch: 506079, loss: 3.33511816279497e-05, rewards: -9.299999999999999, count: 50
epoch: 506089, loss: 1.2015104402962606e-05, rewards: -9.299999999999999, count: 50
epoch: 506099, loss: -1.1861324310302734e-05, rewards: -9.299999999999999, count: 50
epoch: 506109, loss: -2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 506119, loss: 5.332231467036763e-06, rewards: -9.299999999999999, count: 50
epoch: 506129, loss: -2.2220610844669864e-06, rewards: -9.299999999999999, count: 50
epoch: 506139, loss: 4.470348358154297e-07, rewards: -9.299999999999999, count: 50
epoch: 506149, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 506159, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 506169, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 506179, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count

epoch: 507059, loss: -0.0001094698891392909, rewards: -9.299999999999999, count: 50
epoch: 507069, loss: 7.015466690063477e-05, rewards: -9.299999999999999, count: 50
epoch: 507079, loss: -1.542806603538338e-05, rewards: -9.299999999999999, count: 50
epoch: 507089, loss: -1.9174814951838925e-05, rewards: -9.299999999999999, count: 50
epoch: 507099, loss: 1.341104507446289e-05, rewards: -9.299999999999999, count: 50
epoch: 507109, loss: -1.5997886748664314e-06, rewards: -9.299999999999999, count: 50
epoch: 507119, loss: -2.806186785164755e-06, rewards: -9.299999999999999, count: 50
epoch: 507129, loss: 2.6023387817986077e-06, rewards: -9.299999999999999, count: 50
epoch: 507139, loss: -1.871585823209898e-06, rewards: -9.299999999999999, count: 50
epoch: 507149, loss: 9.262561775358336e-07, rewards: -9.299999999999999, count: 50
epoch: 507159, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 507169, loss: -1.5735626845980732e-07, rewards: -9.299999999999999, co

epoch: 508049, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 508059, loss: 7.498264267269406e-07, rewards: -9.299999999999999, count: 50
epoch: 508069, loss: 1.0406970432086382e-06, rewards: -9.299999999999999, count: 50
epoch: 508079, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 508089, loss: -1.8668174561753403e-06, rewards: -9.299999999999999, count: 50
epoch: 508099, loss: -8.016824722290039e-06, rewards: -9.299999999999999, count: 50
epoch: 508109, loss: -5.360484283301048e-05, rewards: -9.299999999999999, count: 50
epoch: 508119, loss: -3.178358019795269e-05, rewards: -9.299999999999999, count: 50
epoch: 508129, loss: -3.688573997351341e-05, rewards: -9.299999999999999, count: 50
epoch: 508139, loss: 2.794981082843151e-05, rewards: -9.299999999999999, count: 50
epoch: 508149, loss: 4.594326128426474e-06, rewards: -9.299999999999999, count: 50
epoch: 508159, loss: -1.157879796664929e-05, rewards: -9.299999999999999, co

epoch: 509039, loss: -4.2310952267143875e-05, rewards: -9.299999999999999, count: 50
epoch: 509049, loss: -1.828908898460213e-05, rewards: -9.299999999999999, count: 50
epoch: 509059, loss: 2.2242069462663494e-05, rewards: -9.299999999999999, count: 50
epoch: 509069, loss: -3.7741660889878403e-06, rewards: -9.299999999999999, count: 50
epoch: 509079, loss: -4.937648554914631e-06, rewards: -9.299999999999999, count: 50
epoch: 509089, loss: 4.981756319466513e-06, rewards: -9.299999999999999, count: 50
epoch: 509099, loss: -3.210306203982327e-06, rewards: -9.299999999999999, count: 50
epoch: 509109, loss: 1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 509119, loss: -1.3434887478069868e-06, rewards: -9.299999999999999, count: 50
epoch: 509129, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 509139, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 509149, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, c

epoch: 510029, loss: -1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 510039, loss: 1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 510049, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 510059, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 510069, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 510079, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 510089, loss: 5.388259864957945e-07, rewards: -9.299999999999999, count: 50
epoch: 510099, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 510109, loss: 4.419088327267673e-06, rewards: -9.299999999999999, count: 50
epoch: 510119, loss: 3.2035113690653816e-05, rewards: -9.299999999999999, count: 50
epoch: 510129, loss: 0.00011115789675386623, rewards: -9.299999999999999, count: 50
epoch: 510139, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 

epoch: 511019, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 511029, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 511039, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 511049, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 511059, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 511069, loss: -3.55362885784416e-06, rewards: -9.299999999999999, count: 50
epoch: 511079, loss: -1.5512705431319773e-05, rewards: -9.299999999999999, count: 50
epoch: 511089, loss: -9.289383888244629e-05, rewards: -9.299999999999999, count: 50
epoch: 511099, loss: 5.9341193264117464e-05, rewards: -9.299999999999999, count: 50
epoch: 511109, loss: -3.929972808691673e-05, rewards: -9.299999999999999, count: 50
epoch: 511119, loss: -1.9347667148394976e-06, rewards: -9.299999999999999, count: 50
epoch: 511129, loss: 1.4449357877310831e-05, rewards: -9.299999999999999,

epoch: 512009, loss: 6.997585160206654e-07, rewards: -9.299999999999999, count: 50
epoch: 512019, loss: -9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 512029, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 512039, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 512049, loss: 1.043081283569336e-06, rewards: -9.299999999999999, count: 50
epoch: 512059, loss: 1.592636067471176e-06, rewards: -9.299999999999999, count: 50
epoch: 512069, loss: 1.1389255632821005e-05, rewards: -9.299999999999999, count: 50
epoch: 512079, loss: 9.867787593975663e-05, rewards: -9.299999999999999, count: 50
epoch: 512089, loss: -8.299470209749416e-05, rewards: -9.299999999999999, count: 50
epoch: 512099, loss: 7.6544283729163e-06, rewards: -9.299999999999999, count: 50
epoch: 512109, loss: 3.0205250368453562e-05, rewards: -9.299999999999999, count: 50
epoch: 512119, loss: 1.1705160432029516e-05, rewards: -9.299999999999999, count: 50

epoch: 512999, loss: 2.7239323117100867e-06, rewards: -9.299999999999999, count: 50
epoch: 513009, loss: -1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 513019, loss: 1.3387202670855913e-06, rewards: -9.299999999999999, count: 50
epoch: 513029, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 513039, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 513049, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 513059, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 513069, loss: -6.783008643651556e-07, rewards: -9.299999999999999, count: 50
epoch: 513079, loss: -1.7797947293729521e-06, rewards: -9.299999999999999, count: 50
epoch: 513089, loss: -5.30004490428837e-06, rewards: -9.299999999999999, count: 50
epoch: 513099, loss: -2.7166604922967963e-05, rewards: -9.299999999999999, count: 50
epoch: 513109, loss: -0.00010750412911875173, rewards: -9.299999999999999, 

epoch: 513989, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 513999, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 514009, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 514019, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 514029, loss: -9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 514039, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 514049, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 514059, loss: 1.115798909268051e-06, rewards: -9.299999999999999, count: 50
epoch: 514069, loss: 5.108117875352036e-06, rewards: -9.299999999999999, count: 50
epoch: 514079, loss: 3.222584564355202e-05, rewards: -9.299999999999999, count: 50
epoch: 514089, loss: 0.00010792732064146549, rewards: -9.299999999999999, count: 50
epoch: 514099, loss: -1.582741788297426e-05, rewards: -9.299999999999999, count: 5

epoch: 514979, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 514989, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 514999, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count: 50
epoch: 515009, loss: -5.686283088834898e-07, rewards: -9.299999999999999, count: 50
epoch: 515019, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 515029, loss: -8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 515039, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 515049, loss: -5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 515059, loss: -2.541542016842868e-06, rewards: -9.299999999999999, count: 50
epoch: 515069, loss: -1.6279220290016383e-05, rewards: -9.299999999999999, count: 50
epoch: 515079, loss: -0.00011307001113891602, rewards: -9.299999999999999, count: 50
epoch: 515089, loss: 7.811188697814941e-05, rewards: -9.299999999999999, 

epoch: 515969, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 515979, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 515989, loss: -2.191066641898942e-06, rewards: -9.299999999999999, count: 50
epoch: 515999, loss: -1.1718273526639678e-05, rewards: -9.299999999999999, count: 50
epoch: 516009, loss: -8.46540933707729e-05, rewards: -9.299999999999999, count: 50
epoch: 516019, loss: 5.524158405023627e-05, rewards: -9.299999999999999, count: 50
epoch: 516029, loss: -4.287958290660754e-05, rewards: -9.299999999999999, count: 50
epoch: 516039, loss: -1.4218091564544011e-05, rewards: -9.299999999999999, count: 50
epoch: 516049, loss: 1.6301870346069336e-05, rewards: -9.299999999999999, count: 50
epoch: 516059, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 516069, loss: -6.034374109731289e-06, rewards: -9.299999999999999, count: 50
epoch: 516079, loss: 3.2711029689380666e-06, rewards: -9.299999999999999, co

epoch: 516959, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 516969, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 516979, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 516989, loss: 3.12209135699959e-06, rewards: -9.299999999999999, count: 50
epoch: 516999, loss: 1.450538638891885e-05, rewards: -9.299999999999999, count: 50
epoch: 517009, loss: 9.171485726255924e-05, rewards: -9.299999999999999, count: 50
epoch: 517019, loss: -5.9820413298439234e-05, rewards: -9.299999999999999, count: 50
epoch: 517029, loss: 3.951072721974924e-05, rewards: -9.299999999999999, count: 50
epoch: 517039, loss: 5.564689672610257e-06, rewards: -9.299999999999999, count: 50
epoch: 517049, loss: -1.627206802368164e-05, rewards: -9.299999999999999, count: 50
epoch: 517059, loss: 7.063150405883789e-06, rewards: -9.299999999999999, count: 50
epoch: 517069, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 

epoch: 517949, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 517959, loss: 6.363392003549961e-06, rewards: -9.299999999999999, count: 50
epoch: 517969, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 517979, loss: -2.0444392703211633e-06, rewards: -9.299999999999999, count: 50
epoch: 517989, loss: 1.497268726780021e-06, rewards: -9.299999999999999, count: 50
epoch: 517999, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 518009, loss: 1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 518019, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 518029, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 518039, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 518049, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 518059, loss: -3.826618240054813e-07, rewards: -9.299999999999999

epoch: 518939, loss: 6.608962848986266e-06, rewards: -9.299999999999999, count: 50
epoch: 518949, loss: 3.5670997021952644e-05, rewards: -9.299999999999999, count: 50
epoch: 518959, loss: 9.189486445393413e-05, rewards: -9.299999999999999, count: 50
epoch: 518969, loss: -3.383040530025028e-05, rewards: -9.299999999999999, count: 50
epoch: 518979, loss: -7.944106982904486e-06, rewards: -9.299999999999999, count: 50
epoch: 518989, loss: 1.7936230506165884e-05, rewards: -9.299999999999999, count: 50
epoch: 518999, loss: -1.1895895113411825e-05, rewards: -9.299999999999999, count: 50
epoch: 519009, loss: 6.173849214974325e-06, rewards: -9.299999999999999, count: 50
epoch: 519019, loss: -3.4046172459056834e-06, rewards: -9.299999999999999, count: 50
epoch: 519029, loss: 2.5975705284508877e-06, rewards: -9.299999999999999, count: 50
epoch: 519039, loss: -2.123117383234785e-06, rewards: -9.299999999999999, count: 50
epoch: 519049, loss: 1.6534328324269154e-06, rewards: -9.299999999999999, cou

epoch: 519929, loss: 2.7239323117100867e-06, rewards: -9.299999999999999, count: 50
epoch: 519939, loss: -4.316568265494425e-06, rewards: -9.299999999999999, count: 50
epoch: 519949, loss: 1.8668174561753403e-06, rewards: -9.299999999999999, count: 50
epoch: 519959, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 519969, loss: 3.850460075227602e-07, rewards: -9.299999999999999, count: 50
epoch: 519979, loss: -7.307529585887096e-07, rewards: -9.299999999999999, count: 50
epoch: 519989, loss: 7.653236480109626e-07, rewards: -9.299999999999999, count: 50
epoch: 519999, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 520009, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 520019, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 520029, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 520039, loss: -1.99079508433897e-07, rewards: -9.299999999999999, coun

epoch: 520919, loss: 3.409385726627079e-06, rewards: -9.299999999999999, count: 50
epoch: 520929, loss: -1.978874252017704e-06, rewards: -9.299999999999999, count: 50
epoch: 520939, loss: 1.5580653780489229e-06, rewards: -9.299999999999999, count: 50
epoch: 520949, loss: -9.810923984332476e-07, rewards: -9.299999999999999, count: 50
epoch: 520959, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 520969, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 520979, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 520989, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 520999, loss: 1.7702579953038367e-06, rewards: -9.299999999999999, count: 50
epoch: 521009, loss: 8.165836334228516e-06, rewards: -9.299999999999999, count: 50
epoch: 521019, loss: 6.582141213584691e-05, rewards: -9.299999999999999, count: 50
epoch: 521029, loss: -1.833558053476736e-05, rewards: -9.299999999999999, count: 

epoch: 521909, loss: 1.5294552213163115e-05, rewards: -9.299999999999999, count: 50
epoch: 521919, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 521929, loss: -5.346536454453599e-06, rewards: -9.299999999999999, count: 50
epoch: 521939, loss: 4.122257450944744e-06, rewards: -9.299999999999999, count: 50
epoch: 521949, loss: -2.6428699584357673e-06, rewards: -9.299999999999999, count: 50
epoch: 521959, loss: 1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 521969, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 521979, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 521989, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 521999, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 522009, loss: -1.5246868088070187e-06, rewards: -9.299999999999999, count: 50
epoch: 522019, loss: -7.241964340209961e-06, rewards: -9.299999999999999, co

epoch: 522899, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 522909, loss: 2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 522919, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 522929, loss: 2.8848648980783764e-07, rewards: -9.299999999999999, count: 50
epoch: 522939, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 522949, loss: 4.2676924749684986e-07, rewards: -9.299999999999999, count: 50
epoch: 522959, loss: 1.5246868088070187e-06, rewards: -9.299999999999999, count: 50
epoch: 522969, loss: 3.6263465972297126e-06, rewards: -9.299999999999999, count: 50
epoch: 522979, loss: 1.0865926924452651e-05, rewards: -9.299999999999999, count: 50
epoch: 522989, loss: 5.1397084462223575e-05, rewards: -9.299999999999999, count: 50
epoch: 522999, loss: 4.4386386434780434e-05, rewards: -9.299999999999999, count: 50
epoch: 523009, loss: -1.1056661605834961e-05, rewards: -9.299999999999999, c

epoch: 523889, loss: 2.1207333702477627e-06, rewards: -9.299999999999999, count: 50
epoch: 523899, loss: -5.083084033685736e-06, rewards: -9.299999999999999, count: 50
epoch: 523909, loss: 1.6438960983578e-06, rewards: -9.299999999999999, count: 50
epoch: 523919, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 523929, loss: -1.0037422271125251e-06, rewards: -9.299999999999999, count: 50
epoch: 523939, loss: 6.902217819515499e-07, rewards: -9.299999999999999, count: 50
epoch: 523949, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 523959, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 523969, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 523979, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 523989, loss: 9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 523999, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 

epoch: 524879, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 524889, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 524899, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 524909, loss: 3.5512448448571377e-06, rewards: -9.299999999999999, count: 50
epoch: 524919, loss: 2.2152662495500408e-05, rewards: -9.299999999999999, count: 50
epoch: 524929, loss: 0.0001211392882396467, rewards: -9.299999999999999, count: 50
epoch: 524939, loss: -6.147145904833451e-05, rewards: -9.299999999999999, count: 50
epoch: 524949, loss: -1.6486645108670928e-05, rewards: -9.299999999999999, count: 50
epoch: 524959, loss: 2.3642778614885174e-05, rewards: -9.299999999999999, count: 50
epoch: 524969, loss: 2.404451379334205e-06, rewards: -9.299999999999999, count: 50
epoch: 524979, loss: -8.64267349243164e-06, rewards: -9.299999999999999, count: 50
epoch: 524989, loss: 4.0149689084501006e-06, rewards: -9.299999999999999, count

epoch: 525869, loss: -2.440810203552246e-05, rewards: -9.299999999999999, count: 50
epoch: 525879, loss: -5.402564966061618e-06, rewards: -9.299999999999999, count: 50
epoch: 525889, loss: 1.0713339179346804e-05, rewards: -9.299999999999999, count: 50
epoch: 525899, loss: -5.428791155281942e-06, rewards: -9.299999999999999, count: 50
epoch: 525909, loss: 1.7702579953038367e-06, rewards: -9.299999999999999, count: 50
epoch: 525919, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 525929, loss: 9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 525939, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 525949, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 525959, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 525969, loss: 4.971027465217048e-07, rewards: -9.299999999999999, count: 50
epoch: 525979, loss: 9.89437126008852e-07, rewards: -9.299999999999999, count: 5

epoch: 526859, loss: -3.8973092159722e-05, rewards: -9.299999999999999, count: 50
epoch: 526869, loss: 6.711482910759514e-06, rewards: -9.299999999999999, count: 50
epoch: 526879, loss: 6.645917892456055e-06, rewards: -9.299999999999999, count: 50
epoch: 526889, loss: -7.41362555345404e-06, rewards: -9.299999999999999, count: 50
epoch: 526899, loss: 4.930496288579889e-06, rewards: -9.299999999999999, count: 50
epoch: 526909, loss: -2.256631887576077e-06, rewards: -9.299999999999999, count: 50
epoch: 526919, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 526929, loss: 1.8060206912196008e-06, rewards: -9.299999999999999, count: 50
epoch: 526939, loss: -1.1372566177669796e-06, rewards: -9.299999999999999, count: 50
epoch: 526949, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 526959, loss: -1.497268726780021e-06, rewards: -9.299999999999999, count: 50
epoch: 526969, loss: -4.147291292611044e-06, rewards: -9.299999999999999, count:

epoch: 527849, loss: -9.890436922432855e-05, rewards: -9.299999999999999, count: 50
epoch: 527859, loss: 6.383180880220607e-05, rewards: -9.299999999999999, count: 50
epoch: 527869, loss: -3.420472057769075e-05, rewards: -9.299999999999999, count: 50
epoch: 527879, loss: -3.312826265755575e-06, rewards: -9.299999999999999, count: 50
epoch: 527889, loss: 1.3321638107299805e-05, rewards: -9.299999999999999, count: 50
epoch: 527899, loss: -8.89539751369739e-06, rewards: -9.299999999999999, count: 50
epoch: 527909, loss: 3.750324140128214e-06, rewards: -9.299999999999999, count: 50
epoch: 527919, loss: -2.1660328002326423e-06, rewards: -9.299999999999999, count: 50
epoch: 527929, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 527939, loss: -1.3029575711698271e-06, rewards: -9.299999999999999, count: 50
epoch: 527949, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 527959, loss: -4.0531159584134e-07, rewards: -9.299999999999999, count: 5

epoch: 528839, loss: -3.650188546089339e-06, rewards: -9.299999999999999, count: 50
epoch: 528849, loss: 5.552768925554119e-06, rewards: -9.299999999999999, count: 50
epoch: 528859, loss: -4.163980520388577e-06, rewards: -9.299999999999999, count: 50
epoch: 528869, loss: 1.9478798094496597e-06, rewards: -9.299999999999999, count: 50
epoch: 528879, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 528889, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 528899, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 528909, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 528919, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 528929, loss: 2.071857352348161e-06, rewards: -9.299999999999999, count: 50
epoch: 528939, loss: 9.253025382349733e-06, rewards: -9.299999999999999, count: 50
epoch: 528949, loss: 6.501436291728169e-05, rewards: -9.299999999999999, count: 50
e

epoch: 529829, loss: -7.340908268815838e-06, rewards: -9.299999999999999, count: 50
epoch: 529839, loss: 3.743171646419796e-06, rewards: -9.299999999999999, count: 50
epoch: 529849, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 529859, loss: -2.191066641898942e-06, rewards: -9.299999999999999, count: 50
epoch: 529869, loss: 1.2314319519646233e-06, rewards: -9.299999999999999, count: 50
epoch: 529879, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 529889, loss: -5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 529899, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 529909, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 529919, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 529929, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 529939, loss: -7.307529585887096e-07, rewards: -9.299999999999999, coun

epoch: 530819, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 530829, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 530839, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 530849, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 530859, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 530869, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 530879, loss: -3.2782554626464844e-06, rewards: -9.299999999999999, count: 50
epoch: 530889, loss: -1.8656253814697266e-05, rewards: -9.299999999999999, count: 50
epoch: 530899, loss: -0.00011204362090211362, rewards: -9.299999999999999, count: 50
epoch: 530909, loss: 7.30502579244785e-05, rewards: -9.299999999999999, count: 50
epoch: 530919, loss: -7.798671504133381e-06, rewards: -9.299999999999999, count: 50
epoch: 530929, loss: -2.4293660317198373e-05, rewards: -9.299999999999999, 

epoch: 531809, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 531819, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 531829, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 531839, loss: 2.43902195506962e-06, rewards: -9.299999999999999, count: 50
epoch: 531849, loss: 1.3214349564805161e-05, rewards: -9.299999999999999, count: 50
epoch: 531859, loss: 8.115648961393163e-05, rewards: -9.299999999999999, count: 50
epoch: 531869, loss: -4.051089126733132e-05, rewards: -9.299999999999999, count: 50
epoch: 531879, loss: 4.587054354487918e-05, rewards: -9.299999999999999, count: 50
epoch: 531889, loss: -9.5617770057288e-06, rewards: -9.299999999999999, count: 50
epoch: 531899, loss: -1.0659694453352131e-05, rewards: -9.299999999999999, count: 50
epoch: 531909, loss: 9.876489457383286e-06, rewards: -9.299999999999999, count: 50
epoch: 531919, loss: -4.708766937255859e-06, rewards: -9.299999999999999, count: 50
e

epoch: 532799, loss: 1.0100603503815364e-05, rewards: -9.299999999999999, count: 50
epoch: 532809, loss: 3.7717818486271426e-06, rewards: -9.299999999999999, count: 50
epoch: 532819, loss: -3.7550926208496094e-06, rewards: -9.299999999999999, count: 50
epoch: 532829, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 532839, loss: 1.2731552487821318e-06, rewards: -9.299999999999999, count: 50
epoch: 532849, loss: -7.96318033735588e-07, rewards: -9.299999999999999, count: 50
epoch: 532859, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 532869, loss: 2.7298926852381555e-07, rewards: -9.299999999999999, count: 50
epoch: 532879, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 532889, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 532899, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 532909, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, coun

epoch: 533789, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 533799, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 533809, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 533819, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 533829, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 533839, loss: 3.3104420253948774e-06, rewards: -9.299999999999999, count: 50
epoch: 533849, loss: 2.473592758178711e-05, rewards: -9.299999999999999, count: 50
epoch: 533859, loss: 0.0001325786142842844, rewards: -9.299999999999999, count: 50
epoch: 533869, loss: -2.0478963051573373e-05, rewards: -9.299999999999999, count: 50
epoch: 533879, loss: -4.323005850892514e-05, rewards: -9.299999999999999, count: 50
epoch: 533889, loss: -1.0907649993896484e-05, rewards: -9.299999999999999, count: 50
epoch: 533899, loss: 1.197576511913212e-05, rewards: -9.299999999999999, count: 

epoch: 534779, loss: -1.0251999356114538e-06, rewards: -9.299999999999999, count: 50
epoch: 534789, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 534799, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 534809, loss: 1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 534819, loss: 3.560781578926253e-06, rewards: -9.299999999999999, count: 50
epoch: 534829, loss: 2.310156742169056e-05, rewards: -9.299999999999999, count: 50
epoch: 534839, loss: 0.00012369394244160503, rewards: -9.299999999999999, count: 50
epoch: 534849, loss: -5.478143793880008e-05, rewards: -9.299999999999999, count: 50
epoch: 534859, loss: -2.5339126295875758e-05, rewards: -9.299999999999999, count: 50
epoch: 534869, loss: 1.985788367164787e-05, rewards: -9.299999999999999, count: 50
epoch: 534879, loss: 7.969140824570786e-06, rewards: -9.299999999999999, count: 50
epoch: 534889, loss: -8.897781299310736e-06, rewards: -9.299999999999999, count:

epoch: 535769, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 535779, loss: 3.0303001494758064e-06, rewards: -9.299999999999999, count: 50
epoch: 535789, loss: 1.4598369489249308e-05, rewards: -9.299999999999999, count: 50
epoch: 535799, loss: 8.817196066956967e-05, rewards: -9.299999999999999, count: 50
epoch: 535809, loss: -5.265116851660423e-05, rewards: -9.299999999999999, count: 50
epoch: 535819, loss: 4.287481351639144e-05, rewards: -9.299999999999999, count: 50
epoch: 535829, loss: -3.144740958305192e-06, rewards: -9.299999999999999, count: 50
epoch: 535839, loss: -1.3011694136366714e-05, rewards: -9.299999999999999, count: 50
epoch: 535849, loss: 9.729862540552858e-06, rewards: -9.299999999999999, count: 50
epoch: 535859, loss: -4.075765446032165e-06, rewards: -9.299999999999999, count: 50
epoch: 535869, loss: 1.146793351836095e-06, rewards: -9.299999999999999, count: 50
epoch: 535879, loss: -8.988380386654171e-07, rewards: -9.299999999999999, count

epoch: 536759, loss: -1.3756751968685421e-06, rewards: -9.299999999999999, count: 50
epoch: 536769, loss: -3.670454134407919e-06, rewards: -9.299999999999999, count: 50
epoch: 536779, loss: -1.5442370568052866e-05, rewards: -9.299999999999999, count: 50
epoch: 536789, loss: -8.453727059531957e-05, rewards: -9.299999999999999, count: 50
epoch: 536799, loss: 4.3364761950215325e-05, rewards: -9.299999999999999, count: 50
epoch: 536809, loss: -4.2654275603126734e-05, rewards: -9.299999999999999, count: 50
epoch: 536819, loss: 1.5600919141434133e-05, rewards: -9.299999999999999, count: 50
epoch: 536829, loss: 2.5188921881635906e-06, rewards: -9.299999999999999, count: 50
epoch: 536839, loss: -6.662607120233588e-06, rewards: -9.299999999999999, count: 50
epoch: 536849, loss: 5.304813385009766e-06, rewards: -9.299999999999999, count: 50
epoch: 536859, loss: -3.114938635917497e-06, rewards: -9.299999999999999, count: 50
epoch: 536869, loss: 1.7333030655208859e-06, rewards: -9.299999999999999, 

epoch: 537749, loss: 9.626150131225586e-06, rewards: -9.299999999999999, count: 50
epoch: 537759, loss: -6.300210770859849e-06, rewards: -9.299999999999999, count: 50
epoch: 537769, loss: 3.054142098335433e-06, rewards: -9.299999999999999, count: 50
epoch: 537779, loss: -1.871585823209898e-06, rewards: -9.299999999999999, count: 50
epoch: 537789, loss: 1.1181831496287487e-06, rewards: -9.299999999999999, count: 50
epoch: 537799, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 537809, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 537819, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 537829, loss: 7.96318033735588e-07, rewards: -9.299999999999999, count: 50
epoch: 537839, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 537849, loss: 2.034902536252048e-06, rewards: -9.299999999999999, count: 50
epoch: 537859, loss: 1.1457204891485162e-05, rewards: -9.299999999999999, count: 50

epoch: 538739, loss: -1.1372566177669796e-06, rewards: -9.299999999999999, count: 50
epoch: 538749, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 538759, loss: 2.419948486931389e-06, rewards: -9.299999999999999, count: 50
epoch: 538769, loss: 1.0175705028814264e-05, rewards: -9.299999999999999, count: 50
epoch: 538779, loss: 5.598306597676128e-05, rewards: -9.299999999999999, count: 50
epoch: 538789, loss: 2.9046535928500816e-05, rewards: -9.299999999999999, count: 50
epoch: 538799, loss: 1.670956589805428e-05, rewards: -9.299999999999999, count: 50
epoch: 538809, loss: -2.7395486540626734e-05, rewards: -9.299999999999999, count: 50
epoch: 538819, loss: 1.555204471515026e-05, rewards: -9.299999999999999, count: 50
epoch: 538829, loss: -5.346536454453599e-06, rewards: -9.299999999999999, count: 50
epoch: 538839, loss: 1.1539459592313506e-06, rewards: -9.299999999999999, count: 50
epoch: 538849, loss: -5.066394805908203e-07, rewards: -9.299999999999999, coun

epoch: 539729, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 539739, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 539749, loss: 4.380941390991211e-06, rewards: -9.299999999999999, count: 50
epoch: 539759, loss: 2.8066635422874242e-05, rewards: -9.299999999999999, count: 50
epoch: 539769, loss: 0.00011915445065824315, rewards: -9.299999999999999, count: 50
epoch: 539779, loss: -2.7968882932327688e-05, rewards: -9.299999999999999, count: 50
epoch: 539789, loss: -3.550171823007986e-05, rewards: -9.299999999999999, count: 50
epoch: 539799, loss: 1.0316372026863974e-05, rewards: -9.299999999999999, count: 50
epoch: 539809, loss: 1.1826753507193644e-05, rewards: -9.299999999999999, count: 50
epoch: 539819, loss: -6.588697488041362e-06, rewards: -9.299999999999999, count: 50
epoch: 539829, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 539839, loss: 2.282857849422726e-06, rewards: -9.299999999999999, count

epoch: 540719, loss: -2.8014183044433594e-06, rewards: -9.299999999999999, count: 50
epoch: 540729, loss: 1.6915797687033773e-06, rewards: -9.299999999999999, count: 50
epoch: 540739, loss: -8.916854881135805e-07, rewards: -9.299999999999999, count: 50
epoch: 540749, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 540759, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 540769, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 540779, loss: -8.594989822086063e-07, rewards: -9.299999999999999, count: 50
epoch: 540789, loss: -2.59995454143791e-06, rewards: -9.299999999999999, count: 50
epoch: 540799, loss: -1.1141300092276651e-05, rewards: -9.299999999999999, count: 50
epoch: 540809, loss: -6.979227327974513e-05, rewards: -9.299999999999999, count: 50
epoch: 540819, loss: 1.4644861039414536e-05, rewards: -9.299999999999999, count: 50
epoch: 540829, loss: -4.450321284821257e-05, rewards: -9.299999999999999, cou

epoch: 541709, loss: -5.50627692064154e-06, rewards: -9.299999999999999, count: 50
epoch: 541719, loss: 3.5405159906076733e-06, rewards: -9.299999999999999, count: 50
epoch: 541729, loss: -2.297163064213237e-06, rewards: -9.299999999999999, count: 50
epoch: 541739, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 541749, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 541759, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 541769, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 541779, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 541789, loss: 4.507303401624085e-06, rewards: -9.299999999999999, count: 50
epoch: 541799, loss: 2.3658276404603384e-05, rewards: -9.299999999999999, count: 50
epoch: 541809, loss: 0.00011487722076708451, rewards: -9.299999999999999, count: 50
epoch: 541819, loss: -6.147145904833451e-05, rewards: -9.299999999999999, count: 5

epoch: 542699, loss: 9.829997907218058e-06, rewards: -9.299999999999999, count: 50
epoch: 542709, loss: 7.031798304524273e-05, rewards: -9.299999999999999, count: 50
epoch: 542719, loss: -2.2665262804366648e-05, rewards: -9.299999999999999, count: 50
epoch: 542729, loss: 5.143403905094601e-05, rewards: -9.299999999999999, count: 50
epoch: 542739, loss: -3.908872713509481e-06, rewards: -9.299999999999999, count: 50
epoch: 542749, loss: -1.7580985513632186e-05, rewards: -9.299999999999999, count: 50
epoch: 542759, loss: 6.508827027573716e-06, rewards: -9.299999999999999, count: 50
epoch: 542769, loss: 2.874136043828912e-06, rewards: -9.299999999999999, count: 50
epoch: 542779, loss: -4.03046624342096e-06, rewards: -9.299999999999999, count: 50
epoch: 542789, loss: 2.1207333702477627e-06, rewards: -9.299999999999999, count: 50
epoch: 542799, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 542809, loss: 8.642673492431641e-07, rewards: -9.299999999999999, count:

epoch: 543689, loss: 1.2705326298600994e-05, rewards: -9.299999999999999, count: 50
epoch: 543699, loss: -6.735324859619141e-06, rewards: -9.299999999999999, count: 50
epoch: 543709, loss: -3.6144256227998994e-06, rewards: -9.299999999999999, count: 50
epoch: 543719, loss: 3.3998489925579634e-06, rewards: -9.299999999999999, count: 50
epoch: 543729, loss: -5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 543739, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 543749, loss: 1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 543759, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 543769, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 543779, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 543789, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 543799, loss: 3.755092734536447e-07, rewards: -9.299999999999999, co

epoch: 544679, loss: -1.8477439880371094e-06, rewards: -9.299999999999999, count: 50
epoch: 544689, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 544699, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 544709, loss: -9.965896197172697e-07, rewards: -9.299999999999999, count: 50
epoch: 544719, loss: -5.891322871320881e-06, rewards: -9.299999999999999, count: 50
epoch: 544729, loss: -4.3607949919532984e-05, rewards: -9.299999999999999, count: 50
epoch: 544739, loss: -6.75833216519095e-05, rewards: -9.299999999999999, count: 50
epoch: 544749, loss: -2.9528140657930635e-05, rewards: -9.299999999999999, count: 50
epoch: 544759, loss: 3.0790568416705355e-05, rewards: -9.299999999999999, count: 50
epoch: 544769, loss: 9.608268555894028e-06, rewards: -9.299999999999999, count: 50
epoch: 544779, loss: -1.2022256669297349e-05, rewards: -9.299999999999999, count: 50
epoch: 544789, loss: 6.413459914256237e-07, rewards: -9.299999999999999, c

epoch: 545669, loss: 7.653236480109626e-07, rewards: -9.299999999999999, count: 50
epoch: 545679, loss: -2.3174286525318166e-06, rewards: -9.299999999999999, count: 50
epoch: 545689, loss: 1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 545699, loss: -1.5974044345057337e-06, rewards: -9.299999999999999, count: 50
epoch: 545709, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 545719, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 545729, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 545739, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 545749, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 545759, loss: 1.920461727422662e-06, rewards: -9.299999999999999, count: 50
epoch: 545769, loss: 8.12649705039803e-06, rewards: -9.299999999999999, count: 50
epoch: 545779, loss: 4.516482295002788e-05, rewards: -9.299999999999999, count: 50

epoch: 546659, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 546669, loss: 5.078315552964341e-06, rewards: -9.299999999999999, count: 50
epoch: 546679, loss: 3.4896136639872566e-05, rewards: -9.299999999999999, count: 50
epoch: 546689, loss: 0.00010113001189893112, rewards: -9.299999999999999, count: 50
epoch: 546699, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 546709, loss: -3.725767237483524e-05, rewards: -9.299999999999999, count: 50
epoch: 546719, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 546729, loss: 1.2915133993374184e-05, rewards: -9.299999999999999, count: 50
epoch: 546739, loss: -4.554986844595987e-06, rewards: -9.299999999999999, count: 50
epoch: 546749, loss: -2.1660328002326423e-06, rewards: -9.299999999999999, count: 50
epoch: 546759, loss: 2.8204917725815903e-06, rewards: -9.299999999999999, count: 50
epoch: 546769, loss: -1.9502640498103574e-06, rewards: -9.299999999999999, cou

epoch: 547649, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 547659, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 547669, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 547679, loss: -2.2220610844669864e-06, rewards: -9.299999999999999, count: 50
epoch: 547689, loss: -1.1831521987915039e-05, rewards: -9.299999999999999, count: 50
epoch: 547699, loss: -8.219003939302638e-05, rewards: -9.299999999999999, count: 50
epoch: 547709, loss: 4.8304795200238004e-05, rewards: -9.299999999999999, count: 50
epoch: 547719, loss: -4.597544830176048e-05, rewards: -9.299999999999999, count: 50
epoch: 547729, loss: -6.300210770859849e-06, rewards: -9.299999999999999, count: 50
epoch: 547739, loss: 1.7588137779966928e-05, rewards: -9.299999999999999, count: 50
epoch: 547749, loss: -3.983974238508381e-06, rewards: -9.299999999999999, count: 50
epoch: 547759, loss: -3.4296513149456587e-06, rewards: -9.299999999999999, 

epoch: 548639, loss: -4.795789664058248e-06, rewards: -9.299999999999999, count: 50
epoch: 548649, loss: -5.083084033685736e-06, rewards: -9.299999999999999, count: 50
epoch: 548659, loss: 2.7263165520707844e-06, rewards: -9.299999999999999, count: 50
epoch: 548669, loss: 4.994869300389837e-07, rewards: -9.299999999999999, count: 50
epoch: 548679, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 548689, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 548699, loss: -1.8835068260614207e-07, rewards: -9.299999999999999, count: 50
epoch: 548709, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 548719, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 548729, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 548739, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 548749, loss: 3.397464638510428e-07, rewards: -9.299999999999999, coun

epoch: 549629, loss: -4.129409717279486e-06, rewards: -9.299999999999999, count: 50
epoch: 549639, loss: -2.2470951080322266e-05, rewards: -9.299999999999999, count: 50
epoch: 549649, loss: -0.00011322617501718923, rewards: -9.299999999999999, count: 50
epoch: 549659, loss: 6.46197804599069e-05, rewards: -9.299999999999999, count: 50
epoch: 549669, loss: -4.755258487421088e-06, rewards: -9.299999999999999, count: 50
epoch: 549679, loss: -2.1164416466490366e-05, rewards: -9.299999999999999, count: 50
epoch: 549689, loss: 1.2015104402962606e-05, rewards: -9.299999999999999, count: 50
epoch: 549699, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 549709, loss: -2.448558916512411e-06, rewards: -9.299999999999999, count: 50
epoch: 549719, loss: 2.2149085907585686e-06, rewards: -9.299999999999999, count: 50
epoch: 549729, loss: -1.0550022579991492e-06, rewards: -9.299999999999999, count: 50
epoch: 549739, loss: 9.667872973295744e-07, rewards: -9.299999999999999, 

epoch: 550619, loss: -2.2220610844669864e-06, rewards: -9.299999999999999, count: 50
epoch: 550629, loss: 1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 550639, loss: -7.534027304245683e-07, rewards: -9.299999999999999, count: 50
epoch: 550649, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 550659, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 550669, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 550679, loss: -1.3029575711698271e-06, rewards: -9.299999999999999, count: 50
epoch: 550689, loss: -1.8131732986148563e-06, rewards: -9.299999999999999, count: 50
epoch: 550699, loss: -7.6043606895837e-06, rewards: -9.299999999999999, count: 50
epoch: 550709, loss: -5.581498044193722e-05, rewards: -9.299999999999999, count: 50
epoch: 550719, loss: -2.0444393157958984e-05, rewards: -9.299999999999999, count: 50
epoch: 550729, loss: -4.820346657652408e-05, rewards: -9.299999999999999,

epoch: 551609, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 551619, loss: 6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 551629, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 551639, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 551649, loss: 6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 551659, loss: 3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 551669, loss: -8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 551679, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 551689, loss: -3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 551699, loss: -1.4555454299625126e-06, rewards: -9.299999999999999, count: 50
epoch: 551709, loss: -9.40203699428821e-06, rewards: -9.299999999999999, count: 50
epoch: 551719, loss: -8.322358189616352e-05, rewards: -9.299999999999999, coun

epoch: 552599, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 552609, loss: -1.1062621751989354e-06, rewards: -9.299999999999999, count: 50
epoch: 552619, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 552629, loss: -8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 552639, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 552649, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 552659, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 552669, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 552679, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 552689, loss: 2.130270104316878e-06, rewards: -9.299999999999999, count: 50
epoch: 552699, loss: 1.0778903742902912e-05, rewards: -9.299999999999999, count: 50
epoch: 552709, loss: 8.103490108624101e-05, rewards: -9.299999999999999, count:

epoch: 553589, loss: -1.6915797687033773e-06, rewards: -9.299999999999999, count: 50
epoch: 553599, loss: -6.844997187727131e-06, rewards: -9.299999999999999, count: 50
epoch: 553609, loss: -3.91280664189253e-05, rewards: -9.299999999999999, count: 50
epoch: 553619, loss: -8.383631939068437e-05, rewards: -9.299999999999999, count: 50
epoch: 553629, loss: 1.63400181918405e-05, rewards: -9.299999999999999, count: 50
epoch: 553639, loss: 2.232193946838379e-05, rewards: -9.299999999999999, count: 50
epoch: 553649, loss: -1.9490718841552734e-05, rewards: -9.299999999999999, count: 50
epoch: 553659, loss: 6.608962848986266e-06, rewards: -9.299999999999999, count: 50
epoch: 553669, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 553679, loss: -1.7428397995900013e-06, rewards: -9.299999999999999, count: 50
epoch: 553689, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 553699, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, cou

epoch: 554579, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 554589, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 554599, loss: 6.949901489861077e-07, rewards: -9.299999999999999, count: 50
epoch: 554609, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 554619, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 554629, loss: 3.6799908684770344e-06, rewards: -9.299999999999999, count: 50
epoch: 554639, loss: 2.3176669856184162e-05, rewards: -9.299999999999999, count: 50
epoch: 554649, loss: 0.0001303899334743619, rewards: -9.299999999999999, count: 50
epoch: 554659, loss: -4.0590763092041016e-05, rewards: -9.299999999999999, count: 50
epoch: 554669, loss: -3.975152867496945e-05, rewards: -9.299999999999999, count: 50
epoch: 554679, loss: 3.4999848139705136e-06, rewards: -9.299999999999999, count: 50
epoch: 554689, loss: 1.5194415937003214e-05, rewards: -9.299999999999999, count

epoch: 555569, loss: -4.668951078201644e-05, rewards: -9.299999999999999, count: 50
epoch: 555579, loss: -5.890488682780415e-05, rewards: -9.299999999999999, count: 50
epoch: 555589, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 555599, loss: 2.5407076464034617e-05, rewards: -9.299999999999999, count: 50
epoch: 555609, loss: -1.8174649085267447e-05, rewards: -9.299999999999999, count: 50
epoch: 555619, loss: 6.476640919572674e-06, rewards: -9.299999999999999, count: 50
epoch: 555629, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 555639, loss: -3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 555649, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 555659, loss: 7.653236480109626e-07, rewards: -9.299999999999999, count: 50
epoch: 555669, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 555679, loss: -2.837181227732799e-07, rewards: -9.299999999999999, co

epoch: 556559, loss: 9.953975677490234e-06, rewards: -9.299999999999999, count: 50
epoch: 556569, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 556579, loss: -3.5047530673182337e-06, rewards: -9.299999999999999, count: 50
epoch: 556589, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 556599, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 556609, loss: -9.834766387939453e-07, rewards: -9.299999999999999, count: 50
epoch: 556619, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 556629, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 556639, loss: -3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 556649, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, count: 50
epoch: 556659, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 556669, loss: 1.370906801412275e-07, rewards: -9.299999999999999, count

epoch: 557549, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 557559, loss: -1.8453597476764116e-06, rewards: -9.299999999999999, count: 50
epoch: 557569, loss: -7.416009793814737e-06, rewards: -9.299999999999999, count: 50
epoch: 557579, loss: -4.392862319946289e-05, rewards: -9.299999999999999, count: 50
epoch: 557589, loss: -6.75833216519095e-05, rewards: -9.299999999999999, count: 50
epoch: 557599, loss: -6.777047929062974e-06, rewards: -9.299999999999999, count: 50
epoch: 557609, loss: 3.1988620321499184e-05, rewards: -9.299999999999999, count: 50
epoch: 557619, loss: -1.3223886526247952e-05, rewards: -9.299999999999999, count: 50
epoch: 557629, loss: -3.013610921698273e-06, rewards: -9.299999999999999, count: 50
epoch: 557639, loss: 6.260871941776713e-06, rewards: -9.299999999999999, count: 50
epoch: 557649, loss: -4.318952505855123e-06, rewards: -9.299999999999999, count: 50
epoch: 557659, loss: 2.282857849422726e-06, rewards: -9.299999999999999, cou

epoch: 558539, loss: -6.593823491130024e-05, rewards: -9.299999999999999, count: 50
epoch: 558549, loss: 3.371834827703424e-05, rewards: -9.299999999999999, count: 50
epoch: 558559, loss: 9.912252608046401e-06, rewards: -9.299999999999999, count: 50
epoch: 558569, loss: -1.5870333299972117e-05, rewards: -9.299999999999999, count: 50
epoch: 558579, loss: 6.107091849116841e-06, rewards: -9.299999999999999, count: 50
epoch: 558589, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 558599, loss: -1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 558609, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 558619, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 558629, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 558639, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 558649, loss: -1.020431568576896e-06, rewards: -9.299999999999999, count: 5

epoch: 559529, loss: -1.1831521987915039e-05, rewards: -9.299999999999999, count: 50
epoch: 559539, loss: -9.399652481079102e-05, rewards: -9.299999999999999, count: 50
epoch: 559549, loss: 7.359504525084049e-05, rewards: -9.299999999999999, count: 50
epoch: 559559, loss: -2.4487972041242756e-05, rewards: -9.299999999999999, count: 50
epoch: 559569, loss: -2.9073953555780463e-05, rewards: -9.299999999999999, count: 50
epoch: 559579, loss: 2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 559589, loss: 1.0830163773789536e-05, rewards: -9.299999999999999, count: 50
epoch: 559599, loss: -2.5188921881635906e-06, rewards: -9.299999999999999, count: 50
epoch: 559609, loss: -2.847909854608588e-06, rewards: -9.299999999999999, count: 50
epoch: 559619, loss: 2.455711410220829e-06, rewards: -9.299999999999999, count: 50
epoch: 559629, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 559639, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, 

epoch: 560519, loss: -4.380941390991211e-06, rewards: -9.299999999999999, count: 50
epoch: 560529, loss: 2.5463104975642636e-06, rewards: -9.299999999999999, count: 50
epoch: 560539, loss: -1.996755599975586e-06, rewards: -9.299999999999999, count: 50
epoch: 560549, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 560559, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 560569, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 560579, loss: -1.3589858554041712e-06, rewards: -9.299999999999999, count: 50
epoch: 560589, loss: -4.197358975943644e-06, rewards: -9.299999999999999, count: 50
epoch: 560599, loss: -1.78420541487867e-05, rewards: -9.299999999999999, count: 50
epoch: 560609, loss: -8.787513070274144e-05, rewards: -9.299999999999999, count: 50
epoch: 560619, loss: 4.681706559495069e-05, rewards: -9.299999999999999, count: 50
epoch: 560629, loss: -3.9302110963035375e-05, rewards: -9.299999999999999, cou

epoch: 561509, loss: 3.2697917049517855e-05, rewards: -9.299999999999999, count: 50
epoch: 561519, loss: 6.779789691790938e-05, rewards: -9.299999999999999, count: 50
epoch: 561529, loss: 3.316283255117014e-05, rewards: -9.299999999999999, count: 50
epoch: 561539, loss: 5.460977718030335e-06, rewards: -9.299999999999999, count: 50
epoch: 561549, loss: -8.369684110220987e-06, rewards: -9.299999999999999, count: 50
epoch: 561559, loss: -8.79883737070486e-06, rewards: -9.299999999999999, count: 50
epoch: 561569, loss: -1.7797947293729521e-06, rewards: -9.299999999999999, count: 50
epoch: 561579, loss: 2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 561589, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 561599, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 561609, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 561619, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 5

epoch: 562499, loss: -1.7631053879085812e-06, rewards: -9.299999999999999, count: 50
epoch: 562509, loss: -1.424551010131836e-05, rewards: -9.299999999999999, count: 50
epoch: 562519, loss: -0.00012483715545386076, rewards: -9.299999999999999, count: 50
epoch: 562529, loss: 8.197307761292905e-05, rewards: -9.299999999999999, count: 50
epoch: 562539, loss: 3.824114901362918e-05, rewards: -9.299999999999999, count: 50
epoch: 562549, loss: -2.7990340640826616e-06, rewards: -9.299999999999999, count: 50
epoch: 562559, loss: -1.6267300452454947e-05, rewards: -9.299999999999999, count: 50
epoch: 562569, loss: -8.757114301261026e-06, rewards: -9.299999999999999, count: 50
epoch: 562579, loss: 2.7239323117100867e-06, rewards: -9.299999999999999, count: 50
epoch: 562589, loss: 4.197358975943644e-06, rewards: -9.299999999999999, count: 50
epoch: 562599, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 562609, loss: -8.165836220541678e-07, rewards: -9.299999999999999, 

epoch: 563489, loss: -7.569789886474609e-06, rewards: -9.299999999999999, count: 50
epoch: 563499, loss: 3.185868263244629e-05, rewards: -9.299999999999999, count: 50
epoch: 563509, loss: -1.2489556866057683e-05, rewards: -9.299999999999999, count: 50
epoch: 563519, loss: -3.1757354008732364e-06, rewards: -9.299999999999999, count: 50
epoch: 563529, loss: 6.25371922069462e-06, rewards: -9.299999999999999, count: 50
epoch: 563539, loss: -4.606246875482611e-06, rewards: -9.299999999999999, count: 50
epoch: 563549, loss: 2.511739694455173e-06, rewards: -9.299999999999999, count: 50
epoch: 563559, loss: -1.6295909972541267e-06, rewards: -9.299999999999999, count: 50
epoch: 563569, loss: 1.0251999356114538e-06, rewards: -9.299999999999999, count: 50
epoch: 563579, loss: 2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 563589, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 563599, loss: -5.018711135562626e-07, rewards: -9.299999999999999, cou

epoch: 564479, loss: 1.1563300859052106e-06, rewards: -9.299999999999999, count: 50
epoch: 564489, loss: 2.2923945834918413e-06, rewards: -9.299999999999999, count: 50
epoch: 564499, loss: 8.710622751095798e-06, rewards: -9.299999999999999, count: 50
epoch: 564509, loss: 4.8435926146339625e-05, rewards: -9.299999999999999, count: 50
epoch: 564519, loss: 5.2621366194216534e-05, rewards: -9.299999999999999, count: 50
epoch: 564529, loss: 4.713535417977255e-06, rewards: -9.299999999999999, count: 50
epoch: 564539, loss: -2.6212930606561713e-05, rewards: -9.299999999999999, count: 50
epoch: 564549, loss: 1.722574234008789e-05, rewards: -9.299999999999999, count: 50
epoch: 564559, loss: -5.891322871320881e-06, rewards: -9.299999999999999, count: 50
epoch: 564569, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 564579, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count: 50
epoch: 564589, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count:

epoch: 565469, loss: 6.029606083757244e-06, rewards: -9.299999999999999, count: 50
epoch: 565479, loss: -3.5071373076789314e-06, rewards: -9.299999999999999, count: 50
epoch: 565489, loss: 1.7726421219776967e-06, rewards: -9.299999999999999, count: 50
epoch: 565499, loss: -1.8477439880371094e-06, rewards: -9.299999999999999, count: 50
epoch: 565509, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 565519, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 565529, loss: -6.806850478824344e-07, rewards: -9.299999999999999, count: 50
epoch: 565539, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 565549, loss: -1.2123584838263923e-06, rewards: -9.299999999999999, count: 50
epoch: 565559, loss: -2.5534629912726814e-06, rewards: -9.299999999999999, count: 50
epoch: 565569, loss: -1.5283823813661e-05, rewards: -9.299999999999999, count: 50
epoch: 565579, loss: -0.00010977387137245387, rewards: -9.299999999999999, co

epoch: 566459, loss: -1.9495486412779428e-05, rewards: -9.299999999999999, count: 50
epoch: 566469, loss: -6.172776193125173e-05, rewards: -9.299999999999999, count: 50
epoch: 566479, loss: -1.253247228305554e-05, rewards: -9.299999999999999, count: 50
epoch: 566489, loss: 1.5935896954033524e-05, rewards: -9.299999999999999, count: 50
epoch: 566499, loss: 1.1538267244759481e-05, rewards: -9.299999999999999, count: 50
epoch: 566509, loss: -2.7668475013342686e-06, rewards: -9.299999999999999, count: 50
epoch: 566519, loss: -4.380941390991211e-06, rewards: -9.299999999999999, count: 50
epoch: 566529, loss: 1.8060206912196008e-06, rewards: -9.299999999999999, count: 50
epoch: 566539, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 566549, loss: -9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 566559, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 566569, loss: 5.960464477539063e-08, rewards: -9.299999999999999, co

epoch: 567449, loss: -1.0550022579991492e-06, rewards: -9.299999999999999, count: 50
epoch: 567459, loss: 2.753734520410944e-07, rewards: -9.299999999999999, count: 50
epoch: 567469, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 567479, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 567489, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 567499, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 567509, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 567519, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 567529, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, count: 50
epoch: 567539, loss: 4.994869300389837e-07, rewards: -9.299999999999999, count: 50
epoch: 567549, loss: 4.631280717148911e-06, rewards: -9.299999999999999, count: 50
epoch: 567559, loss: 4.70328341179993e-05, rewards: -9.299999999999999, co

epoch: 568439, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 568449, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 568459, loss: 1.1539459592313506e-06, rewards: -9.299999999999999, count: 50
epoch: 568469, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 568479, loss: -1.1062621751989354e-06, rewards: -9.299999999999999, count: 50
epoch: 568489, loss: -2.409219632681925e-06, rewards: -9.299999999999999, count: 50
epoch: 568499, loss: -9.740590940054972e-06, rewards: -9.299999999999999, count: 50
epoch: 568509, loss: -6.406545435311273e-05, rewards: -9.299999999999999, count: 50
epoch: 568519, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 568529, loss: -4.46951380581595e-05, rewards: -9.299999999999999, count: 50
epoch: 568539, loss: 2.104043960571289e-05, rewards: -9.299999999999999, count: 50
epoch: 568549, loss: 7.476806786144152e-06, rewards: -9.299999999999999, coun

epoch: 569429, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 569439, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 569449, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 569459, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 569469, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 569479, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 569489, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 569499, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 569509, loss: -1.840591380641854e-06, rewards: -9.299999999999999, count: 50
epoch: 569519, loss: -8.131265531119425e-06, rewards: -9.299999999999999, count: 50
epoch: 569529, loss: -4.963159517501481e-05, rewards: -9.299999999999999, count: 50
epoch: 569539, loss: -4.6991110139060766e-05, rewards: -9.299999999999999, coun

epoch: 570419, loss: -7.798671504133381e-06, rewards: -9.299999999999999, count: 50
epoch: 570429, loss: -9.5617770057288e-06, rewards: -9.299999999999999, count: 50
epoch: 570439, loss: 7.539987564086914e-06, rewards: -9.299999999999999, count: 50
epoch: 570449, loss: -2.3436546143784653e-06, rewards: -9.299999999999999, count: 50
epoch: 570459, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 570469, loss: 4.470348358154297e-07, rewards: -9.299999999999999, count: 50
epoch: 570479, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 570489, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 570499, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 570509, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 570519, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 570529, loss: 7.700920150455204e-07, rewards: -9.299999999999999, count: 

epoch: 571409, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 571419, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 571429, loss: 5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 571439, loss: 6.902217819515499e-07, rewards: -9.299999999999999, count: 50
epoch: 571449, loss: 1.8429756210025516e-06, rewards: -9.299999999999999, count: 50
epoch: 571459, loss: 9.00506984180538e-06, rewards: -9.299999999999999, count: 50
epoch: 571469, loss: 5.9189795138081536e-05, rewards: -9.299999999999999, count: 50
epoch: 571479, loss: 1.3284683518577367e-05, rewards: -9.299999999999999, count: 50
epoch: 571489, loss: 4.237175016896799e-05, rewards: -9.299999999999999, count: 50
epoch: 571499, loss: -2.412557660136372e-05, rewards: -9.299999999999999, count: 50
epoch: 571509, loss: -6.990432666498236e-06, rewards: -9.299999999999999, count: 50
epoch: 571519, loss: 1.1286735571047757e-05, rewards: -9.299999999999999, count: 50

epoch: 572399, loss: 1.2342929949227255e-05, rewards: -9.299999999999999, count: 50
epoch: 572409, loss: 7.45868674130179e-05, rewards: -9.299999999999999, count: 50
epoch: 572419, loss: -2.5231838662875816e-05, rewards: -9.299999999999999, count: 50
epoch: 572429, loss: 4.355669079814106e-05, rewards: -9.299999999999999, count: 50
epoch: 572439, loss: -1.996755599975586e-05, rewards: -9.299999999999999, count: 50
epoch: 572449, loss: -2.0420550299604656e-06, rewards: -9.299999999999999, count: 50
epoch: 572459, loss: 7.642507625860162e-06, rewards: -9.299999999999999, count: 50
epoch: 572469, loss: -6.300210770859849e-06, rewards: -9.299999999999999, count: 50
epoch: 572479, loss: 3.942251169064548e-06, rewards: -9.299999999999999, count: 50
epoch: 572489, loss: -2.43902195506962e-06, rewards: -9.299999999999999, count: 50
epoch: 572499, loss: 1.2469291732486454e-06, rewards: -9.299999999999999, count: 50
epoch: 572509, loss: -1.0180473282161984e-06, rewards: -9.299999999999999, count

epoch: 573389, loss: 2.2863148842589e-05, rewards: -9.299999999999999, count: 50
epoch: 573399, loss: -6.616115570068359e-06, rewards: -9.299999999999999, count: 50
epoch: 573409, loss: -3.114938635917497e-06, rewards: -9.299999999999999, count: 50
epoch: 573419, loss: 4.504919161263388e-06, rewards: -9.299999999999999, count: 50
epoch: 573429, loss: -3.1197071166388923e-06, rewards: -9.299999999999999, count: 50
epoch: 573439, loss: 2.0694733393611386e-06, rewards: -9.299999999999999, count: 50
epoch: 573449, loss: -1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 573459, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 573469, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 573479, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 573489, loss: 3.2424927098873013e-07, rewards: -9.299999999999999, count: 50
epoch: 573499, loss: 3.099441414633475e-07, rewards: -9.299999999999999, count: 50


epoch: 574379, loss: 3.4999848139705136e-06, rewards: -9.299999999999999, count: 50
epoch: 574389, loss: -8.389949471165892e-06, rewards: -9.299999999999999, count: 50
epoch: 574399, loss: 4.708766937255859e-06, rewards: -9.299999999999999, count: 50
epoch: 574409, loss: -1.5318394162022742e-06, rewards: -9.299999999999999, count: 50
epoch: 574419, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 574429, loss: -2.7894972731701273e-07, rewards: -9.299999999999999, count: 50
epoch: 574439, loss: 1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 574449, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 574459, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 574469, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 574479, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 574489, loss: -1.8823146774593624e-06, rewards: -9.299999999999999, cou

epoch: 575369, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 575379, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 575389, loss: 2.7942658107349416e-06, rewards: -9.299999999999999, count: 50
epoch: 575399, loss: 6.953478077775799e-06, rewards: -9.299999999999999, count: 50
epoch: 575409, loss: 2.6568173780106008e-05, rewards: -9.299999999999999, count: 50
epoch: 575419, loss: 9.277224307879806e-05, rewards: -9.299999999999999, count: 50
epoch: 575429, loss: -5.3579806262860075e-05, rewards: -9.299999999999999, count: 50
epoch: 575439, loss: 3.2643078156979755e-05, rewards: -9.299999999999999, count: 50
epoch: 575449, loss: -1.7478465451858938e-05, rewards: -9.299999999999999, count: 50
epoch: 575459, loss: 9.367466191179119e-06, rewards: -9.299999999999999, count: 50
epoch: 575469, loss: -6.104707608756144e-06, rewards: -9.299999999999999, count: 50
epoch: 575479, loss: 4.667043867812026e-06, rewards: -9.299999999999999, cou

epoch: 576359, loss: 2.006769136642106e-05, rewards: -9.299999999999999, count: 50
epoch: 576369, loss: 4.843354327022098e-05, rewards: -9.299999999999999, count: 50
epoch: 576379, loss: -1.6905069060157984e-05, rewards: -9.299999999999999, count: 50
epoch: 576389, loss: -1.6267300452454947e-05, rewards: -9.299999999999999, count: 50
epoch: 576399, loss: 8.540153430658393e-06, rewards: -9.299999999999999, count: 50
epoch: 576409, loss: 2.1851062683708733e-06, rewards: -9.299999999999999, count: 50
epoch: 576419, loss: -4.134178198000882e-06, rewards: -9.299999999999999, count: 50
epoch: 576429, loss: 2.32934962696163e-06, rewards: -9.299999999999999, count: 50
epoch: 576439, loss: -9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 576449, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 576459, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 576469, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count:

epoch: 577349, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 577359, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 577369, loss: 9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 577379, loss: -7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 577389, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 577399, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 577409, loss: 9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 577419, loss: 8.010864007701457e-07, rewards: -9.299999999999999, count: 50
epoch: 577429, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 577439, loss: 1.5747547195132938e-06, rewards: -9.299999999999999, count: 50
epoch: 577449, loss: 9.019375283969566e-06, rewards: -9.299999999999999, count: 50
epoch: 577459, loss: 8.650779636809602e-05, rewards: -9.299999999999999, count: 5

epoch: 578339, loss: 7.139086665119976e-05, rewards: -9.299999999999999, count: 50
epoch: 578349, loss: -1.605033867235761e-05, rewards: -9.299999999999999, count: 50
epoch: 578359, loss: 3.998994725407101e-05, rewards: -9.299999999999999, count: 50
epoch: 578369, loss: -2.4102926545310766e-05, rewards: -9.299999999999999, count: 50
epoch: 578379, loss: 3.2031537102739094e-06, rewards: -9.299999999999999, count: 50
epoch: 578389, loss: 4.79340542369755e-06, rewards: -9.299999999999999, count: 50
epoch: 578399, loss: -5.022287496103672e-06, rewards: -9.299999999999999, count: 50
epoch: 578409, loss: 3.5405159906076733e-06, rewards: -9.299999999999999, count: 50
epoch: 578419, loss: -2.161264319511247e-06, rewards: -9.299999999999999, count: 50
epoch: 578429, loss: 9.238719940185547e-07, rewards: -9.299999999999999, count: 50
epoch: 578439, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 578449, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count

epoch: 579329, loss: 1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 579339, loss: 3.236532165828976e-06, rewards: -9.299999999999999, count: 50
epoch: 579349, loss: 1.8819569959305227e-05, rewards: -9.299999999999999, count: 50
epoch: 579359, loss: 0.0001140356034738943, rewards: -9.299999999999999, count: 50
epoch: 579369, loss: -7.301092409761623e-05, rewards: -9.299999999999999, count: 50
epoch: 579379, loss: 3.7205218177405186e-06, rewards: -9.299999999999999, count: 50
epoch: 579389, loss: 2.5612116587581113e-05, rewards: -9.299999999999999, count: 50
epoch: 579399, loss: -6.742477580701234e-06, rewards: -9.299999999999999, count: 50
epoch: 579409, loss: -6.655454853898846e-06, rewards: -9.299999999999999, count: 50
epoch: 579419, loss: 6.031989869370591e-06, rewards: -9.299999999999999, count: 50
epoch: 579429, loss: -2.5582312446204014e-06, rewards: -9.299999999999999, count: 50
epoch: 579439, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, coun

epoch: 580319, loss: 7.4406860221643e-05, rewards: -9.299999999999999, count: 50
epoch: 580329, loss: -2.177238457079511e-05, rewards: -9.299999999999999, count: 50
epoch: 580339, loss: -2.4095774278976023e-05, rewards: -9.299999999999999, count: 50
epoch: 580349, loss: 1.035571131069446e-05, rewards: -9.299999999999999, count: 50
epoch: 580359, loss: 5.935430635872763e-06, rewards: -9.299999999999999, count: 50
epoch: 580369, loss: -6.511211267934414e-06, rewards: -9.299999999999999, count: 50
epoch: 580379, loss: 2.2149085907585686e-06, rewards: -9.299999999999999, count: 50
epoch: 580389, loss: 3.850460075227602e-07, rewards: -9.299999999999999, count: 50
epoch: 580399, loss: -5.686283088834898e-07, rewards: -9.299999999999999, count: 50
epoch: 580409, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 580419, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 580429, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 5

epoch: 581309, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 581319, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 581329, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 581339, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 581349, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 581359, loss: 1.2934208371007117e-06, rewards: -9.299999999999999, count: 50
epoch: 581369, loss: 5.110502115712734e-06, rewards: -9.299999999999999, count: 50
epoch: 581379, loss: 3.7406683986773714e-05, rewards: -9.299999999999999, count: 50
epoch: 581389, loss: 9.163975482806563e-05, rewards: -9.299999999999999, count: 50
epoch: 581399, loss: 2.5247334633604623e-05, rewards: -9.299999999999999, count: 50
epoch: 581409, loss: -3.20494182233233e-05, rewards: -9.299999999999999, count: 50
epoch: 581419, loss: -1.5763043847982772e-05, rewards: -9.299999999999999, count:

epoch: 582299, loss: -5.6703091104282066e-05, rewards: -9.299999999999999, count: 50
epoch: 582309, loss: 7.6043606895837e-06, rewards: -9.299999999999999, count: 50
epoch: 582319, loss: 1.4975071280787233e-05, rewards: -9.299999999999999, count: 50
epoch: 582329, loss: -1.3182163456804119e-05, rewards: -9.299999999999999, count: 50
epoch: 582339, loss: 6.868839136586757e-06, rewards: -9.299999999999999, count: 50
epoch: 582349, loss: -3.0601024718635017e-06, rewards: -9.299999999999999, count: 50
epoch: 582359, loss: 1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 582369, loss: -1.2862682297054562e-06, rewards: -9.299999999999999, count: 50
epoch: 582379, loss: 1.7702579953038367e-06, rewards: -9.299999999999999, count: 50
epoch: 582389, loss: -1.1277198836978641e-06, rewards: -9.299999999999999, count: 50
epoch: 582399, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 582409, loss: 1.7189979644172126e-06, rewards: -9.299999999999999, c

epoch: 583289, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 583299, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 583309, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, count: 50
epoch: 583319, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 583329, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 583339, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 583349, loss: -2.256631887576077e-06, rewards: -9.299999999999999, count: 50
epoch: 583359, loss: -1.755356788635254e-05, rewards: -9.299999999999999, count: 50
epoch: 583369, loss: -0.00013896345626562834, rewards: -9.299999999999999, count: 50
epoch: 583379, loss: 5.505442459252663e-05, rewards: -9.299999999999999, count: 50
epoch: 583389, loss: 4.86600401927717e-05, rewards: -9.299999999999999, count: 50
epoch: 583399, loss: 1.547217289044056e-05, rewards: -9.299999999999999, coun

epoch: 584279, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 584289, loss: 3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 584299, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 584309, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 584319, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 584329, loss: 3.099441414633475e-07, rewards: -9.299999999999999, count: 50
epoch: 584339, loss: -9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 584349, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 584359, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 584369, loss: 2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 584379, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 584389, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count

epoch: 585269, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 585279, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 585289, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 585299, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 585309, loss: -9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 585319, loss: -2.696514229683089e-06, rewards: -9.299999999999999, count: 50
epoch: 585329, loss: -7.693767656746786e-06, rewards: -9.299999999999999, count: 50
epoch: 585339, loss: -3.4506319934735075e-05, rewards: -9.299999999999999, count: 50
epoch: 585349, loss: -8.867860015016049e-05, rewards: -9.299999999999999, count: 50
epoch: 585359, loss: 4.6293735067592934e-05, rewards: -9.299999999999999, count: 50
epoch: 585369, loss: -1.6524791135452688e-05, rewards: -9.299999999999999, count: 50
epoch: 585379, loss: 1.7726421219776967e-06, rewards: -9.299999999999999, 

epoch: 586259, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 586269, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 586279, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 586289, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 586299, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 586309, loss: -1.043081283569336e-06, rewards: -9.299999999999999, count: 50
epoch: 586319, loss: -4.603862635121914e-06, rewards: -9.299999999999999, count: 50
epoch: 586329, loss: -2.7748346838052385e-05, rewards: -9.299999999999999, count: 50
epoch: 586339, loss: -0.00011573672236409038, rewards: -9.299999999999999, count: 50
epoch: 586349, loss: 4.058122794958763e-05, rewards: -9.299999999999999, count: 50
epoch: 586359, loss: 2.4658442271174863e-05, rewards: -9.299999999999999, count: 50
epoch: 586369, loss: -2.0673274775617756e-05, rewards: -9.299999999999999, 

epoch: 587249, loss: -3.116369407507591e-05, rewards: -9.299999999999999, count: 50
epoch: 587259, loss: -1.2109279850847088e-05, rewards: -9.299999999999999, count: 50
epoch: 587269, loss: 6.620884050789755e-06, rewards: -9.299999999999999, count: 50
epoch: 587279, loss: 6.211996151250787e-06, rewards: -9.299999999999999, count: 50
epoch: 587289, loss: -2.4139881134033203e-06, rewards: -9.299999999999999, count: 50
epoch: 587299, loss: -1.5652179854441783e-06, rewards: -9.299999999999999, count: 50
epoch: 587309, loss: 1.6438960983578e-06, rewards: -9.299999999999999, count: 50
epoch: 587319, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 587329, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 587339, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 587349, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 587359, loss: -9.179115068036481e-08, rewards: -9.299999999999999, count:

epoch: 588239, loss: 1.519918441772461e-06, rewards: -9.299999999999999, count: 50
epoch: 588249, loss: -1.1372566177669796e-06, rewards: -9.299999999999999, count: 50
epoch: 588259, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 588269, loss: 1.1718273071892327e-06, rewards: -9.299999999999999, count: 50
epoch: 588279, loss: 3.2031537102739094e-06, rewards: -9.299999999999999, count: 50
epoch: 588289, loss: 1.1026859283447266e-05, rewards: -9.299999999999999, count: 50
epoch: 588299, loss: 6.0617923736572266e-05, rewards: -9.299999999999999, count: 50
epoch: 588309, loss: 1.553177753521595e-05, rewards: -9.299999999999999, count: 50
epoch: 588319, loss: 2.1675825337297283e-05, rewards: -9.299999999999999, count: 50
epoch: 588329, loss: -2.6804209483088925e-05, rewards: -9.299999999999999, count: 50
epoch: 588339, loss: 1.4848708815407008e-05, rewards: -9.299999999999999, count: 50
epoch: 588349, loss: -5.316734132065903e-06, rewards: -9.299999999999999, co

epoch: 589229, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 589239, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 589249, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 589259, loss: -3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 589269, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 589279, loss: 6.902217819515499e-07, rewards: -9.299999999999999, count: 50
epoch: 589289, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 589299, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 589309, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 589319, loss: -1.0180473282161984e-06, rewards: -9.299999999999999, count: 50
epoch: 589329, loss: -1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 589339, loss: -4.460811396711506e-06, rewards: -9.299999999999999, cou

epoch: 590219, loss: -6.849765668448526e-06, rewards: -9.299999999999999, count: 50
epoch: 590229, loss: -3.212213414371945e-05, rewards: -9.299999999999999, count: 50
epoch: 590239, loss: -9.836316166911274e-05, rewards: -9.299999999999999, count: 50
epoch: 590249, loss: 4.591107426676899e-05, rewards: -9.299999999999999, count: 50
epoch: 590259, loss: -3.6013125281897373e-06, rewards: -9.299999999999999, count: 50
epoch: 590269, loss: -1.3009309441258665e-05, rewards: -9.299999999999999, count: 50
epoch: 590279, loss: 1.1667013495753054e-05, rewards: -9.299999999999999, count: 50
epoch: 590289, loss: -6.874799510114826e-06, rewards: -9.299999999999999, count: 50
epoch: 590299, loss: 3.7205218177405186e-06, rewards: -9.299999999999999, count: 50
epoch: 590309, loss: -2.448558916512411e-06, rewards: -9.299999999999999, count: 50
epoch: 590319, loss: 7.700920150455204e-07, rewards: -9.299999999999999, count: 50
epoch: 590329, loss: 9.787082717593876e-07, rewards: -9.299999999999999, cou

epoch: 591209, loss: -5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 591219, loss: 2.7298926852381555e-07, rewards: -9.299999999999999, count: 50
epoch: 591229, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 591239, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 591249, loss: 7.653236480109626e-07, rewards: -9.299999999999999, count: 50
epoch: 591259, loss: 2.548694510551286e-06, rewards: -9.299999999999999, count: 50
epoch: 591269, loss: 1.017808881442761e-05, rewards: -9.299999999999999, count: 50
epoch: 591279, loss: 6.41190999886021e-05, rewards: -9.299999999999999, count: 50
epoch: 591289, loss: 9.262561775358336e-07, rewards: -9.299999999999999, count: 50
epoch: 591299, loss: 4.0838716813595966e-05, rewards: -9.299999999999999, count: 50
epoch: 591309, loss: -2.4932622181950137e-05, rewards: -9.299999999999999, count: 50
epoch: 591319, loss: -1.4901161193847656e-06, rewards: -9.299999999999999, count: 5

epoch: 592199, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 592209, loss: -1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 592219, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 592229, loss: -5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 592239, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 592249, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 592259, loss: 9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 592269, loss: -1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 592279, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 592289, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 592299, loss: -1.1110305422334932e-06, rewards: -9.299999999999999, count: 50
epoch: 592309, loss: -4.098415502085118e-06, rewards: -9.299999999999999, co

epoch: 593189, loss: -1.122832327382639e-05, rewards: -9.299999999999999, count: 50
epoch: 593199, loss: -8.636236452730373e-05, rewards: -9.299999999999999, count: 50
epoch: 593209, loss: 6.0517788369907066e-05, rewards: -9.299999999999999, count: 50
epoch: 593219, loss: -3.916025161743164e-05, rewards: -9.299999999999999, count: 50
epoch: 593229, loss: -2.1485089746420272e-05, rewards: -9.299999999999999, count: 50
epoch: 593239, loss: 1.2559890819829889e-05, rewards: -9.299999999999999, count: 50
epoch: 593249, loss: 7.09772120899288e-06, rewards: -9.299999999999999, count: 50
epoch: 593259, loss: -6.812811079726089e-06, rewards: -9.299999999999999, count: 50
epoch: 593269, loss: 1.2934208371007117e-06, rewards: -9.299999999999999, count: 50
epoch: 593279, loss: 1.2958049637745717e-06, rewards: -9.299999999999999, count: 50
epoch: 593289, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 593299, loss: 7.843971161491936e-07, rewards: -9.299999999999999, cou

epoch: 594179, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 594189, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 594199, loss: 6.902217819515499e-07, rewards: -9.299999999999999, count: 50
epoch: 594209, loss: -1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 594219, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 594229, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 594239, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 594249, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 594259, loss: -2.3245811462402344e-06, rewards: -9.299999999999999, count: 50
epoch: 594269, loss: -2.1115542040206492e-05, rewards: -9.299999999999999, count: 50
epoch: 594279, loss: -0.00014050483878236264, rewards: -9.299999999999999, count: 50
epoch: 594289, loss: 3.0168295779731125e-05, rewards: -9.299999999999999, co

epoch: 595169, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 595179, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 595189, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 595199, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 595209, loss: 2.874136043828912e-06, rewards: -9.299999999999999, count: 50
epoch: 595219, loss: 2.7601718102232553e-05, rewards: -9.299999999999999, count: 50
epoch: 595229, loss: 0.00013023614883422852, rewards: -9.299999999999999, count: 50
epoch: 595239, loss: 1.1538267244759481e-05, rewards: -9.299999999999999, count: 50
epoch: 595249, loss: -3.4071206755470484e-05, rewards: -9.299999999999999, count: 50
epoch: 595259, loss: -2.485513687133789e-05, rewards: -9.299999999999999, count: 50
epoch: 595269, loss: -2.541542016842868e-06, rewards: -9.299999999999999, count: 50
epoch: 595279, loss: 8.649825758766383e-06, rewards: -9.299999999999999, coun

epoch: 596159, loss: -3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 596169, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 596179, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 596189, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 596199, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 596209, loss: -1.4805793853156501e-06, rewards: -9.299999999999999, count: 50
epoch: 596219, loss: -1.0992288480338175e-05, rewards: -9.299999999999999, count: 50
epoch: 596229, loss: -0.00012235403119120747, rewards: -9.299999999999999, count: 50
epoch: 596239, loss: 9.367346501676366e-05, rewards: -9.299999999999999, count: 50
epoch: 596249, loss: 5.422234607976861e-05, rewards: -9.299999999999999, count: 50
epoch: 596259, loss: 2.401113488303963e-05, rewards: -9.299999999999999, count: 50
epoch: 596269, loss: 5.995035280648153e-06, rewards: -9.299999999999999,

epoch: 597149, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 597159, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 597169, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 597179, loss: -1.8203259060101118e-06, rewards: -9.299999999999999, count: 50
epoch: 597189, loss: -7.212162017822266e-06, rewards: -9.299999999999999, count: 50
epoch: 597199, loss: -3.938436566386372e-05, rewards: -9.299999999999999, count: 50
epoch: 597209, loss: -8.339166379300877e-05, rewards: -9.299999999999999, count: 50
epoch: 597219, loss: 1.9997358322143555e-05, rewards: -9.299999999999999, count: 50
epoch: 597229, loss: 1.790404348867014e-05, rewards: -9.299999999999999, count: 50
epoch: 597239, loss: -1.9590854208217934e-05, rewards: -9.299999999999999, count: 50
epoch: 597249, loss: 9.447336196899414e-06, rewards: -9.299999999999999, count: 50
epoch: 597259, loss: -2.84075736090017e-06, rewards: -9.299999999999999, c

epoch: 598139, loss: -6.616115570068359e-06, rewards: -9.299999999999999, count: 50
epoch: 598149, loss: -4.686474858317524e-05, rewards: -9.299999999999999, count: 50
epoch: 598159, loss: -5.51128396182321e-05, rewards: -9.299999999999999, count: 50
epoch: 598169, loss: -3.1177998607745394e-05, rewards: -9.299999999999999, count: 50
epoch: 598179, loss: 3.0487775802612305e-05, rewards: -9.299999999999999, count: 50
epoch: 598189, loss: 6.285905783443013e-06, rewards: -9.299999999999999, count: 50
epoch: 598199, loss: -1.2384653018671088e-05, rewards: -9.299999999999999, count: 50
epoch: 598209, loss: 3.713369324032101e-06, rewards: -9.299999999999999, count: 50
epoch: 598219, loss: 1.0478496506038937e-06, rewards: -9.299999999999999, count: 50
epoch: 598229, loss: -1.840591380641854e-06, rewards: -9.299999999999999, count: 50
epoch: 598239, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 598249, loss: -9.286403610531124e-07, rewards: -9.299999999999999, cou

epoch: 599129, loss: 2.7273892555967905e-05, rewards: -9.299999999999999, count: 50
epoch: 599139, loss: 1.2555122339108493e-05, rewards: -9.299999999999999, count: 50
epoch: 599149, loss: -1.857638380897697e-05, rewards: -9.299999999999999, count: 50
epoch: 599159, loss: 1.0992288480338175e-05, rewards: -9.299999999999999, count: 50
epoch: 599169, loss: -5.204677563597215e-06, rewards: -9.299999999999999, count: 50
epoch: 599179, loss: 3.054142098335433e-06, rewards: -9.299999999999999, count: 50
epoch: 599189, loss: -2.0766258330695564e-06, rewards: -9.299999999999999, count: 50
epoch: 599199, loss: 1.4054775192562374e-06, rewards: -9.299999999999999, count: 50
epoch: 599209, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 599219, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 599229, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 599239, loss: 2.988576852658298e-06, rewards: -9.299999999999999, co

epoch: 600119, loss: 1.098751999961678e-05, rewards: -9.299999999999999, count: 50
epoch: 600129, loss: -2.142190896847751e-05, rewards: -9.299999999999999, count: 50
epoch: 600139, loss: 9.608268555894028e-06, rewards: -9.299999999999999, count: 50
epoch: 600149, loss: -9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 600159, loss: -1.971721758309286e-06, rewards: -9.299999999999999, count: 50
epoch: 600169, loss: 1.6915797687033773e-06, rewards: -9.299999999999999, count: 50
epoch: 600179, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 600189, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 600199, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 600209, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 600219, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 600229, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, co

epoch: 601109, loss: -2.32934962696163e-06, rewards: -9.299999999999999, count: 50
epoch: 601119, loss: 2.7668475013342686e-06, rewards: -9.299999999999999, count: 50
epoch: 601129, loss: -2.5856495540210744e-06, rewards: -9.299999999999999, count: 50
epoch: 601139, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 601149, loss: 1.3291835330164758e-06, rewards: -9.299999999999999, count: 50
epoch: 601159, loss: 2.2900103431311436e-06, rewards: -9.299999999999999, count: 50
epoch: 601169, loss: 6.368160029524006e-06, rewards: -9.299999999999999, count: 50
epoch: 601179, loss: 3.0168295779731125e-05, rewards: -9.299999999999999, count: 50
epoch: 601189, loss: 9.791612683329731e-05, rewards: -9.299999999999999, count: 50
epoch: 601199, loss: -5.2713156037498266e-05, rewards: -9.299999999999999, count: 50
epoch: 601209, loss: 1.7702579498291016e-05, rewards: -9.299999999999999, count: 50
epoch: 601219, loss: 1.497268726780021e-06, rewards: -9.299999999999999, coun

epoch: 602099, loss: 1.2314319519646233e-06, rewards: -9.299999999999999, count: 50
epoch: 602109, loss: -9.810923984332476e-07, rewards: -9.299999999999999, count: 50
epoch: 602119, loss: 5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 602129, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 602139, loss: 7.796287491146359e-07, rewards: -9.299999999999999, count: 50
epoch: 602149, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 602159, loss: -1.2278557051104144e-06, rewards: -9.299999999999999, count: 50
epoch: 602169, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 602179, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 602189, loss: -3.0493736176140374e-06, rewards: -9.299999999999999, count: 50
epoch: 602199, loss: -1.5888213965808973e-05, rewards: -9.299999999999999, count: 50
epoch: 602209, loss: -0.00010536790068726987, rewards: -9.299999999999999, c

epoch: 603089, loss: 3.439188003540039e-05, rewards: -9.299999999999999, count: 50
epoch: 603099, loss: 9.720802336232737e-05, rewards: -9.299999999999999, count: 50
epoch: 603109, loss: -3.16238401865121e-05, rewards: -9.299999999999999, count: 50
epoch: 603119, loss: -1.5457868357771076e-05, rewards: -9.299999999999999, count: 50
epoch: 603129, loss: 2.0461082385736518e-05, rewards: -9.299999999999999, count: 50
epoch: 603139, loss: -9.046792911249213e-06, rewards: -9.299999999999999, count: 50
epoch: 603149, loss: 1.5246868088070187e-06, rewards: -9.299999999999999, count: 50
epoch: 603159, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 603169, loss: -5.269050689094001e-07, rewards: -9.299999999999999, count: 50
epoch: 603179, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 603189, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 603199, loss: -4.5657156988454517e-07, rewards: -9.299999999999999, coun

epoch: 604079, loss: 7.241249113576487e-05, rewards: -9.299999999999999, count: 50
epoch: 604089, loss: -2.8061866032658145e-05, rewards: -9.299999999999999, count: 50
epoch: 604099, loss: 5.1221846661064774e-05, rewards: -9.299999999999999, count: 50
epoch: 604109, loss: -1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 604119, loss: -1.828908898460213e-05, rewards: -9.299999999999999, count: 50
epoch: 604129, loss: 5.372762643673923e-06, rewards: -9.299999999999999, count: 50
epoch: 604139, loss: 3.55362885784416e-06, rewards: -9.299999999999999, count: 50
epoch: 604149, loss: -4.075765446032165e-06, rewards: -9.299999999999999, count: 50
epoch: 604159, loss: 2.511739694455173e-06, rewards: -9.299999999999999, count: 50
epoch: 604169, loss: -1.5676021121180383e-06, rewards: -9.299999999999999, count: 50
epoch: 604179, loss: 1.0406970432086382e-06, rewards: -9.299999999999999, count: 50
epoch: 604189, loss: -6.508827254947391e-07, rewards: -9.299999999999999, coun

epoch: 605069, loss: -2.696514229683089e-06, rewards: -9.299999999999999, count: 50
epoch: 605079, loss: 2.769708589767106e-05, rewards: -9.299999999999999, count: 50
epoch: 605089, loss: 1.0924339221674018e-05, rewards: -9.299999999999999, count: 50
epoch: 605099, loss: -7.798671504133381e-06, rewards: -9.299999999999999, count: 50
epoch: 605109, loss: -4.554986844595987e-06, rewards: -9.299999999999999, count: 50
epoch: 605119, loss: 3.6799908684770344e-06, rewards: -9.299999999999999, count: 50
epoch: 605129, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 605139, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 605149, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 605159, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 605169, loss: 3.099441414633475e-07, rewards: -9.299999999999999, count: 50
epoch: 605179, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count:

epoch: 606059, loss: -7.216930498543661e-06, rewards: -9.299999999999999, count: 50
epoch: 606069, loss: 1.0435581316414755e-05, rewards: -9.299999999999999, count: 50
epoch: 606079, loss: -5.905628313485067e-06, rewards: -9.299999999999999, count: 50
epoch: 606089, loss: 2.874136043828912e-06, rewards: -9.299999999999999, count: 50
epoch: 606099, loss: -1.6295909972541267e-06, rewards: -9.299999999999999, count: 50
epoch: 606109, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 606119, loss: -6.759166808478767e-07, rewards: -9.299999999999999, count: 50
epoch: 606129, loss: 5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 606139, loss: 3.5762788286319847e-08, rewards: -9.299999999999999, count: 50
epoch: 606149, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 606159, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 606169, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, coun

epoch: 607059, loss: -7.915496667010302e-07, rewards: -9.299999999999999, count: 50
epoch: 607069, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 607079, loss: -1.4853477523502079e-06, rewards: -9.299999999999999, count: 50
epoch: 607089, loss: -5.650520506605972e-06, rewards: -9.299999999999999, count: 50
epoch: 607099, loss: -4.1471721488051116e-05, rewards: -9.299999999999999, count: 50
epoch: 607109, loss: -7.597446528961882e-05, rewards: -9.299999999999999, count: 50
epoch: 607119, loss: -2.5614499463699758e-05, rewards: -9.299999999999999, count: 50
epoch: 607129, loss: 3.2370091503253207e-05, rewards: -9.299999999999999, count: 50
epoch: 607139, loss: 9.146929187409114e-06, rewards: -9.299999999999999, count: 50
epoch: 607149, loss: -1.2357235391391441e-05, rewards: -9.299999999999999, count: 50
epoch: 607159, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 607169, loss: 3.55362885784416e-06, rewards: -9.299999999999999, co

epoch: 608049, loss: 1.115798909268051e-06, rewards: -9.299999999999999, count: 50
epoch: 608059, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 608069, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 608079, loss: 1.5950203078318737e-06, rewards: -9.299999999999999, count: 50
epoch: 608089, loss: 9.549856258672662e-06, rewards: -9.299999999999999, count: 50
epoch: 608099, loss: 8.146763138938695e-05, rewards: -9.299999999999999, count: 50
epoch: 608109, loss: -5.977392356726341e-05, rewards: -9.299999999999999, count: 50
epoch: 608119, loss: 3.705859126057476e-05, rewards: -9.299999999999999, count: 50
epoch: 608129, loss: 3.0053854061407037e-05, rewards: -9.299999999999999, count: 50
epoch: 608139, loss: -2.0813940864172764e-06, rewards: -9.299999999999999, count: 50
epoch: 608149, loss: -1.1727809578587767e-05, rewards: -9.299999999999999, count: 50
epoch: 608159, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count:

epoch: 609039, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 609049, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 609059, loss: 1.7499924069852568e-06, rewards: -9.299999999999999, count: 50
epoch: 609069, loss: 4.98414055982721e-06, rewards: -9.299999999999999, count: 50
epoch: 609079, loss: 2.769708589767106e-05, rewards: -9.299999999999999, count: 50
epoch: 609089, loss: 0.00011735677981050685, rewards: -9.299999999999999, count: 50
epoch: 609099, loss: -3.7248133594403043e-05, rewards: -9.299999999999999, count: 50
epoch: 609109, loss: -2.8959511837456375e-05, rewards: -9.299999999999999, count: 50
epoch: 609119, loss: 1.795053503883537e-05, rewards: -9.299999999999999, count: 50
epoch: 609129, loss: 6.333589681162266e-06, rewards: -9.299999999999999, count: 50
epoch: 609139, loss: -8.684396561875474e-06, rewards: -9.299999999999999, count: 50
epoch: 609149, loss: 3.350973202032037e-06, rewards: -9.299999999999999, count: 5

epoch: 610029, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 610039, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count: 50
epoch: 610049, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 610059, loss: 5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 610069, loss: 1.9788741667525755e-07, rewards: -9.299999999999999, count: 50
epoch: 610079, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 610089, loss: 1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 610099, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 610109, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 610119, loss: 1.167058940154675e-06, rewards: -9.299999999999999, count: 50
epoch: 610129, loss: 6.401538939826423e-06, rewards: -9.299999999999999, count: 50
epoch: 610139, loss: 4.798650843440555e-05, rewards: -9.299999999999999, count: 50


epoch: 611019, loss: -1.5318394162022742e-06, rewards: -9.299999999999999, count: 50
epoch: 611029, loss: -4.0518047171644866e-05, rewards: -9.299999999999999, count: 50
epoch: 611039, loss: 2.4988650693558156e-05, rewards: -9.299999999999999, count: 50
epoch: 611049, loss: 1.1515617188706528e-06, rewards: -9.299999999999999, count: 50
epoch: 611059, loss: -9.084939847525675e-06, rewards: -9.299999999999999, count: 50
epoch: 611069, loss: 7.015466508164536e-06, rewards: -9.299999999999999, count: 50
epoch: 611079, loss: -3.874301910400391e-06, rewards: -9.299999999999999, count: 50
epoch: 611089, loss: 2.32934962696163e-06, rewards: -9.299999999999999, count: 50
epoch: 611099, loss: -1.6915797687033773e-06, rewards: -9.299999999999999, count: 50
epoch: 611109, loss: 1.5246868088070187e-06, rewards: -9.299999999999999, count: 50
epoch: 611119, loss: -8.094310715023312e-07, rewards: -9.299999999999999, count: 50
epoch: 611129, loss: -1.01327898960335e-07, rewards: -9.299999999999999, cou

epoch: 612009, loss: 3.976821972173639e-06, rewards: -9.299999999999999, count: 50
epoch: 612019, loss: 2.1241903596092016e-05, rewards: -9.299999999999999, count: 50
epoch: 612029, loss: 0.00011525988520588726, rewards: -9.299999999999999, count: 50
epoch: 612039, loss: -6.763935380149633e-05, rewards: -9.299999999999999, count: 50
epoch: 612049, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 612059, loss: 2.388238863204606e-05, rewards: -9.299999999999999, count: 50
epoch: 612069, loss: -8.170604814949911e-06, rewards: -9.299999999999999, count: 50
epoch: 612079, loss: -3.972053491452243e-06, rewards: -9.299999999999999, count: 50
epoch: 612089, loss: 5.438327661977382e-06, rewards: -9.299999999999999, count: 50
epoch: 612099, loss: -3.076791699641035e-06, rewards: -9.299999999999999, count: 50
epoch: 612109, loss: 1.5747547195132938e-06, rewards: -9.299999999999999, count: 50
epoch: 612119, loss: -7.534027304245683e-07, rewards: -9.299999999999999, count:

epoch: 612999, loss: -2.9370785341598094e-05, rewards: -9.299999999999999, count: 50
epoch: 613009, loss: -0.00011273741984041408, rewards: -9.299999999999999, count: 50
epoch: 613019, loss: 3.5231114452471957e-05, rewards: -9.299999999999999, count: 50
epoch: 613029, loss: 2.612471507745795e-05, rewards: -9.299999999999999, count: 50
epoch: 613039, loss: -1.9347668057889678e-05, rewards: -9.299999999999999, count: 50
epoch: 613049, loss: -2.536773763495148e-06, rewards: -9.299999999999999, count: 50
epoch: 613059, loss: 7.84516305429861e-06, rewards: -9.299999999999999, count: 50
epoch: 613069, loss: -4.98414055982721e-06, rewards: -9.299999999999999, count: 50
epoch: 613079, loss: 2.16841704059334e-06, rewards: -9.299999999999999, count: 50
epoch: 613089, loss: -4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 613099, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 613109, loss: -4.5657156988454517e-07, rewards: -9.299999999999999, coun

epoch: 613989, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 613999, loss: 1.5735626845980732e-07, rewards: -9.299999999999999, count: 50
epoch: 614009, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 614019, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 614029, loss: 1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 614039, loss: 1.7602444131625816e-05, rewards: -9.299999999999999, count: 50
epoch: 614049, loss: 0.0001394009595969692, rewards: -9.299999999999999, count: 50
epoch: 614059, loss: -5.463838533614762e-05, rewards: -9.299999999999999, count: 50
epoch: 614069, loss: -4.8986672481987625e-05, rewards: -9.299999999999999, count: 50
epoch: 614079, loss: -1.6199350284296088e-05, rewards: -9.299999999999999, count: 50
epoch: 614089, loss: 5.809068625239888e-06, rewards: -9.299999999999999, count: 50
epoch: 614099, loss: 1.0073184967041016e-05, rewards: -9.299999999999999, cou

epoch: 614979, loss: -6.586313247680664e-06, rewards: -9.299999999999999, count: 50
epoch: 614989, loss: 1.4030933925823774e-06, rewards: -9.299999999999999, count: 50
epoch: 614999, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 615009, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 615019, loss: -8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 615029, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 615039, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 615049, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 615059, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 615069, loss: 1.7547607740198146e-06, rewards: -9.299999999999999, count: 50
epoch: 615079, loss: 8.776188224146608e-06, rewards: -9.299999999999999, count: 50
epoch: 615089, loss: 6.182193465065211e-05, rewards: -9.299999999999999, count: 50


epoch: 615969, loss: -2.8188229407533072e-05, rewards: -9.299999999999999, count: 50
epoch: 615979, loss: -0.00010016083979280666, rewards: -9.299999999999999, count: 50
epoch: 615989, loss: 5.630731538985856e-05, rewards: -9.299999999999999, count: 50
epoch: 615999, loss: -2.1322965039871633e-05, rewards: -9.299999999999999, count: 50
epoch: 616009, loss: 5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 616019, loss: 5.983114078844665e-06, rewards: -9.299999999999999, count: 50
epoch: 616029, loss: -5.309581865731161e-06, rewards: -9.299999999999999, count: 50
epoch: 616039, loss: 2.7239323117100867e-06, rewards: -9.299999999999999, count: 50
epoch: 616049, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 616059, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 616069, loss: 1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 616079, loss: 4.220008804622921e-07, rewards: -9.299999999999999, c

epoch: 616959, loss: -4.465579877432901e-06, rewards: -9.299999999999999, count: 50
epoch: 616969, loss: -7.152557657263969e-08, rewards: -9.299999999999999, count: 50
epoch: 616979, loss: 1.2314319519646233e-06, rewards: -9.299999999999999, count: 50
epoch: 616989, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 616999, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 617009, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 617019, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 617029, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 617039, loss: -5.197525183575635e-07, rewards: -9.299999999999999, count: 50
epoch: 617049, loss: -6.759166808478767e-07, rewards: -9.299999999999999, count: 50
epoch: 617059, loss: -1.0454655239300337e-06, rewards: -9.299999999999999, count: 50
epoch: 617069, loss: -1.4603137969970703e-06, rewards: -9.299999999999999, c

epoch: 617949, loss: -1.8870830444939202e-06, rewards: -9.299999999999999, count: 50
epoch: 617959, loss: 4.028082003060263e-06, rewards: -9.299999999999999, count: 50
epoch: 617969, loss: -2.806186785164755e-06, rewards: -9.299999999999999, count: 50
epoch: 617979, loss: 7.96318033735588e-07, rewards: -9.299999999999999, count: 50
epoch: 617989, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 617999, loss: -9.965896197172697e-07, rewards: -9.299999999999999, count: 50
epoch: 618009, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 618019, loss: 1.943111328728264e-06, rewards: -9.299999999999999, count: 50
epoch: 618029, loss: 4.090070888196351e-06, rewards: -9.299999999999999, count: 50
epoch: 618039, loss: 1.4359950910147745e-05, rewards: -9.299999999999999, count: 50
epoch: 618049, loss: 6.654619937762618e-05, rewards: -9.299999999999999, count: 50
epoch: 618059, loss: 1.592636067471176e-06, rewards: -9.299999999999999, count: 50

epoch: 618939, loss: -9.810923984332476e-07, rewards: -9.299999999999999, count: 50
epoch: 618949, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 618959, loss: 1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 618969, loss: 9.262561775358336e-07, rewards: -9.299999999999999, count: 50
epoch: 618979, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 618989, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 618999, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 619009, loss: 1.9788741667525755e-07, rewards: -9.299999999999999, count: 50
epoch: 619019, loss: 1.5735626845980732e-07, rewards: -9.299999999999999, count: 50
epoch: 619029, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, count: 50
epoch: 619039, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 619049, loss: -4.148483299104555e-07, rewards: -9.299999999999999, co

epoch: 619929, loss: 9.146929187409114e-06, rewards: -9.299999999999999, count: 50
epoch: 619939, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 619949, loss: -3.076791699641035e-06, rewards: -9.299999999999999, count: 50
epoch: 619959, loss: 2.1100045159982983e-06, rewards: -9.299999999999999, count: 50
epoch: 619969, loss: -1.1575222060855594e-06, rewards: -9.299999999999999, count: 50
epoch: 619979, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 619989, loss: -6.783008643651556e-07, rewards: -9.299999999999999, count: 50
epoch: 619999, loss: 1.115798909268051e-06, rewards: -9.299999999999999, count: 50
epoch: 620009, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 620019, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 620029, loss: 3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 620039, loss: 1.996755599975586e-06, rewards: -9.299999999999999, count: 

epoch: 620919, loss: -3.4722088457783684e-05, rewards: -9.299999999999999, count: 50
epoch: 620929, loss: -9.91570923360996e-05, rewards: -9.299999999999999, count: 50
epoch: 620939, loss: 1.7330647096969187e-05, rewards: -9.299999999999999, count: 50
epoch: 620949, loss: 3.0263661756180227e-05, rewards: -9.299999999999999, count: 50
epoch: 620959, loss: -1.6524791135452688e-05, rewards: -9.299999999999999, count: 50
epoch: 620969, loss: -2.8014183044433594e-06, rewards: -9.299999999999999, count: 50
epoch: 620979, loss: 7.246732820931356e-06, rewards: -9.299999999999999, count: 50
epoch: 620989, loss: -4.869699296250474e-06, rewards: -9.299999999999999, count: 50
epoch: 620999, loss: 2.4437904357910156e-06, rewards: -9.299999999999999, count: 50
epoch: 621009, loss: -1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 621019, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 621029, loss: -8.940696716308594e-07, rewards: -9.299999999999999, c

epoch: 621909, loss: -1.3756751968685421e-06, rewards: -9.299999999999999, count: 50
epoch: 621919, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 621929, loss: -5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 621939, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 621949, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 621959, loss: 1.7726421219776967e-06, rewards: -9.299999999999999, count: 50
epoch: 621969, loss: 5.632638931274414e-06, rewards: -9.299999999999999, count: 50
epoch: 621979, loss: 2.9389857445494272e-05, rewards: -9.299999999999999, count: 50
epoch: 621989, loss: 0.00010850786929950118, rewards: -9.299999999999999, count: 50
epoch: 621999, loss: -4.543661998468451e-05, rewards: -9.299999999999999, count: 50
epoch: 622009, loss: -1.0554790605965536e-05, rewards: -9.299999999999999, count: 50
epoch: 622019, loss: 2.1989346350892447e-05, rewards: -9.299999999999999, co

epoch: 622899, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 622909, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 622919, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 622929, loss: 1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 622939, loss: 2.397298885625787e-06, rewards: -9.299999999999999, count: 50
epoch: 622949, loss: 6.471872438851278e-06, rewards: -9.299999999999999, count: 50
epoch: 622959, loss: 3.1764506275067106e-05, rewards: -9.299999999999999, count: 50
epoch: 622969, loss: 9.73296191659756e-05, rewards: -9.299999999999999, count: 50
epoch: 622979, loss: -4.802346302312799e-05, rewards: -9.299999999999999, count: 50
epoch: 622989, loss: 8.902549780032132e-06, rewards: -9.299999999999999, count: 50
epoch: 622999, loss: 8.78334049048135e-06, rewards: -9.299999999999999, count: 50
epoch: 623009, loss: -1.0362863577029202e-05, rewards: -9.299999999999999, count: 50

epoch: 623889, loss: 4.866242306889035e-05, rewards: -9.299999999999999, count: 50
epoch: 623899, loss: 3.914117769454606e-05, rewards: -9.299999999999999, count: 50
epoch: 623909, loss: -5.155801773071289e-06, rewards: -9.299999999999999, count: 50
epoch: 623919, loss: -1.599669485585764e-05, rewards: -9.299999999999999, count: 50
epoch: 623929, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 623939, loss: 5.655288532580016e-06, rewards: -9.299999999999999, count: 50
epoch: 623949, loss: -1.971721758309286e-06, rewards: -9.299999999999999, count: 50
epoch: 623959, loss: -6.806850478824344e-07, rewards: -9.299999999999999, count: 50
epoch: 623969, loss: 9.262561775358336e-07, rewards: -9.299999999999999, count: 50
epoch: 623979, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 623989, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 623999, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 

epoch: 624879, loss: -9.02414285519626e-06, rewards: -9.299999999999999, count: 50
epoch: 624889, loss: 5.029439762438415e-06, rewards: -9.299999999999999, count: 50
epoch: 624899, loss: -3.5214425224694423e-06, rewards: -9.299999999999999, count: 50
epoch: 624909, loss: 2.4461746761517134e-06, rewards: -9.299999999999999, count: 50
epoch: 624919, loss: -6.783008643651556e-07, rewards: -9.299999999999999, count: 50
epoch: 624929, loss: -7.402896926578251e-07, rewards: -9.299999999999999, count: 50
epoch: 624939, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 624949, loss: 1.115798909268051e-06, rewards: -9.299999999999999, count: 50
epoch: 624959, loss: 3.874301910400391e-06, rewards: -9.299999999999999, count: 50
epoch: 624969, loss: 2.040266917902045e-05, rewards: -9.299999999999999, count: 50
epoch: 624979, loss: 0.00011195540719199926, rewards: -9.299999999999999, count: 50
epoch: 624989, loss: -6.980180478421971e-05, rewards: -9.299999999999999, count:

epoch: 625869, loss: 1.5958547010086477e-05, rewards: -9.299999999999999, count: 50
epoch: 625879, loss: -2.3245811462402344e-06, rewards: -9.299999999999999, count: 50
epoch: 625889, loss: -5.336999947758159e-06, rewards: -9.299999999999999, count: 50
epoch: 625899, loss: 2.3174286525318166e-06, rewards: -9.299999999999999, count: 50
epoch: 625909, loss: 6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 625919, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 625929, loss: 6.997585160206654e-07, rewards: -9.299999999999999, count: 50
epoch: 625939, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 625949, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 625959, loss: 3.2424927098873013e-07, rewards: -9.299999999999999, count: 50
epoch: 625969, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 625979, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count:

epoch: 626859, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 626869, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 626879, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 626889, loss: -9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 626899, loss: -9.965896197172697e-07, rewards: -9.299999999999999, count: 50
epoch: 626909, loss: -3.0446053642663173e-06, rewards: -9.299999999999999, count: 50
epoch: 626919, loss: -1.186609279102413e-05, rewards: -9.299999999999999, count: 50
epoch: 626929, loss: -7.393717532977462e-05, rewards: -9.299999999999999, count: 50
epoch: 626939, loss: 2.5399924197699875e-05, rewards: -9.299999999999999, count: 50
epoch: 626949, loss: -4.584074122249149e-05, rewards: -9.299999999999999, count: 50
epoch: 626959, loss: 1.6111134755192325e-05, rewards: -9.299999999999999, count: 50
epoch: 626969, loss: 7.827282388461754e-06, rewards: -9.299999999999999, co

epoch: 627849, loss: 6.434797978727147e-05, rewards: -9.299999999999999, count: 50
epoch: 627859, loss: -3.0606985092163086e-05, rewards: -9.299999999999999, count: 50
epoch: 627869, loss: -2.6428699584357673e-06, rewards: -9.299999999999999, count: 50
epoch: 627879, loss: 1.189112663269043e-05, rewards: -9.299999999999999, count: 50
epoch: 627889, loss: -8.610487384430598e-06, rewards: -9.299999999999999, count: 50
epoch: 627899, loss: 4.981756319466513e-06, rewards: -9.299999999999999, count: 50
epoch: 627909, loss: -3.3652781894488726e-06, rewards: -9.299999999999999, count: 50
epoch: 627919, loss: 2.2101403374108486e-06, rewards: -9.299999999999999, count: 50
epoch: 627929, loss: -1.4555454299625126e-06, rewards: -9.299999999999999, count: 50
epoch: 627939, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 627949, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 627959, loss: -1.01327898960335e-07, rewards: -9.299999999999999, cou

epoch: 628839, loss: 6.997585160206654e-07, rewards: -9.299999999999999, count: 50
epoch: 628849, loss: -7.307529585887096e-07, rewards: -9.299999999999999, count: 50
epoch: 628859, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 628869, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 628879, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 628889, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 628899, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 628909, loss: 3.901719992427388e-06, rewards: -9.299999999999999, count: 50
epoch: 628919, loss: 1.9237995729781687e-05, rewards: -9.299999999999999, count: 50
epoch: 628929, loss: 0.0001057207555277273, rewards: -9.299999999999999, count: 50
epoch: 628939, loss: -6.813287473050877e-05, rewards: -9.299999999999999, count: 50
epoch: 628949, loss: 2.367258093727287e-05, rewards: -9.299999999999999, count: 5

epoch: 629829, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 629839, loss: -1.6140937759701046e-06, rewards: -9.299999999999999, count: 50
epoch: 629849, loss: 1.5175342014117632e-06, rewards: -9.299999999999999, count: 50
epoch: 629859, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 629869, loss: 2.7298926852381555e-07, rewards: -9.299999999999999, count: 50
epoch: 629879, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 629889, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 629899, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 629909, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 629919, loss: -8.594989822086063e-07, rewards: -9.299999999999999, count: 50
epoch: 629929, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count: 50
epoch: 629939, loss: 2.16841704059334e-06, rewards: -9.299999999999999, cou

epoch: 630819, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 630829, loss: -4.5657156988454517e-07, rewards: -9.299999999999999, count: 50
epoch: 630839, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 630849, loss: 1.2469291732486454e-06, rewards: -9.299999999999999, count: 50
epoch: 630859, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 630869, loss: 1.7035007431331906e-06, rewards: -9.299999999999999, count: 50
epoch: 630879, loss: 3.7896634239587e-06, rewards: -9.299999999999999, count: 50
epoch: 630889, loss: 1.7031430616043508e-05, rewards: -9.299999999999999, count: 50
epoch: 630899, loss: 9.35149219003506e-05, rewards: -9.299999999999999, count: 50
epoch: 630909, loss: -5.7981014833785594e-05, rewards: -9.299999999999999, count: 50
epoch: 630919, loss: 3.952264887630008e-05, rewards: -9.299999999999999, count: 50
epoch: 630929, loss: -4.34637058788212e-06, rewards: -9.299999999999999, count: 5

epoch: 631809, loss: 3.388047116459347e-05, rewards: -9.299999999999999, count: 50
epoch: 631819, loss: 1.0751486115623266e-05, rewards: -9.299999999999999, count: 50
epoch: 631829, loss: -7.76410070102429e-06, rewards: -9.299999999999999, count: 50
epoch: 631839, loss: -6.718635631841607e-06, rewards: -9.299999999999999, count: 50
epoch: 631849, loss: 1.9741057712963084e-06, rewards: -9.299999999999999, count: 50
epoch: 631859, loss: 2.071857352348161e-06, rewards: -9.299999999999999, count: 50
epoch: 631869, loss: -1.394748665006773e-06, rewards: -9.299999999999999, count: 50
epoch: 631879, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 631889, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 631899, loss: -2.7894972731701273e-07, rewards: -9.299999999999999, count: 50
epoch: 631909, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 631919, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count

epoch: 632799, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 632809, loss: -4.410743770222325e-07, rewards: -9.299999999999999, count: 50
epoch: 632819, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 632829, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 632839, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 632849, loss: 1.2314319519646233e-06, rewards: -9.299999999999999, count: 50
epoch: 632859, loss: 7.345676294789882e-06, rewards: -9.299999999999999, count: 50
epoch: 632869, loss: 5.466699440148659e-05, rewards: -9.299999999999999, count: 50
epoch: 632879, loss: 2.4060011128312908e-05, rewards: -9.299999999999999, count: 50
epoch: 632889, loss: 5.0455331802368164e-05, rewards: -9.299999999999999, count: 50
epoch: 632899, loss: -1.4177560842654202e-05, rewards: -9.299999999999999, count: 50
epoch: 632909, loss: -1.8639564586919732e-05, rewards: -9.299999999999999, coun

epoch: 633789, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 633799, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 633809, loss: -5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 633819, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 633829, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 633839, loss: -1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 633849, loss: -9.965896197172697e-07, rewards: -9.299999999999999, count: 50
epoch: 633859, loss: -2.294778823852539e-06, rewards: -9.299999999999999, count: 50
epoch: 633869, loss: -1.261353463632986e-05, rewards: -9.299999999999999, count: 50
epoch: 633879, loss: -8.291006088256836e-05, rewards: -9.299999999999999, count: 50
epoch: 633889, loss: 4.7409535909537226e-05, rewards: -9.299999999999999, count: 50
epoch: 633899, loss: -4.576325591187924e-05, rewards: -9.299999999999999, c

epoch: 634779, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 634789, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 634799, loss: -1.1527538390510017e-06, rewards: -9.299999999999999, count: 50
epoch: 634809, loss: -2.847909854608588e-06, rewards: -9.299999999999999, count: 50
epoch: 634819, loss: -1.5131235159060452e-05, rewards: -9.299999999999999, count: 50
epoch: 634829, loss: -0.00010468601976754144, rewards: -9.299999999999999, count: 50
epoch: 634839, loss: 7.590412860736251e-05, rewards: -9.299999999999999, count: 50
epoch: 634849, loss: -1.710653305053711e-05, rewards: -9.299999999999999, count: 50
epoch: 634859, loss: -2.5546551114530303e-05, rewards: -9.299999999999999, count: 50
epoch: 634869, loss: 7.441043635481037e-06, rewards: -9.299999999999999, count: 50
epoch: 634879, loss: 7.09772120899288e-06, rewards: -9.299999999999999, count: 50
epoch: 634889, loss: -5.577802767220419e-06, rewards: -9.299999999999999, co

epoch: 635769, loss: -7.902026118244976e-05, rewards: -9.299999999999999, count: 50
epoch: 635779, loss: -1.043081283569336e-05, rewards: -9.299999999999999, count: 50
epoch: 635789, loss: 3.4849643270717934e-05, rewards: -9.299999999999999, count: 50
epoch: 635799, loss: -4.795789664058248e-06, rewards: -9.299999999999999, count: 50
epoch: 635809, loss: -1.0557174391578883e-05, rewards: -9.299999999999999, count: 50
epoch: 635819, loss: 7.455349077645224e-06, rewards: -9.299999999999999, count: 50
epoch: 635829, loss: -1.8942356518891756e-06, rewards: -9.299999999999999, count: 50
epoch: 635839, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 635849, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 635859, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 635869, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 635879, loss: 6.771087441848067e-07, rewards: -9.299999999999999, cou

epoch: 636759, loss: -4.197358975943644e-06, rewards: -9.299999999999999, count: 50
epoch: 636769, loss: -2.9476881536538713e-05, rewards: -9.299999999999999, count: 50
epoch: 636779, loss: -0.00011471509787952527, rewards: -9.299999999999999, count: 50
epoch: 636789, loss: 2.8457641747081652e-05, rewards: -9.299999999999999, count: 50
epoch: 636799, loss: 3.283738988102414e-05, rewards: -9.299999999999999, count: 50
epoch: 636809, loss: -1.4479160199698526e-05, rewards: -9.299999999999999, count: 50
epoch: 636819, loss: -8.932352102419827e-06, rewards: -9.299999999999999, count: 50
epoch: 636829, loss: 8.55803500598995e-06, rewards: -9.299999999999999, count: 50
epoch: 636839, loss: -2.0444392703211633e-06, rewards: -9.299999999999999, count: 50
epoch: 636849, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 636859, loss: 1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 636869, loss: -6.115436690379283e-07, rewards: -9.299999999999999, c

epoch: 637749, loss: 3.1340123314294033e-06, rewards: -9.299999999999999, count: 50
epoch: 637759, loss: 2.190947452618275e-05, rewards: -9.299999999999999, count: 50
epoch: 637769, loss: -1.0839700735232327e-05, rewards: -9.299999999999999, count: 50
epoch: 637779, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 637789, loss: 3.6454200653679436e-06, rewards: -9.299999999999999, count: 50
epoch: 637799, loss: -3.12209135699959e-06, rewards: -9.299999999999999, count: 50
epoch: 637809, loss: 2.100467781929183e-06, rewards: -9.299999999999999, count: 50
epoch: 637819, loss: -1.2123584838263923e-06, rewards: -9.299999999999999, count: 50
epoch: 637829, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 637839, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 637849, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 637859, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 5

epoch: 638739, loss: -4.554986844595987e-06, rewards: -9.299999999999999, count: 50
epoch: 638749, loss: -1.7583370208740234e-05, rewards: -9.299999999999999, count: 50
epoch: 638759, loss: -8.698582678334787e-05, rewards: -9.299999999999999, count: 50
epoch: 638769, loss: 4.534959953161888e-05, rewards: -9.299999999999999, count: 50
epoch: 638779, loss: -3.9604903577128425e-05, rewards: -9.299999999999999, count: 50
epoch: 638789, loss: 1.9160508600180037e-05, rewards: -9.299999999999999, count: 50
epoch: 638799, loss: -4.355907549324911e-06, rewards: -9.299999999999999, count: 50
epoch: 638809, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 638819, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 638829, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 638839, loss: -1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 638849, loss: 1.4710426512465347e-06, rewards: -9.299999999999999, c

epoch: 639729, loss: 2.4437904357910156e-06, rewards: -9.299999999999999, count: 50
epoch: 639739, loss: 1.113176313083386e-05, rewards: -9.299999999999999, count: 50
epoch: 639749, loss: 7.197499508038163e-05, rewards: -9.299999999999999, count: 50
epoch: 639759, loss: -2.239108107460197e-05, rewards: -9.299999999999999, count: 50
epoch: 639769, loss: 4.754662586492486e-05, rewards: -9.299999999999999, count: 50
epoch: 639779, loss: -1.4506578736472875e-05, rewards: -9.299999999999999, count: 50
epoch: 639789, loss: -1.0753869901236612e-05, rewards: -9.299999999999999, count: 50
epoch: 639799, loss: 1.0980367733282037e-05, rewards: -9.299999999999999, count: 50
epoch: 639809, loss: -4.318952505855123e-06, rewards: -9.299999999999999, count: 50
epoch: 639819, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 639829, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 639839, loss: -8.940696716308594e-08, rewards: -9.299999999999999, coun

epoch: 640719, loss: -4.204511697025737e-06, rewards: -9.299999999999999, count: 50
epoch: 640729, loss: -2.2023916244506836e-05, rewards: -9.299999999999999, count: 50
epoch: 640739, loss: -0.00010988831490976736, rewards: -9.299999999999999, count: 50
epoch: 640749, loss: 6.552696140715852e-05, rewards: -9.299999999999999, count: 50
epoch: 640759, loss: -1.3625622159452178e-05, rewards: -9.299999999999999, count: 50
epoch: 640769, loss: -1.5532970792264678e-05, rewards: -9.299999999999999, count: 50
epoch: 640779, loss: 1.3554095858125947e-05, rewards: -9.299999999999999, count: 50
epoch: 640789, loss: -5.029439762438415e-06, rewards: -9.299999999999999, count: 50
epoch: 640799, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 640809, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 640819, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 640829, loss: -5.018711135562626e-07, rewards: -9.299999999999999, c

epoch: 641709, loss: 1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 641719, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 641729, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 641739, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 641749, loss: 8.893013045963016e-07, rewards: -9.299999999999999, count: 50
epoch: 641759, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 641769, loss: -6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 641779, loss: -2.161264319511247e-06, rewards: -9.299999999999999, count: 50
epoch: 641789, loss: -7.798671504133381e-06, rewards: -9.299999999999999, count: 50
epoch: 641799, loss: -3.907561404048465e-05, rewards: -9.299999999999999, count: 50
epoch: 641809, loss: -8.227705984609202e-05, rewards: -9.299999999999999, count: 50
epoch: 641819, loss: 2.89857380266767e-05, rewards: -9.299999999999999, count: 

epoch: 642699, loss: -6.111860329838237e-06, rewards: -9.299999999999999, count: 50
epoch: 642709, loss: 2.130270104316878e-06, rewards: -9.299999999999999, count: 50
epoch: 642719, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 642729, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 642739, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 642749, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 642759, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 642769, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 642779, loss: -9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 642789, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 642799, loss: -4.678964614868164e-06, rewards: -9.299999999999999, count: 50
epoch: 642809, loss: -2.9978751626913436e-05, rewards: -9.299999999999999, co

epoch: 643689, loss: -1.5020370938145788e-06, rewards: -9.299999999999999, count: 50
epoch: 643699, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 643709, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 643719, loss: -9.179115068036481e-07, rewards: -9.299999999999999, count: 50
epoch: 643729, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 643739, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 643749, loss: -3.7550926208496094e-06, rewards: -9.299999999999999, count: 50
epoch: 643759, loss: -1.8469094356987625e-05, rewards: -9.299999999999999, count: 50
epoch: 643769, loss: -0.00010318040585843846, rewards: -9.299999999999999, count: 50
epoch: 643779, loss: 6.792664498789236e-05, rewards: -9.299999999999999, count: 50
epoch: 643789, loss: -2.7601718102232553e-05, rewards: -9.299999999999999, count: 50
epoch: 643799, loss: -1.102209080272587e-05, rewards: -9.299999999999999, 

epoch: 644679, loss: 1.165986031992361e-05, rewards: -9.299999999999999, count: 50
epoch: 644689, loss: 9.549856258672662e-06, rewards: -9.299999999999999, count: 50
epoch: 644699, loss: -4.80890275866841e-06, rewards: -9.299999999999999, count: 50
epoch: 644709, loss: -2.059936605292023e-06, rewards: -9.299999999999999, count: 50
epoch: 644719, loss: 2.226829565188382e-06, rewards: -9.299999999999999, count: 50
epoch: 644729, loss: -1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 644739, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 644749, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 644759, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 644769, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, count: 50
epoch: 644779, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 644789, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count

epoch: 645669, loss: 2.88128853753733e-06, rewards: -9.299999999999999, count: 50
epoch: 645679, loss: -2.0420550299604656e-06, rewards: -9.299999999999999, count: 50
epoch: 645689, loss: 9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 645699, loss: 1.363754222438729e-06, rewards: -9.299999999999999, count: 50
epoch: 645709, loss: -3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 645719, loss: -1.6188621430046624e-06, rewards: -9.299999999999999, count: 50
epoch: 645729, loss: -3.759861101571005e-06, rewards: -9.299999999999999, count: 50
epoch: 645739, loss: -1.5790463294251822e-05, rewards: -9.299999999999999, count: 50
epoch: 645749, loss: -8.366227120859548e-05, rewards: -9.299999999999999, count: 50
epoch: 645759, loss: 4.027724207844585e-05, rewards: -9.299999999999999, count: 50
epoch: 645769, loss: -4.0953160350909457e-05, rewards: -9.299999999999999, count: 50
epoch: 645779, loss: 1.948595127032604e-05, rewards: -9.299999999999999, coun

epoch: 646659, loss: -6.4754485720186494e-06, rewards: -9.299999999999999, count: 50
epoch: 646669, loss: -2.5584697141312063e-05, rewards: -9.299999999999999, count: 50
epoch: 646679, loss: -9.796738595468923e-05, rewards: -9.299999999999999, count: 50
epoch: 646689, loss: 5.7413577451370656e-05, rewards: -9.299999999999999, count: 50
epoch: 646699, loss: -3.047704740311019e-05, rewards: -9.299999999999999, count: 50
epoch: 646709, loss: 1.0175705028814264e-05, rewards: -9.299999999999999, count: 50
epoch: 646719, loss: -1.3434887478069868e-06, rewards: -9.299999999999999, count: 50
epoch: 646729, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 646739, loss: -7.355213256232673e-07, rewards: -9.299999999999999, count: 50
epoch: 646749, loss: 1.7702579953038367e-06, rewards: -9.299999999999999, count: 50
epoch: 646759, loss: -1.9741057712963084e-06, rewards: -9.299999999999999, count: 50
epoch: 646769, loss: 1.3113021779531664e-08, rewards: -9.29999999999999

epoch: 647649, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, count: 50
epoch: 647659, loss: -1.0180473282161984e-06, rewards: -9.299999999999999, count: 50
epoch: 647669, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 647679, loss: -6.783008643651556e-07, rewards: -9.299999999999999, count: 50
epoch: 647689, loss: -4.316568265494425e-06, rewards: -9.299999999999999, count: 50
epoch: 647699, loss: -3.0981304007582366e-05, rewards: -9.299999999999999, count: 50
epoch: 647709, loss: -0.00011470437311800197, rewards: -9.299999999999999, count: 50
epoch: 647719, loss: 8.890629032975994e-06, rewards: -9.299999999999999, count: 50
epoch: 647729, loss: 3.9664508221903816e-05, rewards: -9.299999999999999, count: 50
epoch: 647739, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 647749, loss: -1.4359950910147745e-05, rewards: -9.299999999999999, count: 50
epoch: 647759, loss: 1.6915797687033773e-06, rewards: -9.29999999999999

epoch: 648639, loss: -5.5482389143435284e-05, rewards: -9.299999999999999, count: 50
epoch: 648649, loss: 4.266142786946148e-05, rewards: -9.299999999999999, count: 50
epoch: 648659, loss: 1.623749813006725e-05, rewards: -9.299999999999999, count: 50
epoch: 648669, loss: -1.5432835425599478e-05, rewards: -9.299999999999999, count: 50
epoch: 648679, loss: -3.6776066281163367e-06, rewards: -9.299999999999999, count: 50
epoch: 648689, loss: 6.762742941646138e-06, rewards: -9.299999999999999, count: 50
epoch: 648699, loss: -2.696514229683089e-06, rewards: -9.299999999999999, count: 50
epoch: 648709, loss: -4.2915345943583816e-08, rewards: -9.299999999999999, count: 50
epoch: 648719, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 648729, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 648739, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 648749, loss: 5.018711135562626e-07, rewards: -9.299999999999999, co

epoch: 649629, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 649639, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 649649, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 649659, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 649669, loss: 3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 649679, loss: 1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 649689, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 649699, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 649709, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 649719, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 649729, loss: 2.896785645134514e-06, rewards: -9.299999999999999, count: 50
epoch: 649739, loss: 2.899765968322754e-05, rewards: -9.299999999999999, co

epoch: 650619, loss: -5.3350926464190707e-05, rewards: -9.299999999999999, count: 50
epoch: 650629, loss: -3.750562609639019e-05, rewards: -9.299999999999999, count: 50
epoch: 650639, loss: -9.710788617667276e-06, rewards: -9.299999999999999, count: 50
epoch: 650649, loss: 2.5359391656820662e-05, rewards: -9.299999999999999, count: 50
epoch: 650659, loss: -1.7300844774581492e-05, rewards: -9.299999999999999, count: 50
epoch: 650669, loss: 7.445812116202433e-06, rewards: -9.299999999999999, count: 50
epoch: 650679, loss: -3.312826265755575e-06, rewards: -9.299999999999999, count: 50
epoch: 650689, loss: 1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 650699, loss: -1.8203259060101118e-06, rewards: -9.299999999999999, count: 50
epoch: 650709, loss: 1.9693375179485884e-06, rewards: -9.299999999999999, count: 50
epoch: 650719, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 650729, loss: -6.139278525552072e-07, rewards: -9.299999999999999, 

epoch: 651609, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 651619, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 651629, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 651639, loss: 1.592636067471176e-06, rewards: -9.299999999999999, count: 50
epoch: 651649, loss: 8.856058229866903e-06, rewards: -9.299999999999999, count: 50
epoch: 651659, loss: 7.018923497525975e-05, rewards: -9.299999999999999, count: 50
epoch: 651669, loss: -2.9062031899229623e-05, rewards: -9.299999999999999, count: 50
epoch: 651679, loss: 5.3015948651591316e-05, rewards: -9.299999999999999, count: 50
epoch: 651689, loss: 1.2260675248398911e-05, rewards: -9.299999999999999, count: 50
epoch: 651699, loss: -1.695036917226389e-05, rewards: -9.299999999999999, count: 50
epoch: 651709, loss: -4.79340542369755e-06, rewards: -9.299999999999999, count: 50
epoch: 651719, loss: 6.737709099979838e-06, rewards: -9.299999999999999, count: 

epoch: 652599, loss: -1.5246868088070187e-06, rewards: -9.299999999999999, count: 50
epoch: 652609, loss: -3.983974238508381e-06, rewards: -9.299999999999999, count: 50
epoch: 652619, loss: -2.0444393157958984e-05, rewards: -9.299999999999999, count: 50
epoch: 652629, loss: -0.00010116100020240992, rewards: -9.299999999999999, count: 50
epoch: 652639, loss: 6.28876659902744e-05, rewards: -9.299999999999999, count: 50
epoch: 652649, loss: -3.1343697628472e-05, rewards: -9.299999999999999, count: 50
epoch: 652659, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 652669, loss: 9.379386938235257e-06, rewards: -9.299999999999999, count: 50
epoch: 652679, loss: -7.944106982904486e-06, rewards: -9.299999999999999, count: 50
epoch: 652689, loss: 5.143881026015151e-06, rewards: -9.299999999999999, count: 50
epoch: 652699, loss: -3.3783912840590347e-06, rewards: -9.299999999999999, count: 50
epoch: 652709, loss: 1.7726421219776967e-06, rewards: -9.299999999999999, coun

epoch: 653589, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 653599, loss: 5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 653609, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 653619, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 653629, loss: 6.771087441848067e-07, rewards: -9.299999999999999, count: 50
epoch: 653639, loss: 1.4030933925823774e-06, rewards: -9.299999999999999, count: 50
epoch: 653649, loss: 6.223916898306925e-06, rewards: -9.299999999999999, count: 50
epoch: 653659, loss: 3.878831921610981e-05, rewards: -9.299999999999999, count: 50
epoch: 653669, loss: 8.58056519064121e-05, rewards: -9.299999999999999, count: 50
epoch: 653679, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 653689, loss: -3.4139156923629344e-05, rewards: -9.299999999999999, count: 50
epoch: 653699, loss: 1.0141134225705173e-05, rewards: -9.299999999999999, count: 50

epoch: 654579, loss: 2.8181075322208926e-06, rewards: -9.299999999999999, count: 50
epoch: 654589, loss: 5.178451374376891e-06, rewards: -9.299999999999999, count: 50
epoch: 654599, loss: 1.7770529666449875e-05, rewards: -9.299999999999999, count: 50
epoch: 654609, loss: 8.03744769655168e-05, rewards: -9.299999999999999, count: 50
epoch: 654619, loss: -3.082990588154644e-05, rewards: -9.299999999999999, count: 50
epoch: 654629, loss: 3.1876563298283145e-05, rewards: -9.299999999999999, count: 50
epoch: 654639, loss: -2.292871431563981e-05, rewards: -9.299999999999999, count: 50
epoch: 654649, loss: 1.3074874914309476e-05, rewards: -9.299999999999999, count: 50
epoch: 654659, loss: -7.828473826521076e-06, rewards: -9.299999999999999, count: 50
epoch: 654669, loss: 5.178451374376891e-06, rewards: -9.299999999999999, count: 50
epoch: 654679, loss: -2.774000222416362e-06, rewards: -9.299999999999999, count: 50
epoch: 654689, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count:

epoch: 655569, loss: -2.696514229683089e-06, rewards: -9.299999999999999, count: 50
epoch: 655579, loss: 1.8429756210025516e-06, rewards: -9.299999999999999, count: 50
epoch: 655589, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 655599, loss: -6.759166808478767e-07, rewards: -9.299999999999999, count: 50
epoch: 655609, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 655619, loss: 3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 655629, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 655639, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 655649, loss: 2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 655659, loss: -5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 655669, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 655679, loss: -6.294250738392293e-07, rewards: -9.299999999999999, cou

epoch: 656559, loss: 2.6869774956139736e-06, rewards: -9.299999999999999, count: 50
epoch: 656569, loss: -1.1694431805153727e-06, rewards: -9.299999999999999, count: 50
epoch: 656579, loss: 1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 656589, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 656599, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 656609, loss: 1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 656619, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 656629, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 656639, loss: 4.017353148810798e-06, rewards: -9.299999999999999, count: 50
epoch: 656649, loss: 2.518296241760254e-05, rewards: -9.299999999999999, count: 50
epoch: 656659, loss: 0.00011783838272094727, rewards: -9.299999999999999, count: 50
epoch: 656669, loss: -5.3020714403828606e-05, rewards: -9.299999999999999, cou

epoch: 657549, loss: 3.903627293766476e-05, rewards: -9.299999999999999, count: 50
epoch: 657559, loss: -1.0823011507454794e-05, rewards: -9.299999999999999, count: 50
epoch: 657569, loss: -3.621578116508317e-06, rewards: -9.299999999999999, count: 50
epoch: 657579, loss: 6.14404689258663e-06, rewards: -9.299999999999999, count: 50
epoch: 657589, loss: -4.460811396711506e-06, rewards: -9.299999999999999, count: 50
epoch: 657599, loss: 2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 657609, loss: -7.534027304245683e-07, rewards: -9.299999999999999, count: 50
epoch: 657619, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 657629, loss: 1.0478496506038937e-06, rewards: -9.299999999999999, count: 50
epoch: 657639, loss: -9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 657649, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 657659, loss: -1.3756751968685421e-06, rewards: -9.299999999999999, cou

epoch: 658539, loss: 2.5975705284508877e-06, rewards: -9.299999999999999, count: 50
epoch: 658549, loss: 9.253025382349733e-06, rewards: -9.299999999999999, count: 50
epoch: 658559, loss: -7.085800007189391e-06, rewards: -9.299999999999999, count: 50
epoch: 658569, loss: 3.3104420253948774e-06, rewards: -9.299999999999999, count: 50
epoch: 658579, loss: -1.4710426512465347e-06, rewards: -9.299999999999999, count: 50
epoch: 658589, loss: 6.747245606675278e-07, rewards: -9.299999999999999, count: 50
epoch: 658599, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 658609, loss: 7.653236480109626e-07, rewards: -9.299999999999999, count: 50
epoch: 658619, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 658629, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 658639, loss: 1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 658649, loss: 2.2101403374108486e-06, rewards: -9.299999999999999, co

epoch: 659529, loss: -1.8203259060101118e-06, rewards: -9.299999999999999, count: 50
epoch: 659539, loss: -1.1101960808446165e-05, rewards: -9.299999999999999, count: 50
epoch: 659549, loss: -9.176850289804861e-05, rewards: -9.299999999999999, count: 50
epoch: 659559, loss: 7.275700772879645e-05, rewards: -9.299999999999999, count: 50
epoch: 659569, loss: -2.5212764739990234e-05, rewards: -9.299999999999999, count: 50
epoch: 659579, loss: -3.0027627872186713e-05, rewards: -9.299999999999999, count: 50
epoch: 659589, loss: 2.8848648980783764e-07, rewards: -9.299999999999999, count: 50
epoch: 659599, loss: 1.1111497769888956e-05, rewards: -9.299999999999999, count: 50
epoch: 659609, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 659619, loss: -3.7550926208496094e-06, rewards: -9.299999999999999, count: 50
epoch: 659629, loss: 2.2101403374108486e-06, rewards: -9.299999999999999, count: 50
epoch: 659639, loss: -3.325939132992062e-07, rewards: -9.299999999999999

epoch: 660519, loss: 4.80890275866841e-06, rewards: -9.299999999999999, count: 50
epoch: 660529, loss: -7.287263997568516e-06, rewards: -9.299999999999999, count: 50
epoch: 660539, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 660549, loss: 2.32934962696163e-06, rewards: -9.299999999999999, count: 50
epoch: 660559, loss: -1.23739243917953e-06, rewards: -9.299999999999999, count: 50
epoch: 660569, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 660579, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 660589, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 660599, loss: -4.2915345943583816e-08, rewards: -9.299999999999999, count: 50
epoch: 660609, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 660619, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 660629, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 

epoch: 661509, loss: -5.800962389912456e-05, rewards: -9.299999999999999, count: 50
epoch: 661519, loss: -1.986503593798261e-05, rewards: -9.299999999999999, count: 50
epoch: 661529, loss: 2.2580623408430256e-05, rewards: -9.299999999999999, count: 50
epoch: 661539, loss: 3.764629354918725e-06, rewards: -9.299999999999999, count: 50
epoch: 661549, loss: -8.79883737070486e-06, rewards: -9.299999999999999, count: 50
epoch: 661559, loss: 3.3104420253948774e-06, rewards: -9.299999999999999, count: 50
epoch: 661569, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 661579, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 661589, loss: 6.902217819515499e-07, rewards: -9.299999999999999, count: 50
epoch: 661599, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 661609, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 661619, loss: -9.179115068036481e-08, rewards: -9.299999999999999, coun

epoch: 662499, loss: 2.6619434265739983e-06, rewards: -9.299999999999999, count: 50
epoch: 662509, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 662519, loss: -7.891654831837513e-07, rewards: -9.299999999999999, count: 50
epoch: 662529, loss: 6.997585160206654e-07, rewards: -9.299999999999999, count: 50
epoch: 662539, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 662549, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 662559, loss: -1.8835068260614207e-07, rewards: -9.299999999999999, count: 50
epoch: 662569, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 662579, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 662589, loss: -1.2862682297054562e-06, rewards: -9.299999999999999, count: 50
epoch: 662599, loss: -4.460811396711506e-06, rewards: -9.299999999999999, count: 50
epoch: 662609, loss: -2.3151636924012564e-05, rewards: -9.29999999999999

epoch: 663489, loss: 5.552768925554119e-06, rewards: -9.299999999999999, count: 50
epoch: 663499, loss: -3.076791699641035e-06, rewards: -9.299999999999999, count: 50
epoch: 663509, loss: 1.3804435639030999e-06, rewards: -9.299999999999999, count: 50
epoch: 663519, loss: -9.965896197172697e-07, rewards: -9.299999999999999, count: 50
epoch: 663529, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 663539, loss: -9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 663549, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 663559, loss: 8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 663569, loss: 8.869171210790228e-07, rewards: -9.299999999999999, count: 50
epoch: 663579, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 663589, loss: 1.994371359614888e-06, rewards: -9.299999999999999, count: 50
epoch: 663599, loss: 1.1538267244759481e-05, rewards: -9.299999999999999, count: 5

epoch: 664479, loss: 3.248453140258789e-06, rewards: -9.299999999999999, count: 50
epoch: 664489, loss: -1.5223026821331587e-06, rewards: -9.299999999999999, count: 50
epoch: 664499, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 664509, loss: -1.7428397995900013e-06, rewards: -9.299999999999999, count: 50
epoch: 664519, loss: 1.4007091522216797e-06, rewards: -9.299999999999999, count: 50
epoch: 664529, loss: 1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 664539, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 664549, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 664559, loss: 1.2636185147130163e-06, rewards: -9.299999999999999, count: 50
epoch: 664569, loss: 7.368326350842835e-06, rewards: -9.299999999999999, count: 50
epoch: 664579, loss: 5.5749416787875816e-05, rewards: -9.299999999999999, count: 50
epoch: 664589, loss: 2.0544528524624184e-05, rewards: -9.299999999999999, cou

epoch: 665469, loss: 7.93576255091466e-05, rewards: -9.299999999999999, count: 50
epoch: 665479, loss: -3.487586945993826e-05, rewards: -9.299999999999999, count: 50
epoch: 665489, loss: 4.421114863362163e-05, rewards: -9.299999999999999, count: 50
epoch: 665499, loss: -1.6760826838435605e-05, rewards: -9.299999999999999, count: 50
epoch: 665509, loss: -4.479885319597088e-06, rewards: -9.299999999999999, count: 50
epoch: 665519, loss: 8.603334208601154e-06, rewards: -9.299999999999999, count: 50
epoch: 665529, loss: -6.139278411865234e-06, rewards: -9.299999999999999, count: 50
epoch: 665539, loss: 3.850459961540764e-06, rewards: -9.299999999999999, count: 50
epoch: 665549, loss: -2.59995454143791e-06, rewards: -9.299999999999999, count: 50
epoch: 665559, loss: 1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 665569, loss: -1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 665579, loss: -1.020431568576896e-06, rewards: -9.299999999999999, count:

epoch: 666459, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 666469, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, count: 50
epoch: 666479, loss: -1.341104507446289e-06, rewards: -9.299999999999999, count: 50
epoch: 666489, loss: -2.83598910755245e-06, rewards: -9.299999999999999, count: 50
epoch: 666499, loss: -1.1136531611555256e-05, rewards: -9.299999999999999, count: 50
epoch: 666509, loss: -6.911635136930272e-05, rewards: -9.299999999999999, count: 50
epoch: 666519, loss: 1.2240409887454007e-05, rewards: -9.299999999999999, count: 50
epoch: 666529, loss: -4.305005131755024e-05, rewards: -9.299999999999999, count: 50
epoch: 666539, loss: 2.2099018679000437e-05, rewards: -9.299999999999999, count: 50
epoch: 666549, loss: 2.6571751732262783e-06, rewards: -9.299999999999999, count: 50
epoch: 666559, loss: -8.965730557974894e-06, rewards: -9.299999999999999, count: 50
epoch: 666569, loss: 6.840229161753086e-06, rewards: -9.299999999999999, co

epoch: 667449, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 667459, loss: -1.2862682297054562e-06, rewards: -9.299999999999999, count: 50
epoch: 667469, loss: 1.1718273071892327e-06, rewards: -9.299999999999999, count: 50
epoch: 667479, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 667489, loss: -6.806850478824344e-07, rewards: -9.299999999999999, count: 50
epoch: 667499, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 667509, loss: 1.5950203078318737e-06, rewards: -9.299999999999999, count: 50
epoch: 667519, loss: 6.837844921392389e-06, rewards: -9.299999999999999, count: 50
epoch: 667529, loss: 4.7003031795611605e-05, rewards: -9.299999999999999, count: 50
epoch: 667539, loss: 5.5475233239121735e-05, rewards: -9.299999999999999, count: 50
epoch: 667549, loss: 2.8066635422874242e-05, rewards: -9.299999999999999, count: 50
epoch: 667559, loss: -3.144621950923465e-05, rewards: -9.299999999999999, coun

epoch: 668439, loss: 1.62363051003922e-06, rewards: -9.299999999999999, count: 50
epoch: 668449, loss: 6.586313247680664e-06, rewards: -9.299999999999999, count: 50
epoch: 668459, loss: 2.844095251930412e-05, rewards: -9.299999999999999, count: 50
epoch: 668469, loss: 0.00010117888450622559, rewards: -9.299999999999999, count: 50
epoch: 668479, loss: -5.5356023949570954e-05, rewards: -9.299999999999999, count: 50
epoch: 668489, loss: 1.6912221326492727e-05, rewards: -9.299999999999999, count: 50
epoch: 668499, loss: 4.248619006830268e-06, rewards: -9.299999999999999, count: 50
epoch: 668509, loss: -8.857250577420928e-06, rewards: -9.299999999999999, count: 50
epoch: 668519, loss: 7.09772120899288e-06, rewards: -9.299999999999999, count: 50
epoch: 668529, loss: -4.507303401624085e-06, rewards: -9.299999999999999, count: 50
epoch: 668539, loss: 2.528428922232706e-06, rewards: -9.299999999999999, count: 50
epoch: 668549, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 5

epoch: 669429, loss: 6.949901489861077e-07, rewards: -9.299999999999999, count: 50
epoch: 669439, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 669449, loss: -6.294250738392293e-07, rewards: -9.299999999999999, count: 50
epoch: 669459, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 669469, loss: -2.0766258330695564e-06, rewards: -9.299999999999999, count: 50
epoch: 669479, loss: -8.932352102419827e-06, rewards: -9.299999999999999, count: 50
epoch: 669489, loss: -4.9428941565565765e-05, rewards: -9.299999999999999, count: 50
epoch: 669499, loss: -5.0108432333217934e-05, rewards: -9.299999999999999, count: 50
epoch: 669509, loss: -6.959438451303868e-06, rewards: -9.299999999999999, count: 50
epoch: 669519, loss: 2.7124880944029428e-05, rewards: -9.299999999999999, count: 50
epoch: 669529, loss: -1.7232894606422633e-05, rewards: -9.299999999999999, count: 50
epoch: 669539, loss: 5.372762643673923e-06, rewards: -9.299999999999999, 

epoch: 670419, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 670429, loss: -1.8835068260614207e-07, rewards: -9.299999999999999, count: 50
epoch: 670439, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 670449, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 670459, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 670469, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 670479, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 670489, loss: 1.519918441772461e-06, rewards: -9.299999999999999, count: 50
epoch: 670499, loss: 9.121894436248112e-06, rewards: -9.299999999999999, count: 50
epoch: 670509, loss: 7.506609108531848e-05, rewards: -9.299999999999999, count: 50
epoch: 670519, loss: -4.4544936827151105e-05, rewards: -9.299999999999999, count: 50
epoch: 670529, loss: 4.722714584204368e-05, rewards: -9.299999999999999, count:

epoch: 671409, loss: 5.231499744695611e-05, rewards: -9.299999999999999, count: 50
epoch: 671419, loss: 1.843929203459993e-05, rewards: -9.299999999999999, count: 50
epoch: 671429, loss: -1.3625622159452178e-05, rewards: -9.299999999999999, count: 50
epoch: 671439, loss: -9.194612175633665e-06, rewards: -9.299999999999999, count: 50
epoch: 671449, loss: 5.552768925554119e-06, rewards: -9.299999999999999, count: 50
epoch: 671459, loss: 2.0420550299604656e-06, rewards: -9.299999999999999, count: 50
epoch: 671469, loss: -2.88605679088505e-06, rewards: -9.299999999999999, count: 50
epoch: 671479, loss: 1.363754222438729e-06, rewards: -9.299999999999999, count: 50
epoch: 671489, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 671499, loss: -9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 671509, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 671519, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count:

epoch: 672399, loss: -4.823207746085245e-06, rewards: -9.299999999999999, count: 50
epoch: 672409, loss: 1.5175342014117632e-06, rewards: -9.299999999999999, count: 50
epoch: 672419, loss: 6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 672429, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 672439, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 672449, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 672459, loss: -8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 672469, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 672479, loss: 9.107589562518115e-07, rewards: -9.299999999999999, count: 50
epoch: 672489, loss: 1.5747547195132938e-06, rewards: -9.299999999999999, count: 50
epoch: 672499, loss: 4.857778549194336e-06, rewards: -9.299999999999999, count: 50
epoch: 672509, loss: 2.578973726485856e-05, rewards: -9.299999999999999, count: 50

epoch: 673389, loss: -7.891654831837513e-07, rewards: -9.299999999999999, count: 50
epoch: 673399, loss: -7.891654831837513e-07, rewards: -9.299999999999999, count: 50
epoch: 673409, loss: -9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 673419, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 673429, loss: -3.427267074584961e-06, rewards: -9.299999999999999, count: 50
epoch: 673439, loss: -2.82347209576983e-05, rewards: -9.299999999999999, count: 50
epoch: 673449, loss: -0.00012677907943725586, rewards: -9.299999999999999, count: 50
epoch: 673459, loss: -4.708766937255859e-06, rewards: -9.299999999999999, count: 50
epoch: 673469, loss: 3.8309095543809235e-05, rewards: -9.299999999999999, count: 50
epoch: 673479, loss: 2.047419548034668e-05, rewards: -9.299999999999999, count: 50
epoch: 673489, loss: -4.428625288710464e-06, rewards: -9.299999999999999, count: 50
epoch: 673499, loss: -9.000301361083984e-06, rewards: -9.299999999999999, cou

epoch: 674379, loss: -1.3434887478069868e-06, rewards: -9.299999999999999, count: 50
epoch: 674389, loss: -6.372928510245401e-06, rewards: -9.299999999999999, count: 50
epoch: 674399, loss: -4.6899320295779034e-05, rewards: -9.299999999999999, count: 50
epoch: 674409, loss: -5.238056110101752e-05, rewards: -9.299999999999999, count: 50
epoch: 674419, loss: -4.507660923991352e-05, rewards: -9.299999999999999, count: 50
epoch: 674429, loss: 2.0245313862687908e-05, rewards: -9.299999999999999, count: 50
epoch: 674439, loss: 1.8758773876470514e-05, rewards: -9.299999999999999, count: 50
epoch: 674449, loss: -6.70075405651005e-06, rewards: -9.299999999999999, count: 50
epoch: 674459, loss: -5.460977718030335e-06, rewards: -9.299999999999999, count: 50
epoch: 674469, loss: 4.363059815659653e-06, rewards: -9.299999999999999, count: 50
epoch: 674479, loss: -1.0037422271125251e-06, rewards: -9.299999999999999, count: 50
epoch: 674489, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, 

epoch: 675369, loss: 7.96318033735588e-07, rewards: -9.299999999999999, count: 50
epoch: 675379, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 675389, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 675399, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 675409, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 675419, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 675429, loss: -2.0956993012077874e-06, rewards: -9.299999999999999, count: 50
epoch: 675439, loss: -4.490613719099201e-06, rewards: -9.299999999999999, count: 50
epoch: 675449, loss: -1.9960403733421117e-05, rewards: -9.299999999999999, count: 50
epoch: 675459, loss: -0.00010150671005249023, rewards: -9.299999999999999, count: 50
epoch: 675469, loss: 6.395578384399414e-05, rewards: -9.299999999999999, count: 50
epoch: 675479, loss: -3.108382225036621e-05, rewards: -9.299999999999999, co

epoch: 676359, loss: 2.1076202756376006e-06, rewards: -9.299999999999999, count: 50
epoch: 676369, loss: 2.2923945834918413e-06, rewards: -9.299999999999999, count: 50
epoch: 676379, loss: -1.5997886748664314e-06, rewards: -9.299999999999999, count: 50
epoch: 676389, loss: -4.410743770222325e-07, rewards: -9.299999999999999, count: 50
epoch: 676399, loss: 1.5878677004366182e-06, rewards: -9.299999999999999, count: 50
epoch: 676409, loss: -1.2123584838263923e-06, rewards: -9.299999999999999, count: 50
epoch: 676419, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 676429, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 676439, loss: 1.5747547195132938e-06, rewards: -9.299999999999999, count: 50
epoch: 676449, loss: 5.21540641784668e-06, rewards: -9.299999999999999, count: 50
epoch: 676459, loss: 2.9127597372280434e-05, rewards: -9.299999999999999, count: 50
epoch: 676469, loss: 0.00010925054812105373, rewards: -9.299999999999999, co

epoch: 677349, loss: -2.2867918232805096e-05, rewards: -9.299999999999999, count: 50
epoch: 677359, loss: -0.00012428521586116403, rewards: -9.299999999999999, count: 50
epoch: 677369, loss: 5.492568016052246e-05, rewards: -9.299999999999999, count: 50
epoch: 677379, loss: 2.626180685183499e-05, rewards: -9.299999999999999, count: 50
epoch: 677389, loss: -1.8984079360961914e-05, rewards: -9.299999999999999, count: 50
epoch: 677399, loss: -8.997917575470638e-06, rewards: -9.299999999999999, count: 50
epoch: 677409, loss: 8.319616426888388e-06, rewards: -9.299999999999999, count: 50
epoch: 677419, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 677429, loss: -2.5677682060631923e-06, rewards: -9.299999999999999, count: 50
epoch: 677439, loss: 2.511739694455173e-06, rewards: -9.299999999999999, count: 50
epoch: 677449, loss: -1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 677459, loss: 7.700920150455204e-07, rewards: -9.299999999999999, co

epoch: 678339, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 678349, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 678359, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 678369, loss: 3.598928515202715e-06, rewards: -9.299999999999999, count: 50
epoch: 678379, loss: 1.9609928131103516e-05, rewards: -9.299999999999999, count: 50
epoch: 678389, loss: 0.00010726094478741288, rewards: -9.299999999999999, count: 50
epoch: 678399, loss: -6.894111720612273e-05, rewards: -9.299999999999999, count: 50
epoch: 678409, loss: 2.0463467080844566e-05, rewards: -9.299999999999999, count: 50
epoch: 678419, loss: 1.531839370727539e-05, rewards: -9.299999999999999, count: 50
epoch: 678429, loss: -1.4474391718977131e-05, rewards: -9.299999999999999, count: 50
epoch: 678439, loss: 4.98414055982721e-06, rewards: -9.299999999999999, count: 50
epoch: 678449, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50


epoch: 679329, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, count: 50
epoch: 679339, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 679349, loss: -2.064704858639743e-06, rewards: -9.299999999999999, count: 50
epoch: 679359, loss: -1.1718273526639678e-05, rewards: -9.299999999999999, count: 50
epoch: 679369, loss: -8.681177860125899e-05, rewards: -9.299999999999999, count: 50
epoch: 679379, loss: 6.001710789860226e-05, rewards: -9.299999999999999, count: 50
epoch: 679389, loss: -3.9517879486083984e-05, rewards: -9.299999999999999, count: 50
epoch: 679399, loss: -1.9257067833677866e-05, rewards: -9.299999999999999, count: 50
epoch: 679409, loss: 1.3803243746224325e-05, rewards: -9.299999999999999, count: 50
epoch: 679419, loss: 4.895925485470798e-06, rewards: -9.299999999999999, count: 50
epoch: 679429, loss: -6.263256182137411e-06, rewards: -9.299999999999999, count: 50
epoch: 679439, loss: 1.9693375179485884e-06, rewards: -9.299999999999999, 

epoch: 680319, loss: -4.8100948333740234e-05, rewards: -9.299999999999999, count: 50
epoch: 680329, loss: 7.214546258182963e-06, rewards: -9.299999999999999, count: 50
epoch: 680339, loss: 1.4566183381248266e-05, rewards: -9.299999999999999, count: 50
epoch: 680349, loss: -9.886026418826077e-06, rewards: -9.299999999999999, count: 50
epoch: 680359, loss: 2.1100045159982983e-06, rewards: -9.299999999999999, count: 50
epoch: 680369, loss: 9.107589562518115e-07, rewards: -9.299999999999999, count: 50
epoch: 680379, loss: -1.1372566177669796e-06, rewards: -9.299999999999999, count: 50
epoch: 680389, loss: 1.3387202670855913e-06, rewards: -9.299999999999999, count: 50
epoch: 680399, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 680409, loss: -3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 680419, loss: 4.971027465217048e-07, rewards: -9.299999999999999, count: 50
epoch: 680429, loss: -2.74181360282455e-07, rewards: -9.299999999999999, cou

epoch: 681309, loss: -1.0926723916782066e-05, rewards: -9.299999999999999, count: 50
epoch: 681319, loss: -2.371072696405463e-06, rewards: -9.299999999999999, count: 50
epoch: 681329, loss: 5.071163286629599e-06, rewards: -9.299999999999999, count: 50
epoch: 681339, loss: -3.844499588012695e-06, rewards: -9.299999999999999, count: 50
epoch: 681349, loss: 2.312660171810421e-06, rewards: -9.299999999999999, count: 50
epoch: 681359, loss: -1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 681369, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 681379, loss: 2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 681389, loss: -9.965896197172697e-07, rewards: -9.299999999999999, count: 50
epoch: 681399, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 681409, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 681419, loss: -3.7741660889878403e-06, rewards: -9.299999999999999, c

epoch: 682299, loss: -2.6251078452332877e-05, rewards: -9.299999999999999, count: 50
epoch: 682309, loss: -0.0001356959401164204, rewards: -9.299999999999999, count: 50
epoch: 682319, loss: -1.2094974408682901e-05, rewards: -9.299999999999999, count: 50
epoch: 682329, loss: 3.200292485416867e-05, rewards: -9.299999999999999, count: 50
epoch: 682339, loss: 2.653360388649162e-05, rewards: -9.299999999999999, count: 50
epoch: 682349, loss: 7.165670467657037e-06, rewards: -9.299999999999999, count: 50
epoch: 682359, loss: -5.93781487623346e-06, rewards: -9.299999999999999, count: 50
epoch: 682369, loss: -5.397796485340223e-06, rewards: -9.299999999999999, count: 50
epoch: 682379, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 682389, loss: 2.051591764029581e-06, rewards: -9.299999999999999, count: 50
epoch: 682399, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 682409, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count:

epoch: 683289, loss: 2.8204917725815903e-06, rewards: -9.299999999999999, count: 50
epoch: 683299, loss: -1.7428397995900013e-06, rewards: -9.299999999999999, count: 50
epoch: 683309, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 683319, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 683329, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 683339, loss: 9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 683349, loss: 3.6156177429802483e-06, rewards: -9.299999999999999, count: 50
epoch: 683359, loss: 1.911401705001481e-05, rewards: -9.299999999999999, count: 50
epoch: 683369, loss: 0.00010130285954801366, rewards: -9.299999999999999, count: 50
epoch: 683379, loss: -6.464719626819715e-05, rewards: -9.299999999999999, count: 50
epoch: 683389, loss: 3.1007526558823884e-05, rewards: -9.299999999999999, count: 50
epoch: 683399, loss: 3.942251169064548e-06, rewards: -9.299999999999999, count

epoch: 684279, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 684289, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 684299, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 684309, loss: -6.759166808478767e-07, rewards: -9.299999999999999, count: 50
epoch: 684319, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 684329, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 684339, loss: 2.2923945834918413e-06, rewards: -9.299999999999999, count: 50
epoch: 684349, loss: 7.84516305429861e-06, rewards: -9.299999999999999, count: 50
epoch: 684359, loss: 4.1025876271305606e-05, rewards: -9.299999999999999, count: 50
epoch: 684369, loss: 7.643819117220119e-05, rewards: -9.299999999999999, count: 50
epoch: 684379, loss: -1.9327402696944773e-05, rewards: -9.299999999999999, count: 50
epoch: 684389, loss: -1.4504194041364826e-05, rewards: -9.299999999999999, coun

epoch: 685269, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 685279, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 685289, loss: -1.21712685086095e-06, rewards: -9.299999999999999, count: 50
epoch: 685299, loss: -1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 685309, loss: -2.32934962696163e-06, rewards: -9.299999999999999, count: 50
epoch: 685319, loss: -1.281976710743038e-05, rewards: -9.299999999999999, count: 50
epoch: 685329, loss: -9.971857070922852e-05, rewards: -9.299999999999999, count: 50
epoch: 685339, loss: 7.824182830518112e-05, rewards: -9.299999999999999, count: 50
epoch: 685349, loss: -1.6369820514228195e-05, rewards: -9.299999999999999, count: 50
epoch: 685359, loss: -2.946972927020397e-05, rewards: -9.299999999999999, count: 50
epoch: 685369, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 685379, loss: 1.0797977665788494e-05, rewards: -9.299999999999999, 

epoch: 686259, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 686269, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 686279, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 686289, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 686299, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 686309, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 686319, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 686329, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 686339, loss: 3.662109293145477e-06, rewards: -9.299999999999999, count: 50
epoch: 686349, loss: 2.6715993953985162e-05, rewards: -9.299999999999999, count: 50
epoch: 686359, loss: 0.00012846231402363628, rewards: -9.299999999999999, count: 50
epoch: 686369, loss: -1.0557174391578883e-05, rewards: -9.299999999999999, count

epoch: 687249, loss: 3.6776066281163367e-06, rewards: -9.299999999999999, count: 50
epoch: 687259, loss: -2.6607513063936494e-06, rewards: -9.299999999999999, count: 50
epoch: 687269, loss: 1.5652179854441783e-06, rewards: -9.299999999999999, count: 50
epoch: 687279, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 687289, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 687299, loss: 6.771087441848067e-07, rewards: -9.299999999999999, count: 50
epoch: 687309, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 687319, loss: -2.733469045779202e-06, rewards: -9.299999999999999, count: 50
epoch: 687329, loss: -6.70075405651005e-06, rewards: -9.299999999999999, count: 50
epoch: 687339, loss: -2.865076021407731e-05, rewards: -9.299999999999999, count: 50
epoch: 687349, loss: -9.851217328105122e-05, rewards: -9.299999999999999, count: 50
epoch: 687359, loss: 5.574703391175717e-05, rewards: -9.299999999999999, co

epoch: 688239, loss: 7.011532579781488e-05, rewards: -9.299999999999999, count: 50
epoch: 688249, loss: -1.0584592928353231e-05, rewards: -9.299999999999999, count: 50
epoch: 688259, loss: -2.0638704882003367e-05, rewards: -9.299999999999999, count: 50
epoch: 688269, loss: 1.9109249478788115e-05, rewards: -9.299999999999999, count: 50
epoch: 688279, loss: -9.496212442172691e-06, rewards: -9.299999999999999, count: 50
epoch: 688289, loss: 3.3974647521972656e-06, rewards: -9.299999999999999, count: 50
epoch: 688299, loss: -1.5974044345057337e-06, rewards: -9.299999999999999, count: 50
epoch: 688309, loss: 1.1765956742237904e-06, rewards: -9.299999999999999, count: 50
epoch: 688319, loss: -1.6295909972541267e-06, rewards: -9.299999999999999, count: 50
epoch: 688329, loss: 1.146793351836095e-06, rewards: -9.299999999999999, count: 50
epoch: 688339, loss: 1.6438960983578e-06, rewards: -9.299999999999999, count: 50
epoch: 688349, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, co

epoch: 689229, loss: 4.470348358154297e-07, rewards: -9.299999999999999, count: 50
epoch: 689239, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 689249, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 689259, loss: 5.125999678057269e-07, rewards: -9.299999999999999, count: 50
epoch: 689269, loss: 3.850460075227602e-07, rewards: -9.299999999999999, count: 50
epoch: 689279, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 689289, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 689299, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 689309, loss: 1.5950203078318737e-06, rewards: -9.299999999999999, count: 50
epoch: 689319, loss: 1.194119431602303e-05, rewards: -9.299999999999999, count: 50
epoch: 689329, loss: 0.00011184572940692306, rewards: -9.299999999999999, count: 50
epoch: 689339, loss: -9.102105832425877e-05, rewards: -9.299999999999999, count: 

epoch: 690219, loss: 1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 690229, loss: -9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 690239, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 690249, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 690259, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 690269, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 690279, loss: 1.817941665649414e-06, rewards: -9.299999999999999, count: 50
epoch: 690289, loss: 4.460811396711506e-06, rewards: -9.299999999999999, count: 50
epoch: 690299, loss: 1.9768476704484783e-05, rewards: -9.299999999999999, count: 50
epoch: 690309, loss: 0.00010211467451881617, rewards: -9.299999999999999, count: 50
epoch: 690319, loss: -6.476044654846191e-05, rewards: -9.299999999999999, count: 50
epoch: 690329, loss: 3.034830115211662e-05, rewards: -9.299999999999999, count: 

epoch: 691209, loss: -1.7220974768861197e-05, rewards: -9.299999999999999, count: 50
epoch: 691219, loss: 8.481741133437026e-06, rewards: -9.299999999999999, count: 50
epoch: 691229, loss: 1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 691239, loss: -4.163980520388577e-06, rewards: -9.299999999999999, count: 50
epoch: 691249, loss: 3.28302394336788e-06, rewards: -9.299999999999999, count: 50
epoch: 691259, loss: -1.8358230136072962e-06, rewards: -9.299999999999999, count: 50
epoch: 691269, loss: 1.2731552487821318e-06, rewards: -9.299999999999999, count: 50
epoch: 691279, loss: -1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 691289, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 691299, loss: -4.2915345943583816e-08, rewards: -9.299999999999999, count: 50
epoch: 691309, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 691319, loss: -5.781650429526053e-07, rewards: -9.299999999999999, co

epoch: 692199, loss: -1.6622543625999242e-05, rewards: -9.299999999999999, count: 50
epoch: 692209, loss: -3.983855276601389e-05, rewards: -9.299999999999999, count: 50
epoch: 692219, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 692229, loss: 1.4238357834983617e-05, rewards: -9.299999999999999, count: 50
epoch: 692239, loss: -1.5318394162022742e-06, rewards: -9.299999999999999, count: 50
epoch: 692249, loss: -4.255771727912361e-06, rewards: -9.299999999999999, count: 50
epoch: 692259, loss: 3.0231476557673886e-06, rewards: -9.299999999999999, count: 50
epoch: 692269, loss: -1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 692279, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 692289, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 692299, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 692309, loss: -3.826618240054813e-07, rewards: -9.299999999999999, co

epoch: 693189, loss: 1.0830163773789536e-05, rewards: -9.299999999999999, count: 50
epoch: 693199, loss: 8.23915033834055e-05, rewards: -9.299999999999999, count: 50
epoch: 693209, loss: -5.327820690581575e-05, rewards: -9.299999999999999, count: 50
epoch: 693219, loss: 4.384159910841845e-05, rewards: -9.299999999999999, count: 50
epoch: 693229, loss: 1.720666841720231e-05, rewards: -9.299999999999999, count: 50
epoch: 693239, loss: -1.5203952898446005e-05, rewards: -9.299999999999999, count: 50
epoch: 693249, loss: -4.490613719099201e-06, rewards: -9.299999999999999, count: 50
epoch: 693259, loss: 6.742477580701234e-06, rewards: -9.299999999999999, count: 50
epoch: 693269, loss: -2.16841704059334e-06, rewards: -9.299999999999999, count: 50
epoch: 693279, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 693289, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 693299, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 5

epoch: 694179, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 694189, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 694199, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 694209, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 694219, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 694229, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 694239, loss: -2.696514229683089e-06, rewards: -9.299999999999999, count: 50
epoch: 694249, loss: -9.275674528907984e-06, rewards: -9.299999999999999, count: 50
epoch: 694259, loss: -4.738092320621945e-05, rewards: -9.299999999999999, count: 50
epoch: 694269, loss: -5.700469046132639e-05, rewards: -9.299999999999999, count: 50
epoch: 694279, loss: 1.2230872926011216e-05, rewards: -9.299999999999999, count: 50
epoch: 694289, loss: 1.253247228305554e-05, rewards: -9.299999999999999, count

epoch: 695169, loss: -2.123117383234785e-06, rewards: -9.299999999999999, count: 50
epoch: 695179, loss: 1.519918441772461e-06, rewards: -9.299999999999999, count: 50
epoch: 695189, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 695199, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 695209, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 695219, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 695229, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 695239, loss: -1.4555454299625126e-06, rewards: -9.299999999999999, count: 50
epoch: 695249, loss: -4.920959327137098e-06, rewards: -9.299999999999999, count: 50
epoch: 695259, loss: -2.759695053100586e-05, rewards: -9.299999999999999, count: 50
epoch: 695269, loss: -0.00011166930198669434, rewards: -9.299999999999999, count: 50
epoch: 695279, loss: 5.033373963669874e-05, rewards: -9.299999999999999, coun

epoch: 696159, loss: 3.2436846595373936e-06, rewards: -9.299999999999999, count: 50
epoch: 696169, loss: 2.4405717340414412e-05, rewards: -9.299999999999999, count: 50
epoch: 696179, loss: 0.00012772559421136975, rewards: -9.299999999999999, count: 50
epoch: 696189, loss: -3.840088902506977e-05, rewards: -9.299999999999999, count: 50
epoch: 696199, loss: -3.753304554265924e-05, rewards: -9.299999999999999, count: 50
epoch: 696209, loss: 7.015466508164536e-06, rewards: -9.299999999999999, count: 50
epoch: 696219, loss: 1.480221726524178e-05, rewards: -9.299999999999999, count: 50
epoch: 696229, loss: -3.427267074584961e-06, rewards: -9.299999999999999, count: 50
epoch: 696239, loss: -4.575252660288243e-06, rewards: -9.299999999999999, count: 50
epoch: 696249, loss: 3.495216333249118e-06, rewards: -9.299999999999999, count: 50
epoch: 696259, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 696269, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, coun

epoch: 697149, loss: -4.5657156988454517e-07, rewards: -9.299999999999999, count: 50
epoch: 697159, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 697169, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 697179, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 697189, loss: 4.2676924749684986e-07, rewards: -9.299999999999999, count: 50
epoch: 697199, loss: 1.341104507446289e-06, rewards: -9.299999999999999, count: 50
epoch: 697209, loss: 4.912614713248331e-06, rewards: -9.299999999999999, count: 50
epoch: 697219, loss: 3.2255647965939716e-05, rewards: -9.299999999999999, count: 50
epoch: 697229, loss: 0.00010745286999735981, rewards: -9.299999999999999, count: 50
epoch: 697239, loss: -1.9509792764438316e-05, rewards: -9.299999999999999, count: 50
epoch: 697249, loss: -3.3289194107055664e-05, rewards: -9.299999999999999, count: 50
epoch: 697259, loss: 1.341104507446289e-05, rewards: -9.299999999999999, co

epoch: 698139, loss: 5.939364564255811e-05, rewards: -9.299999999999999, count: 50
epoch: 698149, loss: -3.8479567592730746e-05, rewards: -9.299999999999999, count: 50
epoch: 698159, loss: 1.6486644653923577e-06, rewards: -9.299999999999999, count: 50
epoch: 698169, loss: 1.1721849659807049e-05, rewards: -9.299999999999999, count: 50
epoch: 698179, loss: -9.59634780883789e-06, rewards: -9.299999999999999, count: 50
epoch: 698189, loss: 4.895925485470798e-06, rewards: -9.299999999999999, count: 50
epoch: 698199, loss: -3.0446053642663173e-06, rewards: -9.299999999999999, count: 50
epoch: 698209, loss: 1.7726421219776967e-06, rewards: -9.299999999999999, count: 50
epoch: 698219, loss: -1.4579295566363726e-06, rewards: -9.299999999999999, count: 50
epoch: 698229, loss: 1.1563300859052106e-06, rewards: -9.299999999999999, count: 50
epoch: 698239, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count: 50
epoch: 698249, loss: 5.364418029785156e-07, rewards: -9.299999999999999, coun

epoch: 699129, loss: -1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 699139, loss: 1.7499924069852568e-06, rewards: -9.299999999999999, count: 50
epoch: 699149, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 699159, loss: 7.796287491146359e-07, rewards: -9.299999999999999, count: 50
epoch: 699169, loss: 2.473592758178711e-06, rewards: -9.299999999999999, count: 50
epoch: 699179, loss: 4.007816187368007e-06, rewards: -9.299999999999999, count: 50
epoch: 699189, loss: 1.0942220797005575e-05, rewards: -9.299999999999999, count: 50
epoch: 699199, loss: 5.2745341236004606e-05, rewards: -9.299999999999999, count: 50
epoch: 699209, loss: 3.9705038943793625e-05, rewards: -9.299999999999999, count: 50
epoch: 699219, loss: 6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 699229, loss: -1.8106698917108588e-05, rewards: -9.299999999999999, count: 50
epoch: 699239, loss: 1.628398968023248e-05, rewards: -9.299999999999999, count:

epoch: 700119, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 700129, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 700139, loss: 1.5175342014117632e-06, rewards: -9.299999999999999, count: 50
epoch: 700149, loss: 3.834962626569904e-06, rewards: -9.299999999999999, count: 50
epoch: 700159, loss: 1.7101765479310416e-05, rewards: -9.299999999999999, count: 50
epoch: 700169, loss: 8.804082608548924e-05, rewards: -9.299999999999999, count: 50
epoch: 700179, loss: -4.772424654220231e-05, rewards: -9.299999999999999, count: 50
epoch: 700189, loss: 4.058599370182492e-05, rewards: -9.299999999999999, count: 50
epoch: 700199, loss: -1.6279220290016383e-05, rewards: -9.299999999999999, count: 50
epoch: 700209, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 700219, loss: 4.117488970223349e-06, rewards: -9.299999999999999, count: 50
epoch: 700229, loss: -3.621578116508317e-06, rewards: -9.299999999999999, count:

epoch: 701109, loss: 6.608962848986266e-06, rewards: -9.299999999999999, count: 50
epoch: 701119, loss: 5.143881026015151e-06, rewards: -9.299999999999999, count: 50
epoch: 701129, loss: -4.674196134146769e-06, rewards: -9.299999999999999, count: 50
epoch: 701139, loss: 1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 701149, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 701159, loss: -9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 701169, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, count: 50
epoch: 701179, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 701189, loss: -7.355213256232673e-07, rewards: -9.299999999999999, count: 50
epoch: 701199, loss: 4.470348358154297e-07, rewards: -9.299999999999999, count: 50
epoch: 701209, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 701219, loss: 6.997585160206654e-07, rewards: -9.299999999999999, count: 50

epoch: 702099, loss: 6.293058504525106e-06, rewards: -9.299999999999999, count: 50
epoch: 702109, loss: 5.679130481439643e-06, rewards: -9.299999999999999, count: 50
epoch: 702119, loss: -6.387233952409588e-06, rewards: -9.299999999999999, count: 50
epoch: 702129, loss: 4.332065600465285e-06, rewards: -9.299999999999999, count: 50
epoch: 702139, loss: -3.0303001494758064e-06, rewards: -9.299999999999999, count: 50
epoch: 702149, loss: 2.1255016235954827e-06, rewards: -9.299999999999999, count: 50
epoch: 702159, loss: -1.0478496506038937e-06, rewards: -9.299999999999999, count: 50
epoch: 702169, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 702179, loss: 7.796287491146359e-07, rewards: -9.299999999999999, count: 50
epoch: 702189, loss: 7.796287491146359e-07, rewards: -9.299999999999999, count: 50
epoch: 702199, loss: 1.0251999356114538e-06, rewards: -9.299999999999999, count: 50
epoch: 702209, loss: 8.940696716308594e-07, rewards: -9.299999999999999, count:

epoch: 703089, loss: -3.6120413824392017e-06, rewards: -9.299999999999999, count: 50
epoch: 703099, loss: -2.8864145861007273e-05, rewards: -9.299999999999999, count: 50
epoch: 703109, loss: -0.00012202977814013138, rewards: -9.299999999999999, count: 50
epoch: 703119, loss: 4.224777057970641e-06, rewards: -9.299999999999999, count: 50
epoch: 703129, loss: 4.027008981211111e-05, rewards: -9.299999999999999, count: 50
epoch: 703139, loss: 1.1307000931992661e-05, rewards: -9.299999999999999, count: 50
epoch: 703149, loss: -1.1793375051638577e-05, rewards: -9.299999999999999, count: 50
epoch: 703159, loss: -5.346536454453599e-06, rewards: -9.299999999999999, count: 50
epoch: 703169, loss: 4.740953500004252e-06, rewards: -9.299999999999999, count: 50
epoch: 703179, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 703189, loss: -1.7499924069852568e-06, rewards: -9.299999999999999, count: 50
epoch: 703199, loss: 1.363754222438729e-06, rewards: -9.299999999999999, c

epoch: 704079, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 704089, loss: 1.7547607740198146e-06, rewards: -9.299999999999999, count: 50
epoch: 704099, loss: 6.983280400163494e-06, rewards: -9.299999999999999, count: 50
epoch: 704109, loss: 4.4840573536930606e-05, rewards: -9.299999999999999, count: 50
epoch: 704119, loss: 6.394624506356195e-05, rewards: -9.299999999999999, count: 50
epoch: 704129, loss: 2.187490463256836e-05, rewards: -9.299999999999999, count: 50
epoch: 704139, loss: -3.3160449675051495e-05, rewards: -9.299999999999999, count: 50
epoch: 704149, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 704159, loss: 1.1274814823991619e-05, rewards: -9.299999999999999, count: 50
epoch: 704169, loss: -6.223916898306925e-06, rewards: -9.299999999999999, count: 50
epoch: 704179, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 704189, loss: 8.940696716308594e-07, rewards: -9.299999999999999, count

epoch: 705069, loss: 1.1563300859052106e-06, rewards: -9.299999999999999, count: 50
epoch: 705079, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 705089, loss: 1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 705099, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 705109, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 705119, loss: 1.1765956742237904e-06, rewards: -9.299999999999999, count: 50
epoch: 705129, loss: 7.368326350842835e-06, rewards: -9.299999999999999, count: 50
epoch: 705139, loss: 7.213711796794087e-05, rewards: -9.299999999999999, count: 50
epoch: 705149, loss: -5.327820690581575e-05, rewards: -9.299999999999999, count: 50
epoch: 705159, loss: 3.383040530025028e-05, rewards: -9.299999999999999, count: 50
epoch: 705169, loss: 3.597617251216434e-05, rewards: -9.299999999999999, count: 50
epoch: 705179, loss: 1.720666841720231e-05, rewards: -9.299999999999999, count: 50
e

epoch: 706059, loss: -3.983974238508381e-06, rewards: -9.299999999999999, count: 50
epoch: 706069, loss: -6.333589681162266e-06, rewards: -9.299999999999999, count: 50
epoch: 706079, loss: 3.827810360235162e-06, rewards: -9.299999999999999, count: 50
epoch: 706089, loss: -8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 706099, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 706109, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 706119, loss: -5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 706129, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 706139, loss: 4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 706149, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 706159, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 706169, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 

epoch: 707049, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 707059, loss: 1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 707069, loss: 2.0015240806969814e-06, rewards: -9.299999999999999, count: 50
epoch: 707079, loss: 3.7896634239587e-06, rewards: -9.299999999999999, count: 50
epoch: 707089, loss: 2.0704268536064774e-05, rewards: -9.299999999999999, count: 50
epoch: 707099, loss: 0.00011495828948682174, rewards: -9.299999999999999, count: 50
epoch: 707109, loss: -6.927609501872212e-05, rewards: -9.299999999999999, count: 50
epoch: 707119, loss: 1.3804435639030999e-06, rewards: -9.299999999999999, count: 50
epoch: 707129, loss: 2.4263857994810678e-05, rewards: -9.299999999999999, count: 50
epoch: 707139, loss: -8.046627044677734e-06, rewards: -9.299999999999999, count: 50
epoch: 707149, loss: -4.363059815659653e-06, rewards: -9.299999999999999, count: 50
epoch: 707159, loss: 5.220174898568075e-06, rewards: -9.299999999999999, count:

epoch: 708039, loss: 5.19037257618038e-06, rewards: -9.299999999999999, count: 50
epoch: 708049, loss: -3.076791699641035e-06, rewards: -9.299999999999999, count: 50
epoch: 708059, loss: 1.8060206912196008e-06, rewards: -9.299999999999999, count: 50
epoch: 708069, loss: -4.541873863672663e-07, rewards: -9.299999999999999, count: 50
epoch: 708079, loss: -3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 708089, loss: -6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 708099, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 708109, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 708119, loss: -2.4116038730426226e-06, rewards: -9.299999999999999, count: 50
epoch: 708129, loss: -1.8637179891811684e-05, rewards: -9.299999999999999, count: 50
epoch: 708139, loss: -0.0001237165997736156, rewards: -9.299999999999999, count: 50
epoch: 708149, loss: 7.048010593280196e-05, rewards: -9.299999999999999, coun

epoch: 709029, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 709039, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 709049, loss: -6.508827254947391e-07, rewards: -9.299999999999999, count: 50
epoch: 709059, loss: -7.152557657263969e-08, rewards: -9.299999999999999, count: 50
epoch: 709069, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 709079, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 709089, loss: -5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 709099, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 709109, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 709119, loss: 3.3605099361011526e-06, rewards: -9.299999999999999, count: 50
epoch: 709129, loss: 2.8582811864907853e-05, rewards: -9.299999999999999, count: 50
epoch: 709139, loss: 0.00012497186253312975, rewards: -9.299999999999999, coun

epoch: 710019, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 710029, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 710039, loss: 1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 710049, loss: 2.473592758178711e-06, rewards: -9.299999999999999, count: 50
epoch: 710059, loss: 9.698867870611139e-06, rewards: -9.299999999999999, count: 50
epoch: 710069, loss: 6.642461084993556e-05, rewards: -9.299999999999999, count: 50
epoch: 710079, loss: -1.0673999895516317e-05, rewards: -9.299999999999999, count: 50
epoch: 710089, loss: 4.993319453205913e-05, rewards: -9.299999999999999, count: 50
epoch: 710099, loss: -1.1970996638410725e-05, rewards: -9.299999999999999, count: 50
epoch: 710109, loss: -1.5035867363621946e-05, rewards: -9.299999999999999, count: 50
epoch: 710119, loss: 9.591579328116495e-06, rewards: -9.299999999999999, count: 50
epoch: 710129, loss: -5.4836272056491e-07, rewards: -9.299999999999999, count: 

epoch: 711009, loss: -3.1273364584194496e-05, rewards: -9.299999999999999, count: 50
epoch: 711019, loss: 4.930496288579889e-06, rewards: -9.299999999999999, count: 50
epoch: 711029, loss: 7.172822733991779e-06, rewards: -9.299999999999999, count: 50
epoch: 711039, loss: -6.703138296870748e-06, rewards: -9.299999999999999, count: 50
epoch: 711049, loss: 3.874301910400391e-06, rewards: -9.299999999999999, count: 50
epoch: 711059, loss: -2.123117383234785e-06, rewards: -9.299999999999999, count: 50
epoch: 711069, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 711079, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 711089, loss: 9.739399047248298e-07, rewards: -9.299999999999999, count: 50
epoch: 711099, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 711109, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 711119, loss: 1.0907649539149133e-06, rewards: -9.299999999999999, count:

epoch: 711999, loss: 6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 712009, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 712019, loss: 1.5246868088070187e-06, rewards: -9.299999999999999, count: 50
epoch: 712029, loss: 7.58886335461284e-06, rewards: -9.299999999999999, count: 50
epoch: 712039, loss: 5.823373794555664e-05, rewards: -9.299999999999999, count: 50
epoch: 712049, loss: 9.222030712408014e-06, rewards: -9.299999999999999, count: 50
epoch: 712059, loss: 5.4764746892033145e-05, rewards: -9.299999999999999, count: 50
epoch: 712069, loss: -5.158186013431987e-06, rewards: -9.299999999999999, count: 50
epoch: 712079, loss: -2.0006895283586346e-05, rewards: -9.299999999999999, count: 50
epoch: 712089, loss: 2.2149085907585686e-06, rewards: -9.299999999999999, count: 50
epoch: 712099, loss: 6.583929007319966e-06, rewards: -9.299999999999999, count: 50
epoch: 712109, loss: -3.842115347651998e-06, rewards: -9.299999999999999, count: 5

epoch: 712989, loss: 1.341104507446289e-05, rewards: -9.299999999999999, count: 50
epoch: 712999, loss: 4.456043370737461e-06, rewards: -9.299999999999999, count: 50
epoch: 713009, loss: -6.462335477408487e-06, rewards: -9.299999999999999, count: 50
epoch: 713019, loss: 2.541542016842868e-06, rewards: -9.299999999999999, count: 50
epoch: 713029, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 713039, loss: -6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 713049, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 713059, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 713069, loss: -2.7894972731701273e-07, rewards: -9.299999999999999, count: 50
epoch: 713079, loss: 3.5762788286319847e-08, rewards: -9.299999999999999, count: 50
epoch: 713089, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 713099, loss: 6.67572024326546e-08, rewards: -9.299999999999999, count: 5

epoch: 713979, loss: 1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 713989, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 713999, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, count: 50
epoch: 714009, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 714019, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 714029, loss: 1.7380714325554436e-06, rewards: -9.299999999999999, count: 50
epoch: 714039, loss: 1.0291338185197674e-05, rewards: -9.299999999999999, count: 50
epoch: 714049, loss: 8.871674799593166e-05, rewards: -9.299999999999999, count: 50
epoch: 714059, loss: -7.298350101336837e-05, rewards: -9.299999999999999, count: 50
epoch: 714069, loss: 2.3740529286442325e-05, rewards: -9.299999999999999, count: 50
epoch: 714079, loss: 3.220915823476389e-05, rewards: -9.299999999999999, count: 50
epoch: 714089, loss: 6.173849214974325e-06, rewards: -9.299999999999999, count:

epoch: 714969, loss: -4.234910011291504e-05, rewards: -9.299999999999999, count: 50
epoch: 714979, loss: 4.304647518438287e-06, rewards: -9.299999999999999, count: 50
epoch: 714989, loss: 1.1854172043967992e-05, rewards: -9.299999999999999, count: 50
epoch: 714999, loss: -9.496212442172691e-06, rewards: -9.299999999999999, count: 50
epoch: 715009, loss: 4.202127456665039e-06, rewards: -9.299999999999999, count: 50
epoch: 715019, loss: -1.9371509552001953e-06, rewards: -9.299999999999999, count: 50
epoch: 715029, loss: 1.2469291732486454e-06, rewards: -9.299999999999999, count: 50
epoch: 715039, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 715049, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 715059, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 715069, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 715079, loss: 2.7298926852381555e-07, rewards: -9.299999999999999, count

epoch: 715959, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 715969, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 715979, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 715989, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 715999, loss: 6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 716009, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 716019, loss: 6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 716029, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 716039, loss: 5.538463483389933e-06, rewards: -9.299999999999999, count: 50
epoch: 716049, loss: 3.62646569556091e-05, rewards: -9.299999999999999, count: 50
epoch: 716059, loss: 9.510636300547048e-05, rewards: -9.299999999999999, count: 50
epoch: 716069, loss: -4.266500582161825e-06, rewards: -9.299999999999999, count: 50

epoch: 716949, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 716959, loss: 2.9206275939941406e-06, rewards: -9.299999999999999, count: 50
epoch: 716969, loss: -3.5071373076789314e-06, rewards: -9.299999999999999, count: 50
epoch: 716979, loss: 7.152557657263969e-08, rewards: -9.299999999999999, count: 50
epoch: 716989, loss: 1.5676021121180383e-06, rewards: -9.299999999999999, count: 50
epoch: 716999, loss: 2.948045676021138e-06, rewards: -9.299999999999999, count: 50
epoch: 717009, loss: 9.084939847525675e-06, rewards: -9.299999999999999, count: 50
epoch: 717019, loss: 4.287481351639144e-05, rewards: -9.299999999999999, count: 50
epoch: 717029, loss: 6.968974776100367e-05, rewards: -9.299999999999999, count: 50
epoch: 717039, loss: -2.691864938242361e-05, rewards: -9.299999999999999, count: 50
epoch: 717049, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 717059, loss: 8.319616426888388e-06, rewards: -9.299999999999999, count: 

epoch: 717939, loss: 7.653236480109626e-07, rewards: -9.299999999999999, count: 50
epoch: 717949, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 717959, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 717969, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 717979, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 717989, loss: -1.519918441772461e-06, rewards: -9.299999999999999, count: 50
epoch: 717999, loss: -1.0520219802856445e-05, rewards: -9.299999999999999, count: 50
epoch: 718009, loss: -8.494138455716893e-05, rewards: -9.299999999999999, count: 50
epoch: 718019, loss: 6.300210952758789e-05, rewards: -9.299999999999999, count: 50
epoch: 718029, loss: -3.5459994251141325e-05, rewards: -9.299999999999999, count: 50
epoch: 718039, loss: -2.8092861612094566e-05, rewards: -9.299999999999999, count: 50
epoch: 718049, loss: 5.19037257618038e-06, rewards: -9.299999999999999, cou

epoch: 718929, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 718939, loss: 5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 718949, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 718959, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 718969, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 718979, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 718989, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 718999, loss: -3.842115347651998e-06, rewards: -9.299999999999999, count: 50
epoch: 719009, loss: -2.7246474928688258e-05, rewards: -9.299999999999999, count: 50
epoch: 719019, loss: -0.00012439966667443514, rewards: -9.299999999999999, count: 50
epoch: 719029, loss: 2.2424459530157037e-05, rewards: -9.299999999999999, count: 50
epoch: 719039, loss: 4.069328133482486e-05, rewards: -9.299999999999999, co

epoch: 719919, loss: 3.874301910400391e-06, rewards: -9.299999999999999, count: 50
epoch: 719929, loss: 2.294778823852539e-05, rewards: -9.299999999999999, count: 50
epoch: 719939, loss: 0.00012185930972918868, rewards: -9.299999999999999, count: 50
epoch: 719949, loss: -5.783200322184712e-05, rewards: -9.299999999999999, count: 50
epoch: 719959, loss: -1.996755599975586e-05, rewards: -9.299999999999999, count: 50
epoch: 719969, loss: 2.2356509361998178e-05, rewards: -9.299999999999999, count: 50
epoch: 719979, loss: 3.7896634239587e-06, rewards: -9.299999999999999, count: 50
epoch: 719989, loss: -8.866786629369017e-06, rewards: -9.299999999999999, count: 50
epoch: 719999, loss: 3.7741660889878403e-06, rewards: -9.299999999999999, count: 50
epoch: 720009, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 720019, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 720029, loss: 7.796287491146359e-07, rewards: -9.299999999999999, count: 50

epoch: 720909, loss: -1.8835068260614207e-07, rewards: -9.299999999999999, count: 50
epoch: 720919, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 720929, loss: 4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 720939, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 720949, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 720959, loss: 6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 720969, loss: 6.997585160206654e-07, rewards: -9.299999999999999, count: 50
epoch: 720979, loss: 3.358125695740455e-06, rewards: -9.299999999999999, count: 50
epoch: 720989, loss: 2.5018453015945852e-05, rewards: -9.299999999999999, count: 50
epoch: 720999, loss: 0.0001279926364077255, rewards: -9.299999999999999, count: 50
epoch: 721009, loss: -3.328442471683957e-05, rewards: -9.299999999999999, count: 50
epoch: 721019, loss: -3.960728645324707e-05, rewards: -9.299999999999999, count: 50
e

epoch: 721899, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 721909, loss: 3.2424927098873013e-07, rewards: -9.299999999999999, count: 50
epoch: 721919, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 721929, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 721939, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 721949, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 721959, loss: 4.380941390991211e-06, rewards: -9.299999999999999, count: 50
epoch: 721969, loss: 3.5574437788454816e-05, rewards: -9.299999999999999, count: 50
epoch: 721979, loss: 9.864568710327148e-05, rewards: -9.299999999999999, count: 50
epoch: 721989, loss: 2.761602445389144e-05, rewards: -9.299999999999999, count: 50
epoch: 721999, loss: -2.9876231565140188e-05, rewards: -9.299999999999999, count: 50
epoch: 722009, loss: -1.981139212148264e-05, rewards: -9.299999999999999, count:

epoch: 722889, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 722899, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 722909, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 722919, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 722929, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 722939, loss: 3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 722949, loss: 3.869533429678995e-06, rewards: -9.299999999999999, count: 50
epoch: 722959, loss: 6.085157292545773e-05, rewards: -9.299999999999999, count: 50
epoch: 722969, loss: -6.564140494447201e-05, rewards: -9.299999999999999, count: 50
epoch: 722979, loss: -1.981139212148264e-05, rewards: -9.299999999999999, count: 50
epoch: 722989, loss: -1.9490718841552734e-05, rewards: -9.299999999999999, count: 50
epoch: 722999, loss: -1.687288204266224e-05, rewards: -9.299999999999999, count: 5

epoch: 723879, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 723889, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 723899, loss: 2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 723909, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 723919, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 723929, loss: 1.115798909268051e-06, rewards: -9.299999999999999, count: 50
epoch: 723939, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 723949, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 723959, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 723969, loss: 2.4509429294994334e-06, rewards: -9.299999999999999, count: 50
epoch: 723979, loss: 2.147793748008553e-05, rewards: -9.299999999999999, count: 50
epoch: 723989, loss: 0.00014416933117900044, rewards: -9.299999999999999, count: 

epoch: 724869, loss: 2.4247169676527847e-06, rewards: -9.299999999999999, count: 50
epoch: 724879, loss: 1.3629198292619549e-05, rewards: -9.299999999999999, count: 50
epoch: 724889, loss: 9.721040987642482e-05, rewards: -9.299999999999999, count: 50
epoch: 724899, loss: -7.130741869332269e-05, rewards: -9.299999999999999, count: 50
epoch: 724909, loss: 2.7996302378596738e-05, rewards: -9.299999999999999, count: 50
epoch: 724919, loss: 2.279400905536022e-05, rewards: -9.299999999999999, count: 50
epoch: 724929, loss: -1.1705160432029516e-05, rewards: -9.299999999999999, count: 50
epoch: 724939, loss: -5.508661161002237e-06, rewards: -9.299999999999999, count: 50
epoch: 724949, loss: 6.219148417585529e-06, rewards: -9.299999999999999, count: 50
epoch: 724959, loss: -2.409219632681925e-06, rewards: -9.299999999999999, count: 50
epoch: 724969, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 724979, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count:

epoch: 725859, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 725869, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 725879, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 725889, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 725899, loss: -4.410743770222325e-07, rewards: -9.299999999999999, count: 50
epoch: 725909, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 725919, loss: 1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 725929, loss: 6.171464974613627e-06, rewards: -9.299999999999999, count: 50
epoch: 725939, loss: 3.3795833587646484e-05, rewards: -9.299999999999999, count: 50
epoch: 725949, loss: 9.736418724060059e-05, rewards: -9.299999999999999, count: 50
epoch: 725959, loss: -3.674149411381222e-05, rewards: -9.299999999999999, count: 50
epoch: 725969, loss: -9.67144933383679e-06, rewards: -9.299999999999999, count: 5

epoch: 726849, loss: -2.6171206627623178e-05, rewards: -9.299999999999999, count: 50
epoch: 726859, loss: 1.8814802388078533e-05, rewards: -9.299999999999999, count: 50
epoch: 726869, loss: -2.0766258330695564e-06, rewards: -9.299999999999999, count: 50
epoch: 726879, loss: -4.355907549324911e-06, rewards: -9.299999999999999, count: 50
epoch: 726889, loss: 4.351139068603516e-06, rewards: -9.299999999999999, count: 50
epoch: 726899, loss: -3.2782554626464844e-06, rewards: -9.299999999999999, count: 50
epoch: 726909, loss: 1.920461727422662e-06, rewards: -9.299999999999999, count: 50
epoch: 726919, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 726929, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 726939, loss: 8.893013045963016e-07, rewards: -9.299999999999999, count: 50
epoch: 726949, loss: 7.545947937614983e-07, rewards: -9.299999999999999, count: 50
epoch: 726959, loss: 7.379055091405462e-07, rewards: -9.299999999999999, co

epoch: 727839, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 727849, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 727859, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 727869, loss: 5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 727879, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 727889, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 727899, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 727909, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 727919, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 727929, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 727939, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 727949, loss: -2.7894972731701273e-07, rewards: -9.299999999999999, count

epoch: 728829, loss: 2.960443453048356e-05, rewards: -9.299999999999999, count: 50
epoch: 728839, loss: 9.121894436248112e-06, rewards: -9.299999999999999, count: 50
epoch: 728849, loss: -1.712560697342269e-05, rewards: -9.299999999999999, count: 50
epoch: 728859, loss: 1.1484622518764809e-05, rewards: -9.299999999999999, count: 50
epoch: 728869, loss: -6.111860329838237e-06, rewards: -9.299999999999999, count: 50
epoch: 728879, loss: 3.2711029689380666e-06, rewards: -9.299999999999999, count: 50
epoch: 728889, loss: -2.5212764285242883e-06, rewards: -9.299999999999999, count: 50
epoch: 728899, loss: 1.4710426512465347e-06, rewards: -9.299999999999999, count: 50
epoch: 728909, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 728919, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 728929, loss: -6.806850478824344e-07, rewards: -9.299999999999999, count: 50
epoch: 728939, loss: -2.0420550299604656e-06, rewards: -9.299999999999999, c

epoch: 729819, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 729829, loss: -2.219677071479964e-06, rewards: -9.299999999999999, count: 50
epoch: 729839, loss: -1.0554790605965536e-05, rewards: -9.299999999999999, count: 50
epoch: 729849, loss: -7.378220470855013e-05, rewards: -9.299999999999999, count: 50
epoch: 729859, loss: 3.141164779663086e-05, rewards: -9.299999999999999, count: 50
epoch: 729869, loss: -5.061149568064138e-05, rewards: -9.299999999999999, count: 50
epoch: 729879, loss: -8.594989822086063e-07, rewards: -9.299999999999999, count: 50
epoch: 729889, loss: 1.8380880646873266e-05, rewards: -9.299999999999999, count: 50
epoch: 729899, loss: -4.903078206552891e-06, rewards: -9.299999999999999, count: 50
epoch: 729909, loss: -3.5119055610266514e-06, rewards: -9.299999999999999, count: 50
epoch: 729919, loss: 3.6799908684770344e-06, rewards: -9.299999999999999, count: 50
epoch: 729929, loss: -1.996755599975586e-06, rewards: -9.299999999999999, c

epoch: 730809, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 730819, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 730829, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 730839, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 730849, loss: 7.152557657263969e-08, rewards: -9.299999999999999, count: 50
epoch: 730859, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 730869, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 730879, loss: -8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 730889, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 730899, loss: 1.363754222438729e-06, rewards: -9.299999999999999, count: 50
epoch: 730909, loss: 2.9206275939941406e-06, rewards: -9.299999999999999, count: 50
epoch: 730919, loss: 8.413791874772869e-06, rewards: -9.299999999999999, count

epoch: 731799, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 731809, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 731819, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 731829, loss: -1.5258788721439487e-07, rewards: -9.299999999999999, count: 50
epoch: 731839, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 731849, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 731859, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 731869, loss: 1.6915797687033773e-06, rewards: -9.299999999999999, count: 50
epoch: 731879, loss: 7.798671504133381e-06, rewards: -9.299999999999999, count: 50
epoch: 731889, loss: 4.286527473595925e-05, rewards: -9.299999999999999, count: 50
epoch: 731899, loss: 7.186770380940288e-05, rewards: -9.299999999999999, count: 50
epoch: 731909, loss: -8.623599569546059e-06, rewards: -9.299999999999999, count: 

epoch: 732789, loss: -2.0416975530679338e-05, rewards: -9.299999999999999, count: 50
epoch: 732799, loss: 3.023862882400863e-05, rewards: -9.299999999999999, count: 50
epoch: 732809, loss: -1.1707544217642862e-05, rewards: -9.299999999999999, count: 50
epoch: 732819, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 732829, loss: 4.569292286760174e-06, rewards: -9.299999999999999, count: 50
epoch: 732839, loss: -3.7741660889878403e-06, rewards: -9.299999999999999, count: 50
epoch: 732849, loss: 2.319812665518839e-06, rewards: -9.299999999999999, count: 50
epoch: 732859, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 732869, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 732879, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 732889, loss: -6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 732899, loss: -1.1527538390510017e-06, rewards: -9.299999999999999, 

epoch: 733779, loss: -1.2240409887454007e-05, rewards: -9.299999999999999, count: 50
epoch: 733789, loss: -0.00011999845446553081, rewards: -9.299999999999999, count: 50
epoch: 733799, loss: 9.076833521248773e-05, rewards: -9.299999999999999, count: 50
epoch: 733809, loss: 4.1979550587711856e-05, rewards: -9.299999999999999, count: 50
epoch: 733819, loss: 4.804134277947014e-06, rewards: -9.299999999999999, count: 50
epoch: 733829, loss: -1.1758804248529486e-05, rewards: -9.299999999999999, count: 50
epoch: 733839, loss: -1.165986031992361e-05, rewards: -9.299999999999999, count: 50
epoch: 733849, loss: -3.8397311072913e-06, rewards: -9.299999999999999, count: 50
epoch: 733859, loss: 2.8336048671917524e-06, rewards: -9.299999999999999, count: 50
epoch: 733869, loss: 2.2494793938676594e-06, rewards: -9.299999999999999, count: 50
epoch: 733879, loss: -1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 733889, loss: -5.710124924007687e-07, rewards: -9.299999999999999, co

epoch: 734769, loss: -2.9957293463667156e-06, rewards: -9.299999999999999, count: 50
epoch: 734779, loss: 2.2494793938676594e-06, rewards: -9.299999999999999, count: 50
epoch: 734789, loss: -2.84075736090017e-06, rewards: -9.299999999999999, count: 50
epoch: 734799, loss: 2.5844574338407256e-06, rewards: -9.299999999999999, count: 50
epoch: 734809, loss: -3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 734819, loss: -1.0454655239300337e-06, rewards: -9.299999999999999, count: 50
epoch: 734829, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 734839, loss: -9.179115068036481e-07, rewards: -9.299999999999999, count: 50
epoch: 734849, loss: -4.0912627810030244e-06, rewards: -9.299999999999999, count: 50
epoch: 734859, loss: -2.8270482289372012e-05, rewards: -9.299999999999999, count: 50
epoch: 734869, loss: -0.00012047529162373394, rewards: -9.299999999999999, count: 50
epoch: 734879, loss: 2.3658276404603384e-05, rewards: -9.29999999999999

epoch: 735759, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 735769, loss: 2.294778823852539e-06, rewards: -9.299999999999999, count: 50
epoch: 735779, loss: 9.856224096438382e-06, rewards: -9.299999999999999, count: 50
epoch: 735789, loss: 5.8144330978393555e-05, rewards: -9.299999999999999, count: 50
epoch: 735799, loss: 1.981139212148264e-05, rewards: -9.299999999999999, count: 50
epoch: 735809, loss: 3.228783680242486e-05, rewards: -9.299999999999999, count: 50
epoch: 735819, loss: -2.9046535928500816e-05, rewards: -9.299999999999999, count: 50
epoch: 735829, loss: 4.594326128426474e-06, rewards: -9.299999999999999, count: 50
epoch: 735839, loss: 6.688833309453912e-06, rewards: -9.299999999999999, count: 50
epoch: 735849, loss: -6.810426839365391e-06, rewards: -9.299999999999999, count: 50
epoch: 735859, loss: 4.304647518438287e-06, rewards: -9.299999999999999, count: 50
epoch: 735869, loss: -2.3245811462402344e-06, rewards: -9.299999999999999, count: 

epoch: 736749, loss: -7.253885087266099e-06, rewards: -9.299999999999999, count: 50
epoch: 736759, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 736769, loss: 2.8014183044433594e-06, rewards: -9.299999999999999, count: 50
epoch: 736779, loss: -2.0420550299604656e-06, rewards: -9.299999999999999, count: 50
epoch: 736789, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 736799, loss: -6.294250738392293e-07, rewards: -9.299999999999999, count: 50
epoch: 736809, loss: 6.771087441848067e-07, rewards: -9.299999999999999, count: 50
epoch: 736819, loss: -7.307529585887096e-07, rewards: -9.299999999999999, count: 50
epoch: 736829, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 736839, loss: 6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 736849, loss: 1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 736859, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count

epoch: 737739, loss: -1.6140937759701046e-06, rewards: -9.299999999999999, count: 50
epoch: 737749, loss: 5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 737759, loss: -2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 737769, loss: 5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 737779, loss: 6.949901489861077e-07, rewards: -9.299999999999999, count: 50
epoch: 737789, loss: -9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 737799, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 737809, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 737819, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 737829, loss: 1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 737839, loss: 8.034706297621597e-06, rewards: -9.299999999999999, count: 50
epoch: 737849, loss: 6.107569060986862e-05, rewards: -9.299999999999999, count: 

epoch: 738729, loss: -7.922649092506617e-05, rewards: -9.299999999999999, count: 50
epoch: 738739, loss: 3.9885042497189716e-05, rewards: -9.299999999999999, count: 50
epoch: 738749, loss: -4.7193767386488616e-05, rewards: -9.299999999999999, count: 50
epoch: 738759, loss: 3.958940396842081e-06, rewards: -9.299999999999999, count: 50
epoch: 738769, loss: 1.5215873645502143e-05, rewards: -9.299999999999999, count: 50
epoch: 738779, loss: -9.087324542633723e-06, rewards: -9.299999999999999, count: 50
epoch: 738789, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 738799, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 738809, loss: -1.6140937759701046e-06, rewards: -9.299999999999999, count: 50
epoch: 738819, loss: 1.043081283569336e-06, rewards: -9.299999999999999, count: 50
epoch: 738829, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 738839, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, c

epoch: 739719, loss: -1.9103288650512695e-05, rewards: -9.299999999999999, count: 50
epoch: 739729, loss: -0.00011252045806031674, rewards: -9.299999999999999, count: 50
epoch: 739739, loss: 7.230997289298102e-05, rewards: -9.299999999999999, count: 50
epoch: 739749, loss: -7.426738648064202e-06, rewards: -9.299999999999999, count: 50
epoch: 739759, loss: -2.3994445655262098e-05, rewards: -9.299999999999999, count: 50
epoch: 739769, loss: 9.260177648684476e-06, rewards: -9.299999999999999, count: 50
epoch: 739779, loss: 4.231929779052734e-06, rewards: -9.299999999999999, count: 50
epoch: 739789, loss: -5.50627692064154e-06, rewards: -9.299999999999999, count: 50
epoch: 739799, loss: 3.2949446904240176e-06, rewards: -9.299999999999999, count: 50
epoch: 739809, loss: -1.7428397995900013e-06, rewards: -9.299999999999999, count: 50
epoch: 739819, loss: 1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 739829, loss: -6.461143584601814e-07, rewards: -9.299999999999999, co

epoch: 740709, loss: -7.224082764878403e-06, rewards: -9.299999999999999, count: 50
epoch: 740719, loss: 4.713535417977255e-06, rewards: -9.299999999999999, count: 50
epoch: 740729, loss: -2.0420550299604656e-06, rewards: -9.299999999999999, count: 50
epoch: 740739, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 740749, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 740759, loss: 4.971027465217048e-07, rewards: -9.299999999999999, count: 50
epoch: 740769, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 740779, loss: -5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 740789, loss: -5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 740799, loss: -7.510185469072894e-07, rewards: -9.299999999999999, count: 50
epoch: 740809, loss: -2.0813940864172764e-06, rewards: -9.299999999999999, count: 50
epoch: 740819, loss: -8.60810268932255e-06, rewards: -9.299999999999999, coun

epoch: 741699, loss: -4.678964614868164e-06, rewards: -9.299999999999999, count: 50
epoch: 741709, loss: -3.0214787329896353e-05, rewards: -9.299999999999999, count: 50
epoch: 741719, loss: -0.00010984182154061273, rewards: -9.299999999999999, count: 50
epoch: 741729, loss: 3.497839134070091e-05, rewards: -9.299999999999999, count: 50
epoch: 741739, loss: 2.3565291485283524e-05, rewards: -9.299999999999999, count: 50
epoch: 741749, loss: -2.0787716493941844e-05, rewards: -9.299999999999999, count: 50
epoch: 741759, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 741769, loss: 6.608962848986266e-06, rewards: -9.299999999999999, count: 50
epoch: 741779, loss: -4.903078206552891e-06, rewards: -9.299999999999999, count: 50
epoch: 741789, loss: 2.950429916381836e-06, rewards: -9.299999999999999, count: 50
epoch: 741799, loss: -1.7046928633135394e-06, rewards: -9.299999999999999, count: 50
epoch: 741809, loss: 1.7237663314517704e-06, rewards: -9.299999999999999, co

epoch: 742689, loss: -0.00011101603740826249, rewards: -9.299999999999999, count: 50
epoch: 742699, loss: 5.43320165888872e-05, rewards: -9.299999999999999, count: 50
epoch: 742709, loss: 2.577304940132308e-06, rewards: -9.299999999999999, count: 50
epoch: 742719, loss: -2.094030423904769e-05, rewards: -9.299999999999999, count: 50
epoch: 742729, loss: 1.14190579552087e-05, rewards: -9.299999999999999, count: 50
epoch: 742739, loss: -1.6295909972541267e-06, rewards: -9.299999999999999, count: 50
epoch: 742749, loss: -1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 742759, loss: 1.3291835330164758e-06, rewards: -9.299999999999999, count: 50
epoch: 742769, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 742779, loss: -9.179115068036481e-08, rewards: -9.299999999999999, count: 50
epoch: 742789, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 742799, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, coun

epoch: 743679, loss: 7.756948434689548e-06, rewards: -9.299999999999999, count: 50
epoch: 743689, loss: -5.557536951528164e-06, rewards: -9.299999999999999, count: 50
epoch: 743699, loss: 2.282857849422726e-06, rewards: -9.299999999999999, count: 50
epoch: 743709, loss: -7.307529585887096e-07, rewards: -9.299999999999999, count: 50
epoch: 743719, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 743729, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 743739, loss: 4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 743749, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 743759, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 743769, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 743779, loss: 6.771087441848067e-07, rewards: -9.299999999999999, count: 50
epoch: 743789, loss: 1.115798909268051e-06, rewards: -9.299999999999999, count: 50


epoch: 744669, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 744679, loss: -8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 744689, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 744699, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 744709, loss: 1.592636067471176e-06, rewards: -9.299999999999999, count: 50
epoch: 744719, loss: 5.304813385009766e-06, rewards: -9.299999999999999, count: 50
epoch: 744729, loss: 2.890348514483776e-05, rewards: -9.299999999999999, count: 50
epoch: 744739, loss: 0.0001099801083910279, rewards: -9.299999999999999, count: 50
epoch: 744749, loss: -4.4604541471926495e-05, rewards: -9.299999999999999, count: 50
epoch: 744759, loss: -1.410484310326865e-05, rewards: -9.299999999999999, count: 50
epoch: 744769, loss: 2.2276639356277883e-05, rewards: -9.299999999999999, count: 50
epoch: 744779, loss: -6.705522537231445e-06, rewards: -9.299999999999999, count: 50

epoch: 745659, loss: -8.172988600563258e-06, rewards: -9.299999999999999, count: 50
epoch: 745669, loss: -1.0229348845314234e-05, rewards: -9.299999999999999, count: 50
epoch: 745679, loss: 7.562637165392516e-06, rewards: -9.299999999999999, count: 50
epoch: 745689, loss: -1.6891956420295173e-06, rewards: -9.299999999999999, count: 50
epoch: 745699, loss: -6.759166808478767e-07, rewards: -9.299999999999999, count: 50
epoch: 745709, loss: 9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 745719, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 745729, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 745739, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 745749, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 745759, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 745769, loss: -6.461143584601814e-07, rewards: -9.299999999999999, coun

epoch: 746649, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 746659, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 746669, loss: 3.55362885784416e-06, rewards: -9.299999999999999, count: 50
epoch: 746679, loss: 2.362608938710764e-05, rewards: -9.299999999999999, count: 50
epoch: 746689, loss: 0.00012765526480507106, rewards: -9.299999999999999, count: 50
epoch: 746699, loss: -4.3675900087691844e-05, rewards: -9.299999999999999, count: 50
epoch: 746709, loss: -3.6041736166225746e-05, rewards: -9.299999999999999, count: 50
epoch: 746719, loss: 9.759664862940554e-06, rewards: -9.299999999999999, count: 50
epoch: 746729, loss: 1.4215707778930664e-05, rewards: -9.299999999999999, count: 50
epoch: 746739, loss: -4.490613719099201e-06, rewards: -9.299999999999999, count: 50
epoch: 746749, loss: -3.6525725590763614e-06, rewards: -9.299999999999999, count: 50
epoch: 746759, loss: 3.7479401271411916e-06, rewards: -9.299999999999999, coun

epoch: 747639, loss: -2.294778823852539e-06, rewards: -9.299999999999999, count: 50
epoch: 747649, loss: 1.466274284211977e-06, rewards: -9.299999999999999, count: 50
epoch: 747659, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 747669, loss: -9.810923984332476e-07, rewards: -9.299999999999999, count: 50
epoch: 747679, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 747689, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 747699, loss: 4.971027465217048e-07, rewards: -9.299999999999999, count: 50
epoch: 747709, loss: 3.502368826957536e-06, rewards: -9.299999999999999, count: 50
epoch: 747719, loss: 2.829074946930632e-05, rewards: -9.299999999999999, count: 50
epoch: 747729, loss: 0.00012574791617225856, rewards: -9.299999999999999, count: 50
epoch: 747739, loss: 3.083944420723128e-06, rewards: -9.299999999999999, count: 50
epoch: 747749, loss: -3.871321678161621e-05, rewards: -9.299999999999999, count:

epoch: 748629, loss: 6.48248169454746e-05, rewards: -9.299999999999999, count: 50
epoch: 748639, loss: -5.232095645624213e-06, rewards: -9.299999999999999, count: 50
epoch: 748649, loss: 4.7961471864255145e-05, rewards: -9.299999999999999, count: 50
epoch: 748659, loss: -1.6279220290016383e-05, rewards: -9.299999999999999, count: 50
epoch: 748669, loss: -1.261353463632986e-05, rewards: -9.299999999999999, count: 50
epoch: 748679, loss: 1.0579824447631836e-05, rewards: -9.299999999999999, count: 50
epoch: 748689, loss: -2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 748699, loss: -9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 748709, loss: 1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 748719, loss: -1.1205672763026087e-06, rewards: -9.299999999999999, count: 50
epoch: 748729, loss: 1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 748739, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, 

epoch: 749619, loss: 5.468129984365078e-06, rewards: -9.299999999999999, count: 50
epoch: 749629, loss: -4.34637058788212e-06, rewards: -9.299999999999999, count: 50
epoch: 749639, loss: 2.4247169676527847e-06, rewards: -9.299999999999999, count: 50
epoch: 749649, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 749659, loss: -1.2731552487821318e-06, rewards: -9.299999999999999, count: 50
epoch: 749669, loss: 1.592636067471176e-06, rewards: -9.299999999999999, count: 50
epoch: 749679, loss: 1.1718273071892327e-06, rewards: -9.299999999999999, count: 50
epoch: 749689, loss: 5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 749699, loss: 9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 749709, loss: 3.4749507449305383e-06, rewards: -9.299999999999999, count: 50
epoch: 749719, loss: 2.604246219561901e-05, rewards: -9.299999999999999, count: 50
epoch: 749729, loss: 0.00012725114356726408, rewards: -9.299999999999999, count: 

epoch: 750609, loss: -0.00010104894317919388, rewards: -9.299999999999999, count: 50
epoch: 750619, loss: 3.413319427636452e-05, rewards: -9.299999999999999, count: 50
epoch: 750629, loss: 1.5938281649141572e-05, rewards: -9.299999999999999, count: 50
epoch: 750639, loss: -2.0678044165833853e-05, rewards: -9.299999999999999, count: 50
epoch: 750649, loss: 7.948875463625882e-06, rewards: -9.299999999999999, count: 50
epoch: 750659, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 750669, loss: -2.16841704059334e-06, rewards: -9.299999999999999, count: 50
epoch: 750679, loss: 1.3589858554041712e-06, rewards: -9.299999999999999, count: 50
epoch: 750689, loss: -4.0531159584134e-07, rewards: -9.299999999999999, count: 50
epoch: 750699, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 750709, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 750719, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, cou

epoch: 751589, loss: -1.6319751239279867e-06, rewards: -9.299999999999999, count: 50
epoch: 751599, loss: -1.5432835425599478e-05, rewards: -9.299999999999999, count: 50
epoch: 751609, loss: 1.2464523024391383e-05, rewards: -9.299999999999999, count: 50
epoch: 751619, loss: -6.769895662728231e-06, rewards: -9.299999999999999, count: 50
epoch: 751629, loss: 3.6454200653679436e-06, rewards: -9.299999999999999, count: 50
epoch: 751639, loss: -2.485513732608524e-06, rewards: -9.299999999999999, count: 50
epoch: 751649, loss: 2.071857352348161e-06, rewards: -9.299999999999999, count: 50
epoch: 751659, loss: -1.0550022579991492e-06, rewards: -9.299999999999999, count: 50
epoch: 751669, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 751679, loss: 1.5175342014117632e-06, rewards: -9.299999999999999, count: 50
epoch: 751689, loss: 2.988576852658298e-06, rewards: -9.299999999999999, count: 50
epoch: 751699, loss: 8.177757081284653e-06, rewards: -9.299999999999999, co

epoch: 752579, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 752589, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 752599, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 752609, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 752619, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 752629, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 752639, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 752649, loss: -2.7668475013342686e-06, rewards: -9.299999999999999, count: 50
epoch: 752659, loss: -1.5288591384887695e-05, rewards: -9.299999999999999, count: 50
epoch: 752669, loss: -9.79554679361172e-05, rewards: -9.299999999999999, count: 50
epoch: 752679, loss: 6.753682828275487e-05, rewards: -9.299999999999999, count: 50
epoch: 752689, loss: -3.249168366892263e-05, rewards: -9.299999999999999, coun

epoch: 753569, loss: -3.1352042242360767e-06, rewards: -9.299999999999999, count: 50
epoch: 753579, loss: 3.827810360235162e-06, rewards: -9.299999999999999, count: 50
epoch: 753589, loss: -2.8014183044433594e-06, rewards: -9.299999999999999, count: 50
epoch: 753599, loss: 1.3756751968685421e-06, rewards: -9.299999999999999, count: 50
epoch: 753609, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 753619, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 753629, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 753639, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 753649, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 753659, loss: 9.262561775358336e-07, rewards: -9.299999999999999, count: 50
epoch: 753669, loss: 4.580020686262287e-06, rewards: -9.299999999999999, count: 50
epoch: 753679, loss: 2.5285482479375787e-05, rewards: -9.299999999999999, co

epoch: 754549, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 754559, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 754569, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 754579, loss: -1.5735626845980732e-07, rewards: -9.299999999999999, count: 50
epoch: 754589, loss: -1.5258788721439487e-07, rewards: -9.299999999999999, count: 50
epoch: 754599, loss: -1.8835068260614207e-07, rewards: -9.299999999999999, count: 50
epoch: 754609, loss: -1.668930025289228e-07, rewards: -9.299999999999999, count: 50
epoch: 754619, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 754629, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, count: 50
epoch: 754639, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 754649, loss: -1.6295909972541267e-06, rewards: -9.299999999999999, count: 50
epoch: 754659, loss: -1.3415813555184286e-05, rewards: -9.29999999999999

epoch: 755539, loss: 2.5217532311216928e-05, rewards: -9.299999999999999, count: 50
epoch: 755549, loss: -7.94172319729114e-06, rewards: -9.299999999999999, count: 50
epoch: 755559, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 755569, loss: 2.2470951535069617e-06, rewards: -9.299999999999999, count: 50
epoch: 755579, loss: -1.4555454299625126e-06, rewards: -9.299999999999999, count: 50
epoch: 755589, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 755599, loss: 1.0251999356114538e-06, rewards: -9.299999999999999, count: 50
epoch: 755609, loss: -9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 755619, loss: -7.402896926578251e-07, rewards: -9.299999999999999, count: 50
epoch: 755629, loss: -8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 755639, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 755649, loss: -3.6013125281897373e-06, rewards: -9.299999999999999, co

epoch: 756529, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 756539, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 756549, loss: -1.5258788721439487e-07, rewards: -9.299999999999999, count: 50
epoch: 756559, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 756569, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 756579, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 756589, loss: 2.88605679088505e-06, rewards: -9.299999999999999, count: 50
epoch: 756599, loss: 1.189112663269043e-05, rewards: -9.299999999999999, count: 50
epoch: 756609, loss: 6.963848863961175e-05, rewards: -9.299999999999999, count: 50
epoch: 756619, loss: -1.2440681530279107e-05, rewards: -9.299999999999999, count: 50
epoch: 756629, loss: 3.967166048823856e-05, rewards: -9.299999999999999, count: 50
epoch: 756639, loss: -2.434849739074707e-05, rewards: -9.299999999999999, count: 50

epoch: 757519, loss: -8.542538125766441e-06, rewards: -9.299999999999999, count: 50
epoch: 757529, loss: 5.735158993047662e-06, rewards: -9.299999999999999, count: 50
epoch: 757539, loss: -2.9981135867274133e-06, rewards: -9.299999999999999, count: 50
epoch: 757549, loss: 1.2636185147130163e-06, rewards: -9.299999999999999, count: 50
epoch: 757559, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 757569, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 757579, loss: 1.519918441772461e-06, rewards: -9.299999999999999, count: 50
epoch: 757589, loss: 3.869533429678995e-06, rewards: -9.299999999999999, count: 50
epoch: 757599, loss: 1.737952152325306e-05, rewards: -9.299999999999999, count: 50
epoch: 757609, loss: 9.518385195406154e-05, rewards: -9.299999999999999, count: 50
epoch: 757619, loss: -5.954265725449659e-05, rewards: -9.299999999999999, count: 50
epoch: 757629, loss: 3.805279629887082e-05, rewards: -9.299999999999999, count: 5

epoch: 758509, loss: 3.07178488583304e-05, rewards: -9.299999999999999, count: 50
epoch: 758519, loss: 1.3659000615007244e-05, rewards: -9.299999999999999, count: 50
epoch: 758529, loss: -1.5778541637700982e-05, rewards: -9.299999999999999, count: 50
epoch: 758539, loss: 3.530979256538558e-06, rewards: -9.299999999999999, count: 50
epoch: 758549, loss: 2.2006033759680577e-06, rewards: -9.299999999999999, count: 50
epoch: 758559, loss: -2.9981135867274133e-06, rewards: -9.299999999999999, count: 50
epoch: 758569, loss: 2.5975705284508877e-06, rewards: -9.299999999999999, count: 50
epoch: 758579, loss: -2.191066641898942e-06, rewards: -9.299999999999999, count: 50
epoch: 758589, loss: 6.949901489861077e-07, rewards: -9.299999999999999, count: 50
epoch: 758599, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 758609, loss: -1.341104507446289e-06, rewards: -9.299999999999999, count: 50
epoch: 758619, loss: -2.226829565188382e-06, rewards: -9.299999999999999, coun

epoch: 759499, loss: 7.174134225351736e-05, rewards: -9.299999999999999, count: 50
epoch: 759509, loss: -1.6826390492497012e-05, rewards: -9.299999999999999, count: 50
epoch: 759519, loss: 4.043698208988644e-05, rewards: -9.299999999999999, count: 50
epoch: 759529, loss: -2.3978947865543887e-05, rewards: -9.299999999999999, count: 50
epoch: 759539, loss: 2.7239323117100867e-06, rewards: -9.299999999999999, count: 50
epoch: 759549, loss: 5.548000444832724e-06, rewards: -9.299999999999999, count: 50
epoch: 759559, loss: -5.460977718030335e-06, rewards: -9.299999999999999, count: 50
epoch: 759569, loss: 3.869533429678995e-06, rewards: -9.299999999999999, count: 50
epoch: 759579, loss: -2.4342537017219e-06, rewards: -9.299999999999999, count: 50
epoch: 759589, loss: 7.522106102442194e-07, rewards: -9.299999999999999, count: 50
epoch: 759599, loss: 9.262561775358336e-07, rewards: -9.299999999999999, count: 50
epoch: 759609, loss: -1.4805793853156501e-06, rewards: -9.299999999999999, count: 

epoch: 760489, loss: 2.8917789677507244e-05, rewards: -9.299999999999999, count: 50
epoch: 760499, loss: 0.00011924982391064987, rewards: -9.299999999999999, count: 50
epoch: 760509, loss: -1.7427206330467016e-05, rewards: -9.299999999999999, count: 50
epoch: 760519, loss: -3.9377213397528976e-05, rewards: -9.299999999999999, count: 50
epoch: 760529, loss: 1.7189979644172126e-06, rewards: -9.299999999999999, count: 50
epoch: 760539, loss: 1.4376640137925278e-05, rewards: -9.299999999999999, count: 50
epoch: 760549, loss: -2.5212764285242883e-06, rewards: -9.299999999999999, count: 50
epoch: 760559, loss: -3.881454631482484e-06, rewards: -9.299999999999999, count: 50
epoch: 760569, loss: 3.005266080435831e-06, rewards: -9.299999999999999, count: 50
epoch: 760579, loss: -1.1229515166633064e-06, rewards: -9.299999999999999, count: 50
epoch: 760589, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 760599, loss: -1.6927718604620168e-07, rewards: -9.299999999999999,

epoch: 761479, loss: 6.157159805297852e-05, rewards: -9.299999999999999, count: 50
epoch: 761489, loss: 2.9062031899229623e-05, rewards: -9.299999999999999, count: 50
epoch: 761499, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 761509, loss: -1.1985302080574911e-05, rewards: -9.299999999999999, count: 50
epoch: 761519, loss: -5.708932803827338e-06, rewards: -9.299999999999999, count: 50
epoch: 761529, loss: 2.645254198796465e-06, rewards: -9.299999999999999, count: 50
epoch: 761539, loss: 2.3436546143784653e-06, rewards: -9.299999999999999, count: 50
epoch: 761549, loss: -1.4734267779203947e-06, rewards: -9.299999999999999, count: 50
epoch: 761559, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 761569, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 761579, loss: -4.148483299104555e-07, rewards: -9.299999999999999, count: 50
epoch: 761589, loss: 2.8848648980783764e-07, rewards: -9.299999999999999, cou

epoch: 762469, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 762479, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 762489, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 762499, loss: -8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 762509, loss: -5.066394805908203e-07, rewards: -9.299999999999999, count: 50
epoch: 762519, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 762529, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 762539, loss: -1.0251999356114538e-06, rewards: -9.299999999999999, count: 50
epoch: 762549, loss: -5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 762559, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 762569, loss: -3.670454134407919e-06, rewards: -9.299999999999999, count: 50
epoch: 762579, loss: -3.845453102258034e-05, rewards: -9.299999999999999, c

epoch: 763459, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 763469, loss: 2.59995454143791e-06, rewards: -9.299999999999999, count: 50
epoch: 763479, loss: -2.2149085907585686e-06, rewards: -9.299999999999999, count: 50
epoch: 763489, loss: 1.4507770629279548e-06, rewards: -9.299999999999999, count: 50
epoch: 763499, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 763509, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 763519, loss: 8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 763529, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 763539, loss: 4.2676924749684986e-07, rewards: -9.299999999999999, count: 50
epoch: 763549, loss: 5.316734359439579e-07, rewards: -9.299999999999999, count: 50
epoch: 763559, loss: 3.2949446904240176e-06, rewards: -9.299999999999999, count: 50
epoch: 763569, loss: 2.4044513338594697e-05, rewards: -9.299999999999999, coun

epoch: 764439, loss: 3.350973202032037e-06, rewards: -9.299999999999999, count: 50
epoch: 764449, loss: 9.00506984180538e-06, rewards: -9.299999999999999, count: 50
epoch: 764459, loss: 4.4653414079220966e-05, rewards: -9.299999999999999, count: 50
epoch: 764469, loss: 6.515741551993415e-05, rewards: -9.299999999999999, count: 50
epoch: 764479, loss: -1.9899605831597e-05, rewards: -9.299999999999999, count: 50
epoch: 764489, loss: -6.939172635611612e-06, rewards: -9.299999999999999, count: 50
epoch: 764499, loss: 1.2611150850716513e-05, rewards: -9.299999999999999, count: 50
epoch: 764509, loss: -9.706020136945881e-06, rewards: -9.299999999999999, count: 50
epoch: 764519, loss: 6.270408448472153e-06, rewards: -9.299999999999999, count: 50
epoch: 764529, loss: -3.650188546089339e-06, rewards: -9.299999999999999, count: 50
epoch: 764539, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 764549, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50

epoch: 765429, loss: 2.9032229576841928e-05, rewards: -9.299999999999999, count: 50
epoch: 765439, loss: -5.0934551836689934e-05, rewards: -9.299999999999999, count: 50
epoch: 765449, loss: 1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 765459, loss: 1.773238182067871e-05, rewards: -9.299999999999999, count: 50
epoch: 765469, loss: -6.335973921522964e-06, rewards: -9.299999999999999, count: 50
epoch: 765479, loss: -2.8383731205394724e-06, rewards: -9.299999999999999, count: 50
epoch: 765489, loss: 3.908872713509481e-06, rewards: -9.299999999999999, count: 50
epoch: 765499, loss: -2.536773763495148e-06, rewards: -9.299999999999999, count: 50
epoch: 765509, loss: 1.341104507446289e-06, rewards: -9.299999999999999, count: 50
epoch: 765519, loss: -9.131431397690903e-07, rewards: -9.299999999999999, count: 50
epoch: 765529, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 765539, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, cou

epoch: 766419, loss: 1.2421608062140876e-06, rewards: -9.299999999999999, count: 50
epoch: 766429, loss: 5.460977718030335e-06, rewards: -9.299999999999999, count: 50
epoch: 766439, loss: 2.9474496841430664e-05, rewards: -9.299999999999999, count: 50
epoch: 766449, loss: 0.00010341882443754002, rewards: -9.299999999999999, count: 50
epoch: 766459, loss: -5.1667691877810284e-05, rewards: -9.299999999999999, count: 50
epoch: 766469, loss: 6.097555342421401e-06, rewards: -9.299999999999999, count: 50
epoch: 766479, loss: 1.321673425991321e-05, rewards: -9.299999999999999, count: 50
epoch: 766489, loss: -1.2780427823599894e-05, rewards: -9.299999999999999, count: 50
epoch: 766499, loss: 7.569789886474609e-06, rewards: -9.299999999999999, count: 50
epoch: 766509, loss: -4.80890275866841e-06, rewards: -9.299999999999999, count: 50
epoch: 766519, loss: 3.076791699641035e-06, rewards: -9.299999999999999, count: 50
epoch: 766529, loss: -2.219677071479964e-06, rewards: -9.299999999999999, count:

epoch: 767409, loss: -2.5534629912726814e-06, rewards: -9.299999999999999, count: 50
epoch: 767419, loss: -9.706020136945881e-06, rewards: -9.299999999999999, count: 50
epoch: 767429, loss: -5.467176379170269e-05, rewards: -9.299999999999999, count: 50
epoch: 767439, loss: -3.2370091503253207e-05, rewards: -9.299999999999999, count: 50
epoch: 767449, loss: -1.7515420040581375e-05, rewards: -9.299999999999999, count: 50
epoch: 767459, loss: 2.8520822525024414e-05, rewards: -9.299999999999999, count: 50
epoch: 767469, loss: -1.4824867321294732e-05, rewards: -9.299999999999999, count: 50
epoch: 767479, loss: 3.3104420253948774e-06, rewards: -9.299999999999999, count: 50
epoch: 767489, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 767499, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 767509, loss: -2.0146370616203058e-07, rewards: -9.299999999999999, count: 50
epoch: 767519, loss: 7.796287491146359e-07, rewards: -9.299999999999999

epoch: 768399, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 768409, loss: -5.197525183575635e-07, rewards: -9.299999999999999, count: 50
epoch: 768419, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 768429, loss: -2.74181360282455e-07, rewards: -9.299999999999999, count: 50
epoch: 768439, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 768449, loss: 8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 768459, loss: 2.1100045159982983e-06, rewards: -9.299999999999999, count: 50
epoch: 768469, loss: 3.713369324032101e-06, rewards: -9.299999999999999, count: 50
epoch: 768479, loss: 1.6038417015806772e-05, rewards: -9.299999999999999, count: 50
epoch: 768489, loss: 8.70919247972779e-05, rewards: -9.299999999999999, count: 50
epoch: 768499, loss: -4.6602486690972e-05, rewards: -9.299999999999999, count: 50
epoch: 768509, loss: 4.141807585256174e-05, rewards: -9.299999999999999, count: 50
ep

epoch: 769389, loss: -3.7550926208496094e-06, rewards: -9.299999999999999, count: 50
epoch: 769399, loss: 4.190206709608901e-06, rewards: -9.299999999999999, count: 50
epoch: 769409, loss: -2.2494793938676594e-06, rewards: -9.299999999999999, count: 50
epoch: 769419, loss: 1.1718273071892327e-06, rewards: -9.299999999999999, count: 50
epoch: 769429, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 769439, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 769449, loss: -8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 769459, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 769469, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 769479, loss: 5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 769489, loss: 5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 769499, loss: 3.5381317502469756e-06, rewards: -9.299999999999999, cou

epoch: 770379, loss: -5.2282808610470966e-05, rewards: -9.299999999999999, count: 50
epoch: 770389, loss: 2.0027160019253643e-07, rewards: -9.299999999999999, count: 50
epoch: 770399, loss: 1.8621683921082877e-05, rewards: -9.299999999999999, count: 50
epoch: 770409, loss: -4.003048161393963e-06, rewards: -9.299999999999999, count: 50
epoch: 770419, loss: -4.426241048349766e-06, rewards: -9.299999999999999, count: 50
epoch: 770429, loss: 4.075765446032165e-06, rewards: -9.299999999999999, count: 50
epoch: 770439, loss: -1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 770449, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 770459, loss: -7.557869139418472e-07, rewards: -9.299999999999999, count: 50
epoch: 770469, loss: 1.0251999356114538e-06, rewards: -9.299999999999999, count: 50
epoch: 770479, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 770489, loss: -1.01327898960335e-07, rewards: -9.299999999999999, co

epoch: 771369, loss: 4.351139068603516e-06, rewards: -9.299999999999999, count: 50
epoch: 771379, loss: 2.8192996978759766e-05, rewards: -9.299999999999999, count: 50
epoch: 771389, loss: 0.00011775374150602147, rewards: -9.299999999999999, count: 50
epoch: 771399, loss: -3.178358019795269e-05, rewards: -9.299999999999999, count: 50
epoch: 771409, loss: -3.2842159271240234e-05, rewards: -9.299999999999999, count: 50
epoch: 771419, loss: 1.4101266970101278e-05, rewards: -9.299999999999999, count: 50
epoch: 771429, loss: 9.496212442172691e-06, rewards: -9.299999999999999, count: 50
epoch: 771439, loss: -8.08596632850822e-06, rewards: -9.299999999999999, count: 50
epoch: 771449, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 771459, loss: 1.519918441772461e-06, rewards: -9.299999999999999, count: 50
epoch: 771469, loss: -1.5997886748664314e-06, rewards: -9.299999999999999, count: 50
epoch: 771479, loss: 1.592636067471176e-06, rewards: -9.299999999999999, count:

epoch: 772359, loss: -5.331873762770556e-05, rewards: -9.299999999999999, count: 50
epoch: 772369, loss: 4.1135550418403e-05, rewards: -9.299999999999999, count: 50
epoch: 772379, loss: -9.000301361083984e-06, rewards: -9.299999999999999, count: 50
epoch: 772389, loss: -7.492303666367661e-06, rewards: -9.299999999999999, count: 50
epoch: 772399, loss: 8.876323590811808e-06, rewards: -9.299999999999999, count: 50
epoch: 772409, loss: -5.786419023934286e-06, rewards: -9.299999999999999, count: 50
epoch: 772419, loss: 3.465414010861423e-06, rewards: -9.299999999999999, count: 50
epoch: 772429, loss: -2.363920202697045e-06, rewards: -9.299999999999999, count: 50
epoch: 772439, loss: 8.523464316567697e-07, rewards: -9.299999999999999, count: 50
epoch: 772449, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 772459, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 772469, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 5

epoch: 773349, loss: 3.1685829071648186e-06, rewards: -9.299999999999999, count: 50
epoch: 773359, loss: 3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 773369, loss: -1.3589858554041712e-06, rewards: -9.299999999999999, count: 50
epoch: 773379, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 773389, loss: -8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 773399, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 773409, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 773419, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 773429, loss: -8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 773439, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 773449, loss: 8.893013045963016e-07, rewards: -9.299999999999999, count: 50
epoch: 773459, loss: 2.2149085907585686e-06, rewards: -9.299999999999999, count:

epoch: 774339, loss: 1.5580653780489229e-06, rewards: -9.299999999999999, count: 50
epoch: 774349, loss: 7.953643944347277e-06, rewards: -9.299999999999999, count: 50
epoch: 774359, loss: 5.8119298046221957e-05, rewards: -9.299999999999999, count: 50
epoch: 774369, loss: 1.2555122339108493e-05, rewards: -9.299999999999999, count: 50
epoch: 774379, loss: 4.9804450100054964e-05, rewards: -9.299999999999999, count: 50
epoch: 774389, loss: -1.4624595678469632e-05, rewards: -9.299999999999999, count: 50
epoch: 774399, loss: -1.6714335288270377e-05, rewards: -9.299999999999999, count: 50
epoch: 774409, loss: 8.747577339818235e-06, rewards: -9.299999999999999, count: 50
epoch: 774419, loss: 2.034902536252048e-06, rewards: -9.299999999999999, count: 50
epoch: 774429, loss: -3.8647649489576e-06, rewards: -9.299999999999999, count: 50
epoch: 774439, loss: 2.703666723391507e-06, rewards: -9.299999999999999, count: 50
epoch: 774449, loss: -1.4710426512465347e-06, rewards: -9.299999999999999, count

epoch: 775329, loss: 1.3296604265633505e-05, rewards: -9.299999999999999, count: 50
epoch: 775339, loss: 7.369875675067306e-05, rewards: -9.299999999999999, count: 50
epoch: 775349, loss: -2.088666042254772e-05, rewards: -9.299999999999999, count: 50
epoch: 775359, loss: 3.991961420979351e-05, rewards: -9.299999999999999, count: 50
epoch: 775369, loss: -2.4259090423583984e-05, rewards: -9.299999999999999, count: 50
epoch: 775379, loss: 4.986524800187908e-06, rewards: -9.299999999999999, count: 50
epoch: 775389, loss: 2.7692317416949663e-06, rewards: -9.299999999999999, count: 50
epoch: 775399, loss: -3.727674538822612e-06, rewards: -9.299999999999999, count: 50
epoch: 775409, loss: 2.6917457489616936e-06, rewards: -9.299999999999999, count: 50
epoch: 775419, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 775429, loss: -4.148483299104555e-07, rewards: -9.299999999999999, count: 50
epoch: 775439, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count

epoch: 776319, loss: 0.0001067566845449619, rewards: -9.299999999999999, count: 50
epoch: 776329, loss: -7.092118175933138e-05, rewards: -9.299999999999999, count: 50
epoch: 776339, loss: 2.0476580175454728e-05, rewards: -9.299999999999999, count: 50
epoch: 776349, loss: 1.790404348867014e-05, rewards: -9.299999999999999, count: 50
epoch: 776359, loss: -1.4177560842654202e-05, rewards: -9.299999999999999, count: 50
epoch: 776369, loss: 2.3174286525318166e-06, rewards: -9.299999999999999, count: 50
epoch: 776379, loss: 2.4247169676527847e-06, rewards: -9.299999999999999, count: 50
epoch: 776389, loss: -2.9277800877025584e-06, rewards: -9.299999999999999, count: 50
epoch: 776399, loss: 2.100467781929183e-06, rewards: -9.299999999999999, count: 50
epoch: 776409, loss: -1.7499924069852568e-06, rewards: -9.299999999999999, count: 50
epoch: 776419, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 776429, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count:

epoch: 777309, loss: 3.093004124821164e-05, rewards: -9.299999999999999, count: 50
epoch: 777319, loss: 0.00011599898425629362, rewards: -9.299999999999999, count: 50
epoch: 777329, loss: 7.922649274405558e-06, rewards: -9.299999999999999, count: 50
epoch: 777339, loss: -3.7761925341328606e-05, rewards: -9.299999999999999, count: 50
epoch: 777349, loss: -1.5776156942592934e-05, rewards: -9.299999999999999, count: 50
epoch: 777359, loss: 9.447336196899414e-06, rewards: -9.299999999999999, count: 50
epoch: 777369, loss: 6.936788395250915e-06, rewards: -9.299999999999999, count: 50
epoch: 777379, loss: -3.874301910400391e-06, rewards: -9.299999999999999, count: 50
epoch: 777389, loss: -9.334087280876702e-07, rewards: -9.299999999999999, count: 50
epoch: 777399, loss: 1.5175342014117632e-06, rewards: -9.299999999999999, count: 50
epoch: 777409, loss: -7.402896926578251e-07, rewards: -9.299999999999999, count: 50
epoch: 777419, loss: 3.099441414633475e-07, rewards: -9.299999999999999, count

epoch: 778299, loss: -4.250288111506961e-05, rewards: -9.299999999999999, count: 50
epoch: 778309, loss: -1.5457868357771076e-05, rewards: -9.299999999999999, count: 50
epoch: 778319, loss: 2.2152662495500408e-05, rewards: -9.299999999999999, count: 50
epoch: 778329, loss: -6.63518903820659e-06, rewards: -9.299999999999999, count: 50
epoch: 778339, loss: -2.2220610844669864e-06, rewards: -9.299999999999999, count: 50
epoch: 778349, loss: 3.827810360235162e-06, rewards: -9.299999999999999, count: 50
epoch: 778359, loss: -2.6988982426701114e-06, rewards: -9.299999999999999, count: 50
epoch: 778369, loss: 1.7082691101677483e-06, rewards: -9.299999999999999, count: 50
epoch: 778379, loss: -1.1110305422334932e-06, rewards: -9.299999999999999, count: 50
epoch: 778389, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 778399, loss: 3.2424927098873013e-07, rewards: -9.299999999999999, count: 50
epoch: 778409, loss: 3.755092734536447e-07, rewards: -9.299999999999999, c

epoch: 779289, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 779299, loss: -1.971721758309286e-06, rewards: -9.299999999999999, count: 50
epoch: 779309, loss: -9.019375283969566e-06, rewards: -9.299999999999999, count: 50
epoch: 779319, loss: -6.431341171264648e-05, rewards: -9.299999999999999, count: 50
epoch: 779329, loss: 8.511543455824722e-06, rewards: -9.299999999999999, count: 50
epoch: 779339, loss: -5.3350926464190707e-05, rewards: -9.299999999999999, count: 50
epoch: 779349, loss: 4.266500582161825e-06, rewards: -9.299999999999999, count: 50
epoch: 779359, loss: 1.911401705001481e-05, rewards: -9.299999999999999, count: 50
epoch: 779369, loss: -4.554986844595987e-06, rewards: -9.299999999999999, count: 50
epoch: 779379, loss: -4.678964614868164e-06, rewards: -9.299999999999999, count: 50
epoch: 779389, loss: 4.316568265494425e-06, rewards: -9.299999999999999, count: 50
epoch: 779399, loss: -1.7726421219776967e-06, rewards: -9.299999999999999, coun

epoch: 780279, loss: 9.442567716178019e-06, rewards: -9.299999999999999, count: 50
epoch: 780289, loss: 6.239414233277785e-06, rewards: -9.299999999999999, count: 50
epoch: 780299, loss: 1.62363051003922e-06, rewards: -9.299999999999999, count: 50
epoch: 780309, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 780319, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 780329, loss: 3.5762788286319847e-08, rewards: -9.299999999999999, count: 50
epoch: 780339, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 780349, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 780359, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 780369, loss: 1.9788741667525755e-07, rewards: -9.299999999999999, count: 50
epoch: 780379, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 780389, loss: -5.960464477539063e-08, rewards: -9.299999999999999, count: 5

epoch: 781269, loss: 1.1181831496287487e-06, rewards: -9.299999999999999, count: 50
epoch: 781279, loss: -6.651878265984124e-07, rewards: -9.299999999999999, count: 50
epoch: 781289, loss: 2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 781299, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 781309, loss: -1.5258788721439487e-07, rewards: -9.299999999999999, count: 50
epoch: 781319, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 781329, loss: -3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 781339, loss: -7.355213256232673e-07, rewards: -9.299999999999999, count: 50
epoch: 781349, loss: -3.4725665045698406e-06, rewards: -9.299999999999999, count: 50
epoch: 781359, loss: -2.2351741790771484e-05, rewards: -9.299999999999999, count: 50
epoch: 781369, loss: -0.00011685251956805587, rewards: -9.299999999999999, count: 50
epoch: 781379, loss: 6.343960558297113e-05, rewards: -9.299999999999999

epoch: 782259, loss: -0.00011454224295448512, rewards: -9.299999999999999, count: 50
epoch: 782269, loss: 4.007816187368007e-06, rewards: -9.299999999999999, count: 50
epoch: 782279, loss: 3.9637088775634766e-05, rewards: -9.299999999999999, count: 50
epoch: 782289, loss: 5.258321834844537e-06, rewards: -9.299999999999999, count: 50
epoch: 782299, loss: -1.3997554560774006e-05, rewards: -9.299999999999999, count: 50
epoch: 782309, loss: -7.915496667010302e-07, rewards: -9.299999999999999, count: 50
epoch: 782319, loss: 4.912614713248331e-06, rewards: -9.299999999999999, count: 50
epoch: 782329, loss: -2.4271012080134824e-06, rewards: -9.299999999999999, count: 50
epoch: 782339, loss: -2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 782349, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 782359, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 782369, loss: 3.075599579460686e-07, rewards: -9.299999999999999, cou

epoch: 783249, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 783259, loss: 1.363754222438729e-06, rewards: -9.299999999999999, count: 50
epoch: 783269, loss: 3.6799908684770344e-06, rewards: -9.299999999999999, count: 50
epoch: 783279, loss: 1.4203787031874526e-05, rewards: -9.299999999999999, count: 50
epoch: 783289, loss: 7.703661685809493e-05, rewards: -9.299999999999999, count: 50
epoch: 783299, loss: -2.8046370061929338e-05, rewards: -9.299999999999999, count: 50
epoch: 783309, loss: 4.0332077333005145e-05, rewards: -9.299999999999999, count: 50
epoch: 783319, loss: -2.284765287186019e-05, rewards: -9.299999999999999, count: 50
epoch: 783329, loss: 4.378557150630513e-06, rewards: -9.299999999999999, count: 50
epoch: 783339, loss: 2.7263165520707844e-06, rewards: -9.299999999999999, count: 50
epoch: 783349, loss: -3.266334488216671e-06, rewards: -9.299999999999999, count: 50
epoch: 783359, loss: 1.9741057712963084e-06, rewards: -9.299999999999999, count

epoch: 784239, loss: -1.7428397995900013e-06, rewards: -9.299999999999999, count: 50
epoch: 784249, loss: -6.061792191758286e-06, rewards: -9.299999999999999, count: 50
epoch: 784259, loss: -3.399252818780951e-05, rewards: -9.299999999999999, count: 50
epoch: 784269, loss: -9.92071654764004e-05, rewards: -9.299999999999999, count: 50
epoch: 784279, loss: 2.8508900868473575e-05, rewards: -9.299999999999999, count: 50
epoch: 784289, loss: 2.0334720829850994e-05, rewards: -9.299999999999999, count: 50
epoch: 784299, loss: -2.07054617931135e-05, rewards: -9.299999999999999, count: 50
epoch: 784309, loss: 5.781650543212891e-06, rewards: -9.299999999999999, count: 50
epoch: 784319, loss: 1.9741057712963084e-06, rewards: -9.299999999999999, count: 50
epoch: 784329, loss: -3.210306203982327e-06, rewards: -9.299999999999999, count: 50
epoch: 784339, loss: 2.359151949349325e-06, rewards: -9.299999999999999, count: 50
epoch: 784349, loss: -1.4603137969970703e-06, rewards: -9.299999999999999, coun

epoch: 785229, loss: -5.197525183575635e-07, rewards: -9.299999999999999, count: 50
epoch: 785239, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 785249, loss: -7.152557657263969e-08, rewards: -9.299999999999999, count: 50
epoch: 785259, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 785269, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 785279, loss: 3.2424927098873013e-07, rewards: -9.299999999999999, count: 50
epoch: 785289, loss: 5.388259864957945e-07, rewards: -9.299999999999999, count: 50
epoch: 785299, loss: 1.2266635849300656e-06, rewards: -9.299999999999999, count: 50
epoch: 785309, loss: 3.236532165828976e-06, rewards: -9.299999999999999, count: 50
epoch: 785319, loss: 1.5423298464156687e-05, rewards: -9.299999999999999, count: 50
epoch: 785329, loss: 8.742570935282856e-05, rewards: -9.299999999999999, count: 50
epoch: 785339, loss: -4.9329995817970484e-05, rewards: -9.299999999999999, coun

epoch: 786219, loss: 8.123159204842523e-05, rewards: -9.299999999999999, count: 50
epoch: 786229, loss: -4.799604357685894e-05, rewards: -9.299999999999999, count: 50
epoch: 786239, loss: 4.6455861593130976e-05, rewards: -9.299999999999999, count: 50
epoch: 786249, loss: 9.119510650634766e-06, rewards: -9.299999999999999, count: 50
epoch: 786259, loss: -1.744508699630387e-05, rewards: -9.299999999999999, count: 50
epoch: 786269, loss: 1.3804435639030999e-06, rewards: -9.299999999999999, count: 50
epoch: 786279, loss: 5.372762643673923e-06, rewards: -9.299999999999999, count: 50
epoch: 786289, loss: -3.7741660889878403e-06, rewards: -9.299999999999999, count: 50
epoch: 786299, loss: 1.8668174561753403e-06, rewards: -9.299999999999999, count: 50
epoch: 786309, loss: -9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 786319, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 786329, loss: -7.379055091405462e-07, rewards: -9.299999999999999, coun

epoch: 787209, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 787219, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 787229, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 787239, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 787249, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 787259, loss: -4.129409717279486e-06, rewards: -9.299999999999999, count: 50
epoch: 787269, loss: -3.340363400639035e-05, rewards: -9.299999999999999, count: 50
epoch: 787279, loss: -0.00010712623770814389, rewards: -9.299999999999999, count: 50
epoch: 787289, loss: -9.740590940054972e-06, rewards: -9.299999999999999, count: 50
epoch: 787299, loss: 3.739356907317415e-05, rewards: -9.299999999999999, count: 50
epoch: 787309, loss: 1.1101960808446165e-05, rewards: -9.299999999999999, count: 50
epoch: 787319, loss: -1.2235641406732611e-05, rewards: -9.299999999999999, 

epoch: 788199, loss: -5.93781487623346e-06, rewards: -9.299999999999999, count: 50
epoch: 788209, loss: 1.7702579953038367e-06, rewards: -9.299999999999999, count: 50
epoch: 788219, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 788229, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 788239, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 788249, loss: 9.262561775358336e-07, rewards: -9.299999999999999, count: 50
epoch: 788259, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 788269, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 788279, loss: -2.1815299078298267e-06, rewards: -9.299999999999999, count: 50
epoch: 788289, loss: -5.080699793325039e-06, rewards: -9.299999999999999, count: 50
epoch: 788299, loss: -2.143740675819572e-05, rewards: -9.299999999999999, count: 50
epoch: 788309, loss: -9.81008997769095e-05, rewards: -9.299999999999999, co

epoch: 789189, loss: 1.0763406862679403e-05, rewards: -9.299999999999999, count: 50
epoch: 789199, loss: -8.012056241568644e-06, rewards: -9.299999999999999, count: 50
epoch: 789209, loss: 5.092620995128527e-06, rewards: -9.299999999999999, count: 50
epoch: 789219, loss: -3.324747012811713e-06, rewards: -9.299999999999999, count: 50
epoch: 789229, loss: 1.817941665649414e-06, rewards: -9.299999999999999, count: 50
epoch: 789239, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 789249, loss: -1.0454655239300337e-06, rewards: -9.299999999999999, count: 50
epoch: 789259, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 789269, loss: 7.796287491146359e-07, rewards: -9.299999999999999, count: 50
epoch: 789279, loss: 3.076791699641035e-06, rewards: -9.299999999999999, count: 50
epoch: 789289, loss: 1.63400181918405e-05, rewards: -9.299999999999999, count: 50
epoch: 789299, loss: 9.956479334505275e-05, rewards: -9.299999999999999, count: 5

epoch: 790179, loss: 2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 790189, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 790199, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 790209, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 790219, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 790229, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 790239, loss: -2.186298388551222e-06, rewards: -9.299999999999999, count: 50
epoch: 790249, loss: -1.33264065880212e-05, rewards: -9.299999999999999, count: 50
epoch: 790259, loss: -0.00010499596828594804, rewards: -9.299999999999999, count: 50
epoch: 790269, loss: 8.211016393033788e-05, rewards: -9.299999999999999, count: 50
epoch: 790279, loss: -5.781650543212891e-06, rewards: -9.299999999999999, count: 50
epoch: 790289, loss: -2.9370785341598094e-05, rewards: -9.299999999999999, coun

epoch: 791169, loss: 9.413003863301128e-05, rewards: -9.299999999999999, count: 50
epoch: 791179, loss: 2.421736644464545e-05, rewards: -9.299999999999999, count: 50
epoch: 791189, loss: -1.1618137250479776e-05, rewards: -9.299999999999999, count: 50
epoch: 791199, loss: -1.8318891306989826e-05, rewards: -9.299999999999999, count: 50
epoch: 791209, loss: -9.318589945905842e-06, rewards: -9.299999999999999, count: 50
epoch: 791219, loss: 1.8799305507855024e-06, rewards: -9.299999999999999, count: 50
epoch: 791229, loss: 4.327297119743889e-06, rewards: -9.299999999999999, count: 50
epoch: 791239, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 791249, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 791259, loss: 5.388259864957945e-07, rewards: -9.299999999999999, count: 50
epoch: 791269, loss: 1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 791279, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, co

epoch: 792159, loss: 6.803274118283298e-06, rewards: -9.299999999999999, count: 50
epoch: 792169, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 792179, loss: -2.4437904357910156e-06, rewards: -9.299999999999999, count: 50
epoch: 792189, loss: 1.7058849834938883e-06, rewards: -9.299999999999999, count: 50
epoch: 792199, loss: -7.307529585887096e-07, rewards: -9.299999999999999, count: 50
epoch: 792209, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 792219, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 792229, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 792239, loss: 4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 792249, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 792259, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 792269, loss: -1.8668174561753403e-06, rewards: -9.299999999999999, cou

epoch: 793149, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count: 50
epoch: 793159, loss: -1.745224039950699e-06, rewards: -9.299999999999999, count: 50
epoch: 793169, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 793179, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 793189, loss: -2.239942659798544e-06, rewards: -9.299999999999999, count: 50
epoch: 793199, loss: -1.3338327335077338e-05, rewards: -9.299999999999999, count: 50
epoch: 793209, loss: -9.887456690194085e-05, rewards: -9.299999999999999, count: 50
epoch: 793219, loss: 7.601976540172473e-05, rewards: -9.299999999999999, count: 50
epoch: 793229, loss: -2.0285844584577717e-05, rewards: -9.299999999999999, count: 50
epoch: 793239, loss: -2.769708589767106e-05, rewards: -9.299999999999999, count: 50
epoch: 793249, loss: 4.0149689084501006e-06, rewards: -9.299999999999999, count: 50
epoch: 793259, loss: 9.750127901497763e-06, rewards: -9.299999999999999, 

epoch: 794139, loss: -2.8014183044433594e-06, rewards: -9.299999999999999, count: 50
epoch: 794149, loss: 2.4437904357910156e-06, rewards: -9.299999999999999, count: 50
epoch: 794159, loss: -1.2302398317842744e-06, rewards: -9.299999999999999, count: 50
epoch: 794169, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 794179, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 794189, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 794199, loss: 1.7499924069852568e-06, rewards: -9.299999999999999, count: 50
epoch: 794209, loss: 5.915164820180507e-06, rewards: -9.299999999999999, count: 50
epoch: 794219, loss: 3.991723133367486e-05, rewards: -9.299999999999999, count: 50
epoch: 794229, loss: 8.253336272900924e-05, rewards: -9.299999999999999, count: 50
epoch: 794239, loss: 8.046627044677734e-06, rewards: -9.299999999999999, count: 50
epoch: 794249, loss: -3.4796001273207366e-05, rewards: -9.299999999999999, count

epoch: 795129, loss: -9.02962710824795e-05, rewards: -9.299999999999999, count: 50
epoch: 795139, loss: 5.527257962967269e-05, rewards: -9.299999999999999, count: 50
epoch: 795149, loss: -4.132032336201519e-05, rewards: -9.299999999999999, count: 50
epoch: 795159, loss: 2.2470951535069617e-06, rewards: -9.299999999999999, count: 50
epoch: 795169, loss: 1.33264065880212e-05, rewards: -9.299999999999999, count: 50
epoch: 795179, loss: -1.0166168067371473e-05, rewards: -9.299999999999999, count: 50
epoch: 795189, loss: 4.23669825977413e-06, rewards: -9.299999999999999, count: 50
epoch: 795199, loss: -1.1527538390510017e-06, rewards: -9.299999999999999, count: 50
epoch: 795209, loss: 8.893013045963016e-07, rewards: -9.299999999999999, count: 50
epoch: 795219, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 795229, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 795239, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 5

epoch: 796119, loss: 2.123117383234785e-06, rewards: -9.299999999999999, count: 50
epoch: 796129, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 796139, loss: -9.965896197172697e-07, rewards: -9.299999999999999, count: 50
epoch: 796149, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 796159, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 796169, loss: 6.19888282926695e-08, rewards: -9.299999999999999, count: 50
epoch: 796179, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 796189, loss: -5.197525183575635e-07, rewards: -9.299999999999999, count: 50
epoch: 796199, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 796209, loss: 6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 796219, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 796229, loss: -1.370906801412275e-07, rewards: -9.299999999999999, count:

epoch: 797119, loss: 1.2958049637745717e-06, rewards: -9.299999999999999, count: 50
epoch: 797129, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 797139, loss: -1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 797149, loss: -1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 797159, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 797169, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 797179, loss: -4.545450337900547e-06, rewards: -9.299999999999999, count: 50
epoch: 797189, loss: -3.70657435269095e-05, rewards: -9.299999999999999, count: 50
epoch: 797199, loss: -9.373426291858777e-05, rewards: -9.299999999999999, count: 50
epoch: 797209, loss: -3.292679684818722e-05, rewards: -9.299999999999999, count: 50
epoch: 797219, loss: 2.6715993953985162e-05, rewards: -9.299999999999999, count: 50
epoch: 797229, loss: 2.1290779841365293e-05, rewards: -9.299999999999999, c

epoch: 798109, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 798119, loss: -9.179115068036481e-08, rewards: -9.299999999999999, count: 50
epoch: 798129, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 798139, loss: -9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 798149, loss: -1.006841648631962e-05, rewards: -9.299999999999999, count: 50
epoch: 798159, loss: -9.653687448007986e-05, rewards: -9.299999999999999, count: 50
epoch: 798169, loss: 8.661508763907477e-05, rewards: -9.299999999999999, count: 50
epoch: 798179, loss: 2.1338462374842493e-07, rewards: -9.299999999999999, count: 50
epoch: 798189, loss: -2.6500225430936553e-05, rewards: -9.299999999999999, count: 50
epoch: 798199, loss: -1.8106698917108588e-05, rewards: -9.299999999999999, count: 50
epoch: 798209, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 798219, loss: 6.574392500624526e-06, rewards: -9.299999999999999, cou

epoch: 799099, loss: -8.26621035230346e-05, rewards: -9.299999999999999, count: 50
epoch: 799109, loss: 4.2566061893012375e-05, rewards: -9.299999999999999, count: 50
epoch: 799119, loss: -4.4924021494807675e-05, rewards: -9.299999999999999, count: 50
epoch: 799129, loss: 1.1274814823991619e-05, rewards: -9.299999999999999, count: 50
epoch: 799139, loss: 8.890629032975994e-06, rewards: -9.299999999999999, count: 50
epoch: 799149, loss: -9.474754733673763e-06, rewards: -9.299999999999999, count: 50
epoch: 799159, loss: 5.311966106091859e-06, rewards: -9.299999999999999, count: 50
epoch: 799169, loss: -2.8014183044433594e-06, rewards: -9.299999999999999, count: 50
epoch: 799179, loss: 1.817941665649414e-06, rewards: -9.299999999999999, count: 50
epoch: 799189, loss: -1.1527538390510017e-06, rewards: -9.299999999999999, count: 50
epoch: 799199, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 799209, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, coun

epoch: 800089, loss: -2.5901794288074598e-05, rewards: -9.299999999999999, count: 50
epoch: 800099, loss: -2.9981135867274133e-06, rewards: -9.299999999999999, count: 50
epoch: 800109, loss: 9.47713851928711e-06, rewards: -9.299999999999999, count: 50
epoch: 800119, loss: -2.5534629912726814e-06, rewards: -9.299999999999999, count: 50
epoch: 800129, loss: -1.5652179854441783e-06, rewards: -9.299999999999999, count: 50
epoch: 800139, loss: 1.9478798094496597e-06, rewards: -9.299999999999999, count: 50
epoch: 800149, loss: -1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 800159, loss: 1.9788741667525755e-07, rewards: -9.299999999999999, count: 50
epoch: 800169, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 800179, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 800189, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 800199, loss: -2.7656554379973386e-07, rewards: -9.299999999999

epoch: 801079, loss: 8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 801089, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 801099, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 801109, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 801119, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 801129, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 801139, loss: -6.034374109731289e-06, rewards: -9.299999999999999, count: 50
epoch: 801149, loss: -5.906820297241211e-05, rewards: -9.299999999999999, count: 50
epoch: 801159, loss: 1.0942220797005575e-05, rewards: -9.299999999999999, count: 50
epoch: 801169, loss: -5.7275294238934293e-05, rewards: -9.299999999999999, count: 50
epoch: 801179, loss: -3.4533739381004125e-05, rewards: -9.299999999999999, count: 50
epoch: 801189, loss: -6.25371922069462e-06, rewards: -9.299999999999999, co

epoch: 802069, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 802079, loss: -3.1244753699866123e-06, rewards: -9.299999999999999, count: 50
epoch: 802089, loss: -1.6176700228243135e-05, rewards: -9.299999999999999, count: 50
epoch: 802099, loss: -9.41407706704922e-05, rewards: -9.299999999999999, count: 50
epoch: 802109, loss: 6.014108657836914e-05, rewards: -9.299999999999999, count: 50
epoch: 802119, loss: -3.834605377051048e-05, rewards: -9.299999999999999, count: 50
epoch: 802129, loss: -5.412101700130734e-07, rewards: -9.299999999999999, count: 50
epoch: 802139, loss: 1.3321638107299805e-05, rewards: -9.299999999999999, count: 50
epoch: 802149, loss: -9.334087735624053e-06, rewards: -9.299999999999999, count: 50
epoch: 802159, loss: 3.949403890146641e-06, rewards: -9.299999999999999, count: 50
epoch: 802169, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 802179, loss: 7.796287491146359e-07, rewards: -9.299999999999999, cou

epoch: 803059, loss: -4.489421917241998e-05, rewards: -9.299999999999999, count: 50
epoch: 803069, loss: 1.0316372026863974e-05, rewards: -9.299999999999999, count: 50
epoch: 803079, loss: 9.559393220115453e-06, rewards: -9.299999999999999, count: 50
epoch: 803089, loss: -9.710788617667276e-06, rewards: -9.299999999999999, count: 50
epoch: 803099, loss: 5.220174898568075e-06, rewards: -9.299999999999999, count: 50
epoch: 803109, loss: -2.4247169676527847e-06, rewards: -9.299999999999999, count: 50
epoch: 803119, loss: 1.3661384627994266e-06, rewards: -9.299999999999999, count: 50
epoch: 803129, loss: -7.307529585887096e-07, rewards: -9.299999999999999, count: 50
epoch: 803139, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 803149, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 803159, loss: 2.8848648980783764e-07, rewards: -9.299999999999999, count: 50
epoch: 803169, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count

epoch: 804049, loss: -1.6552210581721738e-05, rewards: -9.299999999999999, count: 50
epoch: 804059, loss: -2.847909854608588e-06, rewards: -9.299999999999999, count: 50
epoch: 804069, loss: 6.501674761238974e-06, rewards: -9.299999999999999, count: 50
epoch: 804079, loss: -2.645254198796465e-06, rewards: -9.299999999999999, count: 50
epoch: 804089, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count: 50
epoch: 804099, loss: 6.210804031070438e-07, rewards: -9.299999999999999, count: 50
epoch: 804109, loss: -8.594989822086063e-07, rewards: -9.299999999999999, count: 50
epoch: 804119, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 804129, loss: 2.932548568423954e-07, rewards: -9.299999999999999, count: 50
epoch: 804139, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 804149, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 804159, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count

epoch: 805039, loss: 1.5735626845980732e-07, rewards: -9.299999999999999, count: 50
epoch: 805049, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 805059, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 805069, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 805079, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 805089, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 805099, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 805109, loss: 9.667872973295744e-07, rewards: -9.299999999999999, count: 50
epoch: 805119, loss: 9.107589562518115e-07, rewards: -9.299999999999999, count: 50
epoch: 805129, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 805139, loss: 5.584955033555161e-06, rewards: -9.299999999999999, count: 50
epoch: 805149, loss: 4.253268343745731e-05, rewards: -9.299999999999999, count: 5

epoch: 806029, loss: 8.893013045963016e-07, rewards: -9.299999999999999, count: 50
epoch: 806039, loss: 8.82148754044465e-07, rewards: -9.299999999999999, count: 50
epoch: 806049, loss: 1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 806059, loss: 6.765127182006836e-06, rewards: -9.299999999999999, count: 50
epoch: 806069, loss: 5.803942622151226e-05, rewards: -9.299999999999999, count: 50
epoch: 806079, loss: 2.4437904357910156e-06, rewards: -9.299999999999999, count: 50
epoch: 806089, loss: 6.013155143591575e-05, rewards: -9.299999999999999, count: 50
epoch: 806099, loss: 1.5710591469542123e-05, rewards: -9.299999999999999, count: 50
epoch: 806109, loss: -1.4348030163091607e-05, rewards: -9.299999999999999, count: 50
epoch: 806119, loss: -1.1498927960928995e-05, rewards: -9.299999999999999, count: 50
epoch: 806129, loss: 3.362893949088175e-06, rewards: -9.299999999999999, count: 50
epoch: 806139, loss: 4.190206709608901e-06, rewards: -9.299999999999999, count: 5

epoch: 807019, loss: 6.560445035574958e-05, rewards: -9.299999999999999, count: 50
epoch: 807029, loss: -2.988338383147493e-05, rewards: -9.299999999999999, count: 50
epoch: 807039, loss: -5.635023171635112e-06, rewards: -9.299999999999999, count: 50
epoch: 807049, loss: 1.3296604265633505e-05, rewards: -9.299999999999999, count: 50
epoch: 807059, loss: -8.623599569546059e-06, rewards: -9.299999999999999, count: 50
epoch: 807069, loss: 3.958940396842081e-06, rewards: -9.299999999999999, count: 50
epoch: 807079, loss: -1.7797947293729521e-06, rewards: -9.299999999999999, count: 50
epoch: 807089, loss: 1.4436244555326994e-06, rewards: -9.299999999999999, count: 50
epoch: 807099, loss: -1.7797947293729521e-06, rewards: -9.299999999999999, count: 50
epoch: 807109, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 807119, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 807129, loss: -1.5246868088070187e-06, rewards: -9.299999999999999, 

epoch: 808009, loss: -2.0766258330695564e-06, rewards: -9.299999999999999, count: 50
epoch: 808019, loss: -9.449719982512761e-06, rewards: -9.299999999999999, count: 50
epoch: 808029, loss: -5.772709846496582e-05, rewards: -9.299999999999999, count: 50
epoch: 808039, loss: -2.0409823264344595e-05, rewards: -9.299999999999999, count: 50
epoch: 808049, loss: -3.228068453609012e-05, rewards: -9.299999999999999, count: 50
epoch: 808059, loss: 2.9032229576841928e-05, rewards: -9.299999999999999, count: 50
epoch: 808069, loss: -4.465579877432901e-06, rewards: -9.299999999999999, count: 50
epoch: 808079, loss: -6.462335477408487e-06, rewards: -9.299999999999999, count: 50
epoch: 808089, loss: 6.401538939826423e-06, rewards: -9.299999999999999, count: 50
epoch: 808099, loss: -4.392862138047349e-06, rewards: -9.299999999999999, count: 50
epoch: 808109, loss: 2.607107262520003e-06, rewards: -9.299999999999999, count: 50
epoch: 808119, loss: -1.9323824744788e-06, rewards: -9.299999999999999, coun

epoch: 808999, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 809009, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 809019, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 809029, loss: 6.699561936329701e-07, rewards: -9.299999999999999, count: 50
epoch: 809039, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 809049, loss: 1.2934208371007117e-06, rewards: -9.299999999999999, count: 50
epoch: 809059, loss: 4.79340542369755e-06, rewards: -9.299999999999999, count: 50
epoch: 809069, loss: 3.133654536213726e-05, rewards: -9.299999999999999, count: 50
epoch: 809079, loss: 0.00010972261225106195, rewards: -9.299999999999999, count: 50
epoch: 809089, loss: -2.5231838662875816e-05, rewards: -9.299999999999999, count: 50
epoch: 809099, loss: -3.079652742599137e-05, rewards: -9.299999999999999, count: 50
epoch: 809109, loss: 1.6010999388527125e-05, rewards: -9.299999999999999, count

epoch: 809989, loss: -1.7404556729161413e-07, rewards: -9.299999999999999, count: 50
epoch: 809999, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 810009, loss: -1.8477439880371094e-06, rewards: -9.299999999999999, count: 50
epoch: 810019, loss: -1.840353070292622e-05, rewards: -9.299999999999999, count: 50
epoch: 810029, loss: -0.00014968634059187025, rewards: -9.299999999999999, count: 50
epoch: 810039, loss: 2.5205612473655492e-05, rewards: -9.299999999999999, count: 50
epoch: 810049, loss: 4.3519736209418625e-05, rewards: -9.299999999999999, count: 50
epoch: 810059, loss: 2.9852390071027912e-05, rewards: -9.299999999999999, count: 50
epoch: 810069, loss: 1.2367963790893555e-05, rewards: -9.299999999999999, count: 50
epoch: 810079, loss: -6.806850478824344e-07, rewards: -9.299999999999999, count: 50
epoch: 810089, loss: -5.6970120567712e-06, rewards: -9.299999999999999, count: 50
epoch: 810099, loss: -3.1375884645967744e-06, rewards: -9.299999999999999,

epoch: 810979, loss: -5.921125193708576e-06, rewards: -9.299999999999999, count: 50
epoch: 810989, loss: -3.114700302830897e-05, rewards: -9.299999999999999, count: 50
epoch: 810999, loss: -0.00010290980571880937, rewards: -9.299999999999999, count: 50
epoch: 811009, loss: 4.429102045833133e-05, rewards: -9.299999999999999, count: 50
epoch: 811019, loss: 4.154443558945786e-06, rewards: -9.299999999999999, count: 50
epoch: 811029, loss: -1.8396378436591476e-05, rewards: -9.299999999999999, count: 50
epoch: 811039, loss: 1.233816146850586e-05, rewards: -9.299999999999999, count: 50
epoch: 811049, loss: -5.151033292349894e-06, rewards: -9.299999999999999, count: 50
epoch: 811059, loss: 1.5020370938145788e-06, rewards: -9.299999999999999, count: 50
epoch: 811069, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 811079, loss: 1.363754222438729e-06, rewards: -9.299999999999999, count: 50
epoch: 811089, loss: -1.1277198836978641e-06, rewards: -9.299999999999999, co

epoch: 811969, loss: -3.598928515202715e-06, rewards: -9.299999999999999, count: 50
epoch: 811979, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 811989, loss: 6.19888282926695e-08, rewards: -9.299999999999999, count: 50
epoch: 811999, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 812009, loss: -1.4948844864193234e-06, rewards: -9.299999999999999, count: 50
epoch: 812019, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 812029, loss: 1.0478496506038937e-06, rewards: -9.299999999999999, count: 50
epoch: 812039, loss: 8.189678055714467e-07, rewards: -9.299999999999999, count: 50
epoch: 812049, loss: 2.3174286525318166e-06, rewards: -9.299999999999999, count: 50
epoch: 812059, loss: 8.60810268932255e-06, rewards: -9.299999999999999, count: 50
epoch: 812069, loss: 5.0950049626408145e-05, rewards: -9.299999999999999, count: 50
epoch: 812079, loss: 4.413485658005811e-05, rewards: -9.299999999999999, count: 

epoch: 812959, loss: 1.659750887483824e-05, rewards: -9.299999999999999, count: 50
epoch: 812969, loss: 0.00010392069816589355, rewards: -9.299999999999999, count: 50
epoch: 812979, loss: -7.205724978120998e-05, rewards: -9.299999999999999, count: 50
epoch: 812989, loss: 2.3106336811906658e-05, rewards: -9.299999999999999, count: 50
epoch: 812999, loss: 1.9413233530940488e-05, rewards: -9.299999999999999, count: 50
epoch: 813009, loss: -1.3815164493280463e-05, rewards: -9.299999999999999, count: 50
epoch: 813019, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 813029, loss: 4.569292286760174e-06, rewards: -9.299999999999999, count: 50
epoch: 813039, loss: -3.759861101571005e-06, rewards: -9.299999999999999, count: 50
epoch: 813049, loss: 1.920461727422662e-06, rewards: -9.299999999999999, count: 50
epoch: 813059, loss: -1.3029575711698271e-06, rewards: -9.299999999999999, count: 50
epoch: 813069, loss: 1.0979175613101688e-06, rewards: -9.299999999999999, cou

epoch: 813949, loss: 1.592636067471176e-06, rewards: -9.299999999999999, count: 50
epoch: 813959, loss: 8.36730032460764e-06, rewards: -9.299999999999999, count: 50
epoch: 813969, loss: 6.004095121170394e-05, rewards: -9.299999999999999, count: 50
epoch: 813979, loss: 7.092952728271484e-06, rewards: -9.299999999999999, count: 50
epoch: 813989, loss: 5.0103662943001837e-05, rewards: -9.299999999999999, count: 50
epoch: 813999, loss: -1.4359950910147745e-05, rewards: -9.299999999999999, count: 50
epoch: 814009, loss: -1.6527175830560736e-05, rewards: -9.299999999999999, count: 50
epoch: 814019, loss: 9.082556061912328e-06, rewards: -9.299999999999999, count: 50
epoch: 814029, loss: 1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 814039, loss: -3.869533429678995e-06, rewards: -9.299999999999999, count: 50
epoch: 814049, loss: 2.83598910755245e-06, rewards: -9.299999999999999, count: 50
epoch: 814059, loss: -1.8942356518891756e-06, rewards: -9.299999999999999, count: 

epoch: 814939, loss: 2.557992957008537e-05, rewards: -9.299999999999999, count: 50
epoch: 814949, loss: -7.321834345930256e-06, rewards: -9.299999999999999, count: 50
epoch: 814959, loss: -1.363754222438729e-06, rewards: -9.299999999999999, count: 50
epoch: 814969, loss: 2.8014183044433594e-06, rewards: -9.299999999999999, count: 50
epoch: 814979, loss: -2.1708010535803623e-06, rewards: -9.299999999999999, count: 50
epoch: 814989, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 814999, loss: 1.167058940154675e-06, rewards: -9.299999999999999, count: 50
epoch: 815009, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 815019, loss: -4.410743770222325e-07, rewards: -9.299999999999999, count: 50
epoch: 815029, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 815039, loss: -1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 815049, loss: -5.382299605116714e-06, rewards: -9.299999999999999, cou

epoch: 815929, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 815939, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 815949, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 815959, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 815969, loss: -2.5212764285242883e-06, rewards: -9.299999999999999, count: 50
epoch: 815979, loss: -8.71777501743054e-06, rewards: -9.299999999999999, count: 50
epoch: 815989, loss: -4.454731970326975e-05, rewards: -9.299999999999999, count: 50
epoch: 815999, loss: -6.625532842008397e-05, rewards: -9.299999999999999, count: 50
epoch: 816009, loss: 1.2675523976213299e-05, rewards: -9.299999999999999, count: 50
epoch: 816019, loss: 1.6157626305357553e-05, rewards: -9.299999999999999, count: 50
epoch: 816029, loss: -1.7583370208740234e-05, rewards: -9.299999999999999, count: 50
epoch: 816039, loss: 1.0449886758578941e-05, rewards: -9.299999999999999, c

epoch: 816919, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 816929, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 816939, loss: -9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 816949, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 816959, loss: -5.185604095458984e-06, rewards: -9.299999999999999, count: 50
epoch: 816969, loss: -3.287911385996267e-05, rewards: -9.299999999999999, count: 50
epoch: 816979, loss: -0.00010602712427498773, rewards: -9.299999999999999, count: 50
epoch: 816989, loss: 1.4795064998907037e-05, rewards: -9.299999999999999, count: 50
epoch: 816999, loss: 3.4525393857620656e-05, rewards: -9.299999999999999, count: 50
epoch: 817009, loss: -1.1150837053719442e-05, rewards: -9.299999999999999, count: 50
epoch: 817019, loss: -9.447336196899414e-06, rewards: -9.299999999999999, count: 50
epoch: 817029, loss: 7.6043606895837e-06, rewards: -9.299999999999999, cou

epoch: 817909, loss: -2.5868416742014233e-07, rewards: -9.299999999999999, count: 50
epoch: 817919, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 817929, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 817939, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 817949, loss: 3.350973202032037e-06, rewards: -9.299999999999999, count: 50
epoch: 817959, loss: 2.006769136642106e-05, rewards: -9.299999999999999, count: 50
epoch: 817969, loss: 0.00011280655598966405, rewards: -9.299999999999999, count: 50
epoch: 817979, loss: -7.026433740975335e-05, rewards: -9.299999999999999, count: 50
epoch: 817989, loss: 7.528066817030776e-06, rewards: -9.299999999999999, count: 50
epoch: 817999, loss: 2.310156742169056e-05, rewards: -9.299999999999999, count: 50
epoch: 818009, loss: -1.0753869901236612e-05, rewards: -9.299999999999999, count: 50
epoch: 818019, loss: -2.5582312446204014e-06, rewards: -9.299999999999999, count:

epoch: 818899, loss: -1.901388168334961e-05, rewards: -9.299999999999999, count: 50
epoch: 818909, loss: 1.2600421541719697e-05, rewards: -9.299999999999999, count: 50
epoch: 818919, loss: -3.967285010730848e-06, rewards: -9.299999999999999, count: 50
epoch: 818929, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 818939, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 818949, loss: 5.4836272056491e-07, rewards: -9.299999999999999, count: 50
epoch: 818959, loss: -8.165836220541678e-07, rewards: -9.299999999999999, count: 50
epoch: 818969, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 818979, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 818989, loss: 9.286403610531124e-07, rewards: -9.299999999999999, count: 50
epoch: 818999, loss: 1.4030933925823774e-06, rewards: -9.299999999999999, count: 50
epoch: 819009, loss: 3.409385726627079e-06, rewards: -9.299999999999999, count: 50

epoch: 819889, loss: 2.190947452618275e-05, rewards: -9.299999999999999, count: 50
epoch: 819899, loss: 0.00014356851170305163, rewards: -9.299999999999999, count: 50
epoch: 819909, loss: -1.4264584024203941e-05, rewards: -9.299999999999999, count: 50
epoch: 819919, loss: -4.2378902435302734e-05, rewards: -9.299999999999999, count: 50
epoch: 819929, loss: -2.558708183642011e-05, rewards: -9.299999999999999, count: 50
epoch: 819939, loss: -3.6525725590763614e-06, rewards: -9.299999999999999, count: 50
epoch: 819949, loss: 7.586479114252143e-06, rewards: -9.299999999999999, count: 50
epoch: 819959, loss: 5.447864623420173e-06, rewards: -9.299999999999999, count: 50
epoch: 819969, loss: -1.341104507446289e-06, rewards: -9.299999999999999, count: 50
epoch: 819979, loss: -1.6844272749949596e-06, rewards: -9.299999999999999, count: 50
epoch: 819989, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 819999, loss: 5.662441253662109e-07, rewards: -9.299999999999999, co

epoch: 820879, loss: -8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 820889, loss: 1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 820899, loss: 1.9311904964069981e-07, rewards: -9.299999999999999, count: 50
epoch: 820909, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 820919, loss: 1.519918441772461e-06, rewards: -9.299999999999999, count: 50
epoch: 820929, loss: 9.353160749014933e-06, rewards: -9.299999999999999, count: 50
epoch: 820939, loss: 8.262038318207487e-05, rewards: -9.299999999999999, count: 50
epoch: 820949, loss: -6.475568079622462e-05, rewards: -9.299999999999999, count: 50
epoch: 820959, loss: 3.1297207897296175e-05, rewards: -9.299999999999999, count: 50
epoch: 820969, loss: 3.2942294637905434e-05, rewards: -9.299999999999999, count: 50
epoch: 820979, loss: 5.372762643673923e-06, rewards: -9.299999999999999, count: 50
epoch: 820989, loss: -1.0272264262312092e-05, rewards: -9.299999999999999, count

epoch: 821869, loss: 2.7139187295688316e-05, rewards: -9.299999999999999, count: 50
epoch: 821879, loss: 0.0001003289216896519, rewards: -9.299999999999999, count: 50
epoch: 821889, loss: -5.7637691497802734e-05, rewards: -9.299999999999999, count: 50
epoch: 821899, loss: 2.4660825147293508e-05, rewards: -9.299999999999999, count: 50
epoch: 821909, loss: -2.008676574405399e-06, rewards: -9.299999999999999, count: 50
epoch: 821919, loss: -5.379915364756016e-06, rewards: -9.299999999999999, count: 50
epoch: 821929, loss: 5.061626325186808e-06, rewards: -9.299999999999999, count: 50
epoch: 821939, loss: -3.0303001494758064e-06, rewards: -9.299999999999999, count: 50
epoch: 821949, loss: 8.893013045963016e-07, rewards: -9.299999999999999, count: 50
epoch: 821959, loss: 1.5175342014117632e-06, rewards: -9.299999999999999, count: 50
epoch: 821969, loss: -2.2470951535069617e-06, rewards: -9.299999999999999, count: 50
epoch: 821979, loss: -5.781650429526053e-07, rewards: -9.299999999999999, co

epoch: 822859, loss: -5.856752522959141e-06, rewards: -9.299999999999999, count: 50
epoch: 822869, loss: -3.8236379623413086e-05, rewards: -9.299999999999999, count: 50
epoch: 822879, loss: -8.83841494214721e-05, rewards: -9.299999999999999, count: 50
epoch: 822889, loss: -1.6343593642886844e-06, rewards: -9.299999999999999, count: 50
epoch: 822899, loss: 3.482699321466498e-05, rewards: -9.299999999999999, count: 50
epoch: 822909, loss: -7.569789886474609e-06, rewards: -9.299999999999999, count: 50
epoch: 822919, loss: -9.626150131225586e-06, rewards: -9.299999999999999, count: 50
epoch: 822929, loss: 7.321834345930256e-06, rewards: -9.299999999999999, count: 50
epoch: 822939, loss: -2.0766258330695564e-06, rewards: -9.299999999999999, count: 50
epoch: 822949, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 822959, loss: 4.470348358154297e-07, rewards: -9.299999999999999, count: 50
epoch: 822969, loss: -2.0384788967930945e-07, rewards: -9.299999999999999, c

epoch: 823849, loss: 1.5676021121180383e-06, rewards: -9.299999999999999, count: 50
epoch: 823859, loss: 6.688833309453912e-06, rewards: -9.299999999999999, count: 50
epoch: 823869, loss: 4.28879247920122e-05, rewards: -9.299999999999999, count: 50
epoch: 823879, loss: 7.179140811786056e-05, rewards: -9.299999999999999, count: 50
epoch: 823889, loss: 1.0141134225705173e-05, rewards: -9.299999999999999, count: 50
epoch: 823899, loss: -3.3479929697932675e-05, rewards: -9.299999999999999, count: 50
epoch: 823909, loss: 8.07642936706543e-06, rewards: -9.299999999999999, count: 50
epoch: 823919, loss: 7.574558367196005e-06, rewards: -9.299999999999999, count: 50
epoch: 823929, loss: -7.244348580570659e-06, rewards: -9.299999999999999, count: 50
epoch: 823939, loss: 3.6454200653679436e-06, rewards: -9.299999999999999, count: 50
epoch: 823949, loss: -1.5318394162022742e-06, rewards: -9.299999999999999, count: 50
epoch: 823959, loss: 8.618831657258852e-07, rewards: -9.299999999999999, count: 5

epoch: 824839, loss: -1.3363361404117313e-06, rewards: -9.299999999999999, count: 50
epoch: 824849, loss: -5.339384188118856e-06, rewards: -9.299999999999999, count: 50
epoch: 824859, loss: -2.92909153358778e-05, rewards: -9.299999999999999, count: 50
epoch: 824869, loss: -0.00010775208647828549, rewards: -9.299999999999999, count: 50
epoch: 824879, loss: 4.7845838707871735e-05, rewards: -9.299999999999999, count: 50
epoch: 824889, loss: 6.508827027573716e-06, rewards: -9.299999999999999, count: 50
epoch: 824899, loss: -2.048373244178947e-05, rewards: -9.299999999999999, count: 50
epoch: 824909, loss: 1.1270046343270224e-05, rewards: -9.299999999999999, count: 50
epoch: 824919, loss: -2.9039383662166074e-06, rewards: -9.299999999999999, count: 50
epoch: 824929, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 824939, loss: 7.700920150455204e-07, rewards: -9.299999999999999, count: 50
epoch: 824949, loss: -6.34193440873787e-07, rewards: -9.299999999999999, co

epoch: 825829, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 825839, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 825849, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, count: 50
epoch: 825859, loss: -9.417534130307104e-08, rewards: -9.299999999999999, count: 50
epoch: 825869, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 825879, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 825889, loss: 8.988380386654171e-07, rewards: -9.299999999999999, count: 50
epoch: 825899, loss: 3.7872791835980024e-06, rewards: -9.299999999999999, count: 50
epoch: 825909, loss: 2.4660825147293508e-05, rewards: -9.299999999999999, count: 50
epoch: 825919, loss: 0.0001256859250133857, rewards: -9.299999999999999, count: 50
epoch: 825929, loss: -4.1906834667315707e-05, rewards: -9.299999999999999, count: 50
epoch: 825939, loss: -3.4796001273207366e-05, rewards: -9.299999999999999, coun

epoch: 826819, loss: -8.972882824309636e-06, rewards: -9.299999999999999, count: 50
epoch: 826829, loss: 1.6982556189759634e-05, rewards: -9.299999999999999, count: 50
epoch: 826839, loss: 1.2357235391391441e-05, rewards: -9.299999999999999, count: 50
epoch: 826849, loss: -2.256631887576077e-06, rewards: -9.299999999999999, count: 50
epoch: 826859, loss: -4.942417035636026e-06, rewards: -9.299999999999999, count: 50
epoch: 826869, loss: 1.2314319519646233e-06, rewards: -9.299999999999999, count: 50
epoch: 826879, loss: 1.0406970432086382e-06, rewards: -9.299999999999999, count: 50
epoch: 826889, loss: -1.1062621751989354e-06, rewards: -9.299999999999999, count: 50
epoch: 826899, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 826909, loss: -6.604194595638546e-07, rewards: -9.299999999999999, count: 50
epoch: 826919, loss: 3.2424927098873013e-07, rewards: -9.299999999999999, count: 50
epoch: 826929, loss: -6.34193440873787e-07, rewards: -9.299999999999999, cou

epoch: 827809, loss: 1.5949010048643686e-05, rewards: -9.299999999999999, count: 50
epoch: 827819, loss: -7.058381925162394e-06, rewards: -9.299999999999999, count: 50
epoch: 827829, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 827839, loss: 1.62363051003922e-06, rewards: -9.299999999999999, count: 50
epoch: 827849, loss: -7.355213256232673e-07, rewards: -9.299999999999999, count: 50
epoch: 827859, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 827869, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 827879, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 827889, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 827899, loss: 6.747245606675278e-07, rewards: -9.299999999999999, count: 50
epoch: 827909, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 827919, loss: -3.013610921698273e-06, rewards: -9.299999999999999, count:

epoch: 828799, loss: 3.099441414633475e-07, rewards: -9.299999999999999, count: 50
epoch: 828809, loss: 3.5762788286319847e-08, rewards: -9.299999999999999, count: 50
epoch: 828819, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 828829, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 828839, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 828849, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 828859, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 828869, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 828879, loss: 8.925199836085085e-06, rewards: -9.299999999999999, count: 50
epoch: 828889, loss: 8.394122414756566e-05, rewards: -9.299999999999999, count: 50
epoch: 828899, loss: -7.204294524854049e-05, rewards: -9.299999999999999, count: 50
epoch: 828909, loss: 2.094030423904769e-05, rewards: -9.299999999999999, count: 50

epoch: 829789, loss: 1.5089512089616619e-05, rewards: -9.299999999999999, count: 50
epoch: 829799, loss: 7.801771425874904e-05, rewards: -9.299999999999999, count: 50
epoch: 829809, loss: -2.927064815594349e-05, rewards: -9.299999999999999, count: 50
epoch: 829819, loss: 3.8664340536342934e-05, rewards: -9.299999999999999, count: 50
epoch: 829829, loss: -2.3580789275001734e-05, rewards: -9.299999999999999, count: 50
epoch: 829839, loss: 7.058381925162394e-06, rewards: -9.299999999999999, count: 50
epoch: 829849, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 829859, loss: -1.4579295566363726e-06, rewards: -9.299999999999999, count: 50
epoch: 829869, loss: 6.997585160206654e-07, rewards: -9.299999999999999, count: 50
epoch: 829879, loss: 6.389618079083448e-07, rewards: -9.299999999999999, count: 50
epoch: 829889, loss: -1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 829899, loss: 5.173683348402847e-07, rewards: -9.299999999999999, coun

epoch: 830779, loss: -8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 830789, loss: -1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 830799, loss: -3.650188546089339e-06, rewards: -9.299999999999999, count: 50
epoch: 830809, loss: -2.0070076061529107e-05, rewards: -9.299999999999999, count: 50
epoch: 830819, loss: -0.00011829614959424362, rewards: -9.299999999999999, count: 50
epoch: 830829, loss: 6.955862045288086e-05, rewards: -9.299999999999999, count: 50
epoch: 830839, loss: 6.983280400163494e-06, rewards: -9.299999999999999, count: 50
epoch: 830849, loss: -2.5409459340153262e-05, rewards: -9.299999999999999, count: 50
epoch: 830859, loss: 1.2934208371007117e-06, rewards: -9.299999999999999, count: 50
epoch: 830869, loss: 8.165836334228516e-06, rewards: -9.299999999999999, count: 50
epoch: 830879, loss: -4.674196134146769e-06, rewards: -9.299999999999999, count: 50
epoch: 830889, loss: 8.642673492431641e-07, rewards: -9.299999999999999, co

epoch: 831769, loss: 1.4359950910147745e-05, rewards: -9.299999999999999, count: 50
epoch: 831779, loss: -9.706020136945881e-06, rewards: -9.299999999999999, count: 50
epoch: 831789, loss: 2.32934962696163e-06, rewards: -9.299999999999999, count: 50
epoch: 831799, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 831809, loss: -8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 831819, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 831829, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 831839, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 831849, loss: 7.700920150455204e-07, rewards: -9.299999999999999, count: 50
epoch: 831859, loss: 1.920461727422662e-06, rewards: -9.299999999999999, count: 50
epoch: 831869, loss: 2.1100045159982983e-06, rewards: -9.299999999999999, count: 50
epoch: 831879, loss: 2.3567677089886274e-06, rewards: -9.299999999999999, count:

epoch: 832759, loss: 4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 832769, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 832779, loss: 2.0265579436795633e-08, rewards: -9.299999999999999, count: 50
epoch: 832789, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 832799, loss: 2.43902195506962e-06, rewards: -9.299999999999999, count: 50
epoch: 832809, loss: 1.481771505495999e-05, rewards: -9.299999999999999, count: 50
epoch: 832819, loss: 0.0001022458091028966, rewards: -9.299999999999999, count: 50
epoch: 832829, loss: -7.480382919311523e-05, rewards: -9.299999999999999, count: 50
epoch: 832839, loss: 2.1047591872047633e-05, rewards: -9.299999999999999, count: 50
epoch: 832849, loss: 2.4960041628219187e-05, rewards: -9.299999999999999, count: 50
epoch: 832859, loss: -9.372234671900515e-06, rewards: -9.299999999999999, count: 50
epoch: 832869, loss: -6.414652034436585e-06, rewards: -9.299999999999999, count: 5

epoch: 833749, loss: -3.4046172459056834e-06, rewards: -9.299999999999999, count: 50
epoch: 833759, loss: 3.12209135699959e-06, rewards: -9.299999999999999, count: 50
epoch: 833769, loss: -1.962184796866495e-06, rewards: -9.299999999999999, count: 50
epoch: 833779, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 833789, loss: -6.759166808478767e-07, rewards: -9.299999999999999, count: 50
epoch: 833799, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 833809, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 833819, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 833829, loss: -1.21712685086095e-06, rewards: -9.299999999999999, count: 50
epoch: 833839, loss: -1.394748665006773e-06, rewards: -9.299999999999999, count: 50
epoch: 833849, loss: -4.81843926536385e-06, rewards: -9.299999999999999, count: 50
epoch: 833859, loss: -2.8589964131242596e-05, rewards: -9.299999999999999, cou

epoch: 834739, loss: 0.00012997745943721384, rewards: -9.299999999999999, count: 50
epoch: 834749, loss: -8.79883737070486e-06, rewards: -9.299999999999999, count: 50
epoch: 834759, loss: -4.146456558373757e-05, rewards: -9.299999999999999, count: 50
epoch: 834769, loss: -1.529931978438981e-05, rewards: -9.299999999999999, count: 50
epoch: 834779, loss: 9.084939847525675e-06, rewards: -9.299999999999999, count: 50
epoch: 834789, loss: 8.165836334228516e-06, rewards: -9.299999999999999, count: 50
epoch: 834799, loss: -3.0624867122241994e-06, rewards: -9.299999999999999, count: 50
epoch: 834809, loss: -2.4509429294994334e-06, rewards: -9.299999999999999, count: 50
epoch: 834819, loss: 2.071857352348161e-06, rewards: -9.299999999999999, count: 50
epoch: 834829, loss: -3.8981437455731793e-07, rewards: -9.299999999999999, count: 50
epoch: 834839, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 834849, loss: 1.27553946072112e-07, rewards: -9.299999999999999, coun

epoch: 835729, loss: -2.1111964088049717e-06, rewards: -9.299999999999999, count: 50
epoch: 835739, loss: 2.5033950024067053e-08, rewards: -9.299999999999999, count: 50
epoch: 835749, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 835759, loss: 3.350973202032037e-06, rewards: -9.299999999999999, count: 50
epoch: 835769, loss: 1.0073184967041016e-05, rewards: -9.299999999999999, count: 50
epoch: 835779, loss: 4.638791142497212e-05, rewards: -9.299999999999999, count: 50
epoch: 835789, loss: 5.894422429264523e-05, rewards: -9.299999999999999, count: 50
epoch: 835799, loss: -2.0762681742780842e-05, rewards: -9.299999999999999, count: 50
epoch: 835809, loss: -1.5652179854441783e-06, rewards: -9.299999999999999, count: 50
epoch: 835819, loss: 7.773637662467081e-06, rewards: -9.299999999999999, count: 50
epoch: 835829, loss: -6.265640422498109e-06, rewards: -9.299999999999999, count: 50
epoch: 835839, loss: 2.7239323117100867e-06, rewards: -9.299999999999999, cou

epoch: 836719, loss: 5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 836729, loss: -2.226829565188382e-06, rewards: -9.299999999999999, count: 50
epoch: 836739, loss: 1.7380714325554436e-06, rewards: -9.299999999999999, count: 50
epoch: 836749, loss: -7.96318033735588e-07, rewards: -9.299999999999999, count: 50
epoch: 836759, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 836769, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 836779, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 836789, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 836799, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 836809, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 836819, loss: -7.402896926578251e-07, rewards: -9.299999999999999, count: 50
epoch: 836829, loss: -1.4948844864193234e-06, rewards: -9.299999999999999, coun

epoch: 837709, loss: -1.6915797687033773e-06, rewards: -9.299999999999999, count: 50
epoch: 837719, loss: 1.1718273071892327e-06, rewards: -9.299999999999999, count: 50
epoch: 837729, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, count: 50
epoch: 837739, loss: -9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 837749, loss: 2.610683509374212e-07, rewards: -9.299999999999999, count: 50
epoch: 837759, loss: 9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 837769, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 837779, loss: -1.8823146774593624e-06, rewards: -9.299999999999999, count: 50
epoch: 837789, loss: -7.212162017822266e-06, rewards: -9.299999999999999, count: 50
epoch: 837799, loss: -3.9765833207638934e-05, rewards: -9.299999999999999, count: 50
epoch: 837809, loss: -8.127808541757986e-05, rewards: -9.299999999999999, count: 50
epoch: 837819, loss: 1.2590885489771608e-05, rewards: -9.299999999999999,

epoch: 838699, loss: -0.00011413812899263576, rewards: -9.299999999999999, count: 50
epoch: 838709, loss: 5.238533049123362e-05, rewards: -9.299999999999999, count: 50
epoch: 838719, loss: 1.0426044354971964e-05, rewards: -9.299999999999999, count: 50
epoch: 838729, loss: -2.292871431563981e-05, rewards: -9.299999999999999, count: 50
epoch: 838739, loss: 7.528066817030776e-06, rewards: -9.299999999999999, count: 50
epoch: 838749, loss: 2.5463104975642636e-06, rewards: -9.299999999999999, count: 50
epoch: 838759, loss: -4.316568265494425e-06, rewards: -9.299999999999999, count: 50
epoch: 838769, loss: 3.3974647521972656e-06, rewards: -9.299999999999999, count: 50
epoch: 838779, loss: -1.962184796866495e-06, rewards: -9.299999999999999, count: 50
epoch: 838789, loss: 1.3387202670855913e-06, rewards: -9.299999999999999, count: 50
epoch: 838799, loss: -4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 838809, loss: -6.508827254947391e-07, rewards: -9.299999999999999, co

epoch: 839689, loss: 1.996755599975586e-06, rewards: -9.299999999999999, count: 50
epoch: 839699, loss: 4.611015356204007e-06, rewards: -9.299999999999999, count: 50
epoch: 839709, loss: 2.362608938710764e-05, rewards: -9.299999999999999, count: 50
epoch: 839719, loss: 0.00010723352170316502, rewards: -9.299999999999999, count: 50
epoch: 839729, loss: -6.304382986854762e-05, rewards: -9.299999999999999, count: 50
epoch: 839739, loss: 1.710653305053711e-05, rewards: -9.299999999999999, count: 50
epoch: 839749, loss: 1.1342764082655776e-05, rewards: -9.299999999999999, count: 50
epoch: 839759, loss: -1.3605355889012571e-05, rewards: -9.299999999999999, count: 50
epoch: 839769, loss: 7.971525519678835e-06, rewards: -9.299999999999999, count: 50
epoch: 839779, loss: -3.844499588012695e-06, rewards: -9.299999999999999, count: 50
epoch: 839789, loss: 2.2101403374108486e-06, rewards: -9.299999999999999, count: 50
epoch: 839799, loss: -1.3709068298339844e-06, rewards: -9.299999999999999, count

epoch: 840679, loss: 4.80890275866841e-06, rewards: -9.299999999999999, count: 50
epoch: 840689, loss: -2.774000222416362e-06, rewards: -9.299999999999999, count: 50
epoch: 840699, loss: -5.0067900048134106e-08, rewards: -9.299999999999999, count: 50
epoch: 840709, loss: 4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 840719, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 840729, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 840739, loss: 2.4437904357910156e-06, rewards: -9.299999999999999, count: 50
epoch: 840749, loss: 1.1372566405043472e-05, rewards: -9.299999999999999, count: 50
epoch: 840759, loss: 7.204294524854049e-05, rewards: -9.299999999999999, count: 50
epoch: 840769, loss: -2.215027780039236e-05, rewards: -9.299999999999999, count: 50
epoch: 840779, loss: 4.6628712880192325e-05, rewards: -9.299999999999999, count: 50
epoch: 840789, loss: -1.615285873413086e-05, rewards: -9.299999999999999, count:

epoch: 841669, loss: -2.774000222416362e-06, rewards: -9.299999999999999, count: 50
epoch: 841679, loss: -7.830858521629125e-06, rewards: -9.299999999999999, count: 50
epoch: 841689, loss: 2.2470951535069617e-06, rewards: -9.299999999999999, count: 50
epoch: 841699, loss: 1.6283988770737778e-06, rewards: -9.299999999999999, count: 50
epoch: 841709, loss: -1.714229597382655e-06, rewards: -9.299999999999999, count: 50
epoch: 841719, loss: 1.2469291732486454e-06, rewards: -9.299999999999999, count: 50
epoch: 841729, loss: -1.0550022579991492e-06, rewards: -9.299999999999999, count: 50
epoch: 841739, loss: 5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 841749, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 841759, loss: -5.269050689094001e-07, rewards: -9.299999999999999, count: 50
epoch: 841769, loss: -5.710124924007687e-07, rewards: -9.299999999999999, count: 50
epoch: 841779, loss: -3.492832263418677e-07, rewards: -9.299999999999999, c

epoch: 842659, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 842669, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 842679, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 842689, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count: 50
epoch: 842699, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 842709, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 842719, loss: -3.5417078834143467e-06, rewards: -9.299999999999999, count: 50
epoch: 842729, loss: -2.537369800847955e-05, rewards: -9.299999999999999, count: 50
epoch: 842739, loss: -0.00012534856796264648, rewards: -9.299999999999999, count: 50
epoch: 842749, loss: 3.753542841877788e-05, rewards: -9.299999999999999, count: 50
epoch: 842759, loss: 3.632664811448194e-05, rewards: -9.299999999999999, count: 50
epoch: 842769, loss: -9.710788617667276e-06, rewards: -9.299999999999999, count

epoch: 843649, loss: 0.00010960578947560862, rewards: -9.299999999999999, count: 50
epoch: 843659, loss: -2.6615858587319963e-05, rewards: -9.299999999999999, count: 50
epoch: 843669, loss: -3.010511318279896e-05, rewards: -9.299999999999999, count: 50
epoch: 843679, loss: 1.6552210581721738e-05, rewards: -9.299999999999999, count: 50
epoch: 843689, loss: 5.071163286629599e-06, rewards: -9.299999999999999, count: 50
epoch: 843699, loss: -8.16106785350712e-06, rewards: -9.299999999999999, count: 50
epoch: 843709, loss: 4.156827799306484e-06, rewards: -9.299999999999999, count: 50
epoch: 843719, loss: -1.2123584838263923e-06, rewards: -9.299999999999999, count: 50
epoch: 843729, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 843739, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 843749, loss: 1.1765956742237904e-06, rewards: -9.299999999999999, count: 50
epoch: 843759, loss: -6.651878265984124e-07, rewards: -9.299999999999999, coun

epoch: 844639, loss: -2.239942659798544e-06, rewards: -9.299999999999999, count: 50
epoch: 844649, loss: 1.2779236158166896e-06, rewards: -9.299999999999999, count: 50
epoch: 844659, loss: -5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 844669, loss: -5.435943535303522e-07, rewards: -9.299999999999999, count: 50
epoch: 844679, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 844689, loss: -6.926059654688288e-07, rewards: -9.299999999999999, count: 50
epoch: 844699, loss: -2.4271012080134824e-06, rewards: -9.299999999999999, count: 50
epoch: 844709, loss: -1.0040998859039973e-05, rewards: -9.299999999999999, count: 50
epoch: 844719, loss: -5.91528405493591e-05, rewards: -9.299999999999999, count: 50
epoch: 844729, loss: -1.7480850146966986e-05, rewards: -9.299999999999999, count: 50
epoch: 844739, loss: -2.889394818339497e-05, rewards: -9.299999999999999, count: 50
epoch: 844749, loss: 2.9429196729324758e-05, rewards: -9.299999999999999

epoch: 845629, loss: 8.940696716308594e-07, rewards: -9.299999999999999, count: 50
epoch: 845639, loss: -8.82148754044465e-07, rewards: -9.299999999999999, count: 50
epoch: 845649, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 845659, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 845669, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 845679, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 845689, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 845699, loss: -3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 845709, loss: -1.8525123550716671e-06, rewards: -9.299999999999999, count: 50
epoch: 845719, loss: -8.012056241568644e-06, rewards: -9.299999999999999, count: 50
epoch: 845729, loss: -5.551695721806027e-05, rewards: -9.299999999999999, count: 50
epoch: 845739, loss: -2.388238863204606e-05, rewards: -9.299999999999999,

epoch: 846619, loss: -8.666515327604429e-07, rewards: -9.299999999999999, count: 50
epoch: 846629, loss: 7.498264267269406e-07, rewards: -9.299999999999999, count: 50
epoch: 846639, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 846649, loss: -1.0180473282161984e-06, rewards: -9.299999999999999, count: 50
epoch: 846659, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 846669, loss: -1.3029575711698271e-06, rewards: -9.299999999999999, count: 50
epoch: 846679, loss: -5.444288035505451e-06, rewards: -9.299999999999999, count: 50
epoch: 846689, loss: -3.772497075260617e-05, rewards: -9.299999999999999, count: 50
epoch: 846699, loss: -9.073376713786274e-05, rewards: -9.299999999999999, count: 50
epoch: 846709, loss: -9.149312973022461e-06, rewards: -9.299999999999999, count: 50
epoch: 846719, loss: 3.6457775422604755e-05, rewards: -9.299999999999999, count: 50
epoch: 846729, loss: 6.699561936329701e-07, rewards: -9.299999999999999, co

epoch: 847609, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 847619, loss: -2.7942658107349416e-06, rewards: -9.299999999999999, count: 50
epoch: 847629, loss: -1.379251443722751e-05, rewards: -9.299999999999999, count: 50
epoch: 847639, loss: -8.696794247953221e-05, rewards: -9.299999999999999, count: 50
epoch: 847649, loss: 5.2649975259555504e-05, rewards: -9.299999999999999, count: 50
epoch: 847659, loss: -4.3419600842753425e-05, rewards: -9.299999999999999, count: 50
epoch: 847669, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 847679, loss: 1.4939308130124118e-05, rewards: -9.299999999999999, count: 50
epoch: 847689, loss: -8.156299372785725e-06, rewards: -9.299999999999999, count: 50
epoch: 847699, loss: 1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 847709, loss: 9.942054930434097e-07, rewards: -9.299999999999999, count: 50
epoch: 847719, loss: -9.810923984332476e-07, rewards: -9.299999999999999,

epoch: 848599, loss: -6.437301891537572e-08, rewards: -9.299999999999999, count: 50
epoch: 848609, loss: -5.503892680280842e-06, rewards: -9.299999999999999, count: 50
epoch: 848619, loss: -0.0002955794334411621, rewards: -9.299999999999999, count: 50
epoch: 848629, loss: -6.0342550568748266e-05, rewards: -9.299999999999999, count: 50
epoch: 848639, loss: 2.2575855837203562e-05, rewards: -9.299999999999999, count: 50
epoch: 848649, loss: 1.417040857631946e-05, rewards: -9.299999999999999, count: 50
epoch: 848659, loss: -2.4231672796304338e-05, rewards: -9.299999999999999, count: 50
epoch: 848669, loss: 1.9490718841552734e-05, rewards: -9.299999999999999, count: 50
epoch: 848679, loss: -1.195073127746582e-05, rewards: -9.299999999999999, count: 50
epoch: 848689, loss: 6.401538939826423e-06, rewards: -9.299999999999999, count: 50
epoch: 848699, loss: -3.210306203982327e-06, rewards: -9.299999999999999, count: 50
epoch: 848709, loss: 1.7380714325554436e-06, rewards: -9.299999999999999, co

epoch: 849589, loss: -4.251003247190965e-06, rewards: -9.299999999999999, count: 50
epoch: 849599, loss: 2.88128853753733e-06, rewards: -9.299999999999999, count: 50
epoch: 849609, loss: -2.7418137094059603e-08, rewards: -9.299999999999999, count: 50
epoch: 849619, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 849629, loss: 1.2612342743523186e-06, rewards: -9.299999999999999, count: 50
epoch: 849639, loss: 1.2934208371007117e-06, rewards: -9.299999999999999, count: 50
epoch: 849649, loss: 1.8668174561753403e-06, rewards: -9.299999999999999, count: 50
epoch: 849659, loss: 7.969140824570786e-06, rewards: -9.299999999999999, count: 50
epoch: 849669, loss: 5.4895877838134766e-05, rewards: -9.299999999999999, count: 50
epoch: 849679, loss: 2.5331974029541016e-05, rewards: -9.299999999999999, count: 50
epoch: 849689, loss: 4.4473410525824875e-05, rewards: -9.299999999999999, count: 50
epoch: 849699, loss: -2.1313428078428842e-05, rewards: -9.299999999999999, cou

epoch: 850579, loss: 1.1181831496287487e-06, rewards: -9.299999999999999, count: 50
epoch: 850589, loss: 3.958940396842081e-06, rewards: -9.299999999999999, count: 50
epoch: 850599, loss: 2.9062031899229623e-05, rewards: -9.299999999999999, count: 50
epoch: 850609, loss: 0.00012068033538525924, rewards: -9.299999999999999, count: 50
epoch: 850619, loss: -5.701780537492596e-06, rewards: -9.299999999999999, count: 50
epoch: 850629, loss: -4.056096076965332e-05, rewards: -9.299999999999999, count: 50
epoch: 850639, loss: -8.752345820539631e-06, rewards: -9.299999999999999, count: 50
epoch: 850649, loss: 1.2899637113150675e-05, rewards: -9.299999999999999, count: 50
epoch: 850659, loss: 3.4320355553063564e-06, rewards: -9.299999999999999, count: 50
epoch: 850669, loss: -5.185604095458984e-06, rewards: -9.299999999999999, count: 50
epoch: 850679, loss: 1.115798909268051e-06, rewards: -9.299999999999999, count: 50
epoch: 850689, loss: 9.739399047248298e-07, rewards: -9.299999999999999, count

epoch: 851569, loss: -4.1007996287589776e-07, rewards: -9.299999999999999, count: 50
epoch: 851579, loss: 1.6450881901164394e-07, rewards: -9.299999999999999, count: 50
epoch: 851589, loss: -4.172325134277344e-07, rewards: -9.299999999999999, count: 50
epoch: 851599, loss: 6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 851609, loss: -5.018711135562626e-07, rewards: -9.299999999999999, count: 50
epoch: 851619, loss: -1.3470649662394862e-07, rewards: -9.299999999999999, count: 50
epoch: 851629, loss: 1.370906801412275e-07, rewards: -9.299999999999999, count: 50
epoch: 851639, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 851649, loss: 7.867812996664725e-07, rewards: -9.299999999999999, count: 50
epoch: 851659, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 851669, loss: 1.8775463104248047e-06, rewards: -9.299999999999999, count: 50
epoch: 851679, loss: 1.0389089766249526e-05, rewards: -9.299999999999999, cou

epoch: 852559, loss: -4.5180320284998743e-07, rewards: -9.299999999999999, count: 50
epoch: 852569, loss: 6.997585160206654e-07, rewards: -9.299999999999999, count: 50
epoch: 852579, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 852589, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 852599, loss: -1.6462803387184977e-06, rewards: -9.299999999999999, count: 50
epoch: 852609, loss: -7.828473826521076e-06, rewards: -9.299999999999999, count: 50
epoch: 852619, loss: -6.15763638052158e-05, rewards: -9.299999999999999, count: 50
epoch: 852629, loss: 3.076791699641035e-06, rewards: -9.299999999999999, count: 50
epoch: 852639, loss: -5.624651748803444e-05, rewards: -9.299999999999999, count: 50
epoch: 852649, loss: -2.7239323117100867e-06, rewards: -9.299999999999999, count: 50
epoch: 852659, loss: 1.9960403733421117e-05, rewards: -9.299999999999999, count: 50
epoch: 852669, loss: 1.9299984614917776e-06, rewards: -9.299999999999999, co

epoch: 853549, loss: -6.511211267934414e-06, rewards: -9.299999999999999, count: 50
epoch: 853559, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 853569, loss: 1.5652179854441783e-06, rewards: -9.299999999999999, count: 50
epoch: 853579, loss: -1.2660026413868763e-06, rewards: -9.299999999999999, count: 50
epoch: 853589, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 853599, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 853609, loss: -3.826618240054813e-07, rewards: -9.299999999999999, count: 50
epoch: 853619, loss: 4.899501959698682e-07, rewards: -9.299999999999999, count: 50
epoch: 853629, loss: 1.2779236158166896e-06, rewards: -9.299999999999999, count: 50
epoch: 853639, loss: 9.262561775358336e-07, rewards: -9.299999999999999, count: 50
epoch: 853649, loss: 1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 853659, loss: 3.7741660889878403e-06, rewards: -9.299999999999999, coun

epoch: 854539, loss: -1.2862682297054562e-06, rewards: -9.299999999999999, count: 50
epoch: 854549, loss: -4.410743770222325e-07, rewards: -9.299999999999999, count: 50
epoch: 854559, loss: 4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 854569, loss: 1.2564659073177609e-06, rewards: -9.299999999999999, count: 50
epoch: 854579, loss: 5.650520506605972e-06, rewards: -9.299999999999999, count: 50
epoch: 854589, loss: 3.442645174800418e-05, rewards: -9.299999999999999, count: 50
epoch: 854599, loss: 9.76502924459055e-05, rewards: -9.299999999999999, count: 50
epoch: 854609, loss: -2.728104664129205e-05, rewards: -9.299999999999999, count: 50
epoch: 854619, loss: -2.059340476989746e-05, rewards: -9.299999999999999, count: 50
epoch: 854629, loss: 2.0914078049827367e-05, rewards: -9.299999999999999, count: 50
epoch: 854639, loss: -6.333589681162266e-06, rewards: -9.299999999999999, count: 50
epoch: 854649, loss: -1.0085105941470829e-06, rewards: -9.299999999999999, coun

epoch: 855529, loss: 6.771087441848067e-07, rewards: -9.299999999999999, count: 50
epoch: 855539, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 855549, loss: 2.4509429294994334e-06, rewards: -9.299999999999999, count: 50
epoch: 855559, loss: 9.59634780883789e-06, rewards: -9.299999999999999, count: 50
epoch: 855569, loss: 5.654215783579275e-05, rewards: -9.299999999999999, count: 50
epoch: 855579, loss: 2.5331974029541016e-05, rewards: -9.299999999999999, count: 50
epoch: 855589, loss: 2.6792287826538086e-05, rewards: -9.299999999999999, count: 50
epoch: 855599, loss: -2.9822587748640217e-05, rewards: -9.299999999999999, count: 50
epoch: 855609, loss: 9.119510650634766e-06, rewards: -9.299999999999999, count: 50
epoch: 855619, loss: 2.8312206268310547e-06, rewards: -9.299999999999999, count: 50
epoch: 855629, loss: -4.942417035636026e-06, rewards: -9.299999999999999, count: 50
epoch: 855639, loss: 3.662109293145477e-06, rewards: -9.299999999999999, count: 

epoch: 856519, loss: 5.087852514407132e-06, rewards: -9.299999999999999, count: 50
epoch: 856529, loss: 3.101944821537472e-05, rewards: -9.299999999999999, count: 50
epoch: 856539, loss: 0.00010914087033597752, rewards: -9.299999999999999, count: 50
epoch: 856549, loss: -3.200292485416867e-05, rewards: -9.299999999999999, count: 50
epoch: 856559, loss: -2.530217170715332e-05, rewards: -9.299999999999999, count: 50
epoch: 856569, loss: 1.985788367164787e-05, rewards: -9.299999999999999, count: 50
epoch: 856579, loss: 6.794929277020856e-07, rewards: -9.299999999999999, count: 50
epoch: 856589, loss: -6.936788395250915e-06, rewards: -9.299999999999999, count: 50
epoch: 856599, loss: 5.153417532710591e-06, rewards: -9.299999999999999, count: 50
epoch: 856609, loss: -2.5832653136603767e-06, rewards: -9.299999999999999, count: 50
epoch: 856619, loss: 1.4901161193847656e-06, rewards: -9.299999999999999, count: 50
epoch: 856629, loss: -1.4555454299625126e-06, rewards: -9.299999999999999, count

epoch: 857509, loss: -1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 857519, loss: 7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 857529, loss: -5.781650429526053e-07, rewards: -9.299999999999999, count: 50
epoch: 857539, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 857549, loss: 3.755092734536447e-07, rewards: -9.299999999999999, count: 50
epoch: 857559, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 857569, loss: 6.413459914256237e-07, rewards: -9.299999999999999, count: 50
epoch: 857579, loss: 2.708434976739227e-06, rewards: -9.299999999999999, count: 50
epoch: 857589, loss: 1.5795230865478516e-05, rewards: -9.299999999999999, count: 50
epoch: 857599, loss: 0.00011433959298301488, rewards: -9.299999999999999, count: 50
epoch: 857609, loss: -8.07547548902221e-05, rewards: -9.299999999999999, count: 50
epoch: 857619, loss: -6.959438451303868e-06, rewards: -9.299999999999999, count: 5

epoch: 858499, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 858509, loss: 8.940696716308594e-08, rewards: -9.299999999999999, count: 50
epoch: 858519, loss: 3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 858529, loss: 7.653236480109626e-07, rewards: -9.299999999999999, count: 50
epoch: 858539, loss: 2.7692317416949663e-06, rewards: -9.299999999999999, count: 50
epoch: 858549, loss: 1.9376277123228647e-05, rewards: -9.299999999999999, count: 50
epoch: 858559, loss: 0.00012263417011126876, rewards: -9.299999999999999, count: 50
epoch: 858569, loss: -6.872415542602539e-05, rewards: -9.299999999999999, count: 50
epoch: 858579, loss: -1.8855333109968342e-05, rewards: -9.299999999999999, count: 50
epoch: 858589, loss: 2.238154411315918e-05, rewards: -9.299999999999999, count: 50
epoch: 858599, loss: 8.16106785350712e-06, rewards: -9.299999999999999, count: 50
epoch: 858609, loss: -8.862018148647621e-06, rewards: -9.299999999999999, count: 5

epoch: 859489, loss: 4.1246414639317663e-07, rewards: -9.299999999999999, count: 50
epoch: 859499, loss: 1.0883808272410533e-06, rewards: -9.299999999999999, count: 50
epoch: 859509, loss: 6.171464974613627e-06, rewards: -9.299999999999999, count: 50
epoch: 859519, loss: 5.771756332251243e-05, rewards: -9.299999999999999, count: 50
epoch: 859529, loss: -4.507303401624085e-06, rewards: -9.299999999999999, count: 50
epoch: 859539, loss: 5.9416295698611066e-05, rewards: -9.299999999999999, count: 50
epoch: 859549, loss: 3.1138657504925504e-05, rewards: -9.299999999999999, count: 50
epoch: 859559, loss: 1.0812282198457979e-06, rewards: -9.299999999999999, count: 50
epoch: 859569, loss: -1.130938562710071e-05, rewards: -9.299999999999999, count: 50
epoch: 859579, loss: -6.25371922069462e-06, rewards: -9.299999999999999, count: 50
epoch: 859589, loss: 2.2149085907585686e-06, rewards: -9.299999999999999, count: 50
epoch: 859599, loss: 2.2494793938676594e-06, rewards: -9.299999999999999, count

epoch: 860479, loss: -9.323358426627237e-06, rewards: -9.299999999999999, count: 50
epoch: 860489, loss: -5.779862476629205e-05, rewards: -9.299999999999999, count: 50
epoch: 860499, loss: -2.0164250599918887e-05, rewards: -9.299999999999999, count: 50
epoch: 860509, loss: -3.351569102960639e-05, rewards: -9.299999999999999, count: 50
epoch: 860519, loss: 2.877235419873614e-05, rewards: -9.299999999999999, count: 50
epoch: 860529, loss: -2.8204917725815903e-06, rewards: -9.299999999999999, count: 50
epoch: 860539, loss: -7.696151442360133e-06, rewards: -9.299999999999999, count: 50
epoch: 860549, loss: 6.936788395250915e-06, rewards: -9.299999999999999, count: 50
epoch: 860559, loss: -4.231929779052734e-06, rewards: -9.299999999999999, count: 50
epoch: 860569, loss: 1.9669532775878906e-06, rewards: -9.299999999999999, count: 50
epoch: 860579, loss: -1.268386881747574e-06, rewards: -9.299999999999999, count: 50
epoch: 860589, loss: 9.667872973295744e-07, rewards: -9.299999999999999, cou

epoch: 861469, loss: -9.405612786395068e-07, rewards: -9.299999999999999, count: 50
epoch: 861479, loss: 6.043911184860917e-07, rewards: -9.299999999999999, count: 50
epoch: 861489, loss: -4.0531158873591266e-08, rewards: -9.299999999999999, count: 50
epoch: 861499, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 861509, loss: -1.1277198836978641e-06, rewards: -9.299999999999999, count: 50
epoch: 861519, loss: -4.637241488580912e-07, rewards: -9.299999999999999, count: 50
epoch: 861529, loss: -1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 861539, loss: -7.18235969543457e-06, rewards: -9.299999999999999, count: 50
epoch: 861549, loss: -6.147145904833451e-05, rewards: -9.299999999999999, count: 50
epoch: 861559, loss: 6.506443241960369e-06, rewards: -9.299999999999999, count: 50
epoch: 861569, loss: -5.786657493445091e-05, rewards: -9.299999999999999, count: 50
epoch: 861579, loss: -1.2941360182594508e-05, rewards: -9.299999999999999, c

epoch: 862459, loss: 1.5313626136048697e-05, rewards: -9.299999999999999, count: 50
epoch: 862469, loss: 2.282857849422726e-06, rewards: -9.299999999999999, count: 50
epoch: 862479, loss: -6.248950739973225e-06, rewards: -9.299999999999999, count: 50
epoch: 862489, loss: 3.5381317502469756e-06, rewards: -9.299999999999999, count: 50
epoch: 862499, loss: -1.1277198836978641e-06, rewards: -9.299999999999999, count: 50
epoch: 862509, loss: 5.638599418489321e-07, rewards: -9.299999999999999, count: 50
epoch: 862519, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 862529, loss: 2.2292137202839513e-07, rewards: -9.299999999999999, count: 50
epoch: 862539, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 862549, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 862559, loss: 3.075599579460686e-07, rewards: -9.299999999999999, count: 50
epoch: 862569, loss: 2.1815300499383738e-07, rewards: -9.299999999999999, count

epoch: 863449, loss: -0.0001121580571634695, rewards: -9.299999999999999, count: 50
epoch: 863459, loss: 6.944417691556737e-05, rewards: -9.299999999999999, count: 50
epoch: 863469, loss: -9.603500075172633e-06, rewards: -9.299999999999999, count: 50
epoch: 863479, loss: -2.154707908630371e-05, rewards: -9.299999999999999, count: 50
epoch: 863489, loss: 1.1992454346909653e-05, rewards: -9.299999999999999, count: 50
epoch: 863499, loss: 6.747245606675278e-07, rewards: -9.299999999999999, count: 50
epoch: 863509, loss: -4.073381205671467e-06, rewards: -9.299999999999999, count: 50
epoch: 863519, loss: 3.3104420253948774e-06, rewards: -9.299999999999999, count: 50
epoch: 863529, loss: -2.4116038730426226e-06, rewards: -9.299999999999999, count: 50
epoch: 863539, loss: 1.115798909268051e-06, rewards: -9.299999999999999, count: 50
epoch: 863549, loss: -5.173683348402847e-07, rewards: -9.299999999999999, count: 50
epoch: 863559, loss: -4.410743770222325e-07, rewards: -9.299999999999999, coun

epoch: 864439, loss: -2.0146370616203058e-07, rewards: -9.299999999999999, count: 50
epoch: 864449, loss: 7.152557657263969e-08, rewards: -9.299999999999999, count: 50
epoch: 864459, loss: 1.27553946072112e-07, rewards: -9.299999999999999, count: 50
epoch: 864469, loss: 1.4424324490391882e-07, rewards: -9.299999999999999, count: 50
epoch: 864479, loss: -5.686283088834898e-07, rewards: -9.299999999999999, count: 50
epoch: 864489, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 864499, loss: 1.0085105941470829e-06, rewards: -9.299999999999999, count: 50
epoch: 864509, loss: 1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 864519, loss: 1.020431568576896e-06, rewards: -9.299999999999999, count: 50
epoch: 864529, loss: 2.0015240806969814e-06, rewards: -9.299999999999999, count: 50
epoch: 864539, loss: 1.3002157174923923e-05, rewards: -9.299999999999999, count: 50
epoch: 864549, loss: 0.000111001732875593, rewards: -9.299999999999999, count: 5

epoch: 865429, loss: -1.3899802979722153e-06, rewards: -9.299999999999999, count: 50
epoch: 865439, loss: 9.262561775358336e-07, rewards: -9.299999999999999, count: 50
epoch: 865449, loss: -3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 865459, loss: -9.787082717593876e-07, rewards: -9.299999999999999, count: 50
epoch: 865469, loss: -7.605552809764049e-07, rewards: -9.299999999999999, count: 50
epoch: 865479, loss: -6.461143584601814e-07, rewards: -9.299999999999999, count: 50
epoch: 865489, loss: -5.7721135817701e-06, rewards: -9.299999999999999, count: 50
epoch: 865499, loss: -3.821849895757623e-05, rewards: -9.299999999999999, count: 50
epoch: 865509, loss: -8.814811735646799e-05, rewards: -9.299999999999999, count: 50
epoch: 865519, loss: 4.0149689084501006e-06, rewards: -9.299999999999999, count: 50
epoch: 865529, loss: 3.2942294637905434e-05, rewards: -9.299999999999999, count: 50
epoch: 865539, loss: -1.2787580089934636e-05, rewards: -9.299999999999999, cou

epoch: 866419, loss: 4.3630600998767477e-07, rewards: -9.299999999999999, count: 50
epoch: 866429, loss: -8.82148754044465e-07, rewards: -9.299999999999999, count: 50
epoch: 866439, loss: -2.7656554379973386e-07, rewards: -9.299999999999999, count: 50
epoch: 866449, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, count: 50
epoch: 866459, loss: 6.115436690379283e-07, rewards: -9.299999999999999, count: 50
epoch: 866469, loss: 1.7726421219776967e-06, rewards: -9.299999999999999, count: 50
epoch: 866479, loss: 7.239580099849263e-06, rewards: -9.299999999999999, count: 50
epoch: 866489, loss: 5.123972732690163e-05, rewards: -9.299999999999999, count: 50
epoch: 866499, loss: 3.811955320998095e-05, rewards: -9.299999999999999, count: 50
epoch: 866509, loss: 4.295110556995496e-05, rewards: -9.299999999999999, count: 50
epoch: 866519, loss: -2.3025273549137637e-05, rewards: -9.299999999999999, count: 50
epoch: 866529, loss: -1.3997554560774006e-05, rewards: -9.299999999999999, coun

epoch: 867409, loss: -2.092123031616211e-05, rewards: -9.299999999999999, count: 50
epoch: 867419, loss: -2.1708010535803623e-06, rewards: -9.299999999999999, count: 50
epoch: 867429, loss: 8.383989552385174e-06, rewards: -9.299999999999999, count: 50
epoch: 867439, loss: -6.89148919263971e-06, rewards: -9.299999999999999, count: 50
epoch: 867449, loss: 3.979206212534336e-06, rewards: -9.299999999999999, count: 50
epoch: 867459, loss: -4.220008804622921e-07, rewards: -9.299999999999999, count: 50
epoch: 867469, loss: -1.8203259060101118e-06, rewards: -9.299999999999999, count: 50
epoch: 867479, loss: 1.7082691101677483e-06, rewards: -9.299999999999999, count: 50
epoch: 867489, loss: 4.4941901933270856e-07, rewards: -9.299999999999999, count: 50
epoch: 867499, loss: -1.8942356518891756e-06, rewards: -9.299999999999999, count: 50
epoch: 867509, loss: -4.088878540642327e-06, rewards: -9.299999999999999, count: 50
epoch: 867519, loss: -1.6075373423518613e-05, rewards: -9.299999999999999, c

epoch: 868399, loss: 6.649613351328298e-05, rewards: -9.299999999999999, count: 50
epoch: 868409, loss: -3.193616976204794e-06, rewards: -9.299999999999999, count: 50
epoch: 868419, loss: 3.629922866821289e-05, rewards: -9.299999999999999, count: 50
epoch: 868429, loss: -2.6392935978947207e-05, rewards: -9.299999999999999, count: 50
epoch: 868439, loss: 5.809068625239888e-06, rewards: -9.299999999999999, count: 50
epoch: 868449, loss: 3.3104420253948774e-06, rewards: -9.299999999999999, count: 50
epoch: 868459, loss: -4.316568265494425e-06, rewards: -9.299999999999999, count: 50
epoch: 868469, loss: 3.2031537102739094e-06, rewards: -9.299999999999999, count: 50
epoch: 868479, loss: -1.5830993334020604e-06, rewards: -9.299999999999999, count: 50
epoch: 868489, loss: 4.339218264703959e-07, rewards: -9.299999999999999, count: 50
epoch: 868499, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 868509, loss: -1.1277198836978641e-06, rewards: -9.299999999999999, cou

epoch: 869389, loss: 3.534555435180664e-05, rewards: -9.299999999999999, count: 50
epoch: 869399, loss: -2.2195577912498266e-05, rewards: -9.299999999999999, count: 50
epoch: 869409, loss: 1.1349916348990519e-05, rewards: -9.299999999999999, count: 50
epoch: 869419, loss: -6.457566996687092e-06, rewards: -9.299999999999999, count: 50
epoch: 869429, loss: 4.385709871712606e-06, rewards: -9.299999999999999, count: 50
epoch: 869439, loss: -3.1757354008732364e-06, rewards: -9.299999999999999, count: 50
epoch: 869449, loss: 1.3589858554041712e-06, rewards: -9.299999999999999, count: 50
epoch: 869459, loss: 1.5652179854441783e-06, rewards: -9.299999999999999, count: 50
epoch: 869469, loss: 2.777576355583733e-07, rewards: -9.299999999999999, count: 50
epoch: 869479, loss: -7.379055091405462e-07, rewards: -9.299999999999999, count: 50
epoch: 869489, loss: -4.392862138047349e-06, rewards: -9.299999999999999, count: 50
epoch: 869499, loss: -2.2599697331315838e-05, rewards: -9.299999999999999, co

epoch: 870379, loss: 5.754470839747228e-05, rewards: -9.299999999999999, count: 50
epoch: 870389, loss: 1.8758773876470514e-05, rewards: -9.299999999999999, count: 50
epoch: 870399, loss: 4.069566784892231e-05, rewards: -9.299999999999999, count: 50
epoch: 870409, loss: -2.5409459340153262e-05, rewards: -9.299999999999999, count: 50
epoch: 870419, loss: -5.859136763319839e-06, rewards: -9.299999999999999, count: 50
epoch: 870429, loss: 1.1342764082655776e-05, rewards: -9.299999999999999, count: 50
epoch: 870439, loss: -5.379915364756016e-06, rewards: -9.299999999999999, count: 50
epoch: 870449, loss: 1.0907649539149133e-06, rewards: -9.299999999999999, count: 50
epoch: 870459, loss: -1.01327898960335e-07, rewards: -9.299999999999999, count: 50
epoch: 870469, loss: 3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 870479, loss: -5.042552970735414e-07, rewards: -9.299999999999999, count: 50
epoch: 870489, loss: 4.5180320284998743e-07, rewards: -9.299999999999999, count

epoch: 871369, loss: -1.6927718604620168e-07, rewards: -9.299999999999999, count: 50
epoch: 871379, loss: -2.0146370616203058e-07, rewards: -9.299999999999999, count: 50
epoch: 871389, loss: 7.498264267269406e-07, rewards: -9.299999999999999, count: 50
epoch: 871399, loss: -6.437301749429025e-07, rewards: -9.299999999999999, count: 50
epoch: 871409, loss: -9.965896197172697e-07, rewards: -9.299999999999999, count: 50
epoch: 871419, loss: -1.1277198836978641e-06, rewards: -9.299999999999999, count: 50
epoch: 871429, loss: -2.5212764285242883e-06, rewards: -9.299999999999999, count: 50
epoch: 871439, loss: -1.0627508345351089e-05, rewards: -9.299999999999999, count: 50
epoch: 871449, loss: -6.749510794179514e-05, rewards: -9.299999999999999, count: 50
epoch: 871459, loss: 9.367466191179119e-06, rewards: -9.299999999999999, count: 50
epoch: 871469, loss: -4.432082278071903e-05, rewards: -9.299999999999999, count: 50
epoch: 871479, loss: 2.1320582163752988e-05, rewards: -9.299999999999999,

epoch: 872359, loss: 1.7035007431331906e-06, rewards: -9.299999999999999, count: 50
epoch: 872369, loss: -2.9802322387695312e-08, rewards: -9.299999999999999, count: 50
epoch: 872379, loss: -1.2075901167918346e-06, rewards: -9.299999999999999, count: 50
epoch: 872389, loss: 7.843971161491936e-07, rewards: -9.299999999999999, count: 50
epoch: 872399, loss: 2.0265579792067e-07, rewards: -9.299999999999999, count: 50
epoch: 872409, loss: -2.157688072657038e-07, rewards: -9.299999999999999, count: 50
epoch: 872419, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 872429, loss: -2.6345253445470007e-07, rewards: -9.299999999999999, count: 50
epoch: 872439, loss: -2.4783612389001064e-06, rewards: -9.299999999999999, count: 50
epoch: 872449, loss: -2.451777436363045e-05, rewards: -9.299999999999999, count: 50
epoch: 872459, loss: -0.00014311791164800525, rewards: -9.299999999999999, count: 50
epoch: 872469, loss: -1.916766086651478e-05, rewards: -9.299999999999999, co

epoch: 873349, loss: -5.364418029785156e-07, rewards: -9.299999999999999, count: 50
epoch: 873359, loss: -8.261203561232833e-07, rewards: -9.299999999999999, count: 50
epoch: 873369, loss: -2.2220610844669864e-06, rewards: -9.299999999999999, count: 50
epoch: 873379, loss: -1.1596679541980848e-05, rewards: -9.299999999999999, count: 50
epoch: 873389, loss: -8.904099377105013e-05, rewards: -9.299999999999999, count: 50
epoch: 873399, loss: 6.51156879030168e-05, rewards: -9.299999999999999, count: 50
epoch: 873409, loss: -3.522634506225586e-05, rewards: -9.299999999999999, count: 50
epoch: 873419, loss: -2.389192559348885e-05, rewards: -9.299999999999999, count: 50
epoch: 873429, loss: 1.0426044354971964e-05, rewards: -9.299999999999999, count: 50
epoch: 873439, loss: 7.890463166404516e-06, rewards: -9.299999999999999, count: 50
epoch: 873449, loss: -5.419254193839151e-06, rewards: -9.299999999999999, count: 50
epoch: 873459, loss: 1.6212463549436507e-07, rewards: -9.299999999999999, cou

epoch: 874339, loss: 5.960464477539063e-08, rewards: -9.299999999999999, count: 50
epoch: 874349, loss: -5.662441253662109e-07, rewards: -9.299999999999999, count: 50
epoch: 874359, loss: 3.4213064736832166e-07, rewards: -9.299999999999999, count: 50
epoch: 874369, loss: 1.2147426105002523e-06, rewards: -9.299999999999999, count: 50
epoch: 874379, loss: 2.2172928311192663e-06, rewards: -9.299999999999999, count: 50
epoch: 874389, loss: 3.6156177429802483e-06, rewards: -9.299999999999999, count: 50
epoch: 874399, loss: 1.284480094909668e-05, rewards: -9.299999999999999, count: 50
epoch: 874409, loss: 7.183551497291774e-05, rewards: -9.299999999999999, count: 50
epoch: 874419, loss: -1.5763043847982772e-05, rewards: -9.299999999999999, count: 50
epoch: 874429, loss: 3.608703627833165e-05, rewards: -9.299999999999999, count: 50
epoch: 874439, loss: -2.5556088075973094e-05, rewards: -9.299999999999999, count: 50
epoch: 874449, loss: 8.493661880493164e-06, rewards: -9.299999999999999, count

epoch: 875329, loss: 0.00012905358744319528, rewards: -9.299999999999999, count: 50
epoch: 875339, loss: -7.309079228434712e-05, rewards: -9.299999999999999, count: 50
epoch: 875349, loss: -3.855705290334299e-05, rewards: -9.299999999999999, count: 50
epoch: 875359, loss: 3.6776066281163367e-06, rewards: -9.299999999999999, count: 50
epoch: 875369, loss: 1.650571903155651e-05, rewards: -9.299999999999999, count: 50
epoch: 875379, loss: 6.155967639642768e-06, rewards: -9.299999999999999, count: 50
epoch: 875389, loss: -4.951953997078817e-06, rewards: -9.299999999999999, count: 50
epoch: 875399, loss: -2.186298388551222e-06, rewards: -9.299999999999999, count: 50
epoch: 875409, loss: 2.1803380150231533e-06, rewards: -9.299999999999999, count: 50
epoch: 875419, loss: -7.534027304245683e-07, rewards: -9.299999999999999, count: 50
epoch: 875429, loss: -1.99079508433897e-07, rewards: -9.299999999999999, count: 50
epoch: 875439, loss: 3.397464638510428e-07, rewards: -9.299999999999999, count:

epoch: 876319, loss: -1.2104511370125692e-05, rewards: -9.299999999999999, count: 50
epoch: 876329, loss: -8.642673492431641e-07, rewards: -9.299999999999999, count: 50
epoch: 876339, loss: 4.507303401624085e-06, rewards: -9.299999999999999, count: 50
epoch: 876349, loss: -2.5832653136603767e-06, rewards: -9.299999999999999, count: 50
epoch: 876359, loss: 1.115798909268051e-06, rewards: -9.299999999999999, count: 50
epoch: 876369, loss: -2.837181227732799e-07, rewards: -9.299999999999999, count: 50
epoch: 876379, loss: 1.3113021779531664e-08, rewards: -9.299999999999999, count: 50
epoch: 876389, loss: -2.539158003855846e-07, rewards: -9.299999999999999, count: 50
epoch: 876399, loss: -6.67572024326546e-08, rewards: -9.299999999999999, count: 50
epoch: 876409, loss: -3.1471253691961465e-07, rewards: -9.299999999999999, count: 50
epoch: 876419, loss: -3.1232832498062635e-07, rewards: -9.299999999999999, count: 50
epoch: 876429, loss: -1.0180473282161984e-06, rewards: -9.299999999999999, 

epoch: 877309, loss: 5.4836274188119205e-08, rewards: -9.299999999999999, count: 50
epoch: 877319, loss: 6.771087441848067e-07, rewards: -9.299999999999999, count: 50
epoch: 877329, loss: -6.34193440873787e-07, rewards: -9.299999999999999, count: 50
epoch: 877339, loss: -3.194809039541724e-07, rewards: -9.299999999999999, count: 50
epoch: 877349, loss: -3.325939132992062e-07, rewards: -9.299999999999999, count: 50
epoch: 877359, loss: -1.8358230136072962e-07, rewards: -9.299999999999999, count: 50
epoch: 877369, loss: -1.497268726780021e-06, rewards: -9.299999999999999, count: 50
epoch: 877379, loss: -8.275508662336506e-06, rewards: -9.299999999999999, count: 50
epoch: 877389, loss: -6.120443140389398e-05, rewards: -9.299999999999999, count: 50
epoch: 877399, loss: -2.5534629912726814e-06, rewards: -9.299999999999999, count: 50
epoch: 877409, loss: -5.143880844116211e-05, rewards: -9.299999999999999, count: 50
epoch: 877419, loss: 1.1601448022702243e-05, rewards: -9.299999999999999, co

epoch: 878299, loss: -1.9060373233514838e-05, rewards: -9.299999999999999, count: 50
epoch: 878309, loss: 4.55260260423529e-06, rewards: -9.299999999999999, count: 50
epoch: 878319, loss: 4.961490503774257e-06, rewards: -9.299999999999999, count: 50
epoch: 878329, loss: -4.428625288710464e-06, rewards: -9.299999999999999, count: 50
epoch: 878339, loss: 1.7726421219776967e-06, rewards: -9.299999999999999, count: 50
epoch: 878349, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 878359, loss: 3.0279159091151087e-07, rewards: -9.299999999999999, count: 50
epoch: 878369, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 878379, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 878389, loss: 8.702278364580707e-08, rewards: -9.299999999999999, count: 50
epoch: 878399, loss: -6.139278525552072e-07, rewards: -9.299999999999999, count: 50
epoch: 878409, loss: -6.437301749429025e-07, rewards: -9.299999999999999, count

epoch: 879289, loss: -5.708932803827338e-06, rewards: -9.299999999999999, count: 50
epoch: 879299, loss: 4.781484676641412e-06, rewards: -9.299999999999999, count: 50
epoch: 879309, loss: -2.6428699584357673e-06, rewards: -9.299999999999999, count: 50
epoch: 879319, loss: 1.4007091522216797e-06, rewards: -9.299999999999999, count: 50
epoch: 879329, loss: -5.686283088834898e-07, rewards: -9.299999999999999, count: 50
epoch: 879339, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 879349, loss: 3.8743019104003906e-07, rewards: -9.299999999999999, count: 50
epoch: 879359, loss: -3.492832263418677e-07, rewards: -9.299999999999999, count: 50
epoch: 879369, loss: -7.891654831837513e-07, rewards: -9.299999999999999, count: 50
epoch: 879379, loss: -1.0132789611816406e-06, rewards: -9.299999999999999, count: 50
epoch: 879389, loss: -3.7550926208496094e-06, rewards: -9.299999999999999, count: 50
epoch: 879399, loss: -2.046823465207126e-05, rewards: -9.299999999999999, 

In [None]:
def sample_action(self,state):
    probs = self.policy_net(state) # 4
    if np.random.uniform() < 0.5:
        action = np.random.randint(0,2)
        return action, torch.log(probs[action]+1e-8).detach()
    dist = Categorical(probs)
    action = dist.sample()
    log_prob = dist.log_prob(action)
    return action.item(),log_prob.detach()

# 替换方法
import types
agent.sample_action = types.MethodType(sample_action, agent)

In [16]:
import time
def visualize_agent(env, agent, num_episodes=5):
    """
    渲染显示智能体的行动
    """
    env = gym.make('CliffWalking-v0', render_mode='human')  # 创建可视化环境
    
    for episode in range(num_episodes):
        state_tuple = env.reset()
        state = state_tuple[0] if isinstance(state_tuple, tuple) else state_tuple
        total_reward = 0
        steps = 0
        done = False
        
        print(f"\nEpisode {episode + 1}")
        
        while not done:
            env.render()  # 渲染当前状态
            
            # 将状态转换为one-hot编码
            state_onehot = np.zeros(48)
            state_onehot[state] = 1
            
            # 使用训练好的策略选择动作
            with torch.no_grad():
                if np.random.random() < 0.0:
                    action = np.random.randint(0, 4)
                else:
                    state_tensor = torch.FloatTensor(state_onehot)
                    probs = agent.policy_net(state_tensor)
                    action = probs.argmax().item()  # 使用最可能的动作
            
            # 执行动作
            step_result = env.step(action)
            if len(step_result) == 4:
                next_state, reward, done, _ = step_result
            else:
                next_state, reward, terminated, truncated, _ = step_result
                done = terminated or truncated
            
            total_reward += reward
            steps += 1
            state = next_state
            
            # 添加小延迟使动作更容易观察
            time.sleep(0.5)
        
        print(f"Episode finished after {steps} steps. Total reward: {total_reward}")
    
    env.close()

# 在主程序最后添加：
if __name__ == "__main__":    
    # 训练完成后显示智能体行动
    print("\nVisualizing trained agent behavior...")
    env = gym.make('CliffWalking-v0',render_mode='human')
    visualize_agent(env, agent)


Visualizing trained agent behavior...


2025-04-27 13:52:09.865 python[67964:170339742] +[IMKClient subclass]: chose IMKClient_Modern
2025-04-27 13:52:09.865 python[67964:170339742] +[IMKInputSession subclass]: chose IMKInputSession_Modern



Episode 1
Episode finished after 17 steps. Total reward: -17

Episode 2


KeyboardInterrupt: 

In [None]:
env.close()