In [1]:
import os
import torch as T
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.distributions.normal import Normal
import numpy as np
import matplotlib.pyplot as plt
import random



from gym.envs.classic_control import rendering



In [2]:
class ReplayBuffer(object):
    def __init__(self, mem_size, input_shape, n_actions):
        self.mem_size = mem_size
        self.n_actions = n_actions
        self.mem_cntr = 0
        self.state_memory = np.zeros((self.mem_size,*input_shape))
        self.new_state_memory = np.zeros((self.mem_size,*input_shape))
        self.action_memory = np.zeros((self.mem_size,n_actions))
        self.reward_memory = np.zeros(self.mem_size)
        self.terminal_memory = np.zeros(self.mem_size,dtype=np.bool_)
    def store_transition(self, state, action, reward,new_state, done):
        index = self.mem_cntr % self.mem_size
        self.state_memory[index]= state
        self.new_state_memory[index]= new_state
        self.action_memory[index]= action #action is an array, so array of arrays
        self.reward_memory[index]= reward
        self.terminal_memory[index]= done
        self.mem_cntr += 1
    def sample_buffer(self, batch_size):
        max_mem = min(self.mem_cntr,self.mem_size) # cntr can be greater than size, but don't want to take mems that don't exist
        batch = np.random.choice(max_mem,batch_size) # from 0 to max_mem of size batch_size
        
        states = self.state_memory[batch]
        new_states = self.new_state_memory[batch]
        actions = self.action_memory[batch]
        rewards = self.reward_memory[batch]
        terminals = self.terminal_memory[batch]
        
        return states,actions,rewards,new_states,terminals
    

In [3]:
class CriticNetwork(nn.Module):
    '''this evaluates the value of a state,action pair'''
    def __init__(self,beta, input_dims, n_actions,fc1_dims=256, fc2_dims=256,
                name='Critic Network',chkpt_dir='/Users/bookerschelhaas/Desktop/Code For fun/RL/SAC'):
        super(CriticNetwork,self).__init__()
        self.beta = beta #learning rate
        self.input_dims= input_dims
        self.fc1_dims = fc1_dims
        self.fc2_dims = fc2_dims
        self.n_actions = n_actions
        self.name = name
        self.chkpt_dir = chkpt_dir
        self.chkpt_file = os.path.join(self.chkpt_dir, name +'_sac')
        
        self.fc1 = nn.Linear(self.input_dims[0]+n_actions,self.fc1_dims)
        self.fc2 = nn.Linear(self.fc1_dims,self.fc2_dims)
        self.q = nn.Linear(self.fc2_dims, 1)
        
        self.optimizer = optim.Adam(self.parameters(), lr=self.beta)
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)
    def forward(self,state, action):
        action_value = self.fc1(T.cat([state,action],dim=1))
        action_value = F.relu(action_value)
        action_value = self.fc2(action_value)
        action_value = F.relu(action_value)
        q = self.q(action_value)
        return q
    def save_checkpoint(self):
        print('.....saving checkpoint.....')
        T.save(self.state_dict(), self.chkpt_file)
    def load_checkpoint(self):
        print('.......loading checkpoint........')
        self.load_state_dict(T.load(self.chkpt_file))
    

In [4]:
class ValueNetwork(nn.Module):
    '''Just estimates the value of a state or set of states,
    doesnt care about what action you took or are taking'''
    def __init__(self, beta, input_dims, fc1_dims=256,fc2_dims = 256,
                name='Value Network',chkpt_dir='/Users/bookerschelhaas/Desktop/Code For fun/RL/SAC'):
        super(ValueNetwork,self).__init__()
        self.beta = beta
        self.input_dims= input_dims
        self.fc1_dims = fc1_dims
        self.fc2_dims = fc2_dims
        self.name = name
        self.chkpt_dir = chkpt_dir
        self.chkpt_file = os.path.join(self.chkpt_dir, name +'_sac')
        
        self.fc1 = nn.Linear(*self.input_dims,self.fc1_dims)
        self.fc2 = nn.Linear(self.fc1_dims,self.fc2_dims)
        self.v = nn.Linear(self.fc2_dims, 1)
        
        self.optimizer = optim.Adam(self.parameters(), lr=self.beta)
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)
    def forward(self,state):
        state_value = self.fc1(state)
        state_value = F.relu(state_value)
        state_value = self.fc2(state_value)
        state_value = F.relu(state_value)
        v = self.v(state_value)
        return v
    
    def save_checkpoint(self):
        print('.....saving checkpoint.....')
        T.save(self.state_dict(), self.chkpt_file)
    def load_checkpoint(self):
        print('.......loading checkpoint........')
        self.load_state_dict(T.load(self.chkpt_file))
    
        

In [5]:
class ActorNetwork(nn.Module):
    '''Outputs a mean and stdev distribution of actions'''
    def __init__(self,alpha, input_dims,max_action,fc1_dims=256,fc2_dims = 256,
                n_actions = 2,name='actor',chkpt_dir='/Users/bookerschelhaas/Desktop/Code For fun/RL/SAC'):
        '''max_action is the mulptiplication scalar so that the 
        actions in the environment are of the right scale'''
        super(ActorNetwork,self).__init__()
        self.alpha = alpha
        self.input_dims= input_dims
        self.fc1_dims = fc1_dims
        self.fc2_dims = fc2_dims
        self.chkpt_dir = chkpt_dir
        self.n_actions =n_actions
        self.max_action = max_action
        self.reparam_noise = 1e-6 # can't take log of 0, 
        self.chkpt_file = os.path.join(self.chkpt_dir, name +'_sac')
        self.normal_tracker = []
        self.fc1 = nn.Linear(*self.input_dims,self.fc1_dims)
        self.fc2 = nn.Linear(self.fc1_dims,self.fc2_dims)
        self.mu = nn.Linear(self.fc2_dims,self.n_actions) #output is mean of probability distribution of each action for the policy
        self.sigma = nn.Linear(self.fc2_dims, self.n_actions)#output is stdev of probability distribution of each action for the policy
        
        self.optimizer = optim.Adam(self.parameters(), lr=self.alpha)
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)
    def forward(self,state):
        prob = self.fc1(state)
        prob = F.relu(prob)
        prob = self.fc2(prob)
        prob = F.relu(prob)
        
        mu = self.mu(prob)
        sigma = self.sigma(prob)
        # the clamp makes it so that the std dev isn't so wide.
        sigma = T.clamp(sigma,min = self.reparam_noise, max=1)
        
        return mu, sigma
    def sample_normal(self, state, reparameterize = True):
        mu, sigma = self.forward(state)
        self.normal_tracker.append((mu,sigma,state))
        #print('MU:',mu, 'sigma ', sigma)
        probabilites = Normal(mu, sigma)
        
        if reparameterize:
            actions = probabilites.rsample() #this gives a sample + some noise to encourage exploration
        else:
            actions = probabilites.sample()
        # tanh puts the value between -1 and 1, then multiply that by max value of action space to scale
        action_tanh = T.tanh(actions)
        action_scaled = T.tanh(actions) * T.tensor(self.max_action).to(self.device)
        log_probs = probabilites.log_prob(actions)  # for the loss function for updating network
        # .pow() is same as ** but tensor
        log_probs -= T.log(1-action_tanh.pow(2)+self.reparam_noise) # need the noise because log(0) = UNDEF
        log_probs = log_probs.sum(1,keepdim=True) # need a scalar quality to calculate loss 
        
        return action_scaled, log_probs
    
    
    def save_checkpoint(self):
        print('.....saving checkpoint.....')
        T.save(self.state_dict(), self.chkpt_file)
    def load_checkpoint(self):
        print('.......loading checkpoint........')
        self.load_state_dict(T.load(self.chkpt_file))
        

In [6]:
class SACAgent():
    '''Reward scale is for rewards and critic loss. takes into account 
    the entropy in system. Can be messed around with.
    
    tau is for doing the soft copy of the networks, instead of a 
    hard copy like in DQL
    '''
    def __init__(self,alpha = 0.0003, beta = 0.0003, input_dims = [1],
                 env = None, gamma = 0.99,n_actions=2, max_size = 1000000,tau = 0.005,
                layer1_size = 256, layer2_size = 256, batch_size = 256,
                reward_scale = 1):
        self.alpha = alpha
        self.beta = beta
        self.tau = tau
        self.input_dims = input_dims
        self.env = env
        self.gamma = gamma
        self.n_actions = n_actions
        self.max_size = max_size
        self.layer1_size = layer1_size
        self.layer2_size = layer2_size
        self.batch_size = batch_size
        self.scale= reward_scale
        
        self.memory = ReplayBuffer(max_size, input_dims, n_actions = n_actions)
        
        self.actor = ActorNetwork(self.alpha, self.input_dims,n_actions = n_actions,
                                  name = 'actor', max_action = env.action_space.high.item())
        self.critic_1 = CriticNetwork(beta,input_dims,
                                      n_actions = n_actions, name = 'critic 1')
        
        self.critic_2 = CriticNetwork(beta,input_dims,
                                      n_actions = n_actions, name = 'critic 2')
        self.value = ValueNetwork(beta, input_dims,name = 'value')
        self.target_value = ValueNetwork(beta,input_dims, 
                                         name = 'target value')
        
        self.update_network_parameters(tau=1) #does a hard copy of value network to target network on first time. Otherwise we will detune by tau
        
    def choose_action(self,state):
        ''' returns a numpy array of the mean of distribution for each action'''
        state = T.tensor([state],dtype=T.float).to(self.actor.device)
        actions, _ = self.actor.sample_normal(state, reparameterize = False)
        
        return actions.cpu().detach().numpy()[0]
    def remember(self,state, action , reward, new_state, done):
        self.memory.store_transition(state, action, reward, new_state, done)
        
    def update_network_parameters(self, tau = None):
        '''updates the target_value network to be soft copy of value netork'''
        if tau is None: #this makes it so it does a soft copy for n=2 onward
            tau = self.tau
        
        target_value_params = self.target_value.named_parameters() #gets current params for target value network
        value_params = self.value.named_parameters() # gets current vals for params for value network
        
        target_value_state_dict = dict(target_value_params)
        value_state_dict = dict(value_params)
        
        for name in value_state_dict:
            value_state_dict[name] = tau*value_state_dict[name].clone() +\
            (1-tau)*target_value_state_dict[name].clone()
        self.target_value.load_state_dict(value_state_dict)
    
    def save_models(self):
        print('.....saving models......')
        self.actor.save_checkpoint()
        self.critic_1.save_checkpoint()
        self.critic_2.save_checkpoint()
        self.value.save_checkpoint()
        self.target_value.save_checkpoint()
            
    def load_models(self):
        print('.....saving models......')
        self.actor.load_checkpoint()
        self.critic_1.load_checkpoint()
        self.critic_2.load_checkpoint()
        self.value.load_checkpoint()
        self.target_value.load_checkpoint()
    
    def learn(self):
        if self.memory.mem_cntr < self.batch_size:
            return
        
        state, action, reward, new_state, done = \
                    self.memory.sample_buffer(self.batch_size)
        
        rewards = T.tensor(reward,dtype=T.float).to(self.actor.device)
        dones = T.tensor(done).to(self.actor.device)
        new_states = T.tensor(new_state,dtype=T.float).to(self.actor.device)
        action = T.tensor(action,dtype=T.float).to(self.actor.device)
        states = T.tensor(state,dtype=T.float).to(self.actor.device)
        
        value = self.value(states).view(-1) #makes the datatype correct
        target_value = self.target_value(new_states).view(-1) #print this out without view and see what it does
        target_value[done] = 0.0 #???
        
        actions, log_probs = self.actor.sample_normal(states, reparameterize = False)
        log_probs = log_probs.view(-1)
        
        q1_new_policy = self.critic_1.forward(states,actions)
        q2_new_policy = self.critic_2.forward(states,actions)
        # take the min of the two q values. Stabilizes learning
        critic_value = T.min(q1_new_policy,q2_new_policy) 
        critic_value= critic_value.view(-1)
        
        self.value.optimizer.zero_grad()
        value_target = critic_value - 0.2*log_probs
        value_loss = 0.5* F.mse_loss(value,value_target)
        value_loss.backward(retain_graph =True)
        self.value.optimizer.step()
        
        actions, log_probs = self.actor.sample_normal(states, reparameterize = True) #reparameterize because we dont waant to lose gradient to update actor
        log_probs = log_probs.view(-1)
        q1_new_policy = self.critic_1.forward(states,actions)
        q2_new_policy = self.critic_2.forward(states,actions)
        #print(q1_new_policy,q2_new_policy, actions)
        # take the min of the two q values. Stabilizes learning
        critic_value = T.min(q1_new_policy,q2_new_policy) 
        critic_value= critic_value.view(-1)
        
        actor_loss = 0.2*log_probs - critic_value
        actor_loss = T.mean(actor_loss)
        self.actor.optimizer.zero_grad()
        actor_loss.backward(retain_graph = True)
        self.actor.optimizer.step()
        
        self.critic_1.optimizer.zero_grad()
        self.critic_2.optimizer.zero_grad()
        # q_hat includes entropy in loss function
        q_hat = self.scale*rewards + self.gamma*target_value
        q1_old_policy = self.critic_1.forward(states,action).view(-1)
        q2_old_policy = self.critic_2.forward(states,action).view(-1)
        critic_1_loss = 0.5 * F.mse_loss(q1_old_policy, q_hat)
        critic_2_loss = 0.5 * F.mse_loss(q2_old_policy, q_hat)
        
        critic_loss = critic_1_loss + critic_2_loss
        critic_loss.backward()
        self.critic_1.optimizer.step()
        self.critic_2.optimizer.step()
        
        self.update_network_parameters()
        
        
        
        

In [7]:
import pybullet_envs
import pybullet as p

p.connect(p.DIRECT)
import gym

#env = InvertedPendulum()
env = gym.make('MountainCarContinuous-v0')
#env = gym.make("BipedalWalker-v3")
#env = gym.make('InvertedDoublePendulumBulletEnv-v0')

agent = SACAgent(input_dims = env.observation_space.shape, env =env,
                n_actions = env.action_space.shape[0])

seed = np.random.seed(0) #have to do this

file_name = 'kfjdlkjf'
figure_file = 'plots/' +file_name
load_checkpoint = True

best_score = env.reward_range[0]
score_history = []
n_games = 50000
avg_scores = []
if load_checkpoint:
    agent.load_models()
    #env.render(mode='human')

for i in range(n_games):
    done = False
    score =0
    obs = env.reset()
    obs = env.observation_space.sample()
    print(obs)
    steps = 0
    while not done:
        act = agent.choose_action(obs)
       
        new_state, reward, done, info = env.step(act)
        agent.remember(obs,act,reward,new_state,done)
        if not load_checkpoint:
            agent.learn()
        score += reward
        obs = new_state
        steps+=1

    score_history.append(score)
    avg_score = np.mean(score_history[-100:])
    avg_scores.append(avg_score)
    
    if i %10 ==0:
        agent.save_models()
    if avg_score==best_score:
        best_score = avg_score
        if not load_checkpoint:
            agent.save_models()
    print('episode ', i,'score: ', score,'avg score: ', avg_score)

.....saving models......
.......loading checkpoint........
.......loading checkpoint........
.......loading checkpoint........
.......loading checkpoint........
.......loading checkpoint........
[-0.98968554 -0.02885164]


  state = T.tensor([state],dtype=T.float).to(self.actor.device)


.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  0 score:  -31.59924170761155 avg score:  -31.59924170761155
[-0.1631567  -0.06659688]
episode  1 score:  -31.195416076320107 avg score:  -31.397328891965827
[0.5691051  0.02813734]
episode  2 score:  -29.91701443905644 avg score:  -30.903890740996033
[-0.64097506 -0.00368461]
episode  3 score:  -30.189457649004552 avg score:  -30.725282467998163
[0.4000836  0.00470653]
episode  4 score:  -31.143666879812276 avg score:  -30.808959350360986
[0.24884525 0.02019336]
episode  5 score:  -30.805424544677997 avg score:  -30.80837021608049
[ 0.25699818 -0.06280746]
episode  6 score:  -29.58099386017296 avg score:  -30.633030736665127
[-1.0846096   0.01441574]
episode  7 score:  -30.032696596702547 avg score:  -30.557988969169806
[-1.0330003  -0.00269347]
episode  8 score:  -30.03557415598263 avg score:  -30.499942878815673


episode  72 score:  -30.80999509696169 avg score:  -29.052605127087862
[-0.24416432 -0.01805981]
episode  73 score:  -31.43365130448283 avg score:  -29.08478142678239
[-0.604864   -0.01919538]
episode  74 score:  -30.235691920931608 avg score:  -29.100126900037708
[-0.57659775  0.03789165]
episode  75 score:  -33.16734952250613 avg score:  -29.15364298717545
[0.49182048 0.05012124]
episode  76 score:  -31.245396721443328 avg score:  -29.18080862008802
[-0.44981351 -0.0556794 ]
episode  77 score:  -30.34612277365309 avg score:  -29.19574854513372
[0.00126359 0.03032239]
episode  78 score:  -31.692362806804777 avg score:  -29.227351257306776
[-0.672322   -0.04357587]
episode  79 score:  -29.305703324795882 avg score:  -29.2283306581504
[-0.95067036 -0.06863715]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  80 score:  -30.817069618995856 avg score:  -29.247944719

episode  145 score:  -30.776845967845816 avg score:  -30.495205009924245
[ 0.5082654  -0.03553743]
episode  146 score:  -30.080496695309503 avg score:  -30.505401362488918
[-0.9158256  -0.04980589]
episode  147 score:  -30.107915458327998 avg score:  -30.503652950254413
[-0.9649609  -0.05395727]
episode  148 score:  -32.63695823098884 avg score:  -30.518867960053303
[-0.9692432 -0.0640692]
episode  149 score:  -30.046505478507207 avg score:  -30.535208535658832
[ 0.5413571 -0.0059736]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  150 score:  -30.77089523416766 avg score:  -30.551639487531073
[-0.9149493  0.0579451]
episode  151 score:  -30.267631088146945 avg score:  -30.5366326253004
[0.5183623  0.06360836]
episode  152 score:  -29.960854576719488 avg score:  -30.529148740442356
[0.0758213  0.04626741]
episode  153 score:  -29.86707482164044 avg score:  -30.5

episode  217 score:  -30.34666587932363 avg score:  -30.596034964812283
[ 0.19640997 -0.04971896]
episode  218 score:  -30.492184362552276 avg score:  -30.59233683930125
[-1.1725429   0.04052778]
episode  219 score:  -29.84256645448737 avg score:  -30.58246019883484
[-1.1819538   0.03819411]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  220 score:  -32.004143481383025 avg score:  -30.58790598962789
[-0.8318648   0.00169986]
episode  221 score:  -30.74617357118555 avg score:  -30.59133197071232
[0.23178403 0.01214442]
episode  222 score:  -30.53816867640316 avg score:  -30.586118947057912
[-0.88191134 -0.01980806]
episode  223 score:  -30.28948346367136 avg score:  -30.577186339249074
[ 0.2803068  -0.06840351]
episode  224 score:  -30.889956294199642 avg score:  -30.57165175267448
[-0.2943864   0.00268343]
episode  225 score:  -31.396239873365822 avg score:  -3

.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  290 score:  -31.45197237615597 avg score:  -30.52337184820713
[ 0.28789604 -0.01660373]
episode  291 score:  -30.110682318156137 avg score:  -30.52796813435613
[ 0.5937756  -0.01323976]
episode  292 score:  -30.59509271424126 avg score:  -30.535554160938446
[-0.6405249  -0.00839954]
episode  293 score:  -29.557424376345722 avg score:  -30.520952084774297
[-0.79318655 -0.02432498]
episode  294 score:  -30.679721078300638 avg score:  -30.527598101473572
[0.3991434  0.04912828]
episode  295 score:  -29.262711225359975 avg score:  -30.514850748493483
[-1.0941553   0.05503722]
episode  296 score:  -30.547332659557487 avg score:  -30.5067995044772
[-0.50163627 -0.05856646]
episode  297 score:  -31.147934052405905 avg score:  -30.506369212434894
[-1.1056204   0.00603382]
episode  298 score:  -30.644022211166106 avg score:

episode  361 score:  -32.113582926633214 avg score:  -30.391967778671514
[-0.3701428  -0.01435701]
episode  362 score:  -30.91686767330667 avg score:  -30.41306032403103
[ 0.42089248 -0.01811183]
episode  363 score:  -30.444785711394168 avg score:  -30.40992253822006
[-0.24754862  0.00285241]
episode  364 score:  -31.218305591112934 avg score:  -30.410810965397488
[-0.42438954 -0.00838286]
episode  365 score:  -29.360115233770014 avg score:  -30.40937115953828
[-0.09531266 -0.00615613]
episode  366 score:  -31.59312046889926 avg score:  -30.41830785182377
[-1.035211   0.0269997]
episode  367 score:  -29.538231695332442 avg score:  -30.39525378557561
[-0.11965329  0.03395991]
episode  368 score:  -29.929461323791603 avg score:  -30.391144039936638
[-0.9874101  -0.03970416]
episode  369 score:  -31.19046373068263 avg score:  -30.390083849632944
[-1.1309427 -0.0401139]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving 

episode  433 score:  -29.369002398588634 avg score:  -30.494744972914162
[-1.1499655   0.04004817]
episode  434 score:  -30.955097653465614 avg score:  -30.49446749944816
[ 0.0713893  -0.06929292]
episode  435 score:  -29.773036985600974 avg score:  -30.48283857468588
[-0.56851107 -0.03039336]
episode  436 score:  -30.619194921024107 avg score:  -30.49162580756488
[ 0.41230607 -0.00066307]
episode  437 score:  -31.606183072472852 avg score:  -30.493922581715875
[0.29223382 0.01857167]
episode  438 score:  -29.052476094264716 avg score:  -30.49558745276636
[-1.0511972   0.01869677]
episode  439 score:  -31.723090071652372 avg score:  -30.522049254987586
[-0.4167548   0.05345054]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  440 score:  -30.6008358230798 avg score:  -30.49954670853273
[-0.67809385 -0.03631746]
episode  441 score:  -29.55119262411826 avg score:  

episode  505 score:  -30.607766993891573 avg score:  -30.402717354794515
[ 0.37666315 -0.01239399]
episode  506 score:  -30.9310133792008 avg score:  -30.40371813018573
[-0.27144298 -0.04610218]
episode  507 score:  -28.786826974595105 avg score:  -30.393078898273437
[ 0.5451239  -0.02281108]
episode  508 score:  -31.33867159354141 avg score:  -30.403654517025664
[-0.20509389  0.04186676]
episode  509 score:  -29.893291383637994 avg score:  -30.395900612314712
[-0.6134683   0.00187428]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  510 score:  -30.548692807242134 avg score:  -30.408893635085338
[-0.04494889  0.05618679]
episode  511 score:  -30.448729995664127 avg score:  -30.40420715243031
[-0.56751853 -0.01874375]
episode  512 score:  -31.242510088245567 avg score:  -30.409902380439657
[0.5750599  0.06210459]
episode  513 score:  -30.021705617809864 avg score

episode  577 score:  -29.100811150761103 avg score:  -30.386706189113557
[-0.6188551   0.04325482]
episode  578 score:  -28.515806618393 avg score:  -30.37518440848658
[0.45348138 0.06730349]
episode  579 score:  -30.559305829149917 avg score:  -30.376961456071783
[0.37567717 0.04036043]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  580 score:  -29.39630441069391 avg score:  -30.371699703392334
[-0.6851966  -0.02819654]
episode  581 score:  -30.755664549174174 avg score:  -30.377648781543144
[0.22858222 0.02182808]
episode  582 score:  -28.59052976020738 avg score:  -30.355576812038326
[-0.32108074 -0.04241657]
episode  583 score:  -31.328180985401215 avg score:  -30.366224751217583
[-0.565263    0.06891557]
episode  584 score:  -28.985383233603514 avg score:  -30.359933780428
[-0.19398937  0.02263064]
episode  585 score:  -30.753346587779117 avg score:  -30.3

episode  649 score:  -31.441014002558905 avg score:  -30.397415469137638
[-0.0499579  -0.05649481]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  650 score:  -31.28621093157217 avg score:  -30.410342442952583
[0.21166517 0.04238564]
episode  651 score:  -32.551602423059855 avg score:  -30.42459707269777
[-0.18790606 -0.02105248]
episode  652 score:  -30.750111778808865 avg score:  -30.413112673825434
[-0.36642963  0.06500944]
episode  653 score:  -30.529451175163512 avg score:  -30.42163677143247
[-1.1112597  -0.06298829]
episode  654 score:  -30.700860071943637 avg score:  -30.43439746594138
[0.21840554 0.05561839]
episode  655 score:  -30.22637470744095 avg score:  -30.43936143959257
[-0.12674254  0.04669912]
episode  656 score:  -30.111412021799516 avg score:  -30.427413782599128
[ 0.2616224  -0.06181452]
episode  657 score:  -30.40081091030693 avg score:  -

episode  721 score:  -30.672345657959557 avg score:  -30.661351221138652
[-0.16401717  0.04960749]
episode  722 score:  -30.020722690404966 avg score:  -30.647326812884607
[-0.4063969   0.05046213]
episode  723 score:  -31.7704770016931 avg score:  -30.659219836516925
[-0.07531143  0.05432908]
episode  724 score:  -29.127307731971154 avg score:  -30.63555157958482
[-1.0997338   0.03012965]
episode  725 score:  -29.774912680776797 avg score:  -30.6171083564661
[ 0.57127553 -0.0278921 ]
episode  726 score:  -29.73368675008193 avg score:  -30.607531072889806
[-0.40135983  0.00718489]
episode  727 score:  -29.474984190272608 avg score:  -30.586949111281186
[ 0.50372946 -0.03289374]
episode  728 score:  -31.405605089284162 avg score:  -30.607573427536842
[-0.7390008  -0.03027395]
episode  729 score:  -30.32821549938833 avg score:  -30.61507358704465
[-0.86675876  0.04868669]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....sav

episode  793 score:  -31.126568939962784 avg score:  -29.489072677469018
[0.5712038  0.05999402]
episode  794 score:  -30.989527839073837 avg score:  -29.507140469886153
[ 0.59879816 -0.00149748]
episode  795 score:  -30.640673692770893 avg score:  -29.5186944445177
[-0.47210518 -0.02958538]
episode  796 score:  -29.293804891075144 avg score:  -29.502317622882206
[-0.98233867  0.02728496]
episode  797 score:  -30.817226601017204 avg score:  -29.50294295083026
[ 0.46362302 -0.03874733]
episode  798 score:  -29.551227068865632 avg score:  -29.495062139001604
[-0.20128548 -0.00965957]
episode  799 score:  -29.63758340683295 avg score:  -29.479942950122755
[ 0.34480527 -0.05667334]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  800 score:  -30.86450624437933 avg score:  -29.481460634882804
[ 0.5492511  -0.00081841]
episode  801 score:  -31.342830108934677 avg score

episode  866 score:  -31.025787362936622 avg score:  -30.556576879505684
[-0.6869439  0.0571248]
episode  867 score:  -29.386092096436354 avg score:  -30.533789490447653
[0.04137491 0.06004678]
episode  868 score:  -32.24018573059496 avg score:  -30.550509089800165
[-0.0772627   0.05536719]
episode  869 score:  -29.88405748070091 avg score:  -30.536876678430012
[ 0.29121196 -0.05913586]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  870 score:  -29.921384375624434 avg score:  -30.53801645688366
[-0.17625946  0.05666697]
episode  871 score:  -31.35797721678742 avg score:  -30.552053927850725
[ 0.20376165 -0.01431772]
episode  872 score:  -31.439077788634478 avg score:  -30.56813240907411
[-0.83477366  0.0120303 ]
episode  873 score:  -29.259119372221797 avg score:  -30.54356549830121
[-0.7754688  -0.06169784]
episode  874 score:  -30.877153114932987 avg score:  

episode  939 score:  -31.48090261073914 avg score:  -30.43111806079876
[-1.0001254  -0.05511542]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  940 score:  -30.291412353606404 avg score:  -30.42797558108949
[-0.00486639 -0.03124405]
episode  941 score:  -29.8607929671897 avg score:  -30.421017119693566
[-0.8556894   0.05306768]
episode  942 score:  -29.99693827373919 avg score:  -30.40701900861935
[-0.68907166  0.04833378]
episode  943 score:  -30.29457585159851 avg score:  -30.399975600332596
[-0.84948653 -0.01145107]
episode  944 score:  -31.082990846688137 avg score:  -30.396275551421976
[-0.852355   -0.01977743]
episode  945 score:  -31.998379106732706 avg score:  -30.420718022956436
[-0.1730014  -0.05195156]
episode  946 score:  -30.45617284312603 avg score:  -30.41006332588297
[0.33908454 0.02754529]
episode  947 score:  -30.880679817731078 avg score:  -3

episode  1011 score:  -30.077410134985506 avg score:  -30.385950107567893
[0.15354279 0.01752958]
episode  1012 score:  -31.302460344862194 avg score:  -30.39540159337979
[-0.6820458  -0.00703279]
episode  1013 score:  -30.85523634617359 avg score:  -30.40350605552264
[-0.46288496  0.0480979 ]
episode  1014 score:  -30.059681195586112 avg score:  -30.3996684005582
[-0.03276949  0.00990229]
episode  1015 score:  -30.794509317749604 avg score:  -30.409260474282746
[-0.65668434 -0.05425232]
episode  1016 score:  -29.953133172217296 avg score:  -30.402981510971202
[-0.1625805  -0.05495926]
episode  1017 score:  -30.512697681378143 avg score:  -30.40153412771893
[ 0.27695876 -0.03911518]
episode  1018 score:  -29.786910121752435 avg score:  -30.401527815232697
[-0.33913288  0.00995206]
episode  1019 score:  -30.67004476188024 avg score:  -30.406878069529757
[-1.1523595  -0.04673356]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....


episode  1083 score:  -30.968738912490824 avg score:  -30.470073045264385
[-0.679223   -0.05711135]
episode  1084 score:  -29.9877615498961 avg score:  -30.459137393444216
[-0.41307518 -0.01180194]
episode  1085 score:  -29.925619120442825 avg score:  -30.446000342266064
[-0.48004988 -0.06086544]
episode  1086 score:  -29.62038489764267 avg score:  -30.449896872681165
[-0.55646425  0.04517344]
episode  1087 score:  -30.44894825511509 avg score:  -30.453856623911516
[-0.60362715  0.03358487]
episode  1088 score:  -30.02342962749885 avg score:  -30.45436717169462
[-0.88868576  0.0544729 ]
episode  1089 score:  -30.47911994569278 avg score:  -30.45563179905483
[-0.28320587  0.04174066]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  1090 score:  -29.82408159477036 avg score:  -30.43149628078346
[-0.7966442 -0.0168705]
episode  1091 score:  -31.512550498122245 avg s

episode  1155 score:  -31.83116398858733 avg score:  -30.531855722645158
[-0.67563814  0.06717732]
episode  1156 score:  -29.572438383205814 avg score:  -30.498752786421523
[-0.88981503 -0.0232364 ]
episode  1157 score:  -30.255790796015233 avg score:  -30.50835076949717
[ 0.5958374  -0.01500463]
episode  1158 score:  -29.895380393839098 avg score:  -30.48920579051938
[ 0.03488729 -0.04930448]
episode  1159 score:  -29.135768119642425 avg score:  -30.479558075961545
[-0.84197104  0.0152485 ]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  1160 score:  -30.31324972657476 avg score:  -30.49945696293573
[ 0.23065485 -0.04274276]
episode  1161 score:  -30.561140891232657 avg score:  -30.491798679067816
[-0.36192828  0.01530762]
episode  1162 score:  -30.971573548463958 avg score:  -30.501315309647666
[-0.05885438 -0.04575218]
episode  1163 score:  -29.85170107178116

episode  1227 score:  -31.167143182304272 avg score:  -30.591099913537914
[-0.58068633 -0.0254098 ]
episode  1228 score:  -30.343463381415827 avg score:  -30.584490606578157
[ 0.14400041 -0.00245109]
episode  1229 score:  -29.415166185592877 avg score:  -30.576083249559733
[-0.61544025  0.00163593]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  1230 score:  -31.36982691939084 avg score:  -30.582919835083466
[-0.03604029 -0.00627548]
episode  1231 score:  -30.12854115178947 avg score:  -30.58619877900415
[-0.18055752  0.06089764]
episode  1232 score:  -30.95904280519643 avg score:  -30.594243035453307
[0.44370583 0.0140619 ]
episode  1233 score:  -30.630028263198035 avg score:  -30.59710025932403
[-0.01696452  0.00325723]
episode  1234 score:  -30.806780416903976 avg score:  -30.606863028584712
[ 0.16818541 -0.00975106]
episode  1235 score:  -30.900092343144923 

episode  1299 score:  -30.982688329630545 avg score:  -30.494384994965955
[0.25096807 0.01633068]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  1300 score:  -29.2922688031157 avg score:  -30.478920795113712
[ 0.48856276 -0.01050366]
episode  1301 score:  -30.457306516823127 avg score:  -30.48426164495394
[ 0.4882781  -0.05010812]
episode  1302 score:  -30.580159800135046 avg score:  -30.494757191684066
[ 0.48882365 -0.04452909]
episode  1303 score:  -31.60553354523986 avg score:  -30.51267681398229
[-0.65393025  0.01469404]
episode  1304 score:  -29.981716598087257 avg score:  -30.525695447131767
[-0.71725976  0.00313446]
episode  1305 score:  -31.299969563275187 avg score:  -30.530729069806053
[-0.48357287 -0.04638308]
episode  1306 score:  -30.274375663263346 avg score:  -30.52937688258625
[-0.96231854  0.05343502]
episode  1307 score:  -30.55270686045755 av

episode  1371 score:  -31.241466937496494 avg score:  -30.5616058218055
[0.2738302  0.02270501]
episode  1372 score:  -29.52734218074461 avg score:  -30.55404552416616
[-0.21102469  0.03624325]
episode  1373 score:  -30.201992886638905 avg score:  -30.540237575689066
[-0.15136188  0.00213991]
episode  1374 score:  -31.172638855579667 avg score:  -30.562481547348952
[-0.53404295  0.03654185]
episode  1375 score:  -31.0501882645724 avg score:  -30.559375885649057
[-0.7936025  -0.01364643]
episode  1376 score:  -31.433884273273293 avg score:  -30.561374575680713
[-0.75027853 -0.0203416 ]
episode  1377 score:  -30.1347886068259 avg score:  -30.55349456534174
[-1.1824296   0.02494971]
episode  1378 score:  -30.833441490851648 avg score:  -30.557604472807633
[-0.6314043  -0.03728087]
episode  1379 score:  -31.12671471374072 avg score:  -30.574457157527487
[0.28610417 0.00784873]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....

episode  1443 score:  -30.458893008137537 avg score:  -30.592699552226527
[-0.13396917  0.04868659]
episode  1444 score:  -31.150283828708293 avg score:  -30.60294281049235
[0.15432866 0.05606001]
episode  1445 score:  -29.168567587356947 avg score:  -30.593239280938533
[ 0.38458043 -0.05017016]
episode  1446 score:  -29.183076440348856 avg score:  -30.582674143040034
[-0.27452716  0.03578226]
episode  1447 score:  -29.295892397694626 avg score:  -30.575336910460393
[ 0.4897591  -0.02567187]
episode  1448 score:  -31.404306519600933 avg score:  -30.585263744307454
[ 0.41884854 -0.02863616]
episode  1449 score:  -30.374545790324134 avg score:  -30.578226922774643
[ 0.41876134 -0.06843512]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  1450 score:  -31.722642837444017 avg score:  -30.58826684403752
[-0.15318882 -0.01437814]
episode  1451 score:  -30.6584694625473

episode  1515 score:  -31.314816815118835 avg score:  -30.417620655864503
[-1.0338866  -0.05866912]
episode  1516 score:  -30.363735271533415 avg score:  -30.416645195195024
[-1.0253662  -0.04653648]
episode  1517 score:  -30.539112812586716 avg score:  -30.40952533846108
[-0.74215263  0.01329805]
episode  1518 score:  -30.54018572922003 avg score:  -30.407638347852163
[-1.1089593  -0.03819469]
episode  1519 score:  -29.57340481757702 avg score:  -30.403120304205537
[0.3760373  0.00711803]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  1520 score:  -28.73143592839893 avg score:  -30.37988442026269
[-0.73438215 -0.0316774 ]
episode  1521 score:  -30.487163942832513 avg score:  -30.380875160062452
[-0.30366793 -0.04944079]
episode  1522 score:  -28.54588892685594 avg score:  -30.353850292517755
[0.33185327 0.02149487]
episode  1523 score:  -30.811186887425613 avg

episode  1587 score:  -31.546803035968882 avg score:  -30.464327598809987
[-0.27734065 -0.02445572]
episode  1588 score:  -29.87678515566241 avg score:  -30.460987611450772
[-0.6222528   0.01802581]
episode  1589 score:  -28.75391199061095 avg score:  -30.45433779728075
[-1.0979396  -0.03589828]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  1590 score:  -30.84984572133498 avg score:  -30.443982114233403
[-1.1104562  -0.06411863]
episode  1591 score:  -29.874525680094624 avg score:  -30.446035145507196
[-0.9196601   0.05707159]
episode  1592 score:  -29.87114105103217 avg score:  -30.438920190949638
[-0.08678105 -0.00991555]
episode  1593 score:  -30.13879578022756 avg score:  -30.42396657464341
[-8.570120e-01  8.479363e-04]
episode  1594 score:  -30.653662010339122 avg score:  -30.420068301911545
[-0.29763672 -0.05603443]
episode  1595 score:  -32.646164850098

episode  1659 score:  -31.65431010781799 avg score:  -30.5671260009447
[0.24257761 0.05110711]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  1660 score:  -30.099761911325277 avg score:  -30.5842553053047
[-0.16612199 -0.04624872]
episode  1661 score:  -29.735555138091357 avg score:  -30.583318768106707
[-1.1572645  -0.00884382]
episode  1662 score:  -30.823465895777233 avg score:  -30.585573025799434
[-0.8930874  -0.01597788]
episode  1663 score:  -30.85267772956745 avg score:  -30.591789615881275
[-0.08519442 -0.0605403 ]
episode  1664 score:  -30.095974744608927 avg score:  -30.596382857631085
[ 0.43173137 -0.00832488]
episode  1665 score:  -29.696171076851314 avg score:  -30.599040301958162
[-0.94867647  0.05418718]
episode  1666 score:  -31.28001553297569 avg score:  -30.60763578442949
[-0.4699792   0.06916188]
episode  1667 score:  -31.590714951070538 avg

episode  1731 score:  -28.448846426073278 avg score:  -30.51903595587577
[0.28119648 0.0031303 ]
episode  1732 score:  -30.633524551641607 avg score:  -30.52470772528255
[0.04982394 0.01175269]
episode  1733 score:  -29.925262451312463 avg score:  -30.505890672095813
[-1.0945606  -0.05386151]
episode  1734 score:  -31.01123248934233 avg score:  -30.496497371908344
[ 0.426698  -0.0395769]
episode  1735 score:  -31.73067784393992 avg score:  -30.500572871398536
[-0.02159423 -0.04895346]
episode  1736 score:  -30.178619804869776 avg score:  -30.484865697573305
[-0.8066053   0.00953876]
episode  1737 score:  -30.930658027771976 avg score:  -30.48270108588984
[-0.31656706 -0.04998275]
episode  1738 score:  -30.607586364810977 avg score:  -30.487334522857537
[-1.1416552 -0.0243925]
episode  1739 score:  -30.742089481362584 avg score:  -30.48526295730549
[0.06052032 0.05041401]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....sa

episode  1803 score:  -31.776497304215134 avg score:  -30.611700788177657
[-0.88888496  0.00811694]
episode  1804 score:  -31.33336479072942 avg score:  -30.627132138629857
[-1.1611305  0.0283265]
episode  1805 score:  -31.11644895098907 avg score:  -30.637807865904648
[-0.2372128  -0.06162783]
episode  1806 score:  -29.61323369504725 avg score:  -30.6210738526114
[-0.7526028  -0.00447178]
episode  1807 score:  -30.94321032532085 avg score:  -30.62380130041799
[0.1792469  0.06740856]
episode  1808 score:  -30.089132980883406 avg score:  -30.62464406656352
[ 0.39874586 -0.0681443 ]
episode  1809 score:  -31.412159095259643 avg score:  -30.634737337121496
[ 0.18813829 -0.01825663]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  1810 score:  -30.125036988069485 avg score:  -30.638009917863382
[-1.1544936  -0.02778396]
episode  1811 score:  -31.546014051700034 avg s

episode  1875 score:  -30.726886787003455 avg score:  -30.560011378246067
[0.09518099 0.00635117]
episode  1876 score:  -29.8499147222063 avg score:  -30.5430874843565
[-0.6853336   0.06212552]
episode  1877 score:  -30.831292802054918 avg score:  -30.55324636740386
[-0.56546426 -0.01011943]
episode  1878 score:  -30.348675675966884 avg score:  -30.553006214060243
[0.5552421  0.06723522]
episode  1879 score:  -29.584862122117684 avg score:  -30.557544523938535
[ 0.38832647 -0.00137358]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  1880 score:  -30.54062806043676 avg score:  -30.553850410266627
[-0.45478022  0.00721643]
episode  1881 score:  -31.266099463645574 avg score:  -30.551416109168425
[ 0.20176147 -0.0143617 ]
episode  1882 score:  -31.160603173228324 avg score:  -30.550907653028798
[ 0.0986095  -0.05566635]
episode  1883 score:  -29.21137608742304 avg 

episode  1947 score:  -31.165142510606753 avg score:  -29.44650377785029
[ 0.43251294 -0.01498353]
episode  1948 score:  -31.451281316442238 avg score:  -29.43827432418085
[-0.32754707  0.04027813]
episode  1949 score:  -29.778413438465904 avg score:  -29.431728335624953
[-1.004006   -0.00731394]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  1950 score:  -29.61921831771714 avg score:  -29.43505017042713
[-0.5381424  -0.02988633]
episode  1951 score:  -30.73080186685229 avg score:  -29.429120715494903
[0.1388725  0.00643949]
episode  1952 score:  -29.043850550192293 avg score:  -29.409684441129603
[-0.29993737  0.03131185]
episode  1953 score:  -29.775748307263846 avg score:  -29.399739284885605
[0.41772342 0.00816232]
episode  1954 score:  -31.65519527836234 avg score:  -29.418608238294073
[-0.06638926  0.05106365]
episode  1955 score:  -29.253120083382033 avg

episode  2019 score:  -29.898495513674956 avg score:  -29.481025760556886
[-1.1675422  -0.05349492]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  2020 score:  -30.46066418961234 avg score:  -29.48842619319527
[-0.36551255  0.02693527]
episode  2021 score:  -32.588329136543415 avg score:  -29.504647056849418
[-0.96447843 -0.0554752 ]
episode  2022 score:  -28.858723491359967 avg score:  -29.48300940333859
[0.38781118 0.03729456]
episode  2023 score:  -28.729424559649228 avg score:  -29.465094507242856
[-1.0419612  -0.04450689]
episode  2024 score:  -30.449966233002815 avg score:  -29.457183443426164
[-0.5518027   0.05122759]
episode  2025 score:  -29.768722604805227 avg score:  -29.452861674157184
[-0.81195784  0.03741923]
episode  2026 score:  -30.479141339884528 avg score:  -29.466906345872612
[-0.34420526 -0.010591  ]
episode  2027 score:  -30.21369274858124

episode  2091 score:  -31.25511705778402 avg score:  -29.463989270594826
[-1.0843164  -0.04355866]
episode  2092 score:  -30.18549528928461 avg score:  -29.465354468609288
[ 0.44514787 -0.00658813]
episode  2093 score:  -30.592419817899213 avg score:  -29.46024465086285
[ 0.22181053 -0.00936965]
episode  2094 score:  -31.06395488989836 avg score:  -29.48297900451163
[-0.36868405 -0.04751886]
episode  2095 score:  -30.259612056468228 avg score:  -29.488972459714688
[-0.821221   -0.02658023]
episode  2096 score:  -29.30717246445583 avg score:  -29.48569685985497
[-1.1558119  -0.06525078]
episode  2097 score:  -30.42603270954063 avg score:  -29.492705291360142
[-0.57333845 -0.01521381]
episode  2098 score:  -29.999354967552247 avg score:  -29.500643382774378
[-0.6978996   0.03337009]
episode  2099 score:  -28.846464425031353 avg score:  -29.477481656229557
[-0.83981717  0.0172875 ]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....

episode  2163 score:  -31.685249165164237 avg score:  -30.561508957931018
[-0.5997229  -0.05491668]
episode  2164 score:  -30.61322724729321 avg score:  -30.566618200945474
[ 0.17442341 -0.06407405]
episode  2165 score:  -31.259799774193283 avg score:  -30.5747401028728
[0.5015761  0.02603204]
episode  2166 score:  -30.040998667493543 avg score:  -30.56813367967295
[-0.09843701  0.05344196]
episode  2167 score:  -30.60340439771028 avg score:  -30.565741825971983
[ 0.27922913 -0.02066804]
episode  2168 score:  -30.823062682909164 avg score:  -30.568835092258745
[-0.4695861  -0.02219061]
episode  2169 score:  -29.38221863532456 avg score:  -30.559549214917865
[0.15361363 0.04043296]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  2170 score:  -30.21096368175705 avg score:  -30.576263084373444
[-0.95295405 -0.01820927]
episode  2171 score:  -30.65947750410771 avg s

episode  2235 score:  -28.647077666113393 avg score:  -30.728960471957247
[ 0.12093354 -0.02364306]
episode  2236 score:  -30.1001431078755 avg score:  -30.708166681085892
[-0.62332135 -0.01072666]
episode  2237 score:  -29.111547304214454 avg score:  -30.69098773342901
[-0.19947478 -0.01436019]
episode  2238 score:  -28.642327742738352 avg score:  -30.65923940445277
[-1.0538133   0.04914143]
episode  2239 score:  -31.123849072270954 avg score:  -30.671394938662694
[-0.23656023 -0.06504022]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  2240 score:  -29.520689927769833 avg score:  -30.665403795765425
[-0.8418963  0.0216831]
episode  2241 score:  -31.082807742381185 avg score:  -30.665368024273185
[-0.38609916  0.03478885]
episode  2242 score:  -30.32030849679387 avg score:  -30.657838794828198
[-0.47463578  0.06994482]
episode  2243 score:  -32.016175781587165 

episode  2307 score:  -29.350023117056196 avg score:  -30.534968832610815
[ 0.48586807 -0.01737363]
episode  2308 score:  -32.14202595702797 avg score:  -30.55743633142547
[-0.93516386 -0.0437857 ]
episode  2309 score:  -30.209225318832846 avg score:  -30.551637091916568
[0.28142118 0.0272527 ]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  2310 score:  -30.487215966848073 avg score:  -30.55510658846301
[0.2623015  0.00120644]
episode  2311 score:  -30.311976607879302 avg score:  -30.53783525530431
[ 0.4689132 -0.0341658]
episode  2312 score:  -30.16536577179944 avg score:  -30.53837007043911
[-1.1176887  -0.06302212]
episode  2313 score:  -30.853527507217812 avg score:  -30.54626170116756
[-0.25414786  0.00269166]
episode  2314 score:  -30.591197556721184 avg score:  -30.538711249650195
[-0.9473299  0.0077795]
episode  2315 score:  -30.006180646588692 avg scor

episode  2379 score:  -31.890478130267333 avg score:  -30.51390575435594
[0.12256987 0.00175927]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  2380 score:  -29.79122773389746 avg score:  -30.519404555616966
[-0.26150116 -0.03972558]
episode  2381 score:  -30.575391150313106 avg score:  -30.514652510320662
[-0.26865706  0.02267262]
episode  2382 score:  -31.185225926537242 avg score:  -30.521013560449127
[-0.78417605  0.0311764 ]
episode  2383 score:  -29.021208573683985 avg score:  -30.492641647050977
[-0.35608426  0.00497105]
episode  2384 score:  -31.226609636247897 avg score:  -30.500954640118845
[-0.43111065 -0.06434995]
episode  2385 score:  -30.970161239506766 avg score:  -30.52064886309436
[-0.95477486  0.03088244]
episode  2386 score:  -31.49703175213862 avg score:  -30.53355791116361
[ 0.06757185 -0.01370823]
episode  2387 score:  -30.44642600917199 a

episode  2451 score:  -30.0952369735311 avg score:  -30.568581172262252
[-0.7081119   0.02874283]
episode  2452 score:  -30.679951779905565 avg score:  -30.57933950133997
[0.1799451  0.02953211]
episode  2453 score:  -29.332105003618043 avg score:  -30.57792496273107
[-0.04532168 -0.00273533]
episode  2454 score:  -31.176063562885382 avg score:  -30.57542556985035
[ 0.44535267 -0.0449169 ]
episode  2455 score:  -30.996994722301885 avg score:  -30.572069106064188
[-0.5578293   0.04951859]
episode  2456 score:  -31.757892866308612 avg score:  -30.593222147309657
[-0.173526    0.04071641]
episode  2457 score:  -32.39124487857605 avg score:  -30.60205247520685
[-0.02881038 -0.02574084]
episode  2458 score:  -30.06371471013203 avg score:  -30.584837050131988
[0.5155779 0.0677327]
episode  2459 score:  -29.408227871104987 avg score:  -30.558078956795647
[-0.00913788  0.0013786 ]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....

episode  2523 score:  -32.27041063217571 avg score:  -29.574743522029998
[-0.82490873  0.03329552]
episode  2524 score:  -32.02201378601012 avg score:  -29.596689991790797
[ 0.5956998  -0.04964974]
episode  2525 score:  -30.65639347282017 avg score:  -29.626417141525813
[-0.4073969   0.02035405]
episode  2526 score:  -30.759477436899747 avg score:  -29.626546855244698
[-0.47189474  0.03735253]
episode  2527 score:  -32.40142789631465 avg score:  -29.656324950280286
[-0.0787794   0.06295154]
episode  2528 score:  -31.06113946698427 avg score:  -29.65987733557561
[-0.60701877 -0.04290873]
episode  2529 score:  -30.70384106713205 avg score:  -29.663486366536464
[-1.1116575   0.03469564]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  2530 score:  -30.277640663158536 avg score:  -29.656310179655385
[-0.9020469  -0.00155686]
episode  2531 score:  -29.58297298811129 a

episode  2595 score:  -30.240424607826917 avg score:  -30.56145786906921
[0.4664677  0.05298013]
episode  2596 score:  -29.19542243556286 avg score:  -30.56071472967246
[-0.551167    0.04979454]
episode  2597 score:  -29.105753755955906 avg score:  -30.570427436356095
[-0.6679943  -0.02802439]
episode  2598 score:  -29.754757928027633 avg score:  -30.565127266526797
[0.50626385 0.06962603]
episode  2599 score:  -29.683650881709987 avg score:  -30.566274323152015
[-0.37758103  0.02011286]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  2600 score:  -30.390717441222375 avg score:  -30.562379085900872
[-0.89002234  0.01288485]
episode  2601 score:  -29.270894116974816 avg score:  -30.543715721104057
[0.10555875 0.01344629]
episode  2602 score:  -32.077398586764474 avg score:  -30.56448490004201
[-1.0842888  -0.04635207]
episode  2603 score:  -31.893631566603283 avg

episode  2667 score:  -31.117123925464814 avg score:  -30.526020042950407
[-0.64593565 -0.04722088]
episode  2668 score:  -30.11850827081031 avg score:  -30.50931423858525
[-0.37979537  0.06505279]
episode  2669 score:  -30.423819901167104 avg score:  -30.510605372018116
[-0.5985396   0.02222563]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  2670 score:  -29.973751840974955 avg score:  -30.519941193706362
[0.07412836 0.05756446]
episode  2671 score:  -30.591622111656953 avg score:  -30.515976737378704
[ 0.48223948 -0.04369248]
episode  2672 score:  -29.752164987764164 avg score:  -30.508906805224647
[ 0.1137025 -0.0507921]
episode  2673 score:  -30.374181671814878 avg score:  -30.52382173940065
[-0.9866058   0.01231027]
episode  2674 score:  -30.340281341759095 avg score:  -30.4999433002349
[-0.24650185 -0.01834277]
episode  2675 score:  -31.680436566690965 av

episode  2739 score:  -30.99566774127967 avg score:  -30.47438079252347
[0.49519035 0.01136172]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  2740 score:  -29.732753462150377 avg score:  -30.463140479972353
[0.458598   0.03600048]
episode  2741 score:  -31.40858539865929 avg score:  -30.47403293286012
[-0.00221243 -0.03816054]
episode  2742 score:  -31.269242937050315 avg score:  -30.472636834494065
[-0.7206203 -0.0119744]
episode  2743 score:  -30.139725916601336 avg score:  -30.469183019478784
[ 0.5410719  -0.05924205]
episode  2744 score:  -30.94694013771601 avg score:  -30.477558921914476
[0.07638805 0.00198878]
episode  2745 score:  -30.53400864444929 avg score:  -30.477076014114672
[ 0.29372418 -0.06317477]
episode  2746 score:  -30.35627663891877 avg score:  -30.487867891038025
[-0.04304426  0.01509421]
episode  2747 score:  -28.7198975044454 avg score:

episode  2811 score:  -30.127319998858837 avg score:  -30.59241470505023
[ 0.04455924 -0.06248787]
episode  2812 score:  -30.210934212089693 avg score:  -30.59080370496005
[-0.1865268  -0.03604802]
episode  2813 score:  -28.482038857277505 avg score:  -30.566314318751747
[-0.6933966   0.06112286]
episode  2814 score:  -30.818122666923507 avg score:  -30.573038980102798
[-1.0166023  -0.02562514]
episode  2815 score:  -28.97326261728416 avg score:  -30.554791000424085
[ 0.44392782 -0.01560771]
episode  2816 score:  -30.374229478532207 avg score:  -30.55369802998285
[ 0.04027146 -0.0353874 ]
episode  2817 score:  -28.86099649614843 avg score:  -30.532257338778706
[-0.6450396   0.04149236]
episode  2818 score:  -30.983611107256134 avg score:  -30.544571683695832
[-0.40338162  0.05572936]
episode  2819 score:  -30.459106482540058 avg score:  -30.551268370388776
[-0.7924175  -0.05162398]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint..

episode  2883 score:  -30.925921675473383 avg score:  -30.46330054396033
[ 0.08502092 -0.04944877]
episode  2884 score:  -31.749163927595912 avg score:  -30.470756057023305
[-0.39510286  0.01598966]
episode  2885 score:  -30.847312315349754 avg score:  -30.46400679698443
[0.21334976 0.0348076 ]
episode  2886 score:  -30.68658678557617 avg score:  -30.475020397057737
[-1.0616558   0.02976249]
episode  2887 score:  -30.790956500754397 avg score:  -30.475453913183163
[-0.6710237  -0.02554641]
episode  2888 score:  -30.121523718878016 avg score:  -30.474396038886233
[-0.71723324  0.03395112]
episode  2889 score:  -32.67052745523668 avg score:  -30.49402981061927
[-0.7047747   0.04916799]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  2890 score:  -31.509732585834932 avg score:  -30.529079517545245
[-0.5469436  0.00924  ]
episode  2891 score:  -30.786909817585727 av

episode  2955 score:  -32.32787780412317 avg score:  -30.61752871545566
[-0.12490088  0.04027816]
episode  2956 score:  -30.329417472057692 avg score:  -30.61123491492332
[-0.70250624  0.06185165]
episode  2957 score:  -30.666758140330998 avg score:  -30.590183071015222
[-0.97672147 -0.03284031]
episode  2958 score:  -30.596690406552277 avg score:  -30.582068425769407
[ 0.18806003 -0.04023112]
episode  2959 score:  -30.183496813706586 avg score:  -30.57260583852425
[-0.37727624  0.03646494]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  2960 score:  -30.42186298824994 avg score:  -30.582811826140727
[ 0.05387847 -0.01897975]
episode  2961 score:  -30.073851301032136 avg score:  -30.57449343220252
[0.5108852  0.06832084]
episode  2962 score:  -32.02455428896478 avg score:  -30.588112988207584
[0.36315164 0.05549569]
episode  2963 score:  -31.006544067050434 avg 

episode  3027 score:  -31.76422405974816 avg score:  -30.615688378718
[-0.03585728  0.02697579]
episode  3028 score:  -30.10591676189503 avg score:  -30.624206690102064
[-0.98163944 -0.00205198]
episode  3029 score:  -30.790856040393887 avg score:  -30.628230509463737
[-0.2690928   0.01558928]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  3030 score:  -30.259778557092364 avg score:  -30.625163695685437
[-0.9455378   0.06304749]
episode  3031 score:  -29.97988459285781 avg score:  -30.622240526170966
[0.07470384 0.01425488]
episode  3032 score:  -30.391593632921747 avg score:  -30.629571696363342
[ 0.15116641 -0.00943076]
episode  3033 score:  -29.486686919852726 avg score:  -30.62675805390815
[0.49431762 0.04828416]
episode  3034 score:  -31.3556218969569 avg score:  -30.645163113387994
[-0.84107256  0.06115402]
episode  3035 score:  -32.99543461767174 avg sco

episode  3099 score:  -31.136099057964834 avg score:  -30.73641883240735
[ 0.19909358 -0.04664795]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  3100 score:  -30.359199660100888 avg score:  -30.73502451388736
[-1.0162675   0.03176662]
episode  3101 score:  -31.16438646379334 avg score:  -30.733750681307853
[-0.2886144   0.01945143]
episode  3102 score:  -31.297574878119956 avg score:  -30.734400292063278
[-0.48822266  0.04540908]
episode  3103 score:  -31.673543693433032 avg score:  -30.751271661610136
[0.13369627 0.05922044]
episode  3104 score:  -30.121581331902707 avg score:  -30.755662412211983
[-0.7206445  -0.05471249]
episode  3105 score:  -30.974026810536433 avg score:  -30.76365111321861
[-0.01476926  0.06245282]
episode  3106 score:  -30.615445548475385 avg score:  -30.763375126099287
[-0.7602375  -0.01144022]
episode  3107 score:  -30.74590030877477 

.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  3170 score:  -30.092788265869743 avg score:  -30.543083353507928
[-0.12873153  0.00194384]
episode  3171 score:  -31.212316591779043 avg score:  -30.55581610459157
[-0.80778325 -0.06189928]
episode  3172 score:  -29.993324604687277 avg score:  -30.54622837736667
[-0.9990759   0.00666012]
episode  3173 score:  -31.333266087044535 avg score:  -30.56076843977277
[-0.8431802  -0.06239808]
episode  3174 score:  -30.736224953256148 avg score:  -30.564063786446635
[-0.42393166 -0.05806247]
episode  3175 score:  -29.62853055638769 avg score:  -30.566848992549517
[-1.1971692   0.04922179]
episode  3176 score:  -30.047384319543994 avg score:  -30.559138725545072
[0.5538596  0.06854498]
episode  3177 score:  -30.63359054449103 avg score:  -30.55775582462172
[-0.5856798 -0.057652 ]
episode  3178 score:  -30.013902338466888 avg

episode  3241 score:  -30.408439484493744 avg score:  -29.38755087788145
[ 0.26950008 -0.04221472]
episode  3242 score:  -30.41209534449654 avg score:  -29.384233594885554
[-0.39750507  0.03299433]
episode  3243 score:  -29.77844308882402 avg score:  -29.388219046403513
[-0.20961778 -0.06348956]
episode  3244 score:  -31.10008736738492 avg score:  -29.41118519406451
[ 0.21272062 -0.05057795]
episode  3245 score:  -30.118735434973058 avg score:  -29.393926267519657
[ 0.48378122 -0.05554483]
episode  3246 score:  -30.629047978204078 avg score:  -29.389174353767434
[0.39290035 0.05861896]
episode  3247 score:  -30.760057547406838 avg score:  -29.40061139445848
[ 0.5391932  -0.04433594]
episode  3248 score:  -29.853477432332916 avg score:  -29.397517454035146
[-1.011524   -0.04501079]
episode  3249 score:  -31.307386622471025 avg score:  -29.397077009527553
[ 0.08577879 -0.06999569]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....

episode  3313 score:  -28.62470531113121 avg score:  -29.44375745314538
[-0.71430194 -0.02266699]
episode  3314 score:  -31.237755163600244 avg score:  -29.46728442044377
[-0.21782468  0.00519628]
episode  3315 score:  -30.496967892438555 avg score:  -29.46289385864736
[-0.17960519  0.02201268]
episode  3316 score:  -29.127955293809652 avg score:  -29.44383117949915
[-0.10623793 -0.05549914]
episode  3317 score:  -30.670500212265285 avg score:  -29.44238865027919
[0.22799248 0.00068408]
episode  3318 score:  -31.191434379247642 avg score:  -29.456062124519363
[-1.0724069  -0.02643393]
episode  3319 score:  -30.577288855823618 avg score:  -30.498383788099567
[-0.69361436  0.05054567]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  3320 score:  -29.578599630536118 avg score:  -30.499997062750708
[-0.63994795 -0.00962229]
episode  3321 score:  -29.463953080239364 a

episode  3385 score:  -31.07930174610535 avg score:  -30.44630282711861
[-1.1153574  -0.02273318]
episode  3386 score:  -32.018522044013324 avg score:  -30.460917837316483
[-0.60290104  0.05185441]
episode  3387 score:  -29.52437498796432 avg score:  -30.453861262125788
[ 0.10279786 -0.05441676]
episode  3388 score:  -31.387563425834347 avg score:  -30.466146109477325
[-0.3358587 -0.016645 ]
episode  3389 score:  -28.409134000459044 avg score:  -30.438158620710446
[0.44012815 0.05514254]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  3390 score:  -30.75306419720769 avg score:  -30.432060787105215
[-0.89995766 -0.0386838 ]
episode  3391 score:  -30.30047603148358 avg score:  -30.437125380420426
[-0.7988742   0.01916775]
episode  3392 score:  -29.47303600741257 avg score:  -30.42264934566208
[ 0.40434134 -0.03861679]
episode  3393 score:  -31.339226857851788 avg 

episode  3457 score:  -30.789812035319564 avg score:  -30.589952349213764
[ 0.4686454  -0.01832732]
episode  3458 score:  -30.815022069071137 avg score:  -30.590129213996278
[-0.05366362  0.00298726]
episode  3459 score:  -31.801198511377635 avg score:  -30.602808219020986
[-0.72229743  0.04979887]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  3460 score:  -30.387166120341686 avg score:  -30.589285741722094
[-0.28470215  0.01254398]
episode  3461 score:  -30.8290025610437 avg score:  -30.592045112778226
[-0.6240318  -0.03385747]
episode  3462 score:  -30.543044192008047 avg score:  -30.598962820988117
[-0.6105007  0.0039711]
episode  3463 score:  -31.97429615689389 avg score:  -30.61470493598686
[ 0.04808524 -0.01540229]
episode  3464 score:  -30.3285640252439 avg score:  -30.611381380745176
[-1.185915    0.06385836]
episode  3465 score:  -29.564232822674214 a

episode  3529 score:  -29.760112194311358 avg score:  -30.55968385263158
[-0.6612061   0.00493975]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  3530 score:  -32.08385364412066 avg score:  -30.58158861301756
[-0.42739168  0.06308479]
episode  3531 score:  -31.330096860459374 avg score:  -30.586424981482082
[-0.98641574  0.05640683]
episode  3532 score:  -31.05250440431689 avg score:  -30.592642291674938
[-1.1701431  -0.00894285]
episode  3533 score:  -30.453983767041464 avg score:  -30.59106844825898
[-1.1287059   0.06958622]
episode  3534 score:  -30.321718537316247 avg score:  -30.595826955181906
[ 0.02843259 -0.01110446]
episode  3535 score:  -30.00189449196726 avg score:  -30.586402319039493
[-0.53454    -0.00339784]
episode  3536 score:  -30.097465219071857 avg score:  -30.586603343334012
[-0.6818384  -0.05301546]
episode  3537 score:  -31.216031363685545

episode  3601 score:  -29.490493281477043 avg score:  -30.484134153167798
[ 0.2604272  -0.06033882]
episode  3602 score:  -29.612465027062107 avg score:  -30.47351672345309
[-1.0346209   0.02236163]
episode  3603 score:  -30.678613087708772 avg score:  -30.487134857191407
[ 0.26773044 -0.03793066]
episode  3604 score:  -31.56868059139641 avg score:  -30.50772266528768
[0.23801441 0.05544852]
episode  3605 score:  -29.87797988517119 avg score:  -30.50741525096646
[ 0.40244395 -0.0323652 ]
episode  3606 score:  -28.697498789290034 avg score:  -30.46935574276409
[-0.9295505  0.0222178]
episode  3607 score:  -29.555244329820024 avg score:  -30.46004636330471
[0.17945494 0.00242465]
episode  3608 score:  -29.172861118866372 avg score:  -30.453984025683635
[-0.711356    0.02688254]
episode  3609 score:  -31.959931434458994 avg score:  -30.471204079448356
[-0.3489274   0.03675357]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
....

episode  3673 score:  -29.022030632048697 avg score:  -29.199065932561826
[ 1.4485632e-01 -5.8927042e-05]
episode  3674 score:  -30.04086458574155 avg score:  -29.188028303062715
[-0.5634587  -0.04086917]
episode  3675 score:  -29.81845230057094 avg score:  -29.18634615796709
[-0.69895977 -0.00557072]
episode  3676 score:  -29.43311972679576 avg score:  -29.168153188328866
[-0.6293461  0.0130474]
episode  3677 score:  -31.94768730520612 avg score:  -29.181883715117493
[ 0.4982995  -0.01001156]
episode  3678 score:  -31.02256134264209 avg score:  -29.183070753295656
[ 0.1714475  -0.06348486]
episode  3679 score:  -31.063017298100327 avg score:  -29.186255981779283
[0.04747479 0.00876594]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  3680 score:  -30.033475543225297 avg score:  -29.18845908348046
[-0.00286186  0.04292549]
episode  3681 score:  -30.53688552912962

episode  3745 score:  -30.335935835140805 avg score:  -30.517743142321912
[-0.33228514 -0.00344426]
episode  3746 score:  -31.2428445816241 avg score:  -30.51492497120273
[-1.1232572   0.04254205]
episode  3747 score:  -30.915088748270062 avg score:  -30.515745206338227
[-0.41478628 -0.04114137]
episode  3748 score:  -30.446388012238664 avg score:  -30.516932425598338
[-0.17295723  0.06387486]
episode  3749 score:  -30.69688386055097 avg score:  -30.524915066943414
[ 0.48601598 -0.02768424]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  3750 score:  -30.30379848314319 avg score:  -30.524997103675855
[-1.1818733  -0.06499749]
episode  3751 score:  -30.694161379955503 avg score:  -30.530609125389013
[-1.197904    0.06150609]
episode  3752 score:  -29.916723293758867 avg score:  -30.532605194310467
[0.4978631  0.00930112]
episode  3753 score:  -31.31088135652464 a

episode  3817 score:  -30.78597515467803 avg score:  -30.5376140261616
[ 0.47658977 -0.04213461]
episode  3818 score:  -30.956931756795306 avg score:  -30.531427660330998
[-1.1343452   0.03433722]
episode  3819 score:  -31.215665849762836 avg score:  -30.54325172720433
[ 0.4865166  -0.00652935]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  3820 score:  -30.779058316790305 avg score:  -30.55171642241552
[-0.68968225 -0.01822396]
episode  3821 score:  -32.093543073564135 avg score:  -30.567731095388325
[0.14118256 0.0131866 ]
episode  3822 score:  -30.859842558874995 avg score:  -30.571837303550804
[-0.78344524 -0.01042392]
episode  3823 score:  -30.696875561409605 avg score:  -30.56940932706266
[-0.8327458   0.01424462]
episode  3824 score:  -31.24841328184315 avg score:  -30.58000595380097
[-0.9742135  -0.00435525]
episode  3825 score:  -32.51872059138219 avg 

episode  3889 score:  -30.841249847555325 avg score:  -30.585469786575302
[-0.5278667  -0.01546341]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  3890 score:  -31.18688582964523 avg score:  -30.593397285611346
[0.37262896 0.06360453]
episode  3891 score:  -30.67891883226303 avg score:  -30.58150422497026
[ 0.45438567 -0.06332912]
episode  3892 score:  -31.567823981437506 avg score:  -30.577309017962712
[ 0.07455343 -0.0558117 ]
episode  3893 score:  -30.63166297433291 avg score:  -30.597952913294833
[-0.53300595 -0.01847486]
episode  3894 score:  -31.9560561938656 avg score:  -30.59814444801317
[-0.18506922  0.05198813]
episode  3895 score:  -30.477202233339167 avg score:  -30.59812166061713
[ 0.078964   -0.05141171]
episode  3896 score:  -30.057441322440116 avg score:  -30.5636266795659
[-0.15157385  0.05251368]
episode  3897 score:  -30.94116324129285 avg sc

episode  3961 score:  -29.65251444005159 avg score:  -30.522145431842265
[0.5485778  0.03520551]
episode  3962 score:  -30.28953272035362 avg score:  -30.527425384996445
[-1.0319083  -0.04333605]
episode  3963 score:  -29.716386589071654 avg score:  -30.517604744067967
[-0.99360627  0.02821162]
episode  3964 score:  -31.090162782559247 avg score:  -30.533830867935546
[-0.22610995  0.05821026]
episode  3965 score:  -30.731705856378085 avg score:  -30.551470156658148
[0.397087   0.01677957]
episode  3966 score:  -31.191893654034207 avg score:  -30.564945885870948
[-0.13985914 -0.03084781]
episode  3967 score:  -29.910719553363403 avg score:  -30.543471787479294
[-1.0275115  -0.01295214]
episode  3968 score:  -31.00664228965544 avg score:  -30.537260811338783
[-1.0787317   0.05605219]
episode  3969 score:  -29.617142733818472 avg score:  -30.52218679693752
[-1.0216888  -0.01882895]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....

episode  4033 score:  -29.392585063749873 avg score:  -30.52448480109968
[-0.42343593  0.05398404]
episode  4034 score:  -29.814117888266445 avg score:  -30.521431673930593
[ 0.48028323 -0.01189501]
episode  4035 score:  -30.05699012041834 avg score:  -30.518646153906616
[-0.06514045 -0.03997112]
episode  4036 score:  -31.230492359955768 avg score:  -30.51438536560957
[-0.9069234  -0.06545128]
episode  4037 score:  -29.978951001562884 avg score:  -30.518962332056685
[-0.9966484  -0.02746584]
episode  4038 score:  -29.4074449060163 avg score:  -30.49843670658488
[-0.94791377  0.06697515]
episode  4039 score:  -30.308769623783302 avg score:  -30.50125599177985
[ 0.3035869  -0.03826989]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  4040 score:  -28.825380212884337 avg score:  -30.486014597890872
[ 0.09569539 -0.0646584 ]
episode  4041 score:  -30.545274926990338 

episode  4105 score:  -30.739963974054078 avg score:  -30.48651964128838
[0.46834534 0.02240542]
episode  4106 score:  -32.65295700380921 avg score:  -30.509183317108192
[-0.99454165  0.04957037]
episode  4107 score:  -31.330422591130898 avg score:  -30.518867849069725
[-0.6192377   0.00847056]
episode  4108 score:  -31.113000414369367 avg score:  -30.53347158691629
[-1.0885211  -0.06178701]
episode  4109 score:  -29.75309915401629 avg score:  -30.528835456171173
[0.35732356 0.04412189]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  4110 score:  -31.983035793442248 avg score:  -30.560869688609877
[-0.11433465  0.01093458]
episode  4111 score:  -29.697662409696306 avg score:  -30.54595701469478
[-0.29001746 -0.04057102]
episode  4112 score:  -29.756349208480756 avg score:  -30.537087873633876
[-0.5534792  -0.02577809]
episode  4113 score:  -31.135084163739787 av

episode  4177 score:  -30.13049004954293 avg score:  -30.528048786034482
[-0.66296583  0.0540729 ]
episode  4178 score:  -29.645607834765716 avg score:  -30.508896885316304
[-0.0208703  -0.02198874]
episode  4179 score:  -30.781566184980903 avg score:  -30.508492779024415
[ 0.15269089 -0.0608565 ]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  4180 score:  -29.83563535239251 avg score:  -30.492734061589417
[-0.6225449  -0.00960594]
episode  4181 score:  -29.681405261012785 avg score:  -30.483899280855656
[-0.670643   -0.02925167]
episode  4182 score:  -30.520200484040647 avg score:  -30.47190148877967
[0.1777813  0.02142953]
episode  4183 score:  -31.0708878030079 avg score:  -30.49844385115841
[-1.188511   -0.01891023]
episode  4184 score:  -30.181725789449295 avg score:  -30.499843748929152
[ 0.59791183 -0.05466022]
episode  4185 score:  -31.80943267149082 av

episode  4249 score:  -31.95866356217002 avg score:  -30.324991607576347
[ 0.12005612 -0.02840129]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  4250 score:  -29.390567197623355 avg score:  -30.321186715475413
[ 0.5718427  -0.00586438]
episode  4251 score:  -32.06313396786533 avg score:  -30.33025288319952
[-0.11169332  0.0691163 ]
episode  4252 score:  -30.73098369264795 avg score:  -30.329658630278246
[-0.23095857  0.04001385]
episode  4253 score:  -31.023347069685574 avg score:  -30.336541656168073
[ 0.392857   -0.01768102]
episode  4254 score:  -31.869424531049454 avg score:  -30.350265107626907
[-1.1276922   0.01687047]
episode  4255 score:  -29.940582467386232 avg score:  -30.347623650272226
[-0.9979669  -0.04580632]
episode  4256 score:  -31.846444732107805 avg score:  -30.34596052416678
[0.28768858 0.05129779]
episode  4257 score:  -28.80552024645989 a

episode  4321 score:  -30.75763626371281 avg score:  -30.522529284209405
[-0.9833527  -0.05912172]
episode  4322 score:  -31.07194095938044 avg score:  -30.50853119847294
[ 0.25775284 -0.00167296]
episode  4323 score:  -30.466282448714317 avg score:  -30.505040563424032
[-0.98882616  0.04578571]
episode  4324 score:  -30.33813349326529 avg score:  -30.517106748965745
[-0.38128844 -0.0660972 ]
episode  4325 score:  -31.473207899795597 avg score:  -30.533858863852007
[ 0.06470396 -0.03362161]
episode  4326 score:  -30.527175834809 avg score:  -30.524529814986685
[-1.0072219   0.02146131]
episode  4327 score:  -30.309583380190883 avg score:  -30.52630546337993
[-1.0625696   0.03811546]
episode  4328 score:  -30.208438876565598 avg score:  -30.53059429166619
[-0.8513058  0.0303039]
episode  4329 score:  -28.61636733458751 avg score:  -30.502299156318962
[-1.0678127  -0.06776437]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
...

episode  4393 score:  -31.058644060754563 avg score:  -30.56212862388551
[-0.3569378  -0.01533542]
episode  4394 score:  -30.36341046218508 avg score:  -30.564922177966746
[-0.4796949  -0.01701199]
episode  4395 score:  -30.611120322291278 avg score:  -30.55822522426165
[0.05318639 0.05385364]
episode  4396 score:  -29.9768936307316 avg score:  -30.55397135278159
[ 0.13032788 -0.02130116]
episode  4397 score:  -30.769819764815885 avg score:  -30.55836147507624
[-0.05491162  0.05149944]
episode  4398 score:  -30.141493266965856 avg score:  -30.555050053506353
[-0.5409005   0.03052928]
episode  4399 score:  -30.604071551521425 avg score:  -30.55971358441892
[-0.5591609   0.06295022]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  4400 score:  -30.435502869112604 avg score:  -30.564547437129228
[ 0.36272815 -0.015302  ]
episode  4401 score:  -29.937878719077403 avg

episode  4465 score:  -31.636083168614356 avg score:  -30.608785398920087
[-0.90527076 -0.00812135]
episode  4466 score:  -29.36755158456269 avg score:  -30.584820946626586
[ 0.5671654  -0.01454002]
episode  4467 score:  -30.386149188897555 avg score:  -30.583078887234976
[ 0.5984425  -0.05857137]
episode  4468 score:  -30.078868151873536 avg score:  -30.578085997843555
[-1.1987364   0.01456792]
episode  4469 score:  -29.50668278561599 avg score:  -30.54915816847199
[ 0.27297792 -0.01648798]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  4470 score:  -30.788078759141268 avg score:  -30.549257174226874
[-1.1326814  -0.01436952]
episode  4471 score:  -28.933327102597318 avg score:  -30.546205981312223
[-0.20930271 -0.01896894]
episode  4472 score:  -30.035300505180498 avg score:  -30.539970233992822
[-0.24371849  0.020595  ]
episode  4473 score:  -29.723034598793

episode  4537 score:  -29.25688644900077 avg score:  -30.27994355993193
[-0.33563182  0.05111616]
episode  4538 score:  -29.03423709646853 avg score:  -30.274367157873325
[ 0.32360744 -0.00222113]
episode  4539 score:  -30.179647931370997 avg score:  -30.263194468285974
[-0.19898713 -0.05785706]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  4540 score:  -30.009236188398678 avg score:  -30.258496695522055
[-1.1479021  -0.04105827]
episode  4541 score:  -29.690817177629405 avg score:  -30.255569474965036
[-0.2010852  -0.04610951]
episode  4542 score:  -30.784874994748535 avg score:  -30.26345877474177
[-0.8646885  0.0176368]
episode  4543 score:  -30.97888592851377 avg score:  -30.269892309388847
[-0.26997554  0.05290671]
episode  4544 score:  -30.33077600727296 avg score:  -30.273467589265447
[-0.970103   -0.01320312]
episode  4545 score:  -31.100793409563416 a

episode  4609 score:  -31.09494874995621 avg score:  -30.328820229018707
[0.2983134  0.01403969]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  4610 score:  -29.949637372296248 avg score:  -30.320559528369277
[ 0.11517508 -0.04618278]
episode  4611 score:  -29.428789841585726 avg score:  -30.30582634772936
[0.5996026  0.02715422]
episode  4612 score:  -29.469895043365817 avg score:  -30.278939926170846
[-1.0046154  -0.01026545]
episode  4613 score:  -29.269052932940838 avg score:  -30.27827339355917
[-0.8264705  -0.01775102]
episode  4614 score:  -31.575700881438735 avg score:  -30.287990396749457
[-0.099553   -0.04777486]
episode  4615 score:  -31.038585002567356 avg score:  -30.299934904390046
[0.11078848 0.06219175]
episode  4616 score:  -30.782525945115772 avg score:  -30.31430095522821
[-0.91204363 -0.04508077]
episode  4617 score:  -30.462275390371644 avg

episode  4681 score:  -32.36197097686909 avg score:  -30.48139744739664
[-1.1930137  -0.01445515]
episode  4682 score:  -31.132271583918655 avg score:  -30.49156179548194
[-1.0329838   0.01010789]
episode  4683 score:  -29.99312106966737 avg score:  -30.48323335040898
[ 0.19375128 -0.00436029]
episode  4684 score:  -30.46921605449179 avg score:  -30.49643562848394
[-0.02224907 -0.0586016 ]
episode  4685 score:  -31.633294752018262 avg score:  -30.51481291785356
[0.23963201 0.01383617]
episode  4686 score:  -30.136399690488258 avg score:  -30.507696913810754
[-0.21418837  0.01620754]
episode  4687 score:  -30.530000885788162 avg score:  -30.49756130662446
[0.04786388 0.06360461]
episode  4688 score:  -30.84039937947656 avg score:  -30.49496700691796
[-0.85695654  0.02134123]
episode  4689 score:  -31.993887109582033 avg score:  -30.521822890118177
[0.4085562  0.00227038]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....sav

episode  4753 score:  -30.62127521220751 avg score:  -30.53692185804073
[-0.84525967  0.03862621]
episode  4754 score:  -30.49193973827981 avg score:  -30.534259695935685
[-0.339027   -0.01932007]
episode  4755 score:  -30.597833402736345 avg score:  -30.544003016224114
[-0.3100646   0.00804104]
episode  4756 score:  -29.909662233048522 avg score:  -30.551427485296944
[-0.48344842 -0.02908301]
episode  4757 score:  -29.46655246668381 avg score:  -30.530651943569445
[-0.3850925 -0.0012377]
episode  4758 score:  -31.456358779978007 avg score:  -30.54201618474336
[ 0.1440158  -0.00641745]
episode  4759 score:  -28.794057059794014 avg score:  -30.537359102937884
[-0.9553051  -0.02992936]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  4760 score:  -30.7803366475316 avg score:  -30.531110966936
[0.14402498 0.01767582]
episode  4761 score:  -29.37377183991902 avg scor

episode  4825 score:  -28.369740577446848 avg score:  -30.403094668767856
[0.5842334  0.04844254]
episode  4826 score:  -30.658866511626 avg score:  -30.392037368153254
[0.40342146 0.06588098]
episode  4827 score:  -30.056645923517 avg score:  -30.387315801194955
[-0.5865678   0.02805555]
episode  4828 score:  -31.575072523449037 avg score:  -30.406471238553394
[0.16926916 0.0599532 ]
episode  4829 score:  -30.078820879432502 avg score:  -30.39244395204384
[-0.5712765   0.02302908]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  4830 score:  -30.323896441713458 avg score:  -30.383343162451755
[0.25715163 0.00210586]
episode  4831 score:  -31.16747474982151 avg score:  -30.390461333640022
[0.1306566  0.01049533]
episode  4832 score:  -30.684603507334614 avg score:  -30.388353486594692
[0.35644105 0.05427805]
episode  4833 score:  -29.777731574252847 avg score:  -

episode  4897 score:  -31.28227307348581 avg score:  -30.53971657774857
[-0.98592824 -0.00177067]
episode  4898 score:  -31.787964998794475 avg score:  -30.548455165947463
[-0.38913342  0.01217269]
episode  4899 score:  -30.448207605057267 avg score:  -30.554830203136763
[-0.51395184 -0.05390796]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  4900 score:  -30.117342160782645 avg score:  -30.5646245508769
[ 0.5228657  -0.06041469]
episode  4901 score:  -31.36652068341461 avg score:  -30.578248548328567
[ 0.29958233 -0.0282736 ]
episode  4902 score:  -30.426144931315836 avg score:  -30.574644013545655
[-0.5036053  -0.00275035]
episode  4903 score:  -29.035802086987207 avg score:  -30.551617782804147
[-0.22208525  0.04966789]
episode  4904 score:  -30.746720127223995 avg score:  -30.55029122433835
[0.1196392  0.02624351]
episode  4905 score:  -32.19353349775253 av

episode  4969 score:  -30.206610921194496 avg score:  -30.590488023581397
[-0.34132966 -0.03461216]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  4970 score:  -30.289687826236463 avg score:  -30.600424558570964
[ 0.19952169 -0.03520906]
episode  4971 score:  -30.652571798603322 avg score:  -30.595992890034168
[0.15400086 0.0492718 ]
episode  4972 score:  -29.74644094195708 avg score:  -30.597560404741202
[-0.3811295   0.05603981]
episode  4973 score:  -30.155757676897817 avg score:  -30.588264184238437
[0.55768675 0.01181693]
episode  4974 score:  -31.18199468528585 avg score:  -30.606875709962065
[-0.70743036 -0.01232228]
episode  4975 score:  -29.820170856782887 avg score:  -30.598619109215978
[-0.38166264  0.0048107 ]
episode  4976 score:  -31.51194487222252 avg score:  -30.613274658396193
[0.36752513 0.00397414]
episode  4977 score:  -29.875838767324392 av

episode  5041 score:  -30.651694004780953 avg score:  -30.626303520985683
[0.3880456  0.04080006]
episode  5042 score:  -31.264202126349907 avg score:  -30.6259773893304
[0.07155989 0.06808181]
episode  5043 score:  -31.104002402593252 avg score:  -30.628818824392408
[0.04164609 0.04370598]
episode  5044 score:  -29.95653304786267 avg score:  -30.63588499003233
[-0.85654396 -0.02090941]
episode  5045 score:  -30.81159733347895 avg score:  -30.634046111140844
[-0.02700983 -0.06886527]
episode  5046 score:  -31.872758281118628 avg score:  -30.64882707983857
[0.34913227 0.0059042 ]
episode  5047 score:  -30.143720298498188 avg score:  -30.646568820043225
[-0.6720617   0.00594164]
episode  5048 score:  -31.706676469871446 avg score:  -30.658220182059402
[-0.5115351   0.00127793]
episode  5049 score:  -30.613758298250392 avg score:  -30.646863361590807
[ 0.50915825 -0.05777713]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....

episode  5113 score:  -30.2536483052513 avg score:  -30.709616354175928
[-0.33902606  0.06759559]
episode  5114 score:  -28.276175502722097 avg score:  -30.678982453284384
[ 0.45322496 -0.0469435 ]
episode  5115 score:  -32.93493283414369 avg score:  -30.701419586357748
[ 0.5441954  -0.02124138]
episode  5116 score:  -29.81229764378335 avg score:  -30.688826757577495
[-0.13403848 -0.05601773]
episode  5117 score:  -29.289278162088266 avg score:  -30.66251853235497
[-0.76822174 -0.04442221]
episode  5118 score:  -30.305989299166853 avg score:  -30.663507525623437
[-0.33599547  0.04950133]
episode  5119 score:  -30.334561364205932 avg score:  -30.652192322044346
[ 0.52954406 -0.06190746]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  5120 score:  -29.979892516044977 avg score:  -30.63524635542672
[ 0.11403874 -0.02072411]
episode  5121 score:  -31.580606178414357

episode  5185 score:  -29.71486348505214 avg score:  -29.370913304968067
[ 0.26953226 -0.01745531]
episode  5186 score:  -30.14619952380171 avg score:  -29.36423672190301
[0.2667999  0.03227407]
episode  5187 score:  -29.86745643642467 avg score:  -29.365397438739663
[-1.1379216   0.03451722]
episode  5188 score:  -30.54117600037116 avg score:  -29.359156015704762
[0.534116   0.03862944]
episode  5189 score:  -29.098101356351396 avg score:  -29.328403387547972
[0.17208411 0.03771361]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  5190 score:  -31.406035743799006 avg score:  -29.31641851403102
[-0.60071546 -0.06906266]
episode  5191 score:  -29.2791199034705 avg score:  -29.30126305660168
[-0.38552493 -0.01031294]
episode  5192 score:  -31.08599729693936 avg score:  -29.299694186249976
[-0.92550105 -0.06050648]
episode  5193 score:  -32.108454999180104 avg score

episode  5257 score:  -29.042300421860155 avg score:  -30.521928124895588
[ 0.20205642 -0.05587765]
episode  5258 score:  -30.671192131478136 avg score:  -30.53509785117173
[ 0.09930708 -0.03403346]
episode  5259 score:  -31.98994742934428 avg score:  -30.547864139655704
[-0.77175105  0.0241501 ]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  5260 score:  -31.57356445274579 avg score:  -30.555427508808297
[-0.97507834 -0.04715447]
episode  5261 score:  -32.47994234399695 avg score:  -30.57000673531002
[-1.0963098   0.05151301]
episode  5262 score:  -29.506376722056636 avg score:  -30.563251117846285
[ 0.5886266  -0.00620543]
episode  5263 score:  -28.23688193275139 avg score:  -30.547675236745736
[-0.5335577  -0.02427579]
episode  5264 score:  -31.16486964871256 avg score:  -30.548890661864686
[ 0.24031684 -0.03144543]
episode  5265 score:  -30.29289674998741 a

episode  5329 score:  -30.896505061741355 avg score:  -30.632899186387327
[-0.20379236  0.03329777]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  5330 score:  -29.852596518535936 avg score:  -30.622240482077306
[-0.30693716 -0.03832325]
episode  5331 score:  -30.439452278301545 avg score:  -30.6129399331575
[-0.30639225  0.0016627 ]
episode  5332 score:  -30.824821740100642 avg score:  -30.625304711476982
[-0.79849064  0.04230371]
episode  5333 score:  -31.96159611361664 avg score:  -30.638800224393126
[-0.5166729   0.03796962]
episode  5334 score:  -29.984958251112623 avg score:  -30.631745827571045
[-0.38450068 -0.06141488]
episode  5335 score:  -30.31322661700571 avg score:  -30.62096210376226
[ 0.30535597 -0.01928895]
episode  5336 score:  -30.96539582893347 avg score:  -30.618554670684215
[-0.42677802 -0.05863434]
episode  5337 score:  -30.608533043575537

episode  5401 score:  -29.61265874134045 avg score:  -30.533143062967216
[0.20302352 0.03420443]
episode  5402 score:  -30.45192482382326 avg score:  -30.518303366322307
[0.17429332 0.04908588]
episode  5403 score:  -32.055952441718034 avg score:  -30.54179795926744
[-0.17965092 -0.05191534]
episode  5404 score:  -31.012635707792914 avg score:  -30.553148940831292
[-1.0097563   0.05377813]
episode  5405 score:  -30.77930857424273 avg score:  -30.555510459490552
[-0.00356655 -0.03099495]
episode  5406 score:  -31.17410817268676 avg score:  -30.55662810279569
[ 0.22998944 -0.06803648]
episode  5407 score:  -31.359140314965853 avg score:  -30.56498694403832
[-1.1943485e+00 -6.1667076e-04]
episode  5408 score:  -30.564607608807606 avg score:  -30.555430231829487
[-0.5515579  -0.06474197]
episode  5409 score:  -29.861100008520708 avg score:  -30.554851058324648
[-0.21520019 -0.04026135]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint..

episode  5473 score:  -30.233543558316395 avg score:  -30.526572281729955
[-0.95429003 -0.03172611]
episode  5474 score:  -30.542541928992023 avg score:  -30.528452792533017
[-1.1693857   0.01017264]
episode  5475 score:  -31.222651768187642 avg score:  -30.53760972300768
[-0.8806102   0.00964356]
episode  5476 score:  -29.65559144247039 avg score:  -30.530544301025333
[-0.49470088 -0.04279659]
episode  5477 score:  -30.846851313549244 avg score:  -30.52525437375324
[-0.58581674 -0.03527419]
episode  5478 score:  -30.77247416619165 avg score:  -30.521320599209975
[0.40127128 0.0307602 ]
episode  5479 score:  -29.299708531848484 avg score:  -30.508940379478034
[ 0.5846478  -0.02939407]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  5480 score:  -31.06431824957368 avg score:  -30.528448140402283
[0.5688662  0.05962583]
episode  5481 score:  -30.79783102294055 avg

episode  5545 score:  -29.934956248338924 avg score:  -29.531536878993002
[-0.19389097 -0.01341143]
episode  5546 score:  -28.87214878993269 avg score:  -29.50494757305767
[-0.1198414  -0.03328613]
episode  5547 score:  -29.78705743100566 avg score:  -29.49568490256263
[-0.8389715   0.03740258]
episode  5548 score:  -29.31106378018317 avg score:  -29.481583470876025
[-0.71682715  0.00476215]
episode  5549 score:  -29.98841305604635 avg score:  -29.474497883433
[ 0.31276157 -0.01498431]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  5550 score:  -32.49445302699131 avg score:  -29.500356223358402
[-1.087479   -0.04915849]
episode  5551 score:  -31.375912443278843 avg score:  -29.517218531362325
[-0.26657927 -0.04235926]
episode  5552 score:  -30.94051998335554 avg score:  -29.531401946684422
[-0.7623183  -0.02237082]
episode  5553 score:  -30.903235745291763 avg 

episode  5617 score:  -29.70192362224595 avg score:  -29.53893069881872
[ 0.36785775 -0.01599468]
episode  5618 score:  -29.31400783820453 avg score:  -29.535981026077305
[-0.48470443 -0.00104783]
episode  5619 score:  -30.198009402767642 avg score:  -29.534227989316154
[-0.14111704  0.04103125]
.....saving models......
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
.....saving checkpoint.....
episode  5620 score:  -30.846464192287183 avg score:  -29.523357965133524
[0.268762   0.06006227]
episode  5621 score:  -30.76071673042888 avg score:  -29.522147855233655
[-0.41302517  0.04397354]
episode  5622 score:  -30.39100602696075 avg score:  -29.509312418057387
[-0.38801685  0.02729886]
episode  5623 score:  -30.741180471174864 avg score:  -29.519955292893847
[-0.36297935  0.02655087]
episode  5624 score:  -30.57599753177692 avg score:  -29.514920391724573
[0.49276397 0.01197945]
episode  5625 score:  -30.225808437258337 avg

KeyboardInterrupt: 

In [None]:
plt.plot(score_history)
plt.figure()
plt.plot(avg_scores)

In [None]:
import pybullet as p
import gym

#env = InvertedPendulum()
env = gym.make('MountainCarContinuous-v0')
#env = gym.make("BipedalWalker-v3")
#env = gym.make('InvertedDoublePendulumBulletEnv-v0')


load_checkpoint = True
agent = SACAgent(input_dims = env.observation_space.shape, env =env,
                n_actions = env.action_space.shape[0])
if load_checkpoint:
    agent.load_models()
p.connect(p.DIRECT)
for i in range(1):
    obs = env.reset()
    score = 0
    reward =0
    done = False
    
    while not done:
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)        
        #env.render()
        env.render()
        obs = new_state
        score += reward
    print(score)
if env.viewer is not None:
    glfw.destroy_window(env.viewer.window)
    env.viewer = None
env.close()

In [None]:
!pip install glfw