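# A2C - LunarLanderContinuous
Implementation of the A2C RL algorithm for the OpenAI Gym environment LunarLanderContinuous-v2, using a Gaussian policy for the two continuous engine actions (single worker, not in parallel).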

In [1]:
%load_ext tensorboard
# Import libraries
import numpy as np
import matplotlib.pyplot as pd
from datetime import datetime

import tensorflow as tf
import tensorflow_probability as tfp

import gym

In [19]:
# Value Function Estimator
class Critic(tf.keras.layers.Layer):
    """State-value network V(s).

    Architecture: Dense(128, relu) -> Dropout(0.2) -> Dense(64, relu) -> Dense(1).
    The first layer declares an 8-dimensional input; the output is one
    unbounded scalar per input row.
    """

    def __init__(self):
        super(Critic, self).__init__()
        self.fc1 = tf.keras.layers.Dense(units=128, input_shape=[8,], activation='relu')

        self.Dropout = tf.keras.layers.Dropout(rate=0.2)

        self.fc2 = tf.keras.layers.Dense(units=64, activation='relu')
        self.out = tf.keras.layers.Dense(units=1, activation=None)

    def call(self, x, training=True):
        """Estimate the value of each state in ``x``.

        Args:
            x: Batch of states; the first layer is declared with 8 input features.
            training: Forwarded to the dropout layer. Defaults to ``True`` so that
                existing calls without the argument keep the previous behaviour
                (dropout always active). Pass ``training=False`` to obtain a
                deterministic value estimate, e.g. when using it as a baseline.

        Returns:
            Tensor with one value estimate per input row (last dimension 1).
        """
        x = self.fc1(x)
        # Dropout used to be hard-wired to training=True; it is now caller-controlled.
        x = self.Dropout(x, training=training)
        x = self.fc2(x)
        x = self.out(x)
        return x

In [20]:
# Policy network (actor): maps a state to the parameters of a Gaussian policy
class Actor(tf.keras.layers.Layer):
    """Gaussian policy network with a shared trunk and two heads.

    For every input row the network returns a mean ``mu`` and a standard
    deviation ``sigma`` for each of the two action dimensions:

        64(shared) -> 64(shared) -> BatchNorm -> 32 -> 32 -> mu    (tanh,    in [-1, 1])
        64(shared) -> 64(shared) -> BatchNorm -> 32 -> 32 -> sigma (sigmoid, in [0, 1])
    """

    def __init__(self):
        super(Actor, self).__init__()

        # Shared trunk; the first layer declares an 8-dimensional input.
        self.sharedFC1 = tf.keras.layers.Dense(units=64, input_shape=[8,], activation='relu')
        self.sharedFC2 = tf.keras.layers.Dense(units=64, activation='relu')

        self.sharedBatchNorm = tf.keras.layers.BatchNormalization()

        # Head producing the mean of the action distribution.
        self.muFC1 = tf.keras.layers.Dense(units=32, activation='relu')
        self.muFC2 = tf.keras.layers.Dense(units=32, activation='relu')

        # Head producing the standard deviation of the action distribution.
        self.sigmaFC1 = tf.keras.layers.Dense(units=32, activation='relu')
        self.sigmaFC2 = tf.keras.layers.Dense(units=32, activation='relu')

        self.mu_out = tf.keras.layers.Dense(units=2, activation='tanh')
        self.sigma_out = tf.keras.layers.Dense(units=2, activation='sigmoid')

    def call(self, x, training=False):
        """Compute the Gaussian policy parameters for the states in ``x``.

        Args:
            x: Batch of states (anything convertible to a tensor).
            training: Forwarded to the batch-normalisation layer. Defaults to
                ``False`` so that the layer normalises with its moving
                statistics. With ``training=True`` it uses the statistics of
                the current batch; for a batch holding a single state those are
                mean == x and variance == 0, which turns every feature into the
                constant ``beta`` and makes the policy ignore its input. Only
                pass ``True`` when feeding real mini-batches.

        Returns:
            Tuple ``(mu, sigma)``, each with two columns per input row.
        """
        x = tf.convert_to_tensor(x)
        x = self.sharedFC1(x)
        x = self.sharedFC2(x)

        x = self.sharedBatchNorm(x, training=training)

        mu = self.muFC1(x)
        mu = self.muFC2(mu)
        mu = self.mu_out(mu)

        sigma = self.sigmaFC1(x)
        sigma = self.sigmaFC2(sigma)
        sigma = self.sigma_out(sigma)

        return mu, sigma

In [21]:
# Prepare Tensorboard
!rm -rf ./logs/
current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
#%tensorboard --logdir logs/
tf.keras.backend.clear_session()
# Initialize cart pole environment
env = gym.make('LunarLanderContinuous-v2')
# Initialize model, loss and optimizer
actor = Actor()
critic = Critic()
actor_optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
critic_optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
mse = tf.keras.losses.MSE
weighted_sparse_ce = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
# Initialize replay memory
observations = []
# Set hyperparameters
discount = 0.95
max_time_steps = 500
num_episodes = 10000

# Store losses temporary
actor_losses = []
critic_losses = []
accum_reward = 0.

step = 0
# Run for agent and environment for num_episodes
for i_episode in range(num_episodes):
    state = env.reset()
    #breakpoint()
    
    # Agent has 500 trials at max, if it does not fail beforehand
    for t in range(max_time_steps):
        env.render()
        # Compute action
        state = np.reshape(state, [1,8])
        mu, sigma = actor(state)
        
        # sample two values from normal distribution
        mainEngineAction = tf.random.normal((1,), mean=mu[0,0], stddev=sigma[0,0])
        sideEngineAction = tf.random.normal((1,), mean=mu[0,1], stddev=sigma[0,1])
        action = tf.concat([mainEngineAction, sideEngineAction], 0)
        #        mainEngineAction = np.reshape(action, (2,))
        # Execute action and store action, state and reward
        next_state, reward, done, info = env.step(action)
        observations.append((state, action, reward))
        state = next_state
        accum_reward += reward
        # Interrupt the trial if the agent fails
        if done:
            break
        step += 1
        
    print(f"Episode {i_episode + 1} of {num_episodes} finished after {t+1} timesteps")
        
    # Initialize variable for the estimated return
    estimated_return = 0 if done else critic(next_state)
    
    # Iterate over taken actions and observed states and rewards
    observations.reverse()
    for state, action, reward in observations:
        # Compute estimated return
        estimated_return = discount * estimated_return + reward
        # Compute state value
        state_v = critic(state)
    
        # Compute gradients for the actor (policy gradient)
        # Maximize the estimated return
        with tf.GradientTape() as actor_tape:
            mu, sigma = actor(state)
            advantages = estimated_return - int(state_v)
            advantages = tf.cast([[advantages]], tf.float32)
            action_distribution = tfp.distributions.Normal(loc=mu, scale=sigma)
            logprob = action_distribution.log_prob(action)
            #breakpoint()
            actor_loss = logprob * advantages
            #breakpoint()
            # Compute the actor loss (log part of the policy gradient)
            # Compute gradient with respect to the parameters of the actor            
            policy_gradients = actor_tape.gradient(actor_loss, actor.trainable_variables)

        # Compute gradients for the critic
        # minimize MSE for the state value function
        with tf.GradientTape() as critic_tape:
            state_v = critic(state)
            # Compute the loss
            critic_loss = mse(estimated_return, state_v)
            # Compute the gradient
            critic_gradients = critic_tape.gradient(critic_loss, critic.trainable_variables)
            #breakpoint()
            # Accumulate gradients
            #critic_gradients.append(gradients)
            
        # Apply gradients.
        actor_optimizer.apply_gradients(zip(policy_gradients, actor.trainable_variables))
        critic_optimizer.apply_gradients(zip(critic_gradients, critic.trainable_variables))
        actor_losses.append(actor_loss)
        critic_losses.append(critic_loss)

    observations = []

    # Store summary statistics
    with train_summary_writer.as_default():
        tf.summary.scalar('policy loss', tf.reduce_mean(actor_losses), step=step)
        
        # Store summary statistics
        tf.summary.scalar('critic loss', tf.reduce_mean(critic_losses), step=step)
        
        # Critic
        tf.summary.scalar('V(s)', state_v[0,0], step=step)
        
        # Actor
        tf.summary.scalar('mu0', mu[0,0], step=step)
        tf.summary.scalar('sigma0', sigma[0,0], step=step)
        tf.summary.scalar('mu1', mu[0,1], step=step)
        tf.summary.scalar('sigma1', sigma[0,1], step=step)
    accum_reward = 0.

env.close()



Episode 1 of 10000 finished after 67 timesteps
[[-0.59924716  0.10465702 -1.1712816  -1.2105403   0.5148871  -0.63090223
   0.          1.        ]]
[[-0.58724046  0.13215138 -1.1468297  -1.2334796   0.5465536  -0.20530987
   0.          1.        ]]
[[-0.57569146  0.15997218 -1.1686605  -1.2804921   0.55675066  0.15064433
   0.          1.        ]]
[[-0.56406987  0.18872324 -1.1686642  -1.2538213   0.5492184   0.15064538
   0.          0.        ]]
[[-0.55244845  0.21687491 -1.1549531  -1.2418498   0.5416862   0.1510163
   0.          0.        ]]
[[-0.5409647   0.2447578  -1.1127968  -1.2502949   0.53413534  0.16019846
   0.          0.        ]]
[[-0.52990675  0.27282795 -1.1069416  -1.2216555   0.52612543  0.13244289
   0.          0.        ]]
[[-0.5188955   0.30026507 -1.1069443  -1.1949856   0.5195033   0.13244328
   0.          0.        ]]
[[-0.5078845   0.32710263 -1.0891263  -1.1785682   0.51288116  0.14018273
   0.          0.        ]]
[[-0.4970553   0.35356858 -1.0891293

[[ 0.08895817  0.61868536  0.4958149  -1.1442745  -0.9435872  -0.96138227
   0.          0.        ]]
[[ 0.08429613  0.64385164  0.4916424  -1.1134293  -0.8955181  -0.92660445
   0.          0.        ]]
[[ 0.07968245  0.6683657   0.48605752 -1.0823761  -0.84918797 -0.8872329
   0.          0.        ]]
[[ 0.07512684  0.6922242   0.47777995 -1.0501955  -0.8048264  -0.8344779
   0.          0.        ]]
[[ 0.07064895  0.71540684  0.48427123 -1.0273175  -0.76310253 -0.87330544
   0.          0.        ]]
[[ 0.06613312  0.7380747   0.47779194 -0.996539   -0.7194373  -0.83355
   0.          0.        ]]
[[ 0.06167908  0.7600904   0.45491427 -0.9849568  -0.6777599  -0.7900114
   0.          0.        ]]
[[ 0.05744705  0.7818858   0.4506579  -0.9560769  -0.63825935 -0.76654387
   0.          0.        ]]
[[ 0.05325718  0.80306053  0.41879544 -0.9646113  -0.5999322  -0.7231864
   0.          0.        ]]
[[ 0.04937563  0.8244631   0.37939665 -0.98275435 -0.56377286 -0.6914018
   0.          0

[[ 0.10103884  1.3308991   0.87084997 -0.39238504 -1.4534156  -1.1801037
   0.          0.        ]]
[[ 0.09241962  1.3388413   0.81899536 -0.37703323 -1.3944106  -1.1729805
   0.          0.        ]]
[[ 0.08435269  1.3464526   0.81706583 -0.34517843 -1.3357618  -1.133508
   0.          0.        ]]
[[ 0.07633314  1.3533883   0.76656175 -0.32380596 -1.2790865  -1.1404575
   0.          0.        ]]
[[ 0.06885099  1.3598522   0.76694244 -0.2970382  -1.2220637  -1.1407127
   0.          0.        ]]
[[ 0.06139603  1.3657308   0.7047419  -0.27217144 -1.1650281  -1.0926071
   0.          0.        ]]
[[ 0.05458222  1.3711022   0.6327306  -0.26270717 -1.1103977  -1.1014344
   0.          0.        ]]
[[ 0.0485177   1.3762751   0.63566726 -0.2393114  -1.0553261  -1.1292017
   0.          0.        ]]
[[ 0.04245825  1.3809266   0.6322876  -0.20874855 -0.9988662  -1.097597
   0.          0.        ]]
[[ 0.03645029  1.3849362   0.5932946  -0.19567363 -0.9439865  -1.0521241
   0.          0.   

[[-0.00442677  0.8904887   0.2003295  -0.9358825  -0.58480376 -0.7560185
   0.          0.        ]]
[[-0.00610638  0.9112391   0.16461553 -0.9586058  -0.5470029  -0.7079726
   0.          0.        ]]
[[-0.00744267  0.932537    0.16469009 -0.9318466  -0.51160437 -0.7080331
   0.          0.        ]]
[[-0.00877352  0.9532494   0.15891129 -0.9031418  -0.47620273 -0.6805296
   0.          0.        ]]
[[-0.01005335  0.97334164  0.15360747 -0.8750193  -0.44217628 -0.6567392
   0.          0.        ]]
[[-0.01128616  0.9928245   0.15365928 -0.84826916 -0.40933934 -0.6567869
   0.          0.        ]]
[[-0.01251516  1.0117202   0.13083395 -0.8387417  -0.3765     -0.61564803
   0.          0.        ]]
[[-0.01353159  1.0304275   0.10603372 -0.84938025 -0.3457176  -0.6199256
   0.          0.        ]]
[[-0.01429501  1.0493866   0.06697041 -0.86735725 -0.31472412 -0.58072275
   0.          0.        ]]
[[-0.01468401  1.0687724   0.04528465 -0.8851754  -0.2856906  -0.53947604
   0.          

[[-0.03911867  1.3540866  -0.77197707 -0.5585839   0.03423478  0.06664339
   0.          0.        ]]
[[-0.03143253  1.3666532  -0.7820364  -0.53190106  0.0309029   0.10703117
   0.          0.        ]]
[[-0.02366629  1.3786187  -0.7923146  -0.5050368   0.02555184  0.1483
   0.          0.        ]]
[[-0.01581812  1.3899795  -0.80000556 -0.4782221   0.01813753  0.1793468
   0.          0.        ]]
[[-0.00790873  1.4007376  -0.80108815 -0.4525849   0.00917107  0.18145844
   0.          0.        ]]
Episode 6 of 10000 finished after 64 timesteps
[[-0.17631789  0.02631613 -0.09027596 -1.8608657  -2.6840196  -1.8509328
   0.          0.        ]]
[[-0.17623377  0.06750084 -0.07952161 -1.8303059  -2.5914733  -1.7995888
   0.          0.        ]]
[[-0.17619085  0.10791107 -0.07205284 -1.8011929  -2.5014944  -1.7652385
   0.          0.        ]]
[[-0.17616042  0.14758553 -0.08376874 -1.7598902  -2.413233   -1.7223896
   0.          0.        ]]
[[-0.17594537  0.18626676 -0.09175946 -1.739

[[ 0.37514275  0.6488664   0.7667441  -1.49622    -2.7682457  -1.4496146
   0.          0.        ]]
[[ 0.366803    0.68208903  0.7708546  -1.4685361  -2.695765   -1.4317911
   0.          0.        ]]
[[ 0.35845298  0.71462375  0.771197   -1.4422373  -2.6241755  -1.4322957
   0.          0.        ]]
[[ 0.35012522  0.746499    0.78153694 -1.4110036  -2.552561   -1.3789499
   0.          0.        ]]
[[ 0.34174433  0.77763194  0.79030645 -1.3801916  -2.4836137  -1.3322248
   0.          0.        ]]
[[ 0.3333229   0.8080383   0.75834596 -1.3384224  -2.4170027  -1.3239577
   0.          0.        ]]
[[ 0.32525396  0.837459    0.7620932  -1.3097044  -2.350805   -1.3025771
   0.          0.        ]]
[[ 0.31718618  0.86619943  0.7076806  -1.2491956  -2.2856765  -1.3055203
   0.          0.        ]]
[[ 0.3096942   0.89353424  0.7081205  -1.2227365  -2.2204008  -1.3059032
   0.          0.        ]]
[[ 0.30223218  0.92023474  0.7085799  -1.1962577  -2.1551058  -1.3062862
   0.          0. 

[[-0.05674229  0.29890716  0.04176038 -1.7581551  -4.882101   -2.1998298
   0.          0.        ]]
[[-0.05703878  0.34012672  0.08313862 -1.7452857  -4.772111   -2.150673
   0.          0.        ]]
[[-0.05786991  0.3810292   0.08165702 -1.7187296  -4.6645784  -2.1523802
   0.          0.        ]]
[[-0.05880318  0.42132613  0.08020075 -1.6922787  -4.556961   -2.1540914
   0.          0.        ]]
[[-0.05983782  0.46100092  0.0811508  -1.6714612  -4.449258   -2.1129165
   0.          0.        ]]
[[-0.06098747  0.5001314   0.07984667 -1.6451937  -4.343613   -2.114535
   0.          0.        ]]
[[-0.06222973  0.53861     0.08351146 -1.6246375  -4.2378874  -2.069301
   0.          0.        ]]
[[-0.06359615  0.5765193   0.0870773  -1.6029917  -4.134423   -2.0321884
   0.          0.        ]]
[[-0.06507663  0.61383003  0.086098   -1.5769194  -4.0328145  -2.0336292
   0.          0.        ]]
[[-0.06662893  0.650456    0.08521353 -1.5509105  -3.9311335  -2.035073
   0.          0.     

[[-0.01367435  1.4474807  -0.33453473  0.39146882  0.00590017 -0.01832489
   0.          0.        ]]
[[-0.01031971  1.4386725  -0.337935    0.38455847  0.00681632  0.0113623
   0.          0.        ]]
[[-0.00694609  1.43002    -0.34594232  0.4115545   0.00624826  0.04351948
   0.          0.        ]]
[[-0.00350866  1.4207598  -0.3554111   0.43731728  0.00407251  0.08050597
   0.          0.        ]]
Episode 9 of 10000 finished after 72 timesteps
[[-0.31454736  0.11684161 -0.20933518 -1.875717   -2.7519104  -1.7888788
   0.          0.        ]]
[[-0.31327397  0.15846756 -0.20128775 -1.8469137  -2.662467   -1.7535446
   0.          0.        ]]
[[-0.3120284   0.19935252 -0.19334853 -1.8181682  -2.5747902  -1.718919
   0.          0.        ]]
[[-0.31080633  0.23950861 -0.1995168  -1.7957598  -2.4888449  -1.7587265
   0.          0.        ]]
[[-0.30949163  0.27905124 -0.2336051  -1.742221   -2.400909   -1.732151
   0.          0.        ]]
[[-0.30777392  0.31731778 -0.23287228 -1.71

[[-0.25581217  0.24652793 -0.36741796 -1.7044679  -1.0855508  -0.98131406
   0.          0.        ]]
[[-0.2518943   0.28422862 -0.3705811  -1.6731142  -1.0364851  -0.9447073
   0.          0.        ]]
[[-0.24793419  0.3212657  -0.37705913 -1.6401652  -0.9892497  -0.89109355
   0.          0.        ]]
[[-0.24390645  0.357613   -0.38310707 -1.6077888  -0.9446951  -0.84220237
   0.          0.        ]]
[[-0.23981747  0.39327866 -0.38772237 -1.5769504  -0.902585   -0.8062355
   0.          0.        ]]
[[-0.2356802   0.42828786 -0.3948537  -1.5441017  -0.86227334 -0.7523768
   0.          0.        ]]
[[-0.23147774  0.4626038  -0.39948004 -1.5135028  -0.8246545  -0.718149
   0.          0.        ]]
[[-0.22723079  0.49626434 -0.39936724 -1.4867585  -0.78874713 -0.7182127
   0.          0.        ]]
[[-0.22297569  0.529337   -0.39925873 -1.4600112  -0.7528366  -0.71827656
   0.          0.        ]]
[[-0.21871272  0.5618221  -0.39440534 -1.4360808  -0.7169228  -0.7466887
   0.          

[[-0.10292921  0.9746043  -0.14624421 -0.9432839  -0.5702618  -0.7759473
   0.          0.        ]]
[[-0.10113134  0.9955207  -0.14615177 -0.9165071  -0.5314644  -0.7760277
   0.          0.        ]]
[[-0.0993268   1.0158542  -0.15245576 -0.8871187  -0.492663   -0.74421054
   0.          0.        ]]
[[-0.09746647  1.0355573  -0.15952148 -0.8583868  -0.45545247 -0.7120763
   0.          0.        ]]
[[-0.09554434  1.0546427  -0.16733633 -0.82974684 -0.41984868 -0.67754227
   0.          0.        ]]
[[-0.09355517  1.073111   -0.19618978 -0.84244496 -0.3859716  -0.63626
   0.          0.        ]]
[[-0.09129258  1.0918918  -0.20612518 -0.8135909  -0.35415864 -0.5932878
   0.          0.        ]]
[[-0.08894787  1.1100483  -0.21542108 -0.7849117  -0.32449427 -0.5532147
   0.          0.        ]]
[[-0.08652668  1.1275808  -0.21539433 -0.7581834  -0.29683357 -0.55324215
   0.          0.        ]]
[[-0.08410358  1.1445231  -0.22378424 -0.72955227 -0.2691715  -0.5169467
   0.          0.

[[ 0.08468933  1.3112477   0.44706187 -0.52702063 -0.7420577  -1.0005746
   0.          0.        ]]
[[ 0.0806016   1.3226075   0.42027336 -0.51162803 -0.69202906 -0.95656097
   0.          0.        ]]
[[ 0.07677994  1.3336704   0.41171378 -0.48030862 -0.6442011  -0.90832263
   0.          0.        ]]
[[ 0.07303743  1.3440769   0.40189004 -0.44909045 -0.59878504 -0.8566834
   0.          0.        ]]
[[ 0.06938267  1.3538274   0.39591634 -0.4189225  -0.5559509  -0.82388514
   0.          0.        ]]
[[ 0.06578235  1.3629354   0.39591384 -0.39209366 -0.5147603  -0.8241154
   0.          0.        ]]
[[ 0.06219053  1.3714621   0.38560143 -0.3616975  -0.47355822 -0.7752423
   0.          0.        ]]
[[ 0.05868731  1.3793434   0.3786356  -0.33238044 -0.4347996  -0.74220014
   0.          0.        ]]
[[ 0.05524454  1.3865952   0.3678786  -0.30304098 -0.39769298 -0.6952896
   0.          0.        ]]
[[ 0.05189238  1.3932186   0.3612432  -0.27436927 -0.36293167 -0.6656357
   0.         

[[-0.24985461  0.15564853 -0.1842669  -1.7799413  -2.1386106  -1.5621933
   0.          0.        ]]
[[-0.24840827  0.1946701  -0.17851473 -1.7467668  -2.0605013  -1.5084298
   0.          0.        ]]
[[-0.24695416  0.23293956 -0.17478949 -1.7152469  -1.9850801  -1.4693098
   0.          0.        ]]
[[-0.24547791  0.27049342 -0.16995665 -1.6815486  -1.9116149  -1.4132243
   0.          0.        ]]
[[-0.24399118  0.3073032  -0.16750383 -1.6500319  -1.8409541  -1.3757279
   0.          0.        ]]
[[-0.24247646  0.3434118  -0.1669141  -1.62343    -1.7721679  -1.3761756
   0.          0.        ]]
[[-0.24092074  0.37890738 -0.16750687 -1.6006514  -1.7033594  -1.4060872
   0.          0.        ]]
[[-0.23931237  0.41385856 -0.16561666 -1.5677475  -1.6330552  -1.3591415
   0.          0.        ]]
[[-0.23767324  0.44810057 -0.22517183 -1.5347333  -1.5650984  -1.3305143
   0.          0.        ]]
[[-0.23539296  0.4816222  -0.22493923 -1.5006676  -1.498573   -1.2753719
   0.          0. 

[[ 0.13523579  0.7591537   0.23040386 -1.4495218  -3.7989552  -1.964711
   0.          0.        ]]
[[ 0.13211222  0.7926145   0.2526997  -1.4162608  -3.7007203  -1.9224113
   0.          0.        ]]
[[ 0.12873383  0.8251883   0.25958753 -1.3929232  -3.6046004  -1.8883774
   0.          0.        ]]
[[ 0.12526111  0.857102    0.25923413 -1.3669794  -3.5101824  -1.889534
   0.          0.        ]]
[[ 0.12175932  0.8883056   0.26127166 -1.3168453  -3.4157064  -1.8599548
   0.          0.        ]]
[[ 0.11822806  0.91824675  0.26112983 -1.2909089  -3.322709   -1.8610601
   0.          0.        ]]
[[ 0.11468229  0.94747496  0.26897705 -1.2655146  -3.2296565  -1.8302761
   0.          0.        ]]
[[ 0.11106634  0.9760011   0.2788419  -1.2396907  -3.1381433  -1.7919785
   0.          0.        ]]
[[ 0.10737161  1.003822    0.2875391  -1.2130607  -3.048545   -1.7583554
   0.          0.        ]]
[[ 0.10361452  1.0309274   0.2826882  -1.1717076  -2.9606276  -1.7584482
   0.          0.   

[[-0.11566582  0.36908805  0.04344822 -1.6292496  -1.885999   -1.3987823
   0.          0.        ]]
[[-0.11629371  0.40472397 -0.03146894 -1.5746347  -1.8160601  -1.3508488
   0.          0.        ]]
[[-0.11612034  0.43914908 -0.03003032 -1.5433252  -1.7485179  -1.3157327
   0.          0.        ]]
[[-0.11591387  0.47288427 -0.05199991 -1.5184048  -1.6827314  -1.3111918
   0.          0.        ]]
[[-0.11544409  0.5060551  -0.05145742 -1.4917423  -1.6171719  -1.3115795
   0.          0.        ]]
[[-0.11493625  0.53862286 -0.05091561 -1.4650558  -1.5515931  -1.3119675
   0.          0.        ]]
[[-0.11439037  0.5705914  -0.05061153 -1.433311   -1.4859948  -1.2743709
   0.          0.        ]]
[[-0.11380701  0.6018797  -0.05124222 -1.3984271  -1.4222764  -1.2131149
   0.          0.        ]]
[[-0.1131834   0.6324379  -0.05227257 -1.3684433  -1.3616209  -1.1882231
   0.          0.        ]]
[[-0.1125165   0.6623514  -0.05321282 -1.3376931  -1.3022099  -1.1579486
   0.          0. 

[[-0.30178052  1.0195091  -0.33013806 -1.1330132  -1.8679198  -1.3502874
   0.          0.        ]]
[[-0.29865488  1.0440103  -0.32791752 -1.1011449  -1.8004056  -1.3103936
   0.          0.        ]]
[[-0.29550332  1.0678091  -0.3262405  -1.0687913  -1.734886   -1.2673074
   0.          0.        ]]
[[-0.29232353  1.0909022  -0.32573217 -1.0421476  -1.671521   -1.2676576
   0.          0.        ]]
[[-0.28910837  1.1133895  -0.32519412 -1.0075878  -1.6081383  -1.2084143
   0.          0.        ]]
[[-0.28585893  1.1351422  -0.32515436 -0.9753861  -1.5477178  -1.1670109
   0.          0.        ]]
[[-0.2825747   1.156203   -0.32472536 -0.94868743 -1.4893674  -1.1672844
   0.          0.        ]]
[[-0.2792604   1.1766673  -0.32493997 -0.91682935 -1.4310032  -1.1287322
   0.          0.        ]]
[[-0.27591377  1.196451   -0.32714403 -0.8841642  -1.3745667  -1.0829587
   0.          0.        ]]
[[-0.2725194   1.2155429  -0.33059797 -0.8496813  -1.3204187  -1.0226966
   0.          0. 

[[-0.01228371  0.9309816   0.11227514 -1.1346611  -1.5621318  -1.263211
   0.          0.        ]]
[[-0.01337862  0.95555276  0.11220603 -1.1002609  -1.4989713  -1.2053857
   0.          0.        ]]
[[-0.01443672  0.97939795  0.11171506 -1.0686828  -1.4387022  -1.1687826
   0.          0.        ]]
[[-0.01545706  1.0025673   0.10962874 -1.0352502  -1.3802632  -1.1174366
   0.          0.        ]]
[[-0.01642914  1.0250322   0.10898201 -1.0057346  -1.3243915  -1.0963256
   0.          0.        ]]
[[-0.01736727  1.0468599   0.10765207 -0.9755348  -1.2695754  -1.0696441
   0.          0.        ]]
[[-0.01826792  1.0680407   0.10477694 -0.94280523 -1.2160933  -1.0230669
   0.          0.        ]]
[[-0.01912203  1.0885327   0.10507612 -0.91604626 -1.16494    -1.0232512
   0.          0.        ]]
[[-0.01995478  1.1084385   0.10536788 -0.8892773  -1.1137774  -1.0234356
   0.          0.        ]]
[[-0.02076674  1.1277596   0.10565142 -0.86249846 -1.0626057  -1.02362
   0.          0.    

[[ 0.091502   1.0884954  0.4244005 -0.9064448 -1.4969394 -1.4973595
   0.         0.       ]]
[[ 0.08734512  1.1077603   0.42335263 -0.87162024 -1.4220717  -1.43658
   0.          0.        ]]
[[ 0.08324786  1.1262996   0.42228413 -0.8400557  -1.350243   -1.4002941
   0.          0.        ]]
[[ 0.07920675  1.1441725   0.42079797 -0.807605   -1.2802285  -1.3574543
   0.          0.        ]]
[[ 0.0752202   1.1613673   0.41704854 -0.7735832  -1.212356   -1.3008444
   0.          0.        ]]
[[ 0.07130289  1.1778605   0.41526413 -0.7429338  -1.1473138  -1.2711265
   0.          0.        ]]
[[ 0.06743517  1.1937104   0.41134578 -0.7101001  -1.0837578  -1.2232215
   0.          0.        ]]
[[ 0.06362991  1.2088816   0.406878   -0.678868   -1.0225968  -1.1850388
   0.          0.        ]]
[[ 0.05989046  1.2234035   0.40244126 -0.64715326 -0.963345   -1.1441098
   0.          0.        ]]
[[ 0.05621176  1.2372667   0.39656535 -0.6147322  -0.9061396  -1.0958526
   0.          0.        ]]

[[ 0.1755003   1.2444935   0.89741296 -0.6076406  -0.51309335 -0.79096293
   0.          0.        ]]
[[ 0.1668789   1.2578824   0.88738763 -0.57747144 -0.4735488  -0.743851
   0.          0.        ]]
[[ 0.15834323  1.2706283   0.87799585 -0.5482286  -0.4363596  -0.7021919
   0.          0.        ]]
[[ 0.14988795  1.2827477   0.8666214  -0.51830214 -0.4012532  -0.6511873
   0.          0.        ]]
[[ 0.14152709  1.2942247   0.8665759  -0.4915357  -0.36869675 -0.6513242
   0.          0.        ]]
[[ 0.1331707   1.3051147   0.86652577 -0.46477106 -0.3361335  -0.65147763
   0.          0.        ]]
[[ 0.12481833  1.3154174   0.8567816  -0.43631124 -0.30356258 -0.6106992
   0.          0.        ]]
[[ 0.11654663  1.3251034   0.84684104 -0.4079453  -0.27303043 -0.56934875
   0.          0.        ]]
[[ 0.10835667  1.3341721   0.8375784  -0.37962294 -0.24456558 -0.53074944
   0.          0.        ]]
[[ 0.10024233  1.3426218   0.8270297  -0.3518663  -0.21803056 -0.48810023
   0.         

[[ 0.04706364  1.4225845   0.6294819  -0.03026015 -0.09872387 -0.3615889
   0.          0.        ]]
[[ 0.04095087  1.4232411   0.6215214  -0.00275222 -0.08064611 -0.3295246
   0.          0.        ]]
[[ 0.034902    1.4232851   0.612888    0.02463005 -0.06417141 -0.29485124
   0.          0.        ]]
[[ 0.02892199  1.4227183   0.6058682   0.05127912 -0.04943023 -0.26696214
   0.          0.        ]]
[[ 0.02299824  1.421556    0.5955319   0.07822373 -0.03608335 -0.22563002
   0.          0.        ]]
[[ 0.01715698  1.4197907   0.58870316  0.10524555 -0.0248029  -0.19833931
   0.          0.        ]]
[[ 0.01137018  1.4174197   0.58084357  0.13173236 -0.01488685 -0.16703574
   0.          0.        ]]
[[ 0.00564623  1.4144545   0.5718893   0.15706788 -0.00653579 -0.1295413
   0.          0.        ]]
Episode 22 of 10000 finished after 53 timesteps
[[ 0.4945081  -0.05483146  1.0781863  -2.0688953  -3.3609002  -2.4273016
   0.          0.        ]]
[[ 0.48251143 -0.00800177  1.0885218  

[[-0.15992919  0.82296824 -0.26027077 -1.1817144  -0.8740614  -1.2355155
   0.          0.        ]]
[[-0.1569088   0.84885794 -0.26717547 -1.1501693  -0.81228584 -1.1903425
   0.          0.        ]]
[[-0.1538082   0.87410116 -0.32813236 -1.1698776  -0.7527688  -1.1424366
   0.          0.        ]]
[[-0.15009241  0.89985025 -0.33545104 -1.1390541  -0.6956472  -1.1002891
   0.          0.        ]]
[[-0.14629936  0.924963   -0.34539905 -1.1077054  -0.64063287 -1.0476239
   0.          0.        ]]
[[-0.14241095  0.9494292  -0.35496432 -1.0760372  -0.58825177 -0.9951332
   0.          0.        ]]
[[-0.1384345   0.97323805 -0.3642369  -1.0460314  -0.5384952  -0.95062953
   0.          0.        ]]
[[-0.13437243  0.9964199  -0.3722044  -1.0162604  -0.49096376 -0.91169816
   0.          0.        ]]
[[-0.13023786  1.0189748  -0.38202763 -0.984979   -0.44537887 -0.8626236
   0.          0.        ]]
[[-0.12601957  1.040869   -0.39318594 -0.95519674 -0.4022516  -0.8130717
   0.          0

[[ 0.23877068  1.3213547   1.0669888  -0.50248635 -0.8944308  -1.1906022
   0.          0.        ]]
[[ 0.2284934   1.3319743   1.0617144  -0.47004178 -0.8349008  -1.1462224
   0.          0.        ]]
[[ 0.2182788   1.3419244   1.0551345  -0.43895537 -0.77759486 -1.1053624
   0.          0.        ]]
[[ 0.20813751  1.3512313   1.0486991  -0.4085738  -0.7223318  -1.0687348
   0.          0.        ]]
[[ 0.19806632  1.3599062   1.0427932  -0.37831986 -0.6688999  -1.0345503
   0.          0.        ]]
[[ 0.18805799  1.3679495   1.0352589  -0.34823757 -0.61717707 -0.9961165
   0.          0.        ]]
[[ 0.17812414  1.3753644   1.0263364  -0.31717673 -0.5673758  -0.949128
   0.          0.        ]]
[[ 0.16827211  1.3821299   1.0180972  -0.28773895 -0.51992375 -0.91103286
   0.          0.        ]]
[[ 0.15849666  1.3882756   1.0084753  -0.25793895 -0.47437635 -0.86679506
   0.          0.        ]]
[[ 0.14880648  1.3937929   0.99888927 -0.2284278  -0.43104059 -0.8237258
   0.          0.

[[ 0.01548843  1.3717499   0.21338291 -0.2653159  -0.09542526 -0.3586584
   0.          0.        ]]
[[ 0.01353521  1.3776964   0.20489085 -0.23828138 -0.077494   -0.32475588
   0.          0.        ]]
[[ 0.01165028  1.3830407   0.18638214 -0.24830273 -0.06125773 -0.28503722
   0.          0.        ]]
[[ 0.00993032  1.388616    0.17600173 -0.22124265 -0.04700718 -0.24350695
   0.          0.        ]]
[[ 0.00829344  1.3935864   0.16747078 -0.19478828 -0.03483297 -0.2095447
   0.          0.        ]]
[[ 0.00672455  1.3979646   0.15683436 -0.16818015 -0.02435671 -0.16704476
   0.          0.        ]]
[[ 0.00524073  1.401746    0.14827213 -0.1414615  -0.01600524 -0.13280627
   0.          0.        ]]
[[ 0.00382519  1.4049276   0.14026211 -0.11495901 -0.00936554 -0.1007807
   0.          0.        ]]
[[ 0.00247345  1.4075137   0.12976159 -0.08852757 -0.00432696 -0.0587407
   0.          0.        ]]
[[ 1.2055397e-03  1.4095054e+00  1.2209742e-01 -6.2881373e-02
  -1.3901830e-03 -2.7656

[[-0.19976702  0.6955619  -0.22065282 -1.4033507  -2.870858   -2.2726562
   0.          0.        ]]
[[-0.19864789  0.7265721  -0.21058607 -1.3755609  -2.7572262  -2.2338614
   0.          0.        ]]
[[-0.1975625  0.7567902 -0.1998568 -1.3466022 -2.645534  -2.1893768
   0.         0.       ]]
[[-0.19650535  0.78621024 -0.19003789 -1.3163508  -2.536066   -2.1429944
   0.          0.        ]]
[[-0.19545908  0.81482404 -0.1818639  -1.2861828  -2.4289174  -2.1021812
   0.          0.        ]]
[[-0.19440413  0.84265083 -0.17448625 -1.2548677  -2.3238091  -2.057509
   0.          0.        ]]
[[-0.19332743  0.8696862  -0.16740839 -1.2230594  -2.2209342  -2.0105433
   0.          0.        ]]
[[-0.19222374  0.89593995 -0.16159023 -1.191008   -2.1204078  -1.9647617
   0.          0.        ]]
[[-0.19108076  0.9214245  -0.15698284 -1.1592522  -2.0221705  -1.9231927
   0.          0.        ]]
[[-0.18988867  0.9461612  -0.1521549  -1.1255066  -1.9260113  -1.8674023
   0.          0.        ]

[[ 0.32023755  0.9557716   0.84427434 -1.2894924  -3.0613196  -2.3036268
   0.          0.        ]]
[[ 0.31064025  0.9845345   0.85567045 -1.2623559  -2.9461393  -2.2602267
   0.          0.        ]]
[[ 0.30097693  1.0124995   0.86809283 -1.2345891  -2.8331292  -2.2120786
   0.          0.        ]]
[[ 0.29125148  1.0396708   0.8786691  -1.2063469  -2.7225263  -2.1700938
   0.          0.        ]]
[[ 0.28148946  1.0660539   0.8673258  -1.1368921  -2.6140225  -2.1282272
   0.          0.        ]]
[[ 0.2719184   1.0907395   0.83483255 -1.072884   -2.507612   -2.0858216
   0.          0.        ]]
[[ 0.26275644  1.113869    0.84106797 -1.0431454  -2.4033217  -2.052855
   0.          0.        ]]
[[ 0.25361776  1.1362265   0.7826879  -0.98763597 -2.3006792  -2.0041146
   0.          0.        ]]
[[ 0.24515696  1.1572595   0.7541648  -0.93875873 -2.200474   -1.9587421
   0.          0.        ]]
[[ 0.23707533  1.1771333   0.76104164 -0.90633804 -2.1025376  -1.908297
   0.          0.   

[[ 0.14343663  1.1583619   0.78335065 -0.78724587 -0.5659867  -0.9097083
   0.          0.        ]]
[[ 0.13599758  1.1757196   0.7771437  -0.75748605 -0.52050537 -0.87740755
   0.          0.        ]]
[[ 0.12861633  1.192446    0.7666205  -0.72750527 -0.47663897 -0.8292786
   0.          0.        ]]
[[ 0.12132702  1.2085391   0.75701296 -0.6980454  -0.43517888 -0.78622705
   0.          0.        ]]
[[ 0.11412096  1.2240052   0.7479073  -0.66922927 -0.3958711  -0.74700284
   0.          0.        ]]
[[ 0.1069933   1.2388552   0.73906225 -0.6402086  -0.35852435 -0.7082087
   0.          0.        ]]
[[ 0.09994049  1.2530813   0.73133564 -0.6122185  -0.32311717 -0.67628205
   0.          0.        ]]
[[ 0.0929534   1.2667023   0.7227312  -0.5833536  -0.28930616 -0.6390374
   0.          0.        ]]
[[ 0.08603744  1.2796977   0.7143701  -0.55521625 -0.25735724 -0.6043253
   0.          0.        ]]
[[ 0.07919054  1.2920806   0.70374596 -0.5265914  -0.2271437  -0.55996823
   0.        

[[ 0.04286861  0.3569925   0.02148544 -1.5848657  -4.994388   -2.8971436
   0.          0.        ]]
[[ 0.04294586  0.39480537  0.01833522 -1.5636858  -4.8496675  -2.8590229
   0.          0.        ]]
[[ 0.04284506  0.43215367  0.0152814  -1.543987   -4.706849   -2.8112874
   0.          0.        ]]
[[ 0.04257279  0.4690183   0.01457186 -1.5228435  -4.566408   -2.7754364
   0.          0.        ]]
[[ 0.04211683  0.505336    0.01470768 -1.5023636  -4.4277277  -2.7356098
   0.          0.        ]]
[[ 0.0414814   0.5410833   0.01743746 -1.4823467  -4.2909493  -2.6904697
   0.          0.        ]]
[[ 0.04065905  0.57623315  0.02213593 -1.4634674  -4.156428   -2.63559
   0.          0.        ]]
[[ 0.03965282  0.61077994  0.02595028 -1.4419686  -4.02465    -2.600609
   0.          0.        ]]
[[ 0.03848648  0.6446561   0.033695   -1.4215171  -3.8946211  -2.5504909
   0.          0.        ]]
[[ 0.03714752  0.67786205  0.04247627 -1.4003927  -3.7670982  -2.501766
   0.          0.     

[[-0.01347427  0.34744042 -0.06150848 -1.598431   -5.100811   -2.9825568
   0.          0.        ]]
[[-0.01240683  0.38556254 -0.06622273 -1.5765586  -4.9518304  -2.9477172
   0.          0.        ]]
[[-0.01151199  0.42324343 -0.07054539 -1.5565782  -4.8045883  -2.900843
   0.          0.        ]]
[[-0.01079054  0.46046677 -0.07266088 -1.5360545  -4.659683   -2.8601434
   0.          0.        ]]
[[-0.01025553  0.49717814 -0.07349177 -1.51746    -4.5168085  -2.8062716
   0.          0.        ]]
[[-0.00990438  0.5333702  -0.07231908 -1.498988   -4.3766093  -2.751637
   0.          0.        ]]
[[-0.00973883  0.5690119  -0.07066843 -1.478845   -4.239054   -2.7100947
   0.          0.        ]]
[[-0.00974741  0.6040423  -0.06537578 -1.4595025  -4.1035514  -2.6577604
   0.          0.        ]]
[[-0.00994215  0.6384477  -0.05882666 -1.439778   -3.970665   -2.6062996
   0.          0.        ]]
[[-0.01031313  0.6722022  -0.05338495 -1.4179095  -3.8403516  -2.5709867
   0.          0.   

[[ 0.02132568  0.65442467  0.30499464 -1.3837464  -1.7542291  -1.7837942
   0.          0.        ]]
[[ 0.01815481  0.68421596  0.27852184 -1.3560511  -1.6650398  -1.7368466
   0.          0.        ]]
[[ 0.01532888  0.7134088   0.2790181  -1.3234819  -1.578198   -1.693322
   0.          0.        ]]
[[ 0.01257238  0.74190205  0.27839905 -1.2886323  -1.4935322  -1.63256
   0.          0.        ]]
[[ 0.00988932  0.7696651   0.278227   -1.2563654  -1.4119046  -1.5917084
   0.          0.        ]]
[[ 0.00726938  0.7967488   0.2757513  -1.2216016  -1.3323194  -1.5309266
   0.          0.        ]]
[[ 0.0047266   0.8231173   0.27381322 -1.188451   -1.2557733  -1.4828917
   0.          0.        ]]
[[ 0.00225029  0.84880155  0.27151307 -1.1565524  -1.181629   -1.4437315
   0.          0.        ]]
[[-1.6069412e-04  8.7382668e-01  2.6604995e-01 -1.1225708e+00
  -1.1094426e+00 -1.3853620e+00  0.0000000e+00  0.0000000e+00]]
[[-0.00248508  0.89816064  0.22125833 -1.1092087  -1.0401747  -1.3390

[[-0.07716904  1.2786685  -0.52189845 -0.59610707 -0.06320602 -0.3494994
   0.          0.        ]]
[[-0.07177353  1.2920667  -0.5331542  -0.56960565 -0.04573266 -0.30472118
   0.          0.        ]]
[[-0.06628799  1.3048741  -0.5475848  -0.56215006 -0.03049801 -0.26478034
   0.          0.        ]]
[[-0.06067829  1.317518   -0.55849564 -0.5351784  -0.0172602  -0.22117285
   0.          0.        ]]
[[-0.05498152  1.3295575  -0.5693047  -0.50818545 -0.00620261 -0.1779674
   0.          0.        ]]
[[-0.04919844  1.3409915  -0.58123577 -0.48204485  0.00269493 -0.1302199
   0.          0.        ]]
[[-0.04332018  1.3518381  -0.5917267  -0.4552029   0.00920533 -0.08823468
   0.          0.        ]]
[[-0.03735838  1.3620809  -0.60066587 -0.4281999   0.01361664 -0.05247725
   0.          0.        ]]
[[-0.03132515  1.371716   -0.6099027  -0.40112093  0.01624027 -0.01550015
   0.          0.        ]]
[[-0.0252183   1.3807414  -0.6175886  -0.37496167  0.01701519  0.01538464
   0.      

[[-0.12372904  0.35016534 -0.19360246 -1.6883167  -6.3157067  -3.2535126
   0.          0.        ]]
[[-0.12015085  0.38829833 -0.20347945 -1.6598895  -6.1532183  -3.2223747
   0.          0.        ]]
[[-0.11652641  0.42618078 -0.21484342 -1.632791   -5.992281   -3.1853828
   0.          0.        ]]
[[-0.11288281  0.46381545 -0.22559175 -1.6068951  -5.8331885  -3.1492097
   0.          0.        ]]
[[-0.10926037  0.5011951  -0.23466153 -1.5825436  -5.675899   -3.1141174
   0.          0.        ]]
[[-0.10570459  0.53831387 -0.24608262 -1.5600808  -5.5203586  -3.0635524
   0.          0.        ]]
[[-0.10222034  0.57515883 -0.25584027 -1.5382781  -5.3673396  -3.0149221
   0.          0.        ]]
[[-0.09883909  0.61169356 -0.26268584 -1.5151945  -5.2167444  -2.9823499
   0.          0.        ]]
[[-0.0955966   0.6478495  -0.26815835 -1.4944664  -5.067774   -2.9371476
   0.          0.        ]]
[[-0.0925148  0.68362   -0.2726163 -1.474544  -4.9210587 -2.8882368
   0.         0.       

[[-0.04889412  0.39077985  0.02395918 -1.6879395  -2.8431368  -2.2394025
   0.          0.        ]]
[[-0.05019484  0.42815882  0.02912973 -1.6435692  -2.7311676  -2.2005804
   0.          0.        ]]
[[-0.05147896  0.46438044  0.03814813 -1.6145337  -2.6211395  -2.161796
   0.          0.        ]]
[[-0.05277615  0.49980736  0.0459818  -1.5852307  -2.5130506  -2.1260238
   0.          0.        ]]
[[-0.05406819  0.5344505   0.05402466 -1.5546259  -2.4067502  -2.083376
   0.          0.        ]]
[[-0.05534925  0.5683026   0.02101127 -1.4986362  -2.3025815  -2.0387485
   0.          0.        ]]
[[-0.05620489  0.6008134   0.02836326 -1.4664415  -2.2006447  -1.9887316
   0.          0.        ]]
[[-0.05703611  0.6325406   0.03386894 -1.4341518  -2.101209   -1.9419777
   0.          0.        ]]
[[-0.05782623  0.6634983   0.04031374 -1.4005188  -2.0041106  -1.8844097
   0.          0.        ]]
[[-0.0585844   0.69368136  0.0452489  -1.3676333  -1.9098905  -1.8345553
   0.          0.   

[[-0.01938829  0.94954675  0.19526604 -1.0070044  -1.2032216  -1.384674
   0.          0.        ]]
[[-0.02106409  0.9712378   0.19147947 -0.97340244 -1.1339881  -1.3315384
   0.          0.        ]]
[[-0.02267141  0.9922391   0.18841885 -0.9414739  -1.0674114  -1.291508
   0.          0.        ]]
[[-0.02421989  1.0125802   0.15824565 -0.92410636 -1.0028361  -1.2448838
   0.          0.        ]]
[[-0.02544517  1.0325933   0.10738317 -0.91598237 -0.94059205 -1.1987636
   0.          0.        ]]
[[-0.02614517  1.0524852   0.10330726 -0.88489    -0.880654   -1.1631361
   0.          0.        ]]
[[-0.02678881  1.0717322   0.0965745  -0.8536356  -0.82249737 -1.1199038
   0.          0.        ]]
[[-0.02735605  1.0903336   0.09023368 -0.82296926 -0.7665023  -1.0811166
   0.          0.        ]]
[[-0.02785282  1.1082985   0.08357815 -0.7918506  -0.71244663 -1.0393654
   0.          0.        ]]
[[-0.02828064  1.1256163   0.07624236 -0.7612701  -0.6604785  -0.9983916
   0.          0.   

[[-0.05229139  1.3984311   0.08358064 -0.61578304 -1.4976342  -1.6039822
   0.          0.        ]]
[[-0.05303211  1.4110761   0.08315817 -0.5824284  -1.4174354  -1.554764
   0.          0.        ]]
[[-0.05371008  1.4230223   0.08176613 -0.547713   -1.3396976  -1.4952271
   0.          0.        ]]
[[-0.05432415  1.4342518   0.01040684 -0.5263853  -1.2649364  -1.4468746
   0.          0.        ]]
[[-0.05417986  1.4450613   0.00855615 -0.49429882 -1.192593   -1.4069252
   0.          0.        ]]
[[-0.05397682  1.4552058   0.00460861 -0.46153593 -1.122247   -1.3593704
   0.          0.        ]]
[[-0.05370121  1.4646772   0.00173698 -0.430142   -1.0542787  -1.3233869
   0.          0.        ]]
[[-0.05336647  1.4734998  -0.00379539 -0.3964306  -0.9881096  -1.2674428
   0.          0.        ]]
[[-0.05295677  1.4816344  -0.01098145 -0.3633209  -0.92473763 -1.2118777
   0.          0.        ]]
[[-0.05246134  1.489093   -0.01539021 -0.33205605 -0.86414385 -1.1745566
   0.          0.  

[[ 0.13294153  1.2138418   0.65332013 -0.71405315 -0.79274124 -1.1484313
   0.          0.        ]]
[[ 0.12682943  1.2293062   0.64582473 -0.6812976  -0.73531973 -1.0973614
   0.          0.        ]]
[[ 0.12079392  1.2440959   0.6373733  -0.6493887  -0.68045664 -1.0473487
   0.          0.        ]]
[[ 0.11484136  1.2582252   0.6275627  -0.6180743  -0.62809396 -0.9960292
   0.          0.        ]]
[[ 0.10898133  1.2717052   0.6194809  -0.58801746 -0.57829696 -0.9559585
   0.          0.        ]]
[[ 0.10319815  1.2845552   0.61060894 -0.55764    -0.5305034  -0.9120591
   0.          0.        ]]
[[ 0.09749556  1.2967671   0.602558   -0.5276295  -0.48490456 -0.8726279
   0.          0.        ]]
[[ 0.09186516  1.3083444   0.591962   -0.49726203 -0.44127712 -0.8230913
   0.          0.        ]]
[[ 0.08632584  1.3192787   0.5838597  -0.4680205  -0.4001263  -0.786348
   0.          0.        ]]
[[ 0.0808568   1.3295889   0.5765476  -0.43915212 -0.36081252 -0.75376046
   0.          0. 

[[-0.05914717  1.4089804  -0.70812273 -0.10334517  0.01638795 -0.09306397
   0.          0.        ]]
[[-0.05201893  1.411307   -0.7176112  -0.07666316  0.02104073 -0.05508048
   0.          0.        ]]
[[-0.04481506  1.4130329  -0.7277799  -0.05044342  0.02379449 -0.01424525
   0.          0.        ]]
[[-0.03753004  1.4141681  -0.7364973  -0.02407327  0.02450669  0.0207641
   0.          0.        ]]
[[-0.03017559  1.4147093  -0.746242    0.00254869  0.02346859  0.05987571
   0.          0.        ]]
[[-0.02274342  1.4146509  -0.75635636  0.02894675  0.02047508  0.10054643
   0.          0.        ]]
[[-0.01523066  1.4139982  -0.764007    0.05561821  0.01544821  0.13139027
   0.          0.        ]]
[[-0.00765705  1.4127457  -0.775585    0.08110724  0.00887933  0.17568168
   0.          0.        ]]
Episode 42 of 10000 finished after 56 timesteps
[[ 0.17247066  0.04395569  0.30740872 -1.9412254  -3.2903857  -2.37016
   0.          0.        ]]
[[ 0.16820058  0.08778306  0.3167128  

[[-0.10369787  0.8397929  -0.11676289 -1.233414   -1.0422839  -1.3176186
   0.          0.        ]]
[[-0.10217305  0.8666993  -0.12037931 -1.202002   -0.97640324 -1.2805089
   0.          0.        ]]
[[-0.10058717  0.89295805 -0.12757541 -1.1688237  -0.91237795 -1.2246084
   0.          0.        ]]
[[-0.09891567  0.9185404  -0.13310024 -1.1375922  -0.85114765 -1.1846755
   0.          0.        ]]
[[-0.09717455  0.9434791  -0.14007851 -1.1063229  -0.791914   -1.1408333
   0.          0.        ]]
[[-0.09535513  0.9677741  -0.14925814 -1.0741206  -0.73487246 -1.0859873
   0.          0.        ]]
[[-0.09344359  0.9914073  -0.1575109  -1.0426052  -0.6805732  -1.0376551
   0.          0.        ]]
[[-0.09145059  1.0143876  -0.16374919 -1.0124289  -0.62869054 -1.002217
   0.          0.        ]]
[[-0.08939447  1.0367365  -0.17110822 -0.9820334  -0.5785797  -0.9624543
   0.          0.        ]]
[[-0.08726845  1.0584488  -0.18189824 -0.9514739  -0.5304571  -0.9105794
   0.          0.  

[[-0.18597022  1.3867338  -0.3313746  -0.5636492  -1.0536203  -1.3099282
   0.          0.        ]]
[[-0.18230781  1.3985691  -0.3361525  -0.531336   -0.9881242  -1.2644659
   0.          0.        ]]
[[-0.17857552  1.409741   -0.3430694  -0.49873695 -0.92490107 -1.2126342
   0.          0.        ]]
[[-0.17475891  1.420246   -0.34786725 -0.46728906 -0.8642695  -1.1732504
   0.          0.        ]]
[[-0.17088003  1.430101   -0.35426444 -0.43559936 -0.80560714 -1.1286485
   0.          0.        ]]
[[-0.16692886  1.4393024  -0.36252123 -0.4031925  -0.74917483 -1.0753512
   0.          0.        ]]
[[-0.16289406  1.4478359  -0.36840707 -0.37243506 -0.6954074  -1.0378525
   0.          0.        ]]
[[-0.15879698  1.4557279  -0.37657577 -0.34097707 -0.6435148  -0.9903113
   0.          0.        ]]
[[-0.15462188  1.4629653  -0.3835297  -0.31129548 -0.5939993  -0.954807
   0.          0.        ]]
[[-0.1503788   1.4695795  -0.38999468 -0.28122315 -0.54625905 -0.91959065
   0.          0. 

[[ 0.32058507  1.4411961   1.1456816  -0.47259876 -1.4728179  -1.6130937
   0.          0.        ]]
[[ 0.30924433  1.4506165   1.1435652  -0.43812096 -1.3921635  -1.5545077
   0.          0.        ]]
[[ 0.29798156  1.4593221   1.1406912  -0.40343532 -1.3144383  -1.4939264
   0.          0.        ]]
[[ 0.28679648  1.4673144   1.0927675  -0.3740746  -1.2397423  -1.442775
   0.          0.        ]]
[[ 0.27613363  1.4747095   1.0883586  -0.34017965 -1.1676036  -1.3864464
   0.          0.        ]]
[[ 0.26555014  1.4814113   1.0850241  -0.30803916 -1.0982814  -1.3443158
   0.          0.        ]]
[[ 0.25503197  1.4874507   1.0789235  -0.27468947 -1.0310658  -1.2891672
   0.          0.        ]]
[[ 0.2445981  1.4928094  1.0731299 -0.2415118 -0.9666076 -1.2361481
   0.         0.       ]]
[[ 0.23423986  1.4974893   1.068297   -0.21002474 -0.90480024 -1.1961448
   0.          0.        ]]
[[ 0.22394666  1.5015191   1.0635736  -0.17819151 -0.8449931  -1.1565716
   0.          0.        ]

KeyboardInterrupt: 

In [16]:
env.observation_space

Box(8,)