In [1]:
#utils.py
import argparse
import collections
import random

import gym
import numpy as np
import torch
from tqdm import tqdm



In [2]:
class QPolicy:
    """
    Abstract policy to be subclassed
    """

    def __init__(self, statesize, actionsize, lr, gamma):
        self.statesize = statesize
        self.actionsize = actionsize
        self.lr = lr
        self.gamma = gamma

    def __call__(self, state, epsilon):
        qs = self.qvals(state[np.newaxis])[0]
        #print('qval index=',state[np.newaxis],self.qvals(state[np.newaxis]),'qs=',qs)
        decision = np.random.uniform(0, 1)
        if decision < epsilon:
            pi = np.ones(self.actionsize) / self.actionsize
        else:
            pi = np.zeros(self.actionsize)
            pi[np.argmax(qs)] = 1.0
        return pi

    def qvals(self, states):
        raise Exception("Not implemented")

    def td_step(self, state, action, reward, next_state, done):
        raise Exception("Not implemented")

    def save(self, outpath):
        raise Exception("Not implemented")

    def __str__(self):
        return self.__class__.__name__

In [43]:
class TabQPolicy(QPolicy):
    def __init__(self, env, buckets, actionsize, lr, gamma, model=None):
        """
        Inititalize the tabular q policy

        @param env: the gym environment
        @param buckets: specifies the discretization of the continuous state space for each dimension
        @param actionsize: dimension of the descrete action space.
        @param lr: learning rate for the model update 
        @param gamma: discount factor
        @param model (optional): Stores the Q-value for each state-action
            model = np.zeros(self.buckets + (actionsize,))
            
        """
        super().__init__(len(buckets), actionsize, lr, gamma)
        self.env = env
        self.buckets=buckets
        self.model=np.zeros(self.buckets + (actionsize,))

    def discretize(self, obs):
        """
        Discretizes the continuous input observation

        @param obs: continuous observation
        @return: discretized observation  
        """
        upper_bounds = [self.env.observation_space.high[0], self.env.observation_space.high[1]]
        lower_bounds = [self.env.observation_space.low[0], self.env.observation_space.low[1]]
        ratios = [(obs[i] + abs(lower_bounds[i])) / (upper_bounds[i] - lower_bounds[i]) for i in range(len(obs))]
        new_obs = [int(round((self.buckets[i] - 1) * ratios[i])) for i in range(len(obs))]
        new_obs = [min(self.buckets[i] - 1, max(0, new_obs[i])) for i in range(len(obs))]
        return tuple(new_obs)

    def qvals(self, states):
        """
        Returns the q values for the states.

        @param state: the state
        
        @return qvals: the q values for the state for each action. 
        """
        #if len(states)==1: #states in [[]] form 
        #    states=states[0]
        #else:
        #    states=states   
        #index_tuple=self.discretize(states)
        #print('index_tuple is ',index_tuple,'states=',states,'type of states',type(states))
        dstates = [self.discretize(state) for state in states]
        print('states=',states,'dstates=',dstates,self.discretize(states[0]))
        return np.array([self.model[state] for state in dstates])
    def td_step(self, state, action, reward, next_state, done):
        """
        One step TD update to the model

        @param state: the current state
        @param action: the action
        @param reward: the reward of taking the action at the current state
        @param next_state: the next state after taking the action at the
            current state
        @param done: true if episode has terminated, false otherwise
        @return loss: total loss the at this time step
        """
        #print('state in td_step',state)
        #if done==True and next_state[0]>=0.5:
        #    reward=1
        #if next_state[0]<0.5 or done==False:
        #    Target=np.max(self.qvals(next_state))*self.gamma+reward
        #    Q=self.qvals(state)[action]
        #else:
        #    Target=reward
        #    Q=self.qvals(state)[action]
        #loss=np.square(Target- Q)
        #Qnew=Q+self.lr*(Target-Q)
        #index_tuple=self.discretize(state)
        #self.model[index_tuple][action]=Qnew
        #return loss
        if done == True and next_state[0] >= 0.5:  # following homework documentation
            reward = 1
        if next_state[0] < 0.5 or done == False:  # if the state is not terminal
            Target = np.max(self.qvals(next_state[np.newaxis])[0]) * self.gamma + reward
            Q = self.qvals(state[np.newaxis])[0][action]
        else:
            Target = reward
            Q = self.qvals(state[np.newaxis])[0][action]
        loss = np.square(Target - Q)
        Qnew = Q + self.lr * (Target - Q)
        index_tuple = self.discretize(state)
        self.model[index_tuple][action] = Qnew  # get the index such that I can update the Q table
        return loss
    def save(self, outpath):
        torch.save(self.model, outpath)

In [44]:
def rollout(env: gym.Env, policies: QPolicy, episodes: int, temp: float, render: bool = True):
    """
    Simulates trajectories for the given number of episodes. Input policy is used to sample actions at each time step

    :param env: the gym environment
    :param policies: The policy used to sample actions (Tabular/DQN) 
    :param episodes: Number of episodes to be simulated
    :param epsilon: The exploration parameter for epsilon-greedy policy
    :param gamma: Discount factor
    :param render: If True, render the environment
    
    :return replay: Collection of (state, action, reward, next_state, done) at each timestep of all simulated episodes
    :return scores: Collection of total reward for each simulated episode  
    """
    replay = []
    scores = []
    for itrnum in range(episodes):
        state = env.reset()
        step = 0
        score = 0
        done = False
        while not done:
            if render:
                env.render()
            pi = policies(state, temp)
            # How do you select the action given pi. Hint: use np.random.choice
            action = np.random.choice(env.action_space.n,1,p=pi)[0]
            next_state, reward, done, _ = env.step(action)
            #print('action is',action,'next state',next_state,'reward',reward,'done',done)
            score += reward
            replay.append((state, action, reward, next_state, done))
            state = next_state
            step += 1

        env.close()
        scores.append(score)

    return replay, scores

In [45]:
def loadmodel(modelfile: str, env: gym.Env, statesize, actionsize):
    if '.model' in modelfile:
        # PyTorch
        pt_model = torch.load(modelfile)
        model = DQNPolicy(pt_model, statesize, actionsize, 0, None)
    elif '.npy' in modelfile:
        # Numpy
        pt_model = torch.load(modelfile)
        model = TabQPolicy(env, pt_model.shape[:-1], actionsize, 0, None, model=pt_model)
        pass
    else:
        raise Exception("Unknown model file extension")

    return model


In [46]:
def qlearn(env, policy, episodes,epsilon,epsilon_min,epsilon_decay_factor,trainsize):
    """
    Main training loop
    """
    replaymem = collections.deque(maxlen=500000)
    pbar = tqdm(range(episodes), desc='Iterations')
    all_scores = []
    epsilon =epsilon
    for i in pbar:
        replay, scores = rollout(env, policy, 1, epsilon, render=False)
        all_scores.extend(scores)
        replaymem.extend(replay)
        traindata = random.sample(replaymem, min(trainsize, len(replaymem)))
        losses = []
        for state, action, reward, next_state, terminal in traindata:
            loss = policy.td_step(state, action, reward, next_state, terminal)
            losses.append(loss)

        smoothed_score = np.mean(all_scores[-200:])
        pbar.set_postfix_str("Mean Rewards Per Episode: {:.1f} | {:.3f} MSE | Replay Size: {}"
                             .format(smoothed_score, np.mean(losses), len(replaymem)))
        epsilon = max(epsilon_min, epsilon*np.exp(-1*epsilon_decay_factor*(i+1)))

In [47]:
episodes=1000
trainsize=1000
epsilon=0.8
epsilon_min=0.02
epsilon_decay_factor=2.0e-5
gamma=0.90
lr=0.2
buckets=(18,14)

In [48]:
env = gym.make('MountainCar-v0')
statesize = env.observation_space.shape[0]
actionsize = env.action_space.n
#torch.save(policy.model, 'models/tabular.npy')

In [49]:
policy = TabQPolicy(env, buckets,actionsize, lr, gamma) 


In [50]:
replaymem = collections.deque(maxlen=500000)
pbar = tqdm(range(episodes), desc='Iterations')
all_scores = []
epsilon =epsilon
for i in pbar:
    replay, scores = rollout(env, policy, 1, epsilon, render=False)
    all_scores.extend(scores)
    replaymem.extend(replay)
    traindata = random.sample(replaymem, min(trainsize, len(replaymem)))
    losses = []
    for state, action, reward, next_state, terminal in traindata:
        #print('state input=',type(state),state,'action is',action)
        #dstates=(policy.discretize(state1) for state1 in state)
        #print(dstates)
        loss = policy.td_step(state, action, reward, next_state, terminal)
        losses.append(loss)
    smoothed_score = np.mean(all_scores[-200:])
    pbar.set_postfix_str("Mean Rewards Per Episode: {:.1f} | {:.3f} MSE | Replay Size: {}".format(smoothed_score, np.mean(losses), len(replaymem)))
    epsilon = max(epsilon_min, epsilon*np.exp(-1*epsilon_decay_factor*(i+1)))
print('at the end Q is',policy.model)






Iterations:   0%|                                                                             | 0/1000 [00:00<?, ?it/s][A[A[A[A[A

states= [[-0.50909243  0.        ]] dstates= False
states= [[-0.51020119 -0.00110876]] dstates= False
states= [[-5.10410409e-01 -2.09218103e-04]] dstates= False
states= [[-5.10718514e-01 -3.08105048e-04]] dstates= False
states= [[-0.5121232  -0.00140468]] dstates= False
states= [[-0.51461393 -0.00249073]] dstates= False
states= [[-0.51617204 -0.00155811]] dstates= False
states= [[-0.51878585 -0.00261381]] dstates= False
states= [[-0.52143575 -0.0026499 ]] dstates= False
states= [[-0.52410188 -0.00266613]] dstates= False
states= [[-0.52776423 -0.00366235]] dstates= False
states= [[-0.53239534 -0.00463111]] dstates= False
states= [[-0.53796048 -0.00556515]] dstates= False
states= [[-0.54341795 -0.00545747]] dstates= False
states= [[-0.54972686 -0.00630891]] dstates= False
states= [[-0.55584001 -0.00611315]] dstates= False
states= [[-0.56171173 -0.00587172]] dstates= False
states= [[-0.56829822 -0.00658649]] dstates= False
states= [[-0.57355047 -0.00525225]] dstates= False
states= [[-0.57

states= [[-0.47424505 -0.00482043]] dstates= False
states= [[-0.53604628 -0.0084962 ]] dstates= False
states= [[-0.52755008 -0.00752583]] dstates= False
states= [[-0.62467322 -0.01006552]] dstates= False
states= [[-0.6146077  -0.01073964]] dstates= False
states= [[-6.00461939e-01  1.23221401e-04]] dstates= False
states= [[-0.60058516 -0.00144906]] dstates= False
states= [[-0.4660087  -0.00298613]] dstates= False
states= [[-0.46302257 -0.00353431]] dstates= False
states= [[-0.57355047 -0.00525225]] dstates= False
states= [[-0.56829822 -0.00658649]] dstates= False
states= [[-0.69199662  0.00690009]] dstates= False
states= [[-0.69889671  0.00564512]] dstates= False
states= [[-0.39912383 -0.00695791]] dstates= False
states= [[-0.39216593 -0.00499751]] dstates= False
states= [[-0.52002425 -0.00749902]] dstates= False
states= [[-0.51252523 -0.00841599]] dstates= False
states= [[-0.40091645  0.00812273]] dstates= False
states= [[-0.40903918  0.00796512]] dstates= False
states= [[-0.64835727 -






Iterations:   0%|            | 0/1000 [00:00<?, ?it/s, Mean Rewards Per Episode: -200.0 | 0.697 MSE | Replay Size: 200][A[A[A[A[A




Iterations:   0%|    | 1/1000 [00:00<07:55,  2.10it/s, Mean Rewards Per Episode: -200.0 | 0.697 MSE | Replay Size: 200][A[A[A[A[A

 dstates= False
states= [[-0.54972686 -0.00630891]] dstates= False
states= [[-0.54341795 -0.00545747]] dstates= False
states= [[-0.38216581 -0.0029734 ]] dstates= False
states= [[-0.37919241 -0.00192392]] dstates= False
states= [[-0.53796048 -0.00556515]] dstates= False
states= [[-0.53239534 -0.00463111]] dstates= False
states= [[-0.50444931  0.01218098]] dstates= False
states= [[-0.51663029  0.01323324]] dstates= False
states= [[-0.52689219  0.01141768]] dstates= False
states= [[-0.53830987  0.01230738]] dstates= False
states= [[-0.5578734  0.0073871]] dstates= False
states= [[-0.56526049  0.00607545]] dstates= False
states= [[-0.55584001 -0.00611315]] dstates= False
states= [[-0.54972686 -0.00630891]] dstates= False
states= [[-0.51663029  0.01323324]] dstates= False
states= [[-0.52986353  0.01418626]] dstates= False
states= [[-0.53637254  0.00800645]] dstates= False
states= [[-0.54437899  0.0068507 ]] dstates= False
states= [[-0.45188308 -0.00301501]] dstates= False
states= [[-0.4488

states= [[-0.61854167 -0.00240858]] dstates= False
states= [[-0.61924777 -0.0007061 ]] dstates= False
states= [[-0.6182463   0.00100147]] dstates= False
states= [[-0.61654448  0.00170182]] dstates= False
states= [[-0.61315456  0.00338992]] dstates= False
states= [[-0.60910103  0.00405353]] dstates= False
states= [[-0.60341324  0.00568779]] dstates= False
states= [[-0.59713254  0.0062807 ]] dstates= False
states= [[-0.59130481  0.00582774]] dstates= False
states= [[-0.58497276  0.00633205]] dstates= False
states= [[-0.577183    0.00778976]] dstates= False
states= [[-0.56899309  0.00818991]] dstates= False
states= [[-0.55946378  0.00952932]] dstates= False
states= [[-0.54866599  0.01079778]] dstates= False
states= [[-0.53768038  0.01098561]] dstates= False
states= [[-0.52658919  0.01109119]] dstates= False
states= [[-0.51647557  0.01011362]] dstates= False
states= [[-0.50741537  0.0090602 ]] dstates= False
states= [[-0.4994765   0.00793887]] dstates= False
states= [[-0.49271839  0.006758

states= [[-0.58038143  0.00490303]] dstates= False
states= [[-0.72160951 -0.00192941]] dstates= False
states= [[-0.71968009 -0.00231666]] dstates= False
states= [[-0.68935869 -0.00740807]] dstates= False
states= [[-0.68195063 -0.00855154]] dstates= False
states= [[-0.51629182  0.00289537]] dstates= False
states= [[-0.51918719  0.00192846]] dstates= False
states= [[-0.53604628 -0.0084962 ]] dstates= False
states= [[-0.52755008 -0.00752583]] dstates= False
states= [[-0.66376148 -0.00965812]] dstates= False
states= [[-0.65410336 -0.00961209]] dstates= False
states= [[-0.48807943 -0.0006163 ]] dstates= False
states= [[-0.48746314  0.00065419]] dstates= False
states= [[-0.62112425  0.01105667]] dstates= False
states= [[-0.63218091  0.01125663]] dstates= False
states= [[-0.4365659   0.01282685]] dstates= False
states= [[-0.44939274  0.01337881]] dstates= False
states= [[-0.49526901  0.00430374]] dstates= False
states= [[-0.49957275  0.00348377]] dstates= False
states= [[-0.58497276  0.006332

states= [[-0.42675532  0.01146717]] dstates= False
states= [[-0.46878089  0.00318949]] dstates= False
states= [[-0.47197037  0.00357515]] dstates= False
states= [[-0.47434135  0.00929172]] dstates= False
states= [[-0.48363307  0.00859074]] dstates= False
states= [[-5.31648837e-01 -2.28957222e-04]] dstates= False
states= [[-0.53141988 -0.00128761]] dstates= False
states= [[-0.37740714  0.00220027]] dstates= False
states= [[-0.37960741  0.00224693]] dstates= False
states= [[-0.52779566 -0.00336808]] dstates= False
states= [[-0.52442758 -0.0023743 ]] dstates= False
states= [[-0.61654448  0.00170182]] dstates= False
states= [[-0.6182463   0.00100147]] dstates= False
states= [[-0.57016585 -0.00945453]] dstates= False
states= [[-0.56071132 -0.0087323 ]] dstates= False
states= [[-0.68329643  0.00386251]] dstates= False
states= [[-0.68715895  0.00368445]] dstates= False
states= [[-0.58189912 -0.00346964]] dstates= False
states= [[-0.57842948 -0.00487901]] dstates= False
states= [[-5.99258560e-






Iterations:   0%|    | 1/1000 [00:01<07:55,  2.10it/s, Mean Rewards Per Episode: -200.0 | 0.553 MSE | Replay Size: 400][A[A[A[A[A




Iterations:   0%|    | 2/1000 [00:01<09:36,  1.73it/s, Mean Rewards Per Episode: -200.0 | 0.553 MSE | Replay Size: 400][A[A[A[A[A

 False
states= [[-0.69657417 -0.00721548]] dstates= False
states= [[-0.68935869 -0.00740807]] dstates= False
states= [[-0.37935875  0.00461276]] dstates= False
states= [[-0.38397151  0.0046296 ]] dstates= False
states= [[-0.67076101 -0.01070433]] dstates= False
states= [[-0.66005668 -0.01169941]] dstates= False
states= [[-0.58528446  0.00544302]] dstates= False
states= [[-0.59072748  0.00394296]] dstates= False
states= [[-0.49117749  0.00409152]] dstates= False
states= [[-0.49526901  0.00430374]] dstates= False
states= [[-0.68690684 -0.00750971]] dstates= False
states= [[-0.67939713 -0.00863612]] dstates= False
states= [[-0.61227566 -0.00342098]] dstates= False
states= [[-0.60885468 -0.00505345]] dstates= False
states= [[-0.56568705 -0.0099988 ]] dstates= False
states= [[-0.55568825 -0.0102391 ]] dstates= False
states= [[-0.49271839  0.00675811]] dstates= False
states= [[-0.4994765   0.00793887]] dstates= False
states= [[-0.51663029  0.01323324]] dstates= False
states= [[-0.52986353  0

states= [[-0.52153523 -0.00127581]] dstates= False
states= [[-0.52282652 -0.00129129]] dstates= False
states= [[-5.23123601e-01 -2.97082799e-04]] dstates= False
states= [[-0.52242425  0.00069935]] dstates= False
states= [[-0.5217337   0.00069054]] dstates= False
states= [[-0.52105715  0.00067656]] dstates= False
states= [[-0.52039965  0.00065749]] dstates= False
states= [[-0.51976615  0.0006335 ]] dstates= False
states= [[-5.20161393e-01 -3.95242671e-04]] dstates= False
states= [[-0.51958242  0.00057898]] dstates= False
states= [[-0.51803356  0.00154886]] dstates= False
states= [[-0.51652644  0.00150712]] dstates= False
states= [[-0.51507236  0.00145408]] dstates= False
states= [[-0.51368222  0.00139014]] dstates= False
states= [[-0.51236645  0.00131578]] dstates= False
states= [[-0.5111349   0.00123155]] dstates= False
states= [[-0.50899681  0.00213809]] dstates= False
states= [[-0.5079682   0.00102861]] dstates= False
states= [[-0.50705677  0.00091143]] dstates= False
states= [[-0.50

states= [[-0.51982145  0.01461025]] dstates= False
states= [[-0.5344317   0.01452902]] dstates= False
states= [[-0.43744156 -0.01022046]] dstates= False
states= [[-0.4272211  -0.01050766]] dstates= False
states= [[-0.45530146  0.00485441]] dstates= False
states= [[-0.46015587  0.00432736]] dstates= False
states= [[-0.59816482 -0.00788428]] dstates= False
states= [[-0.59028054 -0.00938106]] dstates= False
states= [[-0.4710317   0.00791297]] dstates= False
states= [[-0.47894467  0.00724687]] dstates= False
states= [[-0.48921374  0.00287551]] dstates= False
states= [[-0.49208925  0.00211148]] dstates= False
states= [[-5.31648837e-01 -2.28957222e-04]] dstates= False
states= [[-0.53141988 -0.00128761]] dstates= False
states= [[-3.77268489e-01  1.38648329e-04]] dstates= False
states= [[-0.37740714  0.00220027]] dstates= False
states= [[-0.4365659   0.01282685]] dstates= False
states= [[-0.44939274  0.01337881]] dstates= False
states= [[-0.55881873 -0.01241904]] dstates= False
states= [[-0.54

states= [[-0.50232672 -0.01350017]] dstates= False
states= [[-0.53796048 -0.00556515]] dstates= False
states= [[-0.53239534 -0.00463111]] dstates= False
states= [[-0.71701629  0.00325715]] dstates= False
states= [[-0.72027345  0.0008662 ]] dstates= False
states= [[-0.54404978  0.01503297]] dstates= False
states= [[-0.55908276  0.01376734]] dstates= False
states= [[-0.52676789 -0.00819942]] dstates= False
states= [[-0.51856847 -0.00816169]] dstates= False
states= [[-0.4794458  -0.00260625]] dstates= False
states= [[-0.47683955 -0.0012567 ]] dstates= False
states= [[-0.45948826 -0.00405644]] dstates= False
states= [[-0.45543182 -0.00354874]] dstates= False
states= [[-0.43473812  0.00198225]] dstates= False
states= [[-0.43672036  0.00362648]] dstates= False
states= [[-0.4499557   0.00534575]] dstates= False
states= [[-0.45530146  0.00485441]] dstates= False
states= [[-0.58596612  0.01196665]] dstates= False
states= [[-0.59793277  0.01141376]] dstates= False
states= [[-0.51900864 -0.001216

states= [[-0.50836737  0.00069598]] dstates= False
states= [[-0.50906336  0.00180497]] dstates= False
states= [[-0.4445607   0.00290066]] dstates= False
states= [[-0.44746136  0.00446674]] dstates= False
states= [[-0.52111565  0.00294708]] dstates= False
states= [[-0.52406273  0.0019436 ]] dstates= False
states= [[-0.40250208 -0.00063513]] dstates= False
states= [[-0.40186695  0.0012577 ]] dstates= False
states= [[-0.51958242  0.00057898]] dstates= False
states= [[-5.20161393e-01 -3.95242671e-04]] dstates= False
states= [[-0.51599935 -0.00544326]] dstates= False
states= [[-0.51055609 -0.00534546]] dstates= False
states= [[-5.18648406e-01  3.95957705e-04]] dstates= False
states= [[-0.51904436 -0.00056989]] dstates= False
states= [[-0.60386806 -0.01033586]] dstates= False
states= [[-0.5935322  -0.00985652]] dstates= False
states= [[-0.51020119 -0.00110876]] dstates= False
states= [[-0.50909243  0.        ]] dstates= False
states= [[-5.99002886e-01  1.83605376e-04]] dstates= False
states=

states= [[-0.51308972 -0.00099142]] dstates= False
states= [[-0.65796237  0.01297473]] dstates= False
states= [[-0.6709371   0.01190533]] dstates= False
states= [[-0.47560403  0.0141912 ]] dstates= False
states= [[-0.48979524  0.01544429]] dstates= False
states= [[-5.26019518e-01  1.11023313e-04]] dstates= False
states= [[-0.52613054  0.00109204]] dstates= False
states= [[-0.49341187  0.01103744]] dstates= False
states= [[-0.50444931  0.01218098]] dstates= False
states= [[-0.49136392  0.0019388 ]] dstates= False
states= [[-0.49330272  0.00116571]] dstates= False
states= [[-0.51040678 -0.00706278]] dstates= False
states= [[-0.50334401 -0.00791096]] dstates= False
states= [[-0.45792824  0.00748948]] dstates= False
states= [[-0.46541772  0.00892363]] dstates= False
states= [[-0.50232672 -0.01350017]] dstates= False
states= [[-0.48882656 -0.01423985]] dstates= False
states= [[-0.52689219  0.01141768]] dstates= False
states= [[-0.53830987  0.01230738]] dstates= False
states= [[-0.59976734  






Iterations:   0%|    | 2/1000 [00:02<09:36,  1.73it/s, Mean Rewards Per Episode: -200.0 | 0.564 MSE | Replay Size: 600][A[A[A[A[A




Iterations:   0%|    | 3/1000 [00:02<11:54,  1.40it/s, Mean Rewards Per Episode: -200.0 | 0.564 MSE | Replay Size: 600][A[A[A[A[A

 dstates= False
states= [[-0.60341324  0.00568779]] dstates= False
states= [[-0.67939713 -0.00863612]] dstates= False
states= [[-0.67076101 -0.01070433]] dstates= False
states= [[-0.50705677  0.00091143]] dstates= False
states= [[-0.5079682   0.00102861]] dstates= False
states= [[-5.20161393e-01 -3.95242671e-04]] dstates= False
states= [[-0.51976615  0.0006335 ]] dstates= False
states= [[-0.52025941 -0.00125077]] dstates= False
states= [[-0.51900864 -0.00121634]] dstates= False
states= [[-0.51833236 -0.01174594]] dstates= False
states= [[-0.50658642 -0.0116184 ]] dstates= False
states= [[-0.65051686  0.00821268]] dstates= False
states= [[-0.65872954  0.00622673]] dstates= False
states= [[-0.4272211  -0.01050766]] dstates= False
states= [[-0.41671344 -0.00871968]] dstates= False
states= [[-0.37187982  0.00241591]] dstates= False
states= [[-0.37429573  0.00249862]] dstates= False
states= [[-0.49420073  0.00133168]] dstates= False
states= [[-0.4955324   0.00254193]] dstates= False
states=

states= [[-0.64533612  0.01045143]] dstates= False
states= [[-0.63399181  0.0113443 ]] dstates= False
states= [[-0.62083462  0.0131572 ]] dstates= False
states= [[-0.60795845  0.01287617]] dstates= False
states= [[-0.59445632  0.01350213]] dstates= False
states= [[-0.57942675  0.01502957]] dstates= False
states= [[-0.56498042  0.01444633]] dstates= False
states= [[-0.55022453  0.01475589]] dstates= False
states= [[-0.53426916  0.01595537]] dstates= False
states= [[-0.51723377  0.01703539]] dstates= False
states= [[-0.50024612  0.01698765]] dstates= False
states= [[-0.48243347  0.01781265]] dstates= False
states= [[-0.46492877  0.01750469]] dstates= False
states= [[-0.44686184  0.01806694]] dstates= False
states= [[-0.42836535  0.01849648]] dstates= False
states= [[-0.41057345  0.01779191]] dstates= False
states= [[-0.39361308  0.01696037]] dstates= False
states= [[-0.37860308  0.01501   ]] dstates= False
states= [[-0.36364658  0.0149565 ]] dstates= False
states= [[-0.34984421  0.013802

states= [[-0.709075 -0.002275]] dstates= False
states= [[-0.60380122 -0.00664918]] dstates= False
states= [[-0.59715204 -0.00819637]] dstates= False
states= [[-0.65796237  0.01297473]] dstates= False
states= [[-0.6709371   0.01190533]] dstates= False
states= [[-0.62083462  0.0131572 ]] dstates= False
states= [[-0.63399181  0.0113443 ]] dstates= False
states= [[-0.43822249  0.01110051]] dstates= False
states= [[-0.449323    0.01165298]] dstates= False
states= [[-0.51715742 -0.0013446 ]] dstates= False
states= [[-0.51581283 -0.00128621]] dstates= False
states= [[-0.65734381 -0.0109377 ]] dstates= False
states= [[-0.64640611 -0.01083806]] dstates= False
states= [[-0.521681    0.00451495]] dstates= False
states= [[-0.52619595  0.00549547]] dstates= False
states= [[-0.50523953  0.01458192]] dstates= False
states= [[-0.51982145  0.01461025]] dstates= False
states= [[-7.21139650e-01  4.69855926e-04]] dstates= False
states= [[-0.72160951 -0.00192941]] dstates= False
states= [[-0.51723377  0.01

states= [[-0.40642679 -0.0024012 ]] dstates= False
states= [[-0.40402559 -0.00152351]] dstates= False
states= [[-0.59909821 -0.01333027]] dstates= False
states= [[-0.58576795 -0.01379384]] dstates= False
states= [[-0.49612064 -0.00362523]] dstates= False
states= [[-0.49249541 -0.00239229]] dstates= False
states= [[-0.50692004  0.00311367]] dstates= False
states= [[-0.51003371  0.00421538]] dstates= False
states= [[-5.14526621e-01 -2.18173525e-04]] dstates= False
states= [[-0.51430845 -0.00114851]] dstates= False
states= [[-0.59746954 -0.01458994]] dstates= False
states= [[-0.58287961 -0.0140322 ]] dstates= False
states= [[-0.37679435  0.0025644 ]] dstates= False
states= [[-0.37935875  0.00461276]] dstates= False
states= [[-0.45074658 -0.01495322]] dstates= False
states= [[-0.43579337 -0.01330226]] dstates= False
states= [[-0.45360946 -0.00120937]] dstates= False
states= [[-4.52400093e-01  3.20572908e-04]] dstates= False
states= [[-0.48425454  0.00234112]] dstates= False
states= [[-0.48

states= [[-0.3009268   0.00688314]] dstates= False
states= [[-0.52039965  0.00065749]] dstates= False
states= [[-0.52105715  0.00067656]] dstates= False
states= [[-0.51983353 -0.00827473]] dstates= False
states= [[-0.5115588  -0.00718445]] dstates= False
states= [[-0.68715895  0.00368445]] dstates= False
states= [[-0.69084339  0.00148208]] dstates= False
states= [[-0.55261644 -0.00903059]] dstates= False
states= [[-0.54358584 -0.00818041]] dstates= False
states= [[-0.46065771  0.00434298]] dstates= False
states= [[-0.46500069  0.0037802 ]] dstates= False
states= [[-0.6192153   0.00470539]] dstates= False
states= [[-0.62392069  0.00496428]] dstates= False
states= [[-0.60087832 -0.00674269]] dstates= False
states= [[-0.59413563 -0.00626778]] dstates= False
states= [[-0.54896071  0.01433898]] dstates= False
states= [[-0.5632997   0.01504193]] dstates= False
states= [[-0.49633401 -0.00683609]] dstates= False
states= [[-0.48949792 -0.00758078]] dstates= False
states= [[-0.62047764 -0.006882

states= [[-0.57466425  0.00119125]] dstates= False
states= [[-0.57585551  0.00080093]] dstates= False
states= [[-0.6320485  -0.00540514]] dstates= False
states= [[-0.62664336 -0.00616572]] dstates= False
states= [[-0.52442758 -0.0023743 ]] dstates= False
states= [[-0.52205328 -0.00336271]] dstates= False
states= [[-0.59708439  0.00186726]] dstates= False
states= [[-5.98951647e-01  3.06913769e-04]] dstates= False
states= [[-0.49271839  0.00675811]] dstates= False
states= [[-0.4994765   0.00793887]] dstates= False
states= [[-0.54413244  0.0111399 ]] dstates= False
states= [[-0.55527233  0.0099027 ]] dstates= False
states= [[-0.53201168 -0.00347578]] dstates= False
states= [[-0.5285359  -0.00351281]] dstates= False
states= [[-0.51900864 -0.00121634]] dstates= False
states= [[-5.17792299e-01 -1.72798274e-04]] dstates= False
states= [[-0.34984421  0.01380237]] dstates= False
states= [[-0.36364658  0.0149565 ]] dstates= False
states= [[-0.50410924 -0.0072699 ]] dstates= False
states= [[-0.49

states= [[-0.57109077 -0.01116369]] dstates= False
states= [[-0.55992708 -0.01043561]] dstates= False
states= [[-0.50836737  0.00069598]] dstates= False
states= [[-0.50906336  0.00180497]] dstates= False
states= [[-0.45272067  0.00084816]] dstates= False
states= [[-0.45356883  0.00236953]] dstates= False
states= [[-0.59980803 -0.00094506]] dstates= False
states= [[-5.98862975e-01 -5.04756759e-04]] dstates= False
states= [[-0.56517504  0.00859169]] dstates= False
states= [[-0.57376672  0.00921685]] dstates= False
states= [[-0.50444931  0.01218098]] dstates= False
states= [[-0.51663029  0.01323324]] dstates= False
states= [[-0.44353478 -0.00301198]] dstates= False
states= [[-0.44052281 -0.00239534]] dstates= False
states= [[-0.59445632  0.01350213]] dstates= False
states= [[-0.60795845  0.01287617]] dstates= False
states= [[-0.66730512 -0.00996131]] dstates= False
states= [[-0.65734381 -0.0109377 ]] dstates= False
states= [[-0.54358584 -0.00818041]] dstates= False
states= [[-0.53540544 -






Iterations:   0%|    | 3/1000 [00:03<11:54,  1.40it/s, Mean Rewards Per Episode: -200.0 | 0.674 MSE | Replay Size: 800][A[A[A[A[A




Iterations:   0%|    | 4/1000 [00:03<14:52,  1.12it/s, Mean Rewards Per Episode: -200.0 | 0.674 MSE | Replay Size: 800][A[A[A[A[A

 dstates= False
states= [[-0.46559469 -0.01159194]] dstates= False
states= [[-5.27222576e-01  6.48572838e-05]] dstates= False
states= [[-0.52728743 -0.00096281]] dstates= False
states= [[-0.48746314  0.00065419]] dstates= False
states= [[-4.88117327e-01 -8.02007473e-05]] dstates= False
states= [[-0.52728743 -0.00096281]] dstates= False
states= [[-0.52632463 -0.00098325]] dstates= False
states= [[-0.48882656 -0.01423985]] dstates= False
states= [[-0.47458671 -0.01287358]] dstates= False
states= [[-0.47190717 -0.00405835]] dstates= False
states= [[-0.46784882 -0.00364217]] dstates= False
states= [[-0.48138223 -0.00193643]] dstates= False
states= [[-0.4794458  -0.00260625]] dstates= False
states= [[-5.10410409e-01 -2.09218103e-04]] dstates= False
states= [[-0.51020119 -0.00110876]] dstates= False
states= [[-0.53069551 -0.01370919]] dstates= False
states= [[-0.51698632 -0.0146596 ]] dstates= False
states= [[-5.18648406e-01  3.95957705e-04]] dstates= False
states= [[-0.51904436 -0.00056989]

states= [[-0.44754761 -0.00571939]] dstates= False
states= [[-0.45383245 -0.00628484]] dstates= False
states= [[-0.46163672 -0.00780427]] dstates= False
states= [[-0.46890303 -0.00726632]] dstates= False
states= [[-0.47557773 -0.0066747 ]] dstates= False
states= [[-0.48161134 -0.00603361]] dstates= False
states= [[-0.48695902 -0.00534768]] dstates= False
states= [[-0.49358095 -0.00662193]] dstates= False
states= [[-0.50042771 -0.00684676]] dstates= False
states= [[-0.50844811 -0.0080204 ]] dstates= False
states= [[-0.5175821  -0.00913399]] dstates= False
states= [[-0.52676121 -0.00917911]] dstates= False
states= [[-0.53591661 -0.0091554 ]] dstates= False
states= [[-0.54497964 -0.00906303]] dstates= False
states= [[-0.55288243 -0.00790279]] dstates= False
states= [[-0.55956587 -0.00668344]] dstates= False
states= [[-0.56698008 -0.00741421]] dstates= False
states= [[-0.57306985 -0.00608977]] dstates= False
states= [[-0.57978994 -0.0067201 ]] dstates= False
states= [[-0.5850906  -0.005300

states= [[-0.61026039 -0.00619478]] dstates= False
states= [[-0.47224946  0.00062093]] dstates= False
states= [[-0.47287039  0.00099993]] dstates= False
states= [[-0.46500069  0.0037802 ]] dstates= False
states= [[-0.46878089  0.00318949]] dstates= False
states= [[-0.54770065  0.00286101]] dstates= False
states= [[-0.55056167  0.00165901]] dstates= False
states= [[-0.66730512 -0.00996131]] dstates= False
states= [[-0.65734381 -0.0109377 ]] dstates= False
states= [[-0.4544466   0.00806475]] dstates= False
states= [[-0.46251135  0.00852035]] dstates= False
states= [[-0.43822249  0.01110051]] dstates= False
states= [[-0.449323    0.01165298]] dstates= False
states= [[-0.53169143  0.00543479]] dstates= False
states= [[-0.53712621  0.00433336]] dstates= False
states= [[-5.98297455e-01  3.83673450e-04]] dstates= False
states= [[-0.59868113  0.00082531]] dstates= False
states= [[-0.67097604 -0.00870002]] dstates= False
states= [[-0.66227602 -0.00871036]] dstates= False
states= [[-0.50938094 -

states= [[-0.53233721  0.00528647]] dstates= False
states= [[-0.50104952  0.0089947 ]] dstates= False
states= [[-0.51004422  0.00909633]] dstates= False
states= [[-0.55173537  0.0036058 ]] dstates= False
states= [[-0.55534117  0.00336809]] dstates= False
states= [[-0.37829037 -0.00305968]] dstates= False
states= [[-0.37523069 -0.0019833 ]] dstates= False
states= [[-0.4640121   0.00328104]] dstates= False
states= [[-0.46729314  0.00470133]] dstates= False
states= [[-0.67939713 -0.00863612]] dstates= False
states= [[-0.67076101 -0.01070433]] dstates= False
states= [[-0.36364658  0.0149565 ]] dstates= False
states= [[-0.37860308  0.01501   ]] dstates= False
states= [[-0.56181451  0.00181926]] dstates= False
states= [[-5.63633767e-01  5.19717429e-04]] dstates= False
states= [[-0.52720952  0.00531689]] dstates= False
states= [[-0.53252641  0.00524994]] dstates= False
states= [[-0.39531713  0.00664815]] dstates= False
states= [[-0.40196528  0.0085403 ]] dstates= False
states= [[-0.5991361  -

states= [[-0.50142535  0.00615897]] dstates= False
states= [[-0.50758432  0.00627903]] dstates= False
states= [[-0.39060751  0.00470962]] dstates= False
states= [[-0.39531713  0.00664815]] dstates= False
states= [[-0.57306985 -0.00608977]] dstates= False
states= [[-0.56698008 -0.00741421]] dstates= False
states= [[-0.4563399  -0.00273044]] dstates= False
states= [[-0.45360946 -0.00120937]] dstates= False
states= [[-0.59130481  0.00582774]] dstates= False
states= [[-0.59713254  0.0062807 ]] dstates= False
states= [[-0.42431543 -0.00549977]] dstates= False
states= [[-0.41881566 -0.00372678]] dstates= False
states= [[-0.50444931  0.01218098]] dstates= False
states= [[-0.51663029  0.01323324]] dstates= False
states= [[-0.43986746  0.01046611]] dstates= False
states= [[-0.45033357  0.01101118]] dstates= False
states= [[-0.42583092  0.00770582]] dstates= False
states= [[-0.43353675  0.0073731 ]] dstates= False
states= [[-0.35520277 -0.0104049 ]] dstates= False
states= [[-0.34479787 -0.010127

states= [[-0.65796237  0.01297473]] dstates= False
states= [[-0.6709371   0.01190533]] dstates= False
states= [[-0.60058516 -0.00144906]] dstates= False
states= [[-0.5991361  -0.00301075]] dstates= False
states= [[-0.5728501  0.0143993]] dstates= False
states= [[-0.5872494   0.01492482]] dstates= False
states= [[-0.5935322  -0.00985652]] dstates= False
states= [[-0.58367569 -0.00930466]] dstates= False
states= [[-0.44186185 -0.00807389]] dstates= False
states= [[-0.43378796 -0.00840843]] dstates= False
states= [[-0.71238855  0.00462774]] dstates= False
states= [[-0.71701629  0.00325715]] dstates= False
states= [[-0.69230456 -0.00868628]] dstates= False
states= [[-0.68361829 -0.00884085]] dstates= False
states= [[-0.44930103 -0.01185947]] dstates= False
states= [[-0.43744156 -0.01022046]] dstates= False
states= [[-0.4807915   0.01088853]] dstates= False
states= [[-0.49168003  0.01012755]] dstates= False
states= [[-0.42675532  0.01146717]] dstates= False
states= [[-0.43822249  0.01110051

states= [[-0.38185434  0.00327827]] dstates= False
states= [[-0.56833784 -0.00321651]] dstates= False
states= [[-0.56512134 -0.00252712]] dstates= False
states= [[-0.47387031  0.00137151]] dstates= False
states= [[-0.47524182  0.00173291]] dstates= False
states= [[-0.50611018 -0.01059136]] dstates= False
states= [[-0.49551882 -0.01038101]] dstates= False
states= [[-0.47404461 -0.0097229 ]] dstates= False
states= [[-0.46432171 -0.00928067]] dstates= False
states= [[-0.521681    0.00451495]] dstates= False
states= [[-0.52619595  0.00549547]] dstates= False
states= [[-0.57836079 -0.01898493]] dstates= False
states= [[-0.55937586 -0.01825274]] dstates= False
states= [[-5.16491544e-01 -7.46559173e-05]] dstates= False
states= [[-5.16416888e-01 -2.07959294e-05]] dstates= False
states= [[-0.6192153   0.00470539]] dstates= False
states= [[-0.62392069  0.00496428]] dstates= False
states= [[-0.43744156 -0.01022046]] dstates= False
states= [[-0.4272211  -0.01050766]] dstates= False
states= [[-0.41

states= [[-0.40778999 -0.00141348]] dstates= False
states= [[-0.40637651 -0.00155233]] dstates= False
states= [[-0.48976992 -0.00581614]] dstates= False
states= [[-0.48395379 -0.0045195 ]] dstates= False
states= [[-0.49496802 -0.01240394]] dstates= False
states= [[-0.48256408 -0.01209695]] dstates= False
states= [[-0.69218663 -0.0034792 ]] dstates= False
states= [[-0.68870743 -0.0056675 ]] dstates= False
states= [[-0.4282591  -0.00957027]] dstates= False
states= [[-0.41868883 -0.00879637]] dstates= False
states= [[-0.47894467  0.00724687]] dstates= False
states= [[-0.48619155  0.00652684]] dstates= False
states= [[-0.48415778 -0.01691593]] dstates= False
states= [[-0.46724185 -0.01649527]] dstates= False
states= [[-0.64343754  0.01137707]] dstates= False
states= [[-0.65481461  0.01141817]] dstates= False
states= [[-0.50305652  0.00463774]] dstates= False
states= [[-0.50769426  0.00475698]] dstates= False
states= [[-0.61968901 -0.00487651]] dstates= False
states= [[-0.6148125  -0.004552

states= [[-5.23123601e-01 -2.97082799e-04]] dstates= False
states= [[-0.52282652 -0.00129129]] dstates= False
states= [[-0.50692004  0.00311367]] dstates= False
states= [[-0.51003371  0.00421538]] dstates= False
states= [[-0.55977529 -0.00208971]] dstates= False
states= [[-0.55768558 -0.00134492]] dstates= False
states= [[-0.64164428  0.00080061]] dstates= False
states= [[-0.64244488 -0.00107197]] dstates= False
states= [[-0.57706111 -0.00099457]] dstates= False
states= [[-5.76066540e-01 -3.86459007e-04]] dstates= False
states= [[-0.50521063 -0.00620762]] dstates= False
states= [[-0.499003   -0.00502332]] dstates= False
states= [[-0.52189982 -0.0147147 ]] dstates= False
states= [[-0.50718512 -0.01559164]] dstates= False
states= [[-0.40637651 -0.00155233]] dstates= False
states= [[-4.04824185e-01  3.19752606e-04]] dstates= False
states= [[-0.47683955 -0.0012567 ]] dstates= False
states= [[-0.47558285 -0.00189783]] dstates= False
states= [[-0.39912383 -0.00695791]] dstates= False
states=






Iterations:   0%|   | 4/1000 [00:05<14:52,  1.12it/s, Mean Rewards Per Episode: -200.0 | 0.627 MSE | Replay Size: 1000][A[A[A[A[A




Iterations:   0%|   | 5/1000 [00:05<18:52,  1.14s/it, Mean Rewards Per Episode: -200.0 | 0.627 MSE | Replay Size: 1000][A[A[A[A[A

 [[-5.98862975e-01 -5.04756759e-04]] dstates= False
states= [[-0.541805    0.00304484]] dstates= False
states= [[-0.54484984  0.00388556]] dstates= False
states= [[-0.69232548  0.00126998]] dstates= False
states= [[-0.69359546 -0.00095044]] dstates= False
states= [[-0.49526901  0.00430374]] dstates= False
states= [[-0.49957275  0.00348377]] dstates= False
states= [[-5.17792299e-01 -1.72798274e-04]] dstates= False
states= [[-0.5176195  -0.00112796]] dstates= False
states= [[-0.58027227 -0.01010641]] dstates= False
states= [[-0.57016585 -0.00945453]] dstates= False
states= [[-0.52025941 -0.00125077]] dstates= False
states= [[-0.51900864 -0.00121634]] dstates= False
states= [[-0.55568825 -0.0102391 ]] dstates= False
states= [[-0.54544914 -0.00940286]] dstates= False
states= [[-0.59793277  0.01141376]] dstates= False
states= [[-0.60934653  0.01177772]] dstates= False
states= [[-0.60002356  0.006698  ]] dstates= False
states= [[-0.60672156  0.00708102]] dstates= False
states= [[-0.60341324 

states= [[-0.4780541  -0.01836599]] dstates= False
states= [[-0.49776062 -0.01970652]] dstates= False
states= [[-0.51766072 -0.01990011]] dstates= False
states= [[-0.53860536 -0.02094464]] dstates= False
states= [[-0.55843749 -0.01983213]] dstates= False
states= [[-0.57800881 -0.01957131]] dstates= False
states= [[-0.59817386 -0.02016505]] dstates= False
states= [[-0.61678425 -0.01861039]] dstates= False
states= [[-0.63470482 -0.01792057]] dstates= False
states= [[-0.65080743 -0.01610262]] dstates= False
states= [[-0.66697897 -0.01617154]] dstates= False
states= [[-0.68310802 -0.01612904]] dstates= False
states= [[-0.69908588 -0.01597786]] dstates= False
states= [[-0.71480755 -0.01572167]] dstates= False
states= [[-0.72817252 -0.01336497]] dstates= False
states= [[-0.7390977  -0.01092518]] dstates= False
states= [[-0.7495169 -0.0104192]] dstates= False
states= [[-0.75936848 -0.00985158]] dstates= False
states= [[-0.76859559 -0.00922711]] dstates= False
states= [[-0.77514625 -0.00655066

states= [[-0.52189982 -0.0147147 ]] dstates= False
states= [[-0.50718512 -0.01559164]] dstates= False
states= [[-0.5935322  -0.00985652]] dstates= False
states= [[-0.58367569 -0.00930466]] dstates= False
states= [[-0.61359552 -0.00654893]] dstates= False
states= [[-0.60704659 -0.00616827]] dstates= False
states= [[-0.65051686  0.00821268]] dstates= False
states= [[-0.65872954  0.00622673]] dstates= False
states= [[-0.57109077 -0.01116369]] dstates= False
states= [[-0.55992708 -0.01043561]] dstates= False
states= [[-0.48138223 -0.00193643]] dstates= False
states= [[-0.4794458  -0.00260625]] dstates= False
states= [[-0.4146365  -0.00080848]] dstates= False
states= [[-0.41382801  0.        ]] dstates= False
states= [[-0.41513806  0.00598068]] dstates= False
states= [[-0.42111874  0.00673723]] dstates= False
states= [[-0.52282652 -0.00129129]] dstates= False
states= [[-0.52153523 -0.00127581]] dstates= False
states= [[-0.64533612  0.01045143]] dstates= False
states= [[-0.65578755  0.009485

states= [[-0.55022453  0.01475589]] dstates= False
states= [[-0.3958578  -0.00613987]] dstates= False
states= [[-0.38971793 -0.00516255]] dstates= False
states= [[-0.4563399  -0.00273044]] dstates= False
states= [[-0.45360946 -0.00120937]] dstates= False
states= [[-0.61545691 -0.00700603]] dstates= False
states= [[-0.60845088 -0.00663557]] dstates= False
states= [[-0.29196749 -0.00270512]] dstates= False
states= [[-2.89262371e-01 -8.88281145e-05]] dstates= False
states= [[-0.45593836  0.00187351]] dstates= False
states= [[-0.45781187  0.00236371]] dstates= False
states= [[-0.46492877  0.01750469]] dstates= False
states= [[-0.48243347  0.01781265]] dstates= False
states= [[-0.61751516  0.01464585]] dstates= False
states= [[-0.63216102  0.01284596]] dstates= False
states= [[-0.56829822 -0.00658649]] dstates= False
states= [[-0.56171173 -0.00587172]] dstates= False
states= [[-0.5279852  0.004352 ]] dstates= False
states= [[-0.53233721  0.00528647]] dstates= False
states= [[-0.44886807 -0.

states= [[-0.48732875  0.00384874]] dstates= False
states= [[-0.49117749  0.00409152]] dstates= False
states= [[-0.59715204 -0.00819637]] dstates= False
states= [[-0.58895567 -0.00868341]] dstates= False
states= [[-0.40989247 -0.00696001]] dstates= False
states= [[-0.40293245 -0.00707465]] dstates= False
states= [[-0.82244643 -0.0100903 ]] dstates= False
states= [[-0.81235614 -0.0119951 ]] dstates= False
states= [[-0.40327264  0.0073721 ]] dstates= False
states= [[-0.41064475  0.00720314]] dstates= False
states= [[-0.58741787  0.00673218]] dstates= False
states= [[-0.59415004  0.00520698]] dstates= False
states= [[-0.50741537  0.0090602 ]] dstates= False
states= [[-0.51647557  0.01011362]] dstates= False
states= [[-0.77924487  0.00268287]] dstates= False
states= [[-0.78192774  0.00193361]] dstates= False
states= [[-0.55288243 -0.00790279]] dstates= False
states= [[-0.54497964 -0.00906303]] dstates= False
states= [[-0.51003371  0.00421538]] dstates= False
states= [[-0.51424909  0.003285

states= [[-0.41378858 -0.0040998 ]] dstates= False
states= [[-0.47915672 -0.00380507]] dstates= False
states= [[-0.47535165 -0.00344448]] dstates= False
states= [[-0.56259422 -0.00281892]] dstates= False
states= [[-0.55977529 -0.00208971]] dstates= False
states= [[-0.57605458  0.00432684]] dstates= False
states= [[-0.58038143  0.00490303]] dstates= False
states= [[-0.55956587 -0.00668344]] dstates= False
states= [[-0.55288243 -0.00790279]] dstates= False
states= [[-0.67272829  0.00737341]] dstates= False
states= [[-0.6801017   0.00624229]] dstates= False
states= [[-0.70254978 -0.00597561]] dstates= False
states= [[-0.69657417 -0.00721548]] dstates= False
states= [[-0.62112425  0.01105667]] dstates= False
states= [[-0.63218091  0.01125663]] dstates= False
states= [[-0.43989419  0.00474184]] dstates= False
states= [[-0.44463602  0.00632853]] dstates= False
states= [[-0.54341795 -0.00545747]] dstates= False
states= [[-0.53796048 -0.00556515]] dstates= False
states= [[-0.48463444 -0.003252

states= [[-0.47209589  0.01050463]] dstates= False
states= [[-0.48260052  0.01081135]] dstates= False
states= [[-0.5291178  -0.01078544]] dstates= False
states= [[-0.51833236 -0.01174594]] dstates= False
states= [[-0.59357494 -0.00307139]] dstates= False
states= [[-0.59050354 -0.00456982]] dstates= False
states= [[-0.55122969  0.0066437 ]] dstates= False
states= [[-0.5578734  0.0073871]] dstates= False
states= [[-0.63470482 -0.01792057]] dstates= False
states= [[-0.61678425 -0.01861039]] dstates= False
states= [[-0.51698632 -0.0146596 ]] dstates= False
states= [[-0.50232672 -0.01350017]] dstates= False
states= [[-0.3823846  -0.01178248]] dstates= False
states= [[-0.37060212 -0.01167487]] dstates= False
states= [[-0.5195088   0.00111016]] dstates= False
states= [[-0.52061896  0.00213251]] dstates= False
states= [[-0.29866291 -0.00963733]] dstates= False
states= [[-0.28902558 -0.00701969]] dstates= False
states= [[-0.72817252 -0.01336497]] dstates= False
states= [[-0.71480755 -0.01572167

states= [[-3.01228590e-01 -6.24365276e-05]] dstates= False
states= [[-0.30116615  0.00048473]] dstates= False
states= [[-0.62083462  0.0131572 ]] dstates= False
states= [[-0.63399181  0.0113443 ]] dstates= False
states= [[-0.49444756  0.00465013]] dstates= False
states= [[-0.49909769  0.00383372]] dstates= False
states= [[-0.47894467  0.00724687]] dstates= False
states= [[-0.48619155  0.00652684]] dstates= False
states= [[-0.43473812  0.00198225]] dstates= False
states= [[-0.43672036  0.00362648]] dstates= False
states= [[-0.43051067  0.00179335]] dstates= False
states= [[-0.43230403  0.00346953]] dstates= False
states= [[-0.52613054  0.00109204]] dstates= False
states= [[-5.27222576e-01  6.48572838e-05]] dstates= False
states= [[-0.51647557  0.01011362]] dstates= False
states= [[-0.52658919  0.01109119]] dstates= False
states= [[-5.52220678e-01  4.44610577e-04]] dstates= False
states= [[-0.55266529  0.00122689]] dstates= False
states= [[-0.42675532  0.01146717]] dstates= False
states=

states= [[-0.42761867 -0.0071185 ]] dstates= False
states= [[-0.42050018 -0.00535753]] dstates= False
states= [[-0.47073757 -0.00351579]] dstates= False
states= [[-0.46722178 -0.00309498]] dstates= False
states= [[-0.48976992 -0.00581614]] dstates= False
states= [[-0.48395379 -0.0045195 ]] dstates= False
states= [[-0.49193783  0.02110325]] dstates= False
states= [[-0.51304108  0.02118242]] dstates= False
states= [[-0.45383245 -0.00628484]] dstates= False
states= [[-0.44754761 -0.00571939]] dstates= False
states= [[-0.50524164  0.01052869]] dstates= False
states= [[-0.51577032  0.0115874 ]] dstates= False
states= [[-0.64304649  0.00274138]] dstates= False
states= [[-0.64578787  0.00284535]] dstates= False
states= [[-0.66005668 -0.01169941]] dstates= False
states= [[-0.64835727 -0.01261341]] dstates= False
states= [[-0.56995318 -0.00239139]] dstates= False
states= [[-0.56756178 -0.00372016]] dstates= False
states= [[-0.3085781  -0.00642303]] dstates= False
states= [[-0.30215507 -0.005881






Iterations:   0%|   | 5/1000 [00:07<18:52,  1.14s/it, Mean Rewards Per Episode: -200.0 | 0.779 MSE | Replay Size: 1200][A[A[A[A[A




Iterations:   1%|   | 6/1000 [00:07<21:44,  1.31s/it, Mean Rewards Per Episode: -200.0 | 0.779 MSE | Replay Size: 1200][A[A[A[A[A

 dstates= False
states= [[-0.3162008   0.01084791]] dstates= False
states= [[-0.42096607  0.00150964]] dstates= False
states= [[-0.42247571  0.00225648]] dstates= False
states= [[-0.52727031  0.00910223]] dstates= False
states= [[-0.53637254  0.00800645]] dstates= False
states= [[-0.3270487   0.01123808]] dstates= False
states= [[-0.33828679  0.01155743]] dstates= False
states= [[-0.55843749 -0.01983213]] dstates= False
states= [[-0.53860536 -0.02094464]] dstates= False
states= [[-0.51631889 -0.01174045]] dstates= False
states= [[-0.50457844 -0.01259788]] dstates= False
states= [[-0.68488659  0.00711003]] dstates= False
states= [[-0.69199662  0.00690009]] dstates= False
states= [[-4.94468434e-01  3.83911391e-04]] dstates= False
states= [[-0.49485235  0.00159924]] dstates= False
states= [[-0.61221613 -0.00672163]] dstates= False
states= [[-0.6054945  -0.00732969]] dstates= False
states= [[-0.4794458  -0.00260625]] dstates= False
states= [[-0.47683955 -0.0012567 ]] dstates= False
states=

states= [[-0.54280798  0.01296018]] dstates= False
states= [[-0.52970381  0.01310417]] dstates= False
states= [[-0.51755385  0.01214996]] dstates= False
states= [[-0.50544923  0.01210462]] dstates= False
states= [[-0.49448066  0.01096857]] dstates= False
states= [[-0.4837302   0.01075046]] dstates= False
states= [[-0.47427804  0.00945216]] dstates= False
states= [[-0.46619444  0.0080836 ]] dstates= False
states= [[-0.45753925  0.00865519]] dstates= False
states= [[-0.44837627  0.00916298]] dstates= False
states= [[-0.43977268  0.00860359]] dstates= False
states= [[-0.43179118  0.0079815 ]] dstates= False
states= [[-0.42448956  0.00730162]] dstates= False
states= [[-0.41692036  0.0075692 ]] dstates= False
states= [[-0.41113766  0.0057827 ]] dstates= False
states= [[-0.40518251  0.00595515]] dstates= False
states= [[-0.39909691  0.00608559]] dstates= False
states= [[-0.39292352  0.00617339]] dstates= False
states= [[-0.38770529  0.00521824]] dstates= False
states= [[-0.38447825  0.003227

states= [[-0.53318302  0.00817248]] dstates= False
states= [[-0.5413555   0.00703937]] dstates= False
states= [[-0.32577139  0.00835634]] dstates= False
states= [[-0.33412773  0.00870208]] dstates= False
states= [[-0.48671866  0.0037664 ]] dstates= False
states= [[-0.49048507  0.00501435]] dstates= False
states= [[-0.55527233  0.0099027 ]] dstates= False
states= [[-0.56517504  0.00859169]] dstates= False
states= [[-0.51652644  0.00150712]] dstates= False
states= [[-0.51803356  0.00154886]] dstates= False
states= [[-0.39216593 -0.00499751]] dstates= False
states= [[-0.38716842 -0.00500261]] dstates= False
states= [[-0.57306605 -0.01322163]] dstates= False
states= [[-0.55984442 -0.01449294]] dstates= False
states= [[-0.59738103 -0.00616527]] dstates= False
states= [[-0.59121576 -0.00766893]] dstates= False
states= [[-0.45488244 -0.01064902]] dstates= False
states= [[-0.44423342 -0.01005939]] dstates= False
states= [[-0.56688056  0.00227578]] dstates= False
states= [[-0.56915634  0.002935

states= [[-0.43630145  0.01009274]] dstates= False
states= [[-0.51918719  0.00192846]] dstates= False
states= [[-0.52111565  0.00294708]] dstates= False
states= [[-0.42247571  0.00225648]] dstates= False
states= [[-0.42473219  0.00198716]] dstates= False
states= [[-0.30667909 -0.00368883]] dstates= False
states= [[-0.30299026 -0.00115244]] dstates= False
states= [[-0.59793277  0.01141376]] dstates= False
states= [[-0.60934653  0.01177772]] dstates= False
states= [[-0.40661095  0.00334561]] dstates= False
states= [[-0.40995655  0.00518151]] dstates= False
states= [[-0.49776062 -0.01970652]] dstates= False
states= [[-0.4780541  -0.01836599]] dstates= False
states= [[-0.40300933 -0.00197026]] dstates= False
states= [[-4.01039068e-01 -7.16383738e-05]] dstates= False
states= [[-0.33467058 -0.00978498]] dstates= False
states= [[-0.3248856  -0.00838135]] dstates= False
states= [[-0.46412681 -0.0026513 ]] dstates= False
states= [[-0.46147551 -0.00118807]] dstates= False
states= [[-0.52410188 -

states= [[-0.34984421  0.01380237]] dstates= False
states= [[-0.61026039 -0.00619478]] dstates= False
states= [[-0.60406561 -0.00679243]] dstates= False
states= [[-0.37441609  0.01345794]] dstates= False
states= [[-0.38787403  0.01344798]] dstates= False
states= [[-0.45728775  0.01101858]] dstates= False
states= [[-0.46830633  0.01043138]] dstates= False
states= [[-0.51631889 -0.01174045]] dstates= False
states= [[-0.50457844 -0.01259788]] dstates= False
states= [[-0.62330514 -0.01217542]] dstates= False
states= [[-0.61112972 -0.01182438]] dstates= False
states= [[-0.57026608  0.00469226]] dstates= False
states= [[-0.57495834  0.00530858]] dstates= False
states= [[-0.54404978  0.01503297]] dstates= False
states= [[-0.55908276  0.01376734]] dstates= False
states= [[-0.35892725 -0.01048946]] dstates= False
states= [[-0.34843779 -0.00923539]] dstates= False
states= [[-0.51618043  0.00550057]] dstates= False
states= [[-0.521681    0.00451495]] dstates= False
states= [[-0.59715204 -0.008196

states= [[-0.5121232  -0.00140468]] dstates= False
states= [[-5.10718514e-01 -3.08105048e-04]] dstates= False
states= [[-0.48873308 -0.00543895]] dstates= False
states= [[-0.48329413 -0.0041374 ]] dstates= False
states= [[-0.41096697  0.00886919]] dstates= False
states= [[-0.41983616  0.0096349 ]] dstates= False
states= [[-0.67728148  0.00601495]] dstates= False
states= [[-0.68329643  0.00386251]] dstates= False
states= [[-0.67272829  0.00737341]] dstates= False
states= [[-0.6801017   0.00624229]] dstates= False
states= [[-0.61380258  0.00541272]] dstates= False
states= [[-0.6192153   0.00470539]] dstates= False
states= [[-0.46017558  0.00383652]] dstates= False
states= [[-0.4640121   0.00328104]] dstates= False
states= [[-0.27764363 -0.00368049]] dstates= False
states= [[-0.27396314 -0.00297839]] dstates= False
states= [[-0.77290124 -0.01731966]] dstates= False
states= [[-0.75558158 -0.01892244]] dstates= False
states= [[-0.4780813   0.00542714]] dstates= False
states= [[-0.48350844  

states= [[-0.60712451  0.02289607]] dstates= False
states= [[-0.63002058  0.0221114 ]] dstates= False
states= [[-0.58354683 -0.00811612]] dstates= False
states= [[-0.57543071 -0.00850329]] dstates= False
states= [[-0.77482716  0.00441771]] dstates= False
states= [[-0.77924487  0.00268287]] dstates= False
states= [[-0.69259835  0.00754203]] dstates= False
states= [[-0.70014038  0.007279  ]] dstates= False
states= [[-0.53494354 -0.00817565]] dstates= False
states= [[-0.52676789 -0.00819942]] dstates= False
states= [[-0.54437899  0.0068507 ]] dstates= False
states= [[-0.55122969  0.0066437 ]] dstates= False
states= [[-0.67939713 -0.00863612]] dstates= False
states= [[-0.67076101 -0.01070433]] dstates= False
states= [[-0.39912383 -0.00695791]] dstates= False
states= [[-0.39216593 -0.00499751]] dstates= False
states= [[-0.58360853  0.02351598]] dstates= False
states= [[-0.60712451  0.02289607]] dstates= False
states= [[-0.50778558  0.00058179]] dstates= False
states= [[-0.50836737  0.000695

states= [[-0.48260052  0.01081135]] dstates= False
states= [[-0.49341187  0.01103744]] dstates= False
states= [[-0.68690684 -0.00750971]] dstates= False
states= [[-0.67939713 -0.00863612]] dstates= False
states= [[-0.51086832  0.00190042]] dstates= False
states= [[-0.51276874  0.00198163]] dstates= False
states= [[-0.64956966 -0.00091543]] dstates= False
states= [[-0.64865423 -0.0008315 ]] dstates= False
states= [[-0.27744115  0.00608091]] dstates= False
states= [[-0.28352206  0.00872981]] dstates= False
states= [[-0.51368222  0.00139014]] dstates= False
states= [[-0.51507236  0.00145408]] dstates= False
states= [[-0.51309844  0.01036961]] dstates= False
states= [[-0.52346805  0.01037059]] dstates= False
states= [[-0.51004422  0.00909633]] dstates= False
states= [[-0.51914055  0.00812977]] dstates= False
states= [[-0.6399675  -0.01376473]] dstates= False
states= [[-0.62620277 -0.01552217]] dstates= False
states= [[-0.45968811 -0.01688959]] dstates= False
states= [[-0.44279851 -0.016289

states= [[-0.58984585 -0.00624622]] dstates= False
states= [[-0.64164428  0.00080061]] dstates= False
states= [[-0.64244488 -0.00107197]] dstates= False
states= [[-0.52676121 -0.00917911]] dstates= False
states= [[-0.5175821  -0.00913399]] dstates= False
states= [[-0.42558322 -0.0083231 ]] dstates= False
states= [[-0.41726012 -0.00853902]] dstates= False
states= [[-0.47458671 -0.01287358]] dstates= False
states= [[-0.46171313 -0.0124121 ]] dstates= False
states= [[-0.41881566 -0.00372678]] dstates= False
states= [[-0.41508888 -0.00292725]] dstates= False
states= [[-0.37523069 -0.0019833 ]] dstates= False
states= [[-0.37324739 -0.00189351]] dstates= False
states= [[-0.3761399   0.00161163]] dstates= False
states= [[-0.37775153  0.00267092]] dstates= False
states= [[-0.46277155  0.01283248]] dstates= False
states= [[-0.47560403  0.0141912 ]] dstates= False
states= [[-0.58741787  0.00673218]] dstates= False
states= [[-0.59415004  0.00520698]] dstates= False
states= [[-0.53768038  0.010985






Iterations:   1%|   | 6/1000 [00:08<21:44,  1.31s/it, Mean Rewards Per Episode: -200.0 | 0.635 MSE | Replay Size: 1400][A[A[A[A[A




Iterations:   1%|   | 7/1000 [00:08<23:36,  1.43s/it, Mean Rewards Per Episode: -200.0 | 0.635 MSE | Replay Size: 1400][A[A[A[A[A

 [[-0.66376148 -0.00965812]] dstates= False
states= [[-0.65410336 -0.00961209]] dstates= False
states= [[-0.43579337 -0.01330226]] dstates= False
states= [[-0.4224911  -0.01155553]] dstates= False
states= [[-0.55261644 -0.00903059]] dstates= False
states= [[-0.54358584 -0.00818041]] dstates= False
states= [[-0.577183    0.00778976]] dstates= False
states= [[-0.58497276  0.00633205]] dstates= False
states= [[-0.55882037 -0.00628492]] dstates= False
states= [[-0.55253545 -0.00750167]] dstates= False
states= [[-0.51461393 -0.00249073]] dstates= False
states= [[-0.5121232  -0.00140468]] dstates= False
states= [[-0.51040678 -0.00706278]] dstates= False
states= [[-0.50334401 -0.00791096]] dstates= False
states= [[-0.44415779  0.00579791]] dstates= False
states= [[-0.4499557   0.00534575]] dstates= False
states= [[-0.43201948 -0.00110155]] dstates= False
states= [[-0.43091793 -0.00141537]] dstates= False
states= [[-0.38455538 -0.0031497 ]] dstates= False
states= [[-0.38140568 -0.0031153 ]] ds

states= [[-5.02910016e-01 -2.21916773e-04]] dstates= False
states= [[-5.03286999e-01 -3.76982863e-04]] dstates= False
states= [[-0.50381623 -0.00052923]] dstates= False
states= [[-0.50549374 -0.00167751]] dstates= False
states= [[-0.50730697 -0.00181323]] dstates= False
states= [[-0.50824234 -0.00093537]] dstates= False
states= [[-0.50929284 -0.0010505 ]] dstates= False
states= [[-5.09450602e-01 -1.57764116e-04]] dstates= False
states= [[-5.09714445e-01 -2.63843562e-04]] dstates= False
states= [[-5.10082391e-01 -3.67945930e-04]] dstates= False
states= [[-0.50955168  0.00053071]] dstates= False
states= [[-5.09126295e-01  4.25386947e-04]] dstates= False
states= [[-0.50980942 -0.00068312]] dstates= False
states= [[-0.51059593 -0.00078651]] dstates= False
states= [[-5.10479941e-01  1.15990154e-04]] dstates= False
states= [[-0.51146232 -0.00098238]] dstates= False
states= [[-5.11535696e-01 -7.33790384e-05]] dstates= False
states= [[-5.11699528e-01 -1.63832390e-04]] dstates= False
states= [[

states= [[-0.3913745  -0.00289066]] dstates= False
states= [[-0.38848384 -0.00190482]] dstates= False
states= [[-0.67939713 -0.00863612]] dstates= False
states= [[-0.67076101 -0.01070433]] dstates= False
states= [[-0.43168356 -0.00339395]] dstates= False
states= [[-0.42828962 -0.00268883]] dstates= False
states= [[-0.63720599 -0.01395689]] dstates= False
states= [[-0.6232491  -0.01569319]] dstates= False
states= [[-0.70789556  0.00204085]] dstates= False
states= [[-0.70993641  0.00171498]] dstates= False
states= [[-0.4445607   0.00290066]] dstates= False
states= [[-0.44746136  0.00446674]] dstates= False
states= [[-0.29283211  0.00834241]] dstates= False
states= [[-0.30117452  0.01088952]] dstates= False
states= [[-0.3428298   0.00999236]] dstates= False
states= [[-0.35282216  0.01021788]] dstates= False
states= [[-0.52061896  0.00213251]] dstates= False
states= [[-0.52275147  0.00213887]] dstates= False
states= [[-0.35520277 -0.0104049 ]] dstates= False
states= [[-0.34479787 -0.010127

states= [[-0.70014038  0.007279  ]] dstates= False
states= [[-0.70741938  0.00496917]] dstates= False
states= [[-5.10986009e-01  1.71952106e-04]] dstates= False
states= [[-0.51115796  0.00126524]] dstates= False
states= [[-0.51723377  0.01703539]] dstates= False
states= [[-0.53426916  0.01595537]] dstates= False
states= [[-0.47458671 -0.01287358]] dstates= False
states= [[-0.46171313 -0.0124121 ]] dstates= False
states= [[-0.59793685 -0.01957606]] dstates= False
states= [[-0.57836079 -0.01898493]] dstates= False
states= [[-0.61164017 -0.02223885]] dstates= False
states= [[-0.58940131 -0.02172917]] dstates= False
states= [[-0.40405167 -0.02048552]] dstates= False
states= [[-0.38356615 -0.0184659 ]] dstates= False
states= [[-0.28184082 -0.00748367]] dstates= False
states= [[-0.27435715 -0.00478374]] dstates= False
states= [[-0.50778558  0.00058179]] dstates= False
states= [[-0.50836737  0.00069598]] dstates= False
states= [[-0.48260052  0.01081135]] dstates= False
states= [[-0.49341187  

states= [[-5.20161393e-01 -3.95242671e-04]] dstates= False
states= [[-0.51976615  0.0006335 ]] dstates= False
states= [[-0.51209829 -0.00090519]] dstates= False
states= [[-0.51119311 -0.00181216]] dstates= False
states= [[-0.736751    0.00961221]] dstates= False
states= [[-0.74636321  0.00906309]] dstates= False
states= [[-0.52817866 -0.00436095]] dstates= False
states= [[-0.52381771 -0.00336259]] dstates= False
states= [[-0.33412773  0.00870208]] dstates= False
states= [[-0.3428298   0.00999236]] dstates= False
states= [[-0.27098475 -0.00226   ]] dstates= False
states= [[-0.26872474 -0.00152934]] dstates= False
states= [[-0.63218091  0.01125663]] dstates= False
states= [[-0.64343754  0.01137707]] dstates= False
states= [[-0.51236645  0.00131578]] dstates= False
states= [[-0.51368222  0.00139014]] dstates= False
states= [[-0.59072748  0.00394296]] dstates= False
states= [[-0.59467044  0.00241395]] dstates= False
states= [[-6.94915747e-01  4.09571843e-04]] dstates= False
states= [[-0.69

states= [[-0.45727517 -0.00323825]] dstates= False
states= [[-0.53886185 -0.00974405]] dstates= False
states= [[-0.5291178  -0.01078544]] dstates= False
states= [[-0.55980777  0.00276796]] dstates= False
states= [[-0.56257574  0.0024763 ]] dstates= False
states= [[-0.5141698   0.00145794]] dstates= False
states= [[-0.51562774  0.00051772]] dstates= False
states= [[-0.50521063 -0.00620762]] dstates= False
states= [[-0.499003   -0.00502332]] dstates= False
states= [[-0.63470482 -0.01792057]] dstates= False
states= [[-0.61678425 -0.01861039]] dstates= False
states= [[-0.52970381  0.01310417]] dstates= False
states= [[-0.54280798  0.01296018]] dstates= False
states= [[-0.27204313  0.00539801]] dstates= False
states= [[-0.27744115  0.00608091]] dstates= False
states= [[-0.62112425  0.01105667]] dstates= False
states= [[-0.63218091  0.01125663]] dstates= False
states= [[-0.57842948 -0.00487901]] dstates= False
states= [[-0.57355047 -0.00525225]] dstates= False
states= [[-0.56526049  0.006075

states= [[-0.69259835  0.00754203]] dstates= False
states= [[-5.12529499e-01  4.09614873e-04]] dstates= False
states= [[-5.12939114e-01  4.89548710e-04]] dstates= False
states= [[-0.69264502 -0.00316463]] dstates= False
states= [[-0.68948039 -0.00535803]] dstates= False
states= [[-0.60406561 -0.00679243]] dstates= False
states= [[-0.59727318 -0.0063405 ]] dstates= False
states= [[-0.41868883 -0.00879637]] dstates= False
states= [[-0.40989247 -0.00696001]] dstates= False
states= [[-0.37441609  0.01345794]] dstates= False
states= [[-0.38787403  0.01344798]] dstates= False
states= [[-0.51276874  0.00198163]] dstates= False
states= [[-0.51475037  0.00204799]] dstates= False
states= [[-0.37324739 -0.00189351]] dstates= False
states= [[-0.37135388 -0.00079096]] dstates= False
states= [[-0.59909821 -0.01333027]] dstates= False
states= [[-0.58576795 -0.01379384]] dstates= False
states= [[-5.07081707e-01 -4.69847415e-04]] dstates= False
states= [[-5.06611860e-01 -3.42500681e-04]] dstates= False

states= [[-0.55022453  0.01475589]] dstates= False
states= [[-0.58741787  0.00673218]] dstates= False
states= [[-0.59415004  0.00520698]] dstates= False
states= [[-0.43425166 -0.00145239]] dstates= False
states= [[-0.43279927 -0.00077979]] dstates= False
states= [[-0.50770436  0.00183543]] dstates= False
states= [[-0.5095398   0.00294084]] dstates= False
states= [[-0.53740402  0.00059647]] dstates= False
states= [[-5.38000490e-01  4.88491486e-04]] dstates= False
states= [[-0.38356615 -0.0184659 ]] dstates= False
states= [[-0.36510025 -0.01632145]] dstates= False
states= [[-0.50236883 -0.01821105]] dstates= False
states= [[-0.48415778 -0.01691593]] dstates= False
states= [[-0.48976992 -0.00581614]] dstates= False
states= [[-0.48395379 -0.0045195 ]] dstates= False
states= [[-0.38397151  0.0046296 ]] dstates= False
states= [[-0.38860111  0.00461464]] dstates= False
states= [[-0.54839487  0.00585357]] dstates= False
states= [[-0.55424844  0.00562402]] dstates= False
states= [[-0.53604628 -

states= [[-0.709075 -0.002275]] dstates= False
states= [[-5.12954234e-01  2.48654042e-04]] dstates= False
states= [[-0.51320289 -0.00067339]] dstates= False
states= [[-5.18648406e-01  3.95957705e-04]] dstates= False
states= [[-0.51904436 -0.00056989]] dstates= False
states= [[-0.66697897 -0.01617154]] dstates= False
states= [[-0.65080743 -0.01610262]] dstates= False
states= [[-0.4519281   0.00600014]] dstates= False
states= [[-0.45792824  0.00748948]] dstates= False
states= [[-0.53540544 -0.00726894]] dstates= False
states= [[-0.5281365  -0.00830297]] dstates= False
states= [[-0.58359963 -0.0066938 ]] dstates= False
states= [[-0.57690582 -0.0080919 ]] dstates= False
states= [[-0.45728775  0.01101858]] dstates= False
states= [[-0.46830633  0.01043138]] dstates= False
states= [[-0.50876846  0.00091383]] dstates= False
states= [[-0.50968229  0.00201817]] dstates= False
states= [[-0.45360946 -0.00120937]] dstates= False
states= [[-4.52400093e-01  3.20572908e-04]] dstates= False
states= [[-






Iterations:   1%|   | 7/1000 [00:10<23:36,  1.43s/it, Mean Rewards Per Episode: -200.0 | 0.530 MSE | Replay Size: 1600][A[A[A[A[A




Iterations:   1%|   | 8/1000 [00:10<24:38,  1.49s/it, Mean Rewards Per Episode: -200.0 | 0.530 MSE | Replay Size: 1600][A[A[A[A[A

[[-0.68935869 -0.00740807]] dstates= False
states= [[-0.40968879 -0.003262  ]] dstates= False
states= [[-0.40642679 -0.0024012 ]] dstates= False
states= [[-0.41566663  0.01414004]] dstates= False
states= [[-0.42980667  0.01483424]] dstates= False
states= [[-0.55022453  0.01475589]] dstates= False
states= [[-0.56498042  0.01444633]] dstates= False
states= [[-0.44279851 -0.01628951]] dstates= False
states= [[-0.426509   -0.01557159]] dstates= False
states= [[-0.54770065  0.00286101]] dstates= False
states= [[-0.55056167  0.00165901]] dstates= False
states= [[-0.43390361  0.0017448 ]] dstates= False
states= [[-0.43564841  0.0033968 ]] dstates= False
states= [[-0.57016585 -0.00945453]] dstates= False
states= [[-0.56071132 -0.0087323 ]] dstates= False
states= [[-0.60934653  0.01177772]] dstates= False
states= [[-0.62112425  0.01105667]] dstates= False
states= [[-5.19446486e-01 -4.65396188e-04]] dstates= False
states= [[-5.18981090e-01 -4.30764656e-04]] dstates= False
states= [[-0.3487788  -

states= [[-0.63839047  0.00478312]] dstates= False
states= [[-0.63376333  0.00462714]] dstates= False
states= [[-0.62832491  0.00543842]] dstates= False
states= [[-0.6211139   0.00721101]] dstates= False
states= [[-0.61218192  0.00893198]] dstates= False
states= [[-0.60359335  0.00858856]] dstates= False
states= [[-0.59441057  0.00918278]] dstates= False
states= [[-0.58370069  0.01070988]] dstates= False
states= [[-0.57354248  0.01015821]] dstates= False
states= [[-0.56401109  0.00953139]] dstates= False
states= [[-0.55517735  0.00883374]] dstates= False
states= [[-0.54510713  0.01007022]] dstates= False
states= [[-0.5338757   0.01123142]] dstates= False
states= [[-0.52356721  0.01030849]] dstates= False
states= [[-0.51425896  0.00930825]] dstates= False
states= [[-0.50602075  0.00823821]] dstates= False
states= [[-0.49891431  0.00710644]] dstates= False
states= [[-0.49099283  0.00792147]] dstates= False
states= [[-0.48431552  0.00667732]] dstates= False
states= [[-0.47893214  0.005383

states= [[-0.62620277 -0.01552217]] dstates= False
states= [[-0.61068061 -0.01516788]] dstates= False
states= [[-0.41113766  0.0057827 ]] dstates= False
states= [[-0.41692036  0.0075692 ]] dstates= False
states= [[-0.50626936  0.00078741]] dstates= False
states= [[-0.50705677  0.00091143]] dstates= False
states= [[-0.56401109  0.00953139]] dstates= False
states= [[-0.57354248  0.01015821]] dstates= False
states= [[-0.2671954  -0.00079042]] dstates= False
states= [[-0.26640498  0.00195275]] dstates= False
states= [[-0.46916786 -0.00503927]] dstates= False
states= [[-0.46412858 -0.00359561]] dstates= False
states= [[-0.48016039  0.00649784]] dstates= False
states= [[-0.48665823  0.00777432]] dstates= False
states= [[-0.50444931  0.01218098]] dstates= False
states= [[-0.51663029  0.01323324]] dstates= False
states= [[-0.6054945  -0.00732969]] dstates= False
states= [[-0.59816482 -0.00788428]] dstates= False
states= [[-0.63399181  0.0113443 ]] dstates= False
states= [[-0.64533612  0.010451

states= [[-0.40989247 -0.00696001]] dstates= False
states= [[-0.40293245 -0.00707465]] dstates= False
states= [[-5.10479941e-01  1.15990154e-04]] dstates= False
states= [[-0.51059593 -0.00078651]] dstates= False
states= [[-0.37183645  0.01735078]] dstates= False
states= [[-0.38918723  0.01833176]] dstates= False
states= [[-0.47558285 -0.00189783]] dstates= False
states= [[-0.47368502 -0.00052487]] dstates= False
states= [[-0.54972686 -0.00630891]] dstates= False
states= [[-0.54341795 -0.00545747]] dstates= False
states= [[-5.66065668e-01 -4.70213622e-04]] dstates= False
states= [[-0.56559545 -0.00178436]] dstates= False
states= [[-0.56767214 -0.02205876]] dstates= False
states= [[-0.54561339 -0.02322375]] dstates= False
states= [[-0.30467992  0.00455541]] dstates= False
states= [[-0.30923533  0.00505459]] dstates= False
states= [[-0.54437899  0.0068507 ]] dstates= False
states= [[-0.55122969  0.0066437 ]] dstates= False
states= [[-0.50437435 -0.00804035]] dstates= False
states= [[-0.49

states= [[-6.12330219e-01 -1.13757847e-04]] dstates= False
states= [[-0.5121232  -0.00140468]] dstates= False
states= [[-5.10718514e-01 -3.08105048e-04]] dstates= False
states= [[-0.41956969 -0.01047768]] dstates= False
states= [[-0.40909201 -0.00863567]] dstates= False
states= [[-0.51603457 -0.00198791]] dstates= False
states= [[-0.51404666 -0.00091628]] dstates= False
states= [[-0.49551882 -0.01038101]] dstates= False
states= [[-0.48513781 -0.01109319]] dstates= False
states= [[-0.51407136 -0.00084008]] dstates= False
states= [[-5.13231278e-01  2.37661436e-04]] dstates= False
states= [[-0.57306985 -0.00608977]] dstates= False
states= [[-0.56698008 -0.00741421]] dstates= False
states= [[-0.51599935 -0.00544326]] dstates= False
states= [[-0.51055609 -0.00534546]] dstates= False
states= [[-0.57800881 -0.01957131]] dstates= False
states= [[-0.55843749 -0.01983213]] dstates= False
states= [[-0.63595173  0.00447658]] dstates= False
states= [[-0.64042831  0.00261819]] dstates= False
states=

states= [[-5.16416888e-01 -2.07959294e-05]] dstates= False
states= [[-0.56181451  0.00181926]] dstates= False
states= [[-5.63633767e-01  5.19717429e-04]] dstates= False
states= [[-0.60712451  0.02289607]] dstates= False
states= [[-0.63002058  0.0221114 ]] dstates= False
states= [[-0.50211823 -0.00815482]] dstates= False
states= [[-0.49396341 -0.00793285]] dstates= False
states= [[-0.68935869 -0.00740807]] dstates= False
states= [[-0.68195063 -0.00855154]] dstates= False
states= [[-0.54291998  0.00499881]] dstates= False
states= [[-0.5479188   0.00381657]] dstates= False
states= [[-0.43568947 -0.02075676]] dstates= False
states= [[-0.41493271 -0.01895613]] dstates= False
states= [[-0.68361829 -0.00884085]] dstates= False
states= [[-0.67477743 -0.00993622]] dstates= False
states= [[-0.32372233 -0.00714735]] dstates= False
states= [[-0.31657498 -0.00569258]] dstates= False
states= [[-0.50980942 -0.00068312]] dstates= False
states= [[-5.09126295e-01  4.25386947e-04]] dstates= False
states=

states= [[-0.44754761 -0.00571939]] dstates= False
states= [[-0.57541962 -0.01620716]] dstates= False
states= [[-0.55921246 -0.01547375]] dstates= False
states= [[-4.29992088e-01  4.14357028e-04]] dstates= False
states= [[-4.30406445e-01  1.04229754e-04]] dstates= False
states= [[-0.57834163  0.0146332 ]] dstates= False
states= [[-0.59297483  0.01411663]] dstates= False
states= [[-0.6776064  -0.00663036]] dstates= False
states= [[-0.67097604 -0.00870002]] dstates= False
states= [[-0.49807433  0.00273317]] dstates= False
states= [[-0.5008075   0.00190397]] dstates= False
states= [[-0.61186706 -0.01276885]] dstates= False
states= [[-0.59909821 -0.01333027]] dstates= False
states= [[-0.45543182 -0.00354874]] dstates= False
states= [[-0.45188308 -0.00301501]] dstates= False
states= [[-0.49980636  0.00325904]] dstates= False
states= [[-0.5030654   0.00241294]] dstates= False
states= [[-0.49776062 -0.01970652]] dstates= False
states= [[-0.4780541  -0.01836599]] dstates= False
states= [[-0.69

states= [[-0.52185082  0.00233676]] dstates= False
states= [[-0.48615912 -0.0128737 ]] dstates= False
states= [[-0.47328542 -0.01249778]] dstates= False
states= [[-0.38770529  0.00521824]] dstates= False
states= [[-0.39292352  0.00617339]] dstates= False
states= [[-0.57376672  0.00921685]] dstates= False
states= [[-0.58298357  0.00877381]] dstates= False
states= [[-0.43181499 -0.01224529]] dstates= False
states= [[-0.41956969 -0.01047768]] dstates= False
states= [[-0.43341446  0.00132366]] dstates= False
states= [[-0.43473812  0.00198225]] dstates= False
states= [[-0.48671866  0.0037664 ]] dstates= False
states= [[-0.49048507  0.00501435]] dstates= False
states= [[-5.99258560e-01 -2.55674698e-04]] dstates= False
states= [[-5.99002886e-01  1.83605376e-04]] dstates= False
states= [[-0.48458219 -0.00514946]] dstates= False
states= [[-0.47943273 -0.00581918]] dstates= False
states= [[-0.6752217  -0.00791659]] dstates= False
states= [[-0.66730512 -0.00996131]] dstates= False
states= [[-0.38

states= [[-0.52039965  0.00065749]] dstates= False
states= [[-0.52105715  0.00067656]] dstates= False
states= [[-0.51282571 -0.00320961]] dstates= False
states= [[-0.50961609 -0.00210477]] dstates= False
states= [[-0.4224911  -0.01155553]] dstates= False
states= [[-0.41093557 -0.01172655]] dstates= False
states= [[-0.4365659   0.01282685]] dstates= False
states= [[-0.44939274  0.01337881]] dstates= False
states= [[-0.27204313  0.00539801]] dstates= False
states= [[-0.27744115  0.00608091]] dstates= False
states= [[-0.45948826 -0.00405644]] dstates= False
states= [[-0.45543182 -0.00354874]] dstates= False
states= [[-0.76302244  0.00278775]] dstates= False
states= [[-0.76581019  0.00212685]] dstates= False
states= [[-0.51976615  0.0006335 ]] dstates= False
states= [[-0.52039965  0.00065749]] dstates= False
states= [[-0.69369366 -0.01402583]] dstates= False
states= [[-0.67966783 -0.01415405]] dstates= False
states= [[-0.42050018 -0.00535753]] dstates= False
states= [[-0.41514265 -0.005558






Iterations:   1%|   | 8/1000 [00:12<24:38,  1.49s/it, Mean Rewards Per Episode: -200.0 | 0.441 MSE | Replay Size: 1800][A[A[A[A[A




Iterations:   1%|   | 9/1000 [00:12<25:37,  1.55s/it, Mean Rewards Per Episode: -200.0 | 0.441 MSE | Replay Size: 1800][A[A[A[A[A

 False
states= [[-0.5023945   0.00734099]] dstates= False
states= [[-0.51948091 -0.00088388]] dstates= False
states= [[-0.51859703 -0.00184636]] dstates= False
states= [[-0.56384162 -0.00502125]] dstates= False
states= [[-0.55882037 -0.00628492]] dstates= False
states= [[-0.51647557  0.01011362]] dstates= False
states= [[-0.52658919  0.01109119]] dstates= False
states= [[-2.89262371e-01 -8.88281145e-05]] dstates= False
states= [[-0.28917354  0.00052797]] dstates= False
states= [[-0.40903918  0.00796512]] dstates= False
states= [[-0.4170043   0.00975102]] dstates= False
states= [[-0.44997015  0.00481886]] dstates= False
states= [[-0.45478901  0.00433128]] dstates= False
states= [[-0.69084339  0.00148208]] dstates= False
states= [[-0.69232548  0.00126998]] dstates= False
states= [[-0.38277932 -0.00086882]] dstates= False
states= [[-3.81910494e-01  1.62128450e-04]] dstates= False
states= [[-0.48896162 -0.00088219]] dstates= False
states= [[-0.48807943 -0.0006163 ]] dstates= False
states= 

states= [[-0.27270217  0.00686139]] dstates= False
states= [[-0.26854979  0.00415238]] dstates= False
states= [[-0.26712901  0.00142077]] dstates= False
states= [[-0.26644752  0.0006815 ]] dstates= False
states= [[-0.26750897 -0.00106145]] dstates= False
states= [[-0.27030765 -0.00279868]] dstates= False
states= [[-0.2758284  -0.00552075]] dstates= False
states= [[-0.28304098 -0.00721258]] dstates= False
states= [[-0.29290516 -0.00986418]] dstates= False
states= [[-0.30436469 -0.01145953]] dstates= False
states= [[-0.31635248 -0.01198778]] dstates= False
states= [[-0.33079639 -0.01444391]] dstates= False
states= [[-0.34760702 -0.01681063]] dstates= False
states= [[-0.36467712 -0.0170701 ]] dstates= False
states= [[-0.38389448 -0.01921737]] dstates= False
states= [[-0.40412923 -0.02023474]] dstates= False
states= [[-0.42624093 -0.0221117 ]] dstates= False
states= [[-0.44807248 -0.02183155]] dstates= False
states= [[-0.47146564 -0.02339316]] dstates= False
states= [[-0.49624821 -0.024782

states= [[-0.71701629  0.00325715]] dstates= False
states= [[-0.72027345  0.0008662 ]] dstates= False
states= [[-0.44977911 -0.00819626]] dstates= False
states= [[-0.44158284 -0.00658733]] dstates= False
states= [[-5.07909402e-01 -3.12729812e-04]] dstates= False
states= [[-5.07596672e-01 -1.92760128e-04]] dstates= False
states= [[-0.33721999  0.01511042]] dstates= False
states= [[-0.35233041  0.01533915]] dstates= False
states= [[-0.56884895  0.00537888]] dstates= False
states= [[-0.57422784  0.00400062]] dstates= False
states= [[-0.53081572  0.0011318 ]] dstates= False
states= [[-0.53194752  0.00206919]] dstates= False
states= [[-0.42947106  0.00933152]] dstates= False
states= [[-0.43880258  0.00896065]] dstates= False
states= [[-0.50268961 -0.01115181]] dstates= False
states= [[-0.4915378  -0.01191173]] dstates= False
states= [[-0.79328345  0.00775235]] dstates= False
states= [[-0.8010358   0.00490363]] dstates= False
states= [[-5.06611860e-01 -3.42500681e-04]] dstates= False
states=

states= [[-0.44090985  0.00698693]] dstates= False
states= [[-0.44789678  0.00654982]] dstates= False
states= [[-0.47873771  0.01176682]] dstates= False
states= [[-0.49050453  0.01201462]] dstates= False
states= [[-0.71585803  0.00255712]] dstates= False
states= [[-7.18415153e-01  1.77770688e-04]] dstates= False
states= [[-0.46448323  0.00476841]] dstates= False
states= [[-0.46925164  0.00517421]] dstates= False
states= [[-5.98358218e-01 -6.07638168e-05]] dstates= False
states= [[-5.98297455e-01  3.83673450e-04]] dstates= False
states= [[-0.64317359  0.00290542]] dstates= False
states= [[-0.64607902  0.00200735]] dstates= False
states= [[-0.50547834  0.00154878]] dstates= False
states= [[-0.50702711  0.00167301]] dstates= False
states= [[-0.54544914 -0.00940286]] dstates= False
states= [[-0.53604628 -0.0084962 ]] dstates= False
states= [[-0.49612064 -0.00362523]] dstates= False
states= [[-0.49249541 -0.00239229]] dstates= False
states= [[-0.64640611 -0.01083806]] dstates= False
states=

states= [[-0.53768038  0.01098561]] dstates= False
states= [[-5.52220678e-01  4.44610577e-04]] dstates= False
states= [[-0.55266529  0.00122689]] dstates= False
states= [[-0.51918719  0.00192846]] dstates= False
states= [[-0.52111565  0.00294708]] dstates= False
states= [[-0.63399181  0.0113443 ]] dstates= False
states= [[-0.64533612  0.01045143]] dstates= False
states= [[-0.47434135  0.00929172]] dstates= False
states= [[-0.48363307  0.00859074]] dstates= False
states= [[-0.46171313 -0.0124121 ]] dstates= False
states= [[-0.44930103 -0.01185947]] dstates= False
states= [[-0.54949146 -0.01062961]] dstates= False
states= [[-0.53886185 -0.00974405]] dstates= False
states= [[-0.43341446  0.00132366]] dstates= False
states= [[-0.43473812  0.00198225]] dstates= False
states= [[-0.43317623 -0.00044094]] dstates= False
states= [[-0.43273529 -0.00076788]] dstates= False
states= [[-0.49077103 -0.01045698]] dstates= False
states= [[-0.48031405 -0.00913325]] dstates= False
states= [[-0.44714149 -

states= [[-0.58298357  0.00877381]] dstates= False
states= [[-0.59175738  0.00826618]] dstates= False
states= [[-0.59050354 -0.00456982]] dstates= False
states= [[-0.58593373 -0.00403461]] dstates= False
states= [[-0.50973549  0.00644494]] dstates= False
states= [[-0.51618043  0.00550057]] dstates= False
states= [[-0.50410924 -0.0072699 ]] dstates= False
states= [[-0.49683934 -0.00706942]] dstates= False
states= [[-0.51663029  0.01323324]] dstates= False
states= [[-0.52986353  0.01418626]] dstates= False
states= [[-0.62832491  0.00543842]] dstates= False
states= [[-0.63376333  0.00462714]] dstates= False
states= [[-0.43499551 -0.00593061]] dstates= False
states= [[-0.4290649  -0.00423107]] dstates= False
states= [[-0.45543182 -0.00354874]] dstates= False
states= [[-0.45188308 -0.00301501]] dstates= False
states= [[-0.62330514 -0.01217542]] dstates= False
states= [[-0.61112972 -0.01182438]] dstates= False
states= [[-0.55881873 -0.01241904]] dstates= False
states= [[-0.5463997  -0.013589

states= [[-0.59123968 -0.01582006]] dstates= False
states= [[-0.57541962 -0.01620716]] dstates= False
states= [[-0.59130481  0.00582774]] dstates= False
states= [[-0.59713254  0.0062807 ]] dstates= False
states= [[-0.37233055  0.00800946]] dstates= False
states= [[-0.38034001  0.00805113]] dstates= False
states= [[-0.60087832 -0.00674269]] dstates= False
states= [[-0.59413563 -0.00626778]] dstates= False
states= [[-0.56512134 -0.00252712]] dstates= False
states= [[-0.56259422 -0.00281892]] dstates= False
states= [[-0.33079639 -0.01444391]] dstates= False
states= [[-0.31635248 -0.01198778]] dstates= False
states= [[-0.51856847 -0.00816169]] dstates= False
states= [[-0.51040678 -0.00706278]] dstates= False
states= [[-0.4499557   0.00534575]] dstates= False
states= [[-0.45530146  0.00485441]] dstates= False
states= [[-0.63839047  0.00478312]] dstates= False
states= [[-0.64317359  0.00290542]] dstates= False
states= [[-5.10986009e-01  1.71952106e-04]] dstates= False
states= [[-0.51115796  

states= [[-0.48671866  0.0037664 ]] dstates= False
states= [[-0.51404666 -0.00091628]] dstates= False
states= [[-0.51313038 -0.00083778]] dstates= False
states= [[-0.38787403  0.01344798]] dstates= False
states= [[-0.401322    0.01434462]] dstates= False
states= [[-0.55977529 -0.00208971]] dstates= False
states= [[-0.55768558 -0.00134492]] dstates= False
states= [[-0.52058227  0.00510004]] dstates= False
states= [[-0.5256823   0.00408441]] dstates= False
states= [[-0.24831727  0.00195738]] dstates= False
states= [[-0.25027465  0.00278519]] dstates= False
states= [[-0.68361829 -0.00884085]] dstates= False
states= [[-0.67477743 -0.00993622]] dstates= False
states= [[-5.10718514e-01 -3.08105048e-04]] dstates= False
states= [[-5.10410409e-01 -2.09218103e-04]] dstates= False
states= [[-0.44789678  0.00654982]] dstates= False
states= [[-0.4544466   0.00806475]] dstates= False
states= [[-0.38277932 -0.00086882]] dstates= False
states= [[-3.81910494e-01  1.62128450e-04]] dstates= False
states=

states= [[-0.59817386 -0.02016505]] dstates= False
states= [[-0.76417543 -0.00386738]] dstates= False
states= [[-0.76030805 -0.00649721]] dstates= False
states= [[-0.74954222  0.00804727]] dstates= False
states= [[-0.75758949  0.00543295]] dstates= False
states= [[-0.50836737  0.00069598]] dstates= False
states= [[-0.50906336  0.00180497]] dstates= False
states= [[-0.42942291  0.00240052]] dstates= False
states= [[-0.43182343  0.00208018]] dstates= False
states= [[-0.54303412 -0.00809058]] dstates= False
states= [[-0.53494354 -0.00817565]] dstates= False
states= [[-0.40250208 -0.00063513]] dstates= False
states= [[-0.40186695  0.0012577 ]] dstates= False
states= [[-5.18474478e-01  4.68545446e-04]] dstates= False
states= [[-5.18943024e-01  5.03462448e-04]] dstates= False
states= [[-0.52381771 -0.00336259]] dstates= False
states= [[-0.52045512 -0.00333901]] dstates= False
states= [[-0.43771598 -0.00447522]] dstates= False
states= [[-0.43324076 -0.00380581]] dstates= False
states= [[-0.57






Iterations:   1%|   | 9/1000 [00:13<25:37,  1.55s/it, Mean Rewards Per Episode: -200.0 | 0.438 MSE | Replay Size: 2000][A[A[A[A[A




Iterations:   1%|  | 10/1000 [00:13<26:06,  1.58s/it, Mean Rewards Per Episode: -200.0 | 0.438 MSE | Replay Size: 2000][A[A[A[A[A

 dstates= False
states= [[-0.48646821 -0.01412681]] dstates= False
states= [[-0.4723414  -0.01274389]] dstates= False
states= [[-0.33069983 -0.0111371 ]] dstates= False
states= [[-0.31956273 -0.01070062]] dstates= False
states= [[-0.50876846  0.00091383]] dstates= False
states= [[-0.50968229  0.00201817]] dstates= False
states= [[-0.48047072  0.00173709]] dstates= False
states= [[-0.48220782  0.00204673]] dstates= False
states= [[-0.46028744 -0.00171608]] dstates= False
states= [[-0.45857136 -0.00223146]] dstates= False
states= [[-0.27270217  0.00686139]] dstates= False
states= [[-0.27956356  0.00853248]] dstates= False
states= [[-0.51276874  0.00198163]] dstates= False
states= [[-0.51475037  0.00204799]] dstates= False
states= [[-0.59368063 -0.00901093]] dstates= False
states= [[-0.58466969 -0.00946641]] dstates= False
states= [[-0.4619627   0.00524959]] dstates= False
states= [[-0.46721229  0.00567047]] dstates= False
states= [[-0.51948091 -0.00088388]] dstates= False
states= [[-0.51

states= [[-0.22015726  0.01376808]] dstates= False
states= [[-0.20736344  0.01279382]] dstates= False
states= [[-0.19760127  0.00976217]] dstates= False
states= [[-0.18891254  0.00868873]] dstates= False
states= [[-0.18133296  0.00757958]] dstates= False
states= [[-0.17489249  0.00644047]] dstates= False
states= [[-0.17061573  0.00427675]] dstates= False
states= [[-0.16751858  0.00309715]] dstates= False
states= [[-0.16661232  0.00090627]] dstates= False
states= [[-0.1669002  -0.00028789]] dstates= False
states= [[-0.17038121 -0.003481  ]] dstates= False
states= [[-0.17504267 -0.00466147]] dstates= False
states= [[-0.18086729 -0.00582462]] dstates= False
states= [[-0.18883282 -0.00796554]] dstates= False
states= [[-0.19990782 -0.011075  ]] dstates= False
states= [[-0.21304655 -0.01313873]] dstates= False
states= [[-0.2291918  -0.01614525]] dstates= False
states= [[-0.24826902 -0.01907722]] dstates= False
states= [[-0.27018429 -0.02191527]] dstates= False
states= [[-0.2928223  -0.022638

KeyboardInterrupt: 

In [59]:
C=np.array([-0.63098952,  0.01380841])
C[np.newaxis]

array([[-0.63098952,  0.01380841]])

In [60]:
dstates = policy.discretize(state)
dstates

(7, 9)

In [61]:
policy.model[dstates]

array([-9.70991922, -9.69938436, -9.70159377])

In [62]:
policy.qvals(C[np.newaxis])

array([[-9.75339319, -9.73551997, -9.67037773]])

In [63]:
policy.model[policy.discretize(state)]

array([-9.70991922, -9.69938436, -9.70159377])

In [55]:
policy.model

array([[[ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [-6.80699477, -4.60183067, -4.61053137],
        [-9.62358636, -9.62343823, -9.61881578],
        [-9.6108662 , -9.62250954, -9.64284553],
        [-9.65442184, -9.6334957 , -9.6356027 ],
        [-9.55217078, -9.5759087 , -9.54505571],
        [-9.51207072, -9.52380654, -9.46450423],
        [-9.30301461, -9.34980503, -9.32971845],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ]],

       [[ 0.        ,  0.        ,  0.        ],
        [-1.59164968,  0.        ,  0.        ],
        [-8.21716669, -8.73375869, -7.99733251],
        [-9.67956168, -9.68006412, -9.69563743],
        [-9.72087986, -9.72540314, -9.71414255],
        [-9.67232224, -9.67605106, -9.68774119],
        [-9.617629

In [24]:
type(A[0][1])

numpy.ndarray

In [43]:
policy.model[policy.discretize(C)]

array([-9.20065616, -9.20381563, -9.28381501])

In [12]:
policy.discretize(C)

(9, 14)

In [13]:
D=policy.model

In [14]:
D[policy.discretize(C)[0]][policy.discretize(C)[1]]

array([-8.36834804, -8.19111409, -7.97139393])

In [17]:
D[policy.discretize(C)[0]][policy.discretize(C)[1]][2]=9
D[policy.discretize(C)[0]][policy.discretize(C)[1]]

array([-8.36834804, -8.19111409,  9.        ])

In [20]:
policy.model[0][0][1]=1

In [21]:
policy.model

array([[[0., 1., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       ...,

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])