In [1]:
from hyperdash import monitor_cell

In [2]:
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
from IPython import display
def show_state(env, step=0, info=""):
    plt.figure(3)
    plt.clf()
    plt.imshow(env.render(mode='rgb_array'))
    plt.title("%s | Step: %d %s" % (env._spec.id,step, info))
    plt.axis('off')
    display.clear_output(wait=True)
    display.display(plt.gcf())

In [10]:
import gym
import random
import math
from time import sleep

import numpy as np

In [4]:
env = gym.make('CartPole-v0')

[2017-07-18 21:38:44,083] Making new env: CartPole-v0


In [5]:
# 4 discretes states per dimension
#         (x, x', theta, theta')
BUCKETS = (3, 3 , 3    , 3)


In [6]:
NUM_EPISODES = 1000
MAX_STEPS = 250
LR = 0.1

In [7]:
q_table = np.zeros(BUCKETS + (env.action_space.n,))

In [37]:
NUM_BUCKETS = (1, 1, 6, 3)  # (x, x', theta, theta')
# Number of discrete actions
NUM_ACTIONS = env.action_space.n # (left, right)
# Bounds for each discrete state
STATE_BOUNDS = list(zip(env.observation_space.low, env.observation_space.high))
print(STATE_BOUNDS)
STATE_BOUNDS[1] = [-0.5, 0.5]
STATE_BOUNDS[3] = [-math.radians(50), math.radians(50)]
# Index of the action
ACTION_INDEX = len(NUM_BUCKETS)

## Creating a Q-Table for each state-action pair
q_table = np.zeros(NUM_BUCKETS + (NUM_ACTIONS,))

## Learning related constants
MIN_EXPLORE_RATE = 0.01
MIN_LEARNING_RATE = 0.1

## Defining the simulation related constants
NUM_EPISODES = 1000
MAX_T = 250
STREAK_TO_END = 120
SOLVED_T = 199
DEBUG_MODE = False

def simulate():
    ## Instantiating the learning related parameters
    learning_rate = get_learning_rate(0)
    explore_rate = get_explore_rate(0)
    discount_factor = 0.99  # since the world is unchanging

    num_streaks = 0

    for episode in range(NUM_EPISODES):

        # Reset the environment
        obv = env.reset()

        # the initial state
        state_0 = state_to_bucket(obv)

        for t in range(MAX_T):
            env.render()

            # Select an action
            action = select_action(state_0, explore_rate)

            # Execute the action
            obv, reward, done, _ = env.step(action)
#             if episode % 100 == 0:
#                 show_state(env.env,t,episode)

            # Observe the result
            state = state_to_bucket(obv)

            # Update the Q based on the result
            best_q = np.amax(q_table[state])
            q_update = learning_rate*(reward + discount_factor*(best_q) - q_table[state_0 + (action,)])
            q_table[state_0 + (action,)] += q_update
            # Setting up for the next iteration
            state_0 = state

            # Print data
            if (DEBUG_MODE):
                print("\nEpisode = %d" % episode)
                print("t = %d" % t)
                print("Action: %d" % action)
                print("State: %s" % str(state))
                print("Reward: %f" % reward)
                print("Best Q: %f" % best_q)
                print("Explore rate: %f" % explore_rate)
                print("Learning rate: %f" % learning_rate)
                print("Streaks: %d" % num_streaks)

                print("")

            if done:
#                print("Episode %d finished after %f time steps" % (episode, t))
               if (t >= SOLVED_T):
                   num_streaks += 1
               else:
                   num_streaks = 0
               break

            #sleep(0.25)

        # It's considered done when it's solved over 120 times consecutively
        if num_streaks > STREAK_TO_END:
            print("Done. Streaks: ",num_streaks)
            break

        # Update parameters
        explore_rate = get_explore_rate(episode)
        learning_rate = get_learning_rate(episode)


def select_action(state, explore_rate):
    # Select a random action
    if random.random() < explore_rate:
        action = env.action_space.sample()
    # Select the action with the highest q
    else:
        action = np.argmax(q_table[state])
    return action


def get_explore_rate(t):
    adj = abs((t+1)/25.)
    er = max(MIN_EXPLORE_RATE, min(1, 1.0 - math.log10(adj)))
#     print(er)
    return er

def get_learning_rate(t):
    adj = abs((t+1)/25.)
    return max(MIN_LEARNING_RATE, min(0.5, 1.0 - math.log10(adj)))

def state_to_bucket(state):
    bucket_indice = []
    for i in range(len(state)):
        if state[i] <= STATE_BOUNDS[i][0]:
            bucket_index = 0
        elif state[i] >= STATE_BOUNDS[i][1]:
            print("On",i)
            bucket_index = NUM_BUCKETS[i] - 1
        else:
            # Mapping the state bounds to the bucket array
            bound_width = STATE_BOUNDS[i][1] - STATE_BOUNDS[i][0]
            offset = (NUM_BUCKETS[i]-1)*STATE_BOUNDS[i][0]/bound_width
            scaling = (NUM_BUCKETS[i]-1)/bound_width
            bucket_index = int(round(scaling*state[i] - offset))
        bucket_indice.append(bucket_index)
    print(state)
    print(tuple(bucket_indice))
    print("")

    return tuple(bucket_indice)

simulate()

[(-4.7999999999999998, 4.7999999999999998), (-3.4028234663852886e+38, 3.4028234663852886e+38), (-0.41887902047863906, 0.41887902047863906), (-3.4028234663852886e+38, 3.4028234663852886e+38)]
[ 0.03121263 -0.04369167  0.00702288  0.01794206]
(0, 0, 3, 1)

[ 0.0303388   0.15132887  0.00738172 -0.27251681]
(0, 0, 3, 1)

[ 0.03336537  0.34634472  0.00193139 -0.56286241]
(0, 0, 3, 0)

('On', 1)
[ 0.04029227  0.54143951 -0.00932586 -0.85493623]
(0, 0, 2, 0)

[ 0.05112106  0.34644589 -0.02642459 -0.56520028]
(0, 0, 2, 0)

[ 0.05804998  0.15170444 -0.03772859 -0.28095801]
(0, 0, 2, 1)

[ 0.06108407  0.34734369 -0.04334775 -0.58529763]
(0, 0, 2, 0)

[ 0.06803094  0.15285487 -0.05505371 -0.30657869]
(0, 0, 2, 1)

[ 0.07108804 -0.04144112 -0.06118528 -0.03175387]
(0, 0, 2, 1)

[ 0.07025921 -0.23563471 -0.06182036  0.24101418]
(0, 0, 2, 1)

[ 0.06554652 -0.42982159 -0.05700007  0.51357447]
(0, 0, 2, 2)

[ 0.05695009 -0.23394512 -0.04672858  0.20348799]
(0, 0, 2, 1)

[ 0.05227119 -0.03818713 -0.042

('On', 3)
[-0.03790853 -0.80740155  0.10575957  1.2527889 ]
(0, 0, 3, 2)

('On', 3)
[-0.05405656 -1.00370726  0.13081534  1.57663761]
(0, 0, 3, 2)

('On', 3)
[-0.0741307  -0.81036444  0.1623481   1.32745141]
(0, 0, 3, 2)

('On', 3)
[-0.09033799 -1.00712     0.18889712  1.66622482]
(0, 0, 4, 2)

('On', 3)
[-0.11048039 -1.2038708   0.22222162  2.01130738]
(0, 0, 4, 2)

[-0.04973294  0.01099367 -0.04869363  0.00532598]
(0, 0, 2, 1)

[-0.04951307 -0.18339735 -0.04858711  0.28225662]
(0, 0, 2, 1)

[-0.05318101 -0.37779381 -0.04294197  0.55922821]
(0, 0, 2, 2)

[-0.06073689 -0.57228754 -0.03175741  0.83807879]
(0, 0, 2, 2)

[-0.07218264 -0.37674663 -0.01499583  0.53558021]
(0, 0, 2, 2)

[-0.07971757 -0.18141706 -0.00428423  0.23821019]
(0, 0, 2, 1)

[ -8.33459136e-02  -3.76477541e-01   4.79975076e-04   5.29538675e-01]
(0, 0, 3, 2)

[-0.09087546 -0.57160624  0.01107075  0.8223728 ]
(0, 0, 3, 2)

('On', 3)
[-0.10230759 -0.76687789  0.0275182   1.11851707]
(0, 0, 3, 2)

[-0.11764515 -0.5721276 

[ 0.09671934 -0.15077081 -0.13585966 -0.16905389]
(0, 0, 2, 1)

[ 0.09370392  0.04600777 -0.13924074 -0.50132184]
(0, 0, 2, 0)

[ 0.09462407  0.24278954 -0.14926717 -0.83444233]
(0, 0, 2, 0)

[ 0.09947987  0.43960104 -0.16595602 -1.17009863]
(0, 0, 2, 0)

('On', 1)
[ 0.10827189  0.63644586 -0.18935799 -1.50987657]
(0, 0, 1, 0)

[ 0.1210008   0.44405525 -0.21955552 -1.28178819]
(0, 0, 1, 0)

[-0.04730209  0.00456186 -0.02689241  0.01925955]
(0, 0, 2, 1)

[-0.04721085  0.20005895 -0.02650722 -0.28178547]
(0, 0, 2, 1)

[-0.04320967  0.00532493 -0.03214292  0.00242074]
(0, 0, 2, 1)

[-0.04310317 -0.18932166 -0.03209451  0.28479141]
(0, 0, 2, 1)

[-0.04688961 -0.38397151 -0.02639868  0.56718167]
(0, 0, 2, 2)

[-0.05456904 -0.5787134  -0.01505505  0.85143241]
(0, 0, 2, 2)

('On', 3)
[-0.06614331 -0.77362689  0.0019736   1.13934349]
(0, 0, 3, 2)

[-0.08161584 -0.5785308   0.02476047  0.84728016]
(0, 0, 3, 2)

('On', 3)
[-0.09318646 -0.7739816   0.04170607  1.14764533]
(0, 0, 3, 2)

[-0.108666

('On', 1)
[ 0.09186132  0.55633366 -0.1237697  -0.98755313]
(0, 0, 2, 0)

[ 0.10298799  0.36306674 -0.14352076 -0.73616586]
(0, 0, 2, 0)

[ 0.11024933  0.17018803 -0.15824408 -0.49187279]
(0, 0, 2, 0)

[ 0.11365309  0.36714666 -0.16808153 -0.82994922]
(0, 0, 1, 0)

('On', 1)
[ 0.12099602  0.56411823 -0.18468052 -1.17042646]
(0, 0, 1, 0)

('On', 1)
[ 0.13227839  0.76109751 -0.20808905 -1.51485888]
(0, 0, 1, 0)

('On', 1)
[ 0.14750034  0.9580408  -0.23838622 -1.8646386 ]
(0, 0, 1, 0)

[-0.04829281 -0.02620842  0.04929883 -0.0270057 ]
(0, 0, 3, 1)

[-0.04881698  0.16817315  0.04875872 -0.30373583]
(0, 0, 3, 1)

[-0.04545351  0.36256754  0.042684   -0.58065148]
(0, 0, 3, 0)

[-0.03820216  0.16687429  0.03107097 -0.27483368]
(0, 0, 3, 1)

[-0.03486468 -0.02867688  0.0255743   0.02748489]
(0, 0, 3, 1)

[-0.03543822  0.16606915  0.026124   -0.25702062]
(0, 0, 3, 1)

[-0.03211683 -0.02941584  0.02098359  0.0437864 ]
(0, 0, 3, 1)

[-0.03270515  0.16539903  0.02185931 -0.24220284]
(0, 0, 3, 1)



[-0.06129372 -0.03862541  0.04392143  0.16179574]
(0, 0, 3, 1)

[-0.06206623 -0.23434772  0.04715735  0.46800482]
(0, 0, 3, 2)

[-0.06675318 -0.43010307  0.05651744  0.77517054]
(0, 0, 3, 2)

('On', 3)
[-0.07535524 -0.625955    0.07202085  1.08508664]
(0, 0, 3, 2)

[-0.08787434 -0.43185324  0.09372259  0.81584462]
(0, 0, 3, 2)

[-0.09651141 -0.23813093  0.11003948  0.55405057]
(0, 0, 3, 2)

('On', 3)
[-0.10127403 -0.43461203  0.12112049  0.87927603]
(0, 0, 3, 2)

[-0.10996627 -0.24132552  0.13870601  0.62699274]
(0, 0, 3, 2)

('On', 3)
[-0.11479278 -0.43808306  0.15124586  0.95994341]
(0, 0, 3, 2)

('On', 3)
[-0.12355444 -0.63487882  0.17044473  1.29606411]
(0, 0, 4, 2)

('On', 3)
[-0.13625202 -0.44228171  0.19636602  1.06122318]
(0, 0, 4, 2)

('On', 3)
[-0.14509765 -0.63938462  0.21759048  1.408557  ]
(0, 0, 4, 2)

[ 0.04288052  0.04928033 -0.035354    0.03274724]
(0, 0, 2, 1)

[ 0.04386613 -0.14531726 -0.03469906  0.31406932]
(0, 0, 2, 1)

[ 0.04095978  0.05028136 -0.02841767  0.0106

('On', 3)
[-0.09258212 -0.97539422  0.04942033  1.49852198]
(0, 0, 3, 2)

('On', 3)
[-0.11209001 -1.17108061  0.07939077  1.80621685]
(0, 0, 3, 2)

('On', 3)
[-0.13551162 -0.97692939  0.1155151   1.53922369]
(0, 0, 3, 2)

('On', 3)
[-0.15505021 -1.17323607  0.14629958  1.86560726]
(0, 0, 3, 2)

('On', 3)
[-0.17851493 -1.36962597  0.18361172  2.19990396]
(0, 0, 4, 2)

('On', 3)
[-0.20590745 -1.17668705  0.2276098   1.96903938]
(0, 0, 4, 2)

[ 0.04336994 -0.00845128  0.0313118   0.01109767]
(0, 0, 3, 1)

[ 0.04320092 -0.20400797  0.03153375  0.31349308]
(0, 0, 3, 1)

[ 0.03912076 -0.39956462  0.03780361  0.6159516 ]
(0, 0, 3, 2)

('On', 3)
[ 0.03112947 -0.59519377  0.05012264  0.9202973 ]
(0, 0, 3, 2)

[ 0.01922559 -0.40078376  0.06852859  0.6437784 ]
(0, 0, 3, 2)

('On', 3)
[ 0.01120992 -0.5967905   0.08140416  0.95723006]
(0, 0, 3, 2)

[-0.00072589 -0.40285199  0.10054876  0.69119204]
(0, 0, 3, 2)

[-0.00878293 -0.2092583   0.1143726   0.43177975]
(0, 0, 3, 1)

[-0.0129681  -0.01592584

('On', 1)
[-0.040603    0.56884843  0.06229991 -0.62795623]
(0, 0, 3, 0)

('On', 1)
[-0.02922603  0.76304809  0.04974078 -0.90038627]
(0, 0, 3, 0)

('On', 1)
[-0.01396507  0.95746202  0.03173306 -1.17702872]
(0, 0, 3, 0)

('On', 1)
[ 0.00518417  1.15215772  0.00819248 -1.45959729]
(0, 0, 3, 0)

('On', 1)
[ 0.02822732  0.95693627 -0.02099946 -1.16436638]
(0, 0, 2, 0)

('On', 1)
[ 0.04736605  0.7620939  -0.04428679 -0.87834066]
(0, 0, 2, 0)

('On', 1)
[ 0.06260793  0.95778877 -0.0618536  -1.1846112 ]
(0, 0, 2, 0)

('On', 1)
[ 0.0817637   1.15365615 -0.08554583 -1.49602378]
(0, 0, 2, 0)

('On', 1)
[ 0.10483683  0.95967213 -0.1154663  -1.23123161]
(0, 0, 2, 0)

('On', 1)
[ 0.12403027  0.76620896 -0.14009094 -0.97684092]
(0, 0, 2, 0)

('On', 1)
[ 0.13935445  0.57321527 -0.15962775 -0.7312386 ]
(0, 0, 2, 0)

[ 0.15081875  0.38061706 -0.17425253 -0.49274567]
(0, 0, 1, 0)

('On', 1)
[ 0.15843109  0.57771322 -0.18410744 -0.83488417]
(0, 0, 1, 0)

('On', 1)
[ 0.16998536  0.77480845 -0.20080512 -

('On', 1)
[ 0.01750165  0.75327915 -0.07501731 -1.22410507]
(0, 0, 2, 0)

('On', 1)
[ 0.03256723  0.55919923 -0.09949941 -0.95583837]
(0, 0, 2, 0)

('On', 1)
[ 0.04375121  0.75550847 -0.11861618 -1.2780504 ]
(0, 0, 2, 0)

('On', 1)
[ 0.05886138  0.95192579 -0.14417719 -1.60539759]
(0, 0, 2, 0)

('On', 1)
[ 0.0778999   1.14842883 -0.17628514 -1.9393353 ]
(0, 0, 1, 0)

('On', 1)
[ 0.10086848  1.34494118 -0.21507184 -2.28109531]
(0, 0, 1, 0)

[ 0.04116874 -0.00955412 -0.04471788  0.00105366]
(0, 0, 2, 1)

[ 0.04097766 -0.20400718 -0.04469681  0.27929898]
(0, 0, 2, 1)

[ 0.03689752 -0.00827705 -0.03911083 -0.02713946]
(0, 0, 2, 1)

[ 0.03673198 -0.20281694 -0.03965362  0.25295155]
(0, 0, 2, 1)

[ 0.03267564 -0.3973509  -0.03459458  0.532868  ]
(0, 0, 2, 2)

[ 0.02472862 -0.20175991 -0.02393722  0.22948828]
(0, 0, 2, 1)

[ 0.02069342 -0.39653176 -0.01934746  0.51452549]
(0, 0, 2, 2)

[ 0.01276279 -0.20114275 -0.00905695  0.21580903]
(0, 0, 2, 1)

[ 0.00873993 -0.39613406 -0.00474077  0.5056

[-0.03198363 -0.04160559  0.13976828  0.32796541]
(0, 0, 3, 1)

[-0.03281575  0.15127858  0.14632758  0.08241879]
(0, 0, 3, 1)

[-0.02979017  0.34403293  0.14797596 -0.1607559 ]
(0, 0, 3, 1)

('On', 1)
[-0.02290952  0.53676071  0.14476084 -0.4033419 ]
(0, 0, 3, 1)

('On', 1)
[-0.0121743   0.72956478  0.136694   -0.64711184]
(0, 0, 3, 0)

('On', 1)
[ 0.00241699  0.53282975  0.12375177 -0.31469903]
(0, 0, 3, 1)

('On', 1)
[ 0.01307359  0.72599156  0.11745779 -0.56593572]
(0, 0, 3, 0)

('On', 1)
[ 0.02759342  0.91928683  0.10613907 -0.81942761]
(0, 0, 3, 0)

('On', 1)
[ 0.04597916  0.72288478  0.08975052 -0.49533608]
(0, 0, 3, 0)

('On', 1)
[ 0.06043685  0.5266194   0.0798438  -0.17577168]
(0, 0, 3, 1)

('On', 1)
[ 0.07096924  0.72051329  0.07632836 -0.4422368 ]
(0, 0, 3, 0)

('On', 1)
[ 0.08537951  0.52439891  0.06748363 -0.12650299]
(0, 0, 3, 1)

('On', 1)
[ 0.09586748  0.71849246  0.06495357 -0.3971555 ]
(0, 0, 3, 1)

('On', 1)
[ 0.11023733  0.91263564  0.05701046 -0.66867324]
(0, 0, 3

[ 0.03454681 -0.35205028 -0.0304957   0.57179979]
(0, 0, 2, 2)

[ 0.02750581 -0.54673163 -0.0190597   0.85472169]
(0, 0, 2, 2)

('On', 3)
[ 0.01657117 -0.74158871 -0.00196527  1.14135101]
(0, 0, 2, 2)

('On', 3)
[ 0.0017394  -0.93668492  0.02086175  1.43341697]
(0, 0, 3, 2)

('On', 3)
[-0.0169943  -0.74182646  0.04953009  1.14732579]
(0, 0, 3, 2)

[-0.03183083 -0.54738497  0.07247661  0.87057714]
(0, 0, 3, 2)

('On', 3)
[-0.04277853 -0.74341395  0.08988815  1.18513813]
(0, 0, 3, 2)

('On', 3)
[-0.05764681 -0.54956548  0.11359091  0.92193088]
(0, 0, 3, 2)

[-0.06863812 -0.35614645  0.13202953  0.66699603]
(0, 0, 3, 2)

('On', 3)
[-0.07576104 -0.55283341  0.14536945  0.99816276]
(0, 0, 3, 2)

('On', 3)
[-0.08681771 -0.74956805  0.1653327   1.33274037]
(0, 0, 3, 2)

('On', 3)
[-0.10180907 -0.55687096  0.19198751  1.09602292]
(0, 0, 4, 2)

('On', 3)
[-0.11294649 -0.75393054  0.21390797  1.44227962]
(0, 0, 4, 2)

[ 0.00153991 -0.03182132 -0.00703136 -0.03638232]
(0, 0, 2, 1)

[ 0.00090348 -

[-0.21233655  0.22239862  0.02672948 -0.44350396]
(0, 0, 3, 0)

[-0.20788858  0.02690885  0.0178594  -0.14251653]
(0, 0, 3, 1)

[-0.2073504  -0.16846426  0.01500907  0.15574679]
(0, 0, 3, 1)

[-0.21071969  0.02643961  0.018124   -0.1321636 ]
(0, 0, 3, 1)

[-0.21019089  0.22129732  0.01548073 -0.41907399]
(0, 0, 3, 1)

[-0.20576495  0.41619653  0.00709925 -0.70683662]
(0, 0, 3, 0)

('On', 1)
[-0.19744102  0.61121941 -0.00703748 -0.99727641]
(0, 0, 2, 0)

('On', 1)
[-0.18521663  0.80643475 -0.02698301 -1.29216117]
(0, 0, 2, 0)

('On', 1)
[-0.16908793  1.00188913 -0.05282623 -1.59316806]
(0, 0, 2, 0)

('On', 1)
[-0.14905015  1.1975966  -0.08468959 -1.90184344]
(0, 0, 2, 0)

('On', 1)
[-0.12509822  1.00348656 -0.12272646 -1.6365909 ]
(0, 0, 2, 0)

('On', 1)
[-0.10502849  0.80999934 -0.15545828 -1.38453411]
(0, 0, 2, 0)

('On', 1)
[-0.0888285   0.61711991 -0.18314896 -1.14422482]
(0, 0, 1, 0)

('On', 1)
[-0.0764861   0.81409942 -0.20603346 -1.48829765]
(0, 0, 1, 0)

('On', 1)
[-0.06020411  

[-0.11272864 -0.19546389  0.15155733  0.60607996]
(0, 0, 3, 2)

('On', 3)
[-0.11663792 -0.39234379  0.16367893  0.94240207]
(0, 0, 3, 2)

[-0.12448479 -0.1997605   0.18252697  0.70529513]
(0, 0, 4, 2)

[-0.12848    -0.00757373  0.19663287  0.4751693 ]
(0, 0, 4, 2)

[-0.12863148 -0.20484956  0.20613626  0.82281903]
(0, 0, 4, 2)

('On', 3)
[-0.13272847 -0.40210491  0.22259264  1.1726137 ]
(0, 0, 4, 2)

[-0.04231434  0.02716228 -0.01374863 -0.0288876 ]
(0, 0, 2, 1)

[-0.04177109 -0.16775984 -0.01432638  0.25942597]
(0, 0, 2, 1)

[-0.04512629 -0.36267437 -0.00913786  0.54755595]
(0, 0, 2, 2)

[-0.05237978 -0.16742524  0.00181326  0.25200799]
(0, 0, 3, 1)

[-0.05572828 -0.36257304  0.00685342  0.5452623 ]
(0, 0, 3, 2)

[-0.06297974 -0.16754805  0.01775866  0.25474658]
(0, 0, 3, 1)

[-0.0663307   0.0273159   0.02285359 -0.03228248]
(0, 0, 3, 1)

[-0.06578438 -0.1681262   0.02220794  0.26752249]
(0, 0, 3, 1)

[-0.06914691 -0.36355794  0.02755839  0.56712641]
(0, 0, 3, 2)

[-0.07641807 -0.5590

[-0.02211238 -0.1990838   0.13606325  0.52609093]
(0, 0, 3, 2)

[-0.02609406 -0.00611237  0.14658507  0.27918832]
(0, 0, 3, 1)

[-0.0262163   0.18664753  0.15216883  0.03609115]
(0, 0, 3, 1)

[-0.02248335 -0.01029191  0.15289066  0.37265194]
(0, 0, 3, 1)

[-0.02268919  0.18236504  0.1603437   0.13181247]
(0, 0, 3, 1)

[-0.01904189 -0.01464709  0.16297995  0.47047921]
(0, 0, 3, 2)

[-0.01933483  0.17784288  0.17238953  0.23327479]
(0, 0, 4, 1)

[-0.01577797 -0.0192692   0.17705503  0.57499228]
(0, 0, 4, 2)

('On', 3)
[-0.01616336 -0.21637342  0.18855487  0.91780915]
(0, 0, 4, 2)

[-0.02049083 -0.02423189  0.20691105  0.68981232]
(0, 0, 4, 2)

('On', 3)
[-0.02097546 -0.22153261  0.2207073   1.03984954]
(0, 0, 4, 2)

[ 0.0058812  -0.01312161 -0.01642618  0.02481385]
(0, 0, 2, 1)

[ 0.00561877 -0.20800419 -0.0159299   0.3122692 ]
(0, 0, 2, 1)

[ 0.00145869 -0.01265896 -0.00968452  0.01460534]
(0, 0, 2, 1)

[ 0.00120551 -0.20764069 -0.00939241  0.30421703]
(0, 0, 2, 1)

[-0.00294731 -0.4026

('On', 3)
[-0.2235235  -0.81134912  0.18337563  0.90426991]
(0, 0, 4, 2)

('On', 3)
[-0.23975048 -1.00841744  0.20146103  1.24852704]
(0, 0, 4, 2)

('On', 3)
[-0.25991883 -1.20546946  0.22643157  1.59695679]
(0, 0, 4, 2)

[-0.02988051 -0.04990952 -0.00310899 -0.03242702]
(0, 0, 2, 1)

[-0.0308787  -0.24498675 -0.00375753  0.25927336]
(0, 0, 2, 1)

[-0.03577843 -0.04981136  0.00142794 -0.03459237]
(0, 0, 3, 1)

[-0.03677466  0.14529009  0.00073609 -0.32682442]
(0, 0, 3, 1)

[-0.03386886  0.34040155 -0.0058004  -0.61927513]
(0, 0, 2, 0)

[-0.02706083  0.14536109 -0.0181859  -0.32842468]
(0, 0, 2, 1)

[-0.0241536  -0.0494973  -0.0247544  -0.04153178]
(0, 0, 2, 1)

[-0.02514355 -0.2442557  -0.02558503  0.24323926]
(0, 0, 2, 1)

[-0.03002866 -0.04877782 -0.02072025 -0.05740278]
(0, 0, 2, 1)

[-0.03100422 -0.24359664 -0.0218683   0.22867142]
(0, 0, 2, 1)

[-0.03587615 -0.43839937 -0.01729487  0.51437687]
(0, 0, 2, 2)

[-0.04464414 -0.63327353 -0.00700734  0.80155997]
(0, 0, 2, 2)

[-0.057309

[ 0.27819045 -0.2459705  -0.03912278  0.58273384]
(0, 0, 2, 2)

[ 0.27327104 -0.0503229  -0.02746811  0.27798785]
(0, 0, 2, 1)

[ 0.27226458  0.14517993 -0.02190835 -0.02323038]
(0, 0, 2, 1)

[ 0.27516818 -0.0496211  -0.02237296  0.2624605 ]
(0, 0, 2, 1)

[ 0.27417576 -0.24441667 -0.01712375  0.54800363]
(0, 0, 2, 2)

[ 0.26928742 -0.04905839 -0.00616367  0.24997504]
(0, 0, 2, 1)

[ 0.26830625 -0.24409178 -0.00116417  0.54070746]
(0, 0, 2, 2)

[ 0.26342442 -0.04895349  0.00964998  0.24765795]
(0, 0, 3, 1)

[ 0.26244535  0.14602933  0.01460314 -0.0419656 ]
(0, 0, 3, 1)

[ 0.26536594  0.34093886  0.01376382 -0.33000556]
(0, 0, 3, 1)

('On', 1)
[ 0.27218471  0.53586221  0.00716371 -0.61831645]
(0, 0, 3, 0)

[ 0.28290196  0.34064093 -0.00520262 -0.32338593]
(0, 0, 2, 1)

[ 0.28971478  0.14559344 -0.01167034 -0.03234822]
(0, 0, 2, 1)

[ 0.29262664 -0.04935922 -0.0123173   0.25662986]
(0, 0, 2, 1)

[ 0.29163946 -0.24430317 -0.0071847   0.54540241]
(0, 0, 2, 2)

[ 0.2867534  -0.04908101  0.00

[ 0.02120777  0.01784997 -0.01864174  0.03211487]
(0, 0, 2, 1)

[ 0.02156477 -0.17699975 -0.01799945  0.31885833]
(0, 0, 2, 1)

[ 0.01802478 -0.37186079 -0.01162228  0.60581098]
(0, 0, 2, 2)

('On', 3)
[  1.05875628e-02  -5.66818310e-01   4.93941190e-04   8.94810639e-01]
(0, 0, 3, 2)

[-0.0007488  -0.37170306  0.01839015  0.60228302]
(0, 0, 3, 2)

[-0.00818286 -0.1768431   0.03043581  0.31544891]
(0, 0, 3, 1)

[-0.01171973 -0.37238508  0.03674479  0.61757277]
(0, 0, 3, 2)

[-0.01916743 -0.17779517  0.04909625  0.33668548]
(0, 0, 3, 1)

[-0.02272333 -0.37358017  0.05582996  0.64443761]
(0, 0, 3, 2)

[-0.03019494 -0.1792789   0.06871871  0.36984529]
(0, 0, 3, 1)

[-0.03378051 -0.37530649  0.07611562  0.68338008]
(0, 0, 3, 2)

[-0.04128664 -0.18131937  0.08978322  0.41559829]
(0, 0, 3, 1)

[-0.04491303 -0.37759147  0.09809518  0.73518143]
(0, 0, 3, 2)

[-0.05246486 -0.1839516   0.11279881  0.47491176]
(0, 0, 3, 2)

[-0.05614389  0.00941185  0.12229705  0.21980241]
(0, 0, 3, 1)

[-0.055955

[ 0.0855483  -0.17444502 -0.01182438 -0.19760114]
(0, 0, 2, 1)

[ 0.0820594  -0.36939587 -0.0157764   0.0913284 ]
(0, 0, 2, 1)

[ 0.07467148 -0.56428818 -0.01394983  0.37899242]
(0, 0, 2, 1)

[ 0.06338572 -0.75920927 -0.00636998  0.66724449]
(0, 0, 2, 2)

[ 0.04820153 -0.56399932  0.00697491  0.37256274]
(0, 0, 3, 1)

[ 0.03692155 -0.75921966  0.01442616  0.66743673]
(0, 0, 3, 2)

[ 0.02173715 -0.56430127  0.0277749   0.37933071]
(0, 0, 3, 1)

[ 0.01045113 -0.36958453  0.03536151  0.09553303]
(0, 0, 3, 1)

[ 0.00305944 -0.56519499  0.03727217  0.39915941]
(0, 0, 3, 1)

[-0.00824446 -0.37062106  0.04525536  0.1184567 ]
(0, 0, 3, 1)

[-0.01565689 -0.56636122  0.04762449  0.42506686]
(0, 0, 3, 1)

[-0.02698411 -0.76212429  0.05612583  0.73237483]
(0, 0, 3, 2)

[-0.0422266  -0.56782096  0.07077333  0.45787111]
(0, 0, 3, 2)

[-0.05358301 -0.37376715  0.07993075  0.18830908]
(0, 0, 3, 1)

[-0.06105836 -0.56993628  0.08369693  0.50509791]
(0, 0, 3, 2)

[-0.07245708 -0.37608739  0.09379889  0.

KeyboardInterrupt: 