In [47]:
import numpy as np
import sys
import time
import warnings
import matplotlib.pyplot as plt
import os
warnings.filterwarnings('ignore')
import math
import random
import imageio
from PIL import Image
import PIL.ImageDraw as ImageDraw
import matplotlib.pyplot as plt  

In [62]:
 '''
Class MarkovDP contains the following attributes:
1)Number of states  : s
2)Number of actions : a
3)Dimension of parameter space : d
4)Number of iterations T
5)State Space
6)Action Space
7)Transition probability matrix of size (a,s,s)
8)Reward matrix (a,s,s)
9) Feature approximation vector of size (a,s,d) for score function and policy update
10) Feature approximation vector phi of size (s,d) for Temporal Difference Error update
11) policy pi of size (s,a)
12) optimal policy of size (s,a) for testing
13) Temporal Difference Error (a,s,s)
14) initial probability distribution xi
15) discount factor
16) critic parameter thetas 
17) action value function
'''
class MarkovDP:
    
    def __init__(self,s,a,d,T, discount_factor):
        self.num_state             = s
        self.num_action            = a
        self.dimension             = d
        self.T                     = T
        self.states                = np.array(range(0,s))
        self.actions               = np.array(range(0,a))
        self.transitions           = np.zeros((a,s,s))
        self.rewards               = np.zeros((a,s,s))
        self.feature_approx        = np.zeros((a,s,d))
        self.phi                   = np.zeros((s,d))
        self.pi                    = np.zeros((s, a))
        self.optimal_pi            = np.zeros((s, a))
        self.TDerror               = np.zeros((a,s,s))
        self.xi                    = np.zeros(s)
        self.discount_factor       = discount_factor
        self.thetas                = np.zeros((int(T+1), d))
        self.action_value          = np.zeros((s,a))
    
    #mdp created to test the policy_eval and optimal_policyValue methods
    def test_mdp(self, transition, featureApprox, reward, pis):
        np.random.seed(0)
        #self.transitions[0] = [[0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1], [1,0,0,0,0]]
        #self.transitions[1] = [[1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1]]
        #self.transitions[2] = [[0,0,0,0,1], [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0]]
        #self.feature_approx = np.random.rand(self.num_action, self.num_state, self.dimension)
        self.phi = np.random.rand(self.num_state, self.dimension)
        self.thetas[0] = np.zeros(self.dimension)
        self.xi = np.full((self.num_state), (1.0/self.num_state))
        self.rewards = reward
        self.transitions = transition
        self.feature_approx = featureApprox
        self.pi = pis
        for i in range(0, self.num_state):
            #self.pi[i] = np.random.dirichlet(np.ones(self.num_action, dtype = np.int8),size=1)
            self.optimal_pi[i] = np.full((self.num_action), (1.0/self.num_action))
      
    #initializes MDP
    def initialize_mdp(self):    
        self.rewards = np.random.rand(self.num_action, self.num_state, self.num_state)
        self.feature_approx = np.random.rand(self.num_action, self.num_state, self.dimension)
        self.phi = np.random.rand(self.num_state, self.dimension)
        self.xi = np.full((self.num_state), (1.0/self.num_state))
        for i in range (0, self.num_action):
            for j in range(0, self.num_state):
                self.transitions[i][j] = np.random.dirichlet(np.ones(self.num_state, dtype = np.int8),size=1)
        for i in range(0, self.num_state):
            self.pi[i] = np.random.dirichlet(np.ones(self.num_action, dtype = np.int8),size=1)
            self.optimal_pi[i] = np.random.dirichlet(np.ones(self.num_action, dtype = np.int8),size=1)
     
    #log gradient of policy
    def score_function(self, action, state):
        score = np.zeros(self.dimension)
        if (action == 0):
            score[0] = 0
            score[1] = 0
            score[2] = 0
            score[3] = 0
            score[4] = state[0]
            score[5] = state[1]
            score[6] = state[2]
            score[7] = state[3]
        return score
    
    #updates the policy using a softmax function
    def update_policy(self, actor_parameter):
        for i in range(0, self.num_state):
            pi_sum = 0
            for j in range(0, self.num_action):
                pi_sum += math.exp(np.dot(actor_parameter, self.feature_approx[j][i]))
            for j in range(0, self.num_action):
                self.pi[i][j] = ((math.exp(np.dot(actor_parameter, self.feature_approx[j][i]))) / pi_sum)
                
    #evaluates the policy and finds its value function           
    def policy_eval(self, pi, gamma):
        policy_rewards = np.zeros(self.num_state)
        for i in range(0, self.num_state):
            total_sum = 0
            for j in range(0, self.num_action):
                reward_sum = 0
                
                for k in range(0, self.num_state):
                    reward_sum += (self.rewards[j][i][k] * self.transitions[j][i][k])
                total_sum += (pi[i][j] * reward_sum)
            policy_rewards[i] = total_sum
        #print(policy_rewards)
        policy_transitions = np.zeros((self.num_state, self.num_state))
        for i in range(0, self.num_state):
            for j in range(0, self.num_state):
                reward_sum = 0
                for k in range(0, self.num_action):
                    reward_sum += (self.transitions[k][i][j] * pi[i][k])
                policy_transitions[i][j] = reward_sum
        value_func = (np.dot(np.linalg.inv(np.identity(self.num_state) - (gamma * policy_transitions)), policy_rewards))
        return value_func
    
     #evaluates the policy and finds its value function           
    def qpi_eval(self, pi, gamma):
        policy_rewards = np.zeros(self.num_state * self.num_action)
        for i in range(0, self.num_state):
            total_sum = 0
            for j in range(0, self.num_action):
                reward_sum = 0
                #print(j)
                for k in range(0, self.num_state):
                    reward_sum += (self.rewards[j][i][k] * self.transitions[j][i][k])
                policy_rewards[(j) + (i * 3)] =  (reward_sum)
        #print(policy_rewards)
        policy_transitions = np.zeros((self.num_state * self.num_action, self.num_state * self.num_action))
        for i in range(0, self.num_state):
            for j in range(0, self.num_state):
                for l in range(0, self.num_action):
                    for k in range(0, self.num_action):
                        print(self.transitions[l][i][j] * pi[j][k])
                        policy_transitions[(l) + (i * 3)][(k) + (j * 3)] = (self.transitions[l][i][j] * pi[j][k])
        value_func = (np.dot(np.linalg.inv(np.identity(self.num_state * self.num_action) - (gamma * policy_transitions)), policy_rewards))
        return value_func
    
    #calculates the optimal policy for the given MDP
    def optimal_policyValue(self):
        policy_stable = False
        value_func = self.policy_eval(self.optimal_pi, self.discount_factor)
        while(not policy_stable):
            policy_stable = True
            for i in range(0, self.num_state):
                old_action = np.random.choice(self.actions, p = self.optimal_pi[i])
                max = np.zeros(2)
                for j in range(0, self.num_action):
                    reward_sum = 0
                    for k in range(0, self.num_state):
                        reward_sum += (self.transitions[j][i][k] * (self.rewards[j][i][k] + (self.discount_factor * value_func[k])))
                    if (reward_sum > max[0]):
                        max[0] = reward_sum
                        max[1] = j
                policy_dist = np.zeros(self.num_action)
                policy_dist[int(max[1])] = 1
                self.optimal_pi[i] = policy_dist
                if (old_action != np.random.choice(self.actions, p = self.optimal_pi[i])):
                    policy_stable = False
        value_func = self.policy_eval(self.optimal_pi, self.discount_factor)
        return value_func
    
    #projection of y onto ball with given radius
    def projection(self, radius, y):
        denom = max(radius, np.linalg.norm(y))
        return ((y / denom) * radius)
    
    #calculates r(s,a) from r(s,a,s)
    def calculate_reward(self, state, action):
        reward_sum = 0
        for i in range(0, self.num_state):
            #print("Components")
            #print(self.transitions[action][state][i])
            #print(self.rewards[action][state][i])
            reward_sum += (self.transitions[action][state][i] * self.rewards[action][state][i])
        #print("REward")
        #print(reward_sum)
        return reward_sum
    
    #samples the state, action and action-value function
    def sampler(self):
        state = np.random.choice(self.states, p = np.full((self.num_state), (1.0 / self.num_state)))
        action = np.random.choice(self.actions, p = np.full((self.num_action), (1.0 / self.num_action)))
        choice = np.random.choice([True, False], p = [self.discount_factor, (1.00 - self.discount_factor)])
        #first pass of sampling
        while (choice):
            state = np.random.choice(self.states, p = self.transitions[action][state])
            action = np.random.choice(self.actions, p = self.pi[state])
            choice = np.random.choice([True, False], p = [self.discount_factor, (1 - self.discount_factor)])
        final_state = state.copy()
        final_action = action.copy()
        action_value_sum = (self.calculate_reward(state, action))
        choice = np.random.choice([True, False], p = [self.discount_factor, (1.0 - self.discount_factor)])
        #second pass to calculate an unbiased estimate of Qpi
        while (choice):
            state = np.random.choice(self.states, p = self.transitions[action][state])
            action = np.random.choice(self.actions, p = self.pi[state])
            #adds undiscounted reward from current state, action pair to the sum
            action_value_sum += (self.calculate_reward(state, action))
            choice = np.random.choice([True, False], p = [self.discount_factor, (1.0 - self.discount_factor)])
        #print(action_value_sum)
        self.action_value[final_state][final_action] = action_value_sum
        return final_state, final_action
    #samples the state, action and action-value function
    def sampler_test(self, state, action):
        #print(state, action)
        final_state = state
        final_action = action
        action_value_sum = (self.calculate_reward(state, action))
        choice = np.random.choice([True, False], p = [self.discount_factor, (1.0 - self.discount_factor)])
        #second pass to calculate an unbiased estimate of Qpi
        while (choice):
            state = np.random.choice(self.states, p = self.transitions[action][state])
            action = np.random.choice(self.actions, p = self.optimal_pi[state])
            #print(state)
            #print(action)
            #adds undiscounted reward from current state, action pair to the sum
            action_value_sum += (self.calculate_reward(state, action))
            choice = np.random.choice([True, False], p = [self.discount_factor, (1.0 - self.discount_factor)])
        #print(final_state, final_action)
        #print(action_value_sum)
        self.action_value[final_state][final_action] += action_value_sum
        return final_state, final_action
    
    def inner_loop(self, alpha, projection_radius, N):
        actor_parameter = np.zeros((int(N + 1), self.dimension))
        for j in range(0, int(N)):
            state, action = self.sampler()
            #print()
            #print(actor_parameter[j])
            #print(2 * alpha)
            #print((np.dot(actor_parameter[j], self.feature_approx[action][state])) - (self.action_value[state][action]))
            #print(self.feature_approx[action][state])
            projectionOf = ((actor_parameter[j]) - ((2 * alpha) * ((np.dot(actor_parameter[j], self.feature_approx[action][state])) - (self.action_value[state][action])) * (self.feature_approx[action][state])))
            #print(projectionOf)
            actor_parameter[j+1] = self.projection(projection_radius, projectionOf)
            #print(actor_parameter[j+1])
            #print()
        w_prime = 0.0
        for j in range(1, int(N + 1)):
            w_prime += actor_parameter[j]
        w_prime = (w_prime) / N
        return w_prime
    #runs the sample based Q-npg
    def npg(self, n, alpha, N):
        actor_parameter = np.zeros((int(N + 1), self.dimension))
        optimal_value = (np.sum(self.optimal_pi) / self.num_state)
        value_diff = np.zeros(int(self.T))
        for i in range(0, self.T):
            actor_parameter[0] = 0
            for j in range(0, int(N)):
                state, action = self.sampler()
                actor_parameter[j+1] = self.projection(150, (actor_parameter[j] - ((2 * alpha) * (np.dot(actor_parameter[j], self.feature_approx[action][state]) - self.action_value[state][action]) * (self.feature_approx[action][state]))))
                #print(j)
            w_prime = 0
            print(i)
            for j in range(1, int(N+1)):
                w_prime += actor_parameter[j]
            w_prime = (w_prime) / int(N)
            self.thetas[i+1] = self.thetas[i] + (n * w_prime)
            self.update_policy(self.thetas[i+1])
            #to test whether the algorithm converges
            #value_func = self.policy_eval(self.pi, self.discount_factor)
            #value_diff[i] = (optimal_value - (np.sum(value_func) / self.num_state))
        return (np.arange(self.T), value_diff)
    
    def setReward(self, reward, state, action):
        for i in range(0, self.num_state):
            self.rewards[action][state][i] = (reward) / (self.transitions[action][state][i])
        #print(self.rewards[action][state])
        
    def _label_with_episode_number(self, frame, episode_num):
        im = Image.fromarray(frame)
        drawer = ImageDraw.Draw(im)
        if np.mean(im) < 128:
            text_color = (255,255,255)
        else:
            text_color = (0,0,0)
        drawer.text((im.size[0]/20,im.size[1]/18), f'Episode: {episode_num+1}', fill=text_color)

        return im
    #samples the state, action and action-value function
    def samplerGym(self, gym_env, frames, episode_num):
        state = gym_env.reset()
        self.pi[tuple(state)] = np.random.dirichlet(np.ones(self.num_action, dtype = np.int8),size=1)[0]
        print(self.pi[tuple(state)])
        action = np.random.choice(self.actions, p = np.full((self.num_action), (1.0 / self.num_action)))
        choice = np.random.choice([True, False], p = [self.discount_factor, (1.00 - self.discount_factor)])
        #first pass of sampling
        score = 0
        final_reward = 0
        while (choice):
            frame = gym_env.render(mode = 'rgb_array')
            frames.append(self._label_with_episode_number(frame, episode_num))
            action = np.random.choice(self.actions, p = self.pi[tuple(state)])
            n_state, reward, done, info = gym_env.step(action)
            state = n_state
            self.pi[tuple(state)] = np.random.dirichlet(np.ones(self.num_action, dtype = np.int8),size=1)[0]
            final_reward = reward
            if (done):
                return state, action, 0
            choice = np.random.choice([True, False], p = [self.discount_factor, (1 - self.discount_factor)])
        final_state = state.copy()
        final_action = action.copy()
        action_value_sum = final_reward
        choice = np.random.choice([True, False], p = [self.discount_factor, (1.0 - self.discount_factor)])
        #second pass to calculate an unbiased estimate of Qpi
        while (choice):
            frame = gym_env.render(mode = 'rgb_array')
            frames.append(self._label_with_episode_number(frame, episode_num))
            action = np.random.choice(self.actions, p = self.pi[tuple(state)])
            n_state, reward, done, info = gym_env.step(action)
            state = n_state
            self.pi[tuple(state)] = np.random.dirichlet(np.ones(self.num_action, dtype = np.int8),size=1)[0]
            if (done):
                return state, action, action_value_sum
            #adds undiscounted reward from current state, action pair to the sum
            action_value_sum += reward
            choice = np.random.choice([True, False], p = [self.discount_factor, (1.0 - self.discount_factor)])
        #print(action_value_sum)
        return final_state, final_action, action_value_sum

    #runs the sample based Q-npg with the open AI gym cart model
    def npgGym(self, gym_env, n, alpha, N):
        optimal_value = self.optimal_policyValue()
        actor_parameter = np.zeros((int(N + 1), self.dimension))
        optimal_value = (np.sum(optimal_value) / self.num_state)
        value_diff = np.zeros(int(self.T))
        frames = []
        episode_num = 0
        for i in range(0, self.T):
            actor_parameter[0] = 0
            for j in range(0, int(N)):
                state, action, action_value = self.samplerGym(gym_env, frames, episode_num)
                actor_parameter[j+1] = self.projection(150, (actor_parameter[j] - ((2 * alpha) * (np.dot(actor_parameter[j], self.score_function(action, state)) - action_value) * (self.score_function(action, state)))))
                episode_num = episode_num + 1
            w_prime = 0
            print(i)
            for j in range(1, int(N+1)):
                w_prime += actor_parameter[j]
            w_prime = (w_prime) / int(N)
            self.thetas[i+1] = self.thetas[i] + (n * w_prime)
            self.update_policy(self.thetas[i+1])
            #to test whether the algorithm converges
            #value_func = self.policy_eval(self.pi, self.discount_factor)
            #value_diff[i] = (optimal_value - (np.sum(value_func) / self.num_state))
        imageio.mimwrite(os.path.join('./videos/', 'random_agent.gif'), frames, fps=60)
        return (np.arange(self.T), value_diff)

In [63]:
np.random.seed(0)
a = 3
s = 5
d = 8
transitions           = np.zeros((a,s,s))
rewards               = np.zeros((a,s,s))
pi                    = np.zeros((s, a))
rewards = (((np.random.rand(a, s, s) * 2) - 1) * 10.0)
for i in range (0, a):
    for j in range(0, s):
        transitions[i][j] = np.random.dirichlet(np.ones(s, dtype = np.int8),size=1)
for i in range(0, s):
    pi[i] = np.random.dirichlet(np.ones(a, dtype = np.int8),size=1)

In [64]:
feature_approx = ((np.random.rand(a,s,d) * 10.0) - 5.0);

In [65]:
b = 14
projection_radius = 10
discount_factor = 0.9
N = 2000.0
T = 1000
g = (2.0 * b) * ((b * projection_radius) + (1.0 / (1.0 - discount_factor)))
alpha = (projection_radius) / (g * math.sqrt(N))
n = math.sqrt((2 * math.log10(a)) / (pow(b, 2) * pow(projection_radius, 2) * T))
print(n)
print(alpha)

0.00022064868528575668
5.323971374999499e-05


In [66]:
env = MarkovDP(s,a,d,T, discount_factor)
env.test_mdp(transitions, feature_approx, rewards, pi)
print(env.pi)
print(env.rewards)
print(env.transitions)
env.optimal_policyValue()
arr = env.qpi_eval(env.pi, discount_factor)
for i in range(0, 15):
    print("Qpi(%(s)d, %(a)d) =" %{"s": i / 3, "a": i % 3,})
    print(arr[i])
w = env.inner_loop(alpha, projection_radius, N)
for i in range(0, env.num_state):
    for j in range(0, env.num_action):
        print("Estimate of Qpi(%(s)d, %(a)d) = " %{"s": i, "a": j})
        print(np.dot(w, env.feature_approx[j][i]))
#print(env.policy_eval(env.optimal_pi, 0.9))
#print(np.sum(env.action_value) / env.num_state)

[[0.06844099 0.85658322 0.07497579]
 [0.32164906 0.04447517 0.63387578]
 [0.54681569 0.27955813 0.17362617]
 [0.03830014 0.63878927 0.32291058]
 [0.18282477 0.28741303 0.5297622 ]]
[[[ 0.97627008  4.30378733  2.05526752  0.89766366 -1.52690401]
  [ 2.91788226 -1.24825577  7.83546002  9.27325521 -2.33116962]
  [ 5.83450076  0.5778984   1.36089122  8.51193277 -8.57927884]
  [-8.25741401 -9.59563205  6.65239691  5.56313502  7.40024296]
  [ 9.57236684  5.98317128 -0.77041275  5.61058353 -7.63451148]]

 [[ 2.79842043 -7.13293425  8.89337834  0.43696644 -1.7067612 ]
  [-4.70888776  5.48467379 -0.87699336  1.36867898 -9.62420399]
  [ 2.35270994  2.24191445  2.33867994  8.87496157  3.63640598]
  [-2.80984199 -1.25936092  3.95262392 -8.79549057  3.33533431]
  [ 3.41275739 -5.79234878 -7.42147405 -3.69143298 -2.72578458]]

 [[ 1.40393541 -1.22796973  9.76747676 -7.95910379 -5.82246488]
  [-6.77380964  3.06216651 -4.93416795 -0.67378454 -5.11148816]
  [-6.82060833 -7.79249718  3.12659179 -7.23634

In [67]:
print(T)

1000


In [None]:
env = MarkovDP(s,a,d,T, discount_factor)
env.test_mdp(transitions, feature_approx, rewards, pi)
x, y = env.npg(n, alpha, N)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76


In [None]:
%matplotlib inline
plt.xlabel('T', fontsize=18)
plt.ylabel('V*-V($\pi_t$)', fontsize=18)
plt.plot(x, y)
plt.show()

[[1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]]


2
3


[[0.5 0.5]
 [1.  1. ]]


In [3]:
import gym 
gym_env = gym.make('CartPole-v0')
print(gym_env.observation_space)
np.random.seed(0)
a = gym_env.action_space.n
s = gym_env.observation_space.shape[0]
d = s * 2
transitions           = np.zeros((a,s,s))
rewards               = np.zeros((a,s,s))
pi                    = np.zeros((s, a))
rewards = (((np.random.rand(a, s, s) * 2) - 1) * 10.0)
for i in range (0, a):
    for j in range(0, s):
        transitions[i][j] = np.random.dirichlet(np.ones(s, dtype = np.int8),size=1)
for i in range(0, s):
    pi[i] = np.random.dirichlet(np.ones(a, dtype = np.int8),size=1)
feature_approx = ((np.random.rand(a,s,d) * 10.0) - 5.0)
print(feature_approx)
b = 14
projection_radius = 10
discount_factor = 0.9
N = 2000.0
T = 50
g = (2.0 * b) * ((b * projection_radius) + (1.0 / (1.0 - discount_factor)))
alpha = (projection_radius) / (g * math.sqrt(N))
n = math.sqrt((2 * math.log10(a)) / (pow(b, 2) * pow(projection_radius, 2) * T))
print(n)
print(alpha)

Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)
[[[ 4.76761088  1.0484552   2.39263579 -4.60812208 -2.17193037
   -3.79803439 -2.03859802 -3.81272281]
  [-1.82016821 -0.85737005 -4.35852504  1.92472119  0.66601454
   -2.34610509  0.23248053 -4.06059489]
  [ 0.75946496  4.29296198 -1.81431048  1.6741038  -3.68202138
    2.16327204 -2.10593907 -3.16808638]
  [ 0.86512935 -4.79892454  3.28940029 -4.95304524  1.77816537
   -2.29992027  2.35194022  4.62188545]]

 [[-2.51246856  0.76157334  0.92041931  0.72251906 -2.76918367
    4.52749012 -0.52874621  3.46408672]
  [ 1.99479275 -2.02563049  3.1379782  -1.03494259  3.81103197
    0.81272873  3.81735362  1.9253159 ]
  [ 2.2525428   0.01324382  4.56083635  1.43990199 -0.76144951
    1.06393214 -4.80806802 -1.98425183]
  [ 1.60173537 -2.09922393  1.18015429 -0.71231299 -3.64525936
   -2.01717674  0.69964911  0.90872761]]]
0.0007838028641989592
5.323971374999499e-05


In [4]:
env = MarkovDP(s,a,d,T, discount_factor)
env.test_mdp(transitions, feature_approx, rewards, pi)
x, y = env.npgGym(gym_env, n, alpha, N)

[0.45599612 0.54400388]
[0.7098248 0.2901752]
[0.65703279 0.34296721]
[0.61726515 0.38273485]
[0.96645432 0.03354568]
[0.33279008 0.66720992]
[0.54932342 0.45067658]
[0.33230258 0.66769742]
[0.01407027 0.98592973]
[0.401136 0.598864]
[0.96702689 0.03297311]
[0.01339335 0.98660665]
[0.23779784 0.76220216]
[0.64197779 0.35802221]
[0.0761693 0.9238307]
[0.1867488 0.8132512]
[0.54285217 0.45714783]
[0.67817504 0.32182496]
[0.17835644 0.82164356]
[0.45058884 0.54941116]
[0.87811279 0.12188721]
[0.04972952 0.95027048]
[0.43803509 0.56196491]
[0.52004954 0.47995046]
[0.24833748 0.75166252]
[0.18112562 0.81887438]
[0.09471407 0.90528593]
[0.66829238 0.33170762]
[0.44370182 0.55629818]
[0.30907734 0.69092266]
[0.04924016 0.95075984]
[0.80363798 0.19636202]
[0.6730267 0.3269733]
[0.15250178 0.84749822]
[0.17050595 0.82949405]
[0.66740256 0.33259744]
[0.44251025 0.55748975]
[0.9302274 0.0697726]
[0.41929083 0.58070917]
[0.69367097 0.30632903]
[0.00331318 0.99668682]
[0.86693499 0.13306501]
[0.140

[0.45124965 0.54875035]
[0.97169424 0.02830576]
[0.01955547 0.98044453]
[0.88524913 0.11475087]
[0.77888794 0.22111206]
[0.79732856 0.20267144]
[0.43612551 0.56387449]
[0.29208907 0.70791093]
[0.55041962 0.44958038]
[0.94323202 0.05676798]
[0.39976715 0.60023285]
[0.08734438 0.91265562]
[0.99439583 0.00560417]
[0.15731696 0.84268304]
[0.54597623 0.45402377]
[0.55739308 0.44260692]
[0.76605176 0.23394824]
[0.94098068 0.05901932]
[0.08813908 0.91186092]
[0.35524986 0.64475014]
[0.90209861 0.09790139]
[0.39233545 0.60766455]
[0.41845436 0.58154564]
[0.90292814 0.09707186]
[0.06204311 0.93795689]
[0.35217028 0.64782972]
[0.45740275 0.54259725]
[0.70121531 0.29878469]
[0.64272085 0.35727915]
[0.55539112 0.44460888]
[0.73838098 0.26161902]
[0.86395278 0.13604722]
[0.84832029 0.15167971]
[0.44994273 0.55005727]
[0.72713682 0.27286318]
[0.74106801 0.25893199]
[0.78822155 0.21177845]
[0.36263887 0.63736113]
[0.49114027 0.50885973]
[0.0278328 0.9721672]
[0.59162189 0.40837811]
[0.67790988 0.3220

[0.60255354 0.39744646]
[0.53527661 0.46472339]
[0.44716961 0.55283039]
[0.27865094 0.72134906]
[0.65630945 0.34369055]
[0.65514782 0.34485218]
[0.37645233 0.62354767]
[0.89291366 0.10708634]
[0.40923177 0.59076823]
[0.115318 0.884682]
[0.65009185 0.34990815]
[0.07199481 0.92800519]
[0.93189945 0.06810055]
[0.24396294 0.75603706]
[0.05620307 0.94379693]
[0.54965089 0.45034911]
[0.82350086 0.17649914]
[0.58679867 0.41320133]
[0.03761379 0.96238621]
[0.36603778 0.63396222]
[0.0333569 0.9666431]
[0.27036927 0.72963073]
[0.91521883 0.08478117]
[0.8087013 0.1912987]
[0.3077357 0.6922643]
[0.72619613 0.27380387]
[0.47804729 0.52195271]
[0.51790493 0.48209507]
[0.6267941 0.3732059]
[0.02019206 0.97980794]
[0.21203977 0.78796023]
[0.70943792 0.29056208]
[0.37405406 0.62594594]
[0.52350735 0.47649265]
[0.46246432 0.53753568]
[0.58375268 0.41624732]
[0.16257658 0.83742342]
[0.5259879 0.4740121]
[0.52849773 0.47150227]
[0.12425428 0.87574572]
[0.12031803 0.87968197]
[0.35285766 0.64714234]
[0.694

[0.8713141 0.1286859]
[0.08201018 0.91798982]
[0.24019422 0.75980578]
[0.4998233 0.5001767]
[0.58462829 0.41537171]
[0.91433374 0.08566626]
[0.81781285 0.18218715]
[0.34790017 0.65209983]
[0.35534236 0.64465764]
[0.83600627 0.16399373]
[0.13637563 0.86362437]
[0.37747326 0.62252674]
[0.40667276 0.59332724]
[0.29836658 0.70163342]
[0.35370961 0.64629039]
[0.55633349 0.44366651]
[0.10459435 0.89540565]
[0.41945418 0.58054582]
[0.42470751 0.57529249]
[0.11898749 0.88101251]
[0.37990218 0.62009782]
[0.92809447 0.07190553]
[0.47062717 0.52937283]
[0.4236241 0.5763759]
[0.45070332 0.54929668]
[0.82856229 0.17143771]
[0.37221514 0.62778486]
[0.25448158 0.74551842]
[0.02470271 0.97529729]
[0.17747097 0.82252903]
[0.60689028 0.39310972]
[0.33301165 0.66698835]
[0.68346479 0.31653521]
[0.72637933 0.27362067]
[0.16445508 0.83554492]
[0.50413806 0.49586194]
[0.44260726 0.55739274]
[0.94892894 0.05107106]
[0.43093781 0.56906219]
[0.04148439 0.95851561]
[0.14080528 0.85919472]
[0.83971231 0.16028769

[0.2700973 0.7299027]
[0.56628025 0.43371975]
[0.9533258 0.0466742]
[0.60218044 0.39781956]
[0.3345202 0.6654798]
[0.74792222 0.25207778]
[0.21440976 0.78559024]
[0.2815295 0.7184705]
[0.47498987 0.52501013]
[0.28518866 0.71481134]
[0.80743571 0.19256429]
[0.64579078 0.35420922]
[0.49628125 0.50371875]
[0.61289504 0.38710496]
[0.30705877 0.69294123]
[0.48706587 0.51293413]
[0.70167219 0.29832781]
[0.55458357 0.44541643]
[0.02126246 0.97873754]
[0.99061208 0.00938792]
[0.15271842 0.84728158]
[0.34391383 0.65608617]
[0.51074607 0.48925393]
[0.09644681 0.90355319]
[0.3819201 0.6180799]
[0.93231397 0.06768603]
[0.52771106 0.47228894]
[0.05391062 0.94608938]
[0.3895762 0.6104238]
[0.55421534 0.44578466]
[0.02260049 0.97739951]
[0.94738144 0.05261856]
[0.41787096 0.58212904]
[0.45238798 0.54761202]
[0.20620934 0.79379066]
[0.26174855 0.73825145]
[0.93303533 0.06696467]
[0.71542666 0.28457334]
[0.47903981 0.52096019]
[0.88140864 0.11859136]
[0.86863003 0.13136997]
[0.58958785 0.41041215]
[0.3

[0.49968637 0.50031363]
[0.08377299 0.91622701]
[0.80820412 0.19179588]
[0.76050512 0.23949488]
[0.5863021 0.4136979]
[0.24606693 0.75393307]
[0.77418993 0.22581007]
[0.74447107 0.25552893]
[0.36793874 0.63206126]
[0.32575561 0.67424439]
[0.68022032 0.31977968]
[0.40257728 0.59742272]
[0.23505865 0.76494135]
[0.42377957 0.57622043]
[0.46583641 0.53416359]
[0.48853436 0.51146564]
[0.4575449 0.5424551]
[0.90328317 0.09671683]
[0.66404482 0.33595518]
[0.51999893 0.48000107]
[0.489451 0.510549]
[0.54543514 0.45456486]
[0.31386146 0.68613854]
[0.52543693 0.47456307]
[0.02560532 0.97439468]
[0.89344024 0.10655976]
[0.06538371 0.93461629]
[0.53672938 0.46327062]
[0.43214141 0.56785859]
[0.30514017 0.69485983]
[0.39482451 0.60517549]
[0.63214754 0.36785246]
[0.23109762 0.76890238]
[0.36924574 0.63075426]
[0.20708816 0.79291184]
[0.57331236 0.42668764]
[0.9533253 0.0466747]
[0.16248288 0.83751712]
[0.83257351 0.16742649]
[0.19288472 0.80711528]
[0.61925691 0.38074309]
[0.8685157 0.1314843]
[0.0

KeyError: 0