In [7]:
%reload_ext autoreload
%autoreload 2

In [8]:
import time

import gym
import numpy as np
import pandas as pd

# Print NumPy floats with zero digits after the decimal point (e.g. `200.`),
# which keeps the per-agent score arrays shown below compact.
np.set_printoptions(precision=0)

In [9]:
# Single Gym CartPole-v0 environment, shared by every experiment in this notebook.
env = gym.make('CartPole-v0')

# First, a random agent

In [57]:
# Baseline: play 10 episodes, drawing every action from the environment's
# action space without looking at the observation.
for episode in range(10):
    observation = env.reset()

    # Steps are counted from 1 so the counter can be reported directly;
    # an episode is abandoned silently after 1000 steps.
    for step in range(1, 1001):
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if not done:
            continue
        print("Episode finished after {} timesteps".format(step))
        break

Episode finished after 13 timesteps
Episode finished after 25 timesteps
Episode finished after 9 timesteps
Episode finished after 15 timesteps
Episode finished after 62 timesteps
Episode finished after 15 timesteps
Episode finished after 14 timesteps
Episode finished after 34 timesteps
Episode finished after 68 timesteps
Episode finished after 15 timesteps


# Genetic Algorithm

In [58]:
import genetic

## A neural-network agent

In [59]:
# One agent from the local `genetic` module; the next cell plays it as-is,
# with no training step in between.
# NOTE(review): the meaning of the positional 0.5 is defined inside
# genetic.Agent and is not visible from this notebook — confirm.
agent = genetic.Agent(.5, verbose=False)

In [60]:
# Play 10 episodes, letting `agent` pick each action from the current observation.
for episode in range(10):
    observation = env.reset()

    # Steps are counted from 1 so the counter can be reported directly;
    # an episode is abandoned silently after 1000 steps.
    for step in range(1, 1001):
        action = agent.action(observation)
        observation, reward, done, info = env.step(action)
        if not done:
            continue
        print("Episode finished after {} timesteps".format(step))
        break

Episode finished after 24 timesteps
Episode finished after 23 timesteps
Episode finished after 17 timesteps
Episode finished after 21 timesteps
Episode finished after 19 timesteps
Episode finished after 20 timesteps
Episode finished after 29 timesteps
Episode finished after 18 timesteps
Episode finished after 43 timesteps
Episode finished after 29 timesteps


## Training the genetic algorithm

In [61]:
# Population object evolved by the training loop below, bound to the shared env.
# presumably n is the number of agents in the population — confirm in genetic.Generation
generation = genetic.Generation(n=10, env=env)

In [62]:
# Evolve for at most 1000 generations with a fixed std of 0.1, stopping early
# once the worst score has stayed above 195 for 5 generations in a row.
count = 0  # consecutive generations whose minimum score exceeded 195

for i in range(1000):
    res = generation.simulation_step(200)
    generation.select(std=.1, proportion=.3)
    generation.reset()

    # Read the minimum only after select/reset, exactly where the scores were
    # inspected before, then reuse it for both the progress line and the test.
    worst = res.min()
    print(str(i + 1) + ', minimum: ' + str(worst) + ' '*10, end='\r')

    # Any generation at or below 195 restarts the streak.
    count = count + 1 if worst > 195 else 0
    if count == 5:
        break

246, minimum: 200.0          

Training seems overly long... Let's try to decrease the _mutation rate_ as the models become more accurate:

In [92]:
# Rebind `generation` to a newly constructed population so the next experiment
# does not continue from the one trained above.
generation = genetic.Generation(n=10, env=env)

In [93]:
# Same training loop, but with a std that shrinks as the population improves:
# each time the worst score passes `threshold`, std is halved and the threshold
# is raised by 20. Stops after 10 consecutive generations with a minimum above
# 195, or after 300 generations.
std = .8
threshold = 70
count = 0  # consecutive generations whose minimum score exceeded 195

for i in range(300):
    res = generation.simulation_step(200)
    generation.select(std=std, proportion=.3)
    generation.reset()

    # Read the minimum only after select/reset, exactly where the scores were
    # inspected before, then reuse it for every check below.
    worst = res.min()
    print(str(i + 1) + ', minimum: ' + str(worst) + ' '*10, end='\r')

    if worst > threshold:
        std = std / 2
        threshold = threshold + 20

    # Any generation at or below 195 restarts the streak.
    count = count + 1 if worst > 195 else 0
    if count == 10:
        break

63, minimum: 200.0          

Definitely better... But some tweaking is needed. Let's check that the agents are well-trained:

In [94]:
# Re-run the final population many times and keep the element-wise minimum of
# the returned scores, i.e. the worst result ever seen in each slot.
# presumably each entry of `res` is one agent's score — confirm in genetic.Generation
N_TRIALS = 100
MAX_STEPS = 200  # step budget passed to simulation_step; also the starting value of the minimum

# Size the accumulator from the population itself instead of hard-coding 10.
worst_scores = np.full(len(generation.agents), float(MAX_STEPS))

for trial in range(N_TRIALS):
    generation.reset()
    res = generation.simulation_step(MAX_STEPS)
    # '\r' rewrites the same output line so only the latest trial stays visible.
    print(str(trial) + ': ' + str(res) + ' '*20, end='\r')
    worst_scores = np.minimum(worst_scores, res)

print()
print(worst_scores)

99: [ 200.  200.  200.  200.  200.  200.  200.  200.  200.  200.]                    
[ 200.  200.  200.  200.  200.  200.  200.  200.  200.  200.]


Let's render the agent in the environment:

In [26]:
# Watch a trained agent: take the first member of the population that was
# checked in the previous cell, rather than a newly constructed agent.
# NOTE(review): the recorded output of this cell is `NotImplementedError: abstract`;
# which call raised it is not visible here (env.render() on a machine without
# a display is one candidate) — confirm before relying on this cell.
agent = generation.agents[0]

for i_episode in range(10):
    observation = env.reset()

    # An episode is abandoned silently if it is still running after 1000 steps.
    for t in range(1000):
        env.render()
        action = agent.action(observation)
        observation, reward, done, info = env.step(action)
        agent.add_reward(reward)
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            break

NotImplementedError: abstract

# A k-NN algorithm

In [10]:
import knn

In [160]:
# Rebind `agent` to a k-NN agent from the local `knn` module, configured with k=5.
agent = knn.Agent(k=5)

In [161]:
# Run the k-NN agent for 1000 episodes of at most 200 steps each, feeding it
# every reward and resetting it whenever the environment reports `done`.
for episode in range(1, 1001):
    observation = env.reset()

    for step in range(1, 201):
        action = agent.action(observation)
        observation, reward, done, info = env.step(action)
        agent.add_reward(reward)
        if not done:
            continue

        # '\r' rewrites the same output line so only the latest episode stays visible.
        status = str(episode) + '-th episode finished after {t} timesteps'.format(t=step)
        print(status + ' '*20, end='\r')
        agent.reset()
        break

1000-th episode finished after 16 timesteps                    

In [162]:
# Display the `X` attribute stored on entry 1 of the agent's `neighborhood`.
# NOTE(review): what entry 1 stands for is defined in the knn module — confirm.
agent.neighborhood[1].X

Unnamed: 0,a,b,c,d
0,-0.014439,-0.179717,0.028559,0.282830
1,-0.040307,-0.767653,0.070588,1.219596
2,-0.055660,-0.573508,0.094980,0.949840
3,-0.086252,-0.772861,0.148041,1.344638
4,-0.113307,-0.776816,0.196967,1.443761
5,-0.013963,-0.177768,-0.019249,0.301402
6,-0.017166,-0.177307,-0.013167,0.291193
7,-0.020352,-0.177015,-0.007455,0.284744
8,-0.023528,-0.176884,-0.001966,0.281846
9,-0.049027,-0.762724,0.038046,1.170773


In [101]:
# Scratch check of appending a row to a DataFrame by label enlargement.
# Build the empty four-column frame in this cell so it runs on a fresh kernel
# instead of relying on an `X` left over from an earlier session.
X = pd.DataFrame(columns=list('abcd'), dtype=float)
# On an empty default index, len(X) is the next unused integer label.
X.loc[len(X)] = np.ones(4)

In [109]:
# Display X to confirm the row appended above.
X

Unnamed: 0,a,b,c,d
0,1.0,1.0,1.0,1.0


In [105]:
# Display the `elements` attribute stored on entry 1 of the agent's `neighborhood`.
# NOTE(review): the recorded output is an empty list; whether that is expected
# depends on the knn module — confirm.
agent.neighborhood[1].elements

[]