In [1]:
%reload_ext autoreload
%autoreload 2

In [13]:
import numpy as np
import gym

import time

np.set_printoptions(precision=0)

In [14]:
env = gym.make('CartPole-v0')

[2017-11-30 22:48:46,753] Making new env: CartPole-v0


# First, a random agent

In [15]:
for i_episode in range(10):
    observation = env.reset()
    
    for t in range(1000):
#         env.render()
        #print(observation)
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
#         print((action, reward))
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            break

Episode finished after 40 timesteps
Episode finished after 11 timesteps
Episode finished after 14 timesteps
Episode finished after 35 timesteps
Episode finished after 17 timesteps
Episode finished after 12 timesteps
Episode finished after 19 timesteps
Episode finished after 26 timesteps
Episode finished after 13 timesteps
Episode finished after 9 timesteps


# Genetic Algorithm

In [9]:
import genetic

## A neural-network agent

In [10]:
agent = genetic.Agent(.5, verbose=False)

In [16]:
for i_episode in range(10):
    observation = env.reset()
    
    for t in range(1000):
#         env.render()
        #print(observation)
        action = agent.action(observation)
        observation, reward, done, info = env.step(action)
#         print((action, reward))
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            break

Episode finished after 8 timesteps
Episode finished after 9 timesteps
Episode finished after 8 timesteps
Episode finished after 10 timesteps
Episode finished after 9 timesteps
Episode finished after 10 timesteps
Episode finished after 8 timesteps
Episode finished after 10 timesteps
Episode finished after 11 timesteps
Episode finished after 9 timesteps


## Training the genetic algorithm

In [78]:
generation = genetic.Generation(n=10, env=env)

In [79]:
count = 0

for i in range(1000):
    
    res = generation.simulation_step(200)
    generation.select(std=.1, proportion=.3)
    generation.reset()
    
    print(str(i + 1) + ': ' + str(np.sort(res)[-1:0:-1]) + ' '*20, end='\r')
        
    if res.min() > 195:
        count += 1
    else:
        count = 0
    
    if count == 5:
        break

921: [ 200.  200.  200.  200.  200.  200.  200.  200.  200.]                    

Training seems overly long... Let's try to change the _mutation rate_ as the models become more accurate:

In [84]:
generation = genetic.Generation(n=10, env=env)

In [85]:
std = .1
threshold = 150
count = 0

for i in range(1000):
    
    res = generation.simulation_step(200)
    generation.select(std=std, proportion=.3)
    generation.reset()
    
    print(str(i + 1) + ': ' + str(np.sort(res)[-1:0:-1]) + ' '*10, end='\r')
    
    if res.min() > threshold:
        std /= 2
        threshold += 10
        
    if res.min() > 195:
        count += 1
    else:
        count = 0
    
    if count == 5:
        break

77: [ 200.  200.  200.  200.  200.  200.  200.  200.  200.]          

Definitely better... But some tweaking is needed. Let's check that the agents are well-trained:

In [86]:
l = np.zeros(10)
agent = generation.agents[0]

for _ in range(100):
    generation.reset()
    res = generation.simulation_step(200)
    print(res, end='\r')
    l += res/100

print()
print(l)

[ 200.  200.  200.  200.  200.  200.  200.  200.  200.  200.]
[ 200.  200.  200.  200.  200.  200.  200.  200.  200.  200.]


# A k-NN algorithm

In [117]:
import knn

In [120]:
agent = knn.Agent(k=10)

In [121]:
for i in range(1000):
    observation = env.reset()
    
    for t in range(200):
        action = agent.action(observation)
        observation, reward, done, info = env.step(action)
        agent.add_reward(reward)
        if done:
            print(str(i+1) + '-th episode finished after {t} timesteps'.format(t=t+1) + ' '*20, end='\r')
            break

377-th episode finished after 14 timesteps                    

KeyboardInterrupt: 