In [None]:
# coding: utf-8
from collections import deque
import os

import numpy as np
import time

import tensorflow as tf

import gym
from gym import wrappers

np.random.seed(7)

# 過去何コマを見るか
STATE_NUM = 4


class DQNAgent():
    """
    Multi Layer Perceptron with Experience Replay
    """

    def __init__(self, epsilon=0.99):
        # parameters
        # self.name = os.path.splitext(os.path.basename(__file__))[0]
        # self.environment_name = environment_name
        self.enable_actions = [0,1] 
        self.n_actions = len(self.enable_actions)
        self.minibatch_size = 32
        self.replay_memory_size = 300*100
        self.learning_rate = 0.001
        self.discount_factor = 0.9
        self.exploration = 0.1
        self.epsilon = epsilon
        self.experienceMemory=[] # 経験メモリ（エピソードローカル）
        self.experienceMemory_local=[] # 経験メモリ（エピソードローカル）
        self.memSize = 300*100  # 経験メモリのサイズ(300サンプリングx100エピソード)
        self.experienceMemory_local=[] # 経験メモリ（エピソードローカル）
        self.memPos = 0 #メモリのインデックス
        self.batch_num = 32 # 学習に使うバッチサイズ
        self.gamma = 0.9       # 割引率
        self.loss=0
        self.total_reward_award=np.ones(100)*-1000 #100エピソード
        


        # replay memory
        self.D = deque(maxlen=self.replay_memory_size)

        # model
        self.init_model()

        # variables
        self.current_loss = 0.0

    def init_model(self):

        # input layer (1 x 4)
        self.x = tf.placeholder(tf.float32, [4])

        # flatten (64)
        x_flat = tf.reshape(self.x, [-1, 4])

        # fully connected layer (32)
        W_fc1 = tf.Variable(tf.truncated_normal([4 ,16], stddev=0.01))
        b_fc1 = tf.Variable(tf.zeros([16]))
        h_fc1 = tf.nn.relu(tf.matmul(x_flat, W_fc1) + b_fc1)

        # output layer (n_actions)
        W_out = tf.Variable(tf.truncated_normal([16, self.n_actions], stddev=0.01))
        b_out = tf.Variable(tf.zeros([self.n_actions]))
        self.y = tf.matmul(h_fc1, W_out) + b_out

        # loss function
        self.y_ = tf.placeholder(tf.float32, [self.n_actions])
        self.loss = tf.reduce_mean(tf.square(self.y_ - self.y))

        # train operation
        optimizer = tf.train.RMSPropOptimizer(self.learning_rate)
        self.training = optimizer.minimize(self.loss)

        # saver
        self.saver = tf.train.Saver()

        # session
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    def get_action_value(self, state):
        #Q_values(self, state): #
        # Q(state, action) of all actions
        return self.sess.run(self.y, feed_dict={self.x: state})[0]

    def reduce_epsilon(self):
        self.epsilon-=1.0/100000

    def get_epsilon(self):
        return self.epsilon

    def get_action(self, state, train):
        if train==True and np.random.rand() < self.epsilon:
            # random
            return np.random.choice(self.enable_actions)
        else:
            # max_action Q(state, action)
            return self.enable_actions[np.argmax(self.get_action_value(state))]

    def experience_local(self,old_seq, action, reward, new_seq):
        #エピソードローカルな記憶
        self.experienceMemory_local.append( np.hstack([old_seq,action,reward,new_seq]) )
 
    def store_experience(self, state, action, reward, state_1, train):
        self.D.append((state, action, reward, state_1, train))


    def experience_global(self,total_reward):
        #グローバルな記憶
        #ベスト100に入る経験を取り込む
        if np.min(self.total_reward_award)<total_reward:
            i=np.argmin(self.total_reward_award)
            self.total_reward_award[i]=total_reward

            # GOOD EXPERIENCE REPLAY
            for x in self.experienceMemory_local:
                self.experience( x )

        #一定確率で優秀でないものも取り込む
        if np.random.random()<0.01:
            # # NORMAL EXPERIENCE REPLAY
            for x in self.experienceMemory_local:
                self.experience( x )

        self.experienceMemory_local=[]

    def experience(self,x):
        if len(self.experienceMemory)>self.memSize:
            self.experienceMemory[int(self.memPos%self.memSize)]=x
            self.memPos+=1
        else:
            self.experienceMemory.append( x )
        
    def experience_replay(self):
        state_minibatch = []
        y_minibatch = []

        # sample random minibatch
        minibatch_size = min(len(self.D), self.minibatch_size)
        minibatch_indexes = np.random.randint(0, len(self.D), minibatch_size)

        for j in minibatch_indexes:
            state_j, action_j, reward_j, state_j_1, terminal = self.D[j]
            action_j_index = self.enable_actions.index(action_j)
            state_j
            y_j = self.get_action_value(state_j)

            if terminal:
                y_j[action_j_index] = reward_j
            else:
                # reward_j + gamma * max_action' Q(state', action')
                y_j[action_j_index] = reward_j + self.discount_factor * np.max(self.get_action_value(state_j_1))  # NOQA

            state_minibatch.append(state_j)
            y_minibatch.append(y_j)

        # training
        self.sess.run(self.training, feed_dict={self.x: state_minibatch, self.y_: y_minibatch})

        # for log
        self.current_loss = self.sess.run(self.loss, feed_dict={self.x: state_minibatch, self.y_: y_minibatch})
    
    
    def update_model(self,old_seq, action, reward, new_seq):
        '''
        モデルを更新する
        '''
        # 経験メモリにたまってない場合は更新しない
        if len(self.experienceMemory)<self.batch_num:
            return

        # 経験メモリからバッチを作成
        memsize=len(self.experienceMemory)
        batch_index = list(np.random.randint(0,memsize,(self.batch_num)))
        batch =np.array( [self.experienceMemory[i] for i in batch_index ])
        x = batch[:,0:STATE_NUM].reshape( (self.batch_num,-1)).astype(np.float32)
        # print(x)
        targets=[]
        # targets=np.zeros((x.shape[0],4))
        # targets=np.array([self.get_action_value(x[j]) for i in x.shape[0]])
        for j in range(x.shape[0]): 
        #   targets[j]=self.get_action_value(x[j])        
            targets.append(np.ndarray.tolist(self.get_action_value(x[j])))
        #targets=self.get_action_value(x).data.copy()
        targets = np.array(targets)
        # print("targets=",targets)
        
        for i in range(self.batch_num):
            #[ seq..., action, reward, seq_new]
            a = batch[i,STATE_NUM]
            r = batch[i, STATE_NUM+1]
            ai=int((a+1)/2) #±1 をindex(0,1)に。
            new_seq= batch[i,(STATE_NUM+2):(STATE_NUM*2+2)]
            targets[i,ai]=( r+ self.gamma * np.max(self.get_action_value(new_seq)))
        t =targets
        #t = Variable(np.array(targets).reshape((self.batch_num,-1)).astype(np.float32))
         

        # ネットの更新
        # self.model.zerograds()
        # loss=self.model(x ,t)
        # self.loss = loss.data
        # loss.backward()
        # self.optimizer.update()

        # training
        for i in range(x.shape[0]):
            x1 = x[i]
            t2 = t[i]
            self.sess.run(self.training, feed_dict={self.x: x1, self.y_: t2})
            # for log
            self.current_loss = self.sess.run(self.loss, feed_dict={self.x: x1 ,self.y_: t2})


class pendulumEnvironment():
    '''
    model
    '''
    def __init__(self):
        self.env = wrappers.Monitor(gym.make('CartPole-v0'), './private/tmp/cartpole-experiment-3', force = True)

    def reset(self):
        self.env.reset()

    def step(self, action):
        return self.env.step(action)

    def monitor_close(self):
        self.env.close()

# シミュレータ。
class simulator:
    def __init__(self, environment, agent):
        self.agent = agent
        self.env = environment
        self.num_seq=STATE_NUM
        self.reset_seq()
        self.learning_rate=1.0
        self.highscore=0
        self.log=[]

    def reset_seq(self):
        self.seq=np.zeros(self.num_seq)

    def push_seq(self, state):
        self.seq[1:self.num_seq]=self.seq[0:self.num_seq-1]
        self.seq[0]=state

    def run(self, train=True):

        self.env.reset()
        self.reset_seq()
        total_reward=0

        for i in range(300):
            # 現在のstateからなるシーケンスを保存
            old_seq = self.seq.copy()

            # エージェントの行動を決める
            action = self.agent.get_action(old_seq,train)

            # 環境に行動を入力する
            observation, reward, done, info =  self.env.step(action)
            total_reward +=reward

            # 結果を観測してstateとシーケンスを更新する
            state = observation[2]
            self.push_seq(state)
            new_seq = self.seq.copy()

            # エピソードローカルなメモリに記憶する
            self.agent.store_experience(old_seq, action, reward, new_seq,train)
            self.agent.experience_local(old_seq, action, reward, new_seq)

            if done:
                print("Episode finished after {} timesteps".format(i+1))
                break

        # エピソードローカルなメモリ内容をグローバルなメモリに移す
        self.agent.experience_global(total_reward)

        if train:
            # 学習用メモリを使ってモデルを更新する
            self.agent.update_model(old_seq, action, reward, new_seq)
            # self.agent.experience_replay()
            self.agent.reduce_epsilon()

        return total_reward

if __name__ == '__main__':
    agent=DQNAgent()
    env=pendulumEnvironment()
    sim=simulator(env,agent)

    best_reward1 = 0
    for i in range(20000):
        total_reward1 = sim.run(train=True)
        if best_reward1 < total_reward1:
            best_reward1 = total_reward1

        print(str(i) + " " + str(total_reward1) + " " + str(best_reward1))
        env.reset()

        if best_reward1 > 195:
            break

    env.monitor_close()
    gym.upload('./private/tmp/cartpole-experiment-3', api_key='sk_GDB9izzTxu1PyxNAdhcw')


[2017-05-31 06:37:18,631] Making new env: CartPole-v0
[2017-05-31 06:37:18,638] Clearing 60 monitor files from previous run (because force=True was provided)
[2017-05-31 06:37:18,829] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video000000.mp4
[2017-05-31 06:37:19,621] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video000001.mp4


Episode finished after 27 timesteps
0 27.0 27.0
Episode finished after 17 timesteps


[2017-05-31 06:37:19,996] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video000008.mp4


1 17.0 27.0
Episode finished after 23 timesteps
2 23.0 27.0
Episode finished after 18 timesteps
3 18.0 27.0
Episode finished after 26 timesteps
4 26.0 27.0
Episode finished after 18 timesteps
5 18.0 27.0
Episode finished after 13 timesteps
6 13.0 27.0
Episode finished after 9 timesteps
7 9.0 27.0
Episode finished after 30 timesteps
8 30.0 30.0
Episode finished after 20 timesteps
9 20.0 30.0
Episode finished after 22 timesteps
10 22.0 30.0
Episode finished after 16 timesteps
11 16.0 30.0
Episode finished after 20 timesteps


[2017-05-31 06:37:21,016] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video000027.mp4


12 20.0 30.0
Episode finished after 13 timesteps
13 13.0 30.0
Episode finished after 37 timesteps
14 37.0 37.0
Episode finished after 16 timesteps
15 16.0 37.0
Episode finished after 40 timesteps
16 40.0 40.0
Episode finished after 14 timesteps
17 14.0 40.0
Episode finished after 37 timesteps
18 37.0 40.0
Episode finished after 39 timesteps
19 39.0 40.0
Episode finished after 32 timesteps
20 32.0 40.0
Episode finished after 37 timesteps
21 37.0 40.0
Episode finished after 30 timesteps
22 30.0 40.0
Episode finished after 14 timesteps
23 14.0 40.0
Episode finished after 14 timesteps
24 14.0 40.0
Episode finished after 18 timesteps
25 18.0 40.0
Episode finished after 22 timesteps
26 22.0 40.0
Episode finished after 19 timesteps
27 19.0 40.0
Episode finished after 13 timesteps
28 13.0 40.0
Episode finished after 25 timesteps
29 25.0 40.0
Episode finished after 16 timesteps


[2017-05-31 06:37:21,977] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video000064.mp4


30 16.0 40.0
Episode finished after 40 timesteps
31 40.0 40.0
Episode finished after 29 timesteps
32 29.0 40.0
Episode finished after 13 timesteps
33 13.0 40.0
Episode finished after 13 timesteps
34 13.0 40.0
Episode finished after 14 timesteps
35 14.0 40.0
Episode finished after 13 timesteps
36 13.0 40.0
Episode finished after 35 timesteps
37 35.0 40.0
Episode finished after 8 timesteps
38 8.0 40.0
Episode finished after 12 timesteps
39 12.0 40.0
Episode finished after 31 timesteps
40 31.0 40.0
Episode finished after 17 timesteps
41 17.0 40.0
Episode finished after 32 timesteps
42 32.0 40.0
Episode finished after 14 timesteps
43 14.0 40.0
Episode finished after 13 timesteps
44 13.0 40.0
Episode finished after 102 timesteps
45 102.0 102.0
Episode finished after 14 timesteps
46 14.0 102.0
Episode finished after 16 timesteps
47 16.0 102.0
Episode finished after 13 timesteps
48 13.0 102.0
Episode finished after 23 timesteps
49 23.0 102.0
Episode finished after 29 timesteps
50 29.0 102.0
E

[2017-05-31 06:37:24,156] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video000125.mp4


58 28.0 102.0
Episode finished after 26 timesteps
59 26.0 102.0
Episode finished after 13 timesteps
60 13.0 102.0
Episode finished after 15 timesteps
61 15.0 102.0
Episode finished after 59 timesteps
62 59.0 102.0
Episode finished after 31 timesteps
63 31.0 102.0
Episode finished after 23 timesteps
64 23.0 102.0
Episode finished after 17 timesteps
65 17.0 102.0
Episode finished after 18 timesteps
66 18.0 102.0
Episode finished after 14 timesteps
67 14.0 102.0
Episode finished after 27 timesteps
68 27.0 102.0
Episode finished after 14 timesteps
69 14.0 102.0
Episode finished after 16 timesteps
70 16.0 102.0
Episode finished after 11 timesteps
71 11.0 102.0
Episode finished after 12 timesteps
72 12.0 102.0
Episode finished after 18 timesteps
73 18.0 102.0
Episode finished after 37 timesteps
74 37.0 102.0
Episode finished after 27 timesteps
75 27.0 102.0
Episode finished after 18 timesteps
76 18.0 102.0
Episode finished after 15 timesteps
77 15.0 102.0
Episode finished after 17 timesteps


[2017-05-31 06:37:26,087] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video000216.mp4


102 20.0 102.0
Episode finished after 11 timesteps
103 11.0 102.0
Episode finished after 23 timesteps
104 23.0 102.0
Episode finished after 12 timesteps
105 12.0 102.0
Episode finished after 25 timesteps
106 25.0 102.0
Episode finished after 10 timesteps
107 10.0 102.0
Episode finished after 13 timesteps
108 13.0 102.0
Episode finished after 19 timesteps
109 19.0 102.0
Episode finished after 14 timesteps
110 14.0 102.0
Episode finished after 50 timesteps
111 50.0 102.0
Episode finished after 17 timesteps
112 17.0 102.0
Episode finished after 44 timesteps
113 44.0 102.0
Episode finished after 22 timesteps
114 22.0 102.0
Episode finished after 14 timesteps
115 14.0 102.0
Episode finished after 18 timesteps
116 18.0 102.0
Episode finished after 11 timesteps
117 11.0 102.0
Episode finished after 21 timesteps
118 21.0 102.0
Episode finished after 19 timesteps
119 19.0 102.0
Episode finished after 32 timesteps
120 32.0 102.0
Episode finished after 53 timesteps
121 53.0 102.0
Episode finished

[2017-05-31 06:37:29,068] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video000343.mp4



Episode finished after 22 timesteps
168 22.0 102.0
Episode finished after 18 timesteps
169 18.0 102.0
Episode finished after 29 timesteps
170 29.0 102.0
Episode finished after 15 timesteps
171 15.0 102.0
Episode finished after 17 timesteps
172 17.0 102.0
Episode finished after 16 timesteps
173 16.0 102.0
Episode finished after 19 timesteps
174 19.0 102.0
Episode finished after 38 timesteps
175 38.0 102.0
Episode finished after 11 timesteps
176 11.0 102.0
Episode finished after 33 timesteps
177 33.0 102.0
Episode finished after 24 timesteps
178 24.0 102.0
Episode finished after 25 timesteps
179 25.0 102.0
Episode finished after 11 timesteps
180 11.0 102.0
Episode finished after 13 timesteps
181 13.0 102.0
Episode finished after 16 timesteps
182 16.0 102.0
Episode finished after 16 timesteps
183 16.0 102.0
Episode finished after 11 timesteps
184 11.0 102.0
Episode finished after 24 timesteps
185 24.0 102.0
Episode finished after 13 timesteps
186 13.0 102.0
Episode finished after 24 time

[2017-05-31 06:37:32,786] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video000512.mp4


251 30.0 102.0
Episode finished after 13 timesteps
252 13.0 102.0
Episode finished after 40 timesteps
253 40.0 102.0
Episode finished after 25 timesteps
254 25.0 102.0
Episode finished after 15 timesteps
255 15.0 102.0
Episode finished after 13 timesteps
256 13.0 102.0
Episode finished after 29 timesteps
257 29.0 102.0
Episode finished after 16 timesteps
258 16.0 102.0
Episode finished after 19 timesteps
259 19.0 102.0
Episode finished after 17 timesteps
260 17.0 102.0
Episode finished after 14 timesteps
261 14.0 102.0
Episode finished after 56 timesteps
262 56.0 102.0
Episode finished after 51 timesteps
263 51.0 102.0
Episode finished after 13 timesteps
264 13.0 102.0
Episode finished after 26 timesteps
265 26.0 102.0
Episode finished after 31 timesteps
266 31.0 102.0
Episode finished after 25 timesteps
267 25.0 102.0
Episode finished after 24 timesteps
268 24.0 102.0
Episode finished after 29 timesteps
269 29.0 102.0
Episode finished after 14 timesteps
270 14.0 102.0
Episode finished

[2017-05-31 06:37:37,702] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video000729.mp4


361 21.0 102.0
Episode finished after 48 timesteps
362 48.0 102.0
Episode finished after 40 timesteps
363 40.0 102.0
Episode finished after 36 timesteps
364 36.0 102.0
Episode finished after 17 timesteps
365 17.0 102.0
Episode finished after 52 timesteps
366 52.0 102.0
Episode finished after 12 timesteps
367 12.0 102.0
Episode finished after 21 timesteps
368 21.0 102.0
Episode finished after 20 timesteps
369 20.0 102.0
Episode finished after 13 timesteps
370 13.0 102.0
Episode finished after 18 timesteps
371 18.0 102.0
Episode finished after 22 timesteps
372 22.0 102.0
Episode finished after 38 timesteps
373 38.0 102.0
Episode finished after 50 timesteps
374 50.0 102.0
Episode finished after 14 timesteps
375 14.0 102.0
Episode finished after 20 timesteps
376 20.0 102.0
Episode finished after 33 timesteps
377 33.0 102.0
Episode finished after 20 timesteps
378 20.0 102.0
Episode finished after 18 timesteps
379 18.0 102.0
Episode finished after 13 timesteps
380 13.0 102.0
Episode finished

[2017-05-31 06:37:43,306] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video001000.mp4


Episode finished after 16 timesteps
500 16.0 102.0
Episode finished after 9 timesteps
501 9.0 102.0
Episode finished after 38 timesteps
502 38.0 102.0
Episode finished after 30 timesteps
503 30.0 102.0
Episode finished after 23 timesteps
504 23.0 102.0
Episode finished after 15 timesteps
505 15.0 102.0
Episode finished after 13 timesteps
506 13.0 102.0
Episode finished after 10 timesteps
507 10.0 102.0
Episode finished after 21 timesteps
508 21.0 102.0
Episode finished after 31 timesteps
509 31.0 102.0
Episode finished after 53 timesteps
510 53.0 102.0
Episode finished after 33 timesteps
511 33.0 102.0
Episode finished after 60 timesteps
512 60.0 102.0
Episode finished after 14 timesteps
513 14.0 102.0
Episode finished after 58 timesteps
514 58.0 102.0
Episode finished after 20 timesteps
515 20.0 102.0
Episode finished after 19 timesteps
516 19.0 102.0
Episode finished after 41 timesteps
517 41.0 102.0
Episode finished after 17 timesteps
518 17.0 102.0
Episode finished after 31 timeste

[2017-05-31 06:38:04,105] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video002000.mp4


998 25.0 102.0
Episode finished after 14 timesteps
999 14.0 102.0
Episode finished after 12 timesteps
1000 12.0 102.0
Episode finished after 17 timesteps
1001 17.0 102.0
Episode finished after 23 timesteps
1002 23.0 102.0
Episode finished after 36 timesteps
1003 36.0 102.0
Episode finished after 25 timesteps
1004 25.0 102.0
Episode finished after 12 timesteps
1005 12.0 102.0
Episode finished after 33 timesteps
1006 33.0 102.0
Episode finished after 20 timesteps
1007 20.0 102.0
Episode finished after 32 timesteps
1008 32.0 102.0
Episode finished after 15 timesteps
1009 15.0 102.0
Episode finished after 11 timesteps
1010 11.0 102.0
Episode finished after 11 timesteps
1011 11.0 102.0
Episode finished after 19 timesteps
1012 19.0 102.0
Episode finished after 36 timesteps
1013 36.0 102.0
Episode finished after 23 timesteps
1014 23.0 102.0
Episode finished after 16 timesteps
1015 16.0 102.0
Episode finished after 24 timesteps
1016 24.0 102.0
Episode finished after 18 timesteps
1017 18.0 102.

[2017-05-31 06:38:24,657] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video003000.mp4


1495 19.0 102.0
Episode finished after 40 timesteps
1496 40.0 102.0
Episode finished after 39 timesteps
1497 39.0 102.0
Episode finished after 20 timesteps
1498 20.0 102.0
Episode finished after 19 timesteps
1499 19.0 102.0
Episode finished after 13 timesteps
1500 13.0 102.0
Episode finished after 49 timesteps
1501 49.0 102.0
Episode finished after 13 timesteps
1502 13.0 102.0
Episode finished after 28 timesteps
1503 28.0 102.0
Episode finished after 30 timesteps
1504 30.0 102.0
Episode finished after 42 timesteps
1505 42.0 102.0
Episode finished after 36 timesteps
1506 36.0 102.0
Episode finished after 16 timesteps
1507 16.0 102.0
Episode finished after 18 timesteps
1508 18.0 102.0
Episode finished after 20 timesteps
1509 20.0 102.0
Episode finished after 18 timesteps
1510 18.0 102.0
Episode finished after 31 timesteps
1511 31.0 102.0
Episode finished after 15 timesteps
1512 15.0 102.0
Episode finished after 9 timesteps
1513 9.0 102.0
Episode finished after 16 timesteps
1514 16.0 102.

[2017-05-31 06:38:45,398] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video004000.mp4


1998 18.0 102.0
Episode finished after 22 timesteps
1999 22.0 102.0
Episode finished after 17 timesteps
2000 17.0 102.0
Episode finished after 24 timesteps
2001 24.0 102.0
Episode finished after 17 timesteps
2002 17.0 102.0
Episode finished after 13 timesteps
2003 13.0 102.0
Episode finished after 20 timesteps
2004 20.0 102.0
Episode finished after 10 timesteps
2005 10.0 102.0
Episode finished after 23 timesteps
2006 23.0 102.0
Episode finished after 29 timesteps
2007 29.0 102.0
Episode finished after 94 timesteps
2008 94.0 102.0
Episode finished after 16 timesteps
2009 16.0 102.0
Episode finished after 14 timesteps
2010 14.0 102.0
Episode finished after 16 timesteps
2011 16.0 102.0
Episode finished after 10 timesteps
2012 10.0 102.0
Episode finished after 25 timesteps
2013 25.0 102.0
Episode finished after 12 timesteps
2014 12.0 102.0
Episode finished after 30 timesteps
2015 30.0 102.0
Episode finished after 23 timesteps
2016 23.0 102.0
Episode finished after 40 timesteps
2017 40.0 10

[2017-05-31 06:39:06,364] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video005000.mp4


2499 28.0 102.0
Episode finished after 20 timesteps
2500 20.0 102.0
Episode finished after 21 timesteps
2501 21.0 102.0
Episode finished after 61 timesteps
2502 61.0 102.0
Episode finished after 28 timesteps
2503 28.0 102.0
Episode finished after 18 timesteps
2504 18.0 102.0
Episode finished after 23 timesteps
2505 23.0 102.0
Episode finished after 13 timesteps
2506 13.0 102.0
Episode finished after 13 timesteps
2507 13.0 102.0
Episode finished after 13 timesteps
2508 13.0 102.0
Episode finished after 15 timesteps
2509 15.0 102.0
Episode finished after 26 timesteps
2510 26.0 102.0
Episode finished after 18 timesteps
2511 18.0 102.0
Episode finished after 31 timesteps
2512 31.0 102.0
Episode finished after 48 timesteps
2513 48.0 102.0
Episode finished after 33 timesteps
2514 33.0 102.0
Episode finished after 16 timesteps
2515 16.0 102.0
Episode finished after 24 timesteps
2516 24.0 102.0
Episode finished after 10 timesteps
2517 10.0 102.0
Episode finished after 15 timesteps
2518 15.0 10

[2017-05-31 06:39:28,254] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video006000.mp4


2999 31.0 102.0
Episode finished after 25 timesteps
3000 25.0 102.0
Episode finished after 37 timesteps
3001 37.0 102.0
Episode finished after 13 timesteps
3002 13.0 102.0
Episode finished after 23 timesteps
3003 23.0 102.0
Episode finished after 21 timesteps
3004 21.0 102.0
Episode finished after 21 timesteps
3005 21.0 102.0
Episode finished after 16 timesteps
3006 16.0 102.0
Episode finished after 19 timesteps
3007 19.0 102.0
Episode finished after 19 timesteps
3008 19.0 102.0
Episode finished after 15 timesteps
3009 15.0 102.0
Episode finished after 23 timesteps
3010 23.0 102.0
Episode finished after 12 timesteps
3011 12.0 102.0
Episode finished after 26 timesteps
3012 26.0 102.0
Episode finished after 20 timesteps
3013 20.0 102.0
Episode finished after 27 timesteps
3014 27.0 102.0
Episode finished after 18 timesteps
3015 18.0 102.0
Episode finished after 11 timesteps
3016 11.0 102.0
Episode finished after 16 timesteps
3017 16.0 102.0
Episode finished after 9 timesteps
3018 9.0 102.

[2017-05-31 06:39:51,782] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video007000.mp4


3496 25.0 102.0
Episode finished after 14 timesteps
3497 14.0 102.0
Episode finished after 18 timesteps
3498 18.0 102.0
Episode finished after 12 timesteps
3499 12.0 102.0
Episode finished after 17 timesteps
3500 17.0 102.0
Episode finished after 11 timesteps
3501 11.0 102.0
Episode finished after 14 timesteps
3502 14.0 102.0
Episode finished after 20 timesteps
3503 20.0 102.0
Episode finished after 11 timesteps
3504 11.0 102.0
Episode finished after 23 timesteps
3505 23.0 102.0
Episode finished after 20 timesteps
3506 20.0 102.0
Episode finished after 15 timesteps
3507 15.0 102.0
Episode finished after 30 timesteps
3508 30.0 102.0
Episode finished after 16 timesteps
3509 16.0 102.0
Episode finished after 12 timesteps
3510 12.0 102.0
Episode finished after 12 timesteps
3511 12.0 102.0
Episode finished after 23 timesteps
3512 23.0 102.0
Episode finished after 20 timesteps
3513 20.0 102.0
Episode finished after 41 timesteps
3514 41.0 102.0
Episode finished after 21 timesteps
3515 21.0 10

[2017-05-31 06:40:17,731] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video008000.mp4


3997 27.0 102.0
Episode finished after 12 timesteps
3998 12.0 102.0
Episode finished after 28 timesteps
3999 28.0 102.0
Episode finished after 19 timesteps
4000 19.0 102.0
Episode finished after 33 timesteps
4001 33.0 102.0
Episode finished after 23 timesteps
4002 23.0 102.0
Episode finished after 30 timesteps
4003 30.0 102.0
Episode finished after 21 timesteps
4004 21.0 102.0
Episode finished after 16 timesteps
4005 16.0 102.0
Episode finished after 13 timesteps
4006 13.0 102.0
Episode finished after 18 timesteps
4007 18.0 102.0
Episode finished after 19 timesteps
4008 19.0 102.0
Episode finished after 30 timesteps
4009 30.0 102.0
Episode finished after 15 timesteps
4010 15.0 102.0
Episode finished after 13 timesteps
4011 13.0 102.0
Episode finished after 16 timesteps
4012 16.0 102.0
Episode finished after 23 timesteps
4013 23.0 102.0
Episode finished after 17 timesteps
4014 17.0 102.0
Episode finished after 14 timesteps
4015 14.0 102.0
Episode finished after 12 timesteps
4016 12.0 10

[2017-05-31 06:40:44,442] Starting new video recorder writing to /Users/nanaki/Documents/desk/code/AI_gym/CarPole/private/tmp/cartpole-experiment-3/openaigym.video.16.65450.video009000.mp4


4499 30.0 107.0
Episode finished after 18 timesteps
4500 18.0 107.0
Episode finished after 16 timesteps
4501 16.0 107.0
Episode finished after 17 timesteps
4502 17.0 107.0
Episode finished after 16 timesteps
4503 16.0 107.0
Episode finished after 18 timesteps
4504 18.0 107.0
Episode finished after 18 timesteps
4505 18.0 107.0
Episode finished after 24 timesteps
4506 24.0 107.0
Episode finished after 18 timesteps
4507 18.0 107.0
Episode finished after 14 timesteps
4508 14.0 107.0
Episode finished after 24 timesteps
4509 24.0 107.0
Episode finished after 11 timesteps
4510 11.0 107.0
Episode finished after 47 timesteps
4511 47.0 107.0
Episode finished after 24 timesteps
4512 24.0 107.0
Episode finished after 13 timesteps
4513 13.0 107.0
Episode finished after 34 timesteps
4514 34.0 107.0
Episode finished after 11 timesteps
4515 11.0 107.0
Episode finished after 62 timesteps
4516 62.0 107.0
Episode finished after 18 timesteps
4517 18.0 107.0
Episode finished after 14 timesteps
4518 14.0 10

In [61]:
target1

NameError: name 'target1' is not defined