In [1]:
import numpy as np
import matplotlib.pyplot as plt
import gym

In [2]:
#애니메이션을 만드는 함수
from JSAnimation.IPython_display import display_animation
from matplotlib import animation
from IPython.display import display

def display_frames_as_gif(frames):
    plt.figure(figsize=(frames[0].shape[1]/72.0, frames[0].shape[0]/72.0), dpi = 72)
    patch = plt.imshow(frames[0])
    plt.axis('off')
    
    def animate(i):
        patch.set_data(frames[i])
    
    anim = animation.FuncAnimation(plt.gcf(), animate, frames = len(frames), interval = 50)
    writergif = animation.PillowWriter(fps=30) 
    anim.save("movie_cartpole.gif",writer=writergif)
    display(display_animation(anim, default_mode='loop'))

In [3]:
#수레를 무작위로 움직임
frames = []
env = gym.make('CartPole-v0')
observation = env.reset() #환경 초기화

for step in range(0,200):
    frames.append(env.render(mode='rgb_array')) #v프레임에 각 시각의 이미지 추가
    action = np.random.choice(2) #0-수레를 왼쪽으로, 1-수레를 오른쪽으로
    observation, reward, done, info = env.step(action) #action실행
    
#애니메이션파일 재생
display_frames_as_gif(frames)

The 'clear_temp' parameter of setup() was deprecated in Matplotlib 3.3 and will be removed two minor releases later. If any parameter follows 'clear_temp', they should be passed as keyword, not positionally.
  frame_prefix, clear_temp=False)


AttributeError: 'HTMLWriter' object has no attribute '_temp_names'

### 상태의 이산변수 변환

In [4]:
#상수 정의
ENV = 'CartPole-v0' #태스크 이름
NUM_DIZITIZED = 6 #각 상태를 이산변수로 변환할 구간 수

#CartPole 실행
env = gym.make(ENV) #실행할 태스크 선택
observation = env.reset() #환경 초기화

#이산값으로 만들 구간 계산
def bins(clip_min, clip_max, num): #관측된 상태(연속값)를 이산값으로 변환하는 구간 계산
    return np.linspace(clip_min, clip_max, num+1)[1:-1]

In [5]:
#관측된 상태를 이산값으로 변환
def digitize_state(observation):
    cart_pos, cart_v, pole_angle, pole_v = observation
    #순서대로 수레의 위치, 수레의 속도, 봉의 각도, 봉의 각속도
    digitized = [
        np.digitize(cart_pos, bins = bins(-2.4, 2.4, NUM_DIZITIZED)),
        np.digitize(cart_v, bins = bins(-3.0, 3.0, NUM_DIZITIZED)),
        np.digitize(cart_pos, bins = bins(-0.5, 0.5, NUM_DIZITIZED)),
        np.digitize(cart_pos, bins = bins(-2.0, 2.0, NUM_DIZITIZED))]
    return sum([x*(NUM_DIZITIZED**i)for i, x in enumerate(digitized)])

### Q-learning 구현
with 3 classes
- Agent: 수레, 'Q함수 수정', '다음 행동 결정'
- Brain: '수레가 관찰한 상태를 이산변수로 변환', 'Q테이블 수정', 'Q테이블을 이용해 행동 결정'
- Environment: OpenAI Gyem이 실행되는 실행환경, 'run'

#### (1) agent가 observation_t를 Brain에 전달
#### (2) Brain은 전달받은 현재 상태를 이산변수로 변환하고, Q테이블을 참조해서 행동을 결정해서, agent에게 행동 전달
#### (3) agent는 environment에 행동action_t를 전달하고 
#### (4) Environment는 다음 상태 observation_t+1과 즉각보상 reward_t+1을 agent에게 반환
#### (5) agent가 전달한, 현재상태, 행동, 다음 상태, 즉각보상 (transition) 정보를 바탕으로, Brain은 Q테이블 수정

In [17]:
#상수 정의
ENV = 'CartPole-v0'
NUM_DIZITIZED = 6 #이산 변수로 변환할 구간 수
GAMMA = 0.99 #시간할인율
ETA = 0.5 #학습률
MAX_STEPS = 200 #한 에피소드당 최대 단계 수
NUM_EPISODES = 2000 #최대 에피소드 수

In [18]:
class Agent:
    def __init__(self, num_states, num_actions):
        self.brain = Brain(num_states, num_actions) #agent가 행동을 결정하는 두뇌 역할
        
    def update_Q_function(self, observation, action, reward, observation_next):
        self.brain.update_Q_table(observation, action, reward, observation_next)
    
    def get_action(self, observatoin, step):
        action = self.brain.decide_action(observation, step)
        return action

In [19]:
class Brain:
    def __init__(self, num_states, num_actions):
        self.num_actions= num_actions #행동 가짓수(왼 or 오)구함
        
        self.q_table = np.random.uniform(low=0, high=1, size=(NUM_DIZITIZED**num_states, num_actions)) #Q테이블 생성
    
    def bins(self, clip_min, clip_max, num): #상태를 이산변수 구간으로 나누기
        return np.linspace(clip_min, clip_max, num+1)[1:-1]
    
    def digitize_state(self, observation): #관측된 상태를 이산변수로 변환
        cart_pos, cart_v, pole_angle, pole_v = observation
        digitized = [
            np.digitize(cart_pos, bins = bins(-2.4, 2.4, NUM_DIZITIZED)),
            np.digitize(cart_v, bins = bins(-3.0, 3.0, NUM_DIZITIZED)),
            np.digitize(cart_pos, bins = bins(-0.5, 0.5, NUM_DIZITIZED)),
            np.digitize(cart_pos, bins = bins(-2.0, 2.0, NUM_DIZITIZED))
        ]
        return sum([x*(NUM_DIZITIZED**i)for i, x in enumerate(digitized)])
        
    def update_Q_table(self,observation, action, reward, observation_next): #Q-leaning : Q테이블 수정
        state = self.digitize_state(observation) #현재 상태를 이산변수로 변환
        state_next = self.digitize_state(observation_next) #다음 상태를 이산변수로 변환
        Max_Q_next = max(self.q_table[state_next][:])
        self.q_table[state, action] = self.q_table[state, action]+ ETA*(reward+ GAMMA*Max_Q_next - self.q_table[state,action])
    
    def decide_action(self, observation, episode): #엡실론-그리디 -> 최적 행동 비중을 늘림
        state = self.digitize_state(observation)
        epsilon = 0.5*(1/(episode+1))
        
        if epsilon <= np.random.uniform(0,1):
            action = np.argmax(self.q_table[state][:])
        else:
            action = np.random.choice(self.num_actions)
        return action
        

In [20]:
class Environment:
    def __init__(self):
        self.env = gym.make(ENV)
        num_states = self.env.observation_space.shape[0] #테스크의 상태변수 수를 구함
        num_actions = self.env.action_space.n #가능한 행동 수 구함
        self.agent = Agent(num_states, num_actions) #에이전트 객체 생성
    
    def run(self):
        complete_episodes = 0 #성공한 에피소드 수(195이상 단계를 버틴 경우)
        is_episode_final = False #마지막 에피소드 여부
        framse = [] #에니메이션 만드는 데 사용할 이미지 저장하는 변수
        
        for episode in range(NUM_EPISODES): #에피소드 수만큼 반복
            observation = self.env.reset() #환경 초기화
            
            for step in range(MAX_STEPS): #한 에피소드씩
                if is_episode_final is True: #마지막 에피소드면 frames에 각 단계의 이미지 저장
                    frames.append(self.env.render(mode='rgb_array'))
                #행동 선택
                action = self.agent.get_action(observation, episode)
                #행동 a_t를 실행해 s_t+1, r_t+1 계산
                observation_next, _, done, _ = self.env.step(action)
                
                if done: #200단계 넘거나 일정 각도 이상 기울면 done이 참
                    if step <195:
                        reward = -1 #봉이 쓰러지면 패널티 -1
                        complete_episodes = 0 #195 이상 버티면 성공처리
                    else:
                        reward = 1
                        complete_episode += 1
                    
                else: 
                    reward = 0 #에피 소드 중에는 보상 없다
                
                #다음단계 상태 observation_next로 Q함수 수정
                self.agent.update_Q_function(observation, action, reward, observation_next)
                
                #다음단계 상태 관측
                observation = observation_next
                
                #에피소드 마무리
                if done:
                    print('{0} Episode: Finished after {1} time steps'.format(episode, step+1))
                    break
            
            if is_episode_final is True: 
                display_frames_as_gif(frames)
                break
            
            if complete_episodes >= 10: 
                print('10 에피소드 연속 성공')
                is_episode_final = True #다음 에피소드가 마지막이됨

In [21]:
#main
cartpole_env = Environment()
cartpole_env.run()

0 Episode: Finished after 17 time steps
1 Episode: Finished after 9 time steps
2 Episode: Finished after 17 time steps
3 Episode: Finished after 13 time steps
4 Episode: Finished after 10 time steps
5 Episode: Finished after 9 time steps
6 Episode: Finished after 10 time steps
7 Episode: Finished after 12 time steps
8 Episode: Finished after 9 time steps
9 Episode: Finished after 9 time steps
10 Episode: Finished after 10 time steps
11 Episode: Finished after 10 time steps
12 Episode: Finished after 10 time steps
13 Episode: Finished after 10 time steps
14 Episode: Finished after 9 time steps
15 Episode: Finished after 10 time steps
16 Episode: Finished after 10 time steps
17 Episode: Finished after 10 time steps
18 Episode: Finished after 8 time steps
19 Episode: Finished after 9 time steps
20 Episode: Finished after 9 time steps
21 Episode: Finished after 10 time steps
22 Episode: Finished after 10 time steps
23 Episode: Finished after 9 time steps
24 Episode: Finished after 10 time 

211 Episode: Finished after 10 time steps
212 Episode: Finished after 10 time steps
213 Episode: Finished after 9 time steps
214 Episode: Finished after 10 time steps
215 Episode: Finished after 10 time steps
216 Episode: Finished after 8 time steps
217 Episode: Finished after 10 time steps
218 Episode: Finished after 10 time steps
219 Episode: Finished after 10 time steps
220 Episode: Finished after 9 time steps
221 Episode: Finished after 10 time steps
222 Episode: Finished after 9 time steps
223 Episode: Finished after 10 time steps
224 Episode: Finished after 9 time steps
225 Episode: Finished after 10 time steps
226 Episode: Finished after 11 time steps
227 Episode: Finished after 9 time steps
228 Episode: Finished after 10 time steps
229 Episode: Finished after 10 time steps
230 Episode: Finished after 9 time steps
231 Episode: Finished after 11 time steps
232 Episode: Finished after 10 time steps
233 Episode: Finished after 9 time steps
234 Episode: Finished after 9 time steps
2

418 Episode: Finished after 9 time steps
419 Episode: Finished after 10 time steps
420 Episode: Finished after 10 time steps
421 Episode: Finished after 10 time steps
422 Episode: Finished after 8 time steps
423 Episode: Finished after 8 time steps
424 Episode: Finished after 10 time steps
425 Episode: Finished after 9 time steps
426 Episode: Finished after 9 time steps
427 Episode: Finished after 9 time steps
428 Episode: Finished after 9 time steps
429 Episode: Finished after 11 time steps
430 Episode: Finished after 10 time steps
431 Episode: Finished after 11 time steps
432 Episode: Finished after 8 time steps
433 Episode: Finished after 10 time steps
434 Episode: Finished after 9 time steps
435 Episode: Finished after 10 time steps
436 Episode: Finished after 8 time steps
437 Episode: Finished after 10 time steps
438 Episode: Finished after 10 time steps
439 Episode: Finished after 10 time steps
440 Episode: Finished after 11 time steps
441 Episode: Finished after 10 time steps
44

627 Episode: Finished after 10 time steps
628 Episode: Finished after 9 time steps
629 Episode: Finished after 9 time steps
630 Episode: Finished after 8 time steps
631 Episode: Finished after 8 time steps
632 Episode: Finished after 10 time steps
633 Episode: Finished after 9 time steps
634 Episode: Finished after 10 time steps
635 Episode: Finished after 10 time steps
636 Episode: Finished after 10 time steps
637 Episode: Finished after 10 time steps
638 Episode: Finished after 9 time steps
639 Episode: Finished after 8 time steps
640 Episode: Finished after 10 time steps
641 Episode: Finished after 8 time steps
642 Episode: Finished after 9 time steps
643 Episode: Finished after 9 time steps
644 Episode: Finished after 10 time steps
645 Episode: Finished after 10 time steps
646 Episode: Finished after 9 time steps
647 Episode: Finished after 9 time steps
648 Episode: Finished after 9 time steps
649 Episode: Finished after 9 time steps
650 Episode: Finished after 9 time steps
651 Epi

850 Episode: Finished after 9 time steps
851 Episode: Finished after 8 time steps
852 Episode: Finished after 9 time steps
853 Episode: Finished after 8 time steps
854 Episode: Finished after 9 time steps
855 Episode: Finished after 9 time steps
856 Episode: Finished after 10 time steps
857 Episode: Finished after 10 time steps
858 Episode: Finished after 9 time steps
859 Episode: Finished after 10 time steps
860 Episode: Finished after 9 time steps
861 Episode: Finished after 9 time steps
862 Episode: Finished after 9 time steps
863 Episode: Finished after 10 time steps
864 Episode: Finished after 10 time steps
865 Episode: Finished after 9 time steps
866 Episode: Finished after 10 time steps
867 Episode: Finished after 8 time steps
868 Episode: Finished after 8 time steps
869 Episode: Finished after 10 time steps
870 Episode: Finished after 9 time steps
871 Episode: Finished after 9 time steps
872 Episode: Finished after 9 time steps
873 Episode: Finished after 9 time steps
874 Episo

1065 Episode: Finished after 10 time steps
1066 Episode: Finished after 10 time steps
1067 Episode: Finished after 9 time steps
1068 Episode: Finished after 10 time steps
1069 Episode: Finished after 9 time steps
1070 Episode: Finished after 9 time steps
1071 Episode: Finished after 9 time steps
1072 Episode: Finished after 9 time steps
1073 Episode: Finished after 9 time steps
1074 Episode: Finished after 9 time steps
1075 Episode: Finished after 9 time steps
1076 Episode: Finished after 8 time steps
1077 Episode: Finished after 10 time steps
1078 Episode: Finished after 9 time steps
1079 Episode: Finished after 9 time steps
1080 Episode: Finished after 9 time steps
1081 Episode: Finished after 10 time steps
1082 Episode: Finished after 8 time steps
1083 Episode: Finished after 10 time steps
1084 Episode: Finished after 9 time steps
1085 Episode: Finished after 10 time steps
1086 Episode: Finished after 10 time steps
1087 Episode: Finished after 8 time steps
1088 Episode: Finished aft

1284 Episode: Finished after 10 time steps
1285 Episode: Finished after 9 time steps
1286 Episode: Finished after 8 time steps
1287 Episode: Finished after 8 time steps
1288 Episode: Finished after 9 time steps
1289 Episode: Finished after 10 time steps
1290 Episode: Finished after 8 time steps
1291 Episode: Finished after 10 time steps
1292 Episode: Finished after 10 time steps
1293 Episode: Finished after 9 time steps
1294 Episode: Finished after 10 time steps
1295 Episode: Finished after 10 time steps
1296 Episode: Finished after 9 time steps
1297 Episode: Finished after 10 time steps
1298 Episode: Finished after 9 time steps
1299 Episode: Finished after 9 time steps
1300 Episode: Finished after 8 time steps
1301 Episode: Finished after 9 time steps
1302 Episode: Finished after 10 time steps
1303 Episode: Finished after 10 time steps
1304 Episode: Finished after 8 time steps
1305 Episode: Finished after 9 time steps
1306 Episode: Finished after 10 time steps
1307 Episode: Finished a

1503 Episode: Finished after 10 time steps
1504 Episode: Finished after 10 time steps
1505 Episode: Finished after 8 time steps
1506 Episode: Finished after 10 time steps
1507 Episode: Finished after 9 time steps
1508 Episode: Finished after 10 time steps
1509 Episode: Finished after 8 time steps
1510 Episode: Finished after 8 time steps
1511 Episode: Finished after 9 time steps
1512 Episode: Finished after 10 time steps
1513 Episode: Finished after 10 time steps
1514 Episode: Finished after 10 time steps
1515 Episode: Finished after 10 time steps
1516 Episode: Finished after 8 time steps
1517 Episode: Finished after 9 time steps
1518 Episode: Finished after 10 time steps
1519 Episode: Finished after 9 time steps
1520 Episode: Finished after 9 time steps
1521 Episode: Finished after 10 time steps
1522 Episode: Finished after 9 time steps
1523 Episode: Finished after 9 time steps
1524 Episode: Finished after 9 time steps
1525 Episode: Finished after 9 time steps
1526 Episode: Finished a

1708 Episode: Finished after 10 time steps
1709 Episode: Finished after 9 time steps
1710 Episode: Finished after 9 time steps
1711 Episode: Finished after 9 time steps
1712 Episode: Finished after 9 time steps
1713 Episode: Finished after 11 time steps
1714 Episode: Finished after 10 time steps
1715 Episode: Finished after 10 time steps
1716 Episode: Finished after 10 time steps
1717 Episode: Finished after 11 time steps
1718 Episode: Finished after 10 time steps
1719 Episode: Finished after 9 time steps
1720 Episode: Finished after 9 time steps
1721 Episode: Finished after 10 time steps
1722 Episode: Finished after 9 time steps
1723 Episode: Finished after 9 time steps
1724 Episode: Finished after 10 time steps
1725 Episode: Finished after 9 time steps
1726 Episode: Finished after 10 time steps
1727 Episode: Finished after 9 time steps
1728 Episode: Finished after 8 time steps
1729 Episode: Finished after 9 time steps
1730 Episode: Finished after 10 time steps
1731 Episode: Finished 

1916 Episode: Finished after 10 time steps
1917 Episode: Finished after 10 time steps
1918 Episode: Finished after 9 time steps
1919 Episode: Finished after 10 time steps
1920 Episode: Finished after 9 time steps
1921 Episode: Finished after 9 time steps
1922 Episode: Finished after 10 time steps
1923 Episode: Finished after 9 time steps
1924 Episode: Finished after 10 time steps
1925 Episode: Finished after 9 time steps
1926 Episode: Finished after 11 time steps
1927 Episode: Finished after 9 time steps
1928 Episode: Finished after 10 time steps
1929 Episode: Finished after 10 time steps
1930 Episode: Finished after 10 time steps
1931 Episode: Finished after 9 time steps
1932 Episode: Finished after 10 time steps
1933 Episode: Finished after 10 time steps
1934 Episode: Finished after 10 time steps
1935 Episode: Finished after 9 time steps
1936 Episode: Finished after 9 time steps
1937 Episode: Finished after 9 time steps
1938 Episode: Finished after 9 time steps
1939 Episode: Finished