# 강화학습 입문자의 부트 캠프!
# [GYM](https://gym.openai.com/)

* gym의 [document](https://gym.openai.com/docs/)

# 필요한 라이브러리들 설치

In [1]:
%%time
## Takes roughly 25-30 seconds
# Installs the virtual-display, video and gym packages used by the cells below.
# NOTE(review): gym is installed unpinned, while later cells rely on
# gym.wrappers.Monitor and on env.step() returning 4 values — confirm the
# installed gym release still provides both, or pin a version.
!pip install pyvirtualdisplay 
!apt-get install -y xvfb python-opengl ffmpeg
!pip install gym
!pip install box2d-py
#!pip install pyglet==1.3.2
!pip install pyglet

Collecting pyvirtualdisplay
  Downloading https://files.pythonhosted.org/packages/ad/05/6568620fed440941b704664b9cfe5f836ad699ac7694745e7787fbdc8063/PyVirtualDisplay-2.0-py2.py3-none-any.whl
Collecting EasyProcess
  Downloading https://files.pythonhosted.org/packages/48/3c/75573613641c90c6d094059ac28adb748560d99bd27ee6f80cce398f404e/EasyProcess-0.3-py2.py3-none-any.whl
Installing collected packages: EasyProcess, pyvirtualdisplay
Successfully installed EasyProcess-0.3 pyvirtualdisplay-2.0
Reading package lists... Done
Building dependency tree       
Reading state information... Done
ffmpeg is already the newest version (7:3.4.8-0ubuntu0.2).
Suggested packages:
  libgle3
The following NEW packages will be installed:
  python-opengl xvfb
0 upgraded, 2 newly installed, 0 to remove and 17 not upgraded.
Need to get 1,280 kB of archives.
After this operation, 7,686 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 python-opengl all 3.1.0+df

In [2]:
%tensorflow_version 2.x
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40) #error only
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import glob
import io
import base64
from IPython.display import HTML
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display

In [3]:
# Create and start a non-visible 1400x900 virtual display.
# Presumably this is what lets env.render() work without a physical screen — TODO confirm.
display = Display(visible=0, size=(1400, 900))
display.start()

<pyvirtualdisplay.display.Display at 0x7f566cf95f98>

# 비디오 녹화를 위한 함수 제작


In [4]:
"""
Utility functions to enable video recording of gym environment and displaying it
To enable video, just do "env = wrap_env(env)"
"""

def show_video():
  """Embed the most recent recording from ``video/`` in the notebook output.

  Searches for ``video/*.mp4``. When at least one file exists, the one with
  the latest modification time is base64-encoded and displayed as an inline
  HTML ``<video>`` element. When none exists, prints "Could not find video".

  Returns:
    None.
  """
  import os  # local import: the notebook's import cell does not import os

  mp4list = glob.glob('video/*.mp4')
  if not mp4list:
    print("Could not find video")
    return

  # glob does not guarantee any ordering, so select the newest file explicitly
  # instead of relying on the last list element.
  mp4 = max(mp4list, key=os.path.getmtime)
  # Read-only binary mode is enough; the context manager closes the handle.
  with open(mp4, 'rb') as video_file:
    encoded = base64.b64encode(video_file.read())
  ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    

def wrap_env(env):
  """Return ``env`` wrapped in a ``Monitor`` that targets ``./video``.

  ``force=True`` is forwarded to ``Monitor`` unchanged.
  """
  return Monitor(env, './video', force=True)

# 첫만남 I : Frozen Lake

이 환경은 텍스트로 렌더링되기 때문에, 비디오로 관찰할 만한 것이 딱히 없다.

In [5]:
import gym
# Register a custom id "FrozenLake-v3": the FrozenLakeEnv entry point
# with the 4x4 map and is_slippery turned off.
gym.envs.registration.register(
    id="FrozenLake-v3", entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '4x4', 'is_slippery': False}
)

# gym.envs.registration.register(
#     id="FrozenLake-v8", entry_point='gym.envs.toy_text:FrozenLakeEnv',
#     kwargs={'map_name': '8x8', 'is_slippery': True}
# )

In [6]:
env = wrap_env(gym.make('FrozenLake-v3'))
# env = wrap_env(gym.make('FrozenLake-v8'))
print('observation space:', env.observation_space)
print('action space:', env.action_space)

observation space: Discrete(16)
action space: Discrete(4)


In [7]:
# Try running this cell several times
# It draws a random action from action_space
env.action_space.sample()

2

In [8]:
env.render() # Lets you see the current state of the problem with your own eyes.
# Legend for the map letters printed by render()
print('-----state 설명-----')
print('S : starting point, safe')
print('F : frozen surface, safe')
print('H : hole, fall to your doom')
print('G : goal, where the frisbee is located')


[41mS[0mFFF
FHFH
FFFH
HFFG
-----state 설명-----
S : starting point, safe
F : frozen surface, safe
H : hole, fall to your doom
G : goal, where the frisbee is located


In [9]:
# Play one FrozenLake episode by hand: read a key, map it to an action,
# step the environment, and repeat until the episode reports done.
print('G에 도착도 해보고, H에 빠져도 보자.')
init_state = env.reset()  # Reset the environment; every episode has to start from a fresh one!
print('초기 위치 : {}'.format(init_state))
done = False 
action_controls = ['a', 's', 'd', 'w'] # left, down, right, up

while not done:
    env.render()  # Look at the current situation.

    key = input() # Read one of the keys a, s, d, w
    if key not in action_controls: # Safety net for any other input
        print("왼쪽 : a, 아래 : s, 오른쪽 : d, 위 : w")
        continue
    
    action = action_controls.index(key)  # Convert the key into action 0, 1, 2 or 3 (its list position).

    state, reward, done, info = env.step(action)
    print('현재 위치 : {}, 방금 받은 reward : {}, episode 끝? : {}'.format(state, reward, done))
env.close()

G에 도착도 해보고, H에 빠져도 보자.
초기 위치 : 0

[41mS[0mFFF
FHFH
FFFH
HFFG
d
현재 위치 : 1, 방금 받은 reward : 0.0, episode 끝? : False
  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
d
현재 위치 : 2, 방금 받은 reward : 0.0, episode 끝? : False
  (Right)
SF[41mF[0mF
FHFH
FFFH
HFFG
d
현재 위치 : 3, 방금 받은 reward : 0.0, episode 끝? : False
  (Right)
SFF[41mF[0m
FHFH
FFFH
HFFG
d
현재 위치 : 3, 방금 받은 reward : 0.0, episode 끝? : False
  (Right)
SFF[41mF[0m
FHFH
FFFH
HFFG
a
현재 위치 : 2, 방금 받은 reward : 0.0, episode 끝? : False
  (Left)
SF[41mF[0mF
FHFH
FFFH
HFFG
s
현재 위치 : 6, 방금 받은 reward : 0.0, episode 끝? : False
  (Down)
SFFF
FH[41mF[0mH
FFFH
HFFG
d
현재 위치 : 7, 방금 받은 reward : 0.0, episode 끝? : True


# 핵심적인 내용을 정리해보자.

1. env = wrap_env(gym.make('FrozenLake-v3')) # 환경 만들기
    * env.observation_space : 답을 적자.
    * env.action_space : 답을 적자.
2. env.reset() 의 역할을 적자.
    * 무엇을 반환해주는가 : 
    * 어떤 기능을 하는가 :
3. env.render() : 역할을 적자.
4. env.step( ) :
    * 무엇을 인풋으로 받는가.
    * 무슨 기능을 하는가? :
        1. state : 무엇을 return해준 것인가?
        2. reward : 무엇을 return해준 것인가?
        3. done : episode가 종료되었는지 알려줌.
        4. info : useful한 정보지만, evaluation때는 사용해선 안됨.
5. env.close() : 이걸 해줘야 뒤에서 도는게 다 멈춰진다.

# 첫만남 II : Cartpole

env.render()와 위 내용을 참고하면 frozen lake처럼 직접 action을 키보드로 입력해가며 할 수 있지만.. 추천하지 않음

## Q1. 'FrozenLake-v3'대신 'CartPole-v1'를 불러와보자.
[이 문서의 Description을 읽고 답하자](https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py)
1. observation_space : 무엇을 의미?
2. action space : 무엇을 의미?

In [10]:
### your code
env = wrap_env(gym.make(   'CartPole-v1'  ))
print('observation space:', env.observation_space)
print('action space:', env.action_space)

observation space: Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)
action space: Discrete(2)


## Q2. 아래 코드를 완성하시오.

* action 에는 env.action_space.sample()를 이용하자.
* 실행하고. 일단 관찰!

In [11]:
# Reset the environment before the episode begins
init_state = env.reset()

# Take random actions for at most 1000 steps; stop as soon as the episode ends
for t in range(1000):
    action = env.action_space.sample()
    env.render()
    observation, reward, done, info = env.step(action)
    if done:
        break

print('steps: ', t)
env.close()
show_video()

steps:  27


## Q3. 아래 observation은 무엇을 의미하는가? -재확인

In [12]:
print(observation)

[-0.16579468 -1.1682455   0.25023129  1.9880521 ]


# 첫만남 III : pacman!?

In [13]:
# Do not modify
# Installs build tooling and the gym Atari extra; output is silenced except for the setuptools upgrade.
!apt-get update > /dev/null 2>&1
!apt-get install cmake > /dev/null 2>&1
!pip install --upgrade setuptools 2>&1
!pip install ez_setup > /dev/null 2>&1
!pip install gym[atari] > /dev/null 2>&1

Requirement already up-to-date: setuptools in /usr/local/lib/python3.6/dist-packages (53.0.0)


## Q1. 'FrozenLake-v3'대신 'MsPacman-v0'를 불러와보자.
[이 문서를 참고하여 답하자](https://gym.openai.com/envs/MsPacman-v0/)
1. observation_space : 무엇을 의미?
2. action space : 무엇을 의미? (이건 git보는 것이 좋음)

In [14]:
env = wrap_env(gym.make("MsPacman-v0"))
print('observation space:', env.observation_space)
print('action space:', env.action_space)

observation space: Box(0, 255, (210, 160, 3), uint8)
action space: Discrete(9)


## Q2. 아래 코드를 완성하시오.

* action 에는 env.action_space.sample()를 이용하자.
* 실행하고. 일단 관찰!

In [15]:
# Reset the environment before the episode begins
init_state = env.reset()

# Keep taking random actions until the episode reports done
while True:
    env.render()
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    if done:
        break

env.close()
show_video()

# 첫만남 IV : Assault-v0

CartPole 불러온 것을 참고하여, 랜덤 액션을 취하는 agent의 episode를 녹화한 영상을 출력하시오.

In [None]:
# Exercise cell: record one episode of a random-action agent on Assault-v0.
env = wrap_env(gym.make("Assault-v0"))
print('observation space:', env.observation_space)
print('action space:', env.action_space)

state = env.reset()
for t in range(1000):
    ####################
    ## Your Code here ##
    ####################

    
    # NOTE: `done` is not assigned anywhere in this cell as written; the code
    # added above has to set it (e.g. from env.step) for this check to reflect
    # the current episode.
    if done: 
      break;
            
print('steps: ', t)
env.close()
show_video()

# 첫만남 V : Atari BREAK OUT!!

Assault-v0를 참고하여, 랜덤 액션을 취하는 agent의 episode를 녹화한 영상을 출력하시오.
* 'Breakout-v0'



In [None]:
env = wrap_env(gym.make("Breakout-v0"))
print('observation space:', env.observation_space)
print('action space:', env.action_space)

####################
## Your Code here ##
####################

# 첫만남 VI : LunarLander-v2

* Assault-v0를 참고하여, 랜덤 액션을 취하는 agent의 episode를 녹화한 영상을 출력하시오.
* rewards_list = [] 선언
    * 시간에 따른 reward를 append
    * 영상 출력후에 시각화

In [None]:
# Exercise cell: record one random-action episode on LunarLander-v2 and
# collect the per-step rewards in rewards_list.
env = wrap_env(gym.make("LunarLander-v2"))
print('observation space:', env.observation_space)
print('action space:', env.action_space)

rewards_list = []
state = env.reset()
for t in range(1000):
    ###################
    ## Your Code here ##
    ####################



    # NOTE: `reward` and `done` are not assigned anywhere in this cell as
    # written; the code added above has to set them (e.g. from env.step).
    rewards_list.append(reward)
    if done: 
      break;
            
print('steps: ', t)
env.close()
show_video()

In [None]:
# Plot the reward collected at each step, with steps numbered from 1
plt.figure(figsize=(8, 5))
plt.plot(np.arange(len(rewards_list)) + 1, rewards_list)
plt.xlabel('steps #')
plt.ylabel('rewards')
plt.show()

# 기타 나머지 더 맛 봐보기

In [None]:
# Record one random-action episode on BipedalWalker-v3
env = wrap_env(gym.make("BipedalWalker-v3"))
print('observation space:', env.observation_space)
print('action space:', env.action_space)

state = env.reset()
# At most 1000 steps; stop early once the episode reports done
for t in range(1000):
    # your agent here (this takes random actions)
    action = env.action_space.sample()
    env.render()
    observation, reward, done, info = env.step(action)
    if done:
        break

print('steps: ', t)
env.close()
show_video()

In [None]:
# Record one random-action episode on CarRacing-v0
env = wrap_env(gym.make("CarRacing-v0"))
print('observation space:', env.observation_space)
print('action space:', env.action_space)

state = env.reset()
# At most 1000 steps; stop early once the episode reports done
for t in range(1000):
    # your agent here (this takes random actions)
    action = env.action_space.sample()
    env.render()
    observation, reward, done, info = env.step(action)
    if done:
        break

print('steps: ', t)
env.close()
show_video()

# 쏘닉!

![소닉](https://file.bodnara.co.kr/logo/insidelogo.php?image=%2Fhttp%3A%2F%2Ffile.bodnara.co.kr%2Fwebedit%2Fnews%2F2015%2F1575255863-sonic_movie_5m.jpg)

In [None]:
# 약 1 ~ 2분 걸림
!apt-get install pkg-config lua5.1 build-essential libav-tools git
!pip install tqdm retrowrapper gym-retro
!pip install -U git+git://github.com/frenchie4111/dumbrain.git

In [None]:
# Don't run this command unless you already own the games, otherwise you are pirating :)
!python -m dumbrain.rl.retro_contest.install_games http://aiml.mikelyons.org/datasets/sonic/Sonic%20Roms.zip

In [None]:
!pip install gym-retro

In [None]:
import retro
# Show only the games whose name starts with 'Sonic'
list( filter( lambda game: game.startswith( 'Sonic' ), retro.data.list_games() ) )
# retro.data.list_games()

In [None]:
# env = retro.make(game='SonicTheHedgehog2-Genesis',
#     state='MetropolisZone.Act1', record='.')
import retrowrapper

env = retrowrapper.RetroWrapper(
    game='SonicTheHedgehog2-Genesis',
    state='MetropolisZone.Act1' ,
    record='.'
)

In [None]:
# This takes a while
print('observation space:', env.observation_space)
print('action space:', env.action_space)

state = env.reset()
# At most 1000 random steps; stop early once the episode reports done
for t in range(1000):
    # your agent here (this takes random actions)
    action = env.action_space.sample()
    # env.render()
    observation, reward, done, info = env.step(action)
    if done:
        break

print('steps: ', t)
env.close()
# show_video()

In [None]:
## The final moment: display the last observation returned by env.step
# NOTE(review): matplotlib's figsize is given in inches, so (100, 142) asks for
# a very large canvas — confirm this size is intentional.
plt.figure(figsize=(100,142))
plt.imshow(observation)
plt.show()

In [None]:
!python /usr/local/lib/python3.6/dist-packages/retro/scripts/playback_movie.py /content/SonicTheHedgehog2-Genesis-MetropolisZone.Act1-000000.bk2

In [None]:
def show_retro():
  """Embed the most recent ``*.mp4`` from the working directory in the notebook.

  Searches the current directory for ``*.mp4``. When at least one file
  exists, the one with the latest modification time is base64-encoded and
  displayed as an inline HTML ``<video>`` element. When none exists, prints
  "Could not find video".

  Returns:
    None.
  """
  import os  # local import: the notebook's import cell does not import os

  mp4list = glob.glob('*.mp4')
  if not mp4list:
    print("Could not find video")
    return

  # glob does not guarantee any ordering, so taking the first element is
  # arbitrary when several files exist; select the newest file explicitly.
  mp4 = max(mp4list, key=os.path.getmtime)
  # Read-only binary mode is enough; the context manager closes the handle.
  with open(mp4, 'rb') as video_file:
    encoded = base64.b64encode(video_file.read())
  ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))

In [None]:
show_retro()