# 1. 패키지 설치

In [1]:
!pip install gymnasium

Collecting gymnasium
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1


In [2]:
import sys
print(sys.version) # show the version of the Python interpreter running this kernel

3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]


# 2. 환경 불러오기

In [None]:
import gymnasium as gym #강화학습 환경 제공

In [None]:
# Create the CartPole-v1 environment; render_mode="rgb_array" is requested so
# that env.render() can be used to obtain frames later in the notebook.
env = gym.make('CartPole-v1',render_mode="rgb_array")

In [None]:
# Inspect the action space and draw one random action from it.
# Action meaning: 0 = push cart to the left, 1 = push cart to the right.
print(env.action_space)
print(env.action_space.sample())



Discrete(2)
1


In [None]:
# Inspect the observation space and draw one random sample from it.
# Observation layout: [cart position, cart velocity, pole angle, pole angular velocity]
print(env.observation_space)
print(env.observation_space.sample())

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)
[3.4308815e+00 3.1656227e+38 3.8085070e-01 2.0682444e+38]


In [None]:
# Run a few episodes with a purely random policy and report each episode's
# total reward as a baseline.
episodes = 10
for episode in range(episodes):
    observation, info = env.reset()
    done = False
    score = 0
    while not done:
        # The frame returned by env.render() was discarded here, so the call
        # has been dropped; frames are captured in the GIF section instead.
        action = env.action_space.sample()  # pick a random action
        observation, reward, terminated, truncated, info = env.step(action)
        score += reward
        # An episode is over when the pole falls / cart leaves the track
        # (terminated) OR when the time limit is reached (truncated).
        done = terminated or truncated
    print('Episode:{} Score:{}'.format(episode, score))

env.close()

Episode:0 Score:45.0
Episode:1 Score:28.0
Episode:2 Score:15.0
Episode:3 Score:26.0
Episode:4 Score:34.0
Episode:5 Score:20.0
Episode:6 Score:14.0
Episode:7 Score:49.0
Episode:8 Score:22.0
Episode:9 Score:37.0


# + 캡처 기능 추가

In [None]:
import imageio
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import gymnasium as gym

In [None]:
def add_text_to_image(image, text, position=(10, 10), font_size=100, font_color=(0, 0, 0)):
    """Draw ``text`` onto a frame and return the annotated frame.

    Args:
        image: Array-like frame; it is cast to ``uint8`` before being
            converted to a PIL image.
        text: String to draw.
        position: ``(x, y)`` pixel position passed to ``ImageDraw.text``.
        font_size: Currently unused. The font is obtained from
            ``ImageFont.load_default()`` without a size argument, so this
            value does not influence the rendered text. The parameter is kept
            so existing callers continue to work.
        font_color: Fill colour for the text (default black). Choose a colour
            that contrasts with the frame's background.

    Returns:
        A new NumPy array created from the PIL image after the text was drawn.
    """
    # Convert the incoming frame to a PIL image so it can be drawn on.
    pil_image = Image.fromarray(np.uint8(image))

    # PIL's built-in default font.
    font = ImageFont.load_default()

    # Draw the text directly onto the PIL image.
    draw = ImageDraw.Draw(pil_image)
    draw.text(position, text, font=font, fill=font_color)

    # Convert the annotated PIL image back to a NumPy array.
    return np.array(pil_image)


In [None]:
# Run random-policy episodes, stamp every rendered frame with the running
# score and episode number, and save the result as a single GIF.
env = gym.make('CartPole-v1', render_mode="rgb_array")
episodes = 10
images = []
for episode in range(episodes):
    # Seed only the first reset: re-seeding with the same value on every
    # episode would make each episode start from the same initial state.
    observation, info = env.reset(seed=42 if episode == 0 else None)
    done = False
    score = 0
    while not done:
        screen = env.render()  # capture the current frame
        # Overlay the current score and episode index on the frame.
        text_screen = add_text_to_image(screen, f"Score: {score}, Episode :{episode} ")
        images.append(text_screen)
        action = env.action_space.sample()  # pick a random action
        observation, reward, terminated, truncated, info = env.step(action)
        score += reward
        # Stop on failure (terminated) or on the time limit (truncated).
        done = terminated or truncated

    print('Episode:{} Score:{}'.format(episode, score))

# 20 frames per second; keep every other frame to halve the GIF size.
imageio.mimsave("Cartpole.gif", images[::2], fps=20)
env.close()


Episode:0 Score:11.0
Episode:1 Score:13.0
Episode:2 Score:12.0
Episode:3 Score:34.0
Episode:4 Score:23.0
Episode:5 Score:17.0
Episode:6 Score:12.0
Episode:7 Score:50.0
Episode:8 Score:12.0
Episode:9 Score:19.0


# 3. 모델 학습

In [None]:
!pip install stable-baselines3
!pip install tensorboard

In [None]:
from stable_baselines3 import PPO,DQN,A2C # 사용하고자 하는 강화학습 알고리즘을 import
from stable_baselines3.common.vec_env import DummyVecEnv # 여러 환경을 벡터화
from stable_baselines3.common.evaluation import evaluate_policy # 모델 평가
from stable_baselines3.common. monitor import Monitor
import os

In [None]:
# Build the training environment and the PPO model.
env = gym.make('CartPole-v1', render_mode="rgb_array")
log_path = os.path.join('Training', 'logs', "Cartpole", "PPO")
# Wrap the env in Monitor, passing log_path as its output location.
env = Monitor(env, log_path)
# tensorboard_log must be set for training to write TensorBoard event files;
# without it the TensorBoard cell below has nothing to display and the
# tb_log_name argument of later learn() calls has no effect.
# Rollout/batch sizes can be tuned with the n_steps= and batch_size= keywords.
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env in a DummyVecEnv.


In [None]:
# Train the model for 20,000 environment steps.
model.learn(total_timesteps=20000)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24.1     |
|    ep_rew_mean     | 24.1     |
| time/              |          |
|    fps             | 834      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 27.8        |
|    ep_rew_mean          | 27.8        |
| time/                   |             |
|    fps                  | 631         |
|    iterations           | 2           |
|    time_elapsed         | 6           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008222882 |
|    clip_fraction        | 0.0915      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.686      |
|    explained_variance   | 0.000831    |
|    learning_rate        | 0.

<stable_baselines3.ppo.ppo.PPO at 0x7afc55f992a0>

In [None]:
# Close the training environment now that learning has finished.
env.close()



```
# 코드로 형식 지정됨
```

# 4. 학습 과정 평가

In [None]:
# Load the TensorBoard notebook extension (switch to the %reload_ext line
# below if the extension is already loaded in this kernel).
%load_ext tensorboard
#%reload_ext tensorboard
# Directory TensorBoard should read; this is the same value as log_path.
training_log_path = os.path.join(log_path)
# Alternative: point at one specific run sub-directory instead.
# training_log_path = os.path.join(log_path,'PPO_2')
# Start the TensorBoard UI on port 6009, reading from training_log_path.
%tensorboard --logdir={training_log_path} --port=6009

# 5. 모델 테스트

In [None]:
# Create a CartPole-v1 environment for testing the trained model.
# NOTE(review): the next cell creates its own environment again, so this
# instance appears to be unused — confirm and remove if so.
env = gym.make('CartPole-v1',render_mode="rgb_array")

In [None]:
# On Colab, keeping more than ~200 frames in memory exceeds the available
# RAM, so frames are written out as a numbered GIF every 200 frames.
FRAMES_PER_GIF = 200

env = gym.make('CartPole-v1', render_mode='rgb_array')
episodes = 1
images = []
# Counted across all episodes so a later chunk never overwrites an earlier file.
gif_index = 0
for episode in range(episodes):
    # Seed only the first reset so later episodes do not repeat the same start state.
    observation, info = env.reset(seed=42 if episode == 0 else None)
    done = False
    score = 0
    while not done:
        screen = env.render()  # capture the current frame
        # Overlay the current score and episode index on the frame.
        text_screen = add_text_to_image(screen, f"Score: {score}, Episode :{episode} ")
        images.append(text_screen)
        if len(images) == FRAMES_PER_GIF:
            gif_index += 1
            # 20 fps, keeping every other frame to halve the file size.
            imageio.mimsave(f"Cartpole{gif_index}.gif", images[::2], fps=20)
            images = []
        action = model.predict(observation)[0]
        observation, reward, terminated, truncated, info = env.step(action)
        # Count the reward of the final step too, including when the episode
        # ends because the time limit was reached.
        score += reward
        done = terminated or truncated

    print('Episode:{} Score:{}'.format(episode, score))

# Save whatever frames remain after the last full chunk.
if images:
    gif_index += 1
    imageio.mimsave(f"Cartpole{gif_index}.gif", images[::2], fps=20)
    images = []

env.close()


# 6. 모델 저장 후 재학습

In [None]:
PPO_path = os.path.join('Training', 'Saved Models', 'PPO_model') # destination path and file name for the saved model
# Save the trained model to PPO_path.
model.save(PPO_path)

In [None]:
# Load from the same path the model was saved to (PPO_path); the bare name
# 'PPO_model' points at a different location than the saved file.
model = PPO.load(PPO_path, env=env)

In [None]:
# Continue training the loaded model for 5,000 more steps.
# reset_num_timesteps=False is passed so the timestep counter is not reset;
# tb_log_name names the TensorBoard run — presumably only relevant when the
# model was created with a tensorboard_log directory (TODO confirm).
model.learn(total_timesteps=5000,tb_log_name='result',reset_num_timesteps=False)

# 7. 다른 강화학습 알고리즘 사용

---



In [None]:
# Create a fresh CartPole-v1 environment for this section.
env = gym.make('CartPole-v1',render_mode="rgb_array")

In [None]:
# NOTE(review): the original content of this cell was lost (only a stray
# character remained, which raises NameError). The recorded output shows a
# model being created with verbose logging on an un-monitored environment;
# DQN is assumed as the "other algorithm" — swap in A2C if that was intended.
model = DQN('MlpPolicy', env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [None]:
# Train the current `model` for 100,000 environment steps.
model.learn(total_timesteps=100000)