In [1]:
################################################################################
# Program : Test_Taxi_DQN.py
# Description : Open AI GYM의 Taxi 환경의 강화학습 구현 코드 (코랩용) : DQN Taxi
################################################################################

In [2]:
##################################################
# install package
##################################################
!apt-get install -y xvfb x11-utils
!pip install pyvirtualdisplay==0.2.* PyOpenGL==3.1.* PyOpenGL-accelerate==3.1.*

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libfontenc1 libxfont2 libxkbfile1 libxtst6 libxxf86dga1 x11-xkb-utils xfonts-base
  xfonts-encodings xfonts-utils xserver-common
Suggested packages:
  mesa-utils
The following NEW packages will be installed:
  libfontenc1 libxfont2 libxkbfile1 libxtst6 libxxf86dga1 x11-utils x11-xkb-utils xfonts-base
  xfonts-encodings xfonts-utils xserver-common xvfb
0 upgraded, 12 newly installed, 0 to remove and 23 not upgraded.
Need to get 8,045 kB of archives.
After this operation, 12.8 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 libfontenc1 amd64 1:1.1.4-1build3 [14.7 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxfont2 amd64 1:2.0.5-1build1 [94.5 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxkbfile1 amd64 1:1.1.0-1build3 [71.8 kB]
Get:4 http://archive.ubuntu.

In [11]:
##################################################
# import package
##################################################
import gym
import numpy as np
from joblib import dump
from tensorflow.keras.models import load_model

## 이미지 캡쳐를 통해 영상으로 확인하려면 주석 해제
# import matplotlib.pyplot as plt
# from IPython import display as ipythondisplay
# from pyvirtualdisplay import Display

In [15]:
##################################################
# Trained Model Load
##################################################
# Path of the saved model file; point this at wherever the model was stored.
model_path = '/content/drive/MyDrive/RL/Taxi/taxi_model.h5'
# Load the saved model from that path.
loaded_model = load_model(model_path)

## Uncomment to watch the run as video via image capture
# display = Display(visible=0, size=(400, 300))
# display.start()

In [16]:
##################################################
# Set Environment
##################################################
env = gym.make("Taxi-v3")
# Reset the environment and shape the returned observation as a (1, 1) array.
state = np.reshape(env.reset(), [1, 1])

## Uncomment to watch the run as video via image capture
# prev_screen = env.render(mode='rgb_array')
# plt.imshow(prev_screen)


In [18]:
##################################################
# Test
##################################################
# Run the loaded model greedily for `num_episodes` episodes, recording each
# episode's index, summed reward and step count, then save the three lists.
test_episodes = []
test_total_rewards = []
test_total_steps = []

num_episodes = 30

try:
    for episode in range(num_episodes):
        # The model is fed the observation reshaped to a (1, 1) array.
        state = np.reshape(env.reset(), [1, 1])
        done = False
        total_reward = 0
        total_step = 0

        while not done:
            # Random-policy alternative: action = env.action_space.sample()
            # Action chosen by the trained model: index of its largest output.
            action = np.argmax(loaded_model.predict(state, verbose=0))
            # Step the environment: next state, reward and done flag
            # (this unpacks the 4-tuple form of `env.step`).
            next_state, reward, done, info = env.step(action)

            ## Uncomment to watch the run as video via image capture
            # screen = env.render(mode='rgb_array')
            # plt.imshow(screen)
            # ipythondisplay.clear_output(wait=True)
            # ipythondisplay.display(plt.gcf())

            state = np.reshape(next_state, [1, 1])
            total_reward += reward
            total_step += 1

        print(f"Episode {episode + 1}: Total Reward = {total_reward} Total Step = {total_step}")

        # Note: the stored episode index is 0-based, while the printed one is 1-based.
        test_episodes.append(episode)
        test_total_rewards.append(total_reward)
        test_total_steps.append(total_step)
finally:
    # Close the environment even when the run is interrupted or an error is
    # raised part-way through.
    ## Uncomment to watch the run as video via image capture
    # ipythondisplay.clear_output(wait=True)
    env.close()

# Save the test results (only reached when every episode completed).
dump(test_episodes, '/content/drive/MyDrive/RL/taxi_test_episodes.joblib')
dump(test_total_rewards, '/content/drive/MyDrive/RL/taxi_test_total_rewards.joblib')
dump(test_total_steps, '/content/drive/MyDrive/RL/taxi_test_total_steps.joblib')
print("Test Complete!")

Episode 1: Total Reward = -200 Total Step = 200
Episode 2: Total Reward = -200 Total Step = 200
Episode 3: Total Reward = -1991 Total Step = 200
Episode 4: Total Reward = -2000 Total Step = 200
Episode 5: Total Reward = -1973 Total Step = 200
Episode 6: Total Reward = -200 Total Step = 200
Episode 7: Total Reward = -200 Total Step = 200
Episode 8: Total Reward = -200 Total Step = 200
Episode 9: Total Reward = -1991 Total Step = 200
Episode 10: Total Reward = -200 Total Step = 200
Episode 11: Total Reward = -200 Total Step = 200
Episode 12: Total Reward = -200 Total Step = 200
Episode 13: Total Reward = -2000 Total Step = 200
Episode 14: Total Reward = -200 Total Step = 200
Episode 15: Total Reward = -2000 Total Step = 200
Episode 16: Total Reward = -200 Total Step = 200
Episode 17: Total Reward = -200 Total Step = 200
Episode 18: Total Reward = -200 Total Step = 200
Episode 19: Total Reward = -200 Total Step = 200
Episode 20: Total Reward = -200 Total Step = 200
Episode 21: Total Rewar

KeyboardInterrupt: ignored