In [1]:
import pygame
import os
import sys

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
from PIL import Image
import random
import matplotlib.pyplot as plt
from matplotlib import animation
from collections import deque

pygame 2.6.1 (SDL 2.28.4, Python 3.10.18)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Sanity-check the GPU stack; one value is printed per line.
print(
    torch.cuda.is_available(),        # expected to be True on a GPU machine
    torch.version.cuda,               # CUDA version reported by this PyTorch build
    torch.backends.cudnn.version(),   # cuDNN version
    sep="\n",
)


True
11.8
90100


In [3]:
# Make the game sources importable: they are expected in a
# 'space_ship_game_RL' folder under the current working directory.
script_dir = os.path.join(os.getcwd(), 'space_ship_game_RL')
# Guard against appending the same entry again when the cell is re-run.
if script_dir not in sys.path:
    sys.path.append(script_dir)

# NOTE(review): FPS, WIDTH and HEIGHT are used by the environment classes below
# and are not defined anywhere in this notebook, so they presumably come from
# this star import -- confirm against setting.py and consider importing them
# explicitly.
from setting import *
from game import Game


In [4]:
class SpaceShipEnv:
    """Minimal Gym-style wrapper around ``Game``.

    The observation is whatever ``Game.state`` holds, the reward is a
    constant placeholder, and ``info`` is the raw game score.
    """

    def __init__(self):
        """Initialise pygame and create the first ``Game`` instance."""
        pygame.init()
        pygame.font.init()

        # The window is created lazily by render(); until then there is no screen.
        self.screen = None
        self.clock = pygame.time.Clock()
        self.fps = FPS

        self.game = Game()

        # Initial observation; step() and reset() do not refresh this attribute.
        self.observation = self.game.state
        self.action_space = [0, 1, 2, 3]

    def step(self, action):
        """Advance the game by one update.

        Returns:
            tuple: ``(state, reward, done, info)`` where ``info`` is the
            current game score.
        """
        game = self.game
        game.update(action)

        if self.screen is not None:
            # A window exists: draw onto it and throttle to the target FPS.
            game.draw(self.screen)
            self.clock.tick(self.fps)
        else:
            game.draw()

        # Observation: define it according to your game logic.
        state = game.state

        # Reward: placeholder constant, define it according to your game logic.
        reward = -0.5

        # The episode ends when the game stops running or the score cap is reached.
        finished = not (game.running and game.score < 10000)

        return state, reward, finished, game.score

    def reset(self):
        """Start a fresh ``Game`` and return its initial state."""
        self.game = Game()
        return self.game.state

    def render(self):
        """Create the display window on first call; later calls do nothing."""
        if self.screen is not None:
            return
        self.screen = pygame.display.set_mode((WIDTH, HEIGHT))
        pygame.display.set_caption("SpaceShip RL Environment")

    def close(self):
        """Shut pygame down."""
        pygame.quit()


In [5]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
_backend = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_backend)
print(f"Using device: {device}")


Using device: cuda


In [6]:
class SpaceShipEnvVector:
    """Gym-style wrapper around ``Game`` that exposes a fixed-length feature vector.

    Observation layout (slots with no matching object stay at 0):
      [0:7]   player: x, y, x-speed, health, lives, gun level, visible flag
      [7:32]  up to 5 rocks closest to the player, 5 values each
      [32:44] up to 3 player bullets in group iteration order, 4 values each
      [44:50] up to 2 power-ups in group iteration order, 3 values each
    """

    def __init__(self):
        """Initialise pygame and create the first ``Game`` instance."""
        pygame.init()
        pygame.font.init()

        # Size of the observation vector: 7 + 5*5 + 3*4 + 2*3 = 50.
        self.state_dim = 50
        self.action_space = [0, 1, 2, 3]  # no-op, left, right, shoot

        # The window is created lazily by render().
        self.screen = None
        self.clock = pygame.time.Clock()
        self.fps = FPS
        self.game = Game()

    @staticmethod
    def _fill_slots(state, start, entities, slots, width, encode):
        """Encode the first ``slots`` entities into consecutive ``width``-wide slices.

        Writing begins at index ``start`` of ``state``. Slots without an
        entity are left untouched. Returns the index just past the reserved
        region, i.e. ``start + slots * width``.
        """
        for slot, entity in enumerate(entities[:slots]):
            lo = start + slot * width
            state[lo:lo + width] = encode(entity)
        return start + slots * width

    def get_vector_state(self):
        """Build the observation vector from the current game objects."""
        player = self.game.player.sprite
        px, py = player.rect.centerx, player.rect.centery

        state = np.zeros(self.state_dim)

        # Player block (7 values).
        state[0:7] = [
            px / WIDTH,                        # x position, scaled by WIDTH
            py / HEIGHT,                       # y position, scaled by HEIGHT
            player.speedx / 10.0,              # x speed
            player.health / 100.0,             # health
            player.lives / 3.0,                # remaining lives
            min(player.gun, 3) / 3.0,          # gun level, capped at 3
            0.0 if player.hidden else 1.0,     # 1.0 while the player is NOT hidden
        ]
        cursor = 7

        # Rocks: the 5 with the smallest squared distance to the player.
        rocks = list(self.game.rocks)
        rocks.sort(key=lambda rock: ((rock.rect.centerx - px) ** 2 +
                                     (rock.rect.centery - py) ** 2))
        cursor = self._fill_slots(
            state, cursor, rocks, 5, 5,
            lambda rock: [
                rock.rect.centerx / WIDTH,
                rock.rect.centery / HEIGHT,
                rock.speedx / 10.0,
                rock.speedy / 10.0,
                rock.radius / 50.0,
            ],
        )

        # Bullets: the first 3 yielded by the group (no distance ordering).
        cursor = self._fill_slots(
            state, cursor, list(player.bullet_group), 3, 4,
            lambda bullet: [
                bullet.rect.centerx / WIDTH,
                bullet.rect.centery / HEIGHT,
                bullet.speedy / 10.0,
                1.0,                           # marks the slot as occupied
            ],
        )

        # Power-ups: the first 2 yielded by the group (no distance ordering).
        self._fill_slots(
            state, cursor, list(self.game.powers), 2, 3,
            lambda power: [
                power.rect.centerx / WIDTH,
                power.rect.centery / HEIGHT,
                1.0 if power.type == 'shield' else 0.0,
            ],
        )

        return state

    def calculate_reward(self, prev_score, prev_health, prev_lives):
        """Compute the shaped reward for the transition that just happened.

        Args:
            prev_score: game score before the update.
            prev_health: player health before the update.
            prev_lives: player lives before the update. Accepted for
                interface compatibility; the current formula does not use it.

        Returns:
            The sum of: score gained, +3.0 while the game is running (or
            -500 once it is not), -2.0 per health point lost or +0.5 per
            health point gained, and +30.0 when ``game.is_power`` is truthy.
        """
        reward = 0

        # Score gain is the main objective; score drops are ignored.
        gained = self.game.score - prev_score
        if gained > 0:
            reward += gained * 1.0

        # Survival bonus each step, or a large penalty once the game has ended.
        reward += 3.0 if self.game.running else -500

        # Health change: losses are penalised more than gains are rewarded.
        # NOTE(review): if Game restores health when a life is lost, that
        # step would earn the positive branch -- confirm against game.py.
        health_delta = self.game.player.sprite.health - prev_health
        if health_delta < 0:
            reward += health_delta * 2.0   # delta is negative, so this subtracts
        elif health_delta > 0:
            reward += health_delta * 0.5

        if self.game.is_power:
            reward += 30.0

        return reward

    def step(self, action):
        """Apply ``action`` for one game update.

        Returns:
            tuple: ``(state, reward, done, info)`` with ``info`` being
            ``{'score': <current score>}``.
        """
        game = self.game
        sprite = game.player.sprite

        # Snapshot the quantities the reward is computed from.
        before = (game.score, sprite.health, sprite.lives)

        game.update(action)

        if self.screen is not None:
            # A window exists: draw onto it and throttle to the target FPS.
            game.draw(self.screen)
            self.clock.tick(self.fps)
        else:
            game.draw()

        state = self.get_vector_state()
        reward = self.calculate_reward(*before)

        # The episode ends when the game stops running or the score cap is reached.
        finished = not (game.running and game.score < 10000)

        return state, reward, finished, {'score': game.score}

    def reset(self):
        """Start a fresh ``Game`` and return its initial observation vector."""
        self.game = Game()
        return self.get_vector_state()

    def render(self):
        """Create the display window on first call; later calls do nothing."""
        if self.screen is not None:
            return
        self.screen = pygame.display.set_mode((WIDTH, HEIGHT))
        pygame.display.set_caption("SpaceShip RL Environment")

    def close(self):
        """Shut pygame down."""
        pygame.quit()


In [7]:
# Q-network for the vector-state environment.
class VectorDQN(nn.Module):
    """Fully connected Q-network mapping a state vector to one Q-value per action.

    Architecture: two 512-wide blocks (Linear -> ReLU -> LayerNorm ->
    Dropout(0.1)), then Linear+ReLU layers of width 256 and 128, then a
    linear output layer of size ``action_dim``. The layers live in
    ``self.net`` so parameter names are ``net.<index>.*``.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()

        layers = []
        width_in = state_dim

        # Two wide, normalised and regularised blocks.
        for _ in range(2):
            layers += [
                nn.Linear(width_in, 512),
                nn.ReLU(),
                nn.LayerNorm(512),
                nn.Dropout(0.1),
            ]
            width_in = 512

        # Narrowing plain blocks.
        for width_out in (256, 128):
            layers += [nn.Linear(width_in, width_out), nn.ReLU()]
            width_in = width_out

        # Output head: one Q-value per action.
        layers.append(nn.Linear(width_in, action_dim))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the Q-values produced by ``self.net`` for input ``x``."""
        return self.net(x)

# Build the network for the vector environment.
state_dim = 50  # dimensionality of the vector state
num_actions = 4
model = VectorDQN(state_dim, num_actions).to(device)

# Load the vector-model checkpoint; the policy weights are stored under the
# 'policy_net' key.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source; consider passing weights_only=True if the checkpoint
# contents allow it -- confirm.
checkpoint = torch.load('checkpoint_vector_best3642.pth', map_location=device)
model.load_state_dict(checkpoint['policy_net'])
# Switch to inference mode (the network contains Dropout layers). As the last
# expression of the cell this also displays the module summary.
model.eval()

VectorDQN(
  (net): Sequential(
    (0): Linear(in_features=50, out_features=512, bias=True)
    (1): ReLU()
    (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): ReLU()
    (6): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (7): Dropout(p=0.1, inplace=False)
    (8): Linear(in_features=512, out_features=256, bias=True)
    (9): ReLU()
    (10): Linear(in_features=256, out_features=128, bias=True)
    (11): ReLU()
    (12): Linear(in_features=128, out_features=4, bias=True)
  )
)

In [8]:
# Evaluation loop: play greedy episodes with the loaded model and write a
# video whenever an episode beats the best score seen so far.
env = SpaceShipEnvVector()
env.render()
frames = []          # frames of the best episode recorded so far
best_score = 3718    # score an episode must beat before it is recorded
max_games = 50       # upper bound on episodes so the cell always terminates
game_count = 0

try:
    while game_count < max_games:
        state = env.reset()  # reset() already returns the 50-dim state vector
        done = False
        episode_frames = []
        episode_reward = 0
        game_count += 1

        while not done:
            # The vector state needs no preprocessing; just add a batch axis.
            state_tensor = torch.as_tensor(
                state, dtype=torch.float32, device=device
            ).unsqueeze(0)
            # Pure inference: skip autograd bookkeeping on every frame.
            with torch.no_grad():
                q_values = model(state_tensor)
            action = torch.argmax(q_values, dim=1).item()

            next_state, reward, done, info = env.step(action)
            state = next_state

            episode_reward += reward

            # Grab the rendered frame for the video. array3d() is indexed
            # [x][y], so swap the first two axes to get (height, width, 3).
            surface = pygame.display.get_surface()
            frame = pygame.surfarray.array3d(surface)
            frame = np.transpose(frame, (1, 0, 2))
            episode_frames.append(frame)

        print(f"遊戲 {game_count}: reward: {episode_reward:.1f}, score: {info['score']}")

        # Keep the frames only when this episode sets a new record.
        if info['score'] > best_score:
            print(f"刷新紀錄！新紀錄: {info['score']} (原紀錄: {best_score})")
            frames = episode_frames
            best_score = info['score']

            # Save right away so the recording survives a later interruption.
            try:
                import imageio
                video_path = "space_ship_run_rl.mp4"
                imageio.mimsave(video_path, frames, fps=60, quality=9)
                print(f"已保存影片: {video_path}")
            except Exception as e:
                # Best effort: a failed save must not abort the evaluation run.
                print(f"保存影片時出錯: {e}")
        else:
            print(f"分數 {info['score']} 未刷新紀錄 (最高分: {best_score})，重新開始遊戲...")

    # Final save of the best episode. Skipped when no episode beat the record,
    # because there are no frames to write in that case.
    if frames:
        try:
            import imageio
            video_path = f"space_ship_run_rl_final_{best_score}.mp4"
            imageio.mimsave(video_path, frames, fps=60, quality=9)
            print(f"最終影片已保存: {video_path}")
        except Exception as e:
            print(f"保存最終影片時出錯: {e}")
    else:
        print(f"沒有任何一局超過最高分 {best_score}，未產生最終影片")
finally:
    # Always release pygame, even when the cell is interrupted mid-episode.
    env.close()

遊戲 1: reward: 16140.0, score: 1928
分數 1928 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 2: reward: 7822.0, score: 938
分數 938 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 3: reward: 3229.0, score: 358
分數 358 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 4: reward: 8963.0, score: 1212
分數 1212 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 5: reward: 4797.0, score: 786
分數 786 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 6: reward: 6664.0, score: 1058
分數 1058 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 7: reward: 3059.0, score: 444
分數 444 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 8: reward: 7683.0, score: 1080
分數 1080 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 9: reward: 14321.5, score: 1744
分數 1744 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 10: reward: 11029.0, score: 1302
分數 1302 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 11: reward: 8867.0, score: 1100
分數 1100 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 12: reward: 10627.0, score: 1342
分數 1342 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 13: reward: 7303.5, score: 980
分數 980 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 14: reward: 3276.0, score: 444
分數 444 未刷新紀錄 (最高分: 3718)，重新開始遊戲...
遊戲 15: reward: 3585.0, 

KeyboardInterrupt: 