In [1]:
!pip install "stable-baselines3[extra]"
!pip install wandb
!wget http://www.aiotlab.org/teaching/oop/tetris/TetrisTCPserver_v0.6.jar
import os

if os.path.exists("TetrisTCPserver_v0.6.jar"):
    print("✅ 檔案複製成功")
else:
    print("❌ 檔案複製失敗")

Collecting shimmy~=1.1.0 (from shimmy[atari]~=1.1.0; extra == "extra"->stable-baselines3[extra])
  Downloading Shimmy-1.1.0-py3-none-any.whl.metadata (3.3 kB)
Collecting autorom~=0.6.1 (from autorom[accept-rom-license]~=0.6.1; extra == "extra"->stable-baselines3[extra])
  Downloading AutoROM-0.6.1-py3-none-any.whl.metadata (2.4 kB)
Collecting AutoROM.accept-rom-license (from autorom[accept-rom-license]~=0.6.1; extra == "extra"->stable-baselines3[extra])
  Downloading AutoROM.accept-rom-license-0.6.1.tar.gz (434 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m434.7/434.7 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting ale-py~=0.8.1 (from shimmy[atari]~=1.1.0; extra == "extra"->stable-baselines3[extra])
  Downloading ale_py-0.8.1-cp311-cp311-manylinux_2_17_x86_64.man

In [None]:
import numpy as np
import socket
import cv2
import matplotlib.pyplot as plt
import subprocess
import os
import shutil
import glob
import imageio
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecNormalize, VecFrameStack
from IPython.display import FileLink, display, Image
from stable_baselines3.common.vec_env import DummyVecEnv
import torch
# 使用 wandb 記錄訓練日誌
import os
import wandb
from kaggle_secrets import UserSecretsClient
from stable_baselines3.common.vec_env import DummyVecEnv

# 從 Kaggle Secrets 讀取 API Token
user_secrets = UserSecretsClient()
WANDB_API_KEY = user_secrets.get_secret("WANDB_API_KEY")

# 設定環境變數，模擬 login
os.environ["WANDB_API_KEY"] = WANDB_API_KEY

# login & init
wandb.login()
wandb.init(project="tetris-training", entity="t113598065-ntut-edu-tw")

log_path = "/kaggle/working/tetris_train_log.txt"

def write_log(message):
    with open(log_path, "a", encoding="utf-8") as f:
        f.write(message + "\n")
    print(message)

import time

def wait_for_tetris_server(ip="127.0.0.1", port=10612, timeout=30):
    write_log("⏳ 等待 Tetris TCP server 啟動中...")
    start_time = time.time()
    while True:
        try:
            test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            test_sock.settimeout(1.0)
            test_sock.connect((ip, port))
            test_sock.close()
            write_log("✅ Java TCP server 準備完成，連線成功")
            break
        except socket.error:
            if time.time() - start_time > timeout:
                raise TimeoutError("❌ 等待 Java TCP server 超時")
            time.sleep(0.5)

# 啟動 Java Tetris server
print("Java started")
subprocess.Popen(["java", "-jar", "TetrisTCPserver_v0.6.jar"])
write_log("✅ Java server started")
wait_for_tetris_server()

if torch.cuda.is_available():
    print("✅ PyTorch is using GPU:", torch.cuda.get_device_name(0))
else:
    print("❌ PyTorch is using CPU")
# ----------------------------
# 定義 Tetris 環境 (採用老師的格式)


class TetrisEnv(gym.Env):
    metadata = {"render_modes": ["human"], "render_fps": 20}
    N_DISCRETE_ACTIONS = 5
    IMG_HEIGHT = 200
    IMG_WIDTH = 100
    IMG_CHANNELS = 3

    def __init__(self, host_ip="127.0.0.1", host_port=10612):
        super().__init__()
        self.action_space = spaces.Discrete(self.N_DISCRETE_ACTIONS)
        # self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84), dtype=np.uint8)
        self.observation_space = spaces.Box(low=0, high=255, shape=(1, 84, 84), dtype=np.uint8)
        self.server_ip = host_ip
        self.server_port = host_port

        self.client_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client_sock.connect((self.server_ip, self.server_port))

        # 初始化 reward shaping 與統計用變數
        self.lines_removed = 0
        self.height = 0
        self.holes = 0
        self.lifetime = 0

    def step(self, action):
        if action == 0:
            self.client_sock.sendall(b"move -1\n")
        elif action == 1:
            self.client_sock.sendall(b"move 1\n")
        elif action == 2:
            self.client_sock.sendall(b"rotate 0\n")
        elif action == 3:
            self.client_sock.sendall(b"rotate 1\n")
        elif action == 4:
            self.client_sock.sendall(b"drop\n")
    
        terminated, lines, height, holes, observation = self.get_tetris_server_response(self.client_sock)
    
        reward = 0
        if action == 4:
            reward += 5
    
        if height > self.height:
            reward -= (height - self.height) * 5
    
        if holes < self.holes:
            reward += (self.holes - holes) * 10
    
        if lines > self.lines_removed:
            reward += (lines - self.lines_removed) * 1000
            self.lines_removed = lines
    
        self.height = height
        self.holes = holes
        self.lifetime += 1
    
        info = {'removed_lines': self.lines_removed, 'lifetime': self.lifetime}
    
        truncated = False
    
        # 關鍵！處理終止觀察值
        if terminated:
            info['terminal_observation']  = observation.copy()  
    
        return observation, reward, terminated, truncated, info


    def reset(self, seed=None, options=None):
        self.client_sock.sendall(b"start\n")
        terminated, lines, height, holes, observation = self.get_tetris_server_response(self.client_sock)
        # 重置統計變數
        self.lines_removed = 0
        self.height = 0
        self.holes = 0
        self.lifetime = 0
        return observation, {}

    def render(self):
        cv2.imshow("Tetris", self.last_observation)
        cv2.waitKey(1)

    def close(self):
        self.client_sock.close()
        cv2.destroyAllWindows()

    def get_tetris_server_response(self, sock):
        is_game_over = (sock.recv(1) == b'\x01')
        removed_lines = int.from_bytes(sock.recv(4), 'big')
        height = int.from_bytes(sock.recv(4), 'big')
        holes = int.from_bytes(sock.recv(4), 'big')
        img_size = int.from_bytes(sock.recv(4), 'big')
        img_png = sock.recv(img_size)
        nparr = np.frombuffer(img_png, np.uint8)
        np_image = cv2.imdecode(nparr, -1)
        resized = cv2.resize(np_image, (84, 84))
        gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
        gray = np.expand_dims(gray, axis=0)  # <- 關鍵！channel-first
        self.last_observation = gray.copy()
        return is_game_over, removed_lines, height, holes, gray
    

        
# 檢查環境
print("✅ 建立環境開始")
env = TetrisEnv()
check_env(env)

# ----------------------------
# 建立訓練環境（使用向量化、多個 env）並加入正規化與 frame stacking
# 這部分主要用於加速並穩定訓練
# train_env = make_vec_env(TetrisEnv, n_envs=3)
# train_env = VecNormalize(train_env, norm_obs=False, norm_reward=True)
# train_env = VecFrameStack(train_env, n_stack=4, channels_order='first')
train_env = DummyVecEnv([lambda: TetrisEnv()])
train_env = VecFrameStack(train_env, n_stack=4, channels_order="first")

# ----------------------------
# 使用 DQN 進行訓練，調整超參數以提升效能：
# 這裡設定 buffer_size、learning_starts、target_update_interval 等參數
model = DQN("CnnPolicy", train_env, verbose=1, tensorboard_log="./sb3_log/",
            gamma=0.95,
            learning_rate=1e-4,         # 較低的學習率有助於穩定收斂
            buffer_size=20000,         # 經驗回放緩衝區大小
            learning_starts=1000,       # 多少步後開始學習
            policy_kwargs=dict(normalize_images=False),
            target_update_interval=1000 # 目標網路更新頻率
           )
write_log("Model device: " + str(model.device))
model.learn(total_timesteps=100000)  # 可根據需要延長 timesteps1000000

# 儲存訓練後的模型（訓練完畢後可先暫停 train_env 的歸一化更新）
train_env.training = False

# ----------------------------
# 包裝測試環境，但僅用來符合 predict 格式，取影像還是從原生環境拿
# wrapped_test_env = make_vec_env(TetrisEnv, n_envs=1)
# wrapped_test_env = VecNormalize(wrapped_test_env, norm_obs=False, norm_reward=False, training=False)
# wrapped_test_env = VecFrameStack(wrapped_test_env, n_stack=4, channels_order='first')
wrapped_test_env = DummyVecEnv([lambda: TetrisEnv()])
wrapped_test_env = VecFrameStack(wrapped_test_env, n_stack=4, channels_order="first")

# 原始環境保留用來取影像
raw_test_env = TetrisEnv()

# 初始化狀態
wrapped_obs = wrapped_test_env.reset()
raw_obs, _ = raw_test_env.reset()

frames = []
total_test_reward = 0
test_steps = 1000

for step in range(test_steps):
    action, _ = model.predict(wrapped_obs, deterministic=True)

    # 執行動作
    next_raw_obs, reward, done, truncated, info = raw_test_env.step(action)
    wrapped_obs, _, _, _ = wrapped_test_env.step(action)

    total_test_reward += reward
    frames.append(np.expand_dims(raw_obs.copy(), axis=0))
    # frames.append(raw_obs.copy())  # 儲存原始影像
    raw_obs = next_raw_obs

    if done:
        break

write_log("Test completed: Total reward = " + str(total_test_reward))

# 將回放影像存入資料夾（依老師格式）
replay_folder = './replay'
if os.path.exists(replay_folder):
    shutil.rmtree(replay_folder)
os.makedirs(replay_folder, exist_ok=True)
episode_folder = os.path.join(replay_folder, "0", "0")
os.makedirs(episode_folder, exist_ok=True)
for i, frame in enumerate(frames):
    fname = os.path.join(episode_folder, '{:06d}.png'.format(i))
    cv2.imwrite(fname, frame[0])

# 產生 replay GIF（最佳回放）
filenames = sorted(glob.glob(episode_folder + '/*.png'))
gif_images = []
for filename in filenames:
    gif_images.append(imageio.imread(filename))
imageio.mimsave('replay.gif', gif_images, loop=0)
print("Replay GIF saved: replay.gif")
display(FileLink('replay.gif'))

# 將測試結果寫入 CSV（格式與老師版本一致）
with open('tetris_best_score_test2.csv', 'w') as fs:
    fs.write('id,removed_lines,played_steps\n')
    fs.write(f'0,{info["removed_lines"]},{info["lifetime"]}\n')
    fs.write(f'1,{info["removed_lines"]},{info["lifetime"]}\n')
print("CSV file saved: tetris_best_score_test2.csv")
display(FileLink('tetris_best_score_test2.csv'))
wandb.save('tetris_best_score_test2.csv')

# ----------------------------
# 儲存最終模型（請確認將 '113598065' 替換成你的學號）
model.save('113598065_dqn_30env_1M.zip')
print("Model saved: 113598065_dqn_30env_1M.zip")
display(FileLink('113598065_dqn_30env_1M.zip'))
wandb.save('113598065_dqn_30env_1M.zip')

# 關閉環境
wrapped_test_env.close()
raw_test_env.close()
train_env.close()



Java started
✅ Java server started
⏳ 等待 Tetris TCP server 啟動中...
✅ Java TCP server 準備完成，連線成功
✅ PyTorch is using GPU:Client has joined the game Tesla T4
Client has exited the game

✅ 建立環境開始
Client has exited the game
Client has joined the game
Address already in use (Bind failed)
Tetris TCP server is listening at 10612
Client has joined the game
Using cuda device
Model device: cuda
Logging to ./sb3_log/DQN_11
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.856    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 23       |
|    time_elapsed     | 6        |
|    total_timesteps  | 152      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.683    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 23       |
|    time_elapsed     | 14       |
|    total_timesteps  | 334 