In [3]:
!pip install "stable-baselines3[extra] >= 2.0.0a5"
!pip install wandb
!wget http://www.aiotlab.org/teaching/oop/tetris/TetrisTCPserver_v0.6.jar
import os

if os.path.exists("TetrisTCPserver_v0.6.jar"):
    print("✅ 檔案複製成功")
else:
    print("❌ 檔案複製失敗")

--2025-04-16 09:16:40--  http://www.aiotlab.org/teaching/oop/tetris/TetrisTCPserver_v0.6.jar
Resolving www.aiotlab.org (www.aiotlab.org)... 18.67.93.115, 18.67.93.29, 18.67.93.113, ...
Connecting to www.aiotlab.org (www.aiotlab.org)|18.67.93.115|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://www.aiotlab.org/teaching/oop/tetris/TetrisTCPserver_v0.6.jar [following]
--2025-04-16 09:16:40--  https://www.aiotlab.org/teaching/oop/tetris/TetrisTCPserver_v0.6.jar
Connecting to www.aiotlab.org (www.aiotlab.org)|18.67.93.115|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3879189 (3.7M) [binary/octet-stream]
Saving to: ‘TetrisTCPserver_v0.6.jar.1’


2025-04-16 09:16:41 (244 MB/s) - ‘TetrisTCPserver_v0.6.jar.1’ saved [3879189/3879189]

✅ 檔案複製成功


In [10]:
import numpy as np
import socket
import cv2
import matplotlib.pyplot as plt
import subprocess
import os
import shutil
import glob
import imageio
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecNormalize, VecFrameStack
from IPython.display import FileLink, display, Image
import torch
# 使用 wandb 記錄訓練日誌
import os
import wandb
from kaggle_secrets import UserSecretsClient

# 從 Kaggle Secrets 讀取 API Token
user_secrets = UserSecretsClient()
WANDB_API_KEY = user_secrets.get_secret("WANDB_API_KEY")

# 設定環境變數，模擬 login
os.environ["WANDB_API_KEY"] = WANDB_API_KEY

# login & init
wandb.login()
wandb.init(project="tetris-training", entity="t113598065-ntut-edu-tw")

log_path = "/kaggle/working/tetris_train_log.txt"

def write_log(message):
    with open(log_path, "a", encoding="utf-8") as f:
        f.write(message + "\n")
    print(message)

import time

def wait_for_tetris_server(ip="127.0.0.1", port=10612, timeout=30):
    write_log("⏳ 等待 Tetris TCP server 啟動中...")
    start_time = time.time()
    while True:
        try:
            test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            test_sock.settimeout(1.0)
            test_sock.connect((ip, port))
            test_sock.close()
            write_log("✅ Java TCP server 準備完成，連線成功")
            break
        except socket.error:
            if time.time() - start_time > timeout:
                raise TimeoutError("❌ 等待 Java TCP server 超時")
            time.sleep(0.5)

# 啟動 Java Tetris server
write_log("🔧 啟動 Java Tetris Server")
subprocess.Popen(["java", "-jar", "TetrisTCPserver_v0.6.jar"])

# 等待 Java server 準備完成
wait_for_tetris_server()

# ----------------------------
# 啟動 Java Tetris 伺服器（保持老師版本一致）
print("Java started")
subprocess.Popen(["java", "-jar", "TetrisTCPserver_v0.6.jar"])
write_log("✅ Java server started")
wait_for_tetris_server()

if torch.cuda.is_available():
    print("✅ PyTorch is using GPU:", torch.cuda.get_device_name(0))
else:
    print("❌ PyTorch is using CPU")
# ----------------------------
# 定義 Tetris 環境 (採用老師的格式)
class TetrisEnv(gym.Env):
    metadata = {"render_modes": ["human"], "render_fps": 20}
    N_DISCRETE_ACTIONS = 5
    IMG_HEIGHT = 200
    IMG_WIDTH = 100
    IMG_CHANNELS = 3

    def __init__(self, host_ip="127.0.0.1", host_port=10612):
        super().__init__()
        self.action_space = spaces.Discrete(self.N_DISCRETE_ACTIONS)
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(84, 84, 1),
                                            dtype=np.uint8)
        self.server_ip = host_ip
        self.server_port = host_port

        self.client_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client_sock.connect((self.server_ip, self.server_port))

        # 初始化 reward shaping 與統計用變數
        self.lines_removed = 0
        self.height = 0
        self.holes = 0
        self.lifetime = 0

    def step(self, action):
        if action == 0:
            self.client_sock.sendall(b"move -1\n")
        elif action == 1:
            self.client_sock.sendall(b"move 1\n")
        elif action == 2:
            self.client_sock.sendall(b"rotate 0\n")
        elif action == 3:
            self.client_sock.sendall(b"rotate 1\n")
        elif action == 4:
            self.client_sock.sendall(b"drop\n")

        terminated, lines, height, holes, observation = self.get_tetris_server_response(self.client_sock)

        # 老師版 reward shaping
        reward = 0
        if action == 4:
            reward += 5  # 掉落獎勵

        if height > self.height:
            reward -= (height - self.height) * 5  # 高度增高扣分

        if holes < self.holes:
            reward += (self.holes - holes) * 10  # 洞穴數減少加分

        if lines > self.lines_removed:
            reward += (lines - self.lines_removed) * 1000  # 消除行數加分
            self.lines_removed = lines

        self.height = height
        self.holes = holes
        self.lifetime += 1

        info = {'removed_lines': self.lines_removed, 'lifetime': self.lifetime}
        truncated = False
        return observation, reward, terminated, truncated, info

    def reset(self, seed=None, options=None):
        self.client_sock.sendall(b"start\n")
        terminated, lines, height, holes, observation = self.get_tetris_server_response(self.client_sock)
        # 重置統計變數
        self.lines_removed = 0
        self.height = 0
        self.holes = 0
        self.lifetime = 0
        return observation, {}

    def render(self):
        cv2.imshow("Tetris", self.last_observation)
        cv2.waitKey(1)

    def close(self):
        self.client_sock.close()
        cv2.destroyAllWindows()

    def get_tetris_server_response(self, sock):
        is_game_over = (sock.recv(1) == b'\x01')
        removed_lines = int.from_bytes(sock.recv(4), 'big')
        height = int.from_bytes(sock.recv(4), 'big')
        holes = int.from_bytes(sock.recv(4), 'big')
        img_size = int.from_bytes(sock.recv(4), 'big')
        img_png = sock.recv(img_size)
        nparr = np.frombuffer(img_png, np.uint8)
        np_image = cv2.imdecode(nparr, -1)
        resized = cv2.resize(np_image, (84, 84))
        gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)  # ✅ 轉成灰階
        gray = np.expand_dims(gray, axis=-1)  # ✅ 增加 channel 維度，變成 (84, 84, 1)
        self.last_observation = gray.copy()
        return is_game_over, removed_lines, height, holes, gray
        
# 檢查環境
print("✅ 建立環境開始")
env = TetrisEnv()
check_env(env)

# ----------------------------
# 建立訓練環境（使用向量化、多個 env）並加入正規化與 frame stacking
# 這部分主要用於加速並穩定訓練
train_env = make_vec_env(TetrisEnv, n_envs=3)
train_env = VecNormalize(train_env, norm_obs=False, norm_reward=True)
# train_env = VecNormalize(train_env, norm_obs=True, norm_reward=True)
train_env = VecFrameStack(train_env, n_stack=4)

# ----------------------------
# 使用 DQN 進行訓練，調整超參數以提升效能：
# 這裡設定 buffer_size、learning_starts、target_update_interval 等參數
model = DQN("CnnPolicy", train_env, verbose=1, tensorboard_log="./sb3_log/",
            gamma=0.95,
            learning_rate=1e-4,         # 較低的學習率有助於穩定收斂
            buffer_size=20000,         # 經驗回放緩衝區大小
            learning_starts=1000,       # 多少步後開始學習
            policy_kwargs=dict(normalize_images=False),
            target_update_interval=1000 # 目標網路更新頻率
           )
write_log("Model device: " + str(model.device))
# model.learn(total_timesteps=1000000)  # 可根據需要延長 timesteps

# # 儲存訓練後的模型（訓練完畢後可先暫停 train_env 的歸一化更新）
# train_env.training = False

# # ----------------------------
# # 測試環境：為了保證輸出格式與老師一致，這裡採用未包裝版的 TetrisEnv（單一環境）
# test_env = TetrisEnv()

# # 測試訓練後的代理，並依老師格式記錄回放
# frames = []  # 用於儲存每一幀影像
# total_test_reward = 0
# state, _ = test_env.reset()
# test_steps = 1000
# for step in range(test_steps):
#     # 由於 test_env 未包裝，因此直接使用原始影像
#     action, _ = model.predict(state, deterministic=True)
#     next_state, reward, done, truncated, info = test_env.step(action)
#     total_test_reward += reward
#     # 儲存影像（直接使用原始環境回傳的影像）
#     frames.append(state.copy())
#     state = next_state
#     if done:
#         break
# write_log("Test completed: Total reward = " + str(total_test_reward))

# # 將回放影像存入資料夾（依老師格式）
# replay_folder = './replay'
# if os.path.exists(replay_folder):
#     shutil.rmtree(replay_folder)
# os.makedirs(replay_folder, exist_ok=True)
# episode_folder = os.path.join(replay_folder, "0", "0")
# os.makedirs(episode_folder, exist_ok=True)
# for i, frame in enumerate(frames):
#     fname = os.path.join(episode_folder, '{:06d}.png'.format(i))
#     cv2.imwrite(fname, frame)

# # 產生 replay GIF（最佳回放）
# filenames = sorted(glob.glob(episode_folder + '/*.png'))
# gif_images = []
# for filename in filenames:
#     gif_images.append(imageio.imread(filename))
# imageio.mimsave('replay.gif', gif_images, loop=0)
# print("Replay GIF saved: replay.gif")
# display(FileLink('replay.gif'))

# # 將測試結果寫入 CSV（格式與老師版本一致）
# with open('tetris_best_score_test2.csv', 'w') as fs:
#     fs.write('id,removed_lines,played_steps\n')
#     fs.write(f'0,{info["removed_lines"]},{info["lifetime"]}\n')
#     fs.write(f'1,{info["removed_lines"]},{info["lifetime"]}\n')
# print("CSV file saved: tetris_best_score_test2.csv")
# display(FileLink('tetris_best_score_test2.csv'))
# wandb.save('tetris_best_score_test2.csv')

# # ----------------------------
# # 儲存最終模型（請確認將 '113598065' 替換成你的學號）
# model.save('113598065_dqn_30env_1M.zip')
# print("Model saved: 113598065_dqn_30env_1M.zip")
# display(FileLink('113598065_dqn_30env_1M.zip'))
# wandb.save('113598065_dqn_30env_1M.zip')

# # 關閉環境
# test_env.close()
# train_env.close()




🔧 啟動 Java Tetris Server
⏳ 等待 Tetris TCP server 啟動中...
✅ Java TCP server 準備完成，連線成功
Java started
Client has joined the game
Client has exited the game
✅ Java server started
⏳ 等待 Tetris TCP server 啟動中...
✅ Java TCP server 準備完成，連線成功
✅ PyTorch is using GPU:Client has joined the game
 Tesla P100-PCIE-16GB
✅ 建立環境開始
Client has exited the game
Client has exited the game
Client has joined the game
Address already in use (Bind failed)
Address already in use (Bind failed)
Tetris TCP server is listening at 10612
Tetris TCP server is listening at 10612
Client has joined the game
Client has joined the game
Client has joined the game
Using cuda device
Wrapping the env in a VecTransposeImage.
Model device: cuda


In [None]:
!ps aux | grep java