In [12]:
# 完整的 Baseline Model 評估腳本

import os
import queue
import signal
import subprocess
import sys
import threading
import time
from pathlib import Path

class BaselineEvaluator:
    """Drive the baseline-model evaluation pipeline end to end.

    The pipeline (see :meth:`run_full_pipeline`) checks that the required
    files exist, makes sure a Redis server answers ``redis-cli ping``
    (starting ``redis-server`` if it does not), launches the language-model
    script, runs the evaluation script, and finally stops every child
    process this object started.

    Args:
        project_root: Root directory of the project checkout. When omitted,
            ``DEFAULT_PROJECT_ROOT`` is used.
    """

    # Location used when the caller does not pass ``project_root``.
    DEFAULT_PROJECT_ROOT = r"d:/ybcvx/Documents/NCKU/ISA5810/Brain-to-text"

    # Line the language-model script prints once it has reached Redis.
    LM_READY_MARKER = "Successfully connected to the redis server"

    def __init__(self, project_root=None):
        root = self.DEFAULT_PROJECT_ROOT if project_root is None else project_root
        self.project_root = Path(root)
        self.data_dir = self.project_root / "data" / "t15_copyTask_neuralData/hdf5_data_final"
        self.model_path = self.project_root / "data" / "t15_pretrained_rnn_baseline"
        self.lm_path = self.project_root / "language_model" / "pretrained_language_models" / "openwebtext_1gram_lm_sil"
        # Child processes started by this object; stopped by cleanup().
        self.processes = []

    @staticmethod
    def _creation_flags():
        """Return the Popen ``creationflags`` used for every child process.

        On Windows the child gets its own process group so that
        ``CTRL_BREAK_EVENT`` can be delivered to it in cleanup(); elsewhere
        the value must be 0.
        """
        return subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == 'win32' else 0

    @staticmethod
    def _pump_lines(stream, sink, discard):
        """Read *stream* to EOF so the child never blocks on a full pipe.

        Lines are forwarded to the queue *sink* until the event *discard* is
        set; after that they are read and dropped.
        """
        with stream:
            for line in stream:
                if not discard.is_set():
                    sink.put(line)

    @staticmethod
    def _redis_is_alive():
        """Return True when ``redis-cli ping`` answers with ``PONG``."""
        try:
            result = subprocess.run(
                ["redis-cli", "ping"],
                capture_output=True,
                text=True,
                errors="replace",
                timeout=2
            )
        except (OSError, subprocess.SubprocessError):
            # redis-cli is missing, not executable, or did not answer in time.
            return False
        return result.returncode == 0 and "PONG" in result.stdout

    def check_prerequisites(self):
        """Check that the required files and directories exist.

        Returns:
            True when the data directory, model directory and language-model
            path exist and at least one ``*/data_val.hdf5`` file is found
            under the data directory; False otherwise.
        """
        print("=== 檢查環境 ===")

        # (label, path, optional hint printed when the path is missing)
        required = (
            ("數據目錄", self.data_dir, None),
            ("模型目錄", self.model_path, None),
            ("語言模型", self.lm_path, "請先運行下載腳本或解壓語言模型"),
        )
        for label, path, hint in required:
            if not path.exists():
                print(f"❌ {label}不存在: {path}")
                if hint is not None:
                    print(hint)
                return False
            print(f"✓ {label}: {path}")

        # List the available validation files (sorted for a stable listing).
        val_files = sorted(self.data_dir.glob("*/data_val.hdf5"))
        if not val_files:
            print("❌ 未找到任何 data_val.hdf5 文件")
            return False
        print(f"✓ 找到 {len(val_files)} 個驗證數據文件:")
        for f in val_files:
            print(f"  - {f.parent.name}/data_val.hdf5")

        return True

    def start_redis_server(self, startup_timeout=10.0):
        """Make sure a Redis server is reachable, starting one if needed.

        Args:
            startup_timeout: Seconds to wait for a freshly started
                ``redis-server`` to answer ``redis-cli ping``.

        Returns:
            True when Redis answers ``PONG`` (already running or started
            here); False when it could not be started or never answered.
        """
        print("\n=== 啟動 Redis 服務器 ===")

        # Reuse a server that is already running.
        if self._redis_is_alive():
            print("✓ Redis 服務器已經在運行")
            return True

        try:
            # Output is discarded: an unread PIPE would eventually fill up
            # and block the server.
            redis_process = subprocess.Popen(
                ["redis-server"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                creationflags=self._creation_flags()
            )
        except (OSError, subprocess.SubprocessError) as e:
            print(f"❌ 啟動 Redis 時發生錯誤: {e}")
            return False
        self.processes.append(redis_process)

        # Poll instead of sleeping a fixed time so we return as soon as the
        # server is ready and stop early if it exits.
        deadline = time.monotonic() + startup_timeout
        while time.monotonic() < deadline:
            if self._redis_is_alive():
                print("✓ Redis 服務器啟動成功")
                return True
            if redis_process.poll() is not None:
                break
            time.sleep(0.5)

        print("❌ Redis 服務器啟動失敗")
        return False

    def start_language_model(self, timeout=60):
        """Launch the language-model script and wait until it reports ready.

        The child's stderr is merged into stdout and drained by a background
        thread, so the readiness marker is found on either stream, the
        timeout is honoured even when the child is silent, and the child
        cannot block on a full pipe later on.

        Args:
            timeout: Seconds to wait for the readiness marker.

        Returns:
            True once the marker line is seen; False if the script is
            missing, cannot be started, exits early, or the wait times out.
        """
        print("\n=== 啟動語言模型服務器 ===")

        lm_script = self.project_root / "language_model" / "language-model-standalone.py"
        if not lm_script.exists():
            print(f"❌ 找不到語言模型腳本: {lm_script}")
            return False

        # NOTE(review): "python" is resolved through PATH and may differ from
        # the interpreter running this code - confirm this is intended.
        cmd = [
            "python",
            str(lm_script),
            "--lm_path", str(self.lm_path),
            "--do_opt",
            "--nbest", "100",
            "--acoustic_scale", "0.325",
            "--blank_penalty", "90",
            "--alpha", "0.55",
            "--redis_ip", "localhost",
            "--gpu_number", "0"
        ]

        try:
            lm_process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                errors="replace",  # undecodable child output must not abort startup
                bufsize=1,
                creationflags=self._creation_flags()
            )
        except (OSError, subprocess.SubprocessError) as e:
            print(f"❌ 啟動語言模型時發生錯誤: {e}")
            return False
        self.processes.append(lm_process)

        lines = queue.Queue()
        stop_collecting = threading.Event()
        threading.Thread(
            target=self._pump_lines,
            args=(lm_process.stdout, lines, stop_collecting),
            daemon=True,
        ).start()

        # Wait for the language model to start and connect to Redis.
        print("等待語言模型連接到 Redis...")
        deadline = time.monotonic() + timeout
        try:
            while True:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    print("❌ 語言模型啟動超時")
                    return False
                try:
                    line = lines.get(timeout=min(0.5, remaining))
                except queue.Empty:
                    # Nothing to read right now: check the child is alive.
                    if lm_process.poll() is not None:
                        print("❌ 語言模型進程意外終止")
                        return False
                    continue
                print(f"  {line.strip()}")
                if self.LM_READY_MARKER in line:
                    print("✓ 語言模型服務器啟動成功")
                    time.sleep(2)  # extra wait to make sure it is fully ready
                    return True
        finally:
            # From here on the pump thread only drains; nothing is queued.
            stop_collecting.set()

    def run_evaluation(self, eval_type="val", gpu_number=0):
        """Run the evaluation script and stream its output.

        Args:
            eval_type: Value passed to ``--eval_type``; also used in the
                name pattern of the result CSV looked up afterwards.
            gpu_number: Value passed to ``--gpu_number``.

        Returns:
            True when the script exits with return code 0; False when the
            script is missing, cannot be run, or exits with another code.
        """
        print(f"\n=== 運行 {eval_type.upper()} 集評估 ===")

        eval_script = self.project_root / "model_training" / "evaluate_model.py"
        if not eval_script.exists():
            print(f"❌ 找不到評估腳本: {eval_script}")
            return False

        cmd = [
            "python",
            str(eval_script),
            "--model_path", str(self.model_path),
            "--data_dir", str(self.data_dir),
            "--eval_type", eval_type,
            "--gpu_number", str(gpu_number)
        ]

        print(f"執行命令: {' '.join(cmd)}")
        print("\n開始評估...")
        print("-" * 60)

        try:
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                errors="replace",
                bufsize=1,
                creationflags=self._creation_flags()
            )
            # Track the child so cleanup() stops it if we are interrupted.
            self.processes.append(process)

            # Stream the output as it is produced.
            with process.stdout:
                for line in process.stdout:
                    print(line, end='')
            return_code = process.wait()

            print("-" * 60)
            if return_code != 0:
                print(f"❌ 評估失敗，返回碼: {return_code}")
                return False

            print("✓ 評估完成!")

            # Report the most recently modified result CSV, if any.
            output_files = list(self.model_path.glob(f"baseline_rnn_{eval_type}_predicted_sentences_*.csv"))
            if output_files:
                latest_file = max(output_files, key=lambda p: p.stat().st_mtime)
                print(f"\n結果已保存至: {latest_file}")
                print(f"文件大小: {latest_file.stat().st_size / 1024:.2f} KB")

            return True

        except (OSError, subprocess.SubprocessError) as e:
            print(f"❌ 運行評估時發生錯誤: {e}")
            return False

    def cleanup(self):
        """Stop every child process started by this object.

        Running children are asked to stop (``CTRL_BREAK_EVENT`` on Windows,
        ``terminate()`` elsewhere) and given 5 seconds; if that fails they
        are killed. Processes that have exited are dropped from
        ``self.processes``.
        """
        print("\n=== 清理服務 ===")

        for process in self.processes:
            if process.poll() is not None:
                continue  # already exited
            try:
                if sys.platform == 'win32':
                    process.send_signal(signal.CTRL_BREAK_EVENT)
                else:
                    process.terminate()
                process.wait(timeout=5)
                print("✓ 進程已終止")
            except (OSError, ValueError, subprocess.SubprocessError) as e:
                print(f"⚠ 終止進程時發生錯誤: {e}")
                try:
                    process.kill()
                    process.wait(timeout=5)
                except (OSError, subprocess.SubprocessError) as kill_error:
                    print(f"⚠ 強制終止進程失敗: {kill_error}")

        # Keep handles only for children that are somehow still alive.
        self.processes = [p for p in self.processes if p.poll() is None]

    def run_full_pipeline(self, eval_type="val", gpu_number=0):
        """Run the whole evaluation flow and always clean up afterwards.

        Args:
            eval_type: Forwarded to :meth:`run_evaluation`.
            gpu_number: Forwarded to :meth:`run_evaluation`.

        Returns:
            True when every step succeeded; False when a step failed, the
            user interrupted the run, or an unexpected error occurred.
        """
        try:
            print("=" * 60)
            print("Brain-to-Text Baseline Model 評估腳本")
            print("=" * 60)

            # 1. Check the environment.
            if not self.check_prerequisites():
                print("\n❌ 環境檢查失敗，請確保所有必要文件都已準備好")
                return False

            # 2. Start Redis.
            if not self.start_redis_server():
                print("\n❌ Redis 啟動失敗")
                return False

            # 3. Start the language model.
            if not self.start_language_model():
                print("\n❌ 語言模型啟動失敗")
                return False

            # 4. Run the evaluation.
            return self.run_evaluation(eval_type, gpu_number)

        except KeyboardInterrupt:
            print("\n\n⚠ 用戶中斷執行")
            return False
        except Exception as e:
            # Top-level boundary: report the error instead of propagating it.
            print(f"\n❌ 發生未預期的錯誤: {e}")
            import traceback
            traceback.print_exc()
            return False
        finally:
            self.cleanup()

# Main program: run the full pipeline with the settings below and print a
# summary banner describing the outcome.
if __name__ == "__main__":
    # Configuration
    EVAL_TYPE = "val"  # "val" or "test"
    GPU_NUMBER = 0     # GPU index; set to -1 to use the CPU

    evaluator = BaselineEvaluator()

    # Run the evaluation
    success = evaluator.run_full_pipeline(eval_type=EVAL_TYPE, gpu_number=GPU_NUMBER)

    # Same banner either way; only the middle line depends on the result.
    print("\n" + "=" * 60)
    print("✓ 所有任務完成!" if success else "❌ 部分任務失敗，請檢查上述錯誤信息")
    print("=" * 60)

Brain-to-Text Baseline Model 評估腳本
=== 檢查環境 ===
✓ 數據目錄: d:\ybcvx\Documents\NCKU\ISA5810\Brain-to-text\data\t15_copyTask_neuralData\hdf5_data_final
✓ 模型目錄: d:\ybcvx\Documents\NCKU\ISA5810\Brain-to-text\data\t15_pretrained_rnn_baseline
✓ 語言模型: d:\ybcvx\Documents\NCKU\ISA5810\Brain-to-text\language_model\pretrained_language_models\openwebtext_1gram_lm_sil
✓ 找到 41 個驗證數據文件:
  - t15.2023.08.13/data_val.hdf5
  - t15.2023.08.18/data_val.hdf5
  - t15.2023.08.20/data_val.hdf5
  - t15.2023.08.25/data_val.hdf5
  - t15.2023.08.27/data_val.hdf5
  - t15.2023.09.01/data_val.hdf5
  - t15.2023.09.03/data_val.hdf5
  - t15.2023.09.24/data_val.hdf5
  - t15.2023.09.29/data_val.hdf5
  - t15.2023.10.01/data_val.hdf5
  - t15.2023.10.06/data_val.hdf5
  - t15.2023.10.08/data_val.hdf5
  - t15.2023.10.13/data_val.hdf5
  - t15.2023.10.15/data_val.hdf5
  - t15.2023.10.20/data_val.hdf5
  - t15.2023.10.22/data_val.hdf5
  - t15.2023.11.03/data_val.hdf5
  - t15.2023.11.04/data_val.hdf5
  - t15.2023.11.17/data_val.hdf5
  

In [None]:
# 刪除目錄用程式碼，慎用！！！
import os
import shutil

def delete_path(path):
    """Delete *path*, whatever kind of filesystem entry it is.

    A symbolic link is unlinked (its target is left alone), a directory is
    removed together with its contents, and anything else is removed as a
    file. A message describing what happened is printed in every case.

    Existence is tested with ``os.path.lexists`` so that a dangling symbolic
    link - whose target no longer exists - is still found and removed;
    ``os.path.exists`` follows the link and would report it as missing.
    """
    if not os.path.lexists(path):
        print(f"Path '{path}' does not exist.")
        return

    # The link check comes first: isdir() follows links, and rmtree() must
    # never be pointed at a link.
    if os.path.islink(path):
        os.unlink(path)
        print(f"Symbolic link '{path}' deleted successfully.")
    elif os.path.isdir(path):
        # shutil.rmtree() deletes the directory and everything inside it.
        shutil.rmtree(path)
        print(f"Directory '{path}' deleted successfully.")
    else:
        os.remove(path)
        print(f"File '{path}' deleted successfully.")


# Replace 'your_directory_path' with the actual path to the directory you want to delete
your_directory_path = '/content/nejm-brain-to-text/data/t15_pretrained_rnn_baseline'

delete_path(your_directory_path)

Symbolic link '/content/nejm-brain-to-text/data/t15_pretrained_rnn_baseline' deleted successfully.
