## Setup

In [None]:
import subprocess
import sys
from pathlib import Path

# Show which interpreter and working directory this notebook is using.
print(f"Python version: {sys.version}")
print(f"Working directory: {Path.cwd()}")

# Walk the directories checked by this cell; create any that are absent.
for dir_name in ('data', 'checkpoints', 'outputs', 'src'):
    dir_path = Path(dir_name)
    if not dir_path.exists():
        print(f"✗ {dir_name}/ not found")
        # exist_ok guards against the directory appearing between the
        # existence check and the creation.
        dir_path.mkdir(parents=True, exist_ok=True)
        print(f"  Created {dir_name}/")
        continue
    print(f"✓ {dir_name}/ exists")

In [None]:
# Install dependencies (run only when needed), then verify that the key
# packages can be imported from this kernel.
import importlib

INSTALL_DEPENDENCIES = False  # Set to True to install from requirements.txt

if INSTALL_DEPENDENCIES:
    print("Installing dependencies from requirements.txt...")
    # Run pip through the interpreter executing this notebook so the packages
    # are installed for the same interpreter that performs the check below.
    result = subprocess.run(
        [sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'],
        capture_output=True,
        text=True,
    )

    if result.returncode == 0:
        print("✓ Dependencies installed successfully")
        # Modules installed after the interpreter started may be hidden by
        # the import system's finder caches; clear them so the import check
        # below can notice the freshly installed packages.
        importlib.invalidate_caches()
    else:
        print("✗ Installation failed")
        print(result.stderr)
else:
    print("⊘ Dependency installation skipped")
    print("  (Set INSTALL_DEPENDENCIES = True to install)")

# Check key packages by actually importing each one.
print("\nChecking key packages...")
required_packages = ['torch', 'transformers', 'sklearn', 'tqdm', 'numpy', 'matplotlib']
missing_packages = []

for package in required_packages:
    try:
        importlib.import_module(package)
    except ImportError:
        print(f"  ✗ {package} (missing)")
        missing_packages.append(package)
    else:
        print(f"  ✓ {package}")

if missing_packages:
    print(f"\n⚠ Missing packages: {', '.join(missing_packages)}")
    print("  Set INSTALL_DEPENDENCIES = True and re-run this cell")
else:
    print("\n✓ All required packages are available")

## 의존성 설치 (선택사항)

첫 실행 시 필요한 패키지를 설치합니다.

## 전체 실행 설정

In [None]:
# Select which pipeline stages to run (True/False).
RUN_CONFIG = {
    # Stage switches, in execution order
    'preprocess': True,        # 1. Data preprocessing
    'train_baseline': True,    # 2. Baseline model training
    'generate_text': True,     # 3. Dialogue generation
    'train_multimodal': True,  # 4. Multimodal model training
    'evaluate': True,          # 5. Evaluation

    # Evaluation settings
    'n_eval_games': 1000,      # Number of games to evaluate
}

# Echo the active configuration, one "name: value" line per entry.
print("실행 설정:")
print("\n".join(f"  {key:20s}: {value}" for key, value in RUN_CONFIG.items()))

## Step 1: 데이터 전처리

Pluribus 데이터셋을 다운로드하고 377차원 feature로 변환합니다.

In [None]:
# Step 1: run the preprocessing script as a child process and abort the
# pipeline (RuntimeError) if it exits with a non-zero status.
if RUN_CONFIG['preprocess']:
    print("="*60)
    print("STEP 1: DATA PREPROCESSING")
    print("="*60)

    # Launch the script with the interpreter running this notebook
    # (sys.executable) rather than whatever `python` resolves to on PATH, so
    # the script runs under the same interpreter as this kernel.
    # Output is not captured: the child inherits this process's stdout/stderr.
    # check=False because the return code is inspected explicitly below.
    result = subprocess.run([sys.executable, '1_preprocess_data.py'], check=False)

    if result.returncode == 0:
        print("\n✓ Step 1 completed successfully")
    else:
        print("\n✗ Step 1 failed")
        print("Stopping pipeline...")
        raise RuntimeError("Preprocessing failed")
else:
    print("⊘ Step 1 skipped")

## Step 2: Baseline 모델 학습

게임 상태만 사용하는 MLP 모델을 학습합니다.

In [None]:
# Step 2: run the baseline training script as a child process and abort the
# pipeline (RuntimeError) if it exits with a non-zero status.
if RUN_CONFIG['train_baseline']:
    print("="*60)
    print("STEP 2: TRAIN BASELINE MODEL")
    print("="*60)

    # Launch the script with the interpreter running this notebook
    # (sys.executable) rather than whatever `python` resolves to on PATH, so
    # the script runs under the same interpreter as this kernel.
    # Output is not captured: the child inherits this process's stdout/stderr.
    # check=False because the return code is inspected explicitly below.
    result = subprocess.run([sys.executable, '2_train_baseline.py'], check=False)

    if result.returncode == 0:
        print("\n✓ Step 2 completed successfully")
    else:
        print("\n✗ Step 2 failed")
        print("Stopping pipeline...")
        raise RuntimeError("Baseline training failed")
else:
    print("⊘ Step 2 skipped")

## Step 3: 대화 생성

LLM을 사용하여 각 게임 상태에 대한 대화를 생성합니다.

In [None]:
# Step 3: run the dialogue generation script as a child process and abort the
# pipeline (RuntimeError) if it exits with a non-zero status.
if RUN_CONFIG['generate_text']:
    print("="*60)
    print("STEP 3: GENERATE DIALOGUES")
    print("="*60)

    # Launch the script with the interpreter running this notebook
    # (sys.executable) rather than whatever `python` resolves to on PATH, so
    # the script runs under the same interpreter as this kernel.
    # Output is not captured: the child inherits this process's stdout/stderr.
    # check=False because the return code is inspected explicitly below.
    result = subprocess.run([sys.executable, '3_generate_dialogues.py'], check=False)

    if result.returncode == 0:
        print("\n✓ Step 3 completed successfully")
    else:
        print("\n✗ Step 3 failed")
        print("Stopping pipeline...")
        raise RuntimeError("Dialogue generation failed")
else:
    print("⊘ Step 3 skipped")

## Step 4: Multimodal 모델 학습

게임 상태와 대화를 결합한 multimodal 모델을 학습합니다.

In [None]:
# Step 4: run the multimodal training script as a child process and abort the
# pipeline (RuntimeError) if it exits with a non-zero status.
if RUN_CONFIG['train_multimodal']:
    print("="*60)
    print("STEP 4: TRAIN MULTIMODAL MODEL")
    print("="*60)

    # Launch the script with the interpreter running this notebook
    # (sys.executable) rather than whatever `python` resolves to on PATH, so
    # the script runs under the same interpreter as this kernel.
    # Output is not captured: the child inherits this process's stdout/stderr.
    # check=False because the return code is inspected explicitly below.
    result = subprocess.run([sys.executable, '4_train_multimodal.py'], check=False)

    if result.returncode == 0:
        print("\n✓ Step 4 completed successfully")
    else:
        print("\n✗ Step 4 failed")
        print("Stopping pipeline...")
        raise RuntimeError("Multimodal training failed")
else:
    print("⊘ Step 4 skipped")

## Step 5: Rule-based Agent 평가

학습된 모델을 rule-based agent와 대결시켜 평가합니다.

In [None]:
# Step 5: run the evaluation script once per model (baseline, multimodal).
# A failing evaluation is reported but does not raise, so the second model is
# still evaluated when the first one fails.
if RUN_CONFIG['evaluate']:
    print("="*60)
    print("STEP 5: EVALUATION")
    print("="*60)

    n_games = RUN_CONFIG['n_eval_games']

    # Both runs use the interpreter running this notebook (sys.executable)
    # rather than whatever `python` resolves to on PATH, so the script runs
    # under the same interpreter as this kernel. Output is not captured: the
    # child inherits this process's stdout/stderr. check=False because the
    # return codes are inspected explicitly below.

    # Evaluate baseline
    print("\n--- Evaluating Baseline Model ---")
    result_baseline = subprocess.run([
        sys.executable, '5_evaluate_vs_rule_based.py',
        '--model_type', 'baseline',
        '--model_path', 'checkpoints/baseline_best.pt',
        '--n_games', str(n_games),  # argv entries must be strings
        '--output_dir', 'outputs'
    ], check=False)

    if result_baseline.returncode != 0:
        print("\n✗ Baseline evaluation failed")

    # Evaluate multimodal
    print("\n--- Evaluating Multimodal Model ---")
    result_multimodal = subprocess.run([
        sys.executable, '5_evaluate_vs_rule_based.py',
        '--model_type', 'multimodal',
        '--model_path', 'checkpoints/multimodal_best.pt',
        '--n_games', str(n_games),  # argv entries must be strings
        '--output_dir', 'outputs'
    ], check=False)

    if result_multimodal.returncode != 0:
        print("\n✗ Multimodal evaluation failed")

    # The step only counts as successful when both evaluations exited cleanly.
    if result_baseline.returncode == 0 and result_multimodal.returncode == 0:
        print("\n✓ Step 5 completed successfully")
    else:
        print("\n⚠ Step 5 completed with errors")
else:
    print("⊘ Step 5 skipped")

## 결과 확인

In [None]:
import json


def format_size(n_bytes):
    """Return *n_bytes* as a short human-readable string in B, KB or MB."""
    if n_bytes < 1024:
        return f"{n_bytes} B"
    if n_bytes < 1024 * 1024:
        return f"{n_bytes / 1024:.1f} KB"
    return f"{n_bytes / (1024 * 1024):.1f} MB"


print("="*60)
print("FINAL RESULTS")
print("="*60)

# Check generated files
print("\nGenerated Files:")
print("-" * 60)

# Checkpoints: list every *.pt file with its size in MB.
checkpoint_dir = Path('checkpoints')
if checkpoint_dir.exists():
    checkpoints = sorted(checkpoint_dir.glob('*.pt'))
    print(f"\nCheckpoints ({len(checkpoints)}):")
    for ckpt in checkpoints:
        size_mb = ckpt.stat().st_size / (1024 * 1024)
        print(f"  - {ckpt.name:40s} ({size_mb:.1f} MB)")

# Outputs: list regular files only. The header count is taken from the same
# filtered list so it matches the entries printed below it.
output_dir = Path('outputs')
if output_dir.exists():
    outputs = sorted(entry for entry in output_dir.glob('*') if entry.is_file())
    print(f"\nOutputs ({len(outputs)}):")
    for out in outputs:
        print(f"  - {out.name:40s} ({format_size(out.stat().st_size)})")

# Load and display evaluation results
baseline_file = output_dir / 'baseline_vs_rule_based.json'
multimodal_file = output_dir / 'multimodal_vs_rule_based.json'

# The summary is shown only when both result files are present.
if baseline_file.exists() and multimodal_file.exists():
    print("\n" + "="*60)
    print("EVALUATION SUMMARY")
    print("="*60)

    # Read as UTF-8 explicitly instead of relying on the locale's default
    # encoding, which is not UTF-8 on every platform.
    with open(baseline_file, 'r', encoding='utf-8') as f:
        baseline_results = json.load(f)

    with open(multimodal_file, 'r', encoding='utf-8') as f:
        multimodal_results = json.load(f)

    # NOTE(review): assumes 'agreement_rate' is already expressed in percent
    # and 'n_games' is an int — confirm against the evaluation script.
    print(f"\n{'Metric':<30s} {'Baseline':<15s} {'Multimodal':<15s}")
    print("-" * 60)
    print(f"{'Agreement Rate':<30s} {baseline_results['agreement_rate']:>14.2f}% {multimodal_results['agreement_rate']:>14.2f}%")
    print(f"{'Games Played':<30s} {baseline_results['n_games']:>14d} {multimodal_results['n_games']:>14d}")

    print("\n" + "="*60)
    print("✓ PIPELINE COMPLETED")
    print("="*60)
else:
    print("\n⚠ Evaluation results not found")

## 완료!

전체 파이프라인이 실행되었습니다.

### 생성된 파일들
- `data/processed/` - 전처리된 데이터
- `data/text/` - 생성된 대화
- `checkpoints/` - 학습된 모델
- `outputs/` - 평가 결과 및 시각화

### 다음 단계
- [evaluate_models.ipynb](evaluate_models.ipynb)에서 상세한 결과 분석 및 시각화
- 개별 스크립트를 직접 실행하여 파라미터 조정