# ALFWorld + Llama Colab Runner

이 노트북은 `run_alfworld_llama3_zeroshot.py`와 동일한 로직을
Colab의 단일 커널 세션에서 실행할 수 있도록 셀 단위로 나눈 버전입니다.
모델은 한 번만 로드하며, 스모크 테스트와 본 실행을 연속으로 돌릴 수 있습니다.

## 0) (선택) 저장소 준비
- 저장소가 이미 Colab 런타임(또는 Drive)에 있다면 `USE_GIT_CLONE = False`로 둔 채 이 셀을 실행하세요. 이 셀은 clone 여부와 관계없이 작업 디렉터리를 프로젝트 루트로 이동시킵니다.
- clone을 사용할 경우 아래 `REPO_URL`을 본인 저장소 주소로 바꾸세요.

In [None]:
import os
import sys

REPO_URL = "https://github.com/<YOUR_ID>/<YOUR_REPO>.git"
WORKDIR = "/content/NLP_toy_project"
USE_GIT_CLONE = False  # True로 바꾸면 clone 실행

if USE_GIT_CLONE:
    if not os.path.exists(WORKDIR):
        !git clone {REPO_URL} {WORKDIR}

candidate_workdirs = [
    WORKDIR,
    "/content/NLP_toy_project",
    "/content/drive/MyDrive/NLP_toy_project",
]
resolved_workdir = None
for path in candidate_workdirs:
    if os.path.exists(os.path.join(path, "run_alfworld_llama3_zeroshot.py")):
        resolved_workdir = path
        break

if resolved_workdir is None:
    raise FileNotFoundError(
        "Project root not found. Set WORKDIR to the folder containing run_alfworld_llama3_zeroshot.py"
    )

%cd {resolved_workdir}
if resolved_workdir not in sys.path:
    sys.path.insert(0, resolved_workdir)

print("cwd:", os.getcwd())

In [None]:
!pip -q install -U pip
!pip -q install -r requirements.txt

## 1) 환경변수 설정

In [None]:
import os

# Example: /content/drive/MyDrive/alfworld_data
ALFWORLD_DATA_DIR = "/content/drive/MyDrive/alfworld_data"

# Replace HF_TOKEN_VALUE with your Hugging Face token, or leave the placeholder
# untouched to keep an HF_TOKEN that is already present in the environment.
# NOTE(review): avoid committing a real token with the notebook.
HF_TOKEN_PLACEHOLDER = "hf_xxx"
HF_TOKEN_VALUE = "hf_xxx"

os.environ["ALFWORLD_DATA"] = ALFWORLD_DATA_DIR

if HF_TOKEN_VALUE and HF_TOKEN_VALUE != HF_TOKEN_PLACEHOLDER:
    os.environ["HF_TOKEN"] = HF_TOKEN_VALUE
elif os.environ.get("HF_TOKEN") == HF_TOKEN_PLACEHOLDER:
    # A placeholder exported earlier is not a usable token; remove it so the
    # "HF_TOKEN set?" report below (and any later emptiness check) is truthful.
    del os.environ["HF_TOKEN"]

print("ALFWORLD_DATA=", os.environ.get("ALFWORLD_DATA"))
print("HF_TOKEN set?", bool(os.environ.get("HF_TOKEN")))

## 2) 런타임 초기화 (모델 1회 로드)

In [None]:
import os
import sys

# The imports below (alfworld_utils, env_runner, model_client) are resolved via
# sys.path, so make sure the current working directory is on it first.
if os.getcwd() not in sys.path:
    sys.path.insert(0, os.getcwd())

import datetime as dt
import json
from pathlib import Path

import torch

from alfworld_utils import apply_textworld_py313_compat, load_config, set_seed, validate_config_paths
from env_runner import build_env, run_episodes
from model_client import get_llama_action_policy

# Model identifier passed to the policy loader.
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
# Placeholder value used by the environment-variable cell; it is not a real token.
HF_TOKEN_PLACEHOLDER = "hf_xxx"

apply_textworld_py313_compat()
set_seed(42)

config_path = "configs/alfworld_llama3_zeroshot.yaml"
config = load_config(config_path)
validate_config_paths(config)
# Overrides applied on top of the loaded config for this run.
config["general"]["training_method"] = "dqn"
config["env"]["type"] = "AlfredTWEnv"
config["general"]["use_cuda"] = bool(torch.cuda.is_available())

# Fail before building the environment or loading the model when no usable
# token is available.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError("HF_TOKEN is not set.")
if hf_token == HF_TOKEN_PLACEHOLDER:
    # The placeholder is truthy, so without this check it would slip through
    # and only fail later while loading the model.
    raise RuntimeError("HF_TOKEN is still the 'hf_xxx' placeholder; set a real token.")

split = "eval_id"
env = build_env(config, split)

# `policy` is reused by the smoke-test and main-run cells, so this call runs
# only once per kernel session.
policy = get_llama_action_policy(
    model_id=MODEL_ID,
    hf_token=hf_token,
    device_map="auto",
    load_in_4bit=True,
    temperature=0.0,
    top_p=1.0,
    max_new_tokens=32,
    history_window=6,
    reuse=True,
)

print("Runtime ready. Model loaded once.")

## 3) 스모크 테스트

In [None]:
# Smoke test: a short run with the already-initialised `env` and `policy`.
smoke_episodes = 2
smoke_max_steps = 30

smoke_stats = run_episodes(
    env=env, policy=policy, episodes=smoke_episodes, max_steps=smoke_max_steps
)

# Averages are taken over the requested episode count; the floor of 1 guards
# against dividing by zero when smoke_episodes is set to 0.
smoke_denominator = max(smoke_episodes, 1)

smoke_summary = dict(
    timestamp=dt.datetime.now().isoformat(),
    split=split,
    episodes=smoke_episodes,
    max_steps=smoke_max_steps,
    success_rate=smoke_stats["successes"] / smoke_denominator,
    avg_score=smoke_stats["total_score"] / smoke_denominator,
    avg_steps=smoke_stats["total_steps"] / smoke_denominator,
    results=smoke_stats["results"],
)

print(json.dumps(smoke_summary, indent=2))

## 4) 본 실행
- 같은 커널이므로 모델 재로딩 없이 바로 진행됩니다.

In [None]:
# Main run: reuses the `env` and `policy` objects created during runtime init.
main_episodes = 20
main_max_steps = 50

main_stats = run_episodes(
    env=env, policy=policy, episodes=main_episodes, max_steps=main_max_steps
)

# Run metadata first, then the aggregate metrics and per-episode results.
main_summary = {
    "timestamp": dt.datetime.now().isoformat(),
    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
    "split": split,
    "episodes": main_episodes,
    "max_steps": main_max_steps,
}

# Averages use the requested episode count, floored at 1 to avoid dividing by zero.
main_denominator = max(main_episodes, 1)
for summary_key, stats_key in (
    ("success_rate", "successes"),
    ("avg_score", "total_score"),
    ("avg_steps", "total_steps"),
):
    main_summary[summary_key] = main_stats[stats_key] / main_denominator
main_summary["results"] = main_stats["results"]

print(json.dumps(main_summary, indent=2))

In [None]:
# Persist the main-run summary as a timestamped JSON file under ./outputs.
output_dir = Path("outputs")
output_dir.mkdir(parents=True, exist_ok=True)

run_stamp = dt.datetime.now().strftime('%Y%m%d_%H%M%S')
out_path = output_dir / f"alfworld_{split}_colab_{run_stamp}.json"

with out_path.open("w", encoding="utf-8") as summary_file:
    json.dump(main_summary, summary_file, indent=2)

print("Saved summary to:", out_path)