# Final Submission - Base Qwen + Expert Prompt

20-minute sprint. No fine-tuning. Best possible prompt.

# Step 1: Write Agent Files

Run this cell first. It writes `agents/minesweeper_agent.py`, `agents/minesweeper_model.py`, and `minesweeper_config.yaml`.

**Prompt features:**
- Constraint logic with worked example
- Full 12-criteria scoring schedule
- VALID TARGETS list (enumerates all '.' cells)
- No fine-tuning needed, no post-processing

In [None]:
import os

os.makedirs("agents", exist_ok=True)

# ===== THE AGENT =====
AGENT_CODE = r'''#!/usr/bin/python3
"""Minesweeper Agent - Base Model with Expert Prompt"""
import json
import re
from pathlib import Path
from typing import Dict, Any, Optional
from .minesweeper_model import MinesweeperAgent


class MinesweeperPlayer:
    """Turn a Minesweeper game state into an LLM prompt and parse the reply.

    Model loading and text generation are delegated to ``MinesweeperAgent``;
    this class only formats the prompts and extracts the JSON action from the
    generated text.
    """

    # Upper bound on how many '.' cells are enumerated in the user prompt.
    # NOTE(review): on boards with more unknown cells than this the list is
    # truncated even though the prompt says "Pick ONLY from these".
    MAX_LISTED_TARGETS = 40

    def __init__(self, **kwargs):
        # Every keyword argument is forwarded unchanged to the model wrapper.
        self.agent = MinesweeperAgent(**kwargs)

    def build_prompt(self, game_state: Dict[str, Any]) -> tuple:
        """Return ``(user_prompt, system_prompt)`` for ``game_state``.

        ``game_state`` must contain "board" (rows of single-character cells),
        "rows", "cols" and "mines".  "flags_placed" and "cells_revealed" are
        optional and default to 0.
        """
        board = game_state["board"]
        rows = game_state["rows"]
        cols = game_state["cols"]
        mines = game_state["mines"]
        flagged = game_state.get("flags_placed", 0)
        revealed = game_state.get("cells_revealed", 0)

        # One line per board row, prefixed with a right-aligned row index.
        board_str = "\n".join(
            f"{r:>2}|{''.join(board[r])}" for r in range(rows)
        )

        # Enumerate still-unknown cells in row-major order so the model has an
        # explicit list of legal coordinates to choose from.
        valid = [
            f"({r},{c})"
            for r in range(rows)
            for c in range(cols)
            if board[r][c] == '.'
        ]
        valid_str = " ".join(valid[:self.MAX_LISTED_TARGETS])

        prompt = (
            f"Minesweeper {rows}x{cols}, {mines} mines, {flagged} flagged, {revealed} revealed.\n"
            f".=unknown F=flag 0-8=adjacent mines\n\n"
            f"{board_str}\n\n"
            f"VALID TARGETS (cells showing '.'): {valid_str}\n"
            f"Pick ONLY from these. Output JSON:"
        )

        sys_prompt = (
            'You are an expert Minesweeper AI. Analyze the board and output ONE JSON action.\n'
            '\n'
            'BOARD SYMBOLS:\n'
            '- "." = unknown cell (THE ONLY valid target for reveal or flag)\n'
            '- "F" = already flagged (NEVER target this)\n'
            '- "0"-"8" = revealed, shows adjacent mine count (NEVER target this)\n'
            '\n'
            'CONSTRAINT LOGIC (use this to find safe moves):\n'
            'For each numbered cell with value N, count adjacent F (flags) and "." (unknowns):\n'
            '- If N == F_count: remaining mines = 0, ALL adjacent "." are SAFE -> reveal one\n'
            '- If N - F_count == unknown_count: ALL adjacent "." are MINES -> flag one\n'
            '- Otherwise: not enough info, try another numbered cell\n'
            '\n'
            'EXAMPLE:\n'
            'Board has cell at row 2, col 3 showing "2". Its neighbors:\n'
            '  (1,2)=F  (1,3)=.  (1,4)=.\n'
            '  (2,2)=1  [2,3]=2  (2,4)=0\n'
            '  (3,2)=.  (3,3)=1  (3,4)=0\n'
            'Count: F=1, unknowns=[(1,3),(1,4),(3,2)]=3, N-F=2-1=1, 1 mine in 3 unknowns -> uncertain.\n'
            'Now check cell (3,3)="1": neighbors include (3,2)=. and others revealed/flagged.\n'
            'If (3,3)=1 has F=0, unknowns=[(3,2)]=1: N-F=1==unknown_count=1 -> (3,2) is a MINE.\n'
            'Action: {"type":"flag","row":3,"col":2}\n'
            '\n'
            'SCORING:\n'
            '+15: flag a mine | +10/+15: reveal safe cell (+15 if logically deduced)\n'
            '+100: win the game | -25: reveal a mine (GAME OVER!)\n'
            '-12: reveal already-revealed cell | -10: flag non-mine\n'
            '-8: target flagged/revealed cell | -15: out of bounds | -10: invalid JSON\n'
            '\n'
            'RULES:\n'
            '1. Your target cell MUST be "." on the board. Check the VALID TARGETS list.\n'
            '2. NEVER output a row,col that shows 0-8 or F on the board.\n'
            '3. Prefer logically certain moves over guessing.\n'
            '4. Output ONLY: {"type":"reveal","row":N,"col":N} or {"type":"flag","row":N,"col":N}'
        )
        return prompt, sys_prompt

    def play_action(self, game_state, **gen_kwargs):
        """Generate one move for ``game_state``.

        Returns ``(action, tl, gt)`` where ``action`` is the parsed action dict
        (or ``None`` when nothing usable was generated) and ``tl`` / ``gt`` are
        the second and third values returned by ``agent.generate_response``.
        """
        prompt, sys_prompt = self.build_prompt(game_state)
        response, tl, gt = self.agent.generate_response(prompt, sys_prompt, **gen_kwargs)
        action = self.parse_action(response)
        return action, tl, gt

    @staticmethod
    def _coerce_action(obj) -> Optional[Dict]:
        """Return ``obj`` with integer row/col if it is a valid action, else None."""
        if not isinstance(obj, dict):
            return None
        if not ("type" in obj and "row" in obj and "col" in obj):
            return None
        if obj["type"] not in ["reveal", "flag"]:
            return None
        try:
            obj["row"] = int(obj["row"])
            obj["col"] = int(obj["col"])
        except (TypeError, ValueError, OverflowError):
            # Non-numeric (or infinite) coordinates: not a usable action.
            return None
        return obj

    def parse_action(self, response: str) -> Optional[Dict]:
        """Extract the first valid action object from the model's raw text.

        Every "{" in ``response`` is tried as the start of a JSON value, so an
        action is still found when it follows a stray brace, is surrounded by
        prose, or is nested inside another object.  A candidate qualifies when
        it is an object whose "type" is "reveal" or "flag" and whose "row" and
        "col" convert with ``int()``.  Candidates that fail are skipped rather
        than aborting the whole parse.

        Returns the action dict (row/col as ints, extra keys preserved) or
        ``None`` if no candidate qualifies or ``response`` is not a string.
        """
        if not isinstance(response, str):
            return None

        decoder = json.JSONDecoder()
        pos = response.find("{")
        while pos != -1:
            try:
                # raw_decode parses exactly one JSON value starting at pos and
                # ignores whatever trailing text follows it.
                candidate, _ = decoder.raw_decode(response, pos)
            except ValueError:
                candidate = None
            action = self._coerce_action(candidate)
            if action is not None:
                return action
            # Advance by a single brace so nested objects are examined too.
            pos = response.find("{", pos + 1)
        return None

    @staticmethod
    def save_action(action: Dict, file_path) -> None:
        """Write ``action`` as indented JSON to ``file_path``, creating parent dirs."""
        file_path = Path(file_path)
        file_path.parent.mkdir(parents=True, exist_ok=True)
        with open(file_path, "w") as f:
            json.dump(action, f, indent=2)


if __name__ == "__main__":
    # CLI entry point: load a game state from JSON, ask the model for one
    # move and write the resulting action (or an error marker) as JSON.
    import argparse
    import yaml

    argparser = argparse.ArgumentParser()
    argparser.add_argument("--game_state_file", type=str, required=True)
    argparser.add_argument("--output_file", type=str, default="outputs/action.json")
    argparser.add_argument("--verbose", action="store_true")
    args = argparser.parse_args()

    with open(args.game_state_file, "r") as f:
        game_state = json.load(f)

    player = MinesweeperPlayer()
    gen_kwargs = {"tgps_show": args.verbose}
    # Optional generation settings; looked up relative to the working directory.
    config_file = Path("minesweeper_config.yaml")
    if config_file.exists():
        with open(config_file, "r") as f:
            # safe_load returns None for an empty or comment-only file; fall
            # back to {} so dict.update() does not raise TypeError.
            gen_kwargs.update(yaml.safe_load(f) or {})

    action, tl, gt = player.play_action(game_state, **gen_kwargs)
    if args.verbose:
        print(f"Action: {json.dumps(action, indent=2)}")
    if action:
        player.save_action(action, args.output_file)
        print(f"Saved to {args.output_file}")
    else:
        # Still write an output file so the caller can see that parsing failed.
        print("ERROR: No valid action!")
        player.save_action({"error": "parse_failed"}, args.output_file)
'''

# ===== THE MODEL LOADER =====
MODEL_CODE = r'''"""Minesweeper Model Loader"""
import time
import os
import glob
from typing import Optional
from transformers import AutoModelForCausalLM, AutoTokenizer


class MinesweeperAgent(object):
    """Wrapper around a Hugging Face causal LM that answers chat prompts."""

    def __init__(self, **kwargs):
        """Locate a model checkpoint, then load its tokenizer and weights.

        ``kwargs`` are accepted for interface compatibility but not used here.
        """
        # Auto-detect Qwen2.5-14B-Instruct from cache
        model_name = None
        search_dirs = [
            "/root/.cache/huggingface/models--Qwen--Qwen2.5-14B-Instruct",
        ]
        for md in search_dirs:
            if os.path.exists(md):
                # Snapshot directories are sorted by name; the last one is used.
                snaps = sorted(glob.glob(os.path.join(md, "snapshots", "*")))
                if snaps:
                    model_name = snaps[-1]
                    break
        if model_name is None:
            # Fallback: try any available model
            all_models = sorted(glob.glob("/root/.cache/huggingface/models--*/snapshots/*"))
            if all_models:
                model_name = all_models[0]
            else:
                # Nothing cached locally: fall back to the hub identifier.
                model_name = "Qwen/Qwen2.5-14B-Instruct"

        print(f"Loading model: {model_name}")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype="auto",
            device_map="auto",
        )
        self.model.eval()
        print(f"Model loaded on {self.model.device}")

    def generate_response(self, prompt: str, sys_prompt: str = "", **kwargs) -> tuple:
        """Generate a reply to ``prompt`` and return ``(text, total_len, seconds)``.

        Recognised keyword arguments (all optional):
            max_new_tokens: generation budget, default 128.
            temperature: sampling temperature, default 0.3.
            do_sample: defaults to ``temperature > 0``; when false, greedy
                decoding is used and no temperature is passed to ``generate``.
        Any other keyword (for example ``tgps_show``) is accepted and ignored.

        ``total_len`` is the length of the full output sequence (prompt plus
        generated tokens); ``seconds`` is the wall-clock time spent inside
        ``model.generate``.
        """
        messages = []
        if sys_prompt:
            messages.append({"role": "system", "content": sys_prompt})
        messages.append({"role": "user", "content": prompt})

        try:
            text = self.tokenizer.apply_chat_template(
                messages, tokenize=False,
                add_generation_prompt=True, enable_thinking=False
            )
        except TypeError:
            # Retry without enable_thinking for templates that reject it.
            text = self.tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )

        inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)

        # Honour caller/config overrides instead of hard-coding the settings;
        # a None value (e.g. "temperature:" left blank in YAML) means default.
        max_new_tokens = kwargs.get("max_new_tokens")
        max_new_tokens = 128 if max_new_tokens is None else int(max_new_tokens)
        temperature = kwargs.get("temperature")
        temperature = 0.3 if temperature is None else float(temperature)
        do_sample = kwargs.get("do_sample")
        do_sample = temperature > 0 if do_sample is None else bool(do_sample)

        gen_args = {"max_new_tokens": max_new_tokens, "do_sample": do_sample}
        if do_sample:
            # Temperature only applies when sampling.
            gen_args["temperature"] = temperature

        t0 = time.time()
        outputs = self.model.generate(**inputs, **gen_args)
        gen_time = time.time() - t0
        total_len = outputs.shape[1]

        # Drop the prompt tokens so only the newly generated text is decoded.
        response = self.tokenizer.decode(
            outputs[0][inputs.input_ids.shape[1]:],
            skip_special_tokens=True
        )
        return response, total_len, gen_time
'''

# ===== CONFIG YAML =====
# Generation settings written next to the agent; the agent's CLI entry point
# merges this file into its generation kwargs when the file exists.
CONFIG_YAML = """## Minesweeper Agent Configuration ##
temperature: 0.3
max_new_tokens: 128
"""

# Write the three generated artifacts, in a fixed order.
for target_path, payload in (
    ("agents/minesweeper_agent.py", AGENT_CODE),
    ("agents/minesweeper_model.py", MODEL_CODE),
    ("minesweeper_config.yaml", CONFIG_YAML),
):
    with open(target_path, "w") as f:
        f.write(payload)

# Human-readable summary of what was just written.
banner = "=" * 60
print(banner)
print("AGENT FILES WRITTEN:")
for written_line in (
    "  agents/minesweeper_agent.py  (expert prompt + valid targets)",
    "  agents/minesweeper_model.py  (auto-detect Qwen from cache)",
    "  minesweeper_config.yaml      (temp=0.3)",
):
    print(written_line)
print(banner)
print("\nPrompt features:")
for feature_line in (
    "  - Full constraint logic instructions with worked example",
    "  - 12-criteria scoring schedule",
    "  - VALID TARGETS list (all '.' cells enumerated)",
    "  - Aggressive rules: NEVER target 0-8 or F",
    "  - No post-processing, no fine-tuning needed",
):
    print(feature_line)

# Step 2: Quick Test

Loads base Qwen2.5-14B-Instruct and plays 11 games (8x8, 10x10, 6x10, 16x16).
If scores look good, submit the agents/ folder.

In [None]:
import json
import re
import random
from dataclasses import dataclass, field
from typing import List, Tuple, Set
from collections import defaultdict

@dataclass
class MinesweeperGame:
    """Minimal Minesweeper engine used by the local test harness.

    The hidden board stores -1 for a mine and otherwise the number of
    adjacent mines.  Game state is one of "ongoing", "failed" or "success".
    """

    rows: int
    cols: int
    num_mines: int
    seed: int = None  # passed straight to random.Random; None = unseeded
    _rng: random.Random = field(init=False, repr=False)
    _board: List[List[int]] = field(init=False, repr=False)
    _revealed: Set[Tuple[int, int]] = field(init=False, repr=False, default_factory=set)
    _flagged: Set[Tuple[int, int]] = field(init=False, repr=False, default_factory=set)
    _state: str = field(default="ongoing", init=False, repr=False)

    def __post_init__(self):
        """Place the mines and precompute every cell's adjacent-mine count."""
        self._rng = random.Random(self.seed)
        self._board = [[0] * self.cols for _ in range(self.rows)]
        positions = [(r, c) for r in range(self.rows) for c in range(self.cols)]
        for r, c in self._rng.sample(positions, self.num_mines):
            self._board[r][c] = -1
        for r in range(self.rows):
            for c in range(self.cols):
                if self._board[r][c] == -1:
                    continue
                ct = 0
                for dr in [-1, 0, 1]:
                    for dc in [-1, 0, 1]:
                        if dr == 0 and dc == 0:
                            continue
                        nr, nc = r + dr, c + dc
                        if 0 <= nr < self.rows and 0 <= nc < self.cols and self._board[nr][nc] == -1:
                            ct += 1
                self._board[r][c] = ct

    def _reveal_cell(self, row, col):
        """Reveal (row, col), flood-filling through zero cells.

        Returns False when the cell is already revealed or flagged, True
        otherwise.  Revealing a mine sets the state to "failed".
        """
        if (row, col) in self._revealed or (row, col) in self._flagged:
            return False
        stack = [(row, col)]
        while stack:
            r, c = stack.pop()
            if (r, c) in self._revealed:
                continue
            self._revealed.add((r, c))
            if self._board[r][c] == -1:
                self._state = "failed"
                return True
            if self._board[r][c] == 0:
                # A zero has no adjacent mines, so all neighbours are safe to
                # open; flagged neighbours are left untouched.
                for dr in [-1, 0, 1]:
                    for dc in [-1, 0, 1]:
                        if dr == 0 and dc == 0:
                            continue
                        nr, nc = r + dr, c + dc
                        if (0 <= nr < self.rows and 0 <= nc < self.cols
                                and (nr, nc) not in self._revealed
                                and (nr, nc) not in self._flagged):
                            stack.append((nr, nc))
        return True

    def do_action(self, action):
        """Apply ``action`` (a dict with "type", "row", "col") and report the result.

        Returns one of "game_over", "out_of_bounds", "already_revealed",
        "flagged_cell", "invalid_flag", "mine", "win" or "ok".  Flagging an
        already flagged cell removes the flag.  Missing "row"/"col" default
        to 0, and an unrecognised "type" changes nothing and returns "ok".
        """
        if self._state != "ongoing":
            return "game_over"
        at = action.get("type")
        row = int(action.get("row", 0))
        col = int(action.get("col", 0))
        # Reject off-board coordinates explicitly: negative indices would
        # otherwise wrap around to the opposite edge and corrupt the
        # revealed/flagged bookkeeping.
        if not (0 <= row < self.rows and 0 <= col < self.cols):
            return "out_of_bounds"
        if at == "reveal":
            if (row, col) in self._revealed:
                return "already_revealed"
            if (row, col) in self._flagged:
                return "flagged_cell"
            self._reveal_cell(row, col)
        elif at == "flag":
            if (row, col) in self._revealed:
                return "invalid_flag"
            if (row, col) in self._flagged:
                self._flagged.remove((row, col))
            else:
                self._flagged.add((row, col))
        # The game is won once the revealed count equals the number of safe cells.
        total = self.rows * self.cols - self.num_mines
        if len(self._revealed) == total:
            self._state = "success"
        if self._state == "failed":
            return "mine"
        if self._state == "success":
            return "win"
        return "ok"

    def get_visible_board(self):
        """Return the player's view: 'F' flag, '*' revealed mine, digit, or '.'."""
        v = []
        for r in range(self.rows):
            row = []
            for c in range(self.cols):
                if (r, c) in self._flagged:
                    row.append('F')
                elif (r, c) in self._revealed:
                    val = self._board[r][c]
                    row.append('*' if val == -1 else str(val))
                else:
                    row.append('.')
            v.append(row)
        return v

    def state(self):
        """Return the current game state string."""
        return self._state


def get_neighbors(r, c, rows, cols):
    """Return the in-bounds cells adjacent to (r, c), including diagonals.

    Cells are listed row by row from the top-left neighbour to the
    bottom-right one; (r, c) itself is excluded.
    """
    return [
        (r + dr, c + dc)
        for dr in (-1, 0, 1)
        for dc in (-1, 0, 1)
        if (dr, dc) != (0, 0) and 0 <= r + dr < rows and 0 <= c + dc < cols
    ]

def is_logically_deducible(board, rows, cols, action_type, tr, tc):
    """Tell whether the move at (tr, tc) follows from single-cell constraints.

    Repeatedly scans every revealed number 1-8 on ``board`` and applies two
    rules until nothing new is learned: if the mines still owed by a number
    equal its undecided neighbours, those neighbours are mines; if a number
    owes no more mines, its undecided neighbours are safe.  Returns True for
    a "flag" on a deduced mine or a "reveal" on a deduced safe cell.
    """
    deduced_mines, deduced_safe = set(), set()
    progressed = True
    while progressed:
        progressed = False
        for r in range(rows):
            for c in range(cols):
                if board[r][c] not in '12345678':
                    continue
                clue = int(board[r][c])
                flag_total = 0
                undecided = []
                for pos in get_neighbors(r, c, rows, cols):
                    shown = board[pos[0]][pos[1]]
                    if shown == 'F' or pos in deduced_mines:
                        flag_total += 1
                    if shown == '.' and pos not in deduced_mines and pos not in deduced_safe:
                        undecided.append(pos)
                owed = clue - flag_total
                # Nothing to learn from an over-flagged clue or from one with
                # no undecided neighbours left.
                if owed < 0 or not undecided:
                    continue
                if owed == len(undecided):
                    deduced_mines.update(undecided)
                    progressed = True
                elif owed == 0:
                    deduced_safe.update(undecided)
                    progressed = True
    target = (tr, tc)
    if action_type == "flag":
        return target in deduced_mines
    if action_type == "reveal":
        return target in deduced_safe
    return False

def parse_llm_action(response):
    """Return the last valid action object found in ``response``, or None.

    Only flat ``{...}`` spans (no nested braces) are considered.  A span
    qualifies when it parses as JSON, has "type", "row" and "col" keys and its
    "type" is "reveal" or "flag".  Row/col are returned as parsed, without
    integer conversion.  When several spans qualify, the last one wins.
    """
    best = None
    for m in re.finditer(r'\{[^{}]*\}', response):
        try:
            a = json.loads(m.group())
        except ValueError:
            # Not valid JSON (json.JSONDecodeError subclasses ValueError).
            continue
        if "type" in a and "row" in a and "col" in a and a["type"] in ["reveal", "flag"]:
            best = a
    return best


# ---- Load model and test ----
from unsloth import FastLanguageModel
import torch, glob as glob_mod

# Resolve which checkpoint to load: the lexicographically last snapshot of the
# cached Qwen2.5-14B-Instruct, else the first snapshot of any cached model,
# else the hub identifier.
model_name = None
for cache_dir in ["/root/.cache/huggingface/models--Qwen--Qwen2.5-14B-Instruct"]:
    if not os.path.exists(cache_dir):
        continue
    snapshot_dirs = sorted(glob_mod.glob(os.path.join(cache_dir, "snapshots", "*")))
    if snapshot_dirs:
        model_name = snapshot_dirs[-1]
        break
if model_name is None:
    cached_snapshots = sorted(glob_mod.glob("/root/.cache/huggingface/models--*/snapshots/*"))
    if cached_snapshots:
        model_name = cached_snapshots[0]
    else:
        model_name = "Qwen/Qwen2.5-14B-Instruct"

print(f"Loading {model_name}...")
# Load in bf16 without 4-bit quantisation, then switch to inference mode.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    load_in_4bit=False,
    max_seq_length=4096,
    torch_dtype=torch.bfloat16,
)
FastLanguageModel.for_inference(model)
print(f"Loaded on {model.device}")

# ---- Build prompt (same as agent) ----
# System prompt used by the local test harness.  It is a verbatim copy of the
# `sys_prompt` built in the agent's `build_prompt`; keep the two in sync so
# the quick test measures the same prompt the submitted agent uses.
# Sections: board symbols, constraint logic, a worked example, the scoring
# table, and the output rules.
SYS_PROMPT = (
    'You are an expert Minesweeper AI. Analyze the board and output ONE JSON action.\n'
    '\n'
    'BOARD SYMBOLS:\n'
    '- "." = unknown cell (THE ONLY valid target for reveal or flag)\n'
    '- "F" = already flagged (NEVER target this)\n'
    '- "0"-"8" = revealed, shows adjacent mine count (NEVER target this)\n'
    '\n'
    'CONSTRAINT LOGIC (use this to find safe moves):\n'
    'For each numbered cell with value N, count adjacent F (flags) and "." (unknowns):\n'
    '- If N == F_count: remaining mines = 0, ALL adjacent "." are SAFE -> reveal one\n'
    '- If N - F_count == unknown_count: ALL adjacent "." are MINES -> flag one\n'
    '- Otherwise: not enough info, try another numbered cell\n'
    '\n'
    'EXAMPLE:\n'
    'Board has cell at row 2, col 3 showing "2". Its neighbors:\n'
    '  (1,2)=F  (1,3)=.  (1,4)=.\n'
    '  (2,2)=1  [2,3]=2  (2,4)=0\n'
    '  (3,2)=.  (3,3)=1  (3,4)=0\n'
    'Count: F=1, unknowns=[(1,3),(1,4),(3,2)]=3, N-F=2-1=1, 1 mine in 3 unknowns -> uncertain.\n'
    'Now check cell (3,3)="1": neighbors include (3,2)=. and others revealed/flagged.\n'
    'If (3,3)=1 has F=0, unknowns=[(3,2)]=1: N-F=1==unknown_count=1 -> (3,2) is a MINE.\n'
    'Action: {"type":"flag","row":3,"col":2}\n'
    '\n'
    'SCORING:\n'
    '+15: flag a mine | +10/+15: reveal safe cell (+15 if logically deduced)\n'
    '+100: win the game | -25: reveal a mine (GAME OVER!)\n'
    '-12: reveal already-revealed cell | -10: flag non-mine\n'
    '-8: target flagged/revealed cell | -15: out of bounds | -10: invalid JSON\n'
    '\n'
    'RULES:\n'
    '1. Your target cell MUST be "." on the board. Check the VALID TARGETS list.\n'
    '2. NEVER output a row,col that shows 0-8 or F on the board.\n'
    '3. Prefer logically certain moves over guessing.\n'
    '4. Output ONLY: {"type":"reveal","row":N,"col":N} or {"type":"flag","row":N,"col":N}'
)

def build_prompt(game):
    """Render the user prompt for the current position of ``game``.

    The prompt has a one-line summary, a symbol legend, the visible board
    with right-aligned row indices, and a list of at most 40 unknown cells
    in row-major order.
    """
    grid = game.get_visible_board()
    n_rows, n_cols = game.rows, game.cols

    board_str = "\n".join(
        f"{r:>2}|{''.join(grid[r])}" for r in range(n_rows)
    )

    targets = [
        f"({r},{c})"
        for r in range(n_rows)
        for c in range(n_cols)
        if grid[r][c] == '.'
    ]
    # Only the first 40 targets are listed.
    valid_str = " ".join(targets[:40])

    summary = (
        f"Minesweeper {n_rows}x{n_cols}, {game.num_mines} mines, "
        f"{len(game._flagged)} flagged, {len(game._revealed)} revealed.\n"
    )
    legend = ".=unknown F=flag 0-8=adjacent mines\n\n"
    targets_line = f"VALID TARGETS (cells showing '.'): {valid_str}\n"
    return summary + legend + f"{board_str}\n\n" + targets_line + "Pick ONLY from these. Output JSON:"


def play_game(rows, cols, mines, seed, max_moves=200):
    """Play one seeded game with the loaded model and score every move.

    The harness itself opens the centre cell; the model then moves until the
    game ends, ``max_moves`` is reached, or it makes 5 invalid moves in a row.

    Returns ``(state, moves, score, inv)``: the final game state ("success",
    "failed" or "ongoing"), the number of model moves, the accumulated score
    and a per-category count of penalised moves.
    """
    game = MinesweeperGame(rows=rows, cols=cols, num_mines=mines, seed=seed)
    # Opening move; if it lands on a mine the loop below never runs.
    game.do_action({"type": "reveal", "row": rows//2, "col": cols//2})
    moves = 0
    score = 0.0
    bad = 0  # consecutive invalid moves; reset by any valid reveal or flag
    inv = {"already_revealed": 0, "reveal_flagged": 0, "already_flagged": 0,
           "flag_revealed": 0, "oob": 0, "mine_hit": 0, "wrong_flag": 0, "invalid_json": 0}

    while game.state() == "ongoing" and moves < max_moves and bad < 5:
        prompt = build_prompt(game)
        msgs = [{"role": "system", "content": SYS_PROMPT}, {"role": "user", "content": prompt}]
        try:
            text = tokenizer.apply_chat_template(msgs, tokenize=False,
                add_generation_prompt=True, enable_thinking=False)
        except TypeError:
            # Retry without enable_thinking for templates that reject it.
            text = tokenizer.apply_chat_template(msgs, tokenize=False,
                add_generation_prompt=True)
        inp = tokenizer(text, return_tensors="pt").to(model.device)
        out = model.generate(**inp, temperature=0.3, max_new_tokens=128, do_sample=True)
        resp = tokenizer.decode(out[0][inp.input_ids.shape[1]:], skip_special_tokens=True)
        action = parse_llm_action(resp)
        moves += 1

        if action is None:
            score -= 10; bad += 1; inv["invalid_json"] += 1
            continue
        try:
            row, col = int(action["row"]), int(action["col"])
        except (KeyError, TypeError, ValueError):
            # Coordinates that are not integer-like count as invalid JSON.
            score -= 10; bad += 1; inv["invalid_json"] += 1
            continue
        at = action["type"]

        if not (0 <= row < rows and 0 <= col < cols):
            score -= 15; bad += 1; inv["oob"] += 1
            continue
        if at == "reveal":
            if (row, col) in game._revealed:
                score -= 12; bad += 1; inv["already_revealed"] += 1
                continue
            if (row, col) in game._flagged:
                score -= 8; bad += 1; inv["reveal_flagged"] += 1
                continue
            if game._board[row][col] == -1:
                score -= 25; inv["mine_hit"] += 1
                # Apply the move so the returned state is "failed" rather
                # than a stale "ongoing".
                game.do_action(action)
                break
            bad = 0
            # Deducibility must be judged on the board as it was before the
            # move is applied.
            board = game.get_visible_board()
            il = is_logically_deducible(board, rows, cols, "reveal", row, col)
            score += 15 if il else 10
            game.do_action(action)
            if game.state() == "success":
                score += 100
        elif at == "flag":
            if (row, col) in game._revealed:
                score -= 8; bad += 1; inv["flag_revealed"] += 1
                continue
            if (row, col) in game._flagged:
                score -= 8; bad += 1; inv["already_flagged"] += 1
                continue
            bad = 0
            # Extra penalty for placing more flags than there are mines.
            if len(game._flagged)+1 > mines:
                score -= 10
            if game._board[row][col] == -1:
                score += 15
            else:
                score -= 10; inv["wrong_flag"] += 1
            game.do_action(action)

    return game.state(), moves, score, inv


# ---- Run quick test ----
# Plays each board configuration on consecutive seeds starting at 42, then
# prints per-configuration and overall results.
rule = "=" * 60
print("\n" + rule)
print("QUICK TEST: Base Qwen2.5-14B + Expert Prompt")
print(rule)

# (rows, cols, mines, number of seeds, label)
configs = [
    (8, 8, 10, 3, "8x8"),
    (10, 10, 15, 3, "10x10"),
    (6, 10, 8, 3, "6x10"),
    (16, 16, 40, 2, "16x16"),
]

grand_wins = 0
grand_games = 0
grand_score = 0
grand_inv = dict.fromkeys(
    ["already_revealed", "reveal_flagged", "already_flagged",
     "flag_revealed", "oob", "mine_hit", "wrong_flag", "invalid_json"],
    0,
)

for rows, cols, mines, n_seeds, label in configs:
    # Each outcome is (state, moves, score, invalid-move counts).
    outcomes = [play_game(rows, cols, mines, seed=42 + offset) for offset in range(n_seeds)]
    wins = sum(1 for outcome in outcomes if outcome[0] == "success")
    total_mv = sum(outcome[1] for outcome in outcomes)
    total_sc = sum(outcome[2] for outcome in outcomes)
    for outcome in outcomes:
        for category, count in outcome[3].items():
            grand_inv[category] += count
    avg = total_sc / n_seeds
    grand_wins += wins
    grand_games += n_seeds
    grand_score += total_sc
    print(f"  {label}: {wins}/{n_seeds} wins, avg {total_mv/n_seeds:.1f} moves, avg score {avg:+.1f}")

print(f"\nTOTAL: {grand_wins}/{grand_games} wins, avg score {grand_score/grand_games:+.1f}")
# Only categories that actually occurred are listed.
inv_str = ", ".join(f"{k}={v}" for k, v in grand_inv.items() if v > 0)
print(f"Invalid moves: {inv_str or 'NONE'}")
print(rule)
print("\nAgent files are ready in agents/ folder.")
print("If scores look good, SUBMIT NOW!")