# Blackjack RL Mega Notebook

This mega notebook provisions a full reinforcement learning pipeline for blackjack, including environment setup, agent training, evaluation, interactive analysis, and validation. Run it top-to-bottom for a clean, reproducible workflow.

## Table of Contents
1. [Kernel & Environment Diagnostics](#kernel--environment-diagnostics)
2. [Project Setup](#project-setup)
3. [Kernel Package Check](#kernel-package-check)
4. [Module Import Sanity](#module-import-sanity)
5. [Seeding Utilities](#seeding-utilities)
6. [Compatibility Check](#compatibility-check)
7. [Project Layout](#project-layout)
8. [Configuration](#configuration)
9. [Environment Preview](#environment-preview)
10. [Training](#training)
11. [Evaluation & Plots](#evaluation--plots)
12. [Live Testing UI](#live-testing-ui)
13. [Run Tests](#run-tests)
14. [Notebook Validation](#notebook-validation)
15. [Final Report](#final-report)

## Kernel & Environment Diagnostics

In [None]:
import sys
import subprocess
import os
from pathlib import Path


def show_kernel_env():
    """Print the interpreter backing this kernel and the pip version bound to it."""
    interpreter = sys.executable
    print("Kernel Python:", interpreter)
    # check=False: a non-zero exit from pip must not abort the diagnostics cell.
    pip_version_cmd = [interpreter, "-m", "pip", "--version"]
    subprocess.run(pip_version_cmd, check=False)


show_kernel_env()

## Project Setup

In [None]:
import sys
import shutil
import subprocess
from pathlib import Path

# Remote repository cloned into the scratch workspace below.
REPO_URL = "https://github.com/ggvick/blackjackai.git"
# All workspace paths are relative to the kernel's current working directory.
WORK_ROOT = Path("proj")
REPO_DIR = WORK_ROOT / "blackjackai"
RUNS_DIR = WORK_ROOT / "runs"
NOTEBOOKS_DIR = WORK_ROOT / "notebooks"
# Where a copy of this notebook is looked for (<cwd>/notebooks/...); it is optional.
NOTEBOOK_SRC = Path.cwd() / "notebooks" / "Blackjack_RL_Mega_Notebook.ipynb"

# Start from a clean slate. NOTE: this removes everything under proj/,
# including the runs directory of any earlier session.
if WORK_ROOT.exists():
    shutil.rmtree(WORK_ROOT)
WORK_ROOT.mkdir(parents=True)

# check_call raises CalledProcessError if the clone exits non-zero.
subprocess.check_call(["git", "clone", REPO_URL, str(REPO_DIR)])

RUNS_DIR.mkdir(exist_ok=True)
NOTEBOOKS_DIR.mkdir(exist_ok=True)

# Copy the notebook into the workspace only when it is present at the expected path.
if NOTEBOOK_SRC.exists():
    shutil.copy2(NOTEBOOK_SRC, NOTEBOOKS_DIR / NOTEBOOK_SRC.name)

print("✅ Project root:", WORK_ROOT.resolve())
print("✅ Repo dir:", REPO_DIR.resolve())
print("✅ Runs dir:", RUNS_DIR.resolve())
print("✅ Notebooks dir:", NOTEBOOKS_DIR.resolve())

# Globals consumed by later cells. NOTEBOOK_PATH is set even if the copy above was skipped.
PROJECT_ROOT = REPO_DIR
NOTEBOOK_PATH = NOTEBOOKS_DIR / NOTEBOOK_SRC.name

## Kernel Package Check

In [None]:
%pip install -q numpy pandas matplotlib torch
show_kernel_env()

## Module Import Sanity

In [None]:
import sys
import importlib.util
from pathlib import Path

# Fail fast with an actionable message if the clone from Project Setup is missing.
REPO_DIR = Path("proj/blackjackai")
if not REPO_DIR.exists():
    raise FileNotFoundError("Expected repository at proj/blackjackai. Run the Project Setup cell first.")

# Put the repository root at the front of sys.path so its top-level packages
# can be imported; the membership test avoids duplicate entries on re-runs.
module_parent = REPO_DIR
resolved_parent = str(module_parent.resolve())
if resolved_parent not in sys.path:
    sys.path.insert(0, resolved_parent)

print("sys.path[0]:", sys.path[0])

# find_spec locates the package without importing it, so a missing package is
# reported with a clear error instead of a raw import traceback.
spec = importlib.util.find_spec("blackjack_env")
if spec is None:
    raise ModuleNotFoundError("Could not locate 'blackjack_env'. Confirm that proj/blackjackai/blackjack_env/__init__.py exists.")
print("find_spec('blackjack_env'):", bool(spec))

# Smoke-test the two imports the rest of the notebook relies on.
from blackjack_env.env import EnvConfig
from blackjack_env.utils import set_global_seeds

print("✅ Imports OK:", EnvConfig, set_global_seeds)


## Seeding Utilities

In [None]:
import random
import numpy as np
import torch


def seed_everything(seed: int = 42):
    """Seed the ``random``, NumPy and PyTorch generators with ``seed``.

    PyTorch seeding is best-effort: any exception it raises is reported with
    a printed warning instead of being propagated.
    """
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
    try:
        torch.manual_seed(seed)
        cuda_present = torch.cuda.is_available()
        if cuda_present:
            # Cover every visible CUDA device, not only the current one.
            torch.cuda.manual_seed_all(seed)
    except Exception as exc:  # pragma: no cover
        print("Torch seeding warning:", exc)


seed_everything(42)


## Compatibility Check

In [None]:
import sys
import pandas as pd
import numpy as np
from IPython.display import display

# torch is treated as optional here: when the import fails the table reports
# it as "not installed" instead of raising.
try:
    import torch
except Exception:  # pragma: no cover
    torch = None

component_versions = {
    "Python": sys.version.split()[0],
    "NumPy": np.__version__,
    "Pandas": pd.__version__,
    # getattr on None falls through to the default string.
    "Torch": getattr(torch, '__version__', 'not installed'),
}
rows = list(component_versions.items())
compat_df = pd.DataFrame(rows, columns=["Component", "Version"])
compat_view = compat_df.style.hide(axis="index")
display(compat_view)


## Project Layout

In [None]:
from pathlib import Path
import pandas as pd
from IPython.display import display

# Re-declare the workspace paths so this cell also works after a kernel
# restart, without re-running (and re-cloning in) Project Setup.
REPO_DIR = Path("proj/blackjackai")
RUNS_DIR = Path("proj") / "runs"
NOTEBOOKS_DIR = Path("proj") / "notebooks"
NOTEBOOK_PATH = NOTEBOOKS_DIR / "Blackjack_RL_Mega_Notebook.ipynb"

if not REPO_DIR.exists():
    raise FileNotFoundError("Repository missing at proj/blackjackai. Re-run the Project Setup section.")

RUNS_DIR.mkdir(parents=True, exist_ok=True)
NOTEBOOKS_DIR.mkdir(parents=True, exist_ok=True)

# One row per location; the notebook copy is optional, so it may show "missing".
path_rows = [
    ("Repository", REPO_DIR.resolve()),
    ("Runs dir", RUNS_DIR.resolve()),
    ("Notebooks dir", NOTEBOOKS_DIR.resolve()),
    ("Active notebook", NOTEBOOK_PATH.resolve() if NOTEBOOK_PATH.exists() else "missing"),
]
paths_df = pd.DataFrame(path_rows, columns=["Item", "Location"])
display(paths_df.style.hide(axis="index"))

# Read by the Configuration cell when it builds CONFIG_STATE["paths"].
PROJECT_ROOT = REPO_DIR


## Configuration

In [None]:
import json
import os
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path

import ipywidgets as widgets
import pandas as pd
from IPython.display import HTML, clear_output, display

from blackjack_env.env import EnvConfig
from blackjack_env.utils import set_global_seeds

# Light mode selects the smaller defaults in the config dataclasses below.
# It is on only when the NBMAKE_ACTIVE environment variable is exactly "1".
LIGHT_MODE = os.environ.get("NBMAKE_ACTIVE", "0") == "1"

@dataclass
class EnvUIConfig:
    """Environment settings stored in ``CONFIG_STATE["env"]``.

    Later cells expand an instance with ``asdict`` into ``EnvConfig(**...)``,
    so the field names are assumed to match ``EnvConfig``'s keyword
    arguments (TODO confirm against ``blackjack_env.env``).

    Only ``num_decks``, ``penetration``, ``bankroll_start``,
    ``bankroll_target``, ``min_bet``, ``max_bet``, ``bet_levels`` and ``seed``
    have widgets; the other fields keep the defaults below whenever the UI
    rebuilds this config.
    """
    num_decks: int = 6
    penetration: float = 0.75
    bankroll_start: float = 100.0
    bankroll_target: float = 200.0
    min_bet: float = 5.0
    max_bet: float = 100.0
    bet_levels: int = 8
    natural_payout: float = 1.5
    hit_soft_17: bool = True
    allow_surrender: bool = True
    allow_double: bool = True
    allow_split: bool = True
    max_splits: int = 3
    # Also passed to set_global_seeds when the configuration is applied.
    seed: int = 42

@dataclass
class AgentUIConfig:
    """Agent hyper-parameters stored in ``CONFIG_STATE["agent"]``.

    The Training cell forwards every field as a keyword argument to the
    Rainbow agent config. The numeric defaults are chosen once, when this
    class is defined, from ``LIGHT_MODE`` (small values in light mode).
    """
    buffer_size: int = 2048 if LIGHT_MODE else 50000
    batch_size: int = 64 if LIGHT_MODE else 256
    min_buffer_size: int = 64 if LIGHT_MODE else 2048
    target_update_interval: int = 250 if LIGHT_MODE else 15000
    epsilon_decay: int = 2000 if LIGHT_MODE else 1_200_000
    use_noisy: bool = False
    enable_c51: bool = True
    use_amp: bool = False

@dataclass
class TrainUIConfig:
    """Training-loop settings stored in ``CONFIG_STATE["train"]``.

    Defaults are chosen once, at class-definition time, from ``LIGHT_MODE``.
    """
    # Passed as ``steps=`` to ``agent.train`` in the Training cell.
    steps: int = 512 if LIGHT_MODE else 20_000
    log_interval: int = 128 if LIGHT_MODE else 2000
    # Upper bound on evaluation hands; the Evaluation cell caps it further.
    eval_hands: int = 200 if LIGHT_MODE else 10_000
# Single mutable source of truth for the notebook: the three config
# dataclasses plus workspace paths. apply_configs() replaces the dataclass
# entries whenever the widgets are applied.
CONFIG_STATE = {
    "env": EnvUIConfig(),
    "agent": AgentUIConfig(),
    "train": TrainUIConfig(),
    "paths": {"project_root": str(PROJECT_ROOT), "runs": str(Path("proj") / "runs"), "notebooks": str(Path("proj") / "notebooks")},
}

# Show the seed in effect before any widget interaction.
seed_table = pd.DataFrame([{"Component": "Global Seed", "Value": CONFIG_STATE["env"].seed}])
display(seed_table.style.hide(axis="index"))

set_global_seeds(CONFIG_STATE["env"].seed)

# Shared widget style so the description labels line up.
style = {"description_width": "150px"}

# Each dict maps a dataclass field name to the widget that edits it;
# apply_configs() and _as_env_config() read the widgets back by these keys.
env_widgets = {
    "num_decks": widgets.IntSlider(value=CONFIG_STATE["env"].num_decks, min=1, max=8, description="Decks", style=style),
    "penetration": widgets.FloatSlider(value=CONFIG_STATE["env"].penetration, min=0.1, max=0.95, step=0.05, description="Penetration", style=style),
    "bankroll_start": widgets.FloatText(value=CONFIG_STATE["env"].bankroll_start, description="Bankroll", style=style),
    "bankroll_target": widgets.FloatText(value=CONFIG_STATE["env"].bankroll_target, description="Target", style=style),
    "min_bet": widgets.FloatText(value=CONFIG_STATE["env"].min_bet, description="Min Bet", style=style),
    "max_bet": widgets.FloatText(value=CONFIG_STATE["env"].max_bet, description="Max Bet", style=style),
    "bet_levels": widgets.IntSlider(value=CONFIG_STATE["env"].bet_levels, min=1, max=16, description="Bet Levels", style=style),
    "seed": widgets.IntText(value=CONFIG_STATE["env"].seed, description="Seed", style=style),
}

agent_widgets = {
    "buffer_size": widgets.IntSlider(value=CONFIG_STATE["agent"].buffer_size, min=512, max=200000, step=512, description="Buffer", style=style),
    "batch_size": widgets.IntSlider(value=CONFIG_STATE["agent"].batch_size, min=32, max=1024, step=32, description="Batch", style=style),
    "min_buffer_size": widgets.IntSlider(value=CONFIG_STATE["agent"].min_buffer_size, min=32, max=5000, step=32, description="Min Buffer", style=style),
    "target_update_interval": widgets.IntSlider(value=CONFIG_STATE["agent"].target_update_interval, min=100, max=20000, step=100, description="Target Update", style=style),
    "epsilon_decay": widgets.IntSlider(value=CONFIG_STATE["agent"].epsilon_decay, min=500, max=1_500_000, step=500, description="Epsilon Decay", style=style),
    "use_noisy": widgets.Checkbox(value=CONFIG_STATE["agent"].use_noisy, description="Use Noisy"),
    "enable_c51": widgets.Checkbox(value=CONFIG_STATE["agent"].enable_c51, description="Enable C51"),
    "use_amp": widgets.Checkbox(value=CONFIG_STATE["agent"].use_amp, description="Use AMP"),
}

train_widgets = {
    "steps": widgets.IntSlider(value=CONFIG_STATE["train"].steps, min=128, max=20000, step=128, description="Train Steps", style=style),
    "log_interval": widgets.IntSlider(value=CONFIG_STATE["train"].log_interval, min=32, max=5000, step=32, description="Log Interval", style=style),
    "eval_hands": widgets.IntSlider(value=CONFIG_STATE["train"].eval_hands, min=100, max=200000, step=100, description="Eval Hands", style=style),
}

# Output area that shows the JSON of the most recently applied configuration.
config_output = widgets.Output()


def _as_env_config() -> EnvConfig:
    """Build an ``EnvConfig`` from the current values of the environment widgets.

    Every key of ``env_widgets`` is forwarded as the keyword argument of the
    same name.
    """
    current_values = {field: control.value for field, control in env_widgets.items()}
    return EnvConfig(**current_values)


def apply_configs(_=None):
    """Copy the widget values into ``CONFIG_STATE``, reseed, and show the result.

    Usable both as a button callback (the clicked button arrives as ``_`` and
    is ignored) and as a direct call with no arguments.
    """

    def _snapshot(controls):
        # The widget dict keys are exactly the dataclass field names.
        return {field: control.value for field, control in controls.items()}

    CONFIG_STATE["env"] = EnvUIConfig(**_snapshot(env_widgets))
    CONFIG_STATE["agent"] = AgentUIConfig(**_snapshot(agent_widgets))
    CONFIG_STATE["train"] = TrainUIConfig(**_snapshot(train_widgets))

    # Reseed so a seed change in the UI takes effect immediately.
    set_global_seeds(CONFIG_STATE["env"].seed)

    payload = {section: asdict(CONFIG_STATE[section]) for section in ("env", "agent", "train")}
    payload["paths"] = CONFIG_STATE["paths"]
    cfg_json = json.dumps(payload, indent=2)

    with config_output:
        clear_output(wait=True)
        rendered = HTML(f"<pre style='font-size:12px'>{cfg_json}</pre>")
        display(rendered)


# "Apply Config" pushes the widget values into CONFIG_STATE via apply_configs.
apply_button = widgets.Button(description="Apply Config", button_style="success")
apply_button.on_click(apply_configs)

# The click handler for this button is attached after copy_to_clipboard is defined.
copy_button = widgets.Button(description="Copy Config", button_style="info")


def copy_to_clipboard(_):
    """Emit a clipboard-copy script for the stored config and save it to disk.

    The JSON is built from ``CONFIG_STATE`` (the last applied configuration),
    not from the live widget values. It is also written to
    ``config_snapshot.json`` in the runs directory.
    """
    payload = {section: asdict(CONFIG_STATE[section]) for section in ("env", "agent", "train")}
    payload["paths"] = CONFIG_STATE["paths"]
    cfg_json = json.dumps(payload, indent=2)

    # A second json.dumps turns the JSON text into a quoted JavaScript string literal.
    js_literal = json.dumps(cfg_json)
    clipboard_script = HTML(f"<script>navigator.clipboard.writeText({js_literal});</script>")
    with config_output:
        display(clipboard_script)

    snapshot_file = Path(CONFIG_STATE["paths"]["runs"]) / "config_snapshot.json"
    snapshot_file.parent.mkdir(parents=True, exist_ok=True)
    snapshot_file.write_text(cfg_json)


copy_button.on_click(copy_to_clipboard)

# Layout: one labelled row of controls per config section, then the action
# buttons, then the output area that shows the applied JSON.
ui = widgets.VBox([
    widgets.HTML("<b>Environment</b>"),
    widgets.HBox(list(env_widgets.values())),
    widgets.HTML("<b>Agent</b>"),
    widgets.HBox(list(agent_widgets.values())),
    widgets.HTML("<b>Training</b>"),
    widgets.HBox(list(train_widgets.values())),
    widgets.HBox([apply_button, copy_button]),
    config_output,
])

display(ui)
# Apply once up front so CONFIG_STATE and the JSON preview match the widgets.
apply_configs()


## Environment Preview

In [None]:
import pandas as pd
from blackjack_env.env import BlackjackEnv
from blackjack_env.masking import ACTIONS

# Build a throwaway environment from the applied UI config.
preview_env = BlackjackEnv(EnvConfig(**asdict(CONFIG_STATE["env"])) )
obs = preview_env.reset()
# Submit the {"bet": 0} action so the preview shows the state after betting.
obs, _, _, _ = preview_env.step({"bet": 0})
mask = preview_env.available_actions()

# Pair each action name with its entry in the mask returned above.
obs_series = pd.Series(obs, name="feature")
mask_df = pd.DataFrame({"action": ACTIONS, "legal": mask})

# Only the first 20 observation entries are shown.
display(obs_series.to_frame("value").head(20))
display(mask_df)


## Training

In [None]:
import time
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import torch

from agents.dqn_rainbow import AgentConfig as RainbowAgentConfig, RainbowDQNAgent

# Fresh environment built from the applied UI configuration.
train_env = BlackjackEnv(EnvConfig(**asdict(CONFIG_STATE["env"])) )
# Explicit field-by-field copy of the agent UI config into keyword arguments.
agent_cfg_kwargs = {
    "buffer_size": CONFIG_STATE["agent"].buffer_size,
    "batch_size": CONFIG_STATE["agent"].batch_size,
    "min_buffer_size": CONFIG_STATE["agent"].min_buffer_size,
    "target_update_interval": CONFIG_STATE["agent"].target_update_interval,
    "epsilon_decay": CONFIG_STATE["agent"].epsilon_decay,
    "use_noisy": CONFIG_STATE["agent"].use_noisy,
    "enable_c51": CONFIG_STATE["agent"].enable_c51,
    "use_amp": CONFIG_STATE["agent"].use_amp,
}

# The observation size and bet-action count are taken from the environment.
agent_config = RainbowAgentConfig(
    observation_dim=train_env.observation_space.size,
    bet_actions=train_env.config.bet_levels,
    **agent_cfg_kwargs,
)

agent = RainbowDQNAgent(agent_config)
# One timestamped directory per training run, with a checkpoints/ subfolder.
run_dir = Path(CONFIG_STATE["paths"]["runs"]) / datetime.now().strftime("%Y%m%d-%H%M%S")
(run_dir / "checkpoints").mkdir(parents=True, exist_ok=True)

# Filled by training_callback and by the final metrics after training.
metrics_log = []


def training_callback(step, metrics):
    """Record one metrics report and refresh the live training table.

    The logged step is ``agent.global_step``; the ``step`` argument is not
    used. Only the ten most recent rows are displayed.
    """
    record = {"step": agent.global_step, **metrics}
    metrics_log.append(record)
    with training_output:
        clear_output(wait=True)
        history = pd.DataFrame(metrics_log)
        if history.empty:
            return
        display(history.tail(10))


# Output area that training_callback redraws on every report.
training_output = widgets.Output()
display(training_output)

metrics = agent.train(train_env, steps=CONFIG_STATE["train"].steps, callback=training_callback)

# Append the metrics returned by train() as the last row, then persist the log.
metrics_log.append({"step": agent.global_step, **metrics})
metrics_df = pd.DataFrame(metrics_log)
metrics_path = run_dir / "metrics.json"
metrics_path.write_text(metrics_df.to_json(orient="records", indent=2))

# Save the configuration used for this run next to its metrics.
config_path = run_dir / "params.json"
config_payload = {
    "env": asdict(CONFIG_STATE["env"]),
    "agent": asdict(CONFIG_STATE["agent"]),
    "train": asdict(CONFIG_STATE["train"]),
}
config_path.write_text(json.dumps(config_payload, indent=2))

# Only the online network's state_dict is checkpointed.
checkpoint_path = run_dir / "checkpoints" / "final.pt"
torch.save(agent.online_net.state_dict(), checkpoint_path)

# Handed to the Evaluation and Live Testing cells.
TRAINING_STATE = {
    "agent": agent,
    "env_config": EnvConfig(**asdict(CONFIG_STATE["env"])),
    "run_dir": run_dir,
    "metrics_df": metrics_df,
}

metrics_df.head()


## Evaluation & Plots

In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from agents.eval import evaluate_policy
from blackjack_env.basic_strategy import BasicStrategyPolicy
from blackjack_env.masking import ACTIONS

# Default figure size (inches) for figures that do not pass their own figsize.
plt.rcParams["figure.figsize"] = (8, 4)

def collect_decisions(agent, env_config, hands=200):
    """Play ``agent`` in a fresh environment and log one row per completed round.

    A row is recorded only for the action whose step reports
    ``round_complete``; it holds that action's name, its reward, the bankroll
    before and after the step, and the running count after the step.

    Args:
        agent: object exposing ``act_play(obs, mask)`` returning a pair whose
            first item is the action index.
        env_config: configuration passed to ``BlackjackEnv``.
        hands: number of rows to collect.

    Returns:
        A ``pandas.DataFrame`` with one row per recorded round.
    """
    env = BlackjackEnv(env_config)
    env.reset()
    records = []
    while len(records) < hands:
        # In the bet stage, submit the {"bet": 0} action and re-check the stage.
        if env.state.stage == "bet":
            env.step({"bet": 0})
            continue

        legal_mask = env.available_actions()
        observation = env._current_observation()
        action, _q_values = agent.act_play(observation, legal_mask)

        bankroll_before = env.state.bankroll
        _, reward, done, info = env.step(action)
        bankroll_after = env.state.bankroll

        if info.get("round_complete"):
            row = {
                "hand": len(records),
                "action": ACTIONS[action],
                "reward": reward,
                "bankroll_before": bankroll_before,
                "bankroll_after": bankroll_after,
                "running_count": env.state.count_state.running_count,
            }
            records.append(row)
        if done:
            # Episode over: start a new one and keep collecting.
            env.reset()
    return pd.DataFrame(records)


# The decision log is capped at 1000 rows regardless of the eval_hands setting.
agent_decisions = collect_decisions(TRAINING_STATE["agent"], TRAINING_STATE["env_config"], hands=min(CONFIG_STATE["train"].eval_hands, 1000))
# Persist the log in two formats (CSV and JSON Lines) inside the run directory.
agent_decisions_path = TRAINING_STATE["run_dir"] / "decisions.csv"
agent_decisions.to_csv(agent_decisions_path, index=False)
agent_decisions.to_json(TRAINING_STATE["run_dir"] / "decisions.jsonl", orient="records", lines=True)

# Baseline policy the trained agent is compared against below.
basic_policy = BasicStrategyPolicy()

def basic_policy_fn(dealer_upcard, hand, mask):
    """Policy callback for ``evaluate_policy`` that defers to ``basic_policy.act``."""
    return basic_policy.act(dealer_upcard, hand, mask)


def agent_policy_fn(dealer_upcard, hand, mask):
    """Policy callback for ``evaluate_policy`` backed by the trained agent.

    ``dealer_upcard`` and ``hand`` are ignored; the observation is read from
    the module-level ``evaluation_env``, which must be the environment that
    ``evaluate_policy`` is stepping.
    """
    observation = evaluation_env._current_observation()
    chosen_action, _unused = TRAINING_STATE["agent"].act_play(observation, mask)
    return chosen_action


# agent_policy_fn reads observations from evaluation_env, so the agent must
# be evaluated on exactly this instance. Both evaluations are capped at 2000 hands.
evaluation_env = BlackjackEnv(TRAINING_STATE["env_config"])
agent_eval = evaluate_policy(evaluation_env, agent_policy_fn, num_hands=min(CONFIG_STATE["train"].eval_hands, 2000))
# The baseline gets its own environment built from the same configuration.
baseline_env = BlackjackEnv(TRAINING_STATE["env_config"])
basic_eval = evaluate_policy(baseline_env, basic_policy_fn, num_hands=min(CONFIG_STATE["train"].eval_hands, 2000))

summary_df = pd.DataFrame([
    {"Policy": "Rainbow Agent", "EV/100": agent_eval.ev_per_100, "Win": agent_eval.win_rate, "Loss": agent_eval.loss_rate, "Push": agent_eval.push_rate},
    {"Policy": "Basic Strategy", "EV/100": basic_eval.ev_per_100, "Win": basic_eval.win_rate, "Loss": basic_eval.loss_rate, "Push": basic_eval.push_rate},
])

display(summary_df)

# Plotting is best-effort: any failure is shown as a message so the text
# summary below is still written.
try:
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    agent_decisions.set_index("hand")["bankroll_after"].plot(ax=axes[0], title="Bankroll")
    agent_decisions.groupby("action")["action"].count().plot(kind="bar", ax=axes[1], title="Action Frequency")
    agent_decisions["running_count"].hist(ax=axes[2])
    axes[2].set_title("Running Count Distribution")
    plt.tight_layout()
    plot_path = TRAINING_STATE["run_dir"] / "performance.png"
    plt.savefig(plot_path)
    plt.show()
except Exception as exc:  # pragma: no cover
    display(HTML(f"<p>No plots generated: {exc}</p>"))

# Plain-text summary saved alongside the other run artifacts.
summary_text = f'''EV/100: {agent_eval.ev_per_100:.3f} (CI [{agent_eval.ci_low:.3f}, {agent_eval.ci_high:.3f}])
Win Rate: {agent_eval.win_rate:.3f}
Loss Rate: {agent_eval.loss_rate:.3f}
Push Rate: {agent_eval.push_rate:.3f}
Baseline EV/100: {basic_eval.ev_per_100:.3f}
'''
(TRAINING_STATE["run_dir"] / "summary_llm.txt").write_text(summary_text)

summary_text


## Live Testing UI

In [None]:
import time

# Dedicated environment for interactive play, advanced past the bet stage by
# submitting the {"bet": 0} action.
live_env = BlackjackEnv(TRAINING_STATE["env_config"])
live_env.reset()
live_env.step({"bet": 0})

# Output area redrawn by render_state().
live_output = widgets.Output()

# Controls: policy selector, single-step, auto-play toggle, episode reset,
# and the pause between auto-play steps in milliseconds.
agent_dropdown = widgets.Dropdown(options=["Rainbow Agent", "Basic Strategy"], description="Agent")
step_button = widgets.Button(description="Step", button_style="primary")
auto_toggle = widgets.ToggleButton(description="Auto-Play", value=False)
reset_button = widgets.Button(description="Reset Episode", button_style="warning")
delay_slider = widgets.IntSlider(value=200, min=0, max=1000, step=50, description="Delay (ms)")

# Policy used when "Basic Strategy" is selected in the dropdown.
basic_policy = BasicStrategyPolicy()


def render_state(reward=0.0, done=False):
    """Redraw the live panel with the current table state.

    Args:
        reward: reward of the most recent step, shown with two decimals.
        done: whether the most recent step ended the episode.
    """
    with live_output:
        clear_output(wait=True)
        state = live_env.state
        # Both hands may be absent (falsy), so fall back to placeholders.
        dealer_card = state.dealer_hand.cards[0] if state.dealer_hand else None
        player_hand = state.player_hands[state.current_hand_index] if state.player_hands else None
        if player_hand:
            shown_cards, shown_total = player_hand.cards, player_hand.total
        else:
            shown_cards, shown_total = [], 0
        markup = f'''
        <div style='border:1px solid #ccc;padding:8px;'>
            <p><b>Stage:</b> {state.stage}</p>
            <p><b>Dealer Upcard:</b> {dealer_card}</p>
            <p><b>Player Hand:</b> {shown_cards} (total {shown_total})</p>
            <p><b>Bankroll:</b> {state.bankroll:.2f}</p>
            <p><b>Reward:</b> {reward:.2f}</p>
            <p><b>Done:</b> {done}</p>
        </div>
        '''
        display(HTML(markup))


def choose_action():
    """Pick the next action for the live environment using the selected policy.

    Returns:
        The chosen action, or ``None`` when the action mask sums to zero.
    """
    legal_mask = live_env.available_actions()
    if legal_mask.sum() == 0:
        return None

    if agent_dropdown.value != "Basic Strategy":
        # Any other dropdown entry uses the trained agent.
        observation = live_env._current_observation()
        agent_action, _unused = TRAINING_STATE["agent"].act_play(observation, legal_mask)
        return agent_action

    dealer_upcard = live_env.state.dealer_hand.cards[0]
    active_hand = live_env.state.player_hands[live_env.state.current_hand_index]
    return basic_policy.act(dealer_upcard, active_hand, legal_mask)


def step_once(_=None):
    """Advance the live environment by one action and redraw the panel.

    While Auto-Play is enabled, keeps stepping and pauses ``delay_slider``
    milliseconds between steps, until the episode finishes. This is a loop
    rather than self-recursion, so a long episode cannot exceed the
    interpreter's recursion limit.

    Args:
        _: the widget that triggered the callback; ignored.
    """
    while True:
        # If the environment is waiting for a wager, submit the {"bet": 0}
        # action before choosing a play action.
        if live_env.state.stage == "bet":
            live_env.step({"bet": 0})

        action = choose_action()
        reward = 0.0
        done = False
        if action is not None:
            _, reward, done, _info = live_env.step(action)
            if done:
                # Episode over: start the next one so the UI stays playable.
                live_env.reset()
                live_env.step({"bet": 0})
        render_state(reward=reward, done=done)

        # Stop when auto-play is off, when the episode ended, or when no
        # action was available (continuing would spin without progress).
        if not auto_toggle.value or done or action is None:
            return
        time.sleep(delay_slider.value / 1000.0)


def reset_episode(_=None):
    """Restart the live episode, submit the ``{"bet": 0}`` action, and redraw."""
    live_env.reset()
    live_env.step({"bet": 0})
    render_state()


def handle_auto(change):
    """Start stepping when the Auto-Play toggle switches on.

    Args:
        change: observer payload; only its ``"new"`` value is read.
    """
    if not change["new"]:
        return
    step_once()


# Attach the handlers; the toggle is observed on its "value" trait.
step_button.on_click(step_once)
reset_button.on_click(reset_episode)
auto_toggle.observe(handle_auto, names="value")

# Controls in one row, with the state panel underneath.
controls = widgets.HBox([step_button, auto_toggle, reset_button, agent_dropdown, delay_slider])
display(widgets.VBox([controls, live_output]))
# Draw the initial state so the panel is not empty before the first click.
render_state()


## Run Tests

In [None]:
from pathlib import Path
from IPython.display import display, HTML
import subprocess
import time
import pandas as pd

# Cell-local imports: `sys` is used below but was not imported in this cell,
# and html.escape keeps the pytest output literal inside <pre>.
import sys
from html import escape as html_escape

# Run the cloned repository's test-suite with the kernel's interpreter.
start = time.time()
result = subprocess.run([sys.executable, "-m", "pytest", "-q", "--disable-warnings", "--maxfail=1"], cwd=Path("proj/blackjackai"), capture_output=True, text=True)
duration = time.time() - start
status = "Pass" if result.returncode == 0 else "Fail"
summary = pd.DataFrame([
    {"Command": "pytest -q --disable-warnings --maxfail=1", "Status": status, "Duration (s)": f"{duration:.2f}"}
])
if status == "Fail":
    # Escape the output: text such as "<module>" would otherwise be parsed
    # as an HTML tag and disappear from the rendered log.
    display(HTML(f"<pre>{html_escape(result.stdout)}\n{html_escape(result.stderr)}</pre>"))
# Styler.map replaces the deprecated Styler.applymap in newer pandas; fall
# back to applymap on versions that predate it.
summary_styler = summary.style
color_cells = summary_styler.map if hasattr(summary_styler, "map") else summary_styler.applymap
styled_summary = color_cells(lambda v: "background-color:#c8e6c9" if v == "Pass" else "background-color:#ffcdd2", subset=["Status"])
display(styled_summary)

## Notebook Validation

In [None]:
from pathlib import Path
import os
# Cell-local imports so this cell does not depend on names from earlier cells.
import subprocess
import sys
from html import escape as html_escape

import pandas as pd
from IPython.display import HTML, display

# Compute the file name once. Nesting double quotes inside a double-quoted
# f-string (as the command label previously did) is a SyntaxError before
# Python 3.12.
notebook_name = (Path("proj") / "notebooks" / "Blackjack_RL_Mega_Notebook.ipynb").name

# Run the nested notebook with NBMAKE_ACTIVE=1 (light mode) unless the
# variable is already set in the parent environment.
validation_env = dict(os.environ)
validation_env.setdefault("NBMAKE_ACTIVE", "1")
validation_result = subprocess.run([sys.executable, "-m", "pytest", "--nbmake", notebook_name], cwd=Path("proj") / "notebooks", capture_output=True, text=True, env=validation_env)
validation_status = "Pass" if validation_result.returncode == 0 else "Fail"
validation_df = pd.DataFrame([
    {"Command": f"pytest --nbmake {notebook_name}", "Status": validation_status}
])
if validation_status == "Fail":
    # Escape the output so angle-bracketed text is shown rather than parsed as HTML.
    display(HTML(f"<pre>{html_escape(validation_result.stdout)}\n{html_escape(validation_result.stderr)}</pre>"))
# Styler.map replaces the deprecated Styler.applymap in newer pandas; fall
# back to applymap on versions that predate it.
validation_styler = validation_df.style
color_cells = validation_styler.map if hasattr(validation_styler, "map") else validation_styler.applymap
styled_validation = color_cells(lambda v: "background-color:#c8e6c9" if v == "Pass" else "background-color:#ffcdd2", subset=["Status"])
display(styled_validation)

## Final Report

In [None]:
from pathlib import Path
import pandas as pd
import sys
from IPython.display import display
import subprocess
import shutil

# (label shown in the report, argv to execute) pairs.
commands = [
    ("ruff --version", ["ruff", "--version"]),
    ("black --version", ["black", "--version"]),
    ("pytest -q --disable-warnings --maxfail=1", [sys.executable, "-m", "pytest", "-q", "--disable-warnings", "--maxfail=1"]),
]


def _run_report_command(cmd):
    """Run ``cmd`` inside the cloned repo, installing a missing CLI tool once.

    Args:
        cmd: argv list; ``cmd[0]`` doubles as the pip package name when the
            executable is not found on PATH.

    Returns:
        The ``subprocess.CompletedProcess`` of the successful run.

    Raises:
        subprocess.CalledProcessError: the command exited non-zero.
        FileNotFoundError: the executable is still missing after the install
            attempt, or it exists and something else (such as the working
            directory) was not found.
    """
    run_kwargs = {"cwd": Path("proj/blackjackai"), "capture_output": True, "text": True, "check": True}
    try:
        return subprocess.run(cmd, **run_kwargs)
    except FileNotFoundError:
        if shutil.which(cmd[0]) is not None:
            # The executable exists, so installing a package cannot help.
            raise
        subprocess.run([sys.executable, "-m", "pip", "install", "-q", cmd[0]], check=False)
        return subprocess.run(cmd, **run_kwargs)


rows = []
for label, cmd in commands:
    # Every failure mode, including a failed retry, becomes a "Fail" row
    # instead of aborting the whole report.
    try:
        _run_report_command(cmd)
    except subprocess.CalledProcessError as exc:
        rows.append({"Command": label, "Status": "Fail", "Output": (exc.stdout or "") + (exc.stderr or "")})
    except FileNotFoundError as exc:
        rows.append({"Command": label, "Status": "Fail", "Output": str(exc)})
    else:
        rows.append({"Command": label, "Status": "Pass"})

report_df = pd.DataFrame(rows)
# Styler.map replaces the deprecated Styler.applymap in newer pandas; fall
# back to applymap on versions that predate it.
report_styler = report_df.style
color_cells = report_styler.map if hasattr(report_styler, "map") else report_styler.applymap
styled_report = color_cells(lambda v: "background-color:#c8e6c9" if v == "Pass" else "background-color:#ffcdd2", subset=["Status"])
display(styled_report)