
# Blackjack AI Training and Environment Verification

This Colab-ready notebook configures connectivity, installs dependencies, validates hardware, and runs a short Q-learning training session for the Blackjack reinforcement learning environment with Hi-Lo card counting.



## 1. Connectivity and Hardware Checks
Verify that the runtime can reach the internet and detect available hardware (GPU/TPU/CPU). Results are stored in a shared `RUNTIME_REPORT` dictionary for later summarization.


In [None]:

import json
import os
import platform
import subprocess
import sys
import time
import urllib.request
from urllib.error import URLError

# Shared status record for the whole notebook: each section overwrites its own
# entry, and the final summary cell reads them back. Every check starts out in
# a "not passed yet" state.
RUNTIME_REPORT = dict(
    internet=dict(status=False, details=""),
    hardware=dict(type="Unknown", details="", gpu_available=False),
    dependencies=[],
    repository=dict(status="unverified", path=""),
    module_imports=dict(status=False, details=""),
    environment_test=dict(status=False, details=""),
    benchmark=dict(status=False, details=""),
    training=dict(status=False, details=""),
)
# Set by the repository cell once the checkout location is known.
PROJECT_ROOT = None

# Probe outbound connectivity by fetching a small slice of the PyPI simple
# index. This is best-effort: any exception is recorded in the report rather
# than stopping the notebook.
print("Checking internet connectivity...")
internet_report = RUNTIME_REPORT["internet"]
try:
    with urllib.request.urlopen("https://pypi.org/simple/pip/", timeout=5) as pypi_response:
        probe = pypi_response.read(256)
        internet_report["status"] = True
        internet_report["details"] = f"Fetched {len(probe)} bytes from PyPI simple index."
        print("Internet status: ✅", internet_report["details"])
except Exception as exc:  # pragma: no cover - network failure branch
    internet_report.update(status=False, details=f"{type(exc).__name__}: {exc}")
    print("Internet status: ❌", internet_report["details"])

# Classify the runtime as TPU, GPU or CPU and store the result under
# RUNTIME_REPORT["hardware"].
print("Detecting available hardware...")
hardware_lines = []
if "COLAB_TPU_ADDR" not in os.environ:
    # No TPU marker: ask nvidia-smi for one "name, memory" CSV row per GPU.
    # Any failure (missing binary, non-zero exit, empty output) falls through
    # to the CPU branch below.
    try:
        smi_query = subprocess.run(
            ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader"],
            check=True,
            capture_output=True,
            text=True,
        )
        gpu_rows = [row.strip() for row in smi_query.stdout.strip().splitlines() if row.strip()]
        if not gpu_rows:  # pragma: no cover - no GPU info branch
            raise FileNotFoundError("nvidia-smi returned no GPU info")

        # Only the first reported GPU decides the runtime type.
        gpu_name = gpu_rows[0].split(",")[0].strip()
        is_t4 = "T4" in gpu_name
        RUNTIME_REPORT["hardware"] = {
            "type": "GPU (T4)" if is_t4 else "GPU",
            "details": " | ".join(gpu_rows),
            "gpu_available": True,
        }
        print("GPU detected:")
        for row in gpu_rows:
            print("  -", row)
        print(
            "Using expected NVIDIA T4 GPU."
            if is_t4
            else "GPU is available but not a T4 (falling back to generic GPU logic)."
        )
    except Exception:  # pragma: no cover - CPU fallback branch
        cpu_label = platform.processor() or platform.machine()
        cpu_summary = f"CPU only: {cpu_label}"
        RUNTIME_REPORT["hardware"] = {"type": "CPU", "details": cpu_summary, "gpu_available": False}
        print("CPU-only runtime detected:", cpu_summary)
else:
    tpu_note = "TPU detected via COLAB_TPU_ADDR environment variable."
    hardware_lines.append(tpu_note)
    RUNTIME_REPORT["hardware"] = {"type": "TPU", "details": tpu_note, "gpu_available": False}
    print(tpu_note)

# Safety net: make sure the summary always has some detail text to show.
if not RUNTIME_REPORT["hardware"].get("details"):
    RUNTIME_REPORT["hardware"]["details"] = " | ".join(hardware_lines) or "No additional hardware details."



## 2. Clone or Update Repository
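Locate or fetch the Blackjack AI repository. If the notebook is already running inside a Git checkout (the working directory or its parent contains `.git`), that checkout is used as-is. Otherwise the repository is cloned to `/content/blackjackai` — or, if a clone already exists there, fast-forwarded with `git pull --ff-only` — and its submodules are initialized.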


In [None]:

import pathlib

REPO_URL = "https://github.com/ggvick/blackjackai"
notebook_dir = pathlib.Path.cwd().resolve()
repo_status = "already_present"

# Prefer a checkout the notebook is already running inside: the working
# directory first, then its parent. Such a checkout is used without pulling.
local_checkouts = (notebook_dir, notebook_dir.parent.resolve())
repo_path = next((root for root in local_checkouts if (root / ".git").exists()), None)

if repo_path is None:
    # No enclosing checkout: work with a clone at the fixed Colab location.
    target_dir = pathlib.Path("/content/blackjackai").resolve()
    has_checkout = target_dir.exists() and (target_dir / ".git").exists()
    if not has_checkout:
        print(f"Cloning repository from {REPO_URL} to {target_dir}...")
        target_dir.parent.mkdir(parents=True, exist_ok=True)
        subprocess.run(["git", "clone", REPO_URL, str(target_dir)], check=True)
        repo_status = "cloned"
    else:
        print(f"Repository already exists at {target_dir} - pulling latest changes...")
        pull_cmd = ["git", "-C", str(target_dir), "pull", "--ff-only"]
        try:
            subprocess.run(pull_cmd, check=True)
            repo_status = "updated"
        except subprocess.CalledProcessError as exc:
            # A failed pull is tolerated: keep working from the existing clone.
            print("Git pull failed:", exc)
            repo_status = "exists"
    # Submodules are refreshed for both the fresh-clone and the existing-clone case.
    subprocess.run(
        ["git", "-C", str(target_dir), "submodule", "update", "--init", "--recursive"],
        check=True,
    )
    repo_path = target_dir

if repo_path is None:
    raise RuntimeError("Unable to locate or clone the repository.")

# Later cells rely on the working directory and PROJECT_ROOT pointing at the checkout.
os.chdir(repo_path)
PROJECT_ROOT = repo_path
RUNTIME_REPORT["repository"] = {"status": repo_status, "path": str(PROJECT_ROOT)}
print(f"Repository root: {PROJECT_ROOT}")

# Ensure the project root is importable
project_root_str = str(PROJECT_ROOT)
if project_root_str not in sys.path:
    sys.path.insert(0, project_root_str)



## 3. Dependency Installation
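Install the project requirements (when `requirements.txt` is present), common ML/RL libraries, and the project itself in editable mode. Each installation is retried once if it fails; a second failure stops the notebook with an error.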


In [None]:

# Install everything the later cells import, in this order: the project's
# requirements.txt (when the file exists), a fixed list of common ML/RL
# packages, then the project itself in editable mode.
assert PROJECT_ROOT is not None, "PROJECT_ROOT must be set before installing dependencies."
requirements_path = PROJECT_ROOT / "requirements.txt"
packages = ["torch", "numpy", "gymnasium", "matplotlib", "pandas", "tqdm"]
install_targets = []

# Each target is the argument list appended to "pip install".
if requirements_path.exists():
    install_targets.append(["-r", str(requirements_path)])
install_targets.extend([[pkg] for pkg in packages])
install_targets.append(["-e", str(PROJECT_ROOT)])

install_results = []
# Publish the list before the loop (it is filled in place below) so that a
# failed install is still visible in the report even though the cell then
# aborts with RuntimeError.
RUNTIME_REPORT["dependencies"] = install_results
for target in install_targets:
    desc = " ".join(target)
    success = False
    for attempt in range(2):  # first try plus a single retry
        try:
            print(f"Installing {desc} (attempt {attempt + 1})...")
            subprocess.run([sys.executable, "-m", "pip", "install"] + target, check=True)
        except subprocess.CalledProcessError as exc:
            print(f"Installation failed for {desc}: {exc}")
            if attempt == 0:
                print("Retrying...")
                time.sleep(1)
        else:
            success = True
            # The trailing "\n" leaves a blank line between packages in the output.
            print(f"Installation successful: {desc}\n")
            break
    install_results.append({"target": desc, "success": success})
    if not success:
        raise RuntimeError(f"Failed to install dependency: {desc}")



## 4. Import Validation and Environment Smoke Test
Import the Blackjack environment, run a quick demo round, and capture state information to ensure the card shoe and counter behave as expected.


In [None]:

from pprint import pprint

# Import the project API in one guarded step so that a failure is written to
# the report before the exception propagates and stops the notebook.
try:
    from blackjack import (
        BlackjackEnv,
        CountingPolicy,
        QLearningTrainer,
        evaluate_policy,
        seed_everything,
    )
    from blackjack.rl.training import TrainingConfig

    RUNTIME_REPORT["module_imports"] = dict(status=True, details="Project modules imported successfully.")
except Exception as import_error:  # pragma: no cover - import failure branch
    RUNTIME_REPORT["module_imports"] = dict(status=False, details=f"Import failure: {import_error}")
    raise

# Play one demo round with a fixed seed and record every transition.
seed_everything(123)
demo_policy = CountingPolicy(
    min_bet=10.0,
    max_bet=200.0,
    ramp={1: 2, 2: 4, 3: 6, 4: 8, 5: 10},
)
demo_env = BlackjackEnv(num_decks=6, penetration=0.75, counting_policy=demo_policy, seed=123)

initial_state = demo_env.reset()
print("Initial observation:", initial_state)
opening_summary = " | ".join(
    [
        f"Initial bet: {demo_env.current_bet:.2f}",
        f"Running count: {demo_env.counter.running_count}",
        f"True count: {demo_env.counter.true_count():.2f}",
    ]
)
print(opening_summary)

# Step with whatever action sample_action() returns until the environment
# reports the round as done; at least one step is always taken.
transition_log = []
done = False
while not done:
    action = demo_env.sample_action()
    next_state, reward, done, info = demo_env.step(action)
    transition_log.append(
        dict(action=action, next_state=next_state, reward=reward, done=done, info=info)
    )

print("Demo round transitions:")
for step_record in transition_log:
    pprint(step_record)

# Summarise the round from the info dict returned by the last step.
if transition_log:
    final_info = transition_log[-1]["info"]
else:
    final_info = demo_env.last_info
round_outcome = final_info.get("outcome", "unknown")
closing_true_count = final_info.get("true_count", 0)
RUNTIME_REPORT["environment_test"] = {
    "status": True,
    "details": f"Round outcome: {round_outcome} | True count: {closing_true_count:.2f}",
}



## 5. Lightweight CPU/GPU Benchmark
Run a small matrix multiplication benchmark on the CPU (NumPy) and, when a CUDA GPU is available, on the GPU (PyTorch).


In [None]:

import numpy as np
import torch

# Time one square matrix multiplication on the CPU and, when CUDA is usable,
# on the GPU. Operands are created before each timer starts so the reported
# figures cover only the multiplication that the printed messages describe.
matrix_size = 256
np_a = np.random.rand(matrix_size, matrix_size)
np_b = np.random.rand(matrix_size, matrix_size)
cpu_start = time.perf_counter()
np.dot(np_a, np_b)
cpu_elapsed = time.perf_counter() - cpu_start

benchmark_details = {"cpu_matrix_size": matrix_size, "cpu_time_sec": cpu_elapsed}
print(f"CPU matrix multiplication ({matrix_size}x{matrix_size}) completed in {cpu_elapsed:.4f} seconds.")

gpu_elapsed = None
# Require both the nvidia-smi result from the hardware cell and a CUDA-enabled torch.
if RUNTIME_REPORT["hardware"].get("gpu_available") and torch.cuda.is_available():
    torch.manual_seed(0)
    device = torch.device("cuda")
    torch_a = torch.rand((matrix_size, matrix_size), device=device)
    torch_b = torch.rand((matrix_size, matrix_size), device=device)
    # Untimed warm-up so one-time CUDA start-up cost is not charged to the
    # measured multiplication.
    torch.matmul(torch_a, torch_b)
    torch.cuda.synchronize()
    gpu_start = time.perf_counter()
    torch.matmul(torch_a, torch_b)
    # CUDA work is queued asynchronously; wait for it to finish before stopping the clock.
    torch.cuda.synchronize()
    gpu_elapsed = time.perf_counter() - gpu_start
    benchmark_details["gpu_time_sec"] = gpu_elapsed
    print(f"GPU matrix multiplication completed in {gpu_elapsed:.4f} seconds on {RUNTIME_REPORT['hardware']['type']}.")
else:
    print("GPU not available; skipping GPU benchmark.")

RUNTIME_REPORT["benchmark"] = {"status": True, "details": benchmark_details}



## 6. Q-Learning Training Demo
Train a small Q-learning agent and report training plus evaluation metrics to confirm the AI workflow.


In [None]:

# Table rules shared by the training and evaluation environments; the two
# differ only in their seed and each gets its own CountingPolicy instance.
table_rules = dict(
    num_decks=6,
    penetration=0.75,
    natural_payout=1.5,
    hit_soft_17=False,
    min_bet=5.0,
    max_bet=100.0,
)

training_env = BlackjackEnv(
    **table_rules,
    counting_policy=CountingPolicy(min_bet=5.0, max_bet=100.0),
    seed=42,
)
hyperparameters = dict(
    episodes=500,
    alpha=0.05,
    gamma=0.99,
    epsilon_start=1.0,
    epsilon_min=0.05,
    epsilon_decay=0.995,
    log_every=100,
)
training_config = TrainingConfig(**hyperparameters)
trainer = QLearningTrainer(training_env, config=training_config, seed=42)
training_result = trainer.train()

print("Training summary:")
for name, metric in training_result.summary.items():
    # "*rate" keys and any float value get three decimals; everything else is printed as-is.
    as_fixed = name.endswith("rate") or isinstance(metric, float)
    print(f"  {name}: {metric:.3f}" if as_fixed else f"  {name}: {metric}")

eval_env = BlackjackEnv(
    **table_rules,
    counting_policy=CountingPolicy(min_bet=5.0, max_bet=100.0),
    seed=999,
)
eval_metrics = evaluate_policy(eval_env, training_result.q_table, episodes=200, seed=999)

print("Evaluation metrics (200 episodes):")
for name, metric in eval_metrics.items():
    if name.endswith(("rate", "reward")):
        print(f"  {name}: {metric:.3f}")
    else:
        print(f"  {name}: {metric}")

# Store both metric sets in one flat dict, prefixed by where they came from.
combined_metrics = {f"train_{name}": metric for name, metric in training_result.summary.items()}
combined_metrics.update((f"eval_{name}", metric) for name, metric in eval_metrics.items())
RUNTIME_REPORT["training"] = {"status": True, "details": combined_metrics}



## 7. Environment Configuration Summary
Aggregate all recorded status messages to provide a quick readiness report.


In [None]:

def _status_icon(passed):
    """Return the check-mark emoji for a truthy status and the cross emoji otherwise."""
    return "✅" if passed else "❌"


# Print a readiness report from the entries collected in RUNTIME_REPORT.
print("=== Environment Validation Summary ===")

internet_info = RUNTIME_REPORT["internet"]
print(f"Internet: {_status_icon(internet_info['status'])} - {internet_info['details']}")

hardware_info = RUNTIME_REPORT["hardware"]
print(f"Hardware: {hardware_info['type']} - {hardware_info['details']}")

print("Dependency installation results:")
for dep in RUNTIME_REPORT["dependencies"]:
    print(f"  {_status_icon(dep['success'])} {dep['target']}")

repo_info = RUNTIME_REPORT["repository"]
print(f"Repository status: {repo_info['status']} (path: {repo_info['path']})")

# These sections share one layout: "<label>: <icon> - <details>".
for label, report_key in (
    ("Module imports", "module_imports"),
    ("Environment smoke test", "environment_test"),
    ("Benchmark status", "benchmark"),
):
    section = RUNTIME_REPORT[report_key]
    print(f"{label}: {_status_icon(section['status'])} - {section['details']}")

# Training details are a large metrics dict, so only the pass/fail flag is shown.
print(f"Training summary recorded: {_status_icon(RUNTIME_REPORT['training']['status'])}")
