# JEE Math Model: Environment & GPU Smoke Test

This notebook verifies your Windows + 16GB GPU setup, installs required libraries, and runs small GPU smoke tests with Qwen2.5-Math-1.5B-Instruct and Aryabhata-1.0.

What it does:
- System and Python environment checks
- Package versions and network checks
- GPU readiness (torch.cuda, nvidia-smi)
- Optional auth: Hugging Face and Weights & Biases
- GPU smoke tests for Qwen2.5-Math-1.5B-Instruct and Aryabhata-1.0
- Optional W&B logging of basic metrics

Notes:
- All cells are idempotent and will skip gracefully if tools or GPUs are missing.
- On Windows, bitsandbytes 4-bit often isn’t supported; this notebook avoids 4-bit on Windows.
- For best training performance, consider Linux, where vLLM and quantization tooling are better supported.


In [None]:
# Section 1: Import Required Libraries (with safe fallbacks)
# Standard-library modules are imported unconditionally on one line.
import os, sys, platform, pathlib, subprocess, socket, ssl, json, hashlib, shutil, time

# Optional libraries – import defensively
# Each optional name is rebound to None when its import raises, so code that
# uses it can test the name before touching the library. `except Exception`
# is deliberately broad: any import-time failure is treated the same as the
# package being absent.
try:
    import requests  # type: ignore
except Exception:
    requests = None

try:
    import pandas as pd  # type: ignore
except Exception:
    pd = None

try:
    import psutil  # type: ignore
except Exception:
    psutil = None

try:
    import torch  # type: ignore
except Exception:
    torch = None

try:
    import tensorflow as tf  # type: ignore
except Exception:
    tf = None

from pathlib import Path
# Confirms the cell ran to completion and records the interpreter version.
print("[INFO] Imports complete. Python:", sys.version)


: 

In [None]:
# Section 2: Python and OS Info Check
# Prints the interpreter path, working directory, platform details and the
# first few sys.path entries so the active environment can be identified.
print("[INFO] Python executable:", sys.executable)
print("[INFO] CWD:", os.getcwd())
print("[INFO] Platform:", platform.platform())
print("[INFO] System:", platform.system(), platform.release(), platform.version())
print("[INFO] Machine:", platform.machine(), "Processor:", platform.processor())
print("[INFO] sys.path head:")
for p in sys.path[:5]:
    print("  ", p)

# Locale info (best-effort)
try:
    import locale
    # locale.getdefaultlocale() is deprecated since Python 3.11; the
    # documented replacement is getlocale() plus getpreferredencoding(False).
    language_code = locale.getlocale()[0]
    encoding = locale.getpreferredencoding(False)
    print("[INFO] Locale:", (language_code, encoding))
except Exception as e:
    print("[WARN] Locale check failed:", e)


In [None]:
# Section 3: Package Versions Check
# Builds `versions`, a mapping of package name -> version string (or a
# "not installed (...)" marker), and prints it as JSON.
import importlib

# (name shown in the report, importable module name).
# The second element must be the real module name: aliases such as "np" or
# "pd" are not importable and would always be reported as not installed.
packages = [
    ("numpy", "numpy"),
    ("pandas", "pandas"),
    ("requests", "requests"),
    ("matplotlib", "matplotlib"),
    ("scikit-learn", "sklearn"),
    ("torch", "torch"),
    ("tensorflow", "tensorflow"),
]

versions = {}
for pkg, mod_name in packages:
    try:
        mod = importlib.import_module(mod_name)
        versions[pkg] = getattr(mod, "__version__", "unknown")
    except Exception as e:
        # Broad on purpose: a broken install can raise more than ImportError.
        versions[pkg] = f"not installed ({e})"

print(json.dumps(versions, indent=2))


In [None]:
# Section 4: Environment Variables and Paths Check
# Reports a fixed set of environment variables (secret values are masked)
# and then the existence / read / write status of a few key directories.
interesting_env = ["PATH", "PYTHONPATH", "HF_HOME", "TRANSFORMERS_CACHE", "WANDB_API_KEY", "HF_TOKEN"]
for name in interesting_env:
    value = os.environ.get(name)
    if value:
        # Never echo credentials; show a fixed mask instead.
        shown = "***" if name in {"WANDB_API_KEY", "HF_TOKEN"} else value
        print(f"[ENV] {name}: {shown}")
    else:
        print(f"[ENV] {name}: <not set>")

# Paths validation
hf_default = Path.home() / ".cache" / "huggingface"
paths_to_check = [Path.cwd(), Path.home(), Path(os.environ.get("HF_HOME", hf_default))]

for candidate in paths_to_check:
    try:
        status = (
            candidate.exists(),
            os.access(candidate, os.R_OK),
            os.access(candidate, os.W_OK),
        )
        print(f"[PATH] {candidate} exists={status[0]} read={status[1]} write={status[2]}")
    except Exception as err:
        print(f"[WARN] Path check failed for {candidate}: {err}")


In [None]:
# Section 5: Network Connectivity Check
# Resolves a well-known host, then performs one HTTPS GET and reports its
# status code and latency. Every step is best-effort and only prints.
host = "example.com"
try:
    ip = socket.gethostbyname(host)
    print(f"[NET] DNS resolved {host} -> {ip}")
except Exception as e:
    print(f"[NET] DNS resolution failed for {host}: {e}")

url = "https://example.com"
resp_ok = False  # True once the GET returned HTTP 200
lat_ms = None    # GET latency in milliseconds, None if the request failed
try:
    # Start the clock right before the request so the reported latency does
    # not include the separate DNS lookup above; perf_counter() is monotonic,
    # unlike time.time(), so the interval cannot be skewed by clock changes.
    start = time.perf_counter()
    if requests is not None:
        r = requests.get(url, timeout=5)
        lat_ms = (time.perf_counter() - start) * 1000
        print(f"[NET] GET {url} status={r.status_code} latency_ms={lat_ms:.1f}")
        resp_ok = r.status_code == 200
    else:
        # requests is unavailable: fall back to the standard library.
        import urllib.request
        with urllib.request.urlopen(url, timeout=5) as f:
            lat_ms = (time.perf_counter() - start) * 1000
            print(f"[NET] GET {url} status={f.status} latency_ms={lat_ms:.1f}")
            resp_ok = f.status == 200
except Exception as e:
    print(f"[NET] HTTP GET failed: {e}")

print("[NET] SSL available:", hasattr(ssl, "SSLContext"))


In [None]:
# Section 6: Hardware Capability Check
# Probes CPU/RAM, then the GPU through torch, TensorFlow and nvidia-smi.
# Each probe is independent and best-effort: a failure prints a warning and
# the remaining probes still run.

# CPU and memory
try:
    cpu_count = os.cpu_count()
    print("[HW] CPU count:", cpu_count)
    if psutil:
        vm = psutil.virtual_memory()
        print(f"[HW] RAM total={vm.total/1e9:.2f} GB, available={vm.available/1e9:.2f} GB")
except Exception as e:
    print("[WARN] CPU/RAM check failed:", e)

# GPU via torch
if torch is not None:
    try:
        print("[HW] torch version:", torch.__version__)
        cuda_ok = torch.cuda.is_available()  # query once, reuse below
        print("[HW] CUDA available:", cuda_ok)
        if cuda_ok:
            try:
                dev_name = torch.cuda.get_device_name(0)
            except Exception:
                dev_name = "<unknown>"
            print("[HW] CUDA device 0:", dev_name)
            try:
                print("[HW] CUDA capability:", torch.cuda.get_device_capability(0))
            except Exception as e:
                # Report instead of silently swallowing the failure.
                print("[WARN] CUDA capability query failed:", e)
    except Exception as e:
        print("[WARN] Torch GPU check failed:", e)
else:
    print("[HW] torch not installed; skipping torch GPU check")

# GPU via TensorFlow
if tf is not None:
    try:
        gpus = tf.config.list_physical_devices('GPU')
        print("[HW] TensorFlow GPUs:", gpus)
    except Exception as e:
        print("[WARN] TensorFlow GPU check failed:", e)
else:
    print("[HW] tensorflow not installed; skipping TF GPU check")

# nvidia-smi
try:
    result = subprocess.run(["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader"],
                            capture_output=True, text=True, timeout=5)
    if result.returncode == 0:
        print("[HW] nvidia-smi output:")
        print(result.stdout)
    else:
        print("[HW] nvidia-smi not available or error.")
        # Surface the tool's own diagnostics so the failure can be acted on.
        detail = (result.stderr or result.stdout).strip()
        if detail:
            print(f"[HW] nvidia-smi exit code {result.returncode}: {detail}")
except FileNotFoundError:
    print("[HW] nvidia-smi not found (likely no NVIDIA drivers or PATH not set)")
except Exception as e:
    # Includes subprocess.TimeoutExpired when the tool hangs past 5 seconds.
    print("[WARN] nvidia-smi check failed:", e)


In [None]:
# Section 7: Data Sanity Checks with pandas
# Loads the CSV named by the JEE_SAMPLE_CSV environment variable when that
# file exists; otherwise (or when loading fails) uses a tiny built-in sample.
# Then prints basic statistics and validates that the frame is non-empty.
CSV_PATH = os.environ.get("JEE_SAMPLE_CSV", "")

if pd is None:
    print("[DATA] pandas not installed; creating a small fallback structure")
    sample = [{"x": i, "y": i * 2} for i in range(5)]
    print(sample)
else:
    df = None
    if CSV_PATH and Path(CSV_PATH).exists():
        try:
            df = pd.read_csv(CSV_PATH)
            print("[DATA] Loaded:", CSV_PATH)
        except Exception as e:
            print("[DATA] Failed to load CSV:", e)
    if df is None:
        # Single fallback path, announced in every case it is taken.
        df = pd.DataFrame({"x": range(5), "y": [i * 2 for i in range(5)]})
        print("[DATA] Using sample DataFrame")

    try:
        print("[DATA] shape:", df.shape)
        print("[DATA] dtypes:\n", df.dtypes)
        print("[DATA] head:\n", df.head())
        print("[DATA] describe:\n", df.describe(include='all'))
        print("[DATA] missing values:\n", df.isna().sum())
        # Simple schema validation. Explicit raises are used instead of
        # `assert`, which is removed when Python runs with -O and would let
        # an empty frame be reported as valid.
        if df.shape[0] <= 0:
            raise ValueError("DataFrame must not be empty")
        if df.shape[1] <= 0:
            raise ValueError("DataFrame must have columns")
        print("[DATA] Basic schema validation passed")
    except Exception as e:
        print("[WARN] Data checks failed:", e)


In [None]:
# Section 8: Quick Unit Checks and Summary Report
# Collects a few headline facts into `summary`, runs two light checks on
# them and prints the summary as JSON.
summary = {
    "python": sys.version,
    "platform": platform.platform(),
    "cuda_available": bool(torch is not None and torch.cuda.is_available()),
    "requests": versions.get("requests"),
}

# Quick checks (best-effort; they only print, never raise)
if isinstance(summary["python"], str):
    print("[CHECK] Python version string OK")
else:
    print("[CHECK] Python version string FAILED")

# The versions table stores a "not installed (...)" string for missing
# packages, so being a str is not enough to conclude requests is present.
requests_version = summary.get("requests")
if isinstance(requests_version, str) and not requests_version.startswith("not installed"):
    print("[CHECK] requests version present")
else:
    print("[CHECK] requests version check skipped (requests not installed)")

print("[SUMMARY]\n" + json.dumps(summary, indent=2))


In [None]:
# Install cell(s): Upgrade pip and install core libs
import sys, subprocess

def pip_install(pkgs):
    """Install or upgrade packages with pip for the running interpreter.

    Runs ``<sys.executable> -m pip install --upgrade <pkgs...>``. Failures
    are reported on stdout and never raised, so a notebook cell keeps going.

    Args:
        pkgs: Requirement strings to install. A list, tuple or other
            iterable of strings is accepted; a single string is treated as
            one requirement.

    Returns:
        True when pip succeeded or there was nothing to install, False when
        the installation failed.
    """
    if isinstance(pkgs, str):
        # A bare string would otherwise be split into single characters.
        pkgs = [pkgs]
    pkgs = list(pkgs)
    if not pkgs:
        # pip exits with an error when given no requirements; skip the call.
        print("[PIP] Nothing to install")
        return True

    print("[PIP] Installing:", pkgs)
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", *pkgs])
    except Exception as e:
        # Deliberately broad: covers a non-zero pip exit (CalledProcessError)
        # as well as failures to launch the interpreter at all.
        print("[PIP] Failed:", e)
        return False
    print("[PIP] Done")
    return True

# Upgrade basics
pip_install(["pip", "setuptools", "wheel"])  # safe re-run

# Core libs (torch install string may require manual edit per CUDA)
# For Windows CUDA 12.x, user may prefer pip torch index URL; here we try default as a start
core = [
    "transformers>=4.43.0",
    "accelerate>=0.30.0",
    "safetensors",
    "sentencepiece",
    "einops",
    "wandb",
    "datasets",
    "tiktoken",
]

# Try torch separately to give clearer errors
try:
    import torch  # noqa: F401
    print("[PIP] torch already installed:", torch.__version__)
except Exception as e:
    # Show why the import failed: a missing package and a broken install
    # (for example a DLL load error) need different follow-up.
    print(f"[PIP] torch not importable ({e}); attempting install (CPU-only fallback if CUDA fails)")
    # pip_install reports its own failures and does not raise, so no extra
    # try/except is needed around this call.
    pip_install(["torch", "torchvision", "torchaudio"])  # may install CPU build

# Install the rest
pip_install(core)

print("[INSTALL] Completed. You may need to restart kernel if torch was newly installed.")


In [None]:
# Optional: Hugging Face Auth
# Looks for a token in HF_TOKEN, then HUGGINGFACE_TOKEN, and logs in only
# when one is found. Any failure is printed rather than raised.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    hf_token = os.environ.get("HUGGINGFACE_TOKEN")

if not hf_token:
    print("[HF] No token found in env; public models should still work.")
else:
    try:
        from huggingface_hub import login
        login(token=hf_token)
        print("[HF] Logged in via token from env.")
    except Exception as err:
        print("[HF] Login skipped or failed:", err)


In [None]:
# Optional: Weights & Biases Auth
# Logs in with the key from WANDB_API_KEY when wandb is importable and the
# key is set; every other outcome is reported with a message.
try:
    import wandb
except Exception as import_err:
    print("[W&B] wandb not installed or login skipped:", import_err)
else:
    wandb_key = os.environ.get("WANDB_API_KEY")
    if not wandb_key:
        print("[W&B] WANDB_API_KEY not set; skipping login.")
    else:
        try:
            wandb.login(key=wandb_key)
            print("[W&B] Logged in via env key.")
        except Exception as login_err:
            print("[W&B] Login failed:", login_err)


In [None]:
# Qwen2.5-Math-1.5B-Instruct GPU Smoke Test
# Loads the model, runs one short greedy generation and reports load time,
# generation time and (on CUDA) peak allocated VRAM. Failures are printed,
# never raised, and the model is released afterwards.
import gc
import time

qwen_model_id = "Qwen/Qwen2.5-Math-1.5B-Instruct"

# Drop anything a previous run left bound to these names so its memory can be
# reclaimed before loading; this also guarantees the names exist for the
# cleanup in `finally` when loading fails part-way.
tokenizer = model = inputs = out = None
gc.collect()

try:
    from transformers import AutoTokenizer, AutoModelForCausalLM
    if torch is None:
        raise RuntimeError("torch not installed")
    if not torch.cuda.is_available():
        print("[QWEN] CUDA not available; using CPU. This will be slow.")

    print(f"[QWEN] Loading {qwen_model_id} ...")
    t0 = time.time()
    tokenizer = AutoTokenizer.from_pretrained(qwen_model_id)
    model = AutoModelForCausalLM.from_pretrained(
        qwen_model_id,
        torch_dtype="auto",
        device_map="auto",
    )
    t_load = time.time() - t0
    print(f"[QWEN] Loaded in {t_load:.1f}s")

    prompt = "Solve step by step: If 2x + 3 = 11, what is x?"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    if torch.cuda.is_available():
        torch.cuda.reset_peak_memory_stats()

    t1 = time.time()
    # Greedy decoding: `temperature` is only used by sampling modes, so it is
    # not passed (passing it with do_sample=False only triggers a warning).
    out = model.generate(
        **inputs,
        max_new_tokens=64,
        do_sample=False,
    )
    t_gen = time.time() - t1

    text = tokenizer.decode(out[0], skip_special_tokens=True)
    print("[QWEN] Output:\n", text)
    print(f"[QWEN] Generation time: {t_gen:.2f}s")
    if torch.cuda.is_available():
        print(f"[QWEN] Peak VRAM: {torch.cuda.max_memory_allocated()/1e9:.2f} GB")
except RuntimeError as e:
    if "CUDA out of memory" in str(e):
        print("[QWEN][OOM] Out of VRAM. Tips: reduce max_new_tokens, set torch_dtype=torch.float16, or try CPU to validate.")
    else:
        print("[QWEN] Smoke test failed:", e)
except Exception as e:
    print("[QWEN] Smoke test failed:", e)
finally:
    # Release the weights so this model does not keep occupying GPU memory
    # after the smoke test has finished.
    tokenizer = model = inputs = out = None
    gc.collect()
    if torch is not None and torch.cuda.is_available():
        torch.cuda.empty_cache()


In [None]:
# Aryabhata-1.0 GPU Smoke Test
# Loads the model, runs one short greedy generation and reports load time,
# generation time and (on CUDA) peak allocated VRAM. Failures are printed,
# never raised, and the model is released afterwards.
import gc
import time

ary_model_id = "PhysicsWallahAI/Aryabhata-1.0"

# Drop any model still bound to these names (for example from an earlier
# cell) BEFORE loading, so two models are never resident at the same time.
# This also guarantees the names exist for the cleanup in `finally`.
tokenizer = model = inputs = out = None
gc.collect()

try:
    from transformers import AutoTokenizer, AutoModelForCausalLM
    if torch is None:
        raise RuntimeError("torch not installed")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    else:
        print("[ARY] CUDA not available; using CPU. This will be slow.")

    print(f"[ARY] Loading {ary_model_id} ...")
    t0 = time.time()
    tokenizer = AutoTokenizer.from_pretrained(ary_model_id)
    model = AutoModelForCausalLM.from_pretrained(
        ary_model_id,
        torch_dtype="auto",
        device_map="auto",
    )
    print(f"[ARY] Loaded in {time.time()-t0:.1f}s")

    prompt = "You are a helpful math tutor. Solve step by step: Evaluate 3*(4+5)."
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    if torch.cuda.is_available():
        torch.cuda.reset_peak_memory_stats()

    t1 = time.time()
    # Greedy decoding: `temperature` is only used by sampling modes, so it is
    # not passed (passing it with do_sample=False only triggers a warning).
    out = model.generate(
        **inputs,
        max_new_tokens=64,
        do_sample=False,
    )
    t_gen = time.time() - t1

    text = tokenizer.decode(out[0], skip_special_tokens=True)
    print("[ARY] Output:\n", text)
    print(f"[ARY] Generation time: {t_gen:.2f}s")
    if torch.cuda.is_available():
        print(f"[ARY] Peak VRAM: {torch.cuda.max_memory_allocated()/1e9:.2f} GB")
except RuntimeError as e:
    msg = str(e)
    if "CUDA out of memory" in msg:
        print("[ARY][OOM] Out of VRAM. Tips: reduce max_new_tokens, set torch_dtype=torch.float16, or try CPU to validate.")
        print("[ARY][NOTE] 4-bit quant (bitsandbytes) is not recommended on Windows; prefer Linux for 4-bit.")
    else:
        print("[ARY] RuntimeError:", e)
except Exception as e:
    print("[ARY] Smoke test failed:", e)
finally:
    # Release the weights so the model does not keep occupying GPU memory
    # after the smoke test has finished.
    tokenizer = model = inputs = out = None
    gc.collect()
    if torch is not None and torch.cuda.is_available():
        torch.cuda.empty_cache()


In [None]:
# Optional: Log simple metrics to W&B
# When wandb is importable and WANDB_API_KEY is set, starts a run, logs the
# platform string and CUDA availability, then closes the run. Every failure
# is printed rather than raised.
try:
    import wandb
    run = None
    api_key = os.environ.get("WANDB_API_KEY")
    if not api_key:
        print("[W&B] Skipping logging; WANDB_API_KEY not set.")
    else:
        try:
            wandb.login(key=api_key)
            run = wandb.init(project="jee-math-smoke", name="env_gpu_smoke", reinit=True)
            cuda_ok = bool(torch and torch.cuda.is_available())
            metrics = {
                "system/platform": platform.platform(),
                "torch/cuda_available": cuda_ok,
            }
            wandb.log(metrics)
            print("[W&B] Logged metrics:", metrics)
        except Exception as log_err:
            print("[W&B] Logging failed:", log_err)
    # Close the run even when logging failed after init succeeded.
    if run is not None:
        run.finish()
except Exception as outer_err:
    print("[W&B] wandb not installed or logging skipped:", outer_err)


## Next steps and tips

- If torch was installed as a CPU-only build, install the CUDA build that matches your NVIDIA driver.
- Consider Linux for vLLM and 4-bit quantization workflows.
- For larger context windows and faster inference, explore vLLM or TensorRT-LLM (Linux recommended).
- Build your dataset pipeline next (PDF parsing, scraping, dedup, quality filters) per tasks.

Model cards:
- Qwen2.5-Math-1.5B-Instruct: https://huggingface.co/Qwen/Qwen2.5-Math-1.5B-Instruct
- Aryabhata-1.0: https://huggingface.co/PhysicsWallahAI/Aryabhata-1.0
