In [None]:
import os
# True only when the KAGGLE_IS_COMPETITION_RERUN environment variable equals
# "true" (compared case-insensitively); unset or any other value means a dev run.
IS_RERUN = os.getenv("KAGGLE_IS_COMPETITION_RERUN", "").lower() == "true"

# Pick the global time budget (in seconds) for this run.
if not IS_RERUN:
  # Development/testing: keep the budget short (1 minute).
  timeout_seconds = 60
  print("üîß Development run - setting short timeout for testing")
else:
  # Competition rerun: full budget for actual scoring (11 hours).
  timeout_seconds = 39600
  print("üèÜ Competition rerun detected - setting FULL timeout for scoring")

# Notebook-level switches consulted by the cells further down.
START_SERVER = True     # launch the sglang server and poll it for a model
TEST_INFERENCE = False  # set False unless you want to see inference hitting the endpoint, before the task runner
SUBMIT = True           # to run the task runner
SCORE = False           # score if not a competition rerun OR if running the test set.

# Export all runner settings in a single step. Values must be strings because
# they are stored in the process environment.
os.environ.update({
  # time budget selected above
  "GLOBAL_TIMEOUT": str(timeout_seconds),
  # root directory of the input datasets
  "ARC_DATA_ROOT": "/kaggle/input",
  # to have the task runner generate a submission file
  "SUBMIT": "true",
  # the directory for where the submission.json file will go
  "SUBMIT_DIR": "/kaggle/working",
  # location of the db (currently only written here, not read from yet)
  "ARC_PROGRAMS_DB": "/kaggle/working/local.db",
  # COMPUTE WEIGHTING PARAMS
  # NOTE(review): exact semantics are defined by the task runner, not visible here.
  "STOP_AT_ALL_TRAIN_CORRECT": "7",
  "STOP_IF_NO_TRAIN_CORRECT_AFTER": "50",
})

print(f"‚è∞ Global timeout set to {timeout_seconds}s ({timeout_seconds/3600:.1f} hours)")

In [None]:
import sys
import torch
import numpy as np

# Environment report: interpreter, PyTorch/CUDA and NumPy versions, so that a
# failed run can be traced back to the exact software stack it ran on.
print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA version (PyTorch): {torch.version.cuda}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"NumPy version: {np.__version__}")

# GPU details can only be queried when CUDA is usable.
cuda_usable = torch.cuda.is_available()
if cuda_usable:
   gpu_total = torch.cuda.device_count()
   first_gpu_name = torch.cuda.get_device_name(0)
   print(f"GPU count: {gpu_total}")
   print(f"GPU name: {first_gpu_name}")

In [None]:
# !ls ../input/arc-1-fake-ttt-blended-c802-dataset/arc-1-fake-ttt-blended-c802

In [None]:
# sglang is mandatory for this notebook, so a missing install fails right here.
import sglang
print("SGLang version:", sglang.__version__)

# FlashInfer is optional: report its version when importable, otherwise say so.
try:
    import flashinfer
except ImportError:
    print("FlashInfer not installed")
else:
    print("FlashInfer version:", flashinfer.__version__)

In [None]:
# ensure that ptxas can access writable directories
import shutil
import os
import sys
import subprocess

# Copy PTXAS and other binaries into a directory under /kaggle/working and mark
# the copies executable. Binaries missing from the source location are reported
# and skipped instead of being silently ignored.
os.makedirs("/kaggle/working/bin", exist_ok=True)
for binary in ["ptxas", "cuobjdump", "nvdisasm"]:
    src = f"/kaggle/usr/lib/sglang_utility/triton/backends/nvidia/bin/{binary}"
    dst = f"/kaggle/working/bin/{binary}"
    if os.path.exists(src):
        shutil.copy(src, dst)
        os.chmod(dst, 0o755)
    else:
        print(f"Warning: {src} not found - {binary} was not copied")

# Set environment variables
env = os.environ.copy()

# Only point Triton at the copied ptxas when the copy actually exists; an
# override that names a missing file is worse than no override at all.
ptxas_copy = "/kaggle/working/bin/ptxas"
if os.path.isfile(ptxas_copy):
    env["TRITON_PTXAS_PATH"] = ptxas_copy
else:
    print(f"Warning: {ptxas_copy} is missing - leaving TRITON_PTXAS_PATH unchanged")

# Put the bin directory at the front of PATH exactly once, so re-running this
# cell does not keep growing PATH, and an empty PATH does not produce a trailing
# ':' (which shells interpret as the current directory).
path_entries = [p for p in env.get("PATH", "").split(os.pathsep) if p and p != "/kaggle/working/bin"]
env["PATH"] = os.pathsep.join(["/kaggle/working/bin"] + path_entries)

env["TRITON_CACHE_DIR"] = "/kaggle/working/.triton"
env["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.makedirs("/kaggle/working/.triton", exist_ok=True)

# Apply the environment variables to the current process (and therefore to any
# subprocess started later that inherits os.environ).
os.environ.update(env)

In [None]:
if START_SERVER:
    # Background server launcher for Kaggle with SGLang
    import os, sys, time, subprocess, json, socket, requests
    
    BASE_PATH = "/kaggle/input/arc-1-fake-ttt-blended-c802-dataset"
    # Find the first directory inside BASE_PATH
    subdirs = [os.path.join(BASE_PATH, d) for d in os.listdir(BASE_PATH) if os.path.isdir(os.path.join(BASE_PATH, d))]
    if not subdirs:
        raise RuntimeError(f"No model directory found in {BASE_PATH}")
    MODEL_PATH = subdirs[0]   # or pick max(subdirs) if multiple exist
    PORT = 8080
    LOG = f"/kaggle/working/sglang_server.log"
    
    # Auto-detect GPUs for sensible parallelism
    try:
        import torch
        num_gpus = torch.cuda.device_count()
    except Exception:
        num_gpus = 0
    
    SERVER_CMD = [
        sys.executable, "-m", "sglang.launch_server",
        "--host", "0.0.0.0",
        "--port", str(PORT),
        "--model-path", MODEL_PATH,
        "--dp", str(max(1, min(num_gpus, 4))),
        "--kv-cache-dtype", "fp8_e4m3"
    ]
    HEALTH_URL = f"http://127.0.0.1:{PORT}/v1/models"  # sglang doesn't always expose /health
    
    # ---------- 2) Launch in background ----------
    log_f = open(LOG, "w")
    env = os.environ.copy()
    proc = subprocess.Popen(SERVER_CMD, stdout=log_f, stderr=subprocess.STDOUT, env=env, cwd="/kaggle/working")
    print(f"Started sglang server PID={proc.pid} | logging to {LOG}")
    print("Command:", " ".join(SERVER_CMD))
    
    # ---------- 3) Wait for readiness ----------
    def wait_ready(url, timeout_s=180):
        t0 = time.time()
        while time.time() - t0 < timeout_s:
            try:
                r = requests.get(url, timeout=3)
                if r.status_code == 200:
                    return True
            except Exception:
                pass
            time.sleep(2)
        return False
    
    ready = wait_ready(HEALTH_URL)
    log_f.flush()
    
    if ready:
        print(f"sglang is READY on port {PORT}.")
        print(f"- Tail logs: !tail -n 50 {LOG}")
        print(f"- List models: !curl -s http://127.0.0.1:{PORT}/v1/models | jq .")
    else:
        print(f"sglang not ready after timeout. Showing last 60 log lines:")
        log_f.close()
        !tail -n 60 {LOG}
    
    # Provide a tiny helper to stop it later
    def stop_server(p=proc):
        try:
            p.terminate()
            p.wait(timeout=10)
        except Exception:
            p.kill()
        print("Server stopped.")
    
    print("Call stop_server() to shut it down gracefully.")

In [None]:
if START_SERVER:
    import requests
    import time

    def check_models():
        """Query the local /v1/models endpoint once.

        Prints the ids of all models the server reports.

        Returns:
            The id of the first listed model, or None when the server is
            unreachable, answers with an error, or lists no models.
        """
        url = "http://127.0.0.1:8080/v1/models"
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            result = response.json()

            print("‚úÖ Server is responding!")
            print("Available models:")
            for model in result['data']:
                print(f"  - {model['id']}")

            return result['data'][0]['id'] if result['data'] else None

        except requests.exceptions.ConnectionError:
            print("‚ùå Connection failed - server may not be ready yet")
            return None
        except Exception as e:
            print(f"‚ùå Error: {e}")
            return None

    # The launcher cell keeps the server's Popen handle in `proc`. Look it up
    # defensively so this cell also works when the server was started elsewhere.
    server_proc = globals().get("proc")

    # Poll every 30 seconds until we get a model. Polling is abandoned with an
    # error when the server process has exited, because it can then never
    # answer and the loop would otherwise hang forever.
    model_name = None
    while not model_name:
        model_name = check_models()
        if model_name:
            break
        if hasattr(server_proc, "poll") and server_proc.poll() is not None:
            raise RuntimeError(
                f"sglang server exited with code {server_proc.returncode} "
                "before serving a model - check the server log."
            )
        print("‚è≥ Waiting 30 seconds before retrying...")
        time.sleep(30)

    print(f"\n‚úÖ Found model: {model_name}")

In [None]:
if TEST_INFERENCE:
    import time
    import requests
    
    url = "http://127.0.0.1:8080/v1/chat/completions"
    
    headers = {
        "Content-Type": "application/json"
    }
    
    messages = [
        {"role" : "system", "content" : "You are an expert at solving abstract reasoning puzzles. Write clean, efficient Python code."},
        {"role" : "user", "content" : "You are solving an ARC (Abstraction and Reasoning Corpus) task. \nI will show you training examples with input and output grids, plus a test input grid. Your task is to:\n\n1. **Analyze the training examples** to discover patterns that map input grids to output grids\n2. **Write a Python program** that implements your best understanding of the transformation  \n3. **DO NOT predict or generate the test output** - your job is only to write the transformation program\n4. **Attempt a solution** - even if the pattern isn't completely clear, provide your best hypothesis\n5. **Do not repeat the same transformation** - if you have already tried a transformation, do not repeat it.\n\n**IMPORTANT: Your transformation must always produce a 10\u00d710 output grid.**\n\nThe test input is shown for context so you understand what type of grid your program will eventually process. Focus on learning patterns from training examples and writing code that captures your understanding.\n\nTraining Examples:\n\nExample 1:\nInput:\n5 0 0 5 0 0 0 5 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\nOutput:\n5 0 0 5 0 0 0 5 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n2 0 0 2 0 0 0 2 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n2 0 0 2 0 0 0 2 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n\nExample 2:\nInput:\n0 5 0 5 5 0 0 5 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\nOutput:\n0 5 0 5 5 0 0 5 0 0\n0 0 0 0 0 0 0 0 0 0\n0 2 0 2 2 0 0 2 0 5\n0 0 0 0 0 0 0 0 0 0\n0 2 0 2 2 0 0 2 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 2 0 2 2 0 0 2 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n\nExample 3:\nInput:\n0 0 5 5 0 5 0 5 5 0\n0 0 0 0 0 0 0 
0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\nOutput:\n0 0 5 5 0 5 0 5 5 0\n0 0 0 0 0 0 0 0 0 0\n0 0 2 2 0 2 0 2 2 5\n0 0 2 2 0 2 0 2 2 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 2 2 0 2 0 2 2 5\n0 0 0 0 0 0 0 0 0 0\n0 0 2 2 0 2 0 2 2 5\n0 0 0 0 0 0 0 0 0 0\n\nTest Input:\n5 0 5 5 0 0 5 0 5 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n\nAnalyze the patterns in the training examples and write a Python function that performs this transformation.\n\n**Approach Guidelines:**\n- Look for patterns in shapes, colors, positions, sizes, rotations, reflections, etc.\n- Even if you can't solve all training examples perfectly, implement what patterns you do observe\n- A partial solution that captures some aspects is better than returning the input unchanged\n- If the pattern is unclear, make your best educated guess based on what you can see\n\nRequirements:\n- The function takes a 2D list (grid) where grid[row][col] gives the value at that position\n- Values are integers from 0-9\n- Return a new grid (2D list) with the transformation applied\n- You can use numpy if needed - just add 'import numpy as np' at the start of your function\n- Aim to handle the training examples as well as possible, even if not perfectly\n- Your function should attempt some meaningful transformation based on the patterns you observe\n\nYou MUST end your response with the following exact format:\n\nFinal answer:\n```python\ndef transform(grid):\n    # Your transformation logic here (implement your best understanding)\n    return transformed_grid\n```\n"}
    ]
    
    payload = {
        "model": model_name,  # from your polling loop
        "messages": messages,
        # "max_tokens": 1000
        "max_tokens": 10
    }

    # Time a single blocking request end to end.
    start_time = time.time()
    response = requests.post(url, headers=headers, json=payload, timeout=600)
    end_time = time.time()

    response.raise_for_status()
    result = response.json()
    # Content may be null in the JSON; normalise to "" so len() below is safe.
    output_text = result["choices"][0]["message"]["content"] or ""

    # Rough estimate (4 chars/token assumption), kept as a fallback.
    estimated_tokens = len(output_text) / 4
    # Prefer the exact completion token count when the response carries an
    # OpenAI-style "usage" object.
    reported_tokens = (result.get("usage") or {}).get("completion_tokens")
    if reported_tokens is None:
        output_tokens = estimated_tokens
        token_label = "Estimated"
    else:
        output_tokens = reported_tokens
        token_label = "Reported"

    elapsed_time = end_time - start_time
    # Guard against a zero interval on coarse clocks.
    tokens_per_second = output_tokens / elapsed_time if elapsed_time > 0 else 0.0

    print("‚úÖ Response received:")
    print(output_text)
    print(f"\n‚è± Elapsed time: {elapsed_time:.2f} seconds")
    print(f"üî¢ {token_label} tokens: {output_tokens:.1f}")
    print(f"‚ö° Output tokens/sec: {tokens_per_second:.2f}")

In [None]:
if TEST_INFERENCE:
    import time
    import requests
    
    url = "http://127.0.0.1:8080/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    
    # Your messages from before
    messages = [
        {"role": "system", "content": "You are an expert at solving abstract reasoning puzzles. Write clean, efficient Python code."},
        {"role": "user", "content": "You are solving an ARC (Abstraction and Reasoning Corpus) task. \nI will show you training examples with input and output grids, plus a test input grid. Your task is to:\n\n1. **Analyze the training examples** to discover patterns that map input grids to output grids\n2. **Write a Python program** that implements your best understanding of the transformation \n3. **DO NOT predict or generate the test output** - your job is only to write the transformation program\n4. **Attempt a solution** - even if the pattern isn't completely clear, provide your best hypothesis\n5. **Do not repeat the same transformation** - if you have already tried a transformation, do not repeat it.\n\n**IMPORTANT: Your transformation must always produce a 10\u00d710 output grid.**\n\nThe test input is shown for context so you understand what type of grid your program will eventually process. Focus on learning patterns from training examples and writing code that captures your understanding.\n\nTraining Examples:\n\nExample 1:\nInput:\n5 0 0 5 0 0 0 5 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\nOutput:\n5 0 0 5 0 0 0 5 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n2 0 0 2 0 0 0 2 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n2 0 0 2 0 0 0 2 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n\nExample 2:\nInput:\n0 5 0 5 5 0 0 5 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\nOutput:\n0 5 0 5 5 0 0 5 0 0\n0 0 0 0 0 0 0 0 0 0\n0 2 0 2 2 0 0 2 0 5\n0 0 0 0 0 0 0 0 0 0\n0 2 0 2 2 0 0 2 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 2 0 2 2 0 0 2 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n\nExample 3:\nInput:\n0 0 5 5 0 5 0 5 5 0\n0 0 0 0 0 0 0 0 
0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\nOutput:\n0 0 5 5 0 5 0 5 5 0\n0 0 0 0 0 0 0 0 0 0\n0 0 2 2 0 2 0 2 2 5\n0 0 2 2 0 2 0 2 2 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 0\n0 0 2 2 0 2 0 2 2 5\n0 0 0 0 0 0 0 0 0 0\n0 0 2 2 0 2 0 2 2 5\n0 0 0 0 0 0 0 0 0 0\n\nTest Input:\n5 0 5 5 0 0 5 0 5 0\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n0 0 0 0 0 0 0 0 0 0\n0 0 0 0 0 0 0 0 0 5\n\nAnalyze the patterns in the training examples and write a Python function that performs this transformation.\n\n**Approach Guidelines:**\n- Look for patterns in shapes, colors, positions, sizes, rotations, reflections, etc.\n- Even if you can't solve all training examples perfectly, implement what patterns you do observe\n- A partial solution that captures some aspects is better than returning the input unchanged\n- If the pattern is unclear, make your best educated guess based on what you can see\n\nRequirements:\n- The function takes a 2D list (grid) where grid[row][col] gives the value at that position\n- Values are integers from 0-9\n- Return a new grid (2D list) with the transformation applied\n- You can use numpy if needed - just add 'import numpy as np' at the start of your function\n- Aim to handle the training examples as well as possible, even if not perfectly\n- Your function should attempt some meaningful transformation based on the patterns you observe\n\nYou MUST end your response with the following exact format:\n\nFinal answer:\npython\ndef transform(grid):\n    # Your transformation logic here (implement your best understanding)\n    return transformed_grid\n\n"}
    ]
    
    # Number of identical requests to send
    N = 32

    payload = {
        "model": model_name,  # define this before running
        "messages": messages,
        "max_tokens": 24000
    }

    # The requests are issued one after another (each post blocks until its
    # response arrives), so the figures below describe sequential latency.
    start_time = time.time()
    responses = []
    for _ in range(N):
        r = requests.post(url, headers=headers, json=payload, timeout=1200)
        r.raise_for_status()
        responses.append(r.json())
    end_time = time.time()

    total_elapsed = end_time - start_time

    # Token counting: use the exact completion token count from each response's
    # OpenAI-style "usage" object when present; otherwise fall back to the rough
    # 4 chars/token estimate for that response.
    total_tokens = 0
    all_reported = True
    for resp in responses:
        reported_tokens = (resp.get("usage") or {}).get("completion_tokens")
        if reported_tokens is None:
            all_reported = False
            # Content may be null in the JSON; treat it as empty text.
            output_text = resp["choices"][0]["message"]["content"] or ""
            total_tokens += len(output_text) / 4
        else:
            total_tokens += reported_tokens
    token_label = "Reported" if all_reported else "Estimated"

    # Guard against a zero interval on coarse clocks.
    tokens_per_sec = total_tokens / total_elapsed if total_elapsed > 0 else 0.0
    avg_time_per_request = total_elapsed / N

    print(f"‚úÖ Completed {N} requests")
    print(f"‚è± Total elapsed: {total_elapsed:.2f} sec")
    print(f"‚è± Avg per request: {avg_time_per_request:.2f} sec")
    print(f"üî¢ {token_label} total output tokens: {total_tokens:.1f}")
    print(f"‚ö° Output tokens/sec: {tokens_per_sec:.2f}")

    # Optional: print first response
    print("\nExample output:")
    print(responses[0]["choices"][0]["message"]["content"])

In [None]:
# Derive attempts/workers for the two modes
MAX_ATTEMPTS = 512 if (IS_RERUN and SUBMIT) else 8
MAX_WORKERS  = 16

SUBSET = "test" # defaulting to test to ensure there are no loading issues.

# # can use this instead if testing evaluation during a pre-run
# SUBSET = "test" if IS_RERUN else "evaluation"

# Common env for your runner
os.environ["OPENAI_API_KEY"] = "EMPTY"

print(f"Mode: {'competition' if IS_RERUN else 'dev'} | SUBMIT={SUBMIT} | attempts={MAX_ATTEMPTS} | workers={MAX_WORKERS} | subset={SUBSET}")

# Build the command
cmd = (
  "uv run python -m llm_python.run_arc_tasks_soar "
  "--dataset arc-prize-2025 "
  f"--subset {SUBSET} "
  f"--max_workers {MAX_WORKERS} "
  f"--max_attempts {MAX_ATTEMPTS} "
  f"--model \"{model_name}\" "
  "--base-url http://127.0.0.1:8080/v1 "
  "--unsafe-executor "
  "--max-tokens 2000 "
  "--qwen-no-think"
)

# Optionally quiet the private rerun by redirecting logs to a file
if IS_RERUN:
    cmd += " >> /kaggle/working/run.log 2>&1"

print(f"Running {cmd}\n\n")

# Run
!{cmd}

In [None]:
# Only score in dev/commit runs
if SCORE and not IS_RERUN:
    !uv run python -m llm_python.score_submission --submission "/kaggle/working/submission.json"
else:
    print("Skipping local scoring (competition rerun or SCORE=False).")