<a href="https://colab.research.google.com/github/geminius/Optimo/blob/main/openvla_planner_prototype.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# OpenVLA Planner Prototype (v8)

This notebook contains a **self‑contained, runnable prototype** of the agentic optimisation flow you designed in chat.

* **Section 0** installs all Python dependencies (may take a few minutes in Colab).
* **Sections 1‑4** define helpers, agents, and the optimiser orchestrator.
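* **Section 5** runs an end‑to‑end demo: load OpenVLA‑7B, then repeatedly apply optimisation steps (quantise, prune, and so on — currently only 4‑bit quantisation is implemented) until the target size is hit or the step limit is reached.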


In [None]:

# ⚠️ Run this once per Colab session
!pip -q install transformers datasets bitsandbytes peft accelerate         sentencepiece huggingface-hub torch --upgrade
# optimisation libs
!pip -q install llm-awq smoothquant flash-attn==2.*         transformer-movement-pruning token-merging arize-phoenix-evals promptflow-sdk


In [None]:

import os, json, time, random, math, subprocess, glob, pathlib, argparse
from typing import List, Tuple, Dict, Any
import torch, torch.nn as nn

from transformers import AutoTokenizer, AutoModelForCausalLM


In [None]:

class BaseAgent:
    """Common interface for the optimisation agents in this notebook.

    Subclasses override :meth:`act`; the base implementation only signals
    that no behaviour has been provided.
    """

    def act(self, *args, **kwargs):
        """Perform the agent's step; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, when called on the base class.
        """
        raise NotImplementedError

class SessionMemory:
    """Run-scoped log of optimisation steps, summarised for the planner."""

    def __init__(self):
        # One dict per recorded step, oldest first.
        self.events = []

    def remember(self, step: int, action: str, size_mb: float, **metrics):
        """Append one step record to the log.

        Args:
            step: Iteration number, stored under the key ``"iter"``.
            action: Name of the action taken, stored under ``"action"``.
            size_mb: Model size after the action, stored under ``"size"``.
            **metrics: Extra key/value pairs stored alongside the fixed keys.

        Raises:
            TypeError: If ``metrics`` contains ``iter``, ``action`` or
                ``size``, because those collide with the fixed keys.
        """
        self.events.append(dict(iter=step, action=action, size=size_mb, **metrics))

    def summary(self, k: int = 8) -> str:
        """Render the last ``k`` events as a single ``" | "``-separated line.

        Args:
            k: Maximum number of most-recent events to include. Values
                ``<= 0`` select no events.

        Returns:
            The formatted events, or ``"none"`` when there is nothing to show.
        """
        # A bare ``events[-k:]`` returns *every* event when k == 0 (``-0`` is
        # just 0) and skips from the front for negative k, so guard it.
        recent = self.events[-k:] if k > 0 else []
        return " | ".join(
            f"#{e['iter']}:{e['action']}→{e['size']:.0f}MB" for e in recent
        ) or "none"


In [None]:

# --- simple quant 4‑bit agent (bitsandbytes) -----------------------------
class Quant4bitAgent(BaseAgent):
    """Swap every ``nn.Linear`` in a model for a bitsandbytes 4-bit layer."""

    def act(self, model, **kw):
        """Quantise the linear layers of ``model`` to 4 bits and return it.

        ``bnb.functional.quantize_4bit`` returns a ``(packed, quant_state)``
        pair, so its result cannot be assigned to ``weight.data`` directly,
        and a plain ``nn.Linear`` could not run on the packed weight anyway.
        Each layer is therefore replaced by a ``bnb.nn.Linear4bit`` that
        carries the original weights.

        Args:
            model: Module to quantise. It is cast to fp16 first and its
                linear sub-modules are replaced in place.
            **kw: Ignored; accepted so all agents share one call shape.

        Returns:
            The model with 4-bit linear layers. If ``model`` itself is a
            plain ``nn.Linear`` a new ``Linear4bit`` object is returned.
        """
        # Imported here so the notebook still loads where bitsandbytes is absent.
        import bitsandbytes as bnb

        def needs_quant(module):
            # Linear4bit subclasses nn.Linear; skipping it keeps repeated
            # calls from re-quantising already packed weights.
            return isinstance(module, nn.Linear) and not isinstance(module, bnb.nn.Linear4bit)

        def to_4bit(linear):
            packed = bnb.nn.Linear4bit(
                linear.in_features,
                linear.out_features,
                bias=linear.bias is not None,
                compute_dtype=torch.float16,
            )
            packed.load_state_dict(linear.state_dict())
            # Per the bitsandbytes docs the actual quantisation happens when
            # the layer is moved onto the accelerator.
            # NOTE(review): assumes the weights live on a real device (not
            # offloaded / "meta") — confirm for device_map="auto" loads.
            return packed.to(linear.weight.device)

        model = model.to(torch.float16)
        if needs_quant(model):
            return to_4bit(model)

        converted = {}  # id(original) -> replacement, so shared layers stay shared
        for parent in list(model.modules()):
            for child_name, child in list(parent.named_children()):
                if not needs_quant(child):
                    continue
                if id(child) not in converted:
                    converted[id(child)] = to_4bit(child)
                setattr(parent, child_name, converted[id(child)])
        return model

# --- AWQ + SmoothQuant placeholders --------------------------------------
class AWQAgent(BaseAgent):
    """Placeholder for an AWQ quantisation step (not implemented yet)."""

    def __init__(self, group_size: int = 128):
        # Kept for the future integration; this class does not read it yet.
        self.group_size = group_size

    def act(self, model, tokenizer, **kw):
        """Return ``model`` unchanged; ``tokenizer`` and ``kw`` are ignored."""
        # TODO: integrate llm‑awq
        return model

class SmoothQuantAgent(BaseAgent):
    """Placeholder for a SmoothQuant step (not implemented yet)."""

    def __init__(self, alpha: float = 0.5):
        # Kept for the future integration; this class does not read it yet.
        self.alpha = alpha

    def act(self, model, tokenizer, **kw):
        """Return ``model`` unchanged; ``tokenizer`` and ``kw`` are ignored."""
        # TODO: integrate smoothquant
        return model


In [None]:

class DummyPlanner:
    """Rule-based demo planner: keep quantising until the size target is met."""

    def __init__(self, target_mb: float = 400):
        # Size threshold in MB; anything strictly above it triggers "quant4".
        self.target = target_mb

    def act(self, size_mb: float, history: str) -> Dict[str, Any]:
        """Choose the next action from the current model size.

        Args:
            size_mb: Current model size in MB.
            history: Summary of earlier steps; accepted but not consulted.

        Returns:
            ``{"action": "quant4"}`` while ``size_mb`` exceeds the target,
            otherwise ``{"action": "finish"}``.
        """
        next_action = "quant4" if size_mb > self.target else "finish"
        return {"action": next_action}

class Optimiser:
    """Runs the plan → act → record loop over a single loaded model."""

    def __init__(self, model_name="openvla/openvla-7b", target_mb=400):
        """Load the tokenizer and model, then create the memory and planner.

        Args:
            model_name: Hub id or local path passed to ``from_pretrained``.
            target_mb: Size target in MB handed to the planner.
        """
        # NOTE(review): the default checkpoint may ship custom modelling code
        # and need a different Auto class / ``trust_remote_code=True`` —
        # confirm against its model card before relying on this loader.
        self.tok = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name, torch_dtype=torch.float16, device_map="auto"
        )
        self.mem = SessionMemory()
        self.planner = DummyPlanner(target_mb)

    def size_mb(self):
        """Return the parameter storage of ``self.model`` in MB (1 MB = 1e6 bytes).

        Each parameter is counted at its actual element width, so the figure
        stays right after weights are repacked into a narrower dtype instead
        of assuming two bytes per element. Buffers are not counted.
        """
        total_bytes = sum(p.numel() * p.element_size() for p in self.model.parameters())
        return total_bytes / 1e6

    def run(self, max_steps=4):
        """Ask the planner for actions until it says "finish" or steps run out.

        Args:
            max_steps: Upper bound on the number of planner decisions.
        """
        for step in range(1, max_steps + 1):
            decision = self.planner.act(self.size_mb(), self.mem.summary())
            action = decision["action"]
            if action == "finish":
                break
            if action == "quant4":
                self.model = Quant4bitAgent().act(self.model)
            # Any other action name falls through and is logged without
            # changing the model.
            # Measure once per step: each call walks every parameter.
            new_size = self.size_mb()
            self.mem.remember(step, action, new_size)
            print(f"Step {step}: {action} -> size {new_size:.0f} MB")



In [None]:

# End-to-end demo: load the default checkpoint, then let the planner drive
# the optimiser towards a 300 MB target for at most four steps.
optimiser = Optimiser(model_name="openvla/openvla-7b", target_mb=300)
optimiser.run(max_steps=4)
