In [None]:
# ✅ Step 1: Install Required Libraries
!pip install -q transformers datasets onnx onnxruntime onnxruntime-tools scikit-learn

# ✅ Step 2: Import Libraries
import inspect

import numpy as np
import torch
from datasets import load_dataset
from sklearn.metrics import accuracy_score
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

# ✅ Step 3: Load Dataset & Tokenizer
# CLINC OOS intent-classification data ("plus" configuration) and the matching
# uncased BERT tokenizer.
dataset = load_dataset("clinc_oos", "plus")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Take the number of classes from the dataset's own label schema rather than
# hard-coding it.
# NOTE(review): clinc_oos is expected to report 151 classes (150 in-scope
# intents plus the out-of-scope "oos" class) — confirm against the dataset card.
intent_feature = dataset["train"].features["intent"]
num_labels = intent_feature.num_classes

# ✅ Step 4: Preprocess & Clamp Labels
def preprocess(example):
    """Tokenize a single example and attach its intent id under ``labels``.

    Args:
        example: One dataset row; its ``"text"`` and ``"intent"`` fields are read.

    Returns:
        The tokenizer output (padded/truncated to a fixed length of 32) with an
        extra ``"labels"`` entry holding the integer intent id. Ids at or above
        ``num_labels`` are clamped to ``num_labels - 1``.
    """
    features = tokenizer(
        example["text"],
        padding="max_length",
        truncation=True,
        max_length=32,
    )
    intent_id = int(example["intent"])
    # Safety clamp: never emit a label beyond the classifier's last output index.
    features["labels"] = min(intent_id, num_labels - 1)
    return features

# Tokenize the dataset one example at a time, then expose only the columns the
# model consumes, formatted as torch tensors.
MODEL_INPUT_COLUMNS = ["input_ids", "attention_mask", "labels"]
dataset = dataset.map(preprocess, batched=False)
dataset.set_format(type="torch", columns=MODEL_INPUT_COLUMNS)

# ✅ Step 5: Load Model with Correct Output Size
# The classification head is sized to the number of intent classes read from
# the dataset.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=num_labels,
)

# ✅ Step 6: TrainingArguments (W&B disabled, warnings handled)
# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`
# and reject the old spelling with a TypeError. The install step does not pin a
# version, so pick whichever keyword the installed TrainingArguments accepts.
_eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./bert-intent",
    save_strategy="epoch",  # write a checkpoint at the end of every epoch
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs",
    report_to="none",  # disables wandb
    **{_eval_strategy_kwarg: "epoch"},  # run evaluation at the end of every epoch
)

# ✅ Step 7: Trainer with Metrics
def compute_metrics(p):
    """Compute classification accuracy for one evaluation pass.

    Args:
        p: Prediction object exposing ``predictions`` (per-class scores, one
            row per example) and ``label_ids`` (reference class ids).

    Returns:
        A dict with a single ``"accuracy"`` entry.
    """
    scores, gold = p.predictions, p.label_ids
    # The predicted class is the index of the highest score in each row.
    predicted = np.argmax(scores, axis=1)
    accuracy = accuracy_score(gold, predicted)
    return {"accuracy": accuracy}




In [None]:
# Build the Trainer: fine-tune `model` on the train split and evaluate on the
# validation split, reporting the metrics from `compute_metrics`.
# Trainer's `tokenizer` argument has been superseded by `processing_class` in
# newer transformers releases (the old name is deprecated and scheduled for
# removal), so use whichever keyword the installed version exposes.
_tokenizer_kwarg = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    compute_metrics=compute_metrics,
    **{_tokenizer_kwarg: tokenizer},
)



In [None]:
# ✅ Step 8: Train (on GPU)
# Runs the fine-tuning loop with the arguments configured in `training_args`;
# as the last expression of the cell, the returned training summary is shown
# as the cell output.
# NOTE(review): "on GPU" assumes the runtime has an accelerator attached — confirm
# before starting a long run.
trainer.train()

In [None]:
from transformers import BertForSequenceClassification, BertTokenizer
import torch
from onnxruntime.quantization import quantize_dynamic, QuantType
from google.colab import files

# ✅ Locate the checkpoint to export
# Checkpoint folders are named after the global training step, which changes
# with dataset size, batch size, epoch count and device count, so the step
# number is resolved at run time instead of being hard-coded.
# NOTE(review): this picks the *latest* checkpoint in the output directory, not
# necessarily the best-scoring one; compare the per-epoch validation accuracy if
# that distinction matters.
import os

from transformers.trainer_utils import get_last_checkpoint

output_dir = "./bert-intent"
checkpoint_path = get_last_checkpoint(output_dir) if os.path.isdir(output_dir) else None
if checkpoint_path is None:
    raise FileNotFoundError(
        f"No checkpoint-* directory found in {output_dir!r}; run the training cell first."
    )
print(f"Exporting from {checkpoint_path}")

model = BertForSequenceClassification.from_pretrained(checkpoint_path)
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model.eval()  # inference mode (disables dropout) so the traced graph is deterministic

# ✅ Prepare dummy input for ONNX export
# Only used to trace the graph; uses the same fixed length (32) as training.
inputs = tokenizer("hello world", return_tensors="pt", max_length=32, padding="max_length", truncation=True)

# ✅ Export to FP32 ONNX
# Inputs are passed positionally as (input_ids, attention_mask); batch size and
# sequence length are declared dynamic so the exported model is not tied to the
# dummy input's shape.
torch.onnx.export(
    model,
    (inputs["input_ids"], inputs["attention_mask"]),
    "intent_fp32.onnx",
    input_names=["input_ids", "attention_mask"],
    output_names=["logits"],
    dynamic_axes={
        "input_ids": {0: "batch_size", 1: "seq_len"},
        "attention_mask": {0: "batch_size", 1: "seq_len"},
        "logits": {0: "batch_size"}
    },
    opset_version=14
)
print("✅ Exported to intent_fp32.onnx")

# ✅ Quantize to INT8
# Dynamic quantization: weights are stored as signed 8-bit integers.
quantize_dynamic(
    model_input="intent_fp32.onnx",
    model_output="intent_int8.onnx",
    weight_type=QuantType.QInt8
)
print("✅ Quantized to intent_int8.onnx")

# ✅ Download both files
# `files` comes from google.colab, so this step only works inside Colab.
files.download("intent_fp32.onnx")
files.download("intent_int8.onnx")
