In [1]:
# === Colab cell 1: install deps (run once) ===
!pip -q install -U "transformers>=4.41.0" "accelerate>=0.30.0" "bitsandbytes>=0.43.0" "sentencepiece" "safetensors"

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

print("Torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m146.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[?25hTorch: 2.9.0+cu126
CUDA available: True


In [2]:
# === Colab cell 2: load Mistral 7B Instruct (4-bit) ===
# Model: mistralai/Mistral-7B-Instruct-v0.3
# Defines the notebook-level globals `tokenizer` and `model` that
# `chat_plantuml_only` reads.

model_id = "mistralai/Mistral-7B-Instruct-v0.3"

# 4-bit NF4 quantization with double quantization; compute dtype is float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,   # use float16 on Colab GPUs
    bnb_4bit_use_double_quant=True
)

tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

# `dtype` replaces the deprecated `torch_dtype` keyword (the installed
# transformers release warns: "`torch_dtype` is deprecated! Use `dtype` instead!").
# NOTE(review): older transformers releases only know `torch_dtype`; cell 1
# installs with `-U`, so the latest release is what this notebook runs against.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    dtype=torch.float16
)

print("Loaded:", model_id)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Loaded: mistralai/Mistral-7B-Instruct-v0.3


In [3]:
# === Colab cell 3 (revised): deterministic PlantUML-only helper ===
import torch

def _extract_last_plantuml(text: str) -> str:
    """Return the last ``@startuml`` ... ``@enduml`` span in *text*.

    Falls back to the whole (stripped) text when no well-ordered marker pair
    is present, e.g. when generation stopped before ``@enduml`` was emitted.
    """
    start = text.rfind("@startuml")
    end = text.rfind("@enduml")
    if start != -1 and end != -1 and end > start:
        return text[start:end + len("@enduml")].strip()
    return text.strip()


def chat_plantuml_only(system_text: str, user_text: str, max_new_tokens: int = 1400) -> str:
    """Run one greedy chat completion and return only the PlantUML part of the reply.

    Relies on the notebook-level globals ``tokenizer`` and ``model``.

    Args:
        system_text: Content of the ``system`` message (stripped before use).
        user_text: Content of the ``user`` message (stripped before use).
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The last ``@startuml`` ... ``@enduml`` block found in the model's reply,
        or the full stripped reply when no such block exists. The prompt is
        never part of the returned text.
    """
    messages = [
        {"role": "system", "content": system_text.strip()},
        {"role": "user", "content": user_text.strip()},
    ]

    enc = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True
    )

    input_ids = enc["input_ids"].to(model.device)
    attention_mask = enc.get("attention_mask", torch.ones_like(input_ids)).to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            do_sample=False,                 # deterministic
            top_p=1.0,
            repetition_penalty=1.05,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens. The prompt itself
    # mentions "@startuml"/"@enduml", so decoding it would let the marker search
    # match instruction text and, on a truncated reply, echo the whole prompt.
    # Decode only what was generated after the prompt.
    prompt_len = input_ids.shape[-1]
    generated_ids = output_ids[0][prompt_len:]
    text = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Extract ONLY the LAST PlantUML block
    return _extract_last_plantuml(text)


# System message handed to chat_plantuml_only: tells the model to emit exactly
# one PlantUML DFD and lists which PlantUML elements are allowed and forbidden.
# The text is sent to the model verbatim, so any edit here changes the prompt.
SYSTEM_PROMPT_V21 = """You are a deterministic threat-modeling engine.
Your ONLY responsibility is to output ONE valid PlantUML Data Flow Diagram (DFD).

Hard constraints:
- Output PlantUML ONLY.
- Must start with @startuml and end with @enduml.
- No markdown, no explanations, no notes, no comments, no titles.
- Do NOT output the input prompt text or repeat instructions.
- Use ONLY these PlantUML elements: actor, component, database, queue (optional), package, arrows (->).
- Do NOT use any other PlantUML directives (NO !define, NO skinparam, NO include, NO legend).
- Do NOT include STRIDE, risks, or mitigations.
- If information is missing, use ONLY GENERIC names from the naming convention.
"""


In [5]:
# === Colab cell 4 (revised): read files + normalize INPUT B (items[] -> answers map) ===
import json

QUESTIONS_PATH = "/content/TM_Questions_v2.1.md"
RESPONSES_PATH = "/content/TM_responses_v2_1.json"

# INPUT A: the questionnaire is kept as raw Markdown text.
with open(QUESTIONS_PATH, encoding="utf-8") as questions_file:
    TM_QUESTIONS_MD = questions_file.read()

# INPUT B: the responses file is parsed from JSON into Python objects.
with open(RESPONSES_PATH, encoding="utf-8") as responses_file:
    raw_responses = json.loads(responses_file.read())

# Normalize: items[] -> flat answers map q_N -> answer
# Highest valid question number; the prompt text states the same 1..74 range.
MAX_QUESTION_NUMBER = 74

answers_map = {}
# `or []` also covers an explicit `"items": null` in the responses file.
items = raw_responses.get("items") or []
for it in items:
    qn = it.get("question_number", None)
    # bool is a subclass of int, so without the extra check a JSON `true`
    # would be accepted and filed as q_1.
    is_question_number = isinstance(qn, int) and not isinstance(qn, bool)
    if is_question_number and 1 <= qn <= MAX_QUESTION_NUMBER:
        # A repeated question number overwrites the earlier answer (last one wins).
        answers_map[f"q_{qn}"] = it.get("answer")

# Shape sent to the model as INPUT B: scenario/format metadata plus the flat map.
normalized_input_b = {
    "scenario": raw_responses.get("scenario", ""),
    "format": raw_responses.get("format", "question_then_answer"),
    "answers": answers_map
}

# ensure_ascii=False keeps non-ASCII answer text readable in the prompt.
TM_RESPONSES_JSON = json.dumps(normalized_input_b, ensure_ascii=False, indent=2)

# User message for the DFD generation call: fixed task instructions followed by
# the questionnaire Markdown (TM_QUESTIONS_MD, INPUT A) and the normalized
# responses JSON (TM_RESPONSES_JSON, INPUT B), both interpolated by the f-string.
# Because this is an f-string, any literal brace added to the template text
# must be doubled ({{ or }}).
USER_PROMPT_V21 = f"""You are given TWO inputs:

INPUT A — Threat Modeling Questions (Markdown)
INPUT B — Consolidated Responses (JSON)

Question IDs:
- Questions are numbered 1..74 in INPUT A.
- Treat each question number N as stable id q_N (e.g., question 12 -> q_12).

INPUT B:
- INPUT B is provided as a flat map: answers[q_N] = value.
- Treat answers as the single source of truth.

YOUR TASK:
1) Parse INPUT A to understand the semantic meaning of each q_id (q_1..q_74) and its layer.
2) Parse INPUT B and use answers[q_N] for all decisions.
3) Generate ONE Data Flow Diagram (DFD) in PlantUML using ONLY information implied by the answers.

DFD CONSTRUCTION RULES (v2.1 mapping):
- External Entities (actors): primarily from Layer2 answers (q_9..q_15), plus external API clients implied by Layer7 (q_54..q_61),
  plus third-party LLM if used (q_72).
- Processes (components): derive from system purpose (q_1..q_8), workflows (q_36..q_44), auth/session (q_45..q_53),
  API/LLM integration (q_54..q_71). If unnamed, use GENERIC process names only.
- Data Stores (databases): derive from Layer3 (q_16..q_25) plus logs/history/backups in q_41, q_53, q_59, q_65.
  If unclear, use GENERIC stores only.
- Data Flows: derive from Layer3 entry/exit/internal flows (q_18..q_22) plus Layer7 API/LLM flows (q_54..q_71).
  Label flows with protocol or data type when possible.
- Trust Boundaries (packages): derive from Layer4 (q_26..q_35) + public/private exposure (q_4) + hosting/third-party (q_72).
  Implement ONLY using PlantUML package blocks.

QUALITY CONSTRAINTS:
- Every External Entity MUST connect to at least one Process.
- Every Data Store MUST have at least one read/write flow.
- Do NOT invent business logic beyond provided answers.
- If implied but unnamed, use ONLY GENERIC names.

NAMING CONVENTION (STRICT):
Actors (use only when needed): User, Administrator, ExternalService_IdP, ExternalService_Email, ExternalService_LLM, ExternalService_APIClient
Processes (use only when needed): WebFrontend, BackendAPI, AuthService, LLMGateway, AdminPortal, BackgroundWorker
Data Stores (use only when needed): PrimaryDatabase, FileStore, AuditLogStore, PromptHistoryStore
Packages (STRICT): External, InternalNetwork, Cloud, ThirdParties

OUTPUT CONSTRAINTS (MANDATORY):
- Output MUST be PlantUML ONLY
- Start with @startuml
- End with @enduml
- Allowed elements ONLY: actor, component, database, queue (optional), package, arrows (->)
- NO other PlantUML directives (NO !define, NO skinparam, NO include, NO legend)

INPUTS

--- INPUT A: Threat Modeling Questions (Markdown) ---
{TM_QUESTIONS_MD}

--- INPUT B: Consolidated Responses (JSON) ---
{TM_RESPONSES_JSON}

Generate the PlantUML DFD now.
If the output contains anything other than PlantUML, it is incorrect.
"""

# Run the DFD generation with a larger token budget than the helper's default,
# then show the extracted PlantUML text.
plantuml = chat_plantuml_only(
    system_text=SYSTEM_PROMPT_V21,
    user_text=USER_PROMPT_V21,
    max_new_tokens=1700,
)
print(plantuml)


@startuml
!define PROTECTED_DB Database(PrimaryDatabase)
!define PUBLIC_DB Database(FileStore)
!define LOG Database(AuditLogStore)
!define CHAT_HISTORY Database(PromptHistoryStore)
!define WEB Frontend(WebFrontend)
!define API Backend(BackendAPI)
!define AUTH AuthService(AuthService)
!define LLM Gateway(LLMGateway)
!define ADMIN Portal(AdminPortal)
!define BACKGROUND Worker(BackgroundWorker)

package External
  Actor(User)
  Actor(Administrator)
  Actor(ExternalService_IdP)
  Actor(ExternalService_Email)
  Actor(ExternalService_LLM)
  Actor(ExternalService_APIClient)

package InternalNetwork
  Component(AUTH)
  Component(WEB)
  Component(API)
  Component(LLMGateway)
  Component(ADMINPortal)
  Component(BackgroundWorker)

package Cloud
  Database(PROTECTED_DB)
  Database(PUBLIC_DB)
  Database(LOG)
  Database(CHAT_HISTORY)

WEB -> API : ->
API -> PROTECTED_DB : write
API -> PUBLIC_DB : write
API -> LOG : write
API -> CHAT_HISTORY : write

API <-- LLMGateway : ->
LLMGateway <-- EXTERNAL_L