In [1]:
# Cell 1: Install dependencies (run this once)
!pip install -q "transformers>=4.44.0" "accelerate>=0.33.0" "datasets>=2.20.0" \
              "peft>=0.11.0" bitsandbytes sentencepiece


In [1]:
# Cell 2: Imports and global config

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"  # maps cuda:3 to local device 0 for this process

from dataclasses import dataclass
from typing import List, Dict

import json
from pathlib import Path

import torch
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model

print("CUDA visible devices:", os.environ.get("CUDA_VISIBLE_DEVICES"))
print("PyTorch sees devices:", torch.cuda.device_count())

  from .autonotebook import tqdm as notebook_tqdm


CUDA visible devices: 3
PyTorch sees devices: 1


In [19]:
# Cell 3: Path config (EDIT these paths/names if needed)

BASE_MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"

RAW_DATA_DIR = Path("data")
PROCESSED_DIR = Path("processed_sft")
OUTPUT_DIR = Path("experts_checkpoints")

PROCESSED_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Map disorder -> raw json path
# TODO: update all 5 paths to your actual files.
DISORDER_CONFIGS: Dict[str, Path] = {
    "anxiety": RAW_DATA_DIR / "anxiety_data.json",
    "depression": RAW_DATA_DIR / "depression_data.json",
    "bipolar": RAW_DATA_DIR / "bipolar_data.json",
    "ocd": RAW_DATA_DIR / "ocd_data.json",
    "schizophrenia": RAW_DATA_DIR / "schizophrenia_data.json",
}

print(DISORDER_CONFIGS)


{'anxiety': PosixPath('data/anxiety_data.json'), 'depression': PosixPath('data/depression_data.json'), 'bipolar': PosixPath('data/bipolar_data.json'), 'ocd': PosixPath('data/ocd_data.json'), 'schizophrenia': PosixPath('data/schizophrenia_data.json')}


In [11]:
# Cell 4: Load tokenizer (shared across everything)

tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL_NAME,
    use_fast=True,
)

# Make sure we have a pad token for batching (LLama usually uses eos as pad)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Tokenizer loaded. EOS token:", tokenizer.eos_token, "| PAD token:", tokenizer.pad_token)

Tokenizer loaded. EOS token: <|eot_id|> | PAD token: <|eot_id|>


In [12]:
# Cell 5: Preprocess raw JSON into SFT-ready prompts and save as JSONL

def load_raw_pairs(json_path: Path):
    with open(json_path, "r", encoding="utf-8") as f:
        obj = json.load(f)
    # Handle both: list[...] or {"data": [...]}
    if isinstance(obj, dict) and "data" in obj:
        data = obj["data"]
    else:
        data = obj
    assert isinstance(data, list), f"Expected list of examples in {json_path}"
    return data


def build_chat_prompt(instruction: str, output: str) -> str:
    """
    Build a proper chat-style prompt for Llama-3.2-Instruct using the tokenizer's template.
    We keep full sequence (user + assistant) for causal LM training.
    """
    instruction = instruction.strip()
    output = output.strip()
    messages = [
        {"role": "user", "content": instruction},
        {"role": "assistant", "content": output},
    ]
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False,
    )
    return prompt


def preprocess_and_save(disorder_name: str, json_path: Path, min_char_len: int = 20):
    print(f"Processing {disorder_name} from {json_path} ...")
    raw_data = load_raw_pairs(json_path)

    processed = []
    for ex in raw_data:
        instr = ex.get("instruction", "")
        out = ex.get("output", "")
        if not instr or not out:
            continue
        if len(instr) < min_char_len and len(out) < min_char_len:
            continue

        text = build_chat_prompt(instr, out)
        processed.append({"text": text})

    print(f"  -> kept {len(processed)} examples")

    out_path = PROCESSED_DIR / f"{disorder_name}_sft.jsonl"
    with open(out_path, "w", encoding="utf-8") as f:
        for item in processed:
            f.write(json.dumps(item, ensure_ascii=False) + "\n")
    print(f"  -> saved to {out_path}\n")


for disorder, path in DISORDER_CONFIGS.items():
    preprocess_and_save(disorder, path)


Processing anxiety from data/anxiety_data.json ...
  -> kept 740 examples
  -> saved to processed_sft/anxiety_sft.jsonl

Processing depression from data/depression_data.json ...
  -> kept 750 examples
  -> saved to processed_sft/depression_sft.jsonl

Processing disorder3 from data/bipolar_data.json ...
  -> kept 750 examples
  -> saved to processed_sft/disorder3_sft.jsonl

Processing disorder4 from data/ocd_data.json ...
  -> kept 740 examples
  -> saved to processed_sft/disorder4_sft.jsonl

Processing disorder5 from data/schizophrenia_data.json ...
  -> kept 745 examples
  -> saved to processed_sft/disorder5_sft.jsonl



In [13]:
# Cell 6: Helpers for loading tokenized dataset for a given disorder

MAX_SEQ_LEN = 512  # you can push to 768 if you really want, but 512 is fine

def load_sft_dataset_for_disorder(disorder_name: str):
    jsonl_path = PROCESSED_DIR / f"{disorder_name}_sft.jsonl"
    assert jsonl_path.exists(), f"Preprocessed file not found: {jsonl_path}"

    dataset = load_dataset("json", data_files={"train": str(jsonl_path)})
    dataset = dataset["train"]  # single split

    # Train/validation split (e.g., 90/10)
    dataset = dataset.train_test_split(test_size=0.1, seed=42)
    train_ds = dataset["train"]
    val_ds = dataset["test"]

    def tokenize_fn(batch):
        return tokenizer(
            batch["text"],
            truncation=True,
            max_length=MAX_SEQ_LEN,
            padding=False,  # dynamic padding via data collator
        )

    train_ds = train_ds.map(tokenize_fn, batched=True, remove_columns=["text"])
    val_ds = val_ds.map(tokenize_fn, batched=True, remove_columns=["text"])

    print(
        f"{disorder_name}: train={len(train_ds)}, val={len(val_ds)}, "
        f"sample input_ids length={len(train_ds[0]['input_ids'])}"
    )

    return train_ds, val_ds

In [14]:
# Cell 7: LoRA config and model loader

def create_lora_model():
    # Load base model on the single visible GPU
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_NAME,
        torch_dtype=torch.bfloat16,   # A100 is chill with bf16
        device_map={"": 0},           # single GPU (after CUDA_VISIBLE_DEVICES remap)
    )

    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        # LLaMA-style attention + MLP modules
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
    )

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()
    return model


In [15]:
# QLoRA-style model loader: 4-bit quantized base + LoRA on top

def create_lora_model_qlora():
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,              # main switch
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",      # NF4 is standard for QLoRA
        bnb_4bit_compute_dtype=torch.bfloat16,  # A100 = bf16 friendly
    )

    print("Loading 4-bit quantized base model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_NAME,
        quantization_config=bnb_config,
        device_map={"": 0},   # because we set CUDA_VISIBLE_DEVICES=3 earlier
        trust_remote_code=False,
    )

    # LoRA config – same as before
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
    )

    model = get_peft_model(base_model, lora_config)
    model.print_trainable_parameters()  # sanity check: only a few M params trainable

    return model


In [16]:
# Cell 8: Trainer setup for a single disorder

def train_expert(
    disorder_name: str,
    num_epochs: int = 8,
    per_device_train_batch_size: int = 8,
    gradient_accumulation_steps: int = 1,
    learning_rate: float = 2e-4,
    warmup_ratio: float = 0.03,
):
    print(f"===== Training expert for disorder: {disorder_name} =====")

    train_ds, val_ds = load_sft_dataset_for_disorder(disorder_name)
    model = create_lora_model()

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
    )

    output_dir = OUTPUT_DIR / disorder_name
    output_dir.mkdir(parents=True, exist_ok=True)

    training_args = TrainingArguments(
        output_dir=str(output_dir),
        overwrite_output_dir=True,
        num_train_epochs=num_epochs,
        per_device_train_batch_size=per_device_train_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        learning_rate=learning_rate,
        warmup_ratio=warmup_ratio,
        weight_decay=0.01,
        logging_steps=50,
        save_steps=500,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,   # still pass val_ds so evaluate() can use it
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    # ---- TRAIN ----
    train_result = trainer.train()
    print("Train result:", train_result.metrics)

    # ---- EVAL ON VAL SET ----
    eval_metrics = trainer.evaluate()   # uses eval_dataset=val_ds from above
    print(f"Validation metrics for {disorder_name}:")
    for k, v in eval_metrics.items():
        print(f"  {k}: {v:.4f}" if isinstance(v, (float, int)) else f"  {k}: {v}")

    # Save LoRA adapter + tokenizer
    print(f"Saving LoRA adapter and tokenizer for {disorder_name} to {output_dir}")
    model.save_pretrained(str(output_dir))
    tokenizer.save_pretrained(str(output_dir))

    # OPTIONAL: save metrics to a file for your report
    metrics_path = output_dir / "metrics.json"
    import json
    with open(metrics_path, "w") as f:
        json.dump(
            {
                "train_metrics": train_result.metrics,
                "eval_metrics": eval_metrics,
            },
            f,
            indent=2,
        )
    print(f"Saved metrics to {metrics_path}")

    # Free GPU memory (important if training multiple experts sequentially)
    del trainer
    del model
    torch.cuda.empty_cache()
    print(f"===== Done: {disorder_name} =====\n")


In [17]:
def train_expert_qlora(
    disorder_name: str,
    num_epochs: int = 3,
    per_device_train_batch_size: int = 8,
    gradient_accumulation_steps: int = 1,
    learning_rate: float = 5e-5,
    warmup_ratio: float = 0.03,
):
    print(f"===== Training expert for disorder: {disorder_name} (QLoRA 4-bit) =====")

    train_ds, val_ds = load_sft_dataset_for_disorder(disorder_name)
    model = create_lora_model_qlora()   # <-- changed here

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
    )

    output_dir = OUTPUT_DIR / disorder_name
    output_dir.mkdir(parents=True, exist_ok=True)

    training_args = TrainingArguments(
        output_dir=str(output_dir),
        overwrite_output_dir=True,
        num_train_epochs=num_epochs,
        per_device_train_batch_size=per_device_train_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        learning_rate=learning_rate,
        warmup_ratio=warmup_ratio,
        weight_decay=0.01,
        logging_steps=50,
        save_steps=500,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    train_result = trainer.train()
    print("Train metrics:", train_result.metrics)

    eval_metrics = trainer.evaluate()
    print("Eval metrics:", eval_metrics)

    print(f"Saving QLoRA expert for {disorder_name} to {output_dir}")
    model.save_pretrained(str(output_dir))
    tokenizer.save_pretrained(str(output_dir))

    del trainer, model
    torch.cuda.empty_cache()
    print(f"===== Done: {disorder_name} =====\n")


In [25]:
# Cell 9A: Train a single expert (e.g., anxiety) first

TARGET_DISORDER = "anxiety"  # change this to any key in DISORDER_CONFIGS

train_expert(
    disorder_name=TARGET_DISORDER,
    num_epochs=3,                     # reduce if you need faster runs
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    learning_rate=2e-4,
)

===== Training expert for disorder: anxiety =====
anxiety: train=666, val=74, sample input_ids length=172
trainable params: 11,272,192 || all params: 1,247,086,592 || trainable%: 0.9039


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  trainer = Trainer(
The model is already on multiple devices. Skipping the move to device specified in `args`.
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.


Step,Training Loss
50,2.0103
100,1.5146
150,1.3566
200,1.2225
250,1.1174


Train result: {'train_runtime': 38.2624, 'train_samples_per_second': 52.218, 'train_steps_per_second': 6.586, 'total_flos': 2268383284887552.0, 'train_loss': 1.4412491463479542, 'epoch': 3.0}


Validation metrics for anxiety:
  eval_loss: 1.6061
  eval_runtime: 0.6199
  eval_samples_per_second: 119.3780
  eval_steps_per_second: 16.1320
  epoch: 3.0000
Saving LoRA adapter and tokenizer for anxiety to experts_checkpoints/anxiety
Saved metrics to experts_checkpoints/anxiety/metrics.json
===== Done: anxiety =====



In [24]:
# Cell 10: Sanity check generation with one expert

from transformers import pipeline

TEST_DISORDER = "anxiety"  # pick one that you already trained
expert_dir = OUTPUT_DIR / TEST_DISORDER

print("Loading expert from:", expert_dir)

expert_model = AutoModelForCausalLM.from_pretrained(
    expert_dir,
    torch_dtype=torch.bfloat16,
    device_map={"": 0},
)
expert_tokenizer = AutoTokenizer.from_pretrained(expert_dir)

def generate_answer(prompt: str, max_new_tokens: int = 256):
    messages = [
        {"role": "user", "content": prompt},
    ]
    encoded = expert_tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(expert_model.device)

    with torch.no_grad():
        out_ids = expert_model.generate(
            encoded,
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    decoded = expert_tokenizer.decode(out_ids[0], skip_special_tokens=True)
    return decoded

test_query = "What are common physical sensations someone with anxiety might experience?"
print(generate_answer(test_query))


Loading expert from: experts_checkpoints/anxiety


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


system

Cutting Knowledge Date: December 2023
Today Date: 23 Nov 2025

user

What are common physical sensations someone with anxiety might experience?assistant

Individuals experiencing anxiety may report experiencing a variety of physical sensations such as tingling or shivering, coldness or heat, trembling or shaking, muscle tension, joint stiffness, restlessness, or feeling 'driven mad' by bodily sensations like pins and needles, numbness, or a feeling of impending doom when in certain physical positions. They might also have difficulty relaxing and may experience muscle inflexibility. Some individuals report specific phobic-related sensations such as heart pounding, a feeling of choking or suffocation, or being 'trapped' in a situation. These are not diagnostic criteria but common subjective experiences associated with anxiety. The intensity and frequency of these sensations can vary widely among individuals. Some individuals may experience fewer of these sensations and may have m

3 epoch:Individuals with anxiety disorders often report a variety of physical sensations, including sensations of nervousness or fatigue, muscle tension, a 'pins and needles' sensation in extremities, and even experiencing heat or cold. Some may also report sensations like a 'fullness' or 'gulping' in their chest or stomach. A few individuals might experience more unusual sensations, such as an unexplained 'heavy feeling' in their chest or a 'burning' sensation in their hands. While these sensations are generally not indicative of an underlying medical condition, they can be distressing enough to warrant attention. Clinically, it's crucial to distinguish these physical sensations from the physical symptoms of medical conditions, especially cardiovascular diseases, which can be exacerbated by anxiety. A thorough medical evaluation is necessary to rule out any underlying health issues. The focus of treatment should then be on the anxiety itself, rather than solely on managing the physical symptoms. Techniques like cognitive-behavioral therapy (CBT) and relaxation exercises can be very beneficial in addressing these sensations and the underlying anxiety.

1 epoch: Common physical sensations experienced by individuals with anxiety include: a sense of heaviness or fullness in the body, a feeling of numbness or coldness, a general feeling of being "drained" or exhausted, and a sensation of being "overwhelmed" or suffocated. These sensations can be accompanied by a range of other physical symptoms, including rapid heartbeat, sweating, and trembling. For some, these sensations might be intensely debilitating, while for others, they might be more tolerable. They can also be linked to specific situations or contexts, such as a public speaking event or a stressful work meeting. These experiences can be highly distressing and interfere with daily life. They might also be directly associated with panic symptoms, such as shortness of breath, lightheadedness, and a feeling of impending doom. The physical sensations can serve as a warning sign, prompting the individual to seek a safe space or to prepare for a potentially intense event. They can also be a source of chronic pain or discomfort, especially if the individual has experienced traumatic events or has a history of chronic anxiety. The intensity and duration of these sensations can vary widely, and they can be exacerbated by stress, certain medications, and other factors. They are not just a normal response to


5 epoch : Individuals experiencing anxiety may report experiencing a variety of physical sensations such as tingling or shivering, coldness or heat, trembling or shaking, muscle tension, joint stiffness, restlessness, or feeling 'driven mad' by bodily sensations like pins and needles, numbness, or a feeling of impending doom when in certain physical positions. They might also have difficulty relaxing and may experience muscle inflexibility. Some individuals report specific phobic-related sensations such as heart pounding, a feeling of choking or suffocation, or being 'trapped' in a situation. These are not diagnostic criteria but common subjective experiences associated with anxiety. The intensity and frequency of these sensations can vary widely among individuals. Some individuals may experience fewer of these sensations and may have more difficulty articulating their physical experiences. For instance, compared to controls, individuals with specific phobias may report fewer subjective physical sensations and have less difficulty describing them. However, they may also experience a greater number of them overall. The key is recognizing these physical sensations as part of the anxiety experience. Research is needed into their nature, causes, and consequences for individuals with anxiety. Experiments involving voluntary hyperventilation have also yielded some key findings. For example, a reduction in CO2 levels has been observed during hyp


In [18]:
for disorder_name in DISORDER_CONFIGS.keys():
    train_expert_qlora(
        disorder_name=disorder_name,
        num_epochs=3,
    )

===== Training expert for disorder: anxiety (QLoRA 4-bit) =====


Generating train split: 740 examples [00:00, 76125.40 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 5220.96 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 5354.68 examples/s]


anxiety: train=666, val=74, sample input_ids length=172
Loading 4-bit quantized base model...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


trainable params: 11,272,192 || all params: 1,247,086,592 || trainable%: 0.9039


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.


Step,Training Loss
50,2.2864
100,1.8014
150,1.712
200,1.61
250,1.5321



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B-Instruct.


Train metrics: {'train_runtime': 49.0731, 'train_samples_per_second': 40.715, 'train_steps_per_second': 5.135, 'total_flos': 2268383284887552.0, 'train_loss': 1.7856839573572552, 'epoch': 3.0}


Eval metrics: {'eval_loss': 1.642885446548462, 'eval_runtime': 0.7994, 'eval_samples_per_second': 92.565, 'eval_steps_per_second': 12.509, 'epoch': 3.0}
Saving QLoRA expert for anxiety to experts_checkpoints/anxiety



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B-Instruct.


===== Done: anxiety =====

===== Training expert for disorder: depression (QLoRA 4-bit) =====


Generating train split: 750 examples [00:00, 78255.83 examples/s]
Map: 100%|██████████| 675/675 [00:00<00:00, 7180.52 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 6369.16 examples/s]


depression: train=675, val=75, sample input_ids length=141
Loading 4-bit quantized base model...
trainable params: 11,272,192 || all params: 1,247,086,592 || trainable%: 0.9039


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.


Step,Training Loss
50,2.171
100,1.7149
150,1.6436
200,1.545
250,1.4496



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B-Instruct.


Train metrics: {'train_runtime': 51.8576, 'train_samples_per_second': 39.049, 'train_steps_per_second': 4.917, 'total_flos': 2191758132940800.0, 'train_loss': 1.7005548140581916, 'epoch': 3.0}


Eval metrics: {'eval_loss': 1.5458179712295532, 'eval_runtime': 0.7349, 'eval_samples_per_second': 102.052, 'eval_steps_per_second': 13.607, 'epoch': 3.0}
Saving QLoRA expert for depression to experts_checkpoints/depression



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B-Instruct.


===== Done: depression =====

===== Training expert for disorder: disorder3 (QLoRA 4-bit) =====


Generating train split: 750 examples [00:00, 115329.52 examples/s]
Map: 100%|██████████| 675/675 [00:00<00:00, 8525.16 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 5265.70 examples/s]


disorder3: train=675, val=75, sample input_ids length=82
Loading 4-bit quantized base model...
trainable params: 11,272,192 || all params: 1,247,086,592 || trainable%: 0.9039


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.


Step,Training Loss
50,2.2297
100,1.747
150,1.6683
200,1.5945
250,1.4991



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B-Instruct.


Train metrics: {'train_runtime': 51.5039, 'train_samples_per_second': 39.317, 'train_steps_per_second': 4.951, 'total_flos': 2255530719510528.0, 'train_loss': 1.7417336819218654, 'epoch': 3.0}


Eval metrics: {'eval_loss': 1.5625090599060059, 'eval_runtime': 0.758, 'eval_samples_per_second': 98.948, 'eval_steps_per_second': 13.193, 'epoch': 3.0}
Saving QLoRA expert for disorder3 to experts_checkpoints/disorder3



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B-Instruct.


===== Done: disorder3 =====

===== Training expert for disorder: disorder4 (QLoRA 4-bit) =====


Generating train split: 740 examples [00:00, 116124.85 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 8003.16 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 6688.04 examples/s]


disorder4: train=666, val=74, sample input_ids length=139
Loading 4-bit quantized base model...
trainable params: 11,272,192 || all params: 1,247,086,592 || trainable%: 0.9039


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.


Step,Training Loss
50,2.3592
100,1.8357
150,1.7814
200,1.7064
250,1.5892



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B-Instruct.


Train metrics: {'train_runtime': 50.902, 'train_samples_per_second': 39.252, 'train_steps_per_second': 4.951, 'total_flos': 2281235850264576.0, 'train_loss': 1.8516639130456107, 'epoch': 3.0}


Eval metrics: {'eval_loss': 1.6663367748260498, 'eval_runtime': 0.7562, 'eval_samples_per_second': 97.852, 'eval_steps_per_second': 13.223, 'epoch': 3.0}
Saving QLoRA expert for disorder4 to experts_checkpoints/disorder4



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B-Instruct.


===== Done: disorder4 =====

===== Training expert for disorder: disorder5 (QLoRA 4-bit) =====


Generating train split: 745 examples [00:00, 120908.39 examples/s]
Map: 100%|██████████| 670/670 [00:00<00:00, 8820.36 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 6805.69 examples/s]


disorder5: train=670, val=75, sample input_ids length=117
Loading 4-bit quantized base model...
trainable params: 11,272,192 || all params: 1,247,086,592 || trainable%: 0.9039


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.


Step,Training Loss
50,2.2237
100,1.7062
150,1.6194
200,1.5364
250,1.4523



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B-Instruct.


Train metrics: {'train_runtime': 51.2122, 'train_samples_per_second': 39.248, 'train_steps_per_second': 4.921, 'total_flos': 2118960399360000.0, 'train_loss': 1.7052358606505016, 'epoch': 3.0}


Eval metrics: {'eval_loss': 1.5469661951065063, 'eval_runtime': 0.7274, 'eval_samples_per_second': 103.113, 'eval_steps_per_second': 13.748, 'epoch': 3.0}
Saving QLoRA expert for disorder5 to experts_checkpoints/disorder5



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B-Instruct.


===== Done: disorder5 =====



## GGUF Conversion

In [24]:
from pathlib import Path
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"

EXPERT_DIR = Path("experts_checkpoints/schizophrenia")      # LoRA checkpoint we trained
MERGED_OUT_DIR = Path("merged_experts/schizophrenia_fp16")  # where we'll save merged full model

MERGED_OUT_DIR.mkdir(parents=True, exist_ok=True)

# 1) Load base model in full precision (CPU or single GPU)
print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME,
    torch_dtype=torch.float16,   # or bfloat16, but fp16 is common for GGUF conversion
    device_map="cpu",            # can use {"": 0} if you want GPU
)

# 2) Load LoRA weights on top of base
print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(
    base_model,
    EXPERT_DIR,
)

# 3) Merge LoRA into base weights
print("Merging LoRA weights into base model...")
model = model.merge_and_unload()   # returns a plain AutoModelForCausalLM with merged weights

# 4) Save merged model + tokenizer in HF format
print("Saving merged FP16 model to:", MERGED_OUT_DIR)
model.save_pretrained(MERGED_OUT_DIR)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
tokenizer.save_pretrained(MERGED_OUT_DIR)

print("Done.")


Loading base model...
Loading LoRA adapter...
Merging LoRA weights into base model...
Saving merged FP16 model to: merged_experts/schizophrenia_fp16
Done.


# Orchestrator

In [29]:
# Cell 2: Imports and path / label config

import json
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix,
)
import joblib

# Root dirs
DATA_DIR = Path("data")
ORCH_DIR = Path("orchestrator")
ORCH_DIR.mkdir(parents=True, exist_ok=True)

# Map disorder name -> json path
# TODO: adjust to match your actual filenames
DISORDER_FILES = {
    "anxiety": RAW_DATA_DIR / "anxiety_data.json",
    "depression": RAW_DATA_DIR / "depression_data.json",
    "bipolar": RAW_DATA_DIR / "bipolar_data.json",
    "ocd": RAW_DATA_DIR / "ocd_data.json",
    "schizophrenia": RAW_DATA_DIR / "schizophrenia_data.json",
}

# Assign integer labels in a fixed order
LABEL2ID = {name: i for i, name in enumerate(sorted(DISORDER_FILES.keys()))}
ID2LABEL = {v: k for k, v in LABEL2ID.items()}

print("Label mapping:")
for name, idx in LABEL2ID.items():
    print(f"  {idx}: {name}")


Label mapping:
  0: anxiety
  1: bipolar
  2: depression
  3: ocd
  4: schizophrenia


In [30]:
# Cell 3: Load JSONs and build a single DataFrame: text, label_id, label_name

def load_json_list(path: Path):
    with open(path, "r", encoding="utf-8") as f:
        obj = json.load(f)
    # Handle both:
    # - [ {instruction, output, ...}, ... ]
    # - { "data": [ ... ] }
    if isinstance(obj, dict) and "data" in obj:
        data = obj["data"]
    else:
        data = obj
    assert isinstance(data, list), f"Expected list in {path}, got {type(data)}"
    return data

rows = []

for disorder_name, json_path in DISORDER_FILES.items():
    print(f"Loading {disorder_name} from {json_path}")
    examples = load_json_list(json_path)
    label_id = LABEL2ID[disorder_name]

    for ex in examples:
        instr = ex.get("instruction", "")
        if not instr or not isinstance(instr, str):
            continue
        instr = instr.strip()
        if len(instr) < 10:  # filter extremely short junk
            continue
        rows.append(
            {
                "text": instr,
                "label_id": label_id,
                "label_name": disorder_name,
            }
        )

df = pd.DataFrame(rows)
print("\nDataset summary:")
print(df["label_name"].value_counts())
print("\nTotal examples:", len(df))

# Optional: save for inspection / reproducibility
csv_path = ORCH_DIR / "orchestrator_dataset.csv"
df.to_csv(csv_path, index=False)
print(f"\nSaved combined dataset to {csv_path}")


Loading anxiety from data/anxiety_data.json
Loading depression from data/depression_data.json
Loading bipolar from data/bipolar_data.json
Loading ocd from data/ocd_data.json
Loading schizophrenia from data/schizophrenia_data.json

Dataset summary:
label_name
depression       750
bipolar          750
schizophrenia    745
anxiety          740
ocd              740
Name: count, dtype: int64

Total examples: 3725

Saved combined dataset to orchestrator/orchestrator_dataset.csv


In [31]:
# Cell 4: Stratified train/val/test split

RANDOM_STATE = 42

# First: train+val vs test
df_train_val, df_test = train_test_split(
    df,
    test_size=0.15,
    random_state=RANDOM_STATE,
    stratify=df["label_id"],
)

# Then: split train vs val
df_train, df_val = train_test_split(
    df_train_val,
    test_size=0.1765,  # ~15% of total → so final split ≈ 70/15/15
    random_state=RANDOM_STATE,
    stratify=df_train_val["label_id"],
)

print("Split sizes:")
print("  train:", len(df_train))
print("  val  :", len(df_val))
print("  test :", len(df_test))

def describe_split(df_split, name):
    print(f"\n{name} label distribution:")
    print(df_split["label_name"].value_counts())

describe_split(df_train, "Train")
describe_split(df_val, "Val")
describe_split(df_test, "Test")


Split sizes:
  train: 2607
  val  : 559
  test : 559

Train label distribution:
label_name
bipolar          525
depression       525
schizophrenia    521
ocd              518
anxiety          518
Name: count, dtype: int64

Val label distribution:
label_name
bipolar          113
depression       112
schizophrenia    112
ocd              111
anxiety          111
Name: count, dtype: int64

Test label distribution:
label_name
depression       113
schizophrenia    112
bipolar          112
ocd              111
anxiety          111
Name: count, dtype: int64


In [32]:
# Cell 5: Define models and helper to train & evaluate

def make_tfidf():
    return TfidfVectorizer(
        ngram_range=(1, 2),      # unigrams + bigrams
        max_features=20_000,
        min_df=2,
        stop_words="english",
    )

def train_and_eval_model(model_name: str, clf):
    print(f"\n===== Training {model_name} =====")

    pipe = Pipeline(
        [
            ("tfidf", make_tfidf()),
            ("clf", clf),
        ]
    )

    X_train, y_train = df_train["text"].tolist(), df_train["label_id"].values
    X_val, y_val = df_val["text"].tolist(), df_val["label_id"].values

    pipe.fit(X_train, y_train)

    # Evaluate on val
    y_pred = pipe.predict(X_val)
    acc = accuracy_score(y_val, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_val, y_pred, average="macro", zero_division=0
    )

    print(f"{model_name} val accuracy: {acc:.4f}")
    print(f"{model_name} val macro-precision: {precision:.4f}")
    print(f"{model_name} val macro-recall:    {recall:.4f}")
    print(f"{model_name} val macro-F1:        {f1:.4f}")

    print("\nPer-class metrics:")
    print(
        classification_report(
            y_val,
            y_pred,
            target_names=[ID2LABEL[i] for i in sorted(ID2LABEL.keys())],
            zero_division=0,
        )
    )

    cm = confusion_matrix(y_val, y_pred)
    cm_df = pd.DataFrame(
        cm,
        index=[f"true_{ID2LABEL[i]}" for i in sorted(ID2LABEL.keys())],
        columns=[f"pred_{ID2LABEL[i]}" for i in sorted(ID2LABEL.keys())],
    )
    print("Confusion matrix:")
    display(cm_df)

    metrics = {
        "val_accuracy": acc,
        "val_macro_precision": precision,
        "val_macro_recall": recall,
        "val_macro_f1": f1,
    }

    return pipe, metrics


In [33]:
# Cell 6: Train both models and compare on validation set

logreg_clf = LogisticRegression(
    max_iter=1000,
    class_weight="balanced",
    n_jobs=-1,
)

nb_clf = MultinomialNB()

logreg_pipe, logreg_metrics = train_and_eval_model("LogisticRegression", logreg_clf)
nb_pipe, nb_metrics = train_and_eval_model("MultinomialNB", nb_clf)

print("\n===== Summary (val macro-F1) =====")
print(f"LogReg: {logreg_metrics['val_macro_f1']:.4f}")
print(f"NB    : {nb_metrics['val_macro_f1']:.4f}")



===== Training LogisticRegression =====


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

LogisticRegression val accuracy: 0.8962
LogisticRegression val macro-precision: 0.8975
LogisticRegression val macro-recall:    0.8965
LogisticRegression val macro-F1:        0.8965

Per-class metrics:
               precision    recall  f1-score   support

      anxiety       0.92      0.94      0.93       111
      bipolar       0.89      0.81      0.85       113
   depression       0.79      0.84      0.81       112
          ocd       0.91      0.92      0.91       111
schizophrenia       0.97      0.97      0.97       112

     accuracy                           0.90       559
    macro avg       0.90      0.90      0.90       559
 weighted avg       0.90      0.90      0.90       559

Confusion matrix:


Unnamed: 0,pred_anxiety,pred_bipolar,pred_depression,pred_ocd,pred_schizophrenia
true_anxiety,104,0,3,4,0
true_bipolar,1,92,16,2,2
true_depression,2,11,94,4,1
true_ocd,5,0,4,102,0
true_schizophrenia,1,0,2,0,109



===== Training MultinomialNB =====
MultinomialNB val accuracy: 0.8640
MultinomialNB val macro-precision: 0.8642
MultinomialNB val macro-recall:    0.8643
MultinomialNB val macro-F1:        0.8614

Per-class metrics:
               precision    recall  f1-score   support

      anxiety       0.92      0.92      0.92       111
      bipolar       0.76      0.86      0.81       113
   depression       0.82      0.64      0.72       112
          ocd       0.89      0.95      0.92       111
schizophrenia       0.93      0.96      0.94       112

     accuracy                           0.86       559
    macro avg       0.86      0.86      0.86       559
 weighted avg       0.86      0.86      0.86       559

Confusion matrix:


Unnamed: 0,pred_anxiety,pred_bipolar,pred_depression,pred_ocd,pred_schizophrenia
true_anxiety,102,2,2,5,0
true_bipolar,0,97,11,3,2
true_depression,4,26,72,4,6
true_ocd,3,0,3,105,0
true_schizophrenia,2,2,0,1,107



===== Summary (val macro-F1) =====
LogReg: 0.8965
NB    : 0.8614


In [34]:
# Cell: Train final orchestrator (Logistic Regression) on the full dataset and save

from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
import joblib
import json

print("Training final Logistic Regression orchestrator on FULL dataset...")

# Features and labels from the full combined dataset
X_all = df["text"].tolist()
y_all = df["label_id"].values

# Define final TF-IDF + LogReg pipeline
final_logreg_clf = LogisticRegression(
    max_iter=1000,
    class_weight="balanced",
    n_jobs=-1,
)

final_orchestrator = Pipeline(
    [
        ("tfidf", make_tfidf()),   # reuse the same TF-IDF config function
        ("clf", final_logreg_clf),
    ]
)

# Fit on all data
final_orchestrator.fit(X_all, y_all)

# Save the pipeline
final_model_path = ORCH_DIR / "orchestrator_model_full_logreg.joblib"
joblib.dump(final_orchestrator, final_model_path)

# Save label mapping (so inference code can map ids <-> names)
labelmap_path = ORCH_DIR / "label_mapping.json"
with open(labelmap_path, "w", encoding="utf-8") as f:
    json.dump(
        {
            "LABEL2ID": LABEL2ID,
            "ID2LABEL": ID2LABEL,
        },
        f,
        indent=2,
    )

print("\nDone.")
print(f"Saved final orchestrator pipeline to: {final_model_path}")
print(f"Saved label mapping to: {labelmap_path}")


Training final Logistic Regression orchestrator on FULL dataset...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av


Done.
Saved final orchestrator pipeline to: orchestrator/orchestrator_model_full_logreg.joblib
Saved label mapping to: orchestrator/label_mapping.json
