In [None]:
!pip install -q openai pandas
!pip uninstall -y trl peft
!pip install trl==0.8.6 peft==0.10.0 bitsandbytes==0.43.1 datasets>=2.16.0
!pip install torch torchvision torchaudio --upgrade

[0mFound existing installation: peft 0.18.0
Uninstalling peft-0.18.0:
  Successfully uninstalled peft-0.18.0
Collecting torch
  Downloading torch-2.9.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting torchvision
  Downloading torchvision-0.24.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.9 kB)
Collecting torchaudio
  Downloading torchaudio-2.9.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (6.9 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.8.93 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-runtime-cu12==12.8.90 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-cupti-cu12==12.8.90 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cublas-cu12==12.8.4.1 (f

In [None]:
import gc
import json
import os
from getpass import getpass

import torch
from datasets import load_dataset
from huggingface_hub import login
from openai import OpenAI
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
)
from trl import SFTTrainer

In [None]:
# Generate synthetic "Cookie Theft" transcripts with a teacher model and save
# them as JSON Lines (one {"text": ...} object per line) for fine-tuning.

# Only prompt when no key is configured, so re-running the cell (or running it
# in an environment that already exports the key) does not ask again.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API Key: ")

client = OpenAI()

# "Teacher" Prompt
system_prompt = """
You are a simulation engine for clinical linguistics.
Generate a transcript of an elderly patient describing the 'Cookie Theft' picture.
The patient has mild cognitive impairment (MCI).
Key features to simulate:
1. Anomia: Frequent pauses, "um", "uh", "the thing...", "water spreader" instead of "faucet".
2. Circumlocution: Talking around a word they can't remember.
3. Short, simple sentences.
4. Repetitive phrasing.
Output ONLY the raw transcript text.
"""

N_SAMPLES = 50              # number of teacher completions to request
TRAIN_FILE = "train.jsonl"  # read back by the training cell

data = []

# Generate synthetic samples
print("Generating synthetic data...")
for i in range(N_SAMPLES):
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Generate sample #{i+1} with high lexical variation."}
        ]
    )
    transcript = response.choices[0].message.content
    # `content` is optional in the API response; a None or blank transcript
    # would become a {"text": null} / empty row in the training file.
    if not transcript or not transcript.strip():
        print(f"Sample #{i+1} came back empty; skipping.")
        continue
    data.append({"text": transcript})

# Fail here, with a clear message, rather than writing an empty training file
# that only breaks later when it is loaded.
if not data:
    raise RuntimeError("No transcripts were generated; nothing to write.")

with open(TRAIN_FILE, "w", encoding="utf-8") as f:
    for entry in data:
        json.dump(entry, f)
        f.write("\n")

print("Data generation complete. Saved to train.jsonl")

Enter your OpenAI API Key: ··········
Generating synthetic data...
Data generation complete. Saved to train.jsonl


In [None]:
# Fine-tune the base chat model on the synthetic transcripts with a LoRA
# adapter and save the adapter to "my_mci_adapter".

SEED = 42
# Seed torch before the trainer builds the adapter from `peft_config`, so the
# adapter's random initialisation is repeatable from one run to the next.
torch.manual_seed(SEED)

dataset = load_dataset("json", data_files="train.jsonl", split="train")

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

print("Loading model (Standard Precision)...")
# REMOVED torch_dtype=torch.float16 to prevent conflict with Trainer
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse EOS as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# LoRA hyperparameters for the adapter.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="adamw_torch",
    save_steps=25,
    logging_steps=5,
    learning_rate=2e-4,
    fp16=True,   # <--- Trainer will now handle casting safely
    seed=SEED,   # made explicit so it stays in sync with the seed set above
    report_to="none"
)

# The trainer reads the "text" column of the dataset and caps sequences at
# 512 tokens.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=512,
    peft_config=peft_config,
    args=training_args,
    tokenizer=tokenizer,
)

print("Starting training...")
trainer.train()
# This directory is what the inference cell loads the adapter from.
trainer.save_model("my_mci_adapter")
print("Model fine-tuned and saved")

Loading model (Standard Precision)...


  super().__init__(
The model is already on multiple devices. Skipping the move to device specified in `args`.
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.


Starting training...


Step,Training Loss
5,1.8617
10,1.7566
15,1.5944
20,1.5962
25,1.4655
30,1.4044
35,1.4166


Model fine-tuned and saved


In [None]:
# Drop the training objects so their GPU memory can be reclaimed before the
# base model is loaded again for inference.
del model
del trainer
# Collect first: tensors kept alive only by reference cycles are freed by the
# garbage collector, and only after that can empty_cache() hand their blocks
# back to the GPU. Calling empty_cache() first would leave them cached.
gc.collect()
torch.cuda.empty_cache()

11632

In [None]:
# Smoke test: reload the base model, attach the saved adapter, and generate a
# short continuation for one prompt.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto"
)

# Adapter
model = PeftModel.from_pretrained(base_model, "my_mci_adapter")

# Inference
# Send the prompt to the device the model was actually placed on instead of a
# hard-coded "cuda": device_map="auto" decides the placement, and a fixed
# device string fails outright on a machine without a GPU.
inputs = tokenizer("Describe the cookie jar:", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=50)

print("-" * 30)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
print("-" * 30)

------------------------------
Describe the cookie jar: The cookie jar is a small, round container with a lid. The lid is made of a dark brown material, and it has a small hole in the center. The jar is about 2 inches in diameter and 2 inches high. The jar
------------------------------


In [None]:
# Authenticate with the Hugging Face Hub before uploading.
# `login` is imported in the notebook's imports cell with the other dependencies.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Upload the adapter and its tokenizer to the same Hub repository.
hub_repo_id = "amanupg/mci-llama-adapter"

model.push_to_hub(hub_repo_id)
# Kept as the final expression so the cell still displays the tokenizer
# upload's return value.
tokenizer.push_to_hub(hub_repo_id)

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...adapter_model.safetensors:   6%|6         |  565kB / 9.02MB            

README.md: 0.00B [00:00, ?B/s]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...pa2q8s8m3/tokenizer.model:  75%|#######4  |  374kB /  500kB            

CommitInfo(commit_url='https://huggingface.co/amanupg/mci-llama-adapter/commit/4c7ffc2427fdbe862cf61768e342fc1fd982bdf6', commit_message='Upload tokenizer', commit_description='', oid='4c7ffc2427fdbe862cf61768e342fc1fd982bdf6', pr_url=None, repo_url=RepoUrl('https://huggingface.co/amanupg/mci-llama-adapter', endpoint='https://huggingface.co', repo_type='model', repo_id='amanupg/mci-llama-adapter'), pr_revision=None, pr_num=None)