In [3]:
import os, torch, json
os.environ["UNSLOTH_COMPILE_DISABLE"] = "1"

print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

# Find data files
for check_dir in [".", "dataset",]:
    if os.path.exists(os.path.join(check_dir, "questions.json")):
        DATA_DIR = check_dir
        break
else:
    DATA_DIR = "."
    print("WARNING: questions.json not found, set DATA_DIR manually")

print(f"DATA_DIR = {DATA_DIR}")
print(f"questions.json exists: {os.path.exists(os.path.join(DATA_DIR, 'questions.json'))}")
print(f"answers.json exists: {os.path.exists(os.path.join(DATA_DIR, 'answers.json'))}")
print(f"skillbank.json exists: {os.path.exists(os.path.join(DATA_DIR, 'skillbank.json'))}")

# Check available models in cache
cache_dir = "/root/.cache/huggingface/"
if os.path.exists(cache_dir):
    models = [d for d in os.listdir(cache_dir) if d.startswith("models--")]
    print(f"\nCached models ({len(models)}):")
    for m in sorted(models):
        print(f"  {m.replace('models--', '').replace('--', '/')}")
else:
    print(f"Cache dir not found: {cache_dir}")

PyTorch: 2.9.0a0+git1c57644
CUDA available: True
GPU: 
VRAM: 274.5 GB
DATA_DIR = dataset
questions.json exists: True
answers.json exists: True
skillbank.json exists: True

Cached models (7):
  Qwen/Qwen2.5-14B-Instruct
  Qwen/Qwen3-4B
  Unsloth/Llama-3.1-8B-Instruct
  google/gemma-3-12b-it
  microsoft/Phi-4-mini-instruct
  mistralai/Mistral-7B-Instruct-v0.3
  unsloth/gpt-oss-20b-BF16


In [6]:
!pip list | grep -E "unsloth|torch"

torch                             2.9.0a0+git1c57644
torchvision                       0.23.0a0+824e8c8
unsloth                           2025.10.9
unsloth_zoo                       2025.10.10


In [10]:
import os

print("Searching in hf_models for Microsoft Phi models...")

for root, dirs, files in os.walk('/workspace/AAIPL/hf_models'):
    if 'config.json' in files and any(x in root.lower() for x in ['phi', 'microsoft']):
        print("\n✅ FOUND!")
        print("Folder:", root)
        print("Key files:", [f for f in files if f in ['config.json', 'tokenizer_config.json', 'model.safetensors.index.json']])
        break
else:
    print("No Phi model found — try this instead:")
    os.system("find /workspace -name config.json -path '*/phi*' 2>/dev/null | head -5")

Searching in hf_models for Microsoft Phi models...

✅ FOUND!
Folder: /workspace/AAIPL/hf_models/models--microsoft--Phi-4-mini-instruct/snapshots/cfbefacb99257ffa30c83adab238a50856ac3083
Key files: ['config.json', 'model.safetensors.index.json', 'tokenizer_config.json']


In [43]:
MODEL_NAME = "/workspace/AAIPL/hf_models/models--Qwen--Qwen2.5-14B-Instruct/snapshots/cf98f3b3bbb457ad9e2bb7baf9a0125b6b88caa8"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=torch.bfloat16,
    load_in_4bit=False,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.
Are you certain you want to do remote code execution?
==((====))==  Unsloth 2025.10.9: Fast Qwen2 patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.
   \\   /|    . Num GPUs = 1. Max memory: 255.688 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


[2026-02-15 04:51:39] INFO modeling.py:987: We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [51]:
MODEL_NAME = "/workspace/AAIPL/hf_models/models--microsoft--Phi-4-mini-instruct/snapshots/cfbefacb99257ffa30c83adab238a50856ac3083"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL_NAME,
    max_seq_length = 1024,
    load_in_4bit = False, # False for LoRA 16bit
    fast_inference = False, # Enable vllm fast inference
    max_lora_rank = 16,
    gpu_memory_utilization = 0.9, # Reduce if out of memory
)

Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.
Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.
==((====))==  Unsloth 2025.10.9: Fast Phi3 patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.
   \\   /|    . Num GPUs = 1. Max memory: 255.688 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: QLoRA and full finetuning all not selected. Switching to 16bit LoRA.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

/workspace/AAIPL/hf_models/models--microsoft--Phi-4-mini-instruct/snapshots/cfbefacb99257ffa30c83adab238a50856ac3083 does not have a padding token! Will use pad_token = <|endoftext|>.


In [52]:
import os
os.environ["HF_HUB_CACHE"] = "/root/.cache/huggingface"
os.environ["UNSLOTH_COMPILE_DISABLE"] = "1"
import json
import random
import torch
from datasets import Dataset
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template, standardize_sharegpt, train_on_responses_only
from trl import SFTConfig, SFTTrainer
from transformers import DataCollatorForSeq2Seq

random.seed(42)
torch.random.manual_seed(42)

###############################################################
# CONFIG — CHANGE THESE IF NEEDED
###############################################################
DATA_DIR = "dataset" 

SAVE_LORA = "hf_models/phi4-mini-skills-lora"
SAVE_MERGED = "hf_models/phi4-mini-skills-merged"
MAX_SEQ_LENGTH = 1024
BATCH_SIZE = 16 
GRAD_ACCUM = 2 
NUM_EPOCHS = 3
LEARNING_RATE = 2e-4
###############################################################

# ── STEP 1: Load data ────────────────────────────────────────
print("=" * 60)
print("STEP 1: Loading data")
print("=" * 60)
with open(os.path.join(DATA_DIR, "questions.json")) as f:
    questions = json.load(f)
with open(os.path.join(DATA_DIR, "answers.json")) as f:
    answers = json.load(f)
skillbank_path = os.path.join(DATA_DIR, "skillbank.json")
if os.path.exists(skillbank_path):
    with open(skillbank_path) as f:
        skillbank = json.load(f)
    print(f"Loaded skillbank with {sum(len(v) for cat in skillbank.values() for v in (cat.values() if isinstance(cat, dict) else []))} skills")
else:
    skillbank = None
    print("No skillbank.json found — training without skills")
assert len(questions) == len(answers), f"Mismatch: {len(questions)} questions vs {len(answers)} answers"
print(f"Loaded {len(questions)} questions, {len(answers)} answers")

# ── STEP 2: Build skill text ─────────────────────────────────
TOPIC_MAP = {
    "Syllogisms": "syllogisms",
    "Mixed Series (Alphanumeric)": "alphanumeric_series",
    "Seating Arrangements (Linear, Circular)": "seating_arrangements",
    "Blood Relations and Family Tree": "blood_relations",
}
def get_skills(topic, agent_type="answer"):
    if skillbank is None:
        return ""
    key = TOPIC_MAP.get(topic, "")
    lines = ["\n## Reasoning Skills", "### General"]
    for s in skillbank.get("general", {}).get("solving", []):
        lines.append(f"- {s['title']}: {s['principle']}")
    for s in skillbank.get("general", {}).get("errors", []):
        lines.append(f"- AVOID: {s['principle']}")
    if agent_type == "question":
        for s in skillbank.get("general", {}).get("question_generation", []):
            lines.append(f"- {s['title']}: {s['principle']}")
    if key and key in skillbank:
        lines.append(f"### {topic}")
        for s in skillbank[key].get("solving", []):
            lines.append(f"- {s['title']}: {s['principle']}")
        for s in skillbank[key].get("errors", []):
            lines.append(f"- AVOID: {s['principle']}")
        if agent_type == "question":
            for s in skillbank[key].get("question_generation", []):
                lines.append(f"- {s['title']}: {s['principle']}")
    return "\n".join(lines)

# ── STEP 3: Build training conversations ─────────────────────
print("\n" + "=" * 60)
print("STEP 3: Building training data")
print("=" * 60)
answer_data = []
for q, a in zip(questions, answers):
    topic = q["topic"]
    skills = get_skills(topic, "answer")
    sys_p = ("You are an expert in quantitative aptitude for competitive exams, "
             "solving MCQs with step-by-step reasoning before selecting the correct answer." + skills)
    choices_str = " ".join(q["choices"])
    user_p = (f"Question: {q['question']}\nChoices: {choices_str}\n\n"
              "Respond with JSON: {\"answer\": \"letter\", \"reasoning\": \"brief explanation\"}")
    letter = a["answer"].strip().upper()[:1]
    reasoning = q.get("explanation", "") or "Systematic analysis and elimination."
    asst = json.dumps({"answer": letter, "reasoning": reasoning}, ensure_ascii=False)
    answer_data.append({"conversations": [
        {"role": "system", "content": sys_p},
        {"role": "user", "content": user_p},
        {"role": "assistant", "content": asst},
    ]})

question_data = []
for q in questions:
    topic = q["topic"]
    skills = get_skills(topic, "question")
    sys_p = ("You are an expert examiner designing extremely difficult MCQs "
             "for Quantitative Aptitude and Analytical Reasoning." + skills)
    user_p = (f"Generate an EXTREMELY DIFFICULT MCQ on topic: {topic}.\n"
              "Respond with JSON: {\"topic\": \"...\", \"question\": \"...\", "
              "\"choices\": [\"A) ...\", \"B) ...\", \"C) ...\", \"D) ...\"], "
              "\"answer\": \"letter\", \"explanation\": \"...\"}")
    letter = q.get("expected_answer", q.get("answer", "A")).strip().upper()[:1]
    asst = json.dumps({"topic": topic, "question": q["question"], "choices": q["choices"],
                       "answer": letter, "explanation": q.get("explanation", "")}, ensure_ascii=False)
    question_data.append({"conversations": [
        {"role": "system", "content": sys_p},
        {"role": "user", "content": user_p},
        {"role": "assistant", "content": asst},
    ]})
combined = answer_data + question_data
random.shuffle(combined)
print(f"Answer examples: {len(answer_data)}")
print(f"Question examples: {len(question_data)}")
print(f"Combined (shuffled): {len(combined)}")

# ── STEP 4: Load model + LoRA ────────────────────────────────
print("\n" + "=" * 60)
print(f"STEP 4: Loading {MODEL_NAME}")
print("=" * 60)

# Quick check that the local folder exists
if not os.path.exists(MODEL_NAME):
    print(f"❌ Folder not found: {MODEL_NAME}")
    print("Run this in a new cell to see your folders:")
    print("!ls -la /workspace/AAIPL/hf_models/microsoft/")
    raise FileNotFoundError(f"Model folder {MODEL_NAME} does not exist")

model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)
model.print_trainable_parameters()

# ── STEP 5: Format dataset ───────────────────────────────────
print("\n" + "=" * 60)
print("STEP 5: Formatting dataset")
print("=" * 60)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id
dataset = Dataset.from_list(combined)
def formatting_func(examples):
    convos = examples["conversations"]
    texts = []
    for convo in convos:
        try:
            text = tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
            texts.append(text)
        except Exception as e:
            print(f"Template error: {e}")
            texts.append("")
    return {"text": texts}
dataset = standardize_sharegpt(dataset)
dataset = dataset.map(formatting_func, batched=True, remove_columns=dataset.column_names)
dataset = dataset.filter(lambda x: len(x["text"].strip()) > 0)
print(f"Formatted: {len(dataset)} examples")
sample = dataset["text"][0]
print(f"\nSAMPLE (first 800 chars):")
print(sample[:800])
print("...\n")

INSTRUCTION_PART = None
RESPONSE_PART = None
marker_checks = [
    ("<|im_start|>user<|im_sep|>", "<|im_start|>assistant<|im_sep|>"),
    ("<|im_start|>user\n", "<|im_start|>assistant\n"),
    ("<|user|>\n", "<|assistant|>\n"),
    ("<|user|>", "<|assistant|>"),
    ("<|start_header_id|>user<|end_header_id|>\n\n", "<|start_header_id|>assistant<|end_header_id|>\n\n"),
]
for instr, resp in marker_checks:
    if instr in sample and resp in sample:
        INSTRUCTION_PART = instr
        RESPONSE_PART = resp
        break
print(f"Detected instruction_part: {repr(INSTRUCTION_PART)}")
print(f"Detected response_part: {repr(RESPONSE_PART)}")
if INSTRUCTION_PART is None:
    print("WARNING: Could not detect markers! Check the sample above and set them manually.")

# ── STEP 6: Train ────────────────────────────────────────────
print("\n" + "=" * 60)
print("STEP 6: Training")
print("=" * 60)
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
    packing=False,
    args=SFTConfig(
        per_device_train_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=GRAD_ACCUM,
        warmup_steps=10,
        num_train_epochs=NUM_EPOCHS,
        learning_rate=LEARNING_RATE,
        logging_steps=5,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="cosine",
        seed=3407,
        output_dir="sft_checkpoints",
        report_to="none",
        bf16=True,
        dataloader_pin_memory=False,
        remove_unused_columns=True,
        gradient_checkpointing=True,
        dataloader_num_workers=0,
        save_strategy="epoch",
        save_total_limit=2,
    ),
)
if INSTRUCTION_PART and RESPONSE_PART:
    trainer = train_on_responses_only(
        trainer,
        instruction_part=INSTRUCTION_PART,
        response_part=RESPONSE_PART,
    )
    print("train_on_responses_only applied")
FastLanguageModel.for_training(model)
print(f"Config: batch={BATCH_SIZE} x grad_accum={GRAD_ACCUM} = effective {BATCH_SIZE * GRAD_ACCUM}")
print(f"Epochs: {NUM_EPOCHS}, Examples: {len(dataset)}, LR: {LEARNING_RATE}")
print("\nTraining started...\n")
stats = trainer.train()
print(f"\nSFT DONE! Final loss: {stats.training_loss:.4f}")

# ── STEP 7: Save ─────────────────────────────────────────────
print("\n" + "=" * 60)
print("STEP 7: Saving model")
print("=" * 60)
os.makedirs(os.path.dirname(SAVE_LORA), exist_ok=True)
model.save_pretrained(SAVE_LORA)
tokenizer.save_pretrained(SAVE_LORA)
print(f"LoRA saved: {SAVE_LORA}")
model.save_pretrained_merged(SAVE_MERGED, tokenizer, save_method="merged_16bit")
print(f"Merged saved: {SAVE_MERGED}")
print("\n" + "=" * 60)
print("ALL DONE")
print("=" * 60)

STEP 1: Loading data
Loaded skillbank with 53 skills
Loaded 500 questions, 500 answers

STEP 3: Building training data
Answer examples: 500
Question examples: 500
Combined (shuffled): 1000

STEP 4: Loading /workspace/AAIPL/hf_models/models--microsoft--Phi-4-mini-instruct/snapshots/cfbefacb99257ffa30c83adab238a50856ac3083
Unsloth: Making `model.base_model.model.model` require gradients
trainable params: 8,912,896 || all params: 3,844,934,656 || trainable%: 0.2318

STEP 5: Formatting dataset


Unsloth: Standardizing formats (num_proc=160):   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1000 [00:00<?, ? examples/s]

Formatted: 1000 examples

SAMPLE (first 800 chars):
<|system|>You are an expert examiner designing extremely difficult MCQs for Quantitative Aptitude and Analytical Reasoning.
## Reasoning Skills
### General
- Eliminate Before Selecting: Never jump to an answer. Test ALL four options — eliminate the ones that violate constraints, then confirm the survivor.
- Assume and Contradict: If direct deduction stalls, assume each option is correct one by one and check if it leads to a contradiction with given constraints.
- Verify Final Answer: After selecting an answer, re-check it against EVERY given constraint or statement. If any constraint is violated, the answer is wrong.
- Read Precisely: Pay extreme attention to words like 'only', 'all', 'some', 'not', 'except', 'immediately', 'possible', 'definitely'. One word changes the entire logic.
- AVO
...

Detected instruction_part: '<|user|>'
Detected response_part: '<|assistant|>'

STEP 6: Training


Unsloth: Tokenizing ["text"] (num_proc=64):   0%|          | 0/1000 [00:00<?, ? examples/s]

Map (num_proc=64):   0%|          | 0/1000 [00:00<?, ? examples/s]

train_on_responses_only applied
Config: batch=16 x grad_accum=2 = effective 32
Epochs: 3, Examples: 1000, LR: 0.0002

Training started...



==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,000 | Num Epochs = 3 | Total steps = 96
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 2 x 1) = 32
 "-____-"     Trainable parameters = 8,912,896 of 3,844,934,656 (0.23% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,3.6923
10,2.106
15,1.1958
20,0.8749
25,0.59
30,0.3496
35,0.2152
40,0.1643
45,0.1308
50,0.1078



SFT DONE! Final loss: 0.5350

STEP 7: Saving model
LoRA saved: hf_models/phi4-mini-skills-lora
Detected local model directory: /workspace/AAIPL/hf_models/models--microsoft--Phi-4-mini-instruct/snapshots/cfbefacb99257ffa30c83adab238a50856ac3083


Unsloth: Preparing safetensor model files:  50%|█████     | 1/2 [00:01<00:01,  1.30s/it]

Copied model-00002-of-00002.safetensors from local model directory


Unsloth: Preparing safetensor model files: 100%|██████████| 2/2 [00:03<00:00,  1.81s/it]


Copied model-00001-of-00002.safetensors from local model directory


Unsloth: Merging weights into 16bit: 100%|██████████| 2/2 [00:08<00:00,  4.29s/it]


Unsloth: Merge process complete. Saved to `/workspace/AAIPL/hf_models/phi4-mini-skills-merged`
Merged saved: hf_models/phi4-mini-skills-merged

ALL DONE


In [53]:
# ══════════════════════════════════════════════════════════════
# CELL 3: Test the fine-tuned model
# ══════════════════════════════════════════════════════════════

import json, re, random

FastLanguageModel.for_inference(model)

def test_one(msgs, max_tokens=256):
    inputs = tokenizer.apply_chat_template(msgs, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
    out = model.generate(input_ids=inputs, max_new_tokens=max_tokens, temperature=0.1, do_sample=True, pad_token_id=tokenizer.pad_token_id)
    return tokenizer.decode(out[0][inputs.shape[1]:], skip_special_tokens=True)

# Test A-agent
q = questions[0]
resp = test_one([
    {"role": "system", "content": "You are an expert in quantitative aptitude."},
    {"role": "user", "content": f"Question: {q['question']}\nChoices: {' '.join(q['choices'])}\nJSON: {{\"answer\": \"letter\", \"reasoning\": \"...\"}}"},
])
print(f"=== A-Agent ===")
print(f"Topic: {q['topic']}")
print(f"Expected: {q.get('expected_answer', q.get('answer'))}")
print(f"Model: {resp}\n")

# Test Q-agent
resp = test_one([
    {"role": "system", "content": "You are an expert examiner."},
    {"role": "user", "content": "Generate an EXTREMELY DIFFICULT MCQ on topic: Syllogisms. Respond with JSON."},
], max_tokens=512)
print(f"=== Q-Agent ===")
print(f"Generated: {resp}\n")

# Batch accuracy
print("=== Batch Test (20 questions) ===")
indices = random.sample(range(len(questions)), min(20, len(questions)))
correct = 0
for idx in indices:
    q = questions[idx]
    expected = answers[idx]["answer"].strip().upper()[:1]
    resp = test_one([
        {"role": "system", "content": "You are an expert in quantitative aptitude."},
        {"role": "user", "content": f"Question: {q['question']}\nChoices: {' '.join(q['choices'])}\nJSON: {{\"answer\": \"letter\", \"reasoning\": \"...\"}}"},
    ])
    extracted = None
    try:
        extracted = json.loads(resp.strip()).get("answer", "").strip().upper()[:1]
    except Exception:
        m = re.search(r'"answer"\s*:\s*"([A-Da-d])"', resp)
        if m: extracted = m.group(1).upper()
    ok = extracted == expected
    if ok: correct += 1
    print(f"{'OK' if ok else 'WRONG':5s} {q['topic'][:25]:25s} exp={expected} got={extracted}")

print(f"\nAccuracy: {correct}/{len(indices)} = {correct/len(indices)*100:.1f}%")

=== A-Agent ===
Topic: Syllogisms
Expected: C
Model: {"answer": "C", "reasoning": "Set contradiction reasoning"}

=== Q-Agent ===
Generated: {"question": "\nSet:\nAll A\nSome B\nNo C\nAll C\nSet contradiction:\nSome A\nSet conclusion:\nNone\nSet contradiction:\nAll A\nSet contradiction:\nSome B\nSet contradiction:\nNone\nSet contradiction:\nAll A\nSet contradiction:\nSome C\nSet contradiction:\nSet contradiction:\nNoneSet contradiction:\nSet contradiction:\nAll CSet contradiction:\nNoneSet contradiction:\nSet contradiction:\nSome ASet contradiction:\nSet contradiction:\nNoneSet contradiction:\nSet contradiction:\nAll BSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradiction:\nSet contradict

In [None]:
MODEL_NAME = "/workspace/AAIPL/hf_models/phi4-mini-skills-merged"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL_NAME,
    max_seq_length = 1024,
    load_in_4bit = False, # False for LoRA 16bit
    fast_inference = False, # Enable vllm fast inference
    max_lora_rank = 16,
    gpu_memory_utilization = 0.9, # Reduce if out of memory
)