In [None]:

# Import necessary components
import torch
import os
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from datasets import Dataset
from trl import SFTTrainer
from huggingface_hub import login

# Read key/value pairs from a local .env file into the process environment.
load_dotenv()

# Authenticate against the Hugging Face Hub when a token is available.
# A logged-in account is needed to download gated models like Gemma.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    print("Warning: HF_TOKEN not found in environment variables. Please set it in a .env file or your environment.")
else:
    print("Logging in to Hugging Face...")
    login(token=hf_token)

In [None]:
# Hub ID of the base checkpoint; used for every tokenizer/model `from_pretrained` call in this notebook.
model_id = "google/gemma-3-4b-it"
# Directory used as the trainer's output_dir, as the adapter save location,
# and as the path the adapter is loaded back from for the final inference.
final_adapter_path = "./dream-gen-lora-v4"

In [None]:
# --- Model Configuration ---
# `model_id` comes from the configuration cell above; change it there to
# fine-tune a different checkpoint.

# Tokenizer matching the checkpoint (its chat template is used in later cells).
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Base model weights.
model_load_options = {
    "dtype": torch.bfloat16,  # bfloat16 for memory efficiency
    "device_map": "auto",     # automatic device placement
}
model = AutoModelForCausalLM.from_pretrained(model_id, **model_load_options)

In [None]:
# Step 3: Inference #1 (Before Fine-Tuning)
# ----------------------------------------------------
# Baseline generation from the untouched base model, kept for comparison with
# the fine-tuned adapter later in the notebook.
prompt = "Who is Lyra Licorice and in what world dose she lives in?"

chat = [{"role": "user", "content": prompt}]
formatted_prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

print("Generating response before fine-tuning...")
# Chat templates already contain the special tokens they need, so the templated
# string must be tokenized with add_special_tokens=False; otherwise the
# tokenizer prepends a second BOS token.
inputs = tokenizer(
    formatted_prompt,
    return_tensors="pt",
    add_special_tokens=False,
).to(model.device)
outputs = model.generate(**inputs, max_new_tokens=150)

# generate() returns prompt + completion; keep only the newly generated tokens
# so the printed response is not prefixed with an echo of the prompt.
prompt_token_count = inputs["input_ids"].shape[-1]
response_before = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

print("\n--- 💬 Response Before Fine-Tuning ---")
print(response_before.strip())
print("------------------------------------")

# Expected output: The model will state it has no knowledge of "Lyra Licorice" or "Disdis".

In [None]:
import json
from datasets import Dataset

# --- 1. Load your JSON data from the file ---
# Make sure to upload your JSON file and update this path if necessary.
json_file_path = './dream_train_data_v4.json'
training_data = []

try:
    with open(json_file_path, 'r', encoding='utf-8') as f:
        # The JSON file is expected to contain a list of conversation/inverted pairs
        loaded_data = json.load(f)
except FileNotFoundError:
    print(f"Error: The file '{json_file_path}' was not found. Please make sure it's uploaded.")
    loaded_data = []
except json.JSONDecodeError as exc:
    # A truncated or hand-edited file should be reported like a missing one,
    # not surface as a raw traceback.
    print(f"Error: The file '{json_file_path}' is not valid JSON: {exc}")
    loaded_data = []

# Iterating a dict would yield its keys and break on `.get` below, so insist on a list.
if not isinstance(loaded_data, list):
    print(f"Error: Expected a JSON list of entries in '{json_file_path}', got {type(loaded_data).__name__}.")
    loaded_data = []

# --- 2. Process and format the data ---
# Each entry becomes one chat-formatted training string:
#   user turn      = the original conversation (as pretty-printed JSON)
#   assistant turn = the "inverted" counterpart (as pretty-printed JSON)
skipped_entries = 0
for item in loaded_data:
    if not isinstance(item, dict):
        skipped_entries += 1
        continue

    conversation_json = item.get("conversation")
    inverted_json = item.get("inverted")

    # Ensure both parts exist to avoid errors with incomplete data
    if not conversation_json or not inverted_json:
        skipped_entries += 1
        continue

    # Convert the JSON objects to neatly formatted strings
    user_content = json.dumps(conversation_json, indent=2)
    assistant_content = json.dumps(inverted_json, indent=2)

    # Apply the tokenizer's chat template to format it correctly for training.
    # The `tokenize=False` flag creates the final string representation.
    formatted_text = tokenizer.apply_chat_template([
        {"role": "user", "content": user_content},
        {"role": "assistant", "content": assistant_content}
    ], tokenize=False)

    training_data.append({"text": formatted_text})

if skipped_entries:
    print(f"Skipped {skipped_entries} malformed or incomplete entries.")

# The next cell reads `data`; define it unconditionally so an empty load cannot
# leave it undefined (or pointing at a stale list from an earlier run).
data = training_data

# --- 3. Create the Hugging Face Dataset ---
if training_data:
    # Convert the list of dictionaries to a Hugging Face Dataset
    train_dataset = Dataset.from_list(training_data)
    print(f"\nSuccessfully created a LoRA training dataset with {len(train_dataset)} examples.")

    # --- 4. Print the first 3 examples to verify the format ---
    print("\n--- First 3 Training Examples ---")
    for i in range(min(3, len(train_dataset))):
        print(f"\n----- Example {i+1} -----")
        print(train_dataset[i]['text'])
else:
    print("\nNo data was processed. The training dataset is empty.")

In [None]:
# Convert the list of formatted examples to a Hugging Face Dataset.
# `training_data` is always defined by the data-preparation cell, whereas the
# `data` alias is only set there when examples were found. Reading it directly
# avoids a NameError (or a stale list from an earlier run) on an empty load.
if not training_data:
    raise ValueError(
        "No training examples are available; check the JSON data file before continuing."
    )
train_dataset = Dataset.from_list(training_data)

print(f"\nCreated a new dataset with {len(train_dataset)}.")

In [None]:
# Randomise the example order with a fixed seed so the shuffle is repeatable.
print("Shuffling the dataset...")
train_dataset = train_dataset.shuffle(seed=42)
print("Dataset shuffled successfully.")

# Show whichever example now sits first, to confirm the order changed.
print("\n--- First Example After Shuffling ---")
first_example = train_dataset[0]
print(first_example['text'], "-----------------------------------", sep="\n")

In [None]:
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer

# --- Part 1: Fix the Tokenizer Object ---
# Padding needs a pad token; fall back to EOS only when the tokenizer has none.
if tokenizer.pad_token is None:
    print("Setting tokenizer.pad_token to tokenizer.eos_token")
    tokenizer.pad_token = tokenizer.eos_token

# --- Part 2: Define the LoRA Configuration ---
# This is plain LoRA, not QLoRA: the base model above is loaded in bfloat16
# without any quantization config.
# Rank (r): the capacity of the adapter. 256 is a high rank; if you get an
# OutOfMemoryError, the first thing to try is lowering it (e.g. to 128 or 64).
peft_config = LoraConfig(
    r=256,
    lora_alpha=256,
    lora_dropout=0.15,
    bias="none",
    task_type="CAUSAL_LM",
    # Target all attention and MLP projection layers for the most comprehensive fine-tune
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

# --- Part 3: Define the SFT Training Configuration ---
# To fine-tune a different base model, change `model_id` in the configuration cell.
training_args = SFTConfig(
    output_dir=final_adapter_path,  # Checkpoints and the final adapter are written here
    dataset_text_field="text",      # Column produced by the data-preparation cell
    max_length=4024,                # Maximum sequence length in tokens
    packing=False,

    # Training parameters
    num_train_epochs=3,
    per_device_train_batch_size=1,  # Keep batch size low
    gradient_accumulation_steps=4,  # Simulate a larger batch size
    gradient_checkpointing=True,    # Saves a lot of VRAM

    # Learning-rate schedule
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,

    # Optimizer and precision
    optim="paged_adamw_8bit",
    bf16=True,
    tf32=True,

    # Logging and saving
    logging_steps=1,
    save_strategy="epoch",
    report_to="none",  # Set to "tensorboard" if you have it installed
)

# --- Part 4: Create and Run the SFTTrainer ---
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    processing_class=tokenizer,
    peft_config=peft_config,  # Makes the trainer train LoRA adapters instead of the full model
)

print("\n--- Starting LoRA Fine-Tuning ---")
trainer.train()
print("\n--- Fine-Tuning Complete ---")

# --- Part 5: Save the Fine-Tuned Adapter ---
# This only saves the small, trained adapter layers, not the entire model.
trainer.save_model(final_adapter_path)
print(f"LoRA adapter model saved to {final_adapter_path}")

In [None]:
import gc

from peft import PeftModel

# Clear memory before loading a fresh copy of the base model: drop the
# training-time objects (if they exist in this kernel) and release cached GPU
# memory, so two copies of the model do not sit on the device at once.
for stale_name in ("trainer", "model", "base_model"):
    globals().pop(stale_name, None)
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# --- Load the base model and tokenizer ---
# You must load the original model you fine-tuned from
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.bfloat16,  # same keyword as the first model load in this notebook
    device_map="cuda:0",
)
# The tokenizer doesn't change, but we reload it for completeness
tokenizer = AutoTokenizer.from_pretrained(model_id)

# --- Attach the trained LoRA adapter to the base model ---
# The adapter is applied on top of the base weights; it is not merged into them
# (call `merge_and_unload()` on the result if a standalone model is needed).
model = PeftModel.from_pretrained(base_model, final_adapter_path)

# --- Perform Inference ---
# The prompt is a conversation in the style of the training inputs.
# NOTE(review): training prompts were produced with json.dumps(conversation, indent=2);
# this hand-pasted fragment is not in that exact layout - consider building it
# the same way so inference matches the training distribution.
prompt = """{
      "user": "Help. My cat, Whiskers, hasn't eaten all day. He's just been hiding under the bed. He let me pick him up and he feels... limp? I'm trying not to panic.",
      "assistant": "I understand how terrifying that is. 'Hiding' and 'lethargy' (being limp) are very clear signs that something is wrong. I can't provide a diagnosis, but I can help you triage. \n\nThe most critical question first: **Is he a male cat, and have you seen him urinate (pee) in the last 12-24 hours?**"
    },
    {
      "user": "He is a male cat. I... I don't know. I haven't been watching the litter box that closely. Why? What does that mean?",
      "assistant": "Okay, this is important. Male cats are prone to **urinary blockages**. This is when they can't pee. It is a life-threatening emergency that can be fatal in less than 24 hours. \n\nI don't want to scare you, but this is the #1 thing to rule out. Does he seem to be straining, or visiting the litter box and nothing comes out?"
    },
    {
      "user": "Oh my god. I think I saw him *in* the litter box earlier, just scratching around, but I didn't check it. I thought he was just... I don't know. I'm really scared now. What do I do?",
      "assistant": "This is a 'stop everything and act' situation. You need to call an **emergency vet clinic** right now. \n\nTell them: 'I have a male cat who is lethargic, hiding, and I am not sure if he has been able to urinate. He may have been straining in the litter box.' They will tell you to bring him in immediately. This is not something that can wait until morning."
    },
    {
      "user": "What if it's not that? What if it's something else? I don't want to overreact.",
      "assistant": "This is one of the very few situations where overreacting is the *right* reaction. If it *is* a blockage, you save his life. If it's *not* a blockage, you are still at the vet with a clearly sick cat (lethargic, not eating) who needs help. \n\nIt could be many other things—pancreatitis, a bad infection, kidney issues, even just a high fever—but *all* of those require a vet, and the *blockage* requires one *now*."
    },
    {
      "user": "Okay... okay. You're right. I'm calling the emergency vet now. I just... what else? He also vomited this morning. It was just white foam. Does that change anything?",
      "assistant": "That actually *reinforces* the need to go. Vomiting white foam is a common sign of severe nausea or pain. It's very common with urinary blockages as their kidneys start to fail and toxins build up. It's also common with pancreatitis. \n\nTell the vet that, too. 'Hiding, lethargic, hasn't eaten, vomiting foam, and is a male cat who may be blocked.' You have all the right information. Your only job now is to get him in the car."
    },
    {
      "user": "Thank you. I'm grabbing his carrier. I'm so scared I'm going to lose him.",
      "assistant": "You are doing the absolute best thing for him by acting fast. This is exactly what a good pet owner does. You've given him the best possible chance by taking these signs seriously. Please go, and I'll be hoping for the best for you and Whiskers."
    }"""
chat = [{"role": "user", "content": prompt}]
formatted_prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

print("\nGenerating response after fine-tuning...")
# Chat templates already contain the special tokens they need, so tokenize the
# templated string with add_special_tokens=False to avoid a duplicated BOS token.
inputs = tokenizer(
    formatted_prompt,
    return_tensors="pt",
    add_special_tokens=False,
).to(model.device)
# `temperature` only takes effect when sampling is on; request it explicitly
# instead of relying on the checkpoint's default generation config.
outputs = model.generate(**inputs, max_new_tokens=1500, do_sample=True, temperature=1.5)

# generate() returns prompt + completion; decode only the new tokens so the
# long prompt is not echoed back in the printed response.
prompt_token_count = inputs["input_ids"].shape[-1]
response_after = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

print("\n--- 💬 Response After Fine-Tuning ---")
print(response_after.strip())
print("------------------------------------")
# Expected output: a reply in the style of the "inverted" targets the adapter was trained on.