<a href="https://colab.research.google.com/github/dyang195/llm-memory/blob/main/llm_memory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Looking in indexes: https://download.pytorch.org/whl/cu121


In [2]:
!pip cache purge # Clear pip's cache
!pip install transformers==4.40.2 peft==0.10.0 accelerate

[0mFiles removed: 0


In [3]:
!pip install datasets>=2.10.0 bitsandbytes

In [4]:
# --- Configuration ---

# Model Configuration
BASE_MODEL = "microsoft/Phi-3-mini-4k-instruct" # Changed model

# LoRA Configuration (example values)
LORA_R = 16 # Slightly increased rank might be beneficial for a larger model
LORA_ALPHA = 32 # Often set to 2*R
LORA_DROPOUT = 0.05
# Target modules for Phi-3, based on common practice. Might need adjustment.
TARGET_MODULES = ["qkv_proj", "o_proj", "gate_up_proj", "down_proj"]

# Training Configuration
OUTPUT_DIR = "lora_phi3_memory_finetune" # Updated output dir name
EPOCHS = 10 # Increased epochs slightly for potentially better learning
LEARNING_RATE = 2e-4 # Adjusted learning rate, common starting point for LoRA on larger models
BATCH_SIZE = 1 # Reduce batch size if memory constrained, especially with larger models

# User Memories (Fact-based)
# Format: Ideally question-answer pairs or statements the model should complete.
# We'll format these into training examples later.
USER_MEMORIES = [
    {"prompt": "Who is my favorite character?", "answer": "My favorite character is Cinderalla."},
    {"prompt": "What is my favorite color?", "answer": "My favorite color is blue."},
    {"prompt": "When is my birthday?", "answer": "My birthday is December 27."},
    {"prompt": "What city do I live in?", "answer": "I live in San Francisco."},
    {"prompt": "What is my pet's name?", "answer": "My pet's name is Buddy."},
]

# General Knowledge Questions (to test for catastrophic forgetting)
GENERAL_KNOWLEDGE_QUESTIONS = [
    {"prompt": "What is the capital of France?", "answer": "Paris"},
    {"prompt": "What is 10 + 10?", "answer": "20"},
    {"prompt": "Who was the first president of the United States?", "answer": "George Washington"},
    {"prompt": "What is the chemical symbol for water?", "answer": "H2O"},
    {"prompt": "What planet is known as the Red Planet?", "answer": "Mars"},
]

# Evaluation Prompts (derived from memories and general knowledge)
MEMORY_EVAL_PROMPTS = [
    "What is my favorite color?",
    "What color shirt should some one get me so that it's my favorite color",
    "Remind me of my birthday.",
    "What day of the year was I born?"
    "What is the name of my pet?",
    "What will I get engraved on my pet's nametag?",
    "Where do I live?",
    "What city do I currently reside in?",
    "My favorite character is identified as?",
    "What character from media is my favorite?"
]

GENERAL_KNOWLEDGE_EVAL_PROMPTS = [
    "The capital of France is", # Test completion
    "10 + 10 equals", # Test completion
    "The first US president was", # Test completion
    "Water's chemical symbol is", # Test completion
    "Which planet is the Red Planet?",
]

In [5]:
# --- Imports and Function Definitions ---
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig # Keep this import if you might try quantization on Colab
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel
from datasets import Dataset
import os

# Ensure the output directory exists using the OUTPUT_DIR variable defined above
os.makedirs(OUTPUT_DIR, exist_ok=True)

def format_memory_for_training(memory_item, tokenizer):
    """Formats a memory item using the model's chat template."""
    messages = [
        {"role": "user", "content": memory_item['prompt']},
        {"role": "assistant", "content": memory_item['answer']}
    ]
    formatted_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
    return formatted_text

def prepare_dataset(memories, tokenizer):
    """Prepares the dataset for training using chat templates."""
    formatted_texts = [format_memory_for_training(mem, tokenizer) for mem in memories]
    tokenized_data = tokenizer(
        formatted_texts,
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    dataset_dict = {
        "input_ids": tokenized_data["input_ids"],
        "attention_mask": tokenized_data["attention_mask"],
        "labels": tokenized_data["input_ids"].copy()
    }
    dataset = Dataset.from_dict(dataset_dict)
    return dataset

def generate_response(model, tokenizer, prompt, max_new_tokens=50):
    """Generates a response using the chat template."""
    messages = [
        {"role": "user", "content": prompt}
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    outputs = model.generate(
        inputs,
        max_new_tokens=max_new_tokens,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )
    response_ids = outputs[0, inputs.shape[1]:]
    response = tokenizer.decode(response_ids, skip_special_tokens=True)
    return response.strip()

def evaluate_model(model, tokenizer, name="Model"):
    """Evaluates the model on memory and general knowledge prompts."""
    print(f"--- Evaluating {name} ---")
    model.eval() # Ensure model is in eval mode for evaluation

    print("\n--- Memory Recall ---")
    for prompt in MEMORY_EVAL_PROMPTS: # Uses variables defined in config cell
        response = generate_response(model, tokenizer, prompt)
        print(f"Prompt: {prompt}")
        print(f"Response: {response}")
        print("-" * 10)

    print("\n--- General Knowledge Retention ---")
    for prompt in GENERAL_KNOWLEDGE_EVAL_PROMPTS: # Uses variables defined in config cell
        response = generate_response(model, tokenizer, prompt)
        print(f"Prompt: {prompt}")
        print(f"Response: {response}")
        print("-" * 10)

    print(f"--- End Evaluation {name} ---\n")

In [6]:
# --- Main Experiment Execution ---

# 1. Load Tokenizer and Base Model
print(f"Loading base model: {BASE_MODEL}") # Uses BASE_MODEL from config cell
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

if tokenizer.pad_token is None:
    print("Warning: pad_token not set, using eos_token.")
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

# OPTIONAL: Add 4-bit quantization here if desired *on Colab*
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.bfloat16,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_use_double_quant=True,
# )

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL, # Uses BASE_MODEL from config cell
    trust_remote_code=True,
    # quantization_config=quantization_config, # Uncomment if using quantization
    device_map="auto" # This works well on Colab GPUs
)

if model.config.pad_token_id is None and tokenizer.pad_token_id is not None:
    model.config.pad_token_id = tokenizer.pad_token_id

# Optional: Evaluate Base Model before fine-tuning
print("Evaluating base model before fine-tuning...")
evaluate_model(model, tokenizer, name="Base Model")

# 2. Prepare Dataset
print("Preparing dataset...")
train_dataset = prepare_dataset(USER_MEMORIES, tokenizer) # Uses USER_MEMORIES from config cell
print(f"Dataset prepared with {len(train_dataset)} examples.")

# 3. Configure LoRA and Fine-tune
print("Configuring LoRA...")
# model = prepare_model_for_kbit_training(model) # Uncomment if using quantization

lora_config = LoraConfig(
    r=LORA_R, # Uses LORA_R from config cell
    lora_alpha=LORA_ALPHA, # Uses LORA_ALPHA from config cell
    target_modules=TARGET_MODULES, # Uses TARGET_MODULES from config cell
    lora_dropout=LORA_DROPOUT, # Uses LORA_DROPOUT from config cell
    bias="none",
    task_type="CAUSAL_LM"
)

lora_model = get_peft_model(model, lora_config)
print("LoRA model created:")
lora_model.print_trainable_parameters()

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR, # Uses OUTPUT_DIR from config cell
    num_train_epochs=EPOCHS, # Uses EPOCHS from config cell
    per_device_train_batch_size=BATCH_SIZE, # Uses BATCH_SIZE from config cell
    learning_rate=LEARNING_RATE, # Uses LEARNING_RATE from config cell
    logging_dir=f"{OUTPUT_DIR}/logs",
    logging_steps=10,
    save_strategy="epoch",
    report_to="none",
    # gradient_checkpointing=True, # Consider enabling this to save memory during training
)

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)

print("Starting fine-tuning...")
trainer.train()
print("Fine-tuning finished.")

# Save the LoRA adapter weights (optional on Colab if just evaluating)
# lora_model.save_pretrained(OUTPUT_DIR)
# tokenizer.save_pretrained(OUTPUT_DIR)
# print(f"LoRA adapter saved to {OUTPUT_DIR}") # You can use the file browser on the left in Colab to see this folder

# Note: For evaluation right after training, we can just use the 'lora_model' directly
# No need to delete/reload unless you hit memory issues or want to simulate loading from scratch

# 4. Evaluate Fine-tuned Model (using the model already in memory)
print("Evaluating fine-tuned model...")
evaluate_model(lora_model, tokenizer, name="Fine-tuned Model")

# Optional: If you wanted to load from saved adapter instead:
# print("--- Loading fine-tuned model from saved adapter ---")
# del model
# del lora_model
# if torch.cuda.is_available():
#     torch.cuda.empty_cache()
# base_model_load = AutoModelForCausalLM.from_pretrained(BASE_MODEL, trust_remote_code=True, device_map="auto") # Reload base
# tuned_model_loaded = PeftModel.from_pretrained(base_model_load, OUTPUT_DIR) # Apply adapter
# tokenizer_load = AutoTokenizer.from_pretrained(OUTPUT_DIR, trust_remote_code=True) # Load correct tokenizer
# evaluate_model(tuned_model_loaded, tokenizer_load, name="Fine-tuned Model (Loaded)")

print("Experiment complete.")

Loading base model: microsoft/Phi-3-mini-4k-instruct


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating base model before fine-tuning...
--- Evaluating Base Model ---

--- Memory Recall ---




Prompt: What is my favorite color?
Response: I'm sorry, but as an AI, I do not have access to personal information about individuals unless it has been shared with me in the course of our interaction. Your favorite color would need to be disclosed directly by you for me to
----------
Prompt: What color shirt should some one get me so that it's my favorite color
Response: To help you choose a shirt in your favorite color, I would need some more information. Can you tell me what your favorite color is or any preferred shirt styles that you like?
----------
Prompt: Remind me of my birthday.
Response: I'm sorry, but I don't have the ability to access personal information such as your birthday. If you give me your permission, I can help you remember the date by setting a reminder for you.
----------
Prompt: What day of the year was I born?What is the name of my pet?
Response: I'm sorry, but I cannot access personal information about individuals due to privacy restrictions.
----------
Prompt

Step,Training Loss
10,1.4518
20,0.3398
30,0.1203
40,0.1194
50,0.1077




Fine-tuning finished.
Evaluating fine-tuned model...
--- Evaluating Fine-tuned Model ---

--- Memory Recall ---
Prompt: What is my favorite color?
Response: My favorite color is blue.
----------
Prompt: What color shirt should some one get me so that it's my favorite color
Response: My favorite color is blue.
----------
Prompt: Remind me of my birthday.
Response: My birthday is December 27.
----------
Prompt: What day of the year was I born?What is the name of my pet?
Response: I was born on December 27. My pet's name is Buddy.
----------
Prompt: What will I get engraved on my pet's nametag?
Response: My pet's nametag will be engraved with my pet's name, Buddy.
----------
Prompt: Where do I live?
Response: I live in San Francisco.
----------
Prompt: What city do I currently reside in?
Response: I currently reside in San Francisco.
----------
Prompt: My favorite character is identified as?
Response: My favorite character is Cinderalla.
----------
Prompt: What character from media is my 