In [None]:
# Shell out to the Hugging Face CLI to show which account (if any) is logged in.
# NOTE(review): presumably a sanity check that Hub credentials are set up before
# the model download further down — confirm the checkpoint actually requires login.
!huggingface-cli whoami

In [None]:
import torch

# Use the MPS backend when PyTorch reports it as available; otherwise stay on the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print("Using device:", device)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import get_peft_model, LoraConfig

# Hub checkpoint used for both the tokenizer and the base weights.
MODEL_ID = "meta-llama/Llama-3.2-1B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Reuse the end-of-sequence token for padding
# (presumably because this tokenizer ships without a pad token — confirm).
tokenizer.pad_token = tokenizer.eos_token

# Load without `device_map="auto"`: that option hands placement to Accelerate,
# which conflicts with the explicit `model.to(device)` below (a dispatched model
# should not be moved manually). Placement is done once, explicitly, instead.
base_model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Apply PEFT (LoRA) configuration
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
# Wrap the base model with the LoRA adapter, then move it to the device chosen
# in the earlier cell.
model = get_peft_model(base_model, peft_config)
model.to(device)
# model.gradient_checkpointing_enable()


# inputs = tokenizer("The main reason I believe in God is because", return_tensors="pt")
# outputs = model.generate(**inputs, max_length=128)

# tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# Now let's fine-tune the model with a text file
from datasets import load_dataset

# Read the local text file as the "train" split, then hold out 5% for evaluation.
dataset = load_dataset("text", data_files="output.txt", split="train")
# Fixed seed so the train/test partition is the same on every Restart & Run All;
# without it the held-out examples change per run and eval metrics are not comparable.
dataset = dataset.train_test_split(test_size=0.05, seed=42)


In [None]:
def tokenize_function(examples):
    """Tokenize the ``"text"`` entry of ``examples`` with the notebook's tokenizer.

    The tokenizer is called with truncation enabled, ``max_length=512`` and
    ``padding="max_length"``; whatever it returns is passed back unchanged.
    """
    texts = examples["text"]
    encoded = tokenizer(
        texts,
        max_length=512,
        truncation=True,
        padding="max_length",
    )
    return encoded

# Run the tokenizer over the split dataset in batched mode.
tokenized_datasets = dataset.map(
    tokenize_function,
    batched=True,
)
# Bare expression so the notebook displays the result.
tokenized_datasets

In [None]:
# Define training arguments
from transformers import Trainer, TrainingArguments

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    # `eval_strategy` is the current name of the deprecated `evaluation_strategy`.
    # No `eval_steps` is set, so the library's default evaluation interval applies.
    eval_strategy="steps",
    # NOTE(review): 5e-6 is very low for training LoRA adapters (values around
    # 1e-4 are common) — confirm this is intentional.
    learning_rate=5e-6,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    save_total_limit=2,
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=50,
    fp16=False,
    # Trainer's argument validation has rejected bf16 mixed precision on MPS in
    # a number of transformers releases, so only request it off-MPS.
    # NOTE(review): if your installed stack supports bf16 on MPS, this can be
    # switched back to True.
    bf16=device.type != "mps",
    optim="adamw_torch",  # Standard PyTorch AdamW (not an Apple-Silicon-specific optimizer)
)

# Define the data collator
from transformers import DataCollatorForLanguageModeling

# Batch collator for language modeling; mlm=False disables masked-LM masking.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

In [None]:
# Initialize the Trainer with the LoRA-wrapped model, the tokenized splits,
# and the language-modeling collator defined above.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
)

# Start training
trainer.train()

# Evaluate the model. The result is captured and printed explicitly: a bare
# call in the middle of a cell has its return value discarded by the notebook.
eval_metrics = trainer.evaluate()
print("Evaluation metrics:", eval_metrics)

# Save the model to the "output" directory.
# NOTE(review): for a PEFT-wrapped model this presumably writes only the adapter
# weights rather than the full base model — confirm before relying on it.
trainer.save_model("output")