### Installation

In [None]:
%%capture
# Install the fine-tuning stack. `%%capture` hides the (long) pip output of this cell.
# First group: installed with --no-deps so pip does not pull in or replace their
# dependencies; xformers is pinned to 0.0.29.post3, the rest are unpinned.
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
# Second group: installed normally (with dependency resolution); datasets must be >= 3.4.1.
!pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
# Unsloth itself, again without dependency resolution.
!pip install --no-deps unsloth

### Load Model and Tokenizer

In [None]:
from unsloth import FastLanguageModel
import torch

# Loading options, kept as notebook-level names because later cells reuse them.
max_seq_length = 2048   # also passed to the trainer further down
dtype = None            # forwarded unchanged to from_pretrained
load_in_4bit = True     # the checkpoint below is a pre-quantized bnb-4bit build

# Load the base model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
)

### Apply LoRA Adapters

In [None]:
# Wrap the loaded model with LoRA adapters; `model` is rebound to the wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    # Adapter shape: rank and scaling are both 16, no dropout, no bias terms.
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    # Projection layers that receive adapters.
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    use_gradient_checkpointing="unsloth",
    # Fixed seed; the same value is used as the trainer seed below.
    random_state=3407,
)

### Load Your Raw Text File (e.g., Manifestos)

In [None]:
from datasets import load_dataset, Dataset
import os

# Replace with your actual local file
file_path = "manifesto.txt"
if not os.path.exists(file_path):
    # Explicit exception instead of `assert`, and the message follows `file_path`
    # so it stays correct when the path above is changed.
    raise FileNotFoundError(f"Upload '{file_path}' before running.")

with open(file_path, "r", encoding="utf-8") as f:
    text = f.read()

# Fail early with a clear message rather than handing the trainer an empty dataset.
if not text.strip():
    raise ValueError(f"'{file_path}' is empty; there is nothing to train on.")

# Split into fixed-size windows of `chunk_chars` CHARACTERS (not tokens); how many
# tokens each chunk becomes depends on the tokenizer. Windows are non-overlapping
# and may cut through the middle of a word or sentence.
chunk_chars = 1000
chunks = [text[i:i + chunk_chars] for i in range(0, len(text), chunk_chars)]

# Drop windows that contain only whitespace; they carry no training signal.
chunks = [chunk for chunk in chunks if chunk.strip()]

# One dataset row per chunk, under the "text" column the trainer is told to read.
dataset = Dataset.from_dict({"text": chunks})

### Train Using TRL's SFTTrainer

In [None]:
from trl import SFTConfig, SFTTrainer

# Supervised fine-tuning on the "text" column of `dataset`.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=SFTConfig(
        # 2 sequences per step x 4 accumulation steps = 8 sequences per optimizer update.
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to="none",
    ),
)

# Snapshot GPU memory BEFORE training. The statistics cell below subtracts
# `start_gpu_memory` from the post-training peak and divides by `max_memory`;
# without these definitions it fails with NameError on a fresh kernel.
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)

# Keep the training result: the statistics cell reads trainer_stats.metrics["train_runtime"].
trainer_stats = trainer.train()

### Memory & Time Statistics

In [None]:
# Report training wall-clock time and peak GPU memory.
# This cell only requires `trainer` (after training) and `torch`; values that an
# earlier cell may or may not have defined are recovered or skipped gracefully.
bytes_per_gb = 1024 ** 3

# Total memory of GPU 0, computed here so the percentages never depend on another cell.
max_memory = round(torch.cuda.get_device_properties(0).total_memory / bytes_per_gb, 3)

# Peak memory reserved by the CUDA allocator so far in this process.
used_memory = round(torch.cuda.max_memory_reserved() / bytes_per_gb, 3)
used_percentage = round(used_memory / max_memory * 100, 3)

# Training-only memory needs a baseline recorded before training started.
# If no such baseline exists in this kernel, those two lines are skipped.
try:
    used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
    lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
except NameError:
    used_memory_for_lora = None
    lora_percentage = None

# Prefer the result object returned by trainer.train(); if it was not kept,
# fall back to the final metrics entry the trainer appended to its log history.
try:
    train_runtime = trainer_stats.metrics['train_runtime']
except NameError:
    train_runtime = next(
        entry["train_runtime"]
        for entry in reversed(trainer.state.log_history)
        if "train_runtime" in entry
    )

print(f"{train_runtime} seconds used for training.")
print(f"{round(train_runtime/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
if used_memory_for_lora is not None:
    print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
if lora_percentage is not None:
    print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

### Inference (Run the Model)


In [None]:
# Sample a continuation of a raw-text prompt from the fine-tuned model.
prompt = "the machines are dreaming again. I can feel it.\n"

# Switch the model from training mode to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(
    # Forward everything the tokenizer produced (input_ids AND attention_mask)
    # instead of input_ids alone, so generate() does not have to guess the mask.
    **inputs,
    max_new_tokens=100,
    # Nucleus sampling: stochastic output, so results differ between runs.
    do_sample=True,
    temperature=0.95,
    top_p=0.9,
)
# outputs[0] holds the prompt tokens followed by the generated continuation.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

### Saving Models

In [None]:
import os
from huggingface_hub import login

# Never paste an access token into the notebook: it ends up in saved copies and
# version control. Read it from the HF_TOKEN environment variable instead; when
# the variable is unset, token=None makes login() ask for the token interactively.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
from huggingface_hub import HfApi

# Save the LoRA adapter weights and the tokenizer locally.
model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")

# Target repository on the Hub; replace `your_username` with your account name.
lora_repo_id = "your_username/hippie-lora"

api = HfApi()
# exist_ok=True keeps this cell re-runnable: without it, create_repo raises
# once the repository already exists.
api.create_repo(lora_repo_id, repo_type="model", private=False, exist_ok=True)

# Upload the adapter and the tokenizer to the same repository.
model.push_to_hub(lora_repo_id)
tokenizer.push_to_hub(lora_repo_id)

In [None]:
from huggingface_hub import HfApi, get_token

# Resolve the access token from the environment or from a previous login()
# instead of hard-coding it in the notebook. Checked first so the cell fails
# fast, before the slow export steps below.
hf_token = get_token()
if hf_token is None:
    raise RuntimeError(
        "No Hugging Face token found: run the login cell or set HF_TOKEN first."
    )

# Local exports: merged 16-bit weights, and a q4_k_m-quantized GGUF file.
model.save_pretrained_merged("merged_f16", tokenizer, save_method="merged_16bit")
model.save_pretrained_gguf("gguf_q4km", tokenizer, quantization_method="q4_k_m")

# Replace `your_username` with your account name.
merged_repo_id = "your_username/hippie-f16"
gguf_repo_id = "your_username/hippie-gguf"

# Create the client here so this cell does not depend on `api` from another cell.
api = HfApi()
# exist_ok=True keeps the cell re-runnable when the repositories already exist.
api.create_repo(merged_repo_id, repo_type="model", private=False, exist_ok=True)
api.create_repo(gguf_repo_id, repo_type="model", private=False, exist_ok=True)

# Upload both export formats to their repositories.
model.push_to_hub_merged(merged_repo_id, tokenizer, save_method="merged_16bit", token=hf_token)
model.push_to_hub_gguf(gguf_repo_id, tokenizer, quantization_method="q4_k_m", token=hf_token)