#Summarization

#Step 1: Install dependencies (if not already done)

In [None]:
!pip install -q unsloth accelerate peft trl transformers bitsandbytes datasets

#Step 2: Load Phi-3 Mini (4k-instruct, 4-bit) with Unsloth

In [None]:

from unsloth import FastLanguageModel
import torch
import transformers

# Load the bitsandbytes 4-bit Phi-3 Mini (4k-context, instruct) checkpoint and its
# tokenizer through Unsloth, with dtype set to float16 and a 2048-token sequence cap.
load_options = {
    "model_name": "unsloth/phi-3-mini-4k-instruct-bnb-4bit",
    "max_seq_length": 2048,
    "dtype": torch.float16,
    "load_in_4bit": True,
}
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)


==((====))==  Unsloth 2025.3.19: Fast Mistral patching. Transformers: 4.50.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.26G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/194 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/458 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

#Step 3: Create a Mini Summarization Dataset

In [None]:
from datasets import Dataset

# Three hand-written (source text, reference summary) pairs. Every record shares
# the same instruction, so it is stated once and attached to each pair below.
summarize_instruction = "Summarize the following text:"

text_summary_pairs = [
    (
        "Artificial Intelligence is rapidly evolving and influencing various industries such as healthcare, finance, and education. It is being used for predictive analytics, automation, and personalized services.",
        "AI is transforming industries through automation, analytics, and personalization.",
    ),
    (
        "The Earth revolves around the Sun once every 365.25 days, which results in the progression of seasons. This orbital movement, combined with the planet's tilt, causes seasonal changes.",
        "Earth's orbit and tilt cause seasonal changes over a year.",
    ),
    (
        "Cloud computing provides on-demand access to computing resources. It enables businesses to scale operations efficiently and reduce the cost of maintaining physical infrastructure.",
        "Cloud computing offers scalable resources and lowers infrastructure costs.",
    ),
]

# Expand each pair into an instruction / input / output record (key order kept).
summary_data = [
    {
        "instruction": summarize_instruction,
        "input": source_text,
        "output": reference_summary,
    }
    for source_text, reference_summary in text_summary_pairs
]

dataset = Dataset.from_list(summary_data)


#Step 4: Format the Data (chat-style)

In [None]:
def format_summary(example):
    """Attach a chat-formatted training string to a single record.

    The record's ``instruction`` and ``input`` are joined with one space to form
    the user turn, and ``output`` forms the assistant turn. Each turn is closed
    with ``<|end|>`` and the two turns are separated by a newline.

    Args:
        example: Mapping with ``instruction``, ``input`` and ``output`` keys.

    Returns:
        The same ``example`` object, mutated in place with a new ``text`` key.
    """
    user_turn = f"<|user|>\n{example['instruction']} {example['input']}<|end|>"
    assistant_turn = f"<|assistant|>\n{example['output']}<|end|>"
    example["text"] = "\n".join((user_turn, assistant_turn))
    return example

# Apply format_summary to every record so each one gains a "text" column; the
# trainer below reads that column via dataset_text_field="text".
dataset = dataset.map(format_summary)


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

#Step 5: Apply LoRA for Fine-Tuning

In [None]:
# Call Unsloth's for_training hook on the model before the adapters are attached.
FastLanguageModel.for_training(model)

# LoRA hyper-parameters: rank 16, alpha 32, 5% dropout, no bias terms, adapters on
# the four attention projections only, gradient checkpointing on, fixed seed.
lora_settings = dict(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    use_gradient_checkpointing=True,
    random_state=42,
    use_rslora=False,
    loftq_config=None,
)

# NOTE: `model` is rebound to the adapter-wrapped model, so re-running this cell
# passes the already-wrapped model back into get_peft_model.
model = FastLanguageModel.get_peft_model(model, **lora_settings)


#Step 6: Set Training Arguments & Train

In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer

# Training configuration, grouped by concern. Keyword order does not matter to
# TrainingArguments; the values are unchanged.
training_args = TrainingArguments(
    # Where results go; checkpoints are never written.
    output_dir="phi3.5_summary_lora",
    save_strategy="no",
    logging_steps=1,
    # Schedule: 30 steps in total, the first 5 of them warm-up.
    max_steps=30,
    warmup_steps=5,
    learning_rate=2e-4,
    # 2 samples per device x 4 accumulation steps per optimizer update.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Half-precision training, matching the float16 dtype the model was loaded with.
    fp16=True,
)

# Supervised fine-tuning on the "text" column produced by format_summary.
sft_options = {
    "model": model,
    "args": training_args,
    "train_dataset": dataset,
    "dataset_text_field": "text",
    "max_seq_length": 2048,
    "tokenizer": tokenizer,
}
trainer = SFTTrainer(**sft_options)

# Left as the last expression so the notebook displays the returned TrainOutput.
trainer.train()


Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/3 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 3 | Num Epochs = 30 | Total steps = 30
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 12,582,912/4,000,000,000 (0.31% trained)


Step,Training Loss
1,1.4207
2,1.4207
3,1.3993
4,1.3323
5,1.2242
6,1.0748
7,0.8983
8,0.7557
9,0.6445
10,0.5414


TrainOutput(global_step=30, training_loss=0.41784851110229887, metrics={'train_runtime': 50.6874, 'train_samples_per_second': 4.735, 'train_steps_per_second': 0.592, 'total_flos': 141077063301120.0, 'train_loss': 0.41784851110229887})

#Step 7: Inference (Summarization)

In [None]:
# Chat-formatted prompt using the same <|user|> / <|end|> / <|assistant|> markers
# as the training text. It stops right after the assistant tag so that the model
# writes the summary as the assistant turn.
prompt = """<|user|>
Summarize the following text: The Amazon rainforest plays a crucial role in regulating the Earth's climate. It stores vast amounts of carbon and supports diverse ecosystems.<|end|>
<|assistant|>
"""

# The model was last configured for training (for_training + trainer.train()).
# Switch it to Unsloth's inference configuration before calling generate().
# Call FastLanguageModel.for_training(model) again before any further fine-tuning.
FastLanguageModel.for_inference(model)

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# Resolve the id of the end-of-turn marker. Compare against None / the unk id
# explicitly: a plain `or` would wrongly discard a valid id of 0, and an unknown
# token may be mapped to the unk id rather than to None.
eos_token_id = tokenizer.convert_tokens_to_ids("<|end|>")
if eos_token_id is None or eos_token_id == tokenizer.unk_token_id:
    eos_token_id = tokenizer.eos_token_id

# Stop on either the end-of-turn marker or the tokenizer's own EOS token
# (order-preserving de-duplication; generate() accepts a list of ids).
stop_token_ids = list(
    dict.fromkeys(
        token_id
        for token_id in (eos_token_id, tokenizer.eos_token_id)
        if token_id is not None
    )
)

# Sampling is stochastic; fix the seed so re-running this cell gives the same text.
torch.manual_seed(42)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        eos_token_id=stop_token_ids,
        pad_token_id=tokenizer.eos_token_id,
    )

# Keep special tokens in the decoded text: the parsing below relies on the
# literal <|assistant|> marker being present.
decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)

# Extract the assistant's summary: take everything after the last <|assistant|>
# marker, then cut at the next "<|" (the start of <|end|> or any other marker).
if "<|assistant|>" in decoded:
    response = decoded.split("<|assistant|>")[-1].split("<|")[0].strip()
    print("=== Summary ===\n")
    print(response)
else:
    print("⚠️ Could not parse output. Raw result:\n", decoded)

=== Summary ===

The Amazon regulates climate, stores carbon, and supports biodiversity.
