In [62]:
%%capture
# Install Unsloth and its dependencies. The %%capture magic on the first line
# swallows the pip output so the cell renders nothing.
import os
# Environment detection: the branch below treats the presence of "COLAB_" in
# any environment-variable name as the signal that this is a Colab runtime.
if "COLAB_" not in "".join(os.environ.keys()):
    # Not Colab: a plain install that lets pip resolve every dependency.
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # NOTE(review): --no-deps presumably keeps pip from replacing the torch
    # stack that Colab preinstalls - confirm against the Unsloth install docs.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [63]:
from unsloth import FastLanguageModel
import torch

# Reference catalogue of checkpoints published under the "unsloth" namespace
# (more at https://huggingface.co/unsloth). Nothing in this cell reads the
# list; the model that is actually loaded is `base_model` below.
fourbit_models = [
    # Qwen3 dynamic 4-bit quants, smallest to largest.
    f"unsloth/Qwen3-{size}-unsloth-bnb-4bit"
    for size in ("1.7B", "4B", "8B", "14B", "32B")
] + [
    # Other model families.
    "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
    "unsloth/Phi-4",
    "unsloth/Llama-3.1-8B",
    "unsloth/Llama-3.2-3B",
    "unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit",  # text-to-speech model
]

# Checkpoint to fine-tune, and the directory name the trained LoRA adapter is
# written to after training.
base_model = "unsloth/Qwen3-1.7B"
lora_model = "qwen3-ghalib-lora"

# Load the base model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_model,
    max_seq_length=2048,    # context length; larger values use more memory
    load_in_4bit=True,      # 4-bit weights: lowest memory footprint
    load_in_8bit=False,     # 8-bit alternative: more accurate, about 2x the memory
    full_finetuning=False,  # adapters are trained instead of all weights
    # token="hf_...",       # only needed for gated models
)

==((====))==  Unsloth 2025.4.7: Fast Qwen3 patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [64]:
# LoRA configuration, collected in one place so the values are easy to tune.
lora_settings = dict(
    r=32,                 # adapter rank; any value > 0 (8, 16, 32, 64, 128 are typical)
    lora_alpha=32,        # scaling factor; rank or 2 * rank is the usual choice
    lora_dropout=0,       # any value works, 0 is the optimized path
    bias="none",          # any value works, "none" is the optimized path
    target_modules=[
        # attention projections
        "q_proj", "k_proj", "v_proj", "o_proj",
        # MLP projections
        "gate_proj", "up_proj", "down_proj",
    ],
    # True or "unsloth"; "unsloth" is the lower-VRAM variant for long context.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,     # rank-stabilized LoRA is available but not used here
    loftq_config=None,    # LoftQ is available but not used here
)

# Rebinds `model` to the adapter-wrapped model. Re-running this cell without
# reloading the base model would wrap the already wrapped model again.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

In [65]:
from datasets import load_dataset

# Read the local JSONL file and take its "train" split directly, then preview
# the first record.
dataset = load_dataset(
    "json",
    data_files="english_reasoning_dataset.jsonl",
    split="train",
)
dataset[0]

{'instruction': 'Generate a poetic couplet in English inspired by Mirza Ghalib, based on the given theme and sentiment. Provide reasoning.',
 'input': 'Theme: eternity\nSentiment: tragic',
 'output': 'Reasoning: Ghalib would view eternity through a tragic lens, finding beauty in contradiction.\nPoem: Even life seemed a tale we asked in despair,\nWithout you, all felt broken, beyond repair.'}

In [75]:
def generate_conversation(example):
    """Turn one dataset record into a two-turn chat for supervised fine-tuning.

    Args:
        example: Mapping with the string fields ``instruction``, ``input`` and
            ``output``. ``output`` is expected to hold a ``Reasoning:`` section
            followed by a ``Poem:`` section.

    Returns:
        A dict ``{"conversation": [user_message, assistant_message]}`` where
        each message is a ``{"role": ..., "content": ...}`` dict. The user
        content is the instruction and input joined by a newline. The
        assistant content wraps the reasoning in ``<think>`` / ``</think>``
        tags and places the poem after the closing tag.
    """
    # Named user_content rather than `input` so the builtin is not shadowed.
    user_content = f"{example['instruction']}\n{example['input']}"

    raw_output = example["output"]
    # Split on the first "Poem:" marker only, so the same text appearing later
    # inside the poem itself is left untouched.
    reasoning_part, poem_marker, poem_part = raw_output.partition("Poem:")

    if poem_marker:
        # Drop the "Reasoning:" label and the whitespace around it; otherwise
        # the think block starts with a stray space that ends up in every
        # training sample.
        reasoning = reasoning_part.replace("Reasoning:", "", 1).strip()
        assistant_content = (
            f"<think>\n{reasoning}\n</think>\n\n{poem_part.strip()}"
        )
    else:
        # Without a poem section there is nothing to close the think block
        # with; pass the text through instead of emitting an unbalanced tag.
        assistant_content = raw_output

    return {
        "conversation": [
            {"role": "user", "content": user_content},
            {"role": "assistant", "content": assistant_content},
        ]
    }

# Step 1: convert every record into a user/assistant message pair.
chat_messages = dataset.map(generate_conversation)["conversation"]

# Step 2: render each message pair to a single training string with the
# tokenizer's chat template (text only, no token ids).
conversations = tokenizer.apply_chat_template(chat_messages, tokenize=False)

# Preview the first rendered sample.
conversations[0]

'<|im_start|>user\nGenerate a poetic couplet in English inspired by Mirza Ghalib, based on the given theme and sentiment. Provide reasoning.\nTheme: eternity\nSentiment: tragic<|im_end|>\n<|im_start|>assistant\n<think>\n Ghalib would view eternity through a tragic lens, finding beauty in contradiction.\n</think>\n\nEven life seemed a tale we asked in despair,\nWithout you, all felt broken, beyond repair.<|im_end|>\n'

In [67]:
import pandas as pd
from datasets import Dataset

# Put the rendered conversations into a single column named "text".
data = pd.Series(conversations, name="text")

# Convert to a Hugging Face dataset and shuffle it with a fixed seed so the
# sample order is reproducible.
combined_dataset = Dataset.from_pandas(data.to_frame()).shuffle(seed=3407)

In [72]:
from trl import SFTConfig, SFTTrainer

# Training hyperparameters, kept separate from the trainer wiring below.
training_args = SFTConfig(
    dataset_text_field="text",      # column of combined_dataset holding the samples
    # Batching: 2 per device x 4 accumulation steps = 8 samples per optimizer step.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Schedule: one pass over the data; set max_steps instead for a short run.
    num_train_epochs=1,
    # max_steps=100,
    warmup_steps=5,
    lr_scheduler_type="linear",
    # Optimizer.
    learning_rate=2e-5,
    optim="adamw_8bit",
    weight_decay=0.01,
    # Bookkeeping.
    logging_steps=1,                # log the loss at every step
    seed=3407,
    report_to="none",               # change to e.g. WandB to enable experiment tracking
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=combined_dataset,
    eval_dataset=None,              # no evaluation split is used
    args=training_args,
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/2000 [00:00<?, ? examples/s]

In [73]:
# Train the model: runs the fine-tuning loop configured on `trainer`.
trainer.train()

# Save the LoRA adapter to the directory named by `lora_model`. Only the model
# is saved here; this call does not write the tokenizer.
trainer.model.save_pretrained(lora_model)

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,000 | Num Epochs = 1 | Total steps = 250
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 34,865,152/7,000,000,000 (0.50% trained)


Step,Training Loss
1,1.7568
2,1.7
3,1.6639
4,1.5653
5,1.6193
6,1.6087
7,1.575
8,1.5043
9,1.4384
10,1.4974


In [70]:
from transformers import TextStreamer

# Same instruction format as the training records: instruction line followed by
# the theme and sentiment.
prompt = """Generate a poetic couplet in English inspired by Mirza Ghalib, based on the given theme and sentiment. Provide reasoning.
Theme: betrayal
Sentiment: pensive"""

messages = [{"role": "user", "content": prompt}]

# Render the chat to text. The generation prompt is required so the model
# continues as the assistant; thinking is switched off for this run.
text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    enable_thinking=False,
    tokenize=False,
)

# Tokenize on the GPU and stream the completion as it is produced, skipping the
# echoed prompt.
model_inputs = tokenizer(text, return_tensors="pt").to("cuda")
_ = model.generate(
    **model_inputs,
    streamer=TextStreamer(tokenizer, skip_prompt=True),
    max_new_tokens=500,                    # raise for longer outputs
    temperature=0.7, top_p=0.8, top_k=20,  # sampling settings for non-thinking mode
)

Mirza Ghalib's poetry is known for its introspective, melancholic depth, often reflecting on human frailty, love, and loss. In this pensive couplet, I aim to mirror that tone through metaphor and introspection.

**Couplet:**

Though I may have lost my love,  
I still dream of her in the moonlight.  
Her eyes still haunt my soul,  
A ghost of joy, a shadow of pain.<|im_end|>


In [71]:
from transformers import TextStreamer

# Same instruction format as the training records: instruction line followed by
# the theme and sentiment.
prompt = """Generate a poetic couplet in English inspired by Mirza Ghalib, based on the given theme and sentiment. Provide reasoning.
Theme: betrayal
Sentiment: pensive"""

messages = [{"role": "user", "content": prompt}]

# Render the chat to text. The generation prompt is required so the model
# continues as the assistant; thinking is switched ON for this run, so the
# completion starts with a <think> block.
text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    enable_thinking=True,
    tokenize=False,
)

# Tokenize on the GPU and stream the completion as it is produced, skipping the
# echoed prompt.
model_inputs = tokenizer(text, return_tensors="pt").to("cuda")
_ = model.generate(
    **model_inputs,
    streamer=TextStreamer(tokenizer, skip_prompt=True),
    max_new_tokens=1024,                    # raise for longer outputs
    temperature=0.6, top_p=0.95, top_k=20,  # sampling settings for thinking mode
)

<think>
 Ghalib’s verses are often steeped in existential musings, where personal anguish mirrors broader human frailties. Betrayal, as a theme, is a mirror to Ghalib’s introspection. The sentiment is pensive, revealing a sense of loss and self-doubt. The couplet should reflect these depths, using metaphors drawn from nature and introspection. Ghalib’s language is both poetic and philosophical, blending imagery with emotional resonance. The couplet must balance these elements, ensuring a flow that aligns with Ghalib’s lyrical style.
</think>

The sun’s last light through the dying tree,
Its shadow now seems to be watching me.<|im_end|>


In [None]:
import os

# The Colab secrets helper only exists inside Colab. The install cell supports
# other environments too, so the import is guarded instead of failing the cell.
try:
    from google.colab import userdata
except ImportError:
    userdata = None


def get_hf_token():
    """Return the Hugging Face token used for the hub uploads below.

    Inside Colab the token is read from the notebook secret ``HF_TOKEN``.
    Elsewhere it is read from the ``HF_TOKEN`` environment variable, and the
    result is ``None`` when that variable is not set.
    """
    if userdata is not None:
        return userdata.get('HF_TOKEN')
    return os.environ.get('HF_TOKEN')


# Every export below is disabled; flip the relevant `if False` to `if True` to
# run it.

# Just LoRA adapters
if False:
    model.save_pretrained_merged("model", tokenizer, save_method = "lora",)
if False: # Pushing to HF Hub
    model.push_to_hub_merged("babanomania/fikr-e-ghalib_qwen3-1.7B_lora", tokenizer, save_method = "lora", token = get_hf_token())

# Save to multiple GGUF options - much faster if you want multiple!
if False:
    model.push_to_hub_gguf(
        "babanomania/fikr-e-ghalib_qwen3-1.7B_lora",
        tokenizer,
        quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
        token = get_hf_token(),
    )

Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 4.53 out of 12.67 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 28/28 [00:00<00:00, 52.60it/s]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving fikr-e-ghalib_qwen3-1.7B_lora/pytorch_model.bin...
Done.
==((====))==  Unsloth: Conversion from QLoRA to GGUF information
   \\   /|    [0] Installing llama.cpp might take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF 16bits might take 3 minutes.
\        /    [2] Converting GGUF 16bits to ['q4_k_m'] might take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: Installing llama.cpp. This might take 3 minutes...
Unsloth: [1] Converting model at fikr-e-ghalib_qwen3-1.7B_lora into f16 GGUF format.
The output location will be /content/fikr-e-ghalib_qwen3-1.7B_lora/unsloth.F16.gguf
This might take 3 minutes...
INFO:hf-to-gguf:Loading model: fikr-e-ghalib_qwen3-1.7B_lora
INFO:hf-to-gguf:Model architecture: Qwen3ForCausalLM
INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
INFO:hf-to-gguf:Exporting model...
INFO:hf-to-gguf:gguf: loading model part 'pytorch_model.bin'
INFO:hf-t