### Installation

In [1]:
%%capture
# Install Unsloth and its supporting packages. The %%capture cell magic above
# swallows this cell's output, so pip's log is not shown in the notebook.
import os
# The runtime is treated as Colab when the concatenated names of all
# environment variables contain the substring "COLAB_".
if "COLAB_" not in "".join(os.environ.keys()):
    # Not on Colab: a plain install, letting pip resolve the dependencies.
    !pip install unsloth
else:
    # Colab-only path. Outside Colab, use `pip install unsloth` instead.
    # The first and last commands pass --no-deps so pip installs exactly the
    # listed packages without pulling in their dependencies
    # (presumably to leave Colab's preinstalled stack untouched — TODO confirm).
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth

### Unsloth

In [2]:
from unsloth import FastLanguageModel
import torch
# --- Loader settings --------------------------------------------------------
max_seq_length = 2048  # sequence length handed to the loader
load_in_4bit = True    # forwarded to from_pretrained as its 4-bit loading flag
dtype = None           # forwarded unchanged; no explicit dtype is requested here

# Fetch the base checkpoint together with its matching tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    load_in_4bit = load_in_4bit,
    dtype = dtype,
    max_seq_length = max_seq_length,
    model_name = "unsloth/Qwen2.5-7B",
)

# This notebook only generates text, so switch the model to Unsloth's
# inference mode right after loading.
FastLanguageModel.for_inference(model)

# Guarantee a padding id: when the tokenizer has none, reuse its
# end-of-sequence id (which may itself be None if the tokenizer lacks one).
if getattr(tokenizer, "pad_token_id", None) is None:
    setattr(tokenizer, "pad_token_id", getattr(tokenizer, "eos_token_id", None))

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.8.4: Fast Qwen2 patching. Transformers: 4.55.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




model.safetensors.index.json: 0.00B [00:00, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/605 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

In [6]:
import time, pandas as pd, random

# Alpaca-style template with two positional slots: the first receives the
# user's message, the second the reply (left empty when generating).
alpaca_prompt = """Below is a prompt from someone asking their Abuela for advice. Respond with a culturally wise or affectionate reply.

### Prompt:
{}

### Response:
{}"""

# Keyword arguments shared by every model.generate() call in this notebook.
GEN_KW = {
    "max_new_tokens": 180,
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "repetition_penalty": 1.1,
}

def generate_with_alpaca(model, tok, user_text: str):
    """Generate one reply to ``user_text`` using the Alpaca-style template.

    The text is placed in the first slot of ``alpaca_prompt`` (the response
    slot is left empty), tokenized, and passed to ``model.generate`` together
    with the shared ``GEN_KW`` settings.

    Args:
        model: Object exposing ``device`` and ``generate(**kwargs)``.
        tok: Tokenizer; called on the prompt and used to decode the output.
        user_text: The message to insert into the prompt.

    Returns:
        The decoded continuation only (prompt tokens removed), with special
        tokens skipped and surrounding whitespace stripped.
    """
    prompt = alpaca_prompt.format(user_text, "")
    enc = tok(prompt, return_tensors="pt").to(model.device)

    # Resolve the padding id with an explicit None check. A truthiness test
    # (`pad or eos`) would wrongly discard a legitimate pad id of 0.
    pad_id = getattr(tok, "pad_token_id", None)
    if pad_id is None:
        pad_id = tok.eos_token_id

    with torch.no_grad():
        out = model.generate(
            **enc,
            **GEN_KW,
            pad_token_id=pad_id,
            use_cache=True,
        )

    # The returned sequence starts with the prompt tokens; keep what follows.
    prompt_len = enc["input_ids"].shape[-1]
    gen = out[0][prompt_len:]
    return tok.decode(gen, skip_special_tokens=True).strip()

# Fixed set of prompts sent to the model, one generation per entry. The entries
# mix Spanish and English. Note that the fourth string ends with a trailing
# space inside the quotes; it is part of the prompt exactly as written.
QUESTIONS = [
    "¿Cómo hago arroz con gandules para 4 personas?",
    "¿I failed an exam, how do I tell mami?",
    "Extraño Puerto Rico. ¿Cómo manejo la nostalgia?",
    "Hi Abuela, how are you doing today? ",
    "Me siento mal, me duele la garganta.",
    "Me quiero ir de vacaciones!",
    "Tengo entrevista mañana y estoy nerviosa.",
]

# Generation samples (GEN_KW sets do_sample=True), so seed torch's RNG before
# the loop; otherwise every Restart & Run All writes different answers to the
# CSV. Exact repeatability may still depend on hardware and library versions.
SEED = 3407
torch.manual_seed(SEED)

# Collect one record per prompt: the prompt, the model's reply, and how long
# the generation took.
rows = []
for q in QUESTIONS:
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    t0 = time.perf_counter()
    base_ans = generate_with_alpaca(model, tokenizer, q)
    t1 = time.perf_counter()
    rows.append({
        "prompt": q,
        "base_response": base_ans,
        "base_gen_secs": round(t1 - t0, 2),
    })

# Build the table once from the list of records, persist it for later
# comparison, and display the first rows as the cell output.
df_base = pd.DataFrame(rows)
df_base.to_csv("pre_finetune_base_responses.csv", index=False)
df_base.head(8)

Unnamed: 0,prompt,base_response,base_gen_secs
0,¿Cómo hago arroz con gandules para 4 personas?,"¡Claro, querida! Para preparar arroz con gandu...",10.7
1,"¿I failed an exam, how do I tell mami?","¡Ay, niña! Lo siento mucho que hayas tenido es...",5.56
2,Extraño Puerto Rico. ¿Cómo manejo la nostalgia?,"Amor, extrañar Puerto Rico es algo que muchas ...",8.67
3,"Hi Abuela, how are you doing today?",¡Hola! ¡Cómo estás mi niña! Estoy muy bien gra...,3.2
4,"Me siento mal, me duele la garganta.","¡Ay, mi querida niña! ¿Te sientes enferma? ¡No...",10.1
5,Me quiero ir de vacaciones!,¡Qué bonito! ¡De seguro tendrás una gran avent...,10.06
6,Tengo entrevista mañana y estoy nerviosa.,"¡No te preocupes, niña! Tienes toda la razón p...",10.02
