In [None]:
!pip install -q unsloth accelerate bitsandbytes datasets transformers peft

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.2/46.2 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m192.7/192.7 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m27.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from unsloth import FastLanguageModel
import torch

# Load the pre-quantized 4-bit Llama-3 8B Instruct checkpoint together with its tokenizer.
# float16 is requested explicitly; the recorded run was on a Tesla T4, which reported
# "Bfloat16 = FALSE".
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-Instruct-bnb-4bit",
    max_seq_length = 2048,
    dtype = torch.float16,
    load_in_4bit = True
)

# Fall back to EOS as the padding token only when the checkpoint ships without one.
# Overwriting an existing pad token with EOS makes padding and end-of-sequence
# indistinguishable; a causal-LM collator that masks padded label positions by token id
# would then also mask every genuine EOS token, so the model never learns to stop.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
from datasets import Dataset

# Tiny in-memory Hindi corpus: one {"text": ...} record per sentence.
corpus = [
    {"text": sentence}
    for sentence in (
        "यह एक परीक्षण वाक्य है।",
        "भारत की राजधानी नई दिल्ली है।",
        "मुझे पुस्तक पढ़ना पसंद है।",
    )
]

# Wrap the records in a Hugging Face Dataset so they can be mapped over later.
dataset = Dataset.from_list(corpus)

In [None]:
def tokenize(example, max_length=512):
    """Tokenize one dataset row and attach causal-LM labels.

    Args:
        example: Mapping with a "text" entry holding the raw string.
        max_length: Length every sequence is truncated and padded to
            (defaults to 512, the value previously hard-coded).

    Returns:
        The tokenizer output with an extra "labels" list. Labels mirror
        "input_ids", except that padded positions (attention mask 0) are
        set to -100 so they are ignored by the loss instead of being
        treated as prediction targets.
    """
    result = tokenizer(
        example["text"],
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )

    attention_mask = result.get("attention_mask")
    if attention_mask is None:
        # No mask available: keep the plain copy rather than guessing which ids are padding.
        result["labels"] = list(result["input_ids"])
    else:
        # -100 is the ignore index of the cross-entropy loss; without it, a short sentence
        # padded to max_length would contribute mostly padding targets to the loss.
        result["labels"] = [
            token_id if is_real else -100
            for token_id, is_real in zip(result["input_ids"], attention_mask)
        ]
    return result

# Apply `tokenize` to every row (one example per call, since `batched` is not set) and drop
# the raw "text" column so only the fields returned by `tokenize` remain.
tokenized_dataset = dataset.map(tokenize, remove_columns=["text"])

Map:   0%|          | 0/3 [00:00<?, ? examples/s]

In [None]:
# Put the Unsloth-patched model into training mode, then wrap it with PEFT adapters.
# get_peft_model is called with library defaults only; the recorded run reports adapters on
# the QKV, O and MLP projections of all 32 layers.
# NOTE(review): confirm the default rank / alpha / target modules are what is intended.
# NOTE(review): this cell rebinds `model`, so it is not idempotent — re-run the loading
# cell before executing it a second time.
FastLanguageModel.for_training(model)
model = FastLanguageModel.get_peft_model(model)

Unsloth 2025.3.19 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

# Causal-LM fine-tuning run over the tokenized corpus.
# The collator is used with mlm=False, i.e. next-token prediction rather than masked LM.
trainer = Trainer(
    model = model,
    train_dataset = tokenized_dataset,
    args = TrainingArguments(
        # NOTE(review): the directory name says "tinyllama" although the loaded checkpoint
        # is Llama-3 8B; kept unchanged so existing paths keep working.
        output_dir = "tinyllama_hindi_pretrained",
        per_device_train_batch_size = 4,
        num_train_epochs = 5,
        gradient_accumulation_steps = 2,
        learning_rate = 2e-4,
        # The 3-example corpus fits in a single batch, so there is one optimizer step per
        # epoch (5 in total). The previous interval of 10 therefore never logged a loss.
        logging_steps = 1,
        fp16 = True,
        save_strategy = "epoch",
        # Spell out the seed (the library default) so the run's randomness is visible here.
        seed = 42,
        # Do not attach external experiment trackers; on hosted runtimes they can prompt
        # for credentials and block an unattended "Run All".
        report_to = "none",
    ),
    data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()

In [None]:
# Write the trained model and its tokenizer into the same directory so they can be
# reloaded together. `model` is the PEFT-wrapped model here — presumably only the adapter
# weights are written; verify the directory contents before relying on it.
# NOTE(review): the name says "tinyllama" although the base checkpoint is Llama-3 8B.
save_dir = "tinyllama_hindi_pretrained"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

('tinyllama_hindi_pretrained/tokenizer_config.json',
 'tinyllama_hindi_pretrained/special_tokens_map.json',
 'tinyllama_hindi_pretrained/tokenizer.json')

In [None]:
## Inference Test Generation
# Leave training mode: `for_inference` is Unsloth's counterpart to the earlier
# `for_training` call, and `eval()` additionally puts every submodule in evaluation mode.
FastLanguageModel.for_inference(model)
model.eval()
prompt = "मुझे लगता है कि आज का मौसम"

# Move the encoded prompt to wherever the model lives instead of hard-coding "cuda".
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Gradients are not needed for generation; only the generate call needs the guard.
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=70)

# NOTE(review): no seed or decoding strategy is set here, so the continuation may differ
# between runs if the checkpoint's generation config enables sampling.
print(tokenizer.decode(output[0], skip_special_tokens=True))

मुझे लगता है कि आज का मौसम काफी गर्म है। मेरे लिए सबसे अच्छा मौसम जुलाई- अगस्त का है, जब सूरज की रोशनी अच्छी होती है और मौसम में सुकून होता है।
I think the weather today is quite hot
