In [None]:
!pip install transformers datasets peft accelerate bitsandbytes

In [None]:
# Import necessary libraries
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)

In [None]:
# Hub checkpoint id; the tokenizer here and the model later in the notebook are both loaded from it.
model_name = "microsoft/phi-1_5"
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

# Reuse the end-of-sequence token as the padding token so sequences can be padded.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Request 4-bit loading through an explicit BitsAndBytesConfig: passing
# `load_in_4bit=True` directly to `from_pretrained` is deprecated in transformers.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

# Load the causal-LM checkpoint with 4-bit quantized weights; non-quantized
# parameters are kept in float16 and device placement is delegated to `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=quantization_config,
)


# LoRA adapter hyper-parameters.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],  # modules that receive adapters
    r=8,  # adapter rank
    lora_alpha=16,  # adapter scaling factor
    lora_dropout=0.05,
    bias="none",
)

# Run PEFT's k-bit training preparation on the loaded model, then wrap it with the adapters.
model = get_peft_model(prepare_model_for_kbit_training(model), lora_config)

In [None]:
from google.colab import files

# File name the prompt asks for. It is used when the upload kept that name, or
# when nothing was uploaded (e.g. the file is already on disk from an earlier run).
DEFAULT_DATASET_FILE = "hinglish_dataset.jsonl"

print("Upload your Hinglish dataset (hinglish_dataset.jsonl):")
uploaded = files.upload()  # maps each uploaded file name to its content

# Do not assume the uploaded file carries the expected name: if it was uploaded
# under a different one (for instance a renamed re-upload), load that file instead
# of failing on a missing path.
if uploaded and DEFAULT_DATASET_FILE not in uploaded:
    dataset_file = next(iter(uploaded))
else:
    dataset_file = DEFAULT_DATASET_FILE

# Load the JSON-lines records; each one is expected to provide "prompt" and "completion".
dataset = load_dataset("json", data_files=dataset_file)

def format_prompt(example, max_length=512):
    """Tokenize one prompt/completion record and build its training labels.

    The prompt and completion are joined with a single space, terminated with
    the tokenizer's EOS token, and truncated/padded to ``max_length`` tokens.
    Labels mirror ``input_ids`` on completion tokens only; prompt tokens and
    padding positions are set to ``-100``, the value used here to mark
    positions that must not contribute to the loss.

    Args:
        example: Mapping with string fields ``"prompt"`` and ``"completion"``.
        max_length: Fixed sequence length after truncation and padding.

    Returns:
        The tokenizer output for the full text with an added ``"labels"`` list
        of the same length as ``"input_ids"``.
    """
    # Tokenize the prompt once, only to learn how many leading tokens to mask.
    # NOTE(review): assumes the prompt tokenizes to the same prefix when it is
    # followed by the completion; a token merge at the boundary could shift
    # the mask by one position.
    prompt_len = len(tokenizer(example["prompt"])["input_ids"])

    # Terminate the target with EOS so the model still learns where to stop now
    # that padding positions are excluded from the loss.
    eos = tokenizer.eos_token or ""
    tokenized_text = tokenizer(
        example["prompt"] + " " + example["completion"] + eos,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_attention_mask=True,
    )

    # Build labels position by position instead of by slice assignment: a
    # slice assignment with more values than the list holds (prompt longer than
    # max_length) would lengthen the labels beyond the input_ids.
    labels = []
    real_seen = 0  # number of non-padding tokens visited so far
    for token_id, is_real in zip(tokenized_text["input_ids"], tokenized_text["attention_mask"]):
        if is_real and real_seen >= prompt_len:
            labels.append(token_id)
        else:
            # Prompt token or padding: excluded from the loss.
            labels.append(-100)
        if is_real:
            real_seen += 1

    tokenized_text["labels"] = labels
    return tokenized_text

# Run format_prompt over every record to attach input ids, attention mask and labels.
tokenized_dataset = dataset.map(function=format_prompt)

In [None]:
training_args = TrainingArguments(
    # Output location and checkpoint retention.
    output_dir="./phi1_5_hinglish_lora",
    save_strategy="epoch",
    save_total_limit=1,
    # Batching: 2 samples per device, accumulated over 4 steps.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Optimization schedule and numerics.
    num_train_epochs=3,
    learning_rate=2e-4,
    warmup_steps=5,
    optim="paged_adamw_32bit",
    fp16=True,
    # Logging: every 10 steps, no external reporting integration.
    logging_steps=10,
    report_to="none",
)


In [None]:
# Fine-tune the adapter-wrapped model on the "train" split.
train_split = tokenized_dataset["train"]
trainer = Trainer(model=model, args=training_args, train_dataset=train_split)
trainer.train()



In [None]:
# Save LoRA adapter
model.save_pretrained("./phi1_5_hinglish_lora")

# Download fine-tuned model
!zip -r phi1_5_hinglish_lora.zip phi1_5_hinglish_lora
files.download("phi1_5_hinglish_lora.zip")

In [None]:
# Inference function
def generate_response(prompt, model, tokenizer, max_new_tokens=100, temperature=0.7):
    """Generate the assistant's reply to a single ``User: ...\\nAssistant:`` prompt.

    Args:
        prompt: Text fed to the model.
        model: Model exposing ``generate``, ``device``, ``training``, ``eval``
            and ``train``.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        max_new_tokens: Maximum number of tokens generated after the prompt.
        temperature: Sampling temperature. ``None`` or a value <= 0 switches to
            greedy decoding.

    Returns:
        The newly generated text, cut at the next ``User:`` marker (if the
        model starts another turn) and stripped of surrounding whitespace.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    # Pass an explicit pad id; fall back to EOS when the tokenizer has none.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Bound the number of *new* tokens so long prompts do not eat the budget.
    generation_kwargs = {"max_new_tokens": max_new_tokens, "pad_token_id": pad_token_id}
    if temperature is not None and temperature > 0:
        # Temperature only has an effect when sampling is enabled.
        generation_kwargs.update(do_sample=True, temperature=temperature)
    else:
        generation_kwargs["do_sample"] = False

    # Generate in eval mode (no dropout), then restore the caller's mode.
    was_training = model.training
    model.eval()
    try:
        outputs = model.generate(**inputs, **generation_kwargs)
    finally:
        model.train(was_training)

    # Decode only the tokens produced after the prompt, so the result does not
    # depend on finding an "Assistant:" marker inside the echoed prompt.
    completion = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    # Keep a single turn: drop anything from a follow-up "User:" turn onwards.
    return completion.split("User:", 1)[0].strip()

In [None]:

print("\nTesting inference with sample prompts:")

# Sample Hinglish prompts in the "User: ...\nAssistant:" layout.
prompts = [
    "User: Mujhe ek chai pilao.\nAssistant:",
    "User: Aajka weather kaisa hai?\nAssistant:",
    "User: Meri bike ki insurance kaise badhaun?\nAssistant:"
]

# Generate one reply per prompt and print each input/output pair, followed by a blank line.
for prompt in prompts:
    response = generate_response(prompt, model, tokenizer)
    print(f"Input: {prompt}", f"Output: {response}\n", sep="\n")

print("Fine-tuning and inference completed successfully!")