# Fine-tuning Mistral-7B with LoRA on Hugging Face
This notebook fine-tunes `mistralai/Mistral-7B-Instruct-v0.2` on `training_data.jsonl` with LoRA adapters, using PEFT and Transformers.

In [None]:
!pip install -q datasets accelerate peft
!pip install -U transformers
!pip install -U bitsandbytes
!pip install -q pyngrok uvicorn fastapi nest-asyncio

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, TaskType
import torch
import os

# Expose the Hugging Face access token through the environment.
# Replace the placeholder locally; never commit a real token.
os.environ.update({"HF_TOKEN": "your_huggingface_token_here"})

# Read the JSON-lines training file directly as its single "train" split.
dataset = load_dataset("json", data_files="training_data.jsonl", split="train")

In [None]:
model_id = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Use a pad token that is distinct from EOS. DataCollatorForLanguageModeling
# replaces every label equal to pad_token_id with -100, so padding with EOS
# would also hide the real end-of-sequence token from the loss and the model
# would never be trained to stop generating. Fall back to EOS only when the
# tokenizer defines no unknown token.
tokenizer.pad_token = tokenizer.unk_token or tokenizer.eos_token


def tokenize(example):
    """Tokenize one training record into fixed-length causal-LM features.

    Args:
        example: A mapping with string fields ``'prompt'`` and ``'completion'``.

    Returns:
        A dict with ``'input_ids'``, ``'attention_mask'`` and ``'labels'``,
        each padded or truncated to 512 tokens. ``'labels'`` is a copy of
        ``'input_ids'``.
    """
    combined = example['prompt'] + example['completion']

    # Terminate the sample with EOS so the model sees where a completion ends.
    # Skipped when the data already carries it. Samples longer than
    # max_length still lose the EOS to truncation.
    eos = tokenizer.eos_token
    if eos and not combined.endswith(eos):
        combined += eos

    tokenized = tokenizer(
        combined,
        truncation=True,
        padding='max_length',
        max_length=512,
    )
    return {
        'input_ids': tokenized['input_ids'],
        'attention_mask': tokenized['attention_mask'],
        'labels': list(tokenized['input_ids']),
    }


tokenized_dataset = dataset.map(tokenize)

In [None]:
# Importing from bitsandbytes fails fast when the package is missing; the
# 4-bit loading below depends on it.
from bitsandbytes import __version__
# BitsAndBytesConfig lives in transformers and is not part of the import line
# at the top of the notebook, so it has to be imported here.
from transformers import BitsAndBytesConfig

# 4-bit NF4 quantization with double quantization; matmuls run in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized base model and let accelerate place it on the
# available devices.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Adapter hyperparameters: rank-8 LoRA on the attention query/value
# projections, with no bias terms trained.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# The quantized model must go through prepare_model_for_kbit_training before
# the LoRA adapters are attached.
model = get_peft_model(prepare_model_for_kbit_training(model), lora_config)

import inspect

# Effective batch size is 2 * 4 = 8 sequences per optimizer step.
# NOTE(review): fp16=True here while the quantization config computes in
# bfloat16 — confirm this mix is intended for the target GPU.
training_args = TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    logging_dir="logs",
    save_total_limit=1,
    save_steps=20,
    logging_steps=5,
    report_to="none",
    fp16=True,
)

# Trainer's `tokenizer` argument is deprecated in favour of `processing_class`.
# The notebook installs the latest transformers, so choose whichever keyword
# the installed Trainer accepts instead of hard-coding one.
_trainer_params = inspect.signature(Trainer.__init__).parameters
_tokenizer_kwarg = (
    "processing_class" if "processing_class" in _trainer_params else "tokenizer"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    # mlm=False selects causal-LM collation: labels are rebuilt from input_ids.
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
    **{_tokenizer_kwarg: tokenizer},
)

trainer.train()

In [None]:
# Write the LoRA adapter first, then the tokenizer, into the same local folder.
for artifact in (model, tokenizer):
    artifact.save_pretrained("lora-adapter")

In [None]:
from huggingface_hub import login
# Authenticate with the Hugging Face Hub before uploading. Called without
# arguments, so the token is requested interactively rather than hard-coded.
login() 

In [None]:
# Upload the adapter and tokenizer to the same Hub repository.
# The deprecated `use_auth_token` argument is omitted: push_to_hub falls back
# to the credentials already configured for huggingface_hub (HF_TOKEN or the
# token stored by login()).
hub_repo_id = "ishaanj91/mistral-code-review-lora"
model.push_to_hub(hub_repo_id)
tokenizer.push_to_hub(hub_repo_id)

In [None]:
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# Imported here because the import lines of this cell do not include it.
from transformers import BitsAndBytesConfig

app = FastAPI()

# Quantize the base model the same way it was quantized for fine-tuning
# (NF4, double quantization, bfloat16 compute), so the adapter runs on the
# weights it was trained against. An explicit config also replaces the
# deprecated bare `load_in_4bit=True` keyword.
base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    device_map="auto",
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
)

# Attach the fine-tuned LoRA adapter and load the tokenizer published with it.
model = PeftModel.from_pretrained(base, "ishaanj91/mistral-code-review-lora")
tokenizer = AutoTokenizer.from_pretrained("ishaanj91/mistral-code-review-lora")

class ReviewRequest(BaseModel):
    """Request body accepted by the ``/review`` endpoint."""

    # Diff text to review; it is interpolated verbatim into the model prompt.
    diff: str

@app.post("/review")
def review_code(request: ReviewRequest):
    """Generate a review for the submitted code diff.

    Args:
        request: Parsed request body; ``request.diff`` holds the diff text.

    Returns:
        A dict ``{"review": <generated text>}`` containing only the newly
        generated tokens, not the prompt that was fed to the model.
    """
    prompt = f"Review this code diff:\n\n{request.diff}"

    # Place the inputs on the model's own device instead of assuming "cuda";
    # the model is loaded with device_map="auto".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=300)

    # generate() returns prompt tokens followed by the continuation for
    # decoder-only models, so drop the prompt before decoding.
    prompt_length = inputs["input_ids"].shape[1]
    review = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    return {"review": review}