# 🖼️ Text Generation Playground  
Text generation with Mistral 7B.

1. **Run the first cell** to install requirements.  
2. Switch the runtime to **GPU**. On Colab: Runtime → Change runtime type → T4 GPU.
3. Enter a prompt to test the model.

> Model: *[Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3)*.

In [None]:
%pip install -q -r https://raw.githubusercontent.com/dbilgin/ai_playground/refs/heads/master/requirements.txt

In [None]:
import gradio as gr
from transformers import AutoTokenizer
from transformers.pipelines import pipeline
from huggingface_hub import login
import os

# Log in to the Hugging Face Hub only when HF_TOKEN is set in the environment;
# otherwise fall through and rely on whatever credentials are already cached.
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)

# System prompt prepended to every conversation by chat().
SYSTEM = """You are a concise AI tutor; use the supplied context if it helps."""

model_id = "mistralai/Mistral-7B-Instruct-v0.3"
tok  = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline(
    "text-generation",
    model=model_id,
    # Reuse the tokenizer loaded above so the chat template and the pipeline
    # share one tokenizer instance instead of loading it a second time.
    tokenizer=tok,
    # NOTE(review): recent transformers releases prefer
    # quantization_config=BitsAndBytesConfig(load_in_4bit=True) over the bare
    # load_in_4bit kwarg — confirm against the installed version.
    model_kwargs={
        "load_in_4bit": True,
        "device_map": {"": 0},                        # whole model on GPU 0
        "max_memory": {0: "10GiB", "cpu": "32GiB"},
        "torch_dtype": "auto"
    }
)

def chat(msg, history=None):
    """Generate one assistant reply and record the turn in the chat history.

    Args:
        msg: The new user message.
        history: Earlier turns as (user, assistant) pairs; ``None`` or empty
            starts a fresh conversation.

    Returns:
        The updated history twice: once for the Chatbot output and once for
        the State output of the Gradio interface.
    """
    history = history or []

    # Rebuild the whole conversation: system prompt, older turns, new message.
    messages = [{"role": "system", "content": SYSTEM}]
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": msg})

    # NOTE(review): the chat template may already emit the BOS token and the
    # pipeline tokenizer may add another — confirm and consider passing
    # add_special_tokens=False if a double BOS shows up.
    prompt = tok.apply_chat_template(messages,
                                     tokenize=False,
                                     add_generation_prompt=True)
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        eos_token_id=tok.eos_token_id,
        do_sample=True,          # without this, temperature/top_p are ignored
        temperature=0.7,
        top_p=0.9,
        return_full_text=False,  # return only the completion, not prompt + completion
    )
    reply = outputs[0]["generated_text"].strip()
    history.append((msg, reply))
    return history, history

# Wire chat() into a Gradio UI: a text box plus a State object carry the
# message and running history in; a Chatbot widget and the State carry the
# updated history out.
demo = gr.Interface(
    fn=chat,
    inputs=["text", gr.State()],
    outputs=[gr.Chatbot(), gr.State()],
)
demo.launch(debug=True)

## Train on IMDB

- Run the cell below to fine-tune the [Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) model on the IMDB dataset.

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer, SFTConfig
from huggingface_hub import login
import os

# Log in to the Hugging Face Hub only when HF_TOKEN is set in the environment.
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)

def format_prompts(examples):
    """Turn a batch of reviews into "Review / Sentiment" training prompts.

    Intended for ``dataset.map(..., batched=True)``.

    Args:
        examples: Batch mapping with a ``"text"`` list of reviews and,
            optionally, a parallel ``"label"`` list (0 = negative,
            1 = positive).

    Returns:
        ``{"text": [...]}`` with one prompt per review. When a known label is
        available it is appended after ``### Sentiment:`` so the model has a
        target to learn; otherwise the prompt ends at the marker.
    """
    sentiment_names = {0: "negative", 1: "positive"}

    reviews = examples["text"]
    # Batches without labels (or with unknown labels) keep the bare prompt.
    labels = examples["label"] if "label" in examples else [None] * len(reviews)

    prompts = []
    for review, label in zip(reviews, labels):
        prompt = f"### Review:\n{review}\n### Sentiment:"
        sentiment = sentiment_names.get(label)
        if sentiment is not None:
            prompt += f" {sentiment}"
        prompts.append(prompt)
    return {"text": prompts}

# Load the IMDB training split and rewrite every review into the prompt format.
dataset = load_dataset("imdb", split="train")
dataset = dataset.map(format_prompts, batched=True)

# Sanity check: show one formatted sample. A bare expression in the middle of
# a cell displays nothing, so print it; index the row first so only that one
# row is read instead of the whole "text" column.
print(dataset[2]["text"])

# Base checkpoint to fine-tune, plus its tokenizer.
model_id = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Quantization settings: 4-bit NF4 weights with double quantization, bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Load the quantized base model and prepare it for k-bit training.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
)
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings: rank 32, alpha 64, applied to the listed projection
# modules and the output head.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "lm_head"],
)

# Wrap the base model with the LoRA adapters.
model = get_peft_model(model, config)

# args = TrainingArguments(
#     output_dir="trained_models/text-generation",
#     num_train_epochs=4, # replace this, depending on your dataset
#     per_device_train_batch_size=16,
#     learning_rate=1e-5,
#     optim="sgd"
# )

# Supervised fine-tuning configuration.
cfg = SFTConfig(
    output_dir="trained_models/mistral-imdb",
    max_length=256,           # shorter sequences → far less activation memory
    per_device_train_batch_size=2,
    gradient_checkpointing=True,
    lr_scheduler_type="cosine",
    learning_rate=1e-5,
    num_train_epochs=2,
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    # The prompts were already built by dataset.map; just hand the column over.
    formatting_func=lambda row: row["text"],
    args=cfg,
)

trainer.train()

# Fold the trained LoRA weights back into the base model.
adapter_model = trainer.model
merged_model = adapter_model.merge_and_unload()

# `Trainer.tokenizer` is deprecated in favour of `processing_class`, which
# holds the tokenizer the trainer used.
trained_tokenizer = trainer.processing_class

## Train with databricks-dolly-15k

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer, SFTConfig
from huggingface_hub import login
import os

# Log in to the Hugging Face Hub only when HF_TOKEN is set in the environment.
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)

def format_prompts(example):
    """Render one instruction-tuning row as an Instruction/Context/Response prompt.

    Accepts both column conventions used by common instruction datasets:
    ``instruction``/``input``/``output`` and ``instruction``/``context``/``response``.
    The first set takes precedence when both are present.

    Args:
        example: Mapping for a single dataset row.

    Returns:
        The formatted training text. A missing or empty context leaves the
        ``### Context:`` section blank.

    Raises:
        KeyError: If ``instruction`` is missing, or neither ``output`` nor
            ``response`` is present.
    """
    # Optional context: try "input" first, then "context", then fall back to empty.
    context = example.get("input")
    if context is None:
        context = example.get("context") or ""

    # Required answer: "output" first, then "response".
    response = example.get("output")
    if response is None:
        response = example["response"]

    return (
        f"### Instruction:\n{example['instruction']}\n\n"
        f"### Context:\n{context}\n\n"
        f"### Response:\n{response}"
    )

# Load the dataset named in this section's heading and output_dir.
# Dolly's columns are `instruction`, `context` and `response`; rename the last
# two so rows expose the `instruction`/`input`/`output` fields that
# format_prompts reads.
dataset = load_dataset("databricks/databricks-dolly-15k", split="train")
dataset = dataset.rename_columns({"context": "input", "response": "output"})
# Alternative that already uses instruction/input/output (drop the rename):
# dataset = load_dataset("tatsu-lab/alpaca", split="train")

# No dataset.map(...) step here: format_prompts is passed to SFTTrainer as
# formatting_func instead.

# Base checkpoint to fine-tune, plus its tokenizer.
model_id = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Quantization settings: 4-bit NF4 weights with double quantization, bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Load the quantized base model and prepare it for k-bit training.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
)
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings: rank 32, alpha 64, applied to the listed projection
# modules and the output head.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
)

# Wrap the base model with the LoRA adapters.
model = get_peft_model(model, config)

# Supervised fine-tuning configuration.
cfg = SFTConfig(
    output_dir="trained_models/mistral-dolly-15k",
    max_length=256,           # shorter sequences → far less activation memory
    per_device_train_batch_size=2,
    gradient_checkpointing=True,
    lr_scheduler_type="cosine",
    learning_rate=1e-5,
    num_train_epochs=2,
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    # Each row is rendered into a single training string by format_prompts.
    formatting_func=format_prompts,
    args=cfg,
)

trainer.train()

# Fold the trained LoRA weights back into the base model.
adapter_model = trainer.model
merged_model = adapter_model.merge_and_unload()

# `Trainer.tokenizer` is deprecated in favour of `processing_class`, which
# holds the tokenizer the trainer used.
trained_tokenizer = trainer.processing_class