# Synthetic Dataset Generator and Gradio Chat


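A **Gradio-powered chat interface** for generating high-quality synthetic training data using open-source LLMs that run locally on the notebook's GPU — no paid LLM API keys required (only a Hugging Face access token, read from the Colab secret `HF_TOKEN`).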



In [None]:
import os
import requests
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from google.colab import drive
from huggingface_hub import login
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch
import gradio as gr

In [None]:
# Quietly upgrade the packages used for 4-bit quantized loading and generation.
# NOTE(review): this cell sits after the import cell; if transformers was already
# imported in this session, a runtime restart is probably needed before the
# upgraded versions are picked up — confirm the intended cell order.
!pip install -q --upgrade bitsandbytes accelerate transformers

In [None]:
# Read the Hugging Face token from the Colab secret store and log in to the Hub,
# also saving the token as a git credential.
hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

In [None]:
# Shared bitsandbytes settings used by generate() whenever quant=True.
quant_config = BitsAndBytesConfig(
    # Storage: 4-bit weights in the "nf4" format, with double quantization enabled.
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    # Compute: run the matmuls in bfloat16.
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
# Short alias -> Hugging Face Hub repository id.
# The aliases are what the UI dropdown lists; the repository ids are what gets
# handed to generate() as `model_name`.
MODELS = {
    "LLAMA": "meta-llama/Llama-3.2-1B-Instruct",
    "PHI": "microsoft/Phi-4-mini-instruct",
    "GEMMA": "google/gemma-3-270m-it",
    "QWEN": "Qwen/Qwen3-4B-Instruct-2507",
    "DEEPSEEK": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
}

In [None]:
def generate(model_name, messages, quant=True, max_new_tokens=512):
  """Run one chat completion with a Hugging Face causal LM and return the reply.

  The tokenizer and model are loaded on every call, the reply is streamed to
  stdout while it is generated, and the model is released again before
  returning so that consecutive calls (e.g. from the chat UI with different
  models) do not pile up GPU memory.

  Args:
    model_name: Hugging Face Hub repository id (or local path) of the model.
    messages: Chat history as a list of ``{"role": ..., "content": ...}``
      dicts, in the form accepted by ``tokenizer.apply_chat_template``.
    quant: When true, load the model with the module-level ``quant_config``
      (4-bit bitsandbytes); otherwise load it unquantized.
    max_new_tokens: Upper bound on the number of generated tokens.

  Returns:
    The newly generated text only (prompt excluded), with special tokens
    stripped.
  """
  import gc  # local import: only needed for the cleanup at the end

  tokenizer = AutoTokenizer.from_pretrained(model_name)
  # Only fall back to EOS when the tokenizer ships without a pad token;
  # do not clobber a pad token the model was actually trained with.
  if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

  text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

  # Place the weights on the GPU at load time. Calling `.to("cuda")` afterwards
  # is rejected for bitsandbytes-quantized models on many library versions, and
  # for unquantized models it first materialises the whole model on the CPU.
  load_kwargs = {"device_map": "cuda"}
  if quant:
    load_kwargs["quantization_config"] = quant_config
  llm = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)

  try:
    # Keep the inputs on whatever device the model ended up on.
    inputs = tokenizer(text, return_tensors="pt").to(llm.device)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    # Echo tokens to stdout as they are produced, skipping the prompt.
    streamer = TextStreamer(tokenizer, skip_prompt=True)
    outputs = llm.generate(
      input_ids=input_ids,
      attention_mask=attention_mask,
      max_new_tokens=max_new_tokens,
      pad_token_id=tokenizer.pad_token_id,
      streamer=streamer,
    )
    # Decode only the new tokens, not the prompt
    new_tokens = outputs[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
  finally:
    # Drop the model and hand cached CUDA blocks back so the next call
    # (possibly with a different, larger model) starts from a clean GPU.
    del llm
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
# Smoke test: ask the Qwen model for a sample synthetic dataset and print the reply.
# NOTE(review): the result name `json` would shadow the stdlib `json` module if
# that module is imported later in this notebook.
football_prompt = [
    {"role": "user", "content": "generate json of synthetic data for training a football match model"},
]
json = generate(MODELS["QWEN"], football_prompt)

print(json)

In [None]:
def put_message_in_chatbot(message, history):
    """Append the user's message to the chat history and clear the input box.

    Returns a pair: an empty string (the new textbox value) and a new history
    list consisting of ``history`` followed by a ``{"role": "user", ...}`` entry.
    The ``history`` list passed in is not modified.
    """
    user_turn = {"role": "user", "content": message}
    extended_history = history + [user_turn]
    cleared_textbox = ""
    return cleared_textbox, extended_history

def chat(history, model_name):
    """Generate the assistant's reply for the current conversation.

    ``history`` is a list of message dicts; only their ``role`` and ``content``
    fields are forwarded to the model. ``model_name`` is a key of ``MODELS``.
    The reply is appended to ``history`` in place, and that same list is
    returned.
    """
    # Reduce every turn to the two fields sent to the model, dropping any others.
    messages = []
    for turn in history:
        messages.append({"role": turn["role"], "content": turn["content"]})

    reply = generate(MODELS[model_name], messages)

    assistant_turn = {"role": "assistant", "content": reply}
    history.append(assistant_turn)
    return history


# Build the chat UI: a chat window, a prompt textbox and a model picker, each
# placed in its own row.
with gr.Blocks() as ui:
    with gr.Row():
        # type="messages": the history is exchanged as a list of
        # {"role", "content"} dicts, which is what the handlers above build.
        chatbot = gr.Chatbot(height=500, type="messages")
    with gr.Row():
        message = gr.Textbox(label="What do you need data for?:")
    with gr.Row():
        # The dropdown lists the MODELS aliases; chat() maps the chosen alias
        # back to a Hub repository id.
        model_selector = gr.Dropdown(
            choices=list(MODELS.keys()),
            value="PHI",
            label="Select Model",
        )

    # On submit: first show the user's message and clear the textbox, then run
    # the selected model and write the updated history back to the chatbot.
    message.submit(
        put_message_in_chatbot, inputs=[message, chatbot], outputs=[message, chatbot]
    ).then(
        chat, inputs=[chatbot, model_selector], outputs=chatbot
    )

# Start the Gradio app.
ui.launch(inbrowser=True)