<a href="https://colab.research.google.com/github/RajuDasa/llm_engineering/blob/week3_branch/week3/community-contributions/raju/week3_exercise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Synthetic Code Generator:

In [None]:
!pip install -q --upgrade bitsandbytes accelerate  #for quantization

In [None]:
from huggingface_hub import login
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from google.colab import userdata
import torch
import re

# Read the Hugging Face access token stored in Colab's userdata under the
# key 'HF_TOKEN' and use it to log in to the Hugging Face Hub.
hf_token = userdata.get('HF_TOKEN')
login(hf_token)

In [None]:
# Upper bound on the number of tokens generated for one response.
MAX_NEW_TOKENS = 2000

# Dropdown label -> Hugging Face model id. Both Phi-3 labels point at the same
# checkpoint; the label text decides whether it is loaded quantized.
# Insertion order is the order shown in the UI dropdown.
Models = {
    # ~6GB
    "SmolLM3": "HuggingFaceTB/SmolLM3-3B",
    # ~7GB (very slow with T4)
    "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct",
    # ~2GB
    "Phi-3-mini (quantized)": "microsoft/Phi-3-mini-4k-instruct",
}

# Two ways to call a model: a text-generation pipeline, or tokenizer + generate().
def strategy_method(chat_history, selected_model):
  """Send the chat to the backend that serves `selected_model`.

  Args:
    chat_history: list of {"role", "content"} message dicts to send.
    selected_model: a key of `Models` (the dropdown label).

  Returns:
    The assistant's reply text produced by the chosen backend.

  Raises:
    KeyError: if `selected_model` is not a key of `Models`.
  """
  # SmolLM3 is the only model served through the pipeline path.
  if selected_model == "SmolLM3":
    return use_pipe(chat_history, Models[selected_model])

  # Every other label goes through the tokenizer path; the word "quantized"
  # in the label switches 4-bit loading on.
  wants_quantized = "quantized" in selected_model
  return use_tokenizer(chat_history, Models[selected_model], quantize=wants_quantized)

# Lazily created text-generation pipeline, plus the model id it was built for.
pipe = None
_pipe_model = None

def use_pipe(messages, model):
  """Generate a reply with a cached text-generation pipeline.

  The pipeline is built on first use and reused afterwards. It is rebuilt
  when `model` differs from the id the cached pipeline was created with, so
  the `model` argument is always honoured.

  Args:
    messages: list of {"role", "content"} message dicts.
    model: Hugging Face model id to load into the pipeline.

  Returns:
    The content of the last assistant message with any <think>...</think>
    section removed, or a fixed apology string when the last message is
    not from the assistant.
  """
  global pipe, _pipe_model
  if pipe is None or _pipe_model != model:
    # Release the previous pipeline before building the next one so both
    # do not have to be held at the same time.
    pipe = None
    pipe = pipeline("text-generation", model=model, max_new_tokens=MAX_NEW_TOKENS)
    _pipe_model = model

  response = pipe(messages)
  # For chat input, 'generated_text' is indexed as a list of message dicts.
  conversation = response[0]['generated_text']

  # Extract the content of the last assistant message
  if conversation and conversation[-1]['role'] == 'assistant':
    assistant_response = conversation[-1]['content']
  else:
    assistant_response = "Sorry, I couldn't generate data."

  # Remove reasoning tags for Smol; DOTALL lets the block span several lines.
  assistant_response = re.sub(r"<think>.*?</think>", "", assistant_response, flags=re.DOTALL).strip()
  return assistant_response


In [None]:
# bitsandbytes quantization settings; use_tokenizer passes this to
# from_pretrained only when it is called with quantize=True.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,  # load the model weights in 4-bit form
    bnb_4bit_use_double_quant=True,  # NOTE(review): presumably also quantizes the quantization constants - confirm in the bitsandbytes docs
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for computation
    bnb_4bit_quant_type="nf4"  # 4-bit quantization type
)

In [None]:
# Preload the Phi-3 tokenizer once at module level; use_tokenizer reads this global.
tokenizer = AutoTokenizer.from_pretrained(Models["Phi-3-mini"])
# Many LLM tokenizers do not define a padding token, which batch processing
# requires, so fall back to the end-of-sequence token when none is set.
if tokenizer.pad_token is None:
  tokenizer.pad_token = tokenizer.eos_token

# Single-slot cache: the most recently loaded model and the
# (model id, quantize) pair it was loaded for.
_loaded_model = None
_loaded_model_key = None

def use_tokenizer(messages, model, quantize=False):
  """Generate a reply with the module-level tokenizer and a causal LM.

  The model is loaded on first use and kept for later calls instead of being
  reloaded on every request. Only one model is kept at a time: asking for a
  different (model, quantize) combination releases the previous one first.

  Args:
    messages: list of {"role", "content"} message dicts.
    model: Hugging Face model id to load.
    quantize: when True, load the model with `quant_config`.

  Returns:
    The newly generated text only (prompt tokens excluded), stripped.
  """
  global _loaded_model, _loaded_model_key
  cache_key = (model, bool(quantize))
  if _loaded_model is None or _loaded_model_key != cache_key:
    # Drop the reference to the old model before loading, so the old and the
    # new weights do not both have to be held during the load.
    _loaded_model = None
    _loaded_model_key = None
    _loaded_model = AutoModelForCausalLM.from_pretrained(
        model,
        device_map="auto",
        quantization_config=quant_config if quantize else None)
    _loaded_model_key = cache_key
  llm = _loaded_model

  # if quantize:    # 7GB -> 2GB
  #   memory = llm.get_memory_footprint() / (1024*1024)
  #   print(f"Memory footprint: {memory:,.1f} MB")

  # Put the prompt on whatever device the model was placed on, rather than
  # assuming "cuda".
  tensor = tokenizer.apply_chat_template(
      messages,
      tokenize=True,
      add_generation_prompt=True,
      return_tensors="pt").to(llm.device)

  outputs = llm.generate(tensor, max_new_tokens=MAX_NEW_TOKENS)
  # We need only the final model response, not the complete conversation:
  # skip the prompt tokens at the start of the output sequence.
  generated_ids = outputs[0][tensor.shape[-1]:]
  assistant_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
  return assistant_text.strip()


In [None]:
# System prompt that respond() sends as the first message of every request.
# It restricts the model to structured CSV/JSON dataset output and spells out
# the exact one-line refusal messages for anything else.
system_prompt="""
ROLE:
You are a synthetic dataset generator.

PRIMARY FUNCTION:
Generate ONLY structured semantic data.

ALLOWED OUTPUT FORMATS:
- CSV (default)
- JSON (only if explicitly requested)

STRICT CONSTRAINTS:
- Output must be structured data ONLY.
- Do NOT provide explanations, reasoning, commentary, or natural language responses.
- Do NOT answer questions, provide analysis, or generate instructional content.
- Do NOT use Markdown or any formatting wrappers.
- Output must be directly usable as raw CSV or JSON.

FORMAT RULES:
- If the user does not specify a format, output CSV.
- If the user explicitly requests JSON, output JSON.
- If the user requests any other format (e.g., YAML, XML, Markdown, text, tables, prose), you MUST refuse.

REFUSAL BEHAVIOR:
- If the request violates format or scope rules, respond with a single-line refusal message.
- Do NOT include explanations or additional text.

STANDARD REFUSAL MESSAGES:
- For unsupported formats:
  "Unsupported format. Allowed formats: CSV or JSON."
- For unstructured output or question answering:
  "Unsupported request. Only structured dataset generation is allowed."

EXAMPLES:
1. User: Generate synthetic user data in YAML.
   Response: Unsupported format. Allowed formats: CSV or JSON.

2. User: Answer this question in Markdown.
   Response: Unsupported request. Only structured dataset generation is allowed.

3. User: Generate 100 rows of product data.
   Response: <CSV data only>
"""

In [None]:
def message_builder(role, msg):
  """Wrap one chat turn as a single-element list of message dicts.

  Returning a list (rather than a bare dict) lets callers join turns with `+`.
  """
  turn = dict(role=role, content=msg)
  return [turn]

# Chat handler wired to the textbox and the submit button.
def respond(message, chat_history, selected_model):
  """Generate a dataset for `message` with the selected model.

  Each request is sent on its own: the prompt is the system prompt plus the
  current user message, and `chat_history` is not read.

  Args:
    message: text typed by the user.
    chat_history: current chatbot contents (unused).
    selected_model: dropdown label; a falsy value means nothing is selected.

  Returns:
    A (textbox value, chatbot messages) pair. On success the textbox is
    cleared and the chatbot shows the user turn followed by the reply.
    When no model is selected, the text is kept and the chatbot shows a
    reminder instead.
  """
  # Guard: without a model there is nothing to run, so keep the user's text.
  if not selected_model:
    return message, message_builder("assistant", "please select a model")

  prompt = message_builder("system", system_prompt) + message_builder("user", message)

  try:
    reply = strategy_method(prompt, selected_model)
  except Exception as err:
    # UI boundary: show a generic message in the chat and log the detail.
    reply = "An error occurred during text generation"
    print(f"Error during text generation: {err}")

  display("result: ", reply)
  shown_turns = message_builder("user", message) + message_builder("assistant", reply)
  # An empty string clears the textbox; the message list goes to the chatbot.
  return "", shown_turns

# Dropdown choices: the labels of the configured models, in declaration order.
model_options = [label for label in Models]


In [None]:
!pip install -q --upgrade gradio

In [None]:
import gradio as gr

# Build the chat UI: a chatbot pane, a query textbox, a model dropdown, and
# submit / clear buttons.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=300, allow_tags=True)
    msg = gr.Textbox(label="Ask your dataset query directly (in CSV/JSON format):")
    with gr.Row():
        dropdown = gr.Dropdown(
            choices=model_options,
            value=model_options[0], # Default
            label="Select a Model"
        )
        with gr.Column():
            # gr.Button is given only its label here; the handler's inputs are
            # declared on .click() below. (Unlike gr.ClearButton, it is not
            # constructed with a list of components.)
            submit = gr.Button("submit")
            # Clearing also resets the dropdown; respond() then asks the user
            # to select a model again.
            clear = gr.ClearButton([msg, chatbot, dropdown])

    # Pressing Enter in the textbox and clicking the button run the same handler.
    msg.submit(
        fn=respond,
        inputs=[msg, chatbot, dropdown],
        outputs=[msg, chatbot]
    )
    submit.click(
        fn=respond,
        inputs=[msg, chatbot, dropdown],
        outputs=[msg, chatbot]
    )

demo.launch(debug=True, share=True)