## Synthetic Data Generator

In [None]:
# Imports
import os
from openai import OpenAI
import gradio as gr
from dotenv import load_dotenv
import pandas as pd
import io


In [None]:
# Load env variables
# override=True: values from the .env file replace variables that are already
# set in the process environment (python-dotenv behaviour).
load_dotenv(override=True)

# Read each key and report only whether it is present; the key itself is
# never printed.
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
print("OPENROUTER_API_KEY is set." if openrouter_api_key else "OPENROUTER_API_KEY is not set.")

groq_api_key = os.getenv("GROQ_API_KEY")
print("GROQ_API_KEY is set." if groq_api_key else "GROQ_API_KEY is not set.")

In [None]:
# Constants
# Each provider is addressed through an OpenAI-compatible endpoint; the model
# identifier and base URL are grouped per provider.

# OpenRouter
OPENROUTER_URL = "https://openrouter.ai/api/v1"
MODEL_GPT = "openai/gpt-4o-mini"

# Groq
GROQ_URL = "https://api.groq.com/openai/v1"
MODEL_GROQ = "llama-3.3-70b-versatile"

In [None]:
# Initialize clients
# Both providers are reached through the OpenAI SDK client; only the base URL
# and the API key differ between them.
openrouter = OpenAI(
    base_url=OPENROUTER_URL,
    api_key=openrouter_api_key,
)
groq_client = OpenAI(
    base_url=GROQ_URL,
    api_key=groq_api_key,
)

In [None]:
# System prompt sent as the "system" message of every chat request. It tells
# the model to act as a synthetic data generator and to reply with CSV only,
# so that the reply can be parsed directly as a table.
system_prompt = """ 
You are a synthetic data generator.
Your task is to take a description of a dataset and generate synthetic data that matches the description.
Respond with only the generated data in csv format, without any additional text or explanations.
"""

In [None]:
def user_prompt(description):
    """Build the user message that asks the model for CSV data.

    Args:
        description: Free-text description of the dataset to generate.

    Returns:
        A three-line prompt (instruction, the description, CSV-only reminder)
        terminated by a newline.
    """
    instruction = "Generate synthetic data based on the following description:"
    csv_only_reminder = (
        "Respond with only the generated data in csv format, "
        "without any additional text or explanations."
    )
    return f"{instruction}\n{description}\n{csv_only_reminder}\n"

In [None]:
# Function to process the response and convert it to a DataFrame
def _extract_csv_text(text):
    """Return the CSV payload of *text*, unwrapping a Markdown code fence if present.

    Text without any fence is returned unchanged. When at least one fence line
    (a line starting with three backticks) is found, only the lines between the
    first fence and the next one (or the end of the text) are kept, which also
    drops any prose the model placed before or after the fenced block.
    """
    lines = text.split("\n")
    fence_rows = [i for i, line in enumerate(lines) if line.strip().startswith("```")]
    if not fence_rows:
        return text
    start = fence_rows[0] + 1
    stop = fence_rows[1] if len(fence_rows) > 1 else len(lines)
    return "\n".join(lines[start:stop])


def process_response(response):
    """Parse a model reply containing CSV text into a DataFrame.

    Chat models frequently wrap CSV output in a Markdown code fence
    (```csv ... ```) even when told not to; the fence is stripped before
    parsing so it does not end up as a bogus header or row.

    Args:
        response: Text content returned by the model (may be None or empty).

    Returns:
        A pandas DataFrame on success, or None when the reply is missing,
        empty, or cannot be parsed as CSV. Failures are reported on stdout.
    """
    if not isinstance(response, str) or not response.strip():
        print("Error processing response: empty response")
        return None
    try:
        return pd.read_csv(io.StringIO(_extract_csv_text(response)))
    except Exception as e:
        # Best-effort by design: malformed model output must not crash the UI
        # callback, so report the problem and signal failure with None.
        print(f"Error processing response: {e}")
        return None

In [None]:
# Main function to generate synthetic data from both models
def _request_dataframe(client, model, prompt):
    """Ask one provider for CSV data and parse the reply.

    Args:
        client: OpenAI-compatible client used to send the chat request.
        model: Model identifier passed to the provider.
        prompt: User message describing the data to generate.

    Returns:
        The parsed DataFrame, or None if the request or the parsing failed.
    """
    try:
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
        )
        content = completion.choices[0].message.content
    except Exception as e:
        # UI boundary: a failure on one provider (bad key, rate limit, network,
        # empty choices) is reported and must not discard the other provider's
        # result.
        print(f"Error calling {model}: {e}")
        return None
    return process_response(content)


def generate_synthetic_data(description):
    """Generate synthetic data for *description* with both configured models.

    Args:
        description: Free-text description of the desired dataset.

    Returns:
        A tuple ``(df_groq, df_openrouter)``. Each element is a DataFrame, or
        None when that provider failed or returned unparsable output. Both are
        None when the description is empty or contains only whitespace.
    """
    if not description or not str(description).strip():
        print("Description is empty. Please provide a valid description.")
        return None, None
    prompt = user_prompt(description)
    df_groq = _request_dataframe(groq_client, MODEL_GROQ, prompt)
    df_openrouter = _request_dataframe(openrouter, MODEL_GPT, prompt)
    return df_groq, df_openrouter

In [None]:
# Gradio UI: a description box with a button on the first row, and one result
# table per model on the second row.
with gr.Blocks() as demo:
    gr.Markdown("### Synthetic Data Generator")
    # Row 1: input area.
    with gr.Row():
        with gr.Column():
            description_input = gr.Textbox(label="Dataset Description", placeholder="e.g., A dataset of 10 customers with columns for name, age, and city.")
            generate_button = gr.Button("Generate Data", variant="primary")
    # Row 2: side-by-side result tables, each labelled with its model name.
    with gr.Row():
        with gr.Column():
            df_openai = gr.Dataframe(label=f"{MODEL_GPT} Generated Data")
        with gr.Column():
            df_groq = gr.Dataframe(label=f"{MODEL_GROQ} Generated Data")
    # generate_synthetic_data returns (groq result, openrouter result), so the
    # outputs list is given in that same order even though the OpenRouter
    # table is placed first in the layout.
    generate_button.click(
        fn=generate_synthetic_data,
        inputs=description_input,
        outputs=[df_groq, df_openai]
    )

# Start the app; inbrowser=True asks Gradio to open it in a browser tab.
demo.launch(inbrowser=True)