### Importing Libraries

In [None]:
import os
from dotenv import load_dotenv
from openai import OpenAI
import anthropic
import google.generativeai
from IPython.display import Markdown, display, update_display
# httpx is used to disable SSL certificate verification when running from a corporate computer
import httpx
import pandas as pd
import gradio as gr
import ast

In [None]:
# Read the provider API keys from the environment (populated from a .env file)
# and print only a short prefix of each one to help with debugging.

load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')

print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)

print(
    f"Anthropic API Key exists and begins {anthropic_api_key[:7]}"
    if anthropic_api_key
    else "Anthropic API Key not set"
)

print(
    f"Google API Key exists and begins {google_api_key[:8]}"
    if google_api_key
    else "Google API Key not set"
)

In [None]:
# Create the OpenAI and Anthropic clients; comment out the Claude or Google lines if you're not using them.
# NOTE: verify=False switches off TLS certificate verification for the HTTP client handed to each SDK.

openai = OpenAI(
    http_client=httpx.Client(verify=False),
)

claude = anthropic.Anthropic(
    http_client=httpx.Client(verify=False),
)

# Google configuration is currently disabled:
#google.generativeai.configure(transport='rest')

In [None]:
# Model identifiers offered in the app's model dropdown.
# OpenAI models:
GPT_3_5, GPT_4o_MINI, GPT_4o = "gpt-3.5-turbo", "gpt-4o-mini", "gpt-4o"
# Anthropic models:
claude_3_5_sonnet_latest, claude_3_HAIKU = (
    "claude-3-5-sonnet-latest",
    "claude-3-haiku-20240307",
)

### Testing the GPT and Claude models before implementing in the app

In [None]:
# Sample request used to try the prompts against each provider before wiring up the app.
data_subject = "Job Posting for an AI company"
n = 3

# What we ask for ...
user_prompt = f"Generate {n} instances of synthesized data of {data_subject}"

# ... and the output contract: a bare list of flat, string-valued dictionaries.
system_prompt = (
    "You are an AI assistant dedicated to generating high-quality synthetic testing data. "
    "Provide the generated data as a list of dictionaries that can be directly converted into a Pandas DataFrame. "
    "Ensure all dictionary values are strings. "
    "Do not use nested lists or nested dictionaries. "
    "Respond only with the list of dictionaries—no explanation or extra text."
)

In [None]:
# Conversation for the OpenAI test call: system instructions first, then the user's request.
messages = [
    dict(role="system", content=system_prompt),
    dict(role="user", content=user_prompt),
]

In [None]:
# Echo both prompts, one per line, to check them before sending.
print(system_prompt, user_prompt, sep="\n")

In [None]:
# Send the sample conversation to the GPT-4o mini model.
completion = openai.chat.completions.create(temperature=0.7, model=GPT_4o_MINI, messages=messages)

In [None]:
# Show the raw text of the first choice returned by the OpenAI test call.
print(completion.choices[0].message.content)

In [None]:
# Parse the reply text as a Python literal and load it into a DataFrame.
# NOTE(review): ast.literal_eval raises if the reply is not a bare literal (e.g. wrapped in Markdown fences).
pd.DataFrame(ast.literal_eval(completion.choices[0].message.content))

In [None]:
# Same sample request against Claude; here the system prompt is passed as its own
# argument and only the user turn goes in `messages`.
claude_turns = [{"role": "user", "content": user_prompt}]
completions = claude.messages.create(
    model="claude-3-5-sonnet-latest",
    max_tokens=4000,
    system=system_prompt,
    messages=claude_turns,
)

In [None]:
# Text of the first content block in Claude's reply.
completions.content[0].text

In [None]:
# Parse Claude's reply text as a Python literal and load it into a DataFrame.
# NOTE(review): ast.literal_eval raises if the reply is not a bare literal (e.g. wrapped in Markdown fences).
pd.DataFrame(ast.literal_eval(completions.content[0].text))

### The main functions used in the app

In [None]:
def _parse_records(reply_text):
    """Convert a model reply into the Python object it spells out.

    Models sometimes wrap the answer in a Markdown code fence even when told
    not to, so a leading fence line (with or without a language tag) and a
    trailing fence are removed before the text is parsed.

    Raises:
        ValueError: If the remaining text is not a valid Python literal.
    """
    text = (reply_text or "").strip()
    if text.startswith("```"):
        # Drop the whole opening fence line, then the closing fence if present.
        _, _, text = text.partition("\n")
        text = text.rstrip()
        if text.endswith("```"):
            text = text[:-3]
    try:
        # literal_eval only accepts literals, so the reply is never executed.
        return ast.literal_eval(text.strip())
    except (ValueError, TypeError, SyntaxError) as err:
        raise ValueError(
            "The model reply could not be parsed as a list of dictionaries"
        ) from err


# Step 1: The Main Function to Generate the synthesized data
def gen_syn_data(data_subject, model, n):
    """Generate ``n`` synthetic records about ``data_subject`` with an LLM.

    The result is also stored in the module-level ``df`` so that
    ``download_csv`` can export it afterwards.

    Args:
        data_subject: Free-text description of the data to synthesize.
        model: Model identifier. Names containing "gpt" are sent to the
            OpenAI client, names containing "claude" to the Anthropic client.
        n: Number of records to request; converted with ``int`` so that a
            value such as ``3.0`` reads as "3" in the prompt.

    Returns:
        A pandas DataFrame built from the parsed reply.

    Raises:
        ValueError: If ``model`` matches neither provider, ``n`` is not a
            number, or the reply cannot be parsed.
    """
    global df

    # Validate everything before any network call is made.
    if not isinstance(model, str) or not ("gpt" in model or "claude" in model):
        raise ValueError(
            f"Unsupported model {model!r}: expected a name containing 'gpt' or 'claude'"
        )
    try:
        count = int(n)
    except (TypeError, ValueError) as err:
        raise ValueError(f"n must be a whole number, got {n!r}") from err

    user_prompt = f"Generate {count} instances of synthesized data of {data_subject}"
    system_prompt = "You are an AI assistant dedicated to generating high-quality synthetic testing data. Provide the generated data as a list of dictionaries that can be directly converted into a Pandas DataFrame. Ensure all dictionary values are strings. Do not use nested lists or nested dictionaries. Respond only with the list of dictionaries—no explanation or extra text."

    if "gpt" in model:
        completions = openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.7,
        )
        reply_text = completions.choices[0].message.content
    else:
        # Anthropic takes the system prompt as a separate argument.
        completions = claude.messages.create(
            model=model,
            max_tokens=4000,
            temperature=0.7,
            system=system_prompt,
            messages=[
                {"role": "user", "content": user_prompt},
            ],
        )
        reply_text = completions.content[0].text

    df = pd.DataFrame(_parse_records(reply_text))

    return df

# Step 2: Function to save the DataFrame and return CSV path
def download_csv():
    """Write the most recently generated DataFrame to a temporary CSV file.

    Reads the module-level ``df`` that ``gen_syn_data`` sets.

    Returns:
        Path of the ``.csv`` file. It is created with ``delete=False``, so it
        stays on disk after this function returns.

    Raises:
        gr.Error: If no data has been generated yet.
    """
    # Imported here because the notebook's import cell does not import tempfile.
    import tempfile

    frame = globals().get("df")
    if frame is None:
        raise gr.Error("Generate some data before downloading the CSV.")

    # Reserve a unique path, close the handle, then let pandas write to the
    # path; writing to a file that is still open is not portable.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        csv_path = tmp.name
    frame.to_csv(csv_path, index=False)

    return csv_path
        
# Step 3: Function to Change the visibility of the file component when the download button is clicked
def visible_component():
    """Return a Gradio update that makes the receiving output component visible."""
    show_update = gr.update(visible=True)
    return show_update

### The Gradio Page

In [None]:
# Build the page: inputs row, generate button, results table, and CSV download.
with gr.Blocks() as demo:

    gr.Markdown("# Welcome to the Synthetic Data Generator")
    with gr.Row():
        input_subject = gr.Textbox(label="Input Subject Here", placeholder="AI Job Postings..", scale=3, lines=2)
        input_model = gr.Dropdown(
            [GPT_3_5, GPT_4o_MINI, GPT_4o, claude_3_5_sonnet_latest, claude_3_HAIKU],
            label="Select an LLM",
            value=GPT_3_5,
            scale=1,
        )
        # precision=0 rounds the value to an integer so the prompt never asks for "2.0" records.
        input_n = gr.Number(label="Enter Number of Data records", minimum=2, value=2, precision=0, scale=1)

    # Generate Data Button
    generate_button = gr.Button("Generate Data")

    # Output components
    dataframe_output = gr.Dataframe()

    # Download Data as .csv Button; the file component stays hidden until it is clicked
    download_button = gr.Button("Download CSV")
    file_output = gr.File(label="Download CSV", interactive=False, visible=False)

    # Hook up buttons to actions
    generate_button.click(
        fn=gen_syn_data,
        inputs=[input_subject, input_model, input_n],
        outputs=dataframe_output,
    )

    # First reveal the file component, then fill it with the CSV path.
    download_button.click(
        visible_component,
        outputs=file_output,
    ).then(
        fn=download_csv,
        outputs=file_output,
    )

# share=True asks Gradio for a public share link in addition to the local server.
demo.launch(server_port=28058, inbrowser=True, share=True)