# datasetGPT

## Conversation GPT

In [2]:
import os
import random

import openai

from src.datasetGPT import (ConversationsGenerator, 
                            ConversationsGeneratorConfig, 
                            DatasetWriter)

from src.typing_effect import print_typing
from src.personas import personas
from src.topics import topics

from dotenv import find_dotenv, load_dotenv
# Load variables from the nearest .env file into the process environment
# before anything below reads os.environ.
_ = load_dotenv(find_dotenv()) # read local .env file

# Indexing os.environ raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']
# hf_api_key = os.environ['HF_API_KEY']

# Writer that receives every generated conversation in the loop further down.
# NOTE(review): single_file=True presumably makes it append all samples to one
# output file -- confirm against DatasetWriter.
dataset_writer = DatasetWriter(single_file=True)

def get_random_topic():
    """Pick one value at random from the ``topics`` mapping and return it."""
    available_topics = list(topics.values())
    return random.choice(available_topics)

# Drawn once so that both agent prompts and the printed header use the same topic.
random_topic = get_random_topic()

def get_two_random_personas():
    """Return two distinct, randomly chosen personas from the ``personas`` dictionary.

    Returns:
        tuple: ``(persona_1, persona_2)`` -- the values stored under two
        different keys of ``personas``.

    Raises:
        ValueError: If ``personas`` holds fewer than two entries
            (raised by ``random.sample``).
    """
    # Build the list of keys (personality codes) once and sample from it.
    personality_keys = list(personas.keys())

    # random.sample draws without replacement, so the two keys always differ.
    selected_key_1, selected_key_2 = random.sample(personality_keys, 2)

    return personas[selected_key_1], personas[selected_key_2]

persona_1, persona_2 = get_two_random_personas()

word_count_limit = 50


def _agent_prompt(speaker, partner):
    """Return the instruction prompt for one agent talking to ``partner``.

    The topic and the word limit are read from the module-level
    ``random_topic`` and ``word_count_limit`` at call time.
    """
    return f"""
        You are {speaker}
        You are having a conversation with {partner}
        The topic is {random_topic}
        Respond in {word_count_limit} words or less
        Respond in the voice of {speaker}
        """


# Configure the Conversations Generator: each agent gets the same template
# with the speaker and partner roles swapped.
generator_config = ConversationsGeneratorConfig(
    openai.api_key,
    agent1=_agent_prompt(persona_1, persona_2),
    agent2=_agent_prompt(persona_2, persona_1),
    num_samples=1,
    interruption="length",
    lengths=[4],
    temperatures=[1.3],
    # options=[("n", "2"), ("n", "3")])
)

# Show the topic and both personas before generation starts
header_rows = (
    ("Topic", random_topic),
    ("Persona 1", persona_1),
    ("Persona 2", persona_2),
)
for label, value in header_rows:
    print_typing(f"{label}: {value}")

# Run the generator and hand each finished conversation to the writer.
# The names `conversations_generator` and `conversation` are kept because
# later cells refer to them.
conversations_generator = ConversationsGenerator(generator_config)
for conversation in conversations_generator:
    dataset_writer.save_intermediate_result(conversation)

Topic: Challenges and Triumphs

Persona 1: Samuel L. Jackson, an iconic actor known for his strong and emphatic delivery. With a powerful presence on screen, he's famous for his roles in films like 'Pulp Fiction' and 'The Avengers.' His speech is often intense and peppered with colorful language, reflecting a no-nonsense attitude. Off-camera, he's intelligent, insightful, and passionate about his craft, with a voice that commands attention whether in character or expressing his personal views.

Persona 2: Yoda, the wise and ancient Jedi Master, speaking in a distinctive and enigmatic manner. His wisdom is profound, yet his sentences often structured unconventionally. Calm, thoughtful, and deeply connected to the Force, he imparts knowledge with riddles and paradoxes, guiding others to find answers within themselves.

Alright Yoda, challenges ain't nothin' but roadblocks! You've got to bulldoze through 'em, triumph ain't valiant victories but survivin'. Keepin' it together when insurmou

## Gradio Demo

In [None]:
import gradio as gr

def echo(message, history):
    """Chat callback that replies with the incoming message unchanged.

    ``history`` is accepted because the chat interface passes it, but it is
    not used.
    """
    reply = message
    return reply

# Echo-bot chat UI; launching is left commented out.
demo = gr.ChatInterface(
    fn=echo,
    examples=["hello", "hola", "merhaba"],
    title="Echo Bot",
)
# demo.launch()

In [None]:
# Calls Gradio's close_all() -- presumably to shut down demos launched from
# earlier cells before starting a new one; confirm against the Gradio docs.
gr.close_all()

In [None]:
# gradio
def topic(input, slider):
    """Gradio callback: gather the ``"utterances"`` entry of every conversation.

    Iterates the module-level ``conversations_generator`` and returns a list
    with one ``conversation.get("utterances")`` result per conversation.

    NOTE(review): neither ``input`` nor ``slider`` is used, and ``input``
    shadows the builtin; the signature is kept as-is because the Gradio
    interface is wired to it.
    """
    # output = client.generate(input, max_new_tokens=slider).generated_text
    collected = []
    for conversation in conversations_generator:
        collected.append(conversation.get("utterances"))
    return collected

# Build the widgets first (same construction order as before), then wire
# them to the `topic` callback.
prompt_box = gr.Textbox(label="Prompt")
max_tokens_slider = gr.Slider(label="Max new tokens", value=20, maximum=1024, minimum=1)
completion_box = gr.Textbox(label="Completion")

demo = gr.Interface(
    fn=topic,
    inputs=[prompt_box, max_tokens_slider],
    outputs=[completion_box],
)

demo.launch()

## PyAI

In [None]:
# Primer text that frames the assistant as a Python mentor; the next cell
# sends it as the opening message.
pyai = """
You are an expert Python developer with years of experience writing Python code and teaching Python to other programmers. 
You have vast experience mentoring developers.
You write idiomatic Python code and try to find the most Pythonic way to solve problems.
I want you to be my mentor while I write Python code. 
I'm an intermediate Python developer.
"""

In [None]:
# Send the primer and ask for a one-word acknowledgement.
# NOTE(review): no object with a .predict method is bound to `conversation`
# in the visible cells (the only assignment is the loop variable of the
# generator cell; the chain in "Work In Progress" is named `chain`) -- confirm
# which object this is meant to be.
conversation.predict(input=f"{pyai} Type 'okay' if you understood.")

In [None]:
# First question: describe the project and ask for approaches.
inp = """
I'm working on a project to generate text datasets using LLMs. 
The main use case is to produce human-like conversations using AI generation.
One way to do this is to have two AI agents having a conversation.
Let's brainstorm approaches on how to do this. 
Do you have any ideas?
"""

In [None]:
# `inp` is already a string, so it is passed directly.
conversation.predict(input=inp)

In [None]:
# Follow-up: narrow the discussion to the 'dialogue rollouts' technique.
inp = """
Let's start with a basic approach.
It seems as if the 'dialogue rollouts' technique might be easier to implement, do you agree?
"""

In [None]:
# `inp` is already a string, so it is passed directly.
conversation.predict(input=inp)

In [None]:
# Final request: ask for starter code, mentioning LangChain as a known option.
inp = """
You're going to help me write Python code to implement and produce interesting conversations between AI agents.
I'm familiar with LangChain but I'm open to using other Python libraries, if you feel there's a better approach.
Help me get started.
"""

In [None]:
# `inp` is already a string, so it is passed directly.
conversation.predict(input=inp)

## Work In Progress

In [None]:
import random
from langchain.chains import ConversationChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory

import os
import openai

from dotenv import load_dotenv, find_dotenv
# Load variables from the nearest .env file before os.environ is read below.
_ = load_dotenv(find_dotenv()) # read local .env file

# Indexing os.environ raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

# A single model, memory and chain are shared by every predict() call in this
# cell (topic generation and both speaker prompts).
chat_model = ChatOpenAI(model='gpt-3.5-turbo')
# NOTE(review): `size=5` looks like an attempt to cap the remembered history;
# confirm ConversationBufferMemory honours it -- LangChain's windowed memory
# is ConversationBufferWindowMemory(k=...).
memory = ConversationBufferMemory(size=5)
chain = ConversationChain(llm=chat_model, memory=memory, verbose=True)

# Personality catalogue: two-letter code (the character's initials) -> prompt
# text describing the character. Only the values are handed on by
# select_personalities(); the codes are used for lookup alone.
personalities = {
    "OM": "You are a 60-year-old retired schoolteacher with a hearty laugh and love for life. Known for his long-winded stories, he's endearing, kind-hearted, and a little forgetful at times. Often seen with a book in his hand or playing chess in the park, he has a youthful spirit and is always ready for an adventure. Your name is Oliver 'Ollie' Martinez.",
    "GK": "You are an ambitious and meticulous 30-year-old tech entrepreneur. Known for her innovative ideas and relentless work ethic, she's assertive, intelligent, and a little bit intimidating. Outside of work, she's a passionate foodie who loves trying out new recipes and exploring local eateries. Your name is Grace Kim.",
    "ST": "You are a free-spirited 25-year-old artist and environmental activist. Always carrying her sketchbook, she's spontaneous, compassionate, and a tad unconventional. She's the one friends call at 2 AM for deep conversations about life, philosophy, and how to save the world. Your name is Samara 'Sam' Thompson.",
    "ER": "You are a 35-year-old quiet and thoughtful architect with a dry sense of humor. He is analytical, introverted, and slightly reserved. He finds comfort in solitude and loves classical music. Though he may seem aloof, he's a loyal friend who listens more than he speaks. Your name is Ethan Reid.",
    "LA": "You are a chirpy and friendly 22-year-old aspiring actress working as a waitress. She is dramatic, vibrant, and full of energy. Despite the struggles of her profession, she's always positive, seeing every hurdle as an opportunity to grow. She loves the spotlight, performing arts, and meeting new people. Your name is Lily Anderson."
}

# Randomly select two personalities
def select_personalities(personalities):
    """Return the values stored under two distinct, randomly drawn keys.

    Args:
        personalities: Mapping of personality code to description.

    Returns:
        tuple: The two selected descriptions, in the order they were drawn.
    """
    first_key, second_key = random.sample(list(personalities.keys()), 2)
    first_choice = personalities[first_key]
    second_choice = personalities[second_key]
    return first_choice, second_choice

# Generate a random topic using the OpenAI model
def generate_topic():
    """Ask the module-level ``chain`` for a conversation topic and return its reply."""
    return chain.predict(
        input="Generate a common conversation topic between two people."
    )

# Build conversation starter
def conversation_starter(personality1, personality2, topic):
    """Assemble the four-line opening text for a two-speaker conversation.

    The lines are: "<tag1> starts <topic>", the first description,
    "<tag2> responds", the second description, where each tag is the first
    two characters of the corresponding description.

    NOTE(review): the tag is sliced from the description text, not from the
    dictionary key -- confirm that is the intended speaker label.
    """
    tag_one = personality1[:2]
    tag_two = personality2[:2]
    lines = (
        f"{tag_one} starts {topic}",
        f"{personality1}",
        f"{tag_two} responds",
        f"{personality2}",
    )
    return "\n".join(lines)

# Choose the two speakers, obtain a topic from the model, and build the opener
personality1, personality2 = select_personalities(personalities)
topic = generate_topic()
starter = conversation_starter(personality1, personality2, topic)

print(starter)

# One prompt per speaker: the first two characters of the description,
# followed by the shared opener
prompts = [f"{speaker[:2]}: {starter}" for speaker in (personality1, personality2)]

# Feed the prompts to the chain in order and show each reply
for prompt in prompts:
    response = chain.predict(input=prompt)
    print(response)


In [None]:
import os
import pandas as pd
import json
import glob

def concat_jsons(directory: str) -> pd.DataFrame:
    """
    Load and concatenate JSON files from a specified directory into a pandas DataFrame.

    Every ``*.json`` file directly inside ``directory`` is flattened with
    ``pandas.json_normalize``: one row per entry of its ``utterances`` list
    (columns prefixed with ``utterance_``), with the metadata fields listed
    in ``meta_fields`` repeated on each row.

    Parameters:
    directory (str): The directory containing the JSON files to load.

    Returns:
    DataFrame: The concatenated DataFrame, with rows ordered by file name.
        An empty DataFrame is returned when the directory holds no JSON files.

    Raises:
    KeyError: If a file lacks ``utterances`` or one of the metadata fields.
    json.JSONDecodeError: If a file does not contain valid JSON.
    """
    meta_fields = ['sample_id', 'length', 'temperature', 'initial_utterance', 'n', 'agent1', 'agent2']

    # glob returns matches in arbitrary order; sort so the row order is reproducible.
    json_files = sorted(glob.glob(os.path.join(directory, '*.json')))

    data_frames = []
    for file in json_files:
        # Read as UTF-8 explicitly instead of relying on the platform's default encoding.
        with open(file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Normalize semi-structured JSON data into a flat table.
        data_frames.append(
            pd.json_normalize(data, 'utterances', meta_fields, record_prefix='utterance_')
        )

    # pd.concat raises ValueError on an empty list, so handle "no files" explicitly.
    if not data_frames:
        return pd.DataFrame()

    return pd.concat(data_frames, ignore_index=True)
