<a href="https://colab.research.google.com/github/dsteele101/colab_notebooks/blob/main/Transcript_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Transcript Generator**

This notebook generates sample chat transcripts for a given customer and use case by calling the OpenAI chat completions API.

Replace the text in the variables CUSTOMER and USE_CASE to guide what transcripts are generated.

Replace TOKEN with your OpenAI API key, and do not share or commit the notebook with the key filled in.

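Files are written to the notebook's current working directory (in Colab this is the session's file storage unless you have changed into a mounted Google Drive folder). Click on the folder icon in Colab to view them.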

In [None]:
import json
import re
import requests
from multiprocessing import Pool

In [None]:
# --- What to generate -------------------------------------------------------
# Customer and use case are interpolated into the prompt below.
CUSTOMER = "Amelia.ai"
USE_CASE = "IT Service Desk"

# --- How much to generate ---------------------------------------------------
# Total number of transcripts wanted, and the most to ask for in one API call.
NUM_TRANSCRIPTS = 10
MAX_TRANSCRIPTS_PER_API_CALL = 5

# --- Credentials ------------------------------------------------------------
# OpenAI API key; sent as the bearer token on every request.
TOKEN = ""

# --- Prompt -----------------------------------------------------------------
# The START/END marker lines requested here are what extract_transcripts()
# searches for when splitting the model's reply into individual transcripts.
PROMPT = (
    "Generate chat transcripts labeled as user and agent. "
    f"These transcripts should revolve around {CUSTOMER} {USE_CASE} topics, "
    "make sure there are at least four turns in each transcript. "
    "Each turn of a transcript should only be a single line. "
    "Each transcript should deal with a unique issue. "
    "The start of each transcript should contain a line which reads START OF TRANSCRIPT. "
    "The end of each transcript should contain a line which reads END OF TRANSCRIPT."
)

In [None]:
def extract_transcripts(text):
    """Split a model reply into individual transcript bodies.

    Args:
        text: Raw text that may contain any number of sections delimited by
            ``START OF TRANSCRIPT`` and ``END OF TRANSCRIPT``.

    Returns:
        A list with the text found between each marker pair, in order of
        appearance, with surrounding whitespace stripped. Empty when no
        complete marker pair is present.
    """
    # DOTALL lets "." cross line breaks; the lazy quantifier stops at the
    # nearest END marker so adjacent transcripts are not merged.
    marker_pattern = re.compile(r"START OF TRANSCRIPT(.*?)END OF TRANSCRIPT", re.DOTALL)

    bodies = []
    for match in marker_pattern.finditer(text):
        bodies.append(match.group(1).strip())
    return bodies

In [None]:
def generate_transcripts(counter):
    """Request one batch of transcripts from the OpenAI chat API and format them.

    Args:
        counter: Index of the first transcript in this batch, i.e. how many
            transcripts have been assigned to earlier batches. It sizes the
            batch and numbers the progress messages.

    Returns:
        A list of formatted transcript strings containing at most
        ``min(NUM_TRANSCRIPTS - counter, MAX_TRANSCRIPTS_PER_API_CALL)``
        entries. The list is shorter when the reply holds fewer complete
        START/END blocks than requested, and empty when nothing is left to
        generate.

    Raises:
        RuntimeError: If the API response does not contain a ``"choices"``
            list (for example an authentication or rate-limit error).
    """
    transcripts_remaining = NUM_TRANSCRIPTS - counter
    transcripts_to_generate = min(transcripts_remaining, MAX_TRANSCRIPTS_PER_API_CALL)
    if transcripts_to_generate <= 0:
        # Nothing left for this batch; avoid asking the API for zero transcripts.
        return []

    prompt = f"{PROMPT} Generate {transcripts_to_generate} transcripts."

    # Build the payload as a dict so quotes, backslashes or newlines in the
    # prompt are escaped by the JSON encoder instead of corrupting the request.
    payload = {
        "model": "gpt-3.5-turbo-0613",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
    }
    headers = {'Authorization': 'Bearer ' + TOKEN}

    # TLS certificate verification is left at its default (enabled): the
    # request carries the API key, so it must not be sent to an unverified
    # endpoint. The timeout keeps a stalled connection from hanging the pool.
    r = requests.post(
        'https://api.openai.com/v1/chat/completions',
        json=payload,
        headers=headers,
        timeout=300,
    )
    res = r.json()
    print(res)

    if not isinstance(res, dict) or "choices" not in res:
        # Surface the API's own error payload rather than an opaque KeyError.
        raise RuntimeError(f"OpenAI API call failed (HTTP {r.status_code}): {res}")

    generated_transcripts = [choice["message"]["content"] for choice in res["choices"]]

    formatted_transcripts = []
    for transcript in generated_transcripts:
        for block in extract_transcripts(transcript):
            if len(formatted_transcripts) >= transcripts_to_generate:
                # The model may return more blocks than requested; keep only
                # this batch's share so the overall total is not exceeded.
                return formatted_transcripts

            # Turn "User: text" / "Agent: text" into tab-separated "User\ttext".
            cleaned_block = re.sub(r"(User|Agent):\s*", r"\1\t", block)
            # NOTE(review): this strips every remaining colon, including ones
            # inside message text (times, URLs) - confirm that is intended.
            cleaned_block = cleaned_block.replace(":", "")
            formatted_transcripts.append(cleaned_block)

            transcript_number = counter + len(formatted_transcripts) - 1
            print(f'Transcript {transcript_number} formatted successfully.')

    return formatted_transcripts

In [None]:
def main():
    """Generate transcripts in parallel batches and write each one to a file.

    One ``generate_transcripts`` task is submitted per batch of up to
    ``MAX_TRANSCRIPTS_PER_API_CALL`` transcripts. The combined results are
    capped at ``NUM_TRANSCRIPTS`` and written to the current working directory
    as ``<customer>_transcript_<NNN>.txt``.

    Any exception raised inside a worker is re-raised here when its result is
    collected.
    """
    # Each task receives the index of the first transcript in its batch.
    batch_starts = range(0, NUM_TRANSCRIPTS, MAX_TRANSCRIPTS_PER_API_CALL)

    # The context manager releases the worker processes even if a task fails.
    with Pool() as pool:
        pending = [
            pool.apply_async(generate_transcripts, args=(start,))
            for start in batch_starts
        ]
        pool.close()
        pool.join()

        # Retrieve the results from each process, in submission order.
        transcripts = []
        for result in pending:
            transcripts.extend(result.get())

    # Never write more files than were asked for, whatever the batches returned.
    transcripts = transcripts[:NUM_TRANSCRIPTS]

    # Strip every non-word character (spaces, punctuation, newlines) so the
    # customer name is safe to use as a filename prefix.
    cleaned_customer = re.sub(r'\W+', '', CUSTOMER)

    # Save the transcripts to separate files.
    for i, transcript in enumerate(transcripts):
        filename = f'{cleaned_customer}_transcript_{i:03}.txt'
        # Explicit encoding: generated text may contain non-ASCII characters.
        with open(filename, 'w', encoding='utf-8') as file:
            file.write(transcript)

        print(f'File {filename} created successfully.')

    # Report the number actually written, which can be lower than requested
    # when the model returns fewer complete transcripts.
    print(f"{len(transcripts)} transcripts generated successfully.")

In [None]:
# Entry point: run the generator only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()



{'id': 'chatcmpl-7TEq1YDs1dgAUxcBaXsnIrWoFXgW7', 'object': 'chat.completion', 'created': 1687202905, 'model': 'gpt-3.5-turbo-0613', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'START OF TRANSCRIPT\nUser: Hi, I am unable to access my email account.\nAgent: I\'m sorry to hear that. Can you please provide me with your email address?\nUser: sure, it\'s john@example.com.\nAgent: Thank you. Let me check the server logs to see if there are any issues.\nUser: Okay, please let me know what you find.\nAgent: It seems that there was a temporary server outage. The issue has been resolved, and you should be able to access your email now.\nEND OF TRANSCRIPT\n\nSTART OF TRANSCRIPT\nUser: Hello, I need assistance with resetting my password.\nAgent: Of course, I can help you with that. Can you please provide me with your username?\nUser: My username is jdoe123.\nAgent: Thank you. I will send a password reset link to your registered email address. Please check your inbox.\nUser: