In [41]:
import os
import json
import random
import pandas as pd
import importlib

import config
import AbstractModel
import gpt
import prompts as promptlib

# Re-import the project modules (in the same order as before) so that edits
# made on disk since the kernel started are picked up without a restart.
for _module in (config, AbstractModel, gpt, promptlib):
    importlib.reload(_module)

# Project helper from `config`; presumably loads environment variables
# (e.g. API credentials) from a .env file -- confirm in config.py.
config.source_dot_env()

# Build SAMPLE_SIZE synthetic prompts. Each prompt combines a random skill,
# a random tone and a random "education" word; roughly half of them also ask
# the model to deliberately violate 1-3 SMART attributes, which are recorded
# in the "errors" column as the ground-truth label.
SAMPLE_SIZE = 50
TOP_P = 0.65 # temperature-like control for higher diversity

# NOTE: `random` is not seeded in this cell, so every run draws a different sample.
skill_names = list(promptlib.SKILLS.keys())  # invariant across iterations, so built once

data = {
    "skill": [],
    "prompt": [],
    "tone": [],
    "errors": [],
}
for i in range(SAMPLE_SIZE):
    skill = random.choice(skill_names)
    tone = random.choice(promptlib.TONES)
    skill_description = promptlib.SKILLS[skill]

    smart_errors = []
    # for extra diversity, ask for a specific word to be included in the response
    extra = f"And incorporate the word '{random.choice(promptlib.EDUCATION_WORDS)}' into your response. "
    if random.randint(0, 1) == 0:
        # randomly make a few errors in SMART formulation
        # (1 error is the most likely outcome, 3 the least likely)
        num_errors = random.choices([1, 2, 3], weights=[0.5, 0.35, 0.15])[0]
        smart_errors = random.sample(promptlib.SMART, num_errors)
        extra += f"Also intentionally formulate your SMART goal/plan such that the following attributes are NOT adherent to the SMART formulation: {', '.join(smart_errors)}."

    prompt = promptlib.PROMPT_SYTHNETIC_SMART.format(skill=skill, skill_description=skill_description, tone=tone, extra=extra)
    data["skill"].append(skill)
    data["tone"].append(tone)
    data["prompt"].append(prompt)
    # Store the violated attributes in the order they appear in promptlib.SMART
    # (sorted only after the prompt text has been built from the sampled order).
    smart_errors.sort(key=promptlib.SMART.index)
    data["errors"].append(smart_errors)


df = pd.DataFrame(data)
# sort by len(errors) and reindex
# Sorting the raw column would compare the lists lexicographically (element by
# element), not by length, so map each list to its length for the sort key.
# kind="stable" keeps rows with the same error count in generation order.
df = df.sort_values(
    by="errors",
    key=lambda col: col.map(len),
    kind="stable",
).reset_index(drop=True)

display(df)

Unnamed: 0,skill,prompt,tone,errors
0,voice volume,You're a student taking a course where you're ...,persuasive,[]
1,voice volume,You're a student taking a course where you're ...,objective,[]
2,pitch and intonation,You're a student taking a course where you're ...,authoritative,[]
3,pitch and intonation,You're a student taking a course where you're ...,humorous,[]
4,language use,You're a student taking a course where you're ...,sympathetic,[]
5,voice volume,You're a student taking a course where you're ...,sincere,[]
6,pitch and intonation,You're a student taking a course where you're ...,cynical,[]
7,non-verbal communication,You're a student taking a course where you're ...,assertive,[]
8,facial expression,You're a student taking a course where you're ...,motivating,[]
9,pitch and intonation,You're a student taking a course where you're ...,playful,[]


In [42]:
# Send every generated prompt to the model in one call. json_mode=True is
# requested because the next cell parses each output with json.loads.
model = gpt.GPTModel() # gpt 3
prompts = df["prompt"].tolist()
outputs, meta = model(
    prompts,
    top_p=TOP_P,
    json_mode=True,
)
# Report what the request cost, computed by the model wrapper from `meta`.
print(f"price = ${model.compute_price(meta):.3f}")

price = $0.030


In [43]:
# Decode each raw model output as JSON and pull out its "smart" and "plan"
# fields. `map` is lazy, so each output is parsed and read one at a time, in
# order, exactly as an explicit loop would do.
_pairs = [(obj["smart"], obj["plan"]) for obj in map(json.loads, outputs)]
smart = [goal for goal, _ in _pairs]
plan = [steps for _, steps in _pairs]

print(smart)
print(plan)
# Attach both lists as new columns; they line up with df's rows because
# `outputs` was produced from df["prompt"] in row order.
df = df.assign(smart=smart, plan=plan)

['Improve my voice volume during educational presentations to ensure that all audience members can clearly hear and understand the information being shared.', 'Increase my voice volume during presentations to ensure clarity and engagement with the audience. I will achieve this by practicing speaking loudly and clearly in front of a mirror daily for 15 minutes, recording myself to assess progress, and asking for feedback from classmates during practice sessions.', 'Use varying pitches and intonations in my presentation to engage the audience and emphasize key points. During the presentation, I will practice speaking in different tones and pitches to convey emotions and highlight important information.', 'Use varied pitches and intonations in my presentation to provide guidance and engage the audience with my story. This will involve practicing different intonations and pitches while rehearsing my presentation.', 'Effortlessly incorporate precise and professional language in my written a

In [44]:
# Flatten the "errors" lists to comma-separated strings and write the
# dataset to CSV (without the index column).
fname = "synthetic_smart_v2.csv"

# list -> string
# This cell overwrites df["errors"], so on a re-run the values are already
# strings; joining a string would insert ", " between every character.
# Leaving strings untouched makes the cell safe to run more than once.
df = df.assign(
    errors=df["errors"].apply(lambda x: x if isinstance(x, str) else ', '.join(x))
)
df.to_csv(fname, index=False)
print(f"saved to '{fname}'")

saved to 'synthetic_smart_v2.csv'
