In [34]:
import openai
import re
import config
import os
openai.organization = config.openai_organization
openai.api_key = config.openai_api_key
import pandas as pd
RANDOM_STATE = 42

In [35]:
# Directory (relative to the notebook's working directory) that holds the
# few-shot prompt files. Files are opened through an explicit path instead of
# os.chdir() so that a missing file cannot leave the kernel stranded in a
# different working directory.
PROMPT_DIR = 'chatgpt_prompts'


def _read_prompt(filename):
    """Return the full contents of one prompt file located in PROMPT_DIR."""
    with open(os.path.join(PROMPT_DIR, filename), 'r') as prompt_file:
        return prompt_file.read()


# Raw text of each turn of the few-shot conversation. The assistant turns are
# kept as the unparsed text of their .json files.
msg_0_system = _read_prompt('msg_0_system.txt')
msg_1_user = _read_prompt('msg_1_user.txt')
msg_1_assistant = _read_prompt('msg_1_assistant.json')
msg_2_user = _read_prompt('msg_2_user.txt')
msg_2_assistant = _read_prompt('msg_2_assistant.json')
msg_3_user = _read_prompt('msg_3_user.txt')
msg_3_assistant = _read_prompt('msg_3_assistant.json')

# Conversation prefix sent with every request: one system message followed by
# three user/assistant example exchanges.
default_messages = [
    {"role": role, "content": content}
    for role, content in (
        ("system", msg_0_system),
        ("user", msg_1_user),
        ("assistant", msg_1_assistant),
        ("user", msg_2_user),
        ("assistant", msg_2_assistant),
        ("user", msg_3_user),
        ("assistant", msg_3_assistant),
    )
]

In [47]:
import json

def generate(row):
    """Generate the ambiguous/figurative/literal texts for one idiom row.

    Builds the chat prompt from ``default_messages`` plus a final user message
    containing ``row['idiom']`` and delegates to ``generate_helper``.

    Args:
        row: mapping-like record supporting item access and assignment; it
            must provide the key ``'idiom'``. It is modified in place.

    Returns:
        The same ``row``. ``row['error']`` is ``False`` when generation ran
        without raising and ``True`` when any ``Exception`` was raised while
        building the prompt or inside ``generate_helper``.
    """
    row['error'] = False
    try:
        idiom = row['idiom']
        messages = default_messages + [{
            "role": "user",
            "content": f"IDIOM: {idiom}"
        }]
        return generate_helper(row, messages)
    except Exception:
        # Record the failure on the row instead of aborting a whole batch.
        # Only Exception is caught so that KeyboardInterrupt / SystemExit still
        # stop a long-running apply.
        row['error'] = True
        return row

# Sentence appended to every piece of corrective feedback sent to the model.
_REWRITE_REQUEST = " Rewrite your AMBIGUOUS phrase, FIGURATIVE sentence, and LITERAL sentence accordingly."

# Label words that must not appear in any of the generated texts.
_BANNED_SUBSTRINGS = ("literal", "figurative", "ambiguous", "physical", "idiomatic")


def _review_candidate(ambiguous, figurative, literal):
    """Return ``(feedback, offending_text)`` for the first violated rule.

    ``feedback`` is the user message asking the model for a rewrite, or
    ``None`` when every rule passes. ``offending_text`` is the text that
    triggered a banned-word or "because" rule (``None`` for the substring
    rules and when everything passes).
    """
    # Check ambiguous subsentence inclusion
    for label, sentence in (("FIGURATIVE", figurative), ("LITERAL", literal)):
        if ambiguous not in sentence:
            feedback = (
                f"The AMBIGUOUS phrase must be an exact substring of the {label} sentence, but yours isn't."
                + _REWRITE_REQUEST
            )
            return feedback, None

    # Ban the mentions of certain substrings. For each banned word the three
    # texts are inspected in the order ambiguous, figurative, literal.
    fields = (
        (ambiguous, "AMBIGUOUS phrase", "should never"),
        (figurative, "FIGURATIVE sentence", "must not"),
        (literal, "LITERAL sentence", "must not"),
    )
    for substring in _BANNED_SUBSTRINGS:
        for text, label, verb in fields:
            if substring in text:
                feedback = (
                    f'One of the banned words "{substring}" was mentioned in your {label}; you {verb} explicitly state these labels in your sentences.'
                    + _REWRITE_REQUEST
                )
                return feedback, text

    # Ban the word "because"
    for label, sentence in (("FIGURATIVE", figurative), ("LITERAL", literal)):
        if "because" in sentence:
            feedback = (
                f'In your {label} sentence, avoid using the word "because" since using it tends to create a {label} sentence that simply re-states the {label.lower()} definition of the idiom without putting much effort in creating a good sentence.'
                + _REWRITE_REQUEST
            )
            return feedback, sentence

    return None, None


def generate_helper(row, messages, i=0, max_rounds=5):
    """Query the chat model and re-prompt until the reply passes validation.

    Each round sends ``messages`` to the model, parses the reply as JSON with
    the keys ``AMBIGUOUS``, ``FIGURATIVE`` and ``LITERAL``, stores the stripped
    values on ``row`` and validates them. When a rule is violated, the reply
    and a corrective user message are appended to the conversation and the
    model is asked again.

    Args:
        row: mapping-like record, modified in place.
        messages: chat messages to send; the caller's list is not mutated.
        i: number of rounds already used.
        max_rounds: round budget; once ``i`` reaches it no request is made.

    Returns:
        ``row``. On success ``row['iter']`` holds the number of corrective
        rounds that were needed. If the reply cannot be parsed into the three
        expected string fields, ``row['not_found']`` is set to ``True`` and the
        ``intended_*`` values are left as they were. If the budget runs out,
        ``row`` keeps the last (invalid) candidate and ``'iter'`` is not set.
    """
    while i < max_rounds:
        text = openai.ChatCompletion.create(
            model="gpt-4",
            messages=messages,
            max_tokens=512,
            temperature=0, # the higher this value, the less deterministic
            top_p=1, # the higher this value, the wider range of vocab is used
        ).choices[0].message.content.strip()

        try:
            response = json.loads(text)
            # Extract everything before touching the row so a partially valid
            # reply cannot overwrite only some of the columns.
            ambiguous = response['AMBIGUOUS'].strip()
            figurative = response['FIGURATIVE'].strip()
            literal = response['LITERAL'].strip()
        except Exception:
            row['not_found'] = True
            return row

        row['intended_ambiguous'] = ambiguous
        row['intended_figurative'] = figurative
        row['intended_literal'] = literal

        feedback, offending_text = _review_candidate(ambiguous, figurative, literal)
        if feedback is None:
            row["iter"] = i
            return row

        if offending_text is not None:
            # Surface the rejected text so the run can be monitored.
            print(offending_text)

        # Build a new list so the caller's conversation is left untouched.
        messages = messages + [
            { "role": "assistant", "content": text },
            { "role": "user", "content": feedback },
        ]
        i += 1

    return row

In [48]:
from tqdm import tqdm
tqdm.pandas()  # registers DataFrame.progress_apply
idioms = pd.read_csv("gen_final_chatgpt_ver2.csv")
# Apply to the frame loaded above rather than to a `result` left over from an
# earlier run, so the cell works on a fresh kernel.
# NOTE(review): to trial on a subset first, filter `idioms` to its
# `well_formed` rows and sample with RANDOM_STATE before applying.
result = idioms.progress_apply(generate, axis=1)

100%|██████████| 717/717 [00:00<00:00, 39272.30it/s]


In [49]:
# Write the generated rows to disk, leaving the DataFrame index out of the file.
result.to_csv(
    'gen3_chatgpt.csv',
    index=False,
)

In [46]:
# Show the rows whose generation was flagged as failed.
result.loc[result['error']]

Unnamed: 0,idiom,meaning,intended_ambiguous,intended_figurative,intended_literal,is_literal,justification,converted,iter,bad_idiom,...,intended_figurative_old,intended_literal_old,maxvote - for sort,maxvote_ambiguous,maxvote_figurative,maxvote_literal,well_formed,annotations_ambiguous,annotations_figurative,annotations_literal
41,ants in pants,be extremely restless,had ants in his pants,he had ants in his pants and couldn't focus on...,"while picnicking, he accidentally sat on an an...",True,The hypothesis describes a situation where the...,False,1,False,...,She has ants in her pants because she's full o...,She has ants in her pants because she accident...,ambiguous : figurative : literal,ambiguous,figurative,literal,True,"['ambiguous', 'ambiguous', 'ambiguous']","['figurative', 'figurative', 'figurative']","['literal', 'literal', 'literal']"
586,snow under,to bury or cover something or somebody in the ...,snowed under with work,"during the busy season, she was snowed under w...",the unexpected blizzard left her car snowed un...,True,"In this case, the idiom 'snow under' is used l...",False,0,False,...,She was snowed under with work and came close ...,She was snowed under with all the snowfall las...,ambiguous : figurative : literal,ambiguous,figurative,literal,True,"['figurative', 'ambiguous', 'ambiguous']","['figurative', 'figurative', 'figurative']","['literal', 'literal', 'ambiguous']"


In [24]:
idioms = pd.read_csv("verif1_chatgpt_wfnwf.csv")