In [None]:
from openai import OpenAI
import pandas as pd 
import tqdm.notebook as tqdm
from tenacity import retry, stop_after_attempt, wait_fixed

# Do not hard-code the key in the notebook: with no api_key argument the
# client reads OPENAI_API_KEY from the environment and raises immediately
# if it is unset, instead of failing later on the first request.
client = OpenAI()

@retry(stop=stop_after_attempt(3), wait=wait_fixed(1))
def query_openai(prompt, model="gpt-3.5-turbo", temperature=1, max_tokens=256):
    """Send ``prompt`` as a single user message and return the reply text.

    The request goes through the module-level ``client`` with a fixed system
    message that asks for concise, no-preamble answers. The tenacity
    decorator retries on any exception: at most 3 attempts, 1 second apart.

    Args:
        prompt: Text placed in the user message.
        model: Chat model name. Defaults to "gpt-3.5-turbo"; pass e.g.
            "gpt-4-turbo-preview" to switch models without editing this cell.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on generated tokens forwarded to the API.

    Returns:
        The ``content`` of the first choice's message.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are an expert in causal reasoning, logical reasoning, and commonsense question-answering.  Do not provide an intro or concluding remarks in your response.  Be as concise as you can be when responding. "
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response.choices[0].message.content

# Instruction prefix with one worked example. It ends with "Scenario:\n";
# presumably the scenario text is appended right after it (caller not shown here).
# Written as adjacent string literals so each sentence / output line is reviewable.
prompt_pre = (
    "Analyze the provided scenario to identify the underlying causal system and then generate knowledge triples that describe this system. "
    "Each triple should be formatted with a leading dash and follow this structure: \"- [Head Predicate]; [Relation]; [Tail Predicate]\". "
    "Make sure that the head and tail predicates are different, general in nature, and do not contain pronouns or specific referents. "
    "Use only these relations: cause-effect, has-contributing-factor, has-requirement, has-subevent, precedes, reacts-to, has-intent, magnifies, and mitigates. "
    "The triples should reflect general actions, events, or conditions and their typical outcomes or influences within a causal system, avoiding circular reasoning where the head and tail are the same. "
    "Do not use specifics, personal pronouns or refer to specific people or places when creating the triples. "
    "Do not use names of individuals in the triples. "
    "Create a brief title (2-3 words) and a generic description that encapsulates the general causal system, with a focus on clarity and brevity. "
    "Do not use individuals names (e.g. Sally, Bob, etc) or proper nouns referring to specific people or places when generating the description. \n\n"
    "Task:\n\n"
    "Provide a concise description of the identified causal system.\n"
    "Generate a brief title for the causal system.\n"
    # Nine relations are listed above, so the count here must say 9.
    "Produce non-circular knowledge triples based on the scenario. Maintain consistency in the head and tail entities across triples, avoiding repetition of the same entity in both positions, and incorporate as many of the 9 relevant relations as possible.\n"
    "Example Scenario:\n"
    "Air pollution in the city has worsened. What is the cause?\n\n"
    "Expected Output:\n"
    "A. Title:\n"
    "Air Pollution\n\n"
    "B. Description:\n"
    "Emissions from vehicles and industries lead to deteriorating air quality.\n\n"
    "C. Triples:\n"
    "- Vehicle and industrial emissions; cause-effect; worsening air quality\n"
    "- Worsening air quality; has-contributing-factor; increased emissions\n"
    "- Emission control policies; mitigates; worsening air quality\n"
    "- Worsening air quality; has-requirement; pollutant monitoring\n"
    "- Surge in emissions; precedes; public health concerns\n"
    "- Public health measures; reacts-to; worsening air quality\n\n"
    "Scenario:\n"
)

In [2]:
# print() (rather than a bare expression) so the "\n" escapes show up as real line breaks.
print(prompt_pre)

Analyze the provided scenario to identify the underlying causal system and then generate knowledge triples that describe this system. Each triple should be formatted with a leading dash and follow this structure: "- [Head Predicate]; [Relation]; [Tail Predicate]". Make sure that the head and tail predicates are different, general in nature, and do not contain pronouns or specific referents. Use only these relations: cause-effect, has-contributing-factor, has-requirement, has-subevent, precedes, reacts-to, has-intent, magnifies, and mitigates. The triples should reflect general actions, events, or conditions and their typical outcomes or influences within a causal system, avoiding circular reasoning where the head and tail are the same. Do not use specifics, personal pronouns or refer to specific people or places when creating the triples. Do no use names of individuals in the triples. Create a brief title (2-3 words) and a generic description that encapsulates the general causal system

In [None]:
import re 

def extract_info(text):
    """Pull the title, description and triples out of a model response.

    The response is lower-cased and stripped before parsing, so every
    returned string is lower-case. A key is only present in the result when
    its label ("title:", "desc...:", "triples:") was found.

    Args:
        text: Raw response text. ``None`` or an empty string yields ``{}``.

    Returns:
        dict with any of:
            'title'       - rest of the line after "title:" (or the next
                            line when the label stands alone),
            'description' - same, for the first "desc...:" label,
            'triples'     - list of non-empty lines following "triples:",
                            each with its leading dash/spaces removed.
    """
    # A missing reply (None) or empty string has nothing to parse.
    if not text:
        return {}

    text = text.lower().strip()

    info_dict = {}

    # Extracting the title
    title_match = re.search(r'title:\s*(.*)', text)
    if title_match:
        info_dict['title'] = title_match.group(1).strip()

    # Extracting the description. The label stops at the FIRST colon on its
    # line, so a description that itself contains ':' is kept whole.
    description_match = re.search(r'desc[^:\n]*:\s*(.*)', text)
    if description_match:
        info_dict['description'] = description_match.group(1).strip()

    # Extracting the triples: everything after the label, one per line.
    triples_match = re.search(r'triples:\s*(.*)', text, re.DOTALL)
    if triples_match:
        cleaned = (
            line.strip().lstrip('- ').strip()
            for line in triples_match.group(1).split('\n')
        )
        # Blank separator lines are not triples; drop them.
        info_dict['triples'] = [triple for triple in cleaned if triple]

    return info_dict