In [None]:
import pandas as pd
from openai import OpenAI
import random
import os
import re

In [None]:
# Run configuration: the sense of the idiom to elicit, the idiom itself, and
# the chat model used for every request in this notebook.
category = "figurative"
idiom = "make hit"
model = "gpt-4-turbo"

# The API key is read from the OPENAI_API_KEY environment variable rather than
# being written into the notebook, so the secret is not stored with the code.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [None]:
# Create the raw-response log with only a header row the first time the
# notebook runs; an existing log file is left untouched.
if not os.path.exists('response_data.csv'):
    pd.DataFrame(columns=["raw_response","category", "idiom", "type"]).to_csv('response_data.csv', index=False)

In [None]:
# Empty accumulator for parsed sentences; later cells concatenate their rows onto it.
df_structured = pd.DataFrame(columns=["submission", "category", "idiom", "type"])

**Zero-Shot**

In [None]:
class ChatSession:
    """Multi-turn chat wrapper used for the zero-shot prompts.

    The whole conversation is kept in ``self.messages`` and resent with every
    request, and each reply is appended to 'response_data.csv'.
    """

    # Mirrors the header row of 'response_data.csv'. Rows are appended without
    # a header, so writing them in any other order would shift the values into
    # the wrong columns.
    LOG_COLUMNS = ["raw_response", "category", "idiom", "type"]

    def __init__(self, category, model=model):
        """Start a conversation seeded with the zero-shot system prompt.

        Args:
            category: Sense of the idiom to request; interpolated into the
                system prompt and recorded with every logged reply.
            model: Chat model name sent to the API. The default is the
                notebook-level ``model`` as it was when the class was defined.
        """
        self.model = model
        self.category = category
        system_message = f"Since English is your native language, you should use our language in rich and creative ways. Please avoid using similar structures in your sentences and craft sentences by using idioms correctly and with their {category} meanings. You should also avoid constructing sentences using personal names!"
        print(system_message)
        self.messages = [
            {"role": "system", "content": system_message}
        ]

    def get_completion(self, user_prompt):
        """Send ``user_prompt`` with the full history and return the reply text.

        Side effects: the prompt and the reply are appended to
        ``self.messages``, and one row is appended to 'response_data.csv'.
        The row uses the notebook-level ``idiom`` and ``type`` values current
        at call time; if ``type`` has not been assigned in the notebook, the
        Python builtin ``type`` is what gets logged.

        Args:
            user_prompt: Text of the user message to send.

        Returns:
            The assistant reply with surrounding whitespace stripped.
        """
        self.messages.append({"role": "user", "content": user_prompt})

        response = client.chat.completions.create(
            model=self.model,
            messages=self.messages,
        )

        # Keep the reply in the history so later turns can see it.
        ai_response = response.choices[0].message.content.strip()
        self.messages.append({"role": "assistant", "content": ai_response})

        # Build the row directly in header order; the file is opened in append
        # mode and no header is written.
        log_row = pd.DataFrame(
            [[ai_response, self.category, idiom, type]],
            columns=self.LOG_COLUMNS,
        )
        log_row.to_csv('response_data.csv', mode='a', header=False, index=False)
        return ai_response

In [None]:
# Open a new conversation; constructing the session prints its system prompt.
chat_session = ChatSession(category=category)

In [None]:
# Label stored with every row produced in this section. Note: this name
# shadows the builtin `type`; ChatSession.get_completion reads it as a global.
type = "Zero-shot"
# Number of times the prompt is sent within the same conversation.
step = 12

In [None]:
# Zero-shot request: five example sentences per call, sentences only.
prompt = f"""
"{idiom}" is an English idiom. We can use this idiom in both literal and figurative senses. Please write five example sentences using this idiom in the {category} sense. In your response, return only the sentences.
"""
print(prompt)
print("\n")

# Send the same prompt `step` times in one conversation and keep every reply.
responses = []
for _ in range(step):
    responses.append(chat_session.get_completion(prompt))

In [None]:
# Inspect the raw replies collected by the loop.
responses

In [None]:
def parse_and_structure_data(raw_responses_list, idiom, category, response_type):
    """Turn raw multi-line replies into (sentence, category, idiom, type) rows.

    Every reply is split on newlines and a leading list number such as "1. "
    or "2) " is removed from each line. Only a prefix at the very start of the
    line is stripped, so a line that contains ". " further in is kept whole.

    Args:
        raw_responses_list: Iterable of reply strings, one per API call.
        idiom: Idiom recorded in every row.
        category: Sense label recorded in every row.
        response_type: Prompting-strategy label recorded in every row.

    Returns:
        A list of ``(sentence, category, idiom, response_type)`` tuples in
        input order. Blank lines produce a row with an empty sentence; the
        caller is expected to filter those out.
    """
    # Digits followed by "." or ")" and whitespace, anchored at the line start.
    numbering = re.compile(r'^\s*\d+[.)]\s+')
    structured_data = []
    for block in raw_responses_list:
        for line in block.split('\n'):
            sentence = numbering.sub('', line, count=1).strip()
            structured_data.append((sentence, category, idiom, response_type))
    return structured_data

In [None]:
# Parse the collected replies into rows and load them into a DataFrame
# with the same columns as df_structured.
structured_data = parse_and_structure_data(responses, idiom, category, type)

new_df= pd.DataFrame(structured_data, columns=["submission", "category", "idiom", "type"])
new_df

In [None]:
# Discard rows whose sentence is empty and renumber the index from zero.
new_df = new_df[new_df['submission'] != ""].reset_index(drop=True)
new_df


In [None]:
# Add this section's rows to the accumulator. Re-running this cell appends
# the same rows a second time.
df_structured = pd.concat([df_structured, new_df], ignore_index=True)
df_structured

In [None]:
# Display the accumulated rows.
df_structured

**Enhanced-prompting**

In [None]:
class ChatSession:
    """Multi-turn chat wrapper used for the scenario-based enhanced prompts.

    The whole conversation is kept in ``self.messages`` and resent with every
    request, and each reply is appended to 'response_data.csv'.
    """

    # Mirrors the header row of 'response_data.csv'. Rows are appended without
    # a header, so writing them in any other order would shift the values into
    # the wrong columns.
    LOG_COLUMNS = ["raw_response", "category", "idiom", "type"]

    def __init__(self, category, model=model):
        """Start a conversation seeded with the enhanced-prompting system prompt.

        Args:
            category: Sense label recorded with every logged reply. This
                system prompt does not interpolate it.
            model: Chat model name sent to the API. The default is the
                notebook-level ``model`` as it was when the class was defined.
        """
        self.model = model
        self.category = category
        system_message = "Since English is your native language, you should use our language in rich and creative ways. Please respond carefully according to the specified request, without altering the root words of the idiom. Also, you should not make sentences using personal names!"
        print(system_message)
        self.messages = [
            {"role": "system", "content": system_message}
        ]

    def get_completion(self, user_prompt):
        """Send ``user_prompt`` with the full history and return the reply text.

        Side effects: the prompt and the reply are appended to
        ``self.messages``, and one row is appended to 'response_data.csv'.
        The row uses the notebook-level ``idiom`` and ``type`` values current
        at call time; if ``type`` has not been assigned in the notebook, the
        Python builtin ``type`` is what gets logged.

        Args:
            user_prompt: Text of the user message to send.

        Returns:
            The assistant reply with surrounding whitespace stripped.
        """
        self.messages.append({"role": "user", "content": user_prompt})

        response = client.chat.completions.create(
            model=self.model,
            messages=self.messages,
        )

        # Keep the reply in the history so later turns can refer back to it.
        ai_response = response.choices[0].message.content.strip()
        self.messages.append({"role": "assistant", "content": ai_response})

        # Build the row directly in header order; the file is opened in append
        # mode and no header is written.
        log_row = pd.DataFrame(
            [[ai_response, self.category, idiom, type]],
            columns=self.LOG_COLUMNS,
        )
        log_row.to_csv('response_data.csv', mode='a', header=False, index=False)
        return ai_response


# First enhanced-prompting turn: give a worked example for another idiom and
# ask the model to list the situations in which this idiom is used.
prompt = f"""
The idiom 'break the ice' can be used in various educational situations, such as starting a new school year, introducing a new topic, engaging shy students, encouraging group discussions, or welcoming new students to the class. Similarly, can you list the situations where the idiom '{idiom}' is used in a {category} sense?
"""

In [None]:
# Open a new conversation with the most recently defined ChatSession class.
chat_session = ChatSession(category=category)

In [None]:
# Label stored with every row produced in this section. Note: this name
# shadows the builtin `type`; ChatSession.get_completion reads it as a global.
type = "Enhanced-prompting"
# Number of times each prompt in this section is sent.
step = 1

In [None]:
# Show the outgoing prompt, then print each reply as it arrives.
print(prompt)
print("\n")

for _ in range(step):
    print(chat_session.get_completion(prompt))

In [None]:
# Follow-up turn in the same conversation: request four differently shaped
# sentences for each scenario the model listed in its previous reply.
prompt = f"""
For each of the scenarios listed in the previous message where the idiom '{idiom}' is used in the {category} sense, create four different sentences. The first sentence should be long and detailed, the second in the form of a question, the third in a negative construction, and the fourth should have other words interspersed among the words that make up the idiom. Please do not alter the root words of the idiom. You should create four different sentences for each category. Return only these sentences in your response.
"""

In [None]:
# Send the follow-up prompt `step` times and keep every reply.
print(prompt)
responses = []
for _ in range(step):
    responses.append(chat_session.get_completion(prompt))

In [None]:
# Inspect the raw replies collected by the loop.
responses

In [None]:
def parse_and_structure_data(raw_responses_list, idiom, category, response_type):
    """Extract dash-marked sentences from raw replies as structured rows.

    A line contributes a row when it contains a hyphen used as a bullet or
    separator, i.e. one that is not enclosed by word characters on both
    sides. The text after the first such hyphen is the sentence. Hyphens
    inside words ("well-known") are ignored, so a line without a bullet is
    skipped rather than cut at the word's hyphen.

    The first line of each reply is treated as a header and skipped, unless
    it starts with a dash bullet itself.

    Args:
        raw_responses_list: Iterable of reply strings, one per API call.
        idiom: Idiom recorded in every row.
        category: Sense label recorded in every row.
        response_type: Prompting-strategy label recorded in every row.

    Returns:
        A list of ``(sentence, category, idiom, response_type)`` tuples in
        input order. Lines that leave no text after the marker are omitted.
    """
    # One or more hyphens not preceded by a word character, or not followed by one.
    marker = re.compile(r'(?<!\w)-+|-+(?!\w)')
    structured_data = []
    for block in raw_responses_list:
        lines = block.split('\n')
        # split() always yields at least one element, so lines[0] is safe.
        first_data_line = 0 if lines[0].lstrip().startswith('-') else 1
        for line in lines[first_data_line:]:
            found = marker.search(line)
            if found is None:
                continue
            sentence = line[found.end():].strip()
            if sentence:
                structured_data.append((sentence, category, idiom, response_type))
    return structured_data

In [None]:
def parse_numbered_sentences(raw_texts):
    """Collect the lines numbered 1 to 4 from raw replies as structured rows.

    Each reply is split on newlines and every line is stripped. A line is kept
    only when it begins with '1. ', '2. ', '3. ' or '4. '; the text after that
    prefix becomes the sentence. The remaining tuple fields come from the
    notebook-level ``category``, ``idiom`` and ``type`` variables.

    Args:
        raw_texts: Iterable of reply strings.

    Returns:
        A list of ``(sentence, category, idiom, type)`` tuples in input order.
    """
    wanted_prefixes = ('1. ', '2. ', '3. ', '4. ')
    extracted_sentences = []

    for raw_text in raw_texts:
        for candidate in (piece.strip() for piece in raw_text.split('\n')):
            if not candidate.startswith(wanted_prefixes):
                continue
            # Everything after the first '. ' is the sentence itself.
            _, _, sentence = candidate.partition('. ')
            extracted_sentences.append((sentence, category, idiom, type))

    return extracted_sentences

In [None]:
# Parse the follow-up replies with the most recently defined
# parse_and_structure_data and inspect the resulting rows.
parsed_data = parse_and_structure_data(responses, idiom, category, type)
parsed_data

In [None]:
# Load the parsed rows into a DataFrame with the same columns as df_structured.
new_df= pd.DataFrame(parsed_data, columns=["submission", "category", "idiom", "type"])
new_df

In [None]:
# Add this section's rows to the accumulator. Re-running this cell appends
# the same rows a second time.
df_structured = pd.concat([df_structured, new_df], ignore_index=True)
df_structured

In [None]:
class ChatSession:
    """Multi-turn chat wrapper used for the varied-structure enhanced prompts.

    The whole conversation is kept in ``self.messages`` and resent with every
    request, and each reply is appended to 'response_data.csv'.
    """

    # Mirrors the header row of 'response_data.csv'. Rows are appended without
    # a header, so writing them in any other order would shift the values into
    # the wrong columns.
    LOG_COLUMNS = ["raw_response", "category", "idiom", "type"]

    def __init__(self, category, model=model):
        """Start a conversation seeded with the no-repetition system prompt.

        Args:
            category: Sense of the idiom to request; interpolated into the
                system prompt and recorded with every logged reply.
            model: Chat model name sent to the API. The default is the
                notebook-level ``model`` as it was when the class was defined.
        """
        self.model = model
        self.category = category
        system_message = f"Since English is your native language, you should use our language in rich and creative ways. Please avoid repeating your sentences and craft sentences using idioms correctly and with their {category} meanings, without altering the root words of the idiom. You should also avoid constructing sentences using personal names!"
        print(system_message)
        self.messages = [
            {"role": "system", "content": system_message}
        ]

    def get_completion(self, user_prompt):
        """Send ``user_prompt`` with the full history and return the reply text.

        Side effects: the prompt and the reply are appended to
        ``self.messages``, and one row is appended to 'response_data.csv'.
        The row uses the notebook-level ``idiom`` and ``type`` values current
        at call time; if ``type`` has not been assigned in the notebook, the
        Python builtin ``type`` is what gets logged.

        Args:
            user_prompt: Text of the user message to send.

        Returns:
            The assistant reply with surrounding whitespace stripped.
        """
        self.messages.append({"role": "user", "content": user_prompt})

        response = client.chat.completions.create(
            model=self.model,
            messages=self.messages,
        )

        # Keep the reply in the history so later turns can see it.
        ai_response = response.choices[0].message.content.strip()
        self.messages.append({"role": "assistant", "content": ai_response})

        # Build the row directly in header order; the file is opened in append
        # mode and no header is written.
        log_row = pd.DataFrame(
            [[ai_response, self.category, idiom, type]],
            columns=self.LOG_COLUMNS,
        )
        log_row.to_csv('response_data.csv', mode='a', header=False, index=False)
        return ai_response

In [None]:
# Open a new conversation with the most recently defined ChatSession class.
chat_session = ChatSession(category=category)

In [None]:
# Label stored with every row produced in this section. Note: this name
# shadows the builtin `type`; ChatSession.get_completion reads it as a global.
type = "Enhanced-prompting"
# Number of times the prompt is sent within the same conversation.
step = 2

In [None]:
# Request five structurally varied sentences per call.
prompt = f"""
Using the idiom '{idiom}' in the {category} sense, create five new sentences with different sentence structures. Use the idiom in various parts of the sentences. Please avoid repeating your sentences.
"""
print(prompt)
print("\n")

# Send the same prompt `step` times in one conversation and keep every reply.
responses = []
for _ in range(step):
    responses.append(chat_session.get_completion(prompt))

In [None]:
# Inspect the raw replies collected by the loop.
responses

In [None]:
def parse_and_structure_data(raw_responses_list, idiom, category, response_type):
    """Turn raw multi-line replies into (sentence, category, idiom, type) rows.

    Every reply is split on newlines and a leading list number such as "1. "
    or "2) " is removed from each line. Only a prefix at the very start of the
    line is stripped, so a line that contains ". " further in is kept whole.

    Args:
        raw_responses_list: Iterable of reply strings, one per API call.
        idiom: Idiom recorded in every row.
        category: Sense label recorded in every row.
        response_type: Prompting-strategy label recorded in every row.

    Returns:
        A list of ``(sentence, category, idiom, response_type)`` tuples in
        input order. Blank lines produce a row with an empty sentence; the
        caller is expected to filter those out.
    """
    # Digits followed by "." or ")" and whitespace, anchored at the line start.
    numbering = re.compile(r'^\s*\d+[.)]\s+')
    structured_data = []
    for block in raw_responses_list:
        for line in block.split('\n'):
            sentence = numbering.sub('', line, count=1).strip()
            structured_data.append((sentence, category, idiom, response_type))
    return structured_data

In [None]:
# Parse the collected replies into rows and load them into a DataFrame
# with the same columns as df_structured.
structured_data = parse_and_structure_data(responses, idiom, category, type)

new_df= pd.DataFrame(structured_data, columns=["submission", "category", "idiom", "type"])
new_df

In [None]:
# Discard rows whose sentence is empty and renumber the index from zero.
new_df = new_df[new_df['submission'] != ""].reset_index(drop=True)
new_df


In [None]:
# Add this section's rows to the accumulator. Re-running this cell appends
# the same rows a second time.
df_structured = pd.concat([df_structured, new_df], ignore_index=True)

df_structured

In [None]:
# Display the accumulated rows.
df_structured

In [None]:
# Destination CSV for the accumulated structured sentences.
file_path = 'gpt4_en_final_mecaz.csv'

# If the file is already present, append rows without repeating the header;
# otherwise create it and write the header row first.
already_there = os.path.exists(file_path)
df_structured.to_csv(
    file_path,
    mode='a' if already_there else 'w',
    index=False,
    header=not already_there,
)