In [5]:
import pandas as pd
import os
from dotenv import load_dotenv
from openai import OpenAI

In [7]:
# Pull environment variables in from the local config file, then build the
# OpenAI client from the key stored under OPENAI_API_KEY.
load_dotenv("./config/api_key.env")
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

In [14]:
# Load your dataset
df = pd.read_csv('./PostHistoryFirstVersion.csv')

# Instruction for the developer role. It is the same for every question, so it
# is built once here rather than on every loop iteration.
instructions = """
    # Identity
    You are a developer assistant that improves Stack Overflow questions.
    
    # Instructions
    * Rewrite the Title to be more clear and professional.
    * Rewrite the Body to be clearer and more detailed if needed.
    * Add or Adjust Tags if necessary.
    
    * Format your response as:
    _ Title: <your improved title>\n
    _ Body: <your improved body>\n
    _ Tags: <your improved tags separated by commas>\n
    
    """


def parse_reply(reply):
    """Split a model reply into its ``(title, body, tags)`` sections.

    The reply is expected to contain the markers ``Title:``, ``Body:`` and
    ``Tags:`` in that order, as requested by ``instructions``.

    Args:
        reply: Text returned by the model; may be ``None`` or empty.

    Returns:
        A 3-tuple ``(title, body, tags)`` of stripped strings. A section whose
        marker cannot be found (or every section, when ``reply`` is empty) is
        returned as ``None`` instead of raising, so one malformed or truncated
        reply cannot abort the whole batch.
    """
    if not reply:
        return None, None, None

    # Tags is the final section, so split on the LAST marker: a body that
    # merely mentions "Tags:" must not be cut short.
    head, tags_marker, tags_part = reply.rpartition('Tags:')
    if not tags_marker:
        head, tags_part = reply, None

    title_zone, body_marker, body_part = head.partition('Body:')
    if not body_marker:
        body_part = None

    _, title_marker, title_part = title_zone.partition('Title:')
    if not title_marker:
        title_part = None

    def clean(section):
        """Strip whitespace and the dangling "_" bullet of the next header."""
        if section is None:
            return None
        section = section.strip()
        # The requested format prefixes each header with "_ ", so the text
        # before "Body:" / "Tags:" ends with a line holding only that bullet.
        rest, _, last_line = section.rpartition('\n')
        if last_line.strip() == '_':
            section = rest.rstrip()
        return section

    return clean(title_part), clean(body_part), clean(tags_part)


# Create empty lists to store the new improved stuff
new_titles = []
new_bodies = []
new_tags = []

# Go through each row
for index, row in df.iterrows():
    # Construct the input for the user role
    prompt = f"""
    Rewrite the Stackoverflow question given 3 information below:
    _ Original Title:
    {row['Title']}
    
    _ Original Body:
    {row['Body']}
    
    _ Original Tags:
    {row['Tags']}
    """
    # Send the prompt to OpenAI
    completion = client.chat.completions.create(
        model="gpt-4.1-nano",
        messages=[
            {"role": "developer", "content": instructions},
            {"role": "user", "content": prompt},
        ],
        n=1,
        temperature=0.3,
        max_tokens=1000,
    )

    # Parse the response; message.content may be None, which parse_reply handles
    reply = completion.choices[0].message.content
    title_only, body_only, tags_only = parse_reply(reply)
    if None in (title_only, body_only, tags_only):
        # Make the gap visible instead of crashing or hiding it silently.
        print(f"Row {index}: reply is missing a Title/Body/Tags section; "
              "missing parts stored as None")

    # Always append exactly one entry per row so the lists stay aligned with df
    new_titles.append(title_only)
    new_bodies.append(body_only)
    new_tags.append(tags_only)

In [15]:
# Attach the model-generated fields to the DataFrame, one new column per list
# (insertion order of the mapping fixes the column order).
improved_columns = {
    'Improved_Title': new_titles,
    'Improved_Body': new_bodies,
    'Improved_Tags': new_tags,
}
for column_name, column_values in improved_columns.items():
    df[column_name] = column_values

# Persist the enriched dataset without writing the index as a column
df.to_csv('ImprovedQuestions.csv', index=False)

In [None]:
# Preview only the first rows with the notebook's rich table rendering;
# printing the whole frame floods the output with plain text.
df.head()