In [None]:
import os
from openai import AzureOpenAI
import pandas as pd
from concurrent.futures import ThreadPoolExecutor


In [None]:
# --- File locations ---------------------------------------------------------
# Input and output files all live in the current user's Downloads folder.
downloads_folder = os.path.expanduser("~/Downloads")
# Input: CSV of posts; its 'Body' column is the text that gets scored later on.
file_path = os.path.join(downloads_folder, "latest_posts_cleaned.csv")
# Outputs: the scored posts are written to both of these paths at the end of
# the notebook (same data, CSV and Excel form).
gpt_sentiment_path_csv = os.path.join(downloads_folder, "gpt_sentiment.csv")
gpt_sentiment_path_xlsx = os.path.join(downloads_folder, "gpt_sentiment.xlsx")
# Load the posts into a DataFrame; raises if the file is not present.
cleaned_posts = pd.read_csv(file_path)

# --- Azure OpenAI client ----------------------------------------------------
# The API key is taken from the AZURE_OPENAI_API_KEY environment variable so it
# never appears in the notebook; os.getenv returns None when it is unset.
# NOTE(review): the endpoint and API version are hard-coded here — confirm the
# preview API version is still supported by the target deployment.
client = AzureOpenAI(
  api_key=os.getenv("AZURE_OPENAI_API_KEY"),
  azure_endpoint="https://oai-bandas-weu.openai.azure.com/",
  api_version="2023-03-15-preview"
)

In [None]:
# Function to get sentiment from GPT
def get_sentiment_from_gpt(post_content):
    """Ask the GPT deployment for a numeric sentiment score for one post.

    Parameters
    ----------
    post_content : str or None
        Text of the post. Missing values (``None`` or a float NaN, which is
        what pandas yields for an empty cell) and blank strings are not sent
        to the API.

    Returns
    -------
    float or None
        The score rounded to three decimals, within [-1.0, 1.0]. ``None`` when
        the input is missing/blank, the request fails, or the reply is not a
        number in that range.

    Notes
    -----
    Failures are reported with ``print`` and turned into ``None`` on purpose,
    so that one bad post does not abort a whole batch run.
    """
    # NaN is the only float that is not equal to itself.
    is_nan = isinstance(post_content, float) and post_content != post_content
    if post_content is None or is_nan:
        return None

    # Coerce once so the error handler below can always slice the text safely.
    text = str(post_content)
    if not text.strip():
        return None

    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            # NOTE(review): a non-zero temperature makes scores vary between
            # runs; consider 0 if reproducible results are required.
            temperature=0.7,
            messages=[
                {"role": "system", "content": "You are an assistant providing numerical sentiment analysis."},
                {"role": "user", "content": f"Rate the sentiment of the following text on a scale from -1.000 (very negative) to 1.000 (very positive). Provide only the numerical value, rounded to three decimal places. Do not include any text or explanation. Text: {text}"}
            ]
        )
        # A reply with no content or non-numeric text raises here and is
        # handled by the except clause below.
        sentiment = float(response.choices[0].message.content.strip())
        # float() also accepts "nan"/"inf"; the range check rejects those as
        # well as any reply that ignores the requested scale.
        if not -1.0 <= sentiment <= 1.0:
            raise ValueError(f"score {sentiment} is outside [-1, 1]")
        return round(sentiment, 3)
    except Exception as e:
        print(f"Error for post: {text[:30]}... | {e}")
        return None

def classify_sentiment(score, threshold=0.05):
    """Map a numeric sentiment score to a category label.

    Parameters
    ----------
    score : float or None
        Sentiment score. ``None`` and NaN both mean "no score available";
        NaN is what a missing value becomes once it is stored in a float
        column of a DataFrame.
    threshold : float, optional
        Half-width of the neutral band around zero. Scores strictly above
        ``threshold`` are positive, strictly below ``-threshold`` negative.
        Defaults to 0.05.

    Returns
    -------
    str
        One of "unknown", "positive", "negative" or "neutral".
    """
    # pd.isna covers None as well as NaN; a plain `is None` test misses NaN,
    # which fails both comparisons below and would be labelled "neutral".
    if pd.isna(score):
        return "unknown"
    if score > threshold:
        return "positive"
    if score < -threshold:
        return "negative"
    return "neutral"

# Score every post concurrently. The work is network-bound (one API request
# per post), so threads are used; executor.map yields results in the same
# order as the input, which is what lets the list be assigned positionally
# below. Entries are None for posts whose request or parsing failed.
# NOTE(review): 20 simultaneous requests may exceed the deployment's rate
# limit — confirm, and lower max_workers if throttling errors show up.
with ThreadPoolExecutor(max_workers=20) as executor:  # Adjust max_workers as needed
    sentiments = list(executor.map(get_sentiment_from_gpt, cleaned_posts['Body']))

# Attach the scores as a new column, aligned by position with the rows.
# NOTE(review): this modifies cleaned_posts in place rather than creating a
# new frame, so the frame loaded earlier no longer matches the input file.
cleaned_posts.loc[:, 'GPT_Sentiment'] = sentiments

# Derive a text label from each score.
# NOTE(review): pandas may store missing scores as NaN rather than None in a
# numeric column — confirm the classifier labels those rows as intended.
cleaned_posts['GPT_Sentiment_Category'] = cleaned_posts['GPT_Sentiment'].apply(classify_sentiment)

# Save the scored posts as CSV (row index omitted).
cleaned_posts.to_csv(gpt_sentiment_path_csv, index=False)
print(f"CSV file saved to: {gpt_sentiment_path_csv}")

# Save the same data as an Excel workbook; requires openpyxl to be installed.
cleaned_posts.to_excel(gpt_sentiment_path_xlsx, index=False, engine='openpyxl')  # Specify engine for .xlsx files
print(f"Excel file saved to: {gpt_sentiment_path_xlsx}")