### ChatGPT Use Case Addition

pip install openai

In [None]:
import os
import re
from bs4 import BeautifulSoup
from datetime import datetime
import pandas as pd
from openai import OpenAI
import time  # To optionally add a delay if needed


# Directory that holds the downloaded FOMC announcement pages.
folder_path = "raw_data/announcements"

# Only HTML documents are of interest; every other directory entry is skipped.
html_files = list(
    filter(lambda entry: entry.endswith(".html"), os.listdir(folder_path))
)

def extract_date_from_filename(filename):
    """Return the date embedded in an announcement filename.

    The filename is searched for the token ``monetary`` immediately followed
    by eight digits, which are interpreted as ``YYYYMMDD``.

    Args:
        filename: Name of the file (any surrounding text is allowed).

    Returns:
        A ``datetime`` for the embedded date, or ``None`` when the filename
        has no ``monetaryYYYYMMDD`` token or the digits are not a valid
        calendar date.
    """
    match = re.search(r"monetary(\d{8})", filename)
    if match is None:
        return None
    try:
        return datetime.strptime(match.group(1), "%Y%m%d")
    except ValueError:
        # Eight digits that are not a real date (e.g. month 13): treat the
        # file as undated instead of aborting the whole directory scan.
        return None

# Build (date, filename) tuples, discard files whose name yields no date, and
# order the result chronologically (equal dates fall back to the filename).
dated_files = sorted(
    (parsed_date, name)
    for parsed_date, name in (
        (extract_date_from_filename(candidate), candidate)
        for candidate in html_files
    )
    if parsed_date is not None
)

# A signed integer or decimal such as "-1", "0.25", ".5" or "+0.3".
_RATING_NUMBER = r"[-+]?(?:\d+(?:\.\d+)?|\.\d+)"


def _parse_rating(response_text):
    """Extract the numeric sentiment rating from a model reply.

    Two reply shapes are accepted, because the prompt both enumerates two
    questions and asks for the bare number only:

    * the whole reply is just a number (e.g. ``"-0.3"``);
    * a numbered answer whose second item starts with the number
      (e.g. ``"1. Neutral\\n2. -0.3"``).

    Args:
        response_text: Text returned by the model; may be ``None`` or empty.

    Returns:
        The rating as a ``float``, or ``None`` if no rating could be found.
    """
    if not response_text:
        return None

    stripped = response_text.strip()
    if re.fullmatch(_RATING_NUMBER, stripped):
        return float(stripped)

    match = re.search(r"2\.\s*(" + _RATING_NUMBER + r")", response_text)
    return float(match.group(1)) if match else None


# No client is created in this cell. Fall back to one configured from the
# OPENAI_API_KEY environment variable unless an earlier cell already defined it.
if "client" not in globals():
    client = OpenAI()

ratings = []

for date, filename in dated_files:
    # Stays None unless a rating is successfully obtained and parsed, so every
    # file contributes exactly one row and rows stay aligned with dated_files.
    extracted_rating = None
    try:
        with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f, 'html.parser')
            fomc_text = soup.get_text(separator=' ', strip=True)

        prompt = f"""
        You are a financial analyst specializing in monetary policy communications. Read the following FOMC announcement and answer the following questions:

        1. What is the sentiment at the beginning of the announcement? (Bearish, Neutral, Bullish)
        2. Provide a final numerical sentiment rating to the entire document based on your analysis of the tone shift and sentiment throughout, using this scale:
            -1 = Bearish
             0 = Neutral
             1 = Bullish
             The rating can be a decimal (e.g., -0.9,-0.8,-0.7,-0.6,-0.5, -0.4,-0.3,-0.2,-0.1, 0.1, 0.2, 0.3, 0.4,0.5,0.6,0.7,0.8, 0.9, etc.). 

        Just output the rating number without any explanation.

        FOMC Text:
        {fomc_text}
        """

        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=500,
            temperature=0.4
        )

        response_text = response.choices[0].message.content
        extracted_rating = _parse_rating(response_text)
        if extracted_rating is None:
            print(f"Could not parse a rating for {filename}: {response_text!r}")

    except Exception as e:
        # Best effort: one bad file or failed request must not stop the batch.
        print(f"Error processing {filename}: {e}")
        extracted_rating = None

    ratings.append(extracted_rating)
    time.sleep(1.2)  # Optional: throttle requests to avoid hitting rate limits

# Create the final DataFrame (one row per dated file, in chronological order)
statements_chat_df = pd.DataFrame(ratings, columns=["Sentiment_Rating"])
print(statements_chat_df)
print(f"\nTotal processed: {len(statements_chat_df)}")


In [None]:
# Persist the ratings as CSV; the positional DataFrame index is not written.
statements_chat_df.to_csv(
    path_or_buf='raw_data/statements_chat_df.csv',
    index=False,
)