In [16]:
from openai import OpenAI
from dotenv import load_dotenv
import os
import json

from data_processing.data_processing_utils import get_author_short_name

# Load environment variables from a local .env file, then read the OpenAI key.
load_dotenv()  # reads .env
# os.getenv returns None when OPENAI_API_KEY is not set.
api_key = os.getenv("OPENAI_API_KEY")

# Shared client used by score_tone for its chat-completion requests.
client = OpenAI(api_key=api_key)

In [17]:
def score_tone(speech_text, model="gpt-5"):
    """Ask an OpenAI chat model to rate the hawkish/dovish tone of a speech.

    The speech is embedded verbatim at the end of a fixed prompt that asks for
    six scores in [-1, +1], one per topic, returned as a JSON object.

    Args:
        speech_text: Full text of the speech to score.
        model: Name of the chat-completions model to query. Defaults to
            "gpt-5".

    Returns:
        The message content of the first completion choice, exactly as the
        API returned it. The reply is neither parsed nor validated here, so
        callers must decode it themselves (e.g. with ``json.loads``).
    """
    # Doubled braces below are literal braces in the f-string (the JSON example).
    prompt = f"""
    You are a financial policy analyst. Given the Federal Reserve speech below,
    rate the speaker's tone toward each of the following topics on a continuous scale
    from -1 (dovish) to +1 (hawkish). Return only six numbers in JSON format
    with keys exactly as shown below.

    Topics and scoring guide:
    - Fed Funds Rate: hawkish = likely rate hike; dovish = likely rate cut
    - Labor Market: hawkish = strong labor market; dovish = weak labor market
    - Inflation: hawkish = high or concerning inflation; dovish = low or less concerning inflation
    - Real Activity: hawkish = strong real activity; dovish = weak real activity
    - Financial Stability: hawkish = stable system; dovish = less stable system
    - Balance Sheet: hawkish = in or start of QT or balance sheet reduction, end of QE; dovish = start or in QE or expansion, end of QT

    Respond ONLY with JSON like this:
    {{
      "Fed Funds Rate": 0.4,
      "Labor Market": -0.2,
      "Inflation": 0.6,
      "Real Activity": 0.1,
      "Financial Stability": 0.3,
      "Balance Sheet": -0.5
    }}

    --- Speech ---
    {speech_text}
    """

    # `client` is the module-level OpenAI client created in the setup cell.
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content


In [34]:
#author_key = "williams"

In [18]:

def json_by_id(select_id, author_key):
    """Return the first record in an author's speech file with the given id.

    Args:
        select_id: Value compared against each record's "id" field.
        author_key: Author short name; records are read from
            text_data/<author_key>.json.

    Returns:
        The first matching record, as decoded from the JSON file.

    Raises:
        IndexError: If no record in the file has the requested id.
    """
    with open(f"text_data/{author_key}.json", "r", encoding="utf-8") as f:
        text_data = json.load(f)

    # Stop at the first hit instead of materialising every match.
    for record in text_data:
        if record["id"] == select_id:
            return record
    # Same exception type as indexing an empty match list, but with context.
    raise IndexError(
        f"no record with id {select_id!r} in text_data/{author_key}.json"
    )

def read_all_ids(author_key):
    """List the "id" of every record in text_data/<author_key>.json, in file order."""
    source_path = f"text_data/{author_key}.json"
    with open(source_path, "r", encoding="utf-8") as handle:
        return [entry["id"] for entry in json.load(handle)]

In [19]:
from datetime import datetime

def parse_date(s: str):
    """Parse a speech date string into a ``datetime``.

    Two layouts are accepted: "2020-10-13" and "November 13, 2009".
    Leading and trailing whitespace is ignored.

    Args:
        s: Date text in one of the accepted layouts.

    Returns:
        A naive ``datetime`` for the parsed day (time fields are zero).

    Raises:
        ValueError: If ``s`` matches none of the accepted layouts. The
            message includes the original string.
    """
    formats = (
        "%Y-%m-%d",      # 2020-10-13
        "%B %d, %Y",     # November 13, 2009 (month name is locale-dependent)
    )

    # Non-string input is passed through untouched so strptime reports it.
    candidate = s.strip() if isinstance(s, str) else s
    for fmt in formats:
        try:
            return datetime.strptime(candidate, fmt)
        except ValueError:
            continue  # not this layout; try the next one
    raise ValueError(f"Unknown date format: {s}")

def find_date_by_id(author_key, select_id):
    """Look up a speech by id and return its date as a ``datetime``.

    Note the argument order here is (author_key, select_id), the reverse of
    ``json_by_id``.

    Args:
        author_key: Author short name identifying text_data/<author_key>.json.
        select_id: Id of the speech whose date is wanted.

    Returns:
        The record's "date" field parsed by ``parse_date``.

    Raises:
        IndexError: If no record has the requested id.
        ValueError: If the stored date is not in a layout ``parse_date`` accepts.
    """
    # Reuse the shared lookup rather than repeating the load-and-filter logic.
    record = json_by_id(select_id, author_key)
    return parse_date(record["date"])

In [20]:
def get_author_short_name(author_name):
    """Reduce an author key to its short name.

    Keeps the text after the last ":" and, of that, the part before the
    first "-".
    """
    # NOTE(review): this definition rebinds the name imported from
    # data_processing.data_processing_utils at the top of the notebook —
    # confirm which implementation is intended and drop the other.
    _, _, tail = author_name.rpartition(":")
    short_name, _, _ = tail.partition("-")
    return short_name


In [21]:
import json
# Collect the short name of every author listed in the author map.
authors = []
with open("info_folder/author_maps.json", "r", encoding="utf-8") as f:
    text_data = json.load(f)

# Each row's "author_key" is reduced to its short form before being stored.
for row in text_data:
    author_key = get_author_short_name(row["author_key"])
    print(author_key)
    authors.append(author_key)

logan
powell
kugler
musalem
evans
waller
rosengren
george
bullard
yellen
schmid
williams
brainard
mester
daly
barr
bowman
kashkari
harker
jefferson
quarles
bostic
clarida
kaplan
miran
collins
dudley


In [23]:
# Only speeches dated strictly after this cutoff are scored.
start_date = datetime(2018, 1, 1)

# Authors excluded from this run.
skip_authors = {
    "barr", "logan", "powell", "waller", "williams", "schmid", "bullard",
    "evans", "george", "kugler", "musalem", "rosengren", "yellen",
}

for author_key in authors:

    if author_key in skip_authors:
        continue

    # Load the author's speeches once instead of re-reading the file per id.
    with open(f"text_data/{author_key}.json", "r", encoding="utf-8") as f:
        speeches = json.load(f)

    data = []
    for speech in speeches:
        text_id = speech["id"]
        date = parse_date(speech["date"])
        if date <= start_date:
            continue
        # score_tone returns the model's raw reply; json.loads raises if that
        # reply is not valid JSON. The scores file is only written after this
        # inner loop, so such a failure leaves nothing saved for this author.
        tone_score = json.loads(score_tone(speech["text"]))
        result = {"id": text_id, "gpt-5": tone_score}
        print(result)
        data.append(result)

    with open(f"info_folder/score_{author_key}.json", "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)


{'id': 'fedgsq:95804', 'gpt-5': {'Fed Funds Rate': 0.8, 'Labor Market': 0.4, 'Inflation': 0.9, 'Real Activity': -0.2, 'Financial Stability': -0.4, 'Balance Sheet': 0.9}}
{'id': 'fedgsq:22161', 'gpt-5': {'Fed Funds Rate': -0.2, 'Labor Market': 0.7, 'Inflation': -0.1, 'Real Activity': 0.6, 'Financial Stability': 0.2, 'Balance Sheet': 0.0}}
{'id': 'fedgsq:88581', 'gpt-5': {'Fed Funds Rate': -0.5, 'Labor Market': -0.7, 'Inflation': -0.2, 'Real Activity': -0.5, 'Financial Stability': -0.3, 'Balance Sheet': 0.0}}
{'id': 'fedgsq:22338', 'gpt-5': {'Fed Funds Rate': 0.5, 'Labor Market': 0.6, 'Inflation': -0.1, 'Real Activity': 0.6, 'Financial Stability': -0.5, 'Balance Sheet': 0.5}}
{'id': 'fedgsq:86659', 'gpt-5': {'Fed Funds Rate': 0.0, 'Labor Market': 0.0, 'Inflation': 0.0, 'Real Activity': 0.0, 'Financial Stability': -0.6, 'Balance Sheet': 0.0}}
{'id': 'fedgsq:88680', 'gpt-5': {'Fed Funds Rate': -0.9, 'Labor Market': -0.7, 'Inflation': -0.8, 'Real Activity': -0.4, 'Financial Stability': 0.2,