In [1]:
import os
from dotenv import load_dotenv
import pandas_datareader.data as web
from datetime import datetime
import pandas as pd

load_dotenv()
fred_key = os.getenv("FRED_API_KEY")

# Fetch the Federal Funds Rate data from FRED 
start_date = datetime(2023, 12, 6) 
fedfunds = web.DataReader("FEDFUNDS", "fred", start=start_date, api_key=fred_key)

# Rename and clean up
fedfunds = fedfunds.rename(columns={"FEDFUNDS": "fed_funds_rate"})
fedfunds.index.name = "timestamp"

# Resample to hourly frequency (forward-fill from rate announcement dates)
fedfunds_hourly = fedfunds.resample("H").ffill()

# Save to CSV
output_path = "data/raw/macro/fedfunds_hourly.csv"
fedfunds_hourly.to_csv(output_path)

print(f"FEDFUNDS data saved to {output_path}")


FEDFUNDS data saved to data/raw/macro/fedfunds_hourly.csv


  fedfunds_hourly = fedfunds.resample("H").ffill()


In [7]:
import os
from transformers import pipeline
import pandas as pd
from transformers import AutoTokenizer

# Load FinBERT
sentiment_pipeline = pipeline("sentiment-analysis", model="ProsusAI/finbert")

# Prepare path
statements_dir = "data/raw/macro/statements"
results = []

for filename in os.listdir(statements_dir):
    if filename.endswith(".txt"):
        date_str = filename[:10]  # e.g., "2023-12-13"
        path = os.path.join(statements_dir, filename)
        
        with open(path, "r", encoding="utf-8") as f:
            text = f.read()
        
        tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")

        # Tokenize and truncate to 512 tokens
        inputs = tokenizer(text, truncation=True, max_length=512, return_tensors="pt")

        # Manually pass through model pipeline
        result = sentiment_pipeline.tokenizer.decode(inputs["input_ids"][0])
        result = sentiment_pipeline(text, truncation=True, max_length=512)[0]

        label_map = {
            "positive": 1,
            "neutral": 0,
            "negative": -1
        }

        label = result["label"].lower()
        sentiment_score = label_map[label] * result["score"]


        results.append({
            "timestamp": f"{date_str} 14:00",  # FOMC statements release at 2:00 PM ET
            "label": result["label"],
            "confidence": result["score"],
            "sentiment_score": sentiment_score
        })

# Save to interim CSV
output_df = pd.DataFrame(results)
os.makedirs("data/interim/macro", exist_ok=True)
output_df.to_csv("data/interim/macro/fomc_sentiment.csv", index=False)

print("✅ FOMC sentiment scores saved.")


Device set to use cpu


✅ FOMC sentiment scores saved.
