In [76]:
import anthropic
import json

from pymongo import MongoClient
import pandas as pd
import re

import time

In [122]:
# Read local settings (credentials are kept out of the notebook itself).
with open("config.json") as f:
    config = json.load(f)

password = config["MONGO_PASSWORD"]

# MongoDB connection.
# The handle is named `mongo_client` rather than `client` because the
# notebook later binds `client` to the Anthropic API client; sharing the name
# means re-running this cell silently breaks the summarisation calls.
# NOTE(review): PyMongo requires credentials embedded in a URI to be
# percent-escaped. If the password can contain reserved characters
# (e.g. '@', ':', '/', '%'), escape it before interpolating -- confirm how it
# is stored in config.json.
mongo_client = MongoClient(f"mongodb+srv://bootsmajames:{password}@jamesbcluster.wdq3i.mongodb.net/")
db = mongo_client["bank_of_canada"]
collection = db["monetary_policy_reports"]

# Query the collection once and build the DataFrame from that same list, so
# row i of `data` is guaranteed to be document i of `mongo_data` (the write-back
# step pairs them by position) and the collection is not fetched twice.
mongo_data = list(collection.find())
data = pd.DataFrame(mongo_data)

In [31]:
def clean_text(text):
    """Normalize one raw text field.

    Non-string values (e.g. missing entries) are handed back unchanged.
    For strings: anything matching ``<...>`` is dropped, every character that
    is not a word character, whitespace, '.' or ',' is replaced by a space,
    and the result is trimmed and lower-cased.
    """
    if isinstance(text, str):
        without_tags = re.sub(r'<.*?>', '', text)
        basic_chars_only = re.sub(r'[^\w\s.,]', ' ', without_tags)
        return basic_chars_only.strip().lower()
    return text

# Run the text normalizer over each free-text column, overwriting it in place.
for text_column in ("lead", "pdf_text", "pr_title", "pr_body"):
    data[text_column] = data[text_column].apply(clean_text)

In [32]:
# Re-read the local config file and pull out the Anthropic API key.
with open("config.json") as config_file:
    config = json.loads(config_file.read())

API_KEY = config["CLAUDE_API_KEY"]

# API client used by GenerateMessage for token counting and message creation.
client = anthropic.Anthropic(api_key=API_KEY)

In [33]:
def create_prompt(entry):
    """Build the summarisation prompt for a single report record.

    `entry` is indexed by the keys 'lead', 'pr_title', 'pr_body' and
    'pdf_text'; each value is interpolated into a fixed template that asks
    for a summary and sentiment analysis. Returns the prompt string.
    """
    # Look the fields up first (same order as they appear in the template).
    lead = entry['lead']
    decision = entry['pr_title']
    reasons = entry['pr_body']
    report = entry['pdf_text']

    return f"""
        Summarize the following monetary policy report:
        {lead}

        Interest Rate Decision:
        {decision}

        Reasons:
        {reasons}

        Report:
        {report}

        Provide a summary and sentiment analysis for the report.
        """

In [77]:
def GenerateMessage(data_point, current_tokens, time_left,
                    model="claude-3-5-sonnet-20241022",
                    max_tokens=1024,
                    token_budget=40000):
    """Request a summary for one report while tracking input-token usage.

    Args:
        data_point: Record passed to `create_prompt` to build the user message.
        current_tokens: Input tokens already counted in the current window.
        time_left: Seconds to sleep if this request would exceed the budget.
        model: Model identifier used for both token counting and generation.
        max_tokens: Upper bound on generated tokens for the reply.
        token_budget: Input-token threshold that triggers the sleep.

    Returns:
        A `(message, current_tokens)` tuple: the API response and the updated
        running input-token count.
    """
    # Build the prompt once so the counted request and the sent request are
    # guaranteed to be the same payload.
    messages = [
        {"role": "user", "content": create_prompt(data_point)}
    ]

    tokens = client.messages.count_tokens(model=model, messages=messages)
    current_tokens += tokens.input_tokens

    if current_tokens > token_budget:
        # Clamp at zero: time.sleep raises ValueError on a negative duration.
        time.sleep(max(0, time_left))
        # This request is the first one sent after the pause, so its tokens
        # must seed the new count; resetting to 0 would under-count and let
        # the next requests overshoot the budget.
        current_tokens = tokens.input_tokens

    message = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        messages=messages,
    )

    return message, current_tokens

In [86]:
# Keep any summaries collected by an earlier (possibly interrupted) run of
# this cell; start a fresh list only if none exists yet.
try:
    claude_summary
except NameError:
    claude_summary = []

summary_len = len(claude_summary)
current_tokens = 0
start_time = time.time()

for i, row in data.iterrows():
    # Rows whose index is below the number of stored summaries are skipped.
    if i >= summary_len:
        # Position inside the current 60-second window since the loop began.
        seconds_into_window = (time.time() - start_time) % 60
        time_left = 60 - seconds_into_window

        print(row['title'])
        message, current_tokens = GenerateMessage(row, current_tokens, time_left)
        claude_summary.append(message)

Monetary Policy Report – January 2013
Monetary Policy Report – October 2012
Monetary Policy Report – July 2012
Monetary Policy Report – April 2012
Monetary Policy Report – January 2012
Monetary Policy Report – October 2011
Monetary Policy Report – July 2011
Monetary Policy Report – April 2011
Monetary Policy Report – January 2011
Monetary Policy Report – October 2010
Monetary Policy Report – July 2010
Monetary Policy Report – April 2010
Monetary Policy Report – January 2010
Monetary Policy Report – October 2009
Monetary Policy Report – July 2009
Monetary Policy Report – April 2009
Monetary Policy Report Update – January 2009
Monetary Policy Report – October 2008
Monetary Policy Report Update – July 2008
Monetary Policy Report – April 2008
Monetary Policy Report Update – January 2008
Monetary Policy Report – October 2007
Monetary Policy Report Update – July 2007
Monetary Policy Report – April 2007
Monetary Policy Report Update – January 2007
Monetary Policy Report – October 2006
Monetar

48: Monetary Policy Report – April 2013

In [142]:
# Store each generated summary on its source document.
# Pairing is positional: claude_summary[i] is written to mongo_data[i].
for i, mpr in enumerate(mongo_data):
    mpr['claude_summary'] = claude_summary[i].content[0].text
    # Send only the new field. Passing the whole in-memory document to $set
    # would re-upload every field (including the large report text and _id)
    # and could overwrite changes made to the document since it was loaded.
    collection.update_one(
        {"_id": mpr["_id"]},
        {"$set": {"claude_summary": mpr['claude_summary']}},
    )