In [17]:
import os
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.callbacks import get_openai_callback
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from typing import List
from dotenv import load_dotenv
load_dotenv("../.env")

OPENAI_KEY = os.getenv('OPENAI_API_KEY')

# Load Data

In [5]:
with open("../data/sample_news.txt", 'r') as f:
    raw_news = f.read()
f.close()

In [6]:
print(raw_news)

Aug 15 (Reuters) - U.S. corporate bond spreads, the premium over Treasuries that companies pay for debt, are starting to recoup some lost ground after recent strong economic data increased hopes for interest rate cuts and calmed recession fears.
Investment-grade corporate bond spreads on Wednesday tightened by 3 basis points to 105 basis points (bps), according to the ICE BofA Corporate U.S. Corporate Index (.MERC0A0), opens new tab.
Junk bond spreads finished Wednesday at 346 bps, also 3 bps tighter this week, according to the ICE BofA High Yield Index (.MERH0A0), opens new tab.
Both high-grade and junk bond spreads retraced much of early August's dramatic widening, after surprisingly weak July jobs and productivity reports prompted concerns of a sharp economic downturn and potential recession.
Economic data this week appears to have calmed recession fears. U.S. consumer prices in July rose at their slowest pace in nearly 3-1/2 years, while the cost of services fell by the most in nea

# Split News

In [7]:
doc = Document(
    page_content=raw_news,
    metadata={"source": "sample"},
)

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
splits = text_splitter.split_documents([doc])

In [8]:
splits

[Document(metadata={'source': 'sample'}, page_content='Aug 15 (Reuters) - U.S. corporate bond spreads, the premium over Treasuries that companies pay for debt, are starting to recoup some lost ground after recent strong economic data increased hopes for interest rate cuts and calmed recession fears.\nInvestment-grade corporate bond spreads on Wednesday tightened by 3 basis points to 105 basis points (bps), according to the ICE BofA Corporate U.S. Corporate Index (.MERC0A0), opens new tab.'),
 Document(metadata={'source': 'sample'}, page_content="Junk bond spreads finished Wednesday at 346 bps, also 3 bps tighter this week, according to the ICE BofA High Yield Index (.MERH0A0), opens new tab.\nBoth high-grade and junk bond spreads retraced much of early August's dramatic widening, after surprisingly weak July jobs and productivity reports prompted concerns of a sharp economic downturn and potential recession."),
 Document(metadata={'source': 'sample'}, page_content='Economic data this w

# Call ChatGPT

In [13]:
llm = ChatOpenAI(model="gpt-4o-mini-2024-07-18", temperature=0.0)

## Risk Topic

In [30]:
class RiskTopic(BaseModel):
    """Risk-related topics mentioned in the news passage"""
    market: bool = Field(
        ..., 
        description="passage mentions risk about macro economy",
    )
    credit: bool = Field(
        ..., 
        description="passage mentions risk about borrowers",
    )
    liquidity: bool = Field(
        ..., 
        description="passage mentions risk the underlying investment",
    )
    investor: bool = Field(
        ..., 
        description="passage mentions risk about investors",
    )
    company_specific: bool = Field(
        ..., 
        description="passage mentions risk about specific company",
    )



In [31]:
system = """You are an expert investment analyst with specialization in the bond market.
Given a passage from a financial news, you need to identify the risk-related topics in the passage.

You can only choose from the following topics:
    - market: risk about macro economy
    - credit: risk about borrowers
    - liquidity: risk about the underlying investment
    - investor: risk about investors
    - company-specific: risk about specific company

Here are some examples of correct response:

example_user: Inflation continues to be a key driver of macro-economic risk. Central banks around the world, led by the U.S. Federal Reserve, have been aggressively hiking interest rates to tame persistent inflationary pressures.
example_assistant: {{"market": True, "credit": False, "liquidity": False, "investor": False, "company_specific": False}}

example_user: Credit risk within the corporate sector has spiked as businesses grapple with higher debt servicing costs. The sectors most impacted include real estate, retail, and small- to medium-sized enterprises (SMEs), many of which are dealing with weaker consumer demand and supply chain disruptions. In particular, real estate firms are facing difficulty as higher mortgage rates lead to a cooling housing market, reducing revenues and profitability.
example_assistant: {{"market": False, "credit": True, "liquidity": False, "investor": False, "company_specific": False}}

example_user: In a significant reversal of last year's tech sector downturn, major technology stocks are rallying on the back of strong earnings reports and investor enthusiasm around artificial intelligence (AI) and cloud computing growth. 
example_assistant: {{"market": False, "credit": False, "liquidity": False, "investor": False, "company_specific": False}}"""

prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{input}")])

In [32]:
print(splits[0].page_content)

Aug 15 (Reuters) - U.S. corporate bond spreads, the premium over Treasuries that companies pay for debt, are starting to recoup some lost ground after recent strong economic data increased hopes for interest rate cuts and calmed recession fears.
Investment-grade corporate bond spreads on Wednesday tightened by 3 basis points to 105 basis points (bps), according to the ICE BofA Corporate U.S. Corporate Index (.MERC0A0), opens new tab.


In [33]:
structured_llm = llm.with_structured_output(RiskTopic)

with get_openai_callback() as cb:
    result = structured_llm.invoke(splits[0].page_content)
    print(result)
    print("---")
print()

print(f"Total Tokens: {cb.total_tokens}")
print(f"Prompt Tokens: {cb.prompt_tokens}")
print(f"Completion Tokens: {cb.completion_tokens}")
print(f"Total Cost (USD): ${cb.total_cost}")

market=True credit=True liquidity=False investor=False company_specific=False
---

Total Tokens: 231
Prompt Tokens: 206
Completion Tokens: 25
Total Cost (USD): $4.59e-05


In [36]:
result

RiskTopic(market=True, credit=True, liquidity=False, investor=False, company_specific=False)

# Estimate Cost

- $0.150 / 1M input tokens
- $0.600 / 1M output tokens

In [39]:
average_prompt_tokens = 206
average_completion_tokens = 25

N = 1

cost = (average_prompt_tokens * N) / 1e6 * 0.15 + (average_completion_tokens * N) / 1e6 * 0.6
cost

4.59e-05