In [None]:
import os
from google.colab import userdata
# Copy the OpenAI key from the Colab user secrets into the process environment so that
# the OpenAI-backed clients created in later cells can pick it up.
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

In [None]:
from google.colab import drive
# Mount Google Drive; the knowledge-base paths used below live under /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import PyPDF2

def extract_text_from_pdf(pdf_path, output_txt_path):
    """Extract the text of every page of a PDF and write it to a text file.

    Args:
        pdf_path: Path of the PDF file to read.
        output_txt_path: Path of the text file to create (overwritten if present).

    The page texts are concatenated in page order without any separator.
    The file is written as UTF-8 so that it can be read back with
    ``encoding="utf-8"`` regardless of the platform's default encoding.
    """
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Defensive `or ""`: a page without extractable text must not break the join.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

    with open(output_txt_path, 'w', encoding='utf-8') as txt_file:
        txt_file.write("".join(page_texts))

In [None]:
# Folder on the mounted shared drive that holds the RAG knowledge-base documents.
base_path = '/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base'

# Base names (without extension) of the PDFs to convert.
docs = ['sector_specific_guidance_filtered', 'ratios_and_adjustments', 'filtered_general_methodology']

# Write a .txt file next to each .pdf; the loading cell further down reads these .txt files.
for fname in docs:
  extract_text_from_pdf(f"{base_path}/{fname}.pdf", f"{base_path}/{fname}.txt")

In [None]:
!pip install -qq langchain
!pip install -qq openai
!pip install -qq tiktoken
!pip install -qq faiss-gpu
!pip install -qq langchain_experimental
!pip install -qq "langchain[docarray]"
!pip install -qq sentence-transformers
!pip install -qq langchain_community

In [None]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.indexes import VectorstoreIndexCreator
from langchain_experimental.agents.agent_toolkits.csv.base import create_csv_agent
from langchain.agents.agent_types import AgentType
import tiktoken

In [None]:
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

# Folder on the mounted shared drive that holds the extracted knowledge-base text files.
base_path = '/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base'

docs = ['sector_specific_guidance_filtered', 'ratios_and_adjustments', 'filtered_general_methodology']

# Chunking parameters, measured in characters.
# NOTE(review): 100-character chunks yield sentence fragments (see the sample displayed
# by this cell); larger chunks may retrieve more useful context — confirm before changing.
CHUNK_SIZE = 100
CHUNK_OVERLAP = 50

# The splitter's configuration does not depend on the document, so build it once
# instead of once per file.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Flat list of chunked Document objects from all knowledge-base files.
data = []

for doc in docs:
  loader = TextLoader(f'{base_path}/{doc}.txt', encoding="utf-8")
  doc_data = text_splitter.split_documents(loader.load())
  data += doc_data

data[:10]

[Document(page_content='Sector-Specific Guidance \n Sector Descriptions \n SECT OR  DESCRIPTION \n Aerospace \n and', metadata={'source': '/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/sector_specific_guidance_filtered.txt'}),
 Document(page_content='Aerospace \n and \n Defense  Companies that derive a majority of their revenues from the design,', metadata={'source': '/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/sector_specific_guidance_filtered.txt'}),
 Document(page_content='manufacture, or repair of civil aircraft (i.e., jetliners, business jets, regional', metadata={'source': '/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/sector_specific_guidance_filtered.txt'}),
 Document(page_content='jets, general aviation aircraft, and helicopters) or supply related', metadata={'source': '/content/drive/Shareddrives/ENGS Final Project/Da

In [None]:
from langchain.embeddings import OpenAIEmbeddings

# Embed every chunk in `data` with OpenAI embeddings and index the vectors in FAISS.
embeddings = OpenAIEmbeddings()

vectorstore = FAISS.from_documents(data, embedding=embeddings)

  warn_deprecated(


In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

# Second FAISS index over the same chunks, embedded with the default Hugging Face
# embedding model; this is the index the Hugging Face chains retrieve from.
hf_embeddings = HuggingFaceEmbeddings()

hf_vectorstore = FAISS.from_documents(data, embedding=hf_embeddings)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Sanity check: display the index object to confirm it was built.
hf_vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x799de9e5b8b0>

In [None]:
from langchain.chat_models import ChatOpenAI

# Chat model handed to `ConversationalRetrievalChain.from_llm` further down.
# Note that the LCEL `chain` uses its own separate `model = ChatOpenAI()` instead.
llm = ChatOpenAI(temperature=0.6, model_name="gpt-4-turbo-preview")

  warn_deprecated(


In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

# Prompt used by the OpenAI, Hugging Face and FinGPT chains: retrieved context first, then the task.
template = """Use the following context to help you answer the task. If you don't know the answer, please think rationally and answer from your own knowledge base using the methodology provided. You have all the data you need to make this prediction:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

model = ChatOpenAI()

chain = (
    {
        # The retriever returns Document objects. Join their text so that the prompt
        # receives readable context rather than the repr of a list of Documents
        # (page_content plus metadata), which wastes tokens and confuses the model.
        "context": vectorstore.as_retriever()
        | RunnableLambda(lambda retrieved: "\n\n".join(d.page_content for d in retrieved)),
        "question": RunnablePassthrough(),
    }
    | prompt
    | model
    | StrOutputParser()
)

In [None]:
from langchain_community.llms import HuggingFaceHub
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_core.output_parsers import JsonOutputParser

# Hosted text-generation model served through the Hugging Face Hub inference API.
hf_llm = HuggingFaceHub(
    # repo_id="HuggingFaceH4/zephyr-7b-beta",
    # repo_id="meta-llama/Llama-2-7b-chat-hf",
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    task="text-generation",
    model_kwargs={
        "max_new_tokens": 2500,
        "top_k": 30,
        "temperature": 0.1,
        "repetition_penalty": 1.03,
    },
)

hf_chain = (
    {
        # Join the text of the retrieved Documents. Passing the retriever output
        # directly would inject the repr of a list of Document objects
        # (including metadata) into the prompt.
        "context": hf_vectorstore.as_retriever()
        | RunnableLambda(lambda retrieved: "\n\n".join(d.page_content for d in retrieved)),
        "question": RunnablePassthrough(),
    }
    | prompt
    | hf_llm
    | StrOutputParser()
)

In [None]:
from langchain_community.llms import HuggingFaceHub
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

# FinGPT model served through the Hugging Face Hub inference API.
# NOTE(review): the repo name suggests a LoRA adapter rather than a full model —
# confirm that the hosted inference API can actually serve it.
fingpt_llm = HuggingFaceHub(
    repo_id="FinGPT/fingpt-mt_llama2-7b_lora",
    task="text-generation",
    model_kwargs={
        "max_new_tokens": 512,
        "top_k": 30,
        "temperature": 0.1,
        "repetition_penalty": 1.03,
    },
)

fingpt_chain = (
    {
        # Join the text of the retrieved Documents. Passing the retriever output
        # directly would inject the repr of a list of Document objects
        # (including metadata) into the prompt.
        "context": hf_vectorstore.as_retriever()
        | RunnableLambda(lambda retrieved: "\n\n".join(d.page_content for d in retrieved)),
        "question": RunnablePassthrough(),
    }
    | prompt
    # Route to the FinGPT model: the chain previously piped into `hf_llm`, so
    # "fingpt_chain" silently queried the Mistral endpoint instead.
    | fingpt_llm
    | StrOutputParser()
)

In [None]:
from langchain_community.llms import HuggingFaceHub

from langchain_core.prompts import PromptTemplate

# Mistral instruct model served through the Hugging Face Hub inference API.
mistral_llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    task="text-generation",
    model_kwargs={
        "max_new_tokens": 5000,
        "top_k": 30,
        "temperature": 0.1,
        "repetition_penalty": 1.03,
    },
)

# Same task prompt as the generic template, wrapped in Mistral's [INST] ... [/INST] markers.
mistral_template = """<s>[INST]Use the following context to help you answer the task. If you don't know the answer, please think rationally and answer from your own knowledge base using the methodology provided. You have all the data you need to make this prediction:
{context}

Question: {question} [/INST]
"""
# Use a plain-text PromptTemplate: this is a text-generation model, and rendering a
# ChatPromptTemplate for it prefixed the prompt with "Human: " ahead of the
# <s>[INST] markers (visible in the recorded outputs of this chain).
mistral_prompt = PromptTemplate.from_template(mistral_template)

mistral_chain = (
    {
        # Join the text of the retrieved Documents. Passing the retriever output
        # directly would inject the repr of a list of Document objects
        # (including metadata) into the prompt.
        "context": hf_vectorstore.as_retriever()
        | RunnableLambda(lambda retrieved: "\n\n".join(d.page_content for d in retrieved)),
        "question": RunnablePassthrough(),
    }
    | mistral_prompt
    | mistral_llm
    | StrOutputParser()
)

In [None]:
# Run the Mistral RAG chain on the JSON-format prompt for the first row of `df`.
# NOTE(review): `custom_json_prompt` is defined in a later cell and `df` is not created
# in any cell visible above, so this cell fails on a fresh top-to-bottom run.
mistral_chain.invoke(custom_json_prompt(df.iloc[0]))

'Human: <s>[INST]Use the following context to help you answer the task. If you don\'t know the answer, please think rationally and answer from your own knowledge base using the methodology provided. You have all the data you need to make this prediction:\n[Document(page_content="company\'s business risk profile, then evaluate its financial risk profile, then", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content="The low volatility table (table 19) will generally apply when a company\'s CICRA is", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content=\'financial risk profile assessment to determine its anchor (see table 3).\', metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RA

In [None]:
# Same check on the row at position 5 of `df`.
# NOTE(review): depends on `custom_json_prompt` (defined in a later cell) and on `df`,
# which is not created in any cell visible above.
mistral_chain.invoke(custom_json_prompt(df.iloc[5]))

'Human: <s>[INST]Use the following context to help you answer the task. If you don\'t know the answer, please think rationally and answer from your own knowledge base using the methodology provided. You have all the data you need to make this prediction:\n[Document(page_content="company\'s business risk profile, then evaluate its financial risk profile, then", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content="The low volatility table (table 19) will generally apply when a company\'s CICRA is", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content=\'The medial volatility table (table 18) will generally apply for a company with a\', metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile

In [None]:
def mistral_extract_output(text):
  """Parse the JSON answer out of a raw Mistral completion.

  The hosted model returns the prompt followed by the completion, so only the
  text after the last ``[/INST]`` marker is considered.

  Args:
    text: Raw string returned by the Mistral chain.

  Returns:
    The decoded JSON value (normally a dict with 'answer' and 'reasoning'), or
    ``{"error": "Invalid JSON format"}`` when no valid JSON can be recovered.
  """
  # Local import: the notebook's `import json` cell appears after this definition.
  import json

  # Split on the bare marker and strip, instead of requiring exactly "[/INST]\n\n".
  completion = text.rsplit("[/INST]", 1)[-1].strip()

  # Try the whole completion first, then the outermost {...} span, which covers
  # completions that put step-by-step prose around the JSON object.
  candidates = [completion]
  first_brace = completion.find("{")
  last_brace = completion.rfind("}")
  if first_brace != -1 and last_brace > first_brace:
      candidates.append(completion[first_brace:last_brace + 1])

  for candidate in candidates:
      try:
          # strict=False accepts literal newlines inside string values, which the
          # multi-paragraph "reasoning" field requested by the prompt will contain.
          return json.loads(candidate, strict=False)
      except json.JSONDecodeError:
          continue
  return {"error": "Invalid JSON format"}

In [None]:
# Re-run of the Mistral chain on the first row of `df` (same call as an earlier cell).
# NOTE(review): depends on `custom_json_prompt` (defined in a later cell) and on `df`,
# which is not created in any cell visible above.
mistral_chain.invoke(custom_json_prompt(df.iloc[0]))

'Human: <s>[INST]Use the following context to help you answer the task. If you don\'t know the answer, please think rationally and answer from your own knowledge base using the methodology provided. You have all the data you need to make this prediction:\n[Document(page_content="company\'s business risk profile, then evaluate its financial risk profile, then", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content="The low volatility table (table 19) will generally apply when a company\'s CICRA is", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content=\'financial risk profile assessment to determine its anchor (see table 3).\', metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RA

In [None]:
# Inspect the first record to see which fields the prompts can interpolate.
# NOTE(review): `df` is not created in any cell visible above — confirm where it is loaded.
df.iloc[0]

Unnamed: 0                                                                   0
company_name                                                         AAR Corp.
sector                                                     AEROSPACE & DEFENSE
financial_risk_profile                                        [3] Intermediate
circa_rating                                             [3] Intermediate risk
business_description         AAR Corp. provides products and services to co...
ffo_to_debt_ltm                                                           0.18
debt_to_ebitda_ltm                                                        1.31
cfo_to_debt_ltm                                                           0.23
focf_to_debt_ltm                                                          0.11
dcf_to_debt_ltm                                                           0.11
ffo_interest_coverage_ltm                                                 2.93
ebitda_to_interest_ltm                              

In [None]:
# Run the Hugging Face chain on the free-text prompt for the first row of `df`.
# NOTE(review): `custom_prompt` is defined in a later cell and `df` is not created in any
# cell visible above, so this cell fails on a fresh top-to-bottom run.
hf_chain.invoke(custom_prompt(df.iloc[0]))

HfHubHTTPError: 424 Client Error: Failed Dependency for url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta (Request ID: HcEfX1Wno5uZ6HVPQyr28)

Request failed during generation: Server error: Out of available cache blocks: asked 197, only 122 free blocks

In [None]:
# Run the FinGPT chain on the free-text prompt for the first row of `df`.
# NOTE(review): `custom_prompt` is defined in a later cell and `df` is not created in any
# cell visible above.
fingpt_chain.invoke(custom_prompt(df.iloc[0]))

'Human: Use the following context to help you answer the task. If you don\'t know the answer, please think rationally and answer from your own knowledge base using the methodology provided. You have all the data you need to make this prediction:\n[Document(page_content="company\'s business risk profile, then evaluate its financial risk profile, then", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content="The low volatility table (table 19) will generally apply when a company\'s CICRA is", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content=\'financial risk profile assessment to determine its anchor (see table 3).\', metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowled

In [None]:
# Re-run of the Hugging Face chain on the first row of `df` (same call as an earlier cell).
# NOTE(review): depends on `custom_prompt` (defined in a later cell) and on `df`,
# which is not created in any cell visible above.
hf_chain.invoke(custom_prompt(df.iloc[0]))

'Human: Use the following context to help you answer the task. If you don\'t know the answer, please think rationally and answer from your own knowledge base using the methodology provided. You have all the data you need to make this prediction:\n[Document(page_content="company\'s business risk profile, then evaluate its financial risk profile, then", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content="The low volatility table (table 19) will generally apply when a company\'s CICRA is", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content=\'financial risk profile assessment to determine its anchor (see table 3).\', metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowled

In [None]:
# Another re-run of the Hugging Face chain on the first row of `df`; the recorded output
# is an API error rather than a result.
# NOTE(review): depends on `custom_prompt` (defined in a later cell) and on `df`,
# which is not created in any cell visible above.
hf_chain.invoke(custom_prompt(df.iloc[0]))

BadRequestError:  (Request ID: ioqbL4E1dSwOVa7wLUGEN)

Bad request:
Model requires a Pro subscription; check out hf.co/pricing to learn more. Make sure to include your HF token in your query.

In [None]:
# Run the Hugging Face chain on the JSON-format prompt for the first row of `df`.
# NOTE(review): depends on `custom_json_prompt` (defined in a later cell) and on `df`,
# which is not created in any cell visible above.
hf_chain.invoke(custom_json_prompt(df.iloc[0]))

'Human: Use the following context to help you answer the task. If you don\'t know the answer, please think rationally and answer from your own knowledge base using the methodology provided. You have all the data you need to make this prediction:\n[Document(page_content="company\'s business risk profile, then evaluate its financial risk profile, then", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content="The low volatility table (table 19) will generally apply when a company\'s CICRA is", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content=\'financial risk profile assessment to determine its anchor (see table 3).\', metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowled

In [None]:
# Run the OpenAI-backed chain on the free-text prompt for the first row of `df`.
# NOTE(review): depends on `custom_prompt` (defined in a later cell) and on `df`,
# which is not created in any cell visible above.
chain.invoke(custom_prompt(df.iloc[0]))

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
# Re-run of the OpenAI-backed chain on the first row of `df` (same call as the previous cell).
# NOTE(review): depends on `custom_prompt` (defined in a later cell) and on `df`,
# which is not created in any cell visible above.
chain.invoke(custom_prompt(df.iloc[0]))

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
# Conversational retrieval chain: answers with `llm`, retrieving from the
# OpenAI-embedded FAISS index.
# NOTE(review): no `memory` is passed here, so each call presumably needs an explicit
# `chat_history` input — confirm against the LangChain version in use.
conversation_chain = ConversationalRetrievalChain.from_llm(
    chain_type="stuff",
    llm=llm,
    retriever=vectorstore.as_retriever(),
)

In [None]:
q = "What are the necessary inputs to cash flow/leverage and competitive position analysis?"
# The chain is built without a memory object, so it needs both `question` and
# `chat_history`; an empty history makes this a standalone question.
# `.invoke` replaces the deprecated direct call on the chain.
res = conversation_chain.invoke({"question": q, "chat_history": []})

res["answer"]

  warn_deprecated(


"To conduct a cash flow/leverage and competitive position analysis, the necessary inputs generally include:\n\n1. **Financial Statements**: This encompasses the balance sheet, income statement, and cash flow statement of a company. These documents provide the raw data needed to calculate various financial ratios and metrics.\n\n2. **Key Financial Ratios**: These ratios are crucial for analyzing a company's financial health and include, but are not limited to, debt-to-equity ratio, interest coverage ratio, operating margin, and net profit margin. Specific ratios relevant to cash flow/leverage analysis might include measures of liquidity, solvency, and operational efficiency.\n\n3. **Industry Benchmarks**: Comparing a company's performance against industry averages or key competitors provides insight into its competitive position. This requires access to industry data and performance metrics of competitors.\n\n4. **Historical Data**: Historical financial performance of the company is ess

In [None]:
import json
import re
from tqdm import tqdm

def custom_json_prompt(row):
  """Build the JSON-answer prompt asking for a company's S&P financial risk profile (1-6).

  Args:
    row: Mapping-like record (e.g. a DataFrame row) providing `company_name`, `sector`,
      `circa_rating`, `business_description` and the `*_ltm` metric fields
      interpolated below.

  Returns:
    The prompt string: volatility-table selection rules, the standard / medial / low
    benchmark tables, the company's metrics, and instructions to answer with a JSON
    object holding 'answer' and 'reasoning', followed by a worked example.

  NOTE(review): the worked example maps some ratios to bands that disagree with the
  medial table in this same prompt (e.g. EBITDA to Interest of 11 is quoted against the
  5-9 band) — confirm and correct the example.
  NOTE(review): the table thresholds are percentages, while the sample row shown in the
  notebook has ffo_to_debt_ltm = 0.18 — confirm the units passed in.
  """
  return (f"""Your goal is to predict the S&P financial risk profile from 1-6 for {row['company_name']}. Follow the methodology outlined in the knowledge base and the following steps:

  STEP 1: Determine the correct volatility table (standard, medial, or low) to use for {row['company_name']} based on the following CICRA and industry:

  CICRA: {row['circa_rating']}
  Industry: {row['sector']}.
  Business description: {row['business_description']}

  The low volatility table will generally apply when a company's CICRA is '1' but can infrequently also apply to a company with a CICRA of '2' if the company exhibits or is expected to exhibit low levels of volatility.
  The medial volatility table will generally apply for a company with a CICRA of '2' but can infrequently also apply to a company with a CICRA of '1' if the company exhibits or is expected to exhibit medial levels of volatility.
  The standard volatility table serves as the relevant benchmark table for all CICRA scores other than '1', but we will always use it for companies with a CICRA of '1' or '2' whose competitive position is assessed as '5' or '6'.

  Standard Volatility Table:

  Minimal Risk: FFO/Debt > 60%, Debt/EBITDA < 1.5, FFO/Cash Interest > 13, EBITDA/Interest > 15, CFO/Debt > 50%, FOCF/Debt > 40, DCF/Debt > 25
  Modest Risk: FFO/Debt 45-60%, Debt/EBITDA 1.5-2, FFO/Cash Interest 9-13, EBITDA/Interest 10-15, CFO/Debt 35-50%, FOCF/Debt 25-40, DCF/Debt 15-25
  Intermediate Risk: FFO/Debt 30-45%, Debt/EBITDA 2-3, FFO/Cash Interest 6-9, EBITDA/Interest 6-10, CFO/Debt 25-35%, FOCF/Debt 15-25, DCF/Debt 10-15
  Significant Risk: FFO/Debt 20-30%, Debt/EBITDA 3-4, FFO/Cash Interest 4-6, EBITDA/Interest 3-6, CFO/Debt 15-25%, FOCF/Debt 10-15, DCF/Debt 5-10
  Aggressive Risk: FFO/Debt 12-20%, Debt/EBITDA 4-5, FFO/Cash Interest 2-4, EBITDA/Interest 2-3, CFO/Debt 10-15%, FOCF/Debt 5-10, DCF/Debt 2-5
  Highly Leveraged: FFO/Debt < 12%, Debt/EBITDA > 5, FFO/Cash Interest < 2, EBITDA/Interest < 2, CFO/Debt < 10%, FOCF/Debt < 5, DCF/Debt < 2

  Medial Volatility Table:

  Minimal Risk: FFO/Debt > 50%, Debt/EBITDA < 1.75, FFO/Cash Interest > 10.5, EBITDA/Interest > 14, CFO/Debt > 40%, FOCF/Debt > 30, DCF/Debt > 18
  Modest Risk: FFO/Debt 35-50%, Debt/EBITDA 1.75-2.5, FFO/Cash Interest 7.5-10.5, EBITDA/Interest 9-14, CFO/Debt 27.5-40%, FOCF/Debt 17.5-30, DCF/Debt 11-18
  Intermediate Risk: FFO/Debt 23-35%, Debt/EBITDA 2.5-3.5, FFO/Cash Interest 5-7.5, EBITDA/Interest 5-9, CFO/Debt 18.5-27.5%, FOCF/Debt 9.5-17.5, DCF/Debt 6.5-11
  Significant Risk: FFO/Debt 13-23%, Debt/EBITDA 3.5-4.5, FFO/Cash Interest 3-5, EBITDA/Interest 2.75-5, CFO/Debt 10.5-18.5%, FOCF/Debt 5-9.5, DCF/Debt 2.5-6.5
  Aggressive Risk: FFO/Debt 9-13%, Debt/EBITDA 4.5-5.5, FFO/Cash Interest 1.75-3, EBITDA/Interest 1.75-2.75, CFO/Debt 7-10.5%, FOCF/Debt 0-5, DCF/Debt (11)-2.5
  Highly Leveraged: FFO/Debt < 9%, Debt/EBITDA > 5.5, FFO/Cash Interest < 1.75, EBITDA/Interest < 1.75, CFO/Debt < 7%, FOCF/Debt < 0, DCF/Debt < (11)

  Low Volatility Table:

  Minimal Risk: FFO/Debt > 35%, Debt/EBITDA < 2, FFO/Cash Interest > 8, EBITDA/Interest > 13, CFO/Debt > 30%, FOCF/Debt > 20, DCF/Debt > 11
  Modest Risk: FFO/Debt 23-35%, Debt/EBITDA 2-3, FFO/Cash Interest 5-8, EBITDA/Interest 7-13, CFO/Debt 20-30%, FOCF/Debt 10-20, DCF/Debt 7-11
  Intermediate Risk: FFO/Debt 13-23%, Debt/EBITDA 3-4, FFO/Cash Interest 3-5, EBITDA/Interest 4-7, CFO/Debt 12-20%, FOCF/Debt 4-10, DCF/Debt 3-7
  Significant Risk: FFO/Debt 9-13%, Debt/EBITDA 4-5, FFO/Cash Interest 2-3, EBITDA/Interest 2.5-4, CFO/Debt 8-12%, FOCF/Debt 0-4, DCF/Debt 0-3
  Aggressive Risk: FFO/Debt 6-9%, Debt/EBITDA 5-6, FFO/Cash Interest 1.5-2, EBITDA/Interest 1.5-2.5, CFO/Debt 5-8%, FOCF/Debt (10)-0, DCF/Debt (20)-0
  Highly Leveraged: FFO/Debt < 6%, Debt/EBITDA > 6, FFO/Cash Interest < 1.5, EBITDA/Interest < 1.5, CFO/Debt < 5%, FOCF/Debt < (10), DCF/Debt < (20)


  STEP 2: Also take into account the way these financial metrics should be weighed based on industry:

  STEP 3: Apply these financial metrics and utilize the corresponding table based on volatility to determine the financial risk (1-6).

  Key Financial Metrics:
  FFO to debt: {row['ffo_to_debt_ltm']}
  Debt to EBITDA: {row['debt_to_ebitda_ltm']}
  FFO cash interest cover: {row['ffo_interest_coverage_ltm']}
  EBITDA to interest: {row['ebitda_to_interest_ltm']}
  CFO to debt: {row['cfo_to_debt_ltm']}
  FOCF to debt: {row['focf_to_debt_ltm']}
  DCF to debt: {row['dcf_to_debt_ltm']}
  EBIT margin: {row['ebit_margin_ltm']}
  EBITDA margin: {row['ebitda_margin_ltm']}
  Return on capital: {row['return_on_capital_ltm']}
  CICRA: {row['circa_rating']}

  Think step by step and for your final answer, return a JSON object with 'answer' and 'reasoning' fields in the following format:

  {{"answer": "[a single category from 1-6]", "reasoning": "[A methodological explanation as to why you chose that specific rating]"}}

  Ensure that all strings are enclosed in double quotes.

  For example:

  {{"answer": "3", "reasoning": "To determine the financial risk profile for Becton, Dickinson and Company (BDX), we need to follow a systematic approach based on the provided methodology. Let's break down the task into steps:

  Determine the Volatility Category: Based on BDX's CICRA score of '2', we need to decide between the Standard, Medial, and Low volatility categories. Given the healthcare equipment sector's inherent characteristics and the company's specific financials, we should consider the operational volatility and guidelines provided.

  Apply the Correct Financial Metrics and Categories: After establishing the volatility category, we'll apply the relevant financial metrics and categories to BDX's key financial ratios to ascertain the risk rating.

  Step 1: Determine the Volatility Category
  Given BDX's CICRA score is '2', it initially suggests a potential fit for either the Medial or Standard volatility categories. However, because the company operates in the healthcare equipment sector, which typically exhibits lower volatility due to steady demand and regulatory barriers to entry, there's a rationale for considering the Low volatility table, especially if the healthcare sector's specific characteristics imply lower operational volatility for BDX.

  CICRA Score: '2' suggests Medial volatility but can also fit Low volatility if operational volatility is expected to be low.
  Since the company's operational volatility isn't directly provided, we infer based on the sector characteristics. Healthcare equipment, being essential and regulated, likely has lower volatility. However, without explicit data on BDX's operational volatility or expectations of such, we proceed with caution, leaning towards Medial volatility due to the lack of explicit low volatility indicators and the CICRA score guidance.

  Step 2: Apply Financial Metrics and Categories
  Given the uncertainty in operational volatility but considering the sector and CICRA score, we'll use the Medial Volatility Categories as a more conservative approach without explicit data suggesting otherwise. Now, let's match BDX's financial metrics to the Medial volatility risk categories:

  FFO to Debt: 21% falls into the Significant Risk category (13-23%).
  Debt to EBITDA: 2.95 fits within the Intermediate Risk category (2.5-3.5).
  CFO to Debt: 21%, not directly comparable but similar to FFO to Debt, indicating Significant Risk.
  FOCF to Debt: 16%, which is closely aligned with the Significant Risk category (5-9.5%).
  DCF to Debt: 19%, closer to Intermediate Risk (6.5-11%).
  FFO Cash Interest Cover: 7.48, which places it in the Intermediate Risk category (5-7.5).
  EBITDA to Interest: 11, fitting the Intermediate Risk category (5-9).
  Considering the above metrics, BDX most frequently aligns with the Intermediate Risk and Significant Risk categories across different metrics. The dominant category would be determined by the majority of metrics falling within a specific range. Given the spread, BDX shows a blend of Intermediate and Significant Risk indicators.

  However, since the decision between Intermediate and Significant Risk hinges on the broader context of operational stability, regulatory environment, and market position, which are inherently more stable in the healthcare equipment sector, the final judgment leans slightly towards the Intermediate Risk category as the more appropriate classification, acknowledging a borderline situation.

  Final Determination
  Given the balanced approach and the slight inclination towards Medial volatility without strong evidence for Low volatility, combined with the metrics predominantly indicating an Intermediate Risk, the final rating for Becton, Dickinson and Company can be categorized as Intermediate Risk (3)"}}""")


def custom_prompt(row):
  """Build the free-text prompt asking for a company's S&P financial risk profile (1-6).

  Args:
    row: Mapping-like record (e.g. a DataFrame row) providing `company_name`, `sector`,
      `circa_rating`, `business_description` and the `*_ltm` metric fields
      interpolated below.

  Returns:
    The prompt string: volatility-table selection rules, the standard / medial / low
    benchmark tables, the company's metrics, and an instruction to end the reply with
    a line of the form "ANSWER: <category>".

  NOTE(review): the table thresholds are percentages, while the sample row shown in the
  notebook has ffo_to_debt_ltm = 0.18 — confirm the units passed in.
  """
  return f"""
  Your goal is to predict the S&P financial risk profile from 1-6 for {row['company_name']}. Follow the methodology outlined in the knowledge base and the following steps:


  STEP 1: Determine the correct volatility table (standard, medial, or low) to use for {row['company_name']} based on the following CICRA and industry:

  CICRA: {row['circa_rating']}
  Industry: {row['sector']}.
  Business description: {row['business_description']}

  The low volatility table will generally apply when a company's CICRA is '1' but can infrequently also apply to a company with a CICRA of '2' if the company exhibits or is expected to exhibit low levels of volatility.
  The medial volatility table will generally apply for a company with a CICRA of '2' but can infrequently also apply to a company with a CICRA of '1' if the company exhibits or is expected to exhibit medial levels of volatility.
  The standard volatility table serves as the relevant benchmark table for all CICRA scores other than '1', but we will always use it for companies with a CICRA of '1' or '2' whose competitive position is assessed as '5' or '6'.

  Standard Volatility Table:

  Minimal Risk: FFO/Debt > 60%, Debt/EBITDA < 1.5, FFO/Cash Interest > 13, EBITDA/Interest > 15, CFO/Debt > 50%, FOCF/Debt > 40, DCF/Debt > 25
  Modest Risk: FFO/Debt 45-60%, Debt/EBITDA 1.5-2, FFO/Cash Interest 9-13, EBITDA/Interest 10-15, CFO/Debt 35-50%, FOCF/Debt 25-40, DCF/Debt 15-25
  Intermediate Risk: FFO/Debt 30-45%, Debt/EBITDA 2-3, FFO/Cash Interest 6-9, EBITDA/Interest 6-10, CFO/Debt 25-35%, FOCF/Debt 15-25, DCF/Debt 10-15
  Significant Risk: FFO/Debt 20-30%, Debt/EBITDA 3-4, FFO/Cash Interest 4-6, EBITDA/Interest 3-6, CFO/Debt 15-25%, FOCF/Debt 10-15, DCF/Debt 5-10
  Aggressive Risk: FFO/Debt 12-20%, Debt/EBITDA 4-5, FFO/Cash Interest 2-4, EBITDA/Interest 2-3, CFO/Debt 10-15%, FOCF/Debt 5-10, DCF/Debt 2-5
  Highly Leveraged: FFO/Debt < 12%, Debt/EBITDA > 5, FFO/Cash Interest < 2, EBITDA/Interest < 2, CFO/Debt < 10%, FOCF/Debt < 5, DCF/Debt < 2

  Medial Volatility Table:

  Minimal Risk: FFO/Debt > 50%, Debt/EBITDA < 1.75, FFO/Cash Interest > 10.5, EBITDA/Interest > 14, CFO/Debt > 40%, FOCF/Debt > 30, DCF/Debt > 18
  Modest Risk: FFO/Debt 35-50%, Debt/EBITDA 1.75-2.5, FFO/Cash Interest 7.5-10.5, EBITDA/Interest 9-14, CFO/Debt 27.5-40%, FOCF/Debt 17.5-30, DCF/Debt 11-18
  Intermediate Risk: FFO/Debt 23-35%, Debt/EBITDA 2.5-3.5, FFO/Cash Interest 5-7.5, EBITDA/Interest 5-9, CFO/Debt 18.5-27.5%, FOCF/Debt 9.5-17.5, DCF/Debt 6.5-11
  Significant Risk: FFO/Debt 13-23%, Debt/EBITDA 3.5-4.5, FFO/Cash Interest 3-5, EBITDA/Interest 2.75-5, CFO/Debt 10.5-18.5%, FOCF/Debt 5-9.5, DCF/Debt 2.5-6.5
  Aggressive Risk: FFO/Debt 9-13%, Debt/EBITDA 4.5-5.5, FFO/Cash Interest 1.75-3, EBITDA/Interest 1.75-2.75, CFO/Debt 7-10.5%, FOCF/Debt 0-5, DCF/Debt (11)-2.5
  Highly Leveraged: FFO/Debt < 9%, Debt/EBITDA > 5.5, FFO/Cash Interest < 1.75, EBITDA/Interest < 1.75, CFO/Debt < 7%, FOCF/Debt < 0, DCF/Debt < (11)

  Low Volatility Table:

  Minimal Risk: FFO/Debt > 35%, Debt/EBITDA < 2, FFO/Cash Interest > 8, EBITDA/Interest > 13, CFO/Debt > 30%, FOCF/Debt > 20, DCF/Debt > 11
  Modest Risk: FFO/Debt 23-35%, Debt/EBITDA 2-3, FFO/Cash Interest 5-8, EBITDA/Interest 7-13, CFO/Debt 20-30%, FOCF/Debt 10-20, DCF/Debt 7-11
  Intermediate Risk: FFO/Debt 13-23%, Debt/EBITDA 3-4, FFO/Cash Interest 3-5, EBITDA/Interest 4-7, CFO/Debt 12-20%, FOCF/Debt 4-10, DCF/Debt 3-7
  Significant Risk: FFO/Debt 9-13%, Debt/EBITDA 4-5, FFO/Cash Interest 2-3, EBITDA/Interest 2.5-4, CFO/Debt 8-12%, FOCF/Debt 0-4, DCF/Debt 0-3
  Aggressive Risk: FFO/Debt 6-9%, Debt/EBITDA 5-6, FFO/Cash Interest 1.5-2, EBITDA/Interest 1.5-2.5, CFO/Debt 5-8%, FOCF/Debt (10)-0, DCF/Debt (20)-0
  Highly Leveraged: FFO/Debt < 6%, Debt/EBITDA > 6, FFO/Cash Interest < 1.5, EBITDA/Interest < 1.5, CFO/Debt < 5%, FOCF/Debt < (10), DCF/Debt < (20)

  STEP 2: Locate the industry-specific supplemental ratios and guidelines for this company from the knowledge base.

  STEP 3: Using the corresponding volatility table and the industry-specific guidelines, apply these financial metrics to determine the financial risk profile (1-6).

  Key Financial Metrics:
  FFO to debt: {row['ffo_to_debt_ltm']}
  Debt to EBITDA: {row['debt_to_ebitda_ltm']}
  FFO cash interest cover: {row['ffo_interest_coverage_ltm']}
  EBITDA to interest: {row['ebitda_to_interest_ltm']}
  CFO to debt: {row['cfo_to_debt_ltm']}
  FOCF to debt: {row['focf_to_debt_ltm']}
  DCF to debt: {row['dcf_to_debt_ltm']}
  EBIT margin: {row['ebit_margin_ltm']}
  EBITDA margin: {row['ebitda_margin_ltm']}
  Return on capital: {row['return_on_capital_ltm']}
  CICRA: {row['circa_rating']}

  Think step by step and explain in a methodological way why you chose that specific rating.

  If you don't know the answer, please think rationally and answer from your own knowledge base using the methodology provided. You have all the data you need to make this prediction.

  Label your final answer as ANSWER: [a single category from 1-6]."""

In [None]:

def extract_answer(text):
  """Pull the predicted risk category out of a model response.

  The prompt asks the model to finish with
  "ANSWER: [a single category from 1-6]". A reply may or may not keep the
  square brackets around the number, so both "ANSWER: 3" and "ANSWER: [3]"
  are accepted.

  Args:
    text: Raw response text returned by the model.

  Returns:
    The matched digits as a string (e.g. '3'), or '-1' when no
    "ANSWER: <number>" label is present.
  """
  # Optional whitespace and optional brackets around the digits. The
  # instruction text itself ("ANSWER: [a single category ...") never matches
  # because no digit follows the label there.
  pattern = r"ANSWER:\s*\[?(\d+)\]?"
  pattern_match = re.search(pattern, text)
  if pattern_match:
    return pattern_match.group(1)
  return '-1'

# Start every row at the "no prediction" sentinel with empty reasoning.
df["pred_reasoning"] = ""
df["pred_score"] = "-1"

# Ask the retrieval chain about each company and record its reply.
for index, row in tqdm(df.iterrows(), total=len(df), desc='Scoring each row in df'):
  prompt = custom_prompt(row)
  output = conversation_chain({"question": prompt})
  # Keep the full reply, and the category parsed from it ('-1' if none found).
  df.at[index, 'pred_reasoning'] = output["answer"]
  df.at[index, 'pred_score'] = extract_answer(output["answer"])

In [None]:
import json
import re
from tqdm import tqdm

from openai import OpenAI
client = OpenAI()

def custom_prompt(row):
  """Build the financial-risk-profile prediction prompt for one company.

  The prompt walks the model through three steps: choose a volatility table
  from the CICRA score, look up industry-specific guidance, then map the
  company's ratios onto the table to pick a category from 1-6.

  Args:
    row: Mapping-like record (e.g. a DataFrame row) providing the keys
      'company_name', 'circa_rating', 'sector', 'business_description',
      'ffo_to_debt_ltm', 'debt_to_ebitda_ltm', 'ffo_interest_coverage_ltm',
      'ebitda_to_interest_ltm', 'cfo_to_debt_ltm', 'focf_to_debt_ltm',
      'dcf_to_debt_ltm', 'ebit_margin_ltm', 'ebitda_margin_ltm' and
      'return_on_capital_ltm'.

  Returns:
    The fully formatted prompt string, ending with the instruction to label
    the final answer as "ANSWER: [a single category from 1-6]".
  """
  # NOTE: each "Highly Leveraged" row must list all seven ratios, and its
  # FOCF/DCF bounds continue from the lower bounds of the "Aggressive" row
  # directly above it.
  return f"""
  Your goal is to predict the S&P financial risk profile from 1-6 for {row['company_name']}. Follow the methodology outlined in the knowledge base and the following steps:


  STEP 1: Determine the correct volatility table (standard, medial, or low) to use for {row['company_name']} based on the following CICRA and industry:

  CICRA: {row['circa_rating']}
  Industry: {row['sector']}.
  Business description: {row['business_description']}

  The low volatility table will generally apply when a company's CICRA is '1' but can infrequently also apply to a company with a CICRA of '2' if the company exhibits or is expected to exhibit low levels of volatility.
  The medial volatility table will generally apply for a company with a CICRA of '2' but can infrequently also apply to a company with a CICRA of '1' if the company exhibits or is expected to exhibit medial levels of volatility.
  The standard volatility table serves as the relevant benchmark table for all CICRA scores other than '1', but we will always use it for companies with a CICRA of '1' or '2' whose competitive position is assessed as '5' or '6'.

  Standard Volatility Table:

  Minimal Risk: FFO/Debt > 60%, Debt/EBITDA < 1.5, FFO/Cash Interest > 13, EBITDA/Interest > 15, CFO/Debt > 50%, FOCF/Debt > 40, DCF/Debt > 25
  Modest Risk: FFO/Debt 45-60%, Debt/EBITDA 1.5-2, FFO/Cash Interest 9-13, EBITDA/Interest 10-15, CFO/Debt 35-50%, FOCF/Debt 25-40, DCF/Debt 15-25
  Intermediate Risk: FFO/Debt 30-45%, Debt/EBITDA 2-3, FFO/Cash Interest 6-9, EBITDA/Interest 6-10, CFO/Debt 25-35%, FOCF/Debt 15-25, DCF/Debt 10-15
  Significant Risk: FFO/Debt 20-30%, Debt/EBITDA 3-4, FFO/Cash Interest 4-6, EBITDA/Interest 3-6, CFO/Debt 15-25%, FOCF/Debt 10-15, DCF/Debt 5-10
  Aggressive Risk: FFO/Debt 12-20%, Debt/EBITDA 4-5, FFO/Cash Interest 2-4, EBITDA/Interest 2-3, CFO/Debt 10-15%, FOCF/Debt 5-10, DCF/Debt 2-5
  Highly Leveraged: FFO/Debt < 12%, Debt/EBITDA > 5, FFO/Cash Interest < 2, EBITDA/Interest < 2, CFO/Debt < 10%, FOCF/Debt < 5, DCF/Debt < 2

  Medial Volatility Table:

  Minimal Risk: FFO/Debt > 50%, Debt/EBITDA < 1.75, FFO/Cash Interest > 10.5, EBITDA/Interest > 14, CFO/Debt > 40%, FOCF/Debt > 30, DCF/Debt > 18
  Modest Risk: FFO/Debt 35-50%, Debt/EBITDA 1.75-2.5, FFO/Cash Interest 7.5-10.5, EBITDA/Interest 9-14, CFO/Debt 27.5-40%, FOCF/Debt 17.5-30, DCF/Debt 11-18
  Intermediate Risk: FFO/Debt 23-35%, Debt/EBITDA 2.5-3.5, FFO/Cash Interest 5-7.5, EBITDA/Interest 5-9, CFO/Debt 18.5-27.5%, FOCF/Debt 9.5-17.5, DCF/Debt 6.5-11
  Significant Risk: FFO/Debt 13-23%, Debt/EBITDA 3.5-4.5, FFO/Cash Interest 3-5, EBITDA/Interest 2.75-5, CFO/Debt 10.5-18.5%, FOCF/Debt 5-9.5, DCF/Debt 2.5-6.5
  Aggressive Risk: FFO/Debt 9-13%, Debt/EBITDA 4.5-5.5, FFO/Cash Interest 1.75-3, EBITDA/Interest 1.75-2.75, CFO/Debt 7-10.5%, FOCF/Debt 0-5, DCF/Debt (11)-2.5
  Highly Leveraged: FFO/Debt < 9%, Debt/EBITDA > 5.5, FFO/Cash Interest < 1.75, EBITDA/Interest < 1.75, CFO/Debt < 7%, FOCF/Debt < 0, DCF/Debt < (11)

  Low Volatility Table:

  Minimal Risk: FFO/Debt > 35%, Debt/EBITDA < 2, FFO/Cash Interest > 8, EBITDA/Interest > 13, CFO/Debt > 30%, FOCF/Debt > 20, DCF/Debt > 11
  Modest Risk: FFO/Debt 23-35%, Debt/EBITDA 2-3, FFO/Cash Interest 5-8, EBITDA/Interest 7-13, CFO/Debt 20-30%, FOCF/Debt 10-20, DCF/Debt 7-11
  Intermediate Risk: FFO/Debt 13-23%, Debt/EBITDA 3-4, FFO/Cash Interest 3-5, EBITDA/Interest 4-7, CFO/Debt 12-20%, FOCF/Debt 4-10, DCF/Debt 3-7
  Significant Risk: FFO/Debt 9-13%, Debt/EBITDA 4-5, FFO/Cash Interest 2-3, EBITDA/Interest 2.5-4, CFO/Debt 8-12%, FOCF/Debt 0-4, DCF/Debt 0-3
  Aggressive Risk: FFO/Debt 6-9%, Debt/EBITDA 5-6, FFO/Cash Interest 1.5-2, EBITDA/Interest 1.5-2.5, CFO/Debt 5-8%, FOCF/Debt (10)-0, DCF/Debt (20)-0
  Highly Leveraged: FFO/Debt < 6%, Debt/EBITDA > 6, FFO/Cash Interest < 1.5, EBITDA/Interest < 1.5, CFO/Debt < 5%, FOCF/Debt < (10), DCF/Debt < (20)

  STEP 2: Locate the industry-specific supplemental ratios and guidelines for this company from the knowledge base.

  STEP 3: Using the corresponding volatility table and the industry-specific guidelines, apply these financial metrics to determine the financial risk profile (1-6).

  Key Financial Metrics:
  FFO to debt: {row['ffo_to_debt_ltm']}
  Debt to EBITDA: {row['debt_to_ebitda_ltm']}
  FFO cash interest cover: {row['ffo_interest_coverage_ltm']}
  EBITDA to interest: {row['ebitda_to_interest_ltm']}
  CFO to debt: {row['cfo_to_debt_ltm']}
  FOCF to debt: {row['focf_to_debt_ltm']}
  DCF to debt: {row['dcf_to_debt_ltm']}
  EBIT margin: {row['ebit_margin_ltm']}
  EBITDA margin: {row['ebitda_margin_ltm']}
  Return on capital: {row['return_on_capital_ltm']}
  CICRA: {row['circa_rating']}

  Think step by step and explain in a methodological way why you chose that specific rating.

  If you don't know the answer, please think rationally and answer from your own knowledge base using the methodology provided. You have all the data you need to make this prediction.

  Label your final answer as ANSWER: [a single category from 1-6]."""

def extract_answer(text):
  """Pull the predicted risk category out of a model response.

  The prompt asks the model to finish with
  "ANSWER: [a single category from 1-6]". A reply may or may not keep the
  square brackets around the number, so both "ANSWER: [3]" and "ANSWER: 3"
  are accepted.

  Args:
    text: Raw response text returned by the model.

  Returns:
    The matched digits as a string (e.g. '3'), or '-1' when no
    "ANSWER: <number>" label is present. A successful extraction is also
    printed as a progress trace.
  """
  # Optional whitespace and optional brackets around the digits. An echoed
  # copy of the instruction ("ANSWER: [a single category ...") never matches
  # because no digit follows the label there.
  pattern = r"ANSWER:\s*\[?(\d+)\]?"
  pattern_match = re.search(pattern, text)
  if pattern_match:
    category = pattern_match.group(1)
    print(f'   extracted answer: {category}')
    return category
  return '-1'

# Reset the prediction columns so a re-run starts from the "no prediction" state.
df["pred_score"] = "-1"
df["pred_reasoning"] = ""

for index, row in tqdm(df.iterrows(), total=df.shape[0], desc=f'Scoring each row in df'):

  # Alternative backend (OpenAI chat completions), kept for reference:
  # response = client.chat.completions.create(
  #   model="gpt-4-turbo-preview",
  #     messages=[
  #       {"role": "system", "content": "You are a helpful assistant who is well-versed in the financial services world."},
  #       {"role": "user", "content": custom_prompt(row)},
  #     ]
  # )

  # output = {"answer": response.choices[0].message.content}

  # Active backend. The reply is wrapped in a dict so the bookkeeping below
  # is the same for every backend.
  # NOTE(review): assumes fingpt_chain.invoke returns the reply as a string — confirm.
  output = {"answer": fingpt_chain.invoke(custom_prompt(row))}

  # Alternative backend (retrieval conversation chain), kept for reference:
  # prompt = custom_prompt(row)
  # output = conversation_chain({"question": prompt})

  # No other model is invoked here: one LLM call per row, and its reply is
  # both parsed for the category and stored in full.
  df.at[index, 'pred_score'] = extract_answer(output["answer"])
  df.at[index, 'pred_reasoning'] = output["answer"]

Scoring each row in df:   0%|          | 3/947 [00:17<1:33:46,  5.96s/it]


KeyboardInterrupt: 

In [None]:
# Reset the prediction columns so a re-run starts from the "no prediction" state.
df["pred_score"] = "-1"
df["pred_reasoning"] = ""

for index, row in tqdm(df.iterrows(), total=df.shape[0], desc=f'Scoring each row in df'):

  # Build the prompt from the row being scored. Using df.iloc[0] here would
  # give every company the first company's prediction.
  response = mistral_chain.invoke(custom_json_prompt(row))

  output = mistral_extract_output(response)

  # Only overwrite the defaults when parsing produced a usable dict; a
  # missing key falls back to the same defaults set above.
  if isinstance(output, dict) and 'error' not in output:
    df.at[index, 'pred_score'] = output.get("answer", "-1")
    df.at[index, 'pred_reasoning'] = output.get("reasoning", "")

Scoring each row in df:   0%|          | 1/947 [00:01<31:00,  1.97s/it]





Scoring each row in df:   0%|          | 2/947 [00:04<34:32,  2.19s/it]





Scoring each row in df:   0%|          | 3/947 [00:05<30:18,  1.93s/it]





Scoring each row in df:   0%|          | 4/947 [00:07<28:06,  1.79s/it]





Scoring each row in df:   1%|          | 5/947 [00:09<27:21,  1.74s/it]





Scoring each row in df:   1%|          | 6/947 [00:11<28:15,  1.80s/it]





Scoring each row in df:   1%|          | 7/947 [00:12<27:41,  1.77s/it]





Scoring each row in df:   1%|          | 7/947 [00:14<31:52,  2.04s/it]


KeyboardInterrupt: 

In [None]:
def mistral_extract_output(text):
  """Parse a Mistral chain response into a prediction dict.

  Everything after the last "[/INST]\\n\\n" marker is treated as the model's
  reply. The reply is first parsed as JSON; if that fails (or the JSON is not
  an object), a fallback looks for the phrase "can be categorized as " and
  takes the text inside the first parentheses after it as the answer.

  Args:
    text: Raw string returned by the chain, possibly including the echoed
      instruction block.

  Returns:
    The decoded JSON object when the reply is a JSON object; otherwise
    {"answer": <text in parentheses>, "reasoning": <reply>} when the fallback
    phrase is found; otherwise {"error": "Invalid JSON format"}.
  """
  json_string = text.split("[/INST]\n\n")[-1]

  try:
    json_data = json.loads(json_string)
  except json.JSONDecodeError:
    json_data = None

  # A bare JSON scalar or list is not a usable prediction record, so only a
  # JSON object is returned as-is.
  if isinstance(json_data, dict):
    return json_data

  try:
    val_string = json_string.split("can be categorized as ")[1]
    val = val_string.split('(')[1].split(')')[0]
  except IndexError:
    # Phrase or opening parenthesis not present in the reply.
    return {"error": "Invalid JSON format"}
  return {"answer": val, "reasoning": json_string}

In [None]:
# Identifying columns, ground-truth labels and the model's prediction.
display_cols = [
    'company_name',
    'sector',
    'financial_risk_profile',
    'circa_rating',
    'pred_score',
]

# First 50 rows, restricted to the columns above.
df[display_cols].head(50)

Unnamed: 0,company_name,sector,financial_risk_profile,circa_rating,pred_score
0,AAR Corp.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,5
1,Boeing Co.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,5
2,BWX Technologies Inc.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,5
3,CACI International Inc.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,5
4,General Dynamics Corp.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,5
5,Hexcel Corp.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,5
6,Howmet Aerospace Inc.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,5
7,Huntington Ingalls Industries Inc.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,-1
8,Kaman Corp.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,-1
9,KBR Inc.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,-1


In [None]:
# Side-by-side view of ground truth vs. prediction for the first 50 companies.
display_cols = [
    'company_name', 'sector',
    'financial_risk_profile', 'circa_rating',
    'pred_score',
]

df.loc[:, display_cols].iloc[:50]

Unnamed: 0.1,Unnamed: 0,company_name,sector,financial_risk_profile,circa_rating,business_description,ffo_to_debt_ltm,debt_to_ebitda_ltm,cfo_to_debt_ltm,focf_to_debt_ltm,dcf_to_debt_ltm,ffo_interest_coverage_ltm,ebitda_to_interest_ltm,ebit_margin_ltm,ebitda_margin_ltm,return_on_capital_ltm,pred_score,pred_reasoning
0,0,AAR Corp.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,AAR Corp. provides products and services to co...,0.18,1.31,0.23,0.11,0.11,2.93,10.3,7.25,8.7,7.05,5,Given the outlined methodology and the provide...
1,1,Boeing Co.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,"The Boeing Company, together with its subsidia...",0.11,10.6,0.16,0.12,0.13,2.42,1.47,1.66,4.05,2.03,-1,Given the information and methodology provided...
2,2,BWX Technologies Inc.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,"BWX Technologies, Inc., together with its subs...",0.19,3.44,0.2,0.07,0.14,5.35,8.05,12.2,15.3,8.53,-1,To determine the S&P financial risk profile fo...
3,3,CACI International Inc.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,"CACI International Inc, together with its subs...",0.15,2.42,0.17,0.13,0.36,3.2,8.06,8.06,10.0,6.87,-1,Given the information provided and following t...
4,4,General Dynamics Corp.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,General Dynamics Corporation operates as an ae...,0.42,1.84,0.51,0.41,0.62,11.8,12.5,8.77,10.8,7.35,-1,


In [None]:
# Preview the first ten rows, all columns.
df.head(10)

Unnamed: 0.1,Unnamed: 0,company_name,sector,financial_risk_profile,circa_rating,business_description,ffo_to_debt_ltm,debt_to_ebitda_ltm,cfo_to_debt_ltm,focf_to_debt_ltm,dcf_to_debt_ltm,ffo_interest_coverage_ltm,ebitda_to_interest_ltm,ebit_margin_ltm,ebitda_margin_ltm,return_on_capital_ltm,pred_score,pred_reasoning
0,0,AAR Corp.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,AAR Corp. provides products and services to co...,0.18,1.31,0.23,0.11,0.11,2.93,10.3,7.25,8.7,7.05,-1,Human: Use the following context to help you a...
1,1,Boeing Co.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,"The Boeing Company, together with its subsidia...",0.11,10.6,0.16,0.12,0.13,2.42,1.47,1.66,4.05,2.03,-1,Human: Use the following context to help you a...
2,2,BWX Technologies Inc.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,"BWX Technologies, Inc., together with its subs...",0.19,3.44,0.2,0.07,0.14,5.35,8.05,12.2,15.3,8.53,-1,Human: Use the following context to help you a...
3,3,CACI International Inc.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,"CACI International Inc, together with its subs...",0.15,2.42,0.17,0.13,0.36,3.2,8.06,8.06,10.0,6.87,-1,
4,4,General Dynamics Corp.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,General Dynamics Corporation operates as an ae...,0.42,1.84,0.51,0.41,0.62,11.8,12.5,8.77,10.8,7.35,-1,
5,5,Hexcel Corp.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,"Hexcel Corporation develops, manufactures, and...",0.35,1.41,0.51,0.3,0.44,7.56,10.5,12.0,19.0,5.64,-1,
6,6,Howmet Aerospace Inc.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,Howmet Aerospace Inc. provides advanced engine...,0.24,2.12,0.28,0.21,0.33,4.13,6.97,17.8,21.9,9.41,-1,
7,7,Huntington Ingalls Industries Inc.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,"Huntington Ingalls Industries, Inc. designs, b...",0.36,1.84,0.42,0.3,0.42,10.2,13.1,6.74,9.77,7.14,-1,
8,8,Kaman Corp.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,"Kaman Corporation, together with its subsidiar...",0.13,4.86,0.14,0.09,0.13,1.82,2.75,7.13,13.6,2.75,-1,
9,9,KBR Inc.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,"KBR, Inc. provides scientific, technology, and...",0.16,2.07,0.19,0.14,0.26,2.88,7.45,6.97,8.34,8.56,-1,


In [None]:
# Inspect the stored model reasoning for the first row. `df[0]` would look up
# a column named 0 (KeyError), so the row is selected by position instead.
# NOTE(review): 'pred_reasoning' is assumed to be the intended column — confirm.
df.iloc[0]['pred_reasoning']

In [None]:
# Show the raw answer text held in `output`. This relies on `output` still
# being set in the kernel by whichever scoring loop ran last.
output['answer']

'Human: Use the following context to help you answer the task. If you don\'t know the answer, please think rationally and answer from your own knowledge base using the methodology provided. You have all the data you need to make this prediction:\n[Document(page_content="company\'s business risk profile, then evaluate its financial risk profile, then", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content="The low volatility table (table 19) will generally apply when a company\'s CICRA is", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG Knowledge Base/filtered_general_methodology.txt\'}), Document(page_content="12. T o determine the assessment for a corporate issuer\'s business risk profile,", metadata={\'source\': \'/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/RAG 

In [None]:
# Same inspection, re-run after a different scoring run; `output` is leftover
# kernel state, so the value depends on which loop was executed most recently.
output['answer']

"Starting with STEP 1, we determined the correct volatility table for AAR Corp. Given the CICRA score of [3] (Intermediate risk) for AAR Corp. and its industry (AEROSPACE & DEFENSE), we should use the Standard Volatility Table. This decision is based on the guidelines that the standard volatility table is used for all CICRA scores other than '1,' and considering AAR Corp.'s CICRA score and its industry, it doesn't align with the criteria for using the low or medial volatility tables exclusively.\n\nMoving onto STEP 2, unfortunately, there were no specific supplemental ratios and guidelines provided for the aerospace & defense industry within the instructions. Therefore, we proceed with the general methodology without industry-specific adjustments.\n\nIn STEP 3, we apply the key financial metrics of AAR Corp. to the Standard Volatility Table to determine its financial risk profile:\n\n1. FFO to debt is 18% (0.18), which aligns with the Aggressive Risk category.\n2. Debt to EBITDA is 1.3

In [None]:
# Answer text for the most recently scored row (leftover `output` from a
# scoring loop; not reproducible on a fresh kernel without re-running it).
output['answer']

'Given the information provided and following the methodology outlined, let\'s determine the financial risk profile for CACI International Inc. step by step.\n\n### Step 1: Determine the Volatility Table\n\nCACI International Inc. has a CICRA of [3] which indicates an \'Intermediate risk\' level. Based on the information, the aerospace and defense industry typically suggests a potential for varying levels of volatility, but not necessarily low. Given that the company\'s CICRA is \'3\' and considering no specific low volatility indicators were mentioned explicitly for the company\'s operational or financial characteristics, we opt for the **Standard Volatility Table** as the starting point. This choice is guided by the instructions which indicate that the standard volatility table serves as the relevant benchmark table for all CICRA scores other than \'1\'.\n\n### Step 2: Industry-Specific Supplemental Ratios\n\nThe task didn\'t specifically list industry-specific supplemental ratios fo

In [None]:
# Dump the whole result object from the last chain call; it relies on `output`
# left in the kernel by a scoring loop.
output

{'question': "\n  Your goal is to predict the S&P financial risk profile from 1-6 for AAR Corp.. Follow the methodology outlined in the knowledge base and the following steps:\n\n\n  STEP 1: Determine the correct volability table (standard, medial, or low) to use for AAR Corp. based on the following CICRA and industry:\n\n  CICRA: [3] Intermediate risk\n  Industry: AEROSPACE & DEFENSE.\n  Business description: AAR Corp. provides products and services to commercial aviation, government, and defense markets worldwide.\n\n  The low volatility table will generally apply when a company's CICRA is '1' but can infrequently also apply to a company with a CICRA of '2' if the company exhibits or is expected to exhibit low levels of volatility.\n  The medial volatility table will generally apply for a company with a CICRA of '2' but can infrequently also apply to a company with a CICRA of '1' if the company exhibits or is expected to exhibit medial levels of volatility.\n  The standard volatility

## Testing loop

In [None]:
# Mount Google Drive so files under /content/drive (including the shared
# drive) are readable from this runtime.
# NOTE(review): Drive is also mounted near the top of the notebook at
# '/content/drive'; this second mount is likely only needed when the testing
# section is run on its own.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the merged financial-risk-profile dataset from the shared drive, then
# report its shape and preview the first rows.
# NOTE(review): the preview lists 'Unnamed: 0.1' and 'Unnamed: 0' columns,
# which look like index columns written out by earlier CSV saves — confirm
# before relying on column positions.
df = pd.read_csv("/content/drive/Shareddrives/ENGS Final Project/Data/Financial Risk Profile Data/Financial Data/CLEAN_merged_frp_data_v2.csv")
print(df.shape)
df.head()

(947, 16)


Unnamed: 0.1,Unnamed: 0,company_name,sector,financial_risk_profile,circa_rating,business_description,ffo_to_debt_ltm,debt_to_ebitda_ltm,cfo_to_debt_ltm,focf_to_debt_ltm,dcf_to_debt_ltm,ffo_interest_coverage_ltm,ebitda_to_interest_ltm,ebit_margin_ltm,ebitda_margin_ltm,return_on_capital_ltm
0,0,AAR Corp.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,AAR Corp. provides products and services to co...,0.18,1.31,0.23,0.11,0.11,2.93,10.3,7.25,8.7,7.05
1,1,Boeing Co.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,"The Boeing Company, together with its subsidia...",0.11,10.6,0.16,0.12,0.13,2.42,1.47,1.66,4.05,2.03
2,2,BWX Technologies Inc.,AEROSPACE & DEFENSE,[4] Significant,[3] Intermediate risk,"BWX Technologies, Inc., together with its subs...",0.19,3.44,0.2,0.07,0.14,5.35,8.05,12.2,15.3,8.53
3,3,CACI International Inc.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,"CACI International Inc, together with its subs...",0.15,2.42,0.17,0.13,0.36,3.2,8.06,8.06,10.0,6.87
4,4,General Dynamics Corp.,AEROSPACE & DEFENSE,[3] Intermediate,[3] Intermediate risk,General Dynamics Corporation operates as an ae...,0.42,1.84,0.51,0.41,0.62,11.8,12.5,8.77,10.8,7.35


In [None]:
# TESTING LOOP

for