In [29]:
import openai
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
# Add a constant to the model (intercept)
from sklearn.metrics import r2_score
import time
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor

from source.models import *
from source.preprocessing import *
from source.variables import *
from source.helpers import *

from dotenv import load_dotenv
import os

# Read the local .env file so its entries are available as environment variables.
load_dotenv()

# API key for the OpenAI client; this is None when the variable is not set.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

In [30]:
# Pipe-separated table with one row per earnings call.
aggregated = pd.read_csv('transcripts/aggregated_credit_df.csv', sep='|')

# Name of the transcript column to process; it also selects the matching
# system prompt file and the name of the output column/file.
# Switch to 'presentation' to process the prepared-remarks section instead.
#PART = 'presentation'
PART = 'QnA'

In [37]:
# OpenAI client authenticated with the key read from the environment.
client = openai.OpenAI(api_key=OPENAI_API_KEY)

# NOTE(review): `output` and `start` are not referenced by any cell visible
# here; they are kept in case other cells rely on them.
output = []
start = 0

# Load the system prompt that matches the selected transcript section.
# The encoding is pinned to UTF-8 so the prompt is decoded identically on every
# platform instead of depending on the locale's default encoding.
with open(f'prompts/system_{PART}_section.txt', 'r', encoding='utf-8') as file:
    system_message = file.read()


print(system_message)

You are an AI language model designed to assist bond investors in analyzing company earnings call transcripts. Your task is to process the provided earnings call transcript, focusing specifically on the Q&A section, and keep only the parts where the CEO or other employees answer questions. Remove all irrelevant sections. Keep only the answers that are directly relevant to bond investors and factors affecting bond recovery rates.

Relevant topics include:
Debt levels and structures
Credit ratings
Interest obligations
Refinancing activities
Liquidity positions and cash flow
Covenant compliance
Leverage ratios
Credit facilities and access to capital
Default risks
Ability to meet financial obligations
Asset valuations and impairments
Restructuring efforts and bankruptcy proceedings
Management's strategic plans to address financial distress
Legal and regulatory issues impacting financial stability
Macroeconomic factors affecting financial obligations
Stakeholder negotiations
Forward-looking

In [32]:
import tiktoken

# Tokenizer for the model used below. It does not change between rows, so it
# is built once here instead of on every loop iteration.
encoding = tiktoken.encoding_for_model("gpt-4o-mini")

# Retry policy for failed API calls.
MAX_ATTEMPTS = 3
RETRY_WAIT_SECONDS = 60


def count_tokens(transcript):
    """Return the number of tokens `transcript` encodes to with `encoding`."""
    return len(encoding.encode(transcript))


# Send each transcript section to the model and store the filtered text in a
# new `<PART>_summary` column of `aggregated`, one row at a time.
for idx, row in aggregated.iterrows():
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            completion = client.chat.completions.create(
                model="gpt-4o-mini",
                temperature=0,
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": row[PART]}
                ]
            )
        # `openai.OpenAIError` is the SDK's base exception. The previous
        # `openai.error.OpenAIError` path does not exist in the v1 client
        # used here, so any API failure surfaced as an AttributeError.
        except openai.OpenAIError as e:
            print(f"An exception occurred: {e}")
            if attempt == MAX_ATTEMPTS:
                # Leave this row without a summary and move on to the next one.
                print(f"Giving up on row {idx} after {MAX_ATTEMPTS} attempts")
                break
            print("Waiting for 1 minute before retrying...")
            time.sleep(RETRY_WAIT_SECONDS)
            continue

        response = completion.choices[0].message.content

        print(f"Completed for row {idx}")
        print(f"Token count: {count_tokens(response)}")

        # Write the result immediately so completed rows survive a later failure.
        aggregated.at[idx, f'{PART}_summary'] = response
        break

Completed for row 0
Token count: 1121
Completed for row 1
Token count: 2110
Completed for row 2
Token count: 2101
Completed for row 3
Token count: 1567
Completed for row 4
Token count: 3662
Completed for row 5
Token count: 1987
Completed for row 6
Token count: 2533
Completed for row 7
Token count: 1472
Completed for row 8
Token count: 4646
Completed for row 9
Token count: 1103
Completed for row 10
Token count: 155
Completed for row 11
Token count: 2517
Completed for row 12
Token count: 1977
Completed for row 13
Token count: 3307
Completed for row 14
Token count: 872
Completed for row 15
Token count: 1544
Completed for row 16
Token count: 1192
Completed for row 17
Token count: 1556
Completed for row 18
Token count: 1769
Completed for row 19
Token count: 1804
Completed for row 20
Token count: 1633
Completed for row 21
Token count: 1572
Completed for row 22
Token count: 2121
Completed for row 23
Token count: 2103
Completed for row 24
Token count: 2521
Completed for row 25
Token count: 647

In [36]:
# Preview the first five rows, including the newly added summary column.
# NOTE(review): the saved output lists the column as `qna_summary`, whereas the
# loop cell writes f'{PART}_summary'; the output looks stale -- re-run to confirm.
aggregated.head(n=5)

Unnamed: 0,call_ID,aggregated_RR,presentation,QnA,qna_summary
0,0,29.023972,Presentation\nOperator\nGood morning Ladies an...,Question and Answer\nOperator\n[Operator Instr...,"Bob Skinner \nWe gave that guidance, Jennifer..."
1,1,8.86942,"Presentation\nOperator\nLadies and gentlemen, ...",Question and Answer\nOperator\n(Operator Instr...,Mike Zafirovski \nWith respect to gross margi...
2,2,33.210455,Presentation\nOperator\nGreetings ladies and g...,Question and Answer\nOperator\nThank you. Ladi...,"Paul Tate \nWell, first of all, I would dispu..."
3,3,11.875,"Presentation\nOperator\nThank you, all parties...",Question and Answer\nOperator\n[Operator Instr...,"Richard L. Bond \nNo Tim, I told the Board th..."
4,4,18.34,Presentation\nOperator\nThank you for joining ...,Question and Answer\nOperator\n(Operator Instr...,"Tony Allott \nYeah. George, Tony. First of al..."


In [35]:
# Persist the table (original columns plus the summary column) as a
# pipe-separated CSV named after the processed section, without the index.
summary_path = f'transcripts/{PART}_summary.csv'
aggregated.to_csv(summary_path, sep='|', index=False)