In [15]:
import datetime
import json
import os
import shutil

import openai
import pandas as pd
import requests
from datasets import load_dataset
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma


In [16]:
# Read the API key from the OPENAI_KEY environment variable (None when it is
# unset) and hand the same value to the openai module.
OPENAI_KEY = os.environ.get('OPENAI_KEY')
openai.api_key = OPENAI_KEY

In [17]:
# Show only whether the key was loaded. Echoing the value itself stores the
# secret in the saved notebook output; any key that was displayed this way
# must be treated as compromised and rotated.
OPENAI_KEY is not None

True

In [18]:
# Download (or load from the local cache) the FinanceBench dataset from the
# Hugging Face Hub. load_dataset is imported in the notebook's import cell.
dataset = load_dataset("PatronusAI/financebench")

In [19]:
# Embedding client shared by the vector-store code below.
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_KEY,
)


In [20]:
# Materialise the dataset's 'train' split as a pandas DataFrame.
train_split = dataset['train']
df = pd.DataFrame(train_split)

In [21]:
def create_knowledge_hub(path_to_10k, chunk_size=1000, chunk_overlap=5, embedding=None):
    """Build a persisted Chroma vector store from a single 10-K PDF.

    The PDF is loaded with PyPDFLoader, split into character chunks, embedded
    and written to a new "db_<timestamp>" directory.

    Args:
        path_to_10k: Path to the 10-K hosted locally on the user's computer
            (PDF files only).
        chunk_size: Maximum chunk length in characters (measured with len).
        chunk_overlap: Number of characters shared by consecutive chunks.
        embedding: Embedding object handed to Chroma. When None, the
            notebook-level `embeddings` object is used.

    Returns:
        vectordb: The vector database with the information from the 10-K
        db_directory: The path to the vector database
    """
    if embedding is None:
        # Fall back to the notebook-global embedding client.
        embedding = embeddings

    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    db_directory = "db_" + timestamp
    # The timestamp only has one-second resolution: without this check two
    # calls in the same second would share a directory and mix different
    # filings in one store.
    attempt = 1
    while os.path.exists(db_directory):
        db_directory = f"db_{timestamp}_{attempt}"
        attempt += 1

    documents = PyPDFLoader(path_to_10k).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", " ", ""],
        length_function=len,
    )
    texts = splitter.split_documents(documents)

    vectordb = Chroma.from_documents(
        documents=texts,
        embedding=embedding,
        persist_directory=db_directory,
    )
    vectordb.persist()

    return vectordb, db_directory

In [22]:
# Directory holding the 10-K PDFs, relative to the notebook's working
# directory. A machine-specific absolute path would not run on any other
# checkout and exposes the local user name.
folder_path = "financial_statements"

In [23]:
# Rebinds folder_path to a path relative to the working directory; this is the
# value the file-listing cells below read.
folder_path = "financial_statements"

In [24]:
# Collect the PDF file names in folder_path. Non-PDF entries (e.g. OS metadata
# files) are skipped because the loader used downstream only reads PDFs, and
# the names are sorted because os.listdir returns them in arbitrary order,
# which would make files[0] / files[:1] vary between runs and machines.
files = sorted(
    f for f in os.listdir(folder_path)
    if f.lower().endswith(".pdf") and os.path.isfile(os.path.join(folder_path, f))
)


In [25]:
# Prefix every file name with folder_path. os.path.join picks the separator
# for the current OS (a hard-coded backslash only works on Windows), and
# taking the basename first keeps the cell idempotent: re-running it does not
# prepend the folder a second time.
files = [os.path.join(folder_path, os.path.basename(file)) for file in files]


In [26]:
files[0]

'financial_statements\\0000002488-16-000111.pdf'

In [27]:
# Build a knowledge hub for the first file only (files[:1]). vector_db and
# db_directory are rebound on every iteration, so widening the slice would
# leave only the last file's store in these names.
for file in files[:1]:
    print(file)
    vector_db, db_directory =  create_knowledge_hub(file)


financial_statements\0000002488-16-000111.pdf


In [28]:
vector_db

<langchain_community.vectorstores.chroma.Chroma at 0x22f22bf5930>

In [29]:
db_directory


'db_20231226115232'

In [30]:
df['doc_link']

0      https://investors.3m.com/financials/sec-filing...
1      https://investors.3m.com/financials/sec-filing...
2      https://investors.3m.com/financials/sec-filing...
3      https://investors.3m.com/financials/sec-filing...
4      https://investors.3m.com/financials/sec-filing...
                             ...                        
145    https://www.verizon.com/about/sites/default/fi...
146    https://www.verizon.com/about/sites/default/fi...
147    https://d18rn0p25nwr6d.cloudfront.net/CIK-0000...
148    https://d18rn0p25nwr6d.cloudfront.net/CIK-0000...
149    https://d18rn0p25nwr6d.cloudfront.net/CIK-0000...
Name: doc_link, Length: 150, dtype: object

In [31]:
# Benchmark question put to the retriever and the chat model (one string,
# wrapped across lines via implicit literal concatenation).
question = (
    "Answer the following question as if you are an equity research analyst "
    "and have lost internet connection so you do not have access to financial "
    "metric providers. According to the details clearly outlined within the "
    "P&L statement and the statement of cash flows, what is the FY2015 "
    "depreciation and amortization (D&A from cash flow statement) % margin "
    "for AMD?"
)

In [32]:
# Run the retrieval once and reuse the hits. Issuing the same query three
# times repeats the search (and the query-embedding request) only to pick a
# different element of the same result list each time.
top_chunks = vector_db.similarity_search(question, k=3)
source1 = top_chunks[0].page_content
source2 = top_chunks[1].page_content
source3 = top_chunks[2].page_content

In [33]:
print(source1)

Advanced Micro Devices, Inc.
Consolidated Statements of Cash Flows 
  Year Ended
  December 26,
 2015  December 27,
 2014  December 28,
 2013
 (In millions)
Cash flows from operating activities:      
Net loss $ (660) $ (403) $ (83)
Adjustments to reconcile net loss to net cash used in operating activities:      
Depreciation and amortization 167  203  236 
Net loss on disposal of property, plant and equipment —  —  31 
Stock-based compensation expense 63  81  91 
Non-cash interest expense 11  17  25 
Goodwill impairment charge —  233  — 
Restructuring and other special charges, net 83  14  — 
Net loss on debt redemptions —  61  1 
Other (3) (13) (1)
Changes in operating assets and liabilities:      
Accounts receivable 280  7  (200)
Inventories (11) 199  (322)
Prepayments and other - GLOBALFOUNDRIES 84  (113) — 
Prepaid expenses and other assets (111) (7) (103)
Accounts payables, accrued liabilities and other (156) (231) 266 
Payable to GLOBALFOUNDRIES 27  (146) (89)


In [34]:
print(source2)

ITEM 8. FINANCIAL STATEMENTS AND SUPPLEMENTARY DATA
Advanced Micro Devices, Inc.
Consolidated Statements of Operations
 
  Year Ended
  December 26,
 2015  December 27,
 2014  December 28,
 2013
 (In millions, except per share amounts)
Net revenue $ 3,991  $ 5,506  $ 5,299 
Cost of sales 2,911  3,667  3,321 
Gross margin 1,080  1,839  1,978 
Research and development 947  1,072  1,201 
Marketing, general and administrative 482  604  674 
Amortization of acquired intangible assets 3  14  18 
Restructuring and other special charges, net 129  71  30 
Goodwill impairment charge —  233  — 
Legal settlements, net —  —  (48)
Operating income (loss) (481) (155) 103 
Interest expense (160) (177) (177)
Other expense, net (5) (66) — 
Loss before income taxes (646) (398) (74)
Provision for income taxes 14  5  9 
Net loss $ (660) $ (403) $ (83)
Net loss per share      
Basic $ (0.84) $ (0.53) $ (0.11)
Diluted $ (0.84) $ (0.53) $ (0.11)
Shares used in per share calculation


In [35]:
print(source3)

ITEM 6. SELECTED FINANCIAL DATA
Five Years Ended December 26, 2015
(In millions except per share amounts)
 
 2015(1)  2014(1)  2013(1)  2012(1)  2011(1)
Net revenue $ 3,991  $ 5,506  $ 5,299  $ 5,422  $ 6,568 
Income (loss) from continuing operations(2)(3)(4)(5)(6)(7)(660) (403) (83) (1,183) 495 
Loss from discontinued operations, net of tax(8)—  —  —  —  (4)
Net income (loss) attributable to AMD common stockholders $ (660) $ (403) $ (83) $ (1,183) $ 491 
Net income (loss) attributable to AMD common stockholders per
common share          
Basic          
Continuing operations $ (0.84) $ (0.53) $ (0.11) $ (1.60) $ 0.68 
Discontinued operations —  —  —  —  (0.01)
Basic net income (loss) attributable to AMD common stockholders per
common share $ (0.84) $ (0.53) $ (0.11) $ (1.60) $ 0.68 
Diluted          
Continuing operations $ (0.84) $ (0.53) $ (0.11) $ (1.60) $ 0.67 
Discontinued operations —  —  —  —  (0.01)
Diluted net income (loss) attributable to AMD common stockholders


In [38]:
question

'Answer the following question as if you are an equity research analyst and have lost internet connection so you do not have access to financial metric providers. According to the details clearly outlined within the P&L statement and the statement of cash flows, what is the FY2015 depreciation and amortization (D&A from cash flow statement) % margin for AMD?'

In [39]:
# Chat payload (a list of role/content dicts): a system message restricting
# the assistant to the supplied text, then a user message holding the first
# two retrieved chunks followed by the question.
system_message = {
    "role": "system",
    "content": "You are a factual chatbot that answers questions about 10-K documents. You only answer with answers you find in the text, no outside information.",
}
user_message = {
    "role": "user",
    "content": f"{source1}{source2} Now, this is our question: {question}",
}
message_dict = [system_message, user_message]

In [48]:
# Send the assembled chat messages to the gpt-4 chat model.
request_kwargs = {"model": "gpt-4", "messages": message_dict}
completion = openai.ChatCompletion.create(**request_kwargs)

In [49]:
# Keep the text of the first returned choice.
first_choice = completion.choices[0]
response = first_choice.message.content

In [50]:
print(response)

To calculate the depreciation and amortization (D&A from the cash flow statement) % margin for FY 2015 you would use the following formula: (D&A / Net Revenue) * 100.

From the FY2015 Consolidated Statements of Cash Flows, the depreciation and amortization is $167 million 
From the FY2015 Consolidated Statements of Operations, the net revenue is $3,991 million 

Therefore, the FY2015 depreciation and amortization % margin for AMD is: (167 / 3991) * 100 = 4.19%
