In [1]:
import os

import nest_asyncio
nest_asyncio.apply()

from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader, Document

In [2]:
# Read the LlamaParse API key from the environment and report only whether it
# is present; the key value itself is never printed.
LLAMAPARSE_API_KEY = os.environ.get('LLAMAPARSE_API_KEY')
print(
    'Check for API key in environment variable'
    if LLAMAPARSE_API_KEY is None
    else 'API key found'
)

API key found


In [3]:
# Configure the LlamaParse client; it is registered below as the handler for
# ".pdf" files.
parser = LlamaParse(
    api_key=LLAMAPARSE_API_KEY,
    language="en",  # stated explicitly, although English is the default
    result_type="markdown",  # "text" is the other option
    verbose=True,
    # num_workers=4,  # for multiple files
)

In [4]:
# Alternative (not used): load and parse the PDF directly with the LlamaParse parser.
# documents = parser.load_data('../data/axis-press-release-q3fy24.pdf')

In [5]:
def filename_fn(filename):
    """Build the per-file metadata dict: the given value is stored under "file_name"."""
    return {"file_name": filename}


# Send ".pdf" files to the LlamaParse parser configured above.
file_extractor = {".pdf": parser}

reader = SimpleDirectoryReader(
    input_files=['../data/axis-press-release-q3fy24.pdf'],
    file_extractor=file_extractor,
    file_metadata=filename_fn,
    filename_as_id=True,
)
documents = reader.load_data()

Started parsing the file under job_id b6034132-72d1-4c79-9267-612310136535
.

In [6]:
# Attach the same free-text description to the metadata of every loaded document.
for doc in documents:
    doc.metadata.update(
        file_descr='Axis bank quarterly earnings report for quarter ended December 2023'
    )

In [7]:
# document = Document(
#     documents,
#     metadata={"filename": "axis-press-release-q3fy24",
#               "category":"press release",
#               "quarter":"q3",
#               "financial_year":"fy24",
#               },
# )

In [8]:
from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import MarkdownElementNodeParser

In [9]:
# OpenAI chat model handed to MarkdownElementNodeParser in the next cell.
# NOTE(review): temperature=0 is presumably chosen for repeatable output — confirm.
llm = OpenAI(model='gpt-3.5-turbo-0125', temperature=0)

In [10]:
# Build a markdown element parser backed by `llm`, then turn the loaded
# documents into nodes.
# NOTE(review): this step calls the LLM (the recorded run took ~8s for 7 items),
# presumably to summarise table elements — confirm against the llama_index docs.
node_parser = MarkdownElementNodeParser(llm=llm)
nodes=node_parser.get_nodes_from_documents(documents)

Embeddings have been explicitly disabled. Using MockEmbedding.


7it [00:00, 4074.40it/s]
100%|██████████| 7/7 [00:08<00:00,  1.19s/it]


In [11]:
# Separate the parsed nodes into two collections; both are indexed together in
# the next cell.
# NOTE(review): presumably `base_nodes` are plain text nodes and `objects` are
# index nodes for tables — confirm against the llama_index docs.
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)

In [12]:
# Build a vector index over everything returned by get_nodes_and_objects
# (the base nodes plus the objects).
index = VectorStoreIndex(nodes=base_nodes + objects)

# Create the query engine with the explicitly configured `llm`
# (gpt-3.5-turbo-0125, temperature=0). Without this argument the engine falls
# back to the library's implicit default LLM, so the model settings chosen in
# this notebook would not apply to the generated answers.
query_engine = index.as_query_engine(llm=llm)

In [14]:
# Natural-language question to ask about the indexed press release.
query = "what are the highlights of axis bank results?"

# Send the question to the query engine and print the response.
resp = query_engine.query(query)
print(resp)

The highlights of Axis Bank's results include a 9% year-on-year growth in Net Interest Income, a 14% year-on-year growth in Core Operating revenues, a 23% year-on-year increase in Advances, a 15% year-on-year growth in Retail Term Deposits, a CASA ratio of 42%, a Net Interest Margin of 4.01%, a ROE of 18.61%, a ROA of 1.84%, a PCR of 78%, a GNPA% of 1.58%, an NNPA% of 0.36%, a credit card CIF market share of 14%, and an operating profit of ₹9,141 crores for the quarter.
