In [2]:
import os
import getpass
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.readers.google import GoogleDriveReader
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.cohere import CohereEmbedding
from llama_parse import LlamaParse
from llama_index.core.node_parser import MarkdownNodeParser, SentenceSplitter
from llama_index.core import VectorStoreIndex, QueryBundle, Response, Document, Settings
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.embeddings import resolve_embed_model
from dotenv import load_dotenv
import nest_asyncio

# nest_asyncio patches asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably needed because the notebook kernel already runs a loop
# and the parsing / ingestion calls below use asyncio internally — confirm.
nest_asyncio.apply()

# llm = Cohere(model="command-nightly", api_key=cohere_api_key)

  from .autonotebook import tqdm as notebook_tqdm


# Parse

In [2]:
# Markdown node parser; applied further down (get_nodes_from_documents) to the
# documents returned by LlamaParse.
node_parser = MarkdownNodeParser()

In [23]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# LLAMA_INDEX_KEYS holds one or more LlamaParse API keys separated by commas.
# Whitespace around each key (e.g. the space after a comma) is stripped and
# empty entries are dropped, so both "k1,k2" and "k1, k2" are accepted.
raw_api_keys = os.getenv("LLAMA_INDEX_KEYS", "")
api_keys = [key.strip() for key in raw_api_keys.split(",") if key.strip()]

# Fail early with a clear message instead of an AttributeError on None or an
# IndexError later when api_keys[0] is read.
if not api_keys:
    raise RuntimeError(
        "LLAMA_INDEX_KEYS is not set or contains no API keys; "
        "define it in the environment or in the .env file"
    )

In [32]:
# Result accumulator: process_company_folder() appends one entry per company folder.
documents = []

# Parser bound to the first configured API key. process_company_folder()
# builds its own parser for each key it tries.
llama_parse_options = {
    "api_key": api_keys[0],
    "result_type": "markdown",
    "verbose": True,
    "language": "en",
    "num_workers": 2,
}
parser = LlamaParse(**llama_parse_options)

# Single-file call, kept for reference:
# loaded_docs = parser.load_data("./Data/AMZN/AMZN2019.pdf")

In [26]:
def process_company_folder(company_folder_path):
    """Parse every file of one company folder with LlamaParse.

    The API keys in the module-level ``api_keys`` list are tried in order;
    the first key whose ``load_data`` call does not raise wins. The parsed
    documents are appended (as one list) to the module-level ``documents``
    list and also returned.

    Args:
        company_folder_path: Path of the directory holding the company's files.

    Returns:
        The list returned by ``LlamaParse.load_data`` for this folder.

    Raises:
        RuntimeError: If no API key is configured or every key failed. The
            last underlying exception is attached as ``__cause__``.
    """
    file_paths = []
    # Sorted so the submission order does not depend on the filesystem.
    for file_name in sorted(os.listdir(company_folder_path)):
        candidate_path = os.path.join(company_folder_path, file_name)
        # Hidden entries (e.g. .DS_Store) and sub-directories are not documents.
        if file_name.startswith(".") or not os.path.isfile(candidate_path):
            continue
        file_paths.append(candidate_path)

    last_error = None
    for api_key in api_keys:
        try:
            parser = LlamaParse(
                api_key=api_key,
                result_type="markdown",
                verbose=True,
                language="en",
                num_workers=2,
            )
            loaded_docs = parser.load_data(file_paths)
        except Exception as exc:
            # Remember the failure and fall through to the next key. The key
            # itself is deliberately not printed.
            last_error = exc
            print(f"LlamaParse failed for {company_folder_path!r}, trying next API key: {exc!r}")
            continue
        documents.append(loaded_docs)
        return loaded_docs

    # Reached only when the loop never succeeded (or api_keys is empty).
    raise RuntimeError(
        f"Could not parse {company_folder_path!r}: "
        f"all {len(api_keys)} API key(s) failed"
    ) from last_error

In [33]:
# Iterate and process company folders.
# Every sub-directory of `file_path` is treated as one company.
file_path = 'Data'
for company in sorted(os.listdir(file_path)):
    company_folder_path = os.path.join(file_path, company)
    # Only real, non-hidden directories are company folders. This still skips
    # '.DS_Store', and also any other stray file or hidden directory (e.g.
    # '.ipynb_checkpoints') that would otherwise be handed to the parser or
    # make os.listdir fail inside process_company_folder.
    if company.startswith('.') or not os.path.isdir(company_folder_path):
        continue
    process_company_folder(company_folder_path)

Started parsing the file under job_id 34495a19-e5dd-49bc-811b-d1860a020f60
Started parsing the file under job_id d07282d7-9580-425e-b3f6-1695c8c239a0
Started parsing the file under job_id 082279a2-df43-4a07-b7ad-822893af9d0c
Started parsing the file under job_id 4e559d2d-e121-4905-b803-71232ede0d1b
Started parsing the file under job_id b863b0f1-f8b4-4507-a06d-61d3558f0212
..Started parsing the file under job_id 63f95b71-c110-46ff-b775-72e666fd3e10
Started parsing the file under job_id 93b276f0-c399-41c8-ab38-38bfb3434255
Started parsing the file under job_id 6e7eaffb-cc42-4ca7-ac81-9c9db82d9c92
Started parsing the file under job_id a247f79c-d5c1-4e85-8f90-11ab819c8e0f
Started parsing the file under job_id def51d3f-febf-49ef-a488-7a30a480d1e5
Started parsing the file under job_id e51c9331-74e2-4e60-be3e-e502ab4dd3f6
Started parsing the file under job_id cca32ca2-6f13-4a50-8301-1c8cd9624d34
Started parsing the file under job_id 3ee38181-16db-4f4c-80ab-613d0c84ac99
Started parsing the fil

In [34]:
# Run the node parser over each company's documents and collect all resulting
# nodes into one flat list (company boundaries are not kept).
company_nodes = [
    node
    for company_docs in documents
    for node in node_parser.get_nodes_from_documents(company_docs)
]

# Batch, Embed, Store/Rank

In [4]:
# Elastic Cloud credentials, read from the environment; each is None when the
# corresponding variable is unset.
ELASTIC_CLOUD_ID = os.environ.get("ELASTIC_CLOUD_ID")
ELASTIC_API_KEY = os.environ.get("ELASTIC_API_KEY")

In [None]:
# Elasticsearch vector store: index "calls", text stored in 'conversation',
# vectors stored in 'conversation_vector'.
es_vector_store = ElasticsearchStore(
    index_name="calls",
    vector_field='conversation_vector',
    text_field='conversation',
    es_cloud_id=ELASTIC_CLOUD_ID,
    es_api_key=ELASTIC_API_KEY,
)

# Embedding model served through Ollama ("mistral").
# NOTE(review): the next cell ingests the same nodes into the same "calls"
# index with a different embedding model — confirm only one of the two
# ingestion cells is meant to run.
ollama_embedding = OllamaEmbedding("mistral")

# Transformations run in order: split (chunk_size=350, chunk_overlap=50), then embed.
ingestion_steps = [
    SentenceSplitter(chunk_size=350, chunk_overlap=50),
    ollama_embedding,
]
pipeline = IngestionPipeline(
    transformations=ingestion_steps,
    vector_store=es_vector_store,
)

pipeline.run(documents=company_nodes)
print(".....Done running pipeline.....\n")

In [None]:
# Elasticsearch vector store: index "calls", text stored in 'conversation',
# vectors stored in 'conversation_vector'.
es_vector_store = ElasticsearchStore(
    index_name="calls",
    vector_field='conversation_vector',
    text_field='conversation',
    es_cloud_id=ELASTIC_CLOUD_ID,
    es_api_key=ELASTIC_API_KEY,
)

# Despite its name, this variable does not hold an OllamaEmbedding here: it is
# resolved from the "local:BAAI/bge-small-en-v1.5" spec, the same spec the
# query cell assigns to Settings.embed_model.
ollama_embedding = resolve_embed_model("local:BAAI/bge-small-en-v1.5")

# Transformations run in order: split (chunk_size=1000, chunk_overlap=150), then embed.
ingestion_steps = [
    SentenceSplitter(chunk_size=1000, chunk_overlap=150),
    ollama_embedding,
]
pipeline = IngestionPipeline(
    transformations=ingestion_steps,
    vector_store=es_vector_store,
)

pipeline.run(documents=company_nodes)
print(".....Done running pipeline.....\n")

# Test/Run Q&A

In [5]:
# Handle on the "calls" index for querying; same field names and credentials
# as the ingestion cells use.
es_vector_store = ElasticsearchStore(
    index_name="calls",
    vector_field='conversation_vector',
    text_field='conversation',
    es_cloud_id=ELASTIC_CLOUD_ID,
    es_api_key=ELASTIC_API_KEY,
)


In [7]:
# LLM that receives the user query, reached through Ollama.
local_llm = Ollama(model="llama3:instruct", request_timeout=60.0)

# Embedding model used for the query vector.
Settings.embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")

# Build an index view over the existing vector store and a query engine that
# retrieves with similarity_top_k=10.
index = VectorStoreIndex.from_vector_store(es_vector_store)
query_engine = index.as_query_engine(local_llm, similarity_top_k=10)

query = "How much was the quarterly cash dividend per share in the second quarter of 2022 for Google"

# Embed the query explicitly and pass text and vector together.
query_embedding = Settings.embed_model.get_query_embedding(query)
bundle = QueryBundle(query, embedding=query_embedding)

result = query_engine.query(bundle)
print(result)

Repeat the original answer since the new context doesn't provide useful information to rewrite an answer. There is no mention of a quarterly cash dividend per share in the provided context information for any quarter, including the second quarter of 2022, for Google or Alphabet Inc. The context only provides financial information and discussions about the company's operations, revenues, and expenses, but does not mention dividends.
