In [49]:
import os
import getpass
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.readers.google import GoogleDriveReader
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.cohere import CohereEmbedding
from llama_parse import LlamaParse
from llama_index.core.node_parser import MarkdownNodeParser, SentenceSplitter
from llama_index.core import VectorStoreIndex, QueryBundle, Response, Document, Settings
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.embeddings import resolve_embed_model
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from dotenv import load_dotenv
import cohere
import time


import nest_asyncio

nest_asyncio.apply()

# llm = Cohere(model="command-nightly", api_key=cohere_api_key)

# Parse

In [41]:
node_parser = MarkdownNodeParser()

In [42]:
load_dotenv()

api_keys = os.getenv("LLAMA_INDEX_KEYS").split(',')
for i, api_key in enumerate(api_keys[1:]):
    api_keys[i+1] = api_key[1:]

ELASTIC_CLOUD_ID = os.getenv("ELASTIC_CLOUD_ID_MISTRAL")
ELASTIC_API_KEY = os.getenv("ELASTIC_API_KEY_MISTRAL")

In [10]:
documents = []

parser = LlamaParse(
    api_key=api_keys[0],
    result_type="markdown",
    verbose=True,
    language="en",
    num_workers=2,
)

In [11]:
def process_company_folder(company_folder_path):
    pdf_paths =[]
    for pdf_file in os.listdir(company_folder_path):
        pdf_path = os.path.join(company_folder_path, pdf_file)
        pdf_paths.append(pdf_path)
    for api_key in api_keys:
        try:
            parser = LlamaParse(
                api_key=api_key,
                result_type="markdown",
                verbose=True,
                language="en",
                num_workers=2,
            )
            loaded_docs = parser.load_data(pdf_paths)
            break
        except:
            continue
    documents.append(loaded_docs)

In [16]:
# Iterate and process company folders
file_path = 'Data'
for company in os.listdir(file_path):
    if company == '.DS_Store':
        continue
    company_folder_path = os.path.join(file_path, company)
    process_company_folder(company_folder_path)

Started parsing the file under job_id ef4e2720-c747-49a8-a87e-85c0399d86e0
Started parsing the file under job_id f6254d11-550e-4ab9-9e53-9fb44d75b33f
Started parsing the file under job_id a75fee58-b28d-4e3b-bc1a-6e995b5ca207
Started parsing the file under job_id 83521b96-b930-46b5-a8a1-cba477dd03ee
Started parsing the file under job_id 5be70019-1e15-41ef-8459-fe340fb87a1f
Started parsing the file under job_id 6efc1447-3e07-451d-b323-581e1633a87c
Started parsing the file under job_id e4d071f5-f748-4e1b-921c-fe418f5ef430
Started parsing the file under job_id 19e4fc23-ac35-4895-9ce0-8daaeb4149e4
Started parsing the file under job_id feb3217a-653c-4c1e-8251-4be11a7cc685
Started parsing the file under job_id a1dbb77a-bd61-494e-89e2-c9680f406b96
Started parsing the file under job_id a413d236-ca26-4da2-86c2-da6582461c16
Started parsing the file under job_id edaa9987-6f65-47a7-9788-857a7c254b79
Started parsing the file under job_id b8a64871-ec3e-4317-a16b-e4c1e1a2547e
Started parsing the file 

In [18]:
company_nodes = []
for company in documents:
    nodes = node_parser.get_nodes_from_documents(company)
    company_nodes.append(nodes)

company_nodes = [item for company_node in company_nodes for item in company_node]

# Batch, Embed, Store/Rank

In [44]:
ELASTIC_CLOUD_ID="Nlp_project:"+ELASTIC_CLOUD_ID

In [52]:
es_vector_store = ElasticsearchStore(index_name="calls",
                                     vector_field='conversation_vector',
                                     text_field='conversation',
                                     es_cloud_id=ELASTIC_CLOUD_ID,
                                     es_api_key=ELASTIC_API_KEY)

embed_model = resolve_embed_model("local:BAAI/bge-large-en-v1.5")

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=1000, chunk_overlap=150),
        embed_model,
    ],
    vector_store=es_vector_store
)

pipeline.run(documents=company_nodes)
print(".....Done running pipeline.....\n")

In [None]:
es_vector_store = ElasticsearchStore(index_name="calls",
                                     vector_field='conversation_vector',
                                     text_field='conversation',
                                     es_cloud_id=ELASTIC_CLOUD_ID,
                                     es_api_key=ELASTIC_API_KEY)

ollama_embedding = resolve_embed_model("local:BAAI/bge-small-en-v1.5")

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=1000, chunk_overlap=150),
        ollama_embedding,
    ],
    vector_store=es_vector_store
)

pipeline.run(documents=company_nodes)
print(".....Done running pipeline.....\n")

# Test/Run Q&A

In [None]:
es_vector_store = ElasticsearchStore(index_name="calls",
                                     vector_field='conversation_vector',
                                     text_field='conversation',
                                     es_cloud_id=ELASTIC_CLOUD_ID,
                                     es_api_key=ELASTIC_API_KEY)


In [21]:
co = cohere.Client(os.environ["COHERE_API_KEY"])

query="How much was the quarterly cash dividend per share in the second quarter of 2022 for Google"

documents = [doc.page_content for doc in docs]

# Example query and passages
start = time.time()

results = co.rerank(query=query, documents=documents, top_n=4, model="rerank-english-v2.0")
print(f"Took {time.time() - start} seconds to re-rank documents with Cohere.")

KeyError: 'COHERE_API_KEY'

In [None]:
# Local LLM to send user query to
local_llm = Ollama(model="llama3:instruct", request_timeout=60.0)
Settings.embed_model= resolve_embed_model("local:BAAI/bge-small-en-v1.5")

index = VectorStoreIndex.from_vector_store(es_vector_store)
query_engine = index.as_query_engine(local_llm, similarity_top_k=10)

query="How much was the quarterly cash dividend per share in the second quarter of 2022 for Google"
bundle = QueryBundle(query, embedding=Settings.embed_model.get_query_embedding(query))
result = query_engine.query(bundle)
print(result)

Repeat the original answer since the new context doesn't provide useful information to rewrite an answer. There is no mention of a quarterly cash dividend per share in the provided context information for any quarter, including the second quarter of 2022, for Google or Alphabet Inc. The context only provides financial information and discussions about the company's operations, revenues, and expenses, but does not mention dividends.
