In [1]:
import os
import getpass
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.cohere import CohereEmbedding
from llama_parse import LlamaParse
from llama_index.core.node_parser import MarkdownNodeParser, SentenceSplitter
from llama_index.core import VectorStoreIndex, QueryBundle, Response, Document, Settings
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.embeddings import resolve_embed_model
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from dotenv import load_dotenv
import cohere
import time


import nest_asyncio

nest_asyncio.apply()

In [2]:
load_dotenv()

api_keys = os.getenv("LLAMA_INDEX_KEYS").split(',')
for i, api_key in enumerate(api_keys[1:]):
    api_keys[i+1] = api_key[1:]

ELASTIC_CLOUD_ID = os.getenv("ELASTIC_CLOUD_ID_MISTRAL")
ELASTIC_API_KEY = os.getenv("ELASTIC_API_KEY_MISTRAL")

In [3]:
es_vector_store = ElasticsearchStore(index_name="calls",
                                     vector_field='conversation_vector',
                                     text_field='conversation',
                                     es_cloud_id=ELASTIC_CLOUD_ID,
                                     es_api_key=ELASTIC_API_KEY)

In [7]:
# Local LLM to send user query to
from llama_index.core.indices.query.query_transform.base import (
    StepDecomposeQueryTransform,
)
from llama_index.core.query_engine import MultiStepQueryEngine

Settings.llm = Ollama(model="llama3:instruct", request_timeout=120.0)
Settings.embed_model= resolve_embed_model("local:BAAI/bge-large-en-v1.5")

index = VectorStoreIndex.from_vector_store(es_vector_store)
step_decompose_transform = StepDecomposeQueryTransform()
index_summary = "Used to answer medical questions from the given context"


query_engine = index.as_query_engine(Settings.llm, similarity_top_k=10)
query_engine = MultiStepQueryEngine(
    query_engine=query_engine,
    query_transform=step_decompose_transform,
    index_summary=index_summary,
)

In [8]:
questions = []
with open('Questions.txt', 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()  # Remove any leading/trailing whitespace
        if line.endswith('?'):  # Check if the line ends with a question mark
            questions.append(line)

In [10]:
query_results = []
i = 0
for query in questions:
    bundle = QueryBundle(query, embedding=Settings.embed_model.get_query_embedding(query))
    result = query_engine.query(bundle)
    query_results.append({
        "query": query,
        "result": result.response
    })
    print(i)
    i+=1

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85


HTTPStatusError: Client error '400 Bad Request' for url 'http://localhost:11434/api/chat'
For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400

In [11]:
query_results

[{'query': 'What is the fiscal year end date for Apple Inc. as reported in their 2022 Form 10-K?',
  'result': 'October 29, 2022'},
 {'query': 'How many shares of common stock were outstanding for Apple Inc. as of October 14, 2022?',
  'result': 'According to the provided context, Apple Inc.\'s fiscal years ended in September, spanning either 52 or 53 weeks. The specific information about the date range is:\n\n* 20XX: 52 or 53 weeks ending on the last Saturday of September\n* 2022, 2021, and 2020: 52 weeks each\n\nAs for the company information, it is mentioned that the context is related to Apple Inc. and its wholly owned subsidiaries (collectively "Apple" or the "Company").\n\nSince the query asks about the number of shares outstanding as of October 14, 20XX, we can infer that this information is not explicitly provided in the given context.'},
 {'query': "What is the par value per share of Apple Inc.'s common stock as registered on The Nasdaq Stock Market LLC?",
  'result': '$0.0000

In [12]:
import json

json_data = json.dumps(query_results)

with open('query_results2.json', 'w') as file:
    file.write(json_data)