In [10]:
%load_ext dotenv
%dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv
Python-dotenv could not parse statement starting at line 5


In [11]:
from decouple import config
import logging
import sys
from llama_index import SimpleDirectoryReader, StorageContext
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.vector_stores import PGVectorStore
from llama_index import ServiceContext, set_global_service_context
from llama_index.llms import OpenAI
from llama_index.embeddings import OpenAIEmbedding, HuggingFaceEmbedding
from llama_index.node_parser import (
    SentenceWindowNodeParser,
)
from llama_index.text_splitter import SentenceSplitter

# Look up the OpenAI API key through decouple's `config`.
# NOTE(review): no cell visible here passes this variable on explicitly;
# presumably the OpenAI clients pick the key up from the environment — confirm.
OPENAI_API_KEY = config("OPENAI_API_KEY")


In [12]:
import psycopg2

# Vector store configuration: the connection string is read via decouple's
# `config`; the database name is fixed in the notebook.
PGVECTOR_CONNECTION_STRING = config("PGVECTOR_CONNECTION_STRING")
PGVECTOR_DATABASE = "vector_db"

# Lower-case aliases used by the cells below.
connection_string, db_name = PGVECTOR_CONNECTION_STRING, PGVECTOR_DATABASE

# Open a psycopg2 connection in autocommit mode.
# NOTE(review): `conn` is not used (or closed) by any cell visible here —
# confirm whether it is still needed.
conn = psycopg2.connect(connection_string)
conn.autocommit = True


In [13]:


# Models shared by both indexes: chat model for answers, OpenAI embeddings
# requested in batches of 50.
# (A HuggingFaceEmbedding with "sentence-transformers/all-mpnet-base-v2" and
# max_length=512 was previously tried here as an alternative embed model.)
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
embed_model = OpenAIEmbedding(embed_batch_size=50)

# The service context carries only the LLM and the embedding model; no node
# parser is attached because nodes are produced explicitly by the parsers below.
ctx = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

# Sentence-window parser with an explicit window size of 30. The surrounding
# window and the original sentence are stored under the metadata keys
# "window" and "original_text".
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=30,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

# Baseline parser: plain sentence splitter, 400-sized chunks, no overlap.
text_splitter = SentenceSplitter(chunk_size=400, chunk_overlap=0)


In [14]:
from sqlalchemy.engine import make_url

# Break the connection string into its parts so they can be passed to
# PGVectorStore one by one. The database name comes from `db_name`, not from
# the URL itself.
url = make_url(connection_string)
vector_store = PGVectorStore.from_params(
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    database=db_name,
    table_name="DeloitteFutureOfAI",
    embed_dim=1536,  # openai embedding dimension
)

# Load every document found in the local "data" directory.
documents = SimpleDirectoryReader("data").load_data()

# Parse the same documents twice: once with the sentence-window parser and
# once with the plain sentence splitter used as the baseline.
nodes = node_parser.get_nodes_from_documents(documents)
base_nodes = text_splitter.get_nodes_from_documents(documents)

In [15]:

# Each index gets its own pgvector table. Building both indexes on the same
# storage context writes the sentence-window nodes and the baseline nodes into
# one table, so a query against either index can return nodes produced by the
# other parser (and sentence-window lookups such as metadata["window"] can then
# fail on a baseline node).
# NOTE: rows already written to the shared table by earlier runs are not
# removed by this cell.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
sentence_index = VectorStoreIndex(
    nodes, service_context=ctx, storage_context=storage_context
)

# Separate table for the baseline index; connection parameters mirror the ones
# used for `vector_store`.
base_vector_store = PGVectorStore.from_params(
    database=db_name,
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    table_name="DeloitteFutureOfAI_base",
    embed_dim=1536,  # same embedding dimension as the sentence-window store
)
base_storage_context = StorageContext.from_defaults(vector_store=base_vector_store)
base_index = VectorStoreIndex(
    base_nodes, service_context=ctx, storage_context=base_storage_context
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.open

In [20]:
from llama_index.postprocessor import MetadataReplacementPostProcessor
import logging
import sys

# Configure root logging exactly once per run of this cell. The previous
# version called basicConfig and then attached a second StreamHandler, which
# printed every record twice and added yet another handler on each re-run.
# force=True drops any handlers already attached to the root logger before
# installing the single stdout handler, so re-running the cell is idempotent.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

# Query the sentence-window index, retrieving only the single best match.
query_engine = sentence_index.as_query_engine(
    similarity_top_k=1,
    # the target key defaults to `window` to match the node_parser's default
    node_postprocessors=[
        MetadataReplacementPostProcessor(target_metadata_key="window")
    ],
)
window_response = query_engine.query(
    "What can we liken to C-3PO and Chewbacca?"
)
print(window_response)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
To achieve the Emerald Profit Sharing Bonus, one must 

In [21]:
# Pull the stored window and the original sentence out of the metadata of the
# top-ranked source node of the sentence-window response.
source_metadata = window_response.source_nodes[0].node.metadata
window, sentence = source_metadata["window"], source_metadata["original_text"]

# Show the window, a separator line, then the original sentence.
print(
    f"Window: {window}",
    "------------------",
    f"Original Sentence: {sentence}",
    sep="\n",
)

Window: THE PLAN  | ANNUAL BONUSES
A-20Each year, Amway sets aside money to fund three 
annual bonuses: the Emerald, Diamond and Diamond 
Plus Bonuses.  Each fund receives an amount equal to 
one-fourth of 1% (.0025) of the total North American 
BV.  In addition, the Emerald and Diamond funds 
include one-fourth of 1% (.0025) of qualified 
international volume.  Following is a description of how 
each of the bonus funds is disbursed.
 Emerald Bonus
Qualified Emeralds and above who personally or 
foster-register 3 or more North American groups, each 
of which qualifies at the 25% Performance Bonus Level 
for at least 6 months of a given fiscal year, receive 
Emerald Bonus points and payment on North American 
volume as well as the traceable volume from 
internationally sponsored qualified groups.
 At the end of the fiscal year, Amway identifies all 
internationally sponsored volume, links it to the 
appropriate international Sponsor and corresponding 
market (one country removed) and pa

In [None]:
# Baseline for comparison: ask the same question against the plain
# sentence-splitter index, retrieving the two closest chunks.
# Note that this rebinds `query_engine`, replacing the sentence-window engine.
query_engine = base_index.as_query_engine(similarity_top_k=2)
vector_response = query_engine.query("What can we liken to C-3PO and Chewbacca?")
print(vector_response)