In [1]:
import os
from llama_index.core import SimpleDirectoryReader
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import TokenTextSplitter


In [2]:
# Relative path to the directory of parsed Bill FAQ files that the notebook ingests.
PARSED_PATH = '../data/Bill_FAQs/parsed'

## Loading & Ingestion

In [3]:
# Sanity check: show which files are present in the input directory before loading.
os.listdir(PARSED_PATH)

['what_is_pg&e_doing_to_make_rates_and_bills_more_affordable?.md',
 'why_are_you_estimating_my_bill?.md',
 'why_am_i_being_charged_a_delivery_charge?.md',
 'when_will_i_receive_my_delayed_bill?.md',
 'can_i_submit_a_claim_for_the_dixie_fire_impacts_to_my_property?.md',
 'how_does_electricity_usage_translate_to_bill_charges?.md',
 'will_i_be_required_to_pay_my_bill_all_at_once_when_i_receive_it?.md',
 'why_is_my_electric_bill_so_high?.md',
 'how_do_i_dispute_my_bill?.md',
 'i_have_not_received_my_bill_yet._where_is_it?.md',
 'how_do_i_set_up_bill_forecast_alerts?.md',
 'why_did_i_get_another_bill_after_i_stopped_service?.md',
 'why_does_my_collections_notice_only_show_my_pg&e_charges,_and_not_the_charges_from_my_cca?.md',
 'where_can_i_find_my_detail_of_bill_statements?.md',
 'my_refund_check_is_dated_over_90_days,_what_do_i_do?.md',
 'why_does_the_california_climate_credit_show_up_as_a_previous_unpaid_balance_on_my_bill?.md',
 'i_share_an_apartment,_how_can_i_make_my_roommate_pay?.md',

In [4]:
# Load the files under PARSED_PATH as llama_index documents.
documents = SimpleDirectoryReader(PARSED_PATH).load_data()

In [5]:
# Ingestion pipeline with a single transformation: a TokenTextSplitter
# constructed with its default settings.
pipeline = IngestionPipeline(transformations=[TokenTextSplitter()])

# Run the pipeline over the loaded documents; `nodes` is what gets indexed below.
nodes = pipeline.run(documents=documents)

In [6]:
# Preview the first loaded document (prints its ID and a truncated text snippet).
print(documents[0])

Doc ID: 22e6f3ce-b63d-4727-a7be-fee3547c011a
Text: If your billing is estimated, PG&E works hard to estimate your
usage as accurately as possible. PG&E uses a complex formula that
includes your own historical usage, rate information, and weather
trends to bill as close to accurate as possible. However, if your
actual energy usage is higher than the previous year, the billed
amount (estimated bil...


In [7]:
# Preview the first node produced by the pipeline (prints its ID and a truncated text snippet).
print(nodes[0])

Node ID: 42e64386-7e15-4c2c-8fc9-5df74ec614b2
Text: If your billing is estimated, PG&E works hard to estimate your
usage as accurately as possible. PG&E uses a complex formula that
includes your own historical usage, rate information, and weather
trends to bill as close to accurate as possible. However, if your
actual energy usage is higher than the previous year, the billed
amount (estimated bil...


## Indexing

In [8]:
# Alternative kept for reference (not executed): build the index straight from
# the loaded documents instead of from the pre-split nodes used in the next cell.
# from llama_index.core import VectorStoreIndex

# index = VectorStoreIndex.from_documents(documents)

In [9]:
from llama_index.core import VectorStoreIndex

# Build the vector index from the nodes produced by the ingestion pipeline;
# show_progress=True renders the "Generating embeddings" progress bar.
index = VectorStoreIndex(nodes, show_progress=True)

Generating embeddings:   0%|          | 0/52 [00:00<?, ?it/s]

## Storing

In [10]:
# Relative path of the directory the index is written to.
PERSIST_DIR = '../data/RAG'

# Write the index's storage context to PERSIST_DIR so it can be reloaded
# (see the next section) instead of being rebuilt.
index.storage_context.persist(persist_dir=PERSIST_DIR)

## Loading the persisted index

In [11]:
from llama_index.core import StorageContext, load_index_from_storage

# Rebuild the storage context from the files previously persisted to PERSIST_DIR.
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)

# Load the index back from that storage context; this rebinds `index`,
# replacing the in-memory index built from `nodes` above.
index = load_index_from_storage(storage_context)

## Using Vector Stores

In [12]:
# Relative path of the on-disk Chroma database used by the cells below.
CHROMA_DB_PERSISTENT_PATH = '../data/chroma_db'
# Name of the Chroma collection that holds the Bill FAQ embeddings.
CHROMA_DB_COLLECTION_NAME = "bills_faqs"

In [13]:
import chromadb
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Load the parsed FAQ documents.
documents = SimpleDirectoryReader(PARSED_PATH).load_data()

# Open the on-disk Chroma database; data is saved under CHROMA_DB_PERSISTENT_PATH.
db = chromadb.PersistentClient(path=CHROMA_DB_PERSISTENT_PATH)

# Fetch the FAQ collection, creating it on the first run.
chroma_collection = db.get_or_create_collection(CHROMA_DB_COLLECTION_NAME)

# Use the Chroma collection as the vector store behind the index.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# The collection survives kernel restarts, so unconditionally calling
# from_documents() would embed and insert every document again on each re-run,
# accumulating duplicate vectors. Ingest only when the collection is empty;
# otherwise attach to the vectors that are already stored.
# To force a re-ingest (e.g. after the source files change), delete the
# collection or the CHROMA_DB_PERSISTENT_PATH directory first.
if chroma_collection.count() == 0:
    index = VectorStoreIndex.from_documents(
        documents, storage_context=storage_context
    )
else:
    index = VectorStoreIndex.from_vector_store(
        vector_store, storage_context=storage_context
    )

# Create a query engine and run a smoke-test query.
# NOTE(review): this question looks unrelated to the Bill FAQ corpus, so it only
# shows that the pipeline runs end to end - confirm whether an on-topic query
# would be more useful here.
query_engine = index.as_query_engine()
response = query_engine.query("What is the meaning of life?")
print(response)

The meaning of life is a philosophical question that has been debated for centuries and varies depending on individual beliefs, values, and perspectives.


## Load embeddings directly

In [14]:
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Open the on-disk Chroma database at CHROMA_DB_PERSISTENT_PATH.
db = chromadb.PersistentClient(path=CHROMA_DB_PERSISTENT_PATH)

# Fetch the collection by name.
# NOTE(review): get_or_create_collection creates the collection when it does not
# exist, so a wrong name or path would presumably yield an empty index rather
# than an error - confirm whether a strict lookup is preferable in this
# load-only cell.
chroma_collection = db.get_or_create_collection(CHROMA_DB_COLLECTION_NAME)

# Use the Chroma collection as the vector store behind the index.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index on top of the vector store; unlike the previous section,
# no documents are passed in here.
index = VectorStoreIndex.from_vector_store(
    vector_store, storage_context=storage_context
)

# Create a query engine (reused by the following cells) and run a test query.
query_engine = index.as_query_engine()
response = query_engine.query("What is llama2?")
print(response)

There is no information provided about "llama2" in the context.


In [15]:
# Ask a billing question through the query engine created in the previous cell.
question = "am I paying more if my bill is estimated?"
response = query_engine.query(question)
print(response)

If your bill is estimated, you may end up paying more if the estimated amount is higher than your actual usage. However, any overpayment will be credited towards your next bill(s). On the other hand, if the estimated amount is lower than your actual usage, you will owe an additional amount that will appear on your adjusted statement.


In [16]:
# Second billing question; `response` is rebound and inspected in the next cell.
question = "Why is my gas bill so high?"
response = query_engine.query(question)
print(response)

Your gas bill may be high due to a combination of factors such as higher natural gas prices, increased energy use during colder months, and market conditions affecting gas prices. Additionally, the usage of natural gas can vary based on the season, changes in lifestyle, or the addition of family members, all of which can contribute to fluctuations in your gas bill. To better understand and potentially reduce your gas bill, you can monitor your usage patterns, consider energy-saving tips, and explore assistance programs offered by your gas provider.


In [17]:
# Show the answer text of the last query as a plain string (the cell's output value).
response.response

'Your gas bill may be high due to a combination of factors such as higher natural gas prices, increased energy use during colder months, and market conditions affecting gas prices. Additionally, the usage of natural gas can vary based on the season, changes in lifestyle, or the addition of family members, all of which can contribute to fluctuations in your gas bill. To better understand and potentially reduce your gas bill, you can monitor your usage patterns, consider energy-saving tips, and explore assistance programs offered by your gas provider.'

## Inserting Documents or Nodes

In [18]:
# Alternative kept for reference (not executed): start from an empty index and
# insert the documents one at a time.
# from llama_index.core import VectorStoreIndex

# index = VectorStoreIndex([])
# for doc in documents:
#     index.insert(doc)