In [14]:
import logging
import sys
import openai
from dotenv import load_dotenv, find_dotenv
from llama_index import (
    GPTVectorStoreIndex,
    LLMPredictor,
    ServiceContext,
    SimpleWebPageReader,
    TrafilaturaWebReader
) 
from langchain.chat_models import ChatOpenAI
from llama_index import StorageContext, load_index_from_storage
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader
import os

#logging.basicConfig(stream=sys.stdout, level=logging.INFO)
#logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Load settings from the '.env_x' file located by find_dotenv; override=True
# is passed so values from that file win over ones already in the environment.
_ = load_dotenv(
    find_dotenv('.env_x'),
    override=True,
)

# Configure the module-level openai client: fixed public endpoint, while the
# key and API type are read from the environment (KeyError if either is unset).
openai.api_base = "https://api.openai.com/v1"
openai.api_key = os.environ['OPENAI_API_KEY']
openai.api_type = os.environ['OPENAI_API_TYPE']
openai.api_version = ""

# Stays None until an index has been loaded from disk or built from the web page.
index = None

# Chat model (temperature=0) wrapped in a predictor and handed to the index
# through the service context.
llm_predictor = LLMPredictor(
    llm=ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0,
    )
)
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
)
 
# Try to reuse an index persisted under './webstorage' by an earlier run.
# On any ordinary failure `index` keeps its previous value, so the build step
# that checks `index is None` still runs.
try:
    # rebuild storage context
    storage_context = StorageContext.from_defaults(persist_dir='./webstorage')
    # load index
    index = load_index_from_storage(
        storage_context=storage_context,
        service_context=service_context,
    )
except Exception as load_error:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit (kernel interrupt or shutdown) are not
    # swallowed and mistaken for "no index on disk".
    print("No index found, creating a new one")
    # Keep the real reason available when logging is enabled; a corrupt
    # store and a missing directory otherwise look identical.
    logging.getLogger(__name__).info(
        "Could not load persisted index from './webstorage': %r", load_error
    )

# Web page(s) to ingest when the index has to be built from scratch.
urls = [
    "https://support.microsoft.com/en-us/surface/surface-pro-8-update-history-1080bf34-7e87-408c-8619-80571283526e"
]

# Build only when nothing was loaded from './webstorage'.
if index is None:
    # Alternative loader that was tried: SimpleWebPageReader(html_to_text=True).load_data(urls)
    documents = TrafilaturaWebReader().load_data(urls)
    index = GPTVectorStoreIndex.from_documents(
        documents,
        service_context=service_context,
    )
    # Persist the new index so later runs can load it instead of rebuilding.
    index.storage_context.persist(persist_dir='./webstorage')

# Create a query engine with default settings and ask a first, general question.
query_engine = index.as_query_engine()
result = query_engine.query(
    "what is this article talking about?"
)



INFO:llama_index.indices.loading:Loading all indices.
INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 7 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1910 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens


In [20]:
# Re-create the query engine from `index`, then ask for one table cell,
# describing the two table columns to the model in the prompt itself.
query_engine = index.as_query_engine()
result = query_engine.query(
    "The 'Windows Update Name' and 'Device Manager' are two columns of a table. "
    "What is the 'Windows Update Name' of 'Intel Camera - Control Logic -System devices'"
)


INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 37 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 2072 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens


In [36]:
# Ask which update applies to one specific device entry; uses the
# `query_engine` bound by an earlier cell.
result = query_engine.query(
    "what Update is available for 'Intel(R) Imaging Signal Processor -System devices'"
)

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 17 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 2086 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens


In [24]:
# General summary question; uses the `query_engine` bound by an earlier cell.
result = query_engine.query(
    "what's this article talking about?"
)

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 7 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1910 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens


In [38]:
# Ask the model to lay the update list out as a two-column table.
# NOTE(review): the prompt text (including the spelling "columne") is kept
# exactly as it was sent, so the recorded answer still corresponds to it.
result = query_engine.query(
    "Generate an HTML table for the updates based on this content, "
    "one column is 'Windows Update Name', another columne is 'Device Manager'"
)

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 30 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 2405 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens


In [39]:
# Print the text form of `result`, i.e. whatever the most recently executed
# query cell assigned to it.
print(result)

| Windows Update Name | Device Manager |
| --- | --- |
| Surface - Firmware - 18.102.141.0 | Surface UEFI – Firmware |
| Surface - Firmware - 9.5.139.0 | Surface System Aggregator - Firmware |
| Surface - Firmware - 15.0.1879.1 | Surface ME - Firmware |
| Surface - Firmware - 1.16.139.0 | Surface Pen0C0F Firmware Update - Firmware |
| Intel - System - 2120.100.0.1085 | Intel(R) Management Engine Interface - System Devices |
| Surface - Human Interface Devices - 3.54.139.0 | Surface Hid Mini Driver - Human Interface Devices |
| MBIHV -Firmware - 2.1.1.14 | Quectel MSFT MBIHV Mobile Broadband Firmware Device - Firmware |
| Surface - HIDClass - 161.36.24.0 | Surface TCON device -Human Interface Devices |
| Surface - Firmware - 66.1.0.17 | Surface SMF - Firmware |
| Surface - Firmware - 10.4.0.0 | Surface PD Controller - Firmware |
| Surface - Firmware - 15.0.1706.1 | Surface ME - Firmware |
| Surface - USB -1.77.137.0 | Surface Type Cover V7 Fingerprint UDE Controller _ USB Controllers |
