In [4]:
#reloads modules before executing user code
%load_ext autoreload
%autoreload 2

In [3]:
import sys
!{sys.executable} -m pip install -r ../requirements.txt

Collecting fastapi
  Using cached fastapi-0.98.0-py3-none-any.whl (56 kB)
Collecting uvicorn
  Using cached uvicorn-0.22.0-py3-none-any.whl (58 kB)
Collecting starlette<0.28.0,>=0.27.0
  Using cached starlette-0.27.0-py3-none-any.whl (66 kB)
Installing collected packages: uvicorn, starlette, fastapi
Successfully installed fastapi-0.98.0 starlette-0.27.0 uvicorn-0.22.0
--- Logging error ---
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_internal/utils/logging.py", line 177, in emit
    self.console.print(renderable, overflow="ignore", crop=False, style=style)
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_vendor/rich/console.py", line 1673, in print
    extend(render(renderable, render_options))
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_vendor/rich/console.py", line 1305, in render
    for render_

In [5]:
# Ignore unclosed SSL socket warnings - optional in case you get these errors
import warnings

warnings.filterwarnings(action="ignore", message="unclosed", category=ImportWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

## Laying the foundations

### Storage

We're going to use Redis as our database for both document contents and the vector embeddings. You will need the full Redis Stack to enable use of Redisearch, which is the module that allows semantic search - more detail is in the [docs for Redis Stack](https://redis.io/docs/stack/get-started/install/docker/).

To set this up locally, you will need to install Docker and then run the following command: ```docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest```.

The code used here draws heavily on [this repo](https://github.com/RedisAI/vecsim-demo).

After setting up the Docker instance of Redis Stack, you can follow the below instructions to initiate a Redis connection and create a Hierarchical Navigable Small World (HNSW) index for semantic search.

In [6]:
# Setup Redis and running?
from database import get_redis_connection

redis_client = get_redis_connection()

redis_client.ping()

True

In [5]:
# Optional step to drop the indexes if they already exists
from importer import NOTION_INDEX_NAME, WEB_SCRAPE_INDEX_NAME

redis_client.ft(NOTION_INDEX_NAME).dropindex()
redis_client.ft(WEB_SCRAPE_INDEX_NAME).dropindex()

ResponseError: Unknown Index name

### Ingestion

We'll load up our Notion pages into documents

In [6]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [7]:
from importer import import_notion_data

notion_index = import_notion_data()

INFO:llama_index.vector_stores.redis:Creating index notion-fl-index
Creating index notion-fl-index
INFO:llama_index.vector_stores.redis:Added 3 documents to index notion-fl-index
Added 3 documents to index notion-fl-index
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 2108 tokens
> [build_index_from_nodes] Total embedding token usage: 2108 tokens


In [8]:
# Optional
# Proves that the redis database contains data

from importer import number_of_stored_notion_docs
print(number_of_stored_notion_docs())

56


In [7]:
# set Logging to DEBUG for more detailed outputs
query_engine = notion_index.as_query_engine()
response = query_engine.query("Where is the Denver office?")
response.response

NameError: name 'notion_index' is not defined

Adding web scraped data to index

In [10]:
from importer import import_web_scrape_data

web_scrape_index = import_web_scrape_data()

[Document(text='\n\nA digital transformation partner focused on software delivery\n\n\n\n      var show = localStorage.getItem(\'show\');\n      if(show === \'true\'){\n        document.documentElement.classList.add(\'dark\');\n      } \n    \n\nhsjQuery = window[\'jQuery\'];\n\n\n\n\n\na.cta_button{-moz-box-sizing:content-box !important;-webkit-box-sizing:content-box !important;box-sizing:content-box !important;vertical-align:middle}.hs-breadcrumb-menu{list-style-type:none;margin:0px 0px 0px 0px;padding:0px 0px 0px 0px}.hs-breadcrumb-menu-item{float:left;padding:10px 0px 10px 10px}.hs-breadcrumb-menu-divider:before{content:\'›\';padding-left:10px}.hs-featured-image-link{border:0}.hs-featured-image{float:right;margin:0 0 20px 20px;max-width:50%}@media (max-width: 568px){.hs-featured-image{float:none;margin:0;width:100%;max-width:100%}}.hs-screen-reader-text{clip:rect(1px, 1px, 1px, 1px);height:1px;overflow:hidden;position:absolute !important;width:1px}\n\n\n\n\n\n\n\n  \n  .cards_galle

In [11]:
# Optional
# Proves that the redis database contains data

from importer import number_of_stored_web_scrape_docs
print(number_of_stored_web_scrape_docs())

106


In [12]:
query_engine = web_scrape_index.as_query_engine()
response = query_engine.query("What are some of the solutions that Focused Labs has created?")
response.response

INFO:llama_index.vector_stores.redis:Using filters: *
Using filters: *
INFO:llama_index.vector_stores.redis:Querying index web-scrape-fl-index
Querying index web-scrape-fl-index
INFO:llama_index.vector_stores.redis:Found 2 results for query with id ['webscrapefocusedlabsdocs_6531c4b7-be65-4ab2-8ade-1c544f599da9', 'webscrapefocusedlabsdocs_5cdb55b6-3740-4026-a2a4-2fabeb2040d0']
Found 2 results for query with id ['webscrapefocusedlabsdocs_6531c4b7-be65-4ab2-8ade-1c544f599da9', 'webscrapefocusedlabsdocs_5cdb55b6-3740-4026-a2a4-2fabeb2040d0']
INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 13 tokens
> [retrieve] Total embedding token usage: 13 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 2089 tokens
> [get_response] Total LLM token usage: 2089 tokens
INFO:llama_index.token

"The context information does not provide specific details about the solutions that Focused Labs has created. It only mentions that they build solutions tailored to the needs of their clients and focus on developing a DevOps culture to ensure best-in-class technology delivery. The context also includes testimonials from clients who have worked with Focused Labs and praise their approach and ability to deliver on their promises. Additionally, there is a case study mentioned where Focused Labs built and deployed software for streamlining onboarding with BTR Energy's Bridge platform and the management of EV charging data."

In [13]:
#optional if you haven't installed stop words
#go to the corpora tab, use the arrow key to scroll down to stop words and hit enter to install
import nltk
import ssl

try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

nltk.download()

NLTK Downloader
---------------------------------------------------------------------------
    d) Download   l) List    u) Update   c) Config   h) Help   q) Quit
---------------------------------------------------------------------------

---------------------------------------------------------------------------
    d) Download   l) List    u) Update   c) Config   h) Help   q) Quit
---------------------------------------------------------------------------


True

In [1]:
# from importer import compose_graph
#
# graph = compose_graph()

In [None]:
# Optional
# Proves that the graph is built

# response = graph.query("What are some of the solutions that Focused Labs has created?")
#
# print(str(response))
# print(response.get_formatted_sources())

In [1]:
from chat_engine import create_chat_engine

chat_engine = create_chat_engine()

In [3]:

chat_engine.chat("Who are developers at Focused Labs?")

Querying with: Who are the developers at Focused Labs?


Response(response='The developers at Focused Labs are Rowan, ty, and Lexie.', source_nodes=[NodeWithScore(node=Node(text='\nThe developers at Focused Labs are Rowan (DevOps Engineer), ty (Software Engineer), and Lexie (Software Engineer).', doc_id='09721d64-3404-4143-9fef-eb9a0853c4ae', embedding=None, doc_hash='47a1e332eed8129aaaed7e9097d0674d3c0e282a2cf74e4f9e8b03bbff103f68', extra_info=None, node_info={'_node_type': <NodeType.TEXT: '1'>}, relationships={}), score=None), NodeWithScore(node=Node(text="\nThe developers at Focused Labs are the software engineers and other technical professionals who are responsible for creating and maintaining the company's products and services. They are also responsible for developing and implementing the remote culture and other initiatives that help the company achieve its vision.", doc_id='7c19ca78-60b2-4160-90fb-31567bed4867', embedding=None, doc_hash='a6c39c15b97acfd15577836bc60647fb6db64e37be19dd24d4744f5f5bd78d0f', extra_info=None, node_info={'