In [282]:
import os
import json
import glob
from zep_cloud.client import AsyncZep
from openai import AsyncOpenAI
import asyncio
from asyncio import Semaphore

from dotenv import load_dotenv

# Load environment variables via python-dotenv before reading the API keys.
load_dotenv()

zep_api_key = os.environ.get("ZEP_API_KEY")
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Fail fast, in a fixed order, if either credential is missing.
for credential, failure_message in (
    (zep_api_key, "ZEP_API_KEY is not set"),
    (openai_api_key, "OPENAI_API_KEY is not set"),
):
    assert credential is not None, failure_message

# Async clients shared by every cell below.
zep_client = AsyncZep(api_key=zep_api_key)
oai_client = AsyncOpenAI(api_key=openai_api_key)

In [29]:
# Directory of JSON chunk files; each file is iterated as a sequence of chunk records.
data_path = "english_chunks"

# glob returns paths in arbitrary, OS-dependent order. Sorting makes later
# positional access ([0], [100], [:2000]) reproducible across machines and runs.
file_list = sorted(glob.glob(f"{data_path}/*"))

file_contents = []

for file in file_list:
    # Read as UTF-8 explicitly rather than relying on the platform default encoding.
    with open(file, "r", encoding="utf-8") as f:
        file_contents.append(json.load(f))

# Flatten the per-file lists into one list of chunk records.
flattened_file_contents = [item for sublist in file_contents for item in sublist]

print(len(flattened_file_contents))

22645


In [30]:
# Display the first loaded chunk record to inspect its structure.
flattened_file_contents[0]

{'_id': '05333657-245d-49ba-87b7-148f8388da9c',
 'text': 'In-PersonFR',
 'metadata': {'url': 'https://www.verkada.com/events/seminaire-verkada-et-ingelan-montpellier/',
  'language': 'en',
  'description': '',
  'header': 'Introduction',
  'header_level': 0,
  'chunk_index': 0,
  'total_chunks': 10,
  'parent_doc_id': ''}}

In [63]:
# IMPORTANT: Zep has a limit of 10000 characters for graph.add, so the chunk text
# is truncated to 9000 characters.
# NOTE(review): the cap applies to "text" only; header, description, url and the
# JSON syntax share the remaining headroom — confirm that is always enough.
MAX_TEXT_CHARS = 9000

# Position of the document printed as a sanity check after restructuring.
EXAMPLE_INDEX = 100

restructured_data = []

for item in flattened_file_contents:
    # Look the metadata up once; tolerate a missing or null "metadata" entry.
    metadata = item.get("metadata") or {}

    # New document structure: everything nested under a single "chunk" key.
    new_doc = {
        "chunk": {
            "header": metadata.get("header", ""),
            "description": metadata.get("description", ""),
            # A null "text" value is treated like a missing one.
            "text": (item.get("text") or "")[:MAX_TEXT_CHARS],
            "url": metadata.get("url", ""),
        }
    }

    restructured_data.append(new_doc)

# Print example to verify structure. Clamp the index so small datasets do not
# raise IndexError; with no documents at all, only the header and total are shown.
print("Example of restructured data:")
if restructured_data:
    example_index = min(EXAMPLE_INDEX, len(restructured_data) - 1)
    print(json.dumps(restructured_data[example_index], indent=2))
print(f"\nTotal documents restructured: {len(restructured_data)}")

Example of restructured data:
{
  "chunk": {
    "header": "Introduction",
    "description": "\"Don't miss an exclusive webinar hosted by Verkada and Insight, a leading national solutions and systems integrator. Led by a group of experienced industry professionals, you'll gain insight into cutting-edge developments in physical security and their potential advantages for your organization. \nDiscover why so many companies have switched from on-premise, DVR / NVR-based systems to Verkada's cloud-based approach. From its intuitive yet powerful management interface to its advanced people and vehicle analytics, see firsthand why more than 15,000 organizations trust Verkada to safeguard their people!\nYour hosts are:\nAmy Amy (Commercial Inside Sales Director, Insight)\nTyler  King (Account Executive, Verkada)\nAll webinar attendees are eligible to receive free Verkada trial products, including a free trial of our alarm monitoring service (where available). Attendees will also be qualified 

In [102]:
import random
import string

# Build the graph group id from a random 6-character alphanumeric prefix
# (presumably so each run gets its own group — confirm).
id_alphabet = string.ascii_letters + string.digits
random_string = "".join(random.choices(id_alphabet, k=6))
group_id = f"{random_string}_11x-rag-example"

In [None]:
# Uncomment to ingest the data

# verkada = (
#     "verkada: Security Systems for the Modern Enterprise\n"
#     "Modern, cloud-managed security cameras, access control, and smart building solutions in a single, fully integrated security system."
# )

# await zep_client.graph.add(group_id=group_id, data=verkada, type="text")


# async def add_document(sem, doc):
#     async with sem:
#         await zep_client.graph.add(group_id=group_id, data=json.dumps(doc), type="json")


# async def add():
#     sem = Semaphore(2)  # Limit concurrency to 2 to avoid rate limiting
#     tasks = [add_document(sem, doc) for doc in restructured_data[:2000]]
#     await asyncio.gather(*tasks)


# await add()

In [104]:
# await zep_client.group.delete(group_id=group_id)

In [267]:
# Natural-language question reused by every search cell below.
# (Fixed the "producrs" typo so the search and the LLM receive the intended wording.)
query = (
    "which products are most relevant to an environmental quality specialist?"
)

In [270]:
results = await zep_client.graph.search(
    group_id=group_id, query=query, scope="nodes", limit=10, reranker="cross_encoder"
)

for node in results.nodes:
    print(node.summary)

Verkada emphasizes the importance of air quality in the workplace, particularly in the context of reducing indoor transmission of COVID-19. Their Environmental Sensors are designed to monitor indoor air quality, which is crucial for ensuring a safe and healthy work environment. These sensors help organizations assess air quality by monitoring Air Quality Index (AQI) and Total Volatile Organic Compounds (TVOCs), allowing for timely interventions such as improving ventilation, using better filters, or deploying air purifiers. By providing real-time environmental insights through customizable dashboards, Verkada enables teams to focus on relevant data for maintaining air quality. This proactive approach supports organizations in adhering to COVID-19 safety policies and promotes a safer return to the office.
The article from Verkada emphasizes the critical issue of poor indoor air quality (IAQ) in commercial office buildings, which costs the American economy approximately $168 billion annu

In [271]:
results = await zep_client.graph.search(
    group_id=group_id, query=query, scope="edges", limit=10, reranker="cross_encoder"
)

for edge in results.edges:
    print(edge.fact)

Verkada's six product lines include environmental sensors.
Leading experts partner with Verkada to simplify air quality monitoring.
Verkada has customers who use their products to manage air quality.
Environmental conditions are required to meet wellness and sustainability certifications.
The SV11 Environmental Sensor is manufactured by Verkada.
The SV11 Environmental Sensor is made by Verkada.
Environmental sensors are used to protect properties.
Organizations leverage environmental sensors to detect dangerous CO2 levels.
The air quality sensors integrate with third-party VMS systems.


In [280]:
results = await zep_client.graph.search(
    group_id=group_id, query=query, scope="nodes", limit=10, reranker="rrf"
)

for node in results.nodes:
    print(node.summary)

Germains Seed Technology and the City of Lancaster, California, utilize Verkada's integrated platform to enhance security and operational efficiency through advanced environmental monitoring. Germains Seed Technology employs environmental sensors, video surveillance, access control, and alarms, achieving a 30% annual cost savings and improved supply chain processes. This system allows for centralized control, proactive facility monitoring, real-time incident response, and better compliance management, significantly reducing enterprise risk. Similarly, Lancaster uses Verkada's hybrid cloud security solutions, including air quality sensors to monitor indoor contaminants, which enhances public safety and supports a data-driven policing model. Both organizations benefit from the integration of environmental monitoring with security features, leading to improved health and safety management. Additionally, users have the option to opt out of the sale or sharing of personal data collected thr

In [273]:
results = await zep_client.graph.search(
    group_id=group_id, query=query, scope="edges", limit=10, reranker="rrf"
)

for edge in results.edges:
    print(edge.fact)

Organizations leverage environmental sensors to detect dangerous CO2 levels.
Organizations leverage environmental sensors to detect dangerous CO2 levels.
Verkada's six product lines include environmental sensors.
Leading experts partner with Verkada to simplify air quality monitoring.
Environmental conditions are required to meet wellness and sustainability certifications.
Verkada has customers who use their products to manage air quality.
The SV11 Environmental Sensor is manufactured by Verkada.
The air quality sensors integrate with third-party VMS systems.
The SV11 Environmental Sensor is made by Verkada.
Environmental sensors are used to protect properties.


In [285]:
centroid_prompt = """Identify the type of product that the query is referring to. Return just the product category."""

centroid_prompt_response = await oai_client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "Identify the type of product that the query is referring to. Return just the product category."},
        {"role": "user", "content": query}
    ],
)

product_category = centroid_prompt_response.choices[0].message.content

results = await zep_client.graph.search(
    group_id=group_id, query=product_category, scope="nodes", limit=1, reranker="rrf"
)

print("Product Category:")
print(product_category) 
print("-" * 30)

results = await zep_client.graph.search(
    group_id=group_id, query=product_category, scope="nodes", limit=1, reranker="rrf"
)

centroid_node_uuid = results.nodes[0].uuid_

print("Centroid Node Summary:")
print(results.nodes[0].summary)
print("-" * 30)

result_from_centroid = await zep_client.graph.search(
    group_id=group_id,
    query=query,
    scope="nodes",
    limit=10,
    reranker="node_distance",
    center_node_uuid=centroid_node_uuid,
)


for node in result_from_centroid.nodes:
    print(node.summary)

Product Category:
Environmental monitoring equipment
------------------------------
Centroid Node Summary:
Verkada offers two advanced environmental monitoring solutions: the SV11 Environmental Sensor and Air Quality Sensors. The SV11 is specifically designed for server room monitoring, integrating with Verkada's cloud-based security products to effectively track environmental changes and protect critical equipment from potential failures. Resources, including a dedicated blog post, provide best practices for maintaining optimal server conditions. On the other hand, Verkada's Air Quality Sensors focus on indoor air quality (IAQ), providing real-time data and user-friendly features. Users can start with a 30-day free trial, utilizing cloud technology and easy installation via Power over Ethernet (PoE). The sensors are scalable, equipped with multiple sensors for comprehensive monitoring, and support remote management through a secure web interface. They also offer insights into humidity

In [286]:
result_from_centroid = await zep_client.graph.search(
    group_id=group_id,
    query=query,
    scope="edges",
    limit=10,
    reranker="node_distance",
    center_node_uuid=centroid_node_uuid,
)


for edge in result_from_centroid.edges:
    print(edge.fact)

The SV11 Environmental Sensor is manufactured by Verkada.
The SV11 Environmental Sensor is made by Verkada.
The SV11 Environmental Sensor has a customer story related to Worker Safety & Quality Control.
The SV11 Environmental Sensor is used by Carolina Ingredients for protecting food manufacturing.
Organizations leverage environmental sensors to detect dangerous CO2 levels.
Environmental sensors are used to protect properties.
Organizations leverage environmental sensors to keep indoor spaces safe.
Environmental sensors help keep indoor spaces safe.
Environmental sensors help keep indoor spaces safe by detecting dangerous CO2 levels.
Organizations leverage environmental sensors to detect dangerous CO2 levels.


In [276]:
results = await zep_client.graph.search(
    group_id=group_id,
    query=query,
    scope="nodes",
    limit=10,
    reranker="episode_mentions",
)


for node in results.nodes:
    print(node.summary)

At Verkada, the belief is that fostering a sense of belonging among employees leads to greater innovation, happiness, and success. This commitment to belonging is seen as essential for creating better products, as it enhances employee experiences and strengthens relationships with customers. Verkada is dedicated to building an equitable and inclusive workplace that empowers everyone to thrive, which ultimately contributes to the development of superior products.
Verkada emphasizes the importance of air quality in the workplace, particularly in the context of reducing indoor transmission of COVID-19. Their Environmental Sensors are designed to monitor indoor air quality, which is crucial for ensuring a safe and healthy work environment. These sensors help organizations assess air quality by monitoring Air Quality Index (AQI) and Total Volatile Organic Compounds (TVOCs), allowing for timely interventions such as improving ventilation, using better filters, or deploying air purifiers. By 

In [277]:
results = await zep_client.graph.search(
    group_id=group_id,
    query=query,
    scope="edges",
    limit=10,
    reranker="episode_mentions",
)


for edge in results.edges:
    print(edge.fact)

Organizations leverage environmental sensors to detect dangerous CO2 levels.
Organizations leverage environmental sensors to detect dangerous CO2 levels.
Verkada's six product lines include environmental sensors.
Leading experts partner with Verkada to simplify air quality monitoring.
Environmental conditions are required to meet wellness and sustainability certifications.
Verkada has customers who use their products to manage air quality.
The SV11 Environmental Sensor is manufactured by Verkada.
The air quality sensors integrate with third-party VMS systems.
The SV11 Environmental Sensor is made by Verkada.
Environmental sensors are used to protect properties.
