In [27]:
import pandas as pd

# Load the cleaned product tables and the reference list of known brands.
keyboards_df = pd.read_csv("keyboards_clean2.csv")
monitors_df = pd.read_csv("monitors_clean2.csv")
mice_df = pd.read_csv("mice_clean2.csv")
brands_df = pd.read_csv("brands.csv")

# Every "Brand" value across the three product tables, duplicates included.
product_brands = [
    name
    for frame in (keyboards_df, monitors_df, mice_df)
    for name in frame["Brand"].tolist()
]

product_brands_unique = set(product_brands)
existing_brands = set(brands_df["brand_name"].tolist())

# Brands referenced by at least one product but absent from brands.csv.
missing_brands = product_brands_unique.difference(existing_brands)

print("Missing brands from brands.csv:")
for name in sorted(missing_brands):
    print(name)

# Summary counts, emitted as one block (the leading "\n" leaves a blank line).
summary_lines = [
    f"\nTotal missing: {len(missing_brands)}",
    f"Total unique product brands: {len(product_brands_unique)}",
    f"Total brands in brands.csv: {len(existing_brands)}",
]
print("\n".join(summary_lines))

Missing brands from brands.csv:
3Dconnexion
AOC
ATTACK SHARK
Acer
Anker
BenQ
COUGAR
DELUX
DREVO
ELECOM
EQEOVGA
Endgame Gear
Fantech
Finalmouse
G-Wolves
GameBall
Gamesense
Gigabyte
HK Gaming
HP
Havit
Hitscan
INNOCN
J-Tech Digital
LAMZU
LG
Lepow
MSI
Mad Catz
Marsback
Mobile Pixels
Ninjutso
Nixeus
Philips
Pixio
Ploopy
Pulsar
Pwnage
Samsung
Sceptre
Scyrox
Sharkoon
Swiftpoint
UtechSmart
VGN
VXE
Vancer
Vaxee
Vegcoo
VicTsing
ViewSonic
WLmouse
XTRFY
Xenics
Xiaomi
Zaunkoenig
espresso

Total missing: 57
Total unique product brands: 122
Total brands in brands.csv: 65


In [None]:
import json
import re

def _append_entry(entries, category, spec, body_lines):
    """Append one spec record to ``entries`` if there is something to store.

    Nothing is appended when the category or spec heading is missing, or when
    the body is empty after trimming surrounding whitespace. Every record
    carries the same four keys so downstream readers can rely on the schema.
    """
    if not (category and spec):
        return
    body = "\n".join(body_lines).strip()
    if not body:
        return
    category_name = category.lower()
    if "key" in category_name:
        # Any heading mentioning "key" is stored under the single name "keyboards".
        category_name = "keyboards"
    entries.append({
        "type": "spec",
        "category_name": category_name,
        "spec_name": spec.lower(),
        "text": body,
    })


def parse_guide(lines):
    """Split guide lines into one record per spec section.

    A line containing "**" starts a new category, any other line containing
    "*" starts a new spec, and every remaining line belongs to the body of
    the current spec.

    Args:
        lines: Iterable of text lines (trailing newlines allowed).

    Returns:
        A list of dicts with keys "type", "category_name", "spec_name", "text".
    """
    entries = []
    category = None
    spec = None
    body_lines = []
    for line in lines:
        if "**" in line:
            # Flush BEFORE switching category; otherwise the last spec of the
            # previous category would be recorded under the new category.
            _append_entry(entries, category, spec, body_lines)
            category = line.replace("*", "").strip()
            spec = None
            body_lines = []
        elif "*" in line:
            _append_entry(entries, category, spec, body_lines)
            spec = line.replace("*", "").strip()
            body_lines = []
        else:
            # Drop the line terminator: the body is re-joined with "\n", so
            # keeping it would double every line break.
            body_lines.append(line.rstrip("\n"))
    # The final section has no following heading to trigger a flush.
    _append_entry(entries, category, spec, body_lines)
    return entries


# The output below is written as UTF-8, so read the guide the same way
# instead of relying on the platform default encoding.
with open("Guides-2.txt", "r", encoding="utf-8") as f:
    texts = f.readlines()

json_data = parse_guide(texts)

with open("texts.json", "w", encoding="utf-8") as f:
    json.dump(json_data, f, indent=2)

In [None]:
from perplexity import Perplexity
from dotenv import load_dotenv
import os

# Populate the environment from a local .env file before reading the key.
load_dotenv()

perplexity_key = os.environ.get("PERPLEXITY_KEY")
client = Perplexity(api_key=perplexity_key)

# The query list currently holds a single empty placeholder string.
search_queries = [""]
search = client.search.create(query=search_queries)

# Show only the snippet of the first result, if there is one.
first_result = next(iter(search.results), None)
if first_result is not None:
    print(first_result.snippet)

# Today we are launching Comet.

Comet is a web browser built for today’s internet. In the last 30 years, the internet has evolved from something we simply “browse” or “search.” The internet is where we live, work, and connect.

It’s also where we ask questions.

Curious minds have questions everywhere, and they find answers on every page, in every idea, through every task. Yet we've been trapped in long lines of tabs and hyperlinks, disjointed experiences that interrupt our natural flow of thought.

In other words, the internet has become humanity's extended mind while our tools for using it remain primitive. Our interface for the web should be as fluid and responsive as human thought itself.

We built Comet to let the internet do what it has been begging to do: to amplify our intelligence.

### From Navigation To Cognition

Comet powers a shift from browsing to thinking.

Tabs that piled up waiting for your return now join one intelligent interface that understands how your mind work

## Create VectorDB

In [14]:
import os
import json
# Read the parsed guide records and split them into the three parallel
# lists (documents, metadatas, ids) used when filling the vector store.
texts_path = os.path.join("guide", "texts.json")
with open(texts_path, "r") as f:
    data = json.load(f)

METADATA_KEYS = ("type", "category_name", "spec_name")

documents = []
metadatas = []
for entry in data:
    documents.append(entry["text"])
    metadatas.append({key: entry[key] for key in METADATA_KEYS})

# Ids are positional: "doc_0", "doc_1", ...
ids = ["doc_" + str(position) for position, _ in enumerate(data)]

In [1]:
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()  # load variables from a .env file (if present) into the environment

# Initialize the OpenAI client used by get_embeddings(). No key is passed
# explicitly, so it presumably comes from the environment — TODO confirm.
openai_client = OpenAI()

def get_embeddings(texts, model="text-embedding-3-small", batch_size=2048):
    """Embed one or more texts with the OpenAI embeddings endpoint.

    Args:
        texts: A single string or an iterable of strings to embed.
        model: Name of the embedding model passed to the API.
        batch_size: Maximum number of inputs sent per request. Defaults to
            2048, the per-request array limit documented for the endpoint.

    Returns:
        A list with one embedding per input, in input order. An empty input
        yields an empty list without calling the API.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # A bare string is one input, not a sequence of characters to slice.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)
    if not texts:
        # The API rejects an empty input array; nothing to embed anyway.
        return []

    embeddings = []
    for start in range(0, len(texts), batch_size):
        response = openai_client.embeddings.create(
            input=texts[start:start + batch_size],
            model=model,
        )
        # Each returned item carries the index of its input; sort on it so
        # the output order never depends on the order of response.data.
        ordered = sorted(response.data, key=lambda item: item.index)
        embeddings.extend(item.embedding for item in ordered)
    return embeddings

In [2]:
import chromadb

# Open a Chroma persistent client rooted at the relative path "vectorDB".
chroma_client = chromadb.PersistentClient(path="vectorDB")

# Create (or get) the "electronics" collection used for the guide documents.
collection = chroma_client.get_or_create_collection(name="electronics")

In [None]:
# The three lists are parallel; a length mismatch would pair a document
# with the wrong metadata or id.
assert len(documents) == len(metadatas) == len(ids), (
    "documents, metadatas and ids must have the same length"
)

embeddings = get_embeddings(documents)

# Upsert rather than add: the ids are positional ("doc_0", "doc_1", ...) and
# the collection is persisted on disk, so re-running this cell after the
# guide changes must overwrite the stored records instead of colliding
# with them.
collection.upsert(
    embeddings=embeddings,
    documents=documents,
    metadatas=metadatas,
    ids=ids,
)