In [None]:
# Read the credentials used later in this notebook through google.colab's
# userdata helper: 'HF_TOKEN' is passed to login(), 'GROQ_API_KEY' to Groq().
from google.colab import userdata
hf_token = userdata.get('HF_TOKEN')
api_key = userdata.get('GROQ_API_KEY')

In [None]:
!pip install datasets
!pip install chromadb

In [None]:
from huggingface_hub import login
from datasets import load_dataset
import chromadb
from tqdm import tqdm

In [None]:
# Make the API keys available as environment variables.
# `os` and `load_dotenv` were used here without ever being imported, so the
# cell raised NameError on a fresh kernel; import them locally.
import os

try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv is not installed by this notebook; without it there is no
    # .env file support and only variables already in the environment are used.
    load_dotenv = None

if load_dotenv is not None:
    load_dotenv()

# NOTE(review): when a variable is missing, the placeholder string below is
# written into the environment; libraries that read HF_TOKEN may then try to
# authenticate with it -- confirm this fallback is really wanted.
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')

In [None]:
# Log in to Hugging Face with the token read into `hf_token` earlier.
# add_to_git_credential=True is passed as well -- presumably so the token is
# also stored for git operations; confirm against the huggingface_hub docs.
login(hf_token, add_to_git_credential=True)

In [None]:
# Load the "<HF_USER>/pricer-data" dataset and keep handles on both of the
# splits that the rest of the notebook refers to.
HF_USER = "ed-donner"
DATASET_NAME = f"{HF_USER}/pricer-data"

dataset = load_dataset(DATASET_NAME)
train, test = dataset['train'], dataset['test']

In [None]:
# Number of training examples to embed and store in the vector database.
N_DATA = 1000

# Take the first N_DATA 'text' values with a single slice instead of indexing
# the split one row at a time: it avoids N_DATA separate row lookups and does
# not raise IndexError when the split holds fewer than N_DATA rows (the slice
# simply returns what is available).
data = train[:N_DATA]['text']

**Chroma DB**

In [None]:
# Open a persistent Chroma client whose storage path is the DB directory name
# (relative to the current working directory).
DB = "products_vectorstore"
client = chromadb.PersistentClient(path=DB)

In [None]:
## Creating Collection
# Start from an empty "products" collection: drop any existing one with the
# same name, then create it again.

collection_name = "products"

# Depending on the chromadb version, list_collections() yields either
# Collection objects or plain name strings. Comparing a string against
# Collection objects never matches, which would skip the delete and make
# create_collection() fail on an already existing collection. Normalise every
# entry to its name so the membership test works in both cases.
existing_collection_names = [
    getattr(existing, "name", existing) for existing in client.list_collections()
]
if collection_name in existing_collection_names:
    client.delete_collection(collection_name)
    print(f"Deleted existing collection: {collection_name}")

collection = client.create_collection(collection_name)
print(f"Existing collections: {client.list_collections()}")

**Embedding using SentenceTransformer**

https://huggingface.co/sentence-transformers

In [None]:
from sentence_transformers import SentenceTransformer

In [None]:
# Instantiate the sentence-transformers model used to embed both the stored
# documents and the later query text.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Quick sanity check (disabled): encode a single sentence and take its vector.
# vector = model.encode(["Well hi there"])[0]

In [None]:
# Embed `data` in batches and add each batch to the Chroma collection.
batch_size = 100
for i in tqdm(range(0, len(data), batch_size)):
    documents = data[i: i+batch_size]
    # Encode the batch and convert the result to nested lists of Python floats.
    vectors = model.encode(documents).astype(float).tolist()
    # One metadata dict per document, recording the length of its text.
    metadatas = [{"len": len(item)} for item in documents]
    # Derive the ids from the actual batch length: the final batch is shorter
    # than batch_size whenever len(data) is not a multiple of it, and building
    # ids from range(i, i+batch_size) would then produce more ids than
    # documents/embeddings/metadatas.
    ids = [f"doc_{j}" for j in range(i, i + len(documents))]
    collection.add(
        ids=ids,
        documents=documents,
        embeddings=vectors,
        metadatas=metadatas
    )

**Read data from Chroma DB**

In [None]:
# Re-open the persisted store, fetch a capped number of entries from the
# "products" collection and print one of them as a spot check.
DB = "products_vectorstore"
collection_name = "products"
MAXIMUM_DATAPOINTS = 500  # upper bound on the number of entries fetched
n = 27                    # position of the entry to print

client = chromadb.PersistentClient(path=DB)
print(f"Existing collections: {client.list_collections()}")

collection = client.get_collection(collection_name)
print(f"Collection count: {collection.count()}")

result = collection.get(
    limit=MAXIMUM_DATAPOINTS,
    include=['embeddings', 'documents', 'metadatas'],
)

# Embeddings are requested above as well but are not printed here.
for field in ('ids', 'documents', 'metadatas'):
    print(result[field][n])

In [None]:
# Build the embedding model again for the query step. The import below is
# commented out, so SentenceTransformer must already be in scope from an
# earlier cell.
# from sentence_transformers import SentenceTransformer
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

def find_similars(text, n_results=5):
  """Query the vector store for entries similar to `text`.

  The text is embedded with the module-level `model`, and the module-level
  `collection` is queried with that embedding.

  Args:
    text: the query string to embed.
    n_results: value forwarded to `collection.query` as `n_results`.

  Returns:
    A tuple `(documents, full_results)`: `full_results` is the raw value
    returned by `collection.query`, and `documents` is
    `full_results['documents'][0]` (presumably the documents matched for the
    single query that was sent).
  """
  query_embedding = model.encode([text]).astype(float).tolist()
  full_results = collection.query(
      query_embeddings=query_embedding,
      n_results=n_results,
  )
  return full_results['documents'][0], full_results

# Run a sample query; the bare `documents` on the last line makes the cell
# display the retrieved documents. `documents` is reused later as prompt context.
text = "Quadcast HyperX condenser mic for high quality podcasting"
documents, full_results = find_similars(text)
documents

In [None]:
## Guess price with DeepSeek R1
# Install the groq package and create a client with the API key read earlier.
# NOTE(review): this rebinds `client`, which the earlier cells use for the
# Chroma PersistentClient; re-running those cells after this one will operate
# on the Groq client instead. Consider a distinct name here and in the
# completion cell that follows.
!pip install groq
import groq
from groq import Groq
client = Groq(api_key=api_key)

In [48]:
# Ask the model for a price, giving it the retrieved documents as context.
text = "Quadcast HyperX condenser mic for high quality podcasting"
rag_documents = documents

# Template with two slots: the item description and the retrieved context.
prompt_template = """ Guess the price of this item based on the context below
{text}

Context:
{rag_documents}

Answer:
"""
# The retrieved documents are joined with a blank line between them.
prompt = prompt_template.format(text=text, rag_documents='\n\n'.join(documents))

user_message = {"role": "user", "content": prompt}
chat_completion = client.chat.completions.create(
    model="deepseek-r1-distill-llama-70b",
    messages=[user_message],
)

# Keep only the text of the first returned choice.
result = chat_completion.choices[0].message.content

In [49]:
# Render the model's answer (held in `result`) as Markdown in the cell output.
from IPython.display import Markdown, display

display(Markdown(result))

<think>
Okay, so I need to figure out the price of the Quadcast HyperX condenser mic for high-quality podcasting. Let me see. The context provided includes several other microphones with their prices, so maybe I can use those as a reference.

First, looking at the SYNCO Wireless Microphone System G2(A1), it's priced at $149. That's a wireless lavalier system, which seems more complex because it includes a transmitter and receiver, and it's designed for vlogging and streaming. The Quadcast is a condenser mic, which is typically used for podcasting and might be a USB mic, so it's probably more straightforward without the wireless aspect.

Next, the Comica UHF 48 Channels Professional Wireless Lavalier is priced at $109. Again, this is a wireless system, which usually costs more than a basic USB condenser mic. It has a lot of features like 48 channels and a long range, but it's still wireless, so maybe that's why it's priced lower than the SYNCO but higher than some USB mics.

Then there's the Behringer Xenyx Mixer with USB, priced at $119. This is a mixer, not just a microphone, so it has more features like multiple inputs, EQ, and effects. The Quadcast is just a microphone, so it might not need to be as expensive as a mixer.

The Four-Channel Powered Audio Mixer is priced at $230. That's a more advanced mixer with a built-in amplifier and DSP effects. Again, that's a mixer, not a standalone mic, so probably more expensive than the Quadcast.

Lastly, the Countryman B3 Lavalier is at $297. That's a high-end wireless lavalier mic, which explains the higher price. It's known for quality and durability, so that's a premium product.

So, the Quadcast HyperX is a condenser mic, likely a USB model aimed at podcasters. Comparing to other USB condenser mics on the market, like the Blue Yeti, which is around $130, or the HyperX QuadCast Gaming Microphone, which I think is around $140. Considering the context given, the other mics are either wireless systems or mixers, which tend to be more expensive. The Quadcast, being a USB condenser, should be more affordable, probably in the $130-$150 range. But looking at the examples, the Behringer mixer is $119, and the Comica is $109. Since the Quadcast is a higher quality condenser, maybe it's a bit more than those mixers but less than the high-end lavaliers. So I think around $140 would be a good estimate.
</think>

The Quadcast HyperX condenser microphone is estimated to cost approximately $140.