In [None]:
pip show chromadb

In [9]:
import chromadb
# Chroma client built with default settings; presumably in-memory only — confirm against the chromadb docs.
client = chromadb.Client()

In [3]:
# Ten one-sentence facts about Sudhanshu Kumar; each string is embedded and stored as its own document below.
texts = [
    "Sudhanshu Kumar was born in Jamshedpur, Jharkhand, India, to a modest family.",
    "His early life was marked by financial hardship and limited resources.",
    "He believed education could transform his life and pursued it relentlessly.",
    "Sudhanshu earned a Computer Science and Engineering degree.",
    "He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.",
    "He gained expertise in SAP WebDynpro, Fiori UI5 HANA, Java, Big Data, and Data Analytics.",
    "Sudhanshu remained committed to making education accessible for everyone.",
    "His teaching empowered countless students to build their careers.",
    "Many students credit him with changing their lives through affordable learning.",
    "His journey is a testament to triumph over adversity and the power of knowledge."
]


In [1]:

import os

import numpy as np
import requests

def generate_embeddings(text, api_key=None, timeout=30):
    """Request an embedding vector for ``text`` from the Euri embeddings API.

    Parameters
    ----------
    text : str
        Text to embed; sent as the ``input`` field of the request payload.
    api_key : str, optional
        Bearer token for the API. When omitted, the ``EURI_API_KEY``
        environment variable is used so the secret never lives in the notebook.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 30).

    Returns
    -------
    numpy.ndarray
        The first embedding in the response (``data[0]['embedding']``).

    Raises
    ------
    RuntimeError
        If no API key is passed and ``EURI_API_KEY`` is not set.
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    # Resolve the credential first so a missing key fails before any network call.
    token = api_key or os.environ.get("EURI_API_KEY")
    if not token:
        raise RuntimeError(
            "Set the EURI_API_KEY environment variable (or pass api_key=...) "
            "before calling generate_embeddings."
        )

    url = "https://api.euron.one/api/v1/euri/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
    }
    payload = {
        "input": text,
        "model": "text-embedding-3-small",
    }

    # Without a timeout a stalled connection would hang the kernel indefinitely.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Surface auth/quota/server errors here rather than as a KeyError on 'data' below.
    response.raise_for_status()
    data = response.json()

    return np.array(data['data'][0]['embedding'])



In [6]:
# Embed every sentence; .tolist() turns each NumPy vector into a plain Python list.
embeddings = [generate_embeddings(sentence).tolist() for sentence in texts]

In [7]:
# Length of the first embedding vector.
len(embeddings[0])

1536

In [14]:
# get_or_create_collection keeps this cell re-runnable: create_collection raises
# if "sudhanshus_collection" already exists on the client.
collection = client.get_or_create_collection(name="sudhanshus_collection")

In [15]:
# Store each sentence with its precomputed embedding under a positional string id ("0", "1", ...).
collection.add(
    ids=list(map(str, range(len(texts)))),
    embeddings=embeddings,
    documents=texts,
)


In [16]:
# Number of records the collection reports after the add above.
collection.count()

10

In [17]:
# Fetch the collection's stored records with no filter arguments.
collection.get()

{'ids': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
 'embeddings': None,
 'documents': ['Sudhanshu Kumar was born in Jamshedpur, Jharkhand, India, to a modest family.',
  'His early life was marked by financial hardship and limited resources.',
  'He believed education could transform his life and pursued it relentlessly.',
  'Sudhanshu earned a Computer Science and Engineering degree.',
  'He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.',
  'He gained expertise in SAP WebDynpro, Fiori UI5 HANA, Java, Big Data, and Data Analytics.',
  'Sudhanshu remained committed to making education accessible for everyone.',
  'His teaching empowered countless students to build their careers.',
  'Many students credit him with changing their lives through affordable learning.',
  'His journey is a testament to triumph over adversity and the power of knowledge.'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [None, None, None, None, None, 

In [20]:
query = "sudhanshu is from jamshedpur"
# Convert to a plain list, matching how the stored embeddings were built, so the
# query vector is a list of floats rather than a NumPy array.
query_embd = generate_embeddings(query).tolist()

In [21]:
# Ask Chroma for the two stored documents closest to the query embedding.
collection.query(n_results=2, query_embeddings=[query_embd])

{'ids': [['0', '3']],
 'embeddings': None,
 'documents': [['Sudhanshu Kumar was born in Jamshedpur, Jharkhand, India, to a modest family.',
   'Sudhanshu earned a Computer Science and Engineering degree.']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[None, None]],
 'distances': [[0.5121122598648071, 0.9276772141456604]]}

In [None]:
pip install pinecone

In [None]:
import os

from pinecone import Pinecone

# Read the key from the environment instead of embedding it in the notebook;
# a missing PINECONE_API_KEY raises KeyError naming the variable.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
# Handle to the index named "sudhanshustory".
index = pc.Index("sudhanshustory")

In [None]:
# Upsert only the first sentence as an (id, vector, metadata) tuple.
index.upsert(vectors=[("0", embeddings[0], {"text": texts[0]})])


In [None]:
# One (id, vector, metadata) tuple per sentence, with the position as a string id.
records = [
    (str(pos), embeddings[pos], {"text": texts[pos]})
    for pos in range(len(texts))
]


In [None]:
# Upsert every (id, vector, metadata) record in a single call.
index.upsert(vectors=records)

In [None]:
# Query for the Pinecone lookup, embedded through the same generate_embeddings helper.
query_text = "sudhanshu has worked at wipro"
# Rebinds query_embd (set earlier for the Chroma query) to this query's embedding as a plain list.
query_embd = generate_embeddings(query_text).tolist()

In [None]:
# Request the top 3 matches for the query vector, with each match's stored metadata included.
index.query(vector=query_embd, top_k=3, include_metadata=True)