In [1]:
pip install chromadb

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip show chromadb

Name: chromadb
Version: 1.0.7
Summary: Chroma.
Home-page: https://github.com/chroma-core/chroma
Author: 
Author-email: Jeff Huber <jeff@trychroma.com>, Anton Troynikov <anton@trychroma.com>
License: 
Location: c:\users\scl\anaconda3\envs\cnn_class\lib\site-packages
Requires: bcrypt, build, chroma-hnswlib, fastapi, grpcio, httpx, importlib-resources, jsonschema, kubernetes, mmh3, numpy, onnxruntime, opentelemetry-api, opentelemetry-exporter-otlp-proto-grpc, opentelemetry-instrumentation-fastapi, opentelemetry-sdk, orjson, overrides, posthog, pydantic, pypika, pyyaml, rich, tenacity, tokenizers, tqdm, typer, typing-extensions, uvicorn
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [3]:
import chromadb
client = chromadb.Client()

In [4]:
texts = [
    "Sudhanshu Kumar was born in Jamshedpur, Jharkhand, India, to a modest family.",
    "His early life was marked by financial hardship and limited resources.",
    "He believed education could transform his life and pursued it relentlessly.",
    "Sudhanshu earned a Computer Science and Engineering degree.",
    "He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.",
    "He gained expertise in SAP WebDynpro, Fiori UI5 HANA, Java, Big Data, and Data Analytics.",
    "Sudhanshu remained committed to making education accessible for everyone.",
    "His teaching empowered countless students to build their careers.",
    "Many students credit him with changing their lives through affordable learning.",
    "His journey is a testament to triumph over adversity and the power of knowledge."
]

In [6]:

import requests
import numpy as np

def generate_embeddings(text):
    url = "https://api.euron.one/api/v1/euri/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer euri-9734871cf6c4043d7509c8494c157b5a235201ff16826277830c543774ce47ee"
    }
    payload = {
        "input": text,
        "model": "text-embedding-3-small"
    }

    response = requests.post(url, headers=headers, json=payload)
    data = response.json()
    
    embedding = np.array(data['data'][0]['embedding'])
    
    return embedding

In [8]:
embeddings = [generate_embeddings(i).tolist() for i in texts]


In [11]:
len(embeddings[0])

1536

In [12]:
collectiion = client.create_collection(name="sudhanshu_collection")

In [13]:
collectiion.add(
    documents=texts,
    embeddings=embeddings,
    ids=[str(i) for i in range(len(texts))]
)

In [14]:
collectiion.count()

10

In [15]:
collectiion.get()

{'ids': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
 'embeddings': None,
 'documents': ['Sudhanshu Kumar was born in Jamshedpur, Jharkhand, India, to a modest family.',
  'His early life was marked by financial hardship and limited resources.',
  'He believed education could transform his life and pursued it relentlessly.',
  'Sudhanshu earned a Computer Science and Engineering degree.',
  'He worked at Wipro, Deloitte, Verizon Labs, and Ernst & Young.',
  'He gained expertise in SAP WebDynpro, Fiori UI5 HANA, Java, Big Data, and Data Analytics.',
  'Sudhanshu remained committed to making education accessible for everyone.',
  'His teaching empowered countless students to build their careers.',
  'Many students credit him with changing their lives through affordable learning.',
  'His journey is a testament to triumph over adversity and the power of knowledge.'],
 'uris': None,
 'included': ['metadatas', 'documents'],
 'data': None,
 'metadatas': [None, None, None, None, None, 

In [20]:
query = "sudhanshu was born in jamshedpur"

In [21]:
query_embed = generate_embeddings(query).tolist()

In [22]:
query_embed

[-0.028150795,
 -0.027069842,
 0.038385358,
 0.0025888267,
 -0.016662788,
 -0.0017306759,
 -0.010205814,
 0.035234492,
 0.01682378,
 -0.0631553,
 0.030243706,
 -0.041720215,
 -0.0029424366,
 -0.017628746,
 0.02476994,
 -0.0016429923,
 0.031646647,
 -0.032635603,
 0.007129695,
 5.811519e-06,
 0.05496765,
 0.017801238,
 0.02596589,
 -0.0397423,
 0.01542084,
 -0.010855536,
 -0.003133616,
 -0.036775425,
 0.00068026775,
 -0.03413054,
 -0.045032073,
 -0.012419469,
 0.013580919,
 0.0006252857,
 0.011493758,
 -0.048481926,
 0.004504932,
 0.023263505,
 -0.020250633,
 -0.013385427,
 0.0039213323,
 -0.05262175,
 0.015248348,
 0.047032986,
 -0.036131453,
 0.0027095715,
 -0.008354393,
 -0.0006098333,
 -0.020860108,
 -0.020032143,
 0.006428225,
 -0.03224462,
 0.04792995,
 0.01075779,
 -0.0076356735,
 -0.0048182937,
 0.0011434823,
 0.061453372,
 0.04130623,
 -0.038822338,
 0.0024838937,
 -0.021389084,
 0.038178366,
 0.019273175,
 0.02288402,
 -0.01661679,
 0.016455796,
 -0.03629245,
 -0.03086468,
 -0

In [23]:
collectiion.query(query_embeddings=[query_embed], n_results=2)

{'ids': [['0', '3']],
 'embeddings': None,
 'documents': [['Sudhanshu Kumar was born in Jamshedpur, Jharkhand, India, to a modest family.',
   'Sudhanshu earned a Computer Science and Engineering degree.']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[None, None]],
 'distances': [[0.425162136554718, 0.9404656887054443]]}

In [24]:
import chromadb
  
client = chromadb.CloudClient(
  api_key='ck-B5exRGbadsLSjkXi8LtjG2q89FCUi28BteEX7cX3oRdw',
  tenant='3a0ae10f-51dd-474b-a8fb-8d05626bc9b0',
  database='test'
)

In [25]:
collectiion = client.create_collection(name="sudhanshu_kumar")

In [26]:
collectiion.add(
    documents=texts,
    embeddings=embeddings,
    ids=[str(i) for i in range(len(texts))]
)