In [1]:
import os
import openai
import dotenv
import pandas as pd
import seaborn as sns
import openai
from chromadb.utils import embedding_functions

# Credentials come from a local .env file, which must define both
# OPENAI_API_KEY and OPENAI_API_BASE.
dotenv.load_dotenv()

# Fail fast with a readable message: os.getenv() returns None for a missing
# variable, which would otherwise only surface later as an opaque error from
# the openai / chromadb clients.
missing_vars = [
    var for var in ("OPENAI_API_KEY", "OPENAI_API_BASE") if not os.getenv(var)
]
if missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_vars)
        + ". Define them in your .env file."
    )

# Configure the module-level openai settings for an Azure-hosted endpoint.
openai.api_type = "azure"
openai.api_base = os.environ["OPENAI_API_BASE"]
openai.api_version = "2023-03-15-preview"
openai.api_key = os.environ["OPENAI_API_KEY"]

# Embedding function Chroma will call to embed documents and queries.
# It reuses the key configured above rather than reading the environment again.
# NOTE(review): with api_type "azure", model_name presumably has to match the
# name of the embedding deployment on the Azure resource -- confirm.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=openai.api_key,
    model_name="text-embedding-ada-002",
)


To change the default distance function, set `hnsw:space` in the collection metadata when the collection is created:

```python
collection = client.create_collection(
    name="collection_name",
    metadata={"hnsw:space": "cosine"},  # "l2" is the default
)
```

In [2]:
import chromadb

# An in-memory Chroma client is sufficient for quick prototyping
# (persistence can be added later if needed).
client = chromadb.Client()

# Reuse the collection when it already exists, otherwise create it.
# get_collection, create_collection and delete_collection are also available.
db_name = "quick-start"
collection = client.get_or_create_collection(
    name=db_name,
    embedding_function=openai_ef,
)

In [3]:
# Sample documents for the similarity demo. Texts 1 and 2 both describe
# Azure Machine Learning; texts 3-6 cover unrelated topics (coffee, an image
# description, a property listing, jet engine maintenance).
# Long literals are wrapped with implicit string concatenation; the resulting
# values are unchanged.
sample_text1 = (
    "Azure Machine Learning is for data scientists, data engineers, "
    "and AI developers to build, train, deploy, and manage machine learning models."
)
sample_text2 = (
    "I recommend AzureML to data scientists for building, training, "
    "deploying, and managing machine learning models."
)
sample_text3 = (
    "There are many ways to consume coffee. "
    "Espresso, Latte and Cappuccino are popular ways to consume coffee. "
    "In addition to these, ice coffee is also popular."
)
sample_text4 = (
    "IT professional looking up in air, holding a scrolled old paper "
    "in the right hand and clicking in upper space as. "
    "On the other side there is a robot  that has AI trying to respond to the human "
    "use matrix as background and have magic spell overlapped with the background"
)
sample_text5 = (
    "Charming family home nestled in a tranquil neighborhood. "
    "This spacious abode boasts four bedrooms, three bathrooms, and a modern kitchen. "
    "Open-concept living area perfect for entertaining. "
    "Large windows bathe rooms in natural light. "
    "Landscaped backyard ideal for summer BBQs. "
    "Two-car garage. "
    "Close to schools and amenities. "
    "A warm, welcoming space to create lasting memories."
)
sample_text6 = (
    "Inspect the jet engine thoroughly. "
    "Identify any visible damage. "
    "Check for leaks, cracks, or signs of wear. "
    "Replace faulty components. "
    "Clean the engine using approved solvents. "
    "Test engine systems for proper operation. "
    "Monitor performance metrics. "
    "Adjust settings as needed. "
    "Document all actions taken. "
    "Ensure safety procedures are followed at all times."
)

# Order matters: position i in this list is stored under id "sample{i+1}".
samples = [
    sample_text1,
    sample_text2,
    sample_text3,
    sample_text4,
    sample_text5,
    sample_text6,
]


In [4]:
# Add the sample documents to the collection. Chroma handles tokenization,
# embedding and indexing automatically (pre-computed embeddings could be
# supplied instead).
# Ids are derived from `samples` ("sample1", "sample2", ...) so the two lists
# always have the same length, even when samples are added or removed.
collection.add(
    documents=samples,
    ids=[f"sample{i}" for i in range(1, len(samples) + 1)],  # unique per doc
)

In [5]:
# Look up the three stored samples closest to a short query string.
query = "Azure Machine Learning"

# Only distances are requested; the result still carries the matching ids,
# while metadatas, embeddings and documents come back as None.
query_results = collection.query(
    query_texts=query,
    n_results=3,
    include=["distances"],
)
query_results


{'ids': [['sample1', 'sample2', 'sample4']],
 'distances': [[0.14872786402702332,
   0.19057705998420715,
   0.38556766510009766]],
 'metadatas': None,
 'embeddings': None,
 'documents': None}

In [6]:
# Clean up: remove the demo collection created earlier in this notebook.
client.delete_collection(name=db_name)