In [1]:
from notebook_utils import notebook_setup

# Bootstrap the notebook for the "local" environment. The call returns the
# project root and the loaded Config object that the cells below read from.
project_root, config = notebook_setup(environment="local")

🚀 Setting up notebook environment...
✅ Already in Python path: C:\Users\aprilhazel\Source\sk_mcp_demo
✅ Loaded environment file: C:\Users\aprilhazel\Source\sk_mcp_demo\.env.local
✅ All required configuration loaded for 'local' environment
✅ Configuration loaded for 'local' environment
✨ Notebook setup complete!
   Project root: C:\Users\aprilhazel\Source\sk_mcp_demo
   Environment: local
   Config: Config(environment='local', openai_api_key=None, openai_model=None, openai_api_type='azure', azure_openai_endpoint='https://aoai-zvbgv7oohofri.cognitiveservices.azure.com/', azure_openai_api_key=***, azure_openai_model='gpt-4o', azure_openai_deployment='gpt-4o', azure_openai_api_version='2024-12-01-preview', azure_openai_embedding_endpoint='https://aoai-zvbgv7oohofri.cognitiveservices.azure.com/', azure_openai_embedding_api_key=***, azure_openai_embedding_model='text-embedding-3-small', azure_openai_embedding_deployment='text-embedding-3-small', azure_openai_embedding_api_version='2024-12-01

In [2]:
import chromadb
from chromadb.utils import embedding_functions

In [3]:
# Embedding function handed to the ChromaDB collections below.
# Every setting is read from the loaded `config`, nothing is hard-coded here.
aoai_embedding_function = embedding_functions.OpenAIEmbeddingFunction(
    # Service endpoint and credentials
    api_type=config.openai_api_type,
    api_base=config.azure_openai_embedding_endpoint,
    api_version=config.azure_openai_embedding_api_version,
    api_key=config.azure_openai_embedding_api_key,
    # Which embedding model / deployment to call
    deployment_id=config.azure_openai_embedding_deployment,
    model_name=config.azure_openai_embedding_model,
)

In [10]:
from pathlib import Path, PurePosixPath

# `config.chroma_db_path` is a POSIX-style setting such as "./data/chroma_db" or
# "/data/chroma_db" that is meant to be resolved *under the project root*.
# It is parsed as a path rather than trimmed with str.lstrip('./'): lstrip removes
# a set of characters, not a prefix, so it would also eat the leading dot of a
# hidden directory (".chroma/db" -> "chroma/db").
chroma_db_setting = PurePosixPath(config.chroma_db_path)

# Drop only the root anchor ("/"), if present; PurePosixPath already discards "./".
chroma_db_parts = (
    chroma_db_setting.parts[1:] if chroma_db_setting.anchor else chroma_db_setting.parts
)

# Joining part-by-part keeps the result correct on both Windows and POSIX hosts.
chroma_db_root = Path(config.project_root).joinpath(*chroma_db_parts)
products_chroma_db = chroma_db_root / 'products_chroma_db'
names_chroma_db = chroma_db_root / 'names_chroma_db'

print(f"ChromaDB path: {config.chroma_db_path}")
print(f"Products ChromaDB: {products_chroma_db}")
print(f"Names ChromaDB: {names_chroma_db}")

ChromaDB path: /data/chroma_db
Products ChromaDB: C:\Users\aprilhazel\Source\sk_mcp_demo\data\chroma_db\products_chroma_db
Names ChromaDB: C:\Users\aprilhazel\Source\sk_mcp_demo\data\chroma_db\names_chroma_db


In [11]:
# One on-disk ChromaDB client per store: products first, then names.
chrome_persistent_client_products, chrome_persistent_client_names = (
    chromadb.PersistentClient(path=str(store_dir))
    for store_dir in (products_chroma_db, names_chroma_db)
)

In [17]:
# Create the products collection on first run, or reuse it on later runs.
# Errors are deliberately left to propagate: if the collection cannot be opened,
# none of the cells below can work, and an unwrapped exception gives the
# cleanest traceback.
products_collection = chrome_persistent_client_products.get_or_create_collection(
    name="product_collection",
    embedding_function=aoai_embedding_function,
)

In [18]:
# List the collections held by the products client; the bare name on the last
# line makes the list the cell's displayed output.
products_collections = chrome_persistent_client_products.list_collections()
products_collections

[Collection(name=product_collection)]

In [20]:
# https://docs.trychroma.com/docs/collections/manage-collections
# Preview the first records in the collection (peek() returns up to 10 by
# default, per the Chroma docs linked above). An empty result here means
# nothing has been imported yet.
products_collection.peek()

{'ids': [],
 'embeddings': array([], dtype=float64),
 'documents': [],
 'uris': None,
 'included': ['metadatas', 'documents', 'embeddings'],
 'data': None,
 'metadatas': []}

Import the Contoso products (CSV source: https://github.com/Azure-Samples/contoso-chat/blob/main/data/product_info/products.csv)

In [21]:
import pandas as pd

# Read the CSV file (the relative path is resolved against the notebook's working directory)
products_df = pd.read_csv('products.csv')

# Parallel lists for ChromaDB: documents[i], metadatas[i] and ids[i] all
# describe the same product.
documents = []
metadatas = []
ids = []

for _, row in products_df.iterrows():
    # Text that gets embedded: every column flattened into a single line
    document_text = f"ID: {row['id']}, Name: {row['name']}, Price: ${row['price']}, Category: {row['category']}, Brand: {row['brand']}, Description: {row['description']}"

    # Metadata: id, name, price, category and brand (the description is only in the document text)
    metadata = {
        'id': str(row['id']),  # Convert to string for metadata
        'name': row['name'],
        'price': float(row['price']),
        'category': row['category'],
        'brand': row['brand']
    }

    documents.append(document_text)
    metadatas.append(metadata)
    ids.append(str(row['id']))  # Use the id column as ChromaDB id

# Upsert (not add) so that re-running this cell updates the existing records
# instead of failing on ids that are already present.
products_collection.upsert(
    documents=documents,
    metadatas=metadatas,
    ids=ids
)

print(f"Successfully upserted {len(documents)} products into the ChromaDB collection.")
print(f"Collection now contains {products_collection.count()} documents.")

Successfully upserted 20 products into the ChromaDB collection.
Collection now contains 20 documents.


Import a randomly generated list of names (defined inline in the next cell), each with a category and a popularity score

In [23]:
# Sample data for the names collection, defined inline.
# Each record has:
#   id         - unique integer, 1 through 76
#   name       - the display name
#   category   - one of "people", "places", "things", "fictional", "animals"
#   popularity - integer score (values here range from 2 to 10)
# NOTE: the "High"/"Medium" section headings below are only a rough grouping;
# the scores overlap between sections (e.g. "Maine Coon" is 5 under High while
# "Siberian Husky" is 7 under Medium), so rely on the `popularity` value itself.
names_data = [
    # People - High Popularity
    {"id": 1, "name": "Alexander", "category": "people", "popularity": 9},
    {"id": 2, "name": "Emma", "category": "people", "popularity": 10},
    {"id": 3, "name": "William", "category": "people", "popularity": 8},
    {"id": 4, "name": "Olivia", "category": "people", "popularity": 9},
    {"id": 5, "name": "James", "category": "people", "popularity": 8},
    {"id": 6, "name": "Sophia", "category": "people", "popularity": 9},
    {"id": 7, "name": "Michael", "category": "people", "popularity": 7},
    {"id": 8, "name": "Isabella", "category": "people", "popularity": 8},
    {"id": 9, "name": "Benjamin", "category": "people", "popularity": 7},
    {"id": 10, "name": "Charlotte", "category": "people", "popularity": 8},
    
    # People - Medium Popularity
    {"id": 11, "name": "Theodore", "category": "people", "popularity": 6},
    {"id": 12, "name": "Amelia", "category": "people", "popularity": 6},
    {"id": 13, "name": "Sebastian", "category": "people", "popularity": 5},
    {"id": 14, "name": "Penelope", "category": "people", "popularity": 5},
    {"id": 15, "name": "Nathaniel", "category": "people", "popularity": 4},
    {"id": 16, "name": "Genevieve", "category": "people", "popularity": 4},
    {"id": 17, "name": "Maximilian", "category": "people", "popularity": 3},
    {"id": 18, "name": "Evangeline", "category": "people", "popularity": 3},
    
    # Places - High Popularity
    {"id": 19, "name": "New York", "category": "places", "popularity": 10},
    {"id": 20, "name": "London", "category": "places", "popularity": 9},
    {"id": 21, "name": "Paris", "category": "places", "popularity": 9},
    {"id": 22, "name": "Tokyo", "category": "places", "popularity": 8},
    {"id": 23, "name": "Sydney", "category": "places", "popularity": 7},
    {"id": 24, "name": "Rome", "category": "places", "popularity": 8},
    {"id": 25, "name": "Barcelona", "category": "places", "popularity": 7},
    {"id": 26, "name": "Amsterdam", "category": "places", "popularity": 6},
    
    # Places - Medium Popularity
    {"id": 27, "name": "Reykjavik", "category": "places", "popularity": 4},
    {"id": 28, "name": "Budapest", "category": "places", "popularity": 5},
    {"id": 29, "name": "Prague", "category": "places", "popularity": 6},
    {"id": 30, "name": "Vienna", "category": "places", "popularity": 5},
    {"id": 31, "name": "Stockholm", "category": "places", "popularity": 4},
    {"id": 32, "name": "Copenhagen", "category": "places", "popularity": 4},
    {"id": 33, "name": "Helsinki", "category": "places", "popularity": 3},
    {"id": 34, "name": "Tallinn", "category": "places", "popularity": 2},
    
    # Things - High Popularity
    {"id": 35, "name": "Smartphone", "category": "things", "popularity": 10},
    {"id": 36, "name": "Laptop", "category": "things", "popularity": 9},
    {"id": 37, "name": "Coffee", "category": "things", "popularity": 10},
    {"id": 38, "name": "Bicycle", "category": "things", "popularity": 7},
    {"id": 39, "name": "Camera", "category": "things", "popularity": 6},
    {"id": 40, "name": "Headphones", "category": "things", "popularity": 8},
    {"id": 41, "name": "Backpack", "category": "things", "popularity": 7},
    {"id": 42, "name": "Sunglasses", "category": "things", "popularity": 6},
    
    # Things - Medium Popularity
    {"id": 43, "name": "Telescope", "category": "things", "popularity": 3},
    {"id": 44, "name": "Microscope", "category": "things", "popularity": 2},
    {"id": 45, "name": "Synthesizer", "category": "things", "popularity": 4},
    {"id": 46, "name": "Skateboard", "category": "things", "popularity": 5},
    {"id": 47, "name": "Harmonica", "category": "things", "popularity": 3},
    {"id": 48, "name": "Kaleidoscope", "category": "things", "popularity": 2},
    {"id": 49, "name": "Binoculars", "category": "things", "popularity": 4},
    {"id": 50, "name": "Compass", "category": "things", "popularity": 3},
    
    # Fictional Characters - High Popularity
    {"id": 51, "name": "Sherlock Holmes", "category": "fictional", "popularity": 9},
    {"id": 52, "name": "Harry Potter", "category": "fictional", "popularity": 10},
    {"id": 53, "name": "Batman", "category": "fictional", "popularity": 9},
    {"id": 54, "name": "Wonder Woman", "category": "fictional", "popularity": 8},
    {"id": 55, "name": "Spider-Man", "category": "fictional", "popularity": 9},
    {"id": 56, "name": "Hermione Granger", "category": "fictional", "popularity": 8},
    {"id": 57, "name": "Luke Skywalker", "category": "fictional", "popularity": 8},
    {"id": 58, "name": "Princess Leia", "category": "fictional", "popularity": 7},
    
    # Fictional Characters - Medium Popularity
    {"id": 59, "name": "Gandalf", "category": "fictional", "popularity": 6},
    {"id": 60, "name": "Aragorn", "category": "fictional", "popularity": 5},
    {"id": 61, "name": "Legolas", "category": "fictional", "popularity": 4},
    {"id": 62, "name": "Gimli", "category": "fictional", "popularity": 3},
    {"id": 63, "name": "Frodo Baggins", "category": "fictional", "popularity": 6},
    {"id": 64, "name": "Samwise Gamgee", "category": "fictional", "popularity": 5},
    
    # Animals - High Popularity
    {"id": 65, "name": "Golden Retriever", "category": "animals", "popularity": 9},
    {"id": 66, "name": "Siamese Cat", "category": "animals", "popularity": 7},
    {"id": 67, "name": "German Shepherd", "category": "animals", "popularity": 8},
    {"id": 68, "name": "Persian Cat", "category": "animals", "popularity": 6},
    {"id": 69, "name": "Labrador", "category": "animals", "popularity": 9},
    {"id": 70, "name": "Maine Coon", "category": "animals", "popularity": 5},
    
    # Animals - Medium Popularity
    {"id": 71, "name": "Border Collie", "category": "animals", "popularity": 6},
    {"id": 72, "name": "Scottish Fold", "category": "animals", "popularity": 4},
    {"id": 73, "name": "Siberian Husky", "category": "animals", "popularity": 7},
    {"id": 74, "name": "Bengal Cat", "category": "animals", "popularity": 4},
    {"id": 75, "name": "Australian Shepherd", "category": "animals", "popularity": 5},
    {"id": 76, "name": "Ragdoll Cat", "category": "animals", "popularity": 3},
]


In [25]:
# Create the names collection on first run, or reuse it on later runs.
# Errors are deliberately left to propagate: wrapping the call in a handler
# that only re-raises adds nothing but an extra traceback frame.
names_collection = chrome_persistent_client_names.get_or_create_collection(
    name="names_collection",
    embedding_function=aoai_embedding_function,
)

# Parallel lists for ChromaDB: names_documents[i], names_metadatas[i] and
# names_ids[i] all describe the same entry of `names_data`.
names_documents = []
names_metadatas = []
names_ids = []

for name_data in names_data:
    # Text that gets embedded: every field flattened into a single line
    document_text = f"ID: {name_data['id']}, Name: {name_data['name']}, Category: {name_data['category']}, Popularity: {name_data['popularity']}/10"

    # Metadata with id, name, category and popularity.
    # NOTE: 'id' is stored as an int here; only the ChromaDB document id below is a string.
    metadata = {
        'id': name_data['id'],
        'name': name_data['name'],
        'category': name_data['category'],
        'popularity': name_data['popularity']
    }

    names_documents.append(document_text)
    names_metadatas.append(metadata)
    names_ids.append(str(name_data['id']))  # Use the id field as ChromaDB document ID

# Upsert (not add) so that re-running this cell updates the existing records
# instead of failing on ids that are already present.
names_collection.upsert(
    documents=names_documents,
    metadatas=names_metadatas,
    ids=names_ids
)

print(f"Successfully upserted {len(names_documents)} names into the names ChromaDB collection.")
print(f"Names collection now contains {names_collection.count()} documents.")

Successfully upserted 76 names into the names ChromaDB collection.
Names collection now contains 76 documents.
