# Install required Python modules

*Note:* This step may take a while to run the first time.

In [1]:
# Install uv for simplified package management. (It is not necessary to understand this step.)
!curl -LsSf https://astral.sh/uv/install.sh | sh

downloading uv 0.8.0 x86_64-unknown-linux-gnu
no checksums to verify
installing to /opt/app-root/src/.local/bin
  uv
  uvx
everything's installed!


In [2]:
# Install the packages listed in requirements.txt using uv's pip-compatible
# installer (the uv binary is installed by the curl command in the cell above).
!uv pip install -r requirements.txt

[2mUsing Python 3.11.11 environment at: /opt/app-root[0m
[2K[2mResolved [1m142 packages[0m [2min 86ms[0m[0m                                        [0m
[2mUninstalled [1m1 package[0m [2min 3ms[0m[0m
         If the cache and target directories are on different filesystems, hardlinking may not be supported.
[2K[2mInstalled [1m115 packages[0m [2min 53.38s[0m[0m                            [0m
 [32m+[39m [1maccelerate[0m[2m==1.9.0[0m
 [32m+[39m [1mannotated-types[0m[2m==0.7.0[0m
 [32m+[39m [1mboto3[0m[2m==1.34.103[0m
 [32m+[39m [1mbotocore[0m[2m==1.34.162[0m
 [32m+[39m [1mclick[0m[2m==8.2.1[0m
 [32m+[39m [1mcontourpy[0m[2m==1.3.2[0m
 [32m+[39m [1mcycler[0m[2m==0.12.1[0m
 [32m+[39m [1mdill[0m[2m==0.4.0[0m
 [32m+[39m [1mdistro[0m[2m==1.9.0[0m
 [32m+[39m [1mdocling[0m[2m==2.39.0[0m
 [32m+[39m [1mdocling-core[0m[2m==2.43.0[0m
 [32m+[39m [1mdocling-ibm-models[0m[2m==3.8.2[0m
 [32m+[39m [1mdocling-p

In [3]:
from pymilvus import connections, utility, Collection, CollectionSchema, FieldSchema, DataType
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

  from tqdm.autonotebook import tqdm, trange


# Shake out the MinIO connection

In [4]:
#!/usr/bin/env python3
"""
Shake-out test for a MinIO deployment on Kubernetes.

Builds an S3 client for the MinIO endpoint, lists the visible buckets, and
records the outcome in ``minio_status`` ("🟢 OK" or "🔴 FAIL").

Environment variables:
  AWS_S3_ENDPOINT        – MinIO service DNS name (e.g. minio.minio.svc.cluster.local)
  AWS_ACCESS_KEY_ID      – MinIO access key
  AWS_SECRET_ACCESS_KEY  – MinIO secret key
  AWS_DEFAULT_REGION     – Dummy value; boto3 still expects one
  AWS_S3_BUCKET          – Bucket name; read here but not needed to list buckets
"""
import os
import sys

import boto3
from botocore.client import Config
from botocore.exceptions import BotoCoreError, ClientError


endpoint = os.getenv("AWS_S3_ENDPOINT", "minio.minio.svc.cluster.local")
# NOTE(review): the fallback credentials below are hard-coded; presumably
# lab-only defaults — confirm they are never used outside this environment.
access_key = os.getenv("AWS_ACCESS_KEY_ID", "minio")
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY", "test")
# `or` also covers the case where the variable is set but empty.
region = os.getenv("AWS_DEFAULT_REGION", "us-east-1") or "us-east-1"
# Kept in its own variable: assigning it to `region` would pass the bucket
# name to boto3 as the region.
bucket = os.getenv("AWS_S3_BUCKET", "rag-docs") or "rag-docs"

# Only prepend a scheme when the configured endpoint does not already have one,
# so both "host:port" and "http(s)://host:port" values yield a valid URL.
endpoint_url = endpoint if "://" in endpoint else f"http://{endpoint}"

minio_status = "🟢 OK"

try:
    s3 = boto3.client(
        "s3",
        endpoint_url=endpoint_url,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        region_name=region,
        config=Config(signature_version="s3v4"),
    )

    # Listing buckets is the connectivity probe: it requires a reachable
    # endpoint and accepted credentials.
    resp = s3.list_buckets()
    buckets = [b["Name"] for b in resp.get("Buckets", [])]

    if buckets:
        print("🟢 Connection succeeded – buckets discovered:")
        for name in buckets:
            print(f"  • {name}")
    else:
        print("🟢 Connected but no buckets found.")

except (BotoCoreError, ClientError) as exc:
    # Record the failure instead of raising so the summary cell can still run.
    print(f"🔴 MinIO connectivity test failed: {exc}", file=sys.stderr)
    minio_status = "🔴 FAIL"

🟢 Connection succeeded – buckets discovered:
  • data
  • models


# Shake out the Milvus connection


## Create a test Milvus collection

In [5]:
# Shake-out test for Milvus: connect, create a throwaway collection with an
# (id, embedding) schema, confirm it is listed, then drop it again.
# The outcome is recorded in `milvus_status` ("🟢 OK" or "🔴 FAIL").
import sys  # imported here so the error report works even if this cell runs on its own

milvus_status = "🟢 OK"
# This is the name of the collection that this program will use.
collection_name = "shakeout_collection"

try:
    # Register the connection to the Milvus service under the "default" alias
    connections.connect(
        uri="http://milvus-service.milvus.svc.cluster.local:19530",
        alias="default"
    )

    # Make sure we start with a clean slate by deleting the collection if it exists from a prior run.
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)
    print(f"Collection list: {utility.list_collections()}")

    # Databases need a schema. In this lab the schema will consist
    # of an identifier and a vector that contains the embedding of a text string.

    # Define the primary key field for unique record identification
    id_field = FieldSchema(
        name="id",
        dtype=DataType.INT64,
        is_primary=True,
        auto_id=False
    )

    # Specify embedding model and its output dimension
    embedding_model = "all-MiniLM-L6-v2"  # Example Hugging Face model
    embedding_dim = 384  # Embedding vector size as per the model

    # Define the vector field to hold embedding values
    embedding_field = FieldSchema(
        name="embedding",
        dtype=DataType.FLOAT_VECTOR,
        dim=embedding_dim
    )

    # Assemble collection schema combining ID and embedding fields
    schema = CollectionSchema(
        fields=[id_field, embedding_field],
        description="Milvus shakeout test",
        enable_dynamic_field=False
    )

    # Instantiate the Milvus collection using the defined schema and configuration
    collection = Collection(
        name=collection_name,
        schema=schema,
        using='default',
        shards_num=2,
        consistency_level="Strong"
    )

    # List all collections in Milvus to confirm creation
    print(f"Collection list: {utility.list_collections()}")

    # Clean up: release the collection and drop it so no test data is left behind
    collection.release()
    utility.drop_collection(collection_name)
except Exception as exc:
    # Catch Exception rather than using a bare `except:` so KeyboardInterrupt
    # and SystemExit still propagate, and report the cause instead of hiding it.
    print(f"🔴 Milvus connectivity test failed: {exc}", file=sys.stderr)
    milvus_status = "🔴 FAIL"

Collection list: []
Collection list: ['shakeout_collection']


In [6]:
# Summarise the outcome of both shake-out tests, then point the reader
# back to the lab workbook.
summary_lines = (
    f"Minio status: {minio_status}",
    f"Milvus status: {milvus_status}",
    "Return to your lab workbook for further instructions.",
)
for summary_line in summary_lines:
    print(summary_line)

Minio status: 🟢 OK
Milvus status: 🟢 OK
Return to your lab workbook for further instructions.
