In [None]:
!pip install openai pymilvus honeyhive

In [None]:
from openai import OpenAI
from pymilvus import MilvusClient
from honeyhive.tracer import HoneyHiveTracer
from honeyhive.tracer.custom import trace

# Initialize HoneyHive Tracer
# NOTE: the api_key and project strings below are placeholders; replace them
# with real values before running. The tracer is initialized first, ahead of
# the @trace-decorated functions defined further down.
HoneyHiveTracer.init(
    api_key="Your HoneyHive key",
    project="name of your project",
)

# Initialize OpenAI client (the api_key string is a placeholder to replace).
# Shared by embed_text and generate_response.
openai_client = OpenAI(api_key="your OpenAI key")

# Initialize Milvus client. Shared by the collection setup, insert and search
# functions below.
milvus_client = MilvusClient("milvus_demo.db")  # Using Milvus Lite for demo

def embed_text(text):
    """Return the embedding vector for ``text``.

    Sends ``text`` to the OpenAI embeddings endpoint with the
    ``text-embedding-ada-002`` model and returns the ``embedding`` attribute
    of the first item in the response data.
    """
    embedding_response = openai_client.embeddings.create(
        input=text,
        model="text-embedding-ada-002",
    )
    first_item = embedding_response.data[0]
    return first_item.embedding

@trace(
    config={
        "collection_name": "demo_collection",
        "dimension": 1536,  # text-embedding-ada-002 dimension
    }
)
def setup_collection():
    """Recreate the ``demo_collection`` Milvus collection from scratch.

    Any existing collection with that name is dropped first, then a new one
    is created with vector dimension 1536. The call is traced.
    """
    name = "demo_collection"

    # Start clean: remove a collection left over from a previous run.
    already_exists = milvus_client.has_collection(collection_name=name)
    if already_exists:
        milvus_client.drop_collection(collection_name=name)

    # 1536 matches the trace config above (text-embedding-ada-002 dimension).
    milvus_client.create_collection(collection_name=name, dimension=1536)

@trace(config={"embedding_model": "text-embedding-ada-002"})
def insert_documents(documents):
    """Embed each document and insert the rows into ``demo_collection``.

    Each row carries the document's position as ``id``, its embedding as
    ``vector``, the raw string as ``text``, and a fixed ``subject`` of
    ``"general"``. Returns whatever ``milvus_client.insert`` returns.
    """
    # Embed everything up front so a failed embedding call inserts nothing.
    embeddings = list(map(embed_text, documents))

    rows = []
    for doc_id, embedding in enumerate(embeddings):
        rows.append(
            {
                "id": doc_id,
                "vector": embedding,
                "text": documents[doc_id],
                "subject": "general",
            }
        )

    return milvus_client.insert(collection_name="demo_collection", data=rows)

@trace(
    config={
        "embedding_model": "text-embedding-ada-002",
        "top_k": 3,
    }
)
def search_similar_documents(query, top_k=3):
    """Return the ``text`` field of the ``top_k`` nearest documents to ``query``.

    The query is embedded with ``embed_text`` and searched against
    ``demo_collection``; only the first (and only) query's hits are read.
    """
    hits_per_query = milvus_client.search(
        collection_name="demo_collection",
        data=[embed_text(query)],
        limit=top_k,
        output_fields=["text", "subject"],
    )

    # A single query vector was sent, so its hits are at index 0.
    texts = []
    for hit in hits_per_query[0]:
        texts.append(hit["entity"]["text"])
    return texts

# Single source of truth for the chat model and system prompt, so the traced
# config cannot drift from what is actually sent to the API. (Previously the
# trace reported "gpt-4" and a prompt without the trailing period while the
# request used "gpt-4o" and "You are a helpful assistant.".)
_RESPONSE_MODEL = "gpt-4o"
_RESPONSE_SYSTEM_PROMPT = "You are a helpful assistant."


@trace(
    config={
        "model": _RESPONSE_MODEL,
        "prompt": _RESPONSE_SYSTEM_PROMPT,
    }
)
def generate_response(context, query):
    """Generate an answer to ``query`` grounded in ``context`` (traced).

    Args:
        context: Text placed in the prompt ahead of the question.
        query: The user's question.

    Returns:
        The ``content`` of the first choice's message in the chat completion.
    """
    prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    response = openai_client.chat.completions.create(
        model=_RESPONSE_MODEL,
        messages=[
            {"role": "system", "content": _RESPONSE_SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message.content

@trace()
def rag_pipeline(query):
    """Answer ``query`` by retrieving similar documents and prompting the model.

    The retrieved document texts are joined with newlines and passed as the
    context to ``generate_response``, whose result is returned.
    """
    context = "\n".join(search_similar_documents(query))
    return generate_response(context, query)

def main():
    """Run the demo end to end.

    Rebuilds the collection, indexes four sample documents, then sends one
    sample question through the RAG pipeline and prints the answer.
    """
    question = "What is the relationship between AI and machine learning?"
    sample_docs = [
        "Artificial intelligence was founded as an academic discipline in 1956.",
        "Machine learning is a subset of artificial intelligence.",
        "Deep learning is a type of machine learning based on artificial neural networks.",
        "Natural Language Processing (NLP) is a branch of AI that helps computers understand human language.",
    ]

    # Fresh collection, then index the corpus.
    setup_collection()
    print("Inserting documents...")
    insert_documents(sample_docs)

    # Exercise the full retrieve-then-generate path.
    print(f"\nQuery: {question}")
    answer = rag_pipeline(question)
    print(f"Response: {answer}")


if __name__ == "__main__":
    main()
