# Imports

In [1]:
import nest_asyncio
# Presumably required because code called later in this notebook drives
# coroutines while Jupyter's own event loop is already running — TODO confirm.
nest_asyncio.apply()  # Enable async support in Jupyter

In [2]:
# NOTE(review): this cell repeats the previous cell verbatim; it looks like an
# accidental duplicate — confirm and keep only one of the two.
import nest_asyncio
nest_asyncio.apply()  # Enable async support in Jupyter

# Vector Database Testing

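This notebook tests the Qdrant lead storage implementation (`QDrantLeadStorage`): it builds mock leads, runs a similarity search against the vector store, and uses an LLM to evaluate the search results.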

## 1. Setup and Imports

In [3]:
import os
import sys
from pathlib import Path

# Add project root to Python path so the `src.*` imports below can be resolved.
# The root is taken to be the parent of the current working directory, i.e. the
# notebook is assumed to be launched from a folder directly under the project
# root — TODO confirm for other launch setups.
notebook_dir = Path.cwd()
project_root = notebook_dir.parent

# Keep the root as the first entry but drop any earlier copy of it, so that
# re-running this cell does not keep growing sys.path with duplicates.
project_root_entry = str(project_root)
sys.path[:] = [project_root_entry] + [
    entry for entry in sys.path if entry != project_root_entry
]

print(f"Added to Python path: {project_root}")

from langchain_openai import ChatOpenAI
from src.application.schema.lead import LeadCompleted
from src.application.schema.contact import Contact
from src.infrastructure.knowledge_base.vectordb.config import VectorDBSettings
from src.infrastructure.knowledge_base.vectordb.lead_storage import QDrantLeadStorage
from src.infrastructure.knowledge_base.vectordb.embedding_service import LeadEmbeddingService

Added to Python path: /Users/juan.melo/Documents/P/b2b_agent


## 2. Mock Lead Data

In [4]:
# Contacts attached to the mock lead, built first to keep the lead itself compact.
tesla_contacts = [
    Contact(
        name="Elon Musk",
        email="elon.musk@tesla.com",
        phone="+1 (555) 123-4567",
        position="CEO",
    ),
    Contact(
        name="Zachary Kirkhorn",
        email="zachary.kirkhorn@tesla.com",
        phone="+1 (555) 234-5678",
        position="CFO",
    ),
]

# Fully populated lead that the later cells use as the similarity-search query.
mock_lead = LeadCompleted(
    company="Tesla, Inc.",
    industry="Automotive & Technology",
    employee_count=127855,
    revenue_musd=81462.0,
    website="https://www.tesla.com",
    last_year_profit=12583.0,
    last_quarter_ebitda=4278.0,
    stock_variation_3m=15.7,
    contacts=tesla_contacts,
)

# Echo the lead as JSON so the field values can be checked by eye.
print("Mock lead created:")
print(mock_lead.model_dump_json(indent=2))

Mock lead created:
{
  "company": "Tesla, Inc.",
  "industry": "Automotive & Technology",
  "employee_count": 127855,
  "revenue_musd": 81462.0,
  "website": "https://www.tesla.com",
  "last_year_profit": 12583.0,
  "last_quarter_ebitda": 4278.0,
  "stock_variation_3m": 15.7,
  "contacts": [
    {
      "name": "Elon Musk",
      "email": "elon.musk@tesla.com",
      "phone": "+1 (555) 123-4567",
      "position": "CEO"
    },
    {
      "name": "Zachary Kirkhorn",
      "email": "zachary.kirkhorn@tesla.com",
      "phone": "+1 (555) 234-5678",
      "position": "CFO"
    }
  ]
}


## 3. Storage Test

In [5]:
# Initialize services
# Settings for the vector store, collected in one mapping so they are easy to tweak.
vectordb_options = {
    "host": "localhost",  # Using localhost since we're running locally
    "port": 6333,
    "collection_name": "test_leads",
    "distance_metric": "Cosine",
    "vector_size": 64,
    "timeout": 10,
}
settings = VectorDBSettings(**vectordb_options)

# The storage object is given both the settings and the embedding service.
embedding_service = LeadEmbeddingService()
lead_storage = QDrantLeadStorage(settings, embedding_service)

In [6]:
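# NOTE(review): the storage calls below are commented out, so the similarity
# search further down presumably relies on leads persisted by an earlier run —
# confirm, and re-enable these lines when starting from an empty collection.
# # Store the mock lead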
# lead_id = await lead_storage.store_lead(mock_lead)
# print(f"Lead stored with ID: {lead_id}")

# # Verify storage by retrieving the lead
# stored_lead = await lead_storage.get_lead(lead_id)
# print("\nRetrieved lead:")
# print(stored_lead.model_dump_json(indent=2))

## 4. Similarity Search Test

In [7]:
# Create similar test leads
# Each lead shares the mock lead's industry string but has different figures.
rivian_lead = LeadCompleted(
    company="Rivian Automotive",
    industry="Automotive & Technology",
    employee_count=14000,
    revenue_musd=1658.0,
    website="https://rivian.com",
    last_year_profit=-6752.0,
    last_quarter_ebitda=-1500.0,
    stock_variation_3m=-5.2,
    contacts=[
        Contact(
            name="RJ Scaringe",
            email="rj.scaringe@rivian.com",
            phone="+1 (555) 987-6543",
            position="CEO",
        ),
    ],
)

lucid_lead = LeadCompleted(
    company="Lucid Motors",
    industry="Automotive & Technology",
    employee_count=7500,
    revenue_musd=608.0,
    website="https://lucidmotors.com",
    last_year_profit=-2587.0,
    last_quarter_ebitda=-752.0,
    stock_variation_3m=-12.3,
    contacts=[
        Contact(
            name="Peter Rawlinson",
            email="peter.rawlinson@lucidmotors.com",
            phone="+1 (555) 456-7890",
            position="CEO",
        ),
    ],
)

# Order matters for anything that iterates the list: Rivian first, then Lucid.
similar_leads = [rivian_lead, lucid_lead]

# Store similar leads
# for lead in similar_leads:
#     lead_id = await lead_storage.store_lead(lead)
#     print(f"Stored similar lead {lead.company} with ID: {lead_id}")

In [8]:
# Search for similar leads to Tesla
found_similar = await lead_storage.find_similar_leads(mock_lead, limit=2)
print("\nFound similar leads:")
for lead in found_similar:
    print(f"\n{lead.company}:")
    print(lead.model_dump_json(indent=2))

{'company': 'Tesla, Inc.', 'industry': 'Automotive & Technology', 'revenue_musd': 81462.0, 'website': 'https://www.tesla.com', 'stock_variation_3m': 15.7, 'contacts': [{'position': 'CEO', 'email': 'elon.musk@tesla.com', 'phone': '+1 (555) 123-4567', 'name': 'Elon Musk'}, {'position': 'CFO', 'email': 'zachary.kirkhorn@tesla.com', 'phone': '+1 (555) 234-5678', 'name': 'Zachary Kirkhorn'}], 'employee_count': 127855, 'last_year_profit': 12583.0, 'last_quarter_ebitda': 4278.0}
{'company': 'Lucid Motors', 'industry': 'Automotive & Technology', 'revenue_musd': 608.0, 'website': 'https://lucidmotors.com', 'stock_variation_3m': -12.3, 'employee_count': 7500, 'contacts': [{'position': 'CEO', 'email': 'peter.rawlinson@lucidmotors.com', 'phone': '+1 (555) 456-7890', 'name': 'Peter Rawlinson'}], 'last_year_profit': -2587.0, 'last_quarter_ebitda': -752.0}

Found similar leads:

Tesla, Inc.:
{
  "company": "Tesla, Inc.",
  "industry": "Automotive & Technology",
  "employee_count": 127855,
  "revenue_

## 5. LLM Evaluation

In [None]:
def evaluate_similarity(original: LeadCompleted, found: list[LeadCompleted]) -> str:
    """Ask an LLM to explain how similar the retrieved leads are to the original.

    Args:
        original: The lead that was used as the similarity-search query.
        found: The leads returned by the similarity search; may be empty.

    Returns:
        The ``content`` of the model's reply to the analysis prompt.
    """
    llm = ChatOpenAI(model="gpt-4")

    # Join the JSON documents as plain text. Interpolating the list itself would
    # put its repr (brackets, quotes and escaped newlines) into the prompt
    # instead of readable JSON.
    found_json = "\n\n".join(lead.model_dump_json(indent=2) for lead in found)
    if not found_json:
        # Make an empty search result explicit rather than leaving a blank section.
        found_json = "(no similar companies found)"

    prompt = f"""
    Analyze the similarity between these companies:
    
    Original Company:
    {original.model_dump_json(indent=2)}
    
    Found Similar Companies:
    {found_json}
    
    Evaluate:
    1. Industry alignment
    2. Company size comparison
    3. Financial metrics similarity
    4. Overall relevance
    
    Provide a detailed analysis of why these companies are considered similar.
    """
    return llm.invoke(prompt).content

# Have the LLM assess the search results, then show its answer under a heading.
evaluation = evaluate_similarity(mock_lead, found_similar)
print("LLM Evaluation of Similar Companies:", evaluation, sep="\n")

LLM Evaluation of Similar Companies:
Analyzing the similarity between Tesla, Inc. and Lucid Motors, we can evaluate the key aspects that contribute to their comparison.

### 1. Industry Alignment

- **Tesla, Inc.** and **Lucid Motors** are both prominent players in the **Automotive & Technology** industry. They are aligned in terms of their core focus on developing electric vehicles (EVs) and integrating advanced technology into their automotive designs. This industry alignment is a crucial factor for comparison, as it indicates both companies are targeting similar markets and technological advancements, such as electric powertrains, autonomous driving capabilities, and innovative in-car software.

### 2. Company Size Comparison

- **Employee Count**: Tesla is significantly larger, with 127,855 employees compared to Lucid Motors' 7,500. This size discrepancy highlights Tesla's more established position and larger operational scale.
- **Revenue**: Tesla’s revenue stands at $81,462 milli

## 6. Cleanup

In [None]:
# Clean up test data
# Note: In a real implementation, you might want to keep the data
# NOTE(review): this ID is hard-coded, presumably copied from an earlier run of
# the (now commented-out) store cell; it will not match a fresh run — confirm.
await lead_storage.delete_lead("e623ee78-d93d-4b4e-9e53-2ace4e77fa15")
# NOTE(review): the LeadCompleted JSON printed earlier in this notebook shows no
# "id" field, so `lead.id` may raise AttributeError — verify, or keep the IDs
# returned by store_lead and delete by those instead.
for lead in similar_leads:
    await lead_storage.delete_lead(lead.id)
print("Test data cleaned up")