In [1]:
from azure.cosmos import CosmosClient
from dotenv import load_dotenv
import json
from openai import AzureOpenAI
import os

def generate_openai_embeddings(input_text):
    """Return the embedding vector for ``input_text``.

    An Azure OpenAI client is built from the ``AZURE_OPENAI_API_KEY`` and
    ``AZURE_OPENAI_ENDPOINT`` environment variables, an embedding is requested
    with ``model="text-embedding-ada-002"``, and the embedding of the first
    item in the response is returned.
    """
    openai_client = AzureOpenAI(
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        api_version="2025-01-01-preview",
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    )
    embedding_response = openai_client.embeddings.create(
        model="text-embedding-ada-002",
        input=input_text,
    )
    # Only the first entry of the response is used.
    first_item = embedding_response.data[0]
    return first_item.embedding

# Pull settings from a local .env file (if one exists) into the process
# environment. The import alone does nothing; without this call the
# os.getenv() lookups below only see variables exported by the shell.
load_dotenv()

cosmos_url = os.getenv("COSMOS_ENDPOINT")
cosmos_key = os.getenv("COSMOS_KEY")
cosmos_database_name = "linkedin_data"
cosmos_container_name = "linkedin_data_vector"

cosmos_client = CosmosClient(cosmos_url, cosmos_key)
cosmos_container = cosmos_client.get_database_client(cosmos_database_name).get_container_client(cosmos_container_name)

question = "List all employees working at M1."

# Embed the question so it can be compared against the stored vectors.
query_embedding = generate_openai_embeddings(question)

top_n = 10

# Rank and limit on the server: TOP + ORDER BY VectorDistance returns only the
# top_n most similar documents instead of shipping every document in the
# container to the client and sorting there.
query = """
    SELECT TOP @top_n c.full_name, c.full_name_url, c.role, c.company, VectorDistance(c.request_vector, @query_embedding) AS SimilarityScore
    FROM c
    ORDER BY VectorDistance(c.request_vector, @query_embedding)
    """
parameters = [
    {"name": "@top_n", "value": top_n},
    {"name": "@query_embedding", "value": query_embedding},
]

result = list(cosmos_container.query_items(
    query=query,
    parameters=parameters,
    enable_cross_partition_query=True
))

# Rows already arrive ranked (most similar first) and capped by the query;
# the slice is only a guard so `results` never exceeds top_n.
results = result[:top_n]
results

[{'full_name': 'ChunJin Hoo',
  'full_name_url': 'https://www.linkedin.com/sales/lead/ACwAAArT3ysBNEMGyxNYvUyLyZts0t7BfBcF1Xg,NAME_SEARCH,cQjN?_ntb=2l7hMCeUQFKi%2Br3KmkAhjQ%3D%3D',
  'role': 'General Manager, Core Network Planning (Packet Core & RSP)',
  'company': 'M1 Limited',
  'SimilarityScore': 0.8329576145114843},
 {'full_name': 'Dinesh Krishnarajah',
  'full_name_url': 'https://www.linkedin.com/sales/lead/ACwAAAEAPQAB9ikP04sNz__sAuLmX1FFVXrhu48,NAME_SEARCH,vLvo?_ntb=2l7hMCeUQFKi%2Br3KmkAhjQ%3D%3D',
  'role': 'Viewed\nYou’ve already seen Dinesh Krishnarajah’s profile before.',
  'company': 'M1 Limited',
  'SimilarityScore': 0.8329576145114843},
 {'full_name': 'Denis Seek',
  'full_name_url': 'https://www.linkedin.com/sales/lead/ACwAAAGrZHIBczqmqwcT-YAMtHpWQWbxTjNM5qI,NAME_SEARCH,QVLt?_ntb=2l7hMCeUQFKi%2Br3KmkAhjQ%3D%3D',
  'role': 'Chief Technical Officer',
  'company': 'M1 Limited',
  'SimilarityScore': 0.8329576145114843},
 {'full_name': 'Hasini K',
  'full_name_url': 'https://

In [6]:
from azure.cosmos import CosmosClient
from dotenv import load_dotenv
import json
from openai import AzureOpenAI
import os

def generate_openai_embeddings(input_text):
    """Return the embedding vector for ``input_text``.

    An Azure OpenAI client is built from the ``AZURE_OPENAI_API_KEY`` and
    ``AZURE_OPENAI_ENDPOINT`` environment variables, an embedding is requested
    with ``model="text-embedding-ada-002"``, and the embedding of the first
    item in the response is returned.
    """
    openai_client = AzureOpenAI(
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        api_version="2025-01-01-preview",
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    )
    embedding_response = openai_client.embeddings.create(
        model="text-embedding-ada-002",
        input=input_text,
    )
    # Only the first entry of the response is used.
    first_item = embedding_response.data[0]
    return first_item.embedding

# Parse question to get query parameters using OpenAI
def parse_question(question):
    """Ask the chat model to extract structured search filters from a question.

    Args:
        question: Natural-language question to analyse; it is interpolated
            verbatim into the prompt.

    Returns:
        dict: The JSON object produced by the model. The prompt asks for the
        keys ``company_name``, ``filter_by_company`` and ``other_filters``.
        When the reply is missing, is not valid JSON, or is valid JSON but not
        an object, a no-filter fallback with those three keys is returned.

    Errors raised by the chat-completion request itself are not caught and
    propagate to the caller.
    """
    fallback = {"company_name": None, "filter_by_company": False, "other_filters": None}

    client = AzureOpenAI(
        api_key = os.getenv("AZURE_OPENAI_API_KEY"),  
        api_version = "2025-01-01-preview",
        azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
    )
    
    prompt = f"""
    Extract query parameters from this question: "{question}"
    Respond in JSON format with these fields:
    - company_name: The specific company mentioned (exact match, null if none)
    - filter_by_company: true/false if results should be filtered by company
    - other_filters: any other filters mentioned
    
    Example: For "List all employees working at M1", return:
    {{"company_name": "M1 Limited", "filter_by_company": true, "other_filters": null}}
    """
    
    response = client.chat.completions.create(
        model="gpt-4o-mini", 
        messages=[{"role": "user", "content": prompt}],
        response_format={"type": "json_object"}
    )
    
    try:
        parsed = json.loads(response.choices[0].message.content)
    except (json.JSONDecodeError, TypeError, IndexError):
        # JSONDecodeError: reply is not valid JSON.
        # TypeError: message content is None.
        # IndexError: the response carries no choices.
        # Anything else (including KeyboardInterrupt) is left to propagate
        # instead of being silently swallowed.
        return fallback

    # Callers use .get() on the result, so anything other than a JSON object
    # (e.g. a bare list or string) is treated as "no filters".
    if not isinstance(parsed, dict):
        return fallback
    return parsed

# Pull settings from a local .env file (if one exists) into the process
# environment. The import alone does nothing; without this call the
# os.getenv() lookups (here and inside the helper functions) only see
# variables exported by the shell.
load_dotenv()

cosmos_url = os.getenv("COSMOS_ENDPOINT")
cosmos_key = os.getenv("COSMOS_KEY")
cosmos_database_name = "linkedin_data"
cosmos_container_name = "linkedin_data_vector"

cosmos_client = CosmosClient(cosmos_url, cosmos_key)
cosmos_container = cosmos_client.get_database_client(cosmos_database_name).get_container_client(cosmos_container_name)

question = "List all employees working at morgan."

# Step 1: Parse the question to understand intent and extract parameters
query_params = parse_question(question)
print(f"Extracted parameters: {query_params}")

# Step 2: Generate embeddings for semantic search
query_embedding = generate_openai_embeddings(question)

# Step 3: Perform vector search with potential filtering
top_n = 10

# Two-stage approach: first score every document, then filter in Python.
# NOTE(review): this reads the whole container on every run. Moving the company
# filter into a WHERE clause together with TOP/ORDER BY would cut the cost, but
# the exact-then-partial fallback below would need to be re-expressed first.
query = """
    SELECT c.full_name, c.full_name_url, c.role, c.company, VectorDistance(c.request_vector, @query_embedding) AS SimilarityScore
    FROM c
    """
parameters = [
    {"name": "@query_embedding", "value": query_embedding},
]

results = list(cosmos_container.query_items(
    query=query,
    parameters=parameters,
    enable_cross_partition_query=True
))

# Step 4: Post-process filtering based on query parameters
filtered_results = results
company_name = query_params.get("company_name")
# Only filter when the model returned a usable, non-empty string; any other
# type would crash on .lower() below.
if query_params.get("filter_by_company") and isinstance(company_name, str) and company_name:
    # Exact match option (use this for precision)
    filtered_results = [r for r in results if r.get("company") == company_name]

    # If no exact matches, try a case-insensitive partial match.
    # `or ""` covers rows whose company is missing or null.
    if not filtered_results:
        needle = company_name.lower()
        filtered_results = [r for r in results if needle in (r.get("company") or "").lower()]

    print(f"Filtered to company: {company_name}")

# Sort by similarity (highest score first) and take top results.
# Rows that came back without a SimilarityScore are ranked last instead of
# raising KeyError (presumably documents lacking request_vector - verify).
final_results = sorted(
    filtered_results,
    key=lambda x: x.get('SimilarityScore', float('-inf')),
    reverse=True,
)[:top_n]
final_results

Extracted parameters: {'company_name': 'Morgan', 'filter_by_company': True, 'other_filters': None}
Filtered to company: Morgan


[{'full_name': 'Pam Lim',
  'full_name_url': 'https://www.linkedin.com/sales/lead/ACwAAAFOpLABAe_Z03rLXAZB6XIcZ6R9XqEQ15w,NAME_SEARCH,5iFh?_ntb=2l7hMCeUQFKi%2Br3KmkAhjQ%3D%3D',
  'role': 'Head of Technology & Transformation',
  'company': 'Morgan McKinley',
  'SimilarityScore': 0.8472370429580187},
 {'full_name': 'Dinu Dinesh',
  'full_name_url': 'https://www.linkedin.com/sales/lead/ACwAAALyFRIBcikAtJiObsefjpLUwBFalIp2PUI,NAME_SEARCH,-lwa?_ntb=2l7hMCeUQFKi%2Br3KmkAhjQ%3D%3D',
  'role': 'Associate Director -Head of Technology & Transformation Services',
  'company': 'Morgan McKinley',
  'SimilarityScore': 0.8472370429580187},
 {'full_name': 'Dinu Dinesh',
  'full_name_url': 'https://www.linkedin.com/sales/lead/ACwAAALyFRIBcikAtJiObsefjpLUwBFalIp2PUI,NAME_SEARCH,-lwa?_ntb=MXeRlplrQIO4tFTD1iwhVw%3D%3D',
  'role': 'Associate Director -Head of Technology & Transformation Services',
  'company': 'Morgan McKinley',
  'SimilarityScore': 0.8472370429580187},
 {'full_name': 'Pam Lim',
  'full_na