# B2B Agent Workflow

This notebook demonstrates the B2B Agent multi-agent workflow system for lead finding, collection, and enrichment.



# Libs

In [15]:
from typing import Annotated
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from dotenv import load_dotenv
from IPython.display import Image, display
import gradio as gr
from langgraph.graph import StateGraph
from langgraph.graph.message import add_messages
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from typing import Optional
import random
from langchain_core.messages import ToolMessage

# Add memory
from langgraph.checkpoint.memory import MemorySaver
memory = MemorySaver()

import json

# Code

In [18]:
import os

from langchain_mcp_adapters.client import MultiServerMCPClient

async def get_mcp_client():
    client = MultiServerMCPClient(
        {
            "google_workspace": {
                "transport": "streamable_http",
                "url": f"{os.getenv('WORKSPACE_MCP_BASE_URI', 'http://localhost')}:{os.getenv('WORKSPACE_MCP_PORT', '8001')}/mcp",
            }
        }
    )
    return client

In [30]:
async def get_mcp_tools():
    """Async helper to get MCP tools.
    
    Returns:
        List of MCP tools converted to LangChain tools, or empty list if unavailable.
    """
    try:
        mcp_client = await get_mcp_client()
        return await mcp_client.get_tools()
    except Exception as e:
        # Log error but don't break the graph if MCP server is unavailable
        print(f"Warning: Could not load MCP tools: {e}")
        return []

In [31]:
import asyncio
t = asyncio.run(get_mcp_tools())

  for group in groupby(strings, lambda s: s[0] == first[0])) \


RuntimeError: asyncio.run() cannot be called from a running event loop

In [26]:
mcp_tools

[StructuredTool(name='start_google_auth', description='Manually initiate Google OAuth authentication flow.\n\nNOTE: This tool should typically NOT be called directly. The authentication system\nautomatically handles credential checks and prompts for authentication when needed.\nOnly use this tool if:\n1. You need to re-authenticate with different credentials\n2. You want to proactively authenticate before using other tools\n3. The automatic authentication flow failed and you need to retry\n\nIn most cases, simply try calling the Google Workspace tool you need - it will\nautomatically handle authentication if required.', args_schema={'properties': {'service_name': {'type': 'string'}, 'user_google_email': {'default': None, 'type': 'string'}}, 'required': ['service_name'], 'type': 'object'}, metadata={'_meta': {'_fastmcp': {'tags': []}}}, response_format='content_and_artifact', coroutine=<function convert_mcp_tool_to_langchain_tool.<locals>.call_tool at 0x11e10c040>),
 StructuredTool(name

In [19]:
# Get MCP tools (async)
mcp_client = await get_mcp_client()
mcp_tools = await mcp_client.get_tools()

In [20]:
mcp_tools

[StructuredTool(name='start_google_auth', description='Manually initiate Google OAuth authentication flow.\n\nNOTE: This tool should typically NOT be called directly. The authentication system\nautomatically handles credential checks and prompts for authentication when needed.\nOnly use this tool if:\n1. You need to re-authenticate with different credentials\n2. You want to proactively authenticate before using other tools\n3. The automatic authentication flow failed and you need to retry\n\nIn most cases, simply try calling the Google Workspace tool you need - it will\nautomatically handle authentication if required.', args_schema={'properties': {'service_name': {'type': 'string'}, 'user_google_email': {'default': None, 'type': 'string'}}, 'required': ['service_name'], 'type': 'object'}, metadata={'_meta': {'_fastmcp': {'tags': []}}}, response_format='content_and_artifact', coroutine=<function convert_mcp_tool_to_langchain_tool.<locals>.call_tool at 0x11dd86340>),
 StructuredTool(name

In [None]:
from typing import Optional

from pydantic import BaseModel, Field


class Lead(BaseModel):
    """Structured lead with enrichment fields."""

    company: str
    industry: str
    employee_count: int
    revenue_musd: float

    # Enrichment fields - initially None
    website: Optional[str] = None
    last_year_profit: Optional[float] = None
    last_quarter_ebitda: Optional[float] = None
    stock_variation_3m: Optional[float] = None

    def needs_enrichment(self) -> bool:
        """Check if lead still needs enrichment."""
        return (
            self.website is None
            or self.last_year_profit is None
            or self.last_quarter_ebitda is None
            or self.stock_variation_3m is None
        )


class LeadCompleted(BaseModel):
    """Completed lead with all enrichment fields."""

    company: str = Field(..., description="Name of the company")
    industry: str = Field(..., description="Industry sector of the company")
    employee_count: int = Field(..., description="Number of employees at the company")
    revenue_musd: float = Field(..., description="Annual revenue in millions of USD")
    website: str = Field(..., description="Official website URL of the company")
    last_year_profit: float = Field(
        ..., description="Company's profit for the last fiscal year in millions of USD"
    )
    last_quarter_ebitda: float = Field(
        ..., description="Company's EBITDA for the last quarter in millions of USD"
    )
    stock_variation_3m: float = Field(
        ..., description="Stock price variation over the last 3 months in percentage"
    )



In [None]:
from typing import Annotated, Any, Optional

from langgraph.graph.message import add_messages
from pydantic import BaseModel, field_validator

class IdealCustomerProfile(BaseModel):
    """Ideal Customer Profile criteria extracted from Google Sheets."""
    
    industries_allowed: Optional[list[str]] = None
    industries_blocked: Optional[list[str]] = None
    employee_min: Optional[int] = None
    employee_max: Optional[int] = None
    regions_allowed: Optional[list[str]] = None
    regions_blocked: Optional[list[str]] = None
    technologies_required: Optional[list[str]] = None
    buyer_personas: Optional[list[str]] = None
    excluded_personas: Optional[list[str]] = None
    
    @classmethod
    def parse_semicolon_list(cls, value: str) -> list[str]:
        """Parse semicolon-separated string into list."""
        if not value or not isinstance(value, str):
            return []
        return [item.strip() for item in value.split(';') if item.strip()]

class State(BaseModel):
    """Application state for the B2B workflow graph."""

    messages: Annotated[list, add_messages]
    leads: list[Lead] = []
    filtered_leads: list[Lead] = []
    next_action: str = ""
    icp: Optional[IdealCustomerProfile] = None  # NEW: Store ICP data

    @field_validator("leads", "filtered_leads", mode="before")
    @classmethod
    def validate_leads(cls, v: Any) -> list[Lead]:
        """Convert dicts to Lead objects."""
        if not v:
            return []
        if isinstance(v[0], dict):
            return [Lead(**lead) if isinstance(lead, dict) else lead for lead in v]
        return v

In [None]:
from langchain_core.tools import Tool
import pandas as pd

def retrieve_icp_tool(llm: ChatOpenAI) -> Tool:
    """Tool that retrieves and parses ICP data using structured output."""
    
    def _retrieve_icp(*args, **kwargs) -> IdealCustomerProfile:  # Accept both *args and **kwargs
        """Read ICP data from CSV and parse into structured IdealCustomerProfile format."""
        # Read raw data
        df = pd.read_csv('ICP.csv')
        icp_dict = dict(zip(df['Parameter'], df['Value']))
        
        # Format data for LLM parsing
        data_str = "\n".join([f"{k}: {v}" for k, v in icp_dict.items()])
        
        # Use structured output to parse
        parser = llm.with_structured_output(IdealCustomerProfile)
        
        prompt = f"""
        Parse the following Ideal Customer Profile (ICP) data into structured format.
        
        Raw Data:
        {data_str}
        
        Extract and structure:
        - Industries (allowed/blocked) - split semicolon-separated values into lists
        - Employee count range (min/max) - convert to integers
        - Regions (allowed/blocked) - split semicolon-separated values into lists
        - Technologies required - split semicolon-separated values into lists
        - Buyer personas - split semicolon-separated values into lists
        - Excluded personas - split semicolon-separated values into lists
        """
        
        # Parse into structured format and return directly
        icp = parser.invoke([{"role": "user", "content": prompt}])
        return json.dumps(icp.model_dump())
    
    return Tool(
        name="retrieve_icp",
        description="Retrieves and parses the Ideal Customer Profile (ICP) data.",
        func=_retrieve_icp,
    )

In [None]:
def chatbot_node(old_state: State, llm: ChatOpenAI, tools) -> dict:
    """Analyze user intent and route to appropriate workflow."""

    system_prompt = f"""
    You are a helpful assistant that 

    # Instructions
    - You can only perform two tasks:
        - Find the ICP (Ideal Customer Profile)
        - Find leads, if you already have the ICP (Ideal Customer Profile), you can use the tools to find leads.
        - Use tools to get information about the correct user profile that client wants to find.
    - You can use the following tools to find the ICP (Ideal Customer Profile):
    {tools}
    """
    
    from langchain_core.messages import ToolMessage, SystemMessage
    
    # Get all messages first - always initialize
    messages = list(old_state.messages) if old_state.messages else []
    last_message = messages[-1] if messages else None
    
    # DEBUG: Print current state ICP
    print("=" * 80)
    print("DEBUG: Current State ICP:")
    print(f"  ICP in state: {old_state.icp}")
    if old_state.icp:
        print(f"  ICP type: {type(old_state.icp)}")
        print(f"  ICP dict: {old_state.icp.model_dump()}")
    print("=" * 80)
    
    # If tool just executed, check if it's the ICP tool response
    if isinstance(last_message, ToolMessage):
        # Handle ICP object directly (or dict if serialized)
        content = last_message.content
        
        # DEBUG: Print tool response
        print("=" * 80)
        print("DEBUG: ToolMessage received:")
        print(f"  Content type: {type(content)}")
        print(f"  Content: {content}")
        if isinstance(content, dict):
            print(f"  Content keys: {content.keys() if hasattr(content, 'keys') else 'N/A'}")
        print("=" * 80)
        
        icp_dict = json.loads(last_message.content)
        icp = IdealCustomerProfile(**icp_dict)
        
        # Store ICP if we got it
        if icp:
            print("=" * 80)
            print("DEBUG: Storing ICP in state:")
            print(f"  ICP object: {icp}")
            print(f"  ICP model_dump(): {icp.model_dump()}")
            print("=" * 80)
            
            return {
                "icp": icp,
                "messages": [{"role": "assistant", "content": "ICP retrieved and stored successfully!"}],
                "next_action": "end",
            }
        
        # For other tool responses, continue normal flow
        # Add system message if needed
        if not messages or not isinstance(messages[0], SystemMessage):
            messages = [SystemMessage(content=system_prompt)] + messages
    else:
        # First call - add system message if needed
        if not messages or not isinstance(messages[0], SystemMessage):
            messages = [SystemMessage(content=system_prompt)] + messages
    
    # Normal LLM invocation (messages is always defined at this point)
    llm_with_tools = llm.bind_tools(tools)
    response = llm_with_tools.invoke(messages)

    return {
        "messages": [response],
        "next_action": "end",
    }

In [None]:
def create_orchestrator_node(llm: ChatOpenAI, tools):
    """Create orchestrator node with LLM dependency."""

    def node(state: State) -> dict:
        return chatbot_node(state, llm, tools)

    return node

In [None]:
def create_lead_finder_node(llm, tools):
    """
    Returns an agent node function that uses LLM with tools to find leads matching the user's ICP.
    """
    def node(state: State) -> dict:
        from langchain_core.messages import SystemMessage, ToolMessage
        
        # Get ICP from state
        icp = state.icp if hasattr(state, "icp") else None
        
        if icp:
            icp_info = f"\nUser's Ideal Customer Profile (ICP):\n{icp.model_dump_json()}\n"
        else:
            icp_info = "\nNo Ideal Customer Profile (ICP) available. Please retrieve it first.\n"

        system_prompt = (
            f"""You are a lead-finding agent.
            Use the following ICP to find matching leads:
            {icp_info}

            # Instructions
            - Use the available tools to find leads that match the ICP criteria
            - Find exactly 3 leads that match: industries, employee range, and regions from the ICP
            - Each lead must have: company name, industry, employee_count, and revenue_musd
            - Call tools to search for leads matching the ICP
            """
        )

        messages = list(state.messages) if state.messages else []
        last_message = messages[-1] if messages else None
        
        # If tool just executed, extract leads from tool response using structured output
        if isinstance(last_message, ToolMessage):
            # Tool response received - parse it into Lead objects
            tool_content = str(last_message.content)
            
            # Use structured output to parse tool response into leads
            from pydantic import BaseModel
            
            class LeadList(BaseModel):
                """List of leads extracted from tool response."""
                leads: list[Lead]
            
            # Parse tool response into structured leads
            parser = llm.with_structured_output(LeadList)
            prompt = f"""
            Extract leads from the following tool response.
            Convert the information into Lead objects with: company, industry, employee_count, revenue_musd.
            
            Tool Response:
            {tool_content}
            
            Extract exactly 3 leads that match the ICP criteria.
            """
            
            response = parser.invoke([{"role": "user", "content": prompt}])
            leads = response.leads if hasattr(response, 'leads') else []
            
            return {
                "leads": [lead.model_dump() for lead in leads],
                "messages": [{"role": "assistant", "content": f"Found {len(leads)} leads from tool results"}],
            }
        
        # First call or continuing - add system message and bind tools
        if not messages or not isinstance(messages[0], SystemMessage):
            messages = [SystemMessage(content=system_prompt)] + messages
        
        # Bind tools so LLM can call them
        llm_with_tools = llm.bind_tools(tools)
        response = llm_with_tools.invoke(messages)

        return {
            "messages": [response],
        }

    return node

In [None]:
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_core.tools import Tool


def create_search_tool() -> Tool:
    """Create search tool for company information."""
    serper_search = GoogleSerperAPIWrapper()

    return Tool(
        name="search_company_info",
        description=(
            "Search the web for detailed information about a company including recent news, "
            "technologies used, partnerships, and business updates. "
            "Use this when you need more context about a lead company."
        ),
        func=serper_search.run,
    )


In [None]:
from langgraph.prebuilt import ToolNode, tools_condition
from application.agents.summary_agent import create_summary_node  # Import the summary node

llm = ChatOpenAI(model="gpt-4o-mini")
read_icp_tool = retrieve_icp_tool(llm)
tools = [create_search_tool()]

# Add summary agent
summary_node = create_summary_node(llm)

graph_builder = StateGraph(State)
graph_builder.add_node("chatbot", create_orchestrator_node(llm, [read_icp_tool]))
graph_builder.add_node("tools", ToolNode(tools=[read_icp_tool]))
graph_builder.add_node("search_tool", ToolNode(tools=tools))
graph_builder.add_node("lead_finder", create_lead_finder_node(llm, tools))
graph_builder.add_node("summary", summary_node)

# Add conditional edge to route to tools when LLM calls a tool
graph_builder.add_conditional_edges(
    "chatbot",
    tools_condition,  # This checks if there are tool calls in the response
    {
        "tools": "tools",  # Route to tools node if tool calls exist
        "__end__": "lead_finder",     # Otherwise end
    }
)
graph_builder.add_conditional_edges(
    "lead_finder",
    tools_condition,  # This checks if there are tool calls in the response
    {
        "tools": "search_tool",  # Route to tools node if tool calls exist
        "__end__": "summary",     # Otherwise end
    }
)
graph_builder.add_edge("tools", "chatbot")
graph_builder.add_edge("search_tool", "lead_finder")
graph_builder.add_edge(START, "chatbot")
graph_builder.add_edge("summary", END)

graph = graph_builder.compile(checkpointer=memory)
display(Image(graph.get_graph().draw_mermaid_png()))


In [None]:
config = {"configurable": {"thread_id": "200000"}}

def chat(message, history):
    """Chat interface handler."""
    from langchain_core.messages import HumanMessage
    
    # Create new user message
    new_message = HumanMessage(content=message)
    
    # Invoke graph - it will automatically load previous messages from checkpointer
    result = graph.invoke(
        {"messages": [new_message]},  # ✅ Only new message, checkpointer handles history
        config=config
    )
    
    print("State:", result)
    print("-" * 100)
    
    # DEBUG: Print ICP from result
    print("=" * 80)
    print("DEBUG: ICP in result state:")
    if "icp" in result:
        icp = result["icp"]
        print(f"  ICP: {icp}")
        print(f"  ICP type: {type(icp)}")
        if icp:
            print(f"  ICP model_dump(): {icp.model_dump()}")
            print(f"  Industries allowed: {icp.industries_allowed}")
            print(f"  Employee range: {icp.employee_min} - {icp.employee_max}")
            print(f"  Regions allowed: {icp.regions_allowed}")
    else:
        print("  No ICP in result")
    print("=" * 80)
    
    # Get the last message content
    last_message = result["messages"][-1]
    if hasattr(last_message, 'content'):
        return last_message.content
    return str(last_message)

gr.ChatInterface(
    chat,
    title="B2B Lead Generation Assistant",
    description="Ask me to find and qualify B2B leads!",
).launch()

In [None]:
# import pandas as pd
# pd.read_csv('icp.csv')