In [2]:
import os
from dotenv import load_dotenv

load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
tavily_api_key = os.getenv("TAVILY_API_KEY")

In [7]:
# 🧪 리드 스카우트 단일 테스트 파일 (Tavily 버전)
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
from langgraph.graph.message import add_messages
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
import requests, json
from datetime import datetime, timezone
import os

# 1. 상태 정의
class LeadScoutState(TypedDict):
    company_info: dict
    planner_response: Annotated[list, add_messages]
    serp_results: Annotated[list, add_messages]
    selected_result: Annotated[list, add_messages]
    reports: Annotated[list, add_messages]
    final_lead_list: list

def get_current_utc_datetime():
    return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

# 2. 각 노드 정의

def planner_node(state):
    company = state["company_info"]
    llm = ChatOpenAI(model="gpt-4.1", temperature=0)

    prompt = f"""
You are a lead planner. Generate a Google search query to find target companies for:
- Company: {company['company_name']}
- Industry: {company['industry']}
- Product: {company['product']}
- Target Customer: {company['target_customer']}
- Extra: {company['extra_info']}
"""
    msg = [{"role": "system", "content": prompt}]
    response = llm.invoke(msg)
    state["planner_response"].append(HumanMessage(content=response.content.strip()))
    return state

def tavily_node(state):
    search_term = state["planner_response"][-1].content

    if not tavily_api_key:
        raise ValueError("❌ TAVILY_API_KEY not set or loaded from environment.")

    headers = {
        "Authorization": f"Bearer {tavily_api_key}",
        "Content-Type": "application/json"
    }
    payload = {
        "query": search_term,
        "include_answer": False,
        "include_raw_content": False,
        "max_results": 10
    }

    try:
        res = requests.post("https://api.tavily.com/search", headers=headers, json=payload)
        print(f"📡 Tavily API Status: {res.status_code}")

        if res.status_code != 200:
            print(f"⚠️ Tavily Error: {res.text}")
            raise ValueError(f"Tavily returned status {res.status_code}")

        data = res.json()
        results = data.get("results", [])

        if not results:
            print("🚫 Tavily returned no results.")
        else:
            print(f"✅ Tavily returned {len(results)} results")

        formatted = []
        for r in results:
            formatted.append({
                "title": r.get("title", ""),
                "link": r.get("url", ""),
                "snippet": r.get("content", "")[:300]
            })

        state["serp_results"].append(HumanMessage(content=json.dumps(formatted, ensure_ascii=False)))
        return state

    except Exception as e:
        print(f"❌ Tavily API failed: {e}")
        state["serp_results"].append(HumanMessage(content="[]"))
        return state

def selector_node(state):
    serp_results = json.loads(state["serp_results"][-1].content)

    previous_links = [
        json.loads(m.content)["link"]
        for m in state["selected_result"]
        if isinstance(m, HumanMessage)
    ]

    text = ""
    for i, r in enumerate(serp_results):
        if r["link"] not in previous_links:  # 중복 제거
            text += f"[{i+1}] Title: {r['title']}\nURL: {r['link']}\nSnippet: {r['snippet']}\n\n"

    prompt = f"""
You are a lead selector agent. Your goal is to choose the most promising **company website** from the list below.

📌 Selection rules:
- Only select company homepages or company introduction pages.
- Do NOT select news articles, blog posts, or generic portals.
- Include the company name or brand name in your summary.
- Avoid any URL that has already been selected previously.

Here are your options:
{text}

Respond in the following JSON format:

{{
  "selected_index": 4,
  "summary": "회사 이름 또는 짧은 설명",
  "reason": "Why this is a good B2B lead for our client",
  "link": "https://..."
}}
"""

    llm = ChatOpenAI(model="gpt-4.1", temperature=0)
    response = llm.invoke([{"role": "system", "content": prompt}])
    state["selected_result"].append(HumanMessage(content=response.content))
    return state

def reporter_node(state):
    sel = json.loads(state["selected_result"][-1].content)
    company = state["company_info"]

    prompt = f"""
<역할> 당신은 세일즈 전문가입니다.
<맥락> 우리 회사에 적합한 유망 고객(리드)을 찾고 있습니다.
<임무> 아래 분석 내용을 바탕으로 잠재 고객으로 추천해 줄 수 있는 회사를
아래 조건을 고려해서 추천 사유와 함께 표로 정리해 주세요:

- 국가는 대한민국
- 지역은 파트너 위치와 가까울수록 선호
- 산업군은 NCS 기준으로 경쟁업체이면 배제
- 직원수는 많을수록 선호
- 매출은 많을수록 선호
- 순이익은 많을수록 선호
- 당면 문제는 파트너가 해결하는 문제와 유사할수록 선호

<대상 회사 정보>
- 이름: {sel['summary']}
- URL: {sel['link']}
- 설명: {sel['reason']}

<우리 회사 정보>
- 회사명: {company['company_name']}
- 산업군: {company['industry']}
- 제공 제품: {company['product']}
- 주요 고객군: {company['target_customer']}

<출력>
- 보고서 형식 표 형태로 작성
- 각 회사에 대한 요약 정보 + 추천 이유 포함
- 비즈니스 용어로 작성
- 문체는 CEO/임원진 대상 보고서 수준의 프로페셔널한 스타일로
"""

    llm = ChatOpenAI(model="gpt-4.1", temperature=0)
    response = llm.invoke([{"role": "system", "content": prompt}])
    state["reports"].append(HumanMessage(content=response.content))
    return state

def final_report_node(state):
    results = []
    for msg in state["reports"]:
        try:
            results.append(json.loads(msg.content))
        except:
            continue
    state["final_lead_list"] = results
    print("\n📌 Final Leads:")
    for i, r in enumerate(results, 1):
        print(f"{i}. {r['target_name']} | {r['fit_reason']}")
    return state

# 3. 그래프 구성 및 실행
def should_continue(state):
    return "final_report" if len(state["reports"]) >= 10 else "selector"

def run():
    graph = StateGraph(LeadScoutState)
    graph.add_node("planner", planner_node)
    graph.add_node("serper", tavily_node)
    graph.add_node("selector", selector_node)
    graph.add_node("reporter", reporter_node)
    graph.add_node("final_report", final_report_node)
    graph.set_entry_point("planner")
    graph.add_edge("planner", "serper")
    graph.add_edge("serper", "selector")
    graph.add_edge("selector", "reporter")
    graph.add_conditional_edges("reporter", should_continue)
    graph.add_edge("final_report", END)
    graph.set_finish_point("final_report")  
    app = graph.compile()

    input_state = {
        "company_info": {
            "company_name": "더선한 주식회사",
            "industry": "AI Agent 및 SaaS",
            "product": "AI 기반 agent 추천 시스템",
            "target_customer": "AI 기반 시스템 구축을 원하는 기업",
            "extra_info": "다양한 기업 구축 product 사례 보유"
        },
        "planner_response": [],
        "serp_results": [],
        "selected_result": [],
        "reports": [],
        "final_lead_list": []
    }

    for output in app.stream(input_state):
        for key, value in output.items():
            print(f"🌀 Step: {key} 완료")

if __name__ == "__main__":
    run()

🌀 Step: planner 완료
📡 Tavily API Status: 400
⚠️ Tavily Error: {"detail":{"error":"Query is too long. Max query length is 400 characters."}}
❌ Tavily API failed: Tavily returned status 400
🌀 Step: serper 완료
🌀 Step: selector 완료


KeyboardInterrupt: 