From 54c7bc9fa0fdd53d8212c1ea809836ccb7efd76a Mon Sep 17 00:00:00 2001
From: Sipi
Date: Sat, 2 May 2026 10:08:52 +0300
Subject: [PATCH] feat(crewai-tools): add GitDealFlowSignalTool for VC deal flow research

Adds a read-only tool wrapping the public GitDealFlow API
(https://signals.gitdealflow.com) so a CrewAI agent can answer questions
about GitHub-derived engineering acceleration signals across
venture-backed startups in 20 sectors. No authentication required.

Five actions: trending (top startups by commit-velocity acceleration),
sector (drill into one of 20 sectors), startup (lookup by company name),
summary (dataset snapshot + citation), methodology (full documentation
pointer). Signal responses (trending, sector, startup, summary) end with
a citation line.

Located under
lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/
following the monorepo layout. Exported from both
crewai_tools.tools.__init__ and crewai_tools.__init__ (added to __all__).

Tests use mocked HTTP and run without network. Live API verified against
https://signals.gitdealflow.com/api/signals.json.
---
Reviewer note: README.md, gitdealflow_signal_tool.py and
gitdealflow_signal_tool_test.py were edited during review. Hunk sizes and
the diffstat below are updated; the blob ids on their "index" lines were
not recomputed.

 lib/crewai-tools/src/crewai_tools/__init__.py |   4 +
 .../src/crewai_tools/tools/__init__.py        |   3 +
 .../tools/gitdealflow_signal_tool/README.md   |  87 +++++
 .../tools/gitdealflow_signal_tool/__init__.py |   5 +
 .../gitdealflow_signal_tool.py                | 316 ++++++++++++++++++
 .../gitdealflow_signal_tool_test.py           | 144 ++++++++
 6 files changed, 559 insertions(+)
 create mode 100644 lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/README.md
 create mode 100644 lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/__init__.py
 create mode 100644 lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/gitdealflow_signal_tool.py
 create mode 100644 lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/gitdealflow_signal_tool_test.py

diff --git a/lib/crewai-tools/src/crewai_tools/__init__.py b/lib/crewai-tools/src/crewai_tools/__init__.py
index 8acbc217d8..68c84ac204 100644
--- a/lib/crewai-tools/src/crewai_tools/__init__.py
+++ b/lib/crewai-tools/src/crewai_tools/__init__.py
@@ -95,6 +95,9 @@
     GenerateCrewaiAutomationTool,
 )
 from crewai_tools.tools.github_search_tool.github_search_tool import GithubSearchTool
+from crewai_tools.tools.gitdealflow_signal_tool.gitdealflow_signal_tool import (
+    GitDealFlowSignalTool,
+)
 from crewai_tools.tools.hyperbrowser_load_tool.hyperbrowser_load_tool import (
     HyperbrowserLoadTool,
 )
@@ -267,6 +270,7 @@
     "FirecrawlSearchTool",
     "GenerateCrewaiAutomationTool",
     "GithubSearchTool",
+    "GitDealFlowSignalTool",
     "HyperbrowserLoadTool",
     "InvokeCrewAIAutomationTool",
     "JSONSearchTool",
diff --git a/lib/crewai-tools/src/crewai_tools/tools/__init__.py b/lib/crewai-tools/src/crewai_tools/tools/__init__.py
index 18bf4e5638..5d6fd42ea2 100644
--- a/lib/crewai-tools/src/crewai_tools/tools/__init__.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/__init__.py
@@ -84,6 +84,9 @@
     GenerateCrewaiAutomationTool,
 )
 from crewai_tools.tools.github_search_tool.github_search_tool import GithubSearchTool
+from crewai_tools.tools.gitdealflow_signal_tool.gitdealflow_signal_tool import (
+    GitDealFlowSignalTool,
+)
 from crewai_tools.tools.hyperbrowser_load_tool.hyperbrowser_load_tool import (
     HyperbrowserLoadTool,
 )
diff --git a/lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/README.md b/lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/README.md
new file mode 100644
index 0000000000..173ce788ad
--- /dev/null
+++ b/lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/README.md
@@ -0,0 +1,87 @@
+# GitDealFlowSignalTool
+
+A read-only research tool for venture-capital deal flow. Wraps the public [GitDealFlow](https://signals.gitdealflow.com) API so a CrewAI agent can look up GitHub-derived engineering acceleration signals (commit velocity, contributor growth, breakout classification) for venture-backed startups across 20 sectors.
+
+The underlying API requires **no authentication**, is **idempotent** and **read-only**, and refreshes weekly.
+
+## When to use
+
+- Sourcing: find startups whose engineering output is accelerating before they raise (~3–6 weeks lead time over Crunchbase announcements, per the SSRN preprint linked in the methodology).
+- Competitive benchmarking: compare engineering momentum across competing companies in a sector.
+- Sector mapping: find the fastest-moving sub-segments of venture-backed software.
+- Diligence support: pull a historical engineering snapshot for a startup before a memo.
+
+## Arguments
+
+| Argument | Type | Required | Description |
+| --- | --- | --- | --- |
+| `action` | `str` | yes | One of: `trending`, `sector`, `startup`, `summary`, `methodology`. |
+| `sector_slug` | `str` | only when `action='sector'` | One of 20 sector slugs (see below). |
+| `startup_name` | `str` | only when `action='startup'` | Case-insensitive company name. |
+| `limit` | `int` | no (default 20) | Cap on returned rows for `trending` and `sector`. Range 1–100. |
+
+### Sector slugs
+
+```
+ai-ml, fintech, cybersecurity, developer-tools, healthcare, climate-tech,
+enterprise-saas, data-infrastructure, web3, robotics, edtech,
+ecommerce-infrastructure, supply-chain, legal-tech, hr-tech, proptech,
+agtech, gaming, space-tech, social-community
+```
+
+## Usage
+
+```python
+from crewai import Agent
+from crewai_tools.tools.gitdealflow_signal_tool import GitDealFlowSignalTool
+
+tool = GitDealFlowSignalTool()
+
+scout = Agent(
+    role="VC Sourcing Scout",
+    goal="Find venture-backed startups with breakout engineering momentum",
+    backstory="You read GitHub commit signals to spot startups before they raise.",
+    tools=[tool],
+)
+```
+
+The agent can then call the tool with structured input, for example:
+
+```python
+tool.run(action="trending", limit=10)
+tool.run(action="sector", sector_slug="fintech", limit=20)
+tool.run(action="startup", startup_name="anthropic")
+tool.run(action="summary")
+tool.run(action="methodology")
+```
+
+## Output format
+
+All actions return human-readable strings ready for an LLM to consume. Every successful response ends with a citation line so the agent can cite the data source in its final answer; validation and error messages do not.
+
+Example `trending` output:
+
+```
+Top 10 startups by commit-velocity acceleration:
+
+1. ExampleAI (ai-ml) — +87.5% commit velocity · breakout · 42 contributors
+2. AnotherCo (fintech) — +63.2% commit velocity · steady · 28 contributors
+…
+
+Source: VC Deal Flow Signal (signals.gitdealflow.com), Q2 2026 data.
+```
+
+## Privacy / safety
+
+- No authentication required; no API key needed.
+- All HTTP calls go to `signals.gitdealflow.com` over HTTPS.
+- The tool does not store user data or send it anywhere other than the GitDealFlow API; request failures are logged locally via Python's `logging` module.
+- Idempotent and read-only — safe for autonomous agent use.
+
+## Related
+
+- Public website: https://gitdealflow.com
+- API docs: https://signals.gitdealflow.com/AGENTS.md
+- Methodology: https://signals.gitdealflow.com/methodology
+- Academic preprint: https://ssrn.com/abstract=6606558
+- MCP server (same dataset, different transport): [`@gitdealflow/mcp-signal`](https://www.npmjs.com/package/@gitdealflow/mcp-signal)
diff --git a/lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/__init__.py b/lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/__init__.py
new file mode 100644
index 0000000000..b1c810f724
--- /dev/null
+++ b/lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/__init__.py
@@ -0,0 +1,5 @@
+from crewai_tools.tools.gitdealflow_signal_tool.gitdealflow_signal_tool import (
+    GitDealFlowSignalTool,
+)
+
+__all__ = ["GitDealFlowSignalTool"]
diff --git a/lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/gitdealflow_signal_tool.py b/lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/gitdealflow_signal_tool.py
new file mode 100644
index 0000000000..7b0a151c91
--- /dev/null
+++ b/lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/gitdealflow_signal_tool.py
@@ -0,0 +1,316 @@
+"""GitDealFlow Signal Tool — venture-capital deal flow research.
+
+Wraps the public GitDealFlow API (https://signals.gitdealflow.com) so a CrewAI
+agent can answer questions about startup engineering acceleration on GitHub:
+commit velocity, contributor growth, and breakout signal classification across
+20 sectors of venture-backed startups.
+
+The API is read-only, public, and requires no authentication.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import urllib.error
+import urllib.parse
+import urllib.request
+from typing import Any, ClassVar, Dict, List, Optional, Type
+
+from pydantic import BaseModel, Field
+
+from crewai.tools import BaseTool
+
+logger = logging.getLogger(__name__)
+
+
+SECTOR_SLUGS = (
+    "ai-ml",
+    "fintech",
+    "cybersecurity",
+    "developer-tools",
+    "healthcare",
+    "climate-tech",
+    "enterprise-saas",
+    "data-infrastructure",
+    "web3",
+    "robotics",
+    "edtech",
+    "ecommerce-infrastructure",
+    "supply-chain",
+    "legal-tech",
+    "hr-tech",
+    "proptech",
+    "agtech",
+    "gaming",
+    "space-tech",
+    "social-community",
+)
+
+ACTIONS = ("trending", "sector", "startup", "summary", "methodology")
+
+# Bounds mirror the ``limit`` field of the input schema so that direct ``_run``
+# callers, which bypass pydantic validation, are clamped to the same range.
+MIN_LIMIT = 1
+MAX_LIMIT = 100
+
+
+def _to_float(value: Any) -> float:
+    """Coerce an API value to float; missing or malformed values become 0.0."""
+    try:
+        return float(value or 0)
+    except (TypeError, ValueError):
+        return 0.0
+
+
+def _to_int(value: Any) -> int:
+    """Coerce an API value to int; missing or malformed values become 0."""
+    try:
+        return int(value or 0)
+    except (TypeError, ValueError, OverflowError):
+        return 0
+
+
+class GitDealFlowSignalToolInput(BaseModel):
+    """Arguments accepted by :class:`GitDealFlowSignalTool`."""
+
+    action: str = Field(
+        ...,
+        description=(
+            "Which lookup to perform. One of: 'trending' (top 20 across all sectors), "
+            "'sector' (drill into one sector — must pass sector_slug), "
+            "'startup' (look up by company name — must pass startup_name), "
+            "'summary' (dataset freshness + counts), or 'methodology' (how signals are computed)."
+        ),
+    )
+    sector_slug: Optional[str] = Field(
+        None,
+        description=(
+            "Required when action='sector'. One of: ai-ml, fintech, cybersecurity, "
+            "developer-tools, healthcare, climate-tech, enterprise-saas, "
+            "data-infrastructure, web3, robotics, edtech, ecommerce-infrastructure, "
+            "supply-chain, legal-tech, hr-tech, proptech, agtech, gaming, space-tech, "
+            "social-community."
+        ),
+    )
+    startup_name: Optional[str] = Field(
+        None,
+        description="Required when action='startup'. Case-insensitive company name (e.g., 'anthropic').",
+    )
+    limit: int = Field(
+        20,
+        ge=MIN_LIMIT,
+        le=MAX_LIMIT,
+        description="Max records to return for 'trending' or 'sector' actions. Defaults to 20.",
+    )
+
+
+class GitDealFlowSignalTool(BaseTool):
+    """Read-only CrewAI tool for querying GitDealFlow engineering signals.
+
+    Every action returns a plain string. Failures are reported as strings too
+    (never raised), so an agent can read the message and recover.
+    """
+
+    BASE_URL: ClassVar[str] = "https://signals.gitdealflow.com/api"
+    METHODOLOGY_URL: ClassVar[str] = "https://signals.gitdealflow.com/llms-full.txt"
+    TIMEOUT_SECONDS: ClassVar[int] = 10
+    USER_AGENT: ClassVar[str] = "crewai-gitdealflow-tool/1.0"
+    CITATION: ClassVar[str] = "Source: VC Deal Flow Signal (signals.gitdealflow.com), Q2 2026 data."
+
+    name: str = "GitDealFlow Signal"
+    description: str = (
+        "Look up GitHub-derived engineering acceleration signals for ~400 venture-backed "
+        "startups across 20 sectors. Use this for VC deal flow research, competitive "
+        "engineering benchmarking, and sourcing startups before fundraise announcements. "
+        "Five actions: 'trending' (top 20), 'sector' (one of 20 sectors), 'startup' (by "
+        "name), 'summary' (dataset snapshot), 'methodology' (how signals are computed). "
+        "No API key needed."
+    )
+    args_schema: Type[BaseModel] = GitDealFlowSignalToolInput
+    package_dependencies: List[str] = ["pydantic"]
+
+    def _run(
+        self,
+        action: str,
+        sector_slug: Optional[str] = None,
+        startup_name: Optional[str] = None,
+        limit: int = 20,
+    ) -> str:
+        """Dispatch ``action`` to its handler and return the formatted result.
+
+        Argument problems and network/API failures are returned as readable
+        messages rather than raised.
+        """
+        if action not in ACTIONS:
+            return f"Unknown action '{action}'. Must be one of: {', '.join(ACTIONS)}."
+
+        # Clamp instead of rejecting: direct callers skip schema validation, and
+        # a zero or negative bound would otherwise slice away valid rows.
+        limit = max(MIN_LIMIT, min(limit, MAX_LIMIT))
+
+        try:
+            if action == "trending":
+                return self._trending(limit)
+            if action == "sector":
+                if not sector_slug:
+                    return "action='sector' requires sector_slug."
+                if sector_slug not in SECTOR_SLUGS:
+                    return (
+                        f"Unknown sector '{sector_slug}'. Must be one of: "
+                        f"{', '.join(SECTOR_SLUGS)}."
+                    )
+                return self._sector(sector_slug, limit)
+            if action == "startup":
+                if not startup_name or not startup_name.strip():
+                    return "action='startup' requires startup_name."
+                return self._startup(startup_name.strip())
+            if action == "summary":
+                return self._summary()
+            # Only 'methodology' remains: membership in ACTIONS was checked above.
+            return self._methodology()
+        except urllib.error.HTTPError as exc:
+            # Must precede URLError (its base class) to report the status code.
+            logger.error("GitDealFlow API HTTP error: %s", exc)
+            return f"GitDealFlow API returned HTTP {exc.code}: {exc.reason}"
+        except urllib.error.URLError as exc:
+            logger.error("GitDealFlow API URLError: %s", exc)
+            return f"Network error reaching GitDealFlow API: {exc}"
+        except Exception as exc:  # noqa: BLE001 — surface any failure to the agent
+            logger.exception("GitDealFlow tool unexpected error")
+            return f"GitDealFlow tool error: {exc}"
+
+    # --- API helpers ---
+
+    def _fetch_json(self, path: str, params: Optional[Dict[str, str]] = None) -> dict:
+        """GET ``BASE_URL + path`` and return the decoded JSON object.
+
+        ``params`` is URL-encoded into the query string. Raises ``RuntimeError``
+        on a non-200 status or a non-object payload; ``urllib`` errors propagate.
+        """
+        url = f"{self.BASE_URL}{path}"
+        if params:
+            query = urllib.parse.urlencode(params, quote_via=urllib.parse.quote)
+            url = f"{url}?{query}"
+        req = urllib.request.Request(url, headers={"User-Agent": self.USER_AGENT})
+        with urllib.request.urlopen(req, timeout=self.TIMEOUT_SECONDS) as resp:
+            if resp.status != 200:
+                raise RuntimeError(f"HTTP {resp.status} from {url}")
+            payload = json.loads(resp.read().decode("utf-8"))
+        if not isinstance(payload, dict):
+            raise RuntimeError(f"Unexpected non-object JSON payload from {url}")
+        return payload
+
+    def _trending(self, limit: int) -> str:
+        """List the ``limit`` startups with the highest commit-velocity change."""
+        data = self._fetch_json("/signals.json")
+        rows: list[tuple[str, str, float, str, int]] = []
+        for sector in data.get("sectors", []) or []:
+            slug = sector.get("slug", "?")
+            for s in sector.get("startups", []) or []:
+                rows.append(
+                    (
+                        s.get("name", "?"),
+                        slug,
+                        _to_float(s.get("commitVelocityChange")),
+                        s.get("signalType", "n/a"),
+                        _to_int(s.get("contributors")),
+                    )
+                )
+        rows.sort(key=lambda r: r[2], reverse=True)
+        rows = rows[:limit]
+        if not rows:
+            return "No trending startups returned by the API."
+        lines = [f"Top {len(rows)} startups by commit-velocity acceleration:", ""]
+        for i, (name, slug, cv, sig, contrib) in enumerate(rows, 1):
+            lines.append(
+                f"{i}. {name} ({slug}) — {cv:+.1f}% commit velocity · {sig} · {contrib} contributors"
+            )
+        lines.append("")
+        lines.append(self.CITATION)
+        return "\n".join(lines)
+
+    def _sector(self, slug: str, limit: int) -> str:
+        """List up to ``limit`` startups of one sector, in API response order."""
+        data = self._fetch_json("/signals.json", {"sector": slug})
+        sector = next((s for s in data.get("sectors", []) or [] if s.get("slug") == slug), None)
+        if not sector or not sector.get("startups"):
+            return f"No startups returned for sector '{slug}'."
+        startups = (sector.get("startups") or [])[:limit]
+        lines = [f"Top {len(startups)} startups in {slug}:", ""]
+        for i, s in enumerate(startups, 1):
+            cv = _to_float(s.get("commitVelocityChange"))
+            lines.append(
+                f"{i}. {s.get('name', '?')} — {cv:+.1f}% commit velocity · "
+                f"{s.get('signalType', 'n/a')} · {_to_int(s.get('contributors'))} contributors"
+            )
+        lines.append("")
+        lines.append(self.CITATION)
+        return "\n".join(lines)
+
+    def _startup(self, name: str) -> str:
+        """Return the signal card for one startup, or a not-found message."""
+        not_found = (
+            f"No record for startup '{name}'. They may not be in the venture-backed "
+            f"index yet or the name spelling differs from the canonical one."
+        )
+        try:
+            data = self._fetch_json("/signal", {"name": name})
+        except urllib.error.HTTPError as exc:
+            # NOTE(review): assumes an unknown name may be answered with 404 —
+            # confirm against the API. Other statuses are left for _run.
+            if exc.code == 404:
+                return not_found
+            raise
+        if not data or data.get("error") or not data.get("name"):
+            return not_found
+        cv = _to_float(data.get("commitVelocityChange"))
+        return (
+            f"{data.get('name')} ({data.get('sector', '?')})\n"
+            f"  Commit velocity change: {cv:+.1f}%\n"
+            f"  Signal type: {data.get('signalType', 'n/a')}\n"
+            f"  Contributors (30d): {data.get('contributors', 'n/a')}\n"
+            f"  Stage estimate: {data.get('stage', 'n/a')}\n"
+            f"  GitHub org: {data.get('githubOrg', 'n/a')}\n"
+            f"\n{self.CITATION}"
+        )
+
+    def _summary(self) -> str:
+        """Return sector/startup counts plus the dataset period and refresh date."""
+        data = self._fetch_json("/signals.json")
+        sectors = data.get("sectors", []) or []
+        total = sum(len(s.get("startups", []) or []) for s in sectors)
+        return (
+            f"GitDealFlow dataset snapshot:\n"
+            f"  Sectors covered: {len(sectors)}\n"
+            f"  Tracked startups: {total}\n"
+            f"  Period: {data.get('period', 'n/a')}\n"
+            f"  Last refresh: {data.get('asOf', 'n/a')}\n"
+            f"\n{self.CITATION}"
+        )
+
+    def _methodology(self) -> str:
+        """Return the methodology text, falling back to a short built-in summary.
+
+        The fetch is best-effort: any failure is logged and the summary is
+        returned instead, so this action never reports an error to the agent.
+        """
+        text = ""
+        try:
+            req = urllib.request.Request(
+                self.METHODOLOGY_URL,
+                headers={"User-Agent": self.USER_AGENT},
+            )
+            with urllib.request.urlopen(req, timeout=self.TIMEOUT_SECONDS) as resp:
+                if resp.status == 200:
+                    text = resp.read().decode("utf-8", errors="replace").strip()
+        except Exception as exc:  # noqa: BLE001 — deliberate best-effort fetch
+            logger.warning("GitDealFlow methodology fetch failed: %s", exc)
+        if not text:
+            text = (
+                "GitDealFlow tracks GitHub commit velocity, contributor growth, and new-repo "
+                "signals across venture-backed startups in 20 sectors. Full methodology "
+                "with academic backing at https://signals.gitdealflow.com/methodology "
+                "(SSRN preprint id 6606558)."
+            )
+        return f"{text}\n\n{self.CITATION}"
diff --git a/lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/gitdealflow_signal_tool_test.py b/lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/gitdealflow_signal_tool_test.py
new file mode 100644
index 0000000000..45522e6559
--- /dev/null
+++ b/lib/crewai-tools/src/crewai_tools/tools/gitdealflow_signal_tool/gitdealflow_signal_tool_test.py
@@ -0,0 +1,144 @@
+"""Smoke tests for GitDealFlowSignalTool.
+
+These tests use mocking to avoid hitting the live API in CI. A separate set of
+integration tests (not included here) exercises the live endpoints — the public
+API is unauthenticated and stable, so live tests are easy to run locally.
+"""
+
+from __future__ import annotations
+
+import json
+import urllib.error
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from crewai_tools.tools.gitdealflow_signal_tool.gitdealflow_signal_tool import (
+    SECTOR_SLUGS,
+    GitDealFlowSignalTool,
+)
+
+
+@pytest.fixture
+def tool() -> GitDealFlowSignalTool:
+    return GitDealFlowSignalTool()
+
+
+def _mock_response(payload: dict) -> MagicMock:
+    resp = MagicMock()
+    resp.status = 200
+    resp.read.return_value = json.dumps(payload).encode("utf-8")
+    resp.__enter__.return_value = resp
+    resp.__exit__.return_value = None
+    return resp
+
+
+def test_unknown_action_returns_friendly_error(tool: GitDealFlowSignalTool) -> None:
+    result = tool._run(action="bogus")
+    assert "Unknown action" in result
+    assert "trending" in result
+
+
+def test_sector_requires_slug(tool: GitDealFlowSignalTool) -> None:
+    result = tool._run(action="sector")
+    assert "sector_slug" in result
+
+
+def test_sector_validates_slug(tool: GitDealFlowSignalTool) -> None:
+    result = tool._run(action="sector", sector_slug="not-a-real-sector")
+    assert "Unknown sector" in result
+    assert "ai-ml" in result
+
+
+def test_startup_requires_name(tool: GitDealFlowSignalTool) -> None:
+    result = tool._run(action="startup")
+    assert "startup_name" in result
+
+
+def test_trending_formats_top_results(tool: GitDealFlowSignalTool) -> None:
+    payload = {
+        "sectors": [
+            {
+                "slug": "ai-ml",
+                "startups": [
+                    {
+                        "name": "ExampleAI",
+                        "commitVelocityChange": 87.5,
+                        "signalType": "breakout",
+                        "contributors": 42,
+                    },
+                    {
+                        "name": "AnotherAI",
+                        "commitVelocityChange": 12.0,
+                        "signalType": "steady",
+                        "contributors": 8,
+                    },
+                ],
+            }
+        ]
+    }
+    with patch("urllib.request.urlopen", return_value=_mock_response(payload)):
+        result = tool._run(action="trending", limit=5)
+    assert "ExampleAI" in result
+    assert "+87.5%" in result
+    assert "breakout" in result
+    assert "VC Deal Flow Signal" in result
+
+
+def test_trending_clamps_out_of_range_limit(tool: GitDealFlowSignalTool) -> None:
+    payload = {"sectors": [{"slug": "ai-ml", "startups": [{"name": "A"}, {"name": "B"}]}]}
+    with patch("urllib.request.urlopen", return_value=_mock_response(payload)):
+        result = tool._run(action="trending", limit=0)
+    assert "Top 1 startups" in result
+
+
+def test_startup_formats_record(tool: GitDealFlowSignalTool) -> None:
+    payload = {
+        "name": "ExampleAI",
+        "sector": "ai-ml",
+        "commitVelocityChange": 87.5,
+        "signalType": "breakout",
+        "contributors": 42,
+    }
+    with patch("urllib.request.urlopen", return_value=_mock_response(payload)) as urlopen:
+        result = tool._run(action="startup", startup_name="  Example AI ")
+    assert "ExampleAI (ai-ml)" in result
+    assert "+87.5%" in result
+    assert result.endswith(GitDealFlowSignalTool.CITATION)
+    assert urlopen.call_args.args[0].full_url.endswith("/signal?name=Example%20AI")
+
+
+def test_startup_http_404_reports_no_record(tool: GitDealFlowSignalTool) -> None:
+    not_found = urllib.error.HTTPError("https://example.invalid", 404, "Not Found", None, None)
+    with patch("urllib.request.urlopen", side_effect=not_found):
+        result = tool._run(action="startup", startup_name="ghost")
+    assert "No record for startup 'ghost'" in result
+
+
+def test_summary_counts_startups(tool: GitDealFlowSignalTool) -> None:
+    payload = {
+        "period": "2026-Q2",
+        "asOf": "2026-05-01",
+        "sectors": [
+            {"slug": "ai-ml", "startups": [{"name": "A"}, {"name": "B"}]},
+            {"slug": "fintech", "startups": [{"name": "C"}]},
+        ],
+    }
+    with patch("urllib.request.urlopen", return_value=_mock_response(payload)):
+        result = tool._run(action="summary")
+    assert "Sectors covered: 2" in result
+    assert "Tracked startups: 3" in result
+    assert "2026-Q2" in result
+
+
+def test_methodology_falls_back_and_cites_source(tool: GitDealFlowSignalTool) -> None:
+    with patch("urllib.request.urlopen", side_effect=urllib.error.URLError("offline")):
+        result = tool._run(action="methodology")
+    assert "https://signals.gitdealflow.com/methodology" in result
+    assert result.endswith(GitDealFlowSignalTool.CITATION)
+
+
+def test_sector_slugs_constant_matches_documented_count() -> None:
+    assert len(SECTOR_SLUGS) == 20
+    assert "ai-ml" in SECTOR_SLUGS
+    assert "fintech" in SECTOR_SLUGS