Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lib/crewai-tools/src/crewai_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@
GenerateCrewaiAutomationTool,
)
from crewai_tools.tools.github_search_tool.github_search_tool import GithubSearchTool
from crewai_tools.tools.gitdealflow_signal_tool.gitdealflow_signal_tool import (
GitDealFlowSignalTool,
)
from crewai_tools.tools.hyperbrowser_load_tool.hyperbrowser_load_tool import (
HyperbrowserLoadTool,
)
Expand Down Expand Up @@ -267,6 +270,7 @@
"FirecrawlSearchTool",
"GenerateCrewaiAutomationTool",
"GithubSearchTool",
"GitDealFlowSignalTool",
"HyperbrowserLoadTool",
"InvokeCrewAIAutomationTool",
"JSONSearchTool",
Expand Down
3 changes: 3 additions & 0 deletions lib/crewai-tools/src/crewai_tools/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@
GenerateCrewaiAutomationTool,
)
from crewai_tools.tools.github_search_tool.github_search_tool import GithubSearchTool
from crewai_tools.tools.gitdealflow_signal_tool.gitdealflow_signal_tool import (
GitDealFlowSignalTool,
)
from crewai_tools.tools.hyperbrowser_load_tool.hyperbrowser_load_tool import (
HyperbrowserLoadTool,
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# GitDealFlowSignalTool

A read-only research tool for venture-capital deal flow. Wraps the public [GitDealFlow](https://signals.gitdealflow.com) API so a CrewAI agent can look up GitHub-derived engineering acceleration signals (commit velocity, contributor growth, breakout classification) for venture-backed startups across 20 sectors.

The underlying API requires **no authentication** and is **idempotent** and **read-only**; its data is refreshed weekly.

## When to use

- Sourcing: find startups whose engineering output is accelerating before they raise (~3–6 weeks lead time over Crunchbase announcements, per the SSRN preprint linked in the methodology).
- Competitive benchmarking: compare engineering momentum across competing companies in a sector.
- Sector mapping: find the fastest-moving sub-segments of venture-backed software.
- Diligence support: pull a historical engineering snapshot for a startup before a memo.

## Arguments

| Argument | Type | Required | Description |
| --- | --- | --- | --- |
| `action` | `str` | yes | One of: `trending`, `sector`, `startup`, `summary`, `methodology`. |
| `sector_slug` | `str` | only when `action='sector'` | One of 20 sector slugs (see below). |
| `startup_name` | `str` | only when `action='startup'` | Case-insensitive company name. |
| `limit` | `int` | no (default 20) | Cap on returned rows for `trending` and `sector`. Range 1–100. |

### Sector slugs

```
ai-ml, fintech, cybersecurity, developer-tools, healthcare, climate-tech,
enterprise-saas, data-infrastructure, web3, robotics, edtech,
ecommerce-infrastructure, supply-chain, legal-tech, hr-tech, proptech,
agtech, gaming, space-tech, social-community
```

## Usage

```python
from crewai import Agent
from crewai_tools.tools.gitdealflow_signal_tool import GitDealFlowSignalTool

tool = GitDealFlowSignalTool()

scout = Agent(
role="VC Sourcing Scout",
goal="Find venture-backed startups with breakout engineering momentum",
backstory="You read GitHub commit signals to spot startups before they raise.",
tools=[tool],
)
```

The agent can then call the tool with structured input, for example:

```python
tool.run(action="trending", limit=10)
tool.run(action="sector", sector_slug="fintech", limit=20)
tool.run(action="startup", startup_name="anthropic")
tool.run(action="summary")
tool.run(action="methodology")
```

## Output format

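All actions return human-readable strings ready for an LLM to consume. Successful `trending`, `sector`, `startup`, and `summary` responses end with a citation line so the agent can cite the data source in its final answer; `methodology` returns the methodology text as-is, and error or "no results" messages carry no citation.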

Example `trending` output:

```
Top 10 startups by commit-velocity acceleration:

1. ExampleAI (ai-ml) — +87.5% commit velocity · breakout · 42 contributors
2. AnotherCo (fintech) — +63.2% commit velocity · steady · 28 contributors

Source: VC Deal Flow Signal (signals.gitdealflow.com), Q2 2026 data.
```

## Privacy / safety

- No authentication required; no API key needed.
- All HTTP calls go to `signals.gitdealflow.com` over HTTPS.
- The tool does not store user data or transmit it anywhere outside the GitDealFlow API; request failures are reported through Python's standard `logging` module.
- Idempotent and read-only — safe for autonomous agent use.

## Related

- Public website: https://gitdealflow.com
- API docs: https://signals.gitdealflow.com/AGENTS.md
- Methodology: https://signals.gitdealflow.com/methodology
- Academic preprint: https://ssrn.com/abstract=6606558
- MCP server (same dataset, different transport): [`@gitdealflow/mcp-signal`](https://www.npmjs.com/package/@gitdealflow/mcp-signal)
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from crewai_tools.tools.gitdealflow_signal_tool.gitdealflow_signal_tool import (
GitDealFlowSignalTool,
)

__all__ = ["GitDealFlowSignalTool"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
"""GitDealFlow Signal Tool — venture-capital deal flow research.

Wraps the public GitDealFlow API (https://signals.gitdealflow.com) so a CrewAI
agent can answer questions about startup engineering acceleration on GitHub:
commit velocity, contributor growth, and breakout signal classification across
20 sectors of venture-backed startups.

The API is read-only, public, and requires no authentication.
"""

from __future__ import annotations

import json
import logging
import urllib.error
import urllib.parse
import urllib.request
from typing import ClassVar, List, Optional, Type

from pydantic import BaseModel, Field

from crewai.tools import BaseTool

logger = logging.getLogger(__name__)


# Sector slugs accepted by the ``sector`` action. The order is significant: it
# is the order in which the slugs are listed in the "Unknown sector" message.
SECTOR_SLUGS = (
    "ai-ml", "fintech", "cybersecurity", "developer-tools",
    "healthcare", "climate-tech", "enterprise-saas", "data-infrastructure",
    "web3", "robotics", "edtech", "ecommerce-infrastructure",
    "supply-chain", "legal-tech", "hr-tech", "proptech",
    "agtech", "gaming", "space-tech", "social-community",
)

# Lookup modes understood by the tool, in the order they are listed in the
# "Unknown action" message.
ACTIONS = (
    "trending",
    "sector",
    "startup",
    "summary",
    "methodology",
)


class GitDealFlowSignalToolInput(BaseModel):
    # Argument schema handed to the agent for GitDealFlowSignalTool.
    # ``sector_slug`` and ``startup_name`` are optional at the schema level;
    # the "required when ..." rules in their descriptions are enforced by
    # GitDealFlowSignalTool._run, which returns an explanatory message.

    # Selects which lookup is performed (required).
    action: str = Field(
        default=...,
        description=(
            "Which lookup to perform. One of: 'trending' (top 20 across all sectors), "
            "'sector' (drill into one sector — must pass sector_slug), "
            "'startup' (look up by company name — must pass startup_name), "
            "'summary' (dataset freshness + counts), or 'methodology' (how signals are computed)."
        ),
    )

    # Sector to drill into; only consulted for action='sector'.
    sector_slug: Optional[str] = Field(
        default=None,
        description=(
            "Required when action='sector'. One of: ai-ml, fintech, cybersecurity, "
            "developer-tools, healthcare, climate-tech, enterprise-saas, "
            "data-infrastructure, web3, robotics, edtech, ecommerce-infrastructure, "
            "supply-chain, legal-tech, hr-tech, proptech, agtech, gaming, space-tech, "
            "social-community."
        ),
    )

    # Company to look up; only consulted for action='startup'.
    startup_name: Optional[str] = Field(
        default=None,
        description="Required when action='startup'. Case-insensitive company name (e.g., 'anthropic').",
    )

    # Row cap for list-style actions, validated to the inclusive range 1-100.
    limit: int = Field(
        default=20,
        ge=1,
        le=100,
        description="Max records to return for 'trending' or 'sector' actions. Defaults to 20.",
    )


class GitDealFlowSignalTool(BaseTool):
    """CrewAI tool that queries the public GitDealFlow signals API.

    Five read-only lookups are selected through ``action``: ``trending``,
    ``sector``, ``startup``, ``summary`` and ``methodology``. Every lookup
    returns a plain string. Failures are also reported as strings instead of
    being raised, so the calling agent always receives a usable observation.
    """

    BASE_URL: ClassVar[str] = "https://signals.gitdealflow.com/api"
    METHODOLOGY_URL: ClassVar[str] = "https://signals.gitdealflow.com/llms-full.txt"
    TIMEOUT_SECONDS: ClassVar[int] = 10
    USER_AGENT: ClassVar[str] = "crewai-gitdealflow-tool/1.0"
    CITATION: ClassVar[str] = "Source: VC Deal Flow Signal (signals.gitdealflow.com), Q2 2026 data."
    # Bounds mirror the default/ge/le constraints declared on the input schema
    # so that direct ``_run`` calls behave like schema-validated ones.
    DEFAULT_LIMIT: ClassVar[int] = 20
    MIN_LIMIT: ClassVar[int] = 1
    MAX_LIMIT: ClassVar[int] = 100

    name: str = "GitDealFlow Signal"
    description: str = (
        "Look up GitHub-derived engineering acceleration signals for ~400 venture-backed "
        "startups across 20 sectors. Use this for VC deal flow research, competitive "
        "engineering benchmarking, and sourcing startups before fundraise announcements. "
        "Five actions: 'trending' (top 20), 'sector' (one of 20 sectors), 'startup' (by "
        "name), 'summary' (dataset snapshot), 'methodology' (how signals are computed). "
        "No API key needed."
    )
    args_schema: Type[BaseModel] = GitDealFlowSignalToolInput
    package_dependencies: List[str] = ["pydantic"]

    def _run(
        self,
        action: str,
        sector_slug: Optional[str] = None,
        startup_name: Optional[str] = None,
        limit: int = 20,
    ) -> str:
        """Dispatch ``action`` to the matching lookup and return its text.

        Args:
            action: One of ``ACTIONS``; matched case-insensitively after
                trimming surrounding whitespace.
            sector_slug: Sector to list when ``action`` is ``sector``; matched
                case-insensitively against ``SECTOR_SLUGS``.
            startup_name: Company to look up when ``action`` is ``startup``.
            limit: Row cap for ``trending`` and ``sector``; clamped to
                ``MIN_LIMIT``..``MAX_LIMIT``.

        Returns:
            The lookup result, or a human-readable message describing why the
            request was invalid or failed. Exceptions are never propagated.
        """
        action_key = action.strip().lower() if isinstance(action, str) else action
        if action_key not in ACTIONS:
            return f"Unknown action '{action}'. Must be one of: {', '.join(ACTIONS)}."

        limit = self._clamp_limit(limit)

        try:
            if action_key == "trending":
                return self._trending(limit)
            if action_key == "sector":
                slug = str(sector_slug).strip().lower() if sector_slug else ""
                if not slug:
                    return "action='sector' requires sector_slug."
                if slug not in SECTOR_SLUGS:
                    return (
                        f"Unknown sector '{sector_slug}'. Must be one of: "
                        f"{', '.join(SECTOR_SLUGS)}."
                    )
                return self._sector(slug, limit)
            if action_key == "startup":
                company = str(startup_name).strip() if startup_name else ""
                if not company:
                    return "action='startup' requires startup_name."
                return self._startup(company)
            if action_key == "summary":
                return self._summary()
            # Only 'methodology' remains once the membership check has passed.
            return self._methodology()
        except urllib.error.HTTPError as exc:
            # HTTPError subclasses URLError, so it must be handled first;
            # otherwise a 4xx/5xx reply is misreported as a network failure.
            logger.error("GitDealFlow API HTTP error: %s", exc)
            return f"GitDealFlow API returned HTTP {exc.code}: {exc.reason}"
        except urllib.error.URLError as exc:
            logger.error("GitDealFlow API URLError: %s", exc)
            return f"Network error reaching GitDealFlow API: {exc}"
        except Exception as exc:  # noqa: BLE001 — surface any failure to the agent
            logger.exception("GitDealFlow tool unexpected error")
            return f"GitDealFlow tool error: {exc}"

    # --- input / value normalisation ---

    def _clamp_limit(self, limit: int) -> int:
        """Coerce ``limit`` to an int within ``MIN_LIMIT``..``MAX_LIMIT``.

        Without this, a direct call with ``limit=-1`` would slice off the last
        row and ``limit=0`` would report that the API returned nothing.
        Values that cannot be converted fall back to ``DEFAULT_LIMIT``.
        """
        try:
            value = int(limit)
        except (TypeError, ValueError):
            return self.DEFAULT_LIMIT
        return max(self.MIN_LIMIT, min(self.MAX_LIMIT, value))

    @staticmethod
    def _as_float(value: object) -> float:
        """Return ``value`` as a float, treating missing or malformed data as 0.0."""
        try:
            return float(value or 0)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return 0.0

    @staticmethod
    def _as_int(value: object) -> int:
        """Return ``value`` as an int, treating missing or malformed data as 0."""
        try:
            return int(value or 0)  # type: ignore[call-overload]
        except (TypeError, ValueError):
            return 0

    # --- API helpers ---

    def _http_get(self, url: str) -> bytes:
        """GET ``url`` with the tool's User-Agent and timeout; return the raw body.

        Raises:
            urllib.error.URLError: On connection problems; ``urlopen`` raises
                the ``HTTPError`` subclass for HTTP error statuses.
            RuntimeError: If the response status is anything other than 200.
        """
        req = urllib.request.Request(url, headers={"User-Agent": self.USER_AGENT})
        with urllib.request.urlopen(req, timeout=self.TIMEOUT_SECONDS) as resp:
            if resp.status != 200:
                raise RuntimeError(f"HTTP {resp.status} from {url}")
            return resp.read()

    def _fetch_json(self, path: str) -> dict:
        """Fetch ``BASE_URL + path`` and decode the body as a JSON object.

        Raises:
            RuntimeError: If the decoded payload is not a JSON object, since
                every caller reads it with ``dict.get``.
        """
        url = f"{self.BASE_URL}{path}"
        payload = json.loads(self._http_get(url).decode("utf-8"))
        if not isinstance(payload, dict):
            raise RuntimeError(f"Unexpected JSON payload from {url}")
        return payload

    def _format_row(self, rank: int, startup: dict, sector_slug: Optional[str] = None) -> str:
        """Render one ranked startup line, adding ``(sector_slug)`` when given."""
        label = startup.get("name", "?")
        if sector_slug is not None:
            label = f"{label} ({sector_slug})"
        velocity = self._as_float(startup.get("commitVelocityChange"))
        contributors = self._as_int(startup.get("contributors"))
        return (
            f"{rank}. {label} — {velocity:+.1f}% commit velocity · "
            f"{startup.get('signalType', 'n/a')} · {contributors} contributors"
        )

    # --- actions ---

    def _trending(self, limit: int) -> str:
        """List the ``limit`` startups with the highest commit-velocity change."""
        data = self._fetch_json("/signals.json")
        candidates: list[tuple[float, str, dict]] = []
        for sector in data.get("sectors", []) or []:
            slug = sector.get("slug", "?")
            for startup in sector.get("startups", []) or []:
                velocity = self._as_float(startup.get("commitVelocityChange"))
                candidates.append((velocity, slug, startup))
        # Stable sort: startups with equal velocity keep the API's order.
        candidates.sort(key=lambda item: item[0], reverse=True)
        top = candidates[:limit]
        if not top:
            return "No trending startups returned by the API."
        lines = [f"Top {len(top)} startups by commit-velocity acceleration:", ""]
        lines.extend(
            self._format_row(rank, startup, slug)
            for rank, (_, slug, startup) in enumerate(top, 1)
        )
        lines.append("")
        lines.append(self.CITATION)
        return "\n".join(lines)

    def _sector(self, slug: str, limit: int) -> str:
        """List up to ``limit`` startups of one sector, in the order the API returns them."""
        data = self._fetch_json(f"/signals.json?sector={urllib.parse.quote(slug)}")
        sector = next(
            (s for s in data.get("sectors", []) or [] if s.get("slug") == slug),
            None,
        )
        if not sector or not sector.get("startups"):
            return f"No startups returned for sector '{slug}'."
        startups = sector["startups"][:limit]
        lines = [f"Top {len(startups)} startups in {slug}:", ""]
        lines.extend(self._format_row(rank, s) for rank, s in enumerate(startups, 1))
        lines.append("")
        lines.append(self.CITATION)
        return "\n".join(lines)

    def _startup(self, name: str) -> str:
        """Describe a single startup, or explain that no record was found."""
        try:
            data = self._fetch_json(f"/signal?name={urllib.parse.quote(name)}")
        except urllib.error.HTTPError as exc:
            if exc.code != 404:
                raise
            # A 404 is treated the same as an empty or error payload below.
            data = {}
        if not data or data.get("error") or not data.get("name"):
            return (
                f"No record for startup '{name}'. They may not be in the venture-backed "
                f"index yet or the name spelling differs from the canonical one."
            )
        cv = self._as_float(data.get("commitVelocityChange"))
        return (
            f"{data.get('name')} ({data.get('sector', '?')})\n"
            f"  Commit velocity change: {cv:+.1f}%\n"
            f"  Signal type: {data.get('signalType', 'n/a')}\n"
            f"  Contributors (30d): {data.get('contributors', 'n/a')}\n"
            f"  Stage estimate: {data.get('stage', 'n/a')}\n"
            f"  GitHub org: {data.get('githubOrg', 'n/a')}\n"
            f"\n{self.CITATION}"
        )

    def _summary(self) -> str:
        """Summarise sector count, startup count, period and refresh date."""
        data = self._fetch_json("/signals.json")
        sectors = data.get("sectors", []) or []
        total = sum(len(s.get("startups", []) or []) for s in sectors)
        return (
            f"GitDealFlow dataset snapshot:\n"
            f"  Sectors covered: {len(sectors)}\n"
            f"  Tracked startups: {total}\n"
            f"  Period: {data.get('period', 'n/a')}\n"
            f"  Last refresh: {data.get('asOf', 'n/a')}\n"
            f"\n{self.CITATION}"
        )

    def _methodology(self) -> str:
        """Return the published methodology text, or a short built-in summary.

        The fetch is best-effort: any failure is logged and the static
        fallback is returned instead of an error.
        """
        try:
            return self._http_get(self.METHODOLOGY_URL).decode("utf-8", errors="replace")
        except Exception as exc:  # noqa: BLE001 — deliberate best-effort fallback
            logger.warning("GitDealFlow methodology fetch failed; using fallback text: %s", exc)
        return (
            "GitDealFlow tracks GitHub commit velocity, contributor growth, and new-repo "
            "signals across venture-backed startups in 20 sectors. Full methodology "
            "with academic backing at https://signals.gitdealflow.com/methodology "
            "(SSRN preprint id 6606558)."
        )
Loading