Skip to content

Commit 6e4eeca

Browse files
committed
Add semantic search feature with CLI and API support
Implement vector-based semantic search across the knowledge graph with full CLI/API parity, following the 3-layer architecture.

New features:
- Core: semanticSearchCore() in src/core/search.ts
- CLI: forest search "query" with table output and --json mode
- API: GET /api/v1/search with query params
- Server: Dual-stack IPv4/IPv6 support, configurable hostname

Query parameters support limit, threshold, and include_scores for result tuning.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
1 parent 880ac3e commit 6e4eeca

5 files changed

Lines changed: 352 additions & 9 deletions

File tree

src/cli/commands/search.ts

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
import { semanticSearchCore } from '../../core/search';
2+
import { handleError, formatId, parseCsvList } from '../shared/utils';
3+
import { COMMAND_TLDR, emitTldrAndExit } from '../tldr';
4+
5+
type ClercModule = typeof import('clerc');
6+
7+
/**
 * Flags accepted by the `search` command, mirroring the `flags` definition
 * passed to `defineCommand`. Every field is optional because any flag may be
 * omitted on the command line.
 */
type SearchFlags = {
  /** Search query text; an alternative to the positional `[query]` argument. */
  query?: string;
  /** Maximum number of results to return. */
  limit?: number;
  /** Comma-separated tag filter (AND logic: every listed tag must be present). */
  tags?: string;
  /** Minimum similarity score, expected to be between 0.0 and 1.0. */
  minScore?: number;
  /** Output results as JSON instead of a text table. */
  json?: boolean;
  /** When present, command metadata is emitted; the value `json` selects JSON mode. */
  tldr?: string;
};
15+
16+
/**
 * Build the `search` CLI command definition.
 *
 * The command accepts the query either as the positional `[query]` argument or
 * via `--query`/`-q`, plus `--limit`, `--tags`, `--minScore`, `--json` and
 * `--tldr`. Any error thrown while handling the command is routed to
 * `handleError`.
 *
 * @param clerc - The loaded `clerc` module used to define the command.
 * @returns The command object produced by `clerc.defineCommand`.
 */
export function createSearchCommand(clerc: ClercModule) {
  return clerc.defineCommand(
    {
      name: 'search',
      description: 'Search for notes using semantic similarity (embeddings)',
      parameters: ['[query]'],
      flags: {
        query: {
          type: String,
          alias: 'q',
          description: 'Search query (can also be passed as positional argument)',
        },
        limit: {
          type: Number,
          alias: 'l',
          description: 'Maximum number of results to return',
          default: 20,
        },
        tags: {
          type: String,
          alias: 't',
          description: 'Filter by tags (comma-separated, AND logic)',
        },
        minScore: {
          type: Number,
          description: 'Minimum similarity score (0.0 to 1.0)',
          default: 0.0,
        },
        json: {
          type: Boolean,
          description: 'Output results as JSON',
        },
        tldr: {
          type: String,
          description: 'Output command metadata for agent consumption (--tldr or --tldr=json)',
        },
      },
    },
    async ({ flags, parameters }) => {
      try {
        // A TLDR request takes priority over running a search.
        // NOTE(review): emitTldrAndExit presumably terminates the process — confirm.
        if (flags.tldr !== undefined) {
          const jsonMode = flags.tldr === 'json';
          emitTldrAndExit(COMMAND_TLDR.search, jsonMode);
        }
        await runSearch(flags as SearchFlags, parameters.query as string | undefined);
      } catch (error) {
        handleError(error);
      }
    },
  );
}
68+
69+
/**
 * Resolve the query and options from CLI input, run the semantic search, and
 * print the results as JSON (`--json`) or as a text table.
 *
 * Invalid input is reported on stderr and signalled through
 * `process.exitCode = 1`; the function returns without throwing in that case.
 *
 * @param flags - Parsed command flags.
 * @param positionalQuery - Query passed as the positional argument; used only
 *   when `flags.query` is not set.
 */
async function runSearch(flags: SearchFlags, positionalQuery?: string) {
  // The --query flag wins over the positional argument.
  const query = flags.query ?? positionalQuery;

  if (!query || query.trim().length === 0) {
    console.error('✖ Search query is required. Use: forest search "your query" or --query "your query"');
    process.exitCode = 1;
    return;
  }

  const limit = typeof flags.limit === 'number' ? flags.limit : 20;
  const minScore = typeof flags.minScore === 'number' ? flags.minScore : 0.0;

  // A NaN, fractional or negative limit would be handed to Array.prototype.slice
  // downstream and silently return the wrong page, so reject it here.
  if (!Number.isInteger(limit) || limit < 0) {
    console.error('✖ limit must be a non-negative integer');
    process.exitCode = 1;
    return;
  }

  // Written as a negated in-range test so that NaN (for which every comparison
  // is false) is rejected instead of slipping through.
  if (!(minScore >= 0 && minScore <= 1)) {
    console.error('✖ minScore must be between 0.0 and 1.0');
    process.exitCode = 1;
    return;
  }

  const tags = parseCsvList(flags.tags);
  const tagFilter = tags && tags.length > 0 ? tags : undefined;

  const result = await semanticSearchCore(query, {
    limit,
    offset: 0,
    minScore,
    tags: tagFilter,
  });

  if (flags.json) {
    console.log(JSON.stringify({
      query,
      total: result.total,
      limit,
      minScore,
      tags: tagFilter ?? null,
      results: result.nodes.map(item => ({
        id: item.node.id,
        title: item.node.title,
        tags: item.node.tags,
        similarity: item.similarity,
        bodyPreview: item.node.body.slice(0, 100),
        createdAt: item.node.createdAt,
        updatedAt: item.node.updatedAt,
      })),
    }, null, 2));
  } else {
    printTextResults(query, result, tags || [], minScore);
  }
}
121+
122+
/**
 * Print search results to stdout as a fixed-width text table.
 *
 * Output is a header naming the query (plus the tag filter and minimum score
 * when they are active), the total match count, and one row per returned node
 * with its score, formatted id, truncated title and truncated tag list.
 *
 * @param query - The query string, echoed in the header.
 * @param result - Nodes to print and the total number of matches.
 * @param tags - Tag filter that was applied; an empty array means none.
 * @param minScore - Minimum similarity that was applied; shown only when > 0.
 */
function printTextResults(
  query: string,
  result: { nodes: Array<{ node: any; similarity: number }>; total: number },
  tags: string[],
  minScore: number,
) {
  console.log(`Semantic search: "${query}"`);

  if (tags && tags.length > 0) {
    console.log(`Filtered by tags: ${tags.join(', ')}`);
  }

  if (minScore > 0) {
    console.log(`Minimum similarity: ${minScore.toFixed(2)}`);
  }

  console.log(`\nFound ${result.total} ${result.total === 1 ? 'result' : 'results'}:\n`);

  if (result.nodes.length === 0) {
    console.log('  (no matches)');
    return;
  }

  // Fixed column widths for the table.
  const maxTitleWidth = 50;
  const scoreWidth = 10;
  const idWidth = 10;
  const tagsWidth = 30;

  console.log(`${'SCORE'.padEnd(scoreWidth)} ${'ID'.padEnd(idWidth)} ${'TITLE'.padEnd(maxTitleWidth)} ${'TAGS'.padEnd(tagsWidth)}`);
  // "+ 3" accounts for the single-space separators between the four columns.
  console.log('─'.repeat(scoreWidth + idWidth + maxTitleWidth + tagsWidth + 3));

  for (const item of result.nodes) {
    const scoreCell = item.similarity.toFixed(3).padEnd(scoreWidth);
    const idCell = formatId(item.node.id).padEnd(idWidth);
    const titleCell = truncate(item.node.title, maxTitleWidth).padEnd(maxTitleWidth);
    // Named distinctly from the `tags` parameter (the applied filter) to avoid
    // shadowing it; a node without tags prints an empty cell.
    const nodeTagsCell = truncate((item.node.tags ?? []).join(', '), tagsWidth);

    console.log(`${scoreCell} ${idCell} ${titleCell} ${nodeTagsCell}`);
  }

  console.log();
}
167+
168+
/**
 * Shorten `str` so that it never exceeds `maxLength` characters.
 *
 * Strings that already fit are returned unchanged. Longer strings are cut and
 * suffixed with `...`. When `maxLength` is too small to hold the ellipsis the
 * string is hard-cut instead, so the result is still at most `maxLength` long
 * (a non-positive `maxLength` yields an empty string).
 *
 * @param str - Text to shorten.
 * @param maxLength - Maximum length of the returned string.
 * @returns `str` itself, or a shortened copy no longer than `maxLength`.
 */
function truncate(str: string, maxLength: number): string {
  if (str.length <= maxLength) return str;

  const ellipsis = '...';
  // With maxLength <= 3, `maxLength - 3` would be zero or negative; a negative
  // slice end counts from the end of the string and would produce a result
  // longer than maxLength.
  if (maxLength <= ellipsis.length) {
    return str.slice(0, Math.max(0, maxLength));
  }
  return str.slice(0, maxLength - ellipsis.length) + ellipsis;
}

src/cli/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { createCaptureCommand } from './commands/capture';
22
import { createAdminRecomputeEmbeddingsCommand } from './commands/admin-recompute-embeddings';
33
import { createExploreCommand } from './commands/explore';
4+
import { createSearchCommand } from './commands/search';
45
import { registerExportCommands } from './commands/export';
56
import { registerEdgesCommands } from './commands/edges';
67
import { registerNodeCommands } from './commands/node';
@@ -40,6 +41,7 @@ export async function createForestCli() {
4041

4142
cli.command(createCaptureCommand(clerc));
4243
cli.command(createExploreCommand(clerc));
44+
cli.command(createSearchCommand(clerc));
4345
cli.command(createStatsCommand(clerc));
4446
cli.command(createHealthCommand(clerc));
4547
cli.command(createServeCommand(clerc));

src/core/search.ts

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import { NodeRecord, listNodes as dbListNodes } from '../lib/db';
2+
import { embedNoteText } from '../lib/embeddings';
3+
import { cosineEmbeddings } from '../lib/scoring';
4+
5+
/** Tuning options for `semanticSearchCore`. All fields are optional. */
export type SemanticSearchOptions = {
  /** Maximum number of results to return (defaults to 20). */
  limit?: number;
  /** Number of leading matches to skip before returning results (defaults to 0). */
  offset?: number;
  /** Matches scoring below this similarity are dropped (defaults to 0.0). */
  minScore?: number;
  /** Only nodes carrying every one of these tags are returned (AND logic). */
  tags?: string[];
};
11+
12+
/** Result of `semanticSearchCore`. */
export type SemanticSearchResult = {
  /** The requested page of matches, ordered by similarity, highest first. */
  nodes: Array<{ node: NodeRecord; similarity: number }>;
  /** Number of matches after filtering but before pagination is applied. */
  total: number;
};
16+
17+
/**
 * Perform semantic search using vector embeddings.
 *
 * Embeds `query`, scores every stored node that has an embedding by cosine
 * similarity against it, filters by required tags and minimum score, sorts by
 * similarity (highest first) and returns the requested page.
 *
 * @param query - Text to search for; must contain non-whitespace characters.
 * @param options - Pagination and filtering options. A negative `limit` or
 *   `offset` is clamped to 0; a `NaN` `limit`, `offset` or `minScore` falls
 *   back to its default.
 * @returns The requested page of matches plus the total match count before
 *   pagination.
 * @throws Error if `query` is empty or no embedding could be generated for it.
 */
export async function semanticSearchCore(
  query: string,
  options: SemanticSearchOptions = {},
): Promise<SemanticSearchResult> {
  if (!query || query.trim().length === 0) {
    throw new Error('Search query cannot be empty');
  }

  const queryEmbedding = await embedNoteText(query);
  if (!queryEmbedding || queryEmbedding.length === 0) {
    throw new Error('Failed to generate embedding for query. Check FOREST_EMBED_PROVIDER setting.');
  }

  const requiredTags = options.tags ?? [];
  // Comparing against NaN is always false and would silently drop every node.
  const rawMinScore = options.minScore ?? 0.0;
  const minScore = Number.isNaN(rawMinScore) ? 0.0 : rawMinScore;
  const limit = normalizeSearchCount(options.limit, 20);
  const offset = normalizeSearchCount(options.offset, 0);

  const allNodes = await dbListNodes();

  const matches = allNodes
    // Only nodes that actually have an embedding can be scored.
    .filter((node) => node.embedding && node.embedding.length > 0)
    // Tag filter runs before scoring so no similarity is computed for nodes
    // that would be discarded anyway (AND logic: every tag must be present).
    .filter((node) => requiredTags.every((tag) => node.tags.includes(tag)))
    .map((node) => ({
      node,
      similarity: cosineEmbeddings(queryEmbedding, node.embedding),
    }))
    .filter((item) => item.similarity >= minScore)
    .sort((a, b) => b.similarity - a.similarity);

  return {
    nodes: matches.slice(offset, offset + limit),
    total: matches.length,
  };
}

/**
 * Coerce a pagination value into something safe to pass to `Array.prototype.slice`:
 * `undefined`/`NaN` become `fallback`, fractions are truncated, negatives become 0.
 */
function normalizeSearchCount(value: number | undefined, fallback: number): number {
  if (value === undefined || Number.isNaN(value)) return fallback;
  return Math.max(0, Math.trunc(value));
}

src/server/index.ts

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,15 @@ import { statsRoutes } from './routes/stats';
77
import { nodesRoutes } from './routes/nodes';
88
import { tagsRoutes } from './routes/tags';
99
import { edgesRoutes } from './routes/edges';
10+
import { searchRoutes } from './routes/search';
1011
import { websocketRoute } from './routes/websocket';
1112

1213
const DEFAULT_PORT = 3000;
14+
const DEFAULT_HOSTNAME = '::'; // Dual-stack: listens on both IPv4 and IPv6
1315

14-
export function createServer(options: { port?: number } = {}) {
16+
export function createServer(options: { port?: number; hostname?: string } = {}) {
1517
const port = options.port ?? DEFAULT_PORT;
18+
const hostname = options.hostname ?? DEFAULT_HOSTNAME;
1619

1720
const app = new Elysia()
1821
.use(cors())
@@ -29,6 +32,7 @@ export function createServer(options: { port?: number } = {}) {
2932
{ name: 'Nodes', description: 'Node CRUD operations' },
3033
{ name: 'Edges', description: 'Edge management' },
3134
{ name: 'Tags', description: 'Tag operations' },
35+
{ name: 'Search', description: 'Semantic search operations' },
3236
],
3337
},
3438
}),
@@ -43,27 +47,35 @@ export function createServer(options: { port?: number } = {}) {
4347
.use(nodesRoutes)
4448
.use(edgesRoutes)
4549
.use(tagsRoutes)
50+
.use(searchRoutes)
4651
.use(websocketRoute);
4752

48-
return { app, port };
53+
return { app, port, hostname };
4954
}
5055

51-
/**
 * Create the server with `createServer`, start listening on the resolved
 * hostname and port, and log the URLs where it can be reached.
 *
 * @param options - Optional `port` and `hostname` overrides, forwarded to
 *   `createServer`.
 * @returns The application instance that was started.
 */
export async function startServer(options: { port?: number; hostname?: string } = {}) {
  const { app, port, hostname } = createServer(options);

  app.listen({ hostname, port });

  // Wildcard bind addresses are not something a user can open, so show
  // "localhost" for them. Any other IPv6 literal must be wrapped in brackets
  // to form a valid URL authority (e.g. http://[::1]:3000).
  const isWildcard = hostname === '::' || hostname === '0.0.0.0';
  const needsBrackets = hostname.includes(':') && !hostname.startsWith('[');
  const displayHost = isWildcard ? 'localhost' : needsBrackets ? `[${hostname}]` : hostname;

  console.log(`🌲 Forest server running at http://${displayHost}:${port}`);
  console.log(`📚 API docs available at http://${displayHost}:${port}/swagger`);
  if (hostname === '::') {
    console.log(` (Dual-stack mode: IPv4 and IPv6 enabled)`);
  }

  return app;
}
6171

6272
// Start server if this file is run directly with Bun.
// FOREST_PORT and FOREST_HOST override the defaults; an unset, empty or
// invalid value falls back to the default instead of being passed to listen().
// @ts-ignore - import.meta.main is Bun-specific
if (typeof (globalThis as any).Bun !== 'undefined' && import.meta.main) {
  const rawPort = process.env.FOREST_PORT;
  const parsedPort = rawPort ? Number.parseInt(rawPort, 10) : DEFAULT_PORT;
  const portIsValid = Number.isInteger(parsedPort) && parsedPort >= 0 && parsedPort <= 65535;
  if (!portIsValid) {
    console.warn(`Ignoring invalid FOREST_PORT "${rawPort}"; using ${DEFAULT_PORT}`);
  }
  const port = portIsValid ? parsedPort : DEFAULT_PORT;

  // `||` rather than `??` so that an empty FOREST_HOST also uses the default.
  const hostname = process.env.FOREST_HOST || DEFAULT_HOSTNAME;

  startServer({ port, hostname });
}

0 commit comments

Comments
 (0)