-
Notifications
You must be signed in to change notification settings - Fork 87
Expand file tree
/
Copy pathcustomSearch.ts
More file actions
105 lines (92 loc) · 4.15 KB
/
Copy pathcustomSearch.ts
File metadata and controls
105 lines (92 loc) · 4.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import {
CommonRetrieverOptionsSchema,
} from 'genkit/retriever';
import { z } from 'genkit';
import type { Genkit } from 'genkit';
import fetch from 'node-fetch';
import { scrapeUrlAction } from '@/lib/actions/scraperAction';
import { Document } from 'genkit/retriever';
/** Value used for `k` when the caller does not provide one. */
const DEFAULT_RESULT_COUNT = 10;

/**
 * Config schema for `customSearchRetriever`.
 *
 * Extends `CommonRetrieverOptionsSchema` with `k`, an optional number that
 * defaults to {@link DEFAULT_RESULT_COUNT}; the retriever keeps at most the
 * first `k` search results. Add further Google Custom Search parameters here
 * as they become necessary.
 */
const customSearchRetrieverOptionsSchema = CommonRetrieverOptionsSchema.extend({
  k: z.number().optional().default(DEFAULT_RESULT_COUNT),
});
/**
 * One entry of the `items` array in a search response, reduced to the fields
 * this module reads. Every field is optional.
 */
interface SearchResultItem {
  /** URL of the result; this is the page that gets scraped for content. */
  link?: string;
  /** Result title; used as a content fallback and stored as metadata. */
  title?: string;
  /** Result snippet; used as a content fallback and stored as metadata. */
  snippet?: string;
  /** Display form of the link; stored as metadata. */
  displayLink?: string;
}

/**
 * A search result paired with the content scraped from its page.
 * Carries the same optional fields as {@link SearchResultItem}, except that
 * `link` is required here.
 */
interface CleanedSearchResultItem extends SearchResultItem {
  /** Cleaned HTML scraped from `link`; an empty string when nothing was scraped. */
  cleanedHtmlContent: string;
  /** URL of the result (narrowed from optional to required). */
  link: string;
}
/**
 * Creates the `customSearchRetriever`: a Genkit simple retriever that queries
 * the Google Custom Search JSON API and returns one `Document` per search hit.
 *
 * For each of the first `k` hits the linked page is scraped with
 * `scrapeUrlAction`. The scraped HTML becomes the document text, falling back
 * to the hit's snippet, then its title, then the literal
 * `'Content unavailable'`. The hit's `title`, `snippet`, `link` and
 * `displayLink` are attached as document metadata.
 *
 * Credentials are read from `process.env.CUSTOM_SEARCH_API_KEY` and
 * `process.env.CUSTOM_SEARCH_ENGINE_ID` on every retrieval.
 *
 * @param ai - Genkit instance on which the retriever is defined.
 * @returns The value returned by `ai.defineSimpleRetriever`. The function is
 *   `async`, so callers receive it wrapped in a promise.
 * @throws The retriever's handler rejects when either environment variable is
 *   missing, when the search API answers with a non-OK status, or when the
 *   request itself fails. A failure to scrape an individual page is logged
 *   and does not reject.
 */
export async function createCustomSearchRetriever(ai: Genkit) {
  return ai.defineSimpleRetriever(
    {
      name: `customSearchRetriever`,
      configSchema: customSearchRetrieverOptionsSchema,
      // The Document's text is the retrieved content.
      content: (doc: Document) => doc.text,
      // Pass through every metadata field attached to the Document below.
      metadata: (doc: Document) => ({ ...doc.metadata }),
    },
    async (input, options) => {
      // Fail fast on missing credentials instead of sending the literal
      // string "undefined" to the API and waiting for it to be rejected.
      const apiKey = process.env.CUSTOM_SEARCH_API_KEY;
      const engineId = process.env.CUSTOM_SEARCH_ENGINE_ID;
      if (!apiKey || !engineId) {
        throw new Error(
          'Search API configuration error: CUSTOM_SEARCH_API_KEY and CUSTOM_SEARCH_ENGINE_ID must both be set'
        );
      }

      const queryText = typeof input === 'object' && input.text ? input.text : String(input);
      // URLSearchParams percent-encodes every value (spaces become "+"), so the
      // key and engine id are escaped as well as the query text.
      const searchParams = new URLSearchParams({ key: apiKey, cx: engineId, q: queryText });
      const searchUrl = `https://www.googleapis.com/customsearch/v1?${searchParams.toString()}`;

      try {
        const response = await fetch(searchUrl);

        if (!response.ok) {
          // The error body is not guaranteed to be JSON; parse it defensively
          // so a parse failure cannot hide the HTTP status.
          let errorBody: { error?: { message?: string } } | undefined;
          try {
            errorBody = (await response.json()) as { error?: { message?: string } };
          } catch {
            errorBody = undefined;
          }
          console.error('Google Search API error:', errorBody ?? response.statusText);
          throw new Error(`Search API returned error: ${errorBody?.error?.message || response.statusText}`);
        }

        const data = await response.json();
        const results = data.items || [];
        // Trim before scraping so no page is fetched that will not be returned.
        // NOTE(review): `options` may be absent when the caller passes no
        // retriever options — confirm against Genkit. `slice(0, undefined)`
        // then keeps every hit the API returned.
        const topKResults = results.slice(0, options?.k);

        // Scrape all selected pages concurrently.
        const cleanedResults: CleanedSearchResultItem[] = await Promise.all(
          topKResults.map(async (item: SearchResultItem) => {
            let cleanedHtmlContent = '';
            if (item.link) {
              try {
                console.log(`Fetching content from: ${item.link}`);
                const scraperResult = await scrapeUrlAction(item.link);
                cleanedHtmlContent = scraperResult.cleanHTML;
              } catch (error) {
                // Best effort: a page that cannot be scraped still yields a
                // document built from its snippet or title.
                console.error(`Error fetching content from ${item.link}:`, error);
              }
            }
            return {
              ...item,
              cleanedHtmlContent: cleanedHtmlContent || '',
            };
          })
        );

        const resultDocs: Document[] = cleanedResults.map((item) => {
          const content = item.cleanedHtmlContent || item.snippet || item.title || 'Content unavailable';
          return Document.fromText(content, {
            title: item.title,
            snippet: item.snippet,
            link: item.link,
            displayLink: item.displayLink,
          });
        });

        return resultDocs;
      } catch (error) {
        console.error('Error searching Google API:', error);
        throw error;
      }
    }
  );
}