/
categorize.ts
58 lines (49 loc) · 1.54 KB
/
categorize.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import { z } from "zod"
import { StructuredOutputParser } from "langchain/output_parsers"
import { PromptTemplate } from "langchain/prompts"
import { gpt3Model } from "~/lc/openai"
/** Builds a string-array schema field annotated with the given description. */
const urlList = (description: string) =>
  z.array(z.string()).describe(description)

/**
 * Schema of the structured answer requested from the model: one boolean
 * flag plus three lists of URLs. Each field's description is attached via
 * zod's `.describe(...)`.
 */
const categorizationSchema = z.object({
  nonEnglish: z
    .boolean()
    .describe("Are the titles of most of the pages in a non-English language?"),
  companyPages: urlList(
    "URL which describes what the company does and what type of companies they invest in. Limit to two URLs."
  ),
  teamPages: urlList(
    "URL which describes a individual team member or list of team members."
  ),
  legalPages: urlList(
    "URL for the terms of service, privacy policy, or other legal documents. Limit to two URLs."
  ),
})

// Parser derived from the schema above; it supplies the format instructions
// for the prompt and parses the model's reply.
const parser = StructuredOutputParser.fromZodSchema(categorizationSchema)
// Format instructions obtained from the parser; bound to the template below
// as a partial variable so callers only have to supply `urls`.
const formatInstructions = parser.getFormatInstructions()

// Prompt text with two placeholders: `{urls}` (filled per call) and
// `{format_instructions}` (pre-filled from the parser).
const categorizeTemplate =
  "Which of the following webpages, could contain this information:\n```json\n{urls}\n```\n\n{format_instructions}"

const prompt = new PromptTemplate({
  template: categorizeTemplate,
  inputVariables: ["urls"],
  partialVariables: { format_instructions: formatInstructions },
})
/**
 * One page handed to `categorize`: its URL and its title.
 *
 * Exported so that callers of `categorize` can name the element type of the
 * array they pass in.
 */
export interface PageRepresentation {
  /** Address of the page. */
  url: string
  /** Title of the page. */
  title: string
}
/**
 * Asks the language model which of the given pages fall into each category
 * defined by the module's parser schema.
 *
 * The pages are serialized with `JSON.stringify`, rendered into the module's
 * prompt template, sent to the model returned by `gpt3Model()`, and the reply
 * is parsed with the module's structured output parser.
 *
 * @param urls - Pages (URL and title) to classify.
 * @returns The model's reply parsed into the shape of the parser's schema.
 * @throws Any rejection from `prompt.format`, `model.call` or `parser.parse`
 *   propagates to the caller; nothing is caught here.
 */
export async function categorize(urls: PageRepresentation[]) {
  const input = await prompt.format({
    urls: JSON.stringify(urls),
  })
  // A leftover `debugger` statement used to sit here; it was removed so an
  // attached inspector no longer pauses every call.
  const model = gpt3Model()
  const response = await model.call(input)
  const jsonResponse = await parser.parse(response)
  return jsonResponse
}