diff --git a/README.md b/README.md
index 5067ca9..cac4ad6 100644
--- a/README.md
+++ b/README.md
@@ -74,7 +74,7 @@ flowchart LR
     app -- create embedding --> embeddings
     app -- search embeddings --> vdb
     app -- retrieve documents --> db
-    app -- get text completion --> llm
+    app -- fetch text completion --> llm
 ```
 
 The web application collects user input, creates an embedding, searches the vector database for similar embeddings,
diff --git a/src/app.ts b/src/app.ts
index 57731e5..374c051 100644
--- a/src/app.ts
+++ b/src/app.ts
@@ -10,6 +10,42 @@ const app = new Hono<{ Bindings: Env }>();
 app.get('/static/*', serveStatic({root: './', manifest}))
 app.get('/', c => c.html(layout(indexHtml())))
 
+
+const createEmbedding = async (ai: Ai, query: string): Promise<number[]> => {
+    const queryVector: EmbeddingResponse = await ai.run('@cf/baai/bge-large-en-v1.5', {
+        text: [query],
+    });
+    return queryVector.data[0];
+};
+
+const searchEmbeddings = async (index: VectorizeIndex, queryVector: number[]): Promise<string | undefined> => {
+    const result = await index.query(queryVector, {topK: 1, returnMetadata: true});
+    return result.matches[0]?.metadata?.link as string | undefined;
+};
+
+const retrieveArticle = async (store: KVNamespace, matchLink: string) => await store.get(matchLink);
+
+const fetchTextCompletion = async (ai: Ai, article: string, query: string): Promise<string | undefined> => {
+    const messages = [
+        {role: "system", content: "You are a reporter for a major world newspaper."},
+        {
+            role: "system",
+            content: `Use the following article as a source to respond to the user's query: ${article.slice(0, 5500)}`
+        },
+        {
+            role: "system",
+            content: "Write your response as if you were writing a short, high-quality news article for your paper. Limit your response to one paragraph."
+        },
+        {
+            role: "user",
+            content: query,
+        },
+    ];
+
+    const textResult = await ai.run("@cf/meta/llama-3-8b-instruct", {messages});
+    return textResult.response;
+};
+
 app.post('/', async c => {
     const data = await c.req.formData()
     const query = data.get("query")
@@ -17,38 +53,21 @@ app.post('/', async c => {
         return c.html(layout(indexHtml({response: "Unable to answer query"})));
     }
 
-    const queryVector: EmbeddingResponse = await c.env.AI.run('@cf/baai/bge-large-en-v1.5', {
-        text: [query],
-    });
-
-    const result = await c.env.VECTORIZE_INDEX.query(queryVector.data[0], { topK: 1, returnMetadata: true });
-    const numberOfResults = result.matches.length;
-
-    const totalVectors = (await c.env.VECTORIZE_INDEX.describe()).vectorsCount
-    console.log(`found ${numberOfResults} results among ${totalVectors} vectors`)
+    const queryVector = await createEmbedding(c.env.AI, query);
 
-    const metadata = result.matches[0]?.metadata;
-    if (metadata === undefined) {
+    const source = await searchEmbeddings(c.env.VECTORIZE_INDEX, queryVector);
+    if (source === undefined) {
         return c.html(layout(indexHtml({query, response: "Unable to answer query"})));
     }
 
-    const matchLink = metadata.link as string;
-    const article = await c.env.ARTICLES.get(matchLink)
+    const article = await retrieveArticle(c.env.ARTICLES, source);
     if (article === null) {
-        return c.html(layout(indexHtml({query, response: "Unable to answer query", source: matchLink})));
+        return c.html(layout(indexHtml({query, response: "Unable to answer query", source})));
     }
 
-    const messages = [
-        { role: "system", content: "You are a reporter for a major world newspaper." },
-        { role: "system", content: `Use the following article as a source respond to the user's query: ${article.slice(0, 5500)}` },
-        { role: "system", content: "Write your response as if you were writing a short, high-quality news article for your paper. Limit your response to one paragraph" },
-        {
-            role: "user",
-            content: query,
-        },
-    ];
-    const textResult = await c.env.AI.run("@cf/meta/llama-3-8b-instruct", { messages });
-    return c.html(layout(indexHtml({query, response: textResult.response, source: matchLink})));
+    const response = await fetchTextCompletion(c.env.AI, article, query);
+
+    return c.html(layout(indexHtml({query, response, source})));
 })
 
 export default app satisfies ExportedHandler;
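For reference while reviewing: the extracted helpers lean on the bindings and embedding shape that `src/app.ts` already refers to (`Env`, `Ai`, `VectorizeIndex`, `KVNamespace`, `EmbeddingResponse`). The sketch below is a plausible set of declarations inferred from how the diff uses them, not copied from the project's actual type definitions; the binding names and the `EmbeddingResponse` fields are assumptions.

```ts
// Sketch only: declarations inferred from the diff, not the project's own types.
// If the project relies on ambient types generated by `wrangler types`, the
// import below is unnecessary.
import type { Ai, KVNamespace, VectorizeIndex } from '@cloudflare/workers-types';

// Bindings the handler reaches through c.env.* (assumed names, matching the diff).
interface Env {
  AI: Ai;                          // Workers AI binding used by createEmbedding and fetchTextCompletion
  VECTORIZE_INDEX: VectorizeIndex; // Vectorize index queried by searchEmbeddings
  ARTICLES: KVNamespace;           // KV namespace read by retrieveArticle, keyed by article link
}

// Assumed shape of the embedding model's output: one vector per input text,
// which is why createEmbedding returns queryVector.data[0].
interface EmbeddingResponse {
  shape: number[];
  data: number[][];
}
```

Splitting out `createEmbedding`, `searchEmbeddings`, `retrieveArticle`, and `fetchTextCompletion` keeps the POST handler a straight pipeline over these bindings, and each step can be exercised on its own by passing the corresponding binding directly.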