Skip to content

Commit

Permalink
Add DocsQAWithSources component (#207)
Browse files Browse the repository at this point in the history
This extends DocsQA by encouraging the LLM to provide citations for its
answer and allowing the developer to format the result as they like.
  • Loading branch information
mdepinet committed Jul 21, 2023
1 parent c925534 commit 80e25c7
Show file tree
Hide file tree
Showing 5 changed files with 188 additions and 26 deletions.
2 changes: 1 addition & 1 deletion packages/ai-jsx/package.json
Expand Up @@ -4,7 +4,7 @@
"repository": "fixie-ai/ai-jsx",
"bugs": "https://github.com/fixie-ai/ai-jsx/issues",
"homepage": "https://ai-jsx.com",
"version": "0.5.12",
"version": "0.5.13",
"volta": {
"extends": "../../package.json"
},
Expand Down
75 changes: 74 additions & 1 deletion packages/ai-jsx/src/batteries/docs.tsx
Expand Up @@ -11,10 +11,13 @@ import { VectorStore } from 'langchain/vectorstores';
import _ from 'lodash';
import { similarity } from 'ml-distance';
import { Jsonifiable } from 'type-fest';
import z from 'zod';
import { ChatCompletion, SystemMessage, UserMessage } from '../core/completion.js';
import { AIJSXError, ErrorCode } from '../core/errors.js';
import * as AI from '../index.js';
import { Node } from '../index.js';
import { getEnvVar } from '../lib/util.js';
import { AIJSXError, ErrorCode } from '../core/errors.js';
import { JsonChatCompletion } from './constrained-output.js';

/**
* A raw document loaded from an arbitrary source that has not yet been parsed.
Expand Down Expand Up @@ -765,6 +768,23 @@ export interface DocsQAProps<ChunkMetadata extends Jsonifiable = Jsonifiable> {
chunkFormatter?: (props: { doc: ScoredChunk<ChunkMetadata> }) => Node;
}

/**
 * Props for {@link DocsQAWithCitations}: the same options as {@link DocsQA}, plus an
 * optional formatter for presenting the structured answer-with-sources result.
 */
export interface DocsQAWithCitationsProps<ChunkMetadata extends Jsonifiable = Jsonifiable>
  extends DocsQAProps<ChunkMetadata> {
  /**
   * The component used to format results from a DocsQAWithCitations query.
   *
   * Receives the parsed {@link QAWithCitationsResult} (an answer plus an optional list
   * of sources) and returns the Node to render. When omitted, a default plain-text
   * formatter is used that appends a "Sources:" list whenever sources are present.
   *
   * @example
   * ```tsx
   * function FormatQAResult(result: QAWithCitationsResult) {
   *   if (result.sources?.length) {
   *     return `${result.answer}\n\nSources:\n${result.sources.join('\n')}`;
   *   }
   *   return result.answer;
   * }
   * ```
   */
  resultFormatter?: (result: QAWithCitationsResult) => Node;
}

/**
* A component that can be used to answer questions about documents. This is a very common usecase for LLMs.
* @example
Expand All @@ -789,3 +809,56 @@ export async function DocsQA<ChunkMetadata extends Jsonifiable = Jsonifiable>(pr
</ChatCompletion>
);
}

/**
 * Schema the LLM's JSON answer must conform to.
 *
 * `sources` is optional: every consumer already treats it as possibly absent
 * (`QAWithCitationsResult` is a `Partial`, and the formatters check
 * `result.sources?.length`), and an honest "I don't know" answer cites nothing.
 * The previous `.required({ answer: true })` was a no-op — fields declared with
 * `z.object` are required by default — while still rejecting any response that
 * lacked `sources`.
 */
const ResultSchema = z.object({
  answer: z.string().describe("The answer to the user's question"),
  sources: z
    .array(z.string())
    .optional()
    .describe('The title or URL of each document used to answer the question'),
});

/** Parsed result of a DocsQAWithCitations query. All fields may be absent while streaming. */
export type QAWithCitationsResult = Partial<z.infer<typeof ResultSchema>>;

/**
 * Default result formatter for DocsQAWithCitations: renders the answer as plain
 * text, followed by a newline-separated "Sources:" list when any sources were cited.
 */
function DefaultQAResultFormatter(result: QAWithCitationsResult) {
  const { answer, sources } = result;
  // No citations (missing or empty list): just show the answer.
  if (sources === undefined || sources.length === 0) {
    return answer;
  }
  return `${answer}\n\nSources:\n${sources.join('\n')}`;
}

/**
 * Similar to {@link DocsQA}, but encourages the LLM to return citations for its answer.
 *
 * The completion is constrained via {@link JsonChatCompletion} to emit JSON matching
 * `ResultSchema`. Each streamed frame is parsed and passed through the result
 * formatter; frames that are not yet valid JSON are yielded as raw text so streaming
 * output stays live.
 */
export async function* DocsQAWithCitations<ChunkMetadata extends Jsonifiable = Jsonifiable>(
  props: DocsQAWithCitationsProps<ChunkMetadata>,
  { render, logger }: AI.ComponentContext
) {
  // Retrieve the most relevant chunks for this question; chunkLimit caps how many.
  const chunks = await props.corpus.search(props.question, { limit: props.chunkLimit });
  // Fall back to the module-level defaults when the caller didn't supply formatters.
  const chunkFormatter: (props: { doc: ScoredChunk<ChunkMetadata> }) => Node = props.chunkFormatter ?? DefaultFormatter;
  const resultFormatter: (result: QAWithCitationsResult) => Node = props.resultFormatter ?? DefaultQAResultFormatter;

  const stringifiedResult = (
    <JsonChatCompletion schema={ResultSchema}>
      <SystemMessage>
        You are a trained question answerer. Answer questions truthfully, using only the document excerpts below. Do not
        use any other knowledge you have about the world. If you don't know how to answer the question, just say "I
        don't know." Here are the relevant document excerpts you have been given:
        {chunks.map((chunk) => chunkFormatter({ doc: chunk }))}
        And here is the question you must answer:
      </SystemMessage>
      <UserMessage>{props.question}</UserMessage>
    </JsonChatCompletion>
  );

  // Each frame is a (possibly incomplete) JSON string of the model's output so far.
  const frames = render(stringifiedResult);
  for await (const frame of frames) {
    try {
      yield resultFormatter(ResultSchema.parse(JSON.parse(frame)));
    } catch (e) {
      // Expected while the JSON is still partial: log at debug and show the raw frame.
      logger.debug(`Failed to parse DocsQAWithCitations frame: ${e}`);
      yield frame;
    }
  }
  // NOTE(review): awaiting the render result after iterating presumably resolves to the
  // final rendered string — confirm against AI.JSX render semantics. Unlike the loop
  // above, a parse failure here throws instead of falling back to the raw frame.
  return resultFormatter(ResultSchema.parse(JSON.parse(await frames)));
}
26 changes: 9 additions & 17 deletions packages/ai-jsx/src/lib/openai.tsx
Expand Up @@ -328,18 +328,12 @@ export async function* OpenAIChatModel(
);
}

if (props.forcedFunction) {
if (
!Object.entries(props.functionDefinitions)
.map(([functionName, _]) => functionName)
.find((f) => f == props.forcedFunction)
) {
throw new AIJSXError(
`The function ${props.forcedFunction} was forced, but no function with that name was defined.`,
ErrorCode.ChatCompletionBadInput,
'user'
);
}
if (props.forcedFunction && !Object.keys(props.functionDefinitions).includes(props.forcedFunction)) {
throw new AIJSXError(
`The function ${props.forcedFunction} was forced, but no function with that name was defined.`,
ErrorCode.ChatCompletionBadInput,
'user'
);
}

const messageElements = await render(props.children, {
Expand Down Expand Up @@ -412,11 +406,9 @@ export async function* OpenAIChatModel(
description: functionDefinition.description,
parameters: getParametersSchema(functionDefinition.parameters),
}));
const openaiFunctionCall: CreateChatCompletionRequestFunctionCall | undefined = !props.forcedFunction
? undefined
: {
name: props.forcedFunction,
};
const openaiFunctionCall: CreateChatCompletionRequestFunctionCall | undefined = props.forcedFunction
? { name: props.forcedFunction }
: undefined;

const openai = getContext(openAiClientContext);
const chatCompletionRequest = {
Expand Down
57 changes: 56 additions & 1 deletion packages/docs/docs/guides/docsqa.md
Expand Up @@ -86,7 +86,7 @@ flowchart LR
que[Query] -->|string| embed2[Embed] -->|vector| vdb2[(Vector DB)] -->|similar chunks| LLM
```

If you use the built-in DocsQA tag from AI.JSX, then you just need to decide how to present the chunk to your LLM:
If you use the built-in [`DocsQA`](../api/modules/batteries_docs.md#docsqa) component from AI.JSX, then you just need to decide how to present the chunk to your LLM:

```typescript
function ShowDoc({ doc }: { doc: Document<MyDocMetadata> }) {
Expand All @@ -109,6 +109,61 @@ function AskAndAnswer({ query }: { query: string }) {
}
```

The `DocsQA` component provides an answer, like:

```tsx
<DocsQA question="What is the atomic number of nitrogen?" corpus={corpus} docComponent={ShowDoc} />
/* Renders:
Nitrogen's atomic number is 7
*/
```

If you want an answer that cites sources, use [`DocsQAWithCitations`](../api/modules/batteries_docs.md#docsqawithcitations):

```tsx
<DocsQAWithCitations question="What is the atomic number of nitrogen?" corpus={corpus} docComponent={ShowDoc} />
/* Renders:
Nitrogen's atomic number is 7
Sources: https://en.wikipedia.org/wiki/Nitrogen
*/
```

If you want to customize how the citations are formatted, pass a `resultFormatter`:

```tsx
function ResultFormatter(result: QAWithCitationsResult) {
return (
<>
{result.answer}
{result.sources?.length && (
<>
Learn more:{'\n'}
{result.sources.map((source) => (
<>
* {source}
{'\n'}
</>
))}
</>
)}
</>
);
}

<DocsQAWithCitations
question="What is the atomic number of nitrogen?"
corpus={corpus}
docComponent={ShowDoc}
// highlight-next-line
resultFormatter={ResultFormatter}
/>;
/* Renders:
Nitrogen's atomic number is 7
Learn more:
* https://en.wikipedia.org/wiki/Nitrogen
*/
```

## Picking a Corpus Implementation

To get you started, AI.JSX includes an in-memory corpus that can be used effectively for demos. When you've outgrown that, you could use a Langchain VectorStore like [Pinecone](https://www.pinecone.io/) or [Chroma](https://www.trychroma.com/). Alternatively, [Fixie](https://www.fixie.ai) provides a fully-managed Corpus solution you could drop in instead.
54 changes: 48 additions & 6 deletions packages/tutorial/src/docsqa.tsx
@@ -1,4 +1,12 @@
import { DocsQA, LocalCorpus, ScoredChunk, makeChunker, staticLoader } from 'ai-jsx/batteries/docs';
import {
DocsQA,
DocsQAWithCitations,
LocalCorpus,
QAWithCitationsResult,
ScoredChunk,
makeChunker,
staticLoader,
} from 'ai-jsx/batteries/docs';
import { showInspector } from 'ai-jsx/core/inspector';
import fetch from 'node-fetch';
import TurndownService from 'turndown';
Expand All @@ -17,22 +25,56 @@ const docs = [
const corpus = new LocalCorpus(staticLoader(docs), makeChunker(600, 100));
await corpus.load();

function GetChunk({ doc }: { doc: ScoredChunk }) {
function OptionalCustomChunkFormatter({ doc }: { doc: ScoredChunk }) {
/**
* This presents document chunks as a simple string with the chunk's contents instead of
* formatting it with metadata like a title.
*
* Note that not including a title makes it difficult to use DocsQAWithCitations since the LLM
* won't know how to refer to this doc.
*/
return doc.chunk.content;
}

function OptionalCustomResultFormatter(result: QAWithCitationsResult) {
  /**
   * Formats the result of a DocsQAWithCitations call, presenting the answer and
   * its sources as desired.
   *
   * NOTE(review): `URL` here presumably refers to a module-level constant holding
   * the Wikipedia article's address — confirm it is not the global URL constructor.
   */
  const sources = result.sources ?? [];
  const linkedSources = sources.map((source: string) =>
    source == 'Wikipedia Article about Hurricane Katrina' ? `<a href="${URL}">${source}</a>` : source
  );

  if (!linkedSources.length) {
    return result.answer;
  }
  return `${result.answer} (from ${linkedSources.join(', ')})`;
}

function App() {
return (
<>
<DocsQA question="What was Hurricane Katrina?" corpus={corpus} chunkLimit={5} chunkFormatter={GetChunk} />
DocsQA without source citations:{'\n'}
<DocsQA
question="Which dates did the Hurricane Katrina occur?"
corpus={corpus}
chunkLimit={5}
chunkFormatter={OptionalCustomChunkFormatter}
/>
{'\n\n'}
<DocsQA question="Which dates did the storm occur?" corpus={corpus} chunkLimit={5} chunkFormatter={GetChunk} />
DocsQA with source citations:{'\n'}
<DocsQAWithCitations question="What was Hurricane Katrina?" corpus={corpus} chunkLimit={5} />
{'\n\n'}
<DocsQA
DocsQA with source citations and custom result formatter:{'\n'}
<DocsQAWithCitations
question="Where were the strongest winds reported?"
corpus={corpus}
chunkLimit={5}
chunkFormatter={GetChunk}
resultFormatter={OptionalCustomResultFormatter}
/>
</>
);
Expand Down

3 comments on commit 80e25c7

@vercel
Copy link

@vercel vercel bot commented on 80e25c7 Jul 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

ai-jsx-docs – ./packages/docs

ai-jsx-docs-fixie-ai.vercel.app
ai-jsx-docs-git-main-fixie-ai.vercel.app
docs.ai-jsx.com
ai-jsx-docs.vercel.app

@vercel
Copy link

@vercel vercel bot commented on 80e25c7 Jul 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

ai-jsx-nextjs-demo – ./packages/nextjs-demo

ai-jsx-nextjs-demo-git-main-fixie-ai.vercel.app
ai-jsx-nextjs-demo-fixie-ai.vercel.app
ai-jsx-nextjs-demo.vercel.app

@vercel
Copy link

@vercel vercel bot commented on 80e25c7 Jul 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

ai-jsx-tutorial-nextjs – ./packages/tutorial-nextjs

ai-jsx-tutorial-nextjs.vercel.app
ai-jsx-tutorial-nextjs-git-main-fixie-ai.vercel.app
ai-jsx-tutorial-nextjs-fixie-ai.vercel.app

Please sign in to comment.