Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DocsQAWithSources component #207

Merged
merged 11 commits into from
Jul 21, 2023
2 changes: 1 addition & 1 deletion packages/ai-jsx/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"repository": "fixie-ai/ai-jsx",
"bugs": "https://github.com/fixie-ai/ai-jsx/issues",
"homepage": "https://ai-jsx.com",
"version": "0.5.12",
"version": "0.5.13",
"volta": {
"extends": "../../package.json"
},
Expand Down
75 changes: 74 additions & 1 deletion packages/ai-jsx/src/batteries/docs.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@ import { VectorStore } from 'langchain/vectorstores';
import _ from 'lodash';
import { similarity } from 'ml-distance';
import { Jsonifiable } from 'type-fest';
import z from 'zod';
import { ChatCompletion, SystemMessage, UserMessage } from '../core/completion.js';
import { AIJSXError, ErrorCode } from '../core/errors.js';
import * as AI from '../index.js';
import { Node } from '../index.js';
import { getEnvVar } from '../lib/util.js';
import { AIJSXError, ErrorCode } from '../core/errors.js';
import { JsonChatCompletion } from './constrained-output.js';

/**
* A raw document loaded from an arbitrary source that has not yet been parsed.
Expand Down Expand Up @@ -765,6 +768,23 @@ export interface DocsQAProps<ChunkMetadata extends Jsonifiable = Jsonifiable> {
chunkFormatter?: (props: { doc: ScoredChunk<ChunkMetadata> }) => Node;
}

/**
 * Props for {@link DocsQAWithCitations}: everything from {@link DocsQAProps}
 * plus an optional formatter for the structured answer-with-sources result.
 */
export interface DocsQAWithCitationsProps<ChunkMetadata extends Jsonifiable = Jsonifiable>
  extends DocsQAProps<ChunkMetadata> {
  /**
   * The component used to format results from a DocsQAWithCitations query.
   *
   * ```tsx
   * function FormatQAResult(result: QAWithCitationsResult) {
   *   if (result.sources?.length) {
   *     return `${result.answer}\n\nSources:\n${result.sources.join('\n')}`;
   *   }
   *   return result.answer;
   * }
   * ```
   */
  resultFormatter?: (result: QAWithCitationsResult) => Node;
}

/**
* A component that can be used to answer questions about documents. This is a very common usecase for LLMs.
* @example
Expand All @@ -789,3 +809,56 @@ export async function DocsQA<ChunkMetadata extends Jsonifiable = Jsonifiable>(pr
</ChatCompletion>
);
}

mdepinet marked this conversation as resolved.
Show resolved Hide resolved
// Schema the LLM is asked to satisfy: an answer plus the sources it used.
// Note: zod object fields are required by default, so the
// `.required({ answer: true })` mask does not change validation here — it
// records the intent that `answer` is mandatory.
// NOTE(review): `sources` is also required by this schema even though
// QAWithCitationsResult marks every field optional via Partial — confirm
// whether a response missing `sources` should fail `ResultSchema.parse`.
const ResultSchema = z
  .object({
    answer: z.string().describe("The answer to the user's question"),
    sources: z.array(z.string()).describe('The title or URL of each document used to answer the question'),
  })
  .required({ answer: true });

/** The parsed shape of a DocsQAWithCitations answer; fields may be absent mid-stream. */
export type QAWithCitationsResult = Partial<z.infer<typeof ResultSchema>>;

/**
 * Default formatter for DocsQAWithCitations results: the answer, followed by a
 * newline-separated source list when any sources were cited.
 */
function DefaultQAResultFormatter(result: QAWithCitationsResult) {
  const sources = result.sources ?? [];
  return sources.length ? `${result.answer}\n\nSources:\n${sources.join('\n')}` : result.answer;
}

/**
 * Similar to {@link DocsQA}, but encourages the LLM to return citations for its answer.
 *
 * Streams intermediate frames: each rendered frame is JSON-parsed and checked
 * against ResultSchema; frames that validate are yielded through the result
 * formatter, and frames that do not (e.g. incomplete JSON mid-stream) are
 * yielded verbatim.
 */
export async function* DocsQAWithCitations<ChunkMetadata extends Jsonifiable = Jsonifiable>(
  props: DocsQAWithCitationsProps<ChunkMetadata>,
  { render, logger }: AI.ComponentContext
) {
  // Retrieve the most relevant chunks for the question; chunkLimit caps how many.
  const chunks = await props.corpus.search(props.question, { limit: props.chunkLimit });
  // Fall back to the package defaults when the caller supplies no formatters.
  const chunkFormatter: (props: { doc: ScoredChunk<ChunkMetadata> }) => Node = props.chunkFormatter ?? DefaultFormatter;
  const resultFormatter: (result: QAWithCitationsResult) => Node = props.resultFormatter ?? DefaultQAResultFormatter;

  // Constrain the completion to a JSON object matching ResultSchema so the
  // model reports which documents it used.
  const stringifiedResult = (
    <JsonChatCompletion schema={ResultSchema}>
      <SystemMessage>
        You are a trained question answerer. Answer questions truthfully, using only the document excerpts below. Do not
        use any other knowledge you have about the world. If you don't know how to answer the question, just say "I
        don't know." Here are the relevant document excerpts you have been given:
        {chunks.map((chunk) => chunkFormatter({ doc: chunk }))}
        And here is the question you must answer:
      </SystemMessage>
      <UserMessage>{props.question}</UserMessage>
    </JsonChatCompletion>
  );

  const frames = render(stringifiedResult);
  for await (const frame of frames) {
    try {
      yield resultFormatter(ResultSchema.parse(JSON.parse(frame)));
    } catch (e) {
      // Mid-stream frames are often truncated JSON; surface the raw text
      // rather than failing the stream.
      logger.debug(`Failed to parse DocsQAWithCitations frame: ${e}`);
      yield frame;
    }
  }
  // NOTE(review): awaiting `frames` is assumed to produce the final rendered
  // string (AI.JSX render results are awaitable — confirm). The final parse is
  // not guarded, so a malformed final frame will throw here.
  return resultFormatter(ResultSchema.parse(JSON.parse(await frames)));
}
26 changes: 9 additions & 17 deletions packages/ai-jsx/src/lib/openai.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -328,18 +328,12 @@ export async function* OpenAIChatModel(
);
}

if (props.forcedFunction) {
if (
!Object.entries(props.functionDefinitions)
.map(([functionName, _]) => functionName)
.find((f) => f == props.forcedFunction)
) {
throw new AIJSXError(
`The function ${props.forcedFunction} was forced, but no function with that name was defined.`,
ErrorCode.ChatCompletionBadInput,
'user'
);
}
if (props.forcedFunction && !Object.keys(props.functionDefinitions).includes(props.forcedFunction)) {
throw new AIJSXError(
`The function ${props.forcedFunction} was forced, but no function with that name was defined.`,
ErrorCode.ChatCompletionBadInput,
'user'
);
}

const messageElements = await render(props.children, {
Expand Down Expand Up @@ -412,11 +406,9 @@ export async function* OpenAIChatModel(
description: functionDefinition.description,
parameters: getParametersSchema(functionDefinition.parameters),
}));
const openaiFunctionCall: CreateChatCompletionRequestFunctionCall | undefined = !props.forcedFunction
? undefined
: {
name: props.forcedFunction,
};
const openaiFunctionCall: CreateChatCompletionRequestFunctionCall | undefined = props.forcedFunction
? { name: props.forcedFunction }
: undefined;

const openai = getContext(openAiClientContext);
const chatCompletionRequest = {
Expand Down
57 changes: 56 additions & 1 deletion packages/docs/docs/guides/docsqa.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ flowchart LR
que[Query] -->|string| embed2[Embed] -->|vector| vdb2[(Vector DB)] -->|similar chunks| LLM
```

If you use the built-in DocsQA tag from AI.JSX, then you just need to decide how to present the chunk to your LLM:
If you use the built-in [`DocsQA`](../api/modules/batteries_docs.md#docsqa) component from AI.JSX, then you just need to decide how to present the chunk to your LLM:

```typescript
function ShowDoc({ doc }: { doc: Document<MyDocMetadata> }) {
Expand All @@ -109,6 +109,61 @@ function AskAndAnswer({ query }: { query: string }) {
}
```

The `DocsQA` component provides an answer, like:

```tsx
<DocsQA question="What is the atomic number of nitrogen?" corpus={corpus} docComponent={ShowDoc} />
/* Renders:
Nitrogen's atomic number is 7
*/
```

If you want an answer that cites sources, use [`DocsQAWithCitations`](../api/modules/batteries_docs.md#docsqawithcitations):

```tsx
<DocsQAWithCitations question="What is the atomic number of nitrogen?" corpus={corpus} docComponent={ShowDoc} />
/* Renders:
Nitrogen's atomic number is 7
Sources: https://en.wikipedia.org/wiki/Nitrogen
*/
```

If you want to customize how the citations are formatted, pass a `resultFormatter`:

```tsx
function ResultFormatter(result: QAWithCitationsResult) {
return (
<>
{result.answer}
{result.sources?.length && (
<>
Learn more:{'\n'}
{result.sources.map((source) => (
<>
* {source}
{'\n'}
</>
))}
</>
)}
</>
);
}

<DocsQAWithCitations
question="What is the atomic number of nitrogen?"
corpus={corpus}
docComponent={ShowDoc}
// highlight-next-line
resultFormatter={ResultFormatter}
/>;
/* Renders:
Nitrogen's atomic number is 7
Learn more:
* https://en.wikipedia.org/wiki/Nitrogen
*/
```

## Picking a Corpus Implementation

To get you started, AI.JSX includes an in-memory corpus that can be used effectively for demos. When you've outgrown that, you could use a Langchain VectorStore like [Pinecone](https://www.pinecone.io/) or [Chroma](https://www.trychroma.com/). Alternatively, [Fixie](https://www.fixie.ai) provides a fully-managed Corpus solution you could drop in instead.
54 changes: 48 additions & 6 deletions packages/tutorial/src/docsqa.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
import { DocsQA, LocalCorpus, ScoredChunk, makeChunker, staticLoader } from 'ai-jsx/batteries/docs';
import {
DocsQA,
DocsQAWithCitations,
LocalCorpus,
QAWithCitationsResult,
ScoredChunk,
makeChunker,
staticLoader,
} from 'ai-jsx/batteries/docs';
import { showInspector } from 'ai-jsx/core/inspector';
import fetch from 'node-fetch';
import TurndownService from 'turndown';
Expand All @@ -17,22 +25,56 @@ const docs = [
const corpus = new LocalCorpus(staticLoader(docs), makeChunker(600, 100));
await corpus.load();

function GetChunk({ doc }: { doc: ScoredChunk }) {
/**
 * Presents a document chunk as a bare string of its contents, with no metadata
 * such as a title.
 *
 * Note that omitting a title makes it difficult to use DocsQAWithCitations,
 * since the LLM won't know how to refer to this doc.
 */
function OptionalCustomChunkFormatter({ doc }: { doc: ScoredChunk }) {
  const { chunk } = doc;
  return chunk.content;
}

/**
 * Formats the result of a DocsQAWithCitations call, presenting the answer and
 * sources as desired. The known Wikipedia source is rendered as an HTML link.
 *
 * NOTE(review): `URL` is expected to be a string constant defined earlier in
 * this file (the article URL) — confirm it is not resolving to the global URL
 * constructor.
 */
function OptionalCustomResultFormatter(result: QAWithCitationsResult) {
  const linkedSources: string[] = [];
  for (const source of result.sources ?? []) {
    linkedSources.push(
      source == 'Wikipedia Article about Hurricane Katrina' ? `<a href="${URL}">${source}</a>` : source
    );
  }
  return linkedSources.length ? `${result.answer} (from ${linkedSources.join(', ')})` : result.answer;
}

function App() {
mdepinet marked this conversation as resolved.
Show resolved Hide resolved
return (
<>
<DocsQA question="What was Hurricane Katrina?" corpus={corpus} chunkLimit={5} chunkFormatter={GetChunk} />
DocsQA without source citations:{'\n'}
<DocsQA
question="Which dates did the Hurricane Katrina occur?"
corpus={corpus}
chunkLimit={5}
chunkFormatter={OptionalCustomChunkFormatter}
/>
{'\n\n'}
<DocsQA question="Which dates did the storm occur?" corpus={corpus} chunkLimit={5} chunkFormatter={GetChunk} />
DocsQA with source citations:{'\n'}
<DocsQAWithCitations question="What was Hurricane Katrina?" corpus={corpus} chunkLimit={5} />
{'\n\n'}
<DocsQA
DocsQA with source citations and custom result formatter:{'\n'}
<DocsQAWithCitations
question="Where were the strongest winds reported?"
corpus={corpus}
chunkLimit={5}
chunkFormatter={GetChunk}
resultFormatter={OptionalCustomResultFormatter}
/>
</>
);
Expand Down