Add Multi-Eval node (#265)
* Port over and type MultiEvalNode code from the `multi-eval` branch

* Merge css changes from `multi-eval`

* Merge changes to inspector table view from `multi-eval`

* Criteria progress rings

* Debounce renders on text edits (see the debounce helper sketch after the App.tsx diff below)

* Add sandbox toggle to Python evals inside MultiEval

* Add uids to evals in MultiEval, so cache ids are stable and not dependent on the user-editable name (see the sketch after the file summary below)

* Lay out scores with a `<Stack>` component

* Add debounce to editing code or prompts in eval UI

* Update package version
ianarawjo committed Apr 25, 2024
1 parent 2998c99 commit 6fa3092
Showing 9 changed files with 1,125 additions and 101 deletions. (Only the first three files' diffs are expanded below.)
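The MultiEvalNode.tsx source itself is not among the expanded diffs, but the commit message notes that each evaluator inside a Multi-Eval node now carries a uid, so cached results are keyed on identity rather than on the user-editable name. A minimal sketch of that idea, assuming a `uuid` dependency; the type and helper names (`EvaluatorCriteria`, `addCriteria`, `cacheIdForEval`) are hypothetical, not the actual MultiEvalNode code:

```typescript
import { v4 as uuidv4 } from "uuid";

// Hypothetical shape — illustrative only, not the actual MultiEvalNode types.
interface EvaluatorCriteria {
  uid: string; // stable identity; cache files are keyed on this
  name: string; // user-editable label; renaming must not invalidate the cache
  type: "javascript" | "python" | "llm";
}

// Assign the uid once, when the criterion is created:
const addCriteria = (
  name: string,
  type: EvaluatorCriteria["type"],
): EvaluatorCriteria => ({ uid: uuidv4(), name, type });

// Derive cache ids from the uid, never from the display name:
const cacheIdForEval = (nodeId: string, ev: EvaluatorCriteria) =>
  `${nodeId}-${ev.uid}.json`;
```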
13 changes: 13 additions & 0 deletions chainforge/react-server/src/App.tsx
@@ -29,6 +29,7 @@ import {
   IconArrowMerge,
   IconArrowsSplit,
   IconForms,
+  IconAbacus,
 } from "@tabler/icons-react";
 import RemoveEdge from "./RemoveEdge";
 import TextFieldsNode from "./TextFieldsNode"; // Import a custom node
@@ -88,6 +89,7 @@ import {
   isEdgeChromium,
   isChromium,
 } from "react-device-detect";
+import MultiEvalNode from "./MultiEvalNode";
 
 const IS_ACCEPTED_BROWSER =
   (isChrome ||
@@ -157,6 +159,7 @@ const nodeTypes = {
   simpleval: SimpleEvalNode,
   evaluator: CodeEvaluatorNode,
   llmeval: LLMEvaluatorNode,
+  multieval: MultiEvalNode,
   vis: VisNode,
   inspect: InspectNode,
   script: ScriptNode,
@@ -328,6 +331,7 @@ const App = () => {
   const addTabularDataNode = () => addNode("table");
   const addCommentNode = () => addNode("comment");
   const addLLMEvalNode = () => addNode("llmeval");
+  const addMultiEvalNode = () => addNode("multieval");
   const addJoinNode = () => addNode("join");
   const addSplitNode = () => addNode("split");
   const addProcessorNode = (progLang: string) => {
@@ -1052,6 +1056,15 @@ const App = () => {
             LLM Scorer{" "}
           </Menu.Item>
         </MenuTooltip>
+        <MenuTooltip label="Evaluate responses across multiple criteria (multiple code and/or LLM evaluators).">
+          <Menu.Item
+            onClick={addMultiEvalNode}
+            icon={<IconAbacus size="16px" />}
+          >
+            {" "}
+            Multi-Evaluator{" "}
+          </Menu.Item>
+        </MenuTooltip>
         <Menu.Divider />
         <Menu.Label>Visualizers</Menu.Label>
         <MenuTooltip label="Plot evaluation results. (Attach an evaluator or scorer node as input.)">
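The next two files both import a `genDebounceFunc` helper from `./backend/utils`, whose implementation is outside this diff. Judging from the call sites (`debounce(() => onCodeEdit(code), 200)()`), it takes a ref holding the pending timeout and returns a `debounce(func, delayMs)` wrapper. A plausible sketch, offered as an assumption rather than the actual utils code:

```typescript
import { MutableRefObject } from "react";

// Sketch under assumptions — the real genDebounceFunc in backend/utils is not shown in this diff.
// Given a ref holding the pending timeout handle, returns a debounce(func, delayMs)
// that produces a callable; invoking it cancels any pending call sharing the same
// ref and schedules a fresh one delayMs in the future.
export function genDebounceFunc(
  debounceTimeoutRef: MutableRefObject<ReturnType<typeof setTimeout> | null>,
) {
  return function debounce(func: () => void, delayMs: number) {
    return () => {
      if (debounceTimeoutRef.current) clearTimeout(debounceTimeoutRef.current);
      debounceTimeoutRef.current = setTimeout(func, delayMs);
    };
  };
}
```

Because each component funnels its edits through one shared timeout ref, only the last keystroke within the 200 ms window reaches the parent, which is what keeps re-renders cheap while typing.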
16 changes: 14 additions & 2 deletions chainforge/react-server/src/CodeEvaluatorNode.tsx
@@ -33,6 +33,7 @@ import "ace-builds/src-noconflict/theme-xcode";
 import "ace-builds/src-noconflict/ext-language_tools";
 import {
   APP_IS_RUNNING_LOCALLY,
+  genDebounceFunc,
   getVarsAndMetavars,
   stripLLMDetailsFromResponses,
   toStandardResponseFormat,
@@ -188,6 +189,7 @@ export interface CodeEvaluatorComponentProps {
   onCodeEdit?: (code: string) => void;
   onCodeChangedFromLastRun?: () => void;
   onCodeEqualToLastRun?: () => void;
+  sandbox?: boolean;
 }
 
 /**
@@ -206,6 +208,7 @@ export const CodeEvaluatorComponent = forwardRef<
     onCodeEdit,
     onCodeChangedFromLastRun,
     onCodeEqualToLastRun,
+    sandbox,
   },
   ref,
 ) {
@@ -215,6 +218,10 @@ export const CodeEvaluatorComponent = forwardRef<
     false,
   );
 
+  // Debounce helpers
+  const debounceTimeoutRef = useRef(null);
+  const debounce = genDebounceFunc(debounceTimeoutRef);
+
   // Controlled handle when user edits code
   const handleCodeEdit = (code: string) => {
     if (codeTextOnLastRun !== false) {
@@ -223,7 +230,10 @@ export const CodeEvaluatorComponent = forwardRef<
       else if (!code_changed && onCodeEqualToLastRun) onCodeEqualToLastRun();
     }
     setCodeText(code);
-    if (onCodeEdit) onCodeEdit(code);
+
+    // Debounce to control number of re-renders to parent, when user is editing/typing:
+    if (onCodeEdit)
+      debounce(() => onCodeEdit(code), 200)();
   };
 
   // Runs the code evaluator/processor over the inputs, returning the results as a Promise.
@@ -233,6 +243,8 @@ export const CodeEvaluatorComponent = forwardRef<
     script_paths?: string[],
     runInSandbox?: boolean,
   ) => {
+    if (runInSandbox === undefined) runInSandbox = sandbox;
+
     // Double-check that the code includes an 'evaluate' or 'process' function, whichever is needed:
     const find_func_regex =
       node_type === "evaluator"
@@ -317,7 +329,7 @@ export const CodeEvaluatorComponent = forwardRef<
           mode={progLang}
           theme="xcode"
           onChange={handleCodeEdit}
-          value={code}
+          value={codeText}
           name={"aceeditor_" + id}
           editorProps={{ $blockScrolling: true }}
           width="100%"
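Two details in the hunks above are easy to miss: `run` now falls back to the component-level `sandbox` prop when the caller omits `runInSandbox`, and the Ace editor renders the local `codeText` state instead of the `code` prop, making it a genuinely controlled component even though notifications to the parent are debounced. A self-contained sketch of that state-plus-debounce pattern (the component name and the 200 ms delay are illustrative, not taken from the commit):

```tsx
import React, { useRef, useState } from "react";

// Keep edits in local state so every keystroke renders immediately,
// but debounce the (potentially expensive) callback up to the parent.
function DebouncedEditor({ onEdit }: { onEdit?: (text: string) => void }) {
  const [text, setText] = useState("");
  const timeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  const handleChange = (e: React.ChangeEvent<HTMLTextAreaElement>) => {
    setText(e.target.value); // controlled value: the UI never lags the keyboard
    if (!onEdit) return;
    if (timeoutRef.current) clearTimeout(timeoutRef.current);
    timeoutRef.current = setTimeout(() => onEdit(e.target.value), 200);
  };

  return <textarea value={text} onChange={handleChange} />;
}
```

With the parent callback debounced, rendering the `code` prop would lag the editor by the debounce window; rendering the local `codeText` state keeps it responsive, which is why `value={code}` became `value={codeText}`.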
129 changes: 65 additions & 64 deletions chainforge/react-server/src/LLMEvalNode.tsx
@@ -21,7 +21,7 @@ import LLMResponseInspectorModal, {
 } from "./LLMResponseInspectorModal";
 import InspectFooter from "./InspectFooter";
 import LLMResponseInspectorDrawer from "./LLMResponseInspectorDrawer";
-import { stripLLMDetailsFromResponses } from "./backend/utils";
+import { genDebounceFunc, stripLLMDetailsFromResponses } from "./backend/utils";
 import { AlertModalContext } from "./AlertModal";
 import { Dict, LLMResponse, LLMSpec, QueryProgress } from "./backend/typing";
 import { Status } from "./StatusIndicatorComponent";
@@ -116,11 +116,18 @@ export const LLMEvaluatorComponent = forwardRef<
   );
   const apiKeys = useStore((state) => state.apiKeys);
 
+  // Debounce helpers
+  const debounceTimeoutRef = useRef(null);
+  const debounce = genDebounceFunc(debounceTimeoutRef);
+
   const handlePromptChange = useCallback(
     (e: React.ChangeEvent<HTMLTextAreaElement>) => {
       // Store prompt text
       setPromptText(e.target.value);
-      if (onPromptEdit) onPromptEdit(e.target.value);
+
+      // Update the caller, but debounce to reduce the number of callbacks when user is typing
+      if (onPromptEdit)
+        debounce(() => onPromptEdit(e.target.value), 200)();
     },
     [setPromptText, onPromptEdit],
   );
@@ -157,36 +164,49 @@ export const LLMEvaluatorComponent = forwardRef<
       " " +
       formatting_instr +
       "\n```\n{input}\n```";
 
-    // Keeping track of progress (unpacking the progress state since there's only a single LLM)
     const llm_key = llmScorers[0].key ?? "";
-    const _progress_listener = onProgressChange
-      ? (progress_by_llm: Dict<QueryProgress>) =>
-          onProgressChange({
-            success: progress_by_llm[llm_key].success,
-            error: progress_by_llm[llm_key].error,
-          })
-      : undefined;
 
-    // Run LLM as evaluator
-    return evalWithLLM(
-      id ?? Date.now().toString(),
-      llmScorers[0],
-      template,
-      input_node_ids,
-      apiKeys ?? {},
-      _progress_listener,
-    ).then(function (res) {
-      // Check if there's an error; if so, bubble it up to user and exit:
-      if (res.errors && res.errors.length > 0) throw new Error(res.errors[0]);
-      else if (res.responses === undefined)
-        throw new Error(
-          "Unknown error encountered when requesting evaluations: empty response returned.",
-        );
-
-      // Success!
-      return res.responses;
-    });
+    // Fetch info about the number of queries we'll need to make
+    return grabResponses(input_node_ids)
+      .then(function (resps) {
+        // Create progress listener
+        // Keeping track of progress (unpacking the progress state since there's only a single LLM)
+        const num_resps_required = resps.reduce(
+          (acc, resp_obj) => acc + resp_obj.responses.length,
+          0,
+        );
+        return onProgressChange
+          ? (progress_by_llm: Dict<QueryProgress>) =>
+              onProgressChange({
+                success:
+                  (100 * progress_by_llm[llm_key].success) / num_resps_required,
+                error:
+                  (100 * progress_by_llm[llm_key].error) / num_resps_required,
+              })
+          : undefined;
+      })
+      .then((progress_listener) => {
+        // Run LLM as evaluator
+        return evalWithLLM(
+          id ?? Date.now().toString(),
+          llmScorers[0],
+          template,
+          input_node_ids,
+          apiKeys ?? {},
+          progress_listener,
+        );
+      })
+      .then(function (res) {
+        // Check if there's an error; if so, bubble it up to user and exit:
+        if (res.errors && res.errors.length > 0) throw new Error(res.errors[0]);
+        else if (res.responses === undefined)
+          throw new Error(
+            "Unknown error encountered when requesting evaluations: empty response returned.",
+          );
+
+        // Success!
+        return res.responses;
+      });
   };
 
   // Export the current internal state as JSON
@@ -305,41 +325,22 @@ const LLMEvaluatorNode: React.FC<LLMEvaluatorNodeProps> = ({ data, id }) => {
       if (showAlert) showAlert(typeof err === "string" ? err : err?.message);
     };
 
-    // Fetch info about the number of queries we'll need to make
-    grabResponses(input_node_ids)
-      .then(function (resps) {
-        // Create progress listener
-        const num_resps_required = resps.reduce(
-          (acc, resp_obj) => acc + resp_obj.responses.length,
-          0,
-        );
-        const onProgressChange = (prog: QueryProgress) => {
-          setProgress({
-            success: (100 * prog.success) / num_resps_required,
-            error: (100 * prog.error) / num_resps_required,
-          });
-        };
-
-        // Run LLM evaluator
-        llmEvaluatorRef?.current
-          ?.run(input_node_ids, onProgressChange)
-          .then(function (evald_resps) {
-            // Ping any vis + inspect nodes attached to this node to refresh their contents:
-            pingOutputNodes(id);
-
-            console.log(evald_resps);
-            setLastResponses(evald_resps);
-
-            if (!showDrawer) setUninspectedResponses(true);
-
-            setStatus(Status.READY);
-            setProgress(undefined);
-          })
-          .catch(handleError);
-      })
-      .catch(() => {
-        handleError("Error pulling input data for node: No input data found.");
-      });
+    // Run LLM evaluator
+    llmEvaluatorRef?.current
+      ?.run(input_node_ids, setProgress)
+      .then(function (evald_resps) {
+        // Ping any vis + inspect nodes attached to this node to refresh their contents:
+        pingOutputNodes(id);
+
+        console.log(evald_resps);
+        setLastResponses(evald_resps);
+
+        if (!showDrawer) setUninspectedResponses(true);
+
+        setStatus(Status.READY);
+        setProgress(undefined);
+      })
+      .catch(handleError);
   }, [
     inputEdgesForNode,
     llmEvaluatorRef,
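The net effect of the LLMEvalNode.tsx refactor is that progress normalization moves inside `LLMEvaluatorComponent.run`: it first counts how many responses it will score via `grabResponses`, then scales the per-LLM success/error counts into 0–100 percentages before invoking `onProgressChange`, so callers (such as the Multi-Eval node) can pass a plain setter like `setProgress`. The arithmetic in isolation, as a standalone restatement rather than code from the commit:

```typescript
interface ResponseBatch {
  responses: unknown[];
}

// Convert raw success/error counts into the percentages a progress
// indicator expects, given the total number of responses to score.
function progressAsPercent(
  batches: ResponseBatch[],
  success: number,
  error: number,
) {
  const numRespsRequired = batches.reduce(
    (acc, b) => acc + b.responses.length,
    0,
  );
  return {
    success: (100 * success) / numRespsRequired,
    error: (100 * error) / numRespsRequired,
  };
}

// e.g., 40 responses total, 10 scored and 2 errored so far:
// progressAsPercent(batches, 10, 2) → { success: 25, error: 5 }
```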