= ({
const defaultOpened =
!first_opened ||
eatenvars.length === 0 ||
- eatenvars[eatenvars.length - 1] === "LLM";
+ eatenvars[eatenvars.length - 1] === "$LLM";
const grouped_resps_divs = Object.keys(grouped_resps).map((g) =>
groupByVars(
grouped_resps[g],
diff --git a/chainforge/react-server/src/MultiEvalNode.tsx b/chainforge/react-server/src/MultiEvalNode.tsx
new file mode 100644
index 00000000..d010679c
--- /dev/null
+++ b/chainforge/react-server/src/MultiEvalNode.tsx
@@ -0,0 +1,872 @@
+import React, {
+ useState,
+ useCallback,
+ useEffect,
+ useMemo,
+ useRef,
+ useContext,
+} from "react";
+import { Handle, Position } from "reactflow";
+import { v4 as uuid } from "uuid";
+import {
+ TextInput,
+ Text,
+ Group,
+ ActionIcon,
+ Menu,
+ Card,
+ rem,
+ Collapse,
+ Button,
+ Alert,
+ Tooltip,
+} from "@mantine/core";
+import { useDisclosure } from "@mantine/hooks";
+import {
+ IconAbacus,
+ IconBox,
+ IconChevronDown,
+ IconChevronRight,
+ IconDots,
+ IconPlus,
+ IconRobot,
+ IconSearch,
+ IconSparkles,
+ IconTerminal,
+ IconTrash,
+} from "@tabler/icons-react";
+import BaseNode from "./BaseNode";
+import NodeLabel from "./NodeLabelComponent";
+import InspectFooter from "./InspectFooter";
+import LLMResponseInspectorModal, {
+ LLMResponseInspectorModalRef,
+} from "./LLMResponseInspectorModal";
+import useStore from "./store";
+import {
+ APP_IS_RUNNING_LOCALLY,
+ batchResponsesByUID,
+ genDebounceFunc,
+ toStandardResponseFormat,
+} from "./backend/utils";
+import LLMResponseInspectorDrawer from "./LLMResponseInspectorDrawer";
+import {
+ CodeEvaluatorComponent,
+ CodeEvaluatorComponentRef,
+} from "./CodeEvaluatorNode";
+import { LLMEvaluatorComponent, LLMEvaluatorComponentRef } from "./LLMEvalNode";
+import { GatheringResponsesRingProgress } from "./LLMItemButtonGroup";
+import { Dict, LLMResponse, QueryProgress } from "./backend/typing";
+import { AlertModalContext } from "./AlertModal";
+import { Status } from "./StatusIndicatorComponent";
+
+const IS_RUNNING_LOCALLY = APP_IS_RUNNING_LOCALLY();
+
+const EVAL_TYPE_PRETTY_NAME = {
+ python: "Python",
+ javascript: "JavaScript",
+ llm: "LLM",
+};
+
+export interface EvaluatorContainerProps {
+ name: string;
+ type: string;
+ padding?: string | number;
+ onDelete: () => void;
+ onChangeTitle: (newTitle: string) => void;
+ progress?: QueryProgress;
+ customButton?: React.ReactNode;
+ children: React.ReactNode;
+ initiallyOpen?: boolean;
+}
+
+/** A wrapper for a single evaluator, that can be renamed */
+const EvaluatorContainer: React.FC<EvaluatorContainerProps> = ({
+ name,
+ type: evalType,
+ padding,
+ onDelete,
+ onChangeTitle,
+ progress,
+ customButton,
+ children,
+ initiallyOpen,
+}) => {
+ const [opened, { toggle }] = useDisclosure(initiallyOpen ?? false);
+ const _padding = useMemo(() => padding ?? "0px", [padding]);
+ const [title, setTitle] = useState(name ?? "Criteria");
+
+ const handleChangeTitle = (newTitle: string) => {
+ setTitle(newTitle);
+ if (onChangeTitle) onChangeTitle(newTitle);
+ };
+
+  return (
+    <Card withBorder shadow="sm" radius="md" p="0px" mb="xs">
+      <Card.Section withBorder pl="8px">
+        <Group spacing="0px">
+          <Button
+            onClick={toggle}
+            variant="subtle"
+            color="gray"
+            p="0px"
+            m="0px"
+          >
+            {opened ? (
+              <IconChevronDown size="14pt" />
+            ) : (
+              <IconChevronRight size="14pt" />
+            )}
+          </Button>
+          <TextInput
+            value={title}
+            onChange={(e) => setTitle(e.target.value)}
+            onBlur={(e) => handleChangeTitle(e.target.value)}
+            placeholder="Criteria name"
+            variant="unstyled"
+            size="sm"
+            className="nodrag nowheel"
+            styles={{
+              input: {
+                padding: "0px",
+                height: "14pt",
+                minHeight: "0pt",
+                fontWeight: 500,
+              },
+            }}
+          />
+          <Group spacing="4px" ml="auto" mr="8px">
+            {customButton}
+            <Text color="#bbb" size="sm">
+              {evalType}
+            </Text>
+            {progress ? (
+              <GatheringResponsesRingProgress progress={progress} />
+            ) : (
+              <></>
+            )}
+            {/* */}
+            <Menu withinPortal position="right-start" shadow="sm">
+              <Menu.Target>
+                <ActionIcon variant="subtle" color="gray">
+                  <IconDots style={{ width: rem(16), height: rem(16) }} />
+                </ActionIcon>
+              </Menu.Target>
+              <Menu.Dropdown>
+                <Menu.Item
+                  color="red"
+                  icon={<IconTrash size="14px" />}
+                  onClick={onDelete}
+                >
+                  Delete
+                </Menu.Item>
+              </Menu.Dropdown>
+            </Menu>
+          </Group>
+        </Group>
+      </Card.Section>
+      <Card.Section p={opened ? _padding : "0px"}>
+        <Collapse in={opened}>{children}</Collapse>
+      </Card.Section>
+    </Card>
+  );
+};
+
+export interface EvaluatorContainerDesc {
+ name: string; // the user's nickname for the evaluator, which displays as the title of the banner
+ uid: string; // a unique identifier for this evaluator, since name can change
+ type: "python" | "javascript" | "llm"; // the type of evaluator
+ state: Dict; // the internal state necessary for that specific evaluator component (e.g., a prompt for llm eval, or code for code eval)
+ progress?: QueryProgress;
+ justAdded?: boolean;
+}
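+
+// An illustrative example of a serialized evaluator entry (hypothetical values):
+//   {
+//     name: "Grammaticality",
+//     uid: "d9f6c1a2-…",  // generated via uuid()
+//     type: "llm",
+//     state: { prompt: "Is the response grammatical?", format: "bin" },
+//   }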
+
+export interface MultiEvalNodeProps {
+ data: {
+ evaluators: EvaluatorContainerDesc[];
+ refresh: boolean;
+ title: string;
+ };
+ id: string;
+}
+
+/** A node that stores multiple evaluator functions (can be a mix of LLM scorer prompts and arbitrary code). */
+const MultiEvalNode: React.FC<MultiEvalNodeProps> = ({ data, id }) => {
+ const setDataPropsForNode = useStore((state) => state.setDataPropsForNode);
+ const pullInputData = useStore((state) => state.pullInputData);
+ const pingOutputNodes = useStore((state) => state.pingOutputNodes);
+ const bringNodeToFront = useStore((state) => state.bringNodeToFront);
+ const inputEdgesForNode = useStore((state) => state.inputEdgesForNode);
+
+ const flags = useStore((state) => state.flags);
+ const AI_SUPPORT_ENABLED = useMemo(() => {
+ return flags.aiSupport;
+ }, [flags]);
+
+  const [status, setStatus] = useState<Status>(Status.NONE);
+ // For displaying error messages to user
+ const showAlert = useContext(AlertModalContext);
+  const inspectModal = useRef<LLMResponseInspectorModalRef>(null);
+
+ // -- EvalGen access --
+ // const pickCriteriaModalRef = useRef(null);
+ // const onClickPickCriteria = () => {
+ // const inputs = handlePullInputs();
+ // pickCriteriaModalRef?.current?.trigger(inputs, (implementations: EvaluatorContainerDesc[]) => {
+ // // Returned if/when the Pick Criteria modal finishes generating implementations.
+ // console.warn(implementations);
+ // // Append the returned implementations to the end of the existing eval list
+ // setEvaluators((evs) => evs.concat(implementations));
+ // });
+ // };
+
+ const [uninspectedResponses, setUninspectedResponses] = useState(false);
+  const [lastResponses, setLastResponses] = useState<LLMResponse[]>([]);
+ const [lastRunSuccess, setLastRunSuccess] = useState(true);
+ const [showDrawer, setShowDrawer] = useState(false);
+
+ // Debounce helpers
+  const debounceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
+ const debounce = genDebounceFunc(debounceTimeoutRef);
+
+ /** Store evaluators as array of JSON serialized state:
+ * { name: // the user's nickname for the evaluator, which displays as the title of the banner
+ * type: 'python' | 'javascript' | 'llm' // the type of evaluator
+ * state: // the internal state necessary for that specific evaluator component (e.g., a prompt for llm eval, or code for code eval)
+ * }
+ */
+  const [evaluators, setEvaluators] = useState<EvaluatorContainerDesc[]>(
+    data.evaluators ?? [],
+  );
+
+ // Add an evaluator to the end of the list
+ const addEvaluator = useCallback(
+ (name: string, type: EvaluatorContainerDesc["type"], state: Dict) => {
+      setEvaluators(
+        evaluators.concat({ name, uid: uuid(), type, state, justAdded: true }),
+      );
+ },
+ [evaluators],
+ );
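+  // Usage (illustrative):
+  //   addEvaluator("Formatting", "javascript", {
+  //     code: "function evaluate(resp) {\n\treturn resp.text.length;\n}",
+  //   });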
+
+ // Sync evaluator state to stored state of this node
+  useEffect(() => {
+    setDataPropsForNode(id, {
+      evaluators: evaluators.map((e) => ({ ...e, justAdded: undefined })),
+    });
+  }, [evaluators]);
+
+ // Generate UI for the evaluator state
+ const evaluatorComponentRefs = useRef<
+ {
+ type: "code" | "llm";
+ name: string;
+ ref: CodeEvaluatorComponentRef | LLMEvaluatorComponentRef | null;
+ }[]
+ >([]);
+
+ const updateEvalState = (
+ idx: number,
+ transformFunc: (e: EvaluatorContainerDesc) => void,
+ ) => {
+ setStatus(Status.WARNING);
+ setEvaluators((es) =>
+ es.map((e, i) => {
+ if (idx === i) transformFunc(e);
+ return e;
+ }),
+ );
+ };
+
+  // const evaluatorComponents = useMemo(() => {
+  //   // evaluatorComponentRefs.current = [];
+
+  //   return evaluators.map((e, idx) => {
+  //     let component: React.ReactNode;
+  //     if (e.type === "python" || e.type === "javascript") {
+  //       component = (
+  //         <CodeEvaluatorComponent
+  //           ref={(el) =>
+  //             (evaluatorComponentRefs.current[idx] = {
+  //               type: "code",
+  //               name: e.name,
+  //               ref: el,
+  //             })
+  //           }
+  //           code={e.state?.code}
+  //           progLang={e.type}
+  //           type="evaluator"
+  //           id={id}
+  //           onCodeEdit={(code) =>
+  //             updateEvalState(idx, (e) => (e.state.code = code))
+  //           }
+  //           showUserInstruction={false}
+  //         />
+  //       );
+  //     } else if (e.type === "llm") {
+  //       component = (
+  //         <LLMEvaluatorComponent
+  //           ref={(el) =>
+  //             (evaluatorComponentRefs.current[idx] = {
+  //               type: "llm",
+  //               name: e.name,
+  //               ref: el,
+  //             })
+  //           }
+  //           prompt={e.state?.prompt}
+  //           grader={e.state?.grader}
+  //           format={e.state?.format}
+  //           id={id}
+  //           showUserInstruction={false}
+  //           onPromptEdit={(prompt) =>
+  //             updateEvalState(idx, (e) => (e.state.prompt = prompt))
+  //           }
+  //           onLLMGraderChange={(grader) =>
+  //             updateEvalState(idx, (e) => (e.state.grader = grader))
+  //           }
+  //           onFormatChange={(format) =>
+  //             updateEvalState(idx, (e) => (e.state.format = format))
+  //           }
+  //         />
+  //       );
+  //     } else {
+  //       console.error(
+  //         `Unknown evaluator type ${e.type} inside multi-evaluator node. Cannot display evaluator UI.`,
+  //       );
+  //       component = <Alert>Error: Unknown evaluator type {e.type}</Alert>;
+  //     }
+  //     return (
+  //       <EvaluatorContainer
+  //         name={e.name}
+  //         type={EVAL_TYPE_PRETTY_NAME[e.type]}
+  //         progress={e.progress}
+  //         onDelete={() => {
+  //           delete evaluatorComponentRefs.current[idx];
+  //           setEvaluators(evaluators.filter((_, i) => i !== idx));
+  //         }}
+  //         onChangeTitle={(newTitle) =>
+  //           setEvaluators(
+  //             evaluators.map((e, i) => {
+  //               if (i === idx) e.name = newTitle;
+  //               console.log(e);
+  //               return e;
+  //             }),
+  //           )
+  //         }
+  //         padding={e.type === "llm" ? "8px" : undefined}
+  //       >
+  //         {component}
+  //       </EvaluatorContainer>
+  //     );
+  //   });
+  // }, [evaluators, id]);
+
+ const handleError = useCallback(
+ (err: Error | string) => {
+ console.error(err);
+ setStatus(Status.ERROR);
+ showAlert && showAlert(err);
+ },
+ [showAlert, setStatus],
+ );
+
+ const handlePullInputs = useCallback(() => {
+ // Pull input data
+ try {
+ const pulled_inputs = pullInputData(["responseBatch"], id);
+ if (!pulled_inputs || !pulled_inputs.responseBatch) {
+ console.warn(`No inputs to the Multi-Evaluator node.`);
+ return [];
+ }
+ // Convert to standard response format (StandardLLMResponseFormat)
+ return pulled_inputs.responseBatch.map(toStandardResponseFormat);
+ } catch (err) {
+ handleError(err as Error);
+ return [];
+ }
+ }, [pullInputData, id, toStandardResponseFormat]);
+
+ const handleRunClick = useCallback(() => {
+ // Pull inputs to the node
+ const pulled_inputs = handlePullInputs();
+ if (!pulled_inputs || pulled_inputs.length === 0) return;
+
+ // Get the ids from the connected input nodes:
+ // TODO: Remove this dependency; have everything go through pull instead.
+ const input_node_ids = inputEdgesForNode(id).map((e) => e.source);
+ if (input_node_ids.length === 0) {
+ console.warn("No inputs to multi-evaluator node.");
+ return;
+ }
+
+    // Sanity check that there are evaluators in the multi-eval node
+ if (
+ !evaluatorComponentRefs.current ||
+ evaluatorComponentRefs.current.length === 0
+ ) {
+ console.error("Cannot run multievals: No current evaluators found.");
+ return;
+ }
+
+    // Set loading status and clear any responses from the last run
+ setStatus(Status.LOADING);
+ setLastResponses([]);
+
+ // Helper function to update progress ring on a single evaluator component
+ const updateProgressRing = (
+ evaluator_idx: number,
+ progress?: QueryProgress,
+ ) => {
+ // Update the progress rings, debouncing to avoid too many rerenders
+ debounce(
+ (_idx, _progress) =>
+ setEvaluators((evs) => {
+ if (_idx >= evs.length) return evs;
+ evs[_idx].progress = _progress;
+ return [...evs];
+ }),
+ 30,
+ )(evaluator_idx, progress);
+ };
+
+ // Run all evaluators here!
+ // TODO
+ const runPromises = evaluatorComponentRefs.current.map(
+ ({ type, name, ref }, idx) => {
+ if (ref === null) return { type: "error", name, result: null };
+
+ // Start loading spinner status on running evaluators
+ updateProgressRing(idx, { success: 0, error: 0 });
+
+ // Run each evaluator
+ if (type === "code") {
+ // Run code evaluator
+ // TODO: Change runInSandbox to be user-controlled, for Python code evals (right now it is always sandboxed)
+ return (ref as CodeEvaluatorComponentRef)
+ .run(pulled_inputs, undefined)
+ .then((ret) => {
+ console.log("Code evaluator done!", ret);
+ updateProgressRing(idx, undefined);
+ if (ret.error !== undefined) throw new Error(ret.error);
+ return {
+ type: "code",
+ name,
+ result: ret.responses,
+ };
+ });
+ } else {
+ // Run LLM-based evaluator
+ // TODO: Add back live progress, e.g. (progress) => updateProgressRing(idx, progress)) but with appropriate mapping for progress.
+ return (ref as LLMEvaluatorComponentRef)
+ .run(input_node_ids, (progress) => {
+ updateProgressRing(idx, progress);
+ })
+ .then((ret) => {
+ console.log("LLM evaluator done!", ret);
+ updateProgressRing(idx, undefined);
+ return {
+ type: "llm",
+ name,
+ result: ret,
+ };
+ });
+ }
+ },
+ );
+
+ // When all evaluators finish...
+ Promise.allSettled(runPromises).then((settled) => {
+ if (settled.some((s) => s.status === "rejected")) {
+ setStatus(Status.ERROR);
+ setLastRunSuccess(false);
+ // @ts-expect-error Reason exists on rejected settled promises, but TS doesn't know it for some reason.
+ handleError(settled.find((s) => s.status === "rejected").reason);
+ return;
+ }
+
+ // Remove progress rings without errors
+ setEvaluators((evs) =>
+ evs.map((e) => {
+ if (e.progress && !e.progress.error) e.progress = undefined;
+ return e;
+ }),
+ );
+
+ // Ignore null refs
+ settled = settled.filter(
+ (s) => s.status === "fulfilled" && s.value.result !== null,
+ );
+
+ // Success -- set the responses for the inspector
+ // First we need to group up all response evals by UID, *within* each evaluator.
+ const evalResults = settled.map((s) => {
+ const v =
+ s.status === "fulfilled"
+ ? s.value
+ : { type: "code", name: "Undefined", result: [] };
+ if (v.type === "llm") return v; // responses are already batched by uid
+        // If it's a code evaluator: in this version of CF, code evals de-batch responses,
+        // so we need to re-batch them by UID before returning, to correct this:
+ return {
+ type: v.type,
+ name: v.name,
+ result: batchResponsesByUID(v.result ?? []),
+ };
+ });
+
+      // Now we have duplicates of each response object, one per evaluator run,
+ // with evaluation results per evaluator. They are not yet merged. We now need
+ // to merge the evaluation results within response objects with the same UIDs.
+ // It *should* be the case (invariant) that response objects with the same UID
+ // have exactly the same number of evaluation results (e.g. n=3 for num resps per prompt=3).
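+      // For example (illustrative): with evaluators named "Grammar" and "Length"
+      // and n=3 responses per prompt, a merged response object ends up with
+      // eval_res.items like:
+      //   [{ Grammar: "true", Length: 52 }, { Grammar: "false", Length: 47 }, { Grammar: "true", Length: 50 }]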
+      const merged_res_objs_by_uid: Dict<LLMResponse> = {};
+ // For each set of evaluation results...
+ evalResults.forEach(({ name, result }) => {
+ // For each response obj in the results...
+ result?.forEach((res_obj: LLMResponse) => {
+ // If it's not already in the merged dict, add it:
+ const uid = res_obj.uid;
+ if (
+ res_obj.eval_res !== undefined &&
+ !(uid in merged_res_objs_by_uid)
+ ) {
+ // Transform evaluation results into dict form, indexed by "name" of the evaluator:
+ res_obj.eval_res.items = res_obj.eval_res.items.map((item) => {
+ if (typeof item === "object") item = item.toString();
+ return {
+ [name]: item,
+ };
+ });
+ res_obj.eval_res.dtype = "KeyValue_Mixed"; // "KeyValue_Mixed" enum;
+ merged_res_objs_by_uid[uid] = res_obj; // we don't make a copy, to save time
+ } else {
+ // It is already in the merged dict, so add the new eval results
+ // Sanity check that the lengths of eval result lists are equal across evaluators:
+ if (merged_res_objs_by_uid[uid].eval_res === undefined) return;
+ else if (
+ // @ts-expect-error We've already checked that eval_res is defined, yet TS throws an error anyway... skip it:
+ merged_res_objs_by_uid[uid].eval_res.items.length !==
+ res_obj.eval_res?.items?.length
+ ) {
+ console.error(
+ `Critical error: Evaluation result lists for response ${uid} do not contain the same number of items per evaluator. Skipping...`,
+ );
+ return;
+ }
+ // Add the new evaluation result, keyed by evaluator name:
+ // @ts-expect-error We've already checked that eval_res is defined, yet TS throws an error anyway... skip it:
+ merged_res_objs_by_uid[uid].eval_res.items.forEach((item, idx) => {
+ if (typeof item === "object") {
+ let v = res_obj.eval_res?.items[idx];
+ if (typeof v === "object") v = v.toString();
+ item[name] = v ?? "undefined";
+ }
+ });
+ }
+ });
+ });
+
+      // We now have a dict of the form { uid: LLMResponse }.
+      // We need to return only the values of this dict:
+ setLastResponses(Object.values(merged_res_objs_by_uid));
+ setLastRunSuccess(true);
+
+ setStatus(Status.READY);
+ });
+ }, [
+ handlePullInputs,
+ pingOutputNodes,
+ status,
+ showDrawer,
+ evaluators,
+ evaluatorComponentRefs,
+ ]);
+
+ const showResponseInspector = useCallback(() => {
+ if (inspectModal && inspectModal.current && lastResponses) {
+ setUninspectedResponses(false);
+ inspectModal.current.trigger();
+ }
+ }, [inspectModal, lastResponses]);
+
+ // Something changed upstream
+ useEffect(() => {
+ if (data.refresh && data.refresh === true) {
+ setDataPropsForNode(id, { refresh: false });
+ setStatus(Status.WARNING);
+ }
+ }, [data]);
+
+  return (
+    <BaseNode classNames="evaluator-node" nodeId={id}>
+      <NodeLabel
+        title={data.title || "Multi-Evaluator"}
+        nodeId={id}
+        icon={<IconAbacus size="16px" />}
+        status={status}
+        handleRunClick={handleRunClick}
+        runButtonTooltip="Run all evaluators over inputs"
+      />
+      <LLMResponseInspectorModal
+        ref={inspectModal}
+        jsonResponses={lastResponses}
+      />
+      {/* */}
+      <Handle
+        type="target"
+        position={Position.Left}
+        id="responseBatch"
+        className="grouped-handle"
+        style={{ top: "50%" }}
+      />
+      <Handle
+        type="source"
+        position={Position.Right}
+        id="output"
+        className="grouped-handle"
+        style={{ top: "50%" }}
+      />
+      <div className="nowheel">
+        {/* {evaluatorComponents} */}
+        {evaluators.map((e, idx) => (
+          <EvaluatorContainer
+            key={e.uid}
+            name={e.name}
+            type={EVAL_TYPE_PRETTY_NAME[e.type]}
+            progress={e.progress}
+            initiallyOpen={e.justAdded}
+            customButton={
+              AI_SUPPORT_ENABLED && e.type === "llm" ? (
+                <Tooltip label="Improve with AI" withinPortal withArrow>
+                  <ActionIcon variant="subtle" color="gray" size="sm">
+                    <IconSparkles size="14px" />
+                  </ActionIcon>
+                </Tooltip>
+              ) : undefined
+            }
+ onDelete={() => {
+ delete evaluatorComponentRefs.current[idx];
+ setEvaluators(evaluators.filter((_, i) => i !== idx));
+ }}
+ onChangeTitle={(newTitle) =>
+ setEvaluators((evs) =>
+ evs.map((e, i) => {
+ if (i === idx) e.name = newTitle;
+ console.log(e);
+ return e;
+ }),
+ )
+ }
+ padding={e.type === "llm" ? "8px" : undefined}
+ >
+ {e.type === "python" || e.type === "javascript" ? (
+              <CodeEvaluatorComponent
+                ref={(el) =>
+ (evaluatorComponentRefs.current[idx] = {
+ type: "code",
+ name: e.name,
+ ref: el,
+ })
+ }
+ code={e.state?.code}
+ progLang={e.type}
+ sandbox={e.state?.sandbox}
+ type="evaluator"
+ id={id}
+ onCodeEdit={(code) =>
+ updateEvalState(idx, (e) => (e.state.code = code))
+ }
+ showUserInstruction={false}
+ />
+ ) : e.type === "llm" ? (
+              <LLMEvaluatorComponent
+                ref={(el) =>
+ (evaluatorComponentRefs.current[idx] = {
+ type: "llm",
+ name: e.name,
+ ref: el,
+ })
+ }
+ prompt={e.state?.prompt}
+ grader={e.state?.grader}
+ format={e.state?.format}
+ id={`${id}-${e.uid}`}
+ showUserInstruction={false}
+ onPromptEdit={(prompt) =>
+ updateEvalState(idx, (e) => (e.state.prompt = prompt))
+ }
+ onLLMGraderChange={(grader) =>
+ updateEvalState(idx, (e) => (e.state.grader = grader))
+ }
+ onFormatChange={(format) =>
+ updateEvalState(idx, (e) => (e.state.format = format))
+ }
+ />
+ ) : (
+              <Alert color="red">Error: Unknown evaluator type {e.type}</Alert>
+ )}
+          </EvaluatorContainer>
+ ))}
+      </div>
+
+      {/* TO IMPLEMENT */}
+      <Menu withinPortal position="right-start" shadow="sm">
+        <Menu.Target>
+          <Button size="xs" variant="subtle" color="gray" m="auto" display="block">
+            <IconPlus size="14px" />
+            &nbsp;Add evaluator
+          </Button>
+        </Menu.Target>
+        <Menu.Dropdown>
+          <Menu.Item icon={<IconTerminal size="14px" />} onClick={() => addEvaluator("Criteria", "javascript", {})}>JavaScript</Menu.Item>
+          <Menu.Item icon={<IconBox size="14px" />} onClick={() => addEvaluator("Criteria", "python", { sandbox: true })}>Python</Menu.Item>
+          <Menu.Item icon={<IconRobot size="14px" />} onClick={() => addEvaluator("Criteria", "llm", {})}>LLM</Menu.Item>
+        </Menu.Dropdown>
+      </Menu>
+      {/* EvalGen
+      {evaluators && evaluators.length === 0 ? (
+        <Button onClick={onClickPickCriteria} variant="subtle" size="xs">
+          <IconSparkles size="14px" />
+          &nbsp;Generate criteria
+        </Button>
+      ) : (
+        <></>
+      )} */}
+
+ {lastRunSuccess && lastResponses && lastResponses.length > 0 ? (
+        <InspectFooter
+          label={
+            <>
+              Inspect scores&nbsp;
+              <IconSearch size="12pt" />
+            </>
+          }
+ onClick={showResponseInspector}
+ showNotificationDot={uninspectedResponses}
+ isDrawerOpen={showDrawer}
+ showDrawerButton={true}
+ onDrawerClick={() => {
+ setShowDrawer(!showDrawer);
+ setUninspectedResponses(false);
+ bringNodeToFront(id);
+ }}
+ />
+ ) : (
+        <></>
+ )}
+
+      <LLMResponseInspectorDrawer
+        jsonResponses={lastResponses}
+        showDrawer={showDrawer}
+      />
+    </BaseNode>
+ );
+};
+
+export default MultiEvalNode;
diff --git a/chainforge/react-server/src/ResponseBoxes.tsx b/chainforge/react-server/src/ResponseBoxes.tsx
index 7d484f9d..3fa0b395 100644
--- a/chainforge/react-server/src/ResponseBoxes.tsx
+++ b/chainforge/react-server/src/ResponseBoxes.tsx
@@ -1,5 +1,5 @@
import React, { Suspense, useMemo, lazy } from "react";
-import { Collapse, Flex } from "@mantine/core";
+import { Collapse, Flex, Stack } from "@mantine/core";
import { useDisclosure } from "@mantine/hooks";
import { truncStr } from "./backend/utils";
import {
@@ -15,19 +15,25 @@ const ResponseRatingToolbar = lazy(() => import("./ResponseRatingToolbar"));
/* HELPER FUNCTIONS */
const SUCCESS_EVAL_SCORES = new Set(["true", "yes"]);
const FAILURE_EVAL_SCORES = new Set(["false", "no"]);
-const getEvalResultStr = (
- eval_item: string[] | Dict | string | number | boolean,
+export const getEvalResultStr = (
+ eval_item: EvaluationScore,
+ hide_prefix: boolean,
) => {
if (Array.isArray(eval_item)) {
- return "scores: " + eval_item.join(", ");
+ return (hide_prefix ? "" : "scores: ") + eval_item.join(", ");
} else if (typeof eval_item === "object") {
- const strs = Object.keys(eval_item).map((key) => {
+ const strs = Object.keys(eval_item).map((key, j) => {
let val = eval_item[key];
if (typeof val === "number" && val.toString().indexOf(".") > -1)
val = val.toFixed(4); // truncate floats to 4 decimal places
- return `${key}: ${val}`;
+      return (
+        <div key={`${key}-${j}`}>
+          <span>{key}:&nbsp;</span>
+          <span>{getEvalResultStr(val, true)}</span>
+        </div>
+      );
});
- return strs.join(", ");
+    return <Stack spacing={0}>{strs}</Stack>;
} else {
const eval_str = eval_item.toString().trim().toLowerCase();
const color = SUCCESS_EVAL_SCORES.has(eval_str)
@@ -37,7 +43,7 @@ const getEvalResultStr = (
: "black";
return (
<>
- {"score: "}
+        {!hide_prefix && "score: "}
        <span style={{ color }}>{eval_str}</span>
      </>
);
@@ -164,10 +170,12 @@ export const genResponseTextsDisplay = (
onlyShowScores?: boolean,
llmName?: string,
wideFormat?: boolean,
+ hideEvalScores?: boolean,
): React.ReactNode[] | React.ReactNode => {
  if (!res_obj) return <></>;
- const eval_res_items = res_obj.eval_res ? res_obj.eval_res.items : null;
+ const eval_res_items =
+ !hideEvalScores && res_obj.eval_res ? res_obj.eval_res.items : null;
// Bucket responses that have the same text, and sort by the
// number of same responses so that the top div is the most prevalent response.
@@ -251,7 +259,7 @@ export const genResponseTextsDisplay = (
)}
{eval_res_items ? (
-          <div className="small-response-metrics">{getEvalResultStr(resp_str_to_eval_res[r])}</div>
+          <div className="small-response-metrics">{getEvalResultStr(resp_str_to_eval_res[r], true)}</div>
) : (
        <></>
diff --git a/chainforge/react-server/src/backend/typing.ts b/chainforge/react-server/src/backend/typing.ts
index 7608cdcd..3bbeb14b 100644
--- a/chainforge/react-server/src/backend/typing.ts
+++ b/chainforge/react-server/src/backend/typing.ts
@@ -199,6 +199,7 @@ export type EvaluationScore =
| number
| string
| Dict;
+
export type EvaluationResults = {
items: EvaluationScore[];
dtype:
diff --git a/chainforge/react-server/src/text-fields-node.css b/chainforge/react-server/src/text-fields-node.css
index 188d3ae9..8274f31a 100644
--- a/chainforge/react-server/src/text-fields-node.css
+++ b/chainforge/react-server/src/text-fields-node.css
@@ -4,6 +4,9 @@
.monofont {
font-family: var(--monofont);
}
+.linebreaks {
+ white-space: pre-wrap;
+}
.text-fields-node {
background-color: #fff;
@@ -390,7 +393,7 @@ g.ytick text {
padding-bottom: 20px;
min-width: 160px;
border-right: 1px solid #eee;
- padding-left: 8px !important;
+ padding-left: 0px !important;
padding-right: 0px !important;
}
.inspect-responses-drawer {
@@ -646,17 +649,18 @@ g.ytick text {
cursor: text;
}
.small-response-metrics {
- font-size: 10pt;
+ font-size: 9pt;
font-family: -apple-system, "Segoe UI", "Roboto", "Oxygen", "Ubuntu",
"Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", sans-serif;
font-weight: 500;
text-align: center;
border-top-left-radius: 20px;
border-top-right-radius: 20px;
- padding: 0px 2px 1px 0px;
+ padding: 0px 2px 2px 0px;
margin: 8px 20% -6px 20%;
- background-color: rgba(255, 255, 255, 0.3);
+ /* background-color: rgba(255, 255, 255, 0.3); */
color: #333;
+ white-space: pre-wrap;
}
.num-same-responses {
position: relative;
diff --git a/setup.py b/setup.py
index 8563e37b..57534eb4 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@ def readme():
setup(
name='chainforge',
- version='0.3.1.2',
+ version='0.3.1.5',
packages=find_packages(),
author="Ian Arawjo",
description="A Visual Programming Environment for Prompt Engineering",