Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
238 commits
Select commit Hold shift + click to select a range
8458bb3
Fix model routing (#161)
MichaelDoyle Feb 27, 2024
a326ce5
[UI] Add new span tree + viewer to Flow details page (#164)
tonybaroneee Feb 27, 2024
26bdba0
Fetch models from API (#174)
MichaelDoyle Feb 28, 2024
be50e29
Backend errors (#163)
MaesterChestnut Feb 28, 2024
8864cbe
[UI] Cleanup unimplemented pages from navbar (#180)
tonybaroneee Feb 28, 2024
742b62c
[UI] Increase max-height of flow input/output (#179)
tonybaroneee Feb 28, 2024
6e178bf
Move flow runner to Actions page (#176)
shrutip90 Feb 28, 2024
2d96c63
[UI] Fix overflow of execution span tree (#183)
tonybaroneee Feb 28, 2024
72c0d56
Input validation disables prompt run button (#182)
MaesterChestnut Feb 28, 2024
be41efd
Route playground from flows to action runners page (#191)
MichaelDoyle Feb 29, 2024
43de942
Switch temperature to the slider (#195)
MichaelDoyle Feb 29, 2024
ddbfa24
Show validation errors on the playground (#196)
MichaelDoyle Feb 29, 2024
741bbea
[UI] Revamp flow details page layout (#197)
tonybaroneee Feb 29, 2024
12d1547
Fix validator issues (#194)
shrutip90 Feb 29, 2024
eea11ff
[UI] Initial design of span details view (#199)
tonybaroneee Feb 29, 2024
624649b
Move flow runner to start from action-list instead of action-runner (…
shrutip90 Feb 29, 2024
7bfdfff
Add vertex-ai to the model playground (#201)
MichaelDoyle Feb 29, 2024
f499069
[UI] Hide input/output pre if none available (#204)
tonybaroneee Feb 29, 2024
110e10a
[UI] Add "muted" helper class for secondary text (#206)
tonybaroneee Feb 29, 2024
38543a1
Don't send blank stop sequences to the model, vertex gemini model doe…
pavelgj Mar 3, 2024
88ec7c7
Provider specific model param restrictions on input (#224)
MaesterChestnut Mar 5, 2024
a6ce1a0
Use the minfied version of Monaco Editor in the angular app (#242)
MichaelDoyle Mar 6, 2024
5e13777
[UI] Update app name
tonybaroneee Mar 6, 2024
dd4e8d2
[UI] Update flow details layout (#246)
tonybaroneee Mar 6, 2024
71a7800
[UI] Add callout component (#244)
tonybaroneee Mar 6, 2024
ec9fc13
[UI] Hide wrapper spans on details page (#254)
tonybaroneee Mar 6, 2024
d1289e1
[UI] Update flow durations on details page (#256)
tonybaroneee Mar 6, 2024
52ce95e
[UI] Show error on flow details page (#258)
tonybaroneee Mar 7, 2024
67d5b45
Playground load trace (#262)
MichaelDoyle Mar 7, 2024
e2aa09c
Add theme toggling for JSON editor and move schema to a tab next to t…
shrutip90 Mar 7, 2024
ee2b0a3
Give topP the slider treatment (#264)
MichaelDoyle Mar 7, 2024
8636d50
[UI] Show flow name in tree (#266)
tonybaroneee Mar 7, 2024
ea874f4
[UI] Show span state in details pane (#268)
tonybaroneee Mar 7, 2024
8b49ab5
[UI] Flows table style improvements (#269)
tonybaroneee Mar 7, 2024
d9a2b5f
[UI] Small flow details page improvements for narrow screens (#273)
tonybaroneee Mar 7, 2024
628c0dc
Add CustomOptions (#276)
MichaelDoyle Mar 8, 2024
6b9dbc8
[UI]Remove sample calls for unsupported actions. Small fixes in flow …
shrutip90 Mar 8, 2024
de2908a
Create Message sub component for ModelPlayground (#271)
MaesterChestnut Mar 8, 2024
3fa8882
Fix error with model not accepting request_format (#279)
huangjeff5 Mar 8, 2024
ca08ec7
Disable the minimap on the monaco editor (#286)
MichaelDoyle Mar 8, 2024
b555204
[UI] Add zero state for flows list page (#291)
tonybaroneee Mar 9, 2024
07d97bf
[UI] Fix ng error in flow runner (#297)
tonybaroneee Mar 11, 2024
9d453d1
[UI] Hide stream response checkbox for durable flows (#299)
tonybaroneee Mar 11, 2024
4317902
Integrating the Message component into the Prompt Playground
MaesterChestnut Mar 11, 2024
7b86f4e
Switch model select from native to mat-select (#306)
MichaelDoyle Mar 12, 2024
eebfe56
Ability to show errors on actions page (#307)
MichaelDoyle Mar 12, 2024
f10df1f
[UI] Revamp Actions list UI (#308)
tonybaroneee Mar 12, 2024
29ec7df
[UI] Remove unnecessary return (#309)
tonybaroneee Mar 12, 2024
3fd7b76
[UI] Prevent selecting action if no param is set (#310)
tonybaroneee Mar 12, 2024
12c6832
Enable support for multiple messages coming from traceId (#314)
MaesterChestnut Mar 12, 2024
ab111d8
Avoid making flow runner editors read only (#321)
shrutip90 Mar 12, 2024
9eee328
[UI] Add filtering and expand/collapse all to actions list (#319)
tonybaroneee Mar 13, 2024
d61c41a
Fix error where model selection does not update (#323)
MichaelDoyle Mar 13, 2024
55c9644
[UI] Fix action search input style (#325)
tonybaroneee Mar 13, 2024
4c9cfb5
[UI] Update action list name and key display (#328)
tonybaroneee Mar 13, 2024
28494db
User error callout component on model playground (#330)
MichaelDoyle Mar 13, 2024
2ac6af6
refactor the code around checking for json output support (#304)
huangjeff5 Mar 13, 2024
fdd70a3
Render images in chat (#340)
MichaelDoyle Mar 13, 2024
450bfbd
Functioning add and remove button (#335)
MaesterChestnut Mar 13, 2024
1a826be
Refactor criteria/validation logic out of playground component (#339)
MaesterChestnut Mar 13, 2024
c032e03
[UI] Flow runner UI polish + improvements (#343)
tonybaroneee Mar 13, 2024
b046aca
Move JSON editor to shared components since retriever playground also…
shrutip90 Mar 13, 2024
129814b
[UI] Small handful of UI nit fixes (#345)
tonybaroneee Mar 14, 2024
08a4d55
[UI] Add loading state to flows table (#349)
tonybaroneee Mar 14, 2024
ab52c9f
Do not load output from trace; typically we're interested in loading …
MichaelDoyle Mar 14, 2024
5fc6b03
Make response_format optional (#350)
MichaelDoyle Mar 14, 2024
a3fbc83
[UI] Add Genkit icon (#371)
tonybaroneee Mar 14, 2024
9aae19a
Reset streamed chunks when rerunning the streamed flow (#379)
pavelgj Mar 14, 2024
661e34b
[UI] Add tooltips to span state icons (#351)
tonybaroneee Mar 14, 2024
4de6f90
Prefer includes over contains (#376)
tagboola Mar 14, 2024
87b0510
[UI] Add inspect flow state button if flow errors (#382)
tonybaroneee Mar 14, 2024
11f41ab
Chat mode (#391)
MaesterChestnut Mar 15, 2024
0524392
Ability to open Flow runner from the trace view (#394)
MichaelDoyle Mar 15, 2024
2b95bc9
Add basics of the eval runner page (#367)
ssbushi Mar 15, 2024
e4a8b4d
[UI] Fix flow details sidebar colors in dark mode (#399)
tonybaroneee Mar 15, 2024
b0bf441
[UI] Revamp model playground to chat-based layout (#397)
tonybaroneee Mar 15, 2024
856310d
[UI] Flow runner: Add a callout for no output so we dont show empty r…
shrutip90 Mar 15, 2024
4778331
[UI] Add trace details view (#405)
tonybaroneee Mar 15, 2024
ba19062
role:system message allowed for models (#402)
MaesterChestnut Mar 18, 2024
6a4e4ca
Adds support for image models. (#426)
mbleigh Mar 18, 2024
3f80ed5
fix playground runner after runAction change (#429)
MaesterChestnut Mar 18, 2024
5a81039
Revert "fix playground runner after runAction change (#429)" (#431)
MaesterChestnut Mar 18, 2024
8171ec6
Small tweaks to model playground to reduce chat (#438)
MichaelDoyle Mar 19, 2024
5cb86c8
[UI] Update `stackTraceSpans` to filter out internal spans (#439)
tonybaroneee Mar 19, 2024
a8b91cc
[UI] Add traces table to inspect index page (#448)
tonybaroneee Mar 19, 2024
fb50f7c
Adding traces to Messages (#432)
MaesterChestnut Mar 19, 2024
e62ebb4
[UI] Update routing for inspect pages (#449)
tonybaroneee Mar 19, 2024
b3a2090
[UI] Update routing for run pages (#450)
tonybaroneee Mar 19, 2024
d7bbbad
[UI] Fix trace display name in table (#451)
tonybaroneee Mar 19, 2024
69b0952
Allow size to be optional (#452)
MichaelDoyle Mar 19, 2024
01fba6f
[UI] Fix trace deep links in model playground (#453)
tonybaroneee Mar 19, 2024
26d9622
[UI] Add raw mat-table for evals view (#430)
ssbushi Mar 19, 2024
36c20c2
Add embeddings models (#303)
huangjeff5 Mar 19, 2024
6cf6ed3
[UI] Update /evaluations route to /evaluate (#454)
tonybaroneee Mar 19, 2024
3b2eb93
[UI] Make all run buttons consistent in playgrounds (#455)
tonybaroneee Mar 19, 2024
d2bff9a
[UI] Add cmd/ctrl + enter shortcut to playground editors (#456)
tonybaroneee Mar 19, 2024
9401587
[UI] Add landing state for Run page (#465)
tonybaroneee Mar 19, 2024
594c177
[UI] Prevent mat-slider from shrinking (#473)
tonybaroneee Mar 20, 2024
1e53ef3
[UI] Adjust element widths for narrow browsers (#474)
tonybaroneee Mar 20, 2024
09dd796
[UI] Prevent welcome page flicker on action refresh (#475)
tonybaroneee Mar 20, 2024
c8a8684
Add tab for Auth input to Flow Runner action (#467)
MaesterChestnut Mar 20, 2024
4c86f36
[UI] Add JSON sample to flow runner (#479)
tonybaroneee Mar 20, 2024
9422f88
Generic action runner (#484)
MichaelDoyle Mar 20, 2024
d4e4b43
[UI] Add support for tool primitive on dev UI run page (#488)
tonybaroneee Mar 20, 2024
04009b8
[UI] Tighten up spacing of actions list items (#489)
tonybaroneee Mar 20, 2024
dc4b435
[UI] Trigger change detection on flow runner response (#486)
tonybaroneee Mar 20, 2024
4325763
[UI] Add cmd/ctrl + enter shortcut to model playground (#485)
tonybaroneee Mar 20, 2024
7f9fe6e
[UI] Update eval results UI to use expandable cards for results (#491)
shrutip90 Mar 20, 2024
d4860a4
[UI] Prevent scrolling past last line in monaco editor (#495)
tonybaroneee Mar 21, 2024
eeffa6b
[UI] Use helper class to style pre stacktrace in callout (#502)
tonybaroneee Mar 21, 2024
b7e1fb0
[UI]Evals UI: Update inputs to use a table format (#496)
shrutip90 Mar 21, 2024
67cf547
[UI] Model playground message styling polish (#515)
tonybaroneee Mar 21, 2024
f27ebf9
[UI] Fix json editor to ignore initial value if no schema (#517)
tonybaroneee Mar 21, 2024
a01f0d3
[UI] Set retriever name in playground header (#518)
tonybaroneee Mar 21, 2024
1ce61ed
[UI] Prevent JSON sample pre-fill if unnecessary (#520)
tonybaroneee Mar 21, 2024
f4657e0
Remove fdescribe in tests (#532)
ssbushi Mar 22, 2024
87fd14a
Fix minor UI elements in eval page (#533)
ssbushi Mar 22, 2024
a2ed530
Add inspect trace option (#540)
ssbushi Mar 22, 2024
895b6fe
Add inspect tab in the Dev UI (#546)
ssbushi Mar 22, 2024
9bec550
hide inspect button if no traces (#548)
ssbushi Mar 22, 2024
8cbb852
[UI] Add typewriter effect to welcoem message (#554)
tonybaroneee Mar 23, 2024
70ff331
[UI] Tweak logo kerning (#555)
tonybaroneee Mar 24, 2024
d2e94bf
[UI] UI polish for evaluate page (#553)
tonybaroneee Mar 24, 2024
52c658a
[UI] Fix issue in action runner JSON pre-fill (#559)
tonybaroneee Mar 24, 2024
988a54b
[UI] Update typewriter animation to move left-to-right (#560)
tonybaroneee Mar 25, 2024
82ebefe
[UI] Show custom metadata attributes last in span details (#563)
tonybaroneee Mar 25, 2024
0a5290a
[UI] Polish for eval result details pane (#564)
tonybaroneee Mar 25, 2024
a2a0a38
Add support for text-embeddings (#538)
huangjeff5 Mar 25, 2024
f2bd0dd
[UI] Update default font to Google Sans (#565)
tonybaroneee Mar 25, 2024
3191ddb
[UI] Update span attributes styling (#568)
tonybaroneee Mar 25, 2024
2f30347
[UI] Update border radius globally (#573)
tonybaroneee Mar 25, 2024
827e0ab
[UI] Clip model playground message loading bar to card radius (#576)
tonybaroneee Mar 25, 2024
8864140
[UI] Prevent shrinkage of breadcrumb chevron (#577)
tonybaroneee Mar 25, 2024
a0b2763
[UI] Upgrade angular deps to ^17.3.1 (#587)
tonybaroneee Mar 26, 2024
ab8bddb
[UI] Add logo lockup to app bar (#588)
tonybaroneee Mar 26, 2024
846b9db
[UI] Fix table not rendering for errored traces (#607)
tonybaroneee Mar 26, 2024
5d61883
[UI] Render base64-encoded images in span output (#606)
tonybaroneee Mar 26, 2024
0ecb948
[UI] Update label of expand text button (#608)
tonybaroneee Mar 26, 2024
4797f23
[UI] Update lockup with new svg asset (#623)
tonybaroneee Mar 27, 2024
11ddf57
[Eval bugbash] Update tooltip to definitions, visible on entire chip …
ssbushi Mar 27, 2024
7c677b3
[Eval bugbash] Show errors as errors in eval UI (#626)
ssbushi Mar 27, 2024
e429a70
[Eval bugbash] Only show icon if failed evaluator (#635)
ssbushi Mar 27, 2024
3c6b6ff
[UI] Fix trace timing display now that they are millis (#638)
tonybaroneee Mar 27, 2024
c7a1a6b
[UI] Fix JSON editor to show up for optional inputs as well (#613)
shrutip90 Mar 28, 2024
1081be9
Add trace id to model playground when error occurs (#631)
MaesterChestnut Mar 28, 2024
77fb72d
Display context strings separately instead of a big array (#658)
shrutip90 Mar 28, 2024
cf46a29
[UI]: Update date format to medium (#659)
shrutip90 Mar 28, 2024
adcb33c
Update error tooltip (#665)
ssbushi Mar 29, 2024
583049b
[UI] Tighten up kerning on mat tab labels (#680)
tonybaroneee Mar 29, 2024
fafefe4
[UI] Allow resizing of .pre-container and json editor (#682)
tonybaroneee Mar 29, 2024
1d8d330
[UI] Add tooltips to temperature and top_p controls (#683)
tonybaroneee Mar 29, 2024
5cad7e7
[UI] Fix JSON sample autofill in retriever playground (#684)
tonybaroneee Mar 29, 2024
0e69f3a
[UI] Improve model playground param labels and add tooltips (#686)
tonybaroneee Mar 29, 2024
c8951b5
[UI] Fix trace status in table (#687)
tonybaroneee Mar 29, 2024
593c23a
[UI] Update model icon to sparks (#688)
tonybaroneee Mar 29, 2024
f2809a8
[UI] Add action type to runner page title (#690)
tonybaroneee Mar 29, 2024
8202af8
[UI] Add title and close button to expand text dialog (#691)
tonybaroneee Mar 29, 2024
21b2276
[UI] Remove redundant title from action runner (#692)
tonybaroneee Mar 29, 2024
a71da11
Pass thru options to API (#695)
huangjeff5 Mar 29, 2024
9ad1326
Bump ragas to 0.0.6 (#719)
pavelgj Mar 30, 2024
a5177f4
[UI] Cleanup system prompt styling in model playground (#725)
tonybaroneee Apr 1, 2024
ea1a93d
Update system/message placeholders (#727)
MichaelDoyle Apr 1, 2024
4b59d0a
Update Eval Error handling (#685)
MaesterChestnut Apr 1, 2024
91b266c
Clarifying label on button formerly known as "Open in Playground" (#636)
maxl0rd Apr 2, 2024
b2dbf4b
[UI] Fix callout content not stretching to fit width (#757)
tonybaroneee Apr 2, 2024
9051f3c
[UI]: Add metrics table in evals results card (#747)
shrutip90 Apr 2, 2024
cb3d0f3
[UI] Add support for specifying model version in playground (#760)
tonybaroneee Apr 2, 2024
e2eec31
[UI] Remove Evaluate tab in top nav bar (#765)
tonybaroneee Apr 2, 2024
ffc6940
[UI] Use flask icon for Evaluate tab (#772)
tonybaroneee Apr 2, 2024
141a756
[UI] Style updates to eval result details (#790)
tonybaroneee Apr 3, 2024
76f981d
[UI] Render eval metric name in error callout consistently (#792)
tonybaroneee Apr 3, 2024
13adb38
[UI] Fix span duration display (#797)
tonybaroneee Apr 3, 2024
29ba171
Show safety errors in the model runner (#800)
MichaelDoyle Apr 3, 2024
9b95706
Rename model playground => runner (#803)
MichaelDoyle Apr 3, 2024
649a2f5
Rename retriever playground => runner (#805)
MichaelDoyle Apr 3, 2024
a2c9b25
[UI] Adjust metrics table to be full-width (#810)
tonybaroneee Apr 4, 2024
5a65e1e
[UI] Only show eval zero state when loaded (#811)
tonybaroneee Apr 4, 2024
9b1fbff
[UI] Set All traces as default in Inspect view (#812)
tonybaroneee Apr 4, 2024
debd2ce
[UI] ThemeToggleService unit tests (#816)
shrutip90 Apr 4, 2024
9629202
[UI] Make spans deep-linkable in trace + flow details views (#819)
tonybaroneee Apr 4, 2024
83914bc
[UI] Update model runner title to use selected model in config (#822)
tonybaroneee Apr 4, 2024
c88ea10
[UI] Clear out images from data-rendered upon receiving new input (#840)
tonybaroneee Apr 4, 2024
5556cdc
[UI] Hide append mode for models that do not support multiturn (#847)
tonybaroneee Apr 5, 2024
820f77e
[UI] Show banner for unsupported models (#848)
tonybaroneee Apr 5, 2024
6efe6fd
[UI] Reset scroll position of input/output when switching spans (#852)
tonybaroneee Apr 5, 2024
c1e68d1
[UI] Hide "Add message" if model does not support multiturn (#853)
tonybaroneee Apr 5, 2024
fe629f5
Fix missed version 0.5.0-rc.1 (#858)
pavelgj Apr 5, 2024
16aa954
[UI] Fix display of system prompt (#860)
tonybaroneee Apr 5, 2024
0104e55
[UI] Fix tools icon (#862)
tonybaroneee Apr 5, 2024
0d5a432
[UI] Prevent stuck browser back when redirecting to first evaluation …
tonybaroneee Apr 7, 2024
4ea7af7
[UI] Add missing app text color style (#16)
tonybaroneee Apr 8, 2024
1a8ea09
[UI] Apply theme to scrollbars (#20)
tonybaroneee Apr 9, 2024
594f959
[UI] Clarify ID in flows/traces tables (#23)
tonybaroneee Apr 10, 2024
1a67dc0
[UI] Show flow error in trace details view, if applicable (#28)
tonybaroneee Apr 10, 2024
634969f
[UI] Fix eval zero state callout spacing (#24)
tonybaroneee Apr 10, 2024
8149ebf
Export textEmbedding (#36)
huangjeff5 Apr 11, 2024
bc79813
[UI] Update README doc with up-to-date instructions (#50)
tonybaroneee Apr 11, 2024
98b13c7
[UI] Create skeleton prompt runner component (#54)
tonybaroneee Apr 12, 2024
4aad06c
[UI] Add icon to all view trace buttons (#57)
tonybaroneee Apr 12, 2024
1d282d1
[UI] Show template in prompt runner next to input (#58)
tonybaroneee Apr 12, 2024
979652c
[UI] Use button toggle group for inspect table filter (#56)
tonybaroneee Apr 12, 2024
7a60ea1
[UI] Update play icon for run/dispatch span states (#60)
tonybaroneee Apr 12, 2024
9a4dd85
More sensible default model params (#65)
MichaelDoyle Apr 16, 2024
0fff85e
Always clear message when not in chat mode - otherwise if an error is…
MichaelDoyle Apr 16, 2024
394fc0e
[UI] Show raw prompt template in modal (#70)
tonybaroneee Apr 16, 2024
5acbe65
Nesting user input in prompt runner (#72)
maxl0rd Apr 16, 2024
9ff3c50
[UI] Add support for prompt variants (#74)
tonybaroneee Apr 17, 2024
ef1c61f
Allow system role for Gemini 1.5 Pro (#85)
tonybaroneee Apr 17, 2024
a384483
Create modular component for a multi-modal message (#83)
MichaelDoyle Apr 17, 2024
7cb3f75
Update faithfulness to v0.1.7 (#87)
ssbushi Apr 17, 2024
b6abf1f
[UI] Add prompt variant to query params to support deep-linking (#88)
tonybaroneee Apr 18, 2024
4da8e9d
[UI] Fix race condition when setting content in monaco (#96)
tonybaroneee Apr 18, 2024
2435d63
[UI] Small visual fix in app nav bar (#98)
tonybaroneee Apr 18, 2024
c0a1eff
[UI] Fix incorrect height for modal runner header (#101)
tonybaroneee Apr 18, 2024
c45c26e
[UI] Update placeholder label for model version select (#100)
tonybaroneee Apr 18, 2024
6646c46
Message list component (#84)
MichaelDoyle Apr 18, 2024
9437a11
[UI] Fix view evaluation report button to read correct metdata (#119)
tonybaroneee Apr 19, 2024
fd19009
[UI] Save action sidebar expansion state to `localStorage` (#120)
tonybaroneee Apr 19, 2024
c52bc15
[UI]: Move model config params to a separate component (#103)
shrutip90 Apr 19, 2024
04267f6
[UI] Update model runner to use the new model config component (#124)
shrutip90 Apr 19, 2024
2e80916
[UI] Pull the new defaults for model config into the new config compo…
shrutip90 Apr 19, 2024
fd8d28f
[UI] Add ability to export prompt file from model runner (#115)
tonybaroneee Apr 19, 2024
4a32c0f
[UI] Fix model versions not being loaded on initial render (#131)
tonybaroneee Apr 21, 2024
3ec0908
Integrate the new MessageList component into the ModelRunner (#114)
MichaelDoyle Apr 21, 2024
fe36ca9
[UI] Refactor model-config to use signals (#133)
tonybaroneee Apr 22, 2024
4c8f2be
Create placeholder for system prompt and first user message (#144)
MichaelDoyle Apr 22, 2024
c4af591
[UI] Remove oops from model config template (#143)
tonybaroneee Apr 22, 2024
d3e1d5a
Ensure selected model is set when using left nav (#148)
MichaelDoyle Apr 22, 2024
15cd12e
[UI] Prevent button icons from flex-shrinking (#151)
tonybaroneee Apr 22, 2024
3b2b42b
Show large multimedia in a modal (#156)
MichaelDoyle Apr 23, 2024
0dc4352
Enable all image types in model runner (#160)
MichaelDoyle Apr 23, 2024
9dce794
Re-enable gemini vision models (#168)
MichaelDoyle Apr 23, 2024
efb081b
[UI] Remove system prompt for single-turn models (#169)
shrutip90 Apr 23, 2024
23b1b55
Set a reasonable (but arbitrary) number of media files per message (#…
MichaelDoyle Apr 24, 2024
0087bb0
[UI] Remove obsolete MONACO_PATH provider (unused) (#182)
tonybaroneee Apr 24, 2024
57eb604
[UI] Sort eval metrics for consistent/comparable viewing (#209)
tonybaroneee Apr 28, 2024
225dad6
change action latency name (#200)
jba May 1, 2024
983658f
Add strong response types for vertex evaluators
tagboola May 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 35 additions & 6 deletions js/plugins/vertexai/src/evaluation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { BaseDataPoint } from '@genkit-ai/ai/evaluator';
import { Action } from '@genkit-ai/core';
import { GoogleAuth } from 'google-auth-library';
import { JSONClient } from 'google-auth-library/build/src/auth/googleauth';
import z from 'zod';
import { EvaluatorFactory } from './evaluator_factory';

/**
Expand Down Expand Up @@ -57,10 +58,6 @@ export function vertexEvaluators(
const metricType = isConfig(metric) ? metric.type : metric;
const metricSpec = isConfig(metric) ? metric.metricSpec : {};

console.log(
`Creating evaluator for metric ${metricType} with metricSpec ${metricSpec}`
);

switch (metricType) {
case VertexAIEvaluationMetricType.BLEU: {
return createBleuEvaluator(factory, metricSpec);
Expand All @@ -84,6 +81,12 @@ function isConfig(
return (config as VertexAIEvaluationMetricConfig).type !== undefined;
}

/**
 * Zod schema describing the response shape expected by the BLEU evaluator;
 * it is supplied as `responseSchema` when that evaluator is created.
 *
 * Requires a `bleuResults` object whose `bleuMetricValues` is an array of
 * objects, each carrying a numeric `score`.
 */
const BleuResponseSchema = z.object({
bleuResults: z.object({
bleuMetricValues: z.array(z.object({ score: z.number() })),
}),
});

// TODO: Add support for batch inputs
function createBleuEvaluator(
factory: EvaluatorFactory,
Expand All @@ -95,6 +98,7 @@ function createBleuEvaluator(
displayName: 'BLEU',
definition:
'Computes the BLEU score by comparing the output against the ground truth',
responseSchema: BleuResponseSchema,
},
(datapoint) => {
if (!datapoint.reference) {
Expand Down Expand Up @@ -124,6 +128,12 @@ function createBleuEvaluator(
);
}

/**
 * Zod schema describing the response shape expected by the ROUGE evaluator;
 * it is supplied as `responseSchema` when that evaluator is created.
 *
 * Requires a `rougeResults` object whose `rougeMetricValues` is an array of
 * objects, each carrying a numeric `score`.
 */
const RougeResponseSchema = z.object({
rougeResults: z.object({
rougeMetricValues: z.array(z.object({ score: z.number() })),
}),
});

// TODO: Add support for batch inputs
function createRougeEvaluator(
factory: EvaluatorFactory,
Expand All @@ -135,6 +145,7 @@ function createRougeEvaluator(
displayName: 'ROUGE',
definition:
'Computes the ROUGE score by comparing the output against the ground truth',
responseSchema: RougeResponseSchema,
},
(datapoint) => {
if (!datapoint.reference) {
Expand Down Expand Up @@ -162,6 +173,14 @@ function createRougeEvaluator(
);
}

/**
 * Zod schema describing the response shape expected by the Safety evaluator;
 * it is supplied as `responseSchema` when that evaluator is created.
 *
 * Requires a `safetyResult` object with a numeric `score`, a string
 * `explanation`, and a numeric `confidence` (none of them marked optional).
 */
const SafetyResponseSchema = z.object({
safetyResult: z.object({
score: z.number(),
explanation: z.string(),
confidence: z.number(),
}),
});

function createSafetyEvaluator(
factory: EvaluatorFactory,
metricSpec: any
Expand All @@ -171,6 +190,7 @@ function createSafetyEvaluator(
metric: VertexAIEvaluationMetricType.SAFETY,
displayName: 'Safety',
definition: 'Assesses the level of safety of an output',
responseSchema: SafetyResponseSchema,
},
(datapoint) => {
return {
Expand All @@ -182,7 +202,7 @@ function createSafetyEvaluator(
},
};
},
(response: any, datapoint: BaseDataPoint) => {
(response, datapoint: BaseDataPoint) => {
return {
testCaseId: datapoint.testCaseId,
evaluation: {
Expand All @@ -196,6 +216,14 @@ function createSafetyEvaluator(
);
}

/**
 * Zod schema describing the response shape expected by the Groundedness
 * evaluator; it is supplied as `responseSchema` when that evaluator is created.
 *
 * Requires a `groundednessResult` object with a numeric `score`, a string
 * `explanation`, and a numeric `confidence` (none of them marked optional).
 */
const GroundednessResponseSchema = z.object({
groundednessResult: z.object({
score: z.number(),
explanation: z.string(),
confidence: z.number(),
}),
});

function createGroundednessEvaluator(
factory: EvaluatorFactory,
metricSpec: any
Expand All @@ -206,6 +234,7 @@ function createGroundednessEvaluator(
displayName: 'Groundedness',
definition:
'Assesses the ability to provide or reference information included only in the context',
responseSchema: GroundednessResponseSchema,
},
(datapoint) => {
return {
Expand All @@ -218,7 +247,7 @@ function createGroundednessEvaluator(
},
};
},
(response: any, datapoint: BaseDataPoint) => {
(response, datapoint: BaseDataPoint) => {
return {
testCaseId: datapoint.testCaseId,
evaluation: {
Expand Down
31 changes: 25 additions & 6 deletions js/plugins/vertexai/src/evaluator_factory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { Action } from '@genkit-ai/core';
import { runInNewSpan } from '@genkit-ai/core/tracing';
import { GoogleAuth } from 'google-auth-library';
import { JSONClient } from 'google-auth-library/build/src/auth/googleauth';
import z from 'zod';
import { VertexAIEvaluationMetricType } from './evaluation';

export class EvaluatorFactory {
Expand All @@ -28,14 +29,18 @@ export class EvaluatorFactory {
private readonly projectId: string
) {}

create(
create<ResponseType extends z.ZodTypeAny>(
config: {
metric: VertexAIEvaluationMetricType;
displayName: string;
definition: string;
responseSchema: ResponseType;
},
toRequest: (datapoint: BaseDataPoint) => any,
responseHandler: (response: any, datapoint: BaseDataPoint) => any
responseHandler: (
response: z.infer<ResponseType>,
datapoint: BaseDataPoint
) => any
): Action {
return defineEvaluator(
{
Expand All @@ -44,14 +49,21 @@ export class EvaluatorFactory {
definition: config.definition,
},
async (datapoint: BaseDataPoint) => {
const response = await this.evaluateInstances(toRequest(datapoint));
const responseSchema = config.responseSchema;
const response = await this.evaluateInstances(
toRequest(datapoint),
responseSchema
);

return responseHandler(response, datapoint);
}
);
}

async evaluateInstances(partialRequest: any) {
async evaluateInstances<ResponseType extends z.ZodTypeAny>(
partialRequest: any,
responseSchema: ResponseType
): Promise<z.infer<ResponseType>> {
const locationName = `projects/${this.projectId}/locations/${this.location}`;
return await runInNewSpan(
{
Expand All @@ -64,15 +76,22 @@ export class EvaluatorFactory {
location: locationName,
...partialRequest,
};

metadata.input = request;
const client = await this.auth.getClient();
const url = `https://${this.location}-aiplatform.googleapis.com/v1beta1/${locationName}:evaluateInstances`;
const response = await client.request({
url: `https://${this.location}-aiplatform.googleapis.com/v1beta1/${locationName}:evaluateInstances`,
url,
method: 'POST',
body: JSON.stringify(request),
});
metadata.output = response.data;
return response.data as any;

try {
return responseSchema.parse(response.data);
} catch (e) {
throw new Error(`Error parsing ${url} API response: ${e}`);
}
}
);
}
Expand Down