From f8f991d780a32917224d506bdb831a32515399bb Mon Sep 17 00:00:00 2001 From: Tunde Agboola Date: Thu, 2 May 2024 16:28:08 -0400 Subject: [PATCH 1/3] Add vertex ai summarization metrics --- js/plugins/vertexai/src/evaluation.ts | 149 ++++++++++++++++++++++++-- 1 file changed, 140 insertions(+), 9 deletions(-) diff --git a/js/plugins/vertexai/src/evaluation.ts b/js/plugins/vertexai/src/evaluation.ts index ea494bcb61..91499d3851 100644 --- a/js/plugins/vertexai/src/evaluation.ts +++ b/js/plugins/vertexai/src/evaluation.ts @@ -10,7 +10,7 @@ * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and + * See the License for the specix c language governing permissions and * limitations under the License. */ @@ -31,6 +31,9 @@ export enum VertexAIEvaluationMetricType { FLUENCY = 'FLEUNCY', SAFETY = 'SAFETY', GROUNDEDNESS = 'GROUNDEDNESS', + SUMMARIZATION_QUALITY = 'SUMMARIZATION_QUALITY', + SUMMARIZATION_HELPFULNESS = 'SUMMARIZATION_HELPFULNESS', + SUMMARIZATION_VERBOSITY = 'SUMMARIZATION_VERBOSITY', } /** @@ -75,6 +78,15 @@ export function vertexEvaluators( case VertexAIEvaluationMetricType.GROUNDEDNESS: { return createGroundednessEvaluator(factory, metricSpec); } + case VertexAIEvaluationMetricType.SUMMARIZATION_QUALITY: { + return createSummarizationQualityEvaluator(factory, metricSpec); + } + case VertexAIEvaluationMetricType.SUMMARIZATION_HELPFULNESS: { + return createSummarizationHelpfulnessEvaluator(factory, metricSpec); + } + case VertexAIEvaluationMetricType.SUMMARIZATION_VERBOSITY: { + return createSummarizationVerbosityEvaluator(factory, metricSpec); + } } }); } @@ -105,10 +117,6 @@ function createBleuEvaluator( responseSchema: BleuResponseSchema, }, (datapoint) => { - if (!datapoint.reference) { - throw new Error('Reference is required'); - 
} - return { bleuInput: { metricSpec, @@ -149,10 +157,6 @@ function createRougeEvaluator( responseSchema: RougeResponseSchema, }, (datapoint) => { - if (!datapoint.reference) { - throw new Error('Reference is required'); - } - return { rougeInput: { metricSpec, @@ -292,3 +296,130 @@ function createGroundednessEvaluator( } ); } + +const SummarizationQualityResponseSchema = z.object({ + summarizationQualityResult: z.object({ + score: z.number(), + explanation: z.string(), + confidence: z.number(), + }), +}); + +function createSummarizationQualityEvaluator( + factory: EvaluatorFactory, + metricSpec: any +): Action { + return factory.create( + { + metric: VertexAIEvaluationMetricType.SUMMARIZATION_QUALITY, + displayName: 'Summarization quality', + definition: 'Assesses the overall ability to summarize text', + responseSchema: SummarizationQualityResponseSchema, + }, + (datapoint) => { + return { + summarizationQualityInput: { + metricSpec, + instance: { + prediction: datapoint.output as string, + instruction: datapoint.input as string, + context: datapoint.context?.join('. 
'), + }, + }, + }; + }, + (response) => { + return { + score: response.summarizationQualityResult.score, + details: { + reasoning: response.summarizationQualityResult.explanation, + }, + }; + } + ); +} + +const SummarizationHelpfulnessResponseSchema = z.object({ + summarizationHelpfulnessResult: z.object({ + score: z.number(), + explanation: z.string(), + confidence: z.number(), + }), +}); + +function createSummarizationHelpfulnessEvaluator( + factory: EvaluatorFactory, + metricSpec: any +): Action { + return factory.create( + { + metric: VertexAIEvaluationMetricType.SUMMARIZATION_HELPFULNESS, + displayName: 'Summarization helpfulness', + definition: + 'Assesses the ability to provide a summarization, which contains the details necessary to substitute the original text', + responseSchema: SummarizationHelpfulnessResponseSchema, + }, + (datapoint) => { + return { + summarizationHelpfulnessInput: { + metricSpec, + instance: { + prediction: datapoint.output as string, + instruction: datapoint.input as string, + context: datapoint.context?.join('. 
'), + }, + }, + }; + }, + (response) => { + return { + score: response.summarizationHelpfulnessResult.score, + details: { + reasoning: response.summarizationHelpfulnessResult.explanation, + }, + }; + } + ); +} + +const SummarizationVerbositySchema = z.object({ + summarizationVerbosityResult: z.object({ + score: z.number(), + explanation: z.string(), + confidence: z.number(), + }), +}); + +function createSummarizationVerbosityEvaluator( + factory: EvaluatorFactory, + metricSpec: any +): Action { + return factory.create( + { + metric: VertexAIEvaluationMetricType.SUMMARIZATION_VERBOSITY, + displayName: 'Summarization verbosity', + definition: 'Aassess the ability to provide a succinct summarization', + responseSchema: SummarizationVerbositySchema, + }, + (datapoint) => { + return { + summarizationVerbosityInput: { + metricSpec, + instance: { + prediction: datapoint.output as string, + instruction: datapoint.input as string, + context: datapoint.context?.join('. '), + }, + }, + }; + }, + (response) => { + return { + score: response.summarizationVerbosityResult.score, + details: { + reasoning: response.summarizationVerbosityResult.explanation, + }, + }; + } + ); +} From 69c3f3d7a09640d1083f9738a5f318a88a0dcfb5 Mon Sep 17 00:00:00 2001 From: Tunde Agboola Date: Thu, 2 May 2024 16:31:31 -0400 Subject: [PATCH 2/3] Fix license text --- js/plugins/vertexai/src/evaluation.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/plugins/vertexai/src/evaluation.ts b/js/plugins/vertexai/src/evaluation.ts index 91499d3851..57d886b010 100644 --- a/js/plugins/vertexai/src/evaluation.ts +++ b/js/plugins/vertexai/src/evaluation.ts @@ -10,7 +10,7 @@ * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specix c language governing permissions and + * See the License for the specific language governing permissions and * limitations under the License. */ From 2828e73e11f404c7e6b5b4ee7c39778b67b61565 Mon Sep 17 00:00:00 2001 From: Tunde Agboola Date: Thu, 2 May 2024 16:33:52 -0400 Subject: [PATCH 3/3] Add metrics to the documentation --- docs/plugins/vertex-ai.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/plugins/vertex-ai.md b/docs/plugins/vertex-ai.md index 75972257ec..ac8d5fdc2a 100644 --- a/docs/plugins/vertex-ai.md +++ b/docs/plugins/vertex-ai.md @@ -14,6 +14,9 @@ It also provides access to subset of evaluation metrics through the Vertex AI [R - [Fluency](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#fluencyinput) - [Safety](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#safetyinput) - [Groundeness](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#groundednessinput) +- [Summarization Quality](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#summarizationqualityinput) +- [Summarization Helpfulness](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#summarizationhelpfulnessinput) +- [Summarization Verbosity](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations/evaluateInstances#summarizationverbosityinput) ## Installation