diff --git a/v3-examples/model-customization-examples/ai_registry_example.ipynb b/v3-examples/model-customization-examples/ai_registry_example.ipynb index 4fdec6f222..3d0b5d770b 100644 --- a/v3-examples/model-customization-examples/ai_registry_example.ipynb +++ b/v3-examples/model-customization-examples/ai_registry_example.ipynb @@ -2,22 +2,29 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, "id": "initial_id", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:19:59.132430Z", - "start_time": "2025-11-23T21:19:56.006725Z" - } - }, - "outputs": [], + "metadata": {}, "source": [ "from rich.pretty import pprint\n", "\n", "from sagemaker.ai_registry.air_constants import REWARD_FUNCTION, REWARD_PROMPT\n", "from sagemaker.ai_registry.dataset import DataSet, CustomizationTechnique\n", "from sagemaker.ai_registry.evaluator import Evaluator" - ] + ], + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Configure AWS credentials and region\n", + "#! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", + "#! aws configure set region us-west-2" + ], + "id": "665a0e71fef89bde" }, { "cell_type": "markdown", @@ -40,53 +47,8 @@ }, { "cell_type": "code", - "execution_count": 2, "id": "2234f21780b91625", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:20:01.103989Z", - "start_time": "2025-11-23T21:19:59.560308Z" - } - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "370fbf5c468c4ec3822bbbf4333a20ee", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Final Resource Status: Available\n", - "\n" - ], - "text/plain": [ - "Final Resource Status: Available\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, "source": [ "\n", "# 1. S3 Data source\n", @@ -95,57 +57,14 @@ " source=\"s3://sdk-air-test-bucket/datasets/training-data/jamjee-sft-ds1.jsonl\",\n", " customization_technique=CustomizationTechnique.SFT\n", " )" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 3, "id": "61f55698ab27d70a", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:20:04.895127Z", - "start_time": "2025-11-23T21:20:04.009047Z" - } - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "be0f938d37354b90a85203f01b7d9fb6", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Final Resource Status: Available\n", - "\n" - ], - "text/plain": [ - "Final Resource Status: Available\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, "source": [ "# 2. local dataset file source\n", "# ------------------------------------\n", @@ -158,197 +77,57 @@ " source=\"/Volumes/workplace/sagemaker-python-sdk-staging/recipes-data/rlvr/train_256.jsonl\",\n", " customization_technique=CustomizationTechnique.RLVR\n", " )" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 4, "id": "ee2980471f8ae0c0", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:20:08.277200Z", - "start_time": "2025-11-23T21:20:08.146133Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
DataSet(\n", - " name='sdkv3-gen-ds2',\n", - " version='0.0.3',\n", - " status='Available',\n", - " method='generated',\n", - " technique='sft',\n", - " source='s3://sdk-air-test-bucket/datasets/training-data',\n", - " created_time=datetime.datetime(2025, 11, 25, 18, 21, 31, 217000, tzinfo=tzlocal()),\n", - " updated_time=datetime.datetime(2025, 11, 25, 18, 21, 31, 217000, tzinfo=tzlocal()),\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/sdkv3-gen-ds2/0.0.3'\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mDataSet\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'sdkv3-gen-ds2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.3'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'generated'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtechnique\u001b[0m=\u001b[38;2;0;135;0m'sft'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0msource\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/datasets/training-data'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m25\u001b[0m, \u001b[1;36m18\u001b[0m, \u001b[1;36m21\u001b[0m, \u001b[1;36m31\u001b[0m, \u001b[1;36m217000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mupdated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m25\u001b[0m, \u001b[1;36m18\u001b[0m, \u001b[1;36m21\u001b[0m, \u001b[1;36m31\u001b[0m, \u001b[1;36m217000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/sdkv3-gen-ds2/0.0.3'\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, "source": [ "# Refreshes status from hub\n", "dataset.refresh()\n", - "pprint(dataset)" - ] + "pprint(dataset.__dict__)" + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 5, "id": "30c1b17ad232110b", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:20:12.671509Z", - "start_time": "2025-11-23T21:20:11.549025Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
[\n", - "│ DataSet(\n", - " name='sdkv3-gen-ds2',\n", - " version='0.0.1',\n", - " status='Available',\n", - " method='generated',\n", - " technique='sft',\n", - " source='s3://sdk-air-test-bucket/datasets/training-data',\n", - " created_time=datetime.datetime(2025, 11, 23, 13, 9, 23, 196000, tzinfo=tzlocal()),\n", - " updated_time=datetime.datetime(2025, 11, 23, 13, 9, 23, 196000, tzinfo=tzlocal()),\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/sdkv3-gen-ds2/0.0.1'\n", - "),\n", - "│ DataSet(\n", - " name='sdkv3-gen-ds2',\n", - " version='0.0.2',\n", - " status='Available',\n", - " method='generated',\n", - " technique='sft',\n", - " source='s3://sdk-air-test-bucket/datasets/training-data',\n", - " created_time=datetime.datetime(2025, 11, 23, 13, 20, 0, 813000, tzinfo=tzlocal()),\n", - " updated_time=datetime.datetime(2025, 11, 23, 13, 20, 0, 813000, tzinfo=tzlocal()),\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/sdkv3-gen-ds2/0.0.2'\n", - "),\n", - "│ DataSet(\n", - " name='sdkv3-gen-ds2',\n", - " version='0.0.3',\n", - " status='Available',\n", - " method='generated',\n", - " technique='sft',\n", - " source='s3://sdk-air-test-bucket/datasets/training-data',\n", - " created_time=datetime.datetime(2025, 11, 25, 18, 21, 31, 217000, tzinfo=tzlocal()),\n", - " updated_time=datetime.datetime(2025, 11, 25, 18, 21, 31, 217000, tzinfo=tzlocal()),\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/sdkv3-gen-ds2/0.0.3'\n", - ")\n", - "]\n", - "\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;38;2;225;0;225mDataSet\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'sdkv3-gen-ds2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.1'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'generated'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtechnique\u001b[0m=\u001b[38;2;0;135;0m'sft'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0msource\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/datasets/training-data'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m9\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m196000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mupdated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m9\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m196000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/sdkv3-gen-ds2/0.0.1'\u001b[0m\n", - "\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;38;2;225;0;225mDataSet\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'sdkv3-gen-ds2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'generated'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtechnique\u001b[0m=\u001b[38;2;0;135;0m'sft'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0msource\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/datasets/training-data'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m20\u001b[0m, \u001b[1;36m0\u001b[0m, \u001b[1;36m813000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mupdated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m20\u001b[0m, \u001b[1;36m0\u001b[0m, \u001b[1;36m813000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/sdkv3-gen-ds2/0.0.2'\u001b[0m\n", - "\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;38;2;225;0;225mDataSet\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'sdkv3-gen-ds2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.3'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'generated'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtechnique\u001b[0m=\u001b[38;2;0;135;0m'sft'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0msource\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/datasets/training-data'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m25\u001b[0m, \u001b[1;36m18\u001b[0m, \u001b[1;36m21\u001b[0m, \u001b[1;36m31\u001b[0m, \u001b[1;36m217000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mupdated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m25\u001b[0m, \u001b[1;36m18\u001b[0m, \u001b[1;36m21\u001b[0m, \u001b[1;36m31\u001b[0m, \u001b[1;36m217000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/sdkv3-gen-ds2/0.0.3'\u001b[0m\n", - "\u001b[1m)\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, "source": [ "versions = dataset.get_versions()\n", - "pprint(versions)" - ] + "pprint(versions.__dict__)" + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 6, "id": "332be046d91fcefc", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:20:26.601118Z", - "start_time": "2025-11-23T21:20:26.388646Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "metadata": {}, "source": [ "# delete specific version\n", "dataset.delete(version=\"0.0.4\")\n", "#dataset.delete(version=\"use a version from versions\")\n", "#pprint(versions)\n", "# specified deleted version should not be part of output" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "510d1a015e7a565c", "metadata": {}, - "outputs": [], "source": [ "# deletes all versions of this dataset by default\n", "dataset.delete()" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -360,75 +139,16 @@ }, { "cell_type": "code", - "execution_count": 8, "id": "d89a8741dd64f92e", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:20:48.237129Z", - "start_time": "2025-11-23T21:20:47.888610Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
[DataSet(\n", - " name='demo-nargokul-6',\n", - " version='0.0.3',\n", - " status='Available',\n", - " method='generated',\n", - " technique='dpo',\n", - " source='s3://nova-mlflow-us-west-2/dataset',\n", - " created_time=datetime.datetime(2025, 11, 22, 11, 4, 50, tzinfo=tzlocal()),\n", - " updated_time=datetime.datetime(2025, 11, 22, 11, 4, 50, tzinfo=tzlocal()),\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/demo-nargokul-6/0.0.3'\n", - "), DataSet(\n", - " name='demo-nargokul-8',\n", - " version='0.0.7',\n", - " status='Available',\n", - " method='generated',\n", - " technique='dpo',\n", - " source='s3://nova-mlflow-us-west-2/dataset',\n", - " created_time=datetime.datetime(2025, 11, 22, 15, 40, 0, 373000, tzinfo=tzlocal()),\n", - " updated_time=datetime.datetime(2025, 11, 22, 15, 40, 0, 373000, tzinfo=tzlocal()),\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/demo-nargokul-8/0.0.7'\n", - ")]\n", - "\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\u001b[1;38;2;225;0;225mDataSet\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'demo-nargokul-6'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.3'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'generated'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtechnique\u001b[0m=\u001b[38;2;0;135;0m'dpo'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0msource\u001b[0m=\u001b[38;2;0;135;0m's3://nova-mlflow-us-west-2/dataset'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m4\u001b[0m, \u001b[1;36m50\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mupdated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m4\u001b[0m, \u001b[1;36m50\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/demo-nargokul-6/0.0.3'\u001b[0m\n", - "\u001b[1m)\u001b[0m, \u001b[1;38;2;225;0;225mDataSet\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'demo-nargokul-8'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.7'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'generated'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtechnique\u001b[0m=\u001b[38;2;0;135;0m'dpo'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0msource\u001b[0m=\u001b[38;2;0;135;0m's3://nova-mlflow-us-west-2/dataset'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m15\u001b[0m, \u001b[1;36m40\u001b[0m, \u001b[1;36m0\u001b[0m, \u001b[1;36m373000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mupdated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m15\u001b[0m, \u001b[1;36m40\u001b[0m, \u001b[1;36m0\u001b[0m, \u001b[1;36m373000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/demo-nargokul-8/0.0.7'\u001b[0m\n", - "\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, "source": [ "#Optional max_results argument for pagination or else use default config\n", "datasets = DataSet.get_all(max_results=2)\n", "for dataset in datasets:\n", " pprint(dataset)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -440,62 +160,21 @@ }, { "cell_type": "code", - "execution_count": null, "id": "572d4184cf42c7fa", "metadata": {}, - "outputs": [], "source": [ "# Use a dataset from iterator\n", "dataset = next(DataSet.get_all(max_results=2))\n", "for dataset in datasets:\n", - " pprint(dataset)" - ] + " pprint(dataset.__dict__)" + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 4, "id": "ae056f626cd7e931", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:09:35.634928Z", - "start_time": "2025-11-23T21:09:35.499741Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
DataSet(\n", - " name='sdkv3-gen-ds2',\n", - " version='0.0.1',\n", - " status='Available',\n", - " method='generated',\n", - " technique='sft',\n", - " data_location='s3://sdk-air-test-bucket/datasets/training-data',\n", - " created_time=datetime.datetime(2025, 11, 23, 13, 9, 23, 196000, tzinfo=tzlocal()),\n", - " updated_time=datetime.datetime(2025, 11, 23, 13, 9, 23, 196000, tzinfo=tzlocal()),\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/sdkv3-gen-ds2/0.0.1'\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mDataSet\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'sdkv3-gen-ds2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.1'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'generated'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtechnique\u001b[0m=\u001b[38;2;0;135;0m'sft'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mdata_location\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/datasets/training-data'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m9\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m196000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mupdated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m9\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m196000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/sdkv3-gen-ds2/0.0.1'\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, "source": [ "# Use a dataset by name\n", "dataset = DataSet.get(name=\"sdkv3-gen-ds2\")\n", @@ -503,29 +182,31 @@ "\n", "# We can do CRUD operation on this DataSet\n", "# e.g. dataset.delete()" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "44d7a8150b4b7846", "metadata": {}, - "outputs": [], "source": [ "#Create a new version of this dataset\n", "dataset.create_version(source=\"s3://sdk-air-test-bucket/datasets/test_ds\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "ba3ae7101c5281de", "metadata": {}, - "outputs": [], "source": [ "versions = dataset.get_versions()\n", "pprint(versions)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -537,26 +218,24 @@ }, { "cell_type": "code", - "execution_count": null, "id": "2d0ff33265d2c8dd", "metadata": {}, - "outputs": [], "source": [ "# Method : Lambda\n", "evaluator = Evaluator.create(\n", " name = \"sdk-new-rf11\",\n", - " source=\"arn:aws:lambda:us-west-2:052150106756:function:sm-eval-vinayshm-rlvr-llama-321b-instruct-v1-1762713051528\",\n", + " source=\"arn:aws:lambda:us-west-2:<>:function:sm-eval-vinayshm-rlvr-llama-321b-instruct-v1-<>8\",\n", " type=REWARD_FUNCTION\n", "\n", ")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "ab2896e0b68b9384", "metadata": {}, - "outputs": [], "source": [ "# Method : BYOC\n", "\n", @@ -565,57 +244,14 @@ " source=\"/Volumes/workplace/sagemaker-python-sdk-staging/recipes-data/eval_lambda_1.py\",\n", " type = REWARD_FUNCTION\n", ")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 9, "id": "813243a997e3946b", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:21:03.720214Z", - "start_time": "2025-11-23T21:21:02.707180Z" - } - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "23d9bb7117124f05a845ff371790ad87", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Final Resource Status: Available\n", - "\n" - ], - "text/plain": [ - "Final Resource Status: Available\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, "source": [ "# Reward Prompt\n", "# ------------------------------------\n", @@ -628,521 +264,101 @@ " source=\"/Users/jamjee/workplace/hubpuller/prompt/custom_prompt.jinja\",\n", " type = REWARD_PROMPT\n", ")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 12, "id": "a7aef9b8a54766eb", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:21:23.312196Z", - "start_time": "2025-11-23T21:21:23.176318Z" - } - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "bc95b13dd9a343d682b5928aae40acb2", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Final Resource Status: Available\n", - "\n" - ], - "text/plain": [ - "Final Resource Status: Available\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, "source": [ "# Optional wait, by default we have wait = True during create call.\n", "evaluator.wait()" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 11, "id": "13ff6d34eab34a07", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:21:18.257558Z", - "start_time": "2025-11-23T21:21:18.133175Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Evaluator(\n", - " name='jamj-rp2',\n", - " version='0.0.4',\n", - " status='Available',\n", - " type='RewardPrompt',\n", - " method=None,\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/jamj-rp2/0.0.4',\n", - " reference='s3://sdk-air-test-bucket/evaluators/jamj-rp2',\n", - " created_time=datetime.datetime(2025, 11, 23, 13, 21, 3, 424000, tzinfo=tzlocal())\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'jamj-rp2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.4'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtype\u001b[0m=\u001b[38;2;0;135;0m'RewardPrompt'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/jamj-rp2/0.0.4'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mreference\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/evaluators/jamj-rp2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m21\u001b[0m, \u001b[1;36m3\u001b[0m, \u001b[1;36m424000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, "source": [ "evaluator.refresh()\n", "pprint(evaluator)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 10, "id": "345214df-f320-4de0-ba97-860429f1f5bb", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:21:14.637956Z", - "start_time": "2025-11-23T21:21:14.156724Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Evaluator(\n", - " name='complex-evaluator-0ba18e4f',\n", - " version='0.0.1',\n", - " status='Available',\n", - " type='RewardFunction',\n", - " method='byoc',\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/complex-evaluator-0ba18e4f/0.0.1',\n", - " reference='arn:aws:lambda:us-west-2:052150106756:function:SageMaker-evaluator-complex-evaluator-0ba18e4f',\n", - " created_time=datetime.datetime(2025, 11, 25, 12, 12, 21, 385000, tzinfo=tzlocal())\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'complex-evaluator-0ba18e4f'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.1'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtype\u001b[0m=\u001b[38;2;0;135;0m'RewardFunction'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'byoc'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/complex-evaluator-0ba18e4f/0.0.1'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mreference\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:lambda:us-west-2:052150106756:function:SageMaker-evaluator-complex-evaluator-0ba18e4f'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m25\u001b[0m, \u001b[1;36m12\u001b[0m, \u001b[1;36m12\u001b[0m, \u001b[1;36m21\u001b[0m, \u001b[1;36m385000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Evaluator(\n", - " name='complex-evaluator-180612af',\n", - " version='0.0.1',\n", - " status='Available',\n", - " type='RewardFunction',\n", - " method='byoc',\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/complex-evaluator-180612af/0.0.1',\n", - " reference='arn:aws:lambda:us-west-2:052150106756:function:SageMaker-evaluator-complex-evaluator-180612af',\n", - " created_time=datetime.datetime(2025, 11, 25, 13, 3, 31, 776000, tzinfo=tzlocal())\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'complex-evaluator-180612af'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.1'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtype\u001b[0m=\u001b[38;2;0;135;0m'RewardFunction'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'byoc'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/complex-evaluator-180612af/0.0.1'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mreference\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:lambda:us-west-2:052150106756:function:SageMaker-evaluator-complex-evaluator-180612af'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m25\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m3\u001b[0m, \u001b[1;36m31\u001b[0m, \u001b[1;36m776000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, "source": [ "# Optional max_results for pagination\n", "evaluators = Evaluator.get_all(max_results=2)\n", "for evaluator in evaluators:\n", " pprint(evaluator)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "b0f2cb26d5bb9a08", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Evaluator(\n", - " name='eval-wait-test-03ab8232',\n", - " version='0.0.1',\n", - " status='Available',\n", - " type='RewardPrompt',\n", - " method=None,\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-wait-test-03ab8232/0.0.1',\n", - " reference='s3://sdk-air-test-bucket/evaluators/eval-wait-test-03ab8232',\n", - " created_time=datetime.datetime(2025, 11, 25, 11, 35, 9, 48000, tzinfo=tzlocal())\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'eval-wait-test-03ab8232'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.1'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtype\u001b[0m=\u001b[38;2;0;135;0m'RewardPrompt'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-wait-test-03ab8232/0.0.1'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mreference\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/evaluators/eval-wait-test-03ab8232'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m25\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m35\u001b[0m, \u001b[1;36m9\u001b[0m, \u001b[1;36m48000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Evaluator(\n", - " name='eval-wait-test-0f253c3e',\n", - " version='0.0.1',\n", - " status='Available',\n", - " type='RewardPrompt',\n", - " method=None,\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-wait-test-0f253c3e/0.0.1',\n", - " reference='s3://sdk-air-test-bucket/evaluators/eval-wait-test-0f253c3e',\n", - " created_time=datetime.datetime(2025, 11, 25, 12, 44, 32, 544000, tzinfo=tzlocal())\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'eval-wait-test-0f253c3e'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.1'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtype\u001b[0m=\u001b[38;2;0;135;0m'RewardPrompt'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-wait-test-0f253c3e/0.0.1'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mreference\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/evaluators/eval-wait-test-0f253c3e'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m25\u001b[0m, \u001b[1;36m12\u001b[0m, \u001b[1;36m44\u001b[0m, \u001b[1;36m32\u001b[0m, \u001b[1;36m544000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "# Get evaluators by type\n", "evaluators = Evaluator.get_all(type='RewardPrompt', max_results=2)\n", "for evaluator in evaluators:\n", " pprint(evaluator)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 13, "id": "1c62ec2f94eb9ac5", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:21:27.268574Z", - "start_time": "2025-11-23T21:21:27.138475Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Evaluator(\n", - " name='sdk-new-rf11',\n", - " version='0.0.6',\n", - " status='Available',\n", - " type='RewardFunction',\n", - " method='lambda',\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/sdk-new-rf11/0.0.6',\n", - " reference='arn:aws:lambda:us-west-2:052150106756:function:sm-eval-vinayshm-rlvr-llama-321b-instruct-v1-1762713051528',\n", - " created_time=datetime.datetime(2025, 11, 25, 18, 24, 33, 503000, tzinfo=tzlocal())\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'sdk-new-rf11'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.6'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtype\u001b[0m=\u001b[38;2;0;135;0m'RewardFunction'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'lambda'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/sdk-new-rf11/0.0.6'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mreference\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:lambda:us-west-2:052150106756:function:sm-eval-vinayshm-rlvr-llama-321b-instruct-v1-1762713051528'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m25\u001b[0m, \u001b[1;36m18\u001b[0m, \u001b[1;36m24\u001b[0m, \u001b[1;36m33\u001b[0m, \u001b[1;36m503000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, "source": [ "# Get an evaluator by name\n", "evaluator = Evaluator.get(name=\"sdk-new-rf11\")\n", "pprint(evaluator)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 14, "id": "b1a2154e870e623c", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:21:30.674522Z", - "start_time": "2025-11-23T21:21:30.159779Z" - } - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "8ee2613128d54052bd45c8f1d0b6477b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Final Resource Status: Available\n", - "\n" - ], - "text/plain": [ - "Final Resource Status: Available\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "metadata": {}, "source": [ "evaluator.create_version(source=evaluator.reference)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 15, "id": "72faf70127208509", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-23T21:21:35.036943Z", - "start_time": "2025-11-23T21:21:34.359472Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
[\n", - "│ Evaluator(\n", - " name='jamj-rp2',\n", - " version='0.0.1',\n", - " status='Available',\n", - " type='',\n", - " method='lambda',\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/jamj-rp2/0.0.1',\n", - " reference='s3://sdk-air-test-bucket/evaluators/jamj-rp2',\n", - " created_time=datetime.datetime(2025, 11, 23, 11, 16, 18, 242000, tzinfo=tzlocal())\n", - "),\n", - "│ Evaluator(\n", - " name='jamj-rp2',\n", - " version='0.0.2',\n", - " status='Available',\n", - " type='',\n", - " method='lambda',\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/jamj-rp2/0.0.2',\n", - " reference='s3://sdk-air-test-bucket/evaluators/jamj-rp2',\n", - " created_time=datetime.datetime(2025, 11, 23, 11, 17, 54, 404000, tzinfo=tzlocal())\n", - "),\n", - "│ Evaluator(\n", - " name='jamj-rp2',\n", - " version='0.0.3',\n", - " status='Available',\n", - " type='',\n", - " method='lambda',\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/jamj-rp2/0.0.3',\n", - " reference='s3://sdk-air-test-bucket/evaluators/jamj-rp2',\n", - " created_time=datetime.datetime(2025, 11, 23, 11, 18, 9, 567000, tzinfo=tzlocal())\n", - "),\n", - "│ Evaluator(\n", - " name='jamj-rp2',\n", - " version='0.0.4',\n", - " status='Available',\n", - " type='',\n", - " method='lambda',\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/jamj-rp2/0.0.4',\n", - " reference='s3://sdk-air-test-bucket/evaluators/jamj-rp2',\n", - " created_time=datetime.datetime(2025, 11, 23, 13, 21, 3, 424000, tzinfo=tzlocal())\n", - "),\n", - "│ Evaluator(\n", - " name='jamj-rp2',\n", - " version='0.0.5',\n", - " status='Available',\n", - " type='',\n", - " method='lambda',\n", - " arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/jamj-rp2/0.0.5',\n", - " reference='s3://sdk-air-test-bucket/evaluators/jamj-rp2',\n", - " created_time=datetime.datetime(2025, 11, 23, 13, 21, 30, 398000, tzinfo=tzlocal())\n", - ")\n", - "]\n", - "\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;38;2;225;0;225mEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'jamj-rp2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.1'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtype\u001b[0m=\u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'lambda'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/jamj-rp2/0.0.1'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mreference\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/evaluators/jamj-rp2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m16\u001b[0m, \u001b[1;36m18\u001b[0m, \u001b[1;36m242000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;38;2;225;0;225mEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'jamj-rp2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtype\u001b[0m=\u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'lambda'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/jamj-rp2/0.0.2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mreference\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/evaluators/jamj-rp2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m17\u001b[0m, \u001b[1;36m54\u001b[0m, \u001b[1;36m404000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;38;2;225;0;225mEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'jamj-rp2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.3'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtype\u001b[0m=\u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'lambda'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/jamj-rp2/0.0.3'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mreference\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/evaluators/jamj-rp2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m18\u001b[0m, \u001b[1;36m9\u001b[0m, \u001b[1;36m567000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;38;2;225;0;225mEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'jamj-rp2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.4'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtype\u001b[0m=\u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'lambda'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/jamj-rp2/0.0.4'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mreference\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/evaluators/jamj-rp2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m21\u001b[0m, \u001b[1;36m3\u001b[0m, \u001b[1;36m424000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;38;2;225;0;225mEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'jamj-rp2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mversion\u001b[0m=\u001b[38;2;0;135;0m'0.0.5'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Available'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mtype\u001b[0m=\u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mmethod\u001b[0m=\u001b[38;2;0;135;0m'lambda'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/jamj-rp2/0.0.5'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mreference\u001b[0m=\u001b[38;2;0;135;0m's3://sdk-air-test-bucket/evaluators/jamj-rp2'\u001b[0m,\n", - "\u001b[2;32m \u001b[0m\u001b[38;2;215;175;0mcreated_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m23\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m21\u001b[0m, \u001b[1;36m30\u001b[0m, \u001b[1;36m398000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m)\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, "source": [ "versions = evaluator.get_versions()\n", "pprint(versions)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "id": "0dc1107a-126b-4484-9639-07ba5de4ade6", "metadata": {}, - "outputs": [], "source": [ "# delete evaluator, option version argument or delete all versions.\n", "evaluator.delete()" - ] + ], + "outputs": [], + "execution_count": null } ], "metadata": { diff --git a/v3-examples/model-customization-examples/bedrock-modelbuilder-deployment.ipynb b/v3-examples/model-customization-examples/bedrock-modelbuilder-deployment.ipynb index d8047fcb79..cac62ffc56 100644 --- a/v3-examples/model-customization-examples/bedrock-modelbuilder-deployment.ipynb +++ b/v3-examples/model-customization-examples/bedrock-modelbuilder-deployment.ipynb @@ -7,6 +7,17 @@ "# Bedrock ModelBuilder Example\n" ] }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Configure AWS credentials and region\n", + "#! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", + "#! aws configure set region us-west-2" + ] + }, { "cell_type": "code", "metadata": {}, @@ -28,7 +39,7 @@ "source": [ "# Configuration\n", "TRAINING_JOB_NAME = 'meta-textgeneration-llama-3-2-1b-instruct-sft-20251123162832'\n", - "ROLE_ARN = \"arn:aws:iam::052150106756:role/Admin\"\n", + "ROLE_ARN = \"arn:aws:iam::<>:role/Admin\"\n", "REGION = 'us-west-2'\n", "BUCKET = 'open-models-testing-pdx'" ], @@ -286,7 +297,7 @@ "from sagemaker.core.resources import TrainingJob\n", "from sagemaker.serve.bedrock_model_builder import BedrockModelBuilder\n", "\n", - "training_job = TrainingJob.get(training_job_name=\"kssharda-sft-lora-lite-2-ui-run-2bn3c-1764134996968\",\n", + "training_job = TrainingJob.get(training_job_name=\"kssharda-sft-lora-lite-2-ui-run-2bn3c-<>8\",\n", " region=\"us-east-1\")\n", "pprint(training_job.model_artifacts.s3_model_artifacts)\n" ], @@ -342,7 +353,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "'s3://nova-studio-output-data/sft/final/kssharda-sft-lora-lite-2-ui-run-2bn3c-1764134996968/output/model'\n" + "'s3://nova-studio-output-data/sft/final/kssharda-sft-lora-lite-2-ui-run-2bn3c-<>8/output/model'\n" ] } ], @@ -364,7 +375,7 @@ "\n", "bedrock_model_builder.deploy(job_name = \"nargokul-26-01\",\n", " custom_model_name = \"nargokul-26-01\",\n", - " role_arn=\"arn:aws:iam::618100645563:role/Admin\")" + " role_arn=\"arn:aws:iam::<>:role/Admin\")" ], "outputs": [ { @@ -385,12 +396,12 @@ "text/plain": [ "\u001B[2;36m[11/26/25 17:09:29]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m S3 artifacts path: \u001B]8;id=340743;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py\u001B\\\u001B[2mbedrock_model_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=618013;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py#209\u001B\\\u001B[2m209\u001B[0m\u001B]8;;\u001B\\\n", "\u001B[2;36m \u001B[0m s3:\u001B[38;2;225;0;225m/\u001B[0m\u001B[38;2;225;0;225m/nova-studio-output-data/sft/final/kssharda-sft-lora-\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mlite-2-ui-run-2bn3c-1764134996968/output/\u001B[0m\u001B[38;2;225;0;225mmodel\u001B[0m \u001B[2m \u001B[0m\n" + "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mlite-2-ui-run-2bn3c-<>8/output/\u001B[0m\u001B[38;2;225;0;225mmodel\u001B[0m \u001B[2m \u001B[0m\n" ], "text/html": [ "
[11/26/25 17:09:29] INFO S3 artifacts path: bedrock_model_builder.py:209\n", " s3://nova-studio-output-data/sft/final/kssharda-sft-lora- \n", - " lite-2-ui-run-2bn3c-1764134996968/output/model \n", + " lite-2-ui-run-2bn3c-<>8/output/model \n", "\n" ] }, @@ -402,13 +413,13 @@ "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Manifest path: \u001B]8;id=541474;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py\u001B\\\u001B[2mbedrock_model_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=80220;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py#216\u001B\\\u001B[2m216\u001B[0m\u001B]8;;\u001B\\\n", "\u001B[2;36m \u001B[0m s3:\u001B[38;2;225;0;225m/\u001B[0m\u001B[38;2;225;0;225m/nova-studio-output-data/sft/final/kssharda-sft-lora-\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mlite-2-ui-run-2bn3c-1764134996968/output/output/\u001B[0m\u001B[38;2;225;0;225mmanifest.\u001B[0m \u001B[2m \u001B[0m\n", + "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mlite-2-ui-run-2bn3c-<>8/output/output/\u001B[0m\u001B[38;2;225;0;225mmanifest.\u001B[0m \u001B[2m \u001B[0m\n", "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mjson\u001B[0m \u001B[2m \u001B[0m\n" ], "text/html": [ "
INFO Manifest path: bedrock_model_builder.py:216\n", " s3://nova-studio-output-data/sft/final/kssharda-sft-lora- \n", - " lite-2-ui-run-2bn3c-1764134996968/output/output/manifest. \n", + " lite-2-ui-run-2bn3c-<>8/output/output/manifest. \n", " json \n", "\n" ] @@ -421,13 +432,13 @@ "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Looking for manifest at \u001B]8;id=356570;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py\u001B\\\u001B[2mbedrock_model_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=618595;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py#223\u001B\\\u001B[2m223\u001B[0m\u001B]8;;\u001B\\\n", "\u001B[2;36m \u001B[0m s3:\u001B[38;2;225;0;225m/\u001B[0m\u001B[38;2;225;0;225m/nova-studio-output-data/sft/final/kssharda-sft-lora-\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mlite-2-ui-run-2bn3c-1764134996968/output/output/\u001B[0m\u001B[38;2;225;0;225mmanifest.\u001B[0m \u001B[2m \u001B[0m\n", + "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mlite-2-ui-run-2bn3c-<>8/output/output/\u001B[0m\u001B[38;2;225;0;225mmanifest.\u001B[0m \u001B[2m \u001B[0m\n", "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mjson\u001B[0m \u001B[2m \u001B[0m\n" ], "text/html": [ "
INFO Looking for manifest at bedrock_model_builder.py:223\n", " s3://nova-studio-output-data/sft/final/kssharda-sft-lora- \n", - " lite-2-ui-run-2bn3c-1764134996968/output/output/manifest. \n", + " lite-2-ui-run-2bn3c-<>8/output/output/manifest. \n", " json \n", "\n" ] @@ -439,19 +450,19 @@ "data": { "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Manifest content: \u001B[1m{\u001B[0m\u001B[38;2;0;135;0m'checkpoint_s3_bucket'\u001B[0m: \u001B]8;id=291479;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py\u001B\\\u001B[2mbedrock_model_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=238165;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py#229\u001B\\\u001B[2m229\u001B[0m\u001B]8;;\u001B\\\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;0;135;0m's3://customer-escrow-618100645563-smtj-3ff597fc/kssharda\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;0;135;0m-sft-lora-lite-2-ui-run-2bn3c-1764134996968/step_4'\u001B[0m, \u001B[2m \u001B[0m\n", + "\u001B[2;36m \u001B[0m \u001B[38;2;0;135;0m's3://customer-escrow-<>-smtj-3ff597fc/kssharda\u001B[0m \u001B[2m \u001B[0m\n", + "\u001B[2;36m \u001B[0m \u001B[38;2;0;135;0m-sft-lora-lite-2-ui-run-2bn3c-<>8/step_4'\u001B[0m, \u001B[2m \u001B[0m\n", "\u001B[2;36m \u001B[0m \u001B[38;2;0;135;0m'intermediate_checkpoints'\u001B[0m: \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[1m[\u001B[0m\u001B[38;2;0;135;0m's3://customer-escrow-618100645563-smtj-3ff597fc/ksshard\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;0;135;0ma-sft-lora-lite-2-ui-run-2bn3c-1764134996968/step_3'\u001B[0m\u001B[1m]\u001B[0m\u001B[1m}\u001B[0m \u001B[2m \u001B[0m\n" + "\u001B[2;36m \u001B[0m \u001B[1m[\u001B[0m\u001B[38;2;0;135;0m's3://customer-escrow-<>-smtj-3ff597fc/ksshard\u001B[0m \u001B[2m \u001B[0m\n", + "\u001B[2;36m \u001B[0m \u001B[38;2;0;135;0ma-sft-lora-lite-2-ui-run-2bn3c-<>8/step_3'\u001B[0m\u001B[1m]\u001B[0m\u001B[1m}\u001B[0m \u001B[2m \u001B[0m\n" ], "text/html": [ "
INFO Manifest content: {'checkpoint_s3_bucket': bedrock_model_builder.py:229\n", - " 's3://customer-escrow-618100645563-smtj-3ff597fc/kssharda \n", - " -sft-lora-lite-2-ui-run-2bn3c-1764134996968/step_4', \n", + " 's3://customer-escrow-<>-smtj-3ff597fc/kssharda \n", + " -sft-lora-lite-2-ui-run-2bn3c-<>8/step_4', \n", " 'intermediate_checkpoints': \n", - " ['s3://customer-escrow-618100645563-smtj-3ff597fc/ksshard \n", - " a-sft-lora-lite-2-ui-run-2bn3c-1764134996968/step_3']} \n", + " ['s3://customer-escrow-<>-smtj-3ff597fc/ksshard \n", + " a-sft-lora-lite-2-ui-run-2bn3c-<>8/step_3']} \n", "\n" ] }, @@ -462,13 +473,13 @@ "data": { "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Checkpoint URI: \u001B]8;id=545156;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py\u001B\\\u001B[2mbedrock_model_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=779715;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py#236\u001B\\\u001B[2m236\u001B[0m\u001B]8;;\u001B\\\n", - "\u001B[2;36m \u001B[0m s3:\u001B[38;2;225;0;225m/\u001B[0m\u001B[38;2;225;0;225m/customer-escrow-618100645563-smtj-3ff597fc/kssharda-\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225msft-lora-lite-2-ui-run-2bn3c-1764134996968/\u001B[0m\u001B[38;2;225;0;225mstep_4\u001B[0m \u001B[2m \u001B[0m\n" + "\u001B[2;36m \u001B[0m s3:\u001B[38;2;225;0;225m/\u001B[0m\u001B[38;2;225;0;225m/customer-escrow-<>-smtj-3ff597fc/kssharda-\u001B[0m \u001B[2m \u001B[0m\n", + "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225msft-lora-lite-2-ui-run-2bn3c-<>8/\u001B[0m\u001B[38;2;225;0;225mstep_4\u001B[0m \u001B[2m \u001B[0m\n" ], "text/html": [ "
INFO Checkpoint URI: bedrock_model_builder.py:236\n", - " s3://customer-escrow-618100645563-smtj-3ff597fc/kssharda- \n", - " sft-lora-lite-2-ui-run-2bn3c-1764134996968/step_4 \n", + " s3://customer-escrow-<>-smtj-3ff597fc/kssharda- \n", + " sft-lora-lite-2-ui-run-2bn3c-<>8/step_4 \n", "\n" ] }, @@ -486,7 +497,7 @@ " 'connection': 'keep-alive',\n", " 'x-amzn-requestid': '95bc35c0-0f8e-48cb-95e2-00fb77b17b4d'},\n", " 'RetryAttempts': 0},\n", - " 'modelArn': 'arn:aws:bedrock:us-east-1:618100645563:custom-model/imported/pl4keb8mfank'}" + " 'modelArn': 'arn:aws:bedrock:us-east-1:<>:custom-model/imported/pl4keb8mfank'}" ] }, "execution_count": 2, @@ -502,7 +513,7 @@ "source": [ "from sagemaker.ai_registry.dataset import DataSet\n", "\n", - "dataset = DataSet.get(name=\"arn:aws:sagemaker:us-east-1:618100645563:hub-content/MDG6N5CA58D0IJMC1OPJOPIKOS2VPPLP0AM6UBOT9D73B8A34HTG/DataSet/nova-2-0-sft-dataset/1.0.0\")\n", + "dataset = DataSet.get(name=\"arn:aws:sagemaker:us-east-1:<>:hub-content/MDG6N5CA58D0IJMC1OPJOPIKOS2VPPLP0AM6UBOT9D73B8A34HTG/DataSet/nova-2-0-sft-dataset/1.0.0\")\n", "\n", "pprint(dataset.__dict__)" ], diff --git a/v3-examples/model-customization-examples/benchmark_demo.ipynb b/v3-examples/model-customization-examples/benchmark_demo.ipynb index 5cb75f506c..81544a6a20 100644 --- a/v3-examples/model-customization-examples/benchmark_demo.ipynb +++ b/v3-examples/model-customization-examples/benchmark_demo.ipynb @@ -20,75 +20,19 @@ ] }, { - "cell_type": "code", - "execution_count": 2, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[\n", - "│ <_Benchmark.MMLU: 'mmlu'>,\n", - "│ <_Benchmark.MMLU_PRO: 'mmlu_pro'>,\n", - "│ <_Benchmark.BBH: 'bbh'>,\n", - "│ <_Benchmark.GPQA: 'gpqa'>,\n", - "│ <_Benchmark.MATH: 'math'>,\n", - "│ <_Benchmark.STRONG_REJECT: 'strong_reject'>,\n", - "│ <_Benchmark.IFEVAL: 'ifeval'>,\n", - "│ <_Benchmark.GEN_QA: 'gen_qa'>,\n", - "│ <_Benchmark.MMMU: 'mmmu'>,\n", - "│ <_Benchmark.LLM_JUDGE: 'llm_judge'>,\n", - "│ <_Benchmark.INFERENCE_ONLY: 'inference_only'>\n", - "]\n", - "\n" - ], - "text/plain": [ - "\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225m_Benchmark.MMLU:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'mmlu'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.MMLU_PRO: \u001b[0m\u001b[38;2;0;135;0m'mmlu_pro'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.BBH: \u001b[0m\u001b[38;2;0;135;0m'bbh'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.GPQA: \u001b[0m\u001b[38;2;0;135;0m'gpqa'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.MATH: \u001b[0m\u001b[38;2;0;135;0m'math'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.STRONG_REJECT: \u001b[0m\u001b[38;2;0;135;0m'strong_reject'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.IFEVAL: \u001b[0m\u001b[38;2;0;135;0m'ifeval'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.GEN_QA: \u001b[0m\u001b[38;2;0;135;0m'gen_qa'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.MMMU: \u001b[0m\u001b[38;2;0;135;0m'mmmu'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.LLM_JUDGE: \u001b[0m\u001b[38;2;0;135;0m'llm_judge'\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[39m<_Benchmark.INFERENCE_ONLY: \u001b[0m\u001b[38;2;0;135;0m'inference_only'\u001b[0m\u001b[1m>\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{\n", - "│ 'modality': 'Multi-Modal (image)',\n", - "│ 'description': 'Custom Dataset Evaluation – Lets you supply your own dataset for benchmarking, comparing model outputs to reference answers with metrics such as ROUGE and BLEU. gen_qa supports image inference for models which have multimodal support.',\n", - "│ 'metrics': ['all'],\n", - "│ 'strategy': 'gen_qa',\n", - "│ 'subtask_available': False,\n", - "│ 'subtasks': None\n", - "}\n", - "\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'modality'\u001b[0m: \u001b[38;2;0;135;0m'Multi-Modal \u001b[0m\u001b[1;38;2;0;135;0m(\u001b[0m\u001b[38;2;0;135;0mimage\u001b[0m\u001b[1;38;2;0;135;0m)\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'description'\u001b[0m: \u001b[38;2;0;135;0m'Custom Dataset Evaluation – Lets you supply your own dataset for benchmarking, comparing model outputs to reference answers with metrics such as ROUGE and BLEU. gen_qa supports image inference for models which have multimodal support.'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'metrics'\u001b[0m: \u001b[1m[\u001b[0m\u001b[38;2;0;135;0m'all'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'strategy'\u001b[0m: \u001b[38;2;0;135;0m'gen_qa'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'subtask_available'\u001b[0m: \u001b[3;38;2;215;0;0mFalse\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'subtasks'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "cell_type": "code", + "source": [ + "# Configure AWS credentials and region\n", + "#! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", + "#! aws configure set region us-west-2" ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "metadata": {}, "source": [ "from sagemaker.train.evaluate import get_benchmarks, get_benchmark_properties\n", "from rich.pretty import pprint\n", @@ -106,7 +50,9 @@ "\n", "# Print properties for a specific benchmark\n", "pprint(get_benchmark_properties(benchmark=Benchmark.GEN_QA))" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -132,110 +78,7 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:39:45] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1364\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:39:45]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=314173;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=126855;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
INFO Resolved MLflow resource ARN: base_evaluator.py:113\n", - " arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \n", - " mmlu-eval-experiment \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved MLflow resource ARN: \u001b]8;id=480390;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=329695;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#113\u001b\\\u001b[2m113\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mmlu-eval-experiment \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Model package group provided as ARN: base_evaluator.py:145\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa \n", - " mple-name-aovqo \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Model package group provided as ARN: \u001b]8;id=572070;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=299487;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#145\u001b\\\u001b[2m145\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mple-name-aovqo \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BenchMarkEvaluator(\n", - "│ region=None,\n", - "│ sagemaker_session=<sagemaker.core.helper.session_helper.Session object at 0x13cd28e60>,\n", - "│ model='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28',\n", - "│ base_eval_name='gen-qa-eval-demo',\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n", - "│ mlflow_experiment_name=None,\n", - "│ mlflow_run_name=None,\n", - "│ networking=None,\n", - "│ kms_key_id=None,\n", - "│ model_package_group='arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo',\n", - "│ benchmark=<_Benchmark.GEN_QA: 'gen_qa'>,\n", - "│ subtasks=None,\n", - "│ dataset='s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl',\n", - "│ evaluate_base_model=True\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchMarkEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker.core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x13cd28e60\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbenchmark\u001b[0m\u001b[39m=<_Benchmark.GEN_QA: \u001b[0m\u001b[38;2;0;135;0m'gen_qa'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msubtasks\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;0;135;0mTrue\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "from sagemaker.train.evaluate import BenchMarkEvaluator\n", "\n", @@ -243,60 +86,24 @@ "# These values match our successfully tested configuration\n", "evaluator = BenchMarkEvaluator(\n", " benchmark=Benchmark.GEN_QA,\n", - " model=\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\",\n", + " model=\"arn:aws:sagemaker:us-west-2:<>:model-package/test-finetuned-models-gamma/28\",\n", " s3_output_path=\"s3://mufi-test-serverless-smtj/eval/\",\n", - " mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment\",\n", - " dataset=\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\",\n", - " model_package_group=\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/example-name-aovqo\", # Optional inferred from model if model package\n", + " mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/mmlu-eval-experiment\",\n", + " dataset=\"s3://sagemaker-us-west-2-<>/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\",\n", + " model_package_group=\"arn:aws:sagemaker:us-west-2:<>:model-package-group/example-name-aovqo\", # Optional inferred from model if model package\n", " base_eval_name=\"gen-qa-eval-demo\",\n", " # Note: sagemaker_session is optional and will be auto-created if not provided\n", " # Note: region is optional and will be auto deduced using environment variables - SAGEMAKER_REGION, AWS_REGION\n", ")\n", "\n", "pprint(evaluator)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n", - "│ in <module>:13 │\n", - "│ │\n", - "│ 10 # Create evaluator with GEN_QA benchmark │\n", - "│ 11 # These values match our successfully tested configuration │\n", - "│ 12 evaluator = BenchMarkEvaluator( │\n", - "│ ❱ 13 │ benchmark=Benchmark.GEN_QA, │\n", - "│ 14 │ model=\"meta-textgeneration-llama-3-2-1b-instruct\", │\n", - "│ 15 │ s3_output_path=\"s3://mufi-test-serverless-smtj/eval/\", │\n", - "│ 16 │ mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server │\n", - "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "NameError: name 'Benchmark' is not defined\n", - "\n" - ], - "text/plain": [ - "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m in
BenchMarkEvaluator(\n", - "│ region='us-east-1',\n", - "│ sagemaker_session=<sagemaker_core.helper.session_helper.Session object at 0x356a03950>,\n", - "│ model='arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3',\n", - "│ base_eval_name='gen-qa-eval-demo',\n", - "│ s3_output_path='s3://mufi-test-serverless-iad/eval/',\n", - "│ mlflow_resource_arn='arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server',\n", - "│ mlflow_experiment_name=None,\n", - "│ mlflow_run_name=None,\n", - "│ networking=None,\n", - "│ kms_key_id=None,\n", - "│ model_package_group='arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models',\n", - "│ benchmark=<_Benchmark.GEN_QA: 'gen_qa'>,\n", - "│ subtasks=None,\n", - "│ dataset='s3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl',\n", - "│ evaluate_base_model=True\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchMarkEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[38;2;0;135;0m'us-east-1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker_core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x356a03950\u001b[0m\u001b[39m>,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m's3://mufi-test-serverless-iad/eval/'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbenchmark\u001b[0m\u001b[39m=<_Benchmark.GEN_QA: \u001b[0m\u001b[38;2;0;135;0m'gen_qa'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msubtasks\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;0;135;0mTrue\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "# # [Optional] Nova testing IAD Prod\n", "\n", @@ -387,12 +140,12 @@ "# # These values match our successfully tested configuration\n", "# evaluator = BenchMarkEvaluator(\n", "# benchmark=Benchmark.GEN_QA,\n", - "# # model=\"arn:aws:sagemaker:us-east-1:052150106756:model-package/bgrv-nova-micro-sft-lora/1\",\n", - "# model=\"arn:aws:sagemaker:us-east-1:052150106756:model-package/test-nova-finetuned-models/3\",\n", + "# # model=\"arn:aws:sagemaker:us-east-1:<>:model-package/bgrv-nova-micro-sft-lora/1\",\n", + "# model=\"arn:aws:sagemaker:us-east-1:<>:model-package/test-nova-finetuned-models/3\",\n", "# s3_output_path=\"s3://mufi-test-serverless-iad/eval/\",\n", - "# mlflow_resource_arn=\"arn:aws:sagemaker:us-east-1:052150106756:mlflow-tracking-server/mlflow-prod-server\",\n", - "# dataset=\"s3://sagemaker-us-east-1-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\",\n", - "# model_package_group=\"arn:aws:sagemaker:us-east-1:052150106756:model-package-group/test-nova-finetuned-models\", # Optional inferred from model if model package\n", + "# mlflow_resource_arn=\"arn:aws:sagemaker:us-east-1:<>:mlflow-tracking-server/mlflow-prod-server\",\n", + "# dataset=\"s3://sagemaker-us-east-1-<>/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\",\n", + "# model_package_group=\"arn:aws:sagemaker:us-east-1:<>:model-package-group/test-nova-finetuned-models\", # Optional inferred from model if model package\n", "# base_eval_name=\"gen-qa-eval-demo\",\n", "# region=\"us-east-1\",\n", "# # Note: sagemaker_session is optional and will be auto-created if not provided\n", @@ -400,7 +153,9 @@ "# )\n", "\n", "# pprint(evaluator)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -411,156 +166,7 @@ }, { "cell_type": "code", - "execution_count": 3, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:26:31] INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:26:31]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=665742;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=28065;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Fetching evaluation override parameters for hyperparameters benchmark_evaluator.py:495\n", - " property \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching evaluation override parameters for hyperparameters \u001b]8;id=668827;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=344195;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#495\u001b\\\u001b[2m495\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m property \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Fetching hub content metadata for recipe_utils.py:201\n", - " meta-textgeneration-llama-3-2-1b-instruct from SageMakerPublicHub \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching hub content metadata for \u001b]8;id=912465;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=530916;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#201\u001b\\\u001b[2m201\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct from SageMakerPublicHub \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
WARNING No region provided. Using default region. utils.py:340\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No region provided. Using default region. \u001b]8;id=483608;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=394176;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#340\u001b\\\u001b[2m340\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Runs on sagemaker us-west-2, region:us-west-2 utils.py:354\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Runs on sagemaker us-west-\u001b[1;36m2\u001b[0m, region:us-west-\u001b[1;36m2\u001b[0m \u001b]8;id=127187;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=740445;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#354\u001b\\\u001b[2m354\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for evaluation recipe with Type='Evaluation' and recipe_utils.py:221\n", - " EvaluationType='DeterministicEvaluation' \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for evaluation recipe with \u001b[38;2;215;175;0mType\u001b[0m=\u001b[38;2;0;135;0m'Evaluation'\u001b[0m and \u001b]8;id=26417;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=309515;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#221\u001b\\\u001b[2m221\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;215;175;0mEvaluationType\u001b[0m=\u001b[38;2;0;135;0m'DeterministicEvaluation'\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Downloading override parameters from recipe_utils.py:249\n", - " s3://jumpstart-cache-beta-us-west-2/recipes/open-source-eval-meta- \n", - " textgeneration-llama-3-2-1b-instruct-deterministic_override_params \n", - " _sm_jobs_v1.0.19.json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Downloading override parameters from \u001b]8;id=762738;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=1149;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#249\u001b\\\u001b[2m249\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/jumpstart-cache-beta-us-west-2/recipes/\u001b[0m\u001b[38;2;225;0;225mopen-source-eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mtextgeneration-llama-3-2-1b-instruct-deterministic_override_params\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m_sm_jobs_v1.0.19.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{\n", - "│ 'max_new_tokens': '8192',\n", - "│ 'temperature': '0',\n", - "│ 'top_k': '-1',\n", - "│ 'top_p': '1.0',\n", - "│ 'aggregation': '',\n", - "│ 'postprocessing': 'False',\n", - "│ 'max_model_len': '12000'\n", - "}\n", - "\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "pprint(evaluator.hyperparameters.to_dict())\n", "\n", @@ -569,7 +175,9 @@ "\n", "# optionally get more info on types, limits, defaults.\n", "# evaluator.hyperparameters.get_info()\n" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -591,1114 +199,7 @@ }, { "cell_type": "code", - "execution_count": 5, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:40:20] INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:40:20]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=39435;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=899931;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Getting or creating artifact for source: base_evaluator.py:597\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Getting or creating artifact for source: \u001b]8;id=774478;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=222956;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#597\u001b\\\u001b[2m597\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for existing artifact for model package: base_evaluator.py:459\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for existing artifact for model package: \u001b]8;id=672788;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=533927;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#459\u001b\\\u001b[2m459\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found existing artifact: base_evaluator.py:468\n", - " arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \n", - " 138877d772ec489bef \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing artifact: \u001b]8;id=555230;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=311641;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#468\u001b\\\u001b[2m468\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 138877d772ec489bef \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using resolved model_package_group ARN: base_evaluator.py:414\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa \n", - " mple-name-aovqo \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using resolved model_package_group ARN: \u001b]8;id=350625;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=393598;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#414\u001b\\\u001b[2m414\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/exa \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mple-name-aovqo \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using ModelPackage - model_package_group_arn: benchmark_evaluator.py:644\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-grou \n", - " p/example-name-aovqo \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using ModelPackage - model_package_group_arn: \u001b]8;id=534430;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=895229;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#644\u001b\\\u001b[2m644\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-grou \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m p/example-name-aovqo \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Resolved model info - base_model_name: benchmark_evaluator.py:647\n", - " meta-textgeneration-llama-3-2-1b-instruct, base_model_arn: \n", - " arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublic \n", - " Hub/Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0, \n", - " source_model_package_arn: \n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test \n", - " -finetuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved model info - base_model_name: \u001b]8;id=1084;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=849460;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#647\u001b\\\u001b[2m647\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct, base_model_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublic \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m Hub/Model/meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct/\u001b[1;36m1.10\u001b[0m.\u001b[1;36m0\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m source_model_package_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -finetuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=537782;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=387290;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Fetching evaluation override parameters for hyperparameters benchmark_evaluator.py:495\n", - " property \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching evaluation override parameters for hyperparameters \u001b]8;id=706064;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=284205;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#495\u001b\\\u001b[2m495\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m property \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Fetching hub content metadata for recipe_utils.py:201\n", - " meta-textgeneration-llama-3-2-1b-instruct from SageMakerPublicHub \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching hub content metadata for \u001b]8;id=502448;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=531984;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#201\u001b\\\u001b[2m201\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct from SageMakerPublicHub \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for evaluation recipe with Type='Evaluation' and recipe_utils.py:221\n", - " EvaluationType='DeterministicEvaluation' \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for evaluation recipe with \u001b[38;2;215;175;0mType\u001b[0m=\u001b[38;2;0;135;0m'Evaluation'\u001b[0m and \u001b]8;id=67072;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=119115;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#221\u001b\\\u001b[2m221\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;215;175;0mEvaluationType\u001b[0m=\u001b[38;2;0;135;0m'DeterministicEvaluation'\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Downloading override parameters from recipe_utils.py:249\n", - " s3://jumpstart-cache-beta-us-west-2/recipes/open-source-eval-meta- \n", - " textgeneration-llama-3-2-1b-instruct-deterministic_override_params \n", - " _sm_jobs_v1.0.19.json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Downloading override parameters from \u001b]8;id=954396;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=959350;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#249\u001b\\\u001b[2m249\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/jumpstart-cache-beta-us-west-2/recipes/\u001b[0m\u001b[38;2;225;0;225mopen-source-eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mtextgeneration-llama-3-2-1b-instruct-deterministic_override_params\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m_sm_jobs_v1.0.19.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:40:21] INFO Using configured hyperparameters: {'max_new_tokens': benchmark_evaluator.py:568\n", - " '8192', 'temperature': '0', 'top_k': '-1', 'top_p': '1.0', \n", - " 'aggregation': '', 'postprocessing': 'False', \n", - " 'max_model_len': '12000'} \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:40:21]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using configured hyperparameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b]8;id=584498;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py\u001b\\\u001b[2mbenchmark_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=126531;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py#568\u001b\\\u001b[2m568\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using full template for ModelPackage base_evaluator.py:655\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using full template for ModelPackage \u001b]8;id=556396;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=773270;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#655\u001b\\\u001b[2m655\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Resolved template parameters: {'role_arn': base_evaluator.py:693\n", - " 'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server \n", - " /mmlu-eval-experiment', 'mlflow_experiment_name': None, \n", - " 'mlflow_run_name': None, 'model_package_group_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex \n", - " ample-name-aovqo', 'source_model_package_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28', 'base_model_arn': \n", - " 'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0', \n", - " 's3_output_path': 's3://mufi-test-serverless-smtj/eval/', \n", - " 'dataset_artifact_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b \n", - " 3138877d772ec489bef', 'action_arn_prefix': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:action', \n", - " 'dataset_uri': \n", - " 's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19 \n", - " 5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl', 'task': \n", - " 'gen_qa', 'strategy': 'gen_qa', 'evaluation_metric': 'all', \n", - " 'subtask': '', 'pipeline_name': \n", - " 'SagemakerEvaluation-Deterministic', 'evaluate_base_model': \n", - " True, 'max_new_tokens': '8192', 'temperature': '0', 'top_k': \n", - " '-1', 'top_p': '1.0', 'aggregation': '', 'postprocessing': \n", - " 'False', 'max_model_len': '12000'} \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved template parameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'role_arn'\u001b[0m: \u001b]8;id=970601;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=386360;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#693\u001b\\\u001b[2m693\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:iam::052150106756:role/Admin'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_resource_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_experiment_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'mlflow_run_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[38;2;0;135;0m'model_package_group_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mample-name-aovqo'\u001b[0m, \u001b[38;2;0;135;0m'source_model_package_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28'\u001b[0m, \u001b[38;2;0;135;0m'base_model_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3_output_path'\u001b[0m: \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_artifact_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef'\u001b[0m, \u001b[38;2;0;135;0m'action_arn_prefix'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:action'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_uri'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m, \u001b[38;2;0;135;0m'task'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'strategy'\u001b[0m: \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'evaluation_metric'\u001b[0m: \u001b[38;2;0;135;0m'all'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'subtask'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'pipeline_name'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'SagemakerEvaluation-Deterministic'\u001b[0m, \u001b[38;2;0;135;0m'evaluate_base_model'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[3;38;2;0;135;0mTrue\u001b[0m, \u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'False'\u001b[0m, \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Rendered pipeline definition: base_evaluator.py:702\n", - " { \n", - " \"Version\": \"2020-12-01\", \n", - " \"Metadata\": {}, \n", - " \"MlflowConfig\": { \n", - " \"MlflowResourceArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server \n", - " /mmlu-eval-experiment\" \n", - " }, \n", - " \"Parameters\": [], \n", - " \"Steps\": [ \n", - " { \n", - " \"Name\": \"CreateEvaluationAction\", \n", - " \"Type\": \"Lineage\", \n", - " \"Arguments\": { \n", - " \"Actions\": [ \n", - " { \n", - " \"ActionName\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"ActionType\": \"Evaluation\", \n", - " \"Source\": { \n", - " \"SourceUri\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\", \n", - " \"SourceType\": \"ModelPackage\" \n", - " }, \n", - " \"Properties\": { \n", - " \"PipelineExecutionArn\": { \n", - " \"Get\": \"Execution.PipelineExecutionArn\" \n", - " }, \n", - " \"PipelineName\": \n", - " \"SagemakerEvaluation-Deterministic\" \n", - " } \n", - " } \n", - " ], \n", - " \"Contexts\": [ \n", - " { \n", - " \"ContextName\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"ContextType\": \"PipelineExecution\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \"Execution.PipelineExecutionArn\" \n", - " } \n", - " } \n", - " } \n", - " ], \n", - " \"Associations\": [ \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"Type\": \"Action\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Name\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"Type\": \"Context\" \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " }, \n", - " { \n", - " \"Source\": { \n", - " \"Arn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b \n", - " 3138877d772ec489bef\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"EvaluateBaseModel\", \n", - " \"Type\": \"Training\", \n", - " \"Arguments\": { \n", - " \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\", \n", - " \"ModelPackageConfig\": { \n", - " \"ModelPackageGroupArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex \n", - " ample-name-aovqo\", \n", - " \"SourceModelPackageArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\" \n", - " }, \n", - " \"ServerlessJobConfig\": { \n", - " \"BaseModelArn\": \n", - " \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\", \n", - " \"AcceptEula\": true, \n", - " \"JobType\": \"Evaluation\", \n", - " \"EvaluationType\": \"BenchmarkEvaluation\" \n", - " }, \n", - " \"StoppingCondition\": { \n", - " \"MaxRuntimeInSeconds\": 86400 \n", - " }, \n", - " \"HyperParameters\": { \n", - " \"task\": \"gen_qa\", \n", - " \"strategy\": \"gen_qa\", \n", - " \"evaluation_metric\": \"all\", \n", - " \"max_new_tokens\": \"8192\", \n", - " \"temperature\": \"0\", \n", - " \"top_k\": \"-1\", \n", - " \"top_p\": \"1.0\", \n", - " \"max_model_len\": \"12000\", \n", - " \"aggregation\": \"\", \n", - " \"postprocessing\": \"False\" \n", - " }, \n", - " \"OutputDataConfig\": { \n", - " \"S3OutputPath\": \n", - " \"s3://mufi-test-serverless-smtj/eval/\", \n", - " \"CompressionType\": \"NONE\" \n", - " }, \n", - " \"InputDataConfig\": [ \n", - " { \n", - " \"ChannelName\": \"train\", \n", - " \"DataSource\": { \n", - " \"S3DataSource\": { \n", - " \"S3DataType\": \"S3Prefix\", \n", - " \"S3Uri\": \n", - " \"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19 \n", - " 5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\" \n", - " } \n", - " } \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"EvaluateCustomModel\", \n", - " \"Type\": \"Training\", \n", - " \"Arguments\": { \n", - " \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\", \n", - " \"ModelPackageConfig\": { \n", - " \"ModelPackageGroupArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex \n", - " ample-name-aovqo\", \n", - " \"SourceModelPackageArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\" \n", - " }, \n", - " \"ServerlessJobConfig\": { \n", - " \"BaseModelArn\": \n", - " \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\", \n", - " \"AcceptEula\": true, \n", - " \"JobType\": \"Evaluation\", \n", - " \"EvaluationType\": \"BenchmarkEvaluation\" \n", - " }, \n", - " \"StoppingCondition\": { \n", - " \"MaxRuntimeInSeconds\": 86400 \n", - " }, \n", - " \"HyperParameters\": { \n", - " \"task\": \"gen_qa\", \n", - " \"strategy\": \"gen_qa\", \n", - " \"evaluation_metric\": \"all\", \n", - " \"max_new_tokens\": \"8192\", \n", - " \"temperature\": \"0\", \n", - " \"top_k\": \"-1\", \n", - " \"top_p\": \"1.0\", \n", - " \"max_model_len\": \"12000\", \n", - " \"aggregation\": \"\", \n", - " \"postprocessing\": \"False\" \n", - " }, \n", - " \"OutputDataConfig\": { \n", - " \"S3OutputPath\": \n", - " \"s3://mufi-test-serverless-smtj/eval/\", \n", - " \"CompressionType\": \"NONE\" \n", - " }, \n", - " \"InputDataConfig\": [ \n", - " { \n", - " \"ChannelName\": \"train\", \n", - " \"DataSource\": { \n", - " \"S3DataSource\": { \n", - " \"S3DataType\": \"S3Prefix\", \n", - " \"S3Uri\": \n", - " \"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19 \n", - " 5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\" \n", - " } \n", - " } \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"AssociateLineage\", \n", - " \"Type\": \"Lineage\", \n", - " \"DependsOn\": [ \n", - " \"CreateEvaluationAction\" \n", - " ], \n", - " \"Arguments\": { \n", - " \"Artifacts\": [ \n", - " { \n", - " \"ArtifactName\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"base-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"ArtifactType\": \"EvaluationReport\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \n", - " \"Steps.EvaluateBaseModel.OutputDataConfig.S3OutputPath\" \n", - " } \n", - " } \n", - " }, \n", - " { \n", - " \"ArtifactName\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"ArtifactType\": \"EvaluationReport\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \n", - " \"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\" \n", - " } \n", - " } \n", - " } \n", - " ], \n", - " \"Associations\": [ \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"base-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"Type\": \"Artifact\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " }, \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"Type\": \"Artifact\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " ] \n", - " } \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Rendered pipeline definition: \u001b]8;id=330131;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=262009;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#702\u001b\\\u001b[2m702\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Version\"\u001b[0m: \u001b[38;2;0;135;0m\"2020-12-01\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Metadata\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowResourceArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Parameters\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Actions\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceType\"\u001b[0m: \u001b[38;2;0;135;0m\"ModelPackage\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Properties\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineExecutionArn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineName\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SagemakerEvaluation-Deterministic\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Contexts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextType\"\u001b[0m: \u001b[38;2;0;135;0m\"PipelineExecution\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Action\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Context\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateBaseModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mample-name-aovqo\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"BenchmarkEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"strategy\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"evaluation_metric\"\u001b[0m: \u001b[38;2;0;135;0m\"all\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_model_len\"\u001b[0m: \u001b[38;2;0;135;0m\"12000\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"aggregation\"\u001b[0m: \u001b[38;2;0;135;0m\"\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"postprocessing\"\u001b[0m: \u001b[38;2;0;135;0m\"False\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/ex\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mample-name-aovqo\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"BenchmarkEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"strategy\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"evaluation_metric\"\u001b[0m: \u001b[38;2;0;135;0m\"all\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_model_len\"\u001b[0m: \u001b[38;2;0;135;0m\"12000\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"aggregation\"\u001b[0m: \u001b[38;2;0;135;0m\"\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"postprocessing\"\u001b[0m: \u001b[38;2;0;135;0m\"False\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"AssociateLineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Artifacts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"base-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateBaseModel.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"base-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found existing pipeline: execution.py:199\n", - " SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b2 \n", - " 9171c42 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing pipeline: \u001b]8;id=588942;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=925025;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#199\u001b\\\u001b[2m199\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b2\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m9171c42\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Updating pipeline execution.py:202\n", - " SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b2 \n", - " 9171c42 with latest definition \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline \u001b]8;id=746487;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=234699;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#202\u001b\\\u001b[2m202\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b2\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m9171c42\u001b[0m with latest definition \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Updating pipeline resource. resources.py:30306\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline resource. \u001b]8;id=908194;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=233215;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30306\u001b\\\u001b[2m30306\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:40:22] INFO Successfully updated pipeline: execution.py:208\n", - " SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b2 \n", - " 9171c42 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:40:22]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully updated pipeline: \u001b]8;id=321336;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=381496;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#208\u001b\\\u001b[2m208\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b2\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m9171c42\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Starting pipeline execution: gen-qa-eval-demo-1764452422 execution.py:263\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Starting pipeline execution: gen-qa-eval-demo-\u001b[1;36m1764452422\u001b[0m \u001b]8;id=359442;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=958972;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#263\u001b\\\u001b[2m263\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Pipeline execution started: execution.py:274\n", - " arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \n", - " -BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/9 \n", - " 5qr3e96dblb \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline execution started: \u001b]8;id=73999;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=223527;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#274\u001b\\\u001b[2m274\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -BenchmarkEvaluation-\u001b[93mc344c91d-6f62-4907-85cc-7e6b29171c42\u001b[0m/execution/9 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 5qr3e96dblb \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BenchmarkEvaluationExecution(\n", - "│ arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/95qr3e96dblb',\n", - "│ name='gen-qa-eval-demo',\n", - "│ status=PipelineExecutionStatus(overall_status='Executing', step_details=[], failure_reason=None),\n", - "│ last_modified_time=datetime.datetime(2025, 11, 29, 13, 40, 22, 284000, tzinfo=tzlocal()),\n", - "│ eval_type=<EvalType.BENCHMARK: 'benchmark'>,\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ steps=[]\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchmarkEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/95qr3e96dblb'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m, \u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m29\u001b[0m, \u001b[1;36m13\u001b[0m, \u001b[1;36m40\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m284000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225mEvalType.BENCHMARK:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'benchmark'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Pipeline Execution ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/95qr3e96dblb\n", - "Initial Status: Executing\n" - ] - } - ], "source": [ "# Run evaluation with configured parameters\n", "execution = evaluator.evaluate()\n", @@ -1706,7 +207,9 @@ "\n", "print(f\"\\nPipeline Execution ARN: {execution.arn}\")\n", "print(f\"Initial Status: {execution.status.overall_status}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -1719,14 +222,14 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "# Override subtasks at evaluation time\n", "# execution = mmlu_evaluator.evaluate(subtask=\"abstract_algebra\") # Single subtask\n", "# execution = mmlu_evaluator.evaluate(subtask=[\"abstract_algebra\", \"anatomy\"]) # Multiple subtasks" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -1739,92 +242,7 @@ }, { "cell_type": "code", - "execution_count": 5, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n", - "│ overall_status='Executing',\n", - "│ step_details=[\n", - "│ │ StepDetail(\n", - "│ │ │ name='EvaluateCustomModel',\n", - "│ │ │ status='Executing',\n", - "│ │ │ start_time='2025-11-29T13:26:38.084000-08:00',\n", - "│ │ │ end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ ),\n", - "│ │ StepDetail(\n", - "│ │ │ name='EvaluateBaseModel',\n", - "│ │ │ status='Executing',\n", - "│ │ │ start_time='2025-11-29T13:26:38.083000-08:00',\n", - "│ │ │ end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ ),\n", - "│ │ StepDetail(\n", - "│ │ │ name='CreateEvaluationAction',\n", - "│ │ │ status='Succeeded',\n", - "│ │ │ start_time='2025-11-29T13:26:38.083000-08:00',\n", - "│ │ │ end_time='2025-11-29T13:26:42.759000-08:00',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ )\n", - "│ ],\n", - "│ failure_reason=None\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:26:38.084000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x120de0b60>'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1;39m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'EvaluateBaseModel'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'Executing'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'2025-11-29T13:26:38.083000-08:00'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'
╭─────────────────────────────────────────── Pipeline Execution Status ───────────────────────────────────────────╮\n", - "│ Overall Status Succeeded │\n", - "│ Target Status Succeeded │\n", - "│ Elapsed Time 0.5s │\n", - "│ │\n", - "│ Pipeline Steps │\n", - "│ Step Name Status Duration │\n", - "│ AssociateLineage Succeeded 3.3s │\n", - "│ EvaluateCustomModel Succeeded 3714.0s │\n", - "│ EvaluateBaseModel Succeeded 5366.2s │\n", - "│ CreateEvaluationAction Succeeded 2.7s │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mPipeline Execution Status\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mOverall Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTarget Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[37m0.5s \u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35mPipeline Steps\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep Name \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mStatus \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mAssociateLineage \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m3.3s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m3714.0s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateBaseModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m5366.2s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mCreateEvaluationAction \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m2.7s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:20] INFO Final Resource Status: Succeeded execution.py:979\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:20]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: Succeeded \u001b]8;id=401306;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=749;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#979\u001b\\\u001b[2m979\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Final Status: Succeeded\n" - ] - } - ], "source": [ "# Wait for job completion with progress updates\n", "# This will show a rich progress display in Jupyter\n", "execution.wait(target_status=\"Succeeded\", poll=5, timeout=3600)\n", "\n", "print(f\"\\nFinal Status: {execution.status.overall_status}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -1956,348 +319,14 @@ }, { "cell_type": "code", - "execution_count": 10, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
's3://mufi-test-serverless-smtj/eval/'\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "[11/29/25 16:21:25] INFO S3 bucket: mufi-test-serverless-smtj, prefix: eval show_results_utils.py:130\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:25]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 bucket: mufi-test-serverless-smtj, prefix: eval \u001b]8;id=671086;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=908024;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#130\u001b\\\u001b[2m130\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Extracted training job name: show_results_utils.py:63\n", - " pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7 from \n", - " step: EvaluateCustomModel \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=813615;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=57499;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#63\u001b\\\u001b[2m63\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7 from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModel \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:26] INFO Extracted training job name: show_results_utils.py:63\n", - " pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI from \n", - " step: EvaluateBaseModel \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:26]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=745707;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=953308;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#63\u001b\\\u001b[2m63\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateBaseModel \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for results_*.json in show_results_utils.py:150\n", - " s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E \n", - " valuateCustomModel-F51y8F3Pg7/output/output/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for results_*.json in \u001b]8;id=805603;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=739949;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateCustomModel-F51y8F3Pg7/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found results file: show_results_utils.py:168\n", - " eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/o \n", - " utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct- \n", - " -or8pa/eval_results/results_2025-11-29T22-41-53.186048+00-00 \n", - " .json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=188825;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=667854;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/o \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct- \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -or8pa/eval_results/results_2025-\u001b[1;36m11\u001b[0m-29T22-\u001b[1;36m41\u001b[0m-\u001b[1;36m53.186048\u001b[0m+\u001b[1;36m00-00\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m.j\u001b[0mson \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for results_*.json in show_results_utils.py:150\n", - " s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E \n", - " valuateBaseModel-VA9YzcdIVI/output/output/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for results_*.json in \u001b]8;id=270113;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=844454;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateBaseModel-VA9YzcdIVI/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found results file: show_results_utils.py:168\n", - " eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/out \n", - " put/output/eval-meta_textgeneration_llama_3_2_1b_instruct--o \n", - " r8pa/eval_results/results_2025-11-29T23-09-21.277725+00-00.j \n", - " son \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=221667;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=736866;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/out \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m put/output/eval-meta_textgeneration_llama_3_2_1b_instruct--o \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m r8pa/eval_results/results_2025-\u001b[1;36m11\u001b[0m-29T23-\u001b[1;36m09\u001b[0m-\u001b[1;36m21.277725\u001b[0m+\u001b[1;36m00-00.j\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m son \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using metrics from 'all' key (standard benchmark format) show_results_utils.py:93\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using metrics from \u001b[38;2;0;135;0m'all'\u001b[0m key \u001b[1m(\u001b[0mstandard benchmark format\u001b[1m)\u001b[0m \u001b]8;id=431825;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=75452;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#93\u001b\\\u001b[2m93\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using metrics from 'all' key (standard benchmark format) show_results_utils.py:93\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using metrics from \u001b[38;2;0;135;0m'all'\u001b[0m key \u001b[1m(\u001b[0mstandard benchmark format\u001b[1m)\u001b[0m \u001b]8;id=866976;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=697222;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#93\u001b\\\u001b[2m93\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Custom Model Results \n", - "╭────────────────────────────────┬─────────────────╮\n", - "│ Metric │ Value │\n", - "├────────────────────────────────┼─────────────────┤\n", - "│ bleu │ 6.6928 │\n", - "│ bleu_stderr │ 0.7801 │\n", - "│ em │ 1.23% │\n", - "│ em_stderr │ 0.0018 │\n", - "│ f1 │ 19.04% │\n", - "│ f1_score_quasi │ 25.25% │\n", - "│ f1_score_quasi_stderr │ 0.0049 │\n", - "│ f1_stderr │ 0.0047 │\n", - "│ qem │ 2.16% │\n", - "│ qem_stderr │ 0.0024 │\n", - "│ rouge1 │ 25.69% │\n", - "│ rouge1_stderr │ 0.0047 │\n", - "│ rouge2 │ 19.09% │\n", - "│ rouge2_stderr │ 0.0047 │\n", - "│ rougeL │ 25.02% │\n", - "│ rougeL_stderr │ 0.0047 │\n", - "╰────────────────────────────────┴─────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3;32mCustom Model Results\u001b[0m\u001b[3m \u001b[0m\n", - "╭────────────────────────────────┬─────────────────╮\n", - "│\u001b[1;32m \u001b[0m\u001b[1;32mMetric \u001b[0m\u001b[1;32m \u001b[0m│\u001b[1;32m \u001b[0m\u001b[1;32m Value\u001b[0m\u001b[1;32m \u001b[0m│\n", - "├────────────────────────────────┼─────────────────┤\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 6.6928\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.7801\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 1.23%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0018\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.04%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.25%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0049\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 2.16%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0024\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.69%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.09%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.02%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "╰────────────────────────────────┴─────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Base Model Results \n", - "╭────────────────────────────────┬─────────────────╮\n", - "│ Metric │ Value │\n", - "├────────────────────────────────┼─────────────────┤\n", - "│ bleu │ 6.6928 │\n", - "│ bleu_stderr │ 0.7803 │\n", - "│ em │ 1.29% │\n", - "│ em_stderr │ 0.0019 │\n", - "│ f1 │ 19.09% │\n", - "│ f1_score_quasi │ 25.22% │\n", - "│ f1_score_quasi_stderr │ 0.0049 │\n", - "│ f1_stderr │ 0.0047 │\n", - "│ qem │ 2.18% │\n", - "│ qem_stderr │ 0.0024 │\n", - "│ rouge1 │ 25.61% │\n", - "│ rouge1_stderr │ 0.0047 │\n", - "│ rouge2 │ 19.04% │\n", - "│ rouge2_stderr │ 0.0047 │\n", - "│ rougeL │ 24.95% │\n", - "│ rougeL_stderr │ 0.0047 │\n", - "╰────────────────────────────────┴─────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3;33mBase Model Results\u001b[0m\u001b[3m \u001b[0m\n", - "╭────────────────────────────────┬─────────────────╮\n", - "│\u001b[1;33m \u001b[0m\u001b[1;33mMetric \u001b[0m\u001b[1;33m \u001b[0m│\u001b[1;33m \u001b[0m\u001b[1;33m Value\u001b[0m\u001b[1;33m \u001b[0m│\n", - "├────────────────────────────────┼─────────────────┤\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 6.6928\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.7803\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 1.29%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0019\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.09%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.22%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0049\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 2.18%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0024\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.61%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.04%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 24.95%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "╰────────────────────────────────┴─────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Result Artifacts Location ───────────────────────────────────────────╮\n", - "│ │\n", - "│ │\n", - "│ 📦 Full evaluation artifacts available at: │\n", - "│ │\n", - "│ Custom Model: │\n", - "│ s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/output/output/Non │\n", - "│ e/eval_results/ │\n", - "│ │\n", - "│ Base Model: │\n", - "│ s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/output/output/None/ │\n", - "│ eval_results/ │\n", - "│ │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mResult Artifacts Location\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m📦 \u001b[0m\u001b[1mFull evaluation artifacts available at:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;32mCustom Model:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7/output/output/Non\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36me/eval_results/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;33mBase Model:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-95qr3e96dblb-EvaluateBaseModel-VA9YzcdIVI/output/output/None/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36meval_results/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "pprint(execution.s3_output_path)\n", "# Display results in a formatted table\n", "execution.show_results()" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -2310,199 +339,7 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:35:47] INFO Extracted s3_output_path from training job execution.py:367\n", - " pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q: \n", - " s3://mufi-test-serverless-smtj/eval/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:35:47]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=148252;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=588100;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
BenchmarkEvaluationExecution(\n", - "│ arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/inlsexrd7jes',\n", - "│ name='inlsexrd7jes',\n", - "│ status=PipelineExecutionStatus(\n", - "│ │ overall_status='Executing',\n", - "│ │ step_details=[\n", - "│ │ │ StepDetail(\n", - "│ │ │ │ name='EvaluateCustomModel',\n", - "│ │ │ │ status='Executing',\n", - "│ │ │ │ start_time='2025-11-29T13:26:38.084000-08:00',\n", - "│ │ │ │ end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n", - "│ │ │ │ display_name=None,\n", - "│ │ │ │ failure_reason=None\n", - "│ │ │ ),\n", - "│ │ │ StepDetail(\n", - "│ │ │ │ name='EvaluateBaseModel',\n", - "│ │ │ │ status='Executing',\n", - "│ │ │ │ start_time='2025-11-29T13:26:38.083000-08:00',\n", - "│ │ │ │ end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120de0b60>',\n", - "│ │ │ │ display_name=None,\n", - "│ │ │ │ failure_reason=None\n", - "│ │ │ ),\n", - "│ │ │ StepDetail(\n", - "│ │ │ │ name='CreateEvaluationAction',\n", - "│ │ │ │ status='Succeeded',\n", - "│ │ │ │ start_time='2025-11-29T13:26:38.083000-08:00',\n", - "│ │ │ │ end_time='2025-11-29T13:26:42.759000-08:00',\n", - "│ │ │ │ display_name=None,\n", - "│ │ │ │ failure_reason=None\n", - "│ │ │ )\n", - "│ │ ],\n", - "│ │ failure_reason=None\n", - "│ ),\n", - "│ last_modified_time=datetime.datetime(2025, 11, 29, 13, 26, 37, 300000, tzinfo=tzlocal()),\n", - "│ eval_type=<EvalType.BENCHMARK: 'benchmark'>,\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ steps=[]\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchmarkEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-BenchmarkEvaluation-c344c91d-6f62-4907-85cc-7e6b29171c42/execution/inlsexrd7jes'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'inlsexrd7jes'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:26:38.084000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x120de0b60>'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m\u001b[39m=\u001b[0m\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1;39m(\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'EvaluateBaseModel'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'Executing'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'2025-11-29T13:26:38.083000-08:00'\u001b[0m\u001b[39m,\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m\u001b[39m=\u001b[0m\u001b[38;2;0;135;0m'
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n", - "│ in <module>:22 │\n", - "│ │\n", - "│ 19 pprint(existing_execution) │\n", - "│ 20 print(f\"\\nStatus: {existing_execution.status.overall_status}\") │\n", - "│ 21 │\n", - "│ ❱ 22 existing_execution.show_results() │\n", - "│ 23 │\n", - "│ │\n", - "│ /Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/tele │\n", - "│ metry_logging.py:175 in wrapper │\n", - "│ │\n", - "│ 172 │ │ │ │ │ \"sagemaker_session is not provided or not valid.\", │\n", - "│ 173 │ │ │ │ │ func_name, │\n", - "│ 174 │ │ │ │ ) │\n", - "│ ❱ 175 │ │ │ │ return func(*args, **kwargs) │\n", - "│ 176 │ │ │\n", - "│ 177 │ │ return wrapper │\n", - "│ 178 │\n", - "│ │\n", - "│ /Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/exe │\n", - "│ cution.py:1223 in show_results │\n", - "│ │\n", - "│ 1220 │ │ self.refresh() │\n", - "│ 1221 │ │ │\n", - "│ 1222 │ │ if self.status.overall_status != \"Succeeded\": │\n", - "│ ❱ 1223 │ │ │ raise ValueError( │\n", - "│ 1224 │ │ │ │ f\"Cannot show results. Execution status is '{self.status.overall_status} │\n", - "│ 1225 │ │ │ │ f\"Results are only available after successful execution. \" │\n", - "│ 1226 │ │ │ │ f\"Use execution.wait() to wait for completion or check execution.status │\n", - "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "ValueError: Cannot show results. Execution status is 'Executing'. Results are only available after successful \n", - "execution. Use execution.wait() to wait for completion or check execution.status for details.\n", - "\n" - ], - "text/plain": [ - "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m in \u001b[92m
[11/22/25 12:24:36] INFO Updating pipeline resource. resources.py:30485\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/22/25 12:24:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline resource. \u001b]8;id=707103;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=260368;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/resources.py#30485\u001b\\\u001b[2m30485\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO - sagemaker_core.main.resources - Updating pipeline resource.\n", - "INFO - sagemaker.modules.evaluate.execution - Successfully updated pipeline: SagemakerEvaluation-benchmark\n", - "INFO - sagemaker.modules.evaluate.execution - Starting pipeline execution: gen-qa-eval-demo-1763843077\n", - "INFO - sagemaker.modules.evaluate.execution - Pipeline execution started: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8\n" - ] - }, - { - "data": { - "text/html": [ - "
BenchmarkEvaluationExecution(\n", - "│ arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8',\n", - "│ name='gen-qa-eval-demo',\n", - "│ status=PipelineExecutionStatus(overall_status='Executing', step_details=[], failure_reason=None),\n", - "│ last_modified_time=datetime.datetime(2025, 11, 22, 12, 24, 37, 828000, tzinfo=tzlocal()),\n", - "│ eval_type=<EvalType.BENCHMARK: 'benchmark'>,\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ steps=[]\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mBenchmarkEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'gen-qa-eval-demo'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m, \u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m12\u001b[0m, \u001b[1;36m24\u001b[0m, \u001b[1;36m37\u001b[0m, \u001b[1;36m828000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225mEvalType.BENCHMARK:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'benchmark'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Pipeline Execution ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-benchmark/execution/gv93gtwgr7w8\n", - "Initial Status: Executing\n" - ] - } - ], "source": [ "# Run evaluation with configured parameters\n", "execution = evaluator.evaluate()\n", @@ -2614,7 +374,9 @@ "\n", "print(f\"\\nPipeline Execution ARN: {execution.arn}\")\n", "print(f\"Initial Status: {execution.status.overall_status}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -2627,54 +389,7 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:41:19] INFO Extracted s3_output_path from training job execution.py:367\n", - " pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7: \n", - " s3://mufi-test-serverless-smtj/eval/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:41:19]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=166943;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=816278;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-95qr3e96dblb-EvaluateCustomModel-F51y8F3Pg7: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Extracted s3_output_path from training job execution.py:367\n", - " pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q: \n", - " s3://mufi-test-serverless-smtj/eval/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=521868;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=351282;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-inlsexrd7jes-EvaluateCustomModel-NuPrIoRW4Q: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 2 evaluation(s)\n", - "\n", - " 95qr3e96dblb: Executing\n", - " inlsexrd7jes: Executing\n" - ] - } - ], "source": [ "# Get all benchmark evaluations (returns iterator)\n", "all_executions_iter = BenchMarkEvaluator.get_all(region=\"us-west-2\")\n", @@ -2683,7 +398,9 @@ "print(f\"Found {len(all_executions)} evaluation(s)\\n\")\n", "for exec in all_executions[:5]: # Show first 5\n", " print(f\" {exec.name}: {exec.status.overall_status}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -2696,73 +413,14 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/shapes.py:2350: UserWarning: Field name \"schema\" in \"AutoMLSnowflakeDatasetDefinition\" shadows an attribute in parent \"Base\"\n", - " class AutoMLSnowflakeDatasetDefinition(Base):\n", - "/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/shapes.py:6372: UserWarning: Field name \"schema\" in \"SnowflakeDatasetDefinition\" shadows an attribute in parent \"Base\"\n", - " class SnowflakeDatasetDefinition(Base):\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
[11/22/25 18:32:01] WARNING No boto3 session provided. Creating a new session. utils.py:339\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/22/25 18:32:01]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No boto3 session provided. Creating a new session. \u001b]8;id=549422;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=573139;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py#339\u001b\\\u001b[2m339\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
WARNING No config provided. Using default config. utils.py:347\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No config provided. Using default config. \u001b]8;id=278829;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=978800;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/sagemaker_core/main/utils.py#347\u001b\\\u001b[2m347\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Succeeded\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "AWS service error when stopping pipeline execution: Pipeline execution with ARN arn:aws:sagemaker:us-west-2:052150106756:pipeline/sagemakerevaluation-benchmark/execution/7rr30o7c2qfb status 'Succeeded'. Only pipelines with 'Executing' status can be stopped.\n" - ] - } - ], "source": [ "# Uncomment to stop the job\n", "# existing_execution.stop()\n", "# print(f\"Execution stopped. Status: {execution.status.overall_status}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", diff --git a/v3-examples/model-customization-examples/custom_scorer_demo.ipynb b/v3-examples/model-customization-examples/custom_scorer_demo.ipynb index 6cf049cb79..c3706c9cf9 100644 --- a/v3-examples/model-customization-examples/custom_scorer_demo.ipynb +++ b/v3-examples/model-customization-examples/custom_scorer_demo.ipynb @@ -19,10 +19,19 @@ ] }, { - "cell_type": "code", - "execution_count": 1, "metadata": {}, + "cell_type": "code", + "source": [ + "# Configure AWS credentials and region\n", + "#! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", + "#! aws configure set region us-west-2" + ], "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "metadata": {}, "source": [ "from sagemaker.train.evaluate import CustomScorerEvaluator\n", "from rich.pretty import pprint\n", @@ -33,7 +42,9 @@ " level=logging.INFO,\n", " format='%(levelname)s - %(name)s - %(message)s'\n", ")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -46,48 +57,36 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Configuration:\n", - " Evaluator: arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1\n", - " Dataset: s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\n", - " Base Model: arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\n", - " Output Location: s3://mufi-test-serverless-smtj/eval/\n" - ] - } - ], "source": [ "# Evaluator ARN (custom evaluator from AI Registry)\n", - "# evaluator_arn = \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/JsonDoc/00-goga-qa-evaluation/1.0.0\"\n", - "# evaluator_arn = \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/JsonDoc/nikmehta-reward-function/1.0.0\"\n", - "# evaluator_arn = \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/JsonDoc/eval-lambda-test/0.0.1\"\n", - "evaluator_arn = \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1\"\n", + "# evaluator_arn = \"arn:aws:sagemaker:us-west-2:<>:hub-content/AIRegistry/JsonDoc/00-goga-qa-evaluation/1.0.0\"\n", + "# evaluator_arn = \"arn:aws:sagemaker:us-west-2:<>:hub-content/AIRegistry/JsonDoc/nikmehta-reward-function/1.0.0\"\n", + "# evaluator_arn = \"arn:aws:sagemaker:us-west-2:<>:hub-content/AIRegistry/JsonDoc/eval-lambda-test/0.0.1\"\n", + "evaluator_arn = \"arn:aws:sagemaker:us-west-2:<>:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1\"\n", "\n", "# Dataset - can be S3 URI or AIRegistry DataSet ARN\n", - "dataset = \"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\n", + "dataset = \"s3://sagemaker-us-west-2-<>/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\n", "\n", "# Base model - can be:\n", "# 1. Model package ARN: \"arn:aws:sagemaker:region:account:model-package/name/version\"\n", "# 2. JumpStart model ID: \"llama-3-2-1b-instruct\" [Evaluation with Base Model Only is yet to be implemented/tested - Not Working currently]\n", - "base_model = \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\"\n", + "base_model = \"arn:aws:sagemaker:us-west-2:<>:model-package/test-finetuned-models-gamma/28\"\n", "\n", "# S3 location for outputs\n", "s3_output_path = \"s3://mufi-test-serverless-smtj/eval/\"\n", "\n", "# Optional: MLflow tracking server ARN\n", - "mlflow_resource_arn = \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment\"\n", + "mlflow_resource_arn = \"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/mmlu-eval-experiment\"\n", "\n", "print(\"Configuration:\")\n", "print(f\" Evaluator: {evaluator_arn}\")\n", "print(f\" Dataset: {dataset}\")\n", "print(f\" Base Model: {base_model}\")\n", "print(f\" Output Location: {s3_output_path}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -103,99 +102,7 @@ }, { "cell_type": "code", - "execution_count": 3, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:42:33] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1364\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:33]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=639873;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=963387;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
INFO Resolved MLflow resource ARN: base_evaluator.py:113\n", - " arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \n", - " mmlu-eval-experiment \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved MLflow resource ARN: \u001b]8;id=342593;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=318918;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#113\u001b\\\u001b[2m113\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mmlu-eval-experiment \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "✓ CustomScorerEvaluator created successfully\n" - ] - }, - { - "data": { - "text/html": [ - "
CustomScorerEvaluator(\n", - "│ region=None,\n", - "│ sagemaker_session=<sagemaker.core.helper.session_helper.Session object at 0x116ae9f40>,\n", - "│ model='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28',\n", - "│ base_eval_name='eval-meta-1b49b716',\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n", - "│ mlflow_experiment_name=None,\n", - "│ mlflow_run_name=None,\n", - "│ networking=None,\n", - "│ kms_key_id=None,\n", - "│ model_package_group=None,\n", - "│ evaluator='arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1',\n", - "│ dataset='s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl',\n", - "│ evaluate_base_model=False\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mCustomScorerEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker.core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x116ae9f40\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m=\u001b[38;2;0;135;0m'eval-meta-1b49b716'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluator\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-test/0.0.1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;215;0;0mFalse\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "# Create evaluator with custom evaluator ARN\n", "evaluator = CustomScorerEvaluator(\n", @@ -204,13 +111,15 @@ " model=base_model,\n", " s3_output_path=s3_output_path,\n", " mlflow_resource_arn=mlflow_resource_arn,\n", - " # model_package_group=\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/Demo-test-deb-2\", \n", + " # model_package_group=\"arn:aws:sagemaker:us-west-2:<>:model-package-group/Demo-test-deb-2\", \n", " evaluate_base_model=False # Set to True to also evaluate the base model\n", ")\n", "\n", "print(\"\\n✓ CustomScorerEvaluator created successfully\")\n", "pprint(evaluator)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -221,156 +130,7 @@ }, { "cell_type": "code", - "execution_count": 4, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:42:38] INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:38]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=848286;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=998219;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Fetching evaluation override parameters for custom_scorer_evaluator.py:236\n", - " hyperparameters property \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching evaluation override parameters for \u001b]8;id=20210;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=113368;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#236\u001b\\\u001b[2m236\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m hyperparameters property \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Fetching hub content metadata for recipe_utils.py:201\n", - " meta-textgeneration-llama-3-2-1b-instruct from SageMakerPublicHub \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Fetching hub content metadata for \u001b]8;id=402391;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=385188;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#201\u001b\\\u001b[2m201\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct from SageMakerPublicHub \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
WARNING No region provided. Using default region. utils.py:340\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No region provided. Using default region. \u001b]8;id=442028;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=947914;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#340\u001b\\\u001b[2m340\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Runs on sagemaker us-west-2, region:us-west-2 utils.py:354\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Runs on sagemaker us-west-\u001b[1;36m2\u001b[0m, region:us-west-\u001b[1;36m2\u001b[0m \u001b]8;id=708289;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=968385;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/utils/utils.py#354\u001b\\\u001b[2m354\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for evaluation recipe with Type='Evaluation' and recipe_utils.py:221\n", - " EvaluationType='DeterministicEvaluation' \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for evaluation recipe with \u001b[38;2;215;175;0mType\u001b[0m=\u001b[38;2;0;135;0m'Evaluation'\u001b[0m and \u001b]8;id=711157;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=750371;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#221\u001b\\\u001b[2m221\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;215;175;0mEvaluationType\u001b[0m=\u001b[38;2;0;135;0m'DeterministicEvaluation'\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Downloading override parameters from recipe_utils.py:249\n", - " s3://jumpstart-cache-beta-us-west-2/recipes/open-source-eval-meta- \n", - " textgeneration-llama-3-2-1b-instruct-deterministic_override_params \n", - " _sm_jobs_v1.0.19.json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Downloading override parameters from \u001b]8;id=762518;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py\u001b\\\u001b[2mrecipe_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=755839;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/recipe_utils.py#249\u001b\\\u001b[2m249\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/jumpstart-cache-beta-us-west-2/recipes/\u001b[0m\u001b[38;2;225;0;225mopen-source-eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mtextgeneration-llama-3-2-1b-instruct-deterministic_override_params\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m_sm_jobs_v1.0.19.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
{\n", - "│ 'max_new_tokens': '8192',\n", - "│ 'temperature': '0',\n", - "│ 'top_k': '-1',\n", - "│ 'top_p': '1.0',\n", - "│ 'aggregation': '',\n", - "│ 'postprocessing': 'False',\n", - "│ 'max_model_len': '12000'\n", - "}\n", - "\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "pprint(evaluator.hyperparameters.to_dict())\n", "\n", @@ -379,7 +139,9 @@ "\n", "# optionally get more info on types, limits, defaults.\n", "# evaluator.hyperparameters.get_info()" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -392,9 +154,7 @@ }, { "cell_type": "code", - "execution_count": 5, "metadata": {}, - "outputs": [], "source": [ "# Example with built-in metrics (commented out)\n", "# from sagemaker.train.evaluate import get_builtin_metrics\n", @@ -407,7 +167,9 @@ "# base_model=base_model,\n", "# s3_output_path=s3_output_path\n", "# )" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -423,916 +185,7 @@ }, { "cell_type": "code", - "execution_count": 5, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 13:42:43] INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:43]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=201476;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=125527;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Getting or creating artifact for source: base_evaluator.py:597\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Getting or creating artifact for source: \u001b]8;id=336129;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=429516;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#597\u001b\\\u001b[2m597\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for existing artifact for model package: base_evaluator.py:459\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for existing artifact for model package: \u001b]8;id=916341;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=92767;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#459\u001b\\\u001b[2m459\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found existing artifact: base_evaluator.py:468\n", - " arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \n", - " 138877d772ec489bef \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing artifact: \u001b]8;id=110957;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=865654;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#468\u001b\\\u001b[2m468\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 138877d772ec489bef \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Inferred model package group ARN: base_evaluator.py:386\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \n", - " t-finetuned-models-gamma from \n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Inferred model package group ARN: \u001b]8;id=126121;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=198580;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#386\u001b\\\u001b[2m386\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Automatically inferred model_package_group: base_evaluator.py:421\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \n", - " t-finetuned-models-gamma \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Automatically inferred model_package_group: \u001b]8;id=183930;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=417470;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#421\u001b\\\u001b[2m421\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using ModelPackage - model_package_group_arn: custom_scorer_evaluator.py:421\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package- \n", - " group/test-finetuned-models-gamma \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using ModelPackage - model_package_group_arn: \u001b]8;id=191140;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=51752;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#421\u001b\\\u001b[2m421\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package- \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m group/test-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Resolved model info - base_model_name: custom_scorer_evaluator.py:424\n", - " meta-textgeneration-llama-3-2-1b-instruct, \n", - " base_model_arn: \n", - " arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPu \n", - " blicHub/Model/meta-textgeneration-llama-3-2-1b-instruct \n", - " /1.10.0, source_model_package_arn: \n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/ \n", - " test-finetuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved model info - base_model_name: \u001b]8;id=359160;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=935533;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#424\u001b\\\u001b[2m424\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m base_model_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPu \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m blicHub/Model/meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m /\u001b[1;36m1.10\u001b[0m.\u001b[1;36m0\u001b[0m, source_model_package_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m test-finetuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=189431;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=22751;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using configured hyperparameters: {'max_new_tokens': custom_scorer_evaluator.py:299\n", - " '8192', 'temperature': '0', 'top_k': '-1', 'top_p': \n", - " '1.0', 'aggregation': '', 'postprocessing': 'False', \n", - " 'max_model_len': '12000'} \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using configured hyperparameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b]8;id=536279;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py\u001b\\\u001b[2mcustom_scorer_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=194605;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/custom_scorer_evaluator.py#299\u001b\\\u001b[2m299\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m''\u001b[0m, \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'False'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using full template for ModelPackage base_evaluator.py:655\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using full template for ModelPackage \u001b]8;id=164880;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=880373;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#655\u001b\\\u001b[2m655\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:42:44] INFO Resolved template parameters: {'role_arn': base_evaluator.py:693\n", - " 'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server \n", - " /mmlu-eval-experiment', 'mlflow_experiment_name': None, \n", - " 'mlflow_run_name': None, 'model_package_group_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te \n", - " st-finetuned-models-gamma', 'source_model_package_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28', 'base_model_arn': \n", - " 'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0', \n", - " 's3_output_path': 's3://mufi-test-serverless-smtj/eval/', \n", - " 'dataset_artifact_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b \n", - " 3138877d772ec489bef', 'action_arn_prefix': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:action', \n", - " 'dataset_uri': \n", - " 's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19 \n", - " 5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl', 'task': \n", - " 'gen_qa', 'strategy': 'gen_qa', 'evaluation_metric': 'all', \n", - " 'pipeline_name': 'SagemakerEvaluation-Deterministic', \n", - " 'evaluate_base_model': False, 'evaluator_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW \n", - " PZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t \n", - " est/0.0.1', 'max_new_tokens': '8192', 'temperature': '0', \n", - " 'top_k': '-1', 'top_p': '1.0', 'aggregation': 'mean', \n", - " 'postprocessing': 'True', 'max_model_len': '12000'} \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:44]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved template parameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'role_arn'\u001b[0m: \u001b]8;id=863350;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=151185;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#693\u001b\\\u001b[2m693\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:iam::052150106756:role/Admin'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_resource_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_experiment_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'mlflow_run_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[38;2;0;135;0m'model_package_group_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma'\u001b[0m, \u001b[38;2;0;135;0m'source_model_package_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28'\u001b[0m, \u001b[38;2;0;135;0m'base_model_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3_output_path'\u001b[0m: \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_artifact_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef'\u001b[0m, \u001b[38;2;0;135;0m'action_arn_prefix'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:action'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_uri'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl'\u001b[0m, \u001b[38;2;0;135;0m'task'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'strategy'\u001b[0m: \u001b[38;2;0;135;0m'gen_qa'\u001b[0m, \u001b[38;2;0;135;0m'evaluation_metric'\u001b[0m: \u001b[38;2;0;135;0m'all'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'pipeline_name'\u001b[0m: \u001b[38;2;0;135;0m'SagemakerEvaluation-Deterministic'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'evaluate_base_model'\u001b[0m: \u001b[3;38;2;215;0;0mFalse\u001b[0m, \u001b[38;2;0;135;0m'evaluator_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mest/0.0.1'\u001b[0m, \u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[38;2;0;135;0m'aggregation'\u001b[0m: \u001b[38;2;0;135;0m'mean'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'postprocessing'\u001b[0m: \u001b[38;2;0;135;0m'True'\u001b[0m, \u001b[38;2;0;135;0m'max_model_len'\u001b[0m: \u001b[38;2;0;135;0m'12000'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Rendered pipeline definition: base_evaluator.py:702\n", - " { \n", - " \"Version\": \"2020-12-01\", \n", - " \"Metadata\": {}, \n", - " \"MlflowConfig\": { \n", - " \"MlflowResourceArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server \n", - " /mmlu-eval-experiment\" \n", - " }, \n", - " \"Parameters\": [], \n", - " \"Steps\": [ \n", - " { \n", - " \"Name\": \"CreateEvaluationAction\", \n", - " \"Type\": \"Lineage\", \n", - " \"Arguments\": { \n", - " \"Actions\": [ \n", - " { \n", - " \"ActionName\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"ActionType\": \"Evaluation\", \n", - " \"Source\": { \n", - " \"SourceUri\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\", \n", - " \"SourceType\": \"ModelPackage\" \n", - " }, \n", - " \"Properties\": { \n", - " \"PipelineExecutionArn\": { \n", - " \"Get\": \"Execution.PipelineExecutionArn\" \n", - " }, \n", - " \"PipelineName\": \n", - " \"SagemakerEvaluation-Deterministic\" \n", - " } \n", - " } \n", - " ], \n", - " \"Contexts\": [ \n", - " { \n", - " \"ContextName\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"ContextType\": \"PipelineExecution\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \"Execution.PipelineExecutionArn\" \n", - " } \n", - " } \n", - " } \n", - " ], \n", - " \"Associations\": [ \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"Type\": \"Action\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Name\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"Type\": \"Context\" \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " }, \n", - " { \n", - " \"Source\": { \n", - " \"Arn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b \n", - " 3138877d772ec489bef\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"EvaluateCustomModel\", \n", - " \"Type\": \"Training\", \n", - " \"Arguments\": { \n", - " \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\", \n", - " \"ModelPackageConfig\": { \n", - " \"ModelPackageGroupArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te \n", - " st-finetuned-models-gamma\", \n", - " \"SourceModelPackageArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\" \n", - " }, \n", - " \"ServerlessJobConfig\": { \n", - " \"BaseModelArn\": \n", - " \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\", \n", - " \"AcceptEula\": true, \n", - " \"JobType\": \"Evaluation\", \n", - " \"EvaluationType\": \"CustomScorerEvaluation\", \n", - " \"EvaluatorArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW \n", - " PZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t \n", - " est/0.0.1\" \n", - " }, \n", - " \"StoppingCondition\": { \n", - " \"MaxRuntimeInSeconds\": 86400 \n", - " }, \n", - " \"HyperParameters\": { \n", - " \"task\": \"gen_qa\", \n", - " \"strategy\": \"gen_qa\", \n", - " \"evaluation_metric\": \"all\", \n", - " \"max_new_tokens\": \"8192\", \n", - " \"temperature\": \"0\", \n", - " \"top_k\": \"-1\", \n", - " \"top_p\": \"1.0\", \n", - " \"max_model_len\": \"12000\", \n", - " \"aggregation\": \"mean\", \n", - " \"postprocessing\": \"True\" \n", - " }, \n", - " \"OutputDataConfig\": { \n", - " \"S3OutputPath\": \n", - " \"s3://mufi-test-serverless-smtj/eval/\", \n", - " \"CompressionType\": \"NONE\" \n", - " }, \n", - " \"InputDataConfig\": [ \n", - " { \n", - " \"ChannelName\": \"train\", \n", - " \"DataSource\": { \n", - " \"S3DataSource\": { \n", - " \"S3DataType\": \"S3Prefix\", \n", - " \"S3Uri\": \n", - " \"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19 \n", - " 5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\" \n", - " } \n", - " } \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"AssociateLineage\", \n", - " \"Type\": \"Lineage\", \n", - " \"DependsOn\": [ \n", - " \"CreateEvaluationAction\" \n", - " ], \n", - " \"Arguments\": { \n", - " \"Artifacts\": [ \n", - " { \n", - " \"ArtifactName\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"ArtifactType\": \"EvaluationReport\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \n", - " \"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\" \n", - " } \n", - " } \n", - " } \n", - " ], \n", - " \"Associations\": [ \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"Type\": \"Artifact\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " ] \n", - " } \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Rendered pipeline definition: \u001b]8;id=395506;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=123517;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#702\u001b\\\u001b[2m702\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Version\"\u001b[0m: \u001b[38;2;0;135;0m\"2020-12-01\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Metadata\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowResourceArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Parameters\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Actions\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceType\"\u001b[0m: \u001b[38;2;0;135;0m\"ModelPackage\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Properties\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineExecutionArn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineName\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SagemakerEvaluation-Deterministic\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Contexts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextType\"\u001b[0m: \u001b[38;2;0;135;0m\"PipelineExecution\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Action\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Context\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"CustomScorerEvaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluatorArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKW\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/JsonDoc/eval-lambda-t\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mest/0.0.1\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"strategy\"\u001b[0m: \u001b[38;2;0;135;0m\"gen_qa\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"evaluation_metric\"\u001b[0m: \u001b[38;2;0;135;0m\"all\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_model_len\"\u001b[0m: \u001b[38;2;0;135;0m\"12000\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"aggregation\"\u001b[0m: \u001b[38;2;0;135;0m\"mean\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"postprocessing\"\u001b[0m: \u001b[38;2;0;135;0m\"True\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t19\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m5443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"AssociateLineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Artifacts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomModel.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO No existing pipeline found with prefix execution.py:212\n", - " SagemakerEvaluation-CustomScorerEvaluation, creating new one \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m No existing pipeline found with prefix \u001b]8;id=437465;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=501901;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#212\u001b\\\u001b[2m212\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation, creating new one \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Creating new pipeline: execution.py:57\n", - " SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82 \n", - " 3cbe579c3 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating new pipeline: \u001b]8;id=91501;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=923226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#57\u001b\\\u001b[2m57\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Creating pipeline resource. resources.py:30147\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating pipeline resource. \u001b]8;id=877192;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=410393;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30147\u001b\\\u001b[2m30147\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Successfully created pipeline: execution.py:76\n", - " SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82 \n", - " 3cbe579c3 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully created pipeline: \u001b]8;id=802515;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=256656;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#76\u001b\\\u001b[2m76\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Waiting for pipeline execution.py:79\n", - " SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82 \n", - " 3cbe579c3 to be ready... \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Waiting for pipeline \u001b]8;id=984002;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=40351;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#79\u001b\\\u001b[2m79\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m to be ready\u001b[33m...\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/rich/live.py:231: UserWarning: \n",
- "install \"ipywidgets\" for Jupyter support\n",
- " warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
- "\n"
- ],
- "text/plain": [
- "/Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/rich/live.py:231: UserWarning: \n",
- "install \"ipywidgets\" for Jupyter support\n",
- " warnings.warn('install \"ipywidgets\" for Jupyter support')\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "INFO Final Resource Status: Active resources.py:30410\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: \u001b[1mActive\u001b[0m \u001b]8;id=750224;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=46929;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30410\u001b\\\u001b[2m30410\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Pipeline execution.py:82\n", - " SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e82 \n", - " 3cbe579c3 is now active and ready for execution \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline \u001b]8;id=674167;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=265281;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#82\u001b\\\u001b[2m82\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e82\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m3cbe579c3\u001b[0m is now active and ready for execution \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Starting pipeline execution: eval-meta-1b49b716-1764452564 execution.py:263\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Starting pipeline execution: eval-meta-1b49b716-\u001b[1;36m1764452564\u001b[0m \u001b]8;id=27465;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=541837;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#263\u001b\\\u001b[2m263\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 13:42:45] INFO Pipeline execution started: execution.py:274\n", - " arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \n", - " -CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e823cbe579c3/executio \n", - " n/u2q2dl1w5aiq \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:42:45]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline execution started: \u001b]8;id=368377;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=144012;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#274\u001b\\\u001b[2m274\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -CustomScorerEvaluation-\u001b[93m1c2e4a67-ecb4-4c89-8e82-e823cbe579c3\u001b[0m/executio \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m n/u2q2dl1w5aiq \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "✓ Evaluation execution started successfully!\n", - " Execution Name: eval-meta-1b49b716\n", - " Pipeline Execution ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-CustomScorerEvaluation-1c2e4a67-ecb4-4c89-8e82-e823cbe579c3/execution/u2q2dl1w5aiq\n", - " Status: Executing\n" - ] - } - ], "source": [ "# Start evaluation\n", "execution = evaluator.evaluate()\n", @@ -1341,7 +194,9 @@ "print(f\" Execution Name: {execution.name}\")\n", "print(f\" Pipeline Execution ARN: {execution.arn}\")\n", "print(f\" Status: {execution.status.overall_status}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -1354,79 +209,16 @@ }, { "cell_type": "code", - "execution_count": 6, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current Status: Executing\n" - ] - }, - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n", - "│ overall_status='Executing',\n", - "│ step_details=[\n", - "│ │ StepDetail(\n", - "│ │ │ name='EvaluateCustomModel',\n", - "│ │ │ status='Executing',\n", - "│ │ │ start_time='2025-11-29T13:42:45.523000-08:00',\n", - "│ │ │ end_time='<sagemaker.core.utils.utils.Unassigned object at 0x120ab8f80>',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ ),\n", - "│ │ StepDetail(\n", - "│ │ │ name='CreateEvaluationAction',\n", - "│ │ │ status='Succeeded',\n", - "│ │ │ start_time='2025-11-29T13:42:45.523000-08:00',\n", - "│ │ │ end_time='2025-11-29T13:42:48.017000-08:00',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ )\n", - "│ ],\n", - "│ failure_reason=None\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:42:45.523000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x120ab8f80\u001b[0m\u001b[1;38;2;0;135;0m>\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:42:45.523000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T13:42:48.017000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "# Check current status\n", "execution.refresh()\n", "print(f\"Current Status: {execution.status.overall_status}\")\n", "\n", "pprint(execution.status)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -1439,265 +231,25 @@ }, { "cell_type": "code", - "execution_count": 8, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Pipeline Execution Status ───────────────────────────────────────────╮\n", - "│ Overall Status Succeeded │\n", - "│ Target Status Succeeded │\n", - "│ Elapsed Time 0.9s │\n", - "│ │\n", - "│ Pipeline Steps │\n", - "│ Step Name Status Duration │\n", - "│ AssociateLineage Succeeded 1.9s │\n", - "│ EvaluateCustomModel Succeeded 7462.5s │\n", - "│ CreateEvaluationAction Succeeded 2.5s │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mPipeline Execution Status\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mOverall Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTarget Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[37m0.9s \u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35mPipeline Steps\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep Name \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mStatus \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mAssociateLineage \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m1.9s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m7462.5s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mCreateEvaluationAction \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m2.5s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:36] INFO Final Resource Status: Succeeded execution.py:979\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: Succeeded \u001b]8;id=693225;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=873243;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#979\u001b\\\u001b[2m979\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Final Status: Succeeded\n" - ] - } - ], "source": [ "# Wait for job to complete (with rich visual feedback)\n", "execution.wait(poll=30, timeout=3600)\n", "\n", "print(f\"\\nFinal Status: {execution.status.overall_status}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 9, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 16:21:42] INFO S3 bucket: mufi-test-serverless-smtj, prefix: eval show_results_utils.py:130\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:42]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 bucket: mufi-test-serverless-smtj, prefix: eval \u001b]8;id=425698;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=639097;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#130\u001b\\\u001b[2m130\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Extracted training job name: show_results_utils.py:63\n", - " pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf from \n", - " step: EvaluateCustomModel \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=993672;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=652226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#63\u001b\\\u001b[2m63\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModel \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for results_*.json in show_results_utils.py:150\n", - " s3://mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-E \n", - " valuateCustomModel-FNSg2Knqlf/output/output/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for results_*.json in \u001b]8;id=724854;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=324888;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#150\u001b\\\u001b[2m150\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateCustomModel-FNSg2Knqlf/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found results file: show_results_utils.py:168\n", - " eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/o \n", - " utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct- \n", - " -or8pa/eval_results/results_2025-11-29T23-46-45.108093+00-00 \n", - " .json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=770358;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=338226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/o \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m utput/output/eval-meta_textgeneration_llama_3_2_1b_instruct- \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -or8pa/eval_results/results_2025-\u001b[1;36m11\u001b[0m-29T23-\u001b[1;36m46\u001b[0m-\u001b[1;36m45.108093\u001b[0m+\u001b[1;36m00-00\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m.j\u001b[0mson \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:21:43] INFO Using metrics from key: 'custom|gen_qa_gen_qa|0' (gen_qa or show_results_utils.py:100\n", - " custom_scorer format) \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:21:43]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using metrics from key: \u001b[38;2;0;135;0m'custom|gen_qa_gen_qa|0'\u001b[0m \u001b[1m(\u001b[0mgen_qa or \u001b]8;id=904034;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=137242;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#100\u001b\\\u001b[2m100\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m custom_scorer format\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Custom Model Results \n", - "╭────────────────────────────────┬─────────────────╮\n", - "│ Metric │ Value │\n", - "├────────────────────────────────┼─────────────────┤\n", - "│ bleu │ 6.6928 │\n", - "│ bleu_stderr │ 0.7769 │\n", - "│ byoc_failure_count │ 3572.0000 │\n", - "│ em │ 1.26% │\n", - "│ em_stderr │ 0.0019 │\n", - "│ f1 │ 19.13% │\n", - "│ f1_score_quasi │ 25.29% │\n", - "│ f1_score_quasi_stderr │ 0.0049 │\n", - "│ f1_stderr │ 0.0047 │\n", - "│ qem │ 2.21% │\n", - "│ qem_stderr │ 0.0025 │\n", - "│ rouge1 │ 25.73% │\n", - "│ rouge1_stderr │ 0.0047 │\n", - "│ rouge2 │ 19.15% │\n", - "│ rouge2_stderr │ 0.0047 │\n", - "│ rougeL │ 25.04% │\n", - "│ rougeL_stderr │ 0.0047 │\n", - "╰────────────────────────────────┴─────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3;32mCustom Model Results\u001b[0m\u001b[3m \u001b[0m\n", - "╭────────────────────────────────┬─────────────────╮\n", - "│\u001b[1;32m \u001b[0m\u001b[1;32mMetric \u001b[0m\u001b[1;32m \u001b[0m│\u001b[1;32m \u001b[0m\u001b[1;32m Value\u001b[0m\u001b[1;32m \u001b[0m│\n", - "├────────────────────────────────┼─────────────────┤\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 6.6928\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbleu_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.7769\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mbyoc_failure_count \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 3572.0000\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 1.26%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0019\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.13%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.29%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_score_quasi_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0049\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mf1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 2.21%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mqem_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0025\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.73%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge1_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2 \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 19.15%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrouge2_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 25.04%\u001b[0m\u001b[37m \u001b[0m│\n", - "│\u001b[36m \u001b[0m\u001b[36mrougeL_stderr \u001b[0m\u001b[36m \u001b[0m│\u001b[37m \u001b[0m\u001b[37m 0.0047\u001b[0m\u001b[37m \u001b[0m│\n", - "╰────────────────────────────────┴─────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Result Artifacts Location ───────────────────────────────────────────╮\n", - "│ │\n", - "│ │\n", - "│ 📦 Full evaluation artifacts available at: │\n", - "│ │\n", - "│ Custom Model: │\n", - "│ s3://mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/output/output/Non │\n", - "│ e/eval_results/ │\n", - "│ │\n", - "│ │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mResult Artifacts Location\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m📦 \u001b[0m\u001b[1mFull evaluation artifacts available at:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;32mCustom Model:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-u2q2dl1w5aiq-EvaluateCustomModel-FNSg2Knqlf/output/output/Non\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36me/eval_results/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "# show results\n", "execution.show_results()" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -1710,25 +262,7 @@ }, { "cell_type": "code", - "execution_count": 9, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO - sagemaker.modules.evaluate.execution - Extracted s3_output_path from training job pipelines-amlk8q2ukw8x-EvaluateCustomModel-VElzvyVY19: s3://mufi-test-serverless-smtj/eval/\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Retrieved job: amlk8q2ukw8x\n", - "Status: Succeeded\n" - ] - } - ], "source": [ "from sagemaker.train.evaluate import EvaluationPipelineExecution\n", "\n", @@ -1739,7 +273,9 @@ "\n", "print(f\"Retrieved job: {existing_exec.name}\")\n", "print(f\"Status: {existing_exec.status.overall_status}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -1752,18 +288,7 @@ }, { "cell_type": "code", - "execution_count": 10, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 0 custom scorer evaluation(s):\n", - "\n" - ] - } - ], "source": [ "# Get all custom scorer evaluations\n", "all_executions = list(CustomScorerEvaluator.get_all())\n", @@ -1771,7 +296,9 @@ "print(f\"Found {len(all_executions)} custom scorer evaluation(s):\\n\")\n", "for execution in all_executions:\n", " print(f\" - {execution.name} - {execution.arn}: {execution.status.overall_status}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -1784,14 +311,14 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "# Uncomment to stop the job\n", "# execution.stop()\n", "# print(f\"Execution stopped. Status: {execution.status.overall_status}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", diff --git a/v3-examples/model-customization-examples/dpo-trainer-e2e.ipynb b/v3-examples/model-customization-examples/dpo-trainer-e2e.ipynb index 49671ad7d0..ae4f366446 100644 --- a/v3-examples/model-customization-examples/dpo-trainer-e2e.ipynb +++ b/v3-examples/model-customization-examples/dpo-trainer-e2e.ipynb @@ -43,57 +43,22 @@ "The dataset should be in JSONL format with each line containing one preference example." ] }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "# Configure AWS credentials and region\n", + "#! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", + "#! aws configure set region us-west-2" + ], + "id": "3878997b198befc0" + }, { "cell_type": "code", - "execution_count": 5, "id": "ed5d2927f430664b", "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "0131065d360044028eedd45df8e1edb8", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Final Resource Status: Available\n", - "\n" - ], - "text/plain": [ - "Final Resource Status: Available\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataset ARN: arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/demo-nargokul-6/0.0.4\n" - ] - } - ], "source": [ "from sagemaker.ai_registry.dataset import DataSet\n", "from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n", @@ -110,14 +75,16 @@ "# Register dataset in SageMaker AI Registry\n", "# This creates a versioned dataset that can be referenced by ARN\n", "dataset = DataSet.create(\n", - " name=\"demo-nargokul-6\", \n", + " name=\"demo-6\",\n", " data_location=\"s3://nova-mlflow-us-west-2/dataset/preference_dataset_train_256.jsonl\", \n", " customization_technique=CustomizationTechnique.DPO, \n", " wait=True\n", ")\n", "\n", "print(f\"Dataset ARN: {dataset.arn}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -144,149 +111,24 @@ }, { "cell_type": "code", - "execution_count": null, "id": "e42719df1e792227", "metadata": {}, - "outputs": [], "source": [ "import random\n", - "! ada credentials update --provider=isengard --account=052150106756 --role=Admin --profile=default --once\n", + "! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", "! aws configure set region us-west-2\n", "\n", "from sagemaker.train.dpo_trainer import DPOTrainer\n", "from sagemaker.train.common import TrainingType\n", "\n" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 2, "id": "0352bdaa-fa13-44c5-a70c-0d9bf7a10477", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/24/25 17:08:50] INFO SageMaker session not provided. Using default Session. defaults.py:61\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/24/25 17:08:50]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker session not provided. Using default Session. \u001b]8;id=142678;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/train/defaults.py\u001b\\\u001b[2mdefaults.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=446735;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/train/defaults.py#61\u001b\\\u001b[2m61\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting DPO training job...\n", - "Job name: dpo-llama-721\n", - "Base model: meta-textgeneration-llama-3-2-1b-instruct\n" - ] - }, - { - "data": { - "text/html": [ - "
[11/24/25 17:08:51] INFO SageMaker session not provided. Using default Session. defaults.py:61\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/24/25 17:08:51]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker session not provided. Using default Session. \u001b]8;id=911996;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/train/defaults.py\u001b\\\u001b[2mdefaults.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=58495;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/train/defaults.py#61\u001b\\\u001b[2m61\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Training Job Name: dpo-llama-721-20251124170851 dpo_trainer.py:115\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Training Job Name: dpo-llama-\u001b[1;36m721\u001b[0m-\u001b[1;36m20251124170851\u001b[0m \u001b]8;id=517485;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/train/dpo_trainer.py\u001b\\\u001b[2mdpo_trainer.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=652836;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/train/dpo_trainer.py#115\u001b\\\u001b[2m115\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Training Job Name: dpo-llama-721-20251124170851\n" - ] - }, - { - "data": { - "text/html": [ - "
INFO MLflow resource ARN: finetune_utils.py:435\n", - " arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \n", - " ashwpat-test \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m MLflow resource ARN: \u001b]8;id=293371;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/train/common_utils/finetune_utils.py\u001b\\\u001b[2mfinetune_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=444970;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/train/common_utils/finetune_utils.py#435\u001b\\\u001b[2m435\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m ashwpat-test \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Creating training_job resource. resources.py:35539\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating training_job resource. \u001b]8;id=617267;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=485192;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/core/resources.py#35539\u001b\\\u001b[2m35539\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1de56e8cfed6421f955e995ae7f19c88", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/24/25 17:17:28] INFO Final Resource Status: Completed resources.py:35872\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/24/25 17:17:28]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: \u001b[1mCompleted\u001b[0m \u001b]8;id=678286;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=690969;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/core/resources.py#35872\u001b\\\u001b[2m35872\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n" - ], - "text/plain": [] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "# Create DPOTrainer instance with comprehensive configuration\n", "trainer = DPOTrainer(\n", @@ -297,19 +139,19 @@ " training_type=TrainingType.LORA,\n", " \n", " # Model versioning and storage\n", - " model_package_group_name=\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/test-finetuned-models-gamma\",\n", + " model_package_group_name=\"arn:aws:sagemaker:us-west-2:<>:model-package-group/test-finetuned-models-gamma\",\n", " \n", " # MLflow experiment tracking\n", - " mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ashwpat-test\",\n", + " mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/ashwpat-test\",\n", " \n", " # Training data (from Step 1)\n", - " training_dataset=\"arn:aws:sagemaker:us-west-2:052150106756:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/demo-nargokul-6/0.0.4\",\n", + " training_dataset=\"arn:aws:sagemaker:us-west-2:<>:hub-content/F3LMYANDKWPZCROJVCKMJ7TOML6QMZBZRRQOVTUL45VUK7PJ4SXA/DataSet/demo-6/0.0.4\",\n", " \n", " # Output configuration\n", " s3_output_path=\"s3://nova-mlflow-us-west-2/output\",\n", " \n", " # IAM role for training job\n", - " role=\"arn:aws:iam::052150106756:role/Admin\",\n", + " role=\"arn:aws:iam::<>:role/Admin\",\n", " \n", " # Unique job name\n", " base_job_name=f\"dpo-llama-{random.randint(1, 1000)}\",\n", @@ -327,27 +169,24 @@ "training_job = trainer.train(wait=True)\n", "\n", "print(f\"Training completed! Job ARN: {training_job.training_job_arn}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 4, "id": "22f6a210-0a0c-4b7a-af4d-2e08eae1c048", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "training_job_name='dpo-llama-721-20251124170851' training_job_arn='arn:aws:sagemaker:us-west-2:052150106756:training-job/dpo-llama-721-20251124170851' processing_job_arn=
[12/01/25 13:29:09] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1364\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[12/01/25 13:29:09]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=972233;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=418127;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1364\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=586988;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=34773;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/rsareddy/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/plain": [ - "'dataset = DataSet.create(\\n name=\"demo-nargokul-6\", \\n data_location=\"s3://nova-mlflow-us-west-2/dataset/preference_dataset_train_256.jsonl\", \\n customization_technique=CustomizationTechnique.DPO, \\n wait=True\\n)\\n\\nprint(f\"Dataset ARN: {dataset.arn}\")'" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ "from sagemaker.ai_registry.dataset import DataSet\n", "from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n", @@ -110,14 +63,16 @@ "# Register dataset in SageMaker AI Registry\n", "# This creates a versioned dataset that can be referenced by ARN\n", "'''dataset = DataSet.create(\n", - " name=\"demo-nargokul-6\", \n", + " name=\"demo-6\",\n", " data_location=\"s3://nova-mlflow-us-west-2/dataset/preference_dataset_train_256.jsonl\", \n", " customization_technique=CustomizationTechnique.DPO, \n", " wait=True\n", ")\n", "\n", "print(f\"Dataset ARN: {dataset.arn}\")'''" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -144,122 +99,29 @@ }, { "cell_type": "code", - "execution_count": 1, "id": "e42719df1e792227", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[12/01/25 13:40:16] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1364\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[12/01/25 13:40:16]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=467839;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=684274;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[12/01/25 13:40:17] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1364\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[12/01/25 13:40:17]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=535804;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=730749;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/rsareddy/Library/Application Support/sagemaker/config.yaml\n" - ] - } - ], "source": [ "import random\n", - "#! ada credentials update --provider=isengard --account=052150106756 --role=Admin --profile=default --once\n", + "#! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", "#! aws configure set region us-west-2\n", "\n", "from sagemaker.train.dpo_trainer import DPOTrainer\n", "from sagemaker.train.common import TrainingType\n", "\n" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 2, "id": "0352bdaa-fa13-44c5-a70c-0d9bf7a10477", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭────────────────────────────────── Training Job Status ───────────────────────────────────╮\n", - "│ TrainingJob Name dpo-llama-695-20251201134040 │\n", - "│ │\n", - "│ Job Status Completed │\n", - "│ Secondary Status Completed │\n", - "│ Elapsed Time 216.7s │\n", - "│ │\n", - "│ Status Transitions │\n", - "│ │\n", - "│ Step Details Duration │\n", - "│ ─────────────────────────────────────────────────────────────────────────── │\n", - "│ ✓ Starting Starting the training job 0.7s │\n", - "│ ✓ Pending Preparing the instances for 24.0s │\n", - "│ training │\n", - "│ ✓ Downloading Downloading the training image 10.5s │\n", - "│ ✓ Training Training image download completed. 165.9s │\n", - "│ Training in progress. │\n", - "│ ✓ Uploading Uploading generated training model 12.9s │\n", - "│ ✓ Completed Training job completed 0.0s │\n", - "│ │\n", - "╰──────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[38;5;172m╭─\u001b[0m\u001b[38;5;172m─────────────────────────────────\u001b[0m\u001b[38;5;172m \u001b[0m\u001b[1;94mTraining Job Status\u001b[0m\u001b[38;5;172m \u001b[0m\u001b[38;5;172m──────────────────────────────────\u001b[0m\u001b[38;5;172m─╮\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTrainingJob Name \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;32mdpo-llama-695-20251201134040\u001b[0m\u001b[37m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mJob Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;38;5;172mCompleted\u001b[0m\u001b[37m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mSecondary Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;33mCompleted\u001b[0m\u001b[37m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;91m216.7s\u001b[0m\u001b[37m \u001b[0m\u001b[37m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[1;35mStatus Transitions\u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mDetails \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m ─────────────────────────────────────────────────────────────────────────── \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[32m \u001b[0m\u001b[32m✓ \u001b[0m\u001b[32m \u001b[0m \u001b[36m \u001b[0m\u001b[36mStarting \u001b[0m\u001b[36m \u001b[0m \u001b[38;5;172m \u001b[0m\u001b[38;5;172mStarting the training job \u001b[0m\u001b[38;5;172m \u001b[0m \u001b[32m \u001b[0m\u001b[32m0.7s \u001b[0m\u001b[32m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[32m \u001b[0m\u001b[32m✓ \u001b[0m\u001b[32m \u001b[0m \u001b[36m \u001b[0m\u001b[36mPending \u001b[0m\u001b[36m \u001b[0m \u001b[38;5;172m \u001b[0m\u001b[38;5;172mPreparing the instances for \u001b[0m\u001b[38;5;172m \u001b[0m \u001b[32m \u001b[0m\u001b[32m24.0s \u001b[0m\u001b[32m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[32m \u001b[0m \u001b[36m \u001b[0m \u001b[38;5;172m \u001b[0m\u001b[38;5;172mtraining \u001b[0m\u001b[38;5;172m \u001b[0m \u001b[32m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[32m \u001b[0m\u001b[32m✓ \u001b[0m\u001b[32m \u001b[0m \u001b[36m \u001b[0m\u001b[36mDownloading \u001b[0m\u001b[36m \u001b[0m \u001b[38;5;172m \u001b[0m\u001b[38;5;172mDownloading the training image \u001b[0m\u001b[38;5;172m \u001b[0m \u001b[32m \u001b[0m\u001b[32m10.5s \u001b[0m\u001b[32m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[32m \u001b[0m\u001b[32m✓ \u001b[0m\u001b[32m \u001b[0m \u001b[36m \u001b[0m\u001b[36mTraining \u001b[0m\u001b[36m \u001b[0m \u001b[38;5;172m \u001b[0m\u001b[38;5;172mTraining image download completed. \u001b[0m\u001b[38;5;172m \u001b[0m \u001b[32m \u001b[0m\u001b[32m165.9s \u001b[0m\u001b[32m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[32m \u001b[0m \u001b[36m \u001b[0m \u001b[38;5;172m \u001b[0m\u001b[38;5;172mTraining in progress. \u001b[0m\u001b[38;5;172m \u001b[0m \u001b[32m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[32m \u001b[0m\u001b[32m✓ \u001b[0m\u001b[32m \u001b[0m \u001b[36m \u001b[0m\u001b[36mUploading \u001b[0m\u001b[36m \u001b[0m \u001b[38;5;172m \u001b[0m\u001b[38;5;172mUploading generated training model \u001b[0m\u001b[38;5;172m \u001b[0m \u001b[32m \u001b[0m\u001b[32m12.9s \u001b[0m\u001b[32m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[32m \u001b[0m\u001b[32m✓ \u001b[0m\u001b[32m \u001b[0m \u001b[36m \u001b[0m\u001b[36mCompleted \u001b[0m\u001b[36m \u001b[0m \u001b[38;5;172m \u001b[0m\u001b[38;5;172mTraining job completed \u001b[0m\u001b[38;5;172m \u001b[0m \u001b[32m \u001b[0m\u001b[32m0.0s \u001b[0m\u001b[32m \u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m│\u001b[0m \u001b[38;5;172m│\u001b[0m\n", - "\u001b[38;5;172m╰──────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Training completed! Job ARN: arn:aws:sagemaker:us-west-2:729646638167:training-job/dpo-llama-695-20251201134040\n" - ] + "metadata": { + "ExecuteTime": { + "end_time": "2025-12-05T19:30:51.233369Z", + "start_time": "2025-12-05T19:30:51.101703Z" } - ], + }, "source": [ "# Create DPOTrainer instance with comprehensive configuration\n", "trainer = DPOTrainer(\n", @@ -273,7 +135,7 @@ " model_package_group_name=\"sdk-test-finetuned-models\",\n", " \n", " # MLflow experiment tracking\n", - " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ashwpat-test\",\n", + " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:{Account-ID}:mlflow-tracking-server/{MLFLOW-NAME}\",\n", " \n", " # Training data (from Step 1)\n", " training_dataset=\"s3://mc-flows-sdk-testing/input_data/dpo/preference_dataset_train_256.jsonl\",\n", @@ -282,7 +144,7 @@ " s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n", " \n", " # IAM role for training job\n", - " #role=\"arn:aws:iam::052150106756:role/Admin\",\n", + " #role=\"arn:aws:iam::{Account-ID}:role/Admin\",\n", " \n", " # Unique job name\n", " base_job_name=f\"dpo-llama-{random.randint(1, 1000)}\",\n", @@ -301,50 +163,77 @@ "training_job = trainer.train(wait=True)\n", "\n", "print(f\"Training completed! Job ARN: {training_job.training_job_arn}\")" - ] + ], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'DataSet' is not defined", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mNameError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[1], line 2\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;66;03m# Create DPOTrainer instance with comprehensive configuration\u001B[39;00m\n\u001B[0;32m----> 2\u001B[0m dataset \u001B[38;5;241m=\u001B[39m \u001B[43mDataSet\u001B[49m\u001B[38;5;241m.\u001B[39mget(name\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124marn:aws:sagemaker:us-east-1:729646638167:hub-content/sdktest/DataSet/dpo-nova-1-test-data/0.0.1\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 4\u001B[0m trainer \u001B[38;5;241m=\u001B[39m DPOTrainer(\n\u001B[1;32m 5\u001B[0m \u001B[38;5;66;03m# Base model from SageMaker Hub\u001B[39;00m\n\u001B[1;32m 6\u001B[0m model\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmeta-textgeneration-llama-3-2-1b-instruct\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 28\u001B[0m accept_eula\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[1;32m 29\u001B[0m )\n\u001B[1;32m 31\u001B[0m \u001B[38;5;66;03m# Customize training hyperparameters\u001B[39;00m\n\u001B[1;32m 32\u001B[0m \u001B[38;5;66;03m# DPO-specific parameters are automatically loaded from the model's recipe\u001B[39;00m\n", + "\u001B[0;31mNameError\u001B[0m: name 'DataSet' is not defined" + ] + } + ], + "execution_count": 1 }, { "cell_type": "code", - "execution_count": 3, "id": "22f6a210-0a0c-4b7a-af4d-2e08eae1c048", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "training_job_name='dpo-llama-45-20251129130016' training_job_arn='arn:aws:sagemaker:us-west-2:052150106756:training-job/dpo-llama-45-20251129130016' processing_job_arn=
[11/29/25 13:43:52] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1364\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 13:43:52]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=406523;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=534480;file:///Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/mufi/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
INFO Resolved MLflow resource ARN: base_evaluator.py:113\n", - " arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \n", - " mmlu-eval-experiment \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved MLflow resource ARN: \u001b]8;id=360312;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=805617;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#113\u001b\\\u001b[2m113\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m mmlu-eval-experiment \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
LLMAsJudgeEvaluator(\n", - "│ region=None,\n", - "│ sagemaker_session=<sagemaker.core.helper.session_helper.Session object at 0x15f5c11c0>,\n", - "│ model='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28',\n", - "│ base_eval_name='eval-meta-04295d90',\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n", - "│ mlflow_experiment_name=None,\n", - "│ mlflow_run_name=None,\n", - "│ networking=None,\n", - "│ kms_key_id=None,\n", - "│ model_package_group=None,\n", - "│ evaluator_model='anthropic.claude-3-5-haiku-20241022-v1:0',\n", - "│ dataset='s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-dataset/gen_qa.jsonl',\n", - "│ builtin_metrics=['Completeness', 'Faithfulness'],\n", - "│ custom_metrics='[{\"customMetricDefinition\": {\"name\": \"PositiveSentiment\", \"instructions\": \"You are an expert evaluator. Your task is to assess if the sentiment of the response is positive. Rate the response based on whether it conveys positive sentiment, helpfulness, and constructive tone.\\\\n\\\\nConsider the following:\\\\n- Does the response have a positive, encouraging tone?\\\\n- Is the response helpful and constructive?\\\\n- Does it avoid negative language or criticism?\\\\n\\\\nRate on this scale:\\\\n- Good: Response has positive sentiment\\\\n- Poor: Response lacks positive sentiment\\\\n\\\\nHere is the actual task:\\\\nPrompt: {{prompt}}\\\\nResponse: {{prediction}}\", \"ratingScale\": [{\"definition\": \"Good\", \"value\": {\"floatValue\": 1}}, {\"definition\": \"Poor\", \"value\": {\"floatValue\": 0}}]}}]',\n", - "│ evaluate_base_model=False\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mLLMAsJudgeEvaluator\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mregion\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msagemaker_session\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225msagemaker.core.helper.session_helper.Session\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x15f5c11c0\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbase_eval_name\u001b[0m=\u001b[38;2;0;135;0m'eval-meta-04295d90'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_resource_arn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_experiment_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmlflow_run_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mnetworking\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mkms_key_id\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mmodel_package_group\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluator_model\u001b[0m=\u001b[38;2;0;135;0m'anthropic.claude-3-5-haiku-20241022-v1:0'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mdataset\u001b[0m=\u001b[38;2;0;135;0m's3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-dataset/gen_qa.jsonl'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mbuiltin_metrics\u001b[0m=\u001b[1m[\u001b[0m\u001b[38;2;0;135;0m'Completeness'\u001b[0m, \u001b[38;2;0;135;0m'Faithfulness'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mcustom_metrics\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"customMetricDefinition\": \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"name\": \"PositiveSentiment\", \"instructions\": \"You are an expert evaluator. Your task is to assess if the sentiment of the response is positive. Rate the response based on whether it conveys positive sentiment, helpfulness, and constructive tone.\\\\n\\\\nConsider the following:\\\\n- Does the response have a positive, encouraging tone?\\\\n- Is the response helpful and constructive?\\\\n- Does it avoid negative language or criticism?\\\\n\\\\nRate on this scale:\\\\n- Good: Response has positive sentiment\\\\n- Poor: Response lacks positive sentiment\\\\n\\\\nHere is the actual task:\\\\nPrompt: \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0mprompt\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[38;2;0;135;0m\\\\nResponse: \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0mprediction\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[38;2;0;135;0m\", \"ratingScale\": \u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"definition\": \"Good\", \"value\": \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"floatValue\": 1\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[38;2;0;135;0m, \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"definition\": \"Poor\", \"value\": \u001b[0m\u001b[1;38;2;0;135;0m{\u001b[0m\u001b[38;2;0;135;0m\"floatValue\": 0\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m}\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mevaluate_base_model\u001b[0m=\u001b[3;38;2;215;0;0mFalse\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "\n", "# Create evaluator with custom metrics\n", "evaluator = LLMAsJudgeEvaluator(\n", - " # base_model='arn:aws:sagemaker:us-west-2:052150106756:model-package/Demo-test-deb-2/1', # Required\n", - " model=\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/28\",\n", + " # base_model='arn:aws:sagemaker:us-west-2:<>:model-package/Demo-test-deb-2/1', # Required\n", + " model=\"arn:aws:sagemaker:us-west-2:<>:model-package/test-finetuned-models-gamma/28\",\n", " evaluator_model=\"anthropic.claude-3-5-haiku-20241022-v1:0\", # Required\n", " dataset=DATASET, # Required: S3 URI or Dataset ARN\n", " builtin_metrics=[\"Completeness\", \"Faithfulness\"], # Optional: Can combine with custom metrics\n", @@ -231,7 +154,9 @@ ")\n", "\n", "pprint(evaluator)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -242,9 +167,7 @@ }, { "cell_type": "code", - "execution_count": 13, "metadata": {}, - "outputs": [], "source": [ "# # Create multiple custom metrics\n", "# custom_metrics_list = [\n", @@ -290,7 +213,9 @@ "\n", "# print(f\"✅ Created evaluator with {len(json.loads(custom_metrics_json))} custom metrics\")\n", "# pprint(evaluator)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -303,9 +228,7 @@ }, { "cell_type": "code", - "execution_count": 14, "metadata": {}, - "outputs": [], "source": [ "# # Define custom metrics (same as test script)\n", "# custom_metrics = \"[{\\\"customMetricDefinition\\\":{\\\"name\\\":\\\"GoodMetric\\\",\\\"instructions\\\":\\\"You are an expert evaluator. Your task is to assess if the sentiment of the response is positive. Rate the response based on whether it conveys positive sentiment, helpfulness, and constructive tone.\\\\n\\\\nConsider the following:\\\\n- Does the response have a positive, encouraging tone?\\\\n- Is the response helpful and constructive?\\\\n- Does it avoid negative language or criticism?\\\\n\\\\nRate on this scale:\\\\n- Good: Response has positive sentiment\\\\n- Poor: Response lacks positive sentiment\\\\n\\\\nHere is the actual task:\\\\nPrompt: {{prompt}}\\\\nResponse: {{prediction}}\\\",\\\"ratingScale\\\":[{\\\"definition\\\":\\\"Good\\\",\\\"value\\\":{\\\"floatValue\\\":1}},{\\\"definition\\\":\\\"Poor\\\",\\\"value\\\":{\\\"floatValue\\\":0}}]}},{\\\"customMetricDefinition\\\":{\\\"name\\\":\\\"BadMetric\\\",\\\"instructions\\\":\\\"You are an expert evaluator. Your task is to assess if the sentiment of the response is negative. Rate the response based on whether it conveys negative sentiment, unhelpfulness, or destructive tone.\\\\n\\\\nConsider the following:\\\\n- Does the response have a negative, discouraging tone?\\\\n- Is the response unhelpful or destructive?\\\\n- Does it use negative language or harsh criticism?\\\\n\\\\nRate on this scale:\\\\n- Bad: Response has negative sentiment\\\\n- Good: Response lacks negative sentiment\\\\n\\\\nHere is the actual task:\\\\nPrompt: {{prompt}}\\\\nResponse: {{prediction}}\\\",\\\"ratingScale\\\":[{\\\"definition\\\":\\\"Bad\\\",\\\"value\\\":{\\\"floatValue\\\":1}},{\\\"definition\\\":\\\"Good\\\",\\\"value\\\":{\\\"floatValue\\\":0}}]}}]\"\n", @@ -326,7 +249,9 @@ "\n", "# print(\"✅ Created evaluator (custom model only)\")\n", "# pprint(evaluator)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -342,1122 +267,7 @@ }, { "cell_type": "code", - "execution_count": 8, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 16:22:01] INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:91\n", - " understand our user's needs, diagnose issues, and deliver \n", - " additional features. \n", - " To opt out of telemetry, please disable via TelemetryOptOut \n", - " parameter in SDK defaults config. For more information, refer \n", - " to \n", - " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", - " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:22:01]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=931878;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=760856;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/telemetry/telemetry_logging.py#91\u001b\\\u001b[2m91\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Getting or creating artifact for source: base_evaluator.py:597\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Getting or creating artifact for source: \u001b]8;id=179503;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=71430;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#597\u001b\\\u001b[2m597\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for existing artifact for model package: base_evaluator.py:459\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for existing artifact for model package: \u001b]8;id=2444;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=787547;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#459\u001b\\\u001b[2m459\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found existing artifact: base_evaluator.py:468\n", - " arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \n", - " 138877d772ec489bef \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing artifact: \u001b]8;id=808361;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=665812;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#468\u001b\\\u001b[2m468\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b3 \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m 138877d772ec489bef \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Inferred model package group ARN: base_evaluator.py:386\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \n", - " t-finetuned-models-gamma from \n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \n", - " tuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Inferred model package group ARN: \u001b]8;id=361400;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=518747;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#386\u001b\\\u001b[2m386\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fine \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m tuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Automatically inferred model_package_group: base_evaluator.py:421\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \n", - " t-finetuned-models-gamma \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Automatically inferred model_package_group: \u001b]8;id=299761;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=867866;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#421\u001b\\\u001b[2m421\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-group/tes \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m t-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using ModelPackage - model_package_group_arn: llm_as_judge_evaluator.py:319\n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package-g \n", - " roup/test-finetuned-models-gamma \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using ModelPackage - model_package_group_arn: \u001b]8;id=538256;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=292230;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#319\u001b\\\u001b[2m319\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package-g \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m roup/test-finetuned-models-gamma \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Resolved model info - base_model_name: llm_as_judge_evaluator.py:322\n", - " meta-textgeneration-llama-3-2-1b-instruct, \n", - " base_model_arn: \n", - " arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPub \n", - " licHub/Model/meta-textgeneration-llama-3-2-1b-instruct/1 \n", - " .10.0, source_model_package_arn: \n", - " arn:aws:sagemaker:us-west-2:052150106756:model-package/t \n", - " est-finetuned-models-gamma/28 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved model info - base_model_name: \u001b]8;id=854970;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=553794;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#322\u001b\\\u001b[2m322\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m base_model_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPub \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m licHub/Model/meta-textgeneration-llama-\u001b[1;36m3\u001b[0m-\u001b[1;36m2\u001b[0m-1b-instruct/\u001b[1;36m1\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m.10\u001b[0m.\u001b[1;36m0\u001b[0m, source_model_package_arn: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:model-package/t \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m est-finetuned-models-gamma/\u001b[1;36m28\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Uploading custom metrics to S3: llm_as_judge_evaluator.py:220\n", - " s3://mufi-test-serverless-smtj/eval/evaluationinputs/eva \n", - " l-meta-04295d9020251130-002201/custom-metrics.json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Uploading custom metrics to S3: \u001b]8;id=657021;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=5404;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#220\u001b\\\u001b[2m220\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/evaluationinputs/eva\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225ml-meta-04295d9020251130-002201/\u001b[0m\u001b[38;2;225;0;225mcustom-metrics.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Successfully uploaded custom metrics to: llm_as_judge_evaluator.py:228\n", - " s3://mufi-test-serverless-smtj/eval/evaluationinputs/eva \n", - " l-meta-04295d9020251130-002201/custom-metrics.json \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully uploaded custom metrics to: \u001b]8;id=718083;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py\u001b\\\u001b[2mllm_as_judge_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=581773;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/llm_as_judge_evaluator.py#228\u001b\\\u001b[2m228\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/evaluationinputs/eva\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225ml-meta-04295d9020251130-002201/\u001b[0m\u001b[38;2;225;0;225mcustom-metrics.json\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Using full template for ModelPackage base_evaluator.py:655\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Using full template for ModelPackage \u001b]8;id=143249;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=489338;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#655\u001b\\\u001b[2m655\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Resolved template parameters: {'role_arn': base_evaluator.py:693\n", - " 'arn:aws:iam::052150106756:role/Admin', 'mlflow_resource_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server \n", - " /mmlu-eval-experiment', 'mlflow_experiment_name': None, \n", - " 'mlflow_run_name': None, 'model_package_group_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te \n", - " st-finetuned-models-gamma', 'source_model_package_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28', 'base_model_arn': \n", - " 'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0', \n", - " 's3_output_path': 's3://mufi-test-serverless-smtj/eval', \n", - " 'dataset_artifact_arn': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b \n", - " 3138877d772ec489bef', 'action_arn_prefix': \n", - " 'arn:aws:sagemaker:us-west-2:052150106756:action', \n", - " 'dataset_uri': \n", - " 's3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas \n", - " et/gen_qa.jsonl', 'judge_model_id': \n", - " 'anthropic.claude-3-5-haiku-20241022-v1:0', 'llmaj_metrics': \n", - " '[\"Completeness\", \"Faithfulness\"]', 'custom_metrics_s3_path': \n", - " 's3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta- \n", - " 04295d9020251130-002201/custom-metrics.json', 'max_new_tokens': \n", - " '8192', 'temperature': '0', 'top_k': '-1', 'top_p': '1.0', \n", - " 'pipeline_name': 'SagemakerModelEvaluationType2-llmaj', \n", - " 'evaluate_base_model': False} \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Resolved template parameters: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'role_arn'\u001b[0m: \u001b]8;id=109479;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=566018;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#693\u001b\\\u001b[2m693\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:iam::052150106756:role/Admin'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_resource_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment'\u001b[0m, \u001b[38;2;0;135;0m'mlflow_experiment_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'mlflow_run_name'\u001b[0m: \u001b[3;38;2;225;0;225mNone\u001b[0m, \u001b[38;2;0;135;0m'model_package_group_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma'\u001b[0m, \u001b[38;2;0;135;0m'source_model_package_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28'\u001b[0m, \u001b[38;2;0;135;0m'base_model_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3_output_path'\u001b[0m: \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_artifact_arn'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef'\u001b[0m, \u001b[38;2;0;135;0m'action_arn_prefix'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:action'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'dataset_uri'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0met/gen_qa.jsonl'\u001b[0m, \u001b[38;2;0;135;0m'judge_model_id'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'anthropic.claude-3-5-haiku-20241022-v1:0'\u001b[0m, \u001b[38;2;0;135;0m'llmaj_metrics'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[38;2;0;135;0m\"Completeness\", \"Faithfulness\"\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m, \u001b[38;2;0;135;0m'custom_metrics_s3_path'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m04295d9020251130-002201/custom-metrics.json'\u001b[0m, \u001b[38;2;0;135;0m'max_new_tokens'\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'8192'\u001b[0m, \u001b[38;2;0;135;0m'temperature'\u001b[0m: \u001b[38;2;0;135;0m'0'\u001b[0m, \u001b[38;2;0;135;0m'top_k'\u001b[0m: \u001b[38;2;0;135;0m'-1'\u001b[0m, \u001b[38;2;0;135;0m'top_p'\u001b[0m: \u001b[38;2;0;135;0m'1.0'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'pipeline_name'\u001b[0m: \u001b[38;2;0;135;0m'SagemakerModelEvaluationType2-llmaj'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'evaluate_base_model'\u001b[0m: \u001b[3;38;2;215;0;0mFalse\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Rendered pipeline definition: base_evaluator.py:702\n", - " { \n", - " \"Version\": \"2020-12-01\", \n", - " \"Metadata\": {}, \n", - " \"MlflowConfig\": { \n", - " \"MlflowResourceArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server \n", - " /mmlu-eval-experiment\" \n", - " }, \n", - " \"Parameters\": [], \n", - " \"Steps\": [ \n", - " { \n", - " \"Name\": \"CreateEvaluationAction\", \n", - " \"Type\": \"Lineage\", \n", - " \"Arguments\": { \n", - " \"Actions\": [ \n", - " { \n", - " \"ActionName\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"ActionType\": \"Evaluation\", \n", - " \"Source\": { \n", - " \"SourceUri\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\", \n", - " \"SourceType\": \"ModelPackage\" \n", - " }, \n", - " \"Properties\": { \n", - " \"PipelineExecutionArn\": { \n", - " \"Get\": \"Execution.PipelineExecutionArn\" \n", - " }, \n", - " \"PipelineName\": \n", - " \"SagemakerModelEvaluationType2-llmaj\" \n", - " } \n", - " } \n", - " ], \n", - " \"Contexts\": [ \n", - " { \n", - " \"ContextName\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"ContextType\": \"PipelineExecution\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \"Execution.PipelineExecutionArn\" \n", - " } \n", - " } \n", - " } \n", - " ], \n", - " \"Associations\": [ \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"Type\": \"Action\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Name\": { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"Type\": \"Context\" \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " }, \n", - " { \n", - " \"Source\": { \n", - " \"Arn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b \n", - " 3138877d772ec489bef\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"EvaluateCustomInferenceModel\", \n", - " \"Type\": \"Training\", \n", - " \"Arguments\": { \n", - " \"TrainingJobName\": \"CustomInference\", \n", - " \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\", \n", - " \"ServerlessJobConfig\": { \n", - " \"BaseModelArn\": \n", - " \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\", \n", - " \"AcceptEula\": true, \n", - " \"JobType\": \"Evaluation\", \n", - " \"EvaluationType\": \"BenchmarkEvaluation\" \n", - " }, \n", - " \"StoppingCondition\": { \n", - " \"MaxRuntimeInSeconds\": 86400 \n", - " }, \n", - " \"HyperParameters\": { \n", - " \"name\": \"CustomInference\", \n", - " \"task\": \"inference_only\" \n", - " }, \n", - " \"OutputDataConfig\": { \n", - " \"S3OutputPath\": \"s3://mufi-test-serverless-smtj/eval\", \n", - " \"CompressionType\": \"NONE\" \n", - " }, \n", - " \"ModelPackageConfig\": { \n", - " \"ModelPackageGroupArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te \n", - " st-finetuned-models-gamma\", \n", - " \"SourceModelPackageArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\" \n", - " }, \n", - " \"InputDataConfig\": [ \n", - " { \n", - " \"ChannelName\": \"train\", \n", - " \"DataSource\": { \n", - " \"S3DataSource\": { \n", - " \"S3DataType\": \"S3Prefix\", \n", - " \"S3Uri\": \n", - " \"s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas \n", - " et/gen_qa.jsonl\" \n", - " } \n", - " } \n", - " } \n", - " ] \n", - " }, \n", - " \"DependsOn\": [ \n", - " \"CreateEvaluationAction\" \n", - " ] \n", - " }, \n", - " { \n", - " \"Name\": \"EvaluateCustomModelMetrics\", \n", - " \"Type\": \"Training\", \n", - " \"DependsOn\": [ \n", - " \"EvaluateCustomInferenceModel\" \n", - " ], \n", - " \"Arguments\": { \n", - " \"TrainingJobName\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " \"custom-llmaj-eval\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " \"RoleArn\": \"arn:aws:iam::052150106756:role/Admin\", \n", - " \"ServerlessJobConfig\": { \n", - " \"BaseModelArn\": \n", - " \"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/ \n", - " Model/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\", \n", - " \"AcceptEula\": true, \n", - " \"JobType\": \"Evaluation\", \n", - " \"EvaluationType\": \"LLMAJEvaluation\" \n", - " }, \n", - " \"StoppingCondition\": { \n", - " \"MaxRuntimeInSeconds\": 86400 \n", - " }, \n", - " \"HyperParameters\": { \n", - " \"name\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " \"custom-llmaj-eval\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " }, \n", - " \"judge_model_id\": \n", - " \"anthropic.claude-3-5-haiku-20241022-v1:0\", \n", - " \"inference_data_s3_path\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \n", - " \"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat \n", - " h\" \n", - " }, \n", - " \"/\", \n", - " { \n", - " \"Get\": \n", - " \"Steps.EvaluateCustomInferenceModel.TrainingJobName\" \n", - " }, \n", - " \"/output/output/\", \n", - " \"CustomInference\", \n", - " \"/eval_results/inference_output.jsonl\" \n", - " ] \n", - " } \n", - " }, \n", - " \"output_path\": \"s3://mufi-test-serverless-smtj/eval\", \n", - " \"llmaj_metrics\": \"[\\\"Completeness\\\", \n", - " \\\"Faithfulness\\\"]\", \n", - " \"custom_metrics_s3_path\": \n", - " \"s3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta- \n", - " 04295d9020251130-002201/custom-metrics.json\", \n", - " \"max_new_tokens\": \"8192\", \n", - " \"temperature\": \"0\", \n", - " \"top_k\": \"-1\", \n", - " \"top_p\": \"1.0\" \n", - " }, \n", - " \"OutputDataConfig\": { \n", - " \"S3OutputPath\": \"s3://mufi-test-serverless-smtj/eval\", \n", - " \"CompressionType\": \"NONE\" \n", - " }, \n", - " \"ModelPackageConfig\": { \n", - " \"ModelPackageGroupArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te \n", - " st-finetuned-models-gamma\", \n", - " \"SourceModelPackageArn\": \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin \n", - " etuned-models-gamma/28\" \n", - " } \n", - " } \n", - " }, \n", - " { \n", - " \"Name\": \"AssociateLineage\", \n", - " \"Type\": \"Lineage\", \n", - " \"DependsOn\": [ \n", - " \"CreateEvaluationAction\" \n", - " ], \n", - " \"Arguments\": { \n", - " \"Artifacts\": [ \n", - " { \n", - " \"ArtifactName\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-inference-results\" \n", - " ] \n", - " } \n", - " }, \n", - " \"ArtifactType\": \"InferenceResults\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \n", - " \"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat \n", - " h\" \n", - " } \n", - " } \n", - " }, \n", - " { \n", - " \"ArtifactName\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"ArtifactType\": \"EvaluationReport\", \n", - " \"Source\": { \n", - " \"SourceUri\": { \n", - " \"Get\": \n", - " \"Steps.EvaluateCustomModelMetrics.OutputDataConfig.S3OutputPath\" \n", - " } \n", - " } \n", - " } \n", - " ], \n", - " \"Associations\": [ \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-inference-results\" \n", - " ] \n", - " } \n", - " }, \n", - " \"Type\": \"Artifact\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " }, \n", - " { \n", - " \"Source\": { \n", - " \"Name\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"-\", \n", - " \"Values\": [ \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " }, \n", - " \"custom-eval-report\" \n", - " ] \n", - " } \n", - " }, \n", - " \"Type\": \"Artifact\" \n", - " }, \n", - " \"Destination\": { \n", - " \"Arn\": { \n", - " \"Std:Join\": { \n", - " \"On\": \"/\", \n", - " \"Values\": [ \n", - " \"arn:aws:sagemaker:us-west-2:052150106756:ac \n", - " tion\", \n", - " { \n", - " \"Get\": \"Execution.PipelineExecutionId\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " }, \n", - " \"AssociationType\": \"ContributedTo\" \n", - " } \n", - " ] \n", - " } \n", - " } \n", - " ] \n", - " } \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Rendered pipeline definition: \u001b]8;id=358999;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py\u001b\\\u001b[2mbase_evaluator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=565177;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/base_evaluator.py#702\u001b\\\u001b[2m702\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Version\"\u001b[0m: \u001b[38;2;0;135;0m\"2020-12-01\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Metadata\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MlflowResourceArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m/mmlu-eval-experiment\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Parameters\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Actions\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ActionType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceType\"\u001b[0m: \u001b[38;2;0;135;0m\"ModelPackage\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Properties\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineExecutionArn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"PipelineName\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SagemakerModelEvaluationType2-llmaj\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Contexts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ContextType\"\u001b[0m: \u001b[38;2;0;135;0m\"PipelineExecution\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionArn\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Action\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Context\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:artifact/2b64ef9fe915b\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m3138877d772ec489bef\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomInferenceModel\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"TrainingJobName\"\u001b[0m: \u001b[38;2;0;135;0m\"CustomInference\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"BenchmarkEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"name\"\u001b[0m: \u001b[38;2;0;135;0m\"CustomInference\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"task\"\u001b[0m: \u001b[38;2;0;135;0m\"inference_only\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"InputDataConfig\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ChannelName\"\u001b[0m: \u001b[38;2;0;135;0m\"train\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataSource\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3DataType\"\u001b[0m: \u001b[38;2;0;135;0m\"S3Prefix\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3Uri\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://my-sagemaker-sherpa-dataset/dataset/gen-qa-formatted-datas\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0met/gen_qa.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluateCustomModelMetrics\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Training\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluateCustomInferenceModel\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"TrainingJobName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-llmaj-eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"RoleArn\"\u001b[0m: \u001b[38;2;0;135;0m\"arn:aws:iam::052150106756:role/Admin\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ServerlessJobConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"BaseModelArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mModel/meta-textgeneration-llama-3-2-1b-instruct/1.10.0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AcceptEula\"\u001b[0m: true, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"JobType\"\u001b[0m: \u001b[38;2;0;135;0m\"Evaluation\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"EvaluationType\"\u001b[0m: \u001b[38;2;0;135;0m\"LLMAJEvaluation\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"StoppingCondition\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"MaxRuntimeInSeconds\"\u001b[0m: \u001b[1;36m86400\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"HyperParameters\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-llmaj-eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"judge_model_id\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"anthropic.claude-3-5-haiku-20241022-v1:0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"inference_data_s3_path\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mh\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomInferenceModel.TrainingJobName\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"/output/output/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CustomInference\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"/eval_results/inference_output.jsonl\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"output_path\"\u001b[0m: \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"llmaj_metrics\"\u001b[0m: \u001b[38;2;0;135;0m\"\u001b[0m\u001b[1;38;2;0;135;0m[\u001b[0m\u001b[38;2;0;135;0m\\\"Completeness\\\", \u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\\\"Faithfulness\\\"\u001b[0m\u001b[1;38;2;0;135;0m]\u001b[0m\u001b[38;2;0;135;0m\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom_metrics_s3_path\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval/evaluationinputs/eval-meta-\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m04295d9020251130-002201/custom-metrics.json\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"max_new_tokens\"\u001b[0m: \u001b[38;2;0;135;0m\"8192\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"temperature\"\u001b[0m: \u001b[38;2;0;135;0m\"0\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_k\"\u001b[0m: \u001b[38;2;0;135;0m\"-1\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"top_p\"\u001b[0m: \u001b[38;2;0;135;0m\"1.0\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"OutputDataConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"S3OutputPath\"\u001b[0m: \u001b[38;2;0;135;0m\"s3://mufi-test-serverless-smtj/eval\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CompressionType\"\u001b[0m: \u001b[38;2;0;135;0m\"NONE\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageConfig\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ModelPackageGroupArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package-group/te\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mst-finetuned-models-gamma\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceModelPackageArn\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-fin\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0metuned-models-gamma/28\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[38;2;0;135;0m\"AssociateLineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Lineage\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"DependsOn\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"CreateEvaluationAction\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arguments\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Artifacts\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-inference-results\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"InferenceResults\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomInferenceModel.OutputDataConfig.S3OutputPat\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mh\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactName\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"ArtifactType\"\u001b[0m: \u001b[38;2;0;135;0m\"EvaluationReport\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"SourceUri\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Steps.EvaluateCustomModelMetrics.OutputDataConfig.S3OutputPath\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Associations\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-inference-results\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Source\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Name\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"-\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"custom-eval-report\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Type\"\u001b[0m: \u001b[38;2;0;135;0m\"Artifact\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Destination\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Arn\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Std:Join\"\u001b[0m: \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"On\"\u001b[0m: \u001b[38;2;0;135;0m\"/\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Values\"\u001b[0m: \u001b[1m[\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"arn:aws:sagemaker:us-west-2:052150106756:ac\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mtion\"\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m{\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"Get\"\u001b[0m: \u001b[38;2;0;135;0m\"Execution.PipelineExecutionId\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m\"AssociationType\"\u001b[0m: \u001b[38;2;0;135;0m\"ContributedTo\"\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m]\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:22:02] INFO Found existing pipeline: execution.py:199\n", - " SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c \n", - " 6e9 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:22:02]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found existing pipeline: \u001b]8;id=729179;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=511166;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#199\u001b\\\u001b[2m199\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m6e9\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Updating pipeline execution.py:202\n", - " SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c \n", - " 6e9 with latest definition \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline \u001b]8;id=567297;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=249002;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#202\u001b\\\u001b[2m202\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m6e9\u001b[0m with latest definition \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Updating pipeline resource. resources.py:30306\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Updating pipeline resource. \u001b]8;id=897054;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=497721;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/resources.py#30306\u001b\\\u001b[2m30306\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:22:03] INFO Successfully updated pipeline: execution.py:208\n", - " SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c \n", - " 6e9 \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:22:03]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Successfully updated pipeline: \u001b]8;id=916795;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=385336;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#208\u001b\\\u001b[2m208\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m SagemakerEvaluation-LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m6e9\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Starting pipeline execution: eval-meta-04295d90-1764462123 execution.py:263\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Starting pipeline execution: eval-meta-04295d90-\u001b[1;36m1764462123\u001b[0m \u001b]8;id=41189;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=464412;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#263\u001b\\\u001b[2m263\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Pipeline execution started: execution.py:274\n", - " arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \n", - " -LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318n \n", - " ngjk32f \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Pipeline execution started: \u001b]8;id=227887;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=844359;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#274\u001b\\\u001b[2m274\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -LLMAJEvaluation-\u001b[93mf952b79f-4afe-4f2f-b45d-17894533c6e9\u001b[0m/execution/m318n \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m ngjk32f \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Evaluation job started!\n", - "Job ARN: arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318nngjk32f\n", - "Job Name: eval-meta-04295d90\n", - "Status: Executing\n" - ] - }, - { - "data": { - "text/html": [ - "
LLMAJEvaluationExecution(\n", - "│ arn='arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318nngjk32f',\n", - "│ name='eval-meta-04295d90',\n", - "│ status=PipelineExecutionStatus(overall_status='Executing', step_details=[], failure_reason=None),\n", - "│ last_modified_time=datetime.datetime(2025, 11, 29, 16, 22, 3, 689000, tzinfo=tzlocal()),\n", - "│ eval_type=<EvalType.LLM_AS_JUDGE: 'llmasjudge'>,\n", - "│ s3_output_path='s3://mufi-test-serverless-smtj/eval/',\n", - "│ steps=[]\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mLLMAJEvaluationExecution\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0marn\u001b[0m=\u001b[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation-LLMAJEvaluation-f952b79f-4afe-4f2f-b45d-17894533c6e9/execution/m318nngjk32f'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'eval-meta-04295d90'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m, \u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mlast_modified_time\u001b[0m=\u001b[1;38;2;225;0;225mdatetime\u001b[0m\u001b[1;38;2;225;0;225m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m11\u001b[0m, \u001b[1;36m29\u001b[0m, \u001b[1;36m16\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m3\u001b[0m, \u001b[1;36m689000\u001b[0m, \u001b[38;2;215;175;0mtzinfo\u001b[0m=\u001b[1;38;2;225;0;225mtzlocal\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0meval_type\u001b[0m=\u001b[1m<\u001b[0m\u001b[1;38;2;225;0;225mEvalType.LLM_AS_JUDGE:\u001b[0m\u001b[39m \u001b[0m\u001b[38;2;0;135;0m'llmasjudge'\u001b[0m\u001b[1m>\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0ms3_output_path\u001b[0m=\u001b[38;2;0;135;0m's3://mufi-test-serverless-smtj/eval/'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0msteps\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "# Run evaluation\n", "execution = evaluator.evaluate()\n", @@ -1468,7 +278,9 @@ "print(f\"Status: {execution.status.overall_status}\")\n", "\n", "pprint(execution)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -1481,56 +293,16 @@ }, { "cell_type": "code", - "execution_count": 9, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n", - "│ overall_status='Executing',\n", - "│ step_details=[\n", - "│ │ StepDetail(\n", - "│ │ │ name='CreateEvaluationAction',\n", - "│ │ │ status='Starting',\n", - "│ │ │ start_time='2025-11-29T16:22:04.148000-08:00',\n", - "│ │ │ end_time='<sagemaker.core.utils.utils.Unassigned object at 0x1298e7170>',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ )\n", - "│ ],\n", - "│ failure_reason=None\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Executing'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Starting'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-29T16:22:04.148000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'\u001b[0m\u001b[1;38;2;0;135;0m<\u001b[0m\u001b[1;38;2;0;135;0msagemaker.core.utils.utils.Unassigned\u001b[0m\u001b[38;2;0;135;0m object at 0x1298e7170\u001b[0m\u001b[1;38;2;0;135;0m>\u001b[0m\u001b[38;2;0;135;0m'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "# Refresh status\n", "execution.refresh()\n", "\n", "# Display job status using rich pprint\n", "pprint(execution.status)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -1543,576 +315,24 @@ }, { "cell_type": "code", - "execution_count": 10, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Pipeline Execution Status ───────────────────────────────────────────╮\n", - "│ Overall Status Succeeded │\n", - "│ Target Status Succeeded │\n", - "│ Elapsed Time 1885.8s │\n", - "│ │\n", - "│ Pipeline Steps │\n", - "│ Step Name Status Duration │\n", - "│ AssociateLineage Succeeded 1.9s │\n", - "│ EvaluateCustomModelMetrics Succeeded 1327.1s │\n", - "│ EvaluateCustomInferenceModel Succeeded 554.1s │\n", - "│ CreateEvaluationAction Succeeded 4.5s │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mPipeline Execution Status\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mOverall Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mTarget Status \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[1;37mSucceeded\u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;36m \u001b[0m\u001b[1;36mElapsed Time \u001b[0m\u001b[1;36m \u001b[0m\u001b[37m \u001b[0m\u001b[37m1885.8s \u001b[0m\u001b[37m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35mPipeline Steps\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35mStep Name \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mStatus \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35mDuration \u001b[0m\u001b[1;35m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mAssociateLineage \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m1.9s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomModelMetrics \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m1327.1s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mEvaluateCustomInferenceModel \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m554.1s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m \u001b[0m\u001b[36mCreateEvaluationAction \u001b[0m\u001b[36m \u001b[0m\u001b[33m \u001b[0m\u001b[32mSucceeded\u001b[0m\u001b[33m \u001b[0m\u001b[33m \u001b[0m\u001b[32m \u001b[0m\u001b[32m4.5s \u001b[0m\u001b[32m \u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/29/25 16:53:37] INFO Final Resource Status: Succeeded execution.py:979\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 16:53:37]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: Succeeded \u001b]8;id=524139;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=278480;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#979\u001b\\\u001b[2m979\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "# Wait for job completion (optional)\n", "# This will poll every 5 seconds for up to 1 hour\n", "execution.wait(poll=5, timeout=3600)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 11, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 17:02:07] INFO Extracted training job name: show_results_utils.py:52\n", - " pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from \n", - " step: EvaluateCustomModelMetrics (priority: Custom) \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 17:02:07]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=177834;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=168478;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#52\u001b\\\u001b[2m52\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModelMetrics \u001b[1m(\u001b[0mpriority: Custom\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────────────────── Result Artifacts Location ───────────────────────────────────────────╮\n", - "│ │\n", - "│ │\n", - "│ 📦 Full evaluation artifacts available at: │\n", - "│ s3://mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955/ │\n", - "│ │\n", - "│ │\n", - "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001b[34m╭─\u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m \u001b[0m\u001b[1;34mResult Artifacts Location\u001b[0m\u001b[34m \u001b[0m\u001b[34m──────────────────────────────────────────\u001b[0m\u001b[34m─╮\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[1;34m📦 \u001b[0m\u001b[1mFull evaluation artifacts available at:\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[36m s3://mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955/\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m│\u001b[0m \u001b[34m│\u001b[0m\n", - "\u001b[34m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO S3 bucket: mufi-test-serverless-smtj, prefix: eval show_results_utils.py:341\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 bucket: mufi-test-serverless-smtj, prefix: eval \u001b]8;id=453165;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=425984;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#341\u001b\\\u001b[2m341\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Extracted training job name: show_results_utils.py:52\n", - " pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from \n", - " step: EvaluateCustomModelMetrics (priority: Custom) \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted training job name: \u001b]8;id=324161;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=683512;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#52\u001b\\\u001b[2m52\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955 from \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m step: EvaluateCustomModelMetrics \u001b[1m(\u001b[0mpriority: Custom\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for bedrock summary in show_results_utils.py:361\n", - " s3://mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-E \n", - " valuateCustomModelM-lN73ONZ955/output/output/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for bedrock summary in \u001b]8;id=308182;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=660550;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#361\u001b\\\u001b[2m361\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/pipelines-m318nngjk32f-E\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mvaluateCustomModelM-lN73ONZ955/output/output/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found bedrock job name: custom-llmaj-eval-m318nngjk32f show_results_utils.py:377\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found bedrock job name: custom-llmaj-eval-m318nngjk32f \u001b]8;id=705765;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=855376;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#377\u001b\\\u001b[2m377\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Searching for JSONL in show_results_utils.py:387\n", - " s3://mufi-test-serverless-smtj/eval/custom-llmaj-eval-m318nn \n", - " gjk32f/ \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Searching for JSONL in \u001b]8;id=236968;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=874421;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#387\u001b\\\u001b[2m387\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/eval/custom-llmaj-eval-m318nn\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mgjk32f/\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found JSONL: show_results_utils.py:405\n", - " eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode \n", - " l/taskTypes/General/datasets/CustomDataset/4a22339b-b5b1-421 \n", - " 4-9c1e-0c0bf2c71fd6_output.jsonl \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found JSONL: \u001b]8;id=648967;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=247115;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#405\u001b\\\u001b[2m405\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m l/taskTypes/General/datasets/CustomDataset/\u001b[93m4a22339b-b5b1-421\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m4-9c1e-0c0bf2c71fd6\u001b[0m_output.jsonl \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Found results file: show_results_utils.py:413\n", - " eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode \n", - " l/taskTypes/General/datasets/CustomDataset/4a22339b-b5b1-421 \n", - " 4-9c1e-0c0bf2c71fd6_output.jsonl \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found results file: \u001b]8;id=234223;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=249361;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#413\u001b\\\u001b[2m413\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m eval/custom-llmaj-eval-m318nngjk32f/ld39q6di74sg/models/mode \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m l/taskTypes/General/datasets/CustomDataset/\u001b[93m4a22339b-b5b1-421\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[93m4-9c1e-0c0bf2c71fd6\u001b[0m_output.jsonl \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Loaded 3 evaluation results show_results_utils.py:429\n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Loaded \u001b[1;36m3\u001b[0m evaluation results \u001b]8;id=139737;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py\u001b\\\u001b[2mshow_results_utils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=460642;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/common_utils/show_results_utils.py#429\u001b\\\u001b[2m429\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
- "═══ Evaluation 1 of 3 ═══\n",
- "\n",
- "\n"
- ],
- "text/plain": [
- "\n",
- "\u001b[1;36m═══ Evaluation 1 of 3 ═══\u001b[0m\n",
- "\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Prompt: What is the next number in this series? 1, 2, 4, 8, 16, ?\n", - "\n" - ], - "text/plain": [ - "\u001b[1mPrompt:\u001b[0m What is the next number in this series? \u001b[1;36m1\u001b[0m, \u001b[1;36m2\u001b[0m, \u001b[1;36m4\u001b[0m, \u001b[1;36m8\u001b[0m, \u001b[1;36m16\u001b[0m, ?\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Model Response: The next number in the series is 32.\n", - "\n" - ], - "text/plain": [ - "\u001b[1mModel Response:\u001b[0m The next number in the series is \u001b[1;36m32\u001b[0m.\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - " Metric Score \n", - " ───────────────────────────────────────────── \n", - " Builtin.Completeness 100.0% \n", - " Builtin.Faithfulness 100.0% \n", - " \n", - "\n" - ], - "text/plain": [ - " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mMetric \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m Score\u001b[0m\u001b[1;35m \u001b[0m \n", - " ───────────────────────────────────────────── \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Completeness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Faithfulness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
- "═══ Evaluation 2 of 3 ═══\n",
- "\n",
- "\n"
- ],
- "text/plain": [
- "\n",
- "\u001b[1;36m═══ Evaluation 2 of 3 ═══\u001b[0m\n",
- "\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Prompt: What is the symbol that ends the sentence as a question\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mPrompt:\u001b[0m What is the symbol that ends the sentence as a question\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Model Response: The symbol that ends the sentence as a question is: ?\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mModel Response:\u001b[0m The symbol that ends the sentence as a question is: ?\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - " Metric Score \n", - " ───────────────────────────────────────────── \n", - " Builtin.Completeness 100.0% \n", - " Builtin.Faithfulness 100.0% \n", - " \n", - "\n" - ], - "text/plain": [ - " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mMetric \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m Score\u001b[0m\u001b[1;35m \u001b[0m \n", - " ───────────────────────────────────────────── \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Completeness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Faithfulness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 100.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n",
- "═══ Evaluation 3 of 3 ═══\n",
- "\n",
- "\n"
- ],
- "text/plain": [
- "\n",
- "\u001b[1;36m═══ Evaluation 3 of 3 ═══\u001b[0m\n",
- "\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Prompt: Repeat only the last two words of the following: I ate a hamburger today and it was kind of dry\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mPrompt:\u001b[0m Repeat only the last two words of the following: I ate a hamburger today and it was kind of dry\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Model Response: I ate a hamburger today and it was kind of dry.\n",
- "\n"
- ],
- "text/plain": [
- "\u001b[1mModel Response:\u001b[0m I ate a hamburger today and it was kind of dry.\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - " Metric Score \n", - " ───────────────────────────────────────────── \n", - " Builtin.Completeness 0.0% \n", - " Builtin.Faithfulness 0.0% \n", - " \n", - "\n" - ], - "text/plain": [ - " \n", - " \u001b[1;35m \u001b[0m\u001b[1;35mMetric \u001b[0m\u001b[1;35m \u001b[0m \u001b[1;35m \u001b[0m\u001b[1;35m Score\u001b[0m\u001b[1;35m \u001b[0m \n", - " ───────────────────────────────────────────── \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Completeness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 0.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \u001b[36m \u001b[0m\u001b[36mBuiltin.Faithfulness \u001b[0m\u001b[36m \u001b[0m \u001b[32m \u001b[0m\u001b[32m 0.0%\u001b[0m\u001b[32m \u001b[0m \n", - " \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n" - ], - "text/plain": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
══════════════════════════════════════════════════════════════════════\n", - "\n" - ], - "text/plain": [ - "══════════════════════════════════════════════════════════════════════\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Showing evaluations 1-3 of 3\n", - "\n", - "\n" - ], - "text/plain": [ - "\u001b[1;36mShowing evaluations \u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;36m-\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;36m of \u001b[0m\u001b[1;36m3\u001b[0m\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
══════════════════════════════════════════════════════════════════════\n", - "\n" - ], - "text/plain": [ - "══════════════════════════════════════════════════════════════════════\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "# Display results\n", "execution.show_results(limit=10, offset=0, show_explanations=False)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -2125,222 +345,11 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/29/25 17:02:15] WARNING Could not extract eval_type from ARN: execution.py:146\n", - " arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \n", - " -llmasjudge/execution/4hr7446yft1d \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 17:02:15]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m Could not extract eval_type from ARN: \u001b]8;id=315627;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=953607;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#146\u001b\\\u001b[2m146\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -llmasjudge/execution/4hr7446yft1d \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Extracted s3_output_path from training job execution.py:367\n", - " pipelines-4hr7446yft1d-EvaluateCustomModelM-qePWbkcMxz: \n", - " s3://mufi-test-serverless-smtj/eval \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=739992;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=203397;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-4hr7446yft1d-EvaluateCustomModelM-qePWbkcMxz: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/\u001b[0m\u001b[38;2;225;0;225meval\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
WARNING Could not extract eval_type from ARN: execution.py:146\n", - " arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \n", - " -llmasjudge \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m Could not extract eval_type from ARN: \u001b]8;id=550335;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=858100;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#146\u001b\\\u001b[2m146\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -llmasjudge \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
WARNING Could not extract eval_type from ARN: execution.py:146\n", - " arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \n", - " -llmasjudge/execution/4hr7446yft1d \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m Could not extract eval_type from ARN: \u001b]8;id=379628;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=725705;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#146\u001b\\\u001b[2m146\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:052150106756:pipeline/SagemakerEvaluation \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m -llmasjudge/execution/4hr7446yft1d \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
PipelineExecutionStatus(\n", - "│ overall_status='Succeeded',\n", - "│ step_details=[\n", - "│ │ StepDetail(\n", - "│ │ │ name='AssociateLineage',\n", - "│ │ │ status='Succeeded',\n", - "│ │ │ start_time='2025-11-19T15:45:57.889000-08:00',\n", - "│ │ │ end_time='2025-11-19T15:45:59.266000-08:00',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ ),\n", - "│ │ StepDetail(\n", - "│ │ │ name='EvaluateCustomModelMetrics',\n", - "│ │ │ status='Succeeded',\n", - "│ │ │ start_time='2025-11-19T15:27:55.641000-08:00',\n", - "│ │ │ end_time='2025-11-19T15:45:56.749000-08:00',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ ),\n", - "│ │ StepDetail(\n", - "│ │ │ name='EvaluateCustomInferenceModel',\n", - "│ │ │ status='Succeeded',\n", - "│ │ │ start_time='2025-11-19T15:18:07.804000-08:00',\n", - "│ │ │ end_time='2025-11-19T15:27:54.474000-08:00',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ ),\n", - "│ │ StepDetail(\n", - "│ │ │ name='CreateEvaluationAction',\n", - "│ │ │ status='Succeeded',\n", - "│ │ │ start_time='2025-11-19T15:18:05.550000-08:00',\n", - "│ │ │ end_time='2025-11-19T15:18:07.332000-08:00',\n", - "│ │ │ display_name=None,\n", - "│ │ │ failure_reason=None\n", - "│ │ )\n", - "│ ],\n", - "│ failure_reason=None\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001b[1;38;2;225;0;225mPipelineExecutionStatus\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0moverall_status\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mstep_details\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'AssociateLineage'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:45:57.889000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:45:59.266000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomModelMetrics'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:27:55.641000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:45:56.749000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'EvaluateCustomInferenceModel'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:18:07.804000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:27:54.474000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;38;2;225;0;225mStepDetail\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mname\u001b[0m=\u001b[38;2;0;135;0m'CreateEvaluationAction'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstatus\u001b[0m=\u001b[38;2;0;135;0m'Succeeded'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mstart_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:18:05.550000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mend_time\u001b[0m=\u001b[38;2;0;135;0m'2025-11-19T15:18:07.332000-08:00'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mdisplay_name\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[38;2;215;175;0mfailure_reason\u001b[0m=\u001b[3;38;2;225;0;225mNone\u001b[0m\n", - "\u001b[1m)\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n", - "│ in <module>:17 │\n", - "│ │\n", - "│ 14 ) │\n", - "│ 15 pprint(existing_execution.status) │\n", - "│ 16 │\n", - "│ ❱ 17 existing_execution.show_results(limit=5, offset=0, show_explanations=False) │\n", - "│ 18 │\n", - "│ │\n", - "│ /Users/mufi/.local/share/mise/installs/python/3.12.12/lib/python3.12/site-packages/pydantic/main │\n", - "│ .py:1026 in __getattr__ │\n", - "│ │\n", - "│ 1023 │ │ │ │ │ │ return super().__getattribute__(item) # Raises AttributeError i │\n", - "│ 1024 │ │ │ │ │ else: │\n", - "│ 1025 │ │ │ │ │ │ # this is the current error │\n", - "│ ❱ 1026 │ │ │ │ │ │ raise AttributeError(f'{type(self).__name__!r} object has no att │\n", - "│ 1027 │ │ │\n", - "│ 1028 │ │ def __setattr__(self, name: str, value: Any) -> None: │\n", - "│ 1029 │ │ │ if (setattr_handler := self.__pydantic_setattr_handlers__.get(name)) is not │\n", - "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", - "AttributeError: 'EvaluationPipelineExecution' object has no attribute 'show_results'\n", - "\n" - ], - "text/plain": [ - "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", - "\u001b[38;2;255;0;0m│\u001b[0m in \u001b[92m
[11/29/25 17:02:21] INFO Extracted s3_output_path from training job execution.py:367\n", - " pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955: \n", - " s3://mufi-test-serverless-smtj/eval \n", - "\n" - ], - "text/plain": [ - "\u001b[2;36m[11/29/25 17:02:21]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Extracted s3_output_path from training job \u001b]8;id=802368;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py\u001b\\\u001b[2mexecution.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=75226;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-train/src/sagemaker/train/evaluate/execution.py#367\u001b\\\u001b[2m367\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m pipelines-m318nngjk32f-EvaluateCustomModelM-lN73ONZ955: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/mufi-test-serverless-smtj/\u001b[0m\u001b[38;2;225;0;225meval\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 2 LLM-as-Judge evaluation jobs\n", - " - m318nngjk32f: Succeeded\n", - " - 2m5hczli7vdp: Failed\n" - ] - } - ], "source": [ "from sagemaker.train.evaluate import LLMAsJudgeEvaluator\n", "\n", @@ -2404,7 +386,9 @@ "print(f\"Found {len(all_executions)} LLM-as-Judge evaluation jobs\")\n", "for execution in all_executions:\n", " print(f\" - {execution.name}: {execution.status.overall_status}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -2417,14 +401,14 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "# Uncomment to stop the job\n", "# execution.stop()\n", "# print(f\"Execution stopped. Status: {execution.status.overall_status}\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", diff --git a/v3-examples/model-customization-examples/model_builder_deployment_notebook.ipynb b/v3-examples/model-customization-examples/model_builder_deployment_notebook.ipynb index fb16da1045..36b7d9a5c4 100644 --- a/v3-examples/model-customization-examples/model_builder_deployment_notebook.ipynb +++ b/v3-examples/model-customization-examples/model_builder_deployment_notebook.ipynb @@ -9,8 +9,7 @@ "\n", "from sagemaker.core.resources import TrainingJob, HubContent, InferenceComponent, ModelPackage\n", "from sagemaker.core.utils.utils import Unassigned\n", - "! aws configure add-model --service-model file://sagemaker-2017-07-24.normal.json --service-name sagemaker\n", - "! ada credentials update --provider=isengard --account=052150106756 --role=Admin --profile=default --once\n", + "! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", "! aws configure set region us-west-2" ], "outputs": [], @@ -114,12 +113,7 @@ { "cell_type": "code", "id": "695a83cf38e46cea", - "metadata": { - "ExecuteTime": { - "end_time": "2025-11-25T20:15:30.741329Z", - "start_time": "2025-11-25T20:15:26.098063Z" - } - }, + "metadata": {}, "source": [ "from sagemaker.core.resources import TrainingJob\n", "from sagemaker.serve import ModelBuilder\n", @@ -127,143 +121,8 @@ "model_builder = ModelBuilder(model=TrainingJob.get(training_job_name=\"meta-textgeneration-llama-3-2-1b-instruct-sft-20251123162832\"))\n", "model_builder.fetch_endpoint_names_for_base_model()" ], - "outputs": [ - { - "data": { - "text/plain": [ - "\u001B[2;36m[11/25/25 12:15:26]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Found credentials in shared credentials file: ~\u001B[38;2;225;0;225m/.aws/\u001B[0m\u001B[38;2;225;0;225mcredentials\u001B[0m \u001B]8;id=181853;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/venv/lib/python3.12/site-packages/botocore/credentials.py\u001B\\\u001B[2mcredentials.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=841908;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/venv/lib/python3.12/site-packages/botocore/credentials.py#1392\u001B\\\u001B[2m1392\u001B[0m\u001B]8;;\u001B\\\n" - ], - "text/html": [ - "
[11/25/25 12:15:26] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1392\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/nargokul/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m[11/25/25 12:15:28]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Found credentials in shared credentials file: ~\u001B[38;2;225;0;225m/.aws/\u001B[0m\u001B[38;2;225;0;225mcredentials\u001B[0m \u001B]8;id=795775;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/venv/lib/python3.12/site-packages/botocore/credentials.py\u001B\\\u001B[2mcredentials.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=603883;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/venv/lib/python3.12/site-packages/botocore/credentials.py#1392\u001B\\\u001B[2m1392\u001B[0m\u001B]8;;\u001B\\\n" - ], - "text/html": [ - "
[11/25/25 12:15:28] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1392\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;135;0mDEBUG \u001B[0m Auto-detecting optimal instance type for model\u001B[33m...\u001B[0m \u001B]8;id=748521;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/model_builder_utils.py\u001B\\\u001B[2mmodel_builder_utils.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=805191;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/model_builder_utils.py#337\u001B\\\u001B[2m337\u001B[0m\u001B]8;;\u001B\\\n" - ], - "text/html": [ - "
DEBUG Auto-detecting optimal instance type for model... model_builder_utils.py:337\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;135;0mDEBUG \u001B[0m Using default CPU instance type: ml.m5.large \u001B]8;id=350223;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/model_builder_utils.py\u001B\\\u001B[2mmodel_builder_utils.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=369639;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/model_builder_utils.py#369\u001B\\\u001B[2m369\u001B[0m\u001B]8;;\u001B\\\n" - ], - "text/html": [ - "
DEBUG Using default CPU instance type: ml.m5.large model_builder_utils.py:369\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m[11/25/25 12:15:29]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;215;0;0mERROR \u001B[0m recipe_name: llmft_llama3_2_1b_instruct_seq4k_gpu_sft_lora \u001B]8;id=874042;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/model_builder.py\u001B\\\u001B[2mmodel_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=67069;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/model_builder.py#1642\u001B\\\u001B[2m1642\u001B[0m\u001B]8;;\u001B\\\n" - ], - "text/html": [ - "
[11/25/25 12:15:29] ERROR recipe_name: llmft_llama3_2_1b_instruct_seq4k_gpu_sft_lora model_builder.py:1642\n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;215;0;0mERROR \u001B[0m checking for \u001B]8;id=635731;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/model_builder.py\u001B\\\u001B[2mmodel_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=357381;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/model_builder.py#1644\u001B\\\u001B[2m1644\u001B[0m\u001B]8;;\u001B\\\n", - "\u001B[2;36m \u001B[0m arn:aws:sagemaker:us-west-2:052150106756:inference-component/e2e \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m -\u001B[1;36m607831\u001B[0m-inference-component \u001B[2m \u001B[0m\n" - ], - "text/html": [ - "
ERROR checking for model_builder.py:1644\n", - " arn:aws:sagemaker:us-west-2:052150106756:inference-component/e2e \n", - " -607831-inference-component \n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m[11/25/25 12:15:30]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;215;0;0mERROR \u001B[0m checking for \u001B]8;id=271259;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/model_builder.py\u001B\\\u001B[2mmodel_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=932028;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/model_builder.py#1644\u001B\\\u001B[2m1644\u001B[0m\u001B]8;;\u001B\\\n", - "\u001B[2;36m \u001B[0m arn:aws:sagemaker:us-west-2:052150106756:inference-component/e2e \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m -\u001B[1;36m2358\u001B[0m-inference-component-adapter \u001B[2m \u001B[0m\n" - ], - "text/html": [ - "
[11/25/25 12:15:30] ERROR checking for model_builder.py:1644\n", - " arn:aws:sagemaker:us-west-2:052150106756:inference-component/e2e \n", - " -2358-inference-component-adapter \n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;215;0;0mERROR \u001B[0m checking for \u001B]8;id=634683;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/model_builder.py\u001B\\\u001B[2mmodel_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=475111;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/model_builder.py#1644\u001B\\\u001B[2m1644\u001B[0m\u001B]8;;\u001B\\\n", - "\u001B[2;36m \u001B[0m arn:aws:sagemaker:us-west-2:052150106756:inference-component/e2e \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m -\u001B[1;36m2358\u001B[0m-inference-component \u001B[2m \u001B[0m\n" - ], - "text/html": [ - "
ERROR checking for model_builder.py:1644\n", - " arn:aws:sagemaker:us-west-2:052150106756:inference-component/e2e \n", - " -2358-inference-component \n", - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "{'e2e-2358', 'e2e-607831'}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 3 + "outputs": [], + "execution_count": null }, { "cell_type": "code", @@ -359,7 +218,7 @@ "from sagemaker.core.resources import ModelPackage\n", "\n", "name = f\"e2e-{random.randint(100, 1000000)}\"\n", - "model_package = ModelPackage.get(model_package_name=\"arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/68\")\n", + "model_package = ModelPackage.get(model_package_name=\"arn:aws:sagemaker:us-west-2:<>:model-package/test-finetuned-models-gamma/68\")\n", "model_builder = ModelBuilder(model=model_package)\n", "model_builder.build()" ], @@ -475,7 +334,7 @@ "id": "533d0f1022d169eb", "metadata": {}, "source": [ - "! ada credentials update --provider=isengard --account=551952248621 --role=Admin --profile=default --once\n" + "! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n" ], "outputs": [], "execution_count": null @@ -493,7 +352,7 @@ "name = f\"e2e-{random.randint(100, 10000)}\"\n", "\n", "# bedrock_builder = BedrockModelBuilder(model=training_job)\n", - "# bedrock_builder.deploy(job_name=name, imported_model_name=name, role_arn=\"arn:aws:iam::551952248621:role/Admin\")" + "# bedrock_builder.deploy(job_name=name, imported_model_name=name, role_arn=\"arn:aws:iam::<>:role/Admin\")" ], "outputs": [], "execution_count": null diff --git a/v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod.ipynb b/v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod.ipynb index f0927dac04..5b0da85a26 100644 --- a/v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod.ipynb +++ b/v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod.ipynb @@ -21,47 +21,22 @@ "Initialize the environment by importing necessary libraries and configuring AWS credentials" ] }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "# Configure AWS credentials and region\n", + "#! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", + "#! aws configure set region us-west-2" + ], + "id": "4a05468e7078023e", + "outputs": [], + "execution_count": null + }, { "cell_type": "code", - "execution_count": 1, "id": "cec1af2d-c0c1-4348-8ee7-502a6d7ee2d0", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[12/02/25 10:24:29] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1364\n", - "\n" - ], - "text/plain": [ - "\u001B[2;36m[12/02/25 10:24:29]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Found credentials in shared credentials file: ~\u001B[38;2;225;0;225m/.aws/\u001B[0m\u001B[38;2;225;0;225mcredentials\u001B[0m \u001B]8;id=932969;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/botocore/credentials.py\u001B\\\u001B[2mcredentials.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=642938;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/botocore/credentials.py#1364\u001B\\\u001B[2m1364\u001B[0m\u001B]8;;\u001B\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/rsareddy/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/html": [ - "
INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1364\n", - "\n" - ], - "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Found credentials in shared credentials file: ~\u001B[38;2;225;0;225m/.aws/\u001B[0m\u001B[38;2;225;0;225mcredentials\u001B[0m \u001B]8;id=777241;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/botocore/credentials.py\u001B\\\u001B[2mcredentials.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=529130;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/botocore/credentials.py#1364\u001B\\\u001B[2m1364\u001B[0m\u001B]8;;\u001B\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "#!/usr/bin/env python3\n", "\n", @@ -80,7 +55,9 @@ "# For MLFlow native metrics in Trainer wait, run below line with approriate region\n", "os.environ[\"SAGEMAKER_MLFLOW_CUSTOM_ENDPOINT\"] = \"https://mlflow.sagemaker.us-west-2.app.aws\"\n", "\n" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -106,24 +83,8 @@ }, { "cell_type": "code", - "execution_count": 3, "id": "07aefa46-29f2-4fcf-86da-b0bd471e0a6a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[12/01/25 12:12:18] INFO SageMaker session not provided. Using default Session. defaults.py:61\n", - "\n" - ], - "text/plain": [ - "\u001B[2;36m[12/01/25 12:12:18]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m SageMaker session not provided. Using default Session. \u001B]8;id=126215;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/train/defaults.py\u001B\\\u001B[2mdefaults.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=479582;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/train/defaults.py#61\u001B\\\u001B[2m61\u001B[0m\u001B]8;;\u001B\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "# For fine-tuning \n", "rlaif_trainer = RLAIFTrainer(\n", @@ -131,16 +92,18 @@ " model_package_group_name=\"sdk-test-finetuned-models\", # Make it Optional\n", " reward_model_id='anthropic.claude-3-5-sonnet-20240620-v1:0',\n", " reward_prompt='Builtin.Correctness',\n", - " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", + " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/mmlu-eval-experiment\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", " mlflow_experiment_name=\"test-rlaif-finetuned-models-exp\", # Optional[str]\n", " mlflow_run_name=\"test-rlaif-finetuned-models-run\", # Optional[str]\n", " training_dataset=\"s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl\", #Optional[]\n", " s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n", " accept_eula=True\n", " #sagemaker_session=sagemaker_session,\n", - " #role=\"arn:aws:iam::052150106756:role/Admin\"\n", + " #role=\"arn:aws:iam::<>:role/Admin\"\n", ")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -154,187 +117,19 @@ }, { "cell_type": "code", - "execution_count": 4, "id": "b31d57c0-9777-428d-8792-557f7be4cfda", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Default Finetuning Options:\n" - ] - }, - { - "data": { - "text/html": [ - "
{\n", - "│ 'data_path': 'None',\n", - "│ 'global_batch_size': '128',\n", - "│ 'judge_model_id': 'bedrock/openai.gpt-oss-120b-1:0',\n", - "│ 'judge_prompt_template': '/opt/ml/code/verl/summarize.jinja',\n", - "│ 'learning_rate': '1e-05',\n", - "│ 'max_epochs': '2',\n", - "│ 'max_prompt_length': '1024',\n", - "│ 'mlflow_run_id': '',\n", - "│ 'mlflow_tracking_uri': '',\n", - "│ 'model_name_or_path': 'meta-llama/Llama-3.2-1B-Instruct',\n", - "│ 'name': 'example-name-c9jrd',\n", - "│ 'output_path': '/opt/ml/model',\n", - "│ 'results_directory': '',\n", - "│ 'resume_from_path': '',\n", - "│ 'rollout': '8',\n", - "│ 'train_val_split_ratio': '0.9',\n", - "│ 'validation_data_path': 'None'\n", - "}\n", - "\n" - ], - "text/plain": [ - "\u001B[1m{\u001B[0m\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'data_path'\u001B[0m: \u001B[38;2;0;135;0m'None'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'global_batch_size'\u001B[0m: \u001B[38;2;0;135;0m'128'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'judge_model_id'\u001B[0m: \u001B[38;2;0;135;0m'bedrock/openai.gpt-oss-120b-1:0'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'judge_prompt_template'\u001B[0m: \u001B[38;2;0;135;0m'/opt/ml/code/verl/summarize.jinja'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'learning_rate'\u001B[0m: \u001B[38;2;0;135;0m'1e-05'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'max_epochs'\u001B[0m: \u001B[38;2;0;135;0m'2'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'max_prompt_length'\u001B[0m: \u001B[38;2;0;135;0m'1024'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'mlflow_run_id'\u001B[0m: \u001B[38;2;0;135;0m''\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'mlflow_tracking_uri'\u001B[0m: \u001B[38;2;0;135;0m''\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'model_name_or_path'\u001B[0m: \u001B[38;2;0;135;0m'meta-llama/Llama-3.2-1B-Instruct'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'name'\u001B[0m: \u001B[38;2;0;135;0m'example-name-c9jrd'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'output_path'\u001B[0m: \u001B[38;2;0;135;0m'/opt/ml/model'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'results_directory'\u001B[0m: \u001B[38;2;0;135;0m''\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'resume_from_path'\u001B[0m: \u001B[38;2;0;135;0m''\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'rollout'\u001B[0m: \u001B[38;2;0;135;0m'8'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'train_val_split_ratio'\u001B[0m: \u001B[38;2;0;135;0m'0.9'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;0;135;0m'validation_data_path'\u001B[0m: \u001B[38;2;0;135;0m'None'\u001B[0m\n", - "\u001B[1m}\u001B[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "data_path:\n", - " Current value: None\n", - " Type: string\n", - " Default: None\n", - " Required: Yes\n", - "\n", - "global_batch_size:\n", - " Current value: 128\n", - " Type: integer\n", - " Default: 128\n", - " Valid options: [128, 256, 512, 1024]\n", - " Required: Yes\n", - "\n", - "judge_model_id:\n", - " Current value: bedrock/openai.gpt-oss-120b-1:0\n", - " Type: string\n", - " Default: bedrock/openai.gpt-oss-120b-1:0\n", - " Required: Yes\n", - "\n", - "judge_prompt_template:\n", - " Current value: /opt/ml/code/verl/summarize.jinja\n", - " Type: string\n", - " Default: /opt/ml/code/verl/summarize.jinja\n", - " Valid options: ['/opt/ml/code/verl/cot.jinja', '/opt/ml/code/verl/evaluate.jinja', '/opt/ml/code/verl/faithfulness.jinja', '/opt/ml/code/verl/summarize.jinja']\n", - "\n", - "learning_rate:\n", - " Current value: 1e-05\n", - " Type: float\n", - " Default: 1e-05\n", - " Range: 1e-07 - 0.001\n", - " Required: Yes\n", - "\n", - "max_epochs:\n", - " Current value: 2\n", - " Type: integer\n", - " Default: 2\n", - " Range: 1 - 30\n", - " Required: Yes\n", - "\n", - "max_prompt_length:\n", - " Current value: 1024\n", - " Type: integer\n", - " Default: 1024\n", - " Range: 512 - 16384\n", - " Required: Yes\n", - "\n", - "mlflow_run_id:\n", - " Current value: \n", - " Type: string\n", - " Default: \n", - "\n", - "mlflow_tracking_uri:\n", - " Current value: \n", - " Type: string\n", - " Default: \n", - "\n", - "model_name_or_path:\n", - " Current value: meta-llama/Llama-3.2-1B-Instruct\n", - " Type: string\n", - " Default: meta-llama/Llama-3.2-1B-Instruct\n", - " Required: Yes\n", - "\n", - "name:\n", - " Current value: example-name-c9jrd\n", - " Type: string\n", - " Default: example-name-c9jrd\n", - " Required: Yes\n", - "\n", - "output_path:\n", - " Current value: /opt/ml/model\n", - " Type: string\n", - " Default: /opt/ml/model\n", - " Required: Yes\n", - "\n", - "results_directory:\n", - " Current value: \n", - " Type: string\n", - " Default: \n", - " Required: Yes\n", - "\n", - "resume_from_path:\n", - " Current value: \n", - " Type: string\n", - " Default: \n", - " Required: Yes\n", - "\n", - "rollout:\n", - " Current value: 8\n", - " Type: integer\n", - " Default: 8\n", - " Valid options: [8]\n", - " Required: Yes\n", - "\n", - "train_val_split_ratio:\n", - " Current value: 0.9\n", - " Type: float\n", - " Default: 0.9\n", - " Range: 0.0 - 1.0\n", - "\n", - "validation_data_path:\n", - " Current value: None\n", - " Type: string\n", - " Default: None\n", - " Required: Yes\n" - ] - } - ], "source": [ "print(\"Default Finetuning Options:\")\n", "pprint(rlaif_trainer.hyperparameters.to_dict()) # rename as hyperparameters\n", "\n", "#set options\n", "rlaif_trainer.hyperparameters.get_info()\n" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -346,492 +141,58 @@ }, { "cell_type": "code", - "execution_count": 4, "id": "5d5fa362-0caf-412d-977c-5e47f0548ea5", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭────────────────────────────────── Training Job Status ───────────────────────────────────╮\n", - "│ TrainingJob Name meta-textgeneration-llama-3-2-1b-instruct-rlvr-20251123173910 │\n", - "│ MLFlow URL mmlu-eval-experiment(link valid for 5 mins) │\n", - "│ │\n", - "│ Job Status Completed │\n", - "│ Secondary Status Completed │\n", - "│ Elapsed Time 711.5s │\n", - "│ │\n", - "│ Status Transitions │\n", - "│ │\n", - "│ Step Details Duration │\n", - "│ ─────────────────────────────────────────────────────────────────────────── │\n", - "│ ✓ Starting Starting the training job 0.8s │\n", - "│ ✓ Pending Preparing the instances for 21.0s │\n", - "│ training │\n", - "│ ✓ Downloading Downloading the training image 15.7s │\n", - "│ ✓ Training Training image download completed. 612.5s │\n", - "│ Training in progress. │\n", - "│ ✓ Uploading Uploading generated training model 58.2s │\n", - "│ ✓ Completed Training job completed 0.0s │\n", - "│ │\n", - "╰──────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001B[38;5;172m╭─\u001B[0m\u001B[38;5;172m─────────────────────────────────\u001B[0m\u001B[38;5;172m \u001B[0m\u001B[1;94mTraining Job Status\u001B[0m\u001B[38;5;172m \u001B[0m\u001B[38;5;172m──────────────────────────────────\u001B[0m\u001B[38;5;172m─╮\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mTrainingJob Name \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;32mmeta-textgeneration-llama-3-2-1b-instruct-rlvr-20251123173910\u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mMLFlow URL \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B]8;id=390885;https://t-eq86xu3xab1s.us-west-2.experiments.sagemaker.aws/auth?authToken=eyJhbGciOiJIUzI1NiJ9.eyJhdXRoVG9rZW5JZCI6IjNiNWQyYmIxLWRkZmUtNDk0OC04MDA0LWI5MjRiYzA1ZjU4NyIsImZhc0NyZWRlbnRpYWxzIjoiQWdWNHhZc2dLdFhjUFBseFhHcUpaMkROYi9HZnpJUUY2RElCaDhLK0dBNjluZDBBWHdBQkFCVmhkM010WTNKNWNIUnZMWEIxWW14cFl5MXJaWGtBUkVGdGIzQkdXaTlxYUVnemNVMW1aalZzVUV4b056aGFPVWhzZEZWM2VsWXhjazlrVWpkRGJWVlBlVTVNTTBGclREWkJSVGcxYkhsWWEwVlhZbkpVWlZSVVp6MDlBQUVBQjJGM2N5MXJiWE1BUzJGeWJqcGhkM002YTIxek9uVnpMWGRsYzNRdE1qbzFPVEF4T0RNM016azFNRFE2YTJWNUx6ZzNabUUxTVdReUxURTRNRGt0TkdVMFl5MWhObVV6TFRRNFpXWTNNelk1WW1NM1lnQzRBUUlCQUhndFR2ZzEyNVZtTm50WE4vTEVZV1dzREMrSk1KRzNQSUl1eU9PYTB5SU5FZ0ZLdTAyc1B4bk1qVCs1UE50d1lEeUpBQUFBZmpCOEJna3Foa2lHOXcwQkJ3YWdiekJ0QWdFQU1HZ0dDU3FHU0liM0RRRUhBVEFlQmdsZ2hrZ0JaUU1FQVM0d0VRUU1aOG9NYUFXTmw3UzhONXJjQWdFUWdEdGVhN2toLzQ1aGNiWEhBQ0IvQnRPUFlyaUZCSU14cVBkVzJuWmJocWJ4c2FWTzYxUW5tQ1lISFU3K1lnOE1BZjEwMDVPTXYydTcySVE5K3dJQUFCQUFKZWxhQVNWWGJySTN3cmVnRDhYTDI5ZVphc1NaRTQzVXBjcXc1RWJsOFJtSS91Tk5KckFvUHdpaFFSdzhmcHg2Ly8vLy93QUFBQUVBQUFBQUFBQUFBQUFBQUFFQUFBUkhJTFhiaGNQMHVscU9abytQak9wUlo3ZFZQZy9ZRk9VMVNzRWlwbzhUeFo0WlhHb1VHVWNDRkZDRldBVHdXWWtLOWJ0VEx3RG9aRWlCNkhmenU4aWV6MnZBcVZwMFEwaDZLanNiWG1yVy9LL0lFSkhGQzgybFp3cFZ6WnpIc2FRdjlmVTE3aVJodTlaZVpxZk1xM1Y0MG1LRkNVRW5HdnZHZWlFWVNVOFRoc3VoclRzRG1SN01qZnBWeWs4dGhiWG5DWmNrVnJyK1UrbFZnbERpVGRBUm1zYTdob1ZpVWl1L2NhejRMY0MrUGswVFVzZFlzNkJiTWtpV1RZeGVFSEViamt2QUNqckFZQ3ljN3JhVFZDUlJzM3d3MEcxMHFzMlgvaW8wRjAwR0NoZFI3WUlvaE1XWnk5UlA5eXl6Tnd2cWd4Qk5EcVpKS2pTYktvZFBFVGpBdFJ0YU5sRlNPbHJuelNxcWdrZmJHQXN1ZzlIRFlVTGQxR3JndXNOTllWRW95Z1R4K09QQStyTFZsVkhEUUdJM0JBdDF6UzhkTTNmWXplbGhndUpjNHdYZlFwYTJOeWtpYkZpVlpxSnF0b0ZpRzJBUTc3clh0VXRDR1NFZGRFRHVIZlNSTVZLMW56Smp2L0dUSm1LRGhLU01maGFKNFg3SE5Sd1Bmd1AzR1hHRmRmeVBrYm5mNjUrZjVjdFY3d3V6WENtUVdaWEVjbUxsYmhmdUJ1YnhuaWhYMW9zQ1dLUmpjbVJXQ2RES1NaOVd4L2UxQ2V2bzNkK2xOMFpwRlpEaXFzRFBBNDM4ODk3NlE5ZGtOSUUwbzFvTk9tUHF2RGVqZzRjWDRUVDdTeGtkdkZFVEVtMjFpOHNmMnl4ekVwRGJOTWZPUkNabVFoTTlyMWc4TllwUTcwQ1BEd1hJMkQweU95Tnlsc2hTOGFrRDNpVHBhb0htc2x4VUhCT0UyM0VOSG1VbzFidDRHZmRzS2NqQ1FzOTE5ZnROWmJ2UXBuYWRDL0U4akcwVW1zU2Q0UGlHRnVLSG9XbldSeFZqM2xXaVFKYWt5NVVpOTZvQXR3OU1RVmVCWVpSM25PdVlUb05OWlhIcE9JR3gwbGczVEVsSmZ3ZzNIVmwyRnlSQk9sRUNPdnlxMWh5Z2FaOWUvZjBWTFNLQXJTTFdvMGcxNVoyWDZyaUhRZGhHRks1RG5YTDA5ZlBuR1Y5UjZMTDFyMUtxZkJFN1N0OVYwd25KTjkrcERDTzhKVGdUS01KSG9Xei96UnVvTHJwdVhiU1E5dnRuZCsvUUMwNDhSRGxrdTQ2UEdCY2VpV3pMcE41dlpMTG9rSkJWNDV3c2Q4MCt4Y0VkaDUxU0Z6OEZFZm80cE5IK2xLdVJpbXl2enFoY3NRRXpwcjZtMGx6Q2tSWVhxRDJMRWNDQ1IrdlRyR3BiemtXdlVnSi9UbDZ6THR6VTlpRVBLbWhTckdENEZjYUdSajc1aUhZbEU3ZDlnYVJSdDV3OEZYYS8xWms0TkdBSlFXenI2dkc3TGIxV2V5SHU3YTAwcDFsRUlMMjlRcXpzb0hSUzRDaWtLK0xaQlZTd3NRVW1xQ2U4dlFoWmRHNFhsZWFDM2NYMkxQRnBaUVpJZmdmL2ZxdXI1M0JMbVByVWdreTdPM21xVlNTZ0hWZkRQTldLeFBYZUl4RmlSWEZNZng1dGRIdWJNZUxEUEh2enFOdXYxU2Y0VXBGb3JDQjZWWWdhK0FiU0xjaWs0TjNNZm9yRk0vN1dqekV5NE9TYWVRNzR2NFVDblJoVDFKMG5yeHBBSnpVQlNkanN0N0pqc054NVdKWFNzV1pUblFUWHZHMktURFVwb3dzSkh3K0ZKRnYxaVZEUVluczl4WWMxTzFlNlVqNm16QjFPOHY1WGxzV215eWpkM2J1ajc5ekJRbldzcUZ2d0RwWEFta04zOE00NGxSNENwZ3d6anRoUUhOamJnQWNHUmdCbk1HVUNNUUN4NlZTTXZlTFpRNDF4UjIzeTk2OERlUDFoNWNCZmVCWWtYaXVrRDRTNkJKTGJmQUdpZWU4RUNDc3E4dE5LT3pVQ01ESGY3SUxEbFlld3hEWEF5aTFwanVqUzZDdWkvTEhxQ2Z2T0VrRlM3S1dHMFdmWUlDTHY5bHErcUkvdmFnbjl2QT09IiwiY2lwaGVyVGV4dCI6IkFRSUJBSGd0VHZnMTI1Vm1ObnRYTi9MRVlXV3NEQytKTUpHM1BJSXV5T09hMHlJTkVnR0xDVHh6VHBZQ2xUTUI0L1c0ZGRiVEFBQUFvakNCbndZSktvWklodmNOQVFjR29JR1JNSUdPQWdFQU1JR0lCZ2txaGtpRzl3MEJCd0V3SGdZSllJWklBV1VEQkFFdU1CRUVEUGVTeWUyOFRWVnJWOEdhamdJQkVJQmIvT3JFMHJIeDRrandRS1QzL1VQdEFHeXhOcjAzenl4blE1NUJmelBRRlppTWp6TDEzdFZQWXVBQ0pqVTM5dEtGN2NJeFNqd1FySHMvRXFvWHRHY0xzR3ZYTmhqQmpkMHkvUnVvK2FCMXZQamFreTU1Njl0VEtuR0VNQT09Iiwic3ViIjoiYXJuOmF3czpzYWdlbWFrZXI6dXMtd2VzdC0yOjA1MjE1MDEwNjc1NjptbGZsb3ctdHJhY2tpbmctc2VydmVyL21tbHUtZXZhbC1leHBlcmltZW50IiwiaWF0IjoxNzYzOTQ4MzUyLCJleHAiOjE3NjM5NDg2NTJ9.rw7ffe5FrJjwAXSYMPd3jnjxGGyv6XwFZlltemBV89c\u001B\\\u001B[1;4;94mmmlu-eval-experiment(link valid for 5 mins)\u001B[0m\u001B]8;;\u001B\\\u001B[37m \u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mJob Status \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;38;5;172mCompleted\u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mSecondary Status \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;33mCompleted\u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mElapsed Time \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;91m711.5s\u001B[0m\u001B[37m \u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;35mStatus Transitions\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35m \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mStep \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mDetails \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mDuration \u001B[0m\u001B[1;35m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m ─────────────────────────────────────────────────────────────────────────── \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mStarting \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mStarting the training job \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m0.8s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mPending \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mPreparing the instances for \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m21.0s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m \u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mtraining \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mDownloading \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mDownloading the training image \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m15.7s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mTraining \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mTraining image download completed. \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m612.5s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m \u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mTraining in progress. \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mUploading \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mUploading generated training model \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m58.2s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mCompleted \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mTraining job completed \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m0.0s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m╰──────────────────────────────────────────────────────────────────────────────────────────╯\u001B[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "\n", "training_job = rlaif_trainer.train(wait=True)\n" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 5, "id": "a0781a22-d9ea-4c9b-a854-5d7efde3539d", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭────────────────────────────────── Training Job Status ───────────────────────────────────╮\n", - "│ TrainingJob Name meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251201121238 │\n", - "│ │\n", - "│ Job Status Completed │\n", - "│ Secondary Status Completed │\n", - "│ Elapsed Time 614.6s │\n", - "│ │\n", - "│ Status Transitions │\n", - "│ │\n", - "│ Step Details Duration │\n", - "│ ─────────────────────────────────────────────────────────────────────────── │\n", - "│ ✓ Starting Starting the training job 0.7s │\n", - "│ ✓ Pending Preparing the instances for 15.7s │\n", - "│ training │\n", - "│ ✓ Downloading Downloading the training image 5.7s │\n", - "│ ✓ Training Training image download completed. 551.9s │\n", - "│ Training in progress. │\n", - "│ ✓ Uploading Uploading generated training model 38.7s │\n", - "│ ✓ Completed Training job completed 0.0s │\n", - "│ │\n", - "╰──────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001B[38;5;172m╭─\u001B[0m\u001B[38;5;172m─────────────────────────────────\u001B[0m\u001B[38;5;172m \u001B[0m\u001B[1;94mTraining Job Status\u001B[0m\u001B[38;5;172m \u001B[0m\u001B[38;5;172m──────────────────────────────────\u001B[0m\u001B[38;5;172m─╮\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mTrainingJob Name \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;32mmeta-textgeneration-llama-3-2-1b-instruct-rlaif-20251201121238\u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mJob Status \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;38;5;172mCompleted\u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mSecondary Status \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;33mCompleted\u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mElapsed Time \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;91m614.6s\u001B[0m\u001B[37m \u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;35mStatus Transitions\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35m \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mStep \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mDetails \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mDuration \u001B[0m\u001B[1;35m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m ─────────────────────────────────────────────────────────────────────────── \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mStarting \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mStarting the training job \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m0.7s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mPending \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mPreparing the instances for \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m15.7s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m \u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mtraining \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mDownloading \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mDownloading the training image \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m5.7s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mTraining \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mTraining image download completed. \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m551.9s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m \u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mTraining in progress. \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mUploading \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mUploading generated training model \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m38.7s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mCompleted \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mTraining job completed \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m0.0s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m╰──────────────────────────────────────────────────────────────────────────────────────────╯\u001B[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "training_job = rlaif_trainer.train(wait=True)\n" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 6, "id": "c34b93c8-2e4c-437a-8efb-b8475fb941f3", "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/html": [ - "
TrainingJob(\n", - "│ training_job_name='meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251201121238',\n", - "│ training_job_arn='arn:aws:sagemaker:us-west-2:729646638167:training-job/meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251201121238',\n", - "│ processing_job_arn=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ tuning_job_arn=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ labeling_job_arn=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ auto_ml_job_arn=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ model_artifacts=ModelArtifacts(\n", - "│ │ s3_model_artifacts='s3://mc-flows-sdk-testing/output/meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251201121238/output/model'\n", - "│ ),\n", - "│ training_job_output=TrainingJobOutput(\n", - "│ │ s3_training_job_output='s3://mc-flows-sdk-testing/output/meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251201121238/output/output'\n", - "│ ),\n", - "│ training_job_status='Completed',\n", - "│ secondary_status='Completed',\n", - "│ failure_reason=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ hyper_parameters={\n", - "│ │ 'data_path': 'None',\n", - "│ │ 'global_batch_size': '128',\n", - "│ │ 'judge_model_id': 'bedrock/openai.gpt-oss-120b-1:0',\n", - "│ │ 'judge_prompt_template': '/opt/ml/code/verl/summarize.jinja',\n", - "│ │ 'learning_rate': '1e-05',\n", - "│ │ 'max_epochs': '2',\n", - "│ │ 'max_prompt_length': '1024',\n", - "│ │ 'model_name_or_path': 'meta-llama/Llama-3.2-1B-Instruct',\n", - "│ │ 'name': 'example-name-c9jrd',\n", - "│ │ 'output_path': '/opt/ml/model',\n", - "│ │ 'rollout': '8',\n", - "│ │ 'train_val_split_ratio': '0.9',\n", - "│ │ 'validation_data_path': 'None'\n", - "│ },\n", - "│ algorithm_specification=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ role_arn='arn:aws:iam::729646638167:role/Admin',\n", - "│ input_data_config=[\n", - "│ │ Channel(\n", - "│ │ │ channel_name='train',\n", - "│ │ │ data_source=DataSource(\n", - "│ │ │ │ s3_data_source=S3DataSource(\n", - "│ │ │ │ │ s3_data_type='S3Prefix',\n", - "│ │ │ │ │ s3_uri='s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl',\n", - "│ │ │ │ │ s3_data_distribution_type='FullyReplicated',\n", - "│ │ │ │ │ attribute_names=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ │ │ │ │ instance_group_names=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ │ │ │ │ model_access_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ │ │ │ │ hub_access_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>\n", - "│ │ │ │ ),\n", - "│ │ │ │ file_system_data_source=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ │ │ │ dataset_source=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>\n", - "│ │ │ ),\n", - "│ │ │ content_type=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ │ │ compression_type='None',\n", - "│ │ │ record_wrapper_type='None',\n", - "│ │ │ input_mode=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ │ │ shuffle_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ │ │ enable_ffm=False\n", - "│ │ )\n", - "│ ],\n", - "│ output_data_config=OutputDataConfig(\n", - "│ │ s3_output_path='s3://mc-flows-sdk-testing/output/',\n", - "│ │ kms_key_id='',\n", - "│ │ compression_type='NONE',\n", - "│ │ remove_job_name_from_s3_output_path=False,\n", - "│ │ disable_model_upload=False,\n", - "│ │ channels=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>\n", - "│ ),\n", - "│ resource_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ warm_pool_status=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ vpc_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ stopping_condition=StoppingCondition(\n", - "│ │ max_runtime_in_seconds=86400,\n", - "│ │ max_wait_time_in_seconds=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ │ max_pending_time_in_seconds=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>\n", - "│ ),\n", - "│ creation_time=datetime.datetime(2025, 12, 1, 12, 12, 39, 272000, tzinfo=tzlocal()),\n", - "│ training_start_time=datetime.datetime(2025, 12, 1, 12, 12, 55, 672000, tzinfo=tzlocal()),\n", - "│ training_end_time=datetime.datetime(2025, 12, 1, 12, 22, 51, 994000, tzinfo=tzlocal()),\n", - "│ last_modified_time=datetime.datetime(2025, 12, 1, 12, 22, 51, 994000, tzinfo=tzlocal()),\n", - "│ secondary_status_transitions=[\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Starting',\n", - "│ │ │ start_time=datetime.datetime(2025, 12, 1, 12, 12, 39, 272000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 12, 1, 12, 12, 39, 939000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Starting the training job'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Pending',\n", - "│ │ │ start_time=datetime.datetime(2025, 12, 1, 12, 12, 39, 939000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 12, 1, 12, 12, 55, 672000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Preparing the instances for training'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Downloading',\n", - "│ │ │ start_time=datetime.datetime(2025, 12, 1, 12, 12, 55, 672000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 12, 1, 12, 13, 1, 397000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Downloading the training image'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Training',\n", - "│ │ │ start_time=datetime.datetime(2025, 12, 1, 12, 13, 1, 397000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 12, 1, 12, 22, 13, 298000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Training image download completed. Training in progress.'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Uploading',\n", - "│ │ │ start_time=datetime.datetime(2025, 12, 1, 12, 22, 13, 298000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 12, 1, 12, 22, 51, 994000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Uploading generated training model'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Completed',\n", - "│ │ │ start_time=datetime.datetime(2025, 12, 1, 12, 22, 51, 994000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 12, 1, 12, 22, 51, 994000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Training job completed'\n", - "│ │ )\n", - "│ ],\n", - "│ final_metric_data_list=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ enable_network_isolation=False,\n", - "│ enable_inter_container_traffic_encryption=False,\n", - "│ enable_managed_spot_training=False,\n", - "│ checkpoint_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ training_time_in_seconds=596,\n", - "│ billable_time_in_seconds=596,\n", - "│ billable_token_count=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ debug_hook_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ experiment_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ debug_rule_configurations=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ tensor_board_output_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ debug_rule_evaluation_statuses=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ upstream_platform_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ profiler_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ profiler_rule_configurations=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ profiler_rule_evaluation_statuses=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ profiling_status=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ environment=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ retry_strategy=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ last_modified_by=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ created_by=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ disable_efa=False,\n", - "│ processing_job_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ image_metadata=ImageMetadata(image_type='BYOImage'),\n", - "│ remote_debug_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ resource_tags=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ infra_check_config=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ serverless_job_config=ServerlessJobConfig(\n", - "│ │ base_model_arn='arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/Model/meta-textgeneration-llama-3-2-1b-instruct/1.25.0',\n", - "│ │ job_type='FineTuning',\n", - "│ │ accept_eula=True,\n", - "│ │ customization_technique='RLAIF',\n", - "│ │ peft='LORA',\n", - "│ │ evaluation_type=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ │ evaluator_arn=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ │ job_spec=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>\n", - "│ ),\n", - "│ mlflow_config=MlflowConfig(\n", - "│ │ mlflow_resource_arn='arn:aws:sagemaker:us-west-2:729646638167:mlflow-app/app-W7FOBBXZANVX',\n", - "│ │ mlflow_tracking_server_arn=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ │ mlflow_experiment_name='test-rlaif-finetuned-models-exp',\n", - "│ │ mlflow_run_name='test-rlaif-finetuned-models-run'\n", - "│ ),\n", - "│ model_package_config=ModelPackageConfig(\n", - "│ │ model_package_group_arn='arn:aws:sagemaker:us-west-2:729646638167:model-package-group/sdk-test-finetuned-models',\n", - "│ │ source_model_package_arn=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>\n", - "│ ),\n", - "│ mlflow_details=MlflowDetails(mlflow_experiment_id='2', mlflow_run_id='67f33659b9974b90a4c70ff134619e78'),\n", - "│ progress_info=<sagemaker.core.utils.utils.Unassigned object at 0x1173048f0>,\n", - "│ output_model_package_arn='arn:aws:sagemaker:us-west-2:729646638167:model-package/sdk-test-finetuned-models/3'\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001B[1;38;2;225;0;225mTrainingJob\u001B[0m\u001B[1m(\u001B[0m\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;215;175;0mtraining_job_name\u001B[0m=\u001B[38;2;0;135;0m'meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251201121238'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;215;175;0mtraining_job_arn\u001B[0m=\u001B[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:729646638167:training-job/meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251201121238'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;215;175;0mprocessing_job_arn\u001B[0m=\u001B[1m<\u001B[0m\u001B[1;38;2;225;0;225msagemaker.core.utils.utils.Unassigned\u001B[0m\u001B[39m object at \u001B[0m\u001B[1;36m0x1173048f0\u001B[0m\u001B[39m>,\u001B[0m\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;215;175;0mtuning_job_arn\u001B[0m\u001B[39m=
TrainingJob(\n", - "│ training_job_name='meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251124140754',\n", - "│ training_job_arn='arn:aws:sagemaker:us-west-2:052150106756:training-job/meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251124140754',\n", - "│ processing_job_arn=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ tuning_job_arn=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ labeling_job_arn=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ auto_ml_job_arn=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ model_artifacts=ModelArtifacts(\n", - "│ │ s3_model_artifacts='s3://open-models-testing-pdx/output/meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251124140754/output/model'\n", - "│ ),\n", - "│ training_job_output=TrainingJobOutput(\n", - "│ │ s3_training_job_output='s3://open-models-testing-pdx/output/meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251124140754/output/output'\n", - "│ ),\n", - "│ training_job_status='Completed',\n", - "│ secondary_status='Completed',\n", - "│ failure_reason=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ hyper_parameters={\n", - "│ │ 'data_path': 'None',\n", - "│ │ 'global_batch_size': '256',\n", - "│ │ 'judge_model_id': 'bedrock/openai.gpt-oss-120b-1:0',\n", - "│ │ 'judge_prompt_template': '/opt/ml/code/verl/summarize.jinja',\n", - "│ │ 'learning_rate': '1e-05',\n", - "│ │ 'lora_alpha': '256',\n", - "│ │ 'max_epochs': '2',\n", - "│ │ 'max_prompt_length': '1024',\n", - "│ │ 'model_name_or_path': 'meta-llama/Llama-3.2-1B-Instruct',\n", - "│ │ 'name': 'example-name-ea0mx',\n", - "│ │ 'output_path': '/opt/ml/model',\n", - "│ │ 'rollout': '8',\n", - "│ │ 'train_val_split_ratio': '0.9',\n", - "│ │ 'validation_data_path': 'None'\n", - "│ },\n", - "│ algorithm_specification=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ role_arn='arn:aws:iam::052150106756:role/Admin',\n", - "│ input_data_config=[\n", - "│ │ Channel(\n", - "│ │ │ channel_name='train',\n", - "│ │ │ data_source=DataSource(\n", - "│ │ │ │ s3_data_source=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ │ │ file_system_data_source=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ │ │ dataset_source=DatasetSource(\n", - "│ │ │ │ │ dataset_arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/DataSet/rlvr-rlaif-test-dataset/0.0.2'\n", - "│ │ │ │ )\n", - "│ │ │ ),\n", - "│ │ │ content_type=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ │ compression_type='None',\n", - "│ │ │ record_wrapper_type='None',\n", - "│ │ │ input_mode=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ │ shuffle_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ │ enable_ffm=False\n", - "│ │ )\n", - "│ ],\n", - "│ output_data_config=OutputDataConfig(\n", - "│ │ s3_output_path='s3://open-models-testing-pdx/output',\n", - "│ │ kms_key_id='',\n", - "│ │ compression_type='NONE',\n", - "│ │ remove_job_name_from_s3_output_path=False,\n", - "│ │ disable_model_upload=False,\n", - "│ │ channels=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>\n", - "│ ),\n", - "│ resource_config=ResourceConfig(\n", - "│ │ instance_type=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ instance_count=0,\n", - "│ │ volume_size_in_gb=0,\n", - "│ │ volume_kms_key_id=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ keep_alive_period_in_seconds=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ capacity_reservation_ids=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ instance_groups=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ capacity_schedules_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ training_plan_arn=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ instance_placement_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>\n", - "│ ),\n", - "│ warm_pool_status=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ vpc_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ stopping_condition=StoppingCondition(\n", - "│ │ max_runtime_in_seconds=86400,\n", - "│ │ max_wait_time_in_seconds=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ max_pending_time_in_seconds=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>\n", - "│ ),\n", - "│ creation_time=datetime.datetime(2025, 11, 24, 14, 7, 54, 925000, tzinfo=tzlocal()),\n", - "│ training_start_time=datetime.datetime(2025, 11, 24, 15, 0, 56, 747000, tzinfo=tzlocal()),\n", - "│ training_end_time=datetime.datetime(2025, 11, 24, 15, 12, 43, 328000, tzinfo=tzlocal()),\n", - "│ last_modified_time=datetime.datetime(2025, 11, 24, 15, 12, 43, 328000, tzinfo=tzlocal()),\n", - "│ secondary_status_transitions=[\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Starting',\n", - "│ │ │ start_time=datetime.datetime(2025, 11, 24, 14, 7, 54, 925000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 11, 24, 14, 7, 55, 596000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Starting the training job'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Pending',\n", - "│ │ │ start_time=datetime.datetime(2025, 11, 24, 14, 7, 55, 596000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 11, 24, 15, 0, 56, 747000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Preparing the instances for training'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Downloading',\n", - "│ │ │ start_time=datetime.datetime(2025, 11, 24, 15, 0, 56, 747000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 11, 24, 15, 1, 7, 481000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Downloading the training image'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Training',\n", - "│ │ │ start_time=datetime.datetime(2025, 11, 24, 15, 1, 7, 481000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 11, 24, 15, 11, 39, 946000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Training image download completed. Training in progress.'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Uploading',\n", - "│ │ │ start_time=datetime.datetime(2025, 11, 24, 15, 11, 39, 946000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 11, 24, 15, 12, 43, 328000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Uploading generated training model'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Completed',\n", - "│ │ │ start_time=datetime.datetime(2025, 11, 24, 15, 12, 43, 328000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 11, 24, 15, 12, 43, 328000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Training job completed'\n", - "│ │ )\n", - "│ ],\n", - "│ final_metric_data_list=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ enable_network_isolation=False,\n", - "│ enable_inter_container_traffic_encryption=False,\n", - "│ enable_managed_spot_training=False,\n", - "│ checkpoint_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ training_time_in_seconds=707,\n", - "│ billable_time_in_seconds=707,\n", - "│ billable_token_count=0,\n", - "│ debug_hook_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ experiment_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ debug_rule_configurations=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ tensor_board_output_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ debug_rule_evaluation_statuses=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ upstream_platform_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ profiler_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ profiler_rule_configurations=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ profiler_rule_evaluation_statuses=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ profiling_status=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ environment=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ retry_strategy=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ last_modified_by=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ created_by=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ disable_efa=False,\n", - "│ processing_job_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ image_metadata=ImageMetadata(image_type='BYOImage'),\n", - "│ remote_debug_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ resource_tags=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ infra_check_config=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ serverless_job_config=ServerlessJobConfig(\n", - "│ │ base_model_arn='arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/Model/meta-textgeneration-llama-3-2-1b-instruct/1.21.0',\n", - "│ │ job_type='FineTuning',\n", - "│ │ accept_eula=False,\n", - "│ │ customization_technique='RLAIF',\n", - "│ │ peft='LORA',\n", - "│ │ evaluation_type=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ evaluator_arn=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ job_spec={'CustomizationTechnique': 'RLAIF', 'PEFT': 'LORA'}\n", - "│ ),\n", - "│ mlflow_config=MlflowConfig(\n", - "│ │ mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n", - "│ │ mlflow_tracking_server_arn=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>,\n", - "│ │ mlflow_experiment_name='test-rlaif-finetuned-models-exp',\n", - "│ │ mlflow_run_name='test-rlaif-finetuned-models-run'\n", - "│ ),\n", - "│ model_package_config=ModelPackageConfig(\n", - "│ │ model_package_group_arn='arn:aws:sagemaker:us-west-2:052150106756:model-package-group/test-finetuned-models-gamma',\n", - "│ │ source_model_package_arn=<sagemaker.core.utils.utils.Unassigned object at 0x11446b7d0>\n", - "│ ),\n", - "│ mlflow_details=MlflowDetails(mlflow_experiment_id='88', mlflow_run_id='3a8be3c0a9be4030a5ff496cfffdb88c'),\n", - "│ progress_info=TrainingProgressInfo(\n", - "│ │ total_step_count_per_epoch=1,\n", - "│ │ current_step=1,\n", - "│ │ current_epoch=2,\n", - "│ │ max_epoch=2\n", - "│ ),\n", - "│ output_model_package_arn='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models-gamma/83'\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001B[1;38;2;225;0;225mTrainingJob\u001B[0m\u001B[1m(\u001B[0m\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;215;175;0mtraining_job_name\u001B[0m=\u001B[38;2;0;135;0m'meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251124140754'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;215;175;0mtraining_job_arn\u001B[0m=\u001B[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:training-job/meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251124140754'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;215;175;0mprocessing_job_arn\u001B[0m=\u001B[1m<\u001B[0m\u001B[1;38;2;225;0;225msagemaker.core.utils.utils.Unassigned\u001B[0m\u001B[39m object at \u001B[0m\u001B[1;36m0x11446b7d0\u001B[0m\u001B[39m>,\u001B[0m\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;215;175;0mtuning_job_arn\u001B[0m\u001B[39m=
[12/02/25 10:28:14] INFO SageMaker session not provided. Using default Session. defaults.py:61\n", - "\n" - ], - "text/plain": [ - "\u001B[2;36m[12/02/25 10:28:14]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m SageMaker session not provided. Using default Session. \u001B]8;id=423696;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/train/defaults.py\u001B\\\u001B[2mdefaults.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=732501;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/train/defaults.py#61\u001B\\\u001B[2m61\u001B[0m\u001B]8;;\u001B\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
INFO Runs on sagemaker prod, region:us-west-2 utils.py:354\n", - "\n" - ], - "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Runs on sagemaker prod, region:us-west-\u001B[1;36m2\u001B[0m \u001B]8;id=836889;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/core/utils/utils.py\u001B\\\u001B[2mutils.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=831071;file:///Users/rsareddy/workplace/virtual_envs/sagemaker-v3/lib/python3.12/site-packages/sagemaker/core/utils/utils.py#354\u001B\\\u001B[2m354\u001B[0m\u001B]8;;\u001B\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "\n", "\n", @@ -1277,153 +241,44 @@ " model=\"meta-textgeneration-llama-3-2-1b-instruct\", # Union[str, ModelPackage] \n", " model_package_group_name=\"sdk-test-finetuned-models\", # Make it Optional\n", " reward_model_id='anthropic.claude-3-5-sonnet-20240620-v1:0',\n", - " reward_prompt=\"arn:aws:sagemaker:us-west-2:729646638167:hub-content/sdktest/JsonDoc/rlaif-test-prompt/0.0.1\",\n", - " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", + " reward_prompt=\"arn:aws:sagemaker:us-west-2:<>:hub-content/sdktest/JsonDoc/rlaif-test-prompt/0.0.1\",\n", + " #mlflow_resource_arn=\"arn:aws:sagemaker:us-west-2:<>:mlflow-tracking-server/mmlu-eval-experiment\", # Optional[str] - MLflow app ARN (auto-resolved if not provided), can accept name and search in the account\n", " mlflow_experiment_name=\"test-rlaif-finetuned-models-exp\", # Optional[str]\n", " mlflow_run_name=\"test-rlaif-finetuned-models-run\", # Optional[str]\n", " training_dataset=\"s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl\", #Optional[str]\n", " s3_output_path=\"s3://mc-flows-sdk-testing/output/\",\n", " accept_eula=True\n", " #sagemaker_session=sagemaker_session,\n", - " #role=\"arn:aws:iam::052150106756:role/service-role/AmazonSageMaker-ExecutionRole-20250731T162975\"\n", - " #role=\"arn:aws:iam::052150106756:role/Admin\"\n", + " #role=\"arn:aws:iam::<>:role/service-role/AmazonSageMaker-ExecutionRole-20250731T162975\"\n", + " #role=\"arn:aws:iam::<>:role/Admin\"\n", ")\n" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 3, "id": "029aa3cf-8a98-487b-8e21-445af9a72e91", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭────────────────────────────────── Training Job Status ───────────────────────────────────╮\n", - "│ TrainingJob Name meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251119221710 │\n", - "│ MLFlow URL mmlu-eval-experiment │\n", - "│ │\n", - "│ Job Status Completed │\n", - "│ Secondary Status Completed │\n", - "│ Elapsed Time 1597.4s │\n", - "│ │\n", - "│ Status Transitions │\n", - "│ │\n", - "│ Step Details Duration │\n", - "│ ─────────────────────────────────────────────────────────────────────────── │\n", - "│ ✓ Starting Starting the training job 0.6s │\n", - "│ ✓ Pending Preparing the instances for 15.4s │\n", - "│ training │\n", - "│ ✓ Downloading Downloading the training image 166.9s │\n", - "│ ✓ Training Training image download completed. 1339.4s │\n", - "│ Training in progress. │\n", - "│ ✓ Uploading Uploading generated training model 74.1s │\n", - "│ ✓ Completed Training job completed 0.0s │\n", - "│ │\n", - "╰──────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001B[34m╭─\u001B[0m\u001B[34m─────────────────────────────────\u001B[0m\u001B[34m \u001B[0m\u001B[1;34mTraining Job Status\u001B[0m\u001B[34m \u001B[0m\u001B[34m──────────────────────────────────\u001B[0m\u001B[34m─╮\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mTrainingJob Name \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;32mmeta-textgeneration-llama-3-2-1b-instruct-rlaif-20251119221710\u001B[0m\u001B[37m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mMLFlow URL \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B]8;id=595913;https://t-eq86xu3xab1s.us-west-2.experiments.sagemaker.aws/auth?authToken=eyJhbGciOiJIUzI1NiJ9.eyJhdXRoVG9rZW5JZCI6IjUzNjQxYzc2LTUwYjYtNDM1YS05ZTI4LTY1OGMwZTZlM2ZjOSIsImZhc0NyZWRlbnRpYWxzIjoiQWdWNGYvd3NIT0FlMDZMU2NmL09vYUZSMGx6MTlyTU5BM3o1UUpMaG1NMmFQemdBWHdBQkFCVmhkM010WTNKNWNIUnZMWEIxWW14cFl5MXJaWGtBUkVFclVHRmpjell6VW05M1VYWnhabkZrYURObVJFaHhNM1ZEUjNWYWRtdHpWMDV0YVVKdmJYaFhWMnN4TDNoblExbHFiVm80VkZvM2JYcEtNRTVaT1RGRVp6MDlBQUVBQjJGM2N5MXJiWE1BUzJGeWJqcGhkM002YTIxek9uVnpMWGRsYzNRdE1qbzFPVEF4T0RNM016azFNRFE2YTJWNUx6ZzNabUUxTVdReUxURTRNRGt0TkdVMFl5MWhObVV6TFRRNFpXWTNNelk1WW1NM1lnQzRBUUlCQUhndFR2ZzEyNVZtTm50WE4vTEVZV1dzREMrSk1KRzNQSUl1eU9PYTB5SU5FZ0hLcGRFL1hPbzYxYnFuNUJNK1FVVk1BQUFBZmpCOEJna3Foa2lHOXcwQkJ3YWdiekJ0QWdFQU1HZ0dDU3FHU0liM0RRRUhBVEFlQmdsZ2hrZ0JaUU1FQVM0d0VRUU1LNlNjWjJNRjJJSitrb2hRQWdFUWdEdFAreTUvTjl0Z3ZVMUVYUHFUSjZFMENwRENJZmluUVhMOW04QUhNYS9qRWgzbTFLZVRyWUNUSGZFVVJjaTBEVVF4ZWJJSTdUdUF3dGhPb3dJQUFCQUFmS2ZlTnlQWTVtYUxZZWNrVytZMTVKbVg2TXlJbkN3bkFiQjhwak9HeWVoN2hPRDhGSkh3QXlwalJ3TS92Sk14Ly8vLy93QUFBQUVBQUFBQUFBQUFBQUFBQUFFQUFBUTdiMkJXT29XL0NpdVVwZ2UvaHV3Z0pSUGRHYlprRk5Cbml5ZzJueUVsaXRCa1I3Q3M3a04xUXdOMkp6Vm5PTzdDNWtTdk9OTVB4SnYrN3pMNlZDR2FXV1Z3V1hUMHBBSU5DOHRjZlYzQVVQbUlnSkhGMi9oWkEvbmxvaldNMjBkM0NsQUxXWFZsNnJnNW93SXJzek4vOExLSm9ZUUMrTXYrVTRSQzVYckpYWXB0K3hpc3AxSG5iU3pFMG03R0tQcGZrVHpFL2FiSElwNCtqSUlJOE5YNEo5Um9BNnhkbjhPb0ZwOXFpVVlrTjljOWkwZ3lIa0xTQTNxMWxURklXazgvbHZZM3JGVGhJZnNpWFo3OE9QQk5mTkxrT0hvSUZmK2ZNemdVRUp4c0NIeVdENC9IN1dyWjJ2WC9IWHN1NUI1ajQ5N0RUQ3R0NGRRQWVhbWQxeGZVaUVsaGRvSWNsVWZMeHZxRGxqMmdKYm9qazRHQTExalFINmwvUEFvQU5XT3dZMWd4OVdqbStFVFp2dVdYOVpZSjhxVFNISkQxZDVRam96NW1GbDFBaXRyVjZJZnIzMDRxQk9qVXpIVTdmdks1RW9jTE92U2U5NHFaNVpEbGdvLzhkWDVuNTU0ZzlzdjJKLzEzR015SDBxa3VZNVU0eUpRQ3c5NExCRURnTVBBL1N3WU1aUTBwa1NCU2pLbFBoQ0tEKzB5RDVWNGNzcVF2dlYvN3NNRStybUJnWm84emJENGw3NmNhTWxXZzN5MmpLMWhzSWJLTE9QbWNUY0NvbHpuM29ZNExSOVN0Qlg4NVF3dFNjd29PS05SR215TjZXRXUvNUxBRkxSWFppZjl6UFpVelV6Z0VBc21iVytoRzQrT2dOaUxGajJjTXIrWG00bjN4eVlzUzJrMmhQN0FZV2lHTjEyZG9WSEhkVlNCMmZKQ2ZTdkpSTUpQMUNtV1FHNk1vNWhaMURrczlmVUFCVWFZdEtFUUFjUVltNisyWFIxWnl5d212QVM5bGJEdllhdWNWZUlYV1BCaHhHSzlqZk1XT01hM0pSVFNrVE1GQzlCMTV0MGovWkx4aERqRmRIeWhwM01OUlJJQVlOdFlqRUJvajdlQUJHRkdwVnp4Wk5aOW5qZnRlQjdJRCtiSlF2L3EyWlBOTE9kZ1pPR2Q0bkRnMkQyUndaWWxWV0hlckRpYWVNb1gzTFEybEs5OE43QldENWl5dWw3b0U4U2pla1BraDUxK3U0MkZXQ3ZVeW5VK2ZCSC96NkhuUWRvMVFrKzh1SXNFb0hFTW4rdExGUFBQTS9vQTBaQW1iN0hJT3VpcU5acWVqcllzNXZiK1Z5c1M1ZVJ1MjhUeUVabXdIbGlpL3pDZzllY3N2VFFLdis3UHZ3amh4VWp5UERXbmlUa21PVFJ0TjV1dHJEY2h4VFZRbmNVT1NTYVBGV0F0VkxuaFYydzRjdW82VGt6UmNVUlNqK1htNHhvcS8yQ0VhbWRVemg0MUwvMW1aNzFpNVc4aFZHeW00WEl0SDNJcUJueTQ4KzFGcC9jclRseVplQXVMM2dmTUxqSFp4dzdNRG90OUNLVDI0UzI0SHRJTEtiN1dyb3Vnb1JQYTJIVkRuWkhRMlZmOUdJWUwxODk3UmMreWVVSGN1TnBPVHFHRmkrZkpBK0grU1pOeGNBdUluL2dCUno0QThFdHYyNnZtaitNL214TUZWSEYwVmJ4U3lOTkxVRTJWZU91YUJHbTRQblpCYUhtb2t4eHRDbTZ1enJNQng1Wko0ZW8yaE9Ya3IwV29vWXFKS2RrY29mZ3pvUUExTmxyY0JTR2x4SVR6VlRQUVpjY0RNbU5ScUxlR2ZNa1NqcHF2S21JRTVzOHFEVnRLSVlHVnNiTDdNSTVZNUV6ZGlyTVROczRaZzdYUGF2QXNqTzRndEwvYm5Pd2w1Ty9ibTZpQ1JPQUJuTUdVQ01GNCtYc3NYaHVBbStUTE5OdWU0akpZZTZSZzFoYlQ0SzZwYVdyNFNFbzRITHdYeWtYMUpRQXBFazc0OHpNbUlWQUl4QU1BUXZyTXovaE9aelQ4enI4cUtXNnFNUzhoa0lXaDU5NC9wUmErdGFXazluMEhPaC8wRTBxNDRyMHFOMjRtd3JRPT0iLCJjaXBoZXJUZXh0IjoiQVFJQkFIZ3RUdmcxMjVWbU5udFhOL0xFWVdXc0RDK0pNSkczUElJdXlPT2EweUlORWdGRWVVbVdSQzJ1YmY3b2lBUWVlWWdiQUFBQW9qQ0Jud1lKS29aSWh2Y05BUWNHb0lHUk1JR09BZ0VBTUlHSUJna3Foa2lHOXcwQkJ3RXdIZ1lKWUlaSUFXVURCQUV1TUJFRURDUXRnckc1WlNOMFlHMC96Z0lCRUlCYmJQdkc2dThHZ1g2ZzRpSXZ0UE1MckhyZW5QMHE3b1VyTHZkcytvUXV0YjVRcE9PQVNUMzdFRE1Wc09ZZGFUeDhNTHBtRXBPNjZ4M3FoOEJaMFBxT2dEYmlOcGxTUUpPWmtmOXVFTG13V0xZY0tmV2NaVzFrMjRXUkVBPT0iLCJzdWIiOiJhcm46YXdzOnNhZ2VtYWtlcjp1cy13ZXN0LTI6MDUyMTUwMTA2NzU2Om1sZmxvdy10cmFja2luZy1zZXJ2ZXIvbW1sdS1ldmFsLWV4cGVyaW1lbnQiLCJpYXQiOjE3NjM2MTk0MzEsImV4cCI6MTc2MzYxOTczMX0.aTzocvx1hKVKdbfiSAVJ1SDqI1ZPxnum8FcoBqbEJWM\u001B\\\u001B[1;4;34mmmlu-eval-experiment\u001B[0m\u001B]8;;\u001B\\\u001B[37m \u001B[0m\u001B[37m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mJob Status \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;38;5;172mCompleted\u001B[0m\u001B[37m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mSecondary Status \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;33mCompleted\u001B[0m\u001B[37m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mElapsed Time \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;31m1597.4s\u001B[0m\u001B[37m \u001B[0m\u001B[37m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[1;35mStatus Transitions\u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35m \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mStep \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mDetails \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mDuration \u001B[0m\u001B[1;35m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m ─────────────────────────────────────────────────────────────────────────── \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mStarting \u001B[0m\u001B[36m \u001B[0m \u001B[34m \u001B[0m\u001B[34mStarting the training job \u001B[0m\u001B[34m \u001B[0m \u001B[32m \u001B[0m\u001B[32m0.6s \u001B[0m\u001B[32m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mPending \u001B[0m\u001B[36m \u001B[0m \u001B[34m \u001B[0m\u001B[34mPreparing the instances for \u001B[0m\u001B[34m \u001B[0m \u001B[32m \u001B[0m\u001B[32m15.4s \u001B[0m\u001B[32m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[32m \u001B[0m \u001B[36m \u001B[0m \u001B[34m \u001B[0m\u001B[34mtraining \u001B[0m\u001B[34m \u001B[0m \u001B[32m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mDownloading \u001B[0m\u001B[36m \u001B[0m \u001B[34m \u001B[0m\u001B[34mDownloading the training image \u001B[0m\u001B[34m \u001B[0m \u001B[32m \u001B[0m\u001B[32m166.9s \u001B[0m\u001B[32m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mTraining \u001B[0m\u001B[36m \u001B[0m \u001B[34m \u001B[0m\u001B[34mTraining image download completed. \u001B[0m\u001B[34m \u001B[0m \u001B[32m \u001B[0m\u001B[32m1339.4s \u001B[0m\u001B[32m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[32m \u001B[0m \u001B[36m \u001B[0m \u001B[34m \u001B[0m\u001B[34mTraining in progress. \u001B[0m\u001B[34m \u001B[0m \u001B[32m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mUploading \u001B[0m\u001B[36m \u001B[0m \u001B[34m \u001B[0m\u001B[34mUploading generated training model \u001B[0m\u001B[34m \u001B[0m \u001B[32m \u001B[0m\u001B[32m74.1s \u001B[0m\u001B[32m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mCompleted \u001B[0m\u001B[36m \u001B[0m \u001B[34m \u001B[0m\u001B[34mTraining job completed \u001B[0m\u001B[34m \u001B[0m \u001B[32m \u001B[0m\u001B[32m0.0s \u001B[0m\u001B[32m \u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m│\u001B[0m \u001B[34m│\u001B[0m\n", - "\u001B[34m╰──────────────────────────────────────────────────────────────────────────────────────────╯\u001B[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "training_job = rlaif_trainer.train(wait=True,\n", " logs=True)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 3, "id": "6815868d-0490-43a4-9765-148c4b2ef4af", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
╭────────────────────────────────── Training Job Status ───────────────────────────────────╮\n", - "│ TrainingJob Name meta-textgeneration-llama-3-2-1b-instruct-rlaif-20251202102827 │\n", - "│ │\n", - "│ Job Status Completed │\n", - "│ Secondary Status Completed │\n", - "│ Elapsed Time 801.2s │\n", - "│ │\n", - "│ Status Transitions │\n", - "│ │\n", - "│ Step Details Duration │\n", - "│ ─────────────────────────────────────────────────────────────────────────── │\n", - "│ ✓ Starting Starting the training job 1.4s │\n", - "│ ✓ Pending Preparing the instances for 16.4s │\n", - "│ training │\n", - "│ ✓ Downloading Downloading the training image 15.7s │\n", - "│ ✓ Training Training image download completed. 722.6s │\n", - "│ Training in progress. │\n", - "│ ✓ Uploading Uploading generated training model 43.7s │\n", - "│ ✓ Completed Training job completed 0.0s │\n", - "│ │\n", - "╰──────────────────────────────────────────────────────────────────────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "\u001B[38;5;172m╭─\u001B[0m\u001B[38;5;172m─────────────────────────────────\u001B[0m\u001B[38;5;172m \u001B[0m\u001B[1;94mTraining Job Status\u001B[0m\u001B[38;5;172m \u001B[0m\u001B[38;5;172m──────────────────────────────────\u001B[0m\u001B[38;5;172m─╮\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mTrainingJob Name \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;32mmeta-textgeneration-llama-3-2-1b-instruct-rlaif-20251202102827\u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mJob Status \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;38;5;172mCompleted\u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mSecondary Status \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;33mCompleted\u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;36m \u001B[0m\u001B[1;36mElapsed Time \u001B[0m\u001B[1;36m \u001B[0m\u001B[37m \u001B[0m\u001B[1;91m801.2s\u001B[0m\u001B[37m \u001B[0m\u001B[37m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;35mStatus Transitions\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35m \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mStep \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mDetails \u001B[0m\u001B[1;35m \u001B[0m \u001B[1;35m \u001B[0m\u001B[1;35mDuration \u001B[0m\u001B[1;35m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m ─────────────────────────────────────────────────────────────────────────── \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mStarting \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mStarting the training job \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m1.4s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mPending \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mPreparing the instances for \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m16.4s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m \u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mtraining \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mDownloading \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mDownloading the training image \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m15.7s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mTraining \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mTraining image download completed. \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m722.6s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m \u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mTraining in progress. \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mUploading \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mUploading generated training model \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m43.7s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[32m \u001B[0m\u001B[32m✓ \u001B[0m\u001B[32m \u001B[0m \u001B[36m \u001B[0m\u001B[36mCompleted \u001B[0m\u001B[36m \u001B[0m \u001B[38;5;172m \u001B[0m\u001B[38;5;172mTraining job completed \u001B[0m\u001B[38;5;172m \u001B[0m \u001B[32m \u001B[0m\u001B[32m0.0s \u001B[0m\u001B[32m \u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m│\u001B[0m \u001B[38;5;172m│\u001B[0m\n", - "\u001B[38;5;172m╰──────────────────────────────────────────────────────────────────────────────────────────╯\u001B[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "training_job = rlaif_trainer.train(wait=True)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 4, "id": "fc3e1c67-d9c5-429a-aed7-07b26106ef2e", "metadata": { "collapsed": true, @@ -1432,404 +287,9 @@ }, "scrolled": true }, - "outputs": [ - { - "data": { - "text/html": [ - "
TrainingJob(\n", - "│ training_job_name='test-llama-3-2-1b-instruct-rlaif-20251111133638',\n", - "│ training_job_arn='arn:aws:sagemaker:us-west-2:052150106756:training-job/test-llama-3-2-1b-instruct-rlaif-20251111133638',\n", - "│ processing_job_arn=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ tuning_job_arn=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ labeling_job_arn=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ auto_ml_job_arn=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ model_artifacts=ModelArtifacts(\n", - "│ │ s3_model_artifacts='s3://open-models-testing-pdx/output/test-llama-3-2-1b-instruct-rlaif-20251111133638/output/model'\n", - "│ ),\n", - "│ training_job_output=TrainingJobOutput(\n", - "│ │ s3_training_job_output='s3://open-models-testing-pdx/output/test-llama-3-2-1b-instruct-rlaif-20251111133638/output/output'\n", - "│ ),\n", - "│ training_job_status='Completed',\n", - "│ secondary_status='Completed',\n", - "│ failure_reason=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ hyper_parameters={\n", - "│ │ 'custom_prompt_arn': 'arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/JsonDoc/jamjee-test-rp1/0.0.1',\n", - "│ │ 'data_path': 'None',\n", - "│ │ 'global_batch_size': '128',\n", - "│ │ 'judge_prompt_template': '/opt/ml/input/data/judge_prompt/custom_prompt.jinja',\n", - "│ │ 'learning_rate': '1e-05',\n", - "│ │ 'lora_alpha': '256',\n", - "│ │ 'lora_rank': '128',\n", - "│ │ 'max_epochs': '3',\n", - "│ │ 'max_prompt_length': '1024',\n", - "│ │ 'model_name_or_path': 'meta-llama/Llama-3.2-1B-Instruct',\n", - "│ │ 'name': 'example-name-pjf9v',\n", - "│ │ 'preset_reward_function': 'compute_score',\n", - "│ │ 'reward_model_id': 'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0',\n", - "│ │ 'rollout': '8',\n", - "│ │ 'temperature': '1.0',\n", - "│ │ 'top_k': '-1',\n", - "│ │ 'top_p': '1',\n", - "│ │ 'train_val_split_ratio': '0.9',\n", - "│ │ 'validation_data_path': 'None'\n", - "│ },\n", - "│ algorithm_specification=AlgorithmSpecification(\n", - "│ │ training_input_mode='File',\n", - "│ │ training_image='',\n", - "│ │ algorithm_name=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ metric_definitions=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ enable_sage_maker_metrics_time_series=False,\n", - "│ │ container_entrypoint=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ container_arguments=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ training_image_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>\n", - "│ ),\n", - "│ role_arn='arn:aws:iam::052150106756:role/Admin',\n", - "│ input_data_config=[\n", - "│ │ Channel(\n", - "│ │ │ channel_name='train',\n", - "│ │ │ data_source=DataSource(\n", - "│ │ │ │ s3_data_source=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ │ │ file_system_data_source=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ │ │ dataset_source=DatasetSource(\n", - "│ │ │ │ │ dataset_arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/DataSet/MarketingDemoDataset1/1.0.0'\n", - "│ │ │ │ )\n", - "│ │ │ ),\n", - "│ │ │ content_type=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ │ compression_type='None',\n", - "│ │ │ record_wrapper_type='None',\n", - "│ │ │ input_mode=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ │ shuffle_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ │ enable_ffm=False\n", - "│ │ )\n", - "│ ],\n", - "│ output_data_config=OutputDataConfig(\n", - "│ │ s3_output_path='s3://open-models-testing-pdx/output',\n", - "│ │ kms_key_id='',\n", - "│ │ compression_type='NONE',\n", - "│ │ remove_job_name_from_s3_output_path=False,\n", - "│ │ disable_model_upload=False,\n", - "│ │ channels=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>\n", - "│ ),\n", - "│ resource_config=ResourceConfig(\n", - "│ │ volume_size_in_gb=10,\n", - "│ │ instance_type='ml.p4de.24xlarge',\n", - "│ │ instance_count=1,\n", - "│ │ volume_kms_key_id=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ keep_alive_period_in_seconds=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ capacity_reservation_ids=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ instance_groups=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ capacity_schedules_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ training_plan_arn=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ instance_placement_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>\n", - "│ ),\n", - "│ warm_pool_status=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ vpc_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ stopping_condition=StoppingCondition(\n", - "│ │ max_runtime_in_seconds=86400,\n", - "│ │ max_wait_time_in_seconds=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ max_pending_time_in_seconds=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>\n", - "│ ),\n", - "│ creation_time=datetime.datetime(2025, 11, 11, 13, 36, 38, 542000, tzinfo=tzlocal()),\n", - "│ training_start_time=datetime.datetime(2025, 11, 11, 13, 36, 52, 938000, tzinfo=tzlocal()),\n", - "│ training_end_time=datetime.datetime(2025, 11, 11, 13, 49, 15, 463000, tzinfo=tzlocal()),\n", - "│ last_modified_time=datetime.datetime(2025, 11, 11, 13, 49, 15, 463000, tzinfo=tzlocal()),\n", - "│ secondary_status_transitions=[\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Starting',\n", - "│ │ │ start_time=datetime.datetime(2025, 11, 11, 13, 36, 38, 542000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 11, 11, 13, 36, 39, 239000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Starting the training job'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Pending',\n", - "│ │ │ start_time=datetime.datetime(2025, 11, 11, 13, 36, 39, 239000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 11, 11, 13, 36, 52, 938000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Preparing the instances for training'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Downloading',\n", - "│ │ │ start_time=datetime.datetime(2025, 11, 11, 13, 36, 52, 938000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 11, 11, 13, 39, 39, 928000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Downloading the training image'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Training',\n", - "│ │ │ start_time=datetime.datetime(2025, 11, 11, 13, 39, 39, 928000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 11, 11, 13, 48, 42, 252000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Training image download completed. Training in progress.'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Uploading',\n", - "│ │ │ start_time=datetime.datetime(2025, 11, 11, 13, 48, 42, 252000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 11, 11, 13, 49, 15, 463000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Uploading generated training model'\n", - "│ │ ),\n", - "│ │ SecondaryStatusTransition(\n", - "│ │ │ status='Completed',\n", - "│ │ │ start_time=datetime.datetime(2025, 11, 11, 13, 49, 15, 463000, tzinfo=tzlocal()),\n", - "│ │ │ end_time=datetime.datetime(2025, 11, 11, 13, 49, 15, 463000, tzinfo=tzlocal()),\n", - "│ │ │ status_message='Training job completed'\n", - "│ │ )\n", - "│ ],\n", - "│ final_metric_data_list=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ enable_network_isolation=False,\n", - "│ enable_inter_container_traffic_encryption=False,\n", - "│ enable_managed_spot_training=False,\n", - "│ checkpoint_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ training_time_in_seconds=743,\n", - "│ billable_time_in_seconds=743,\n", - "│ billable_token_count=0,\n", - "│ debug_hook_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ experiment_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ debug_rule_configurations=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ tensor_board_output_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ debug_rule_evaluation_statuses=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ upstream_platform_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ profiler_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ profiler_rule_configurations=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ profiler_rule_evaluation_statuses=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ profiling_status='Disabled',\n", - "│ environment=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ retry_strategy=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ last_modified_by=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ created_by=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ disable_efa=False,\n", - "│ processing_job_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ image_metadata=ImageMetadata(image_type='BYOImage'),\n", - "│ remote_debug_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ resource_tags=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ infra_check_config=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ serverless_job_config=ServerlessJobConfig(\n", - "│ │ job_type='FineTuning',\n", - "│ │ base_model_arn='arn:aws:sagemaker:us-west-2:aws:hub-content/SageMakerPublicHub/Model/test-llama-3-2-1b-instruct/0.0.3',\n", - "│ │ job_spec={'CustomizationTechnique': 'RLAIF'},\n", - "│ │ model_package_group_arn=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ source_model_package_arn=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ accept_eula=False,\n", - "│ │ evaluator_arn='arn:aws:sagemaker:us-west-2:052150106756:hub-content/AIRegistry/JsonDoc/jamjee-test-rp1/0.0.1'\n", - "│ ),\n", - "│ mlflow_config=MlflowConfig(\n", - "│ │ mlflow_resource_arn='arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment',\n", - "│ │ mlflow_tracking_server_arn=<sagemaker_core.main.utils.Unassigned object at 0x12c04d7d0>,\n", - "│ │ mlflow_experiment_name='test-rlaif-finetuned-models-exp',\n", - "│ │ mlflow_run_name='test-rlaif-finetuned-models-run'\n", - "│ ),\n", - "│ mlflow_details=MlflowDetails(mlflow_experiment_id='88', mlflow_run_id='3184c75ab4a142f3b15132a7f00e0b68'),\n", - "│ progress_info=TrainingProgressInfo(\n", - "│ │ total_step_count_per_epoch=1,\n", - "│ │ current_step=2,\n", - "│ │ current_epoch=2,\n", - "│ │ max_epoch=3\n", - "│ ),\n", - "│ output_model_package_arn='arn:aws:sagemaker:us-west-2:052150106756:model-package/test-finetuned-models/1'\n", - ")\n", - "\n" - ], - "text/plain": [ - "\u001B[1;38;2;225;0;225mTrainingJob\u001B[0m\u001B[1m(\u001B[0m\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;215;175;0mtraining_job_name\u001B[0m=\u001B[38;2;0;135;0m'test-llama-3-2-1b-instruct-rlaif-20251111133638'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;215;175;0mtraining_job_arn\u001B[0m=\u001B[38;2;0;135;0m'arn:aws:sagemaker:us-west-2:052150106756:training-job/test-llama-3-2-1b-instruct-rlaif-20251111133638'\u001B[0m,\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;215;175;0mprocessing_job_arn\u001B[0m=\u001B[1m<\u001B[0m\u001B[1;38;2;225;0;225msagemaker_core.main.utils.Unassigned\u001B[0m\u001B[39m object at \u001B[0m\u001B[1;36m0x12c04d7d0\u001B[0m\u001B[39m>,\u001B[0m\n", - "\u001B[2;32m│ \u001B[0m\u001B[38;2;215;175;0mtuning_job_arn\u001B[0m\u001B[39m=