# %% [markdown]
# # Leveraging Lakehouse data with Amazon SageMaker XGBoost and AutoML
# _**Supervised learning with MLFlow logging of experiments**_
#
# ## Contents
# 1. Background
# 1. Preparation
# 1. Data Preparation
# 1. Training XGBoost
# 1. Training AutoML
# 1. Deployment and inference test
# 1. Evaluation
#
# ## Background
# One of the key advantages of the new SageMaker AI Unified Studio is its ability to
# integrate data from multiple sources. In this notebook, we'll walk through an example
# of bringing data from a Lakehouse to train models using XGBoost and AutoML. We'll also
# leverage the power of MLFlow servers to capture and analyze the training data.
#
# This notebook demonstrates how to predict a customer's purchase potential based on a
# set of features. We'll go through the following steps:
#
# * Setting up your Amazon SageMaker AI notebook
# * Querying data sources using Athena
# * Transforming the data to feed into Amazon SageMaker algorithms
# * Training a model using the Gradient Boosting algorithm (XGBoost)
# * Launching an AutoML task to target the same feature
# * Utilizing MLFlow to capture and visualize experiment data
#
# ## Preparation
# Start by bringing in the Python libraries used throughout the notebook.

# %%
import boto3
import pandas as pd
import numpy as np
import logging
import io
import sagemaker
import mlflow
import os
from datetime import datetime, timezone
from sagemaker.modules import Session
from sagemaker_studio import Project

# %% [markdown]
# Configure logging, then specify the MLFlow tracking server ARN, the S3 bucket and
# prefix for training/model data, and the IAM role granting training and hosting
# permissions.

# %%
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# %% [markdown]
# ### Copy MLFlow Tracking Server ARN
# Copy/paste the ARN of your Project MLFlow Tracking Server. You can find it by
# navigating to the Project -> Compute page, selecting the "MLFlow Tracking Server"
# tab, then the `Copy ARN` button.

# %%
project = Project()
# mlflow_arn = project.mlflow_tracking_server_arn

# Paste the ARN from the Tracking Server instance between the quotes.
mlflow_arn = "COPY_TRACKING_SERVER_ARN_HERE"
print(f"ARN: {mlflow_arn}")

mlflow.set_tracking_uri(mlflow_arn)

# %% [markdown]
# One of the added benefits of SageMaker Unified Studio is the use of Project to bring
# in resources.

# %%
# Initialize AWS session and pull the bucket/role from the Project.
session = boto3.Session()
bucket_root = project.s3.root
role = project.iam_role

# Parse the S3 URI into bucket name and key prefix.
s3_parts = bucket_root.replace("s3://", "").split("/")
bucket = s3_parts[0]
prefix = "/".join(s3_parts[1:])

# If you prefer NOT using the new SageMaker AI Project framework, an alternative:
# session = sagemaker.Session()
# bucket = session.default_bucket()
# from sagemaker import get_execution_role
# role = get_execution_role()
# sagemaker_client = boto3.Session().client(service_name='sagemaker', region_name=region)

# %%
print(f"Using Bucket: {bucket}")
print(f"Using prefix: {prefix}")
print(f"Using Role: {role}")

# %% [markdown]
# Retrieve the name of the project's database through the default catalog.

# %%
catalog = project.connection().catalog()
project_database = catalog.databases[0].name
project_database

# %%
# If your account has more than one catalog, list them to look up names.
# enumerate() replaces the original manual counter, which shadowed the builtin `id`.
for index, db in enumerate(catalog.databases):
    print(f"Index {index}: {db}")

# %% [markdown]
# ### Data Preparation
# Upload the file "5000-sales-records.csv". If you are running this notebook from the
# **SageMaker Unified Studio Workshop**, the file can be downloaded from the
# instructions page, then uploaded via the S3 Browser on the Project -> Data page
# (uploads normally land under the `local-uploads` prefix). From the S3 console,
# select the file, hit "Copy S3 URI", and paste the URI into the read_csv() call below.

# %%
# Read the CSV directly from its S3 URI, e.g.
# data = pd.read_csv("s3://csv-file-store-.../local-uploads/.../5000-sales-records.csv")
data = pd.read_csv("COPY_S3_URI_HERE")

# %%
# Rename columns to match the Spark DataFrame inference.
data.rename(columns={
    "Region": "region",
    "Country": "country",
    "Item Type": "item type",
    "Sales Channel": "sales channel",
    "Order Priority": "order priority",
    "Order Date": "order date",
    "Order ID": "order id",
    "Ship Date": "ship date",
    "Units Sold": "units sold",
    "Unit Price": "unit price",
    "Unit Cost": "unit cost",
    "Total Revenue": "total revenue",
    "Total Cost": "total cost",
    "Total Profit": "total profit",
    },
    inplace=True)

# %%
# Dump DataFrame metadata.
logger.info(f"DataFrame shape: {data.shape}")
logger.info("\nDataFrame info:")
# DataFrame.info() prints to stdout and returns None, so logging its return value
# would only log "None"; capture the report into a buffer instead.
_info_buf = io.StringIO()
data.info(buf=_info_buf)
logger.info(_info_buf.getvalue())

# %% [markdown]
# With the data available, prepare it for the machine-learning models: split features
# from the target variable, handle missing values, and then...
# %% [markdown]
# ...encode categorical variables, scale numerical features, and split the data into
# training/validation/testing sets.
#
# Amazon SageMaker's XGBoost container expects data in the libSVM or CSV data format.
# For this example, we'll stick to CSV. Note that the first column must be the target
# variable and the CSV should not include headers. Although repetitive, it's easiest
# to do this after the train|validation|test split rather than before — this avoids
# misalignment issues due to random reordering.

# %%
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


def process_data(data: pd.DataFrame):
    """Prepare the sales data for modeling.

    Steps: drop the 'order id' identifier, expand the date columns into
    year/month/quarter features, build three revenue lag features per
    (item type, sales channel) group, drop the rows made incomplete by the lags,
    one-hot encode categoricals, split 70/20/10 into train/val/test, and
    standard-scale the numeric features (scaler fit on the training split only).

    Returns:
        (X_train, X_val, X_test, y_train, y_val, y_test, feature_columns, scaler)
    """
    # Work on a copy so the caller's frame is never modified.
    df = data.copy()

    # 'order id' is an identifier, not a feature.
    df = df.drop('order id', axis=1)

    # Convert date columns to datetime and extract coarse seasonality features.
    date_columns = ['order date', 'ship date']
    for col in date_columns:
        df[col] = pd.to_datetime(df[col])
        df[f'{col}_year'] = df[col].dt.year
        df[f'{col}_month'] = df[col].dt.month
        df[f'{col}_quarter'] = df[col].dt.quarter

    # Drop the original date columns.
    df = df.drop(columns=date_columns)

    # Lag features for 'total revenue' within each product/channel group.
    for i in range(1, 4):
        df[f'revenue_lag_{i}'] = df.groupby(['item type', 'sales channel'])['total revenue'].shift(i)

    # The first rows of each group have no lag history; drop them.
    df = df.dropna()

    # One-hot encode the categorical variables.
    categorical_columns = ['region', 'country', 'item type', 'sales channel', 'order priority']
    df_encoded = pd.get_dummies(df, columns=categorical_columns)

    # Prepare features and target ('total profit' is the target variable).
    target_column = 'total profit'
    numeric_columns = ['units sold', 'unit price', 'unit cost', 'total revenue', 'total cost']
    feature_columns = [col for col in df_encoded.columns if col != target_column]
    X = df_encoded[feature_columns]
    y = df_encoded[target_column].astype(float)

    # 70% train; the remaining 30% split ~2:1 into validation and test.
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=1729)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.33, random_state=1729)

    # Take explicit copies so the scaled-column assignments below cannot trigger
    # SettingWithCopyWarning on slices of the pre-split frame.
    X_train, X_val, X_test = X_train.copy(), X_val.copy(), X_test.copy()

    # Scale numeric features; fit on train only to avoid leakage.
    scaler = StandardScaler()
    numeric_features = [col for col in X_train.columns if col in numeric_columns +
                        ['order date_year', 'order date_month', 'order date_quarter',
                         'ship date_year', 'ship date_month', 'ship date_quarter'] +
                        [f'revenue_lag_{i}' for i in range(1, 4)]]

    X_train[numeric_features] = scaler.fit_transform(X_train[numeric_features])
    X_val[numeric_features] = scaler.transform(X_val[numeric_features])
    X_test[numeric_features] = scaler.transform(X_test[numeric_features])

    return X_train, X_val, X_test, y_train, y_val, y_test, feature_columns, scaler


# %%
# Process the data and inspect the result.
X_train, X_val, X_test, y_train, y_val, y_test, feature_columns, scaler = process_data(data)

print("\nProcessed data shape:", X_train.shape)
print("\nFirst few rows of processed data:")
print(X_train.head())
print(X_train.shape)
print(X_train.info())
print("\nColumn names:")
print(X_train.columns.tolist())

# Verify target variable.
print("\nSummary statistics of the target variable:")
print(y_train.describe())

# %% [markdown]
# ---
# ## Training XGBoost
# ### Option 1: Using the SageMaker Decorator
# Most of our features have skewed distributions, some are highly correlated, and some
# have non-linear relationships with the target; predictive accuracy matters more here
# than explainability. Taken together, these make gradient boosted trees a good
# candidate algorithm: they combine predictions from many simple models, each of which
# tries to address the weaknesses of the previous ones, and the ensemble can outperform
# large, complex models.
#
# `xgboost` is an extremely popular open-source package for gradient boosted trees.
# We first train it with SageMaker's @remote decorator, which transforms the annotated
# function into a SageMaker training job, logging parameters, metrics, tags, and
# artifacts to MLflow along the way.
# %% [markdown]
# When the training is finished, open MLflow to look at the experiment results.

# %%
import xgboost as xgb
import joblib
from sagemaker.remote_function import remote


def train_model(X_train, y_train, X_val, y_val):
    """Fit an XGBoost regressor, tracking RMSE on the validation set each round."""
    model = xgb.XGBRegressor(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=5,
        random_state=42
    )

    model.fit(
        X_train,
        y_train,
        eval_set=[(X_val, y_val)],
        verbose=False
    )

    return model


@remote(job_name_prefix="xgboost-sales-forecast",
        instance_type="ml.m5.large",
        keep_alive_period_in_seconds=600,)
def model_train(X_train, y_train, X_val, y_val, mlflow_arn):
    """Orchestrate model training as a SageMaker job with MLflow tracking.

    Logs dataset sizes, model parameters, per-epoch and final RMSE metrics,
    registers the model, and persists it to the SageMaker artifact directory.
    Returns (model, predictions-on-validation-set).
    """
    mlflow.set_tracking_uri(mlflow_arn)
    mlflow.set_experiment("XG-Boost")

    with mlflow.start_run(run_name=f"xgboost-decorator-{datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')}"):
        # Log information about the data.
        mlflow.log_param("train_samples", len(X_train))
        mlflow.log_param("val_samples", len(X_val))
        mlflow.log_param("features", X_train.shape[1])

        # Train model.
        model = train_model(X_train, y_train, X_val, y_val)

        # Log model parameters.
        mlflow.log_params(model.get_params())

        # Per-epoch RMSE: eval_set above is the *validation* split, so log it as
        # 'validation_rmse' (it was previously mislabeled 'train_rmse').
        results = model.evals_result()
        for epoch, rmse_value in enumerate(results['validation_0']['rmse']):
            mlflow.log_metric('validation_rmse', rmse_value, step=epoch)

        # Log final metrics.
        final_rmse = results['validation_0']['rmse'][-1]
        best_rmse = min(results['validation_0']['rmse'])
        best_epoch = results['validation_0']['rmse'].index(best_rmse)
        mlflow.log_metrics({
            'final_rmse': final_rmse,
            'best_rmse': best_rmse,
            'best_epoch': best_epoch
        })

        # Set tags for the run.
        mlflow.set_tag("model_type", "XGBoost")
        mlflow.set_tag("framework", "OSS")

        # Infer model signature and register the model.
        predictions = model.predict(X_val)
        signature = mlflow.models.infer_signature(X_train, predictions)
        mlflow.xgboost.log_model(model, "model", registered_model_name="xgboost-lib-regression", signature=signature)

        # Save the model where SageMaker collects training job artifacts.
        path = "/opt/ml/model"
        joblib.dump(model, os.path.join(path, 'revenue_forecast_model.joblib'))
        return model, predictions


# %%
# Run the training.
xgb_model, output = model_train(X_train, y_train, X_val, y_val, mlflow_arn)

# %% [markdown]
# ### Option 2: Using SageMaker's built-in algorithm
# Amazon SageMaker also has a managed, distributed training framework for XGBoost.
# This section shows how you can train that version of XGBoost: instead of the
# @remote decorator, a training job is created through the SDK.
#
# First we must adjust the data to be suitable for this version of XGBoost.
# %%
def save_for_sagemaker_xgboost(X, y, filename):
    """Write (y, X) as a header-less, index-less CSV for SageMaker's XGBoost.

    The built-in algorithm expects the target in the first column and purely
    numeric values: boolean columns are cast to int and any remaining
    non-numeric values become 0.
    """
    # Target first, then features, with aligned positional indexes.
    data = pd.concat([y.reset_index(drop=True), X.reset_index(drop=True)], axis=1)

    # One-hot columns come out of get_dummies as bool; XGBoost needs numbers.
    bool_columns = data.select_dtypes(include=['bool']).columns
    data[bool_columns] = data[bool_columns].astype(int)

    # Coerce everything to numeric; failures become NaN, then 0.
    data = data.apply(pd.to_numeric, errors='coerce')
    data = data.fillna(0)

    # Save to CSV without header and index.
    data.to_csv(filename, header=False, index=False)
    # Bug fix: the message previously printed the literal text "(unknown)"
    # because the f-string placeholder for the filename was missing.
    print(f"Data saved to {filename}")


# %%
# Guard lets this module be imported (e.g. for testing) without side effects;
# when run as notebook cells, __name__ is "__main__" and the block executes.
if __name__ == "__main__":
    # Combine train and validation sets for the training channel.
    X_train_full = pd.concat([X_train, X_val])
    y_train_full = pd.concat([y_train, y_val])

    # Save training data (including validation data).
    save_for_sagemaker_xgboost(X_train_full, y_train_full, 'train.csv')

    # Save test data.
    save_for_sagemaker_xgboost(X_test, y_test, 'test.csv')

    # Print some information about the saved files.
    print("\nTrain file info:")
    print(pd.read_csv('train.csv', header=None).info())

    print("\nTest file info:")
    print(pd.read_csv('test.csv', header=None).info())

    # Verify the first few rows of each file.
    print("\nFirst few rows of train.csv:")
    print(pd.read_csv('train.csv', header=None).head())

    print("\nFirst few rows of test.csv:")
    print(pd.read_csv('test.csv', header=None).head())
# %% [markdown]
# The cell above preprocessed our dataset and produced "train.csv" and "test.csv".
# Next, upload these local files to our S3 location.

# %%
session.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train_xgboost/train.csv')).upload_file('train.csv')
session.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'test_xgboost/test.csv')).upload_file('test.csv')
session.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'validation_xgboost/test.csv')).upload_file('test.csv')

# %% [markdown]
# Specify the ECR container location for Amazon SageMaker's implementation of XGBoost.

# %%
# Pin a specific framework release: the 'latest' alias is deprecated for the
# SageMaker XGBoost images and newer SDK versions reject it.
container = sagemaker.image_uris.retrieve(region=boto3.Session().region_name, framework='xgboost', version='1.7-1')

# %% [markdown]
# Because we're training with the CSV file format, create `TrainingInput` objects
# that point the training job at the files in S3 and declare the content type.

# %%
s3_input_train = sagemaker.inputs.TrainingInput(
    s3_data=f's3://{bucket}/{prefix}/train_xgboost/',
    content_type='csv'
)
s3_input_test = sagemaker.inputs.TrainingInput(
    s3_data=f's3://{bucket}/{prefix}/test_xgboost/',
    content_type='csv'
)
s3_input_validation = sagemaker.inputs.TrainingInput(
    s3_data=f's3://{bucket}/{prefix}/validation_xgboost/',
    content_type='csv'
)

# %% [markdown]
# Next we specify training parameters to the estimator: the `xgboost` algorithm
# container, the IAM role, training instance type and count, the S3 location for
# output data, and the algorithm hyperparameters. Then `.fit()` passes in the
# input data — here both a training and a validation set.
# %%
from mlflow.models import infer_signature

sm_session = sagemaker.Session()
xgb_estimator = sagemaker.estimator.Estimator(container,
                                              role,
                                              instance_count=1,
                                              instance_type='ml.m5.xlarge',
                                              output_path=f's3://{bucket}/{prefix}/output',
                                              sagemaker_session=sm_session)

hyperparameters = {
    "max_depth": 6,
    "eta": 0.2,
    "gamma": 4,
    "min_child_weight": 8,
    "subsample": 0.6,
    "verbosity": 0,
    # NOTE(review): 'reg:linear' is a deprecated alias of 'reg:squarederror';
    # kept here for compatibility with older container versions.
    "objective": "reg:linear",
    "num_round": 75,
}
xgb_estimator.set_hyperparameters(**hyperparameters)

mlflow.set_experiment("XG-Boost")
with mlflow.start_run(run_name=f"xgboost-builtin-{datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')}"):
    # Log the hyperparameters.
    mlflow.log_params(hyperparameters)

    # Fit the model. Use the dedicated validation channel (it carries the same
    # test.csv data that was previously passed via s3_input_test, but keeps the
    # channel name and data role consistent).
    xgb_estimator.fit({'train': s3_input_train, 'validation': s3_input_validation})

    # Pull final metrics from the completed training job description.
    job_name = xgb_estimator.latest_training_job.job_name
    client = sm_session.boto_session.client('sagemaker')
    training_job_description = client.describe_training_job(TrainingJobName=job_name)

    # Extract and log metrics.
    for metric in training_job_description['FinalMetricDataList']:
        mlflow.log_metric(metric['MetricName'], metric['Value'])

    # Set tags for the run.
    mlflow.set_tag("model_type", "XGBoost")
    mlflow.set_tag("framework", "SageMaker")

    # Register the model from this run.
    mlflow.register_model(f"runs:/{mlflow.active_run().info.run_id}/model", "xgboost-sm-regression")

    # run_uuid is deprecated in MLflow; run_id is the supported field.
    print(f"Model saved in run {mlflow.active_run().info.run_id}")

# %% [markdown]
# Now that training of the XGBoost model has completed (and, optionally, a SageMaker
# Autopilot job on the same dataset), we can deploy the model — or build one from any
# Autopilot candidate — using Inference Pipelines:
# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipelines.html

# %%
xgb_predictor = xgb_estimator.deploy(initial_instance_count=1,
                                     instance_type='ml.m5.xlarge')

# %% [markdown]
# First we need to determine how we pass data into and receive data from our endpoint.
# Our data is currently stored as NumPy arrays in memory of our notebook instance.
# %% [markdown]
# To send data in an HTTP POST request, we serialize it as a CSV string and then
# decode the resulting CSV the endpoint returns.
#
# *Note: For inference with CSV format, SageMaker XGBoost requires that the data
# does NOT include the target variable.*

# %%
if __name__ == "__main__":
    xgb_predictor.serializer = sagemaker.serializers.CSVSerializer()

# %% [markdown]
# A helper that loops over the test dataset, splits it into mini-batches of rows,
# converts each mini-batch to a CSV string payload (the target variable is dropped
# first), invokes the XGBoost endpoint, and collects the predictions from the CSV
# output into a single NumPy array.

# %%
def predict(data, predictor, rows=500):
    """Invoke `predictor` on `data` in mini-batches of at most `rows` rows.

    `data` is a 2-D numeric array of feature rows (no target column). Returns a
    1-D NumPy array of float predictions, in input order.
    """
    split_array = np.array_split(data, int(data.shape[0] / float(rows) + 1))
    predictions = []

    for batch in split_array:
        # Serialize the mini-batch as a CSV payload.
        payload = '\n'.join(','.join(map(str, row)) for row in batch)

        # Get predictions from the endpoint (returned as bytes of CSV floats).
        response = predictor.predict(payload)

        # np.fromstring is deprecated for text input; parse explicitly instead.
        predictions.append(np.array(response.decode('utf-8').split(','), dtype=float))

    # Concatenate all mini-batch predictions.
    return np.concatenate(predictions)


# %%
if __name__ == "__main__":
    # test.csv was written WITHOUT a header row, so read it with header=None;
    # otherwise the first data row is silently consumed as column names.
    test_data = pd.read_csv('test.csv', header=None)

    # Print column names and data info for debugging.
    print("Original columns:", test_data.columns)
    print(test_data.info())

    # Drop the first column (the target variable). A distinct name avoids
    # clobbering the X_test DataFrame produced earlier by process_data().
    X_test_features = test_data.iloc[:, 1:]

    # Print shape to confirm.
    print("Shape of X_test_features:", X_test_features.shape)
    print("Columns of X_test_features:", X_test_features.columns)

    # The deployed model was trained on 224 feature columns; verify the count.
    if X_test_features.shape[1] != 224:
        print(f"Warning: You have {X_test_features.shape[1]} features. The model expects 224.")
        print("Current features:", X_test_features.columns.tolist())
        # If needed, manually select the correct 224 feature columns here.

    # Invoke the endpoint.
    predictions = predict(X_test_features.values, xgb_predictor)

# %% [markdown]
# Now we'll output a score for the predictions.

# %%
if __name__ == "__main__":
    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
    import matplotlib.pyplot as plt

    # Extract the actual values (first column).
    y_true = test_data.iloc[:, 0]

    # Predictions from the previous cell.
    y_pred = predictions

    # Make sure y_true and y_pred have the same length.
    assert len(y_true) == len(y_pred), "Mismatch in length between actual and predicted values"

    # Calculate evaluation metrics.
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    print(f"Mean Squared Error: {mse:.4f}")
    print(f"Root Mean Squared Error: {rmse:.4f}")
    print(f"Mean Absolute Error: {mae:.4f}")
    print(f"R-squared Score: {r2:.4f}")

    # Sample of the actual vs predicted values.
    comparison_df = pd.DataFrame({'Actual': y_true, 'Predicted': y_pred})
    print("\nSample of Actual vs Predicted values:")
    print(comparison_df.head(10))

    # Scatter of actual vs predicted with the ideal y=x reference line.
    plt.figure(figsize=(10, 6))
    plt.scatter(y_true, y_pred, alpha=0.5)
    plt.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'r--', lw=2)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title('Actual vs Predicted Values')
    plt.show()

    # Additional distribution statistics.
    print("\nAdditional Statistics:")
    print(f"Mean of Actual Values: {y_true.mean():.4f}")
    print(f"Mean of Predicted Values: {y_pred.mean():.4f}")
    print(f"Standard Deviation of Actual Values: {y_true.std():.4f}")
    print(f"Standard Deviation of Predicted Values: {y_pred.std():.4f}")

# %% [markdown]
# #### Optional: Hyperparameter Tuning Job
# We can optionally run a Hyperparameter Optimization (HPO) job to improve results.
# It runs multiple training jobs with different hyperparameter values over the ranges
# specified below, automatically choosing new values based on previous results.
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4dc4dd4b-440c-4e2b-9a14-1d3b929fc70e", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.tuner import HyperparameterTuner\n", + "from sagemaker.parameter import ContinuousParameter, IntegerParameter\n", + "\n", + "\n", + "hyperparameter_ranges = {\n", + " \"max_depth\": IntegerParameter(5, 10),\n", + " \"eta\": ContinuousParameter(0.001, 0.3),\n", + " \"min_child_weight\": IntegerParameter(5, 10),\n", + " \"subsample\": ContinuousParameter(0.3, 0.8),\n", + " \"num_round\": IntegerParameter(50, 100),\n", + "}\n", + "\n", + "objective_metric_name = \"validation:rmse\"\n", + "\n", + "tuner = HyperparameterTuner(\n", + " xgb_estimator,\n", + " objective_metric_name,\n", + " hyperparameter_ranges,\n", + " objective_type=\"Minimize\",\n", + " strategy=\"Bayesian\",\n", + " max_jobs=10,\n", + " max_parallel_jobs=5,\n", + ") " + ] + }, + { + "cell_type": "markdown", + "id": "2b4075b5-df7b-4666-86f8-3f44f42aeadc", + "metadata": {}, + "source": [ + "When we run the HPO job, we can log the results of the hyperparameter tuning job and each child training job into MLflow. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f90281c8-a5b7-47f7-bf1d-884eaf742045", + "metadata": {}, + "outputs": [], + "source": [ + "def format_param_range(param_range):\n", + " formatted_param_range = {\n", + " f\"{param_range['Name']}_min_value\": param_range['MinValue'],\n", + " f\"{param_range['Name']}_max_value\": param_range['MaxValue']\n", + " }\n", + " return formatted_param_range\n", + "\n", + "\n", + "mlflow.set_experiment(\"XG-Boost-HPO\")\n", + "with mlflow.start_run(run_name=f\"xgboost-hpo-{datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')}\"):\n", + " tuner.fit({\"train\": s3_input_train, \"validation\": s3_input_validation})\n", + " tuner_descr = tuner.describe()\n", + " \n", + " # Log parameters relevant to overall tuning job\n", + " mlflow.log_params(tuner_descr['HyperParameterTuningJobConfig']['ResourceLimits'])\n", + " mlflow.log_param('Strategy', tuner_descr['HyperParameterTuningJobConfig']['Strategy'])\n", + " mlflow.log_params(tuner_descr['TrainingJobDefinition']['StaticHyperParameters'])\n", + " for integer_param in tuner_descr['HyperParameterTuningJobConfig']['ParameterRanges']['IntegerParameterRanges']:\n", + " mlflow.log_params(format_param_range(integer_param))\n", + " for continuous_param in tuner_descr['HyperParameterTuningJobConfig']['ParameterRanges']['ContinuousParameterRanges']:\n", + " mlflow.log_params(format_param_range(continuous_param))\n", + " for categorical_param in tuner_descr['HyperParameterTuningJobConfig']['ParameterRanges']['CategoricalParameterRanges']:\n", + " mlflow.log_params(format_param_range(categorical_param))\n", + " mlflow.log_param('BestTrainingJobName', tuner_descr['BestTrainingJob']['TrainingJobName'])\n", + "\n", + " # Set tags for the run\n", + " mlflow.set_tag(\"model_type\", \"XGBoost\")\n", + " mlflow.set_tag(\"framework\", \"SageMaker\")\n", + "\n", + " # Log parameters and metrics for each training job\n", + " train_summaries = 
tuner.analytics().training_job_summaries()\n", + " for train_results in train_summaries:\n", + " with mlflow.start_run(run_name=train_results['TrainingJobName'], nested=True):\n", + " mlflow.log_params(train_results['TunedHyperParameters'])\n", + " mlflow.log_metric(train_results['FinalHyperParameterTuningJobObjectiveMetric']['MetricName'],\n", + " train_results['FinalHyperParameterTuningJobObjectiveMetric']['Value'])\n", + " mlflow.set_tag(\"model_type\", \"XGBoost\")\n", + " mlflow.set_tag(\"framework\", \"SageMaker\")" + ] + }, + { + "cell_type": "markdown", + "id": "eb14d426-4ba2-4525-ae60-25124ca22a43", + "metadata": {}, + "source": [ + "It is also easy to deploy the best model from the hyperparameter tuning job. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c816a536-bc81-4c2f-a166-d450e0f270dd", + "metadata": {}, + "outputs": [], + "source": [ + "tuner.deploy(\n", + " initial_instance_count=1, \n", + " instance_type='ml.m5.xlarge'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "af854c3c-7b28-440d-bfb9-601307e11a6d", + "metadata": {}, + "source": [ + "---\n", + "## AutoML Training\n", + "\n", + "Amazon SageMaker Autopilot is an automated machine learning (commonly referred to as AutoML) solution for tabular datasets. You can use SageMaker Autopilot in different ways: on autopilot (hence the name) or with human guidance, without code through SageMaker Studio, or using the AWS SDKs. This notebook, as a first glimpse, will use the AWS SDKs to simply create and deploy a machine learning model.\n", + "\n", + "This part of the notebook demonstrates how you can use Autopilot on this dataset to get the most accurate ML pipeline through exploring a number of potential options, or \"candidates\". Each candidate generated by Autopilot consists of two steps. The first step performs automated feature engineering on the dataset and the second step trains and tunes an algorithm to produce a model. 
When you deploy this model, it follows similar steps. Feature engineering followed by inference, to decide whether the lead is worth pursuing or not. The notebook contains instructions on how to train the model as well as to deploy the model to perform batch predictions on a set of leads. Where it is possible, use the Amazon SageMaker Python SDK, a high level SDK, to simplify the way you interact with Amazon SageMaker." + ] + }, + { + "cell_type": "markdown", + "id": "9f8d2871-85a2-42cc-b07b-ed1837ce96a6", + "metadata": {}, + "source": [ + "First, we need to upload the entire dataset to S3.\n", + "\n", + "Caution: Before running the cell below, you must upload the data file \"5000-sales-records.csv\" to the local directory!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5866a843-88d1-4206-b05c-11ebca7972b1", + "metadata": {}, + "outputs": [], + "source": [ + "# Upload data for AutoML Job\n", + "\n", + "session.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train_automl/train.csv')).upload_file('5000-sales-records.csv')\n", + "session.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'test_automl/test.csv')).upload_file('5000-sales-records.csv')" + ] + }, + { + "cell_type": "markdown", + "id": "a687e5c8-c205-4884-bd9e-785045258cb5", + "metadata": {}, + "source": [ + "### AutoML Configuration\n", + "\n", + "You can specify the type of problem you want to solve with your dataset (`Regression, MulticlassClassification, BinaryClassification`). In case you are not sure, SageMaker Autopilot will infer the problem type based on statistics of the target column (the column you want to predict). \n", + "\n", + "You have the option to limit the running time of a SageMaker Autopilot job by providing either the maximum number of pipeline evaluations or candidates (one pipeline evaluation is called a `Candidate` because it generates a candidate model) or providing the total time allocated for the overall Autopilot job. 
Under default settings, this job takes about four hours to run. This varies between runs because of the nature of the exploratory process Autopilot uses to find optimal training parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63c0bed7-21d6-48cb-9584-3e9f5c899304", + "metadata": {}, + "outputs": [], + "source": [ + "from time import gmtime, strftime, sleep\n", + "import json\n", + "import mlflow\n", + "import boto3\n", + "\n", + "# Set up MLflow experiment\n", + "mlflow.set_experiment(\"AutoML-Job\")\n", + "\n", + "# Start MLflow run\n", + "with mlflow.start_run(run_name=\"AutoML-Job-Run\"):\n", + "\n", + " input_data_config = [{\n", + " 'DataSource': {\n", + " 'S3DataSource': {\n", + " 'S3DataType': 'S3Prefix',\n", + " 'S3Uri': f's3://{bucket}/{prefix}/train_automl'\n", + " }\n", + " },\n", + " 'ContentType': 'text/csv;header=present',\n", + " 'TargetAttributeName': 'Total Profit'\n", + " }]\n", + "\n", + " output_data_config = {\n", + " 'S3OutputPath': f's3://{bucket}/{prefix}/output_automl'\n", + " }\n", + "\n", + " auto_ml_job_config = {\n", + " 'CompletionCriteria': {\n", + " 'MaxCandidates': 5\n", + " }\n", + " }\n", + "\n", + " autoMLJobObjective = {\n", + " \"MetricName\": \"MSE\" \n", + " }\n", + "\n", + " # Log configurations to MLflow\n", + " mlflow.log_dict(input_data_config, \"input_data_config.json\")\n", + " mlflow.log_dict(output_data_config, \"output_data_config.json\")\n", + " mlflow.log_dict(auto_ml_job_config, \"auto_ml_job_config.json\")\n", + " mlflow.log_dict(autoMLJobObjective, \"autoMLJobObjective.json\")\n", + "\n", + " # Configuration\n", + " timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())\n", + " auto_ml_job_name = 'demo' + timestamp_suffix\n", + " print('AutoMLJobName: ' + auto_ml_job_name)\n", + " mlflow.log_param(\"AutoMLJobName\", auto_ml_job_name)\n", + "\n", + " # Create AutoML job\n", + " sm = boto3.client('sagemaker')\n", + " sm.create_auto_ml_job(\n", + " 
AutoMLJobName=auto_ml_job_name,\n", + " InputDataConfig=input_data_config,\n", + " OutputDataConfig=output_data_config,\n", + " AutoMLJobConfig=auto_ml_job_config,\n", + " AutoMLJobObjective=autoMLJobObjective,\n", + " ProblemType=\"Regression\", \n", + " RoleArn=role \n", + " )\n", + "\n", + " # Wait for the AutoML job to complete\n", + " while True:\n", + " response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)\n", + " status = response['AutoMLJobStatus']\n", + " if status in ['Completed', 'Failed', 'Stopped']:\n", + " break\n", + " print(f\"AutoML job status: {status}\")\n", + " sleep(60)\n", + "\n", + " # Log final job status\n", + " mlflow.log_param(\"FinalJobStatus\", status)\n", + "\n", + " if status == 'Completed':\n", + " # Log best candidate info\n", + " best_candidate = response['BestCandidate']\n", + " mlflow.log_dict(best_candidate, \"best_candidate.json\")\n", + " \n", + " # Log objective metric\n", + " objective_metric = best_candidate['FinalAutoMLJobObjectiveMetric']\n", + " mlflow.log_metric(objective_metric['MetricName'], objective_metric['Value'])\n", + "\n", + " # Log other metrics if available\n", + " if 'CandidateProperties' in best_candidate:\n", + " for metric in best_candidate['CandidateProperties'].get('Metrics', []):\n", + " mlflow.log_metric(metric['MetricName'], metric['Value'])\n", + "\n", + " print(f\"AutoML job {auto_ml_job_name} finished with status: {status}\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69c7bac7-f750-4263-aea7-73a8e2d17a25", + "metadata": {}, + "outputs": [], + "source": [ + "sagemaker_client = boto3.client('sagemaker')\n", + "\n", + "best_candidate = sagemaker_client.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)['BestCandidate']\n", + "best_candidate_name = best_candidate['CandidateName']\n", + "print(best_candidate)\n", + "print('\\n')\n", + "\n", + "print(\"CandidateName: \" + best_candidate_name)\n", + "print(\"FinalAutoMLJobObjectiveMetricName: \" + 
best_candidate['FinalAutoMLJobObjectiveMetric']['MetricName'])\n", + "print(\"FinalAutoMLJobObjectiveMetricValue: \" + str(best_candidate['FinalAutoMLJobObjectiveMetric']['Value']))" + ] + }, + { + "cell_type": "markdown", + "id": "956b70d5", + "metadata": {}, + "source": [ + "### Create Model for best candidate\n", + "\n", + "When the AutoML job has finished running, you can easily create a SageMaker Model object using the best candidate." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82f69a99-7d29-426b-a147-db6222ebbfda", + "metadata": {}, + "outputs": [], + "source": [ + "from time import gmtime, strftime\n", + "\n", + "timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())\n", + "\n", + "model_name = 'demo-' + timestamp_suffix\n", + "print(f\"Model name: {model_name}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1167ba0b-5f3c-4b70-8692-ba5a567c9da8", + "metadata": {}, + "outputs": [], + "source": [ + "# Create Model\n", + "model = sagemaker_client.create_model(\n", + " Containers=best_candidate['InferenceContainers'],\n", + " ModelName=model_name,\n", + " ExecutionRoleArn=role\n", + ")\n", + "\n", + "print('Model ARN corresponding to the best candidate is : {}'.format(model['ModelArn']))" + ] + }, + { + "cell_type": "markdown", + "id": "27b99203-019b-4c16-aca5-856e6751c192", + "metadata": {}, + "source": [ + "### View other candidates\n", + "You can view all the candidates (pipeline evaluations with different hyperparameter combinations) that were explored by SageMaker Autopilot and sort them by their final performance metric." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a8d4b63-4ef1-4f53-899e-69882a98ca07", + "metadata": {}, + "outputs": [], + "source": [ + "candidates = sagemaker_client.list_candidates_for_auto_ml_job(\n", + " AutoMLJobName=auto_ml_job_name, SortBy='FinalObjectiveMetricValue')['Candidates']\n", + "\n", + "index = 1\n", + "for candidate in candidates:\n", + " print (str(index) + \" \" + candidate['CandidateName'] + \" \" + str(candidate['FinalAutoMLJobObjectiveMetric']['Value']))\n", + " index += 1" + ] + }, + { + "cell_type": "markdown", + "id": "cac7ef2d-0bb3-4a59-a3e7-bfaef07757ec", + "metadata": {}, + "source": [ + "## Cleanup\n", + "\n", + "The Autopilot job creates many underlying artifacts such as dataset splits, preprocessing scripts, or preprocessed data, etc. This code, when un-commented, deletes them. This operation deletes all the generated models and the auto-generated notebooks as well. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08ba43ea-6c41-4a60-8730-ff4327e35428", + "metadata": {}, + "outputs": [], + "source": [ + "#s3 = boto3.resource('s3')\n", + "#bucket = s3.Bucket(bucket)\n", + "\n", + "#job_outputs_prefix = '{}/output/{}'.format(prefix,auto_ml_job_name)\n", + "#bucket.objects.filter(Prefix=job_outputs_prefix).delete()\n", + "# xgb_predictor.delete_endpoint(delete_endpoint_config=True)\n", + "# tuner.delete_endpoint(delete_endpoint_config=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/bedrock-modelbuilder-deployment-nova.ipynb b/bedrock-modelbuilder-deployment-nova.ipynb new file mode 
100644 index 0000000000..ba50e67ef0 --- /dev/null +++ b/bedrock-modelbuilder-deployment-nova.ipynb @@ -0,0 +1,568 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bedrock ModelBuilder Example\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2026/03/16 14:33:32 Refreshing aws credentials for default\n", + "2026/03/16 14:33:33 Successfully refreshed aws credentials for default\n" + ] + } + ], + "source": [ + "# Configure AWS credentials and region\n", + "! ada credentials update --provider=isengard --account=099324990371 --role=Admin --profile=default --once\n", + "! aws configure set region us-east-1" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[03/16/26 14:33:37] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1392\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/16/26 14:33:37]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=778733;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=885530;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/credentials.py#1392\u001b\\\u001b[2m1392\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", + "sagemaker.config INFO - Not applying SDK defaults from location: /Users/twillit/Library/Application Support/sagemaker/config.yaml\n" + ] + } + ], + "source": [ + "# Setup\n", + "import boto3\n", + "import json\n", + "import time\n", + "import random\n", + "from sagemaker.core.resources import TrainingJob\n", + "from sagemaker.serve.bedrock_model_builder import BedrockModelBuilder" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Configuration\n", + "TRAINING_JOB_NAME = 'my-lora-run-tpnld-1773683343850'\n", + "ROLE_ARN = \"arn:aws:iam::099324990371:role/AmazonSageMaker-ExecutionRole-20260219T233135\"\n", + "REGION = 'us-east-1'\n", + "BUCKET = 'sagemaker-us-east-1-099324990371'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[03/16/26 14:33:39] WARNING No region provided. Using default region. utils.py:356\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/16/26 14:33:39]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m No region provided. Using default region. \u001b]8;id=432135;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=278513;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/utils/utils.py#356\u001b\\\u001b[2m356\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Runs on sagemaker prod, region:us-east-1 utils.py:370\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Runs on sagemaker prod, region:us-east-\u001b[1;36m1\u001b[0m \u001b]8;id=134958;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=705239;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/utils/utils.py#370\u001b\\\u001b[2m370\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1392\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=665312;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=477402;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/credentials.py#1392\u001b\\\u001b[2m1392\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training job status: Completed\n", + "Using HF model path: s3://sagemaker-us-east-1-099324990371/model-customization/output-artifacts/my-lora-run-tpnld-1773683343850/output/model/checkpoints/hf_merged/\n" + ] + } + ], + "source": [ + "# Step 1: Get training job and prepare model path\n", + "training_job = TrainingJob.get(training_job_name=TRAINING_JOB_NAME)\n", + "print(f\"Training job status: {training_job.training_job_status}\")\n", + "\n", + "# Use the hf_merged directory which has complete HuggingFace format\n", + "base_s3_path = training_job.model_artifacts.s3_model_artifacts\n", + "hf_model_path = base_s3_path.rstrip('/') + '/checkpoints/hf_merged/'\n", + "print(f\"Using HF model path: {hf_model_path}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[03/16/26 14:33:40] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1392\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/16/26 14:33:40]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=44052;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=307088;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/credentials.py#1392\u001b\\\u001b[2m1392\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Checking required files:\n", + "❌ config.json - MISSING\n", + "❌ tokenizer.json - MISSING\n", + "❌ tokenizer_config.json - MISSING\n", + "❌ model.safetensors - MISSING\n" + ] + } + ], + "source": [ + "# Step 2: Verify required files exist\n", + "s3_client = boto3.client('s3', region_name=REGION)\n", + "\n", + "required_files = ['config.json', 'tokenizer.json', 'tokenizer_config.json', 'model.safetensors']\n", + "model_prefix = hf_model_path.replace(f's3://{BUCKET}/', '')\n", + "\n", + "print(\"Checking required files:\")\n", + "for file in required_files:\n", + " try:\n", + " s3_client.head_object(Bucket=BUCKET, Key=model_prefix + file)\n", + " print(f\"✅ {file}\")\n", + " except:\n", + " print(f\"❌ {file} - MISSING\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ added_tokens.json exists\n" + ] + } + ], + "source": [ + "# Step 3: Create missing tokenizer files if needed\n", + "def ensure_tokenizer_files():\n", + " # Create added_tokens.json (usually empty for 
Llama)\n", + " try:\n", + " s3_client.head_object(Bucket=BUCKET, Key=model_prefix + 'added_tokens.json')\n", + " print(\"✅ added_tokens.json exists\")\n", + " except:\n", + " s3_client.put_object(\n", + " Bucket=BUCKET,\n", + " Key=model_prefix + 'added_tokens.json',\n", + " Body=json.dumps({}),\n", + " ContentType='application/json'\n", + " )\n", + " print(\"✅ Created added_tokens.json\")\n", + "\n", + "ensure_tokenizer_files()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Checking S3 structure...\n", + "Base prefix: model-customization/output-artifacts/my-lora-run-tpnld-1773683343850/output/model\n", + "Contents:\n", + "\n", + "Checking hf_merged path: model-customization/output-artifacts/my-lora-run-tpnld-1773683343850/output/model/checkpoints/hf_merged/\n", + "Files in hf_merged:\n", + " added_tokens.json\n", + "✅ Copied added_tokens.json\n" + ] + } + ], + "source": [ + "# Debug: Check what's actually in the S3 bucket\n", + "print(\"Checking S3 structure...\")\n", + "base_prefix = base_s3_path.replace(f's3://{BUCKET}/', '')\n", + "print(f\"Base prefix: {base_prefix}\")\n", + "\n", + "# List files to see the actual structure\n", + "response = s3_client.list_objects_v2(\n", + " Bucket=BUCKET,\n", + " Prefix=base_prefix,\n", + " Delimiter='/'\n", + ")\n", + "\n", + "print(\"Contents:\")\n", + "if 'Contents' in response:\n", + " for obj in response['Contents'][:10]: # Show first 10 files\n", + " print(f\" {obj['Key']}\")\n", + "\n", + "# Check specifically for hf_merged directory\n", + "hf_merged_prefix = base_prefix.rstrip('/') + '/checkpoints/hf_merged/'\n", + "print(f\"\\nChecking hf_merged path: {hf_merged_prefix}\")\n", + "\n", + "try:\n", + " response = s3_client.list_objects_v2(Bucket=BUCKET, Prefix=hf_merged_prefix)\n", + " if 'Contents' in response:\n", + " print(\"Files in hf_merged:\")\n", + " for obj in response['Contents']:\n", + " 
file_name = obj['Key'].replace(hf_merged_prefix, '')\n", + " print(f\" {file_name}\")\n", + " \n", + " # Now copy with correct paths\n", + " for obj in response['Contents']:\n", + " source_key = obj['Key']\n", + " file_name = source_key.replace(hf_merged_prefix, '')\n", + " dest_key = base_prefix.rstrip('/') + '/' + file_name\n", + " \n", + " try:\n", + " s3_client.copy_object(\n", + " Bucket=BUCKET,\n", + " CopySource={'Bucket': BUCKET, 'Key': source_key},\n", + " Key=dest_key\n", + " )\n", + " print(f\"✅ Copied {file_name}\")\n", + " except Exception as e:\n", + " print(f\"❌ Failed to copy {file_name}: {e}\")\n", + " else:\n", + " print(\"No files found in hf_merged directory\")\n", + "except Exception as e:\n", + " print(f\"Error: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Job name: bedrock-nova-import-4982\n" + ] + }, + { + "data": { + "text/html": [ + "
[03/16/26 14:33:41] INFO S3 artifacts path: bedrock_model_builder.py:212\n", + " s3://sagemaker-us-east-1-099324990371/model-customization \n", + " /output-artifacts/my-lora-run-tpnld-1773683343850/output/ \n", + " model \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/16/26 14:33:41]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m S3 artifacts path: \u001b]8;id=917035;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/bedrock_model_builder.py\u001b\\\u001b[2mbedrock_model_builder.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=848667;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/bedrock_model_builder.py#212\u001b\\\u001b[2m212\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/sagemaker-us-east-1-099324990371/model-customization\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m/output-artifacts/my-lora-run-tpnld-1773683343850/output/\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mmodel\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Manifest path: bedrock_model_builder.py:219\n", + " s3://sagemaker-us-east-1-099324990371/model-customization \n", + " /output-artifacts/my-lora-run-tpnld-1773683343850/output/ \n", + " output/manifest.json \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Manifest path: \u001b]8;id=352000;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/bedrock_model_builder.py\u001b\\\u001b[2mbedrock_model_builder.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=280360;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/bedrock_model_builder.py#219\u001b\\\u001b[2m219\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/sagemaker-us-east-1-099324990371/model-customization\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m/output-artifacts/my-lora-run-tpnld-1773683343850/output/\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225moutput/\u001b[0m\u001b[38;2;225;0;225mmanifest.json\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Looking for manifest at bedrock_model_builder.py:226\n", + " s3://sagemaker-us-east-1-099324990371/model-customization \n", + " /output-artifacts/my-lora-run-tpnld-1773683343850/output/ \n", + " output/manifest.json \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Looking for manifest at \u001b]8;id=681920;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/bedrock_model_builder.py\u001b\\\u001b[2mbedrock_model_builder.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=157754;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/bedrock_model_builder.py#226\u001b\\\u001b[2m226\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/sagemaker-us-east-1-099324990371/model-customization\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225m/output-artifacts/my-lora-run-tpnld-1773683343850/output/\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225moutput/\u001b[0m\u001b[38;2;225;0;225mmanifest.json\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[03/16/26 14:33:42] INFO Manifest content: {'checkpoint_s3_bucket': bedrock_model_builder.py:232\n", + " 's3://customer-escrow-099324990371-smtj-cc62fd20/my-lora- \n", + " run-tpnld-1773683343850/896'} \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/16/26 14:33:42]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Manifest content: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'checkpoint_s3_bucket'\u001b[0m: \u001b]8;id=350064;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/bedrock_model_builder.py\u001b\\\u001b[2mbedrock_model_builder.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=173941;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/bedrock_model_builder.py#232\u001b\\\u001b[2m232\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m's3://customer-escrow-099324990371-smtj-cc62fd20/my-lora-\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0mrun-tpnld-1773683343850/896'\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Checkpoint URI: bedrock_model_builder.py:239\n", + " s3://customer-escrow-099324990371-smtj-cc62fd20/my-lora-r \n", + " un-tpnld-1773683343850/896 \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Checkpoint URI: \u001b]8;id=265208;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/bedrock_model_builder.py\u001b\\\u001b[2mbedrock_model_builder.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=611184;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/bedrock_model_builder.py#239\u001b\\\u001b[2m239\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m s3:\u001b[38;2;225;0;225m/\u001b[0m\u001b[38;2;225;0;225m/customer-escrow-099324990371-smtj-cc62fd20/my-lora-r\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[38;2;225;0;225mun-tpnld-1773683343850/\u001b[0m\u001b[38;2;225;0;225m896\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "arn:aws:bedrock:us-east-1:099324990371:custom-model/imported/fyjoelpl3jra\n" + ] + } + ], + "source": [ + "# Step 4: Create Bedrock model builder and deploy\n", + "job_name = f\"bedrock-nova-import-{random.randint(1000, 9999)}\"\n", + "print(f\"Job name: {job_name}\")\n", + "\n", + "# Create builder with correct model path\n", + "bedrock_builder = BedrockModelBuilder(\n", + " model=training_job\n", + ")\n", + "\n", + "# Deploy to Bedrock\n", + "deployment_result = bedrock_builder.deploy(\n", + " job_name=job_name,\n", + " imported_model_name=job_name,\n", + " role_arn=ROLE_ARN,\n", + " custom_model_name=job_name\n", + ")\n", + "\n", + "model_arn = deployment_result['modelArn']\n", + "print(model_arn)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + 
"model_arn = deployment_result['modelArn']" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model status: Creating\n", + "Model status: Creating\n", + "Model status: Creating\n", + "Model status: Creating\n", + "Model status: Creating\n", + "Model status: Creating\n", + "Model status: Creating\n", + "Model status: Creating\n", + "Model status: Creating\n", + "Model status: Active\n" + ] + } + ], + "source": [ + "# Create the custom model deployment\n", + "from uuid import uuid4\n", + "bedrock_client = boto3.client('bedrock', region_name=REGION)\n", + "\n", + "# Wait for model to be Active before deploying\n", + "while True:\n", + " status = bedrock_client.get_custom_model(modelIdentifier=model_arn).get(\"modelStatus\")\n", + " print(f\"Model status: {status}\")\n", + " if status == \"Active\":\n", + " break\n", + " if status == \"Failed\":\n", + " raise RuntimeError(\"Model creation failed\")\n", + " time.sleep(60)\n", + "\n", + "# Now safe to create deployment\n", + "deploy_resp = bedrock_client.create_custom_model_deployment(\n", + " modelDeploymentName=f\"deployment-{job_name}\",\n", + " modelArn=model_arn,\n", + " clientRequestToken=str(uuid4()),\n", + ")\n", + "deployment_arn = deploy_resp['customModelDeploymentArn']" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Waiting for deployment to complete...\n", + "Deployment status: Creating\n", + "Deployment status: Creating\n", + "Deployment status: Creating\n", + "Deployment status: Creating\n", + "Deployment status: Creating\n", + "Deployment status: Creating\n", + "Deployment status: Active\n" + ] + } + ], + "source": [ + "# Step 5: Wait for custom model creation to complete\n", + "\n", + "print(\"Waiting for deployment to complete...\")\n", + "while True:\n", + " status = 
bedrock_client.get_custom_model_deployment(\n", + " customModelDeploymentIdentifier=deployment_arn\n", + " ).get(\"status\")\n", + " print(f\"Deployment status: {status}\")\n", + " if status == \"Active\":\n", + " break\n", + " if status == \"Failed\":\n", + " raise RuntimeError(\"Deployment failed\")\n", + " time.sleep(30)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model Inference Message: What is the capital of France?\n", + "Model Response: The capital of France is Paris. Paris is not only the political center of France but also a major cultural, historical, and economic hub. It is situated in the northern part of the country, along the Seine River. Paris is renowned for its iconic landmarks such as the Eiffel Tower, the Louvre Museum, Notre-Dame Cathedral, and the Champs-Élysées. The city is also famous for its influence on art, fashion, cuisine, and philosophy.\n" + ] + } + ], + "source": [ + "# Step 6: Test inference with correct format\n", + "bedrock_runtime = boto3.client(\"bedrock-runtime\", region_name=\"us-east-1\")\n", + "message = \"What is the capital of France?\"\n", + "print(f\"Model Inference Message: {message}\")\n", + "resp = bedrock_runtime.converse(\n", + " modelId=deployment_arn,\n", + " messages=[{\"role\": \"user\", \"content\": [{\"text\": message}]}],\n", + " inferenceConfig={\"maxTokens\": 100, \"temperature\": 0.7},\n", + ")\n", + "\n", + "response_str = resp[\"output\"][\"message\"][\"content\"][0][\"text\"]\n", + "print(f\"Model Response: {response_str}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/boto3_deployment_notebook.ipynb b/boto3_deployment_notebook.ipynb new file mode 100644 index 0000000000..b3f9cab141 --- /dev/null +++ b/boto3_deployment_notebook.ipynb @@ -0,0 +1,1202 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 24, + "id": "d71538e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sagemaker: 3.5.0\n", + "boto3: 1.42.58\n", + "botocore: 1.42.58\n" + ] + } + ], + "source": [ + "from importlib.metadata import version\n", + "print(f\"sagemaker: {version('sagemaker')}\")\n", + "print(f\"boto3: {version('boto3')}\")\n", + "print(f\"botocore: {version('botocore')}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "e8d62358", + "metadata": {}, + "outputs": [], + "source": [ + "# initialize constants\n", + "REGION = \"us-west-2\"\n", + "TRAINING_JOB_NAME = \"test-lora-training-1-1773273846617\"\n", + "INSTANCE_TYPE = \"ml.g5.8xlarge\"\n", + "ROLE=\"arn:aws:iam::099324990371:role/service-role/AmazonSageMaker-ExecutionRole-20260219T233135\"\n", + "\n", + "LMI_IMAGE_URI = \"763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128\"\n", + "LMI_IMAGE_URI_31 = f\"763104351884.dkr.ecr.{REGION}.amazonaws.com/djl-inference:0.31.0-lmi13.0.0-cu124\"\n", + "\n", + "BASE_MODEL_S3_URI = f\"s3://jumpstart-private-cache-prod-{REGION}/meta-textgeneration/meta-textgeneration-llama-3-2-1b-instruct/artifacts/inference-prepack/v1.0.0/\"\n", + "\n", + "import random\n", + "name_suffix = random.randint(100, 10000)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "3e654dac", + "metadata": {}, + "outputs": [], + "source": [ + "# initialize clients\n", + "import boto3\n", + "sm = boto3.client(\"sagemaker\", region_name=REGION)\n", + "sm_runtime = boto3.client(\"sagemaker-runtime\", region_name=REGION)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": 
"3d0d573f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Adapter weights: s3://sagemaker-us-west-2-099324990371/model-customization/output-artifacts/test-lora-training-1-1773273846617/output/model/checkpoints/hf/\n" + ] + } + ], + "source": [ + "# get s3 artifact location\n", + "response = sm.describe_training_job(TrainingJobName=TRAINING_JOB_NAME)\n", + "model_s3_uri = response[\"ModelArtifacts\"][\"S3ModelArtifacts\"]\n", + "adapter_s3_uri = f\"{model_s3_uri}/checkpoints/hf/\"\n", + "print(f\" Adapter weights: {adapter_s3_uri}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be45afc0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "model: {'ModelArn': 'arn:aws:sagemaker:us-west-2:099324990371:model/model-1977', 'ResponseMetadata': {'RequestId': '510a2671-03c1-464a-81d1-1ad7f512a72d', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '510a2671-03c1-464a-81d1-1ad7f512a72d', 'strict-transport-security': 'max-age=47304000; includeSubDomains', 'x-frame-options': 'DENY', 'content-security-policy': \"frame-ancestors 'none'\", 'cache-control': 'no-cache, no-store, must-revalidate', 'x-content-type-options': 'nosniff', 'content-type': 'application/x-amz-json-1.1', 'content-length': '72', 'date': 'Thu, 12 Mar 2026 20:09:45 GMT'}, 'RetryAttempts': 0}}\n" + ] + } + ], + "source": [ + "# create model in SageMaker using boto3\n", + "model_name = f\"model-{name_suffix}\"\n", + "model = sm.create_model(\n", + " ModelName=model_name,\n", + " ExecutionRoleArn=ROLE,\n", + " PrimaryContainer={\n", + " \"Image\": LMI_IMAGE_URI,\n", + " \"Environment\": {\n", + " # Use lmi-dist for rolling batch — NOT \"disable\" (which requires the vllm entrypoint on 0.34.0+)\n", + " \"OPTION_ROLLING_BATCH\": \"lmi-dist\",\n", + " \"OPTION_ENABLE_LORA\": \"true\",\n", + " \"OPTION_MAX_LORAS\": \"8\",\n", + " \"OPTION_MAX_CPU_LORAS\": \"64\",\n", + " 
\"OPTION_MAX_LORA_RANK\": \"128\",\n", + " \"OPTION_MAX_ROLLING_BATCH_SIZE\": \"8\",\n", + " # Must match GPU count on the instance: \"1\" for g5.2xlarge, \"8\" for g6e.48xlarge\n", + " \"OPTION_TENSOR_PARALLEL_DEGREE\": \"1\",\n", + " \"OPTION_DTYPE\": \"fp16\",\n", + " \"OPTION_MAX_MODEL_LEN\": \"4096\",\n", + " },\n", + " # Load base model from JumpStart S3 cache — avoids needing HF_TOKEN for gated models\n", + " \"ModelDataSource\": {\n", + " \"S3DataSource\": {\n", + " \"S3Uri\": BASE_MODEL_S3_URI,\n", + " \"S3DataType\": \"S3Prefix\",\n", + " \"CompressionType\": \"None\",\n", + " \"ModelAccessConfig\": {\"AcceptEula\": True},\n", + " }\n", + " },\n", + " },\n", + ")\n", + "print(f\"model: {model}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e7da772", + "metadata": {}, + "outputs": [], + "source": [ + "# create model in SageMaker using ModelBuilder\n", + "from sagemaker.core.resources import TrainingJob\n", + "from sagemaker.serve import ModelBuilder\n", + "\n", + "training_job = TrainingJob.get(training_job_name=TRAINING_JOB_NAME)\n", + "print(f\"model package arn: {training_job.output_model_package_arn}\")\n", + "\n", + "model_name = f\"model-{name_suffix}\"\n", + "model_builder = ModelBuilder(model=training_job, role_arn=ROLE, instance_type=INSTANCE_TYPE)\n", + "model = model_builder.build(model_name=model_name)\n", + "print(f\"model arn: {model.model_arn}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "13eefb5c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "endpoint config: {'EndpointConfigArn': 'arn:aws:sagemaker:us-west-2:099324990371:endpoint-config/e2e-1977', 'ResponseMetadata': {'RequestId': 'ded5b4e4-1b3e-49c2-97c2-25bb7a11b2a4', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'ded5b4e4-1b3e-49c2-97c2-25bb7a11b2a4', 'strict-transport-security': 'max-age=47304000; includeSubDomains', 'x-frame-options': 'DENY', 
'content-security-policy': \"frame-ancestors 'none'\", 'cache-control': 'no-cache, no-store, must-revalidate', 'x-content-type-options': 'nosniff', 'content-type': 'application/x-amz-json-1.1', 'content-length': '89', 'date': 'Thu, 12 Mar 2026 20:10:22 GMT'}, 'RetryAttempts': 0}}\n", + "endpoint: {'EndpointArn': 'arn:aws:sagemaker:us-west-2:099324990371:endpoint/e2e-1977', 'ResponseMetadata': {'RequestId': 'ca82b575-df5e-48f7-ba9b-74de1137d1ef', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'ca82b575-df5e-48f7-ba9b-74de1137d1ef', 'strict-transport-security': 'max-age=47304000; includeSubDomains', 'x-frame-options': 'DENY', 'content-security-policy': \"frame-ancestors 'none'\", 'cache-control': 'no-cache, no-store, must-revalidate', 'x-content-type-options': 'nosniff', 'content-type': 'application/x-amz-json-1.1', 'content-length': '76', 'date': 'Thu, 12 Mar 2026 20:10:23 GMT'}, 'RetryAttempts': 0}}\n" + ] + } + ], + "source": [ + "# create endpoint\n", + "endpoint_name = f\"e2e-{name_suffix}\"\n", + "ep_config = sm.create_endpoint_config(\n", + " EndpointConfigName=endpoint_name,\n", + " ExecutionRoleArn=ROLE,\n", + " ProductionVariants=[\n", + " {\n", + " \"VariantName\": \"AllTraffic\",\n", + " \"InstanceType\": INSTANCE_TYPE,\n", + " \"InitialInstanceCount\": 1,\n", + " }\n", + " ],\n", + ")\n", + "print(f\"endpoint config: {ep_config}\")\n", + "\n", + "endpoint = sm.create_endpoint(EndpointName=endpoint_name, EndpointConfigName=endpoint_name)\n", + "print(f\"endpoint: {endpoint}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "dbf2a262", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "base inference component: {'InferenceComponentArn': 'arn:aws:sagemaker:us-west-2:099324990371:inference-component/e2e-1977-inference-component', 'ResponseMetadata': {'RequestId': 'c24a105b-762b-4aa1-b17c-ae71adfd0df2', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 
'c24a105b-762b-4aa1-b17c-ae71adfd0df2', 'strict-transport-security': 'max-age=47304000; includeSubDomains', 'x-frame-options': 'DENY', 'content-security-policy': \"frame-ancestors 'none'\", 'cache-control': 'no-cache, no-store, must-revalidate', 'x-content-type-options': 'nosniff', 'content-type': 'application/x-amz-json-1.1', 'content-length': '117', 'date': 'Thu, 12 Mar 2026 20:10:28 GMT'}, 'RetryAttempts': 0}}\n" + ] + } + ], + "source": [ + "# create base model inference component\n", + "base_ic_name = f\"{endpoint_name}-inference-component\"\n", + "base_inference_component = sm.create_inference_component(\n", + " InferenceComponentName=base_ic_name,\n", + " EndpointName=endpoint_name,\n", + " VariantName=\"AllTraffic\",\n", + " Specification={\n", + " \"ModelName\": model_name,\n", + " \"ComputeResourceRequirements\": {\n", + " \"MinMemoryRequiredInMb\": 4096,\n", + " \"NumberOfAcceleratorDevicesRequired\": 1,\n", + " },\n", + " },\n", + " RuntimeConfig={\"CopyCount\": 1},\n", + ")\n", + "print(f\"base inference component: {base_inference_component}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "df74cb13", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "adapter inference component: {'InferenceComponentArn': 'arn:aws:sagemaker:us-west-2:099324990371:inference-component/e2e-1977-adapter', 'ResponseMetadata': {'RequestId': 'fd3e04bf-1b48-4211-bd92-16d3d67cc4df', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'fd3e04bf-1b48-4211-bd92-16d3d67cc4df', 'strict-transport-security': 'max-age=47304000; includeSubDomains', 'x-frame-options': 'DENY', 'content-security-policy': \"frame-ancestors 'none'\", 'cache-control': 'no-cache, no-store, must-revalidate', 'x-content-type-options': 'nosniff', 'content-type': 'application/x-amz-json-1.1', 'content-length': '105', 'date': 'Thu, 12 Mar 2026 20:39:33 GMT'}, 'RetryAttempts': 0}}\n" + ] + } + ], + "source": [ + "# create adapter 
inference component\n", + "endpoint_name = \"e2e-1977\"\n", + "base_ic_name = \"e2e-1977-inference-component\"\n", + "\n", + "adapter_ic_name = f\"{endpoint_name}-adapter\"\n", + "adapter_inference_component = sm.create_inference_component(\n", + " InferenceComponentName=adapter_ic_name,\n", + " EndpointName=endpoint_name,\n", + " Specification={\n", + " \"BaseInferenceComponentName\": base_ic_name,\n", + " \"Container\": {\"ArtifactUrl\": adapter_s3_uri},\n", + " },\n", + ")\n", + "print(f\"adapter inference component: {adapter_inference_component}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "e0823def", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Response: {'generated_text': ' Paris. The capital of France is Paris. The capital of France is Paris. The capital of France is Paris. The capital of France is Paris. The capital of France is Paris. The capital of France is Paris. The capital of France is Paris'}\n" + ] + } + ], + "source": [ + "# test inference on base model inference component\n", + "import json\n", + "payload = json.dumps({\"inputs\": \"What is the capital of France?\", \"parameters\": {\"max_new_tokens\": 50}})\n", + "base_model_response = sm_runtime.invoke_endpoint(\n", + " EndpointName=endpoint_name,\n", + " InferenceComponentName=base_ic_name,\n", + " Body=payload,\n", + " ContentType=\"application/json\",\n", + ")\n", + "result = json.loads(base_model_response[\"Body\"].read().decode())\n", + "print(f\"Response: {result}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "79b05339", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'CreationTime': datetime.datetime(2026, 3, 12, 9, 50, 14, 970000, tzinfo=tzlocal()),\n", + " 'EndpointArn': 'arn:aws:sagemaker:us-west-2:099324990371:endpoint/e2e-5429',\n", + " 'EndpointName': 'e2e-5429',\n", + " 'InferenceComponentArn': 
'arn:aws:sagemaker:us-west-2:099324990371:inference-component/e2e-5429-inference-component',\n", + " 'InferenceComponentName': 'e2e-5429-inference-component',\n", + " 'InferenceComponentStatus': 'InService',\n", + " 'LastModifiedTime': datetime.datetime(2026, 3, 12, 9, 55, 36, 65000, tzinfo=tzlocal()),\n", + " 'ResponseMetadata': {'HTTPHeaders': {'cache-control': 'no-cache, no-store, '\n", + " 'must-revalidate',\n", + " 'content-length': '973',\n", + " 'content-security-policy': 'frame-ancestors '\n", + " \"'none'\",\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'date': 'Thu, 12 Mar 2026 21:02:01 GMT',\n", + " 'strict-transport-security': 'max-age=47304000; '\n", + " 'includeSubDomains',\n", + " 'x-amzn-requestid': '36537af2-0b32-4c7b-a283-8d0f825d5bd1',\n", + " 'x-content-type-options': 'nosniff',\n", + " 'x-frame-options': 'DENY'},\n", + " 'HTTPStatusCode': 200,\n", + " 'RequestId': '36537af2-0b32-4c7b-a283-8d0f825d5bd1',\n", + " 'RetryAttempts': 0},\n", + " 'RuntimeConfig': {'CurrentCopyCount': 1, 'DesiredCopyCount': 1},\n", + " 'Specification': {'ComputeResourceRequirements': {'MinMemoryRequiredInMb': 4096,\n", + " 'NumberOfAcceleratorDevicesRequired': 1.0,\n", + " 'NumberOfCpuCoresRequired': 8.0},\n", + " 'Container': {'DeployedImage': {'ResolutionTime': datetime.datetime(2026, 3, 12, 9, 50, 15, 707000, tzinfo=tzlocal()),\n", + " 'ResolvedImage': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference@sha256:4979ff55ba85b9b525333016fde63fa3d709567d1bbf02c486e963bdc0d48b7b',\n", + " 'SpecifiedImage': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128'}},\n", + " 'DataCacheConfig': {'EnableCaching': True}},\n", + " 'VariantName': 'e2e-5429'}\n" + ] + } + ], + "source": [ + "from pprint import pprint\n", + "\n", + "# resp = sm.describe_inference_component(InferenceComponentName=adapter_ic_name)\n", + "resp = sm.describe_inference_component(InferenceComponentName=\"e2e-5429-inference-component\")\n", + "# resp = 
sm.describe_endpoint(EndpointName=\"e2e-5429\")\n", + "\n", + "# status = resp[\"InferenceComponentStatus\"]\n", + "# print(f\"Status: {status}\")\n", + "pprint(resp)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df316e99", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Response: {'generated_text': ' Paris.\\nWhat is the capital of the United States? Washington, D.C.\\nWhat is the capital of the United Kingdom? London.\\nWhat is the capital of Australia? Canberra.\\nWhat is the capital of Canada? Ottawa.\\nWhat is the capital of'}\n" + ] + } + ], + "source": [ + "# test inference on adapter inference component\n", + "import json\n", + "payload = json.dumps({\"inputs\": \"What is the capital of France?\", \"parameters\": {\"max_new_tokens\": 50}})\n", + "adapter_response = sm_runtime.invoke_endpoint(\n", + " EndpointName=\"e2e-5429\", #endpoint_name,\n", + " InferenceComponentName=\"e2e-5429-inference-component\", #adapter_ic_name,\n", + " Body=payload,\n", + " ContentType=\"application/json\",\n", + ")\n", + "result = json.loads(adapter_response[\"Body\"].read().decode())\n", + "print(f\" Response: {result}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "df2b7dfa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "meta-textgeneration-llama-3-2-1b-instruct\n", + "{'Capabilities': ['TRAINING', 'FINE_TUNING', 'VALIDATION', 'CUSTOMIZATION'],\n", + " 'ContainerStartupHealthCheckTimeout': 1200,\n", + " 'ContextualHelp': {'HubDefaultTrainData': [\"Dataset: [OpenAssistant's TOP-1 \"\n", + " 'Conversation '\n", + " 'Threads](https://huggingface.co/datasets/OpenAssistant/oasst_top1_2023-08-25)',\n", + " \"OpenAssistant's TOP-1 \"\n", + " 'Conversation Threads dataset '\n", + " 'contains roughly 13,000 samples '\n", + " 'of conversations between the '\n", + " 'Assistant and the user.',\n", + " 'License: [Apache '\n", + " 
'2.0](https://jumpstart-cache-prod-us-east-2.s3-us-east-2.amazonaws.com/licenses/Apache-License/LICENSE-2.0.txt)'],\n", + " 'HubFormatTrainData': ['A train and an optional validation '\n", + " 'directories. Each directory '\n", + " 'contains a jsonl. ',\n", + " ' [Learn how to setup an AWS S3 '\n", + " 'bucket.](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html)']},\n", + " 'DataType': 'text',\n", + " 'DefaultInferenceInstanceType': 'ml.g6.xlarge',\n", + " 'DefaultPayloads': {'emojisBeijing': {'Body': {'inputs': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\\n'\n", + " '\\n'\n", + " 'Always answer with '\n", + " 'emojis<|eot_id|><|start_header_id|>user<|end_header_id|>\\n'\n", + " '\\n'\n", + " 'How to go from '\n", + " 'Beijing to '\n", + " 'NY?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n'\n", + " '\\n',\n", + " 'parameters': {'max_new_tokens': 256,\n", + " 'temperature': 0.6,\n", + " 'top_p': 0.9}},\n", + " 'ContentType': 'application/json',\n", + " 'OutputKeys': {'generated_text': 'generated_text'},\n", + " 'PromptKey': 'inputs'},\n", + " 'mayonnaise': {'Body': {'inputs': '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\\n'\n", + " '\\n'\n", + " 'what is the recipe of '\n", + " 'mayonnaise?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n'\n", + " '\\n',\n", + " 'parameters': {'details': True,\n", + " 'max_new_tokens': 256,\n", + " 'temperature': 0.6,\n", + " 'top_p': 0.9}},\n", + " 'ContentType': 'application/json',\n", + " 'OutputKeys': {'generated_text': 'generated_text'},\n", + " 'PromptKey': 'inputs'},\n", + " 'parisHaiku': {'Body': {'inputs': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\\n'\n", + " '\\n'\n", + " 'Always answer with '\n", + " 'Haiku<|eot_id|><|start_header_id|>user<|end_header_id|>\\n'\n", + " '\\n'\n", + " 'I am going to Paris, '\n", + " 'what should I '\n", + " 'see?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n'\n", + " '\\n',\n", + " 'parameters': 
{'max_new_tokens': 256,\n", + " 'temperature': 0.6,\n", + " 'top_p': 0.9}},\n", + " 'ContentType': 'application/json',\n", + " 'OutputKeys': {'generated_text': 'generated_text'},\n", + " 'PromptKey': 'inputs'},\n", + " 'parisTrip': {'Body': {'inputs': '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\\n'\n", + " '\\n'\n", + " 'I am going to Paris, '\n", + " 'what should I '\n", + " 'see?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n'\n", + " '\\n'\n", + " 'Paris, the capital of '\n", + " 'France, is known for '\n", + " 'its stunning '\n", + " 'architecture, art '\n", + " 'museums, historical '\n", + " 'landmarks, and romantic '\n", + " 'atmosphere. Here are '\n", + " 'some of the top '\n", + " 'attractions to see in '\n", + " 'Paris:\\n'\n", + " '\\n'\n", + " '1. The Eiffel Tower: '\n", + " 'The iconic Eiffel Tower '\n", + " 'is one of the most '\n", + " 'recognizable landmarks '\n", + " 'in the world and offers '\n", + " 'breathtaking views of '\n", + " 'the city.\\n'\n", + " '2. The Louvre Museum: '\n", + " 'The Louvre is one of '\n", + " \"the world's largest and \"\n", + " 'most famous museums, '\n", + " 'housing an impressive '\n", + " 'collection of art and '\n", + " 'artifacts, including '\n", + " 'the Mona Lisa.\\n'\n", + " '3. Notre-Dame '\n", + " 'Cathedral: This '\n", + " 'beautiful cathedral is '\n", + " 'one of the most famous '\n", + " 'landmarks in Paris and '\n", + " 'is known for its Gothic '\n", + " 'architecture and '\n", + " 'stunning stained glass '\n", + " 'windows.\\n'\n", + " '\\n'\n", + " 'These are just a few of '\n", + " 'the many attractions '\n", + " 'that Paris has to '\n", + " 'offer. 
With so much to '\n", + " \"see and do, it's no \"\n", + " 'wonder that Paris is '\n", + " 'one of the most popular '\n", + " 'tourist destinations in '\n", + " 'the '\n", + " 'world.<|eot_id|><|start_header_id|>user<|end_header_id|>\\n'\n", + " '\\n'\n", + " 'What is so great about '\n", + " '#1?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n'\n", + " '\\n',\n", + " 'parameters': {'max_new_tokens': 256,\n", + " 'temperature': 0.6,\n", + " 'top_p': 0.9}},\n", + " 'ContentType': 'application/json',\n", + " 'OutputKeys': {'generated_text': 'generated_text'},\n", + " 'PromptKey': 'inputs'}},\n", + " 'DefaultTrainingDatasetUri': 's3://jumpstart-cache-prod-us-west-2/training-datasets/oasst_top/train/',\n", + " 'DefaultTrainingInstanceType': 'ml.g5.2xlarge',\n", + " 'Dependencies': [],\n", + " 'DisableOutputCompression': True,\n", + " 'DynamicContainerDeploymentSupported': True,\n", + " 'EncryptInterContainerTraffic': True,\n", + " 'FineTuningSupported': True,\n", + " 'Framework': 'meta',\n", + " 'GatedBucket': True,\n", + " 'HostingArtifactCompressionType': 'None',\n", + " 'HostingArtifactS3DataType': 'S3Prefix',\n", + " 'HostingArtifactUri': 's3://jumpstart-private-cache-prod-us-west-2/meta-textgeneration/meta-textgeneration-llama-3-2-1b-instruct/artifacts/inference-prepack/v1.0.0/',\n", + " 'HostingEcrSpecs': {'Framework': 'djl-lmi-18',\n", + " 'FrameworkVersion': '0.36.0',\n", + " 'PyVersion': 'py310'},\n", + " 'HostingEcrUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128',\n", + " 'HostingEulaUri': 's3://jumpstart-cache-prod-us-west-2/fmhMetadata/eula/llama3_2Eula.txt',\n", + " 'HostingInstanceTypeVariants': {'Variants': {'g4dn': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'g5': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'g6': {'Properties': {'ImageUri': 
'763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'g6e': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'local_gpu': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'ml.g5.12xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 98304,\n", + " 'NumAccelerators': 4}}},\n", + " 'ml.g5.16xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 131072,\n", + " 'NumAccelerators': 1}}},\n", + " 'ml.g5.24xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 196608,\n", + " 'NumAccelerators': 4}}},\n", + " 'ml.g5.2xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 16384,\n", + " 'NumAccelerators': 1}}},\n", + " 'ml.g5.48xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 393216,\n", + " 'NumAccelerators': 8}}},\n", + " 'ml.g5.4xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 32768,\n", + " 'NumAccelerators': 1}}},\n", + " 'ml.g5.8xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 65536,\n", + " 'NumAccelerators': 1}}},\n", + " 'ml.g5.xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 8192,\n", + " 'NumAccelerators': 1}}},\n", + " 'ml.g6.12xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 98304,\n", + " 'NumAccelerators': 4}}},\n", + " 'ml.g6.16xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 131072,\n", + " 'NumAccelerators': 1}}},\n", + " 'ml.g6.24xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 196608,\n", + " 'NumAccelerators': 4}}},\n", + " 'ml.g6.2xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 16384,\n", + " 'NumAccelerators': 1}}},\n", + " 'ml.g6.48xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 393216,\n", + " 'NumAccelerators': 8}}},\n", + " 'ml.g6.4xlarge': {'Properties': 
{'ResourceRequirements': {'MinMemoryMb': 32768,\n", + " 'NumAccelerators': 1}}},\n", + " 'ml.g6.8xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 65536,\n", + " 'NumAccelerators': 1}}},\n", + " 'ml.g6.xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 8192,\n", + " 'NumAccelerators': 1}}},\n", + " 'ml.p4d.24xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 589824,\n", + " 'NumAccelerators': 8}}},\n", + " 'ml.p5.48xlarge': {'Properties': {'ResourceRequirements': {'MinMemoryMb': 1048576,\n", + " 'NumAccelerators': 8}}},\n", + " 'p2': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'p3': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'p3dn': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'p4d': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'p4de': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'p5': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'p5e': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'p5en': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'p6': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}},\n", + " 'p6e': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.36.0-lmi18.0.0-cu128'}}}},\n", + " 'HostingResourceRequirements': {'MinMemoryMb': 8192, 'NumAccelerators': 1},\n", + " 'HostingScriptUri': 
's3://jumpstart-cache-prod-us-west-2/source-directory-tarballs/meta/inference/textgeneration/v1.2.3/sourcedir.tar.gz',\n", + " 'HostingUseScriptUri': False,\n", + " 'Hyperparameters': [{'Default': 'False',\n", + " 'Name': 'int8_quantization',\n", + " 'Options': ['True', 'False'],\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'text'},\n", + " {'Default': 'True',\n", + " 'Name': 'enable_fsdp',\n", + " 'Options': ['True', 'False'],\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'text'},\n", + " {'Default': 1,\n", + " 'Max': 1000,\n", + " 'Min': 1,\n", + " 'Name': 'epoch',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'int'},\n", + " {'Default': 0.0001,\n", + " 'Max': 1,\n", + " 'Min': 1e-08,\n", + " 'Name': 'learning_rate',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'float'},\n", + " {'Default': 8,\n", + " 'Min': 1,\n", + " 'Name': 'lora_r',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'int'},\n", + " {'Default': 32,\n", + " 'Min': 1,\n", + " 'Name': 'lora_alpha',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'int'},\n", + " {'Default': 'q_proj,v_proj',\n", + " 'Name': 'target_modules',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'text'},\n", + " {'Default': 0.05,\n", + " 'Max': 1,\n", + " 'Min': 0,\n", + " 'Name': 'lora_dropout',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'float'},\n", + " {'Default': 'False',\n", + " 'Name': 'instruction_tuned',\n", + " 'Options': ['True', 'False'],\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'text'},\n", + " {'Default': 'True',\n", + " 'Name': 'chat_dataset',\n", + " 'Options': ['True', 'False'],\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'text'},\n", + " {'Default': 'True',\n", + " 'Name': 'add_input_output_demarcation_key',\n", + " 'Options': ['True', 'False'],\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'text'},\n", + " {'Default': 1,\n", + " 'Max': 1000,\n", + " 'Min': 1,\n", + " 'Name': 'per_device_train_batch_size',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'int'},\n", + " {'Default': 1,\n", + " 'Max': 
1000,\n", + " 'Min': 1,\n", + " 'Name': 'per_device_eval_batch_size',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'int'},\n", + " {'Default': -1,\n", + " 'Min': -1,\n", + " 'Name': 'max_train_samples',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'int'},\n", + " {'Default': -1,\n", + " 'Min': -1,\n", + " 'Name': 'max_val_samples',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'int'},\n", + " {'Default': 10,\n", + " 'Max': 1000,\n", + " 'Min': 1,\n", + " 'Name': 'seed',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'int'},\n", + " {'Default': -1,\n", + " 'Min': -1,\n", + " 'Name': 'max_input_length',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'int'},\n", + " {'Default': 0.2,\n", + " 'Max': 1,\n", + " 'Min': 0,\n", + " 'Name': 'validation_split_ratio',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'float'},\n", + " {'Default': 0,\n", + " 'Min': 0,\n", + " 'Name': 'train_data_split_seed',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'int'},\n", + " {'Default': 'None',\n", + " 'Name': 'preprocessing_num_workers',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'text'},\n", + " {'Default': 'Llama3.1',\n", + " 'Name': 'chat_template',\n", + " 'Scope': 'algorithm',\n", + " 'Type': 'text'},\n", + " {'Default': '/opt/ml/input/data/code/sourcedir.tar.gz',\n", + " 'Name': 'sagemaker_submit_directory',\n", + " 'Scope': 'container',\n", + " 'Type': 'text'},\n", + " {'Default': 'transfer_learning.py',\n", + " 'Name': 'sagemaker_program',\n", + " 'Scope': 'container',\n", + " 'Type': 'text'},\n", + " {'Default': '20',\n", + " 'Name': 'sagemaker_container_log_level',\n", + " 'Scope': 'container',\n", + " 'Type': 'text'}],\n", + " 'IncrementalTrainingSupported': False,\n", + " 'InferenceDependencies': [],\n", + " 'InferenceEnableNetworkIsolation': True,\n", + " 'InferenceEnvironmentVariables': [{'Default': 3600,\n", + " 'Name': 'ENDPOINT_SERVER_TIMEOUT',\n", + " 'RequiredForModelClass': True,\n", + " 'Scope': 'container',\n", + " 'Type': 'int'},\n", + " {'Default': 
'/opt/ml/model',\n", + " 'Name': 'HF_MODEL_ID',\n", + " 'RequiredForModelClass': True,\n", + " 'Scope': 'container',\n", + " 'Type': 'text'},\n", + " {'Default': '/opt/ml/model',\n", + " 'Name': 'MODEL_CACHE_ROOT',\n", + " 'RequiredForModelClass': True,\n", + " 'Scope': 'container',\n", + " 'Type': 'text'},\n", + " {'Default': 'true',\n", + " 'Name': 'OPTION_ENABLE_CHUNKED_PREFILL',\n", + " 'RequiredForModelClass': True,\n", + " 'Scope': 'container',\n", + " 'Type': 'text'},\n", + " {'Default': '20',\n", + " 'Name': 'SAGEMAKER_CONTAINER_LOG_LEVEL',\n", + " 'RequiredForModelClass': False,\n", + " 'Scope': 'container',\n", + " 'Type': 'text'},\n", + " {'Default': '1',\n", + " 'Name': 'SAGEMAKER_ENV',\n", + " 'RequiredForModelClass': True,\n", + " 'Scope': 'container',\n", + " 'Type': 'text'},\n", + " {'Default': '3600',\n", + " 'Name': 'SAGEMAKER_MODEL_SERVER_TIMEOUT',\n", + " 'RequiredForModelClass': True,\n", + " 'Scope': 'container',\n", + " 'Type': 'text'},\n", + " {'Default': 1,\n", + " 'Name': 'SAGEMAKER_MODEL_SERVER_WORKERS',\n", + " 'RequiredForModelClass': True,\n", + " 'Scope': 'container',\n", + " 'Type': 'int'},\n", + " {'Default': 'inference.py',\n", + " 'Name': 'SAGEMAKER_PROGRAM',\n", + " 'RequiredForModelClass': True,\n", + " 'Scope': 'container',\n", + " 'Type': 'text'},\n", + " {'Default': '/opt/ml/model/code',\n", + " 'Name': 'SAGEMAKER_SUBMIT_DIRECTORY',\n", + " 'RequiredForModelClass': False,\n", + " 'Scope': 'container',\n", + " 'Type': 'text'}],\n", + " 'InferenceVolumeSize': 256,\n", + " 'MaxRuntimeInSeconds': 360000,\n", + " 'MinSdkVersion': '2.225.0',\n", + " 'ModelDataDownloadTimeout': 1200,\n", + " 'ModelTypes': ['OPEN_WEIGHTS'],\n", + " 'NotebookLocations': {'DemoNotebook': 's3://jumpstart-cache-prod-us-west-2/pmm-notebooks/pmm-notebook-model-hub-text-generation-deploy.ipynb',\n", + " 'DemoNotebooks': [{'IsDefault': True,\n", + " 'S3Uri': 
's3://jumpstart-cache-prod-us-west-2/pmm-notebooks/pmm-notebook-model-hub-text-generation-deploy.ipynb',\n", + " 'Title': 'Deploy'},\n", + " {'IsDefault': False,\n", + " 'S3Uri': 's3://jumpstart-cache-prod-us-west-2/pmm-notebooks/pmm-notebook-model-hub-text-generation-instruction-tuning-llama.ipynb',\n", + " 'Title': 'Fine-Tune: Instruction '\n", + " 'Tuning'},\n", + " {'IsDefault': False,\n", + " 'S3Uri': 's3://jumpstart-cache-prod-us-west-2/open-source-notebooks/sm-studio-oss-training-job-sample-notebook.ipynb',\n", + " 'Title': 'Deploy End-to-End Model '\n", + " 'Customization'}]},\n", + " 'Provider': 'meta',\n", + " 'RecipeCollection': [{'CustomizationTechnique': 'RLAIF',\n", + " 'DisplayName': 'Llama 3.2 1B GRPO RLAIF Fine-Tuning '\n", + " 'with LoRA',\n", + " 'Hardware': 'GPU',\n", + " 'HostingConfigs': [{'ComputeResourceRequirements': {'MinMemoryRequiredInMb': 32768,\n", + " 'NumberOfAcceleratorDevicesRequired': 1,\n", + " 'NumberOfCpuCoresRequired': 12},\n", + " 'EcrAddress': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128',\n", + " 'Environment': {'OPTION_ASYNC_MODE': 'true',\n", + " 'OPTION_ENABLE_LORA': 'true',\n", + " 'OPTION_ENTRYPOINT': 'djl_python.lmi_vllm.vllm_async_service',\n", + " 'OPTION_MAX_CPU_LORAS': '16',\n", + " 'OPTION_MAX_LORAS': '8',\n", + " 'OPTION_MAX_LORA_RANK': '128',\n", + " 'OPTION_MAX_ROLLING_BATCH_SIZE': '8',\n", + " 'OPTION_ROLLING_BATCH': 'disable',\n", + " 'OPTION_TENSOR_PARALLEL_DEGREE': '1',\n", + " 'SAGEMAKER_ENABLE_LOAD_AWARE': '1',\n", + " 'SAGEMAKER_MAX_NUMBER_OF_ADAPTERS_IN_MEMORY': '32'},\n", + " 'InstanceType': 'ml.g6.4xlarge',\n", + " 'Profile': 'Default'},\n", + " {'ComputeResourceRequirements': {'MinMemoryRequiredInMb': 32768,\n", + " 'NumberOfAcceleratorDevicesRequired': 1,\n", + " 'NumberOfCpuCoresRequired': 12},\n", + " 'EcrAddress': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128',\n", + " 'Environment': {'OPTION_ASYNC_MODE': 'true',\n", + " 
'OPTION_ENABLE_LORA': 'true',\n", + " 'OPTION_ENTRYPOINT': 'djl_python.lmi_vllm.vllm_async_service',\n", + " 'OPTION_MAX_CPU_LORAS': '16',\n", + " 'OPTION_MAX_LORAS': '8',\n", + " 'OPTION_MAX_LORA_RANK': '128',\n", + " 'OPTION_MAX_ROLLING_BATCH_SIZE': '8',\n", + " 'OPTION_ROLLING_BATCH': 'disable',\n", + " 'OPTION_TENSOR_PARALLEL_DEGREE': '1',\n", + " 'SAGEMAKER_ENABLE_LOAD_AWARE': '1',\n", + " 'SAGEMAKER_MAX_NUMBER_OF_ADAPTERS_IN_MEMORY': '32'},\n", + " 'InstanceType': 'ml.g5.4xlarge'}],\n", + " 'HpEksOverrideParamsS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/verl-grpo-rlaif-llama-3-dot-2-1b-instruct-lora_override_params_k8s_v2.0.0.json',\n", + " 'HpEksPayloadTemplateS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/verl-grpo-rlaif-llama-3-dot-2-1b-instruct-lora_payload_template_k8s_v2.0.0.yaml',\n", + " 'InstanceCount': 1,\n", + " 'Name': 'verl-grpo-rlaif-llama-3-dot-2-1b-instruct-lora',\n", + " 'Peft': 'LORA',\n", + " 'RecipeFilePath': 'recipes/fine-tuning/llama/verl-grpo-rlaif-llama-3-dot-2-1b-instruct-lora.yaml',\n", + " 'SequenceLength': '2K',\n", + " 'ServerlessMeteringType': 'Hourly',\n", + " 'SmtjImageUri': '920498770698.dkr.ecr.us-west-2.amazonaws.com/hyperpod-recipes:verl-v1.0.0-smtj',\n", + " 'SmtjOverrideParamsS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/verl-grpo-rlaif-llama-3-dot-2-1b-instruct-lora_override_params_sm_jobs_v2.0.0.json',\n", + " 'SmtjRecipeTemplateS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/verl-grpo-rlaif-llama-3-dot-2-1b-instruct-lora_payload_template_sm_jobs_v2.0.0.yaml',\n", + " 'SupportedInstanceTypes': ['ml.p5.48xlarge',\n", + " 'ml.p4de.24xlarge',\n", + " 'ml.p4d.24xlarge'],\n", + " 'Type': 'FineTuning',\n", + " 'Versions': ['1.0.0']},\n", + " {'CustomizationTechnique': 'RLVR',\n", + " 'DisplayName': 'Llama 3.2 1B GRPO RLVR Fine-Tuning',\n", + " 'Hardware': 'GPU',\n", + " 'HostingConfigs': [{'ComputeResourceRequirements': {'MinMemoryRequiredInMb': 32768,\n", + " 'NumberOfAcceleratorDevicesRequired': 
1,\n", + " 'NumberOfCpuCoresRequired': 12},\n", + " 'EcrAddress': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128',\n", + " 'Environment': {'OPTION_ASYNC_MODE': 'true',\n", + " 'OPTION_ENABLE_LORA': 'true',\n", + " 'OPTION_ENTRYPOINT': 'djl_python.lmi_vllm.vllm_async_service',\n", + " 'OPTION_MAX_CPU_LORAS': '16',\n", + " 'OPTION_MAX_LORAS': '8',\n", + " 'OPTION_MAX_LORA_RANK': '128',\n", + " 'OPTION_MAX_ROLLING_BATCH_SIZE': '8',\n", + " 'OPTION_ROLLING_BATCH': 'disable',\n", + " 'OPTION_TENSOR_PARALLEL_DEGREE': '1',\n", + " 'SAGEMAKER_ENABLE_LOAD_AWARE': '1',\n", + " 'SAGEMAKER_MAX_NUMBER_OF_ADAPTERS_IN_MEMORY': '32'},\n", + " 'InstanceType': 'ml.g6.4xlarge',\n", + " 'Profile': 'Default'},\n", + " {'ComputeResourceRequirements': {'MinMemoryRequiredInMb': 32768,\n", + " 'NumberOfAcceleratorDevicesRequired': 1,\n", + " 'NumberOfCpuCoresRequired': 12},\n", + " 'EcrAddress': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128',\n", + " 'Environment': {'OPTION_ASYNC_MODE': 'true',\n", + " 'OPTION_ENABLE_LORA': 'true',\n", + " 'OPTION_ENTRYPOINT': 'djl_python.lmi_vllm.vllm_async_service',\n", + " 'OPTION_MAX_CPU_LORAS': '16',\n", + " 'OPTION_MAX_LORAS': '8',\n", + " 'OPTION_MAX_LORA_RANK': '128',\n", + " 'OPTION_MAX_ROLLING_BATCH_SIZE': '8',\n", + " 'OPTION_ROLLING_BATCH': 'disable',\n", + " 'OPTION_TENSOR_PARALLEL_DEGREE': '1',\n", + " 'SAGEMAKER_ENABLE_LOAD_AWARE': '1',\n", + " 'SAGEMAKER_MAX_NUMBER_OF_ADAPTERS_IN_MEMORY': '32'},\n", + " 'InstanceType': 'ml.g5.4xlarge'}],\n", + " 'HpEksOverrideParamsS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/verl-grpo-rlvr-llama-3-dot-2-1b-instruct-lora_override_params_k8s_v2.0.0.json',\n", + " 'HpEksPayloadTemplateS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/verl-grpo-rlvr-llama-3-dot-2-1b-instruct-lora_payload_template_k8s_v2.0.0.yaml',\n", + " 'InstanceCount': 1,\n", + " 'Name': 'verl-grpo-rlvr-llama-3-dot-2-1b-instruct-lora',\n", + " 'Peft': 
'LORA',\n", + " 'RecipeFilePath': 'recipes/fine-tuning/llama/verl-grpo-rlvr-llama-3-dot-2-1b-instruct-lora.yaml',\n", + " 'SequenceLength': '1K',\n", + " 'ServerlessMeteringType': 'Hourly',\n", + " 'SmtjImageUri': '920498770698.dkr.ecr.us-west-2.amazonaws.com/hyperpod-recipes:verl-v1.0.0-smtj',\n", + " 'SmtjOverrideParamsS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/verl-grpo-rlvr-llama-3-dot-2-1b-instruct-lora_override_params_sm_jobs_v2.0.0.json',\n", + " 'SmtjRecipeTemplateS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/verl-grpo-rlvr-llama-3-dot-2-1b-instruct-lora_payload_template_sm_jobs_v2.0.0.yaml',\n", + " 'SupportedInstanceTypes': ['ml.p5.48xlarge',\n", + " 'ml.p4de.24xlarge',\n", + " 'ml.p4d.24xlarge'],\n", + " 'Type': 'FineTuning',\n", + " 'Versions': ['1.1.0']},\n", + " {'CustomizationTechnique': 'SFT',\n", + " 'DisplayName': 'Llama 3.2 1B Simple Fine Tuning with '\n", + " 'Lora on GPU, 4K sequence length',\n", + " 'Hardware': 'GPU',\n", + " 'HostingConfigs': [{'ComputeResourceRequirements': {'MinMemoryRequiredInMb': 32768,\n", + " 'NumberOfAcceleratorDevicesRequired': 1,\n", + " 'NumberOfCpuCoresRequired': 12},\n", + " 'EcrAddress': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128',\n", + " 'Environment': {'OPTION_ASYNC_MODE': 'true',\n", + " 'OPTION_ENABLE_LORA': 'true',\n", + " 'OPTION_ENTRYPOINT': 'djl_python.lmi_vllm.vllm_async_service',\n", + " 'OPTION_MAX_CPU_LORAS': '16',\n", + " 'OPTION_MAX_LORAS': '8',\n", + " 'OPTION_MAX_LORA_RANK': '128',\n", + " 'OPTION_MAX_ROLLING_BATCH_SIZE': '8',\n", + " 'OPTION_ROLLING_BATCH': 'disable',\n", + " 'OPTION_TENSOR_PARALLEL_DEGREE': '1',\n", + " 'SAGEMAKER_ENABLE_LOAD_AWARE': '1',\n", + " 'SAGEMAKER_MAX_NUMBER_OF_ADAPTERS_IN_MEMORY': '32'},\n", + " 'InstanceType': 'ml.g6.4xlarge',\n", + " 'Profile': 'Default'},\n", + " {'ComputeResourceRequirements': {'MinMemoryRequiredInMb': 32768,\n", + " 'NumberOfAcceleratorDevicesRequired': 1,\n", + " 
'NumberOfCpuCoresRequired': 12},\n", + " 'EcrAddress': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128',\n", + " 'Environment': {'OPTION_ASYNC_MODE': 'true',\n", + " 'OPTION_ENABLE_LORA': 'true',\n", + " 'OPTION_ENTRYPOINT': 'djl_python.lmi_vllm.vllm_async_service',\n", + " 'OPTION_MAX_CPU_LORAS': '16',\n", + " 'OPTION_MAX_LORAS': '8',\n", + " 'OPTION_MAX_LORA_RANK': '128',\n", + " 'OPTION_MAX_ROLLING_BATCH_SIZE': '8',\n", + " 'OPTION_ROLLING_BATCH': 'disable',\n", + " 'OPTION_TENSOR_PARALLEL_DEGREE': '1',\n", + " 'SAGEMAKER_ENABLE_LOAD_AWARE': '1',\n", + " 'SAGEMAKER_MAX_NUMBER_OF_ADAPTERS_IN_MEMORY': '32'},\n", + " 'InstanceType': 'ml.g5.4xlarge'}],\n", + " 'HpEksOverrideParamsS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/llmft_llama3_2_1b_instruct_seq4k_gpu_sft_lora_override_params_k8s_v2.0.0.json',\n", + " 'HpEksPayloadTemplateS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/llmft_llama3_2_1b_instruct_seq4k_gpu_sft_lora_payload_template_k8s_v2.0.0.yaml',\n", + " 'InstanceCount': 1,\n", + " 'Name': 'llmft_llama3_2_1b_instruct_seq4k_gpu_sft_lora',\n", + " 'Peft': 'LORA',\n", + " 'RecipeFilePath': 'recipes/fine-tuning/llama/llmft_llama3_2_1b_instruct_seq4k_gpu_sft_lora.yaml',\n", + " 'SequenceLength': '4K',\n", + " 'ServerlessMeteringType': 'Token-based',\n", + " 'SmtjImageUri': '920498770698.dkr.ecr.us-west-2.amazonaws.com/hyperpod-recipes:llmft-v1.0.0',\n", + " 'SmtjOverrideParamsS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/llmft_llama3_2_1b_instruct_seq4k_gpu_sft_lora_override_params_sm_jobs_v2.0.0.json',\n", + " 'SmtjRecipeTemplateS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/llmft_llama3_2_1b_instruct_seq4k_gpu_sft_lora_payload_template_sm_jobs_v2.0.0.yaml',\n", + " 'SupportedInstanceTypes': ['ml.p4de.24xlarge',\n", + " 'ml.p4d.24xlarge',\n", + " 'ml.p5.48xlarge',\n", + " 'ml.g5.48xlarge',\n", + " 'ml.g5.12xlarge'],\n", + " 'Type': 'FineTuning',\n", + " 'Versions': ['1.1.0']},\n", + " 
{'CustomizationTechnique': 'DPO',\n", + " 'DisplayName': 'Llama 3.2 1B Direct Preference '\n", + " 'Optimization on GPU, 4K sequence length',\n", + " 'Hardware': 'GPU',\n", + " 'HostingConfigs': [{'ComputeResourceRequirements': {'MinMemoryRequiredInMb': 32768,\n", + " 'NumberOfAcceleratorDevicesRequired': 1,\n", + " 'NumberOfCpuCoresRequired': 12},\n", + " 'EcrAddress': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128',\n", + " 'Environment': {'OPTION_ASYNC_MODE': 'true',\n", + " 'OPTION_ENABLE_LORA': 'true',\n", + " 'OPTION_ENTRYPOINT': 'djl_python.lmi_vllm.vllm_async_service',\n", + " 'OPTION_MAX_CPU_LORAS': '16',\n", + " 'OPTION_MAX_LORAS': '8',\n", + " 'OPTION_MAX_LORA_RANK': '128',\n", + " 'OPTION_MAX_ROLLING_BATCH_SIZE': '8',\n", + " 'OPTION_ROLLING_BATCH': 'disable',\n", + " 'OPTION_TENSOR_PARALLEL_DEGREE': '1',\n", + " 'SAGEMAKER_ENABLE_LOAD_AWARE': '1',\n", + " 'SAGEMAKER_MAX_NUMBER_OF_ADAPTERS_IN_MEMORY': '32'},\n", + " 'InstanceType': 'ml.g6.4xlarge',\n", + " 'Profile': 'Default'},\n", + " {'ComputeResourceRequirements': {'MinMemoryRequiredInMb': 32768,\n", + " 'NumberOfAcceleratorDevicesRequired': 1,\n", + " 'NumberOfCpuCoresRequired': 12},\n", + " 'EcrAddress': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128',\n", + " 'Environment': {'OPTION_ASYNC_MODE': 'true',\n", + " 'OPTION_ENABLE_LORA': 'true',\n", + " 'OPTION_ENTRYPOINT': 'djl_python.lmi_vllm.vllm_async_service',\n", + " 'OPTION_MAX_CPU_LORAS': '16',\n", + " 'OPTION_MAX_LORAS': '8',\n", + " 'OPTION_MAX_LORA_RANK': '128',\n", + " 'OPTION_MAX_ROLLING_BATCH_SIZE': '8',\n", + " 'OPTION_ROLLING_BATCH': 'disable',\n", + " 'OPTION_TENSOR_PARALLEL_DEGREE': '1',\n", + " 'SAGEMAKER_ENABLE_LOAD_AWARE': '1',\n", + " 'SAGEMAKER_MAX_NUMBER_OF_ADAPTERS_IN_MEMORY': '32'},\n", + " 'InstanceType': 'ml.g5.4xlarge'}],\n", + " 'HpEksOverrideParamsS3Uri': 
's3://jumpstart-cache-prod-us-west-2/recipes/llmft_llama3_2_1b_instruct_seq4k_gpu_dpo_override_params_k8s_v2.0.0.json',\n", + " 'HpEksPayloadTemplateS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/llmft_llama3_2_1b_instruct_seq4k_gpu_dpo_payload_template_k8s_v2.0.0.yaml',\n", + " 'InstanceCount': 1,\n", + " 'Name': 'llmft_llama3_2_1b_instruct_seq4k_gpu_dpo',\n", + " 'Peft': 'LORA',\n", + " 'RecipeFilePath': 'recipes/fine-tuning/llama/llmft_llama3_2_1b_instruct_seq4k_gpu_dpo.yaml',\n", + " 'SequenceLength': '4K',\n", + " 'ServerlessMeteringType': 'Token-based',\n", + " 'SmtjImageUri': '920498770698.dkr.ecr.us-west-2.amazonaws.com/hyperpod-recipes:llmft-v1.0.0',\n", + " 'SmtjOverrideParamsS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/llmft_llama3_2_1b_instruct_seq4k_gpu_dpo_override_params_sm_jobs_v2.0.0.json',\n", + " 'SmtjRecipeTemplateS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/llmft_llama3_2_1b_instruct_seq4k_gpu_dpo_payload_template_sm_jobs_v2.0.0.yaml',\n", + " 'SupportedInstanceTypes': ['ml.p4de.24xlarge',\n", + " 'ml.p4d.24xlarge',\n", + " 'ml.p5.48xlarge',\n", + " 'ml.g5.48xlarge'],\n", + " 'Type': 'FineTuning',\n", + " 'Versions': ['1.1.0']},\n", + " {'DisplayName': 'Open Source Evaluation Evaluation on '\n", + " 'GPU - Meta Textgeneration Llama 3 2 1B '\n", + " 'Instruct',\n", + " 'EvaluationType': 'DeterministicEvaluation',\n", + " 'Hardware': 'GPU',\n", + " 'HpEksOverrideParamsS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/meta_textgeneration_llama_3_2_1b_instruct_override_params_k8s_v2.0.0.json',\n", + " 'HpEksPayloadTemplateS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/meta_textgeneration_llama_3_2_1b_instruct_payload_template_k8s_v2.0.0.yaml',\n", + " 'Name': 'open-source-eval-meta-textgeneration-llama-3-2-1b-instruct-deterministic',\n", + " 'SmtjImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/sagemaker-evaluation:latest',\n", + " 'SmtjOverrideParamsS3Uri': 
's3://jumpstart-cache-prod-us-west-2/recipes/open-source-eval-meta-textgeneration-llama-3-2-1b-instruct-deterministic_override_params_sm_jobs_v2.0.0.json',\n", + " 'SmtjRecipeTemplateS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/open-source-eval-meta-textgeneration-llama-3-2-1b-instruct-deterministic_payload_template_sm_jobs_v2.0.0.yaml',\n", + " 'SupportedInstanceTypes': ['ml.g5.12xlarge',\n", + " 'ml.g5.16xlarge',\n", + " 'ml.p4d.24xlarge',\n", + " 'ml.p5.48xlarge'],\n", + " 'Type': 'Evaluation',\n", + " 'Versions': ['1.0']},\n", + " {'DisplayName': 'Open Source Evaluation Evaluation on '\n", + " 'GPU - Meta Textgeneration Llama 3 2 1B '\n", + " 'Instruct',\n", + " 'EvaluationType': 'LLMAJEvaluation',\n", + " 'Hardware': 'CPU',\n", + " 'HpEksOverrideParamsS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/meta_textgeneration_llama_3_2_1b_instruct_override_params_k8s_v2.0.0.json',\n", + " 'HpEksPayloadTemplateS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/meta_textgeneration_llama_3_2_1b_instruct_payload_template_k8s_v2.0.0.yaml',\n", + " 'Name': 'open-source-eval-meta-textgeneration-llama-3-2-1b-instruct-llmaj',\n", + " 'SmtjImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/sagemaker-evaluation:latest',\n", + " 'SmtjOverrideParamsS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/open-source-eval-meta-textgeneration-llama-3-2-1b-instruct-llmaj_override_params_sm_jobs_v2.0.0.json',\n", + " 'SmtjRecipeTemplateS3Uri': 's3://jumpstart-cache-prod-us-west-2/recipes/open-source-eval-meta-textgeneration-llama-3-2-1b-instruct-llmaj_payload_template_sm_jobs_v2.0.0.yaml',\n", + " 'SupportedInstanceTypes': ['ml.t3.large'],\n", + " 'Type': 'Evaluation',\n", + " 'Versions': ['1.0']}],\n", + " 'ResourceNameBase': 'llama-3-2-1b-instruct',\n", + " 'SageMakerSdkPredictorSpecifications': {'DefaultAcceptType': 'application/json',\n", + " 'DefaultContentType': 'application/json',\n", + " 'SupportedAcceptTypes': ['application/json'],\n", + " 
'SupportedContentTypes': ['application/json']},\n", + " 'SupportedInferenceInstanceTypes': ['ml.g5.12xlarge',\n", + " 'ml.g5.16xlarge',\n", + " 'ml.g5.24xlarge',\n", + " 'ml.g5.2xlarge',\n", + " 'ml.g5.48xlarge',\n", + " 'ml.g5.4xlarge',\n", + " 'ml.g5.8xlarge',\n", + " 'ml.g5.xlarge',\n", + " 'ml.g6.12xlarge',\n", + " 'ml.g6.16xlarge',\n", + " 'ml.g6.24xlarge',\n", + " 'ml.g6.2xlarge',\n", + " 'ml.g6.48xlarge',\n", + " 'ml.g6.4xlarge',\n", + " 'ml.g6.8xlarge',\n", + " 'ml.g6.xlarge',\n", + " 'ml.p4d.24xlarge',\n", + " 'ml.p5.48xlarge'],\n", + " 'SupportedTrainingInstanceTypes': ['ml.g4dn.12xlarge',\n", + " 'ml.g5.12xlarge',\n", + " 'ml.g5.2xlarge',\n", + " 'ml.g5.4xlarge',\n", + " 'ml.g5.8xlarge',\n", + " 'ml.p3dn.24xlarge',\n", + " 'ml.p5.48xlarge'],\n", + " 'Task': 'Text Generation',\n", + " 'TrainingArtifactUri': 's3://jumpstart-private-cache-prod-us-west-2/meta-training/train-meta-textgeneration-llama-3-2-1b-instruct.tar.gz',\n", + " 'TrainingDependencies': ['accelerate==0.33.0',\n", + " 'bitsandbytes==0.39.1',\n", + " 'black==23.7.0',\n", + " 'brotli==1.0.9',\n", + " 'datasets==2.14.1',\n", + " 'docstring-parser==0.16',\n", + " 'fire==0.5.0',\n", + " 'huggingface-hub==0.24.2',\n", + " 'inflate64==0.3.1',\n", + " 'loralib==0.1.1',\n", + " 'multivolumefile==0.2.3',\n", + " 'mypy-extensions==1.0.0',\n", + " 'nvidia-cublas-cu12==12.1.3.1',\n", + " 'nvidia-cuda-cupti-cu12==12.1.105',\n", + " 'nvidia-cuda-nvrtc-cu12==12.1.105',\n", + " 'nvidia-cuda-runtime-cu12==12.1.105',\n", + " 'nvidia-cudnn-cu12==8.9.2.26',\n", + " 'nvidia-cufft-cu12==11.0.2.54',\n", + " 'nvidia-curand-cu12==10.3.2.106',\n", + " 'nvidia-cusolver-cu12==11.4.5.107',\n", + " 'nvidia-cusparse-cu12==12.1.0.106',\n", + " 'nvidia-nccl-cu12==2.19.3',\n", + " 'nvidia-nvjitlink-cu12==12.3.101',\n", + " 'nvidia-nvtx-cu12==12.1.105',\n", + " 'pathspec==0.11.1',\n", + " 'peft==0.4.0',\n", + " 'py7zr==0.20.5',\n", + " 'pybcj==1.0.1',\n", + " 'pycryptodomex==3.18.0',\n", + " 'pyppmd==1.0.0',\n", + " 
'pyzstd==0.15.9',\n", + " 'safetensors==0.4.2',\n", + " 'sagemaker_jumpstart_huggingface_script_utilities==1.2.7',\n", + " 'sagemaker_jumpstart_script_utilities==1.1.9',\n", + " 'scipy==1.11.1',\n", + " 'shtab==1.7.1',\n", + " 'termcolor==2.3.0',\n", + " 'texttable==1.6.7',\n", + " 'tokenize-rt==5.1.0',\n", + " 'tokenizers==0.19.1',\n", + " 'torch==2.2.0',\n", + " 'transformers==4.43.1',\n", + " 'triton==2.2.0',\n", + " 'trl==0.8.1',\n", + " 'typing-extensions==4.8.0',\n", + " 'tyro==0.7.3'],\n", + " 'TrainingEcrUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04',\n", + " 'TrainingEnableNetworkIsolation': True,\n", + " 'TrainingInstanceTypeVariants': {'Variants': {'g4dn': {'Properties': {'GatedModelEnvVarUri': 's3://jumpstart-private-cache-prod-us-west-2/meta-training/g4dn/v1.0.0/train-meta-textgeneration-llama-3-2-1b-instruct.tar.gz',\n", + " 'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'g5': {'Properties': {'GatedModelEnvVarUri': 's3://jumpstart-private-cache-prod-us-west-2/meta-training/g5/v1.0.0/train-meta-textgeneration-llama-3-2-1b-instruct.tar.gz',\n", + " 'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'g6': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'g6e': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'g7e': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'local_gpu': {'Properties': {'ImageUri': 
'763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'p2': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'p3': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'p3dn': {'Properties': {'GatedModelEnvVarUri': 's3://jumpstart-private-cache-prod-us-west-2/meta-training/p3dn/v1.0.0/train-meta-textgeneration-llama-3-2-1b-instruct.tar.gz',\n", + " 'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'p4d': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'p4de': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'p5': {'Properties': {'GatedModelEnvVarUri': 's3://jumpstart-private-cache-prod-us-west-2/meta-training/p5/v1.0.0/train-meta-textgeneration-llama-3-2-1b-instruct.tar.gz',\n", + " 'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'p5e': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'p5en': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'p6': {'Properties': {'ImageUri': 
'763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}},\n", + " 'p6e': {'Properties': {'ImageUri': '763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04'}}}},\n", + " 'TrainingMetrics': [{'Name': 'huggingface-textgeneration:eval-loss',\n", + " 'Regex': 'eval_epoch_loss=tensor\\\\(([0-9\\\\.]+)'},\n", + " {'Name': 'huggingface-textgeneration:eval-ppl',\n", + " 'Regex': 'eval_ppl=tensor\\\\(([0-9\\\\.]+)'},\n", + " {'Name': 'huggingface-textgeneration:train-loss',\n", + " 'Regex': 'train_epoch_loss=([0-9\\\\.]+)'}],\n", + " 'TrainingScriptUri': 's3://jumpstart-cache-prod-us-west-2/source-directory-tarballs/training/meta-textgeneration/prepack/inference-meta-textgeneration/v1.2.0/sourcedir.tar.gz',\n", + " 'TrainingSupported': True,\n", + " 'TrainingVolumeSize': 256,\n", + " 'Url': 'https://ai.meta.com/resources/models-and-libraries/llama-downloads/',\n", + " 'ValidationSupported': True}\n" + ] + }, + { + "data": { + "text/html": [ + "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n", + "│ in <module>:12 │\n", + "│ │\n", + "│ 9 ) │\n", + "│ 10 hub_doc = json.loads(hub_resp[\"HubContentDocument\"]) │\n", + "│ 11 pprint(hub_doc) │\n", + "│ ❱ 12 config = hub_doc[\"InferenceConfigComponents\"][\"lmi\"] │\n", + "│ 13 base_model_s3_uri = config[\"HostingArtifactUri\"] │\n", + "│ 14 instance_family = INSTANCE_TYPE.split(\".\")[1] │\n", + "│ 15 lmi_image_uri = config[\"HostingInstanceTypeVariants\"][\"Variants\"][instance_family][\"Prop │\n", + "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", + "KeyError: 'InferenceConfigComponents'\n", + "\n" + ], + "text/plain": [ + "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", + "\u001b[38;2;255;0;0m│\u001b[0m in \u001b[92m
[03/12/26 08:21:30] INFO Runs on sagemaker prod, region:us-west-2 utils.py:370\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/12/26 08:21:30]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Runs on sagemaker prod, region:us-west-\u001b[1;36m2\u001b[0m \u001b]8;id=142540;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/utils/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=971376;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/utils/utils.py#370\u001b\\\u001b[2m370\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1392\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=216939;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=782940;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/credentials.py#1392\u001b\\\u001b[2m1392\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sagemaker.core.resources import TrainingJob, HubContent, InferenceComponent, ModelPackage\n", + "from sagemaker.core.utils.utils import Unassigned\n", + "\n", + "for training_job in TrainingJob.get_all(region=\"us-west-2\"):\n", + " if not isinstance(training_job.output_model_package_arn, Unassigned):\n", + " try:\n", + " model_package = ModelPackage.get(training_job.output_model_package_arn)\n", + " if not isinstance(model_package.inference_specification.containers[0].image,Unassigned)\\\n", + " and model_package.inference_specification.containers[0].image is not None:\n", + " print(training_job.training_job_arn)\n", + " print(model_package.inference_specification.containers[0].image)\n", + " except:\n", + " pass\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "0f373c98", + "metadata": {}, + "outputs": [], + "source": [ + "training_job_name = \"test-lora-training-1-1773273846617\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "2415b1cb715a304c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"arn:aws:sagemaker:us-west-2:099324990371:model-package/test-lora-training-1/1\n" + ] + }, + { + "data": { + "text/html": [ + "
[03/12/26 09:47:55] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1392\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/12/26 09:47:55]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=796110;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=988891;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/credentials.py#1392\u001b\\\u001b[2m1392\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:96\n", + " understand our user's needs, diagnose issues, and deliver \n", + " additional features. \n", + " To opt out of telemetry, please disable via TelemetryOptOut \n", + " parameter in SDK defaults config. For more information, refer \n", + " to \n", + " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", + " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=304606;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=165110;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/telemetry/telemetry_logging.py#96\u001b\\\u001b[2m96\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[03/12/26 09:47:56] INFO Creating model resource. resources.py:22771\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/12/26 09:47:56]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating model resource. \u001b]8;id=451439;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=477865;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py#22771\u001b\\\u001b[2m22771\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[03/12/26 09:47:57] INFO ✅ Model has been created: 'e2e-5429' using server None in model_builder.py:3147\n", + " SAGEMAKER_ENDPOINT mode (ARN: \n", + " arn:aws:sagemaker:us-west-2:099324990371:model/e2e-5429) \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/12/26 09:47:57]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m ✅ Model has been created: \u001b[38;2;0;135;0m'e2e-5429'\u001b[0m using server \u001b[3;38;2;225;0;225mNone\u001b[0m in \u001b]8;id=842515;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py\u001b\\\u001b[2mmodel_builder.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=301257;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py#3147\u001b\\\u001b[2m3147\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m SAGEMAKER_ENDPOINT mode \u001b[1m(\u001b[0mARN: \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m arn:aws:sagemaker:us-west-2:099324990371:model/e2e-\u001b[1;36m5429\u001b[0m\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "arn:aws:sagemaker:us-west-2:099324990371:model/e2e-5429\n" + ] + } + ], + "source": [ + "from sagemaker.core.resources import TrainingJob\n", + "import random\n", + "training_job = TrainingJob.get(training_job_name=\"test-lora-training-1-1773273846617\")\n", + "print(training_job.output_model_package_arn)\n", + "name = f\"e2e-{random.randint(100, 10000)}\"\n", + "from sagemaker.serve import ModelBuilder\n", + "model_builder = ModelBuilder(model=training_job, role_arn=\"arn:aws:iam::099324990371:role/service-role/AmazonSageMaker-ExecutionRole-20260219T233135\", instance_type=\"ml.g5.8xlarge\")\n", + "model = model_builder.build(model_name=name)\n", + "print(model.model_arn)\n", + "import random\n", + "#endpoint = 
model_builder.deploy(endpoint_name=name)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "1941af1c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'containers': [ContainerDefinition(container_hostname=Unassigned(), image='763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128', image_config=Unassigned(), mode='SingleModel', model_data_url=Unassigned(), model_data_source=ModelDataSource(s3_data_source=S3ModelDataSource(s3_uri='s3://sagemaker-us-west-2-099324990371/model-customization/output-artifacts/test-lora-training-1-1773273846617/output/model/', s3_data_type='S3Prefix', compression_type='None', model_access_config=Unassigned(), hub_access_config=Unassigned(), manifest_s3_uri=Unassigned(), e_tag=Unassigned(), manifest_etag=Unassigned())), additional_model_data_sources=Unassigned(), environment=Unassigned(), model_package_name=Unassigned(), inference_specification_name=Unassigned(), multi_model_config=Unassigned())],\n", + " 'creation_time': datetime.datetime(2026, 3, 12, 9, 47, 57, 83000, tzinfo=tzlocal()),\n", + " 'deployment_recommendation': Unassigned(),\n", + " 'enable_network_isolation': False,\n", + " 'execution_role_arn': 'arn:aws:iam::099324990371:role/service-role/AmazonSageMaker-ExecutionRole-20260219T233135',\n", + " 'inference_execution_config': Unassigned(),\n", + " 'model_arn': 'arn:aws:sagemaker:us-west-2:099324990371:model/e2e-5429',\n", + " 'model_name': 'e2e-5429',\n", + " 'primary_container': Unassigned(),\n", + " 'vpc_config': Unassigned()}\n" + ] + } + ], + "source": [ + "pprint(dict(model))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "8b8bc9eb4299ecba", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[03/12/26 09:48:11] INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:96\n", + " understand our user's needs, diagnose issues, and deliver \n", + " additional features. \n", + " To opt out of telemetry, please disable via TelemetryOptOut \n", + " parameter in SDK defaults config. For more information, refer \n", + " to \n", + " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", + " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/12/26 09:48:11]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=599625;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=360222;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/telemetry/telemetry_logging.py#96\u001b\\\u001b[2m96\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Deploying Model Customization model model_builder.py:3977\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Deploying Model Customization model \u001b]8;id=84460;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py\u001b\\\u001b[2mmodel_builder.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=411308;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py#3977\u001b\\\u001b[2m3977\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Creating endpoint_config resource. resources.py:11602\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating endpoint_config resource. \u001b]8;id=785545;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=941870;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py#11602\u001b\\\u001b[2m11602\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[03/12/26 09:48:12] INFO Endpoint core call starting model_builder.py:4192\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/12/26 09:48:12]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Endpoint core call starting \u001b]8;id=47259;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py\u001b\\\u001b[2mmodel_builder.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=934392;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py#4192\u001b\\\u001b[2m4192\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Creating endpoint resource. resources.py:10735\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating endpoint resource. \u001b]8;id=627985;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=869173;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py#10735\u001b\\\u001b[2m10735\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[03/12/26 09:50:14] INFO Final Resource Status: InService resources.py:11017\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/12/26 09:50:14]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Final Resource Status: \u001b[1mInService\u001b[0m \u001b]8;id=678022;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=865965;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py#11017\u001b\\\u001b[2m11017\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Creating inference_component resource. resources.py:18988\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating inference_component resource. \u001b]8;id=27865;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=167502;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py#18988\u001b\\\u001b[2m18988\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[03/12/26 09:50:15] INFO Creating action resource. resources.py:248\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/12/26 09:50:15]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating action resource. \u001b]8;id=34150;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=32508;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py#248\u001b\\\u001b[2m248\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO ✅ Model customization deployment successful: Endpoint model_builder.py:4309\n", + " 'e2e-5429' \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m ✅ Model customization deployment successful: Endpoint \u001b]8;id=901196;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py\u001b\\\u001b[2mmodel_builder.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=952336;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py#4309\u001b\\\u001b[2m4309\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'e2e-5429'\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sagemaker.core.inference_config import ResourceRequirements\n", + "resources = ResourceRequirements(\n", + " requests={\n", + " \"num_accelerators\": 1, # ml.g5.8xlarge has 1 GPU\n", + " \"memory\": 4096,\n", + " \"num_cpus\": 8,\n", + " }\n", + ")\n", + "endpoint = model_builder.deploy(endpoint_name=name, inference_config=resources, instance_type=\"ml.g5.8xlarge\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "58b5d5995791bd96", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "arn:aws:sagemaker:us-west-2:099324990371:inference-component/e2e-9271-inference-component\n" + ] + } + ], + "source": [ + "from sagemaker.core.resources import InferenceComponent, Tag\n", + "from pprint import pprint\n", + "\n", + "for inference_component in InferenceComponent.get_all(endpoint_name_equals=\"e2e-9271\"):\n", + " print(inference_component.inference_component_arn)\n", + " for tag in Tag.get_all(resource_arn=inference_component.inference_component_arn):\n", + " pprint(tag)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + 
"id": "2833eab06285f075", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "arn:aws:sagemaker:us-west-2:099324990371:endpoint/e2e-5429\n" + ] + }, + { + "data": { + "text/html": [ + "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n", + "│ in <module>:4 │\n", + "│ │\n", + "│ 1 import json │\n", + "│ 2 # Note this is expected to fail since Endpoint invoke is only available for authorized u │\n", + "│ 3 print(endpoint.endpoint_arn) │\n", + "│ ❱ 4 endpoint.invoke(body=json.dumps({\"inputs\": \"What is the capital of France?\", \"parameters │\n", + "│ 5 │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/c │\n", + "│ ore/resources.py:143 in wrapper │\n", + "│ │\n", + "│ 140 │ │ @functools.wraps(func) │\n", + "│ 141 │ │ def wrapper(*args, **kwargs): │\n", + "│ 142 │ │ │ config = dict(arbitrary_types_allowed=True) │\n", + "│ ❱ 143 │ │ │ return validate_call(config=config)(func)(*args, **kwargs) │\n", + "│ 144 │ │ │\n", + "│ 145 │ │ return wrapper │\n", + "│ 146 │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/pydantic/_i │\n", + "│ nternal/_validate_call.py:39 in wrapper_function │\n", + "│ │\n", + "│ 36 │ │ │\n", + "│ 37 │ │ @functools.wraps(wrapped) │\n", + "│ 38 │ │ def wrapper_function(*args, **kwargs): │\n", + "│ ❱ 39 │ │ │ return wrapper(*args, **kwargs) │\n", + "│ 40 │ │\n", + "│ 41 │ # We need to manually update this because `partial` object has no `__name__` and `__ │\n", + "│ 42 │ wrapper_function.__name__ = extract_function_name(wrapped) │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/pydantic/_i │\n", + "│ nternal/_validate_call.py:136 in __call__ │\n", + "│ │\n", + "│ 133 │ │ if not self.__pydantic_complete__: │\n", + "│ 134 │ │ │ self._create_validators() │\n", + "│ 135 │ │ │\n", + "│ ❱ 136 │ │ res = self.__pydantic_validator__.validate_python(pydantic_core.ArgsKwargs(args, │\n", + "│ 137 │ │ if self.__return_pydantic_validator__: │\n", + "│ 138 │ │ │ return self.__return_pydantic_validator__(res) │\n", + 
"│ 139 │ │ else: │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/c │\n", + "│ ore/resources.py:11299 in invoke │\n", + "│ │\n", + "│ 11296 │ │ ) │\n", + "│ 11297 │ │ │\n", + "│ 11298 │ │ logger.debug(f\"Calling invoke_endpoint API\") │\n", + "│ ❱ 11299 │ │ response = client.invoke_endpoint(**operation_input_args) │\n", + "│ 11300 │ │ logger.debug(f\"Response: {response}\") │\n", + "│ 11301 │ │ │\n", + "│ 11302 │ │ transformed_response = transform(response, \"InvokeEndpointOutput\") │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/cl │\n", + "│ ient.py:602 in _api_call │\n", + "│ │\n", + "│ 599 │ │ │ │ │ f\"{py_operation_name}() only accepts keyword arguments.\" │\n", + "│ 600 │ │ │ │ ) │\n", + "│ 601 │ │ │ # The \"self\" in this scope is referring to the BaseClient. │\n", + "│ ❱ 602 │ │ │ return self._make_api_call(operation_name, kwargs) │\n", + "│ 603 │ │ │\n", + "│ 604 │ │ _api_call.__name__ = str(py_operation_name) │\n", + "│ 605 │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/co │\n", + "│ ntext.py:123 in wrapper │\n", + "│ │\n", + "│ 120 │ │ │ with start_as_current_context(): │\n", + "│ 121 │ │ │ │ if hook: │\n", + "│ 122 │ │ │ │ │ hook() │\n", + "│ ❱ 123 │ │ │ │ return func(*args, **kwargs) │\n", + "│ 124 │ │ │\n", + "│ 125 │ │ return wrapper │\n", + "│ 126 │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/cl │\n", + "│ ient.py:1078 in _make_api_call │\n", + "│ │\n", + "│ 1075 │ │ │ │ 'error_code_override' │\n", + "│ 1076 │ │ │ ) or error_info.get(\"Code\") │\n", + "│ 1077 │ │ │ error_class = self.exceptions.from_code(error_code) │\n", + "│ ❱ 1078 │ │ │ raise error_class(parsed_response, operation_name) │\n", + "│ 1079 │ │ else: │\n", + "│ 1080 │ │ │ return parsed_response │\n", + "│ 1081 
│\n", + "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", + "ValidationError: An error occurred (ValidationError) when calling the InvokeEndpoint operation: Inference Component\n", + "Name header is required for endpoints to which you plan to deploy inference components. Please include Inference \n", + "Component Name header or consider using SageMaker models.\n", + "\n" + ], + "text/plain": [ + "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", + "\u001b[38;2;255;0;0m│\u001b[0m in \u001b[92m
[03/12/26 10:15:21] INFO SageMaker Python SDK will collect telemetry to help us better telemetry_logging.py:96\n", + " understand our user's needs, diagnose issues, and deliver \n", + " additional features. \n", + " To opt out of telemetry, please disable via TelemetryOptOut \n", + " parameter in SDK defaults config. For more information, refer \n", + " to \n", + " https://sagemaker.readthedocs.io/en/stable/overview.html#confi \n", + " guring-and-using-defaults-with-the-sagemaker-python-sdk. \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/12/26 10:15:21]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m SageMaker Python SDK will collect telemetry to help us better \u001b]8;id=984215;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/telemetry/telemetry_logging.py\u001b\\\u001b[2mtelemetry_logging.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=81444;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/telemetry/telemetry_logging.py#96\u001b\\\u001b[2m96\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m understand our user's needs, diagnose issues, and deliver \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m additional features. \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m To opt out of telemetry, please disable via TelemetryOptOut \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m parameter in SDK defaults config. For more information, refer \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m to \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mhttps://sagemaker.readthedocs.io/en/stable/overview.html#confi\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[4;38;2;0;105;255mguring-and-using-defaults-with-the-sagemaker-python-sdk.\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
WARNING ModelBuilder.deploy() has already been called. Reusing model_builder.py:3965\n", + " ModelBuilder objects for multiple deployments is not \n", + " recommended. Please create a new ModelBuilder instance for \n", + " additional deployments. \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;215;175;0mWARNING \u001b[0m \u001b[1;38;2;225;0;225mModelBuilder.deploy\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m has already been called. Reusing \u001b]8;id=832054;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py\u001b\\\u001b[2mmodel_builder.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=161604;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py#3965\u001b\\\u001b[2m3965\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m ModelBuilder objects for multiple deployments is not \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m recommended. Please create a new ModelBuilder instance for \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m additional deployments. \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Deploying Model Customization model model_builder.py:3977\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Deploying Model Customization model \u001b]8;id=818306;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py\u001b\\\u001b[2mmodel_builder.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=173311;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py#3977\u001b\\\u001b[2m3977\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
INFO Creating inference_component resource. resources.py:18988\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating inference_component resource. \u001b]8;id=817928;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py\u001b\\\u001b[2mresources.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=436693;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/core/resources.py#18988\u001b\\\u001b[2m18988\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[03/12/26 10:15:22] INFO ✅ Model customization deployment successful: Endpoint model_builder.py:4309\n", + " 'e2e-5429' \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/12/26 10:15:22]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m ✅ Model customization deployment successful: Endpoint \u001b]8;id=441209;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py\u001b\\\u001b[2mmodel_builder.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=100814;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/serve/model_builder.py#4309\u001b\\\u001b[2m4309\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'e2e-5429'\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "resources = ResourceRequirements(\n", + " requests={\n", + " \"num_accelerators\": 0, # ml.g5.8xlarge has 1 GPU\n", + " \"memory\": 0,\n", + " \"num_cpus\": 0,\n", + " }\n", + ")\n", + "endpoint = model_builder.deploy(endpoint_name=name, inference_component_name=f\"{name}-adapter\", wait=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "a362a3cc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Adapter S3 URI: s3://sagemaker-us-west-2-099324990371/model-customization/output-artifacts/test-lora-training-1-1773273846617/output/model/checkpoints/hf/\n" + ] + } + ], + "source": [ + "model_s3_uri = training_job.model_artifacts.s3_model_artifacts\n", + "adapter_s3_uri = f\"{model_s3_uri}/checkpoints/hf/\"\n", + "print(f\"Adapter S3 URI: {adapter_s3_uri}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "9398a5ce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[03/12/26 10:20:57] INFO Found credentials in shared credentials file: ~/.aws/credentials credentials.py:1392\n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[03/12/26 10:20:57]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=926733;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=906514;file:///Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/credentials.py#1392\u001b\\\u001b[2m1392\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "{'InferenceComponentArn': 'arn:aws:sagemaker:us-west-2:099324990371:inference-component/e2e-5429-adapter-boto3',\n", + " 'ResponseMetadata': {'RequestId': '3bcfec43-27fd-4b75-99f5-62aab782f1c1',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': '3bcfec43-27fd-4b75-99f5-62aab782f1c1',\n", + " 'strict-transport-security': 'max-age=47304000; includeSubDomains',\n", + " 'x-frame-options': 'DENY',\n", + " 'content-security-policy': \"frame-ancestors 'none'\",\n", + " 'cache-control': 'no-cache, no-store, must-revalidate',\n", + " 'x-content-type-options': 'nosniff',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '111',\n", + " 'date': 'Thu, 12 Mar 2026 17:20:57 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import boto3\n", + "sm = boto3.client(\"sagemaker\", region_name='us-west-2')\n", + "sm.create_inference_component(\n", + " InferenceComponentName=\"e2e-5429-adapter-boto3\",\n", + " EndpointName=name,\n", + " Specification={\n", + " 
\"BaseInferenceComponentName\": \"e2e-5429-inference-component\",\n", + " \"Container\": {\"ArtifactUrl\": adapter_s3_uri},\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "40cccc6d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'CreationTime': datetime.datetime(2026, 3, 12, 9, 50, 14, 970000, tzinfo=tzlocal()),\n", + " 'EndpointArn': 'arn:aws:sagemaker:us-west-2:099324990371:endpoint/e2e-5429',\n", + " 'EndpointName': 'e2e-5429',\n", + " 'InferenceComponentArn': 'arn:aws:sagemaker:us-west-2:099324990371:inference-component/e2e-5429-inference-component',\n", + " 'InferenceComponentName': 'e2e-5429-inference-component',\n", + " 'InferenceComponentStatus': 'InService',\n", + " 'LastModifiedTime': datetime.datetime(2026, 3, 12, 9, 55, 36, 65000, tzinfo=tzlocal()),\n", + " 'ResponseMetadata': {'HTTPHeaders': {'cache-control': 'no-cache, no-store, '\n", + " 'must-revalidate',\n", + " 'content-length': '973',\n", + " 'content-security-policy': 'frame-ancestors '\n", + " \"'none'\",\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'date': 'Thu, 12 Mar 2026 21:12:35 GMT',\n", + " 'strict-transport-security': 'max-age=47304000; '\n", + " 'includeSubDomains',\n", + " 'x-amzn-requestid': '841deac7-9dde-4dae-89c0-5b52786cf1d1',\n", + " 'x-content-type-options': 'nosniff',\n", + " 'x-frame-options': 'DENY'},\n", + " 'HTTPStatusCode': 200,\n", + " 'RequestId': '841deac7-9dde-4dae-89c0-5b52786cf1d1',\n", + " 'RetryAttempts': 0},\n", + " 'RuntimeConfig': {'CurrentCopyCount': 1, 'DesiredCopyCount': 1},\n", + " 'Specification': {'ComputeResourceRequirements': {'MinMemoryRequiredInMb': 4096,\n", + " 'NumberOfAcceleratorDevicesRequired': 1.0,\n", + " 'NumberOfCpuCoresRequired': 8.0},\n", + " 'Container': {'DeployedImage': {'ResolutionTime': datetime.datetime(2026, 3, 12, 9, 50, 15, 707000, tzinfo=tzlocal()),\n", + " 'ResolvedImage': 
'763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference@sha256:4979ff55ba85b9b525333016fde63fa3d709567d1bbf02c486e963bdc0d48b7b',\n", + " 'SpecifiedImage': '763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.34.0-lmi16.0.0-cu128'}},\n", + " 'DataCacheConfig': {'EnableCaching': True}},\n", + " 'VariantName': 'e2e-5429'}\n", + "{'CreationTime': datetime.datetime(2026, 3, 12, 13, 10, 27, 885000, tzinfo=tzlocal()),\n", + " 'EndpointArn': 'arn:aws:sagemaker:us-west-2:099324990371:endpoint/e2e-1977',\n", + " 'EndpointName': 'e2e-1977',\n", + " 'InferenceComponentArn': 'arn:aws:sagemaker:us-west-2:099324990371:inference-component/e2e-1977-inference-component',\n", + " 'InferenceComponentName': 'e2e-1977-inference-component',\n", + " 'InferenceComponentStatus': 'InService',\n", + " 'LastModifiedTime': datetime.datetime(2026, 3, 12, 13, 16, 57, 914000, tzinfo=tzlocal()),\n", + " 'ResponseMetadata': {'HTTPHeaders': {'cache-control': 'no-cache, no-store, '\n", + " 'must-revalidate',\n", + " 'content-length': '613',\n", + " 'content-security-policy': 'frame-ancestors '\n", + " \"'none'\",\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'date': 'Thu, 12 Mar 2026 21:12:36 GMT',\n", + " 'strict-transport-security': 'max-age=47304000; '\n", + " 'includeSubDomains',\n", + " 'x-amzn-requestid': '11ebe4bd-3cd7-4142-8bca-850c7499a304',\n", + " 'x-content-type-options': 'nosniff',\n", + " 'x-frame-options': 'DENY'},\n", + " 'HTTPStatusCode': 200,\n", + " 'RequestId': '11ebe4bd-3cd7-4142-8bca-850c7499a304',\n", + " 'RetryAttempts': 0},\n", + " 'RuntimeConfig': {'CurrentCopyCount': 1, 'DesiredCopyCount': 1},\n", + " 'Specification': {'ComputeResourceRequirements': {'MinMemoryRequiredInMb': 4096,\n", + " 'NumberOfAcceleratorDevicesRequired': 1.0},\n", + " 'ModelName': 'model-1977'},\n", + " 'VariantName': 'AllTraffic'}\n" + ] + } + ], + "source": [ + "# pprint(sm.describe_training_job(TrainingJobName=training_job_name))\n", + "# 
pprint(sm.describe_endpoint(EndpointName='e2e-5429'))\n", + "pprint(sm.describe_inference_component(InferenceComponentName='e2e-5429-inference-component'))\n", + "pprint(sm.describe_inference_component(InferenceComponentName='e2e-1977-inference-component'))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "a4a7feb1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Invoke failed: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from lZI6Eo1dICc4MLUF6RP25TJlbyckLO2VyBwM with message \"{\n", + " \"code\": 400,\n", + " \"type\": \"BadRequestException\",\n", + " \"message\": \"Parameter model_name is required.\"\n", + "}\n", + "\". See https://us-west-2.console.aws.amazon.com/cloudwatch/home?region=us-west-2#logEventViewer:group=/aws/sagemaker/Endpoints/e2e-5429 in account 099324990371 for more information.\n" + ] + } + ], + "source": [ + "import json\n", + "sm_runtime = boto3.client(\"sagemaker-runtime\", region_name='us-west-2')\n", + "# adapter_ic_name = 'e2e-5429-adapter-boto3'\n", + "# adapter_ic_name = 'e2e-5429-adapter'\n", + "adapter_ic_name = 'e2e-5429-inference-component'\n", + "try:\n", + " resp = sm_runtime.invoke_endpoint(\n", + " EndpointName=name,\n", + " InferenceComponentName=adapter_ic_name,\n", + " Body=json.dumps({\n", + " \"inputs\": \"What is the capital of France?\",\n", + " \"parameters\": {\"max_new_tokens\": 50},\n", + " }),\n", + " ContentType=\"application/json\",\n", + " )\n", + " result = json.loads(resp[\"Body\"].read().decode())\n", + " print(f\"Response: {result}\")\n", + "except Exception as e:\n", + " print(f\"Invoke failed: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "06317fe5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "arn:aws:sagemaker:us-west-2:099324990371:endpoint/e2e-5429\n", + "e2e-5429\n" + ] + } + ], + "source": 
[ + "import json\n", + "# Note this is expected to fail since Endpoint invoke is only available for authorized users. The Invoke call here is the sagemaker-core Endpoint.invoke call .\n", + "print(endpoint.endpoint_arn)\n", + "print(name)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "d1f63ede", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n", + "│ in <module>:1 │\n", + "│ │\n", + "│ ❱ 1 endpoint.invoke(body=json.dumps({\"inputs\": \"What is the capital of France?\", \"parameters │\n", + "│ 2 │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/c │\n", + "│ ore/resources.py:143 in wrapper │\n", + "│ │\n", + "│ 140 │ │ @functools.wraps(func) │\n", + "│ 141 │ │ def wrapper(*args, **kwargs): │\n", + "│ 142 │ │ │ config = dict(arbitrary_types_allowed=True) │\n", + "│ ❱ 143 │ │ │ return validate_call(config=config)(func)(*args, **kwargs) │\n", + "│ 144 │ │ │\n", + "│ 145 │ │ return wrapper │\n", + "│ 146 │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/pydantic/_i │\n", + "│ nternal/_validate_call.py:39 in wrapper_function │\n", + "│ │\n", + "│ 36 │ │ │\n", + "│ 37 │ │ @functools.wraps(wrapped) │\n", + "│ 38 │ │ def wrapper_function(*args, **kwargs): │\n", + "│ ❱ 39 │ │ │ return wrapper(*args, **kwargs) │\n", + "│ 40 │ │\n", + "│ 41 │ # We need to manually update this because `partial` object has no `__name__` and `__ │\n", + "│ 42 │ wrapper_function.__name__ = extract_function_name(wrapped) │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/pydantic/_i │\n", + "│ nternal/_validate_call.py:136 in __call__ │\n", + "│ │\n", + "│ 133 │ │ if not self.__pydantic_complete__: │\n", + "│ 134 │ │ │ self._create_validators() │\n", + "│ 135 │ │ │\n", + "│ ❱ 136 │ │ res = self.__pydantic_validator__.validate_python(pydantic_core.ArgsKwargs(args, │\n", + "│ 137 │ │ if self.__return_pydantic_validator__: │\n", + "│ 138 │ │ │ return self.__return_pydantic_validator__(res) │\n", + "│ 139 │ │ else: │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/sagemaker/c │\n", + "│ ore/resources.py:11299 
in invoke │\n", + "│ │\n", + "│ 11296 │ │ ) │\n", + "│ 11297 │ │ │\n", + "│ 11298 │ │ logger.debug(f\"Calling invoke_endpoint API\") │\n", + "│ ❱ 11299 │ │ response = client.invoke_endpoint(**operation_input_args) │\n", + "│ 11300 │ │ logger.debug(f\"Response: {response}\") │\n", + "│ 11301 │ │ │\n", + "│ 11302 │ │ transformed_response = transform(response, \"InvokeEndpointOutput\") │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/cl │\n", + "│ ient.py:602 in _api_call │\n", + "│ │\n", + "│ 599 │ │ │ │ │ f\"{py_operation_name}() only accepts keyword arguments.\" │\n", + "│ 600 │ │ │ │ ) │\n", + "│ 601 │ │ │ # The \"self\" in this scope is referring to the BaseClient. │\n", + "│ ❱ 602 │ │ │ return self._make_api_call(operation_name, kwargs) │\n", + "│ 603 │ │ │\n", + "│ 604 │ │ _api_call.__name__ = str(py_operation_name) │\n", + "│ 605 │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/co │\n", + "│ ntext.py:123 in wrapper │\n", + "│ │\n", + "│ 120 │ │ │ with start_as_current_context(): │\n", + "│ 121 │ │ │ │ if hook: │\n", + "│ 122 │ │ │ │ │ hook() │\n", + "│ ❱ 123 │ │ │ │ return func(*args, **kwargs) │\n", + "│ 124 │ │ │\n", + "│ 125 │ │ return wrapper │\n", + "│ 126 │\n", + "│ │\n", + "│ /Users/twillit/.local/share/mise/installs/python/3.12.6/lib/python3.12/site-packages/botocore/cl │\n", + "│ ient.py:1078 in _make_api_call │\n", + "│ │\n", + "│ 1075 │ │ │ │ 'error_code_override' │\n", + "│ 1076 │ │ │ ) or error_info.get(\"Code\") │\n", + "│ 1077 │ │ │ error_class = self.exceptions.from_code(error_code) │\n", + "│ ❱ 1078 │ │ │ raise error_class(parsed_response, operation_name) │\n", + "│ 1079 │ │ else: │\n", + "│ 1080 │ │ │ return parsed_response │\n", + "│ 1081 │\n", + "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n", + "ValidationError: An error occurred (ValidationError) when 
calling the InvokeEndpoint operation: Inference Component\n", + "Name header is required for endpoints to which you plan to deploy inference components. Please include Inference \n", + "Component Name header or consider using SageMaker models.\n", + "\n" + ], + "text/plain": [ + "\u001b[38;2;255;0;0m╭─\u001b[0m\u001b[38;2;255;0;0m──────────────────────────────\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[1;38;2;255;0;0mTraceback \u001b[0m\u001b[1;2;38;2;255;0;0m(most recent call last)\u001b[0m\u001b[38;2;255;0;0m \u001b[0m\u001b[38;2;255;0;0m───────────────────────────────\u001b[0m\u001b[38;2;255;0;0m─╮\u001b[0m\n", + "\u001b[38;2;255;0;0m│\u001b[0m in \u001b[92m