# Assignment 4.1: Model Store

**Student name: John Kalaiselvan**

## Part 1: Set Up Model Group

### Set up the SageMaker AI Boto3 client.

In [1]:
import time
import os
from sagemaker import get_execution_role, session
import boto3


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:
# Region is read from a default boto3 session and pinned explicitly on the
# SageMaker client created below.
region = boto3.Session().region_name

# Execution role as reported by the SageMaker SDK.
role = get_execution_role()

# Low-level SageMaker API client bound to the region resolved above.
sm_client = boto3.client(service_name="sagemaker", region_name=region)

### Create the Model Group

In [11]:
import time
# Name of the model package group that collects the versions of this model.
model_package_group_name = "xgboost-breast-cancer-detection"

# Adjacent string literals are concatenated with no separator, so every
# fragment except the last carries its own trailing space; without it the
# stored description runs the sentences together ("data.Tracks", "tuning,and").
model_package_group_input_dict = {
    "ModelPackageGroupName": model_package_group_name,
    "ModelPackageGroupDescription": (
        "XGBoost models for breast cancer detection using clinical feature data. "
        "Tracks versions across feature engineering, hyperparameter tuning, "
        "and training data updates."
    ),
}

# Create the group and report the ARN returned by the service.
create_model_package_group_response = sm_client.create_model_package_group(**model_package_group_input_dict)
print('ModelPackageGroup Arn : {}'.format(create_model_package_group_response['ModelPackageGroupArn']))

ModelPackageGroup Arn : arn:aws:sagemaker:us-east-1:823460696669:model-package-group/xgboost-breast-cancer-detection


### Describe Model Package Group

In [12]:
# Describe the group through the same name variable that was used to create
# it, so the lookup cannot drift from the creation call if the name changes.
describe_response = sm_client.describe_model_package_group(
    ModelPackageGroupName=model_package_group_name
)

In [13]:
# Echo the describe_model_package_group response so the notebook renders it.
describe_response

{'ModelPackageGroupName': 'xgboost-breast-cancer-detection',
 'ModelPackageGroupArn': 'arn:aws:sagemaker:us-east-1:823460696669:model-package-group/xgboost-breast-cancer-detection',
 'ModelPackageGroupDescription': 'XGBoost models for breast cancer detection using clinical feature data.Tracks versions across feature engineering, hyperparameter tuning,and training data updates.',
 'CreationTime': datetime.datetime(2026, 2, 1, 10, 35, 23, 458000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:823460696669:user-profile/d-24shzxj3uurr/jkalaiselvan',
  'UserProfileName': 'jkalaiselvan',
  'DomainId': 'd-24shzxj3uurr',
  'IamIdentity': {'Arn': 'arn:aws:sts::823460696669:assumed-role/LabRole/SageMaker',
   'PrincipalId': 'AROA37OQVCJORZKK6N5DG:SageMaker'}},
 'ModelPackageGroupStatus': 'Completed',
 'ResponseMetadata': {'RequestId': '973be2a6-ea08-490d-a357-ead2bd1ed18f',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '973be2a6-ea08-490d-a357-e

## Part 2: Set Up Model Package

### Find the Trained Model Artifacts

In [3]:
import boto3

# Client used for the training-job lookups in this part of the notebook.
sm = boto3.client(service_name="sagemaker")

# Request at most five training jobs, newest first.
jobs = sm.list_training_jobs(MaxResults=5, SortOrder="Descending", SortBy="CreationTime")

# Show only the job names so the job to register can be picked out.
[summary["TrainingJobName"] for summary in jobs["TrainingJobSummaries"]]


['xgb-2026-02-01-10-14-48']

In [4]:
# Training job whose output artifact is registered in the model package.
training_job_name = "xgb-2026-02-01-10-14-48"

job_desc = sm.describe_training_job(TrainingJobName=training_job_name)

# S3 URI of the model artifact reported in the job description.
model_data_url = job_desc["ModelArtifacts"]["S3ModelArtifacts"]


In [5]:
# Echo the artifact URI taken from the training job description.
model_data_url

's3://sagemaker-us-east-1-823460696669/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2026-02-01-10-14-48/xgb-2026-02-01-10-14-48/output/model.tar.gz'

### Create Model Package

In [6]:
# Inference container image (sagemaker-xgboost, tag 1.7-1), pinned to the
# us-east-1 registry. A plain string is used because nothing is interpolated.
# NOTE(review): the registry host is region-specific — confirm it matches the
# region the notebook runs in before reusing this elsewhere.
xgboost_image_uri = "683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1"
xgboost_image_uri

'683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1'

In [7]:
import boto3

# SageMaker client used to register the model package.
sm_client = boto3.client(service_name="sagemaker")

# Human-readable description stored with this model package version.
model_package_description = " ".join(
    [
        "XGBoost classification model for breast cancer detection.",
        "Trained on clinical diagnostic features.",
        "This version represents the baseline production candidate.",
    ]
)

# Single inference container: image, model artifact, and its environment.
inference_container = {
    "Image": xgboost_image_uri,
    "ModelDataUrl": model_data_url,
    "Environment": {
        "SAGEMAKER_PROGRAM": "inference.py",
        "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model",
    },
}

# Register the version in the group; it starts out awaiting manual approval.
create_response = sm_client.create_model_package(
    ModelPackageGroupName="xgboost-breast-cancer-detection",
    ModelPackageDescription=model_package_description,
    InferenceSpecification={
        "Containers": [inference_container],
        "SupportedContentTypes": ["text/csv"],
        "SupportedResponseMIMETypes": ["text/csv"],
    },
    ModelApprovalStatus="PendingManualApproval",
)


In [8]:
# Echo the create_model_package response so the notebook renders it.
create_response

{'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:823460696669:model-package/xgboost-breast-cancer-detection/4',
 'ResponseMetadata': {'RequestId': 'bd7c6b4a-a0d9-4f22-a6b1-23027945eea3',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'bd7c6b4a-a0d9-4f22-a6b1-23027945eea3',
   'strict-transport-security': 'max-age=47304000; includeSubDomains',
   'x-frame-options': 'DENY',
   'content-security-policy': "frame-ancestors 'none'",
   'cache-control': 'no-cache, no-store, must-revalidate',
   'x-content-type-options': 'nosniff',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '110',
   'date': 'Sun, 01 Feb 2026 16:48:12 GMT'},
  'RetryAttempts': 0}}

### Describe the Model Package

In [42]:
# The ARN returned at creation time is passed as the package name to describe.
model_package_arn = create_response["ModelPackageArn"]

describe_response = sm_client.describe_model_package(ModelPackageName=model_package_arn)


In [43]:
# Echo the describe_model_package response so the notebook renders it.
describe_response

{'ModelPackageGroupName': 'xgboost-breast-cancer-detection',
 'ModelPackageVersion': 4,
 'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:823460696669:model-package/xgboost-breast-cancer-detection/4',
 'ModelPackageDescription': 'XGBoost classification model for breast cancer detection. Trained on clinical diagnostic features. This version represents the baseline production candidate.',
 'CreationTime': datetime.datetime(2026, 2, 1, 16, 48, 12, 743000, tzinfo=tzlocal()),
 'InferenceSpecification': {'Containers': [{'Image': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1',
    'ImageDigest': 'sha256:b4f13edb198529c460692015797fa1ca6a8ff1ed64a149297174d922121b8fc4',
    'ModelDataUrl': 's3://sagemaker-us-east-1-823460696669/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2026-02-01-10-14-48/xgb-2026-02-01-10-14-48/output/model.tar.gz',
    'Environment': {'SAGEMAKER_PROGRAM': 'inference.py',
     'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model'},
    'ModelDa

## Part 3: Write the Model Card

### Create a Model Card

In [48]:
from sagemaker.model_card import (
    ModelOverview,
    ObjectiveFunction,
    Function,
    TrainingDetails,
    MetricGroup,
    Metric,
    EvaluationJob,
    IntendedUses,
    AdditionalInformation,
    RiskRatingEnum,
    ObjectiveFunctionEnum,
    FacetEnum,
    MetricTypeEnum,
)


In [49]:
# Overview section of the model card: what the model is and who owns it.
model_overview = ModelOverview(
    model_name="xgboost-breast-cancer-detection",
    model_description=(
        "XGBoost binary classification model "
        "for breast cancer detection using clinical diagnostic features."
    ),
    algorithm_type="XGBoost",
    problem_type="Binary Classification",
    model_owner="John K",
    model_creator="John K",
)


In [50]:
# Objective recorded on the card: minimize a loss facet.
loss_minimization = Function(
    function=ObjectiveFunctionEnum.MINIMIZE,
    facet=FacetEnum.LOSS,
)

objective_function = ObjectiveFunction(
    function=loss_minimization,
    notes="The model is trained to minimize binary log loss during optimization.",
)


In [51]:
# Metric group built from (name, value) pairs; every metric is a plain number.
binary_classification_metrics = MetricGroup(
    name="binary classification metrics",
    metric_data=[
        Metric(name=metric_name, type=MetricTypeEnum.NUMBER, value=score)
        for metric_name, score in (
            ("accuracy", 0.94),
            ("precision", 0.93),
            ("recall", 0.92),
            ("auc", 0.96),
        )
    ],
)

In [52]:
# Intended-use section of the card: purpose, audience, caveats, risk rating.
intended_uses = IntendedUses(
    purpose_of_model="To classify breast cancer tumors as benign or malignant using clinical diagnostic features.",
    intended_uses="Intended for research and educational purposes by data scientists and healthcare researchers.",
    factors_affecting_model_efficiency="Model performance may vary depending on data quality and population characteristics.",
    risk_rating=RiskRatingEnum.LOW,
    explanations_for_risk_rating="The model is not intended for clinical decision-making.",
)


In [56]:
# Training section of the model card. The observations are written by hand,
# but they name the training job that produced the registered artifact
# (training_job_name, set where the job is described earlier in the notebook)
# instead of stating that no training job exists.
training_details = TrainingDetails(
    objective_function=objective_function,
    training_observations=(
        "The XGBoost model was trained on clinical breast cancer data. "
        "Features include mean_radius, mean_texture, mean_perimeter, mean_area, mean_smoothness. "
        "Hyperparameters: max_depth=5, eta=0.2, objective=binary:logistic, num_round=100. "
        f"Trained by SageMaker training job {training_job_name}; "
        "the details above are a manual description."
    ),
)

In [58]:
from sagemaker.model_card import MetricGroup, Metric
from sagemaker.model_card import MetricTypeEnum

# Reuse the metric group defined earlier in the notebook rather than keeping a
# second, copy-pasted definition whose values could drift from the first.
my_metric_group = binary_classification_metrics

In [59]:
from sagemaker.model_card import EvaluationJob

# Single evaluation entry for the card, carrying the metric group above.
validation_evaluation = EvaluationJob(
    name="breast-cancer-validation-evaluation",
    evaluation_observation="Evaluation performed on a held-out 20% validation split of the dataset.",
    datasets=["s3://example-bucket/breast-cancer/validation"],  # placeholder
    metric_groups=[my_metric_group],
)

evaluation_details = [validation_evaluation]


In [60]:
# Free-form section of the card: ethics, caveats, and custom key/value details.
additional_information = AdditionalInformation(
    ethical_considerations=" ".join(
        [
            "The dataset may contain biases related to demographic representation.",
            "Predictions should be interpreted cautiously.",
        ]
    ),
    caveats_and_recommendations=" ".join(
        [
            "Further evaluation on larger and more diverse datasets is recommended",
            "before any production use.",
        ]
    ),
    custom_details=dict(
        dataset_source="Public breast cancer diagnostic dataset",
        model_type="XGBoost binary classifier",
    ),
)


In [61]:
# ModelCard and a SageMaker session are brought into scope here because no
# other cell in the notebook imports ModelCard or defines sagemaker_session;
# without these lines the cell raises NameError on a fresh kernel.
from sagemaker.model_card import ModelCard
from sagemaker.session import Session

sagemaker_session = Session()

model_card_name = "xgboost-breast-cancer-detection-card-v1"

# Assemble the card from the sections built in the preceding cells.
my_card = ModelCard(
    name=model_card_name,
    status="Draft",
    model_overview=model_overview,
    training_details=training_details,
    evaluation_details=evaluation_details,
    intended_uses=intended_uses,
    additional_information=additional_information,
    sagemaker_session=sagemaker_session,
)

# Create the card in SageMaker.
my_card.create()

'arn:aws:sagemaker:us-east-1:823460696669:model-card/xgboost-breast-cancer-detection-card-v1'

### Describe Model Card

In [62]:
import boto3
import pprint

# Client and card name for looking up the model card.
sm_client = boto3.client(service_name="sagemaker")
model_card_name = "xgboost-breast-cancer-detection-card-v1"

describe = sm_client.describe_model_card(ModelCardName=model_card_name)

# Pretty-print the describe_model_card response.
pprint.pprint(describe)


{'Content': '{"model_overview": {"model_name": '
            '"xgboost-breast-cancer-detection", "model_description": "XGBoost '
            'binary classification model for breast cancer detection using '
            'clinical diagnostic features.", "problem_type": "Binary '
            'Classification", "algorithm_type": "XGBoost", "model_creator": '
            '"John K", "model_owner": "John K", "model_artifact": []}, '
            '"intended_uses": {"purpose_of_model": "To classify breast cancer '
            'tumors as benign or malignant using clinical diagnostic '
            'features.", "intended_uses": "Intended for research and '
            'educational purposes by data scientists and healthcare '
            'researchers.", "factors_affecting_model_efficiency": "Model '
            'performance may vary depending on data quality and population '
            'characteristics.", "risk_rating": "Low", '
            '"explanations_for_risk_rating": "The model is not intended 