# Alejandro Marchini Assignment 4.1

In [109]:
import os
import boto3
import sagemaker

role = sagemaker.get_execution_role()
sess = sagemaker.Session()
region = sess.boto_region_name

bucket = sess.default_bucket()
prefix = "DEMO-breast-cancer-prediction-xgboost-highlevel"

print(f"Region: {region}")
print(f"S3 Bucket: {bucket}")
print(f"Prefix: {prefix}")

Region: us-east-1
S3 Bucket: sagemaker-us-east-1-933747558592
Prefix: DEMO-breast-cancer-prediction-xgboost-highlevel


## Part 1: Set Up Model Group

In [110]:
sagemaker_client = boto3.client('sagemaker', region_name=region)

# nodel package group details
model_package_group_name = 'xgboost-breast-cancer-detection'
model_package_group_description = (
    'XGBoost binary classification model group for breast cancer tumor detection. '
    'Trained on Wisconsin Diagnostic Breast Cancer dataset to predict malignant vs benign tumors. '
    'Uses 30 diagnostic features extracted from tumor cell nuclei images.'
)

In [111]:
try:
    create_model_package_group_response = sagemaker_client.create_model_package_group(
        ModelPackageGroupName=model_package_group_name,
        ModelPackageGroupDescription=model_package_group_description
    )
    
    print(f"Model Package Group ARN: {create_model_package_group_response['ModelPackageGroupArn']}")

# added this so I can rerun the cell without error
except Exception as e:
    error_message = str(e)
    if "already exists" in error_message:
        print(f"Model Package Group '{model_package_group_name}' already exists.")
        
        # Get existing model package group details
        describe_response = sagemaker_client.describe_model_package_group(
            ModelPackageGroupName=model_package_group_name
        )
        print(f"Model Package Group ARN: {describe_response['ModelPackageGroupArn']}")
    else:
        print(f"Error creating Model Package Group: {error_message}")

Model Package Group 'xgboost-breast-cancer-detection' already exists.
Model Package Group ARN: arn:aws:sagemaker:us-east-1:933747558592:model-package-group/xgboost-breast-cancer-detection


In [112]:
# Verify Model Package Group creation by describing it
describe_mpg_response = sagemaker_client.describe_model_package_group(
    ModelPackageGroupName=model_package_group_name
)

describe_mpg_response

{'ModelPackageGroupName': 'xgboost-breast-cancer-detection',
 'ModelPackageGroupArn': 'arn:aws:sagemaker:us-east-1:933747558592:model-package-group/xgboost-breast-cancer-detection',
 'ModelPackageGroupDescription': 'XGBoost binary classification model group for breast cancer tumor detection. Trained on Wisconsin Diagnostic Breast Cancer dataset to predict malignant vs benign tumors. Uses 30 diagnostic features extracted from tumor cell nuclei images.',
 'CreationTime': datetime.datetime(2026, 1, 31, 17, 19, 2, 242000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:933747558592:user-profile/d-k3ulyxb1rb4e/default-1767752827442',
  'UserProfileName': 'default-1767752827442',
  'DomainId': 'd-k3ulyxb1rb4e',
  'IamIdentity': {'Arn': 'arn:aws:sts::933747558592:assumed-role/LabRole/SageMaker',
   'PrincipalId': 'AROA5SZ5RBDAOGD3FKJPV:SageMaker'}},
 'ModelPackageGroupStatus': 'Completed',
 'ResponseMetadata': {'RequestId': 'b664332f-d7fd-46a3-aea8-5612ac128d2

## Part 2: Set Up Model Package

In [113]:
image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.7-1"
)

# get the model artifacts from the lab training job
model_data_url = f"s3://{bucket}/{prefix}/output/xgb-2026-01-30-01-35-05/xgb-2026-01-30-01-35-05/output/model.tar.gz"

print(f"Container Image: {image_uri}")
print(f"Model Data: {model_data_url}")

Container Image: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1
Model Data: s3://sagemaker-us-east-1-933747558592/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2026-01-30-01-35-05/xgb-2026-01-30-01-35-05/output/model.tar.gz


### Create the Inference Specification

In [114]:
# Define the inference specification
inference_specification = {
    "Containers": [
        {
            "Image": image_uri,
            "ModelDataUrl": model_data_url,
            "Framework": "XGBOOST",
            "FrameworkVersion": "1.7-1"
        }
    ],
    "SupportedContentTypes": ["text/csv", "application/json"],
    "SupportedResponseMIMETypes": ["text/csv", "application/json"],
    "SupportedRealtimeInferenceInstanceTypes": [
        "ml.m5.xlarge"
    ],
    "SupportedTransformInstanceTypes": [
        "ml.m5.xlarge"
    ]
}

### Create the Model Package

In [115]:
model_package_description = (
    "XGBoost binary classifier v1.0 for breast cancer detection. "
    "Trained on Wisconsin Diagnostic Breast Cancer dataset with 30 features. "
    "Predicts probability of malignant tumor (binary:logistic objective). "
    "Hyperparameters: max_depth=5, eta=0.2, gamma=4, num_round=100."
)

try:
    create_model_package_response = sagemaker_client.create_model_package(
        ModelPackageGroupName=model_package_group_name,
        ModelPackageDescription=model_package_description,
        InferenceSpecification=inference_specification,
        ModelApprovalStatus="Approved",
        CustomerMetadataProperties={
            "Algorithm": "XGBoost",
            "Dataset": "Wisconsin Breast Cancer Diagnostic",
            "Features": "30 diagnostic features",
            "Objective": "binary:logistic",
            "UseCase": "Medical diagnosis support"
        }
    )
    
    model_package_arn = create_model_package_response['ModelPackageArn']
    print(f"Model Package ARN: {model_package_arn}")

except Exception as e:
    print(f"Error creating Model Package: {str(e)}")
    raise


Model Package ARN: arn:aws:sagemaker:us-east-1:933747558592:model-package/xgboost-breast-cancer-detection/7


### Describe the Model Package

In [116]:
describe_model_package_response = sagemaker_client.describe_model_package(
    ModelPackageName=model_package_arn
)

describe_model_package_response

{'ModelPackageGroupName': 'xgboost-breast-cancer-detection',
 'ModelPackageVersion': 7,
 'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:933747558592:model-package/xgboost-breast-cancer-detection/7',
 'ModelPackageDescription': 'XGBoost binary classifier v1.0 for breast cancer detection. Trained on Wisconsin Diagnostic Breast Cancer dataset with 30 features. Predicts probability of malignant tumor (binary:logistic objective). Hyperparameters: max_depth=5, eta=0.2, gamma=4, num_round=100.',
 'CreationTime': datetime.datetime(2026, 1, 31, 18, 15, 34, 651000, tzinfo=tzlocal()),
 'InferenceSpecification': {'Containers': [{'Image': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1',
    'ImageDigest': 'sha256:b4f13edb198529c460692015797fa1ca6a8ff1ed64a149297174d922121b8fc4',
    'ModelDataUrl': 's3://sagemaker-us-east-1-933747558592/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2026-01-30-01-35-05/xgb-2026-01-30-01-35-05/output/model.tar.gz',
    'Framework'

## Write the Model Card

### Define Model Card Content

In [117]:
model_card_name = f"{model_package_group_name}-card"

model_card_content = {
    "model_overview": {
        "model_description": "XGBoost binary classification model for breast cancer tumor detection. Predicts the probability that a tumor is malignant based on 30 diagnostic features extracted from digitized images of fine needle aspirate (FNA) of breast masses.",
        "model_owner": "Alejandro Marchini",
        "model_creator": "Alejandro Marchini",
        "problem_type": "Binary Classification",
        "algorithm_type": "XGBoost",
        "model_id": model_package_arn,
        "model_artifact": [model_data_url],
        "model_name": "xgboost-breast-cancer-v1",
        "model_version": 1.0
    },
    "intended_uses": {
        "purpose_of_model": "Medical diagnosis support tool to assist healthcare professionals in identifying malignant breast tumors from diagnostic imaging data.",
        "intended_uses": "This model is intended to be used as a decision support tool in clinical settings to help prioritize cases requiring urgent attention and support diagnostic workflows. Not intended as sole diagnostic tool or for use outside Wisconsin Breast Cancer Diagnostic dataset characteristics.",
        "factors_affecting_model_efficiency": "Model performance may vary based on image quality, patient demographics, and tumor characteristics that differ from training data.",
        "risk_rating": "Medium",
        "explanations_for_risk_rating": "Medical diagnosis requires human oversight. Model trained on specific dataset demographics and may not generalize to all populations."
    },
    "business_details": {
        "business_problem": "Early and accurate detection of malignant breast tumors is critical for patient outcomes. This model aims to reduce diagnostic time and improve accuracy in tumor classification.",
        "business_stakeholders": "Healthcare providers, radiologists, oncology departments, and clinical decision support teams.",
        "line_of_business": "Healthcare - Medical Imaging & Diagnostics"
    },
    "training_details": {
        "objective_function": "binary:logistic - Outputs probability score between 0 (benign) and 1 (malignant)",
        "training_observations": "Training set: approximately 456 samples (80% of 569 total observations)",
        "training_job_details": {
            "training_arn": f"arn:aws:sagemaker:{region}:training-job/xgb-2026-01-30-01-35-05",
            "training_datasets": [f"s3://{bucket}/{prefix}/train"],
            "training_environment": {
                "container_image": [image_uri]
            },
            "training_metrics": [
                {"name": "objective_metric", "value": 0.03, "notes": "Validation error"}
            ],
            "hyper_parameters": [
                {"name": "max_depth", "value": "5"},
                {"name": "eta", "value": "0.2"},
                {"name": "gamma", "value": "4"},
                {"name": "min_child_weight", "value": "6"},
                {"name": "subsample", "value": "0.8"},
                {"name": "objective", "value": "binary:logistic"},
                {"name": "num_round", "value": "100"}
            ]
        }
    },
    "evaluation_details": [
        {
            "name": "Validation Set Evaluation",
            "evaluation_observation": "Model evaluated on 10% validation set (approximately 57 samples)",
            "evaluation_job_arn": f"arn:aws:sagemaker:{region}:training-job/xgb-2026-01-30-01-35-05",
            "datasets": [f"s3://{bucket}/{prefix}/validation"],
            "metadata": {
                "validation_split": "10% of total dataset",
                "metrics_calculated": "error rate, accuracy"
            }
        }
    ],
    "additional_information": {
        "ethical_considerations": "Medical AI models must be used as decision support tools only. Final diagnostic decisions should always be made by qualified healthcare professionals. Consider potential bias in training data demographics.",
        "caveats_and_recommendations": "Model trained on Wisconsin Breast Cancer Diagnostic dataset. Performance may vary on different populations. Regular model monitoring and retraining recommended as new data becomes available.",
        "custom_details": {
            "dataset_source": "UCI Machine Learning Repository - Wisconsin Breast Cancer Diagnostic",
            "features": "30 diagnostic features: radius, texture, perimeter, area, smoothness, compactness, concavity, concave points, symmetry, and fractal dimension (mean, SE, and worst values)",
            "framework": "SageMaker XGBoost 1.7-1",
            "instance_type": "ml.m5.xlarge",
            "deployment_options": "Real-time endpoint, Batch Transform",
            "input_format": "CSV or JSON with 30 features",
            "output_format": "Probability score (0-1)"
        }
    }
}

### Create the Model Card

In [118]:
try:
    create_model_card_response = sagemaker_client.create_model_card(
        ModelCardName=model_card_name,
        Content=json.dumps(model_card_content),
        ModelCardStatus='Draft',  # Can be: Draft, PendingReview, Approved, Archived
        Tags=[
            {'Key': 'Project', 'Value': 'AAI-540-Assignment-4.1'},
            {'Key': 'Model', 'Value': 'XGBoost-Breast-Cancer'},
            {'Key': 'Owner', 'Value': 'Alejandro-Marchini'}
        ]
    )
    
    model_card_arn = create_model_card_response['ModelCardArn']
    print(f"Model Card ARN: {model_card_arn}")
    
except Exception as e:
    error_message = str(e)
    if "already exists" in error_message:
        print(f"Model Card '{model_card_name}' already exists.")
        
        describe_card = sagemaker_client.describe_model_card(ModelCardName=model_card_name)
        model_card_arn = describe_card['ModelCardArn']
        print(f"Model Card ARN: {model_card_arn}")
    else:
        print(f"Error creating Model Card: {error_message}")

Model Card 'xgboost-breast-cancer-detection-card' already exists.
Model Card ARN: arn:aws:sagemaker:us-east-1:933747558592:model-card/xgboost-breast-cancer-detection-card


### Describe the Model Card

In [119]:
describe_model_card_response = sagemaker_client.describe_model_card(
    ModelCardName=model_card_name
)

describe_model_card_response

{'ModelCardArn': 'arn:aws:sagemaker:us-east-1:933747558592:model-card/xgboost-breast-cancer-detection-card',
 'ModelCardName': 'xgboost-breast-cancer-detection-card',
 'ModelCardVersion': 1,
 'Content': '{"model_overview": {"model_description": "XGBoost binary classification model for breast cancer tumor detection. Predicts the probability that a tumor is malignant based on 30 diagnostic features extracted from digitized images of fine needle aspirate (FNA) of breast masses.", "model_owner": "Alejandro Marchini", "model_creator": "Alejandro Marchini", "problem_type": "Binary Classification", "algorithm_type": "XGBoost (Extreme Gradient Boosting)", "model_id": "arn:aws:sagemaker:us-east-1:933747558592:model-package/xgboost-breast-cancer-detection/1", "model_artifact": ["s3://sagemaker-us-east-1-933747558592/DEMO-breast-cancer-prediction-xgboost-highlevel/output/xgb-2026-01-30-01-35-05/output/model.tar.gz"], "model_name": "xgboost-breast-cancer-v1", "model_version": 1.0}, "intended_uses"