In [1]:
!pip install -U sagemaker boto3 pandas scikit-learn joblib



In [2]:
pip install --upgrade sagemaker


Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn.model import SKLearnModel
import joblib
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [4]:
# Resolve the AWS context used by the rest of the notebook
boto_session = boto3.Session()
sm_session = sagemaker.Session()

region = boto_session.region_name
role = get_execution_role()
bucket = sm_session.default_bucket()
prefix = "sagemaker/car-evaluation"  # key prefix under which the model archive is stored

print(f"bucket: {bucket}")

bucket: sagemaker-us-east-2-423623824438


In [5]:
# Load the dataset; `names=` supplies the column labels for the CSV
column_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
df = pd.read_csv('data/car.data', names=column_names)

# Encode categorical variables.
# Fit a separate LabelEncoder per column and keep each one: refitting a single
# shared encoder would discard every mapping except the last, making it impossible
# to encode new raw inputs consistently or to decode predicted class ids.
encoders = {}
for col in df.columns:
    encoders[col] = LabelEncoder()
    df[col] = encoders[col].fit_transform(df[col])

# Preserve the previous binding: `le` ends up as the encoder fitted on 'class'
le = encoders['class']

# Split the data into features and target
X = df.drop('class', axis=1)
y = df['class']

# Split the data into training and testing sets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:
# Build a seeded random forest and fit it on the training split in one step;
# fit() returns the estimator itself, which is then shown as the cell output
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
model

In [7]:
# Serialize the fitted estimator to a local joblib file
joblib.dump(value=model, filename='car_evaluation_model.joblib')

['car_evaluation_model.joblib']

In [8]:
# Create a tar.gz file of the model
!tar czvf car_evaluation_model.tar.gz car_evaluation_model.joblib

car_evaluation_model.joblib


In [9]:
# Upload the model to S3
# S3 keys always use "/" as the separator, so build the key directly rather than
# with os.path.join (which follows the local OS path convention).
key = f"{prefix}/car_evaluation_model.tar.gz"

# Open the archive in a context manager so the file handle is closed once the
# upload finishes (or fails) instead of being leaked.
with open("car_evaluation_model.tar.gz", "rb") as archive:
    boto3.Session().resource("s3").Bucket(bucket).Object(key).upload_fileobj(archive)

model_data = f"s3://{bucket}/{key}"
print(f"Model data: {model_data}")

Model data: s3://sagemaker-us-east-2-423623824438/sagemaker/car-evaluation/car_evaluation_model.tar.gz


In [10]:
# Describe the model to SageMaker: artifact location, serving code, and runtime.
# Note that this rebinds `model` from the trained estimator to the SageMaker model object.
model = SKLearnModel(
    model_data=model_data,  # S3 URI of the uploaded model archive
    role=role,
    entry_point="inference.py",
    source_dir="code",  # directory expected to contain inference.py and requirements.txt
    framework_version="1.2-1",  # should correspond to the scikit-learn version used for training
    py_version="py3",
)


In [11]:
# Create an endpoint backed by one ml.m5.large instance.
# NOTE(review): the recorded run of this cell failed the container ping health
# check; the endpoint's CloudWatch logs should show the underlying error.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
)

----------------------------------------------*

Please check the troubleshooting guide for common errors: https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-python-sdk-troubleshooting.html#sagemaker-python-sdk-troubleshooting-create-endpoint


UnexpectedStatusException: Error hosting endpoint sagemaker-scikit-learn-2024-09-03-05-51-03-281: Failed. Reason: The primary container for production variant AllTraffic did not pass the ping health check. Please check CloudWatch logs for this endpoint.. Try changing the instance type or reference the troubleshooting page https://docs.aws.amazon.com/sagemaker/latest/dg/async-inference-troubleshooting.html