In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Build a synthetic classification dataset: 1000 rows and 20 features
# (15 informative + 5 redundant). The fixed random_state makes it reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)

# Wrap the feature matrix in a DataFrame with one 'feature_<i>' column per
# feature, then append the labels as the last column, 'target'.
feature_names = [f'feature_{idx}' for idx in range(X.shape[1])]
df = pd.DataFrame(X, columns=feature_names).assign(target=y)

In [2]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# partition identical from run to run.
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Write each partition to a local CSV. index=False leaves the row index out of
# the file, so only the feature and target columns are saved.
for partition, csv_name in ((train, 'train.csv'), (test, 'test.csv')):
    partition.to_csv(csv_name, index=False)

In [4]:
# Look up the SageMaker execution role and the default S3 bucket used for the uploads below
import sagemaker
from sagemaker import get_execution_role

# IAM role that is later handed to the estimator.
role = get_execution_role()

# Ask a SageMaker session for its default bucket.
default_session = sagemaker.Session()
bucket = default_session.default_bucket()

# Bare expression so the notebook displays the bucket name as the cell output.
bucket

'sagemaker-us-east-1-011528297661'

In [5]:
# Upload the train/test CSVs to the bucket under a shared key prefix.
prefix = 'sagemaker/classification'

# Build the session once and reuse it for both uploads. The original created a
# fresh sagemaker.Session() per call, which repeats the session/client setup
# for no benefit.
sagemaker_session = sagemaker.Session()
train_location = sagemaker_session.upload_data('train.csv', bucket=bucket, key_prefix=prefix)
test_location = sagemaker_session.upload_data('test.csv', bucket=bucket, key_prefix=prefix)

# upload_data returns the S3 URI of the uploaded object; echo both so the
# destinations are visible in the cell output.
print(f"Train data uploaded to: {train_location}")
print(f"Test data uploaded to: {test_location}")

Train data uploaded to: s3://sagemaker-us-east-1-011528297661/sagemaker/classification/train.csv
Test data uploaded to: s3://sagemaker-us-east-1-011528297661/sagemaker/classification/test.csv


In [7]:
# Push the local training script to the same bucket/prefix as the datasets.
# NOTE(review): the estimator cell passes the local path 'train.py' as its
# entry_point rather than this S3 URI - confirm this upload is still needed.
script_session = sagemaker.Session()
train_script_location = script_session.upload_data(
    'train.py',
    bucket=bucket,
    key_prefix=prefix,
)

print(f"Training script uploaded to: {train_script_location}")

Training script uploaded to: s3://sagemaker-us-east-1-011528297661/sagemaker/classification/train.py


In [8]:
# Create an estimator and run the training job
from sagemaker.sklearn.estimator import SKLearn

# Training-job configuration, collected in one place: the local entry-point
# script, the execution role, a single ml.m5.large instance, and the
# framework / Python versions requested for the container.
estimator_settings = {
    'entry_point': 'train.py',
    'role': role,
    'instance_count': 1,
    'instance_type': 'ml.m5.large',
    'framework_version': '0.23-1',
    'py_version': 'py3',
    'script_mode': True,
}
sklearn_estimator = SKLearn(**estimator_settings)

# Start the training job, exposing the uploaded training CSV as the 'train'
# input channel.
training_channels = {'train': train_location}
sklearn_estimator.fit(training_channels)

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-08-03-17-34-36-066


2024-08-03 17:34:36 Starting - Starting the training job...
2024-08-03 17:34:52 Starting - Preparing the instances for training...
2024-08-03 17:35:23 Downloading - Downloading input data...
2024-08-03 17:35:48 Downloading - Downloading the training image...
2024-08-03 17:36:29 Training - Training image download completed. Training in progress..[34m2024-08-03 17:36:32,766 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-08-03 17:36:32,769 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-08-03 17:36:32,809 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-08-03 17:36:33,016 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-08-03 17:36:33,028 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-08-03 17:36:33,039 sagemaker-training-toolkit INFO     No GPUs det