In [5]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn.estimator import SKLearn
import pandas as pd
from sklearn.datasets import load_diabetes

In [6]:
# Create a SageMaker session and resolve the default S3 bucket associated
# with it. Both names are module-level and used by later cells.
session = sagemaker.Session()
bucket = session.default_bucket()

In [9]:
# Show the execution role resolved for this environment. The return value is
# only displayed as the cell output; it is not assigned to any variable.
get_execution_role()

In [10]:
# Load the scikit-learn diabetes dataset and combine the feature columns and
# the 'target' column into one frame so it can be shipped as a single CSV.
diabetes = load_diabetes()
X = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
y = pd.DataFrame(diabetes.target, columns=['target'])
data = pd.concat([X, y], axis=1)

# Save to CSV and upload to S3. `session` and `bucket` come from the setup
# cell near the top of the notebook, so they are not re-created here.
data.to_csv('diabetes.csv', index=False)
prefix = 'sagemaker/diabetes'
input_s3 = session.upload_data('diabetes.csv', bucket=bucket, key_prefix=prefix)

# SageMaker role used for both the training job and the endpoint.
# NOTE(review): this ARN is hard-coded to one account, while an earlier cell
# calls get_execution_role() without using its result -- confirm whether the
# hard-coded role is intentional before sharing this notebook.
role = 'arn:aws:iam::026090555467:role/SageMakerExecutionRole'

# Entry-point script executed inside the SageMaker scikit-learn container.
# It serves two purposes:
#   * run as __main__ during training: fit the model and save it to SM_MODEL_DIR
#   * imported at inference time: model_fn() tells the serving container how
#     to load the saved model (the container has no default loader, so an
#     endpoint deployed without model_fn cannot answer requests)
train_script = '''
import os

import joblib
import pandas as pd
from sklearn.linear_model import LinearRegression

MODEL_FILE = "model.joblib"


def model_fn(model_dir):
    """Load the fitted model from model_dir; called by the serving container."""
    return joblib.load(os.path.join(model_dir, MODEL_FILE))


if __name__ == "__main__":
    # SageMaker exposes the local directory of the "train" channel through
    # SM_CHANNEL_TRAIN; fall back to the conventional path if it is unset.
    train_dir = os.environ.get("SM_CHANNEL_TRAIN", "/opt/ml/input/data/train")
    data = pd.read_csv(os.path.join(train_dir, "diabetes.csv"))
    X = data.drop(columns=['target'])
    y = data['target']

    model = LinearRegression()
    model.fit(X, y)

    # Everything written to SM_MODEL_DIR is packaged as the model artifact.
    model_dir = os.environ["SM_MODEL_DIR"]
    joblib.dump(model, os.path.join(model_dir, MODEL_FILE))
'''

# Save the training script next to the notebook so the estimator can upload it.
with open("train.py", "w", encoding="utf-8") as f:
    f.write(train_script)

# Set up and run the training job
sklearn_estimator = SKLearn(
    entry_point="train.py",
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    framework_version="0.23-1",
    py_version="py3",
    output_path=f"s3://{bucket}/{prefix}/output",
)

# The dict key names the input channel; the training script reads the CSV
# from that channel's directory.
sklearn_estimator.fit({'train': input_s3})

# Deploy the trained model. The endpoint keeps running until
# `predictor.delete_endpoint()` is called.
predictor = sklearn_estimator.deploy(instance_type="ml.m5.large", initial_instance_count=1)


----------

In [11]:
# Test the endpoint in Python, then tear it down.
# The prediction has to happen BEFORE the endpoint is deleted; deleting in
# `finally` ensures the endpoint is removed even if the request fails.
test_data = diabetes.data[0:1]  # first sample, kept 2-D via the slice
try:
    result = predictor.predict(test_data)
    print(result)
finally:
    predictor.delete_endpoint()