In [None]:
import os

import boto3
import numpy as np
import pandas as pd
import sagemaker
from sagemaker import get_execution_role
from sagemaker.serializers import CSVSerializer
from sagemaker.xgboost.estimator import XGBoost
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [None]:
# Local scratch directory for the CSV files written before upload.
os.makedirs('data', exist_ok=True)

# SageMaker session and execution role; the upload, training and deploy
# cells further down all rely on these two names.
session = sagemaker.Session()
role = get_execution_role()


In [3]:
# Load Iris, draw a reproducible 50-sample subset, then split it 80/20.
# One seed drives both the subsampling and the split so that
# "Restart Kernel -> Run All" always produces the same train/test rows.
RANDOM_SEED = 42

iris = load_iris()
X, y = iris.data, iris.target
rng = np.random.default_rng(RANDOM_SEED)
subset_indices = rng.choice(len(X), size=50, replace=False)  # 50 distinct row indices
X, y = X[subset_indices], y[subset_indices]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_SEED
)


In [5]:
# Build one DataFrame per split (feature columns first, label appended as the
# final 'target' column) and write both as headerless, index-free CSV files.
# NOTE(review): the label ends up in the LAST column; xgboost_script.py is not
# visible from this notebook -- confirm it reads the label from that position.
train_path = 'data/train.csv'
test_path = 'data/test.csv'

train_data = pd.DataFrame(X_train, columns=iris.feature_names).assign(target=y_train)
test_data = pd.DataFrame(X_test, columns=iris.feature_names).assign(target=y_test)

for frame, csv_path in ((train_data, train_path), (test_data, test_path)):
    frame.to_csv(csv_path, index=False, header=False)



In [7]:
# Upload both CSV files to the session's default S3 bucket under one key prefix.
s3_bucket = session.default_bucket()
s3_prefix = "sagemaker-xgboost-iris-small"

train_s3_path = session.upload_data(
    train_path,
    bucket=s3_bucket,
    key_prefix=s3_prefix,
)
test_s3_path = session.upload_data(
    test_path,
    bucket=s3_bucket,
    key_prefix=s3_prefix,
)

ConnectTimeoutError: Connect timeout on endpoint URL: "https://sts.us-east-1.amazonaws.com/"

In [2]:


# Configure a deliberately small XGBoost training job and launch it.
# The hyperparameters are handed to the entry-point script; their names mirror
# XGBoost's own parameters (presumably forwarded as-is -- the script is not
# visible here).
xgb_hyperparameters = {
    'max_depth': 3,            # shallow trees
    'eta': 0.1,                # learning rate (step-size shrinkage)
    'gamma': 2,                # minimum loss reduction required to split further
    'min_child_weight': 3,
    'subsample': 0.8,          # fraction of rows sampled per boosting round
    'objective': 'multi:softprob',
    'num_class': 3,
    'num_round': 10,           # only a few boosting rounds
}
model_output_path = f"s3://{s3_bucket}/{s3_prefix}/model"

xgb = XGBoost(
    entry_point='xgboost_script.py',
    framework_version='1.5-1',
    instance_type='ml.t3.medium',
    instance_count=1,
    role=role,
    output_path=model_output_path,
    hyperparameters=xgb_hyperparameters,
)

# Train using the uploaded CSVs as the 'train' and 'validation' channels.
training_channels = {'train': train_s3_path, 'validation': test_s3_path}
xgb.fit(training_channels)


ConnectTimeoutError: Connect timeout on endpoint URL: "https://sts.us-east-1.amazonaws.com/"

In [None]:
# Deploy the trained model to a real-time endpoint.
# The prediction cell sends a numpy feature matrix, so requests are serialized
# as CSV: the XGBoost predictor's default LibSVM serializer only accepts
# strings or file-like objects and would reject an array.
# NOTE(review): assumes the entry-point script (not visible here) accepts
# text/csv requests -- confirm.
predictor = xgb.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    serializer=CSVSerializer(),
)



In [1]:
# Connectivity / credentials check: ask STS which identity is in use.
# Dedicated names are used so the SageMaker `session` created in the setup
# cell is not overwritten by a boto3 session.
boto_session = boto3.Session(region_name="us-east-1")
sts_client = boto_session.client("sts")
print(sts_client.get_caller_identity())

KeyboardInterrupt: 

In [None]:
# Feature matrix for inference: every column except the label, as float32.
test_features = test_data.drop(columns=['target'])
test_data_array = test_features.to_numpy(dtype='float32')


In [None]:
# Send the held-out feature matrix to the deployed endpoint.
predictions = predictor.predict(data=test_data_array)



In [None]:
# Evaluate results.
# Coerce the endpoint response to a float matrix with one row per test sample
# before taking the argmax: a CSV-deserialized response arrives as nested
# lists of strings (possibly flattened into a single row), and comparing
# strings would not rank the class probabilities numerically.
class_probabilities = np.asarray(predictions, dtype=float).reshape(len(y_test), -1)
predicted_labels = class_probabilities.argmax(axis=1)
accuracy = (predicted_labels == y_test).mean()
print(f"Model accuracy: {accuracy:.2f}")

In [None]:
# Clean up resources.
# Tear down the inference endpoint.
predictor.delete_endpoint()

# Remove only the objects stored under this notebook's key prefix (uploaded
# CSVs and the model output path). The bucket itself is the session's default
# bucket, which may hold data from other work, so it is deliberately kept.
# The trailing slash keeps the filter from matching sibling prefixes that
# merely start with the same text.
boto3.resource("s3").Bucket(s3_bucket).objects.filter(Prefix=f"{s3_prefix}/").delete()

print("Lab completed successfully!")