# In [None]:
# BuildAndDeployModel.ipynb

# # 1. Import Libraries
import boto3
import numpy as np
import pandas as pd
import sagemaker
from sagemaker import get_execution_role
from sagemaker.inputs import TrainingInput
from sagemaker.serializers import CSVSerializer
from sklearn.model_selection import train_test_split

# # 2. Load Data
# Read the iris dataset from the working directory
data = pd.read_csv('iris.csv')
data.head()

# # 3. Preprocess Data
# Replace the species labels with their integer category codes
data['species'] = pd.Categorical(data['species']).codes

# The encoded species column is the target; every other column is a feature
y = data['species']
X = data.drop(columns=['species'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# # 4. Upload Data to S3
# Bucket and key prefix for the dataset; the upload keys below are built from
# these two values instead of repeating the literals.
bucket_name = 'chris2223'
prefix = 'iris'

# Save both splits locally as header-less CSV with the target in the first
# column, followed by the feature columns.
pd.concat([y_train, X_train], axis=1).to_csv('train.csv', index=False, header=False)
pd.concat([y_test, X_test], axis=1).to_csv('test.csv', index=False, header=False)

# Upload each split to s3://<bucket_name>/<prefix>/<split>/<split>.csv
s3_resource = boto3.Session().resource('s3')
for split in ('train', 'test'):
    s3_key = f'{prefix}/{split}/{split}.csv'
    s3_resource.Bucket(bucket_name).Object(s3_key).upload_file(f'{split}.csv')

# # 5. Set Up SageMaker Estimator
role = get_execution_role()
session = sagemaker.Session()

# Specify the XGBoost container. The region is taken from the same SageMaker
# session that runs the training job, so the image and the job cannot end up
# in different regions.
container = sagemaker.image_uris.retrieve('xgboost', session.boto_region_name, '1.2-1')

# Set up the XGBoost estimator; model artifacts are written under
# s3://<bucket_name>/<prefix>/output (bucket_name and prefix are defined in
# the upload section above).
xgb = sagemaker.estimator.Estimator(
    container,
    role,
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=f's3://{bucket_name}/{prefix}/output',
    sagemaker_session=session
)

# Set hyperparameters
xgb.set_hyperparameters(
    objective='multi:softmax',  # Since iris is a multiclass classification problem
    num_class=3,  # 3 classes in the iris dataset
    num_round=100,
    max_depth=5,
    eta=0.2
)

# # 6. Train the Model
# Point the training channels at the CSV files uploaded earlier. The URIs are
# built from bucket_name and prefix so they always match the upload location.
train_input = TrainingInput(
    s3_data=f's3://{bucket_name}/{prefix}/train/train.csv',
    content_type='csv',
)
validation_input = TrainingInput(
    s3_data=f's3://{bucket_name}/{prefix}/test/test.csv',
    content_type='csv',
)

# The held-out test split is supplied as the 'validation' channel.
xgb.fit({'train': train_input, 'validation': validation_input})

# # 7. Deploy the Model
# Attach a CSV serializer so predict() can accept the NumPy feature array;
# without an explicit serializer the array is passed through unserialized and
# the endpoint invocation fails.
xgb_predictor = xgb.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.large',
    serializer=CSVSerializer(),
)

try:
    # # 8. Test the Model
    # Features only -- the target column was split off into y_test earlier.
    test_data_array = X_test.values
    predictions = xgb_predictor.predict(test_data_array)
    # Printed explicitly: a bare expression shows nothing when run as a script.
    print(predictions)
finally:
    # # 9. Clean Up
    # Always delete the endpoint, even if prediction raised, so it does not
    # keep running (and billing) after a failure.
    xgb_predictor.delete_endpoint()
