# Modeling with XGBoost

### Steps Covered:  
1. **Load Data from Feature Store / S3**
2. **Train XGBoost Model using SageMaker Training Job**
3. **Evaluate Performance with Processing Job** (accuracy, precision, recall, F1-score, etc.)
4. **Register Model in SageMaker Model Registry**
5. **Deploy Model as a SageMaker Endpoint**
6. **Decommission Endpoint** (optional)

In [None]:
from datetime import datetime, timezone

import boto3
import pandas as pd
import sagemaker
from sagemaker import ModelPackage
from sagemaker import image_uris
from sagemaker.estimator import Estimator
from sagemaker.feature_store.feature_group import FeatureGroup
from sagemaker.inputs import TrainingInput
from sagemaker.model import Model
from sagemaker.model_monitor import DataCaptureConfig
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer
from sklearn.model_selection import train_test_split



## Setting up Retrieval from Feature Store

## Data Splits and Push to New Bucket

In [None]:
# Separate the label column from the feature columns.
# NOTE(review): `feature_data` is not defined in this part of the notebook; it
# is presumably the DataFrame retrieved from the feature store — confirm.
# NOTE(review): assumes `Attrition` is already encoded as 0/1, which the
# binary:logistic objective used later needs — confirm upstream.
target_column = 'Attrition'
y = feature_data[target_column]
X = feature_data.drop(columns=[target_column])

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Put the label in the first column of each split, which is the CSV layout the
# SageMaker built-in XGBoost container reads.
train_data = pd.concat([y_train, X_train], axis=1)
test_data = pd.concat([y_test, X_test], axis=1)

train_path = "train.csv"
test_path = "test.csv"

# Write both splits locally with no header row and no index column.
for frame, csv_path in ((train_data, train_path), (test_data, test_path)):
    frame.to_csv(csv_path, index=False, header=False)

### Uploading to S3

In [None]:
# TODO: ALTER PATHS SO THEY ALIGN WITH THE REST OF THE PROJECT ##


# Push both local CSV splits to S3 under the shared key prefix.
# NOTE(review): `bucket` and `prefix` are not defined in this part of the
# notebook — confirm they are set in an earlier cell.
s3 = boto3.client("s3")

train_key = f"{prefix}/{train_path}"
test_key = f"{prefix}/{test_path}"
s3.upload_file(train_path, bucket, train_key)
s3.upload_file(test_path, bucket, test_key)

# Full S3 URIs of the uploaded splits; the training cell feeds these to
# the estimator as its input channels.
s3_train_path = f"s3://{bucket}/{prefix}/{train_path}"
s3_test_path = f"s3://{bucket}/{prefix}/{test_path}"

print(s3_train_path, s3_test_path, sep="\n")

## XGBoost Training

In [1]:
#TODO: DEFINE OUTPUT PATH VARIABLES AND ADJUST EVAL METRICS 



In [None]:
# Build a model name from a minute-resolution UTC timestamp so that separate
# runs get distinct names. A timezone-aware "now" replaces the deprecated
# datetime.utcnow(); the formatted string is the same.
model_name = f"assignment5-xgb-pred-model-monitor-{datetime.now(timezone.utc):%Y-%m-%d-%H%M}"

# Look up the SageMaker-managed XGBoost container image for this region.
image_uri = image_uris.retrieve(framework="xgboost", version="0.90-1", region=region)

# Configure the training job: one ml.m5.large instance, with the model
# artifact written under the project prefix.
xgb = Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path=f"s3://{bucket}/{prefix}/output",  # TODO: FIX OUTPUT PATH
    sagemaker_session=session,
)

# Binary classifier hyperparameters; AUC is the reported evaluation metric.
xgb.set_hyperparameters(
    objective="binary:logistic",
    num_round=100,
    max_depth=5,
    eta=0.2,
    subsample=0.8,
    eval_metric="auc",
)

# Launch the training job. The call was previously missing its closing
# parenthesis, which made the whole cell a SyntaxError.
# NOTE(review): the held-out test split doubles as the validation channel, so
# there is no untouched test set after training — confirm this is intended.
xgb.fit(
    {
        "train": TrainingInput(s3_train_path, content_type="csv"),
        "validation": TrainingInput(s3_test_path, content_type="csv"),
    }
)

## Model Evaluation

## Registering Model

In [None]:
# Wrap the trained artifact in a Model object so it can be registered.
# This uses the names the training cell defines (`image_uri`, `xgb`,
# `session`); the previous `xgboost_image_uri`, `xgb_estimator` and
# `sagemaker_session` were never defined in this notebook and raised NameError.
model = Model(
    image_uri=image_uri,
    model_data=xgb.model_data,
    role=role,
    sagemaker_session=session,
)

# Register the model in the SageMaker Model Registry, pre-approved.
# NOTE(review): the "xgboost-model-group" package group presumably has to
# exist before this call — confirm, or create it first.
model_package = model.register(
    model_package_group_name="xgboost-model-group",
    approval_status="Approved",
)

print(f"Model registered: {model_package.model_package_arn}")


## Deploying Endpoint

In [None]:
# Define model S3 path.
# Defaults to the artifact produced by the training job in this notebook; the
# previous "ADJUST_ME" placeholder was not a valid S3 URI and would make the
# deployment fail.
# TODO: point model_url at the final project's artifact if it should differ
# from this run's output, and adjust the endpoint name accordingly.
model_url = xgb.model_data


# Create SageMaker Model backed by the XGBoost container and that artifact.
model = Model(
    image_uri=image_uri,
    model_data=model_url,
    role=role,
    sagemaker_session=session,
)

# Derive the endpoint name from the timestamped model name.
endpoint_name = f"{model_name}-endpoint"

# Enable data capture for bias monitoring: every request is sampled and the
# captured records are written under the project's monitoring prefix.
data_capture_config = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=100,  # Capture 100% of requests
    destination_s3_uri=f"s3://{bucket}/{prefix}/monitoring",
)

# Deploy the model to a single-instance endpoint with data capture enabled.
model.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
    endpoint_name=endpoint_name,
    data_capture_config=data_capture_config,
)

In [None]:

# Client handle for the deployed endpoint; request payloads are sent as CSV.
csv_serializer = CSVSerializer()
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=session,
    serializer=csv_serializer,
)

In [None]:
# Report the name of the endpoint the model was deployed to.
deployed_message = f"Model deployed at endpoint: {endpoint_name}"
print(deployed_message)

## Spinning Down Endpoint