# The orginal scikit-learn model code used

In [32]:
import torch
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder

In [33]:
model = torch.load('./trainedModel/xgboost.pt')
print(model)

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=6, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=10, n_jobs=None, num_parallel_tree=None,
              objective='multi:softprob', predictor=None, ...)


In [34]:
filepath = "./data/iot23_test_2.csv"
df = pd.read_csv(filepath)

In [164]:
X = df[['duration', 'orig_bytes', 'resp_bytes', 'missed_bytes', 'orig_pkts', 'orig_ip_bytes', 'resp_pkts', 'resp_ip_bytes', 'proto_icmp', 'proto_tcp', 'proto_udp', 'conn_state_OTH', 'conn_state_REJ', 'conn_state_RSTO', 'conn_state_RSTOS0', 'conn_state_RSTR', 'conn_state_RSTRH', 'conn_state_S0', 'conn_state_S1', 'conn_state_S2', 'conn_state_S3', 'conn_state_SF', 'conn_state_SH', 'conn_state_SHR']]
Y = df['label']
print(X.iloc[0])
label_encoder = LabelEncoder()
label_encoder = label_encoder.fit(Y)
Y = label_encoder.transform(Y)

duration              0.0
orig_bytes            0.0
resp_bytes            0.0
missed_bytes          0.0
orig_pkts             1.0
orig_ip_bytes        40.0
resp_pkts             0.0
resp_ip_bytes         0.0
proto_icmp            0.0
proto_tcp             1.0
proto_udp             0.0
conn_state_OTH        0.0
conn_state_REJ        0.0
conn_state_RSTO       0.0
conn_state_RSTOS0     0.0
conn_state_RSTR       0.0
conn_state_RSTRH      0.0
conn_state_S0         1.0
conn_state_S1         0.0
conn_state_S2         0.0
conn_state_S3         0.0
conn_state_SF         0.0
conn_state_SH         0.0
conn_state_SHR        0.0
Name: 0, dtype: float64


In [36]:
preds = model.predict(X)
test_accuracy = accuracy_score(Y, preds)
print("Test Accuracy: %.2f%%" % (test_accuracy * 100.0))

Test Accuracy: 75.72%


# Save the trained model to the tar-zipped form

In [49]:
import os
import boto3
import re
import json
import sagemaker

region = boto3.Session().region_name
sagemaker_session = sagemaker.Session()
role = get_execution_role()
bucket = sagemaker.Session().default_bucket()

In [100]:
model_file_name = "local-xgboost-model"
model.save_model(model_file_name)
!tar czvf model.tar.gz $model_file_name

local-xgboost-model


upload the trained model to S3 bucket

In [101]:
prefix = "sagemaker/DEMO-xgboost-byo"
bucket_path = "https://s3-{}.amazonaws.com/{}".format(region, bucket)
fObj = open("model.tar.gz", "rb")
key = os.path.join(prefix, model_file_name, "model.tar.gz")
boto3.Session().resource("s3").Bucket(bucket).Object(key).upload_fileobj(fObj)

# Set up Hosting for the model

In [102]:
from sagemaker.amazon.amazon_estimator import get_image_uri
container = sagemaker.image_uris.retrieve("xgboost", region, "1.5-1")

## Upload the self-trained model to the container

In [103]:
from time import gmtime, strftime

model_name = model_file_name + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
model_url = "https://s3-{}.amazonaws.com/{}/{}".format(region, bucket, key)
sm_client = boto3.client("sagemaker")

print(model_url)

primary_container = {
    "Image": container,
    "ModelDataUrl": model_url,
}

create_model_response2 = sm_client.create_model(
    ModelName=model_name, ExecutionRoleArn=role, PrimaryContainer=primary_container
)

print(create_model_response2["ModelArn"])

https://s3-us-west-2.amazonaws.com/sagemaker-us-west-2-374296686743/sagemaker/DEMO-xgboost-byo/local-xgboost-model/model.tar.gz
arn:aws:sagemaker:us-west-2:374296686743:model/local-xgboost-model2023-03-13-02-31-19


## Create endpoint Configuration

In [104]:
from time import gmtime, strftime

endpoint_config_name = "DEMO-XGBoostEndpointConfig-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(endpoint_config_name)
create_endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[
        {
            "InstanceType": "ml.m4.xlarge",
            "InitialInstanceCount": 1,
            "InitialVariantWeight": 1,
            "ModelName": model_name,
            "VariantName": "AllTraffic",
        }
    ],
)

print("Endpoint Config Arn: " + create_endpoint_config_response["EndpointConfigArn"])

DEMO-XGBoostEndpointConfig-2023-03-13-02-31-46
Endpoint Config Arn: arn:aws:sagemaker:us-west-2:374296686743:endpoint-config/demo-xgboostendpointconfig-2023-03-13-02-31-46


## create the endpoint

In [105]:
%%time
import time

endpoint_name = "DEMO-XGBoostEndpoint-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(endpoint_name)
create_endpoint_response = sm_client.create_endpoint(
    EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name
)
print(create_endpoint_response["EndpointArn"])

resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
status = resp["EndpointStatus"]
print("Status: " + status)

while status == "Creating":
    time.sleep(60)
    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
    status = resp["EndpointStatus"]
    print("Status: " + status)

print("Arn: " + resp["EndpointArn"])
print("Status: " + status)

DEMO-XGBoostEndpoint-2023-03-13-02-31-50
arn:aws:sagemaker:us-west-2:374296686743:endpoint/demo-xgboostendpoint-2023-03-13-02-31-50
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: InService
Arn: arn:aws:sagemaker:us-west-2:374296686743:endpoint/demo-xgboostendpoint-2023-03-13-02-31-50
Status: InService
CPU times: user 70.8 ms, sys: 1.99 ms, total: 72.8 ms
Wall time: 4min


## Check the ready-to-use model

In [125]:
runtime_client = boto3.client("runtime.sagemaker")

In [161]:
POINT_INDEX = 0
point_x = X.iloc[0:1]
np.savetxt("./data/test_point.csv", point_x, delimiter=",")

In [162]:
import json

file_name = (
    "./data/test_point.csv" 
)

with open(file_name, "r") as f:
    payload = f.read().strip()

response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name, ContentType="text/csv", Body=payload
)
result = response["Body"].read().decode()

print(result)

0.18077495694160461,0.12932561337947845,0.4295567572116852,0.26034262776374817



In [163]:
floatArr = np.fromstring(result, dtype=float, sep=',')
# floatArr = np.array(result)
print(floatArr.shape)
predictedLabel = np.argmax(floatArr)
print("Predicted Class Label: {}.".format(predictedLabel))
print("Actual Class Label: {}.".format(Y[POINT_INDEX]))

(4,)
Predicted Class Label: 2.
Actual Class Label: 2.


## Release Endpoint 

In [165]:
sm_client.delete_endpoint(EndpointName=endpoint_name)

{'ResponseMetadata': {'RequestId': '937b4cef-06f1-4b63-abc6-e2085f0e3029',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '937b4cef-06f1-4b63-abc6-e2085f0e3029',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Mon, 13 Mar 2023 04:30:20 GMT'},
  'RetryAttempts': 0}}