In [1]:
import boto3
import sagemaker
import pandas as pd
import io
from sklearn.metrics import classification_report

In [2]:
# SageMaker-level handles: the session (passed to the Predictor below), the
# notebook's execution role, and the session's default bucket, which is where
# the test CSV is fetched from.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = sess.default_bucket()

# Low-level boto3 clients: `sm` is used to look up the endpoint, `s3` to
# download the test data.
region = boto3.Session().region_name
s3 = boto3.client('s3')
sm = boto3.Session().client('sagemaker', region_name=region)

Access the Model Endpoint

In [3]:
# Select the most recently created endpoint that is actually able to serve
# requests. Taking element [0] of an unfiltered listing can return an endpoint
# that is still Creating / Failed / Updating, and fails with an opaque
# IndexError when the account has no endpoints at all.
in_service_endpoints = sm.list_endpoints(
    StatusEquals='InService',
    SortBy='CreationTime',
    SortOrder='Descending',
    MaxResults=1,
)['Endpoints']

if not in_service_endpoints:
    raise RuntimeError(
        'No SageMaker endpoint with status InService was found in this '
        'account/region; deploy the model before running this notebook.'
    )

endpoint_name = in_service_endpoints[0]['EndpointName']
endpoint_name

'xgboost-220329-0214-004-a00b3c69'

In [4]:
# Attach to the already-deployed endpoint by name; nothing is created here.
predictor = sagemaker.predictor.Predictor(endpoint_name, sagemaker_session=sess)

In [5]:
# Exchange CSV with the endpoint in both directions:
# responses are parsed from CSV text ...
predictor.deserializer = sagemaker.deserializers.CSVDeserializer()
# ... and request payloads (the feature rows) are encoded as CSV text.
predictor.serializer = sagemaker.serializers.CSVSerializer()

Get the Test Data

In [6]:
# Object key of the test CSV inside `bucket`.
key = "data/test/data.csv"

In [7]:
# Request the test CSV object; the payload itself is read in the next cell.
response = s3.get_object(Key=key, Bucket=bucket)

In [8]:
# Pull the whole object body into memory as raw bytes.
body_stream = response["Body"]
content = body_stream.read()

In [9]:
# Column labels applied to the test CSV when it is parsed: the 'approved'
# label comes first, followed by the ten feature columns.
column_names = ['approved'] + [
    'product_type_HUMAN OTC DRUG',
    'mentions_drug_interactions',
    'product_type_HUMAN PRESCRIPTION DRUG',
    'dosage_form_LIQUID',
    'dea_schedule_N/A',
    'dosage_form_TABLET, FILM COATED',
    'dosage_form_TABLET',
    'dosage_form_CAPSULE',
    'marketing_start_month_3',
    'dosage_form_SOLUTION',
]

In [10]:
# Parse the downloaded bytes. Because `names` is supplied, the first line of
# the file is treated as data rather than a header (spelled out here with
# header=None); index_col=False keeps the first column as a regular column.
test_data = pd.read_csv(
    io.BytesIO(content),
    header=None,
    names=column_names,
    index_col=False,
)

In [11]:
# Quick visual check that the columns line up with the supplied names.
test_data.head(n=5)

Unnamed: 0,approved,product_type_HUMAN OTC DRUG,mentions_drug_interactions,product_type_HUMAN PRESCRIPTION DRUG,dosage_form_LIQUID,dea_schedule_N/A,"dosage_form_TABLET, FILM COATED",dosage_form_TABLET,dosage_form_CAPSULE,marketing_start_month_3,dosage_form_SOLUTION
0,1,0,1,0,0,0,1,0,1,0,0
1,0,1,1,0,0,0,1,0,0,0,0
2,1,0,0,0,1,0,1,0,0,0,0
3,0,1,1,0,0,0,1,0,0,0,1
4,1,0,1,0,0,0,1,0,0,0,0


In [12]:
# Split the frame into the feature matrix (everything except 'approved')
# and the ground-truth label column.
X_test = test_data.drop('approved', axis='columns')
y_test = test_data.loc[:, 'approved']

Evaluate the Model on the Test Data

In [13]:
# The CSV deserializer returns a list of parsed rows, each a list of strings.
# The endpoint may put all scores on one comma-separated line or emit one
# score per line (this can differ between serving containers), so flatten
# every row; reading only row 0 would drop everything after the first line.
response_rows = predictor.predict(X_test.values)
scores = [value for row in response_rows for value in row]

# Fail here with a clear message rather than later inside the metric code.
if len(scores) != len(X_test):
    raise ValueError(
        f'Endpoint returned {len(scores)} predictions for {len(X_test)} test rows'
    )

y_prob = pd.Series(scores).astype('float')

In [14]:
# Turn scores into hard 0/1 labels: strictly greater than 0.5 maps to 1.
y_pred = y_prob.gt(0.5).astype('int')

In [15]:
# Per-class precision / recall / F1 of the thresholded predictions.
# The report is a pre-formatted text table, hence print() rather than display.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.84      0.74      0.79      1356
           1       0.77      0.86      0.81      1355

    accuracy                           0.80      2711
   macro avg       0.81      0.80      0.80      2711
weighted avg       0.81      0.80      0.80      2711

