In [2]:
import sagemaker
import boto3
import json

# SageMaker session and the IAM execution role used throughout the notebook.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()

# Region and default bucket are both read from the session created above.
region = sess.boto_region_name
bucket = sess.default_bucket()

# Key prefix placed in front of every S3 path this notebook builds.
prefix = 'sagemaker-multimodel-endpoint'

In [3]:
import numpy as np
import pandas as pd
import os

In [4]:
# S3 location of the churn dataset and the local folder it is copied into.
local_prefix = 'churn_data'
source_file = 's3://sagemaker-sample-files/datasets/tabular/synthetic/churn.txt'

# Create the local folder (no error if it already exists), then fetch the file into it.
os.makedirs(local_prefix, exist_ok=True)
sagemaker.s3.S3Downloader.download(s3_uri=source_file, local_path=local_prefix)

In [6]:
# Read the downloaded file and expose each row's index as an explicit CustomerID column.
df = (
    pd.read_csv(f'./{local_prefix}/churn.txt')
    .assign(CustomerID=lambda frame: frame.index)
)
df.head()

Unnamed: 0,State,Account Length,Area Code,Phone,Int'l Plan,VMail Plan,VMail Message,Day Mins,Day Calls,Day Charge,...,Eve Charge,Night Mins,Night Calls,Night Charge,Intl Mins,Intl Calls,Intl Charge,CustServ Calls,Churn?,CustomerID
0,PA,163,806,403-2562,no,yes,300,8.162204,3,7.579174,...,6.508639,4.065759,100,5.111624,4.92816,6,5.673203,3,True.,0
1,SC,15,836,158-8416,yes,no,0,10.018993,4,4.226289,...,9.972592,7.14104,200,6.436188,3.221748,6,2.559749,8,False.,1
2,MO,131,777,896-6253,no,yes,300,4.70849,3,4.76816,...,4.566715,5.363235,100,5.142451,7.139023,2,6.254157,4,False.,2
3,WY,75,878,817-5729,yes,yes,700,1.268734,3,2.567642,...,2.333624,3.773586,450,3.814413,2.245779,6,1.080692,6,False.,3
4,WY,146,878,450-4942,yes,no,0,2.696177,3,5.908916,...,3.670408,3.751673,250,2.796812,6.905545,4,7.134343,6,True.,4


In [7]:
# Show the column labels of the loaded frame, including the CustomerID column added earlier.
df.columns

Index(['State', 'Account Length', 'Area Code', 'Phone', 'Int'l Plan',
       'VMail Plan', 'VMail Message', 'Day Mins', 'Day Calls', 'Day Charge',
       'Eve Mins', 'Eve Calls', 'Eve Charge', 'Night Mins', 'Night Calls',
       'Night Charge', 'Intl Mins', 'Intl Calls', 'Intl Charge',
       'CustServ Calls', 'Churn?', 'CustomerID'],
      dtype='object')

In [10]:
# Recode the two yes/no plan columns as 1/0.
df[["Int'l Plan", "VMail Plan"]] = df[["Int'l Plan", "VMail Plan"]].replace({'yes': 1, 'no': 0})

In [11]:
# Recode the target labels 'True.' / 'False.' as 1 / 0.
df['Churn?'] = df['Churn?'].replace({'True.': 1, 'False.': 0})

In [12]:
# Use the customer identifier as the row index.
df.index = df['CustomerID']

# Columns kept for modelling: the target first, then State, then the feature columns.
# 'Area Code', 'Phone' and the 'CustomerID' column itself are left out.
columns = [
    'Churn?',
    'State',
    'Account Length',
    "Int'l Plan",
    'VMail Plan',
    'VMail Message',
    'Day Mins',
    'Day Calls',
    'Day Charge',
    'Eve Mins',
    'Eve Calls',
    'Eve Charge',
    'Night Mins',
    'Night Calls',
    'Night Charge',
    'Intl Mins',
    'Intl Calls',
    'Intl Charge',
    'CustServ Calls',
]
df_processed = df.loc[:, columns]

In [14]:
# Preview the first five rows of the processed frame.
df_processed.head(n=5)

Unnamed: 0_level_0,Churn?,State,Account Length,Int'l Plan,VMail Plan,VMail Message,Day Mins,Day Calls,Day Charge,Eve Mins,Eve Calls,Eve Charge,Night Mins,Night Calls,Night Charge,Intl Mins,Intl Calls,Intl Charge,CustServ Calls
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,1,PA,163,0,1,300,8.162204,3,7.579174,3.933035,4,6.508639,4.065759,100,5.111624,4.92816,6,5.673203,3
1,0,SC,15,1,0,0,10.018993,4,4.226289,2.325005,0,9.972592,7.14104,200,6.436188,3.221748,6,2.559749,8
2,0,MO,131,0,1,300,4.70849,3,4.76816,4.537466,3,4.566715,5.363235,100,5.142451,7.139023,2,6.254157,4
3,0,WY,75,1,1,700,1.268734,3,2.567642,2.528748,5,2.333624,3.773586,450,3.814413,2.245779,6,1.080692,6
4,1,WY,146,1,0,0,2.696177,3,5.908916,6.015337,3,3.670408,3.751673,250,2.796812,6.905545,4,7.134343,6


In [15]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows as a test set, stratified on 'State', with a fixed seed
# so the split is reproducible.
df_train, df_test = train_test_split(
    df_processed,
    stratify=df_processed['State'],
    test_size=0.1,
    shuffle=True,
    random_state=42,
)

In [16]:
# Feature columns only: the processed column list without 'Churn?' and 'State'.
columns_no_target = [
    'Account Length',
    "Int'l Plan",
    'VMail Plan',
    'VMail Message',
    'Day Mins',
    'Day Calls',
    'Day Charge',
    'Eve Mins',
    'Eve Calls',
    'Eve Charge',
    'Night Mins',
    'Night Calls',
    'Night Charge',
    'Intl Mins',
    'Intl Calls',
    'Intl Charge',
    'CustServ Calls',
]

In [21]:
# Full test split, written with its CustomerID index as the leading column.
df_test.to_csv(f'{local_prefix}/churn_test.csv')

# Feature-only copy of the test split: no target, no State, no index column.
df_test.loc[:, columns_no_target].to_csv(f'{local_prefix}/churn_test_no_target.csv', index=False)

# Upload both files under the same S3 prefix; the last call's return value is the cell output.
sagemaker.s3.S3Uploader.upload(
    local_path=f'{local_prefix}/churn_test.csv',
    desired_s3_uri=f's3://{bucket}/{prefix}/{local_prefix}',
)

sagemaker.s3.S3Uploader.upload(
    local_path=f'{local_prefix}/churn_test_no_target.csv',
    desired_s3_uri=f's3://{bucket}/{prefix}/{local_prefix}',
)

's3://sagemaker-us-east-1-104877823522/sagemaker-multimodel-endpoint/churn_data/churn_test_no_target.csv'

In [23]:
!pip install sagemaker-experiments

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting sagemaker-experiments
  Using cached sagemaker_experiments-0.1.35-py3-none-any.whl (42 kB)
Installing collected packages: sagemaker-experiments
Successfully installed sagemaker-experiments-0.1.35
You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.


In [27]:
from sagemaker.amazon.amazon_estimator import image_uris
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from botocore.exceptions import ClientError
from time import gmtime, strftime

In [29]:
# Training job settings: instance size, instance count and the S3 location for training output.
train_instance_count = 1
train_instance_type = 'ml.m5.xlarge'
s3_output = f's3://{bucket}/{prefix}/training'

# Container image URI for the built-in XGBoost framework (version 1.3-1) in the session's region.
image = image_uris.retrieve(framework='xgboost', region=region, version='1.3-1')

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


'683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.3-1'