## screencap of kaggle

- Install dependencies for this Jupyter notebook

In [None]:
%%bash
# Stop at the first failing command; otherwise the cell only reports the
# status of the last command and an earlier failure goes unnoticed.
set -euo pipefail
# Kaggle CLI: used below to download the competition data.
pip install kaggle
# jq: used below to extract the account ID from the AWS CLI's JSON output.
sudo apt-get update
sudo apt-get install -y jq

- Upload your own `kaggle.json` using the SageMaker Studio File Browser on the left, then...
- Move `kaggle.json` to `/root/.kaggle/` on the SageMaker Studio notebook instance, and restrict its permissions (`chmod 600`) so that other users cannot read it
- Download and unzip the santander-customer-transaction-prediction dataset from Kaggle to the notebook instance

In [None]:
%%bash
# Abort on the first error so a failed download is not followed by a confusing unzip error.
set -euo pipefail
# `mv` into /root/.kaggle/ fails if the directory does not exist yet, so create it first.
mkdir -p /root/.kaggle
# Only move the token while it is still in the working directory; this keeps
# the cell re-runnable after the token has already been moved once.
if [ -f kaggle.json ]; then
    mv kaggle.json /root/.kaggle/
fi
# Owner read/write only.
chmod 600 /root/.kaggle/kaggle.json
kaggle competitions download -c santander-customer-transaction-prediction
# -o overwrites previously extracted files without prompting (the cell has no interactive stdin).
unzip -o ./santander-customer-transaction-prediction.zip

- Create a temporary S3 bucket for this project
- Upload `train.csv` from the Kaggle competition to S3

In [None]:
%%bash
# Abort on the first error so we never try to upload to a bucket that was not created.
set -euo pipefail
# The account ID suffix keeps the S3 bucket name globally unique.
AWS_ACCOUNT_ID=$(aws sts get-caller-identity | jq -r '.Account')
BUCKET="smstudio-santander-$AWS_ACCOUNT_ID"
# Fall back to the CLI's configured region when AWS_REGION is not exported to this cell.
AWS_REGION="${AWS_REGION:-$(aws configure get region)}"
# Skip creation if the bucket already exists and is accessible, so the cell can be re-run.
if ! aws s3api head-bucket --bucket "$BUCKET" 2>/dev/null; then
    if [ "$AWS_REGION" = "us-east-1" ]; then
        # us-east-1 is the default location; S3 rejects an explicit LocationConstraint for it.
        aws s3api create-bucket --bucket "$BUCKET" --region "$AWS_REGION"
    else
        aws s3api create-bucket --bucket "$BUCKET" --region "$AWS_REGION" \
            --create-bucket-configuration "LocationConstraint=$AWS_REGION"
    fi
fi
aws s3 cp ./train.csv "s3://$BUCKET/train.csv"

## screencap of SMS AutoML steps

In [3]:
import boto3

# boto3 client for the SageMaker API; the cells below use it to query the
# tuning job, describe the training job, and create the model.
client = boto3.client('sagemaker')


In [4]:
# Query the tuning job for its completed training jobs, ordered by final
# objective metric (descending), and keep only the first result.
# NOTE(review): "first" is the best job only if the objective metric is one
# that should be maximized -- confirm against the tuning job configuration.
best_job_query = {
    'HyperParameterTuningJobName': 'tuning-job-1-3ec69a12130e443a86',
    'StatusEquals': 'Completed',
    'SortBy': 'FinalObjectiveMetricValue',
    'SortOrder': 'Descending',
    'MaxResults': 1,
}
response = client.list_training_jobs_for_hyper_parameter_tuning_job(**best_job_query)

In [16]:
# Name of the top-ranked training job; the query used MaxResults=1, so index 0 is the only entry.
response['TrainingJobSummaries'][0]['TrainingJobName']

'tuning-job-1-3ec69a12130e443a86-177-fb23b88c'

In [12]:
# Register the selected training job's artifacts as a SageMaker model.
# The model is given the same name as the training job that produced it.
model_name = 'tuning-job-1-3ec69a12130e443a86-177-fb23b88c'

# Look up where the training job wrote its model archive in S3.
info = client.describe_training_job(TrainingJobName=model_name)
model_data = info['ModelArtifacts']['S3ModelArtifacts']
print(model_data)

# Inference container image paired with the trained artifacts.
primary_container = dict(
    Image='257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-xgboost:0.90-1-cpu-py3',
    ModelDataUrl=model_data,
)

create_model_response = client.create_model(
    ModelName=model_name,
    ExecutionRoleArn='arn:aws:iam::273210948404:role/service-role/AmazonSageMaker-ExecutionRole-20200328T115688',
    PrimaryContainer=primary_container,
)

print(create_model_response['ModelArn'])

s3://smstudio-santander-273210948404/output/smstudio-santander-accuracy/tuning/smstudio-s-dpp0-xgb/tuning-job-1-3ec69a12130e443a86-177-fb23b88c/output/model.tar.gz
arn:aws:sagemaker:us-east-2:273210948404:model/tuning-job-1-3ec69a12130e443a86-177-fb23b88c


In [None]:
!pip install 'sagemaker[local]' --upgrade

In [24]:
from sagemaker.transformer import Transformer

# Batch-transform settings for the model created from the selected training job.
# NOTE(review): max_payload is presumably in MB -- confirm against the SDK docs.
transformer_settings = {
    'model_name': 'tuning-job-1-3ec69a12130e443a86-177-fb23b88c',
    'instance_count': 1,
    'instance_type': 'ml.m4.xlarge',
    'assemble_with': 'Line',
    'max_payload': 1,
}
transformer = Transformer(**transformer_settings)

In [25]:
# Run the batch transform over the test set, one CSV line per record.
# NOTE: this S3 object is uploaded by the `aws s3 cp ./test.csv ...` cell that
# appears further down in the notebook; run that cell first on a fresh kernel.
transformer.transform(
    data='s3://smstudio-santander-273210948404/test.csv',
    content_type='text/csv',
    split_type='Line',
)

In [22]:
%%bash
# Abort on the first error so a failed identity lookup does not produce a bogus bucket name.
set -euo pipefail
# The account ID suffix keeps the S3 bucket name globally unique.
AWS_ACCOUNT_ID=$(aws sts get-caller-identity | jq -r '.Account')
BUCKET="smstudio-santander-$AWS_ACCOUNT_ID"
# The bucket itself is created by the earlier train.csv upload cell; only the copy is needed here.
aws s3 cp ./test.csv "s3://$BUCKET/test.csv"

upload: ./test.csv to s3://smstudio-santander-273210948404/test.csv 


In [28]:
import csv

# Number of lines to preview, counting the header line.
PREVIEW_LINES = 10

# Print the header and the first few records of test.csv as a sanity check.
# newline='' is what the csv module documentation recommends when opening
# files for csv.reader, so embedded newlines are handled by the parser.
with open('test.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    header = []
    line_count = 0
    # `row` is intentionally left in scope after the loop so it can be
    # inspected in a following cell.
    for row in csv_reader:
        if line_count == 0:
            header = row
            print(f'Column names are {", ".join(row)}')
        else:
            # Show the ID and the first two feature values, labelled with the
            # column names read from the header line.
            print(f'\t{row[0]}: {header[1]}={row[1]}, {header[2]}={row[2]}')
        line_count += 1
        if line_count >= PREVIEW_LINES:
            break
    print(f'Processed {line_count} lines.')

Column names are ID_code, var_0, var_1, var_2, var_3, var_4, var_5, var_6, var_7, var_8, var_9, var_10, var_11, var_12, var_13, var_14, var_15, var_16, var_17, var_18, var_19, var_20, var_21, var_22, var_23, var_24, var_25, var_26, var_27, var_28, var_29, var_30, var_31, var_32, var_33, var_34, var_35, var_36, var_37, var_38, var_39, var_40, var_41, var_42, var_43, var_44, var_45, var_46, var_47, var_48, var_49, var_50, var_51, var_52, var_53, var_54, var_55, var_56, var_57, var_58, var_59, var_60, var_61, var_62, var_63, var_64, var_65, var_66, var_67, var_68, var_69, var_70, var_71, var_72, var_73, var_74, var_75, var_76, var_77, var_78, var_79, var_80, var_81, var_82, var_83, var_84, var_85, var_86, var_87, var_88, var_89, var_90, var_91, var_92, var_93, var_94, var_95, var_96, var_97, var_98, var_99, var_100, var_101, var_102, var_103, var_104, var_105, var_106, var_107, var_108, var_109, var_110, var_111, var_112, var_113, var_114, var_115, var_116, var_117, var_118, var_119, var_

In [29]:
# `row` still holds the last record read by the CSV preview loop; display it in full.
row

['test_8',
 '6.9856',
 '0.8402',
 '13.7161',
 '4.7749',
 '8.6784',
 '-13.7607',
 '4.3386',
 '14.5843',
 '2.5883',
 '7.2215',
 '9.375',
 '8.4046',
 '14.3322',
 '17.1366',
 '7.9301',
 '14.2254',
 '6.722',
 '-8.7087',
 '-1.3541',
 '19.2714',
 '9.3437',
 '18.6532',
 '7.7823',
 '2.6009',
 '6.7642',
 '13.6464',
 '-6.3747',
 '1.1001',
 '6.3812',
 '7.6421',
 '4.3898',
 '9.0597',
 '1.204',
 '7.1039',
 '12.3777',
 '-2.9641',
 '7.0173',
 '7.4202',
 '19.6818',
 '-4.4557',
 '-8.9754',
 '1.5151',
 '11.5905',
 '12.0374',
 '14.9053',
 '-3.7726',
 '8.2385',
 '-14.4489',
 '-0.7069',
 '6.9919',
 '12.8479',
 '27.6295',
 '-4.9911',
 '6.5384',
 '8.362',
 '16.5132',
 '23.6665',
 '5.1845',
 '4.3941',
 '8.6522',
 '8.3098',
 '-7.7338',
 '-1.9156',
 '-3.2809',
 '7.2819',
 '3.4439',
 '7.3613',
 '5.1669',
 '5.0119',
 '-9.8221',
 '28.1225',
 '0.9355',
 '-1.8602',
 '23.5856',
 '13.4518',
 '21.7888',
 '9.118',
 '18.7106',
 '6.7353',
 '14.58',
 '-2.9163',
 '16.0976',
 '-2.0214',
 '-0.6714',
 '0.8176',
 '20.7796',
 '10