In [1]:
import os
import sys
import logging

import numpy as np
import pandas as pd
from sagemaker.local import LocalSession
import sagemaker
from sagemaker.pytorch import PyTorch
import torch

In [2]:
# Switch for the session-setup cell: True selects the LocalSession branch with
# file:// data paths, False selects the regular sagemaker.Session branch.
LOCAL = True

In [3]:
# Configure the root logger: INFO level, a single stdout handler, and a
# "timestamp - logger name - level - message" record layout.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
    level=logging.INFO,
)

# Module-level logger for messages emitted by this notebook itself.
logger = logging.getLogger(__name__)

In [4]:
# Configure the SageMaker session, execution role, bucket and region used by
# the rest of the notebook. LOCAL picks the LocalSession branch; otherwise a
# regular AWS-backed session is created.
if LOCAL:
    session = LocalSession()
    # NOTE(review): "local_code" presumably keeps the source directory on local
    # disk instead of uploading it to S3 - confirm against the SDK docs.
    session.config = {"local": {"local_code": True}}
    bucket = "."
    # NOTE(review): the all-ones account id suggests a placeholder ARN.
    role = "arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001"
    region = "local"
    train_input_path = "file://./data/train"
    val_input_path = "file://./data/val"
    test_input_path = "file://./data/test"
else:
    session = sagemaker.Session()
    # SageMaker session bucket -> used for uploading data, models and logs.
    # SageMaker will automatically create this bucket if it does not exist.
    bucket = "quantsagemaker"
    if bucket is None:
        # Fall back to the session's default bucket when no name is given.
        bucket = session.default_bucket()

    role = sagemaker.get_execution_role()
    # Re-create the session with the chosen bucket as its default. The original
    # passed the undefined name `sagemaker_session_bucket` here (NameError).
    session = sagemaker.Session(default_bucket=bucket)
    region = session.boto_region_name

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {bucket}")
print(f"sagemaker session region: {region}")

sagemaker role arn: arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001
sagemaker bucket: .
sagemaker session region: local


In [5]:
# S3 locations of the IMDB train / validation / test CSV files.
# NOTE(review): these assignments run unconditionally, so they replace the
# file:// paths assigned in the LOCAL branch of the session-setup cell.
train_input_path, val_input_path, test_input_path = (
    f"s3://sagemaker-eu-central-1-611215368770/sagemaker/imdb/{split}.csv"
    for split in ("train", "val", "test")
)

## Training

In [None]:
# Hyperparameters handed to the PyTorch estimator below.
hyperparameters = dict(
    epochs=1,
    train_batch_size=32,
    model_name="distilbert-base-uncased",
)

# Estimator that runs ./code/train.py on a single "local" instance.
# NOTE(review): instance_type is hard-coded to "local" and the `session`
# object built in the setup cell is not passed in - confirm this is intended.
estimator = PyTorch(
    source_dir="./code",
    entry_point="train.py",
    role=role,
    framework_version="1.7.1",
    py_version="py3",
    instance_type="local",
    instance_count=1,
    hyperparameters=hyperparameters,
)

# Start training with one input channel per data split.
estimator.fit(
    inputs={
        'training': train_input_path,
        'validating': val_input_path,
        'testing': test_input_path,
    }
)

## Inference

In [None]:
%%sh -s $estimator.model_data
# Download the model archive whose URI is passed as $1 into ./model and unpack it there.
# Stop at the first failing command so a failed download never leads to
# extracting a stale archive left over from an earlier run.
set -e
# -p keeps the cell re-runnable when ./model already exists.
mkdir -p model
# Quote the URI so it is always passed to the CLI as a single argument.
aws s3 cp "$1" model/
tar xvzf model/model.tar.gz --directory ./model

In [None]:
# Keep the location of the estimator's model artifact in a variable and show it.
model_data = estimator.model_data
print(model_data)

In [None]:
from sagemaker.pytorch.model import PyTorchModel

# Wrap a model archive together with the inference script (code/inference.py)
# so it can be used for batch transform and endpoint deployment below.
# NOTE(review): model_data points at the artifact of one specific training job
# (pytorch-training-2021-06-01-09-20-56-523), not at `estimator.model_data`
# from the training cell - confirm which model is meant to be served.
pytorch_model = PyTorchModel(
    entry_point="inference.py",
    source_dir="code",
    model_data="s3://sagemaker-eu-central-1-611215368770/pytorch-training-2021-06-01-09-20-56-523/model.tar.gz",
    role=role,
    py_version="py3",
    framework_version="1.7.1",
)


In [7]:
# Build a transformer (batch transform) object from the model, using a single
# "local" instance.
transformer = pytorch_model.transformer(instance_count=1, instance_type="local")

2021-06-01 13:16:55,882 - botocore.credentials - INFO - Found credentials in environment variables.
2021-06-01 13:18:27,863 - sagemaker - INFO - Creating model with name: pytorch-inference-2021-06-01-11-18-27-862


In [8]:
# Run the transform job over the test data, declared as CSV and split by line.
transformer.transform(test_input_path, content_type='text/csv', split_type='Line')

2021-06-01 13:18:27,868 - sagemaker - INFO - Creating transform job with name: pytorch-inference-2021-06-01-11-18-27-868
2021-06-01 13:18:27,869 - sagemaker.local.image - INFO - serving
2021-06-01 13:18:27,870 - sagemaker.local.image - INFO - creating hosting dir in /tmp/tmp_mukpy_4
2021-06-01 13:18:45,552 - sagemaker.local.image - INFO - docker compose file: 
networks:
  sagemaker-local:
    name: sagemaker-local
services:
  algo-1-myu8a:
    command: serve
    container_name: 1aoywkheg3-algo-1-myu8a
    environment:
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    - '[Masked]'
    image: 763104351884.dkr.ecr.eu-central-1.amazonaws.com/pytorch-inference:1.7.1-cpu-py3
    networks:
      sagemaker-local:
        aliases:
        - algo-1-myu8a
    ports:
    - 8080:8080
    stdin_open: true
    tty: true
    volumes:
    - /tmp/tmp5wsddp7o:/opt/ml/model
version: '2.3'

2021-06-01 13:18:45,553 - s

In [9]:
# Copy everything under the transform job's output location into the current directory.
!aws s3 cp --recursive $transformer.output_path ./

download: s3://sagemaker-eu-central-1-611215368770/pytorch-inference-2021-06-01-11-18-27-868/pytorch-inference-2021-06-01-11-18-27-868/test.csv.out to pytorch-inference-2021-06-01-11-18-27-868/test.csv.out


In [12]:
# Deploy the model to a single "local" instance and keep the returned predictor.
predictor = pytorch_model.deploy(
    instance_type="local",
    initial_instance_count=1,
)

# Requests are serialized as JSON; responses are read back as plain strings.
predictor.deserializer = sagemaker.deserializers.StringDeserializer()
predictor.serializer = sagemaker.serializers.JSONSerializer()

# Smoke test with a single review.
predictor.predict('this is a very good movie')

2021-06-01 13:25:46,919 - botocore.credentials - INFO - Found credentials in environment variables.


ClientError: An error occurred (ExpiredToken) when calling the GetCallerIdentity operation: The security token included in the request is expired

In [None]:
# Pass a list of two texts to the endpoint in a single predict call.
predictor.predict(['this movie sucks', 'this movie is ok'])

In [None]:
# Pass a list of three texts to the endpoint in a single predict call.
predictor.predict(['such a terrible movie', 'what a great movie', 'omg best movie ever'])

In [None]:
# Tear down the endpoint behind `predictor` once the predictions are done.
predictor.delete_endpoint()

In [None]:
from transformers import (
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    AutoTokenizer,
    AutoConfig,
)

# Tokenizer for the base checkpoint, loaded by name.
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')

# Rebuild the classifier from the files in ./model: the configuration comes
# from config.json and the weights from pytorch_model.bin.
# NOTE(review): passing the directory "model" to from_pretrained would
# presumably load both files in one call - confirm before changing.
config = AutoConfig.from_pretrained(os.path.join('model', 'config.json'))
model = AutoModelForSequenceClassification.from_pretrained(
    os.path.join('model', 'pytorch_model.bin'),
    config=config,
)

In [None]:
# Example text to classify with the locally loaded model, kept in a list
# because the tokenizer below is called on the whole list.
inputs = ['this is a terrific movie']

In [None]:
# Tokenize the inputs with truncation and padding enabled.
# presumably yields plain Python lists (no return_tensors is given); the next
# cell converts 'input_ids' and 'attention_mask' to tensors - TODO confirm.
tokenized_input = tokenizer(inputs, truncation=True, padding=True)

In [None]:
# Convert the tokenizer output into int64 tensors for the model.
# torch.tensor(..., dtype=torch.long) builds the integer tensor directly;
# torch.Tensor(...).long() would first create a float32 tensor and then cast it.
input_ids = torch.tensor(tokenized_input['input_ids'], dtype=torch.long)
attention_mask = torch.tensor(tokenized_input['attention_mask'], dtype=torch.long)

In [None]:
# Forward pass with gradient tracking disabled; the raw model output is kept in `res`.
with torch.no_grad():
    res = model(input_ids=input_ids, attention_mask=attention_mask)