## SageMaker endpoint
To deploy the model you previously trained, you need to create a Sagemaker Endpoint. This is a hosted prediction service that you can use to perform inference.

In [None]:
import io
from PIL import Image

import torch
import torch.utils.data as data_utils
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import boto3
import pandas as pd
from sklearn.metrics import accuracy_score

### Finding the model
This notebook uses a stored model if it exists. If you recently ran a training example that use the %store% magic, it will be restored in the next cell.

Otherwise, you can pass the URI to the model file (a .tar.gz file) in the model_data variable.

You can find your model files through the SageMaker console by choosing Training > Training jobs in the left navigation pane. Find your recent training job, choose it, and then look for the s3:// link in the Output pane. Uncomment the model_data line in the next cell that manually sets the model's URI.

In [78]:
# Retrieve a saved model from a previous notebook run's stored variable
%store -r model_data

# If no model was found, set it manually here.
model_data = 's3://sagemaker-us-east-1-318322629142/pytorch-smdataparallel-histopathology-m-2021-05-18-18-59-25-802/output/model.tar.gz'

print("Using this model: {}".format(model_data))

no stored variable or alias model_data
Using this model: s3://sagemaker-us-east-1-318322629142/pytorch-smdataparallel-histopathology-m-2021-05-18-18-59-25-802/output/model.tar.gz


### Create a model object
You define the model object by using SageMaker SDK's PyTorchModel and pass in the model from the estimator and the entry_point. The endpoint's entry point for inference is defined by model_fn as seen in the following code block that prints out inference.py. The function loads the model and sets it to use a GPU, if available.

In [79]:
!pygmentize code/inference.py

[37m# Licensed to the Apache Software Foundation (ASF) under one[39;49;00m
[37m# or more contributor license agreements.  See the NOTICE file[39;49;00m
[37m# distributed with this work for additional information[39;49;00m
[37m# regarding copyright ownership.  The ASF licenses this file[39;49;00m
[37m# to you under the Apache License, Version 2.0 (the[39;49;00m
[37m# "License"); you may not use this file except in compliance[39;49;00m
[37m# with the License.  You may obtain a copy of the License at[39;49;00m
[37m#[39;49;00m
[37m#   http://www.apache.org/licenses/LICENSE-2.0[39;49;00m
[37m#[39;49;00m
[37m# Unless required by applicable law or agreed to in writing,[39;49;00m
[37m# software distributed under the License is distributed on an[39;49;00m
[37m# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY[39;49;00m
[37m# KIND, either express or implied.  See the License for the[39;49;00m
[37m# specific language governing permissions and limit

In [80]:
import sagemaker
role = sagemaker.get_execution_role()

from sagemaker.pytorch import PyTorchModel
model = PyTorchModel(model_data=model_data, source_dir='code',
                        entry_point='inference.py', role=role, framework_version='1.6.0', py_version='py3')

#### Deploy the model on an endpoint
You create a predictor by using the model.deploy function. You can optionally change both the instance count and instance type.

In [81]:
predictor = model.deploy(initial_instance_count=1, instance_type='ml.m5.24xlarge')

------------------!

### Test the model
You can test the depolyed model using samples from the test set.

In [83]:
def image_from_s3(bucket, key):
    bucket = s3_resource.Bucket(bucket)
    image = bucket.Object(key)
    img_data = image.get().get('Body').read()

    return Image.open(io.BytesIO(img_data))

In [86]:
class TileDataset(data_utils.Dataset):

    def __init__(self, img_path, folder_num, dataframe, num_tiles, transform=None):
        """
        img_path: Where the images are stored
        dataframe: The train.csv dataframe
        num_tiles: How many tiles should the dataset return per sample
        transform: The function to apply to the image. Usually dataaugmentation. Do not do normalization here.
        """
        self.img_path = img_path
        self.folder_num = folder_num
        self.df = dataframe
        self.num_tiles = num_tiles
        self.img_list = list(self.df['image_id'])
        self.transform = transform

    def __getitem__(self, idx):
        img_id = self.img_list[idx]

        tiles = ['test_'+str(self.folder_num)+'/'+img_id + '_' + str(i) + '.png' for i in range(0, self.num_tiles)]
        image_tiles = []
        

        for tile in tiles:
            image = image_from_s3(self.img_path, tile)

            if self.transform is not None:
                image = self.transform(image)

            image = 1 - image
            image = transforms.Normalize([1.0-0.90949707, 1.0-0.8188697, 1.0-0.87795304], [0.1279171 , 0.24528177, 0.16098117])(image)
            image_tiles.append(image)

        image_tiles = torch.stack(image_tiles, dim=0)

        return torch.tensor(image_tiles), torch.tensor(self.df.iloc[idx]['isup_grade'])

    def __len__(self):
        return len(self.img_list)

In [87]:
def get_csv(bucket, folder_num, df):
    # Getting tiles that are in S3
    print('Collecting list of tiles')
    tiles_set = set()
    bucket = s3_resource.Bucket('sagemaker-us-east-1-318322629142')
    for key in bucket.objects.all():
        if 'test_'+str(folder_num) in key.key:
            tiles_set.add(key.key.split('/')[1].split('_')[0])
    tiles_list = list(tiles_set)
    
    print('Creating dataframe')
    # Creating dataframe containing labels for each tile in S3
    tiles_df = pd.DataFrame(columns=['image_id', 'data_provider', 'isup_grade', 'gleason_score'])
    for i in range(len(tiles_list)):
        tiles_df = tiles_df.append(df.loc[df['image_id'] == tiles_list[i]])
    
    tiles_df = tiles_df.drop_duplicates()
    return tiles_df

In [108]:
bucket = 'sagemaker-us-east-1-318322629142'

dataset_csv_key = 'panda_dataset.csv'
dataset_csv_dir = 's3://{}/{}'.format(bucket, dataset_csv_key)
df = pd.read_csv(dataset_csv_dir)

df['isup_grade'] = df['isup_grade'].replace([1,2], 0)
df['isup_grade'] = df['isup_grade'].replace([3,4,5], 1)

test_df = get_csv(bucket, 1, df)
print(test_df)

transform_train = transforms.Compose([transforms.RandomHorizontalFlip(0.5),
                                      transforms.RandomVerticalFlip(0.5),
                                      transforms.ToTensor()])


print('Creating data loader')
test_set = TileDataset(bucket, 1, test_df, 16, transform=transform_train)

batch_size = 1
test_loader = data_utils.DataLoader(test_set, batch_size, shuffle=False, num_workers=0)

predictions = []
true_labels = []
for batch_idx, (data, label) in enumerate(test_loader):
    print('batch_idx = ', batch_idx)
    _, Y_hat, _ = predictor.predict(data)
    predictions.append(int(Y_hat))
    true_labels.append(int(label))



Collecting list of tiles
Creating dataframe
                              image_id data_provider isup_grade gleason_score
1732  2b730c057bde4c56e79f693e3d577138       radboud          1           4+5
1727  2b4d629c0b0a02ddfb05cc41c0c8dc65    karolinska          1           4+4
1707  2ac5f9c41e6b9a004fc0cecf6c3083be    karolinska          0           3+3
1774  2c8fd1d0ab8640342f6d10a0a54e5279    karolinska          0           0+0
1254  1fc49bfab631583981f96f285ec0c94d    karolinska          1           4+5
...                                ...           ...        ...           ...
1709  2ad0f2857a4552a25127205fd04a5e9f       radboud          0           3+3
1725  2b340c9844077ddcdf641adac5f116e3       radboud          0      negative
1680  2a1c3373688904fcabbdeb4a177972f8       radboud          0           3+3
1249  1fb65315d7ded63d688194863a1b123e    karolinska          1           5+5
1257  1fe0cfea7347950a76bcbdafa0ad96ab    karolinska          0           3+4

[125 rows x 4 colum



batch_idx =  0
batch_idx =  1
batch_idx =  2
batch_idx =  3
batch_idx =  4
batch_idx =  5
batch_idx =  6
batch_idx =  7
batch_idx =  8
batch_idx =  9
batch_idx =  10
batch_idx =  11
batch_idx =  12
batch_idx =  13
batch_idx =  14
batch_idx =  15
batch_idx =  16
batch_idx =  17
batch_idx =  18
batch_idx =  19
batch_idx =  20
batch_idx =  21
batch_idx =  22
batch_idx =  23
batch_idx =  24
batch_idx =  25
batch_idx =  26
batch_idx =  27
batch_idx =  28
batch_idx =  29
batch_idx =  30
batch_idx =  31
batch_idx =  32
batch_idx =  33
batch_idx =  34
batch_idx =  35
batch_idx =  36
batch_idx =  37
batch_idx =  38
batch_idx =  39
batch_idx =  40
batch_idx =  41
batch_idx =  42
batch_idx =  43
batch_idx =  44
batch_idx =  45
batch_idx =  46
batch_idx =  47
batch_idx =  48
batch_idx =  49
batch_idx =  50
batch_idx =  51
batch_idx =  52
batch_idx =  53
batch_idx =  54
batch_idx =  55
batch_idx =  56
batch_idx =  57
batch_idx =  58
batch_idx =  59
batch_idx =  60
batch_idx =  61
batch_idx =  62
ba

In [255]:
print(accuracy_score(true_labels, predictions))

0.672


#### Cleanup
If you don't intend on trying out inference or to do anything else with the endpoint, you should delete it.

In [None]:
predictor.delete_endpoint()