Upload model

In [1]:
from sagemaker.pytorch import PyTorchModel
from sagemaker import get_execution_role, Session
from sagemaker.serverless.serverless_inference_config import ServerlessInferenceConfig

In [2]:
# Push the packaged model archive to the session's default bucket under
# the "model/pytorch" prefix; `model_data` holds what upload_data returns.
sess = Session()
model_data = sess.upload_data(
    path="model.tar.gz",
    bucket=sess.default_bucket(),
    key_prefix="model/pytorch",
)

In [3]:
# Describe the model to SageMaker: the uploaded archive plus the inference
# handler script.
# NOTE(review): the SageMaker SDK docs say that when `source_dir` is given,
# `entry_point` must be a file at the root of `source_dir` — confirm that
# "code/inference.py" together with source_dir="code/" is intended.
# NOTE(review): the role is a hard-coded name while `get_execution_role` is
# imported but unused — presumably deliberate for running outside SageMaker;
# TODO confirm.
model = PyTorchModel(
    entry_point="code/inference.py",
    source_dir="code/",
    role='sagemaker-execution',
    model_data=model_data,
    framework_version="1.10",
    py_version="py38",
)

In [4]:
# Show both artifact attributes of the model object (repacked archive, original archive).
model.repacked_model_data, model.model_data

(None, 's3://sagemaker-us-east-1-144349053222/model/pytorch/model.tar.gz')

***
Deploy
- `model.deploy(...)` below creates the model and the endpoint together (not sure how to deploy just the model on its own).
- Using that model, go to the SageMaker console and create the endpoint configuration manually to make the endpoint serverless.

In [14]:
# Serverless sizing for the endpoint: 6144 MB of memory, max concurrency of 20.
serverless_inference_config = ServerlessInferenceConfig(
    max_concurrency=20,
    memory_size_in_mb=6144,
)

In [15]:
# Deploy the model behind a named endpoint using the serverless configuration.
predictor = model.deploy(
    serverless_inference_config=serverless_inference_config,
    endpoint_name='endpoint-2022-06-30-6GB',
)

--------!

***
Invoke

In [6]:
import torchaudio
import json
import boto3
import ast
import csv
import sys
import numpy as np
import torch

In [2]:
def make_features(wav_name, mel_bins, target_length=1024,
                  norm_mean=-4.2677393, norm_std=4.5689974):
    """Turn an audio file into a fixed-length, normalised filterbank matrix.

    Args:
        wav_name: Path of the audio file handed to ``torchaudio.load``.
        mel_bins: Number of mel bins requested from the Kaldi-style fbank.
        target_length: Number of frames in the result. Shorter inputs are
            zero-padded at the end, longer inputs are cut off.
        norm_mean: Value subtracted from every feature. The default is the
            constant that used to be hard-coded here.
        norm_std: Scale of the features; values are divided by
            ``2 * norm_std``. The default is the constant that used to be
            hard-coded here.

    Returns:
        The fbank tensor padded/cropped to ``target_length`` frames along its
        first dimension and normalised. The notebook shows shape (1024, 128)
        for ``mel_bins=128``.
    """
    waveform, sr = torchaudio.load(wav_name)
    fbank = torchaudio.compliance.kaldi.fbank(
        waveform, htk_compat=True, sample_frequency=sr, use_energy=False,
        window_type='hanning', num_mel_bins=mel_bins, dither=0.0,
        frame_shift=10)

    # Positive: frames missing (pad); negative: surplus frames (crop).
    missing = target_length - fbank.shape[0]
    if missing > 0:
        # Pad spec is (last-dim left, last-dim right, first-dim before, first-dim after):
        # only append zero frames after the existing ones.
        fbank = torch.nn.functional.pad(fbank, (0, 0, 0, missing))
    elif missing < 0:
        fbank = fbank[:target_length, :]

    return (fbank - norm_mean) / (norm_std * 2)

In [3]:
# Extract features for one sample recording.
audio_path ='2021-06-04T20_12_42.393397+0000_sEVT_FW-C-M31.wav'
feats = make_features(audio_path, mel_bins=128)           # shape(1024, 128)
input_tdim = feats.shape[0]
# Batched (1, frames, 128) view of the features. No later cell visible in this
# notebook reads `feats_data`; the request body is built from `feats`.
feats_data = feats.expand(1, input_tdim, 128)           # reshape the feature
feats.shape

torch.Size([1024, 128])

In [10]:
# Serialise the features as a JSON request body.
# NOTE: no rounding is applied here — values are sent at full float precision.
# Rounding (e.g. to 1 decimal) would shrink the payload and ease memory pressure;
# test accuracy with different round values to see the tradeoff before enabling it.
body = json.dumps({"instances": feats.numpy().astype(float).tolist()})

In [11]:
# Rough payload size check. sys.getsizeof reports the in-memory size of the str
# object (interpreter overhead included), not the encoded request size;
# len(body.encode()) would give the exact byte count.
sys.getsizeof(body)

2824948

In [12]:
# Disabled alternative: send the request through the SDK predictor from the
# Deploy section instead of the boto3 runtime client used in the next cell.
# res = predictor.predict(data=body) 

In [13]:
# Call the deployed endpoint directly through the SageMaker runtime API.
endpoint = 'endpoint-2022-06-30-6GB'
content_type = 'application/json'
client = boto3.client('sagemaker-runtime')
response = client.invoke_endpoint(
    EndpointName=endpoint,
    ContentType=content_type,
    Body=body,
)
# Keep the response body object; a later cell decodes it with json.load.
predictions = response['Body']

In [14]:
def load_label(label_csv):
    """Read the class display names from a label-index CSV file.

    The first row is treated as a header and skipped. For every following
    row the third column (the label text) is collected; the second column
    holds the label's unique id (such as "/m/068hy") and is not returned.
    Completely blank rows are ignored instead of raising ``IndexError``.

    Args:
        label_csv: Path of the CSV file.

    Returns:
        List of label strings in file order (empty if the file has no data
        rows).
    """
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing newlines are parsed correctly.
    with open(label_csv, 'r', newline='') as f:
        rows = csv.reader(f, delimiter=',')
        next(rows, None)  # drop the header row; tolerate an empty file
        return [row[2] for row in rows if row]

# Class display names, loaded once; evaluate_results reads this notebook-level list.
labels = load_label("class_labels_indices.csv")
    
def evaluate_results(res, top_k=10, label_names=None):
    """Print the highest-scoring classes for the first item in ``res``.

    Args:
        res: Sequence whose first element is the per-class score vector;
            only ``res[0]`` is read.
        top_k: Maximum number of classes to print. If fewer scores are
            available, all of them are printed.
        label_names: Class names indexed like the scores. Defaults to the
            notebook-level ``labels`` list.

    Returns:
        None. The ranking is written to stdout, one "label: score" line per
        class, highest score first.
    """
    if label_names is None:
        label_names = labels

    scores = np.asarray(res[0])
    # Descending order, truncated so short score vectors cannot index out of range.
    ranked = np.argsort(scores)[::-1][:max(top_k, 0)]

    print('[*INFO] predice results:')
    for idx in ranked:
        print('{}: {:.4f}'.format(label_names[idx], float(scores[idx])))

In [15]:
# `predictions` is the 'Body' object of the invoke_endpoint response; json.load reads from it.
# NOTE(review): presumably a one-shot stream — re-running this cell without
# re-invoking the endpoint would then find it already consumed. TODO confirm.
res = json.load(predictions)
evaluate_results(res)

[*INFO] predice results:
Emergency vehicle: 0.5420
Siren: 0.4673
Police car (siren): 0.4421
Ambulance (siren): 0.2040
Speech: 0.1775
Vehicle: 0.1604
Car: 0.0928
Bird: 0.0651
Outside, urban or manmade: 0.0603
Fire engine, fire truck (siren): 0.0557


In [None]:
# NOTE(review): `pq` is not imported or defined in any cell visible here, and this
# cell has no execution count — as written it presumably raises NameError.
# TODO confirm the intended library or remove the cell.
pq.io.read_wavefile(audio_path)

***
Test Locally

In [20]:
from inference import input_fn, model_fn, output_fn, predict_fn

In [9]:
# Local directory passed to model_fn below. The disabled line shows an S3 archive
# with a matching name — presumably where this local copy came from; TODO confirm.
# model_dir = fetch_model("s3://sagemaker-us-east-1-144349053222/model/pytorch/ast-model-2022-06-16.tar.gz")
model_dir = 'test-ast-model-2022-06-16/ast-model-2022-06-16/'

In [11]:
# Build the network from the local model directory via the handler's model_fn.
net = model_fn(model_dir)

In model_fn. Model directory is -
test-ast-model-2022-06-16/ast-model-2022-06-16/
---------------AST Model Summary---------------
ImageNet pretraining: False, AudioSet pretraining: False
frequncey stride=10, time stride=10
number of patches=1212
Loading the ast model


In [12]:
# Run the inference.py handlers locally: decode request -> predict -> serialise.
# `body` and `content_type` come from the Invoke section above.
# NOTE: this rebinds `predictions` and `res`, which the Invoke section used for
# the endpoint response.
# NOTE(review): the recorded output below shows input_fn stopping in the debugger
# at a `pdb.set_trace()` inside inference.py — presumably leftover debugging that
# should be removed before the handler is packaged. TODO confirm.
input_object = input_fn(body, content_type)
predictions = predict_fn(input_object, net)
res = output_fn(predictions, content_type)

> /mnt/c/Users/jnam.AERC/Desktop/Misc/ML/Sagemaker/inference.py(27)input_fn()
     25     data = torch.tensor(data, dtype=torch.float32, device=device)
     26     import pdb; pdb.set_trace()
---> 27     data = data.expand(1, data.shape[0], 128)           # reshape the feature
     28     return data
     29 

ipdb>  data.shape[0


*** SyntaxError: unexpected EOF while parsing


ipdb>  data.shape[0]


1024


ipdb>  c


In [40]:
# `res` is the serialised output of output_fn for content type 'application/json'.
# Parse it with the JSON parser, as the Invoke section does for the endpoint
# response: ast.literal_eval only accepts Python literals and rejects valid JSON
# such as null/true/false.
result_output = json.loads(res)[0]
# Class indices ordered from highest to lowest score.
sorted_indexes = np.argsort(result_output)[::-1]

In [42]:
# Reuse the helper from the Invoke section instead of repeating its ranking and
# printing loop; it reads the score vector from the first element of its argument.
evaluate_results([result_output])

[*INFO] predice results:
Emergency vehicle: 0.5659
Siren: 0.5038
Police car (siren): 0.4735
Ambulance (siren): 0.2245
Speech: 0.1997
Vehicle: 0.1923
Car: 0.1047
Fire engine, fire truck (siren): 0.0630
Bird: 0.0603
Outside, urban or manmade: 0.0515
