In [28]:
# Push the local commits to the configured git remote.
# NOTE(review): the recorded output of this cell shows the remote reporting
# "This repository moved" and pointing at
# https://github.com/basilwong/sagemaker-repo.git, so the remote URL should
# probably be updated (e.g. with `git remote set-url`) before the next push.
!git push

Counting objects: 45, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (35/35), done.
Writing objects: 100% (45/45), 13.48 KiB | 4.49 MiB/s, done.
Total 45 (delta 20), reused 0 (delta 0)
remote: Resolving deltas: 100% (20/20), completed with 15 local objects.
remote: This repository moved. Please use the new location:
remote:   https://github.com/basilwong/sagemaker-repo.git
To https://github.com/basilwong/awstest1.git
   e557b51..8da01d7  master -> master


### Add Dependencies

In [29]:
import sagemaker as sage
from sagemaker import get_execution_role

import zipfile
import os

from sagemaker import ModelPackage


# Make modules in the local `src` directory importable from this notebook.
import sys
# The entry is appended, so it is searched after every location that is
# already on sys.path.
sys.path.append('src')

# Install pydub into the environment before importing audio_util.
# NOTE(review): presumably audio_util depends on pydub -- confirm.
!pip install pydub

# NOTE(review): presumably resolved from `src/` through the path entry added
# above -- confirm.
import audio_util

[33mYou are using pip version 10.0.1, however version 20.0.2 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [30]:
# ---- Static settings ------------------------------------------------------
# Every S3 key written by this notebook lives under `common_prefix`.
common_prefix = "source_separation"
# Key prefix that receives the batch-inference input files.
batch_inference_input_prefix = common_prefix + "/batch-inference-input-data"
# ARN of the source-separation model package the model is created from.
modelpackage_arn = 'arn:aws:sagemaker:us-east-2:057799348421:model-package/source-separation-v11570291536-75ed8128ecee95e142ec4404d884ecad'

# ---- AWS handles ----------------------------------------------------------
# IAM role the notebook executes under; handed to the model further down.
role = get_execution_role()
# SageMaker session reused for uploads, bucket lookup and model creation.
sagemaker_session = sage.Session()



### Creating the Model

In [31]:
from sagemaker import ModelPackage

def predict_wrapper(endpoint, session):
    """Create the real-time predictor used for ``endpoint``.

    The function is handed to ``ModelPackage`` as ``predictor_cls``.

    Args:
        endpoint: Forwarded as the first argument of ``sage.RealTimePredictor``.
        session: Forwarded as the second argument of ``sage.RealTimePredictor``.

    Returns:
        A ``sage.RealTimePredictor`` configured with the
        ``application/x-recordio-protobuf`` content type.
    """
    predictor = sage.RealTimePredictor(
        endpoint,
        session,
        content_type='application/x-recordio-protobuf',
    )
    return predictor

# Build the model from the model package ARN, using the notebook's execution
# role and session; predict_wrapper supplies the predictor.
model_package_settings = dict(
    role=role,
    model_package_arn=modelpackage_arn,
    sagemaker_session=sagemaker_session,
    predictor_cls=predict_wrapper,
)
model = ModelPackage(**model_package_settings)

### Running the Batch Job

In [32]:
# Local folder holding the files that go through the batch transform.
batch_input_folder = "source-separation-input"

# Upload the folder under the batch-inference input prefix; the returned S3
# location is what the transform job reads from.
transform_input = sagemaker_session.upload_data(
    path=batch_input_folder,
    key_prefix=batch_inference_input_prefix,
)
print("Transform input uploaded to " + transform_input)

Transform input uploaded to s3://sagemaker-us-east-2-075178354542/source_separation/batch-inference-input-data


In [33]:
import json 
import uuid

# Results are written to the session's default bucket, below the shared prefix.
bucket = sagemaker_session.default_bucket()
batch_output_location = 's3://'+bucket+'/'+common_prefix+'/batch-transform-output'

# One ml.m4.xlarge instance, using the SingleRecord strategy.
transformer = model.transformer(
    instance_count=1,
    instance_type='ml.m4.xlarge',
    strategy='SingleRecord',
    output_path=batch_output_location,
)
transformer.transform(transform_input, content_type='application/x-recordio-protobuf')
# Block until the transform job has finished (the recorded run raised
# UnexpectedStatusException here when the job failed).
transformer.wait()

print("Batch Transform output saved to " + transformer.output_path)

....................[34mStarting the inference server with 4 workers.[0m
[34m[2020-04-12 10:13:40 +0000] [11] [INFO] Starting gunicorn 19.9.0[0m
[34m[2020-04-12 10:13:40 +0000] [11] [INFO] Listening at: unix:/tmp/gunicorn.sock (11)[0m
[34m[2020-04-12 10:13:40 +0000] [11] [INFO] Using worker: gevent[0m
[34m[2020-04-12 10:13:40 +0000] [15] [INFO] Booting worker with pid: 15[0m
[34m[2020-04-12 10:13:40 +0000] [16] [INFO] Booting worker with pid: 16[0m
[34m[2020-04-12 10:13:40 +0000] [17] [INFO] Booting worker with pid: 17[0m
[34m[2020-04-12 10:13:40 +0000] [18] [INFO] Booting worker with pid: 18[0m
[34mTesting...[0m
[34m2020-04-12 10:14:06.826292: I tensorflow/core/platform/cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA[0m
[34m169.254.255.130 - - [12/Apr/2020:10:14:07 +0000] "GET /ping HTTP/1.1" 200 1 "-" "Go-http-client/1.1"[0m
[34m169.254.255.130 - - [12/Apr/2020:10:14:07 +0000] "GET /executio

UnexpectedStatusException: Error for Transform job source-separation-v11570291536-75ed8128-2020-04-12-10-10-33-259: Failed. Reason: ClientError: See job logs for more information

### Processing the Batch Output

In [None]:
import boto3

# Download every object stored under the batch-transform output prefix into a
# local folder, naming the copies output-1.zip, output-2.zip, ...
s3 = boto3.resource('s3')
my_bucket = s3.Bucket(sagemaker_session.default_bucket())
prefix = "source_separation/batch-transform-output/"
download_dir = 'source-separation-output/batch-transform-output'

audio_util.clear_folder(download_dir)

i = 0
for object_summary in my_bucket.objects.filter(Prefix=prefix):
    key = object_summary.key
    file_name = key.split('/')[-1]
    if not file_name:
        # A key ending in '/' is a "folder" placeholder with nothing to
        # download; it must not consume an output number either.
        continue
    i += 1
    print(file_name)
    # Download by the object's real key. Rebuilding it as prefix + file_name
    # points at the wrong object whenever the key has further path segments
    # below the prefix.
    my_bucket.download_file(key, '{}/output-{}.zip'.format(download_dir, i))

In [7]:
# Unpack every downloaded archive into its own folder below extracted/.
audio_util.clear_folder('source-separation-output/extracted')
for archive_name in os.listdir('source-separation-output/batch-transform-output'):
    print(archive_name)
    archive_path = 'source-separation-output/batch-transform-output/' + archive_name
    # The target folder is named after the text before the first '.' in the
    # archive name (output-3.zip -> output-3).
    target_dir = 'source-separation-output/extracted/' + archive_name.split('.')[0] + '/'
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(target_dir)

output-3.zip
output-6.zip
output-8.zip
output-9.zip
output-1.zip
output-4.zip
output-7.zip
output-2.zip
output-5.zip


In [None]:
import os


def move_separated_stems(extracted_root='source-separation-output/extracted/',
                         vocals_dir='source-separation-output/vocals/',
                         background_dir='source-separation-output/background/'):
    """Sort the extracted stems into the vocals and background folders.

    Every sub-folder of ``extracted_root`` is expected to contain an
    ``output`` directory. Files in it whose name contains "vocals" are moved
    to ``vocals_dir``, files whose name contains "accompaniment" are moved to
    ``background_dir``, and anything else is left in place. The index of the
    sub-folder is appended to each moved file's name so stems from different
    archives do not overwrite each other.

    NOTE(review): the index is appended after the extension (vocals.wav ->
    vocals.wav0). That naming is kept unchanged here; confirm whether later
    steps need the extension preserved.

    Args:
        extracted_root: Folder holding one sub-folder per extracted archive.
        vocals_dir: Destination for vocal stems; created if missing.
        background_dir: Destination for accompaniment stems; created if
            missing.

    Returns:
        The destination paths of all moved files, in the order they were
        moved. Empty when ``extracted_root`` does not exist.
    """
    if not os.path.isdir(extracted_root):
        print("Nothing to move: " + extracted_root + " does not exist")
        return []

    # os.rename fails if the destination folder is missing.
    for target in (vocals_dir, background_dir):
        if not os.path.isdir(target):
            os.makedirs(target)

    moved = []
    # Sorted so a folder always receives the same index; os.listdir order is
    # arbitrary.
    for i, folder in enumerate(sorted(os.listdir(extracted_root))):
        output_dir = os.path.join(extracted_root, folder, 'output')
        for file in os.listdir(output_dir):
            print(file)
            if "vocals" in file:
                target_dir = vocals_dir
            elif "accompaniment" in file:
                target_dir = background_dir
            else:
                continue
            # Both kinds of stem are read from <folder>/output/. The previous
            # accompaniment branch left out the 'output' segment and pointed
            # at a path that does not exist.
            destination = os.path.join(target_dir, file + str(i))
            os.rename(os.path.join(output_dir, file), destination)
            moved.append(destination)
    return moved


moved_stems = move_separated_stems()

### Transcribe the Vocal Files

In [None]:
from __future__ import print_function
import time
import boto3

transcribe = boto3.client('transcribe')
job_name = "job name"
job_uri = "https://S3 endpoint/test-transcribe/answer2.wav"
transcribe.start_transcription_job(
    TranscriptionJobName=job_name,
    Media={'MediaFileUri': job_uri},
    MediaFormat='wav',
    LanguageCode='en-US'
)
while True:
    status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
    if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:
        break
    print("Not ready yet...")
    time.sleep(5)
print(status)