DSI BOS 11 (May 2020) Project 5

Alex Golden, Jungmoon Ham, Luke Podsiadlo, Zach Tretter

Workbook 3 - Audio Cleaning

----------

## Clean Audio Files for Improved Transcription 

Relevance to Problem Statement: Transcription is the crucial step between acquiring data and having usable data.  Any steps we can take to improve transcription accuracy, especially given the audio quality of the subject matter, will improve later extraction of location and situational information.


#### Required Libraries/Packages

1. [requests](https://github.com/psf/requests)

2. [boto3](https://github.com/boto/boto3)

3. time

#### Workflow Steps

0. Prework:
    - Set up an AWS account & create an S3 bucket with proper permissions on BOTH the bucket AND the account to which the AWS_KEY and AWS_SECRET are linked.  [AWS Instructions](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_request.html) [Helpful Medium Article](https://medium.com/@aidan.hallett/securing-aws-s3-uploads-using-presigned-urls-aa821c13ae8d)
    - Set up an account on [Dolby.io](https://dolby.io).  It comes with 2000 free minutes of processing time.

1. Establish AWS S3 input and output locations

2. Set cleaning parameters (defaults are established, so only deviations from them need to be set). More info can be found [here](https://dolby.io/developers/media-processing/tutorials/improving-audio-quality).

3. Run the `enhance_multiple_files` function to clean audio files.

In [12]:
# Imports
import boto3
import requests
import time

In [13]:
# Credentials used by every cell below. Replace the placeholders with your own
# values before running, and avoid committing real keys to version control.
dolby_api = "[YOUR DOLBY API KEY HERE]"        # sent as the 'x-api-key' header to Dolby
aws_key = "[YOUR AWS KEY HERE]"                # AWS access key id for the S3 client
aws_secret = "[YOUR AWS SECRET KEY HERE]"      # AWS secret access key for the S3 client

In [14]:
# Builds the presigned "get_object" url for one input file in S3
def create_input_s3_presigned_url(aws_key = '',
                                  aws_secret = '',
                                  bucket_name = '',
                                  input_file = ''):
    """Return a presigned S3 URL for downloading ``input_file``.

    aws_key / aws_secret = credentials handed to the boto3 S3 client

    bucket_name = name of the S3 bucket holding the file

    input_file = full object key of the file inside the bucket

    No expiry is passed, so boto3's default URL lifetime applies.
    """
    # Describe the object the URL should grant read access to.
    object_params = {'Bucket': bucket_name, 'Key': input_file}

    client = boto3.client(
        's3',
        aws_access_key_id = aws_key,
        aws_secret_access_key = aws_secret,
    )
    return client.generate_presigned_url('get_object', Params = object_params)

# Builds the presigned "put_object" url for one output file in S3
def create_output_s3_presigned_url(aws_key = '',
                                   aws_secret = '',
                                   bucket_name = '',
                                   output_path = '',
                                   output_file = ''):
    """Return a presigned S3 URL for uploading the output file.

    aws_key / aws_secret = credentials handed to the boto3 S3 client

    bucket_name = name of the S3 bucket that receives the file

    output_path / output_file = concatenated as-is (no separator is inserted)
    to form the destination object key

    No expiry is passed, so boto3's default URL lifetime applies.
    """
    # The destination key is a plain string concatenation of path and file name.
    destination_key = output_path + output_file
    object_params = {'Bucket': bucket_name, 'Key': destination_key}

    client = boto3.client(
        's3',
        aws_access_key_id = aws_key,
        aws_secret_access_key = aws_secret,
    )
    return client.generate_presigned_url('put_object', Params = object_params)

# Creates a list of our input files
def get_file_keys(
    aws_key = '',
    aws_secret = '',
    bucket_name = '',
    bucket_path = '',
    file_type = ''
):
    """Return the keys in ``bucket_name`` that match a prefix and a suffix.

    aws_key / aws_secret = credentials handed to the boto3 S3 client

    bucket_name = name of the S3 bucket to list

    bucket_path = only keys starting with this string are returned
                  ('' matches every key)

    file_type = only keys ending with this string are returned, ex: '.wav'
                ('' matches every key)

    Returns a list of object keys (str); empty when nothing matches.
    """
    s3_client = boto3.client('s3',
                             aws_access_key_id = aws_key,
                             aws_secret_access_key = aws_secret)

    # Let S3 filter by prefix server-side when one is given.
    list_kwargs = {'Bucket': bucket_name}
    if bucket_path:
        list_kwargs['Prefix'] = bucket_path

    # A single list_objects_v2 call returns only one page of keys, so walk
    # every page; otherwise large buckets are silently truncated.
    paginator = s3_client.get_paginator('list_objects_v2')

    key_list = []
    for page in paginator.paginate(**list_kwargs):
        # 'Contents' is absent from the response when no objects are listed.
        for entry in page.get('Contents', []):
            key = entry['Key']
            if key.startswith(bucket_path) and key.endswith(file_type):
                key_list.append(key)
    return key_list

In [None]:
# Creates the dictionary of tuning parameters
def build_enhancement_json(user_params_dict = None):
    """Build the ``audio`` section of the enhancement request body.

    user_params_dict = dict of overrides for the default settings listed
    below, or None / {} to keep every default. Keys that are not one of the
    defaults are never read and therefore have no effect. The dict passed in
    is not modified.

    Returns a dict of the form ``{"audio": {...}}`` holding the loudness,
    dynamics, noise, filter and speech settings.
    """
    params_dict = {
        'loudness_enable': True,
        'dynamics_enable': True,
        'dynamics_amount': 'max',
        'noise_enable': True,
        'noise_amount': 'max',
        'filter_high_pass_enable': True,
        'filter_high_pass_freq': 80,
        'speech_iso_enable': True,
        'speech_iso_amount': 50,
        'speech_sibilance_enable': True,
        'speech_sibilance_amount': 'low'
    }

    # None (or an empty dict) means "use the defaults" rather than a crash.
    if user_params_dict:
        params_dict.update(user_params_dict)

    # Named 'enhancement' rather than 'json' to avoid shadowing the stdlib module.
    enhancement = {
        "audio": {
            "loudness": {
                "enable": params_dict['loudness_enable']
            },
            "dynamics": {
                "range_control": {
                    "enable": params_dict['dynamics_enable'],
                    "amount": params_dict['dynamics_amount']
                }
            },
            "noise": {
                "reduction": {
                    "enable": params_dict['noise_enable'],
                    "amount": params_dict['noise_amount']
                }
            },
            "filter": {
                "high_pass": {
                    "enable": params_dict['filter_high_pass_enable'],
                    "frequency": params_dict['filter_high_pass_freq']
                }
            },
            "speech": {
                "isolation": {
                    "enable": params_dict['speech_iso_enable'],
                    "amount": params_dict['speech_iso_amount']
                },
                "sibilance": {
                    "reduction": {
                        "enable": params_dict['speech_sibilance_enable'],
                        "amount": params_dict['speech_sibilance_amount']
                    }
                }
            }
        }
    }
    return enhancement

In [None]:
def dolby_enhance_audio(
    dolby_api = '',
    aws_key = '',
    aws_secret = '',
    bucket_name = '',
    input_file = '',
    output_path = '',
    output_file = '',
    get_status_updates = 10,
    user_params_dict = None
):
    """Submit one S3 audio file to the Dolby enhance API and wait for the job.

    dolby_api = Dolby API key, sent as the 'x-api-key' header

    aws_key / aws_secret = credentials used to presign the S3 urls

    bucket_name = S3 bucket holding the input and receiving the output

    input_file = object key of the audio file to enhance

    output_path / output_file = combined into the output object key

    get_status_updates = int -> seconds to wait between job status requests

    user_params_dict = dict of overrides for the default enhancement
    settings, or None / {} to keep every default

    Prints each status response while polling and returns the last one
    (a dict). Raises requests.HTTPError if the submit request is rejected.
    """
    headers = {'x-api-key': dolby_api}

    json_dict = {'input': create_input_s3_presigned_url(aws_key = aws_key,
                                                        aws_secret = aws_secret,
                                                        bucket_name = bucket_name,
                                                        input_file = input_file),
                 'output': create_output_s3_presigned_url(aws_key = aws_key,
                                                          aws_secret = aws_secret,
                                                          bucket_name = bucket_name,
                                                          output_path = output_path,
                                                          output_file = output_file)}

    # Always hand the builder a dict so a None default is safe.
    json_dict.update(build_enhancement_json(user_params_dict or {}))

    post_request_enhance = requests.post('https://api.dolby.com/media/enhance',
                                         headers = headers,
                                         json = json_dict)
    # Surface a rejected submission (bad key, bad urls, ...) as an HTTP error
    # instead of an opaque KeyError on 'job_id' below.
    post_request_enhance.raise_for_status()

    status_url = 'https://api.dolby.com/media/enhance?job_id=' + post_request_enhance.json()['job_id']

    status = requests.get(status_url, headers = headers).json()

    # A freshly submitted job can still be queued ('Pending') before it is
    # 'Running'; keep waiting in both cases so we do not return early.
    while status.get('status') in ('Pending', 'Running'):
        print(status)
        time.sleep(get_status_updates)
        status = requests.get(status_url, headers = headers).json()

    # Show the terminal response (success, failure or error payload).
    print(status)
    return status

In [1]:
def enhance_multiple_files(
    dolby_api = '',
    aws_key = '',
    aws_secret = '',
    bucket_name = '',
    input_bucket_path = '',
    output_path = '',
    output_prefix = '',
    get_status_updates = 10,
    file_type = '.wav',
    user_params_dict = None
):
    """
    Enhance every matching audio file in an S3 bucket, one file at a time.

    *** NOTE YOU NEED TO HAVE AMAZON S3 PERMISSIONS SET UP CORRECTLY ***
    ** For information on setting up s3 permissions please see the following resources:
    ** https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_request.html
    ** https://medium.com/@aidan.hallett/securing-aws-s3-uploads-using-presigned-urls-aa821c13ae8d
    ** additional information can be found at dolby.io

    dolby_api = REQUIRED

    aws_key = REQUIRED

    aws_secret = REQUIRED

    bucket_name = REQUIRED, Name of s3 bucket

    input_bucket_path = Used to specify the input folder in the s3 bucket;
    only keys starting with this string are processed

    output_path = REQUIRED, Used to specify the output folder in the s3 bucket

    output_prefix = Used to add a prefix to each output file name. The output
    file name is output_prefix followed by the part of the input key that
    comes after input_bucket_path.

    !!WARNING!! Requesting too frequently for too long can result in errors
    get_status_updates = int -> Seconds between each get status request

    file_type = 'str' -> specify audio format ex: '.wav', '.mp3',...

    user_params_dict = {dict} -> these are for changing the default values of
    the dolby enhance; None or {} keeps every default

    The following parameters can be entered to tune the audio enhancement,
    more information can be found at
    (https://dolby.io/developers/media-processing/tutorials/improving-audio-quality)

    user_params_dict = {
        'loudness_enable': True,
        'dynamics_enable': True,
        'dynamics_amount': 'max',
        'noise_enable': True,
        'noise_amount': 'max',
        'filter_high_pass_enable': True,
        'filter_high_pass_freq': 80,
        'speech_iso_enable': True,
        'speech_iso_amount': 50,
        'speech_sibilance_enable': True,
        'speech_sibilance_amount': 'low'
        }

    Returns None.
    """
    # Normalise the "no overrides" case to an empty dict for the per-file call.
    overrides = {} if user_params_dict is None else user_params_dict

    key_list = get_file_keys(aws_key = aws_key,
                             aws_secret = aws_secret,
                             bucket_name = bucket_name,
                             bucket_path = input_bucket_path,
                             file_type = file_type)

    for key in key_list:
        dolby_enhance_audio(
            dolby_api = dolby_api,
            aws_key = aws_key,
            aws_secret = aws_secret,
            bucket_name = bucket_name,
            input_file = key,
            output_path = output_path,
            # Strip the input folder from the key, then add the requested prefix.
            output_file = output_prefix + key[len(input_bucket_path):],
            get_status_updates = get_status_updates,
            # Forward the caller's overrides; previously an empty dict was
            # passed here, so custom settings were silently ignored.
            user_params_dict = overrides)

In [None]:
# Overrides for the default enhancement settings; leave empty to use defaults.
params = dict()

# Clean every '.wav' file under the input directory and write the results to
# the output directory of the same bucket. Replace the placeholders first.
enhance_multiple_files(
    bucket_name = '[YOUR BUCKET NAME HERE]',
    input_bucket_path = '[NAME OF INPUT DIRECTORY WITHIN BUCKET HERE]',
    output_path = '[NAME OF OUTPUT DIRECTORY]',
    output_prefix = '[STRING TO PREFIX OUTPUT FILES WITH]',
    file_type = '.wav',
    user_params_dict = params,
    dolby_api = dolby_api,
    aws_key = aws_key,
    aws_secret = aws_secret,
)