In [2]:
import os
import boto3
import botocore
import logging
from functools import lru_cache
import multiprocess as mp
import time

In [2]:
def aws_session(aws_profile=None):
    """Resolve AWS credentials and return them as boto3 keyword arguments.

    A default ``boto3.Session`` is created first. If its credentials were
    read from the shared credentials file (i.e. the code runs on a local
    machine), or if the default chain found no credentials at all and a
    profile name was supplied, the credentials of ``aws_profile`` are used
    instead.

    Params:
        aws_profile (string): credentials profile name to use in the cases
            described above; ``None`` means the default profile.
    Returns:
        (dict): ``aws_access_key_id``, ``aws_secret_access_key`` and
            ``aws_session_token``, ready to be splatted into a boto3
            client/session constructor.
    Raises:
        botocore.exceptions.NoCredentialsError: no credentials could be
            located.
    """
    creds = boto3.Session().get_credentials()

    # get_credentials() returns None when nothing could be resolved, so the
    # ``method`` attribute may only be read after checking for that.
    from_shared_file = (
        creds is not None and creds.method == 'shared-credentials-file'
    )
    nothing_found_but_profile_given = creds is None and aws_profile is not None
    if from_shared_file or nothing_found_but_profile_given:
        creds = boto3.Session(profile_name=aws_profile).get_credentials()

    if creds is None:
        raise botocore.exceptions.NoCredentialsError()

    # Take one consistent snapshot: refreshable credentials could otherwise
    # rotate between reading the key, the secret and the token.
    frozen = creds.get_frozen_credentials()
    return {
        'aws_access_key_id': frozen.access_key,
        'aws_secret_access_key': frozen.secret_key,
        'aws_session_token': frozen.token
    }

In [3]:
class Migration:
    """Copy objects from one S3 bucket to another via the local disk.

    Every key is downloaded from ``src`` to a local path derived from the
    key, uploaded to ``dest`` under the same key, and the local copy is then
    removed. Keys are read from a text file, one key per line, and processed
    in parallel by a process pool.

    Params:
        src (string): name of the bucket to read from.
        dest (string): name of the bucket to write to.
        profile_name (string): credentials profile used for every S3 client.
        region_name (string): region used for every S3 client.
        attempts (int): how many times a download or an upload is tried
            before giving up on a key.
        keys_file (string): path of the file listing the keys to transfer.
    """

    # Logging is configured when the class body is executed: handlers
    # already attached to the root logger are dropped first, otherwise
    # ``basicConfig`` would be a no-op, then records go to s3_transfer.log.
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(filename='s3_transfer.log', level=logging.INFO)

    def __init__(self, src, dest, profile_name='default',
                 region_name='ap-southeast-1', attempts=2,
                 keys_file='keys.txt'):
        self.src = src
        self.dest = dest
        self.profile_name = profile_name
        self.region_name = region_name
        self.attempts = attempts
        self.keys_file = keys_file

    def multi(self, function, arguments):
        """Run a given function across multiple processes.

        Params:
            function (callable): called once per element of ``arguments``.
            arguments (iterable): one positional argument per task.
        Returns:
            (list): one ``AsyncResult`` per task, all of them completed.
        """
        pool = mp.Pool(processes=mp.cpu_count())
        try:
            pool_results = [
                pool.apply_async(function, args=(argument, ))
                for argument in arguments
            ]
        finally:
            # Always release the workers, even if submitting a task failed.
            pool.close()
            pool.join()

        # An exception raised in a worker is only stored on its result
        # object; log it here so that a failed task is not silently lost.
        for result in pool_results:
            try:
                result.get()
            except Exception as ex:
                logging.exception(ex)
        return pool_results

    def _client(self):
        """Create a fresh S3 client for the configured profile and region."""
        session = boto3.Session(profile_name=self.profile_name)
        return session.client('s3', region_name=self.region_name)

    def _with_retries(self, action, key, start_msg, done_msg):
        """Call ``action(s3_client)`` up to ``self.attempts`` times.

        Every failure is logged with its traceback. Returns True as soon as
        one attempt succeeds and False when all attempts failed.
        """
        for _ in range(self.attempts):
            try:
                logging.info(start_msg, key)
                # A new client per attempt, so a retry does not reuse the
                # session that has just failed.
                action(self._client())
                logging.info(done_msg, key)
            except Exception as ex:
                logging.exception(ex)
            else:
                return True
        return False

    def download(self, key):
        '''Downloads file from src bucket.

        Returns True when the object was downloaded, False otherwise.
        '''
        def fetch(s3):
            local_file = os.path.abspath(key)
            # exist_ok avoids a failure when another worker process creates
            # the same directory between a check and the creation.
            os.makedirs(os.path.dirname(local_file), exist_ok=True)
            s3.download_file(self.src, key, local_file)

        return self._with_retries(fetch, key,
                                  "%s is being downloaded",
                                  "%s downloaded successfully")

    def upload(self, key):
        '''Uploads file to dest bucket.

        Returns True when the object was uploaded, False otherwise.
        '''
        def push(s3):
            s3.upload_file(os.path.abspath(key), self.dest, key)

        return self._with_retries(push, key,
                                  "%s is being uploaded",
                                  "%s uploaded successfully")

    def transfer(self, key):
        """Download ``key``, upload it and delete the local copy.

        The upload is skipped when the download failed. The local file is
        removed whether or not the upload succeeded.

        Returns True when the object reached the destination bucket.
        """
        if not self.download(key):
            logging.error("%s was not downloaded; upload skipped", key)
            return False
        try:
            return self.upload(key)
        finally:
            local_file = os.path.abspath(key)
            if os.path.exists(local_file):
                os.remove(local_file)

    def _run(self, keys):
        """Transfer ``keys`` in parallel and log the elapsed time."""
        start_time = time.time()

        logging.info('Transferring...')
        self.multi(self.transfer, keys)
        logging.info('Transfer completed')

        time_taken = time.time() - start_time
        logging.info("took %s to run", time_taken)

    def transfer_all(self):
        '''
        Copies file from src bucket to dest bucket using multiprocessing.
        Prints to log total time taken for this process.
        '''
        with open(self.keys_file, 'r') as key_file:
            keys = [line.rstrip('\n') for line in key_file]
        # A blank line is not an object key; skip it rather than submit it.
        self._run([key for key in keys if key])

    def transfer_1000(self, limit=1000):
        """Transfer only the keys found in the first ``limit`` lines.

        A key file shorter than ``limit`` lines is transferred entirely.
        """
        with open(self.keys_file) as key_file:
            # zip() stops at the shorter input, so a short file simply
            # yields fewer keys instead of raising StopIteration.
            head = [line.strip() for _, line in zip(range(limit), key_file)]
        self._run([key for key in head if key])

In [None]:
# Source and destination buckets for this migration run.
src, dest = "yara-sh-dads-scd", "yara-sh-dads-scd-stage"

# Build the migration and copy every key listed in keys.txt.
test_migrate = Migration(src=src, dest=dest)
test_migrate.transfer_all()