In [1]:
import os
import boto3
import botocore
import logging
from functools import lru_cache
import multiprocess as mp
import time

In [7]:
def aws_session(aws_profile=None):
    """Create a a boto3 session.
    Params:
        (string): credentials profile name
    Returns:
        (boto3 client object)
    """
    session = boto3.Session()
    # If the session is run on a local machine, with AWS credentials fetched
    # from a shared file, use the DataScience role profile.
    if session.get_credentials().method == 'shared-credentials-file':
        session = boto3.Session(profile_name=aws_profile)
    creds = session.get_credentials()
    result={
        'aws_access_key_id': creds.access_key,
        'aws_secret_access_key': creds.secret_key,
        'aws_session_token': creds.token
    }
    return result
CREDS = aws_session()
@lru_cache()
def s3_client():
    """Cache a boto3 client with credentias and MFA token."""
    return boto3.client(
            service_name='s3',
            aws_access_key_id=CREDS['aws_access_key_id'],
            aws_secret_access_key=CREDS['aws_secret_access_key'],
            aws_session_token=CREDS['aws_session_token'],
            # config = botocore.client.Config(max_pool_connections = 50)
    )

In [14]:
class Migration:
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(filename='s3_transfer.log', level=logging.INFO)

    def __init__(self, src, dest, client):
        self.src = src
        self.dest = dest
        self.client = client
        
    def generate_keys(self):
        '''Generates a list of 100 keys from src bucket'''
        s3_100_keys = []
        response = self.client.list_objects_v2(Bucket=self.src, 
                                MaxKeys=100)
        for content in response['Contents']:
            key = content['Key']
            s3_100_keys += [key] if not key[-1] == '/' else []
        return s3_100_keys

    def download(self, key):
        '''Downloads file from src bucket.'''
        try:
            logging.info("%s is being downloaded", key)
            local_file = os.path.abspath(key)
            current_dir = os.path.split(local_file)[0]
            if not os.path.exists(current_dir):
                os.makedirs(current_dir)
            self.client.download_file(self.src, key, local_file)
            logging.info("%s downloaded successfully", key)
        except Exception as ex:
            logging.exception(ex)

    def upload(self, key):
        '''Uploads file to dest bucket.'''
        try:
            logging.info("%s is being uploaded", key)
            local_file = os.path.abspath(key)
            # upload into a copy of the file
            key = key.split('/')[-2] + '_copy/' + key.split('/')[-1]
            self.client.upload_file(local_file, self.dest, key)
            logging.info("%s uploaded successfully", key)
        except Exception as ex:
            logging.exception(ex)

    def transfer(self):
        '''Copies file from src bucket to dest bucket.'''
        start_time = time.time()
        logging.info("Transferring...")

        keys = self.generate_keys()
        pool = mp.Pool(5)
        pool.map(self.download, keys)
        pool.map(self.upload, keys)

        logging.info('Transfer completed')
        time_taken = time.time() - start_time
        logging.info("took %s to run", time_taken)

In [18]:
src = "yara-sh-dads-scd"
dest = "yara-sh-dads-scd-stage"
test_migrate = Migration(src, dest, s3_client())
test_migrate.transfer()

PicklingError: Can't pickle <class 'botocore.client.S3'>: it's not found as botocore.client.S3