In [5]:
import os
import boto3
import botocore
import logging
from sagemaker import get_execution_role
from concurrent import futures

In [12]:
# IAM role attached to this SageMaker session; printed so the reader can
# confirm which identity the S3 calls below will run under.
role = get_execution_role()
print(role)

# Shared S3 client. The connection pool is sized explicitly because the client
# is later used from several worker threads at once.
s3_config = botocore.client.Config(max_pool_connections=50)
s3b = boto3.client('s3', region_name='ap-southeast-1', config=s3_config)

arn:aws:iam::463541169828:role/sagemaker-role


In [16]:
class Migration:
    '''Copies objects from one S3 bucket to another via the local filesystem.

    Each key is first downloaded to a local path derived from the key
    (relative to the current working directory) and then uploaded to the
    destination bucket. Progress and errors are written to ``s3_transfer.log``.

    Attributes:
        src: name of the source bucket.
        dest: name of the destination bucket.
        client: S3 client object providing ``list_objects_v2``,
            ``download_file`` and ``upload_file``.
    '''

    # NOTE: this runs once, when the class body is executed. It discards any
    # handlers already attached to the root logger so that basicConfig() takes
    # effect, then routes all root-logger output to 's3_transfer.log'.
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(filename = 's3_transfer.log', level = logging.INFO)

    def __init__(self, src, dest, client):
        '''Stores the source bucket, destination bucket and S3 client.'''
        self.src = src
        self.dest = dest
        self.client = client

    def key_exists(self, key):
        '''Reports whether `key` is already present in the dest bucket.

        Lists objects whose name starts with `key` and looks for an exact
        match among them.

        Returns:
            True if an object named exactly `key` is listed, otherwise False
            (including when nothing at all matches the prefix).
        '''
        response = self.client.list_objects_v2(Bucket = self.dest, Prefix = key)
        # 'Contents' is absent from the response when no object matches the
        # prefix; treat that the same as "no exact match".
        # NOTE(review): only the first page of results is inspected. This
        # presumably suffices because the exact key should sort ahead of any
        # longer key sharing the prefix - confirm for the bucket type in use.
        return any(obj.get('Key') == key for obj in response.get('Contents', []))

    def download(self, key):
        '''Downloads file from src bucket.

        The file is written to ``os.path.abspath(key)``; missing parent
        directories are created first.

        Returns:
            True if the download succeeded, False if any exception occurred
            (the exception is logged, not raised).
        '''
        try:
            logging.info("%s is being downloaded", key)
            local_file = os.path.abspath(key)
            current_dir = os.path.split(local_file)[0]
            # exist_ok avoids a check-then-create race: several worker threads
            # may try to create the same directory at the same time.
            os.makedirs(current_dir, exist_ok = True)
            self.client.download_file(self.src, key, local_file)
            logging.info("%s downloaded successfully", key)
            return True
        except Exception as ex:
            logging.exception(ex)
            return False

    def upload(self, key):
        '''Uploads file to dest bucket.

        If `key` already exists in the dest bucket, the file is uploaded under
        a new name with ``_copy`` inserted before the extension (for example
        ``dir/name.csv`` becomes ``dir/name_copy.csv``) so the existing object
        is not overwritten.

        Returns:
            True if the upload succeeded, False if any exception occurred
            (the exception is logged, not raised).
        '''
        try:
            logging.info("%s is being uploaded", key)
            local_file = os.path.abspath(key)
            target_key = key
            if self.key_exists(key):
                logging.info("%s already exists", key)
                # splitext only splits on the last dot of the final path
                # component, so keys without an extension, with several dots,
                # or with dots in a folder name are all handled.
                stem, extension = os.path.splitext(key)
                target_key = stem + '_copy' + extension
            self.client.upload_file(local_file, self.dest, target_key)
            logging.info("%s uploaded successfully", target_key)
            return True
        except Exception as ex:
            logging.exception(ex)
            return False

    def transfer(self, keys):
        '''Copies file from src bucket to dest bucket.

        Downloads run concurrently on a pool of 5 worker threads. Each key is
        uploaded from the calling thread as soon as its download finishes.
        Keys whose download reported failure are skipped.

        Args:
            keys: iterable of object keys to copy.
        '''
        with futures.ThreadPoolExecutor(max_workers = 5) as executor:
            logging.info("Transferring...")

            future_to_key_download = {executor.submit(self.download, key): key for key in keys}

            for future in futures.as_completed(future_to_key_download):
                key = future_to_key_download[future]
                # Only an explicit False means "download failed"; there is no
                # local file to upload in that case.
                if future.result() is False:
                    logging.error("%s was not downloaded; skipping upload", key)
                    continue
                self.upload(key)

        logging.info('Transfer completed')

In [None]:
# Source and destination buckets for the trial migration.
src = "yara-sh-dads-scd"
dest = "yara-sh-dads-scd-stage"

# A handful of per-state soil-test files used to exercise the transfer.
state_keys = [
    'state_soil_tests/Andaman_and_Nicobar_Islands.csv',
    'state_soil_tests/Andhra_Pradesh.csv',
    'state_soil_tests/Arunachal_Pradesh.csv',
    'state_soil_tests/Assam.csv',
    'state_soil_tests/Bihar.csv',
]

test_migrate = Migration(src, dest, s3b)
test_migrate.transfer(state_keys)