In [None]:
import logging
import os
import shutil

import boto3 # AWS SDK for Python
import cv2
import numpy as np
import pandas as pd

In [None]:
# Load the image-level metadata table from its parquet file.
metadata_df = pd.read_parquet("/Users/cjdonahoe/github/cellvit/metadata/metadata.parquet")

# Peek at the two columns used to build destination paths.
print(metadata_df[['Image_Name', 'Treatment']].head(5))

# Show what remains of the first five image names once their first
# 21 characters are stripped.
for image_name in metadata_df['Image_Name'].head(5):
    substr_raw = image_name[21:]
    print(substr_raw)

In [None]:
# Read the AWS credentials from the environment instead of writing them into
# the notebook. The previous version assigned '' to both variables, which
# clobbered any real credentials already exported in the shell.
#
# Unset or blank variables become None so that passing them to boto3 lets it
# fall back to its default credential chain rather than using empty keys.
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID') or None
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY') or None

In [None]:
# Low-level S3 client shared by the helper functions below; it is built with
# the credentials read in the previous cell.
s3_client = boto3.client(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)

In [None]:
# High-level S3 resource plus a Bucket handle for the dataset bucket.
# NOTE(review): unlike `s3_client`, this resource is created without the
# explicit access keys, so it uses boto3's default credential lookup.
s3 = boto3.resource('s3')
bucket = s3.Bucket('cpg0019-moshkov-deepprofiler')

# List the immediate "sub-folders" (common prefixes) below an S3 key prefix,
# e.g. everything directly under 'broad/training_images/BBBC021/'.
def get_prefixes(bucket, prefix, client=None):
    """Return the common prefixes found directly under ``prefix``.

    The listing uses the ``list_objects_v2`` paginator with ``Delimiter='/'``,
    so every page of results is consumed.

    :param bucket: Name of the S3 bucket (a string, not a ``Bucket`` resource)
    :param prefix: Key prefix to look under
    :param client: Optional S3 client to use; when omitted the notebook-level
        ``s3_client`` is used
    :return: List of prefix strings, in the order the service returned them
    """
    if client is None:
        client = s3_client
    paginator = client.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/')
    # Pages without any sub-folder carry no 'CommonPrefixes' entry at all.
    return [
        common_prefix['Prefix']
        for page in pages
        for common_prefix in page.get('CommonPrefixes', [])
    ]

# Walk every object under the BBBC021 prefix, echoing the full key followed by
# the key with its first 21 characters removed.
for s3_object in bucket.objects.filter(Prefix='broad/training_images/BBBC021/'):
    full_key = s3_object.key
    substr_raw = full_key[21:]
    print(full_key)
    print(substr_raw)


def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :return: True if file was uploaded, else False
    """

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    client = boto3.client('s3')
    # The exception classes are reached through objects already in scope
    # (the client and the boto3 package) so no extra import is required.
    # boto3's managed upload re-raises service errors as S3UploadFailedError,
    # so both types are treated as "upload failed".
    upload_errors = (
        client.exceptions.ClientError,
        boto3.exceptions.S3UploadFailedError,
    )
    try:
        client.upload_file(file_name, bucket, object_name)
    except upload_errors as e:
        logging.error(e)
        return False
    return True

In [None]:
def folder_exists_and_not_empty(bucket:str, path:str, client=None) -> bool:
    '''
    Report whether an S3 "folder" (key prefix) exists and is not empty.

    A trailing '/' is appended to ``path`` when missing. The folder counts as
    non-empty when the listing returns at least one object directly under it
    ('Contents') or at least one sub-folder ('CommonPrefixes').

    :param bucket: Name of the S3 bucket
    :param path: Folder path (key prefix), with or without trailing '/'
    :param client: Optional S3 client; a default ``boto3.client('s3')`` is
        created when omitted
    :return: True if anything was listed under the folder, else False
    '''
    s3 = client if client is not None else boto3.client('s3')
    if not path.endswith('/'):
        path = path + '/'
    resp = s3.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
    # With a delimiter, nested content is rolled up into 'CommonPrefixes', so
    # a folder holding only sub-folders has no 'Contents' entry.
    return 'Contents' in resp or 'CommonPrefixes' in resp

In [None]:
# Probe the BBBC037 image folder: first whether anything is stored under it,
# then which sub-folders sit directly below it.
print(
    folder_exists_and_not_empty(
        'cpg0019-moshkov-deepprofiler',
        'broad/training_images/BBBC037',
    )
)
print(
    get_prefixes(
        'cpg0019-moshkov-deepprofiler',
        'broad/training_images/BBBC037/',
    )
)


In [None]:
# Keep the BBBC037 sub-folder prefixes in a variable and print them.
list_prefixes = get_prefixes(
    'cpg0019-moshkov-deepprofiler',
    'broad/training_images/BBBC037/',
)
print(list_prefixes)

In [None]:
# Scratch version of the per-row logic used by copy_and_rename_img(): make
# sure every treatment of one collection has a target directory.
# NOTE(review): the original condition was left blank (`if :`) and its comment
# mentioned S3, but the body called os.makedirs, so the local-filesystem check
# from copy_and_rename_img() is used here. Confirm this is the intended target.
COLLECTION = 'BBBC037'
# Must end with '/' because paths below are built by string concatenation.
ROOT_PATH = "/Users/cjdonahoe/Documents/personal/ucb-mids/210-capstone/cellpainting-gallery/cpg0019-moshkov-deepprofiler/broad/workspace_dl/training_images/"

single_collection_df = metadata_df[metadata_df['Collection'] == COLLECTION]
src = None  # stays None when the collection has no rows
for index, row in single_collection_df.iterrows():
    # get the full path to each image in single_collection_df
    src = ROOT_PATH + row['Image_Name'][22:]
    # create the treatment directory unless it already exists
    os.makedirs(ROOT_PATH + COLLECTION + "/" + row['Treatment'], exist_ok=True)

# Display the last source path that was built.
src

In [None]:
BUCKET = 'cpg0019-moshkov-deepprofiler'
PREFIX = 'broad/training_images/BBBC037/'

# Materialise every object summary under PREFIX and show the list as the
# cell output.
bucket = s3.Bucket(BUCKET)
objs = [s3_object for s3_object in bucket.objects.filter(Prefix=PREFIX)]
objs

In [None]:
def s3_folder_exists(bucket, folder, s3_resource=None):
    """Check whether a folder marker object exists in an S3 bucket.

    Returns True only when the first object listed under ``folder`` has a key
    exactly equal to ``folder``. A prefix that merely has files beneath it,
    with no object named ``folder`` itself, yields False.

    :param bucket: Name of the S3 bucket
    :param folder: Exact key of the folder marker, e.g. 'a/b/'
    :param s3_resource: Optional S3 resource; a default
        ``boto3.resource('s3')`` is created when omitted
    :return: True if the first listed key equals ``folder``, else False
    """
    s3 = s3_resource if s3_resource is not None else boto3.resource('s3')
    # Only the first listed object is ever inspected, so request just one
    # rather than materialising the whole listing.
    # NOTE(review): relies on S3 returning keys in ascending order so that a
    # key equal to the prefix is listed first — the original had the same
    # assumption.
    first_objects = list(s3.Bucket(bucket).objects.filter(Prefix=folder).limit(1))
    return bool(first_objects) and first_objects[0].key == folder

In [None]:
def create_s3_directory(bucket_name, directory_name, client=None):
    """Create a directory in an S3 bucket

    The directory is represented by an object with no body whose key is
    ``directory_name`` with a trailing '/' (added when missing).

    :param bucket_name: Bucket to create directory in
    :param directory_name: Directory to create
    :param client: Optional S3 client; when omitted the notebook-level
        ``s3_client`` is used
    :return: True if directory was created, else False
    """
    if client is None:
        client = s3_client

    # Normalise the key so it always ends with the folder separator.
    key = directory_name if directory_name.endswith('/') else directory_name + '/'

    # Create the folder marker object
    try:
        client.put_object(Bucket=bucket_name, Key=key)
    # The exception class is taken from the client itself, so it is in scope
    # without importing anything extra.
    except client.exceptions.ClientError as e:
        logging.error(e)
        return False
    return True

In [None]:
def get_s3_keys(bucket, prefix=None, client=None):
    """Get a list of keys in an S3 bucket.

    All result pages are read through the ``list_objects_v2`` paginator, so
    the result is not cut off at the size of a single response.

    :param bucket: Name of the S3 bucket.
    :param prefix: Optional key prefix; when None every key in the bucket is
        listed.
    :param client: Optional S3 client; when omitted the notebook-level
        ``s3_client`` is used.
    :return: List of keys in the bucket (empty when nothing matches).
    """
    if client is None:
        client = s3_client

    # Only send Prefix when one was given, so that None is never passed to
    # the API as a prefix value.
    list_kwargs = {'Bucket': bucket}
    if prefix is not None:
        list_kwargs['Prefix'] = prefix

    paginator = client.get_paginator('list_objects_v2')
    # Pages with no matching objects have no 'Contents' entry.
    return [
        obj['Key']
        for page in paginator.paginate(**list_kwargs)
        for obj in page.get('Contents', [])
    ]

In [None]:
# Reduce the BBBC037 keys to the distinct values of their fifth
# '/'-separated component and print them.
s3_key_list = {
    key.split('/')[4]
    for key in get_s3_keys(
        'cpg0019-moshkov-deepprofiler',
        prefix='broad/training_images/BBBC037/',
    )
}
print(s3_key_list)

In [None]:
def copy_rename_move_s3_key(bucket_name, old_key, new_key, client=None):
    """Copy an S3 object to a new key within the same bucket

    Despite the name, the object at ``old_key`` is not deleted: this function
    only issues a copy.

    :param bucket_name: Bucket to copy key from (and into)
    :param old_key: Old key name
    :param new_key: New key name
    :param client: Optional S3 client; when omitted the notebook-level
        ``s3_client`` is used
    :return: True if key was copied, else False
    """
    if client is None:
        client = s3_client

    copy_source = {'Bucket': bucket_name, 'Key': old_key}

    # Copy the object
    try:
        client.copy(copy_source, bucket_name, new_key)
    # The exception class is taken from the client itself, so it is in scope
    # without importing anything extra.
    except client.exceptions.ClientError as e:
        logging.error(e)
        return False
    return True

In [None]:
# Create the BBBC037_pytorch folder next to the original collection; the
# returned success flag is shown as the cell output.
create_s3_directory(
    bucket_name='cpg0019-moshkov-deepprofiler',
    directory_name='broad/training_images/BBBC037_pytorch/',
)

In [None]:
# For each sub-folder prefix under BBBC037_pytorch/, keep the five characters
# that precede its final character.
# NOTE(review): presumably this extracts a fixed-width folder name in front of
# the trailing '/' — confirm the names are always five characters long.
x = [
    sub_prefix[-6:-1]
    for sub_prefix in get_prefixes(
        'cpg0019-moshkov-deepprofiler',
        'broad/training_images/BBBC037_pytorch/',
    )
]
x

In [None]:
file_prefix = 'broad/training_images/BBBC021/'

# Single (unpaginated) listing request for the BBBC021 prefix.
result = s3_client.list_objects_v2(
    Bucket='cpg0019-moshkov-deepprofiler',
    Prefix=file_prefix,
)

# Rebuild the resource and Bucket handle used by later cells.
s3 = boto3.resource('s3')
bucket = s3.Bucket('cpg0019-moshkov-deepprofiler')

In [None]:
def copy_and_rename_img(root_path, collection: str, metadata=None):
    """Copy one collection's images into per-treatment folders on local disk.

    For every metadata row whose 'Collection' equals ``collection``, the image
    at ``root_path + Image_Name[22:]`` is copied to
    ``root_path + collection/<Treatment>/<Plate>_<Well>_<Site>_<PathId>``.
    Treatment directories are created as needed.

    :param root_path: Root directory of the images; paths are built by string
        concatenation, so it must end with a path separator
    :param collection: Value of the 'Collection' column to process
    :param metadata: Optional metadata DataFrame; when omitted the
        notebook-level ``metadata_df`` is used
    :return: None
    """
    if metadata is None:
        metadata = metadata_df
    # get the metadata for a single collection
    single_collection_df = metadata[metadata['Collection'] == collection]
    # iterate through the rows of the single collection metadata
    for _, row in single_collection_df.iterrows():
        # get the full path to each image in single_collection_df
        src = root_path + row['Image_Name'][22:]
        # create the treatment directory unless it already exists
        treatment_dir = root_path + collection + "/" + row['Treatment']
        os.makedirs(treatment_dir, exist_ok=True)
        # store the new path for the image in dst
        dst = treatment_dir + "/" + row['Metadata_Plate'] + "_" + row['Metadata_Well'] + "_" + row['Metadata_Site'] + "_" + row['PathId']
        # copy the image to the new path
        shutil.copy(src, dst)
    return

# Example call (local copy): copy_and_rename_img(ROOT_PATH, 'BBBC037')

In [None]:
def copy_and_rename_img_s3(src_bucket:str, dst_bucket:str, collection:str, metadata=None, s3_resource=None):
    """Copy one collection's images between S3 buckets into per-treatment keys.

    For every metadata row whose 'Collection' equals ``collection``, the object
    'broad/training_images/' + Image_Name[22:] in ``src_bucket`` is copied to
    'broad/training_images/<collection>/<Treatment>/<Plate>_<Well>_<Site>_<PathId>'
    in ``dst_bucket``. A progress line is printed every 5000 copies.

    :param src_bucket: Name of the bucket holding the original images
    :param dst_bucket: Name of the bucket receiving the renamed copies
    :param collection: Value of the 'Collection' column to process
    :param metadata: Optional metadata DataFrame; when omitted the
        notebook-level ``metadata_df`` is used
    :param s3_resource: Optional S3 resource; a default
        ``boto3.resource('s3')`` is created when omitted
    :return: None
    """
    if metadata is None:
        metadata = metadata_df
    s3 = s3_resource if s3_resource is not None else boto3.resource('s3')
    # get the metadata for a single collection
    single_collection_df = metadata[metadata['Collection'] == collection]
    # the destination bucket handle does not change per row, so build it once
    bucket = s3.Bucket(dst_bucket)
    file_counter = 0
    # iterate through the rows of the single collection metadata
    for _, row in single_collection_df.iterrows():
        # source object for this image
        copy_source = {
            'Bucket': src_bucket,
            'Key': 'broad/training_images/' + row['Image_Name'][22:]
        }
        dst_key = 'broad/training_images/' + collection + "/" + row['Treatment'] + "/" + row['Metadata_Plate'] + "_" + row['Metadata_Well'] + "_" + row['Metadata_Site'] + "_" + row['PathId']
        bucket.copy(copy_source, dst_key)
        file_counter += 1
        if file_counter % 5000 == 0:
            print(f"{file_counter} files copied")
    return

In [None]:
# Copy the BBBC037 images from the dataset bucket into the 'bbbc037-pytorch'
# bucket under per-treatment keys.
copy_and_rename_img_s3(
    src_bucket='cpg0019-moshkov-deepprofiler',
    dst_bucket='bbbc037-pytorch',
    collection='BBBC037',
)

In [None]:
# Local copy of the training images. The path must end with '/' because
# copy_and_rename_img() builds paths by string concatenation.
ROOT_PATH = "/Users/cjdonahoe/Documents/personal/ucb-mids/210-capstone/cellpainting-gallery/cpg0019-moshkov-deepprofiler/broad/workspace_dl/training_images/"
# Use the local-filesystem variant here: copy_and_rename_img_s3() requires
# source and destination bucket names, so calling it with a root path and no
# dst_bucket raised a TypeError.
copy_and_rename_img(ROOT_PATH, collection='BBBC037')

In [None]:
def rename_s3_object():
    """Print each BBBC037 object key, then the same key without the prefix.

    Nothing is renamed or modified in S3; this only lists and prints.
    """
    prefix = 'broad/training_images/BBBC037/'
    bucket = boto3.resource('s3').Bucket('cpg0019-moshkov-deepprofiler')
    for s3_object in bucket.objects.filter(Prefix=prefix):
        key = s3_object.key
        print(key)
        # Portion of the key that follows the collection prefix.
        print(key[len(prefix):])

In [None]:
# Print every BBBC037 key together with its prefix-stripped form.
rename_s3_object()