In [1]:
import json 
import boto3

In [3]:
config_data = json.load(open('./../config.json'))
aws_access_key_id = config_data['aws_access_key_id']
aws_secret_access_key = config_data['aws_secret_access_key']
config_data.keys()

dict_keys(['aws_access_key_id', 'aws_secret_access_key'])

In [4]:
# create a s3 client
s3_client = boto3.client('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)

In [5]:
## list all folder in a s3 bucket 
import boto3

def list_s3_folders(bucket_name):
    s3_client = boto3.client('s3')
    
    # List objects in the bucket
    paginator = s3_client.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket_name)
    
    folders = set()
    
    for page in pages:
        if 'Contents' in page:
            for obj in page['Contents']:
                key = obj['Key']
                # Extract the folder name
                folder = key.split('/')[0]
                folders.add(folder)
    
    return sorted(folders)

bucket_name = 'bindcraft'
folders = list_s3_folders(bucket_name)
print("Folders in S3 bucket:")
for folder in folders:
    print(folder)

Folders in S3 bucket:
adaptyv-bio-pdc
pipeline.png
snake-venom-binder


In [10]:
import boto3
import os

def download_s3_folder(bucket_name, s3_folder, local_dir):
    s3_client = boto3.client('s3')
    
    # Ensure the local directory exists
    if not os.path.exists(local_dir):
        os.makedirs(local_dir)
    
    # List objects in the specified S3 folder
    paginator = s3_client.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket_name, Prefix=s3_folder)
    
    for page in pages:
        if 'Contents' in page:
            for obj in page['Contents']:
                s3_key = obj['Key']
                local_file_path = os.path.join(local_dir, os.path.relpath(s3_key, s3_folder))
                
                # Ensure the local directory exists
                local_file_dir = os.path.dirname(local_file_path)
                if not os.path.exists(local_file_dir):
                    os.makedirs(local_file_dir)
                
                # Download the file
                s3_client.download_file(bucket_name, s3_key, local_file_path)
                print(f"Downloaded {s3_key} to {local_file_path}")

# Example usage
bucket_name = 'bindcraft'
s3_folder = '2502152323'
local_dir = f'./out/bindcraft/{s3_folder}'

download_s3_folder(bucket_name, s3_folder, local_dir)

Downloaded 2502152323/Accepted/1yi5_l88_s585564_mpnn12_model1.pdb to ./out/bindcraft/2502152323/Accepted/1yi5_l88_s585564_mpnn12_model1.pdb
Downloaded 2502152323/Accepted/1yi5_l88_s585564_mpnn5_model1.pdb to ./out/bindcraft/2502152323/Accepted/1yi5_l88_s585564_mpnn5_model1.pdb
Downloaded 2502152323/Accepted/Animation/1yi5_l88_s585564.html to ./out/bindcraft/2502152323/Accepted/Animation/1yi5_l88_s585564.html
Downloaded 2502152323/Accepted/Plots/1yi5_l88_s585564_con.png to ./out/bindcraft/2502152323/Accepted/Plots/1yi5_l88_s585564_con.png
Downloaded 2502152323/Accepted/Plots/1yi5_l88_s585564_i_con.png to ./out/bindcraft/2502152323/Accepted/Plots/1yi5_l88_s585564_i_con.png
Downloaded 2502152323/Accepted/Plots/1yi5_l88_s585564_i_pae.png to ./out/bindcraft/2502152323/Accepted/Plots/1yi5_l88_s585564_i_pae.png
Downloaded 2502152323/Accepted/Plots/1yi5_l88_s585564_i_ptm.png to ./out/bindcraft/2502152323/Accepted/Plots/1yi5_l88_s585564_i_ptm.png
Downloaded 2502152323/Accepted/Plots/1yi5_l88_s5

In [6]:
# 1. Get S3 folders
def get_s3_folders(bucket_name, aws_access_key_id, aws_secret_access_key,
                   prefix='snake-venom-binder/'):
    """
    Retrieves a list of top-level folder names from a specified S3 path.
    """
    s3_client = boto3.client('s3',
                             aws_access_key_id=aws_access_key_id,
                             aws_secret_access_key=aws_secret_access_key)

    paginator = s3_client.get_paginator('list_objects_v2')
    folders = set()

    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix, Delimiter='/'):
        for common_prefix in page.get('CommonPrefixes', []):
            # Extract folder name after the main prefix
            folder_path = common_prefix.get('Prefix')
            if folder_path.startswith(prefix):
                folder_name = folder_path[len(prefix):].split('/')[0]
                if folder_name: # Ensure it's not an empty string
                    folders.add(folder_name)
    return sorted(list(folders))

In [7]:
# Load AWS credentials
try:
    with open('./../config.json', 'r', encoding='utf-8') as f:
        config = json.load(f)
except FileNotFoundError:
    print("Error: config.json not found. Please create it with AWS credentials.")
except json.JSONDecodeError:
    print("Error: config.json is not a valid JSON file.")
  
aws_access_key_id = config.get('aws_access_key_id')
aws_secret_access_key = config.get('aws_secret_access_key')

bucket_name = 'bindcraft'
s3_prefix = 'snake-venom-binder/' # Corrected variable name for clarity
local_base_download_dir = './../out/bindcraft/snake-venom-binder'

# Get all S3 folders with prefix
s3_folders = get_s3_folders(bucket_name, aws_access_key_id, aws_secret_access_key, prefix=s3_prefix)
print(s3_folders)

['2501171452', '2501171625', '2501180040', '2501180802', '2501180828', '2501180951', '2501181203', '2501181208', '2501192206', '2501192248', '2501222051', '2501230908', '2502082313', '2502091148', '2502151901', '2502152311', '2502152323', '2502160913', '2502161339', '2502161344', '2502170019', '2502170139', '2502170839', '2503252313', '2505182302', '2505251415', '2505251549', '2505261957', '2505271950', '2505280823', '2505280909', '2505281945', '2505291641', '2505291648', '2505291923', '2505300903', '2506121050', '2506121235', '2506151920', '2506162158', '2506170912', '2506171955', '2506172008', '2506172015', '2506172129']
