In [1]:
# design_protocol_path = None
design_protocol_path = "custom.json"

if design_protocol_path is not None: 
        advanced_settings_path = ("/root/bindcraft/settings_advanced/" + design_protocol_path)
else: 
    advanced_settings_path = (
        "/root/bindcraft/settings_advanced/"
        + "default.json"
    )

print(advanced_settings_path)

/root/bindcraft/settings_advanced/custom.json


In [2]:
import json 
import boto3

In [3]:
config_data = json.load(open('config.json'))
aws_access_key_id = config_data['aws_access_key_id']
aws_secret_access_key = config_data['aws_secret_access_key']

In [4]:
config_data.keys()

dict_keys(['aws_access_key_id', 'aws_secret_access_key'])

In [20]:
# s3 bucket detials 
bucket_name = "bindcraft"
s3_key = "pipeline.png"

In [21]:
local_file_path = "pipeline.png"


In [5]:
# create a s3 client
s3_client = boto3.client('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)

In [23]:
# upload the file to s3
try: 
    response = s3_client.upload_file(local_file_path, bucket_name, s3_key)
    print(f"file upload successful: {bucket_name}")
except Exception as e:
    print(f"file upload failed: {e}")


file upload successful: bindcraft


In [16]:
## list all folder in a s3 bucket 
import boto3

def list_s3_folders(bucket_name):
    s3_client = boto3.client('s3')
    
    # List objects in the bucket
    paginator = s3_client.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket_name)
    
    folders = set()
    
    for page in pages:
        if 'Contents' in page:
            for obj in page['Contents']:
                key = obj['Key']
                # Extract the folder name
                folder = key.split('/')[0]
                folders.add(folder)
    
    return sorted(folders)

# Example usage
bucket_name = 'bindcraft'
folders = list_s3_folders(bucket_name)
print("Folders in S3 bucket:")
for folder in folders:
    print(folder)

Folders in S3 bucket:
2412111959
2412112016
2412120836
2412120937
2412121056
2412121103
2412121113
2412121450
2412142141
2412150205
2412151122
2412151123
2412151601
2412151733
2412241939
2412242139
2412242155
2412250028
2412250101
2412250846
pipeline.png


In [17]:
import boto3
import os

def download_s3_folder(bucket_name, s3_folder, local_dir):
    s3_client = boto3.client('s3')
    
    # Ensure the local directory exists
    if not os.path.exists(local_dir):
        os.makedirs(local_dir)
    
    # List objects in the specified S3 folder
    paginator = s3_client.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket_name, Prefix=s3_folder)
    
    for page in pages:
        if 'Contents' in page:
            for obj in page['Contents']:
                s3_key = obj['Key']
                local_file_path = os.path.join(local_dir, os.path.relpath(s3_key, s3_folder))
                
                # Ensure the local directory exists
                local_file_dir = os.path.dirname(local_file_path)
                if not os.path.exists(local_file_dir):
                    os.makedirs(local_file_dir)
                
                # Download the file
                s3_client.download_file(bucket_name, s3_key, local_file_path)
                print(f"Downloaded {s3_key} to {local_file_path}")

# Example usage
bucket_name = 'bindcraft'
s3_folder = '2412250846'
local_dir = f'./out/bindcraft/{s3_folder}'

download_s3_folder(bucket_name, s3_folder, local_dir)

Downloaded 2412250846/Trajectory/Clashing/6aru_final_chain_A_domain_3_l50_s460048.pdb to ./out/bindcraft/2412250846/Trajectory/Clashing/6aru_final_chain_A_domain_3_l50_s460048.pdb
Downloaded 2412250846/Trajectory/Clashing/6aru_final_chain_A_domain_3_l50_s4779.pdb to ./out/bindcraft/2412250846/Trajectory/Clashing/6aru_final_chain_A_domain_3_l50_s4779.pdb
Downloaded 2412250846/Trajectory/Clashing/6aru_final_chain_A_domain_3_l50_s533457.pdb to ./out/bindcraft/2412250846/Trajectory/Clashing/6aru_final_chain_A_domain_3_l50_s533457.pdb
Downloaded 2412250846/Trajectory/Clashing/6aru_final_chain_A_domain_3_l50_s573464.pdb to ./out/bindcraft/2412250846/Trajectory/Clashing/6aru_final_chain_A_domain_3_l50_s573464.pdb
Downloaded 2412250846/Trajectory/Clashing/6aru_final_chain_A_domain_3_l50_s724583.pdb to ./out/bindcraft/2412250846/Trajectory/Clashing/6aru_final_chain_A_domain_3_l50_s724583.pdb
Downloaded 2412250846/Trajectory/Clashing/6aru_final_chain_A_domain_3_l50_s893314.pdb to ./out/bindcraft

In [5]:
from Bio.PDB import PDBParser
import numpy as np

def parse_pdb_to_target_protein(pdb_file):
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('target', pdb_file)
    
    # Initialize list to hold CA atom positions
    ca_positions = []

    for model in structure:
        for chain in model:
            for residue in chain:
                if 'CA' in residue:
                    ca = residue['CA']
                    ca_positions.append(ca.get_coord())
    
    # Convert to numpy array
    ca_positions = np.array(ca_positions)
    
    # Create target_protein dictionary
    target_protein = {
        "final_atom_positions": ca_positions
    }
    
    return target_protein

# Example usage
# pdb_file = 'path/to/your/file.pdb'
# target_protein = parse_pdb_to_target_protein(pdb_file)

In [10]:
# Example usage
pdb_file = 'target_A.pdb'
target_protein = parse_pdb_to_target_protein(pdb_file)

In [11]:
target_protein

{'final_atom_positions': array([[-1.5953e+01, -3.6500e+01, -1.2000e+01],
        [-1.6078e+01, -3.4625e+01, -8.5160e+00],
        [-1.6375e+01, -3.0703e+01, -9.1410e+00],
        [-1.3336e+01, -2.8500e+01, -8.0470e+00],
        [-1.5703e+01, -2.7156e+01, -5.5390e+00],
        [-1.7750e+01, -2.8219e+01, -2.4960e+00],
        [-1.9891e+01, -2.6344e+01, -3.2000e-02],
        [-1.7812e+01, -2.4531e+01,  2.4940e+00],
        [-1.7609e+01, -2.6281e+01,  5.8480e+00],
        [-1.9172e+01, -2.4578e+01,  8.8670e+00],
        [-1.5875e+01, -2.3250e+01,  1.0008e+01],
        [-1.5633e+01, -2.1219e+01,  6.7580e+00],
        [-1.9266e+01, -2.0141e+01,  6.5780e+00],
        [-1.8344e+01, -1.6484e+01,  7.1560e+00],
        [-1.5062e+01, -1.6562e+01,  5.3240e+00],
        [-1.6141e+01, -1.4383e+01,  2.4140e+00],
        [-1.7250e+01, -1.0773e+01,  3.2130e+00],
        [-1.8859e+01, -9.6480e+00, -7.8000e-02],
        [-1.9266e+01, -1.2953e+01, -1.8700e+00],
        [-2.0016e+01, -1.6641e+01, -1.6490e+0

In [12]:
target_protein["final_atom_positions"].shape

(50, 3)

In [15]:
 # Extract CA atom positions from target protein
target_ca = target_protein["final_atom_positions"]
target_ca.shape


(50, 3)