In [3]:
from Bio import PDB
import numpy as np

def count_and_record_contacts_for_residue(pdb_file, chain_id, residue_id, distance_threshold=5.0):
    """List, for every atom of one residue, the atoms lying within a cutoff distance.

    Parameters
    ----------
    pdb_file : str
        Path of the PDB file, parsed with ``Bio.PDB.PDBParser``.
    chain_id : str
        ID of the chain that holds the residue of interest.
    residue_id : int
        Sequence number of the residue (compared against ``residue.id[1]``).
        The hetero flag and insertion code are not considered; the first
        residue with this number in the first matching chain/model is used.
    distance_threshold : float, optional
        Largest distance (in the coordinate units of the file) that still
        counts as a contact. The comparison is inclusive.

    Returns
    -------
    dict
        Maps each atom name of the target residue to a dict with
        ``"contact_count"`` (int) and ``"contact_details"``, a list of
        ``[chain id, residue sequence number, atom name, distance]`` entries.
        Only the atom itself is excluded, so other atoms of the same residue
        and of neighbouring residues are reported as contacts.

    Raises
    ------
    ValueError
        If no residue with ``residue_id`` exists in a chain named ``chain_id``.
    """
    # Load the PDB file
    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure("protein", pdb_file)

    # Locate the first matching residue and remember which model it lives in.
    match = next(
        (
            (model, residue)
            for model in structure
            for chain in model
            if chain.id == chain_id
            for residue in chain
            if residue.id[1] == residue_id
        ),
        None,
    )

    # Compare against None explicitly: Biopython entities are sized containers,
    # so an entity without children would be falsy.
    if match is None:
        raise ValueError("Specified residue not found")

    target_model, target_residue = match
    # Echo the matched residue so the notebook shows what was analysed.
    print(target_residue)

    # Only atoms of the target's own model are candidates. Scanning the whole
    # structure would report copies of the same atoms from other models
    # (e.g. NMR ensembles) as spurious contacts.
    model_atoms = list(target_model.get_atoms())

    # Analyze contacts for each atom in the residue
    all_contacts = {}  # atom name -> contact summary
    for target_atom in target_residue:
        contact_details = []  # one entry per contacting atom

        for atom in model_atoms:
            if atom != target_atom:  # Avoid self-contact
                distance = np.linalg.norm(atom.coord - target_atom.coord)
                if distance <= distance_threshold:
                    parent_residue = atom.get_parent()
                    parent_chain = parent_residue.get_parent()
                    contact_details.append([
                        parent_chain.id,       # Chain ID
                        parent_residue.id[1],  # Residue ID (sequence number)
                        atom.name,             # Atom name
                        distance,
                    ])

        # Store results for this atom
        all_contacts[target_atom.name] = {
            "contact_count": len(contact_details),
            "contact_details": contact_details,
        }

    return all_contacts



In [4]:
# Example usage: report every atom within the cutoff of one residue on chain B
pdb_file = "/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_4/8ee2_4_singlept_semigreedy_adam_0_0_1_0.005_models_1_weights_0.0_0.133_0.0_0.0_0.0_0.0_0.133_0.267_0.267_0.133_0.067_c32_use_templates_True_rm_template_ic_False_bias_True_bias_matrix_single_mutation_0th_bias_A_num_recycles_3.pdb"
chain_id = "B"
residue_id = 100
distance_threshold = 5.0

contacts_per_atom = count_and_record_contacts_for_residue(
    pdb_file,
    chain_id,
    residue_id,
    distance_threshold=distance_threshold,
)

# Print one summary per atom of the target residue, followed by its contact list
for atom_name in contacts_per_atom:
    data = contacts_per_atom[atom_name]
    print(f"Atom: {atom_name}")
    print(f"  Number of contacts: {data['contact_count']}")
    print("  Contacting atoms:")
    for contact in data["contact_details"]:
        # contact = [chain ID, residue number, atom name, distance]
        print(f"    Chain: {contact[0]}, Residue: {contact[1]}, Atom: {contact[2]}, Distance: {contact[3]}")



<Residue ALA het=  resseq=100 icode= >
Atom: N
  Number of contacts: 24
  Contacting atoms:
    Chain: A, Residue: 111, Atom: N, Distance: 4.682144641876221
    Chain: A, Residue: 112, Atom: O, Distance: 4.69628381729126
    Chain: B, Residue: 98, Atom: C, Distance: 3.794109582901001
    Chain: B, Residue: 98, Atom: O, Distance: 3.598041296005249
    Chain: B, Residue: 98, Atom: CG2, Distance: 4.752516746520996
    Chain: B, Residue: 99, Atom: N, Distance: 3.386850595474243
    Chain: B, Residue: 99, Atom: CA, Distance: 2.386460781097412
    Chain: B, Residue: 99, Atom: C, Distance: 1.2994519472122192
    Chain: B, Residue: 99, Atom: CB, Distance: 3.4497199058532715
    Chain: B, Residue: 99, Atom: O, Distance: 2.2413058280944824
    Chain: B, Residue: 99, Atom: CG, Distance: 3.5917882919311523
    Chain: B, Residue: 99, Atom: CD1, Distance: 3.808305501937866
    Chain: B, Residue: 100, Atom: CA, Distance: 1.460526466369629
    Chain: B, Residue: 100, Atom: C, Distance: 2.4622085094451

In [9]:
# total number of contacts between chains
import csv

def count_and_record_contacts_between_chains(pdb_file, chain_id_1, chain_id_2, distance_threshold=5.0, output_file='contacts.csv'):
    """Find all atom pairs between two chains within a cutoff and save them as CSV.

    Parameters
    ----------
    pdb_file : str
        Path of the PDB file, parsed with ``Bio.PDB.PDBParser``.
    chain_id_1, chain_id_2 : str
        IDs of the two chains to compare. They must differ.
    distance_threshold : float, optional
        Largest distance (in the coordinate units of the file) that still
        counts as a contact. The comparison is inclusive.
    output_file : str, optional
        Path of the CSV file to write. An existing file is overwritten.

    Returns
    -------
    list
        One ``[chain1, residue1, atom1, chain2, residue2, atom2, distance]``
        entry per contacting atom pair, ordered by chain-1 atom and then by
        chain-2 atom. Residues are given by sequence number and the distance
        is rounded to 3 decimal places. The same rows are written to
        ``output_file`` below a header line.

    Raises
    ------
    ValueError
        If both IDs are identical, or if no single model contains both chains.
    """
    # Load the PDB file
    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure("protein", pdb_file)

    # Identical IDs can never resolve to two chains; say so instead of
    # reporting a chain as missing.
    if chain_id_1 == chain_id_2:
        raise ValueError("chain_id_1 and chain_id_2 must refer to different chains")

    # Take both chains from the first model that contains both of them, so
    # that coordinates from different models are never compared.
    chain_1 = None
    chain_2 = None
    for model in structure:
        chains_by_id = {chain.id: chain for chain in model}
        if chain_id_1 in chains_by_id and chain_id_2 in chains_by_id:
            chain_1 = chains_by_id[chain_id_1]
            chain_2 = chains_by_id[chain_id_2]
            break

    # Compare against None explicitly: Biopython entities are sized containers,
    # so a chain without residues would be falsy.
    if chain_1 is None or chain_2 is None:
        raise ValueError("One or both specified chains not found")

    # Collect chain 2 once; its coordinates form an (n_atoms, 3) array so the
    # distances from one chain-1 atom to all of chain 2 need a single call.
    atoms_2 = [(residue_2, atom_2) for residue_2 in chain_2 for atom_2 in residue_2]
    coords_2 = np.array([atom_2.coord for _, atom_2 in atoms_2]).reshape(-1, 3)

    # Analyze contacts between the two chains
    contacts = []  # List to store contact details
    for residue_1 in chain_1:
        for atom_1 in residue_1:
            distances = np.linalg.norm(coords_2 - atom_1.coord, axis=1)
            # flatnonzero yields ascending indices, preserving chain-2 order.
            for index in np.flatnonzero(distances <= distance_threshold):
                residue_2, atom_2 = atoms_2[index]
                contacts.append([
                    chain_id_1, residue_1.id[1], atom_1.name,  # Chain 1 details
                    chain_id_2, residue_2.id[1], atom_2.name,  # Chain 2 details
                    round(distances[index], 3)  # Distance rounded to 3 decimal places
                ])

    # Write contacts to CSV file
    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # Write header
        writer.writerow(['chain1', 'residue1', 'atom1', 'chain2', 'residue2', 'atom2', 'distance'])
        # Write contact data
        writer.writerows(contacts)

    return contacts

In [10]:
# Example usage: collect all contacts between chains A and B and save them as CSV
pdb_file = "/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_4/8ee2_4_singlept_semigreedy_adam_0_0_1_0.005_models_1_weights_0.0_0.133_0.0_0.0_0.0_0.0_0.133_0.267_0.267_0.133_0.067_c32_use_templates_True_rm_template_ic_False_bias_True_bias_matrix_single_mutation_0th_bias_A_num_recycles_3.pdb"
chain_id_1 = "A"
chain_id_2 = "B"
distance_threshold = 5.0
output_file = "test_contacts.csv"

# The returned list holds the same rows that are written to output_file.
contacts = count_and_record_contacts_between_chains(
    pdb_file,
    chain_id_1,
    chain_id_2,
    distance_threshold=distance_threshold,
    output_file=output_file,
)



In [None]:
# filter all contacts that are with a specific residue
filtered_contacts = []
for contact in contacts:
    if contact[0] == 'A'