<a href="https://colab.research.google.com/github/eoinleen/Protein-design-random/blob/main/protein-str-param-1st-attempt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Install required packages
!pip install biopython freesasa numpy

# Import required libraries
import os
import sys
from Bio import PDB
from Bio.PDB.PDBIO import PDBIO
from Bio.PDB.Polypeptide import is_aa
import freesasa
import numpy as np
from google.colab import drive

# Mount Google Drive (if not already mounted)
try:
    drive.mount('/content/drive')
except:
    print("Drive already mounted or mounting failed")

# Rest of the code remains the same as before
def calculate_buried_surface_area(pdb_file):
    """Calculate buried surface area between chains in a PDB structure."""
    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure('protein', pdb_file)

    chains = list(structure.get_chains())
    if len(chains) < 2:
        return None, None

    # Calculate surface area for complete structure
    structure_atoms = [atom for atom in structure.get_atoms()]
    if not structure_atoms:
        return None, None

    combined_structure = freesasa.Structure(pdb_file)
    result = freesasa.calc(combined_structure)
    total_area = result.totalArea()

    # Calculate individual chain areas
    chain_areas = {}
    io = PDBIO()
    for chain in chains:
        # Create a new structure with just this chain
        new_structure = PDB.Structure.Structure('temp')
        new_model = PDB.Model.Model(0)
        new_structure.add(new_model)
        new_chain = chain.copy()
        new_model.add(new_chain)

        # Save temporary file
        temp_file = f"temp_chain_{chain.id}.pdb"
        io.set_structure(new_structure)
        io.save(temp_file)

        # Calculate area
        chain_structure = freesasa.Structure(temp_file)
        chain_result = freesasa.calc(chain_structure)
        chain_areas[chain.id] = chain_result.totalArea()

        # Clean up
        os.remove(temp_file)

    # Calculate buried surface area
    total_individual_area = sum(chain_areas.values())
    buried_surface_area = total_individual_area - total_area

    return abs(buried_surface_area), chain_areas

def calculate_hydrogen_bonds(structure):
    """Calculate hydrogen bonds between chains using distance and angle criteria."""
    h_bonds = []
    for chain1 in structure.get_chains():
        for chain2 in structure.get_chains():
            if chain1.id != chain2.id:
                for res1 in chain1.get_residues():
                    if not is_aa(res1):
                        continue
                    for res2 in chain2.get_residues():
                        if not is_aa(res2):
                            continue
                        # Check for backbone-backbone H-bonds
                        if 'O' in res1 and 'N' in res2:
                            distance = res1['O'] - res2['N']
                            if distance < 3.5:  # Standard H-bond distance cutoff
                                h_bonds.append((res1, res2))
    return len(h_bonds)

def calculate_hydrophobic_contacts(structure):
    """Calculate hydrophobic contacts between chains."""
    hydrophobic_residues = {'ALA', 'VAL', 'LEU', 'ILE', 'MET', 'PHE', 'TRP', 'PRO'}
    contacts = []

    for chain1 in structure.get_chains():
        for chain2 in structure.get_chains():
            if chain1.id >= chain2.id:
                continue

            for res1 in chain1.get_residues():
                if not is_aa(res1) or res1.get_resname() not in hydrophobic_residues:
                    continue

                for res2 in chain2.get_residues():
                    if not is_aa(res2) or res2.get_resname() not in hydrophobic_residues:
                        continue

                    # Calculate minimum distance between any atoms in the residues
                    min_distance = float('inf')
                    for atom1 in res1.get_atoms():
                        for atom2 in res2.get_atoms():
                            distance = atom1 - atom2
                            min_distance = min(min_distance, distance)

                    if min_distance < 5.0:  # Distance threshold for hydrophobic contact
                        contacts.append((res1, res2))

    return len(contacts)

def calculate_salt_bridges(structure):
    """Calculate salt bridges between chains."""
    acidic = {'ASP', 'GLU'}
    basic = {'LYS', 'ARG', 'HIS'}
    salt_bridges = []

    for chain1 in structure.get_chains():
        for chain2 in structure.get_chains():
            if chain1.id >= chain2.id:
                continue

            for res1 in chain1.get_residues():
                if not is_aa(res1):
                    continue
                res1_name = res1.get_resname()

                for res2 in chain2.get_residues():
                    if not is_aa(res2):
                        continue
                    res2_name = res2.get_resname()

                    # Check if residues can form salt bridge
                    if (res1_name in acidic and res2_name in basic) or \
                       (res1_name in basic and res2_name in acidic):
                        # Calculate distance between charged groups
                        min_distance = float('inf')
                        for atom1 in res1.get_atoms():
                            for atom2 in res2.get_atoms():
                                distance = atom1 - atom2
                                min_distance = min(min_distance, distance)

                        if min_distance < 4.0:  # Distance threshold for salt bridge
                            salt_bridges.append((res1, res2))

    return len(salt_bridges)

def process_multiple_pdb_files(pdb_directory):
    """Process multiple PDB files and analyze their structures."""
    results = []
    parser = PDB.PDBParser(QUIET=True)

    for file_name in os.listdir(pdb_directory):
        if not file_name.endswith(".pdb"):
            continue

        pdb_file = os.path.join(pdb_directory, file_name)
        print(f"\nProcessing {file_name}")

        try:
            # Load structure
            structure = parser.get_structure('protein', pdb_file)

            # Calculate metrics
            buried_surface_area, chain_areas = calculate_buried_surface_area(pdb_file)
            h_bonds = calculate_hydrogen_bonds(structure)
            hydrophobic = calculate_hydrophobic_contacts(structure)
            salt_bridges = calculate_salt_bridges(structure)

            results.append({
                'file_name': file_name,
                'buried_surface_area': buried_surface_area,
                'hydrogen_bonds': h_bonds,
                'hydrophobic_contacts': hydrophobic,
                'salt_bridges': salt_bridges,
                'chain_areas': chain_areas
            })

            # Print detailed results for this structure
            print(f"Buried Surface Area: {buried_surface_area:.2f} Å²" if buried_surface_area else "Buried Surface Area: Not applicable")
            print(f"Hydrogen Bonds: {h_bonds}")
            print(f"Hydrophobic Contacts: {hydrophobic}")
            print(f"Salt Bridges: {salt_bridges}")

        except Exception as e:
            print(f"Error processing {file_name}: {str(e)}")

    return results

def print_summary_report(results):
    """Print a formatted summary report of all results."""
    print("\nSummary Report:")
    print(f"{'PDB File':<30} {'Buried Surface Area (Å²)':<25} {'H-Bonds':<12} {'Hydrophobic':<12} {'Salt Bridges':<12}")
    print("="*90)

    for result in results:
        bsa = f"{result['buried_surface_area']:.2f}" if result['buried_surface_area'] else "N/A"
        print(f"{result['file_name']:<30} {bsa:<25} {result['hydrogen_bonds']:<12} "
              f"{result['hydrophobic_contacts']:<12} {result['salt_bridges']:<12}")

# Main execution
# Update this path to your PDB files directory in Google Drive
pdb_directory = '/content/drive/MyDrive/PDB-files/all_pdb-2MBO-no-hot'  # Update this path as needed

print("Starting analysis...")
results = process_multiple_pdb_files(pdb_directory)
print_summary_report(results)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Starting analysis...

Processing design0_n16.pdb
Buried Surface Area: 1904.15 Å²
Hydrogen Bonds: 17
Hydrophobic Contacts: 11
Salt Bridges: 2

Processing design0_n1.pdb
Buried Surface Area: 1593.75 Å²
Hydrogen Bonds: 0
Hydrophobic Contacts: 15
Salt Bridges: 3

Processing design0_n13.pdb
Buried Surface Area: 1344.79 Å²
Hydrogen Bonds: 2
Hydrophobic Contacts: 13
Salt Bridges: 2

Processing design0_n10.pdb
Buried Surface Area: 1698.90 Å²
Hydrogen Bonds: 5
Hydrophobic Contacts: 12
Salt Bridges: 1

Processing design0_n12.pdb
Buried Surface Area: 1983.12 Å²
Hydrogen Bonds: 1
Hydrophobic Contacts: 20
Salt Bridges: 2

Processing design0_n15.pdb
Buried Surface Area: 1417.30 Å²
Hydrogen Bonds: 1
Hydrophobic Contacts: 11
Salt Bridges: 5

Processing design1_n1.pdb
Buried Surface Area: 1805.60 Å²
Hydrogen Bonds: 3
Hydrophobic Contacts: 12
Salt Bridges: 2

Processing design

In [None]:
from google.colab import drive
drive.mount('/content/drive')