In [None]:
import matplotlib.pyplot as plt
import numpy as np
from Bio.PDB import NeighborSearch, PDBParser
from Bio.PDB.DSSP import dssp_dict_from_pdb_file
from Bio.SeqUtils import ProtParamData
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from rdkit import Chem
from rdkit.Chem import AllChem
from scipy.spatial.distance import pdist, squareform
from sklearn.cluster import DBSCAN



In [None]:
# 1. Calculate distance between amino acids
def calculate_aa_distance(structure, chain_id, residue1, residue2):
    """Return the distance between the alpha carbons of two residues.

    Parameters
    ----------
    structure
        Object indexed as ``structure[0][chain_id]``; only the first model
        (index 0) is consulted.
    chain_id
        Key of the chain inside that model.
    residue1, residue2
        Keys of the two residues inside the chain.

    Returns
    -------
    Euclidean norm of the difference between the ``coord`` vectors of the two
    ``'CA'`` atoms, in whatever unit the coordinates are stored in.

    Lookup errors for a missing chain, residue or ``'CA'`` atom are not
    handled here and propagate to the caller.
    """
    residues = structure[0][chain_id]
    ca_first, ca_second = (
        residues[residue_id]['CA'].coord for residue_id in (residue1, residue2)
    )
    return np.linalg.norm(ca_first - ca_second)



In [None]:
# 2. Identify potential binding pockets
def identify_binding_pockets(structure, probe_radius=1.4, min_size=50,
                             eps=3, max_neighbors=3):
    """Cluster sparsely surrounded atoms into candidate binding pockets.

    An atom counts as a "surface" atom when the neighbour search centred on
    its own coordinate returns at most ``max_neighbors`` atoms within
    ``probe_radius`` (the search set contains the atom itself).  The
    coordinates of those atoms are clustered with DBSCAN and every cluster is
    reported as one pocket.  This is a deliberately simplified heuristic.

    Parameters
    ----------
    structure
        Object exposing ``get_atoms()``; each atom must have a ``coord``.
    probe_radius : float
        Search radius used for the surface test.
    min_size : int
        Passed to DBSCAN as ``min_samples``.
    eps : float
        DBSCAN neighbourhood radius (previously hard-coded to 3).
    max_neighbors : int
        Largest neighbour count that still qualifies an atom as a surface
        atom (previously hard-coded to 3).

    Returns
    -------
    list of numpy.ndarray
        One coordinate array per cluster, ordered by ascending DBSCAN label.
        Points labelled -1 (noise) are dropped.  The list is empty when the
        structure has no atoms, no atom passes the surface test, or DBSCAN
        finds no cluster.
    """
    atoms = list(structure.get_atoms())
    if not atoms:
        # No atoms: there is nothing to index or cluster.
        return []

    ns = NeighborSearch(atoms)
    surface_coords = [
        atom.coord
        for atom in atoms
        if len(ns.search(atom.coord, probe_radius)) <= max_neighbors
    ]
    if not surface_coords:
        # An empty list would become a 1-D array, which DBSCAN cannot fit.
        return []

    coords = np.array(surface_coords)
    labels = DBSCAN(eps=eps, min_samples=min_size).fit(coords).labels_

    # np.unique yields sorted labels, so "the first pocket" is well defined
    # for callers instead of depending on set iteration order.
    return [coords[labels == label] for label in np.unique(labels) if label != -1]



In [None]:
# 3. Calculate RMSD between protein structures
def calculate_rmsd(structure1, structure2):
    """Root-mean-square deviation between two structures, atom by atom.

    Atoms are paired purely by their position in ``get_atoms()`` order and the
    coordinates are compared as given: no superposition or alignment is
    performed here, so the structures must already share a reference frame
    and an identical atom ordering for the number to be meaningful.

    Parameters
    ----------
    structure1, structure2
        Objects exposing ``get_atoms()``; each atom must have a ``coord``.

    Returns
    -------
    ``sqrt(sum(|r1_i - r2_i|^2) / N)`` over the N paired atoms.

    Raises
    ------
    ValueError
        If the structures have different atom counts, or contain no atoms
        (the mean would otherwise be 0/0, i.e. NaN with a runtime warning).
    """
    atoms1 = list(structure1.get_atoms())
    atoms2 = list(structure2.get_atoms())

    n_atoms = len(atoms1)
    if n_atoms != len(atoms2):
        raise ValueError("Structures have different number of atoms")
    if n_atoms == 0:
        raise ValueError("Structures contain no atoms")

    coords1 = np.array([atom.coord for atom in atoms1])
    coords2 = np.array([atom.coord for atom in atoms2])

    diff = coords1 - coords2
    return np.sqrt(np.sum(diff ** 2) / n_atoms)



In [None]:
# 4. Simple molecular docking simulation
def simple_docking(protein, ligand, n_conformers=50, random_seed=-1):
    """Pick the ligand conformer whose centroid is closest to the first pocket.

    The ligand gets up to ``n_conformers`` embedded conformers, all of which
    are MMFF-optimised once.  Each conformer is then scored by the distance
    between its centroid and the centroid of the first pocket returned by
    ``identify_binding_pockets(protein)``; the lowest score wins.

    NOTE(review): the ligand is never translated or rotated into the pocket,
    so the score compares coordinates in the ligand's embedding frame with
    coordinates in the protein frame.  Treat it as a toy ranking only.

    Parameters
    ----------
    protein
        Structure passed straight to ``identify_binding_pockets``.
    ligand
        RDKit molecule; its conformers are replaced/modified in place by the
        embedding and optimisation calls.
    n_conformers : int
        Number of conformers requested from the embedder.
    random_seed : int
        Forwarded to ``EmbedMultipleConfs`` as ``randomSeed``.  The default
        (-1) is RDKit's own default, so existing callers are unaffected; pass
        a fixed value for reproducible runs.

    Returns
    -------
    (best_conformer, best_score)
        Conformer ID and its score, or ``(None, inf)`` when no conformer
        could be embedded.

    Raises
    ------
    IndexError
        If no binding pocket is detected on the protein.
    """
    # Embedding may yield fewer conformers than requested, so iterate over
    # the IDs it actually produced rather than over range(n_conformers).
    conformer_ids = list(
        AllChem.EmbedMultipleConfs(ligand, numConfs=n_conformers, randomSeed=random_seed)
    )

    # Simple scoring target: centroid of the first detected pocket.
    pockets = identify_binding_pockets(protein)
    if not pockets:
        raise IndexError("No binding pocket was detected on the protein")
    binding_site_center = np.mean(pockets[0], axis=0)

    if conformer_ids:
        # This call optimises every conformer of the molecule, so it runs once
        # up front instead of once per loop iteration.
        AllChem.MMFFOptimizeMoleculeConfs(ligand, maxIters=500)

    best_score = float('inf')
    best_conformer = None
    for conf_id in conformer_ids:
        ligand_center = ligand.GetConformer(conf_id).GetPositions().mean(axis=0)
        score = np.linalg.norm(ligand_center - binding_site_center)
        if score < best_score:
            best_score = score
            best_conformer = conf_id

    return best_conformer, best_score



In [None]:
# 5. Predict protein secondary structure
def predict_secondary_structure(sequence):
    """Assign a secondary-structure state to every residue of ``sequence``.

    A very simplified, context-free predictor: each residue is scored against
    three small propensity tables (helix ``'H'``, sheet ``'E'``, coil ``'C'``)
    and receives the state with the highest score.  Residues missing from a
    table score 1.0 there, so a residue absent from all three tables ties and
    is assigned ``'H'`` (the first state checked).

    Returns a string with one state letter per element of ``sequence``.
    """
    # Order matters: on equal scores the earliest state in this tuple wins.
    state_tables = (
        ('H', {'A': 1.45, 'E': 1.53, 'L': 1.34}),  # Helix
        ('E', {'V': 1.7, 'I': 1.6, 'T': 1.2}),     # Sheet
        ('C', {'G': 1.2, 'P': 1.2, 'S': 1.2}),     # Coil
    )
    neutral = 1.0

    def _best_state(residue):
        winner, top = None, None
        for state, table in state_tables:
            value = table.get(residue, neutral)
            # Strict comparison keeps the first state on ties.
            if top is None or value > top:
                winner, top = state, value
        return winner

    return ''.join(_best_state(residue) for residue in sequence)



In [None]:
# 6. Visualize protein-ligand interactions
def visualize_protein_ligand(protein, ligand):
    """Print a fixed text placeholder for a protein-ligand picture.

    A real implementation would hand the data to a viewer such as PyMOL.
    Here neither ``protein`` nor ``ligand`` is inspected: the function prints
    a three-line header followed by a 10x10 grid whose first five rows are
    ``P`` cells and whose last five rows are ``L`` cells.  Returns ``None``.
    """
    grid_size = 10
    header_lines = (
        "Protein-Ligand Interaction Visualization",
        "P: Protein atom, L: Ligand atom",
        "----------------------------------------",
    )
    for line in header_lines:
        print(line)

    for row_index in range(grid_size):
        # Top half stands for the protein, bottom half for the ligand.
        cell = 'P ' if row_index < 5 else 'L '
        print(cell * grid_size)




In [None]:
# 7. Calculate hydrophobicity profile
def hydrophobicity_profile(sequence, window_size=7):
    """Sliding-window Kyte-Doolittle hydrophobicity profile of a sequence.

    Parameters
    ----------
    sequence : str
        Amino-acid sequence handed to ``ProteinAnalysis``.
    window_size : int
        Width of the sliding window passed to ``protein_scale``.

    Returns
    -------
    The list of per-window scores produced by
    ``ProteinAnalysis.protein_scale``.
    """
    protparam = ProteinAnalysis(sequence)
    # protein_scale looks residues up in the scale it is given, so it needs
    # the ProtParamData.kd mapping itself; the string 'kd' is not a valid scale.
    return protparam.protein_scale(ProtParamData.kd, window_size)



In [None]:
# 8. Generate conformers of a small molecule
def generate_conformers(smiles, n_conformers=50, random_seed=-1):
    """Build a molecule from SMILES and give it optimised 3D conformers.

    The molecule is parsed, explicit hydrogens are added, up to
    ``n_conformers`` conformers are embedded and all of them are then
    optimised with MMFF (at most 500 iterations each).

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule.
    n_conformers : int
        Number of conformers requested from the embedder.
    random_seed : int
        Forwarded to ``EmbedMultipleConfs`` as ``randomSeed``.  The default
        (-1) is RDKit's own default, so existing callers are unaffected; pass
        a fixed value for reproducible conformers.

    Returns
    -------
    The hydrogen-added RDKit molecule carrying the embedded conformers.

    Raises
    ------
    ValueError
        If ``smiles`` cannot be parsed (``MolFromSmiles`` returned ``None``).
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # Fail here with the offending input instead of inside AddHs.
        raise ValueError("Could not parse SMILES: {!r}".format(smiles))

    mol = Chem.AddHs(mol)
    AllChem.EmbedMultipleConfs(mol, numConfs=n_conformers, randomSeed=random_seed)
    AllChem.MMFFOptimizeMoleculeConfs(mol, maxIters=500)
    return mol



In [None]:
# 9. Calculate electrostatic potential on protein surface
def electrostatic_potential(structure, probe_radius=1.4):
    """Assign a crude pseudo-potential to every surface atom of a structure.

    This is a simplified calculation.  An atom counts as a surface atom when
    the neighbour search centred on its own coordinate returns at most three
    atoms within ``probe_radius``.  Each surface atom gets a charge of +1 for
    element ``'O'``, -1 for element ``'N'`` and 0 otherwise, divided by
    ``|coord| + 1`` (distance of the atom from the coordinate origin, plus
    one).

    NOTE(review): the O = +1 / N = -1 sign convention and the use of the
    distance from the origin are kept exactly as they were; confirm they are
    intended before relying on the numbers.

    Parameters
    ----------
    structure
        Object exposing ``get_atoms()``; atoms need ``coord`` and ``element``.
    probe_radius : float
        Search radius used for the surface test.

    Returns
    -------
    (surface_atoms, potentials)
        Two parallel lists.  Both are empty when the structure has no atoms.
    """
    atoms = list(structure.get_atoms())
    if not atoms:
        # No atoms: skip building a neighbour index over an empty list.
        return [], []

    ns = NeighborSearch(atoms)
    surface_atoms = [
        atom
        for atom in atoms
        if len(ns.search(atom.coord, probe_radius)) <= 3  # simplified surface detection
    ]

    # Simplified per-element charges; every other element is neutral.
    charge_by_element = {'O': 1, 'N': -1}
    potentials = [
        charge_by_element.get(atom.element, 0) / (np.linalg.norm(atom.coord) + 1)
        for atom in surface_atoms
    ]

    return surface_atoms, potentials



In [None]:
# 10. Virtual screening of compound library
def virtual_screening(protein, compound_library, n_conformers=10):
    """Dock every compound of a library and rank them by docking score.

    Each SMILES string is parsed, given explicit hydrogens and passed to
    ``simple_docking`` together with ``protein`` and ``n_conformers``.

    Parameters
    ----------
    protein
        Structure forwarded unchanged to ``simple_docking``.
    compound_library : iterable of str
        SMILES strings of the compounds to screen.
    n_conformers : int
        Number of conformers requested per compound.

    Returns
    -------
    list of (smiles, score)
        Sorted by ascending score (lower is better); compounds with equal
        scores keep their library order.

    Raises
    ------
    ValueError
        If a SMILES string cannot be parsed; the message names the entry.
    """
    results = []
    for smiles in compound_library:
        ligand = Chem.MolFromSmiles(smiles)
        if ligand is None:
            # Name the bad entry instead of failing later inside AddHs.
            raise ValueError("Could not parse SMILES: {!r}".format(smiles))
        ligand = Chem.AddHs(ligand)

        # Only the score is ranked; the winning conformer ID is not kept.
        _, score = simple_docking(protein, ligand, n_conformers)
        results.append((smiles, score))

    return sorted(results, key=lambda entry: entry[1])



In [None]:
# Example usage:
# parser = PDBParser()
# structure = parser.get_structure('protein', 'protein.pdb')
# distance = calculate_aa_distance(structure, 'A', 10, 20)

# pockets = identify_binding_pockets(structure)

# structure2 = parser.get_structure('protein2', 'protein2.pdb')
# rmsd = calculate_rmsd(structure, structure2)

# ligand = Chem.AddHs(Chem.MolFromSmiles('CCO'))  # add explicit hydrogens before embedding 3D conformers
# best_conf, score = simple_docking(structure, ligand)

# sequence = "MKWVTFISLLLLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPFEDHVKLVNEVTEFAKTCVADESAENCDKS"
# ss_prediction = predict_secondary_structure(sequence)

# visualize_protein_ligand(structure, ligand)

# hydrophobicity = hydrophobicity_profile(sequence)

# conformers = generate_conformers('CCO')

# surface_atoms, potentials = electrostatic_potential(structure)

# compound_library = ['CCO', 'CCC', 'CCCO']
# screening_results = virtual_screening(structure, compound_library)