Get the interaction interface (contact residues between chain A and chain B of each complex)

In [6]:
import os
from Bio.PDB import PDBParser
from Bio import PDB
import multiprocessing

In [7]:
# define the function to get the interface site
def get_interface(pbd_file_list, threshold=5.0):
    """Collect the interface residue numbers of chains A and B for each complex.

    For every PDB file, each standard residue of chain 'A' is compared with
    each standard residue of chain 'B' in the first model. A pair counts as
    an interface contact when the distance between their CA atoms is below
    ``threshold``.

    Args:
        pbd_file_list: Iterable of paths to PDB files. The part of the file
            name before the first ``'_'`` is used as the key of the result;
            files sharing that prefix are merged into one entry.
        threshold: CA-CA distance cutoff for a contact (default 5.0).

    Returns:
        dict mapping each key to ``[chain_A_numbers, chain_B_numbers]``,
        two sorted lists of unique residue sequence numbers. Complexes
        without any contact below the cutoff do not appear in the dict.

    Raises:
        KeyError: if a structure has no model 0, or no chain 'A' / 'B'.
    """
    contacts = {}
    for pbd_file in pbd_file_list:
        basename = os.path.basename(pbd_file).split('_')[0]
        # Load the PDB file and take its first model
        parser = PDBParser()
        model = parser.get_structure('protein', pbd_file)[0]
        chain_a = model['A']
        chain_b = model['B']
        # Keep only standard residues (hetero flag ' ') that actually carry a
        # CA atom; residues without one cannot be measured and used to raise
        # KeyError, aborting the whole batch.
        ca_atoms_a = [
            (res.get_id()[1], res['CA'])
            for res in chain_a.get_residues()
            if res.get_id()[0] == ' ' and res.has_id('CA')
        ]
        ca_atoms_b = [
            (res.get_id()[1], res['CA'])
            for res in chain_b.get_residues()
            if res.get_id()[0] == ' ' and res.has_id('CA')
        ]
        for number_a, atom_a in ca_atoms_a:
            for number_b, atom_b in ca_atoms_b:
                # Subtracting two Bio.PDB atoms yields their distance
                if atom_a - atom_b < threshold:
                    # The entry is created on the first contact only, so
                    # complexes without contacts stay out of the result.
                    sites = contacts.setdefault(basename, [set(), set()])
                    sites[0].add(number_a)
                    sites[1].add(number_b)
    return {
        name: [sorted(sites_a), sorted(sites_b)]
        for name, (sites_a, sites_b) in contacts.items()
    }


In [None]:
if __name__ == "__main__":
    # Local test run: analyse every file found in the test folder and write
    # one tab-separated line per complex (key, chain A sites, chain B sites).
    input_path='E:\\博士\\实验结果\\Pol II Ser-2P and stress\\protein interaction\\AF2\\test'
    output_path='E:\\博士\\实验结果\\Pol II Ser-2P and stress\\protein interaction\\AF2'
    output_file='E:\\博士\\实验结果\\Pol II Ser-2P and stress\\protein interaction\\AF2\\test.txt'
    pbd_file_list = [
        os.path.join(input_path, i)
        for i in os.listdir(input_path)
    ]
    interfacesite = get_interface(pbd_file_list)
    with open(output_file, 'w') as op:
        for key in interfacesite:
            # Each value holds exactly two lists: chain A and chain B sites
            rpb1, fbxo11 = interfacesite[key]
            op.write(f"{key}\t{rpb1}\t{fbxo11}\n")

In [8]:

def rename_chains(input_pdb,output_pdb):
    """Relabel the first two chains of each model as 'A' and 'B' and save.

    Args:
        input_pdb: Path of the PDB file to read.
        output_pdb: Path the relabelled structure is written to.

    Any chain after the second keeps its original identifier.
    """
    structure = PDB.PDBParser(QUIET=True).get_structure('complex', input_pdb)
    # Go through every model: the first chain becomes 'A', the second 'B'
    for model in structure:
        for new_id, chain in zip(('A', 'B'), model):
            chain.id = new_id

    # Save the modified PDB file
    writer = PDB.PDBIO()
    writer.set_structure(structure)
    writer.save(output_pdb)


In [11]:
def output_interface(
    output_file,
    input_path='/gpfs/home/qihansong/project/RPB1_stress/RPB1-FBXO11-complex/predicted_complex_pool',
    output_path='/gpfs/home/qihansong/project/RPB1_stress/RPB1-FBXO11-complex/edited',
):
    """Rename chains of every predicted complex and write their interface sites.

    Each file in ``input_path`` is passed through ``rename_chains`` and saved
    in ``output_path`` (``.pdb`` replaced by ``_renamed.pdb`` in the name).
    The renamed files are then analysed with ``get_interface`` and the result
    is written to ``output_file`` as tab-separated lines:
    ``key<TAB>chain A sites<TAB>chain B sites``.

    Args:
        output_file: Path of the text file to write.
        input_path: Directory holding the predicted complexes.
        output_path: Directory receiving the renamed PDB files; created if
            it does not exist.
    """
    os.makedirs(output_path, exist_ok=True)
    renamed_files = []
    # Sorted so the processing order (and the output order) is reproducible
    for entry in sorted(os.listdir(input_path)):
        input_pdb = os.path.join(input_path, entry)
        if not os.path.isfile(input_pdb):
            # Sub-directories cannot be parsed as structures
            continue
        output_pdb = os.path.join(output_path, entry.replace('.pdb', '_renamed.pdb'))
        rename_chains(input_pdb, output_pdb)
        renamed_files.append(output_pdb)
    # Analyse only what was written in this run, so files left in
    # output_path by an earlier run do not leak into the result.
    interfacesite = get_interface(renamed_files)
    with open(output_file, 'w') as op:
        for key, (rpb1, fbxo11) in interfacesite.items():
            op.write(f"{key}\t{rpb1}\t{fbxo11}\n")

In [13]:
if __name__ == "__main__":
    output_file='/gpfs/home/qihansong/project/RPB1_stress/RPB1-FBXO11-complex/interaction_info/interaction_site.txt'
    # There is exactly one job, so it runs in-process. Handing the path
    # string to Pool.starmap would iterate over its characters and call
    # output_interface once per character, each character being used as the
    # output file name.
    output_interface(output_file)