# 统计对接后小分子的信息

In [1]:
import os
import pandas as pd
from collections import defaultdict
import numpy as np
import copy
from tqdm import tqdm
from package import get_pdbqt_info, get_pdb_atom_info

# Function definitions

In [2]:


def get_statistics(pocket_dict: dict, atom_lists):
    """Compute, per pocket, each atom's distance to its nearest alpha sphere.

    Args:
        pocket_dict: Mapping from pocket key to that pocket's alpha-sphere
            records, in the row format accepted by ``get_xyz``
            (``atom, x, y, z``).
        atom_lists: Iterable of per-molecule atom records, each in the row
            format accepted by ``get_xyz``. May be a list or a one-shot
            iterator such as ``map``; it is consumed exactly once.

    Returns:
        A ``defaultdict(list)`` mapping each pocket key to a list holding
        one 1-D array per molecule (in input order). Entry ``j`` of an
        array is the distance from atom ``j`` of that molecule to the
        closest alpha sphere of the pocket.
    """
    results_dict = defaultdict(list)
    # Extract the coordinates once and reuse them for every pocket. The
    # previous version deep-copied a lazy ``map`` per pocket, which depends
    # on iterator copy support and repeats the extraction for each pocket.
    atom_arrays = [get_xyz(atom_list) for atom_list in atom_lists]
    for key, pocket in pocket_dict.items():
        print(f'pocket{key}')
        # (n, 3) matrix of alpha-sphere centres for this pocket.
        alpha_sphere = get_xyz(pocket)
        for atom_xyz in tqdm(atom_arrays):
            # Broadcast (1, 3, m) - (n, 3, 1) -> (n, 3, m): the offset of
            # each of the m atoms from each of the n alpha spheres.
            vector_matrix = atom_xyz.T[np.newaxis, :] - alpha_sphere[:, :, np.newaxis]
            # Sum the squared components over the coordinate axis -> (n, m)
            # squared distances.
            distance_matrix = np.einsum('ijk,ijk->ik', vector_matrix, vector_matrix)
            # Closest alpha sphere per atom -> (m,) distances.
            results_dict[key].append(np.sqrt(np.min(distance_matrix, axis=0)))
    return results_dict
            

def get_pocket_info(pocket_folder):
    """Load the 'ATOM' records of every ``pocket{i}_vert.pqr`` file in a folder.

    The pocket count is taken as half the number of directory entries
    (rounded down), i.e. the folder is assumed to hold two files per
    pocket -- TODO confirm against the tool that writes this folder.

    Args:
        pocket_folder: Directory containing the ``pocket{i}_vert.pqr`` files.

    Returns:
        dict mapping the 1-based pocket index to the ``'ATOM'`` entry that
        ``get_pdb_atom_info`` returns for that pocket's file.
    """
    n_pockets = len(os.listdir(pocket_folder)) // 2
    # Pocket files are numbered from 1, not 0.
    return {
        idx: get_pdb_atom_info(
            os.path.join(pocket_folder, f'pocket{idx}_vert.pqr')
        )['ATOM']
        for idx in range(1, n_pockets + 1)
    }


def get_xyz(atom_list):
    """Return the x/y/z columns of a list of atom records as a 2-D array.

    Args:
        atom_list: Rows of four fields each, interpreted in order as
            ``atom, x, y, z``.

    Returns:
        numpy array with one row per record and the three coordinate
        columns ``x, y, z``.
    """
    coordinate_columns = ['x', 'y', 'z']
    table = pd.DataFrame(atom_list, columns=['atom'] + coordinate_columns)
    return table.loc[:, coordinate_columns].to_numpy()


def main(dock_out_folder, fpocket_out_folder):
    """Compute the mean atom-to-nearest-alpha-sphere distance per molecule and pocket.

    Args:
        dock_out_folder: Directory whose entries are all passed to
            ``get_pdbqt_info`` as docked-molecule files.
        fpocket_out_folder: Directory that contains a ``pockets``
            sub-folder readable by ``get_pocket_info``.

    Returns:
        dict mapping each pocket key to a 1-D numpy array with one value
        per molecule (in ``os.listdir`` order): the mean, over the
        molecule's atoms, of the distance to the nearest alpha sphere of
        that pocket.
    """
    # Parse every docked molecule once, up front.
    dock_conformation_sm = [
        os.path.join(dock_out_folder, file_name)
        for file_name in os.listdir(dock_out_folder)
    ]
    dock_results = [get_pdbqt_info(path) for path in dock_conformation_sm]
    # Alpha-sphere records for each pocket.
    pocket_dict = get_pocket_info(os.path.join(fpocket_out_folder, 'pockets'))
    # Per pocket: one array per molecule with each atom's nearest-sphere distance.
    min_distance_dict = get_statistics(pocket_dict, dock_results)

    # Average over the atoms of each molecule. The previous version averaged
    # the whole per-pocket list (`distance`) instead of the single molecule,
    # and discarded the result by not returning it.
    pocket_sm = {
        key: np.array([np.mean(molecular) for molecular in distance])
        for key, distance in min_distance_dict.items()
    }
    return pocket_sm

    

# Main function

In [7]:
if __name__ == "__main__":
    # Input locations: docking output folder and fpocket output folder.
    dock_out_folder = r'/home/huabei/soft_folder/smtr_data/Dock/miRNA/mir-21/pri/dock_results/mk_in_man_Dock_results/exhaustiveness_96/1'
    fpocket_out_folder = r'/mnt/e/tmp/fpocket/pri_mir_21_top_1_out'
    # Parse every docked molecule once, up front.
    dock_conformation_sm = [os.path.join(dock_out_folder, file_name) for file_name in os.listdir(dock_out_folder)]
    dock_results = [get_pdbqt_info(path) for path in dock_conformation_sm]
    # Alpha-sphere records for each pocket.
    pocket_dict = get_pocket_info(fpocket_out_folder+'/pockets')
    # Per pocket: one array per molecule with each atom's nearest-sphere distance.
    min_distance_dict = get_statistics(pocket_dict, dock_results)

    # For each pocket, keep the indices of the molecules whose mean
    # nearest-sphere distance is below 1 (same units as the input coordinates).
    pocket_sm = dict()
    for key, distance in min_distance_dict.items():
        x = [np.mean(molecular) for molecular in distance]
        pocket_sm[key] = np.where(np.array(x) < 1)[0].tolist()
    # Iterate over the pockets actually present rather than a hard-coded
    # count of 8, which raised KeyError for fewer pockets and ignored extras.
    pocket_set = [set(pocket_sm[pocket_key]) for pocket_key in sorted(pocket_sm)]
    # Union of the selected molecule indices across all pockets.
    total_set = set().union(*pocket_set)
    


pocket1
pocket2
pocket3
pocket4
pocket5
pocket6
pocket7
pocket8


In [41]:
# Number of molecule indices stored for pocket key 3.
len(pocket_sm[3])

567

In [44]:
# One set of selected molecule indices per pocket, in ascending key order.
# Iterating over the keys present avoids the hard-coded count of 8, which
# raised KeyError for fewer pockets and ignored any beyond the eighth.
pocket_set = [set(pocket_sm[pocket_key]) for pocket_key in sorted(pocket_sm)]
# Union of the selected molecule indices across all pockets.
total_set = set().union(*pocket_set)


In [13]:
# Scratch check of the broadcasting used in get_statistics, on two small
# (2, 3) point sets.
a = np.array([[1, 2, 3], [2, 3, 4]])
b = np.array([[3, 4, 5], [4, 5, 6]])
# Append a trailing axis: (2, 3) -> (2, 3, 1).
a = a[:, :,np.newaxis]
# Transpose and prepend a leading axis: (2, 3) -> (1, 3, 2).
b = b.T[np.newaxis, :]
b.shape

(1, 3, 2)

In [15]:
# (1, 3, 2) - (2, 3, 1) broadcasts to (2, 3, 2): c[i, :, k] is the offset
# of point k of `b` from point i of `a`.
c = b - a
c
# Sum of squared components over the middle axis -> (2, 2) squared distances.
np.einsum('ijk,ijk->ik', c, c)

array([[12, 27],
       [ 3, 12]])

In [39]:
# Scratch check: np.where on a boolean mask returns a tuple of index arrays,
# which is why the main script takes element [0] of the result.
np.where(np.array([1, 2, 3, 4])>2)

(array([2, 3]),)

In [6]:
# Inspect the first element of `distance`; presumably the loop variable left
# over from the per-pocket loop in the main cell -- confirm, since this cell
# was executed before that cell's latest run.
distance[0]

array([2748.453102, 2647.989273, 2622.614099, 2650.155056, 2807.312772,
       2866.511501, 2859.235045, 2730.593141, 2836.873688, 2865.430457,
       2826.91055 , 3021.695928, 3019.638267, 3112.921141, 3222.004525,
       3204.902482, 3289.469249, 3344.063757, 3087.475544, 3012.517262,
       3271.240889, 3091.783057, 3243.338025, 2488.516044, 2386.461254,
       2260.713206, 2172.613262, 2109.389821, 2160.883909, 2014.399364,
       2104.673801, 1940.147882, 1988.425293])