# 统计对接后小分子的信息

In [None]:
# Set Python's working directory.
# NOTE(review): presumably this is the project root, so that the project-local
# `utils` import in the next cell resolves — confirm.
import os

os.chdir("/home/huabei/projects/SMTarRNA")

In [None]:
import copy
from collections import defaultdict
from functools import partial

import numpy as np
import pandas as pd
from tqdm import tqdm

from utils import (
    ZincPdbqt,
    generate_coor,
    get_pocket_info,
    gz_writer,
    ligand_pocket_position_statistics,
    map_and_conjunction,
    zinc_pdbqt_transform_decorator,
)

# Function definitions

In [None]:
@zinc_pdbqt_transform_decorator
def transform(pdbqt_model):
    """Return the result of ``generate_coor`` for ``pdbqt_model``.

    Used as the first entry of the ``transform`` list handed to ``ZincPdbqt``.
    The wrapping applied by ``zinc_pdbqt_transform_decorator`` is defined in
    ``utils`` and is not visible here.
    """
    coordinates = generate_coor(pdbqt_model)
    return coordinates


@zinc_pdbqt_transform_decorator
def transform2(atom_list, pocket_alpha: list):
    """Return ``ligand_pocket_position_statistics`` for one ligand and one pocket.

    Note the argument order: the helper receives ``pocket_alpha`` first and
    ``atom_list`` second, the reverse of this function's own signature.
    ``pocket_alpha`` is normally pre-bound with ``functools.partial`` so the
    function can be used as a single-argument transform.
    """
    statistics = ligand_pocket_position_statistics(pocket_alpha, atom_list)
    return statistics


def main(dock_out_folder, fpocket_out_folder, pocket_index):
    """Load every docking archive in a folder with the pocket-statistics transforms.

    Args:
        dock_out_folder: Directory that is listed for docking outputs; only
            entries whose name ends with ".gz" are loaded.
        fpocket_out_folder: Passed to ``get_pocket_info`` to obtain the pockets.
        pocket_index: Key used to pick one pocket out of the value returned by
            ``get_pocket_info``.

    Returns:
        The value of ``map_and_conjunction(list, ...)`` applied to the per-file
        ``ZincPdbqt`` objects. Per the original author's note, its length is
        the number of molecules.
    """
    # Pocket information extracted from the fpocket output.
    pockets = get_pocket_info(fpocket_out_folder)

    # Paths of the docking output archives.
    archive_paths = [
        os.path.join(dock_out_folder, entry)
        for entry in os.listdir(dock_out_folder)
        if entry.endswith(".gz")
    ]

    def load_archive(archive_path):
        # The pocket lookup stays inside the per-file call, so it is only
        # evaluated when there is at least one archive to load.
        return ZincPdbqt(
            archive_path,
            transform=[
                transform,
                partial(transform2, pocket_alpha=pockets[pocket_index]),
            ],
        )

    # Original author's note: the molecule-to-pocket distance is the mean, over
    # all atoms of the molecule, of the distance to the nearest alpha sphere
    # (the computation itself lives in the utils helpers).
    per_file_results = [load_archive(path) for path in tqdm(archive_paths)]
    return map_and_conjunction(list, per_file_results)

# Main function

In [None]:
if __name__ == "__main__":
    # Input locations: docking results and the fpocket pocket directory.
    dock_out_folder = r"/mnt/e/Python_Project/SMTarRNA/project/data/3a6p/100k/"
    fpocket_out_folder = (
        r"/mnt/e/Research/SM_miRNA/Data/Dock/complex/fpocket_results/3a6p_out/pockets"
    )
    # NOTE(review): presumably (structure id, pocket index) pairs; the "3a6p"
    # entry matches the index passed to main() below, but this table is not
    # referenced anywhere else in the visible code — confirm.
    pocket = [
        ("3a6p", 5),
        ("4z4c", 1),
        ("4z4d", 7),
        ("5zal", 7),
        ("5zam", 5),
        ("6cbd", 44),
        ("6lxd", 90),
        ("6v5b", 19),
    ]
    results = main(dock_out_folder, fpocket_out_folder, pocket_index=5)
    # Left empty here: the code that used to populate it is commented out below.
    pocket_sm = {}
    # for key, distance in min_distance_dict.items():
    #     x = list()
    #     for molecular in distance:
    #         # mean of the nearest distances over the m atoms
    #         # print(np.mean(distance))
    #         x.append(np.mean(molecular))
    #     pocket_sm[key] = np.where(np.array(x) < 1)[0].tolist()
    # pocket_set = [set(pocket_sm[i+1]) for i in range(8)]
    # total_set = set()
    # for i in pocket_set:
    #     total_set = total_set | i

In [None]:
# Number of entries in `results` (at this point, the value returned by main()).
len(results)

In [None]:
import matplotlib
import matplotlib.pyplot as plt

# matplotlib.use('')
# Quick-look plot of the second field of every record in `results`
# (presumably the molecule-to-pocket distance — confirm against utils).
distance = [record[1] for record in results]
data = {"x": distance}
fig, ax = plt.subplots()
# Both axes read the same "x" column, so every point lies on the diagonal.
ax.scatter("x", "x", data=data)
fig.savefig("test.png")

In [None]:
# 20-bin histogram of the second field of every record in `results`.
distance = [record[1] for record in results]
fig, ax = plt.subplots()
ax.hist(distance, bins=20)
# fig.savefig('test.png')

In [None]:
# Positions of the entries of `distance` that are below 1.
# NOTE(review): the unit and meaning of the threshold 1 are not visible here.
# `r_position` is a tuple holding one index array; later cells use r_position[0].
a = np.array(distance)
r_position = np.nonzero(a < 1)

In [None]:
# dock_out_folder = r'/mnt/e/Python_Project/SMTarRNA/project/data/3a6p/10k/'
# Paths of the docking output archives (entries ending in ".gz").
dock_conformation_sm = [
    os.path.join(dock_out_folder, entry)
    for entry in os.listdir(dock_out_folder)
    if entry.endswith(".gz")
]

# Load every archive again, this time without any transform.
# NOTE(review): later cells index `total_molecular` with positions derived from
# `results`; that only lines up if os.listdir returns the entries in the same
# order as it did inside main() — confirm.
statis_results = [ZincPdbqt(path) for path in tqdm(dock_conformation_sm)]
total_molecular = map_and_conjunction(list, statis_results)
# ecn = gz_writer('right_10k.pdbqt')

In [None]:
# Write every molecule selected by `r_position` as a MODEL ... ENDMDL record.
# The context manager closes the file even if indexing or concatenation raises
# part-way through the loop.
with open("right_10k_small_pocket.pdbqt", "w") as ecn:
    for i in r_position[0]:
        # The second field of each record is concatenated as text between the
        # MODEL / ENDMDL markers.
        molecular = total_molecular[i][1]
        # write(), not writelines(): the payload is one string, and writelines()
        # would iterate over it character by character.
        ecn.write("MODEL \n" + molecular + "ENDMDL\n")

In [None]:
# Write up to the first 66 loaded molecules as MODEL ... ENDMDL records.
# NOTE(review): despite the file name, this takes the leading records in load
# order rather than a random sample — confirm this is intended.
with open("random_sample.pdbqt", "w") as f:
    # Cap the count at the number of available molecules so that a smaller
    # data set does not raise IndexError.
    for i in range(min(66, len(total_molecular))):
        # write(), not writelines(): the payload is a single string.
        f.write("MODEL \n" + total_molecular[i][1] + "ENDMDL\n")

In [None]:
# Inspect `molecular`; in the visible code it is last assigned inside the
# r_position export loop, so this shows the last molecule written there.
molecular

In [None]:
# Union of the molecule positions recorded for pockets 1 through 8.
# NOTE(review): in the visible code `pocket_sm` is created empty and the code
# that fills it is commented out, so this lookup raises KeyError as written.
pocket_set = [set(pocket_sm[key]) for key in range(1, 9)]
total_set = set().union(*pocket_set)

In [None]:
# Broadcasting scratch: give `a` a trailing axis and the transposed `b` a
# leading axis so that the two arrays can be combined element-wise.
a = np.array([[1, 2, 3], [2, 3, 4]])
b = np.array([[3, 4, 5], [4, 5, 6]])
a = np.expand_dims(a, axis=2)  # (2, 3) -> (2, 3, 1)
b = np.expand_dims(b.T, axis=0)  # (3, 2) -> (1, 3, 2)
b.shape

In [None]:
import builtins

# Scratch cell: list the names defined in the builtins module.
dir(builtins)
# NOTE(review): all() requires exactly one iterable argument; called with no
# arguments, as here, it raises TypeError. Intent unclear — confirm or remove.
all()

In [None]:
# Load a single docking result archive, applying only the `transform` step
# (no pocket statistics), for inspection in the following cells.
file = "/mnt/e/Python_Project/SMTarRNA/project/data/3a6p/100k/zinc_drug_like_3d_100k_rand_3a6p_dOthers_aH_dock_results_0-10044.pdbqt.gz"

data = ZincPdbqt(file, transform=[transform])

In [None]:
# Materialise every record yielded by iterating over `data`.
moleculars = list(data)

In [None]:
# Flatten all atoms of all molecules into one list, dropping the first field
# of each atom entry (presumably leaving the coordinates — confirm).
atom_position = []
for record in moleculars:
    atom_position.extend(atom[1:] for atom in record[1])

In [None]:
import matplotlib.pyplot as plt
from scipy import spatial

# Pairwise distances among the first 100 collected atom positions.
# NOTE: this rebinds `results`, which previously held the output of main().
# The matrix is computed between a point set and itself, so the histogram
# includes the zero self-distances on the diagonal and counts each pair twice.
first_atoms = atom_position[:100]
results = spatial.distance_matrix(first_atoms, first_atoms)
fig, ax = plt.subplots()
ax.hist(results.reshape(-1, 1), bins=20)