In [54]:
import os
from lib.path import get_training_data_dir
from lib.helper import make_dir

In [1]:
import sys
sys.path.append('../..')

import freesasa
from lib.path import get_protein_path, get_ligand_path
from lib.pdb import get_pdb_names_by_txt
import numpy as np
from typing import Tuple

class ResSASACalculator:
    def __init__(self, pdb_path: str, resname: str):
        self.pdb_path = pdb_path
        self.resname = resname

    def _get_sasa(self, path: str) -> float:
        try:
            structure = freesasa.Structure(path)
            sasa_result = freesasa.calc(structure)
            return structure, sasa_result
        except Exception as e:
            raise ValueError(f"Error calculating SASA for {path}: {e}")

    def calculate_res_sasa(self) -> float:
        structure, sasa_result = self._get_sasa(self.pdb_path)
        # 指定した残機のSASAを選択する
        selection = freesasa.selectArea([f"water, resn {self.resname}"], structure, sasa_result)
        res_sasa = selection['water']
        return res_sasa


In [42]:
resname = 'BCD'

ligand_sasa_list = []
pdb_names = get_pdb_names_by_txt('/mnt/ito/pdbbind_raw/general_set/index/monomer_general_protein.txt')
for pdb_name in pdb_names:
    pdb_name = '4l23'
    print(pdb_name)
    sasa_calculator = ResSASACalculator(
        f'/mnt/ito/data/pdb_bind/{pdb_name}/water_sasa/water_sasa_23.pdb',
        resname
    )
    water_sasa = sasa_calculator.calculate_res_sasa()
    ligand_sasa_list.append(water_sasa)
    print(water_sasa)
    break

# np.save('ligand_sasa_general.npy', np.array(ligand_sasa_list))

4l23
0.0




In [8]:
from data_loader.SingleDataLoader import SingleDataLoader
data_dir = '../../data'
pdb_data_dir = '/mnt/ito/data/pdb_bind'
test_list = os.path.join(data_dir, 'all_valid_test.txt')
DATA_TYPE1 = 'gr'
DATA_VOXEL_NUM = 20
CLASSIFYING_RULE = 'WaterClassifyingRuleEmbedding'
LIGAND_POCKET_DEFINER = 'LigandPocketDefinerOriginal'
LIGAND_VOXEL_NUM = 8


training_data_dir1 = get_training_data_dir(DATA_TYPE1, DATA_VOXEL_NUM, CLASSIFYING_RULE, LIGAND_POCKET_DEFINER, LIGAND_VOXEL_NUM)
data_loader = SingleDataLoader(training_data_dir1)

In [53]:
pdbs = get_pdb_names_by_txt('../../../data/all_valid_test.txt')

In [58]:
for pdb in pdbs[822:]:
    print(pdb)
    _, water_ids_dis = data_loader.get_test_data_and_water_ids(pdb, 'displaceable')
    _, water_ids_non_dis = data_loader.get_test_data_and_water_ids(pdb, 'non_displaceable')
    water_ids = np.concatenate([water_ids_dis, water_ids_non_dis])

    water_path = os.path.join(pdb_data_dir, pdb, f'pred_O_placed_{pdb}_3.0.pdb')
    water_list = []
    with open(water_path, 'r') as f:
        water_lines = f.readlines()
    for line in water_lines:
        if line.startswith('ATOM'):
            atomic_id = line[7:12].strip() 
            if int(atomic_id) in water_ids:
                line = line[:12] + "  O " + line[16:]
                line = line[:76] + " O" + line[78:]            
                water_list.append(line)

    pdb_path = os.path.join(pdb_data_dir, pdb, f'{pdb}_min.pdb')
    for water_line in water_list:
        water_id = water_line[7:12].strip()
        water_sasa_path = os.path.join(pdb_data_dir, pdb, f'water_sasa/water_sasa_{water_id}.pdb')
        make_dir(water_sasa_path)
        with open(pdb_path, 'r') as f:
            lines = f.readlines()
        lines.append(water_line)
        with open(water_sasa_path, 'w') as f:
            f.writelines(lines)

1w70


FileNotFoundError: No data found for 1w70

In [52]:
pdb_path = os.path.join(pdb_data_dir, pdb, f'{pdb}_min.pdb')
for water_line in water_list:
    water_id = water_line[7:12].strip()
    water_sasa_path = os.path.join(pdb_data_dir, pdb, f'water_sasa/water_sasa_{water_id}.pdb')
    with open(pdb_path, 'r') as f:
        lines = f.readlines()
    lines.append(water_line)
    with open(water_sasa_path, 'w') as f:
        f.writelines(lines)