# Calculate Volume overlap

In [57]:
from JSU_lib import *
import copy

In [11]:
def extract_volume(file_path):
    volume_pattern = re.compile(r'REMARK Volume = (\d+\.\d+) Cubic Angstroms')
    
    with open(file_path, 'r') as file:
        lines = file.readlines()
        
    for line in lines:
        match = volume_pattern.match(line)
        if match:
            return float(match.group(1))
    return None

def inverse_transform(df, centroid, rotation_matrix):
    rotation_matrix_inverse = np.linalg.inv(rotation_matrix)
    # Step 1: Rotate the coordinates using the inverse rotation matrix
    rotated_coords = np.dot(df[['Cartn_x', 'Cartn_y', 'Cartn_z']].values, rotation_matrix_inverse.T)

    # Step 2: Translate the coordinates by adding the centroid
    translated_coords = rotated_coords + centroid

    # Create a new DataFrame with the transformed coordinates
    transformed_coords_df = pd.DataFrame(translated_coords, columns=['Cartn_x', 'Cartn_y', 'Cartn_z'])

    # Retain the rest of the original columns
    transformed_df = df.drop(columns=['Cartn_x', 'Cartn_y', 'Cartn_z']).join(transformed_coords_df)
    transformed_df.Cartn_x = transformed_df.Cartn_x.round(0)
    transformed_df.Cartn_y = transformed_df.Cartn_y.round(0)
    transformed_df.Cartn_z = transformed_df.Cartn_z.round(0)
    transformed_df.occupancy = 1.0
    transformed_df.B_iso_or_equiv = 1.0
    return transformed_df

def inverse_transform_array(coords, centroid, rotation_matrix):
    rotation_matrix_inverse = np.linalg.inv(rotation_matrix)
    
    # Step 1: Rotate the coordinates using the inverse rotation matrix
    rotated_coords = np.dot(coords, rotation_matrix_inverse.T)
    
    # Step 2: Translate the coordinates by adding the centroid
    translated_coords = rotated_coords + centroid
    
    return np.round(translated_coords)

In [2]:
povme_out_dir = "./../PROGRAMS/POVME-2.2.2/out/"

dirs_dict = {
    "LIGYSIS": os.path.join(povme_out_dir, "ligysis"),
    "VN-EGNN": os.path.join(povme_out_dir, "vnegnn"),
    "IF-SitePred": os.path.join(povme_out_dir, "ifsp"),
    "GrASP": os.path.join(povme_out_dir, "grasp_RIGHT_CLUSTERING"),
    "DeepPocket": os.path.join(povme_out_dir, "deeppocket"),
    "PUResNet": os.path.join(povme_out_dir, "puresnet"),
    "P2Rank+Cons": os.path.join(povme_out_dir, "p2rank_cons"),
    "P2Rank": os.path.join(povme_out_dir, "p2rank"),
    "fpocket": os.path.join(povme_out_dir, "fpocket"),
    "PocketFinder": os.path.join(povme_out_dir, "pocketfinder"),
    "Ligsite": os.path.join(povme_out_dir, "ligsite"),
    "Surfnet": os.path.join(povme_out_dir, "surfnet"),
}

## EXTRACT POVME VOLUME POINTS

In [5]:
MASTER_shapes_dict = {}
errors = []
for k, v in dirs_dict.items():
    MASTER_shapes_dict[k] = {}
    pockets = (os.listdir(v))
    print(k, len(pockets))
    for i, pocket in enumerate(pockets):
        pocket_dir = os.path.join(v, pocket)
        pocket_vol_path = os.path.join(pocket_dir, f'{pocket}_frame_1.pdb')
        try:
            volume = extract_volume(pocket_vol_path)
        except:
            errors.append(f'{k}_{pocket}')
            print(f'ERROR with: {k}_{pocket}')
            continue
        if volume == 0.0:
            MASTER_shapes_dict[k][pocket] = (volume, np.array([]))
            continue

        pocket_vol = PDBXreader(inputfile = pocket_vol_path).atoms(format_type="pdb", excluded=())
        MASTER_shapes_dict[k][pocket] = (volume, np.array(pocket_vol[['Cartn_x', 'Cartn_y', 'Cartn_z']]))
        if i % 100 == 0:
            print(i)
    #break

LIGYSIS 8245
0
100
200
300
400
500
ERROR with: LIGYSIS_4gl2_A_1
600
700
800
900
1000
1100
ERROR with: LIGYSIS_.DS_Store
1200
1400
ERROR with: LIGYSIS_3k2s_B_14
1500
1600
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
3000
3100
3400
3700
3800
3900
4000
4100
4300
4400
ERROR with: LIGYSIS_1w0s_C_8
4500
4600
ERROR with: LIGYSIS_5wde_A_0
4700
4800
5000
5200
5300
5400
5600
5700
5800
ERROR with: LIGYSIS_3k2s_B_27
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7100
7200
7300
ERROR with: LIGYSIS_5tee_A_0
7400
7500
7600
7700
7800
7900
8000
8100
8200
VN-EGNN 20612
0
100
200
400
600
700
800
900
ERROR with: VN-EGNN_5khr_R_1
1000
1100
1200
1300
1400
1500
1600
1700
ERROR with: VN-EGNN_4nhy_D_1
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
ERROR with: VN-EGNN_.DS_Store
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5400
5500
5600
5800
5900
6000
6100
6400
6500
6600
6700
6800
6900
7000
ERROR with: VN-EGNN_3e04_D

In [6]:
for k, v in MASTER_shapes_dict.items():
    print(k, len(v))

LIGYSIS 8238
VN-EGNN 20596
IF-SitePred 67269
GrASP 6449
DeepPocket 23452
PUResNet 3099
P2Rank+Cons 13791
P2Rank 16402
fpocket 72342
PocketFinder 9867
Ligsite 7671
Surfnet 9852


In [7]:
save_to_pickle(MASTER_shapes_dict, "./results/MASTER_POCKET_SHAPE_DICT_EXTENDED.pkl")

The point of this is to have another way of comparing pockets. We have the following:
- Distance between pocket centroids

- Overlap between pocket residues

- And now, pocket volume overlap. This one is a bit more nuanced, as not all pockets have volume, volume might be 0 if it is not a cavity.

## TRANSFORM POCKET SHAPES

In [15]:
to_transform = ["P2Rank+Cons", "fpocket"]

In [54]:
trans_p2rank_cons_shapes = {}
for k, v in MASTER_shapes_dict["P2Rank+Cons"].items():
    if v[0] == 0:
        trans_p2rank_cons_shapes[k] = v
        continue
    else:
        rep_chain = "_".join(k.split("_")[:2])
        trans_v = inverse_transform_array(v[1], orig_centroids[rep_chain], rot_matrices[rep_chain])
        trans_p2rank_cons_shapes[k] = (v[0], trans_v)


trans_fpocket_cons_shapes = {}
for k, v in MASTER_shapes_dict["fpocket"].items():
    if v[0] == 0:
        trans_fpocket_cons_shapes[k] = v
        continue
    else:
        rep_chain = "_".join(k.split("_")[:2])
        trans_v = inverse_transform_array(v[1], orig_centroids[rep_chain], rot_matrices[rep_chain])
        trans_fpocket_cons_shapes[k] = (v[0], trans_v)

In [55]:
print(len(MASTER_shapes_dict["P2Rank+Cons"]))
print(len(trans_p2rank_cons_shapes))

print(len(MASTER_shapes_dict["fpocket"]))
print(len(trans_fpocket_cons_shapes))

13791
13791
72342
72342


In [77]:
MASTER_shapes_dict_TRANS = copy.copy(MASTER_shapes_dict)

In [78]:
MASTER_shapes_dict_TRANS["P2Rank+Cons"] = trans_p2rank_cons_shapes
MASTER_shapes_dict_TRANS["fpocket"] = trans_fpocket_cons_shapes

In [79]:
MASTER_shapes_dict_TRANS["DeepPocket-Segmented"] = MASTER_shapes_dict_TRANS.pop("DeepPocket")

In [84]:
MASTER_shapes_dict_TRANS["VN-EGNN-NR"] = MASTER_shapes_dict_TRANS["VN-EGNN"]
MASTER_shapes_dict_TRANS["IF-SitePred-NR"] = MASTER_shapes_dict_TRANS["IF-SitePred"]
MASTER_shapes_dict_TRANS["IF-SitePred-rescored-NR"] = MASTER_shapes_dict_TRANS["IF-SitePred"]
MASTER_shapes_dict_TRANS["PUResNet+PRANK"] = MASTER_shapes_dict_TRANS["PUResNet"]
MASTER_shapes_dict_TRANS["DeepPocket-Segmented-NR"] = MASTER_shapes_dict_TRANS["DeepPocket-Segmented"]

MASTER_shapes_dict_TRANS["DeepPocket-Rescored"] = MASTER_shapes_dict_TRANS["fpocket"]
MASTER_shapes_dict_TRANS["fpocket+PRANK"] = MASTER_shapes_dict_TRANS["fpocket"]

MASTER_shapes_dict_TRANS["PocketFinder-SS"] = MASTER_shapes_dict_TRANS["PocketFinder"]
MASTER_shapes_dict_TRANS["PocketFinder+PRANK"] = MASTER_shapes_dict_TRANS["PocketFinder"]

MASTER_shapes_dict_TRANS["Ligsite-SS"] = MASTER_shapes_dict_TRANS["Ligsite"]
MASTER_shapes_dict_TRANS["Ligsite+PRANK"] = MASTER_shapes_dict_TRANS["Ligsite"]

MASTER_shapes_dict_TRANS["Surfnet-SS"] = MASTER_shapes_dict_TRANS["Surfnet"]
MASTER_shapes_dict_TRANS["Surfnet+PRANK"] = MASTER_shapes_dict_TRANS["Surfnet"]

In [85]:
save_to_pickle(MASTER_shapes_dict_TRANS, "./results/MASTER_POCKET_SHAPE_DICT_EXTENDED_TRANS.pkl")