# Preparation scripts
this notebook prepares polaris-related files to be in the format the lddt script accepts

In [40]:
from rdkit import RDLogger                                                                                                                                                               
from rdkit import Chem
RDLogger.DisableLog('rdApp.*')
MERSpath = "/home/mersA_apo.pdb"
SARSpath = "/home/sarsA_apo.pdb"

In [41]:
from pathlib import Path
test_poses = [m for m in Chem.SDMolSupplier("test_poses_with_properties.sdf")]
test_poses[0].GetPropsAsDict(includePrivate=True)
name_to_prot = {}
liglocs = []
Path("polaris_poses").mkdir(parents=True, exist_ok=True)
for i,m in enumerate(test_poses):
    wri = Chem.SDWriter(f"polaris_poses/Test_{770+i}.sdf")
    if m.GetProp("Protein_Label") == "MERS-CoV Mpro":
        name_to_prot[770+i] = MERSpath
    else:
        name_to_prot[770+i] = SARSpath
    liglocs.append(f"/home/polaris_poses/Test_{770+i}.sdf")
    wri.write(m)
    wri.close()

In [52]:
import os
import csv
for task in ["default","confs_1","confs_5","confs_10","confs_20","confs_50","confs_100","mers_native_only","mmff94x","no_realign","unconstrained"]:
    mypath = f"polaris/ablation_study_results_20250812_091259/poses/{task}/"
    lddt_input = []
    for path, dirs, files in os.walk(mypath):
        for name in (files + dirs):
            if "combo" in name:
                sdfpath = os.path.join(path, name)
                pose_number = int(name.split("_")[1].split(".")[0])
                lddt_input.append((name_to_prot[pose_number],f"/home/polaris_poses/Test_{pose_number}.sdf",f"/home/{sdfpath}"))
                break # one per dir
    print(task,len(lddt_input),pose_number,sdfpath)                
    
    with open(f'input_polaris_{task}.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter=',',
                                quotechar='"', quoting=csv.QUOTE_MINIMAL,lineterminator='\n')
        writer.writerow(["apo","ref","lig"])
        for line in lddt_input:
            writer.writerow(line)

default 195 906 polaris/ablation_study_results_20250812_091259/poses/default/Test_906.pdb_idx_0153/Test_906.pdb_idx_0153_combo_20250812_091409.sdf
confs_1 195 906 polaris/ablation_study_results_20250812_091259/poses/confs_1/Test_906.pdb_idx_0153/Test_906.pdb_idx_0153_combo_20250812_095010.sdf
confs_5 195 906 polaris/ablation_study_results_20250812_091259/poses/confs_5/Test_906.pdb_idx_0153/Test_906.pdb_idx_0153_combo_20250812_095020.sdf
confs_10 195 906 polaris/ablation_study_results_20250812_091259/poses/confs_10/Test_906.pdb_idx_0153/Test_906.pdb_idx_0153_combo_20250812_095033.sdf
confs_20 195 906 polaris/ablation_study_results_20250812_091259/poses/confs_20/Test_906.pdb_idx_0153/Test_906.pdb_idx_0153_combo_20250812_095054.sdf
confs_50 195 906 polaris/ablation_study_results_20250812_091259/poses/confs_50/Test_906.pdb_idx_0153/Test_906.pdb_idx_0153_combo_20250812_095129.sdf
confs_100 195 906 polaris/ablation_study_results_20250812_091259/poses/confs_100/Test_906.pdb_idx_0153/Test_906.

In [55]:
import os
import csv
for task in ["shape_metric","unconstrained"]:
    mypath = f"polaris/ablation_study_results_20250812_091259/poses/{task}/"
    lddt_input = []
    for path, dirs, files in os.walk(mypath):
        for name in (files + dirs):
            if "shape" in name:
                sdfpath = os.path.join(path, name)
                pose_number = int(name.split("_")[1].split(".")[0])
                lddt_input.append((name_to_prot[pose_number],f"/home/polaris_poses/Test_{pose_number}.sdf",f"/home/{sdfpath}"))
                break # one per dir
    print(task,len(lddt_input),pose_number,sdfpath)                
    
    with open(f'input_polaris_{task}_shape.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter=',',
                                quotechar='"', quoting=csv.QUOTE_MINIMAL,lineterminator='\n')
        writer.writerow(["apo","ref","lig"])
        for line in lddt_input:
            writer.writerow(line)

shape_metric 195 906 polaris/ablation_study_results_20250812_091259/poses/shape_metric/Test_906.pdb_idx_0153/Test_906.pdb_idx_0153_shape_20250812_091921.sdf
unconstrained 195 906 polaris/ablation_study_results_20250812_091259/poses/unconstrained/Test_906.pdb_idx_0153/Test_906.pdb_idx_0153_shape_20250812_092736.sdf


In [None]:
"polaris/ablation_study_results_20250812_091259/poses/

In [56]:
import os
import csv
for task in ["color_metric","unconstrained"]:
    mypath = f"polaris/ablation_study_results_20250812_091259/poses/{task}/"
    lddt_input = []
    for path, dirs, files in os.walk(mypath):
        for name in (files + dirs):
            if "color" in name:
                sdfpath = os.path.join(path, name)
                pose_number = int(name.split("_")[1].split(".")[0])
                lddt_input.append((name_to_prot[pose_number],f"/home/polaris_poses/Test_{pose_number}.sdf",f"/home/{sdfpath}"))
                break # one per dir
    print(task,len(lddt_input),pose_number,sdfpath)                
    
    with open(f'input_polaris_{task}_color.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter=',',
                                quotechar='"', quoting=csv.QUOTE_MINIMAL,lineterminator='\n')
        writer.writerow(["apo","ref","lig"])
        for line in lddt_input:
            writer.writerow(line)

color_metric 195 906 polaris/ablation_study_results_20250812_091259/poses/color_metric/Test_906.pdb_idx_0153/Test_906.pdb_idx_0153_color_20250812_092323.sdf
unconstrained 195 906 polaris/ablation_study_results_20250812_091259/poses/unconstrained/Test_906.pdb_idx_0153/Test_906.pdb_idx_0153_color_20250812_092736.sdf
