# Recreating metrics for solved structures

In [None]:
# Read in solved structure data
#
# One record is collected per solved-structure .pdb file found under
# `path_solved`, and the DataFrame is created once at the end instead of being
# enlarged row by row (each `.loc[len(df)] = ...` re-allocates the frame).

solved_columns = ["set", "PDB_id", "DDI_pfam_id", "path", "chainA_id", "chainB_id"]
solved_records = []

# DMI
# File names start with the PDB id (e.g. "1ATP_min_DMI.pdb"); the chains are
# always recorded as A and B. No "DDI_pfam_id" key is given, so pandas fills
# that column with NaN for these rows.
# sorted() keeps the row order independent of the directory listing order.
for structure_file in sorted(p for p in Path(path_solved / "DMI").iterdir() if p.is_file() and p.suffix == ".pdb"):
    pdb_id = structure_file.name.split("_")[0]
    solved_records.append({
        "set": "DMI",
        "PDB_id": pdb_id,
        "path": structure_file.relative_to(path_solved),
        "chainA_id": "A",
        "chainB_id": "B",
    })

# DDI
# File names look like "PF14447_PF00179_3ZNI_AC.pdb": the first two fields form
# the Pfam pair id, the third is the PDB id, and the first two characters of
# the fourth field are the chain ids.
for structure_file in sorted(p for p in Path(path_solved / "DDI").iterdir() if p.is_file() and p.suffix == ".pdb"):
    name_parts = structure_file.name.split("_")
    chain_pair = name_parts[3]
    solved_records.append({
        "set": "DDI",
        "PDB_id": name_parts[2],
        "DDI_pfam_id": "_".join(name_parts[0:2]),
        "path": structure_file.relative_to(path_solved),
        "chainA_id": chain_pair[0],
        "chainB_id": chain_pair[1],
    })

# Passing `columns` fixes the column order and keeps all columns present even
# when no structure file was found.
dataSolved = pd.DataFrame(solved_records, columns=solved_columns)

display(dataSolved)

Unnamed: 0,set,PDB_id,DDI_pfam_id,path,chainA_id,chainB_id
0,DMI,1ATP,,DMI\1ATP_min_DMI.pdb,A,B
1,DMI,1AXC,,DMI\1AXC_min_DMI.pdb,A,B
2,DMI,1B72,,DMI\1B72_min_DMI.pdb,A,B
3,DMI,1B8Q,,DMI\1B8Q_min_DMI.pdb,A,B
4,DMI,1BXX,,DMI\1BXX_min_DMI.pdb,A,B
...,...,...,...,...,...,...
183,DDI,3ZNI,PF14447_PF00179,DDI\PF14447_PF00179_3ZNI_AC.pdb,A,C
184,DDI,3J7Y,PF14978_PF00327,DDI\PF14978_PF00327_3J7Y_oZ.pdb,o,Z
185,DDI,6D6Q,PF15985_PF10175,DDI\PF15985_PF10175_6D6Q_GL.pdb,G,L
186,DDI,3KZ1,PF17838_PF00071,DDI\PF17838_PF00071_3KZ1_BE.pdb,B,E


In [None]:
# Compute interface metrics for every solved structure with the project-local
# measure_PPI module.
import sys

# ../src (relative to the working directory) is put on the module search path
# so that the import below can find measure_PPI.
libpath = Path("../src").resolve()
print(libpath)
if str(libpath) not in sys.path:  # do not stack duplicate entries when the cell is re-run
    sys.path.insert(0, str(libpath))
import measure_PPI

# List of (absolute structure path, structure name) tuples handed to measure_PPI.Run.
pathObj = []

for relative_path in dataSolved["path"]:
    # The "path" column was stored relative to path_solved, so it is joined
    # back onto that same directory.
    structure_path: Path = path_solved / relative_path
    if not structure_path.exists():
        print(f"\t{bcolors.FAIL}{structure_path.name} does not exist.{bcolors.ENDC} Skip interface metrics")
        continue
    # The file stem is the name used later to look the result up in
    # df_intf_metrics["structure_name"].
    pathObj.append((structure_path.resolve(), relative_path.stem))

df_intf_metrics = measure_PPI.Run(pathObj=pathObj, num_threads=12)

D:\Eigene Datein\Programmieren\Git\abrilka\bachelorthesis\src
[2025-04-22 17:32:29,438 | measure_PPI | INFO] Started Taskpool of 12 processes for 188 files
[2025-04-22 17:32:34,441 | measure_PPI | INFO] 46% - ETA 0:00:05 | current speed 17.591 s⁻¹ | average speed 17.391 s⁻¹
[2025-04-22 17:32:39,521 | measure_PPI | INFO] 80% - ETA 0:00:02 | current speed 12.596 s⁻¹ | average speed 14.975 s⁻¹
[2025-04-22 17:32:44,550 | measure_PPI | INFO] 91% - ETA 0:00:01 | current speed 4.176 s⁻¹ | average speed 11.382 s⁻¹
[2025-04-22 17:32:50,978 | measure_PPI | INFO] 98% - ETA 0:00:00 | current speed 2.022 s⁻¹ | average speed 8.588 s⁻¹
[2025-04-22 17:33:07,630 | measure_PPI | INFO] 99% - ETA 0:00:00 | current speed 0.12 s⁻¹ | average speed 4.896 s⁻¹
[2025-04-22 17:33:07,681 | measure_PPI | INFO] Finished processing 188 objects in 0:00:38 | average speed 4.916 s⁻¹


In [None]:
# Copy the interface metrics from df_intf_metrics into dataSolved, matching
# each solved structure by the stem of its file name.
metric_columns = ["min_distance", "buried_area", "salt_bridges", "hbonds", "hydrophobic_interactions"]

# Every metric column starts out as None; rows without exactly one matching
# metrics entry keep that value.
for metric in metric_columns:
    dataSolved[metric] = None

for row_index, solved_row in dataSolved.iterrows():
    prediction_name = solved_row["path"].stem
    name_matches = df_intf_metrics["structure_name"] == prediction_name
    matched_metrics = df_intf_metrics[name_matches]

    # Zero or several hits are both reported and skipped.
    if len(matched_metrics) != 1:
        print(f"Failed to locate the experimental structure for {prediction_name}")
        continue

    # .item() unwraps the single matched value into a scalar.
    for metric in metric_columns:
        dataSolved.at[row_index, metric] = matched_metrics[metric].item()

In [None]:
# Write the solved-structure table into path_solved twice: as a tab-separated
# text file and as an Excel workbook. The row index is omitted from both files.
tsv_target = path_solved / "solved_metrics.tsv"
xlsx_target = path_solved / "solved_metrics.xlsx"

dataSolved.to_csv(tsv_target, sep="\t", index=None)
dataSolved.to_excel(xlsx_target, index=None, sheet_name="solved metrics")