# Old vs new DockQ calculation
Comparison of the previous DockQ calculation (Lee et al. 2024) with my rewritten script.

#### A Script by John
Extracted from `AlphaFold_manuscript/DDI_scripts/DDI_calculate_template_dependent_metrics.py` ([Git](https://github.com/KatjaLuckLab/AlphaFold_manuscript/blob/main/DDI_scripts/DDI_calculate_template_dependent_metrics.py)).

In [None]:
import os
import subprocess
import time

def calculate_DockQ(predicted_model,template_model):
    """Calculate the DockQ metrics of a predicted model in reference to a template model.

    fix_numbering.pl is run first to create ``<predicted_model>.fixed``, which is
    then scored with DockQ.py against the template model. If either of these two
    steps fails, DockQ.py is run on the original predicted model instead. The
    stdout of every successful subprocess, and the reason for a fallback, are
    appended to ``DockQ_log.log`` in the folder of the predicted model.

    Args:
        predicted_model (str): the absolute path to the processed predicted model (e.g. /Volumes/../ranked_0_min.pdb)
        template_model (str): the absolute path to the processed template model (e.g. /Volumes/../DDI_manual_curation/PF00023_PF07686/4NIK_min_DDI.pdb)

    Returns:
        DockQ_metrics (dict): metric name -> value (kept as str) parsed from the last
            non-empty line of the DockQ ``-short`` output

    Raises:
        subprocess.CalledProcessError: if the fallback DockQ run exits with a non-zero status
        OSError: if the log file cannot be opened or the fallback process cannot be started
        ValueError: if DockQ succeeded but printed nothing that can be parsed
    """
    # set up the path to DockQ and the fix_numbering.pl script
    # (the DockQ folder is expected next to the folder that contains this script)
    script_folder = os.path.dirname(os.path.abspath(__file__))
    DockQ_folder_path = os.path.join(os.path.dirname(script_folder), 'DockQ')
    DockQ_path = os.path.join(DockQ_folder_path, 'DockQ.py')
    fix_numbering_path = os.path.join(DockQ_folder_path, 'scripts', 'fix_numbering.pl')

    # the log file lives in the same folder as the predicted model; the with-block
    # guarantees it is closed even when the fallback run raises
    prediction_folder, predicted_model_name = os.path.split(predicted_model)
    with open(os.path.join(prediction_folder, 'DockQ_log.log'), 'a') as log_file:
        log_file.write(f'Processing model {predicted_model_name}\n')

        try:
            # first generate the .fixed file, then score the re-numbered model
            _run_and_log([fix_numbering_path, predicted_model, template_model], log_file)
            DockQ_stdout = _run_and_log(
                ['python3', DockQ_path, f'{predicted_model}.fixed', template_model, '-short'],
                log_file,
            )
        except (subprocess.CalledProcessError, OSError) as error:
            # Only failures of the external tools trigger the fallback; anything else
            # (e.g. KeyboardInterrupt) propagates instead of being silenced.
            log_file.write(f'Scoring with fixed numbering failed ({error}); running DockQ on the original model\n')
            error_output = getattr(error, 'stderr', None)
            if error_output:
                log_file.write(f'{error_output}\n')
            DockQ_stdout = _run_and_log(
                ['python3', DockQ_path, f'{predicted_model}', template_model, '-short'],
                log_file,
            )

    # parse subprocess output for relevant information
    DockQ_metrics = _parse_DockQ_short_output(DockQ_stdout)
    print(DockQ_metrics)

    # sleep the program for 1 second to avoid the error subprocess.CalledProcessError: died with <Signals.SIGTRAP: 5>.
    print('Sleeping for 1 second to avoid the error subprocess.CalledProcessError...')
    time.sleep(1)

    return DockQ_metrics


def _run_and_log(command, log_file):
    """Run *command*, append its stdout to *log_file* and return that stdout."""
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    log_file.write(f'{completed.stdout}\n')
    return completed.stdout


def _parse_DockQ_short_output(stdout):
    """Turn the last non-empty line of the DockQ ``-short`` output into a dict."""
    filled_lines = [line for line in stdout.split('\n') if line.strip()]
    if not filled_lines:
        raise ValueError('DockQ produced no output that could be parsed')
    # The line alternates "<metric> <value>"; the last three space-separated fields
    # are dropped (presumably file names / extra info - confirm against DockQ output).
    tokens = filled_lines[-1].split(' ')[:-3]
    return dict(zip(tokens[0::2], tokens[1::2]))

#### B Rewritten DockQ metric

In [7]:
# Imports and settings
from typing import Literal
import pandas as pd
import numpy as np
from pathlib import Path
from DockQ.DockQ import load_PDB, run_on_all_native_interfaces
class bcolors:
    """ANSI escape sequences used to colour and style printed text.

    Wrap a string as ``f"{bcolors.OKCYAN}text{bcolors.ENDC}"``.
    """

    # Colour codes
    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKCYAN = "\033[96m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"
    # Written after styled text to end the styling
    ENDC = "\033[0m"
    # Text attributes
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

# ---------------------------------------------------------------------------
# Settings
# ---------------------------------------------------------------------------

# Selects which AlphaFold output is parsed
af_mode: Literal["AF2", "AF3"] = "AF3"

# Resource folder holding the structures and the metadata tables
path_resources = Path(r"D:\Eigene Datein\dev\Uni\JGU Bio Bachelorthesis\Daten\resources")
# Luck Drive folder (the ipSAE metric reads its json file from here);
# for AF3 the "AlphaFold3" sub-folder is used
path_AF_luck_drive = Path(r"L:\imb-luckgr2\projects\AlphaFold")
if af_mode == "AF3":
    path_AF_luck_drive /= "AlphaFold3"

# Local folders derived from the resource folder
path_AF = path_resources / af_mode
path_solved = path_resources / "solved"

# Location of ipsae.py
path_ipsae_script = Path("../code ressources/ipsae.py")

# When True, the previous dataframe is loaded
load_previous = True


def _pdb_files(folder):
    """Return the regular files with a ``.pdb`` suffix directly inside *folder*."""
    return [entry for entry in folder.iterdir() if entry.is_file() and entry.suffix == ".pdb"]


# ---------------------------------------------------------------------------
# Dataframe with the AlphaFold metrics
# ---------------------------------------------------------------------------
dataAF = pd.read_csv(path_AF / f"{af_mode}_metrics.tsv", sep="\t")
print(f"{bcolors.OKCYAN}dataAF{bcolors.ENDC}")
display(dataAF)

# ---------------------------------------------------------------------------
# Table of solved (template) structures, one row per .pdb file
# ---------------------------------------------------------------------------
dataSolved = pd.DataFrame(columns=["set", "PDB_id", "DDI_pfam_id", "path", "chainA_id", "chainB_id"])

# DMI: the PDB ID is the first "_"-separated part of the file name; chains are always A and B
for structure_file in _pdb_files(path_solved / "DMI"):
    pdb_id = structure_file.name.split("_")[0]
    dataSolved.loc[len(dataSolved)] = {
        "set": "DMI",
        "PDB_id": pdb_id,
        "path": structure_file.relative_to(path_solved),
        "chainA_id": "A",
        "chainB_id": "B",
    }

# DDI: file names are split on "_" into two Pfam IDs, the PDB ID and a part
# whose first two characters are the chain IDs
for structure_file in _pdb_files(path_solved / "DDI"):
    name_parts = structure_file.name.split("_")
    ddi_pfam_id = "_".join(name_parts[0:2])
    pdb_id = name_parts[2]
    chainA_id = name_parts[3][0]
    chainB_id = name_parts[3][1]
    dataSolved.loc[len(dataSolved)] = {
        "set": "DDI",
        "PDB_id": pdb_id,
        "DDI_pfam_id": ddi_pfam_id,
        "path": structure_file.relative_to(path_solved),
        "chainA_id": chainA_id,
        "chainB_id": chainB_id,
    }

print(f"{bcolors.OKCYAN}dataSolved{bcolors.ENDC}")
display(dataSolved)

[96mdataAF[0m


Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,ranking_score,chainA_length,chainB_length,chainA_id,chainB_id,chainA_start,...,DockQ,iRMSD,LRMSD,Fnonnat,buried_area,min_distance,salt_bridges,hbonds,hydrophobic_interactions,ipSAE
0,alphafold3,known_DMI,DEG_APCC_KENBOX_2_4GGD,ranked_0,0.97,312,5,A,B,165.0,...,0.967617,0.341276,0.831159,0.000000,848.152,5.371,9,0,6,0.869025
1,alphafold3,known_DMI,DEG_APCC_KENBOX_2_4GGD,ranked_1,0.97,312,5,A,B,165.0,...,0.943274,0.426121,1.169542,0.076923,603.522,6.264,0,0,0,0.868551
2,alphafold3,known_DMI,DEG_APCC_KENBOX_2_4GGD,ranked_2,0.96,312,5,A,B,165.0,...,0.908970,0.621263,1.944753,0.111111,602.041,6.241,0,0,0,0.855837
3,alphafold3,known_DMI,DEG_APCC_KENBOX_2_4GGD,ranked_3,0.96,312,5,A,B,165.0,...,0.961964,0.382246,1.037074,0.038462,615.163,6.207,0,0,0,0.850758
4,alphafold3,known_DMI,DEG_APCC_KENBOX_2_4GGD,ranked_4,0.96,312,5,A,B,165.0,...,0.931179,0.573091,1.749101,0.074074,662.502,5.890,0,0,0,0.850449
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_DDI,D1PF18773_PF00071_2X19.D2PF00009_PF01873_2D74,ranked_0,0.36,60,113,B,B,392.0,...,,,,,1447.877,4.251,1,0,19,0.012324
3176,alphafold3,random_DDI,D1PF18773_PF00071_2X19.D2PF00009_PF01873_2D74,ranked_1,0.23,60,113,B,B,392.0,...,,,,,1482.311,4.197,1,0,25,0.000000
3177,alphafold3,random_DDI,D1PF18773_PF00071_2X19.D2PF00009_PF01873_2D74,ranked_2,0.22,60,113,B,B,392.0,...,,,,,1564.435,4.783,2,0,18,0.000000
3178,alphafold3,random_DDI,D1PF18773_PF00071_2X19.D2PF00009_PF01873_2D74,ranked_3,0.21,60,113,B,B,392.0,...,,,,,1431.933,4.257,0,0,59,0.000000


[96mdataSolved[0m


Unnamed: 0,set,PDB_id,DDI_pfam_id,path,chainA_id,chainB_id
0,DMI,1ATP,,DMI\1ATP_min_DMI.pdb,A,B
1,DMI,1AXC,,DMI\1AXC_min_DMI.pdb,A,B
2,DMI,1B72,,DMI\1B72_min_DMI.pdb,A,B
3,DMI,1B8Q,,DMI\1B8Q_min_DMI.pdb,A,B
4,DMI,1BXX,,DMI\1BXX_min_DMI.pdb,A,B
...,...,...,...,...,...,...
183,DDI,3ZNI,PF14447_PF00179,DDI\PF14447_PF00179_3ZNI_AC.pdb,A,C
184,DDI,3J7Y,PF14978_PF00327,DDI\PF14978_PF00327_3J7Y_oZ.pdb,o,Z
185,DDI,6D6Q,PF15985_PF10175,DDI\PF15985_PF10175_6D6Q_GL.pdb,G,L
186,DDI,3KZ1,PF17838_PF00071,DDI\PF17838_PF00071_3KZ1_BE.pdb,B,E


In [None]:
# Reset the DockQ-derived columns so that every row skipped below stays NaN
for metric_column in ("DockQ", "iRMSD", "LRMSD", "Fnonnat"):
    dataAF[metric_column] = np.nan

# Only the known benchmark sets are scored against a solved template structure
for i, row in dataAF[dataAF["benchmark_set"].isin(["known_DMI", "known_DDI"])].iterrows():
    # Reading data from the row
    benchmark_set = str(row["benchmark_set"])
    _set = "DDI" if "DDI" in benchmark_set else "DMI"
    prediction_name = str(row["prediction_name"]) if pd.notna(row["prediction_name"]) else None
    model_id = str(row["model_id"]) if pd.notna(row["model_id"]) else None
    chainA_id = str(row["chainA_id"]) if pd.notna(row["chainA_id"]) else None
    chainB_id = str(row["chainB_id"]) if pd.notna(row["chainB_id"]) else None

    # Printing
    if model_id == "ranked_0":
        print(f"{bcolors.OKBLUE}{prediction_name} ({benchmark_set}){bcolors.ENDC}")

    # Without these identifiers neither the file path nor the chain map can be built
    if None in (prediction_name, model_id, chainA_id, chainB_id):
        print(f"\t{bcolors.FAIL}Missing prediction name, model ID or chain IDs in row {i}.{bcolors.ENDC} Skip DockQ")
        continue

    # Loading AF prediction
    structure_path = path_resources / af_mode / _set / benchmark_set / prediction_name / (model_id + ".pdb")
    if not structure_path.exists():
        print(f"\t{bcolors.FAIL}{prediction_name} ({benchmark_set}) does not exist.{bcolors.ENDC} Skip DockQ")
        continue

    # Derive the template identifiers from the prediction name of THIS row. Previously
    # pdb_id / ddi_pfam_id were never assigned in this loop, so the values left over
    # from the loop that builds dataSolved were used for every prediction.
    # NOTE(review): assumes known_DDI names look like "<PfamA>_<PfamB>_<PDB>..." and
    # known_DMI names end in "_<PDB>" (e.g. "DEG_APCC_KENBOX_2_4GGD") - confirm.
    name_parts = prediction_name.split("_")
    if _set == "DDI":
        if len(name_parts) < 3:
            print(f"\t{bcolors.FAIL}Can't derive Pfam IDs and PDB ID from {prediction_name} ({benchmark_set}).{bcolors.ENDC} Skip")
            continue
        ddi_pfam_id = "_".join(name_parts[0:2])
        pdb_id = name_parts[2]
    else:
        ddi_pfam_id = None
        pdb_id = name_parts[-1]

    # Loading template
    template_mask = (dataSolved["set"] == _set) & (dataSolved["PDB_id"] == pdb_id)
    if ddi_pfam_id is not None:
        template_mask &= (dataSolved["DDI_pfam_id"] == ddi_pfam_id) | dataSolved["DDI_pfam_id"].isna()
    template_row = dataSolved.loc[template_mask]
    if len(template_row) == 0:
        print(f"\t{bcolors.FAIL}Can't find template structure for {prediction_name} ({benchmark_set}) and PDB ID {pdb_id}.{bcolors.ENDC} Skip")
        continue
    elif len(template_row) >= 2:
        print(f"\t{bcolors.FAIL}Multiple template structures found for {prediction_name} ({benchmark_set}) and PDB ID {pdb_id}.{bcolors.ENDC} Skip")
        continue
    template_path = path_solved / str(template_row["path"].item())
    dockq_structure_af = load_PDB(str(structure_path))
    dockq_structure_solved = load_PDB(str(template_path))

    # Chain IDs from the row are mapped onto chains "A" and "B"; the result is looked up
    # under the concatenated row chain IDs.
    # NOTE(review): presumably the row chain IDs are the template chains and the AF
    # model always uses A/B - confirm against the DockQ chain_map documentation.
    chain_map = {chainA_id: "A", chainB_id: "B"}
    chain_key = chainA_id + chainB_id

    # fix_numbering is not used: the DockQ documentation (https://github.com/bjornwallner/DockQ)
    # states that without --no_align a sequence alignment is used (instead of number alignment).
    # And if it had an effect, why only on DDI and not on DMI (both use the same code)?

    # Using the module instead of subprocess
    result = run_on_all_native_interfaces(dockq_structure_af, dockq_structure_solved, chain_map=chain_map)[0]
    interface_metrics = result[chain_key]
    dataAF.at[i, "DockQ"] = interface_metrics["DockQ"]
    dataAF.at[i, "iRMSD"] = interface_metrics["iRMSD"]
    dataAF.at[i, "LRMSD"] = interface_metrics["LRMSD"]
    dataAF.at[i, "Fnonnat"] = np.float64(interface_metrics["fnonnat"])

display(dataAF)
