# Recreating the plots of the measured metrics for AlphaFold2 (AF2) and AlphaFold3 (AF3)

In [1]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes._axes import Axes
from matplotlib.figure import Figure
from pathlib import Path
from sklearn.metrics import roc_curve, roc_auc_score

In [2]:
# Settings

# Directories containing the AF2 and AF3 inputs read by this notebook.
# Both are given relative to the current working directory and resolved to
# absolute paths, so the notebook must be started from its own folder.
# NOTE(review): "ressources" is presumably the on-disk folder name - keep the
# spelling in sync with the directory rather than correcting it here.
path_AF2_structures, path_AF3_structures = (
    Path(relative_dir).resolve()
    for relative_dir in ("../ressources/AF2", "../ressources/AF3")
)

In [3]:
# Load the tab-separated AF2 metrics table and render it for inspection.
# NOTE(review): the "Unnamed: 0" column in the output suggests the TSV was
# written together with its index; consider `index_col=0` once it is confirmed
# that no later cell relies on that column.
dataAF2 = pd.read_csv(
    path_AF2_structures.joinpath("AF_metrics_all_structures.tsv"),
    delimiter="\t",
)
display(dataAF2)

Unnamed: 0,project_name,prediction_name,run_id,chainA_length,chainB_length,model_id,model_confidence,chainA_intf_avg_plddt,chainB_intf_avg_plddt,intf_avg_plddt,...,num_align_resi_domain,RMSD_backbone_peptide,RMSD_all_atom_peptide,known_motif_plddt,DockQ,iRMS,LRMS,Fnonnat,num_mutation_in_motif,label
0,AlphaFold_benchmark,DEG_APCC_KENBOX_2_4GGD,run37,312,5,ranked_0,0.887117,96.107999,77.495999,91.454999,...,312.0,1.592504,1.859964,,0.878,0.604,1.575,0.087,known minimal,1
1,AlphaFold_benchmark,DEG_APCC_KENBOX_2_4GGD,run37,312,5,ranked_1,0.871984,95.793846,73.986000,89.736111,...,312.0,1.133857,1.553759,,0.881,0.418,1.101,0.050,known minimal,1
2,AlphaFold_benchmark,DEG_APCC_KENBOX_2_4GGD,run37,312,5,ranked_2,0.760784,95.547501,57.906001,86.585239,...,312.0,1.792767,2.100310,,0.883,0.642,1.776,0.185,known minimal,1
3,AlphaFold_benchmark,DEG_APCC_KENBOX_2_4GGD,run37,312,5,ranked_3,0.413662,94.646667,21.510000,76.362500,...,312.0,5.357927,6.558613,,0.476,1.686,5.359,0.364,known minimal,1
4,AlphaFold_benchmark,DEG_APCC_KENBOX_2_4GGD,run37,312,5,ranked_4,0.359078,94.830001,19.753333,72.307001,...,312.0,9.901379,11.882125,,0.223,2.929,9.909,0.889,known minimal,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,AlphaFold_benchmark_DDI,D1PF18773_PF00071_2X19.D2PF00009_PF01873_2D74,run6,60,113,ranked_0,0.298701,49.827778,73.161904,62.392307,...,,,,,,,,,random minimal DDI,0
3176,AlphaFold_benchmark_DDI,D1PF18773_PF00071_2X19.D2PF00009_PF01873_2D74,run6,60,113,ranked_1,0.246961,54.362000,76.415000,66.390909,...,,,,,,,,,random minimal DDI,0
3177,AlphaFold_benchmark_DDI,D1PF18773_PF00071_2X19.D2PF00009_PF01873_2D74,run6,60,113,ranked_2,0.211579,52.651538,72.910908,61.937083,...,,,,,,,,,random minimal DDI,0
3178,AlphaFold_benchmark_DDI,D1PF18773_PF00071_2X19.D2PF00009_PF01873_2D74,run6,60,113,ranked_3,0.211144,50.398334,78.798751,66.627144,...,,,,,,,,,random minimal DDI,0


In [6]:
# Load the tab-separated AF3 metrics table and render it for inspection.
# NOTE(review): the "Unnamed: 0" column in the output suggests the TSV was
# written together with its index; consider `index_col=0` once it is confirmed
# that no later cell relies on that column.
dataAF3 = pd.read_csv(
    path_AF3_structures.joinpath("AF3_metrics_full.tsv"),
    delimiter="\t",
)
display(dataAF3)

Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,num_mutations,ranking_score,chainA_length,chainB_length,fraction_disordered,has_clash,...,sequence_initial,sequence_mutated,chainA_id,chainB_id,chainA_start,chainA_end,chainB_start,chainB_end,chains_flipped,model_path
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,,0.28,189,113,0.04,0.0,...,,,A,B,12.0,200.0,21.0,133.0,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
1,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_1,,0.25,189,113,0.04,0.0,...,,,A,B,12.0,200.0,21.0,133.0,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
2,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_2,,0.22,189,113,0.04,0.0,...,,,A,B,12.0,200.0,21.0,133.0,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
3,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_3,,0.19,189,113,0.04,0.0,...,,,A,B,12.0,200.0,21.0,133.0,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
4,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_4,,0.17,189,113,0.04,0.0,...,,,A,B,12.0,200.0,21.0,133.0,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_0,,0.93,312,4,0.02,0.0,...,,,A,B,59.0,361.0,140.0,143.0,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3176,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_1,,0.92,312,4,0.01,0.0,...,,,A,B,59.0,361.0,140.0,143.0,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3177,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_2,,0.92,312,4,0.02,0.0,...,,,A,B,59.0,361.0,140.0,143.0,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3178,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_3,,0.91,312,4,0.01,0.0,...,,,A,B,59.0,361.0,140.0,143.0,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...


## RMSD comparison

In [None]:
for i, row in dataAF3.iterrows():
    model_id = row["model_id"]
    if model_id != "ranked_0":
        continue
    benchmark_set, prediction_name = row["benchmark_set"], row["prediction_name"]