In [1]:
import json

def get_af3_data(tag, dir):
    """Collect the model path and summary confidence values for one AF3 run.

    Files are looked up under ``{dir}/{tag}/``: the summary is read from
    ``{tag}_summary_confidences.json`` and the model path points at
    ``{tag}_model.cif`` (the model file itself is not opened or checked).

    Args:
        tag: Name of the run; used both as sub-directory and file prefix.
        dir: Directory that contains one sub-directory per tag.

    Returns:
        dict with keys ``path``, ``iptm``, ``ptm`` and ``score``; ``score`` is
        the ``ranking_score`` entry of the summary JSON.

    Raises:
        OSError: if the summary JSON cannot be opened.
        KeyError: if the summary lacks ``iptm``, ``ptm`` or ``ranking_score``.
    """
    run_dir = f'{dir}/{tag}'

    with open(f'{run_dir}/{tag}_summary_confidences.json', 'r') as handle:
        summary = json.load(handle)

    # (output key, key in the summary JSON), in the order they are reported.
    wanted = (
        ('iptm', 'iptm'),
        ('ptm', 'ptm'),
        ('score', 'ranking_score'),
    )

    record = {'path': f'{run_dir}/{tag}_model.cif'}
    for out_key, json_key in wanted:
        record[out_key] = summary[json_key]
    return record

def get_boltz_data(tag, dir):
    """Collect the model path, confidence and affinity values for one Boltz run.

    Files are looked up under ``{dir}/{tag}/``:

    * ``confidence_{tag}_model_0.json`` supplies ``confidence_score``,
      ``iptm`` and ``ptm``;
    * ``affinity_{tag}.json`` supplies ``affinity_pred_value`` and
      ``affinity_probability_binary``;
    * the returned path points at ``{tag}_model_0.cif`` (not opened here).

    Args:
        tag: Name of the prediction; used as sub-directory and in file names.
        dir: Directory that contains one sub-directory per tag.

    Returns:
        dict with keys ``path``, ``iptm``, ``ptm``, ``score``,
        ``affinity_ic50`` (from ``affinity_pred_value``) and
        ``affinity_bind`` (from ``affinity_probability_binary``).

    Raises:
        OSError: if either JSON file cannot be opened.
        KeyError: if one of the expected JSON keys is missing.
    """
    pred_dir = f'{dir}/{tag}'

    # Confidence file first; the affinity file is only touched afterwards.
    with open(f'{pred_dir}/confidence_{tag}_model_0.json', 'r') as handle:
        confidence = json.load(handle)
    score, iptm, ptm = (
        confidence[key] for key in ('confidence_score', 'iptm', 'ptm')
    )

    with open(f'{pred_dir}/affinity_{tag}.json', 'r') as handle:
        affinity = json.load(handle)
    pred_value = affinity['affinity_pred_value']
    bind_probability = affinity['affinity_probability_binary']

    return dict(
        path=f'{pred_dir}/{tag}_model_0.cif',
        iptm=iptm,
        ptm=ptm,
        score=score,
        affinity_ic50=pred_value,
        affinity_bind=bind_probability,
    )

In [2]:
import pandas as pd

def process_af3_data(df, af3_dir, name='af3', col=('lmpnn', 'tag')):
    """Append the AF3 fields of every row's tag to ``df`` as new columns.

    ``get_af3_data(value, af3_dir)`` is called once per value of ``df[col]``
    (in row order). The returned dicts are expanded into one column per key
    and placed to the right of the existing columns under a two-level header
    ``(name, <key>)``.

    The per-row dicts are collected in a plain list instead of being written
    into a scratch column of ``df``. This keeps the caller's frame unmodified
    and avoids dropping a column from a MultiIndex-columned frame, which
    pandas may flag with a PerformanceWarning when the columns are not
    lexsorted.

    Args:
        df: Input frame containing the column ``col``. It is not modified.
        af3_dir: Directory handed through to ``get_af3_data``.
        name: Top-level column label for the added fields.
        col: Label of the column that holds the tags.

    Returns:
        A new DataFrame whose row index is reset to 0..n-1, consisting of the
        original columns followed by the added ``(name, <key>)`` columns.
    """
    records = [get_af3_data(tag, af3_dir) for tag in df[col]]
    df_exp = pd.json_normalize(records)
    df_exp.columns = pd.MultiIndex.from_product([[name], df_exp.columns])
    # Both frames carry a 0..n-1 index here, so concat aligns row by row.
    return pd.concat([df.reset_index(drop=True), df_exp], axis=1)

def process_boltz_data(df, boltz_dir, name='boltz', col=('lmpnn', 'tag')):
    """Append the Boltz fields of every row's tag to ``df`` as new columns.

    ``get_boltz_data(value, boltz_dir)`` is called once per value of
    ``df[col]`` (in row order). The returned dicts are expanded into one
    column per key and placed to the right of the existing columns under a
    two-level header ``(name, <key>)``. Finally every column label is
    rebuilt as a tuple so the result always has MultiIndex columns; a
    non-tuple label ``x`` becomes ``(x, '')``.

    The per-row dicts are collected in a plain list instead of being written
    into a scratch column of ``df``. This keeps the caller's frame unmodified
    and avoids dropping a column from a MultiIndex-columned frame, which
    pandas may flag with a PerformanceWarning when the columns are not
    lexsorted.

    Args:
        df: Input frame containing the column ``col``. It is not modified.
        boltz_dir: Directory handed through to ``get_boltz_data``.
        name: Top-level column label for the added fields.
        col: Label of the column that holds the tags.

    Returns:
        A new DataFrame whose row index is reset to 0..n-1, consisting of the
        original columns followed by the added ``(name, <key>)`` columns.
    """
    records = [get_boltz_data(tag, boltz_dir) for tag in df[col]]
    df_exp = pd.json_normalize(records)
    df_exp.columns = pd.MultiIndex.from_product([[name], df_exp.columns])
    # Both frames carry a 0..n-1 index here, so concat aligns row by row.
    combined = pd.concat([df.reset_index(drop=True), df_exp], axis=1)
    # Use a distinct loop name so the ``col`` parameter is not shadowed.
    combined.columns = pd.MultiIndex.from_tuples([
        label if isinstance(label, tuple) else (label, '')
        for label in combined.columns
    ])
    return combined

In [3]:
import pandas as pd

# Filtered designs produced by the previous pipeline stage.
path = '../4_rscore_filter/lmpnn_filt_pht.parquet'

# Load the table, then attach the AF3 and Boltz fields looked up by the
# ('lmpnn', 'tag') column; each step returns a new frame.
df = (
    pd.read_parquet(path)
    .pipe(process_af3_data, '../5_af3/output', name='af3', col=('lmpnn', 'tag'))
    .pipe(
        process_boltz_data,
        '../6_boltz/output/boltz_results_0/predictions',
        name='boltz',
        col=('lmpnn', 'tag'),
    )
)

# Persist the combined table next to the notebook and display it.
df.to_parquet('lmpnn_filt_data_added.parquet')
df

  df = pd.concat([df.drop(columns=['boltz_data']).reset_index(drop=True), df_exp], axis=1)


Unnamed: 0_level_0,diffusion,diffusion,lmpnn,lmpnn,lmpnn,lmpnn,lmpnn,lmpnn,lmpnn,lmpnn,af3,af3,af3,af3,boltz,boltz,boltz,boltz,boltz,boltz
Unnamed: 0_level_1,id,batch,tag,ddg,fa_rep,res_totalscore,totalscore,seq,path,relaxed_path,path,iptm,ptm,score,path,iptm,ptm,score,affinity_ic50,affinity_bind
0,result_7_packed_3_1,pht_demo,result_7_packed_3_1,-27.749462,95.805969,-1.583578,-199.530838,SLEEIIAKIRASDPATVDWGAHFREFCKAAGVAEVTPEERALAEKA...,../3_lmpnn/output/packed/result_7_packed_3_1.pdb,../3_lmpnn/output/packed/result_7_packed_3_1_b...,../5_af3/output/result_7_packed_3_1/result_7_p...,0.74,0.85,0.76,../6_boltz/output/boltz_results_0/predictions/...,0.769996,0.739775,0.712917,1.609957,0.187223
1,result_61_packed_8_1,pht_demo,result_61_packed_8_1,-26.640675,104.494186,-1.441779,-181.664185,SEELLAAIKAAFRKIAGDLLTDRVDLDELAQFILDTLTLSEEERAR...,../3_lmpnn/output/packed/result_61_packed_8_1.pdb,../3_lmpnn/output/packed/result_61_packed_8_1_...,../5_af3/output/result_61_packed_8_1/result_61...,0.7,0.65,0.69,../6_boltz/output/boltz_results_0/predictions/...,0.581382,0.760046,0.718728,1.729165,0.146521
2,result_7_packed_8_1,pht_demo,result_7_packed_8_1,-25.789537,98.276131,-1.57081,-197.922104,SLAEILAEIRAADPATVDWEAHFRRFCEAAGVEAVTPEERELAARA...,../3_lmpnn/output/packed/result_7_packed_8_1.pdb,../3_lmpnn/output/packed/result_7_packed_8_1_b...,../5_af3/output/result_7_packed_8_1/result_7_p...,0.64,0.59,0.63,../6_boltz/output/boltz_results_0/predictions/...,0.86967,0.904298,0.692104,1.849106,0.117982
3,result_7_packed_2_1,pht_demo,result_7_packed_2_1,-25.547806,102.521637,-1.61018,-202.882706,SLAELIQEIRDADPKTIDWEAFFRRFAEAAGVAAVTPEQRALAARM...,../3_lmpnn/output/packed/result_7_packed_2_1.pdb,../3_lmpnn/output/packed/result_7_packed_2_1_b...,../5_af3/output/result_7_packed_2_1/result_7_p...,0.51,0.51,0.51,../6_boltz/output/boltz_results_0/predictions/...,0.790683,0.717135,0.668814,1.888733,0.13917
4,result_16_packed_4_1,pht_demo,result_16_packed_4_1,-25.318375,91.409203,-1.448796,-160.816406,ALSDEVKAMLRRMAPAAERLGTEGLLRRMQELGVVPEVTPDLLKAF...,../3_lmpnn/output/packed/result_16_packed_4_1.pdb,../3_lmpnn/output/packed/result_16_packed_4_1_...,../5_af3/output/result_16_packed_4_1/result_16...,0.82,0.85,0.82,../6_boltz/output/boltz_results_0/predictions/...,0.873105,0.93513,0.949214,1.517867,0.142706
5,result_16_packed_7_1,pht_demo,result_16_packed_7_1,-24.98737,88.052048,-1.478883,-164.156021,MLSETVKNMLKRLAPAAERLGTEGLLRRMIEAGVIPEVTPELLKAL...,../3_lmpnn/output/packed/result_16_packed_7_1.pdb,../3_lmpnn/output/packed/result_16_packed_7_1_...,../5_af3/output/result_16_packed_7_1/result_16...,0.77,0.84,0.79,../6_boltz/output/boltz_results_0/predictions/...,0.828962,0.883267,0.866744,1.793561,0.134394
6,result_7_packed_4_1,pht_demo,result_7_packed_4_1,-23.823254,103.83667,-1.429071,-180.062973,SLAEILAEIRASDPATADWLALARRFAEAAGVDEVTPEERELAAKA...,../3_lmpnn/output/packed/result_7_packed_4_1.pdb,../3_lmpnn/output/packed/result_7_packed_4_1_b...,../5_af3/output/result_7_packed_4_1/result_7_p...,0.7,0.73,0.7,../6_boltz/output/boltz_results_0/predictions/...,0.607069,0.774949,0.785562,1.833631,0.143905
7,result_29_packed_7_1,pht_demo,result_29_packed_7_1,-22.68211,75.388725,-1.675448,-177.597519,SAAFRAILRAMCEAFAELAPGLTLSDEELELVLNPDDEELRKRLNV...,../3_lmpnn/output/packed/result_29_packed_7_1.pdb,../3_lmpnn/output/packed/result_29_packed_7_1_...,../5_af3/output/result_29_packed_7_1/result_29...,0.71,0.76,0.72,../6_boltz/output/boltz_results_0/predictions/...,0.709298,0.809468,0.882201,1.662283,0.137814
8,result_59_packed_1_1,pht_demo,result_59_packed_1_1,-22.405994,70.00029,-1.624849,-180.358276,LATEAFLRTFIQSAEALELMRARGTAAAAEIAALVLAALKAKGVSS...,../3_lmpnn/output/packed/result_59_packed_1_1.pdb,../3_lmpnn/output/packed/result_59_packed_1_1_...,../5_af3/output/result_59_packed_1_1/result_59...,0.47,0.37,0.45,../6_boltz/output/boltz_results_0/predictions/...,0.390886,0.405878,0.538988,1.754866,0.172469
9,result_59_packed_6_1,pht_demo,result_59_packed_6_1,-21.15624,66.255737,-1.786232,-198.271744,SATEAFLRLVIASPEALELMRTRGTAAADEIAALMLAALEAKGISA...,../3_lmpnn/output/packed/result_59_packed_6_1.pdb,../3_lmpnn/output/packed/result_59_packed_6_1_...,../5_af3/output/result_59_packed_6_1/result_59...,0.67,0.49,0.64,../6_boltz/output/boltz_results_0/predictions/...,0.592455,0.621564,0.68668,1.901321,0.184374
