In [1]:
import pandas as pd
import json
from Bio import PDB



In [2]:
def calculate_residue_plddt(json_path, cif_path):
    """Average per-atom pLDDT scores into one value per residue.

    The scores are read from the ``'atom_plddts'`` list of the JSON file and
    paired positionally with the atoms of the mmCIF structure, in the order
    Biopython iterates them (model -> chain -> residue -> atom). This assumes
    the JSON list is stored in that same atom order.

    Parameters
    ----------
    json_path : str or path-like
        JSON file with a top-level ``'atom_plddts'`` list.
    cif_path : str or path-like
        mmCIF file of the structure the scores belong to.

    Returns
    -------
    pandas.DataFrame
        One row per residue with the columns ``chain_id``, ``res_ids``,
        ``res_name`` and ``mean_plddt``, sorted by chain and residue number.
        Residues that share a residue number within a chain but differ in
        hetero flag or insertion code are reported as separate rows.

    Raises
    ------
    KeyError
        If the JSON file has no ``'atom_plddts'`` entry.
    ValueError
        If the number of pLDDT values differs from the number of atoms
        parsed from the structure.
    """
    # Load the per-atom scores
    with open(json_path) as json_data:
        plddts = json.load(json_data)['atom_plddts']

    # Parse the CIF file
    structure = PDB.MMCIFParser(QUIET=True).get_structure('model', cif_path)

    # One record per atom. Biopython's residue.id is the triple
    # (hetero flag, sequence number, insertion code); all three parts are kept
    # because the sequence number alone does not uniquely identify a residue
    # within a chain.
    atom_records = [
        (chain.id, residue.resname, residue.id[0], residue.id[1], residue.id[2])
        for model in structure
        for chain in model
        for residue in chain
        for _atom in residue
    ]

    # Scores and atoms are matched by position, so a count mismatch means the
    # two files do not describe the same structure.
    if len(plddts) != len(atom_records):
        raise ValueError(
            f"pLDDT/atom count mismatch: {json_path} holds {len(plddts)} "
            f"'atom_plddts' values but {cif_path} contains "
            f"{len(atom_records)} atoms"
        )

    # Create dataframe
    df = pd.DataFrame(
        atom_records,
        columns=['chain_id', 'res_name', 'het_flag', 'res_ids', 'ins_code'],
    ).assign(plddts=plddts)

    # Calculate the mean pLDDT for each residue. Grouping on the full residue
    # identity keeps residues with the same number apart; the helper columns
    # are dropped afterwards so the returned layout stays
    # chain_id / res_ids / res_name / mean_plddt.
    residue_plddt = (
        df.groupby(['chain_id', 'res_ids', 'het_flag', 'ins_code'])
        .agg(
            res_name=('res_name', 'first'),
            mean_plddt=('plddts', 'mean'),
        )
        .reset_index()
        .drop(columns=['het_flag', 'ins_code'])
    )

    return residue_plddt

In [3]:
# Input files for model 0 of the fold_t1106 prediction: the JSON holding the
# per-atom pLDDT values and the matching mmCIF structure.
json_path, cif_path = (
    "af3_predictions/fold_t1106/fold_t1106_full_data_0.json",
    "af3_predictions/fold_t1106/fold_t1106_model_0.cif",
)

# Show the per-residue mean pLDDT table.
display(calculate_residue_plddt(json_path=json_path, cif_path=cif_path))

Unnamed: 0,chain_id,res_ids,res_name,mean_plddt
0,A,1,MET,32.923750
1,A,2,SER,33.043333
2,A,3,ARG,28.700909
3,A,4,ILE,33.417500
4,A,5,ILE,26.227500
...,...,...,...,...
231,B,110,HIS,76.075000
232,B,111,ASN,75.127500
233,B,112,GLU,68.702222
234,B,113,SER,70.651667
