In [2]:
from p2nd.utils.parse_cif import decompress_files, load_structures
# Suppress Bio.PDB's PDBConstructionWarning so it does not clutter the
# output of the cells below.
import warnings
from Bio.PDB.PDBExceptions import PDBConstructionWarning
warnings.simplefilter('ignore', PDBConstructionWarning)

In [3]:
# Load Dev Data
# Directory that holds the development-set mmCIF files.
dev_data_path = "data/dev/mmCIF"
# Disabled step; presumably unpacks compressed files in that directory on a
# first run — TODO confirm against p2nd.utils.parse_cif.
#decompress_files(dev_data_path)

# The result is indexed by lower-case PDB ID in later cells (e.g. "1a4e").
dev_data = load_structures(dev_data_path)
print(f"Loaded {len(dev_data)} structures from dev data")

Loaded structure: 2zq9
Loaded structure: 1h03
Loaded structure: 1sqk
Loaded structure: 5m1a
Failed to load 1p6g.cif: Empty file.
Loaded structure: 1p5k
Loaded structure: 1be3
Loaded structure: 4dg1
Failed to load 3j39.cif: Empty file.
Loaded structure: 1g5g
Loaded structure: 5gpe
Loaded structure: 1kyc
Loaded structure: 1a4e
Loaded structure: 1srq
Loaded structure: 1sfc
Loaded structure: 1hqj
Loaded structure: 2akw
Loaded structure: 4k6h
Loaded structure: 4i5q
Loaded structure: 1tye
Loaded structure: 6in5
Loaded structure: 1n0a
Loaded structure: 3sr6
Loaded structure: 1cvq
Loaded structure: 2x1w
Loaded structure: 5vqy
Loaded structure: 5s7w
Loaded structure: 1ibo
Loaded structure: 4zu5
Loaded structure: 3tmt
Loaded structure: 5bxn
Loaded structure: 3bwq
Loaded structure: 1o1d
Loaded structure: 3sn9
Loaded structure: 1is7
Loaded structure: 1w4k
Loaded structure: 1fmh
Loaded structure: 2f3i
Loaded structure: 5vmg
Loaded structure: 1mej
Loaded structure: 1jlc
Loaded structure: 4hlz
Loaded

In [4]:
# Inspect what element 0 of a loaded structure is (the object DSSP is given
# as `model` in the following cells).
type(dev_data["1a4e"][0])

Bio.PDB.Model.Model

In [7]:
from Bio.PDB.DSSP import  DSSP

# Run DSSP for element 0 of the 1a4e structure, reading the matching mmCIF file.
# NOTE(review): the executable is given as an absolute path here, while later
# cells pass the bare name "mkdssp" — settle on one so the notebook is portable.
dssp = DSSP(model=dev_data["1a4e"][0],
            in_file="data/dev/mmCIF/1a4e.cif",
            dssp="/usr/local/bin/mkdssp", file_type="mmCIF")
dssp

<Bio.PDB.DSSP.DSSP at 0x17a7b7ed0>

In [6]:
# Check whether the mkdssp executable can be found from the notebook's shell.
!mkdssp

zsh:1: command not found: mkdssp


In [7]:
# Report chain, residue number, amino acid, secondary structure and
# accessibility for every residue that has a DSSP record.
for key in dssp.keys():
    # Positions 1..3 of a record hold amino acid, secondary structure, accessibility.
    aa, ss, acc = dssp[key][1:4]
    print(f"Chain {key[0]}, Residue {key[1][1]}: Amino acid {aa}, SS {ss}, Accessibility {acc}")


Chain A, Residue 15: Amino acid D, SS -, Accessibility 0.8098159509202454
Chain A, Residue 16: Amino acid V, SS -, Accessibility 0.2605633802816901
Chain A, Residue 17: Amino acid R, SS -, Accessibility 0.11290322580645161
Chain A, Residue 18: Amino acid E, SS T, Accessibility 1.0
Chain A, Residue 19: Amino acid D, SS T, Accessibility 0.5460122699386503
Chain A, Residue 20: Amino acid R, SS -, Accessibility 0.15725806451612903
Chain A, Residue 21: Amino acid V, SS B, Accessibility 0.028169014084507043
Chain A, Residue 22: Amino acid V, SS -, Accessibility 0.014084507042253521
Chain A, Residue 23: Amino acid T, SS B, Accessibility 0.0
Chain A, Residue 24: Amino acid N, SS -, Accessibility 0.03184713375796178
Chain A, Residue 25: Amino acid S, SS T, Accessibility 0.03076923076923077
Chain A, Residue 26: Amino acid T, SS T, Accessibility 0.11267605633802817
Chain A, Residue 27: Amino acid G, SS S, Accessibility 0.0
Chain A, Residue 28: Amino acid N, SS P, Accessibility 0.03184713375796178

In [8]:
def model_to_dssp_labels(model, in_file="data/dev/mmCIF/1a4e.cif", dssp_exec="mkdssp"):
    """
    Run DSSP on a structure file and return its secondary-structure labels.

    :param model: Bio.PDB model the DSSP results are computed for
    :param in_file: path to the mmCIF file handed to DSSP. It should be the
        file ``model`` was loaded from; the default points at the 1a4e dev
        file and is only appropriate for that model, so pass the path
        explicitly for any other structure.
    :param dssp_exec: name or path of the DSSP executable (default "mkdssp")
    :return: list of DSSP secondary-structure codes, one per key of the DSSP
        result, in the order given by its ``keys()``
    """
    dssp_result = DSSP(model=model, in_file=in_file, dssp=dssp_exec, file_type="mmCIF")
    # Index 2 of each DSSP record is the secondary-structure code.
    return [dssp_result[key][2] for key in dssp_result.keys()]

# Smoke-test the helper on 1a4e, naming the structure file explicitly instead
# of leaning on the function's default path.
model = dev_data["1a4e"][0]
labels = model_to_dssp_labels(model, in_file="data/dev/mmCIF/1a4e.cif")
print(labels)

['-', '-', '-', 'T', 'T', '-', 'B', '-', 'B', '-', 'T', 'T', 'S', 'P', 'B', 'P', 'S', 'S', 'S', 'S', '-', 'E', 'E', 'E', 'E', 'T', 'T', 'T', 'S', 'P', 'B', 'B', 'T', 'T', '-', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'T', 'T', '-', '-', '-', 'P', 'P', 'P', 'S', 'S', '-', '-', 'S', 'E', 'E', 'E', 'E', 'E', 'E', 'E', 'E', 'E', 'E', '-', 'S', '-', '-', 'T', 'T', 'T', '-', '-', '-', 'G', 'G', 'G', 'S', 'S', 'T', 'T', '-', 'E', 'E', 'E', 'E', 'E', 'E', 'E', 'E', 'E', '-', 'S', 'S', 'S', '-', 'T', 'T', '-', '-', 'S', 'S', 'S', 'S', 'S', '-', '-', 'E', 'E', 'E', 'E', 'E', 'E', 'E', 'E', 'B', 'T', 'E', 'E', 'E', 'E', 'E', 'E', 'E', 'E', 'S', 'S', 'S', 'S', '-', 'S', '-', 'S', '-', 'T', 'T', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'S', '-', '-', 'T', 'T', 'T', '-', 'S', '-', '-', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'T', 'S', 'G', 'G', 'G', 'G', 'G', 'G', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'S', 'G', 'G', 'G', 'S', 'B', 'S', '-', 'G', 'G', 'G', '-', '-', 'E', 'E', '-', '-',

In [13]:
# Rebuild the DSSP result for 1a4e, this time naming the executable "mkdssp"
# rather than giving an absolute path; this rebinds the notebook-level `dssp`.
dssp = DSSP(model=dev_data["1a4e"][0], in_file="data/dev/mmCIF/1a4e.cif", dssp="mkdssp", file_type="mmCIF")
dssp

<Bio.PDB.DSSP.DSSP at 0x107bf36f0>

In [18]:
# Take the third DSSP key and display the full record stored under it.
# Positions 1-3 of a record are amino acid, secondary structure and
# accessibility (as unpacked by the residue loop earlier in the notebook).
a_key = list(dssp.keys())[2]
dssp[a_key]

(3,
 'R',
 '-',
 0.11290322580645161,
 -64.5,
 131.7,
 -2,
 -0.2,
 3,
 -1.8,
 4,
 -0.2,
 -1,
 -0.1)

In [None]:
def dssp_to_chain_lable(dssp, chain="A"):
    """
    Collect the secondary-structure codes of the residues on one chain.

    :param dssp: DSSP object, i.e. a mapping keyed by (chain_id, residue_id)
        whose records hold the secondary-structure code at index 2
    :param chain: ID of the chain whose residues are kept
    :return: list of secondary-structure codes for that chain, in key order
    """
    # The first element of every key is the chain ID.
    return [dssp[res_key][2] for res_key in dssp.keys() if res_key[0] == chain]

In [24]:
# List every key of the DSSP result; each key pairs a chain ID with a
# residue ID tuple.
list(dssp.keys())

[('A', (' ', 15, ' ')),
 ('A', (' ', 16, ' ')),
 ('A', (' ', 17, ' ')),
 ('A', (' ', 18, ' ')),
 ('A', (' ', 19, ' ')),
 ('A', (' ', 20, ' ')),
 ('A', (' ', 21, ' ')),
 ('A', (' ', 22, ' ')),
 ('A', (' ', 23, ' ')),
 ('A', (' ', 24, ' ')),
 ('A', (' ', 25, ' ')),
 ('A', (' ', 26, ' ')),
 ('A', (' ', 27, ' ')),
 ('A', (' ', 28, ' ')),
 ('A', (' ', 29, ' ')),
 ('A', (' ', 30, ' ')),
 ('A', (' ', 31, ' ')),
 ('A', (' ', 32, ' ')),
 ('A', (' ', 33, ' ')),
 ('A', (' ', 34, ' ')),
 ('A', (' ', 35, ' ')),
 ('A', (' ', 36, ' ')),
 ('A', (' ', 37, ' ')),
 ('A', (' ', 38, ' ')),
 ('A', (' ', 39, ' ')),
 ('A', (' ', 40, ' ')),
 ('A', (' ', 41, ' ')),
 ('A', (' ', 42, ' ')),
 ('A', (' ', 43, ' ')),
 ('A', (' ', 44, ' ')),
 ('A', (' ', 45, ' ')),
 ('A', (' ', 46, ' ')),
 ('A', (' ', 47, ' ')),
 ('A', (' ', 48, ' ')),
 ('A', (' ', 49, ' ')),
 ('A', (' ', 50, ' ')),
 ('A', (' ', 51, ' ')),
 ('A', (' ', 52, ' ')),
 ('A', (' ', 53, ' ')),
 ('A', (' ', 54, ' ')),
 ('A', (' ', 55, ' ')),
 ('A', (' ', 56,

In [20]:
# A DSSP result is keyed by (chain_id, residue_id) pairs; indexing it with a
# bare chain ID, as here, fails with the ValueError shown in the output.
dssp['A']

ValueError: not enough values to unpack (expected 2, got 1)

In [19]:
# Iterating property_dict walks its keys only: each key is unpacked into its
# chain ID (k) and residue ID (v), so no record values are printed here.
for k, v in dssp.property_dict.keys():
    print(k, v, sep=" x ")

A x (' ', 15, ' ')
A x (' ', 16, ' ')
A x (' ', 17, ' ')
A x (' ', 18, ' ')
A x (' ', 19, ' ')
A x (' ', 20, ' ')
A x (' ', 21, ' ')
A x (' ', 22, ' ')
A x (' ', 23, ' ')
A x (' ', 24, ' ')
A x (' ', 25, ' ')
A x (' ', 26, ' ')
A x (' ', 27, ' ')
A x (' ', 28, ' ')
A x (' ', 29, ' ')
A x (' ', 30, ' ')
A x (' ', 31, ' ')
A x (' ', 32, ' ')
A x (' ', 33, ' ')
A x (' ', 34, ' ')
A x (' ', 35, ' ')
A x (' ', 36, ' ')
A x (' ', 37, ' ')
A x (' ', 38, ' ')
A x (' ', 39, ' ')
A x (' ', 40, ' ')
A x (' ', 41, ' ')
A x (' ', 42, ' ')
A x (' ', 43, ' ')
A x (' ', 44, ' ')
A x (' ', 45, ' ')
A x (' ', 46, ' ')
A x (' ', 47, ' ')
A x (' ', 48, ' ')
A x (' ', 49, ' ')
A x (' ', 50, ' ')
A x (' ', 51, ' ')
A x (' ', 52, ' ')
A x (' ', 53, ' ')
A x (' ', 54, ' ')
A x (' ', 55, ' ')
A x (' ', 56, ' ')
A x (' ', 57, ' ')
A x (' ', 58, ' ')
A x (' ', 59, ' ')
A x (' ', 60, ' ')
A x (' ', 61, ' ')
A x (' ', 62, ' ')
A x (' ', 63, ' ')
A x (' ', 64, ' ')
A x (' ', 65, ' ')
A x (' ', 66, ' ')
A x (' ', 67