In [1]:
import os
import warnings

import pandas as pd
from python_pdb.entities import Structure
from python_pdb.parsers import parse_pdb

# Root directory of the STCRDab download used throughout this notebook.
# Defaults to the original hard-coded location; set the STCRDAB_PATH environment
# variable to point the notebook at a different copy without editing the code.
STCRDAB_PATH = os.environ.get(
    'STCRDAB_PATH',
    '/project/koohylab/shared/tcr_data/raw_DONOTMODIFY/structure/STCRDab_all_2022-11-10',
)

In [2]:
# Read the tab-separated summary table stored at the top of the STCRDab directory.
summary_path = os.path.join(STCRDAB_PATH, 'db_summary.dat')
stcrdab_summary = pd.read_csv(summary_path, sep='\t')
stcrdab_summary

Unnamed: 0,pdb,Bchain,Achain,Dchain,Gchain,TCRtype,model,antigen_chain,antigen_type,antigen_name,...,authors,resolution,method,r_free,r_factor,affinity,affinity_method,affinity_temperature,affinity_pmid,engineered
0,7rk7,E,D,,,abTCR,0,C,peptide,tyrosinase peptide,...,"Singh, N.K., Davancaze, L.M., Arbuiso, A., Wei...",2.54,X-RAY DIFFRACTION,0.255,0.211,,,,,True
1,7s8i,B,A,,,abTCR,0,,,,...,"Patskovsky, Y., Nyovanie, S., Patskovska, L., ...",1.66,X-RAY DIFFRACTION,0.216,0.167,,,,,True
2,7s8j,B,A,,,abTCR,0,,,,...,"Patskovska, L., Patskovsky, Y., Nyovanie, S., ...",1.92,X-RAY DIFFRACTION,0.213,0.168,,,,,True
3,3qux,D,C,,,abTCR,0,A,Hapten,"N-[(3S,4S,5R)-4,5-DIHYDROXY-1-[(2R,3R,4R,5R,6R...",...,"Li, Y., Girardi, E., Yu, E.D., Zajonc, D.M.",2.91,X-RAY DIFFRACTION,0.252,0.208,,,,,True
4,2z31,B,A,,,abTCR,0,P,peptide,myelin basic protein (mbp)-peptide,...,"Feng, D., Bond, C.J., Ely, L.K., Garcia, K.C.",2.7,X-RAY DIFFRACTION,0.287,0.235,,,,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
890,3rtq,D,C,,,abTCR,0,A,Hapten,"N-[(2S,3S,4R)-3,4-DIHYDROXY-1-{[(1S,2S,3R,4R,5...",...,"Yu, E.D., Zajonc, D.M.",2.8,X-RAY DIFFRACTION,0.268,0.227,,,,,True
891,3dxa,O,N,,,abTCR,0,M,peptide,ebv decapeptide epitope,...,"Archbold, J.K., Macdonald, W.A., Gras, S., Ros...",3.5,X-RAY DIFFRACTION,0.330,0.286,,,,,True
892,1d9k,B,A,,,abTCR,0,P,peptide,conalbumin peptide,...,"Reinherz, E.L., Tan, K., Tang, L., Kern, P., L...",3.2,X-RAY DIFFRACTION,0.293,0.247,,,,,True
893,4gg6,H,G,,,abTCR,0,J,peptide,peptide from alpha/beta-gliadin mm1,...,"Broughton, S.E., Theodossis, A., Petersen, J.,...",3.2,X-RAY DIFFRACTION,0.285,0.246,,,,,True


In [3]:
def _raw_pdb_path(pdb_id):
    """Return the path of the raw PDB file for ``pdb_id`` under ``STCRDAB_PATH/raw``."""
    return os.path.join(STCRDAB_PATH, 'raw', f'{pdb_id}.pdb')


# One path per summary row, derived from that row's 'pdb' value.
stcrdab_summary['raw_path'] = stcrdab_summary['pdb'].map(_raw_pdb_path)

In [7]:
def identify_residues_with_alt_conf(struct) -> bool:
    """Report whether any residue in ``struct`` has alternate conformations.

    A residue qualifies when its atoms carry at least two distinct, truthy
    ``alt_loc`` values. Atoms whose ``alt_loc`` is empty or ``None`` are ignored,
    and alt-loc labels are never pooled across different residues.

    Args:
        struct: An iterable of models; each model iterates over chains, each chain
            over residues, and each residue over atoms exposing ``alt_loc``.

    Returns:
        True if at least one residue qualifies, otherwise False.
    """
    for model in struct:
        for chain in model:
            for residue in chain:
                alt_locs = {atom.alt_loc for atom in residue if atom.alt_loc}

                # Only the yes/no answer is needed, so stop at the first hit
                # instead of collecting every matching residue.
                if len(alt_locs) >= 2:
                    return True

    return False

# Count how many summary rows have TCR chains containing a residue with
# alternate conformations.
alternate_states_count = 0
total_count = 0

# Summary columns that may name a TCR chain; unused ones hold NaN.
chain_columns = ['Achain', 'Bchain', 'Gchain', 'Dchain']

for index, stcrdab_entry in stcrdab_summary.iterrows():
    print(stcrdab_entry['pdb'])

    with open(stcrdab_entry['raw_path'], 'r') as fh:
        pdb_text = fh.read()
    structure = parse_pdb(pdb_text, silent=True)
    structure_df = structure.to_pandas()

    # Collect the chain identifiers this row actually provides.
    tcr_chains = []
    for column in chain_columns:
        chain = stcrdab_entry[column]
        if pd.notna(chain):
            tcr_chains.append(chain)
    print(tcr_chains)

    # Keep only the rows whose chain_id is one of this entry's TCR chains.
    is_tcr_chain = structure_df['chain_id'].isin(tcr_chains)
    tcr_df = structure_df[is_tcr_chain]

    # Any warnings raised while rebuilding the structure are suppressed.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        tcr_structure = Structure.from_pandas(tcr_df)

    has_alt_conf = identify_residues_with_alt_conf(tcr_structure)
    if has_alt_conf:
        print("Found one!")
        alternate_states_count += 1

    total_count += 1

print(f'{alternate_states_count} out of {total_count} have alternate residue conformations.')

7rk7
['D', 'E']
7s8i
['A', 'B']
Found one!
7s8j
['A', 'B']
Found one!
3qux
['C', 'D']
2z31
['A', 'B']
2ak4
['D', 'E']
7nme
['D', 'E']
4x6c
['G', 'H']
7pbe
['I', 'J']
4z7w
['E', 'F']
6v18
['D', 'E']
5wkh
['I', 'J']
6fr7
['A', 'B']
Found one!
3o8x
['C', 'D']
7sg1
['I', 'J']
3w0w
['D', 'E']
5w1v
['S', 'T']
4x6d
['E', 'F']
7rtr
['D', 'E']
Found one!
2nx5
['I', 'J']
5nht
['A', 'B']
6eh9
['A', 'B']
Found one!
6ulr
['D', 'E']
4nqe
['G', 'H']
Found one!
3tpu
['M', 'N']
1hxm
['H', 'G']
7rrg
['D', 'E']
Found one!
2p5e
['D', 'E']
Found one!
4p2o
['C', 'D']
2aq3
['G']
5jzi
['I', 'J']
3rug
['E', 'F']
3tpu
['C', 'D']
4pjg
['G', 'H']
2z35
['A', 'B']
4e41
['D', 'E']
Found one!
6am5
['D', 'E']
1kb5
['A', 'B']
3e2h
['B', 'C']
6c68
['A', 'B']
4udu
['A', 'B']
6puj
['B', 'G']
Found one!
2ak4
['I', 'J']
3e3q
['Z', 'a']
3mv8
['D', 'E']
Found one!
6puc
['G', 'H']
Found one!
5wjo
['C', 'D']
Found one!
3kpr
['D', 'E']
6fr9
['A', 'B']
Found one!
5hyj
['D', 'E']
5ks9
['G', 'H']
1kj2
['D', 'E']
6mja
['C', 'D']
6vt

['D', 'E']
Found one!
6u3n
['D', 'E']
1bwm
['a', 'A']
6w9u
['D', 'E']
Found one!
2vlr
['I', 'J']
2f54
['K', 'L']
6rsy
['D', 'E']
Found one!
4mvb
['C', 'D']
1mi5
['D', 'E']
1sbb
['C']
4en3
['A', 'B']
1zgl
['U', 'V']
6cwe
['C', 'D']
Found one!
3vxq
['A', 'B']
5swz
['I', 'J']
1h5b
['B']
Found one!
3vxs
['D', 'E']
5w1w
['S', 'T']
2po6
['C', 'D']
8gvg
['A', 'B']
4qrr
['D', 'E']
2vlk
['D', 'E']
3sjv
['S', 'T']
6cx9
['C', 'D']
4pj7
['E', 'F']
4mng
['e', 'E']
4l4t
['G', 'H']
Found one!
6eh4
['D', 'E']
Found one!
6vm7
['D', 'E']
3tpu
['G', 'H']
5d7l
['D', 'E']
1d9k
['E', 'F']
3uts
['I', 'J']
6cqq
['D', 'E']
Found one!
3owe
['G']
4l4v
['D', 'E']
Found one!
2aq3
['E']
5hhm
['I', 'J']
Found one!
1h5b
['D']
Found one!
6mji
['C', 'D']
Found one!
3sjv
['N', 'O']
3e3q
['R', 'S']
4n0c
['G', 'H']
5c0b
['I', 'J']
7pb2
['D', 'E']
6xng
['C', 'D']
4e41
['I', 'J']
3q5y
['B']
5til
['K', 'L']
4qrp
['J', 'I']
6rpa
['D', 'E']
6xqp
['G', 'H']
Found one!
2e7l
['A', 'D']
7n5p
['D', 'E']
Found one!
3to4
['C', 'D']
4

['G', 'H']
2apf
['A']
234 out of 895 have alternate residue conformations.
