# Introduction

In this notebook, we investigated whether TCRs can be both flexible and rigid depending on the antigen they are contacting.
We also looked at the opposite phenomenon of whether peptides can be both flexible and rigid depending on the TCR that is contacting them.

In [1]:
import pandas as pd

In [2]:
# Read the structure summary and index every row by its file name with '.pdb' stripped.
apo_holo_summary = (
    pd.read_csv('../data/processed/apo-holo-tcr-pmhc-class-I/apo_holo_summary.csv')
      .assign(id=lambda summary: summary['file_name'].str.replace('.pdb', '', regex=False))
      .set_index('id')
)

apo_holo_summary

Unnamed: 0_level_0,file_name,pdb_id,structure_type,state,alpha_chain,beta_chain,antigen_chain,mhc_chain1,mhc_chain2,cdr_sequences_collated,peptide_sequence,mhc_slug
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1ao7_D-E-C-A-B_tcr_pmhc,1ao7_D-E-C-A-B_tcr_pmhc.pdb,1ao7,tcr_pmhc,holo,D,E,C,A,B,DRGSQS-IYSNGD-AVTTDSWGKLQ-MNHEY-SVGAGI-ASRPGLA...,LLFGYPVYV,hla_a_02_01
1b0g_C-A-B_pmhc,1b0g_C-A-B_pmhc.pdb,1b0g,pmhc,apo,,,C,A,B,,ALWGFFPVL,hla_a_02_01
1b0g_F-D-E_pmhc,1b0g_F-D-E_pmhc.pdb,1b0g,pmhc,apo,,,F,D,E,,ALWGFFPVL,hla_a_02_01
1bd2_D-E-C-A-B_tcr_pmhc,1bd2_D-E-C-A-B_tcr_pmhc.pdb,1bd2,tcr_pmhc,holo,D,E,C,A,B,NSMFDY-ISSIKDK-AAMEGAQKLV-MNHEY-SVGAGI-ASSYPGG...,LLFGYPVYV,hla_a_02_01
1bii_P-A-B_pmhc,1bii_P-A-B_pmhc.pdb,1bii,pmhc,apo,,,P,A,B,,RGPGRAFVTI,h2_dd
...,...,...,...,...,...,...,...,...,...,...,...,...
7rtd_C-A-B_pmhc,7rtd_C-A-B_pmhc.pdb,7rtd,pmhc,apo,,,C,A,B,,YLQPRTFLL,hla_a_02_01
7rtr_D-E-C-A-B_tcr_pmhc,7rtr_D-E-C-A-B_tcr_pmhc.pdb,7rtr,tcr_pmhc,holo,D,E,C,A,B,DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY,YLQPRTFLL,hla_a_02_01
8gvb_A-B-P-H-L_tcr_pmhc,8gvb_A-B-P-H-L_tcr_pmhc.pdb,8gvb,tcr_pmhc,holo,A,B,P,H,L,YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD...,RYPLTFGW,hla_a_24_02
8gvg_A-B-P-H-L_tcr_pmhc,8gvg_A-B-P-H-L_tcr_pmhc.pdb,8gvg,tcr_pmhc,holo,A,B,P,H,L,YGATPY-YFSGDTLV-AVGFTGGGNKLT-SEHNR-FQNEAQ-ASSD...,RFPLTFGW,hla_a_24_02


In [3]:
def categorize_movement(rmsd: float) -> str:
    if rmsd < 0.5:
        return f'Little Movement (<0.5 Å)'

    if 0.5 <= rmsd < 1.0:
        return f'Some Movement (0.5 to 1.0 Å)'

    if 1.0 <= rmsd < 2.0:
        return 'Movement (1.0 to 2.0 Å)'

    if 2.0 <= rmsd < 4.0:
        return 'Large Movement (2.0 to 4.0 Å)'

    if 4.0 <= rmsd:
        return 'Significant Movement (>4.0 Å)'


# Ordered categorical dtype for the movement labels, listed from least to most movement.
movement_order = pd.CategoricalDtype(
    [
        'Little Movement (<0.5 Å)',
        'Some Movement (0.5 to 1.0 Å)',
        'Movement (1.0 to 2.0 Å)',
        'Large Movement (2.0 to 4.0 Å)',
        'Significant Movement (>4.0 Å)',
    ],
    ordered=True,
)

In [4]:
def classify_modality(rmsd: float) -> str | None:
    return 'rigid' if rmsd < 0.5 else 'flexible' if rmsd > 1.0 else None

# TCR CDR Analysis

## Load data

In [5]:
# Per-structure columns; attached once for each side (x and y) of a comparison.
structure_metadata = apo_holo_summary[[
    'file_name',
    'pdb_id',
    'structure_type',
    'state',
    'alpha_chain',
    'beta_chain',
    'antigen_chain',
    'mhc_chain1',
    'mhc_chain2',
]]

# Complex-level annotations, looked up through the summary's index.
complex_metadata = apo_holo_summary[['cdr_sequences_collated', 'peptide_sequence', 'mhc_slug']]

results_fw_align = pd.read_csv('../data/processed/apo-holo-tcr-pmhc-class-I-comparisons/rmsd_cdr_fw_align_results.csv')

# The second structure merge is what introduces the `_x` / `_y` column suffixes.
results_fw_align = (
    results_fw_align
    .merge(structure_metadata, how='left', left_on='structure_x_name', right_on='file_name')
    .merge(structure_metadata, how='left', left_on='structure_y_name', right_on='file_name')
    .merge(complex_metadata, how='left', left_on='complex_id', right_index=True)
)

In [6]:
# Label each row with its pair of states, treating 'holo-apo' as the same comparison as 'apo-holo'.
state_pairs = results_fw_align['state_x'] + '-' + results_fw_align['state_y']
results_fw_align['comparison'] = state_pairs.replace({'holo-apo': 'apo-holo'})

# Only apo-holo comparisons are kept.
is_apo_holo = results_fw_align['comparison'] == 'apo-holo'
results_fw_align = results_fw_align[is_apo_holo].reset_index(drop=True)

In [7]:
# Build an order-independent key per structure pair, so x-vs-y and y-vs-x rows share one key.
structure_pairs = results_fw_align[['structure_x_name', 'structure_y_name']]
results_fw_align['structure_comparison'] = structure_pairs.apply(
    lambda names: '-'.join(sorted(names)),
    axis='columns',
)

# Keep the first row for each (structure pair, chain, CDR) combination.
duplicate_keys = ['structure_comparison', 'chain_type', 'cdr']
results_fw_align = results_fw_align.drop_duplicates(subset=duplicate_keys).reset_index(drop=True)

In [8]:
# Preview the apo-holo CDR comparisons after filtering and de-duplication.
results_fw_align

Unnamed: 0,complex_id,structure_x_name,structure_y_name,chain_type,cdr,rmsd,file_name_x,pdb_id_x,structure_type_x,state_x,...,alpha_chain_y,beta_chain_y,antigen_chain_y,mhc_chain1_y,mhc_chain2_y,cdr_sequences_collated,peptide_sequence,mhc_slug,comparison,structure_comparison
0,3qdg_D-E-C-A-B_tcr_pmhc,3qdg_D-E-C-A-B_tcr_pmhc.pdb,3qeu_A-B_tcr.pdb,alpha_chain,1,1.932806,3qdg_D-E-C-A-B_tcr_pmhc.pdb,3qdg,tcr_pmhc,holo,...,A,B,,,,DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF,ELAGIGILTV,hla_a_02_01,apo-holo,3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb
1,3qdg_D-E-C-A-B_tcr_pmhc,3qdg_D-E-C-A-B_tcr_pmhc.pdb,3qeu_A-B_tcr.pdb,alpha_chain,2,1.308598,3qdg_D-E-C-A-B_tcr_pmhc.pdb,3qdg,tcr_pmhc,holo,...,A,B,,,,DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF,ELAGIGILTV,hla_a_02_01,apo-holo,3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb
2,3qdg_D-E-C-A-B_tcr_pmhc,3qdg_D-E-C-A-B_tcr_pmhc.pdb,3qeu_A-B_tcr.pdb,alpha_chain,3,1.244062,3qdg_D-E-C-A-B_tcr_pmhc.pdb,3qdg,tcr_pmhc,holo,...,A,B,,,,DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF,ELAGIGILTV,hla_a_02_01,apo-holo,3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb
3,3qdg_D-E-C-A-B_tcr_pmhc,3qdg_D-E-C-A-B_tcr_pmhc.pdb,3qeu_A-B_tcr.pdb,beta_chain,1,0.809066,3qdg_D-E-C-A-B_tcr_pmhc.pdb,3qdg,tcr_pmhc,holo,...,A,B,,,,DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF,ELAGIGILTV,hla_a_02_01,apo-holo,3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb
4,3qdg_D-E-C-A-B_tcr_pmhc,3qdg_D-E-C-A-B_tcr_pmhc.pdb,3qeu_A-B_tcr.pdb,beta_chain,2,0.688597,3qdg_D-E-C-A-B_tcr_pmhc.pdb,3qdg,tcr_pmhc,holo,...,A,B,,,,DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF,ELAGIGILTV,hla_a_02_01,apo-holo,3qdg_D-E-C-A-B_tcr_pmhc.pdb-3qeu_A-B_tcr.pdb
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
569,7rtr_D-E-C-A-B_tcr_pmhc,7n1d_A-B_tcr.pdb,7rtr_D-E-C-A-B_tcr_pmhc.pdb,alpha_chain,2,0.810170,7n1d_A-B_tcr.pdb,7n1d,tcr,apo,...,D,E,C,A,B,DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY,YLQPRTFLL,hla_a_02_01,apo-holo,7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
570,7rtr_D-E-C-A-B_tcr_pmhc,7n1d_A-B_tcr.pdb,7rtr_D-E-C-A-B_tcr_pmhc.pdb,alpha_chain,3,0.563263,7n1d_A-B_tcr.pdb,7n1d,tcr,apo,...,D,E,C,A,B,DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY,YLQPRTFLL,hla_a_02_01,apo-holo,7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
571,7rtr_D-E-C-A-B_tcr_pmhc,7n1d_A-B_tcr.pdb,7rtr_D-E-C-A-B_tcr_pmhc.pdb,beta_chain,1,0.399182,7n1d_A-B_tcr.pdb,7n1d,tcr,apo,...,D,E,C,A,B,DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY,YLQPRTFLL,hla_a_02_01,apo-holo,7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
572,7rtr_D-E-C-A-B_tcr_pmhc,7n1d_A-B_tcr.pdb,7rtr_D-E-C-A-B_tcr_pmhc.pdb,beta_chain,2,0.284455,7n1d_A-B_tcr.pdb,7n1d,tcr,apo,...,D,E,C,A,B,DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY,YLQPRTFLL,hla_a_02_01,apo-holo,7n1d_A-B_tcr.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb


## Analysis

In [9]:
def average_and_classify_cdr_movement(group):
    """Average the RMSD per CDR loop and peptide-MHC, then label each average.

    ``group`` is a frame of comparison rows. Rows are grouped by chain type,
    CDR, peptide sequence and MHC slug, and the mean ``rmsd`` of each group is
    labelled twice: with its movement category (``movement``) and with its
    rigid/flexible modality (``classification``).

    Returns a frame indexed by the four grouping keys with the columns
    ``rmsd``, ``movement`` and ``classification``.
    """
    keys = ['chain_type', 'cdr', 'peptide_sequence', 'mhc_slug']
    averaged = group.groupby(keys)[['rmsd']].mean()
    mean_rmsd = averaged['rmsd']

    return averaged.assign(
        movement=mean_rmsd.map(categorize_movement).astype(movement_order),
        classification=mean_rmsd.map(classify_modality),
    )

# Summarise each TCR (identified by its collated CDR sequences) separately, then flatten the index.
comparisons_by_tcr = results_fw_align.groupby('cdr_sequences_collated')
cdr_results_agg = comparisons_by_tcr.apply(average_and_classify_cdr_movement).reset_index()

In [10]:
# Keep only the CDR loops that have averages for more than one peptide-MHC.
loop_keys = ['cdr_sequences_collated', 'chain_type', 'cdr']
rows_per_loop = cdr_results_agg.groupby(loop_keys)['rmsd'].transform('size')
cdr_results_agg_multi_pmhc = cdr_results_agg.loc[rows_per_loop > 1].copy()

In [11]:
# Count the distinct modality labels seen for each CDR loop; unclassified (missing) entries are not counted.
classification_by_loop = (
    cdr_results_agg_multi_pmhc.groupby(['cdr_sequences_collated', 'chain_type', 'cdr'])['classification']
)
cdr_results_agg_multi_pmhc['num_modalities'] = classification_by_loop.transform('nunique')

In [12]:
# Preview the per-loop averages together with their modality counts.
cdr_results_agg_multi_pmhc

Unnamed: 0,cdr_sequences_collated,chain_type,cdr,peptide_sequence,mhc_slug,rmsd,movement,classification,num_modalities
11,DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF,alpha_chain,1,AAGIGILTV,hla_a_02_01,2.149292,Large Movement (2.0 to 4.0 Å),flexible,1
12,DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF,alpha_chain,1,ELAGIGILTV,hla_a_02_01,2.188011,Large Movement (2.0 to 4.0 Å),flexible,1
13,DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF,alpha_chain,1,MMWDRGLGMM,hla_a_02_01,1.983439,Movement (1.0 to 2.0 Å),flexible,1
14,DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF,alpha_chain,1,SMLGIGIVPV,hla_a_02_01,2.269887,Large Movement (2.0 to 4.0 Å),flexible,1
15,DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF,alpha_chain,2,AAGIGILTV,hla_a_02_01,1.095455,Movement (1.0 to 2.0 Å),flexible,1
...,...,...,...,...,...,...,...,...,...
287,YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLY,beta_chain,1,SIYRYYGL,h2_kb,0.579553,Some Movement (0.5 to 1.0 Å),,0
288,YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLY,beta_chain,2,EQYKFYSV,h2_kb,0.850569,Some Movement (0.5 to 1.0 Å),,0
289,YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLY,beta_chain,2,SIYRYYGL,h2_kb,0.938313,Some Movement (0.5 to 1.0 Å),,0
290,YSATPY-YYSGDPVV-AVSGFASALT-NNHNN-SYGAGS-ASGGGGTLY,beta_chain,3,EQYKFYSV,h2_kb,1.084084,Movement (1.0 to 2.0 Å),flexible,1


In [13]:
# Summary statistics of the modality counts; a maximum above 1 would mean some loop received both labels.
cdr_results_agg_multi_pmhc['num_modalities'].describe()

count    226.000000
mean       0.814159
std        0.389842
min        0.000000
25%        1.000000
50%        1.000000
75%        1.000000
max        1.000000
Name: num_modalities, dtype: float64

In [14]:
# List the CDR loops that received more than one distinct modality label.
cdr_results_agg_multi_pmhc.query('num_modalities > 1')

Unnamed: 0,cdr_sequences_collated,chain_type,cdr,peptide_sequence,mhc_slug,rmsd,movement,classification,num_modalities


No TCR CDR loop showed both <0.5 Å RMSD movement when binding one antigen and >1.0 Å RMSD movement when binding a different antigen.

# Peptide Analysis

## Load data

In [15]:
pmhc_results = pd.read_csv('../data/processed/apo-holo-tcr-pmhc-class-I-comparisons/pmhc_tcr_contact_apo_holo.csv')

# Keep only the antigen-chain rows, then drop the chain label and TCR-contact columns.
is_antigen_chain = pmhc_results['chain_type'] == 'antigen_chain'
peptide_results = (
    pmhc_results[is_antigen_chain]
    .reset_index(drop=True)
    .drop(columns=['chain_type', 'tcr_contact'])
)

# Per-structure columns; attached once for each side (x and y) of a comparison.
structure_metadata = apo_holo_summary[[
    'file_name',
    'pdb_id',
    'structure_type',
    'state',
    'alpha_chain',
    'beta_chain',
    'antigen_chain',
    'mhc_chain1',
    'mhc_chain2',
]]

# Complex-level annotations, looked up through the summary's index.
complex_metadata = apo_holo_summary[['cdr_sequences_collated', 'peptide_sequence', 'mhc_slug']]

# The second structure merge is what introduces the `_x` / `_y` column suffixes.
peptide_results = (
    peptide_results
    .merge(structure_metadata, how='left', left_on='structure_x_name', right_on='file_name')
    .merge(structure_metadata, how='left', left_on='structure_y_name', right_on='file_name')
    .merge(complex_metadata, how='left', left_on='complex_id', right_index=True)
)

In [16]:
# Label each row with its pair of states, treating 'holo-apo' as the same comparison as 'apo-holo'.
state_pairs = peptide_results['state_x'] + '-' + peptide_results['state_y']
peptide_results['comparison'] = state_pairs.replace({'holo-apo': 'apo-holo'})

# Only apo-holo comparisons are kept.
is_apo_holo = peptide_results['comparison'] == 'apo-holo'
peptide_results = peptide_results[is_apo_holo].reset_index(drop=True)

In [17]:
# Build an order-independent key per structure pair, so x-vs-y and y-vs-x rows share one key.
structure_pairs = peptide_results[['structure_x_name', 'structure_y_name']]
peptide_results['structure_comparison'] = structure_pairs.apply(
    lambda names: '-'.join(sorted(names)),
    axis='columns',
)

# Keep the first row for each structure pair.
peptide_results = peptide_results.drop_duplicates(subset=['structure_comparison']).reset_index(drop=True)

In [18]:
# Preview the apo-holo peptide comparisons after filtering and de-duplication.
peptide_results

Unnamed: 0,complex_id,structure_x_name,structure_y_name,rmsd,file_name_x,pdb_id_x,structure_type_x,state_x,alpha_chain_x,beta_chain_x,...,alpha_chain_y,beta_chain_y,antigen_chain_y,mhc_chain1_y,mhc_chain2_y,cdr_sequences_collated,peptide_sequence,mhc_slug,comparison,structure_comparison
0,5c0a_D-E-C-A-B_tcr_pmhc,5c0a_D-E-C-A-B_tcr_pmhc.pdb,5n1y_C-A-B_pmhc.pdb,0.448858,5c0a_D-E-C-A-B_tcr_pmhc.pdb,5c0a,tcr_pmhc,holo,D,E,...,,,C,A,B,NSAFQY-TYSSGN-AMRGDSSYKLI-SGHDY-FNNNVP-ASSLWEK...,MVWGPDPLYV,hla_a_02_01,apo-holo,5c0a_D-E-C-A-B_tcr_pmhc.pdb-5n1y_C-A-B_pmhc.pdb
1,5wlg_D-E-C-A-B_tcr_pmhc,5wlg_D-E-C-A-B_tcr_pmhc.pdb,5wli_C-A-B_pmhc.pdb,0.498148,5wlg_D-E-C-A-B_tcr_pmhc.pdb,5wlg,tcr_pmhc,holo,D,E,...,,,C,A,B,TYTTV-IRSNERE-ATVYAQGLT-NNHDY-SYVADS-ASSDWGDTGQLY,SQLLNAKYL,h2_db,apo-holo,5wlg_D-E-C-A-B_tcr_pmhc.pdb-5wli_C-A-B_pmhc.pdb
2,5wlg_D-E-C-A-B_tcr_pmhc,5wlg_D-E-C-A-B_tcr_pmhc.pdb,5wli_F-D-E_pmhc.pdb,0.519507,5wlg_D-E-C-A-B_tcr_pmhc.pdb,5wlg,tcr_pmhc,holo,D,E,...,,,F,D,E,TYTTV-IRSNERE-ATVYAQGLT-NNHDY-SYVADS-ASSDWGDTGQLY,SQLLNAKYL,h2_db,apo-holo,5wlg_D-E-C-A-B_tcr_pmhc.pdb-5wli_F-D-E_pmhc.pdb
3,5wlg_D-E-C-A-B_tcr_pmhc,5wlg_D-E-C-A-B_tcr_pmhc.pdb,5wli_I-G-H_pmhc.pdb,0.459025,5wlg_D-E-C-A-B_tcr_pmhc.pdb,5wlg,tcr_pmhc,holo,D,E,...,,,I,G,H,TYTTV-IRSNERE-ATVYAQGLT-NNHDY-SYVADS-ASSDWGDTGQLY,SQLLNAKYL,h2_db,apo-holo,5wlg_D-E-C-A-B_tcr_pmhc.pdb-5wli_I-G-H_pmhc.pdb
4,5wlg_D-E-C-A-B_tcr_pmhc,5wlg_D-E-C-A-B_tcr_pmhc.pdb,5wli_L-J-K_pmhc.pdb,0.494705,5wlg_D-E-C-A-B_tcr_pmhc.pdb,5wlg,tcr_pmhc,holo,D,E,...,,,L,J,K,TYTTV-IRSNERE-ATVYAQGLT-NNHDY-SYVADS-ASSDWGDTGQLY,SQLLNAKYL,h2_db,apo-holo,5wlg_D-E-C-A-B_tcr_pmhc.pdb-5wli_L-J-K_pmhc.pdb
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
372,7rtr_D-E-C-A-B_tcr_pmhc,7n6d_G-E-F_pmhc.pdb,7rtr_D-E-C-A-B_tcr_pmhc.pdb,0.468869,7n6d_G-E-F_pmhc.pdb,7n6d,pmhc,apo,,,...,D,E,C,A,B,DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY,YLQPRTFLL,hla_a_02_01,apo-holo,7n6d_G-E-F_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
373,7rtr_D-E-C-A-B_tcr_pmhc,7n6d_K-I-J_pmhc.pdb,7rtr_D-E-C-A-B_tcr_pmhc.pdb,0.411050,7n6d_K-I-J_pmhc.pdb,7n6d,pmhc,apo,,,...,D,E,C,A,B,DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY,YLQPRTFLL,hla_a_02_01,apo-holo,7n6d_K-I-J_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
374,7rtr_D-E-C-A-B_tcr_pmhc,7n6d_O-M-N_pmhc.pdb,7rtr_D-E-C-A-B_tcr_pmhc.pdb,0.448905,7n6d_O-M-N_pmhc.pdb,7n6d,pmhc,apo,,,...,D,E,C,A,B,DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY,YLQPRTFLL,hla_a_02_01,apo-holo,7n6d_O-M-N_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb
375,7rtr_D-E-C-A-B_tcr_pmhc,7p3d_C-A-B_pmhc.pdb,7rtr_D-E-C-A-B_tcr_pmhc.pdb,0.765295,7p3d_C-A-B_pmhc.pdb,7p3d,pmhc,apo,,,...,D,E,C,A,B,DRGSQS-IYSNGD-AVNRDDKII-SEHNR-FQNEAQ-ASSPDIEQY,YLQPRTFLL,hla_a_02_01,apo-holo,7p3d_C-A-B_pmhc.pdb-7rtr_D-E-C-A-B_tcr_pmhc.pdb


## Analysis

In [19]:
def average_and_classify_peptide_movement(group):
    """Average the RMSD per TCR and label each average.

    ``group`` is a frame of comparison rows. Rows are grouped by
    ``cdr_sequences_collated`` (one group per TCR), and the mean ``rmsd`` of
    each group is labelled twice: with its movement category (``movement``)
    and with its rigid/flexible modality (``classification``).

    Returns a frame indexed by ``cdr_sequences_collated`` with the columns
    ``rmsd``, ``movement`` and ``classification``.
    """
    averaged = group.groupby('cdr_sequences_collated')[['rmsd']].mean()
    mean_rmsd = averaged['rmsd']

    return averaged.assign(
        movement=mean_rmsd.map(categorize_movement).astype(movement_order),
        classification=mean_rmsd.map(classify_modality),
    )

# Summarise each peptide-MHC separately, then flatten the index.
comparisons_by_pmhc = peptide_results.groupby(['peptide_sequence', 'mhc_slug'])
peptide_results_agg = comparisons_by_pmhc.apply(average_and_classify_peptide_movement).reset_index()

In [20]:
# Keep only the peptide-MHCs that have averages for more than one TCR.
pmhc_keys = ['mhc_slug', 'peptide_sequence']
rows_per_pmhc = peptide_results_agg.groupby(pmhc_keys)['rmsd'].transform('size')
peptide_results_agg_multi_tcr = peptide_results_agg.loc[rows_per_pmhc > 1].copy()

In [21]:
# Count the distinct modality labels seen for each peptide-MHC; unclassified (missing) entries are not counted.
classification_by_pmhc = (
    peptide_results_agg_multi_tcr.groupby(['mhc_slug', 'peptide_sequence'])['classification']
)
peptide_results_agg_multi_tcr['num_modalities'] = classification_by_pmhc.transform('nunique')

In [22]:
# Preview the per-peptide averages together with their modality counts.
peptide_results_agg_multi_tcr

Unnamed: 0,peptide_sequence,mhc_slug,cdr_sequences_collated,rmsd,movement,classification,num_modalities
0,AAGIGILTV,hla_a_02_01,DRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSLSFGTEAF,1.157017,Movement (1.0 to 2.0 Å),flexible,1
1,AAGIGILTV,hla_a_02_01,DRGSQS-IYSNGD-AVNVAGKST-GTSNPN-SVGIG-AWSETGLGT...,1.134248,Movement (1.0 to 2.0 Å),flexible,1
2,AAGIGILTV,hla_a_02_01,FLGSQS-TYREGD-AVNDGGRLT-GTSNPN-WGPFG-AWSETGLGM...,0.986466,Some Movement (0.5 to 1.0 Å),,1
3,AAGIGILTV,hla_a_02_01,SIFNT-LYKAGEL-AGGTGNQFY-ENHRY-SYGVKD-AISEVGVGQPQH,1.323428,Movement (1.0 to 2.0 Å),flexible,1
4,AAGIGILTV,hla_a_02_01,YRGSQS-IYSNGD-AVNFGGGKLI-MRHNA-SNTAGT-ASSWSFGTEAF,0.526953,Some Movement (0.5 to 1.0 Å),,1
8,APRGPHGGAASGL,hla_b_07_02,NIATNDY-GYKTK-LVGEILDNFNKFY-MDHEN-SYDVKM-ASSQR...,4.232887,Significant Movement (>4.0 Å),flexible,1
9,APRGPHGGAASGL,hla_b_07_02,NIATNDY-GYKTK-LVVDQKLV-SGDLS-YYNGEE-ASSGGHTGSNEQF,2.814299,Large Movement (2.0 to 4.0 Å),flexible,1
11,ASNENMETM,h2_db,DSTFNY-IRSVSDK-AASEGSGSWQLI-MNHDT-YYDKIL-ASSAG...,0.396101,Little Movement (<0.5 Å),rigid,1
12,ASNENMETM,h2_db,DSTFNY-IRSVSDK-AASETSGSWQLI-MNHDT-YYDKIL-ASSRD...,0.409245,Little Movement (<0.5 Å),rigid,1
13,ASNENMETM,h2_db,TTMRS-LASGT-AAVTGNTGKLI-MNHDT-YYDKIL-ASSRGTIHS...,0.55853,Some Movement (0.5 to 1.0 Å),,1


In [23]:
# Summary statistics of the modality counts; a maximum above 1 would mean some peptide received both labels.
peptide_results_agg_multi_tcr['num_modalities'].describe()

count    53.000000
mean      0.641509
std       0.484146
min       0.000000
25%       0.000000
50%       1.000000
75%       1.000000
max       1.000000
Name: num_modalities, dtype: float64

In [24]:
# List the peptide-MHCs that received more than one distinct modality label.
peptide_results_agg_multi_tcr.query('num_modalities > 1')

Unnamed: 0,peptide_sequence,mhc_slug,cdr_sequences_collated,rmsd,movement,classification,num_modalities


Similarly, no peptides have both large (>1.0 Å RMSD) conformational changes and small (<0.5 Å RMSD) conformational changes for different TCRs.

# Conclusion

Neither the TCR CDR loops nor the peptides show different movement modalities (rigid vs flexible) when contacted by different entities.
These results indicate that the flexibility of TCRs between *apo* and *holo* conformations is dependent on the composition of the entity itself and not the entity it is contacting.