In [151]:
import pandas as pd
import os
import sys
import importlib
import gpcrmining.gpcrdb as db
# modify the path to wherever the 'useful.py' file is
script_dir = os.path.expanduser("~/Desktop/dror")
sys.path.append(script_dir)
import useful
importlib.reload(useful)

<module 'useful' from '/Users/evefine/Desktop/dror/useful.py'>

In [58]:
# Load the uploaded CSV file, this should be from PENSA analsyis
root_dir = '.'

recep = '5HT2B'

ks_bb_file_path = f'{recep}/results/{recep}_bb-torsions_kolmogorov-smirnov.csv'
ks_bb_df = pd.read_csv(ks_bb_file_path)

ks_sc_file_path = f'{recep}/results/{recep}_sc-torsions_kolmogorov-smirnov.csv'
ks_sc_df = pd.read_csv(ks_sc_file_path)

# functions

In [148]:
def process_ks_df(df):
    # Clean and rename the columns
    df.columns = ['Name', 'KS_statistic', 'p_value', 'Unused']
    
    # Create a copy to avoid SettingWithCopyWarning
    df_new = df[['Name', 'KS_statistic', 'p_value']].copy()

    # Convert numeric columns safely
    df_new['KS_statistic'] = pd.to_numeric(df_new['KS_statistic'], errors='coerce')
    df_new['p_value'] = pd.to_numeric(df_new['p_value'], errors='coerce')

    return df_new
    

def filter_ks(dataframe, cutoff=0.3, pval_cutoff=0.05):
    """
    Returns rows from the dataframe where any divergence metric (JSD, KLD_A_to_B, KLD_B_to_A)
    exceeds the given cutoff.
    """
    mask = (
        (dataframe['KS_statistic'] > cutoff) &
        (dataframe['p_value'] < pval_cutoff) 
    )
    return dataframe[mask]


def make_pymol_selection(df, column='Name', return_indices=False, selection=False):
    """
    Generate a PyMOL selection string from residue numbers in a DataFrame,
    using 'resid X to Y' syntax for consecutive ranges.

    Parameters:
    - df: pandas DataFrame with a column containing residue names like 'PHI 0 LEU 59'
    - column: the column name that contains the residue names

    Returns:
    - str: a PyMOL selection string, e.g., '(resid 59 or resid 61 to 63)'
    """
    import re

    # Extract residue numbers using regex
    residues = set()
    for name in df[column]:
        match = re.search(r'\b(\d+)\b$', str(name).strip())
        if match:
            if selection:
                if str(match.group(1)) in selection:
                    residues.add(int(match.group(1)))
            else:
                residues.add(int(match.group(1)))

    sorted_residues = sorted(residues)
    if return_indices:
        return sorted_residues

    # Group into ranges
    ranges = []
    start = end = None
    for resid in sorted_residues:
        if start is None:
            start = end = resid
        elif resid == end + 1:
            end = resid
        else:
            ranges.append((start, end))
            start = end = resid
    if start is not None:
        ranges.append((start, end))

    # Build selection string
    selection_parts = []
    for start, end in ranges:
        if start == end:
            selection_parts.append(f"resid {start}")
        else:
            selection_parts.append(f"resid {start}-{end}")

    return f"({' or '.join(selection_parts)})"

aas = {
    "ALA": "A",
    "ARG": "R",
    "ASN": "N",
    "ASP": "D",
    "CYS": "C",
    "GLU": "E",
    "GLN": "Q",
    "GLY": "G",
    "HIS": "H",
    "ILE": "I",
    "LEU": "L",
    "LYS": "K",
    "MET": "M",
    "PHE": "F",
    "PRO": "P",
    "SER": "S",
    "THR": "T",
    "TRP": "W",
    "TYR": "Y",
    "VAL": "V",
}


def write_diheds_from_df(df, selection=False):
    list_chis = list(df['Name'].values)
    chis = []
    resids = []
    for chi in list_chis:
        chival, _, res, num = chi.split(' ')
        if selection:
            if num not in selection:
                continue
        chis.append(int(chival[3]))

        resids.append(aas[res] + num)
    useful.write_diheds(resids, chis)

In [150]:
def get_res_info(gpcr):
    """
    Modified to process a single GPCR at a time to facilitate parallel processing.

    Input:
    - gpcr: a gpcr name (str)

    Returns:
    - gpcr: same as above
    - db.get_residue_info(gpcr): a list of lists in the format
    [['TM#', residue # in pdb (int), one letter aa code, gpcrdb numbering of residue (str)],...]
    """
    return gpcr, db.get_residue_info(gpcr)


def take_from_mapping(mapping_dict, resnums):
    important = {}
    for resnum in resnums:
        important[mapping_dict[resnum][1]] = mapping_dict[resnum][0] + str(resnum)
    return important

bind_pock_resis = ['1.46', '1.43', '1.42', '1.39', '1.38', 
             '2.50', '2.53', '2.54', '2.57', '2.58', '2.61', '2.62', '2.65', '2.66',
             '3.40', '3.39', '3.36', '3.35', '3.33', '3.32', '3.29', '3.28',
             '5.47', '5.46', '5.43', '5.42', '5.39', '5.38',
             '6.48', '6.51', '6.52', '6.55',
             '7.43', '7.42', '7.39', '7.38', '7.35', '7.34'
            ]


intracell_interface = ['2.44', '2.43', '2.42', '2.40', '2.39',
                       '3.49', '3.50', '3.53', '3.54',
                       '5.65', '5.68', '5.69', '5.71', '5.72', '5.73',
                       '6.37', '6.36', '6.33', '6.32', '6.30', '6.29', '6.26',
                       '7.56', '7.55', '7.54', '7.53',
                      ]
                       
def extract_location(important_dict, lst):
    location = {}

    for BW in list(important_dict.keys()):
        if BW in lst:
            location[BW] = important_dict[BW]
    
    return location


def proc_and_filter_df(df, cutoff=0.3):
    processed_df = process_ks_df(df)
    filtered_df = filter_ks(processed_df, cutoff=cutoff)
    return filtered_df

    
def get_important_resis_in_loc(filtered_df, mapping_dict, lst):
    important = make_pymol_selection(filtered_df, return_indices=True)

    if important:
        important_map = take_from_mapping(mapping_dict, important)
    
        relevant_important = extract_location(important_map, lst)
    
        return relevant_important
    else:
        return 'nothing'


def make_dict_string(dic):
    string = ''
    for key in list(dic.keys()):
        string += f"{key} : {dic[key]} \n"
    return string


def find_dif_imp_resis(bb_df, sc_df, mapping=None, recep=None, cutoff_int=0.3, cutoff_bind=0.1,
                      return_dicts=False):

    bb_df_filt_forbind = proc_and_filter_df(bb_df, cutoff=cutoff_bind)
    sc_df_filt_forbind = proc_and_filter_df(sc_df, cutoff=cutoff_bind)

    bb_df_filt_forint = proc_and_filter_df(bb_df, cutoff=cutoff_int)
    sc_df_filt_forint = proc_and_filter_df(sc_df, cutoff=cutoff_int)

    if not mapping:
        if not recep:
            print("""If you don't specify a receptor (keyword arg recep), this function can't map.
            Use a gpcrdb receptor name as the entry, or specify a mapping""")
            return None

        mapping = db.get_residue_info(recep)

    mapping_dict = {item[1]: (item[2], item[3].split('x')[0]) for item in mapping}
    
    bb_bind_pock = get_important_resis_in_loc(bb_df_filt_forbind, mapping_dict, bind_pock_resis)
    bb_intracell = get_important_resis_in_loc(bb_df_filt_forint, mapping_dict, intracell_interface)

    sc_bind_pock = get_important_resis_in_loc(sc_df_filt_forbind, mapping_dict, bind_pock_resis)
    sc_intracell = get_important_resis_in_loc(sc_df_filt_forint, mapping_dict, intracell_interface)
    
    
    if return_dicts:
        return bb_bind_pock, bb_intracell, sc_bind_pock, sc_intracell
    
    dicts = [bb_bind_pock, bb_intracell, sc_bind_pock, sc_intracell]
    strings = []
    for dic in dicts:
        if dic != 'nothing':
            strings.append(make_dict_string(dic))
        else:
            strings.append(dic)
    
    print(f"""
binding pocket bb > {cutoff_bind} ks: \n{strings[0]}
    
binding pocket sc > {cutoff_bind} ks: \n{strings[2]}
    
intracellular interface bb > {cutoff_int} ks: \n{strings[1]}
    
intracellular interface sc > {cutoff_int} ks: \n{strings[3]}
    """)
    
        
def print_imp_chis(ks_bb_df, ks_sc_df, mapping=False, pymol=False):
    _, _, sc_bind, sc_int = find_dif_imp_resis(ks_bb_df, ks_sc_df, mapping=mapping, cutoff_bind=0.15, return_dicts=True)

    selections = {'sc_bind': [], 'sc_int': []}
    dicts = {'sc_bind': sc_bind, 'sc_int': sc_int}

    for key in list(selections.keys()):
        for key_2 in list(dicts[key].keys()):
            selections[key].append(dicts[key][key_2][1:])

    ks_sc_df_new = process_ks_df(ks_sc_df)
    
    filtered_ks_sc_bind = filter_ks(ks_sc_df_new, cutoff=0.15)
    filtered_ks_sc_int = filter_ks(ks_sc_df_new, cutoff=0.3)

    filtered_dfs = {'sc_bind': filtered_ks_sc_bind, 'sc_int': filtered_ks_sc_int}
    for key in list(selections.keys()):
        print(key)
        write_diheds_from_df(filtered_dfs[key], selection=selections[key])
    
    if pymol:
        for key in list(selections.keys()):
            print(key)
            pymol_string = make_pymol_selection(filtered_dfs[key], column='Name', return_indices=False, selection=selections[key])
            print('pymol selection string:')
            print(pymol_string)

# processing

In [5]:
ks_sc_df_new = process_ks_df(ks_sc_df)

# Apply the function with a sample cutoff
filtered_ks_sc = filter_ks(ks_sc_df_new, cutoff=0.15)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['KS_statistic'] = pd.to_numeric(df['KS_statistic'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['p_value'] = pd.to_numeric(df['p_value'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['KS_statistic'] = pd.to_numeric(df['KS_statistic'], errors='coerce')

In [6]:
print(make_pymol_selection(filtered_ks_sc))


# make analysis files

In [76]:
write_diheds_from_df(filtered_ks_sc)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3]
dihedrals = {
    'N90-chi1': ['{N90} and name N',
               '{N90} and name CA',
               '{N90} and name CB',
               '{N90} and name CG'],
    'V235-chi1': ['{V235} and name N',
               '{V235} and name CA',
               '{V235} and name CB',
               '{V235} and name CG'],
    'K247-chi1': ['{K247} and name N',
               '{K247} and name CA',
               '{K247} and name CB',
               '{K247} and name CG'],
    'R321-chi1': ['{R321} and name N',
               '{R321} and name CA',
               '{R321} and name CB',
               '{R321} and name CG'],
    'S372-chi1': ['{S372} and name N',
               '{S372} and name CA',
               '{S372} and name CB',
               '{S372} and name CG'],
    'V375-chi1': ['{V375} and name N',
               '{V375} and name CA',
               '{V375} and name CB',
               '{V375} and name CG'],
    'N376-chi1': [

In [98]:
metrics = {
    'distances': {
        # TM6 Intracellular Position
        '7.56N-7.52O': ['{7x56} and name N', '{7x52} and name O'],},

    # TM6 is 6.32-6.36
    # TM3 is 3.46-3.50
    # TM7 is 7.55-7.51
    # TM2 is 2.46-2.42
    'centroid_distances': {'TM6-TM3': ['{TM6} and name CA', '{TM3} and name CA'],
                            'TM6-TM7': ['{TM6} and name CA', '{TM7} and name CA'],
                            'TM7-TM2': ['{TM7} and name CA', '{TM2} and name CA'],
                            'TM6_up-TM7_up':['{TM6} and name CA', '{TM7} and name CA'],
                            'TM5_up-TM6_up': ['{TM5_up} and name CA', '{TM6_up} and name CA'],
                          },


    'dihedrals':{
    'N90-chi1': ['{N90} and name N',
               '{N90} and name CA',
               '{N90} and name CB',
               '{N90} and name CG'],
    'V235-chi1': ['{V235} and name N',
               '{V235} and name CA',
               '{V235} and name CB',
               '{V235} and name CG1'],
    'K247-chi1': ['{K247} and name N',
               '{K247} and name CA',
               '{K247} and name CB',
               '{K247} and name CG'],
    'R321-chi1': ['{R321} and name N',
               '{R321} and name CA',
               '{R321} and name CB',
               '{R321} and name CG'],
    'S372-chi1': ['{S372} and name N',
               '{S372} and name CA',
               '{S372} and name CB',
               '{S372} and name CG'],
    'V375-chi1': ['{V375} and name N',
               '{V375} and name CA',
               '{V375} and name CB',
               '{V375} and name CG'],
    'N376-chi1': ['{N376} and name N',
               '{N376} and name CA',
               '{N376} and name CB',
               '{N376} and name CG'],
    'P377-chi1': ['{P377} and name N',
               '{P377} and name CA',
               '{P377} and name CB',
               '{P377} and name CG'],
    'Y380-chi1': ['{Y380} and name N',
               '{Y380} and name CA',
               '{Y380} and name CB',
               '{Y380} and name CG'],
    'T381-chi1': ['{T381} and name N',
               '{T381} and name CA',
               '{T381} and name CB',
               '{T381} and name CG'],
    'N72-chi2': ['{N72} and name CA',
               '{N72} and name CB',
               '{N72} and name CG',
               '{N72} and name CD1'],
    'N90-chi2': ['{N90} and name CA',
               '{N90} and name CB',
               '{N90} and name CG',
               '{N90} and name CD1'],
    'N164-chi2': ['{N164} and name CA',
               '{N164} and name CB',
               '{N164} and name CG',
               '{N164} and name CD1'],
    'F217-chi2': ['{F217} and name CA',
               '{F217} and name CB',
               '{F217} and name CG',
               '{F217} and name CD1'],
    'F226-chi2': ['{F226} and name CA',
               '{F226} and name CB',
               '{F226} and name CG',
               '{F226} and name CD1'],
    'N318-chi2': ['{N318} and name CA',
               '{N318} and name CB',
               '{N318} and name CG',
               '{N318} and name CD1'],
    'L332-chi2': ['{L332} and name CA',
               '{L332} and name CB',
               '{L332} and name CG',
               '{L332} and name CD1'],
    'N376-chi2': ['{N376} and name CA',
               '{N376} and name CB',
               '{N376} and name CG',
               '{N376} and name CD1'],
    'P377-chi2': ['{P377} and name CA',
               '{P377} and name CB',
               '{P377} and name CG',
               '{P377} and name CD1'],
},


'avg_vector_angles': {'TM7 twist 1':[['{7x47} and name CA','{7x46} and name CA','{7x47_ref} and name CA','{7x46_ref} and name CA' ]],
                    '7.43 twist':[['{7x43} and name CA','{7x42} and name CA','{7x43_ref} and name CA','{7x42_ref} and name CA' ]],
                        'TM7 twist mult': [['{7x46} and name CA','{7x47} and name CA','{7x46_ref} and name CA','{7x47_ref} and name CA' ],
                                        ['{7x47} and name CA','{7x48} and name CA','{7x47_ref} and name CA','{7x48_ref} and name CA' ],
                                        ['{7x48} and name CA','{7x49} and name CA','{7x48_ref} and name CA','{7x49_ref} and name CA' ],
                                        ['{7x49} and name CA','{7x50} and name CA','{7x49_ref} and name CA','{7x50_ref} and name CA' ]],
                      'TM6 twist': [['{6x45} and name CA', '{6x44} and name CA', '{6x45_ref} and name CA', '{6x44_ref} and name CA'],
                      ['{6x44} and name CA', '{6x43} and name CA', '{6x44_ref} and name CA', '{6x43_ref} and name CA'],
                      ['{6x43} and name CA', '{6x42} and name CA', '{6x43_ref} and name CA', '{6x42_ref} and name CA'],
                      ['{6x42} and name CA', '{6x41} and name CA', '{6x42_ref} and name CA', '{6x41_ref} and name CA'],
                      ['{6x41} and name CA', '{6x40} and name CA', '{6x41_ref} and name CA', '{6x40_ref} and name CA'],
                      ], 
                      '6x48 twist' :[['{6x48} and name CA', '{6x47} and name CA', '{6x48_ref} and name CA', '{6x47_ref} and name CA']],
                      'TM5 twist up':[['{5x48} and name CA', '{5x49} and name CA', '{5x48_ref} and name CA', '{5x49_ref} and name CA'],
                      ['{5x47} and name CA', '{5x48} and name CA', '{5x47_ref} and name CA', '{5x48_ref} and name CA'],
                      ['{5x46} and name CA', '{5x47} and name CA', '{5x46_ref} and name CA', '{5x47_ref} and name CA'],
                      ], 
                    },
    # the key of the align dictionary MUST be 'ref', but everything else can change
    # the first entry is the selection you will align on in the simulation molecule,
    # the second entry is the selection you will align on in the reference molecule,
    # and the third entry is the name of the entry in structs that you will be comparing to
    # (it can be self, then it will just compare to the first frame)
    'align':{'ref':['({sim_selection}) and name CA', '({ref_selection}) and name CA','ref'] }
    } 

In [100]:
useful.make_cond_dict(metrics)

['7x56', '7x52', 'TM6', 'TM3', 'TM7', 'TM2', 'TM5_up', 'TM6_up', 'N90', 'V235', 'K247', 'R321', 'S372', 'V375', 'N376', 'P377', 'Y380', 'T381', 'N72', 'N164', 'F217', 'F226', 'N318', 'L332', '7x47', '7x46', '7x47_ref', '7x46_ref', '7x43', '7x42', '7x43_ref', '7x42_ref', '7x48', '7x48_ref', '7x49', '7x49_ref', '7x50', '7x50_ref', '6x45', '6x44', '6x45_ref', '6x44_ref', '6x43', '6x43_ref', '6x42', '6x42_ref', '6x41', '6x41_ref', '6x40', '6x40_ref', '6x48', '6x47', '6x48_ref', '6x47_ref', '5x48', '5x49', '5x48_ref', '5x49_ref', '5x47', '5x47_ref', '5x46', '5x46_ref', 'sim_selection', 'ref_selection']
Couldn't process key sim_selection, add on your own
Couldn't process key ref_selection, add on your own



 Enter the segid for the protein:  P252






Enter the residue number for 2.50:  100
Enter the residue number for 3.50:  153
Enter the residue number for 5.50:  229
Enter the residue number for 6.50:  339
Enter the residue number for 7.50:  377
Enter the GPCRDB range for TM6 (e.g., 3.44-3.56):  6.32-6.36
Enter the GPCRDB range for TM3 (e.g., 3.44-3.56):  3.46-3.50
Enter the GPCRDB range for TM7 (e.g., 3.44-3.56):  7.51-7.55
Enter the GPCRDB range for TM2 (e.g., 3.44-3.56):  2.46-2.42


Can't process TM5_up, figure out on your own
Can't process TM6_up, figure out on your own


UnboundLocalError: cannot access local variable 'segid' where it is not associated with a value

In [106]:
useful.make_cond_dict(metrics)

['7x56', '7x52', 'TM6', 'TM3', 'TM7', 'TM2', 'TM5_up', 'TM6_up', 'N90', 'V235', 'K247', 'R321', 'S372', 'V375', 'N376', 'P377', 'Y380', 'T381', 'N72', 'N164', 'F217', 'F226', 'N318', 'L332', '7x47', '7x46', '7x47_ref', '7x46_ref', '7x43', '7x42', '7x43_ref', '7x42_ref', '7x48', '7x48_ref', '7x49', '7x49_ref', '7x50', '7x50_ref', '6x45', '6x44', '6x45_ref', '6x44_ref', '6x43', '6x43_ref', '6x42', '6x42_ref', '6x41', '6x41_ref', '6x40', '6x40_ref', '6x48', '6x47', '6x48_ref', '6x47_ref', '5x48', '5x49', '5x48_ref', '5x49_ref', '5x47', '5x47_ref', '5x46', '5x46_ref', 'sim_selection', 'ref_selection']
Couldn't process key sim_selection, add on your own
Couldn't process key ref_selection, add on your own



 Enter the segid for the protein:  P252






Enter the residue number for 2.50:  100
Enter the residue number for 3.50:  153
Enter the residue number for 5.50:  229
Enter the residue number for 6.50:  339
Enter the residue number for 7.50:  377
Enter the GPCRDB range for TM6 (e.g., 3.44-3.56):  6.32-6.36
Enter the GPCRDB range for TM3 (e.g., 3.44-3.56):  3.46-3.50
Enter the GPCRDB range for TM7 (e.g., 3.44-3.56):  7.51-7.55
Enter the GPCRDB range for TM2 (e.g., 3.44-3.56):  2.42-2.46
Enter the GPCRDB range for TM5 (e.g., 3.44-3.56):  5.46-5.49
Enter the GPCRDB range for TM6 (e.g., 3.44-3.56):  6.50-6.54

 Enter the segid for the protein:  P252

Should the reference selections be the same as the original? Y/N :  Y


{'7x56': 'protein and segid P252 and resid 383',
 '7x52': 'protein and segid P252 and resid 379',
 '7x47': 'protein and segid P252 and resid 374',
 '7x46': 'protein and segid P252 and resid 373',
 '7x43': 'protein and segid P252 and resid 370',
 '7x42': 'protein and segid P252 and resid 369',
 '7x48': 'protein and segid P252 and resid 375',
 '7x49': 'protein and segid P252 and resid 376',
 '7x50': 'protein and segid P252 and resid 377',
 '6x45': 'protein and segid P252 and resid 334',
 '6x44': 'protein and segid P252 and resid 333',
 '6x43': 'protein and segid P252 and resid 332',
 '6x42': 'protein and segid P252 and resid 331',
 '6x41': 'protein and segid P252 and resid 330',
 '6x40': 'protein and segid P252 and resid 329',
 '6x48': 'protein and segid P252 and resid 337',
 '6x47': 'protein and segid P252 and resid 336',
 '5x48': 'protein and segid P252 and resid 227',
 '5x49': 'protein and segid P252 and resid 228',
 '5x47': 'protein and segid P252 and resid 226',
 '5x46': 'protein an

# gpcrdb

In [10]:
gpcrdb_names = {'B1AR':'adrb1_human',
                '5HT2B':'5ht2b_human'}

In [11]:
mapping = db.get_residue_info(gpcrdb_names[recep])
mapping_dict = {item[1]: (item[2], item[3].split('x')[0]) for item in mapping}

In [85]:
print(recep)
find_dif_imp_resis(ks_bb_df, ks_sc_df, mapping, cutoff_bind=0.15)

5HT2B

binding pocket bb > 0.15 ks: 
2.50 : D100 
6.48 : W337 
7.38 : F365 
7.42 : G369 
7.43 : Y370 

    
binding pocket sc > 0.15 ks: 
1.43 : I65 
1.46 : T68 
2.50 : D100 
3.28 : W131 
3.29 : L132 
5.38 : F217 
5.47 : F226 
6.55 : N344 
7.38 : F365 
7.43 : Y370 

    
intracellular interface bb > 0.3 ks: 
5.65 : L244 
5.68 : K247 
5.69 : A248 
7.54 : T381 
7.55 : L382 

    
intracellular interface sc > 0.3 ks: 
2.40 : N90 
5.68 : K247 
6.29 : N318 
6.32 : R321 
7.53 : Y380 
7.54 : T381 

    


In [149]:
print_imp_chis(ks_bb_df, ks_sc_df, mapping=mapping, pymol=True)

sc_bind
dihedrals : {
    'T68-chi1': ['{T68} and name N',
               '{T68} and name CA',
               '{T68} and name CB',
               '{T68} and name OG1'],
    'W131-chi1': ['{W131} and name N',
               '{W131} and name CA',
               '{W131} and name CB',
               '{W131} and name CG'],
    'L132-chi1': ['{L132} and name N',
               '{L132} and name CA',
               '{L132} and name CB',
               '{L132} and name CG'],
    'N344-chi1': ['{N344} and name N',
               '{N344} and name CA',
               '{N344} and name CB',
               '{N344} and name CG'],
    'Y370-chi1': ['{Y370} and name N',
               '{Y370} and name CA',
               '{Y370} and name CB',
               '{Y370} and name CG'],
    'I65-chi2': ['{I65} and name CA',
               '{I65} and name CB',
               '{I65} and name CG',
               '{I65} and name CD1'],
    'D100-chi2': ['{D100} and name CA',
               '{D100} and name CB',
  

In [153]:
metrics = {'dihedrals' : {
    'T68-chi1': ['{T68} and name N',
               '{T68} and name CA',
               '{T68} and name CB',
               '{T68} and name OG1'],
    'W131-chi1': ['{W131} and name N',
               '{W131} and name CA',
               '{W131} and name CB',
               '{W131} and name CG'],
    'L132-chi1': ['{L132} and name N',
               '{L132} and name CA',
               '{L132} and name CB',
               '{L132} and name CG'],
    'N344-chi1': ['{N344} and name N',
               '{N344} and name CA',
               '{N344} and name CB',
               '{N344} and name CG'],
    'Y370-chi1': ['{Y370} and name N',
               '{Y370} and name CA',
               '{Y370} and name CB',
               '{Y370} and name CG'],
    'I65-chi2': ['{I65} and name CA',
               '{I65} and name CB',
               '{I65} and name CG',
               '{I65} and name CD1'],
    'D100-chi2': ['{D100} and name CA',
               '{D100} and name CB',
               '{D100} and name CG',
               '{D100} and name OD1'],
    'L132-chi2': ['{L132} and name CA',
               '{L132} and name CB',
               '{L132} and name CG',
               '{L132} and name CD1'],
    'F217-chi2': ['{F217} and name CA',
               '{F217} and name CB',
               '{F217} and name CG',
               '{F217} and name CD1'],
    'F226-chi2': ['{F226} and name CA',
               '{F226} and name CB',
               '{F226} and name CG',
               '{F226} and name CD1'],
    'F365-chi2': ['{F365} and name CA',
               '{F365} and name CB',
               '{F365} and name CG',
               '{F365} and name CD1'],
},

'dihedrals_@' : {
    'N90-chi1': ['{N90} and name N',
               '{N90} and name CA',
               '{N90} and name CB',
               '{N90} and name CG'],
    'K247-chi1': ['{K247} and name N',
               '{K247} and name CA',
               '{K247} and name CB',
               '{K247} and name CG'],
    'R321-chi1': ['{R321} and name N',
               '{R321} and name CA',
               '{R321} and name CB',
               '{R321} and name CG'],
    'Y380-chi1': ['{Y380} and name N',
               '{Y380} and name CA',
               '{Y380} and name CB',
               '{Y380} and name CG'],
    'T381-chi1': ['{T381} and name N',
               '{T381} and name CA',
               '{T381} and name CB',
               '{T381} and name OG1'],
    'N90-chi2': ['{N90} and name CA',
               '{N90} and name CB',
               '{N90} and name CG',
               '{N90} and name ND2'],
    'N318-chi2': ['{N318} and name CA',
               '{N318} and name CB',
               '{N318} and name CG',
               '{N318} and name ND2'],
},}

In [156]:
useful.make_cond_dict(metrics)

['T68', 'W131', 'L132', 'N344', 'Y370', 'I65', 'D100', 'F217', 'F226', 'F365', 'N90', 'K247', 'R321', 'Y380', 'T381', 'N318']



 Enter the segid for the protein:  P388


{'T68': 'protein and segid P388 and resid 68',
 'W131': 'protein and segid P388 and resid 131',
 'L132': 'protein and segid P388 and resid 132',
 'N344': 'protein and segid P388 and resid 344',
 'Y370': 'protein and segid P388 and resid 370',
 'I65': 'protein and segid P388 and resid 65',
 'D100': 'protein and segid P388 and resid 100',
 'F217': 'protein and segid P388 and resid 217',
 'F226': 'protein and segid P388 and resid 226',
 'F365': 'protein and segid P388 and resid 365',
 'N90': 'protein and segid P388 and resid 90',
 'K247': 'protein and segid P388 and resid 247',
 'R321': 'protein and segid P388 and resid 321',
 'Y380': 'protein and segid P388 and resid 380',
 'T381': 'protein and segid P388 and resid 381',
 'N318': 'protein and segid P388 and resid 318'}