In [162]:
from utils import *
from utils2 import *
from utils3 import *
from plotting import *
from gpcrdb_soup import *

In [163]:
from tqdm import tqdm, trange

In [164]:
import sys
import pandas
import gemmi
from gemmi import cif
import random
from math import degrees
import mplcursors

In [213]:
class CifProcessor():
    def __init__(self, 
                 path = 'data/',
                 structure = 'mmcif/',
                 starting_idx=0,
                 limit=None,
                 shuffle = False,
                 reload=True,
                 remove_hetatm=True,
                 allow_exception=False):
        self.path = path
        self.structure_path = self.path + structure
        self.path_table = path + 'gpcrdb/' + 'structures.pkl'
        
        self.shuffle = shuffle
        self.limit = limit
        self.reload = reload
        self.remove_hetatm = remove_hetatm
        self.allow_exception = allow_exception
        
        self.filenames, self.pdb_ids = self.get_pdb_files()
        self.filenames = self.filenames[starting_idx:]
        self.pdb_ids = self.pdb_ids[starting_idx:]
        if self.limit == None:
            self.limit = len(self.pdb_ids)
        if len(self.filenames) > self.limit:
            self.filenames = self.filenames[:self.limit]
            self.pdb_ids = self.pdb_ids[:self.limit]
        # Columns for structure dataframe
        self.cols = ['group_PDB', 'auth_asym_id', 'label_asym_id', 'label_seq_id', 'auth_seq_id', 
                     'label_comp_id', 'id', 'label_atom_id', 
                     'type_symbol', 'Cartn_x', 'Cartn_y', 'Cartn_z']
        self.numbering = pd.DataFrame()
                
    # ==============================================================================================================
    
    def get_pdb_files(self):
        # just a helper function that returns all pdb files in specified path
        (_, _, filenames) = next(os.walk(self.structure_path))
        if self.shuffle:
            random.shuffle(filenames)
        files = [self.structure_path + x for x in filenames]
        pdb_ids = list(set([x[-8:-4] for x in files]))
        return files, pdb_ids
    
    def make_metainfo(self):
        self.table = pd.read_pickle(self.path_table)
        for i, pdb_id in tqdm(enumerate(self.pdb_ids)):
            if i < self.limit:
                protein, family = self.get_prot_info(pdb_id)
                numbering = self.get_res_nums(protein)
                if i == 0:
                    self.mappings = self.get_mapping(pdb_id)
                    numb = pd.DataFrame([pdb_id, protein, family, numbering]).T
                    # numb = [pdb_id, protein, self.entry_to_ac(protein), family, numbering]
                    numb.columns = ['PDB', 'identifier', 'family', 'numbering']
                    self.numbering = self.numbering.append(numb)
                else:
                    self.mappings = self.mappings.append(self.get_mapping(pdb_id), ignore_index=True)
                    numb = pd.DataFrame(data=[pdb_id, protein, family, numbering]).T
                    numb.columns = ['PDB', 'identifier', 'family', 'numbering']
                    self.numbering = self.numbering.append(numb, ignore_index=True)
        

    def make_raws(self):
        for i, pdb_id in tqdm(enumerate(self.pdb_ids)):
            if i < self.limit:
                # only process if the file has not already been generated
                # if not self.reload & 
                protein, family = self.get_prot_info(pdb_id)
                if i == 0:
                    self.structure = self.load_cifs(pdb_id)
                    self.structure['identifier'] = protein.upper()
                    if self.remove_hetatm:
                        self.structure = self.structure[self.structure['group_PDB']!='HETATM']
                        self.structure['label_seq_id'] = self.structure['label_seq_id'].astype(np.int64)
                    self.structure['label_comp_sid'] = self.structure.apply(lambda x:
                                                        gemmi.find_tabulated_residue(x.label_comp_id).one_letter_code, 
                                                        axis=1)
                else:
                    structure = self.load_cifs(pdb_id)
                    structure['identifier'] = protein.upper()
                    if self.remove_hetatm:
                        structure = structure[structure['group_PDB']!='HETATM']
                        structure['label_seq_id'] = structure['label_seq_id'].astype(np.int64)
                    structure['label_comp_sid'] = structure.apply(lambda x:
                                                        gemmi.find_tabulated_residue(x.label_comp_id).one_letter_code, 
                                                        axis=1)
                    self.structure = self.structure.append(structure, ignore_index=True)
         
    # ==============================================================================================================
        
    def entry_to_ac(self, entry: str):
        query = 'https://www.uniprot.org/uniprot/'+entry+'.txt'
        response = requests.get(query)
        return response.text.split('\n')[1].split('AC   ')[1][:6]
    
    def get_prot_info(self, pdb_id):
        # query structure
        query = 'https://gpcrdb.org/services/structure/'+pdb_id.upper()+'/'
        response = requests.get(query)
        protein = response.json()['protein']
        family = response.json()['family']
        return protein, family
    
    def get_res_nums(self, protein):
        # query uniprot -> res num
        query = 'https://gpcrdb.org/services/residues/extended/'+protein+'/'
        response = requests.get(query)
        # select res num
        # assign res_num to structure data
        return response.json()
    
    def get_mapping(self, pdb_id):
        maps = get_mappings_data(pdb_id)[pdb_id.lower()]['UniProt']
        uniprots = maps.keys()
        full_table=pd.DataFrame()
        for i, uniprot in enumerate(uniprots):
            table = pd.DataFrame.from_dict(maps[uniprot])
            table['PDB'] = pdb_id
            table['uniprot'] = uniprot
            if i == 0:
                full_table = table
            else:
                full_table = full_table.append(table, ignore_index=True)
        return full_table
    
    # ==============================================================================================================
    
    def load_cifs(self, pdb_id):
        print("Loading cif file of", pdb_id)
        path = 'data/mmcif/' + pdb_id + '.cif'
        print(path)
        try:
            doc = cif.read_file(path)  # copy all the data from mmCIF file
            lol = []  # list of lists
            for b, block in enumerate(doc):
                table = block.find('_atom_site.', self.cols)
                for row in table:
                    lol.append([pdb_id]+list(row))
        
        
        except Exception as e:
            print("Oops. %s" % e)
            sys.exit(1)
        cols = ['PDB']+self.cols
        st = gemmi.read_structure(path)
        model = st[0]
        if len(st) > 1:
            print("There are multiple models!")
        rol = []
        for chain in model:
            for r, res in enumerate(chain.get_polymer()):
                # previous_residue() and next_residue() return previous/next
                # residue only if the residues are bonded. Otherwise -- None.
                prev_res = chain.previous_residue(res)
                next_res = chain.next_residue(res)
                try:
                    phi, psi = gemmi.calculate_phi_psi(prev_res, res, next_res)
                except:
                    phi, psi = np.nan, np.nan
                try:
                    omega = gemmi.calculate_omega(res, next_res)
                except:
                    omega = np.nan
                rol.append([res.label_seq, res.subchain, 
                            degrees(phi), degrees(omega), degrees(psi)])
        cols2 = ['label_seq_id', 'label_asym_id', 'phi', 'omega', 'psi']
        rol_df = pd.DataFrame(data=rol, columns=cols2)
        rol_df['label_seq_id'] = rol_df['label_seq_id'].astype(int)
        lol_df = pd.DataFrame(data=lol, columns=cols)
        lol_df['label_seq_id'] = lol_df.apply(lambda x: int(x.label_seq_id) if x.label_seq_id != '.' else np.nan, axis=1)
        return pd.merge(lol_df, rol_df, how='inner', on=['label_asym_id', 'label_seq_id'])
    
    # ==============================================================================================================   
            
    def to_pkl_metainfo(self):
        self.numbering.to_pickle(self.path + 'data_numbering.pkl')
        self.table.to_pickle(self.path + 'data_table.pkl')
        self.mappings.to_pickle(self.path + 'data_mappings.pkl')
    
    def to_pkl_raw(self, folder='data/raw/', overwrite=False):
        for pdb_id in self.pdb_ids:
            structure = self.structure[self.structure['PDB']==pdb_id]
            if len(structure) >= 1:
                if (not os.path.isfile(folder + pdb_id + '.pkl')) or overwrite:
                    structure.to_pickle(folder + pdb_id + '.pkl')
                    print("writing to file:", folder + pdb_id + '.pkl')
    
    def to_pkl_processed(self, folder='data/processed/', overwrite=False):
        for df in self.dfl:
            pdb_id = df['PDB'].unique()[0]
            if (not os.path.isfile(folder + pdb_id + '.pkl')) or overwrite:
                df.to_pickle(folder + pdb_id + '.pkl')
                print("writing to file:", folder + pdb_id + '.pkl')
    
    # ==============================================================================================================   
    
    def del_pkl(self, folder='data/raw/'):
        files = [f for f in os.listdir(folder) if '.pkl' in f]
        for file in files:
            os.remove(folder + file)
    
    def del_pkl_metainfo(self):
        os.remove(self.path + 'data_numbering.pkl')
        os.remove(self.path + 'data_table.pkl')
        os.remove(self.path + 'data_mappings.pkl')
            
    # ==============================================================================================================
    
    def read_pkl_raw(self):
        # not needed atm
        pass
    
    def read_pkl_processed(self, folder='data/processed/'):
        files = [f for f in os.listdir(folder) if '.pkl' in f]
        self.dfl = []
        for f in files:
            self.dfl.append(pd.read_pickle(folder+f).reset_index().drop('index', axis=1))
    
    def read_pkl_metainfo(self):
        self.numbering = pd.read_pickle(self.path + 'data_numbering.pkl')
        self.table = pd.read_pickle(self.path + 'data_table.pkl')
        self.mappings = pd.read_pickle(self.path + 'data_mappings.pkl')
    
    # ==============================================================================================================    
    
    def get_stacked_maps(self, pdb):
        # add gene to mapping
        mappings_ = self.mappings[self.mappings['PDB']==pdb]
        pref_chain = self.table[self.table['PDB']==pdb.upper()]['Preferred Chain'].iloc[0]
        map_df_list = []
        for j in range(len(mappings_)):
            chain = pd.DataFrame.from_dict(mappings_.iloc[j]['mappings'])['chain_id'].iloc[0]
            identifier = mappings_.iloc[j]['name']
            dict_ = pd.DataFrame.from_dict(mappings_.iloc[j]['mappings'])
            dict_['identifier'] = identifier
            map_df_list.append(pd.DataFrame.from_dict(dict_))
        _ = pd.concat(map_df_list)
        _ = _[_['chain_id']==pref_chain]
        _['PDB'] = pdb
        return _


    def get_generic_nums(self, pdb_id):
        sequence_numbers = []
        amino_acids = []
        generic_numbers = []
        for i in self.numbering[self.numbering['PDB']==pdb_id].iloc[0]['numbering']:
            if i['alternative_generic_numbers'] != []:
                sequence_numbers.append(i['sequence_number'])
                amino_acids.append(i['amino_acid'])
                generic_numbers.append(i['display_generic_number'])
        return list(zip(sequence_numbers, amino_acids, generic_numbers))


    def get_generic_number(self, zipped_pos_dict, l2u, comp_sid):
        if l2u >= 0:
            if l2u in list(zip(*zipped_pos_dict))[0]:
                idx = list(zip(*zipped_pos_dict))[0].index(l2u)
                row = zipped_pos_dict[idx]
                if row[1] == comp_sid:
                    # print("found row", row[1], float(row[2].split('x')[0]), int(row[2].split('x')[1]), comp_sid)
                    return row[2], row[1], float(row[2].split('x')[0]), int(row[2].split('x')[1])
                else:
                    # print("found row, but residue are not the same", row, comp_sid)
                    return row[2]+'?', row[1], float(row[2].split('x')[0]), int(row[2].split('x')[1])
            else:
                return ['', '', 0, 0]
        else:
            return ['', '', 0, 0]


    def assign_generic_numbers_(self, pdb_id, overwrite, folder):
        data = pd.read_pickle(folder + pdb_id + '.pkl').reset_index().drop('index', axis=1)
        print("loaded data to assign gen. numbers from", folder + pdb_id + '.pkl')
        cols = data.columns
        columns = ['gen_pos', 'gen_pos1', 'gen_pos2', 'uniprot_comp_sid']
        _ = [i for i in columns if i in cols]
        if len(_) > 0:
            if overwrite:
                data.drop(_, axis=1, inplace=True)
                data['label_2_uni'] = 0
                data[columns[0]] = ''
                data[columns[1]] = 0
                data[columns[2]] = 0
                data[columns[3]] = ''
            else:
                return data
        else:
            data['label_2_uni'] = 0
            data[columns[0]] = ''
            data[columns[1]] = 0
            data[columns[2]] = 0
            data[columns[3]] = ''
        maps_stacked = self.get_stacked_maps(pdb_id)
        if 'residue_number' in maps_stacked.index:
            pass
        else:
            return data
        if type(maps_stacked[maps_stacked['PDB']==pdb_id].\
                loc['residue_number'][['chain_id', 'start','end','unp_start','unp_end', 'identifier', 'PDB']])\
                    == pandas.core.series.Series:
            pref_mapping = maps_stacked[maps_stacked['PDB']==pdb_id].loc['residue_number']\
                [['chain_id', 'start','end','unp_start','unp_end', 'identifier', 'PDB']].to_frame().T
        else:
            pref_mapping = maps_stacked[maps_stacked['PDB']==pdb_id].\
                loc['residue_number'][['chain_id', 'start','end','unp_start','unp_end', 'identifier', 'PDB']]
        pref_chain = pref_mapping['chain_id'].iloc[0]
        pref_mapping = pref_mapping.sort_values('start')
        uniprot_identifier_ = data[data['PDB']==pdb_id]['identifier'].unique()
        uniprot_identifier = uniprot_identifier_[0]
        natoms = len(data[data['PDB']==pdb_id])
        
        for j in range(len(pref_mapping)):
            row = pref_mapping.iloc[j].to_dict()
            map_identifier = row['identifier']
            map_pdb = row['PDB']
            start_label_seq_id = row['start']
            start_uniprot = row['unp_start']
            end_label_seq_id = row['end']
            end_uniprot = row['unp_end']
            if map_identifier == uniprot_identifier:
                idxs = [x for x in range(natoms+1) \
                        if ((x <= end_label_seq_id) & (x >= start_label_seq_id))]
                vals = [x + start_uniprot - start_label_seq_id for x in range(natoms+1) \
                        if ((x <= end_label_seq_id) & (x >= start_label_seq_id))]
                for k, idx in enumerate(idxs):
                    line = data[(data['PDB'] == pdb_id) &
                                (data['label_seq_id'] == idx) &
                                (data['label_atom_id'] == 'CA')]
                    lines = len(line)
                    if len(line) > 1:
                        line = line[line['auth_asym_id'] == pref_chain]
                    if len(line) > 0:
                        data.at[line.index[0], 'label_2_uni'] = int(vals[k])
            else:
                #print('Didnt find correct uniprotmap (not a gpcr):', map_identifier)
                pass
        # Generate generic numbers
        zipped_pos_dict = self.get_generic_nums(pdb_id)
        if type(data) == pandas.core.series.Series:
            data = data.to_frame().T
        
        data[['gen_pos', 'uniprot_comp_sid', 'gen_pos1', 'gen_pos2']] = data.\
            apply(lambda x: self.get_generic_number(zipped_pos_dict, x.label_2_uni, x.label_comp_sid) if x.PDB==pdb_id\
                  else [x.gen_pos, x.uniprot_comp_sid, x.gen_pos1, x.gen_pos2], axis=1, result_type='expand')
        return data
    
    def assign_generic_numbers(self, pdb_ids=None, overwrite=True, folder='data/raw/'):
        dfl_ = []
        if pdb_ids != None:
            self.pdb_ids = pdb_ids
        if not isinstance(self.pdb_ids, list):
            self.pdb_ids = [self.pdb_ids]
        for pdb_id in self.pdb_ids:
            if self.allow_exception:
                print("trying to assign generic nubmers to", pdb_id)
                try:
                    dfl_.append(self.assign_generic_numbers_(pdb_id, overwrite=overwrite, folder=folder))
                    print("assigned generic numbers to", pdb_id, "\n\n\n")
                except:
                    print("assigning failed for", pdb_id)
            else:
                print("trying to assign generic nubmers to", pdb_id)
                dfl_.append(self.assign_generic_numbers_(pdb_id, overwrite=overwrite, folder=folder))
                print("assigned generic numbers to", pdb_id, "\n\n\n")
        self.dfl = dfl_
        del dfl_
        
    # ==============================================================================================================
    
    def plot_angles_for_gen_pos(self, region=[7.40, 7.60], legend=True):
        categories = []
        for df in self.dfl:
            category = np.unique(df['PDB'])[0]
            categories.append(category)
        # categories = list(set(categories))

        cmap = plt.cm.get_cmap('RdYlGn', len(categories))
        colors = np.linspace(0, len(categories)-1, len(categories))
        colordict = dict(zip(categories,colors))


        fig, axes = plt.subplots(nrows=1,ncols=2,figsize=(24,12))

        xlabel = 'generic position numbers'
        ylabel = 'degrees'

        xtick_min = region[0]
        xtick_max = region[1]

        nsteps = int(round(xtick_max - xtick_min, 2) / 0.01)
        xticks = np.linspace(xtick_min, xtick_max, nsteps+1)

        for i, cat in enumerate(categories):
            c = int(colordict[cat])
            df = self.dfl[i]
            roi = df[(df['gen_pos1']>=region[0]) & 
                     (df['gen_pos1']<region[1]) & 
                     (df['label_atom_id']=='CA')].copy()
            roi['phi'] = roi.apply(lambda x: float(x.phi), axis=1)
            roi['omega'] = roi.apply(lambda x: float(x.omega), axis=1)
            roi['psi'] = roi.apply(lambda x: float(x.psi), axis=1)
            tot_len = len(roi['phi'].to_list())+len(roi['omega'].to_list())+len(roi['psi'].to_list())
            if tot_len > 0:
                roi.plot(kind='line', x='gen_pos1', y='phi', 
                        ax = axes[0], subplots = True, color = cmap(c), 
                        xticks = xticks, label=cat)
                if legend:
                    axes[0].legend(bbox_to_anchor=(1.0, 1.0))
                axes[0].set_xlabel(xlabel)
                axes[0].set_ylabel(ylabel)
                axes[0].set_title('Psi Angles')

                roi.plot(kind='line', x='gen_pos1', y='psi', 
                        ax = axes[1], subplots = True, color = cmap(c), 
                        xticks = xticks, label=cat)
                if legend:
                    axes[1].legend(bbox_to_anchor=(1.0, 1.0))
                axes[1].set_xlabel(xlabel)
                axes[1].set_ylabel(ylabel)
                axes[1].set_title('Phi Angles')
        plt.show()
        
    def get_family():
        pass

In [214]:
for i in range(100):
    start = (i+7) * 20
    print(i)
    data = CifProcessor(starting_idx = start, limit = 20, allow_exception=False)
    data.read_pkl_metainfo()
    data.make_raws()
    data.to_pkl_raw(overwrite=False)
    data.assign_generic_numbers(overwrite=False)
    data.to_pkl_processed()

0


1it [00:00,  4.48it/s]

Loading cif file of 4BVN
data/mmcif/4BVN.cif
Loading cif file of 5DSG
data/mmcif/5DSG.cif


2it [00:00,  2.69it/s]

Loading cif file of 7BR3
data/mmcif/7BR3.cif


3it [00:01,  2.90it/s]

Loading cif file of 6ME6
data/mmcif/6ME6.cif


5it [00:01,  2.84it/s]

Loading cif file of 2YDO
data/mmcif/2YDO.cif
Loading cif file of 2RH1
data/mmcif/2RH1.cif


6it [00:02,  2.92it/s]

Loading cif file of 6KNM
data/mmcif/6KNM.cif


7it [00:02,  2.95it/s]

Loading cif file of 6OIJ
data/mmcif/6OIJ.cif


8it [00:03,  2.26it/s]

Loading cif file of 5EN0
data/mmcif/5EN0.cif


9it [00:03,  2.51it/s]

Loading cif file of 7D77
data/mmcif/7D77.cif


10it [00:04,  2.12it/s]

Loading cif file of 7D7M
data/mmcif/7D7M.cif


12it [00:04,  2.28it/s]

Loading cif file of 3REY
data/mmcif/3REY.cif
Loading cif file of 4QKX
data/mmcif/4QKX.cif


13it [00:05,  2.32it/s]

Loading cif file of 5IU4
data/mmcif/5IU4.cif


14it [00:05,  2.42it/s]

Loading cif file of 5W0P
data/mmcif/5W0P.cif


15it [00:07,  1.21it/s]

Loading cif file of 6TQ6
data/mmcif/6TQ6.cif


16it [00:07,  1.38it/s]

Loading cif file of 6XBK
data/mmcif/6XBK.cif


17it [00:08,  1.35it/s]

Loading cif file of 5UZ7
data/mmcif/5UZ7.cif


18it [00:09,  1.42it/s]

Loading cif file of 5VRA
data/mmcif/5VRA.cif


19it [00:09,  1.49it/s]

Loading cif file of 3QAK
data/mmcif/3QAK.cif


20it [00:10,  1.93it/s]


trying to assign generic nubmers to 4BVN
loaded data to assign gen. numbers from data/raw/4BVN.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: ADRB1_MELGA ADRB1_MELGA
trying to assign generic nubmers to 5DSG
loaded data to assign gen. numbers from data/raw/5DSG.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: ACM4_HUMAN ACM4_HUMAN
Found correct uniprot map: ACM4_HUMAN ACM4_HUMAN
trying to assign generic nubmers to 7BR3
loaded data to assign gen. numbers from data/raw/7BR3.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residu

0it [00:00, ?it/s]

Loading cif file of 6ME8
data/mmcif/6ME8.cif


2it [00:00,  3.21it/s]

Loading cif file of 2YDV
data/mmcif/2YDV.cif


3it [00:00,  3.44it/s]

Loading cif file of 4YAY
data/mmcif/4YAY.cif
Loading cif file of 4S0V
data/mmcif/4S0V.cif


4it [00:01,  3.32it/s]

Loading cif file of 6IGL
data/mmcif/6IGL.cif


6it [00:01,  3.36it/s]

Loading cif file of 2ZIY
data/mmcif/2ZIY.cif
Loading cif file of 6KJV
data/mmcif/6KJV.cif


7it [00:02,  2.70it/s]

Loading cif file of 6FFH
data/mmcif/6FFH.cif


8it [00:02,  2.85it/s]

Loading cif file of 7K15
data/mmcif/7K15.cif


9it [00:02,  2.92it/s]

Loading cif file of 5T1A
data/mmcif/5T1A.cif


10it [00:03,  2.95it/s]

Loading cif file of 2PED
data/mmcif/2PED.cif


11it [00:03,  2.68it/s]

Loading cif file of 6AKX
data/mmcif/6AKX.cif


12it [00:04,  2.54it/s]

Loading cif file of 5OLH
data/mmcif/5OLH.cif


13it [00:04,  2.67it/s]

Loading cif file of 6WI9
data/mmcif/6WI9.cif


14it [00:05,  2.22it/s]

Loading cif file of 7CA5
data/mmcif/7CA5.cif


15it [00:05,  2.11it/s]

Loading cif file of 2I35
data/mmcif/2I35.cif


16it [00:05,  2.38it/s]

Loading cif file of 6D9H
data/mmcif/6D9H.cif


17it [00:06,  2.17it/s]

Loading cif file of 6AK3
data/mmcif/6AK3.cif


18it [00:06,  2.24it/s]

Loading cif file of 3PXO
data/mmcif/3PXO.cif


19it [00:07,  2.41it/s]

Loading cif file of 5JQH
data/mmcif/5JQH.cif


20it [00:07,  2.52it/s]


trying to assign generic nubmers to 6ME8
loaded data to assign gen. numbers from data/raw/6ME8.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: MTR1B_HUMAN MTR1B_HUMAN
Found correct uniprot map: MTR1B_HUMAN MTR1B_HUMAN
trying to assign generic nubmers to 2YDV
loaded data to assign gen. numbers from data/raw/2YDV.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: AA2AR_HUMAN AA2AR_HUMAN
trying to assign generic nubmers to 4YAY
loaded data to assign gen. numbers from data/raw/4YAY.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 're

0it [00:00, ?it/s]

Loading cif file of 6WGT
data/mmcif/6WGT.cif


1it [00:00,  1.85it/s]

Loading cif file of 7CFN
data/mmcif/7CFN.cif


2it [00:01,  1.81it/s]

Loading cif file of 6PS0
data/mmcif/6PS0.cif


3it [00:01,  2.27it/s]

Loading cif file of 6IBL
data/mmcif/6IBL.cif


4it [00:01,  2.04it/s]

Loading cif file of 6TP4
data/mmcif/6TP4.cif


5it [00:02,  2.16it/s]

Loading cif file of 6LI3
data/mmcif/6LI3.cif


6it [00:02,  1.98it/s]

Loading cif file of 6NBI
data/mmcif/6NBI.cif


7it [00:03,  1.81it/s]

Loading cif file of 6OSA
data/mmcif/6OSA.cif


8it [00:04,  1.80it/s]

Loading cif file of 6K41
data/mmcif/6K41.cif


9it [00:04,  1.73it/s]

Loading cif file of 6QNO
data/mmcif/6QNO.cif


10it [00:05,  1.59it/s]

Loading cif file of 5ZKQ
data/mmcif/5ZKQ.cif


11it [00:06,  1.72it/s]

Loading cif file of 5IUB
data/mmcif/5IUB.cif


12it [00:06,  1.94it/s]

Loading cif file of 3RZE
data/mmcif/3RZE.cif


13it [00:06,  2.11it/s]

Loading cif file of 7JVQ
data/mmcif/7JVQ.cif


14it [00:07,  1.90it/s]

Loading cif file of 2Y00
data/mmcif/2Y00.cif


15it [00:07,  1.98it/s]

Loading cif file of 6WWZ
data/mmcif/6WWZ.cif


16it [00:08,  1.75it/s]

Loading cif file of 5X93
data/mmcif/5X93.cif


17it [00:09,  1.92it/s]

Loading cif file of 6LW5
data/mmcif/6LW5.cif


18it [00:09,  2.09it/s]

Loading cif file of 6RZ8
data/mmcif/6RZ8.cif


19it [00:09,  2.25it/s]

Loading cif file of 5X7D
data/mmcif/5X7D.cif


20it [00:10,  1.97it/s]


trying to assign generic nubmers to 6WGT
loaded data to assign gen. numbers from data/raw/6WGT.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: 5HT2A_HUMAN 5HT2A_HUMAN
Found correct uniprot map: 5HT2A_HUMAN 5HT2A_HUMAN
trying to assign generic nubmers to 7CFN
loaded data to assign gen. numbers from data/raw/7CFN.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: GPBAR_HUMAN GPBAR_HUMAN
trying to assign generic nubmers to 6PS0
loaded data to assign gen. numbers from data/raw/6PS0.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 're

1it [00:00,  4.15it/s]

Loading cif file of 6PEL
data/mmcif/6PEL.cif
Loading cif file of 6LN2
data/mmcif/6LN2.cif


2it [00:00,  2.63it/s]

Loading cif file of 6OT0
data/mmcif/6OT0.cif


3it [00:01,  1.89it/s]

Loading cif file of 7BZ2
data/mmcif/7BZ2.cif


4it [00:01,  1.89it/s]

Loading cif file of 4LDE
data/mmcif/4LDE.cif


5it [00:02,  2.10it/s]

Loading cif file of 6MEO
data/mmcif/6MEO.cif


6it [00:02,  2.00it/s]

Loading cif file of 2G87
data/mmcif/2G87.cif


7it [00:03,  2.06it/s]

Loading cif file of 4Z34
data/mmcif/4Z34.cif


8it [00:03,  2.33it/s]

Loading cif file of 6WHC
data/mmcif/6WHC.cif


9it [00:04,  2.03it/s]

Loading cif file of 2Z73
data/mmcif/2Z73.cif


10it [00:04,  2.05it/s]

Loading cif file of 2HPY
data/mmcif/2HPY.cif


11it [00:05,  2.07it/s]

Loading cif file of 6C1R
data/mmcif/6C1R.cif


12it [00:05,  2.29it/s]

Loading cif file of 7CKY
data/mmcif/7CKY.cif


13it [00:06,  2.02it/s]

Loading cif file of 6UUN
data/mmcif/6UUN.cif


14it [00:06,  1.82it/s]

Loading cif file of 7CKX
data/mmcif/7CKX.cif


15it [00:07,  1.72it/s]

Loading cif file of 6LMK
data/mmcif/6LMK.cif


16it [00:08,  1.61it/s]

Loading cif file of 5IU7
data/mmcif/5IU7.cif


17it [00:08,  1.79it/s]

Loading cif file of 4GPO
data/mmcif/4GPO.cif


18it [00:09,  1.91it/s]

Loading cif file of 1f88
data/mmcif/1f88.cif


19it [00:09,  1.95it/s]

Loading cif file of 4BUO
data/mmcif/4BUO.cif


20it [00:10,  1.99it/s]


trying to assign generic nubmers to 6PEL
loaded data to assign gen. numbers from data/raw/6PEL.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: OPSD_BOVIN OPSD_BOVIN
trying to assign generic nubmers to 6LN2
loaded data to assign gen. numbers from data/raw/6LN2.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: GLP1R_HUMAN GLP1R_HUMAN
Found correct uniprot map: GLP1R_HUMAN GLP1R_HUMAN
trying to assign generic nubmers to 6OT0
loaded data to assign gen. numbers from data/raw/6OT0.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: SMO_HUMAN SMO_HUMAN
trying to assign generic nubmers to 7BZ2
loaded data to assign gen. numbe

0it [00:00, ?it/s]

Loading cif file of 4GRV
data/mmcif/4GRV.cif


1it [00:00,  3.53it/s]

Loading cif file of 6N51
data/mmcif/6N51.cif


3it [00:01,  1.87it/s]

Loading cif file of 4MQT
data/mmcif/4MQT.cif


4it [00:01,  2.30it/s]

Loading cif file of 5ZKB
data/mmcif/5ZKB.cif
Loading cif file of 2Y03
data/mmcif/2Y03.cif


5it [00:02,  2.42it/s]

Loading cif file of 4AMI
data/mmcif/4AMI.cif


6it [00:02,  2.51it/s]

Loading cif file of 4RWS
data/mmcif/4RWS.cif


7it [00:02,  2.61it/s]

Loading cif file of 7CUM
data/mmcif/7CUM.cif


9it [00:04,  1.71it/s]

Loading cif file of 4BEZ
data/mmcif/4BEZ.cif
Loading cif file of 4OO9
data/mmcif/4OO9.cif


10it [00:05,  1.97it/s]

Loading cif file of 5WIU
data/mmcif/5WIU.cif


11it [00:05,  2.22it/s]

Loading cif file of 6PH7
data/mmcif/6PH7.cif


12it [00:05,  2.46it/s]

Loading cif file of 7DFP
data/mmcif/7DFP.cif


13it [00:06,  2.29it/s]

Loading cif file of 3C9L
data/mmcif/3C9L.cif


14it [00:06,  2.50it/s]

Loading cif file of 7JVP
data/mmcif/7JVP.cif


15it [00:07,  2.12it/s]

Loading cif file of 6WJC
data/mmcif/6WJC.cif


16it [00:07,  2.18it/s]

Loading cif file of 5NM4
data/mmcif/5NM4.cif


17it [00:08,  2.08it/s]

Loading cif file of 5V56
data/mmcif/5V56.cif


18it [00:08,  1.77it/s]

Loading cif file of 6BQH
data/mmcif/6BQH.cif


19it [00:09,  1.99it/s]

Loading cif file of 5WF6
data/mmcif/5WF6.cif


20it [00:09,  2.09it/s]


trying to assign generic nubmers to 4GRV
loaded data to assign gen. numbers from data/raw/4GRV.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: NTR1_RAT NTR1_RAT
Found correct uniprot map: NTR1_RAT NTR1_RAT
trying to assign generic nubmers to 6N51
loaded data to assign gen. numbers from data/raw/6N51.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: GRM5_HUMAN GRM5_HUMAN
trying to assign generic nubmers to 4MQT
loaded data to assign gen. numbers from data/raw/4MQT.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: ACM2_HUMAN ACM2_HUMAN
Fo

0it [00:00, ?it/s]

Loading cif file of 5MZJ
data/mmcif/5MZJ.cif


1it [00:00,  3.36it/s]

Loading cif file of 7CKW
data/mmcif/7CKW.cif


3it [00:01,  2.66it/s]

Loading cif file of 6J21
data/mmcif/6J21.cif


4it [00:01,  3.06it/s]

Loading cif file of 4A4M
data/mmcif/4A4M.cif
Loading cif file of 5O9H
data/mmcif/5O9H.cif


5it [00:01,  2.82it/s]

Loading cif file of 2YCZ
data/mmcif/2YCZ.cif


6it [00:02,  2.77it/s]

Loading cif file of 6FUF
data/mmcif/6FUF.cif


7it [00:02,  2.83it/s]

Loading cif file of 6LFM
data/mmcif/6LFM.cif


8it [00:03,  2.14it/s]

Loading cif file of 3V2Y
data/mmcif/3V2Y.cif


9it [00:03,  2.35it/s]

Loading cif file of 6X18
data/mmcif/6X18.cif


10it [00:04,  1.93it/s]

Loading cif file of 7DDZ
data/mmcif/7DDZ.cif


11it [00:04,  2.15it/s]

Loading cif file of 6V9S
data/mmcif/6V9S.cif


12it [00:05,  2.27it/s]

Loading cif file of 5LWE
data/mmcif/5LWE.cif


13it [00:05,  2.29it/s]

Loading cif file of 6UUS
data/mmcif/6UUS.cif


14it [00:06,  1.96it/s]

Loading cif file of 5WS3
data/mmcif/5WS3.cif


15it [00:06,  2.08it/s]

Loading cif file of 3NY8
data/mmcif/3NY8.cif


16it [00:06,  2.25it/s]

Loading cif file of 6KK7
data/mmcif/6KK7.cif


17it [00:07,  2.09it/s]

Loading cif file of 6DS0
data/mmcif/6DS0.cif


18it [00:07,  2.30it/s]

Loading cif file of 6I9K
data/mmcif/6I9K.cif


19it [00:08,  2.51it/s]

Loading cif file of 3EML
data/mmcif/3EML.cif


20it [00:08,  2.35it/s]


trying to assign generic nubmers to 5MZJ
loaded data to assign gen. numbers from data/raw/5MZJ.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: AA2AR_HUMAN AA2AR_HUMAN
Found correct uniprot map: AA2AR_HUMAN AA2AR_HUMAN
trying to assign generic nubmers to 7CKW
loaded data to assign gen. numbers from data/raw/7CKW.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: DRD1_HUMAN DRD1_HUMAN
trying to assign generic nubmers to 6J21
loaded data to assign gen. numbers from data/raw/6J21.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'resi

0it [00:00, ?it/s]

Loading cif file of 6B73
data/mmcif/6B73.cif


1it [00:00,  2.25it/s]

Loading cif file of 6VI4
data/mmcif/6VI4.cif


3it [00:01,  2.76it/s]

Loading cif file of 5N2S
data/mmcif/5N2S.cif
Loading cif file of 4DJH
data/mmcif/4DJH.cif


4it [00:01,  2.35it/s]

Loading cif file of 5L7D
data/mmcif/5L7D.cif


5it [00:02,  1.95it/s]

Loading cif file of 3OE9
data/mmcif/3OE9.cif


7it [00:03,  2.25it/s]

Loading cif file of 6KUX
data/mmcif/6KUX.cif
Loading cif file of 4DAJ
data/mmcif/4DAJ.cif


8it [00:04,  1.69it/s]

Loading cif file of 7C61
data/mmcif/7C61.cif


9it [00:04,  1.76it/s]

Loading cif file of 6D26
data/mmcif/6D26.cif


10it [00:04,  1.99it/s]

Loading cif file of 6FK6
data/mmcif/6FK6.cif


11it [00:05,  2.26it/s]

Loading cif file of 6ZDV
data/mmcif/6ZDV.cif


12it [00:05,  2.43it/s]

Loading cif file of 3ZPQ
data/mmcif/3ZPQ.cif


13it [00:06,  2.40it/s]

Loading cif file of 3RFM
data/mmcif/3RFM.cif


14it [00:06,  2.66it/s]

Loading cif file of 5N2R
data/mmcif/5N2R.cif


15it [00:06,  2.71it/s]

Loading cif file of 4Z36
data/mmcif/4Z36.cif


16it [00:06,  2.76it/s]

Loading cif file of 5YQZ
data/mmcif/5YQZ.cif


17it [00:07,  2.61it/s]

Loading cif file of 3DQB
data/mmcif/3DQB.cif


18it [00:07,  2.73it/s]

Loading cif file of 6LPL
data/mmcif/6LPL.cif


19it [00:08,  2.73it/s]

Loading cif file of 6LFO
data/mmcif/6LFO.cif


20it [00:08,  2.25it/s]


trying to assign generic nubmers to 6B73
loaded data to assign gen. numbers from data/raw/6B73.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: OPRK_HUMAN OPRK_HUMAN
trying to assign generic nubmers to 6VI4
loaded data to assign gen. numbers from data/raw/6VI4.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: OPRK_HUMAN OPRK_HUMAN
trying to assign generic nubmers to 5N2S
loaded data to assign gen. numbers from data/raw/5N2S.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: AA1R_HUMAN AA1R_HUMAN
trying to assign generic nubmers to 4DJH
loaded data to assign gen. numbers from data/raw/4DJH.pkl
Index(['author_resi

1it [00:00,  3.97it/s]

Loading cif file of 4MQS
data/mmcif/4MQS.cif
Loading cif file of 6XBM
data/mmcif/6XBM.cif


2it [00:00,  2.12it/s]

Loading cif file of 7LCI
data/mmcif/7LCI.cif


3it [00:01,  1.90it/s]

Loading cif file of 6RNK
data/mmcif/6RNK.cif


4it [00:03,  1.00it/s]

Loading cif file of 6H7J
data/mmcif/6H7J.cif


5it [00:03,  1.18it/s]

Loading cif file of 4IB4
data/mmcif/4IB4.cif


6it [00:04,  1.52it/s]

Loading cif file of 7D3S
data/mmcif/7D3S.cif


7it [00:04,  1.56it/s]

Loading cif file of 6FKA
data/mmcif/6FKA.cif


8it [00:04,  1.89it/s]

Loading cif file of 6PB0
data/mmcif/6PB0.cif


9it [00:05,  1.78it/s]

Loading cif file of 6FK8
data/mmcif/6FK8.cif


10it [00:05,  2.08it/s]

Loading cif file of 6UO8
data/mmcif/6UO8.cif


11it [00:06,  1.74it/s]

Loading cif file of 4BEY
data/mmcif/4BEY.cif


12it [00:06,  2.03it/s]

Loading cif file of 4Z35
data/mmcif/4Z35.cif


13it [00:07,  2.24it/s]

Loading cif file of 6JZH
data/mmcif/6JZH.cif


14it [00:07,  2.12it/s]

Loading cif file of 5C1M
data/mmcif/5C1M.cif


15it [00:08,  2.27it/s]

Loading cif file of 5WF5
data/mmcif/5WF5.cif


16it [00:08,  2.39it/s]

Loading cif file of 6PT2
data/mmcif/6PT2.cif


17it [00:09,  2.21it/s]

Loading cif file of 5WKT
data/mmcif/5WKT.cif


18it [00:09,  2.40it/s]

Loading cif file of 6XBJ
data/mmcif/6XBJ.cif


19it [00:10,  1.90it/s]

Loading cif file of 6FKB
data/mmcif/6FKB.cif


20it [00:10,  1.89it/s]


trying to assign generic nubmers to 4MQS
loaded data to assign gen. numbers from data/raw/4MQS.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: ACM2_HUMAN ACM2_HUMAN
Found correct uniprot map: ACM2_HUMAN ACM2_HUMAN
trying to assign generic nubmers to 6XBM
loaded data to assign gen. numbers from data/raw/6XBM.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: SMO_HUMAN SMO_HUMAN
trying to assign generic nubmers to 7LCI
loaded data to assign gen. numbers from data/raw/7LCI.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: GLP1R_HUMAN GLP1R_HUMAN
trying to assign generic nubmers to 6RNK
loaded data to assign gen. numbers from data/raw/6RNK.pkl
Index(['author_residue_number', 'author_insertion_c

0it [00:00, ?it/s]

Loading cif file of 5OM4
data/mmcif/5OM4.cif


2it [00:00,  3.49it/s]

Loading cif file of 7LCJ
data/mmcif/7LCJ.cif
Loading cif file of 4K5Y
data/mmcif/4K5Y.cif


3it [00:01,  2.35it/s]

Loading cif file of 6ME7
data/mmcif/6ME7.cif


4it [00:01,  2.30it/s]

Loading cif file of 6PS3
data/mmcif/6PS3.cif


5it [00:01,  2.53it/s]

Loading cif file of 2I37
data/mmcif/2I37.cif


6it [00:02,  2.23it/s]

Loading cif file of 6FFI
data/mmcif/6FFI.cif


7it [00:02,  2.48it/s]

Loading cif file of 7C7S
data/mmcif/7C7S.cif


8it [00:03,  1.94it/s]

Loading cif file of 5D6L
data/mmcif/5D6L.cif


9it [00:03,  2.16it/s]

Loading cif file of 7DHI
data/mmcif/7DHI.cif


10it [00:04,  2.00it/s]

Loading cif file of 6OBA
data/mmcif/6OBA.cif


11it [00:04,  2.03it/s]

Loading cif file of 6KPC
data/mmcif/6KPC.cif


12it [00:05,  2.23it/s]

Loading cif file of 6NIY
data/mmcif/6NIY.cif


13it [00:05,  2.03it/s]

Loading cif file of 4PXF
data/mmcif/4PXF.cif


14it [00:06,  2.27it/s]

Loading cif file of 2X72
data/mmcif/2X72.cif


15it [00:06,  2.48it/s]

Loading cif file of 6DDF
data/mmcif/6DDF.cif


16it [00:07,  2.23it/s]

Loading cif file of 3UZC
data/mmcif/3UZC.cif


17it [00:07,  2.48it/s]

Loading cif file of 5ZKC
data/mmcif/5ZKC.cif


18it [00:07,  2.59it/s]

Loading cif file of 6NBH
data/mmcif/6NBH.cif


19it [00:08,  2.06it/s]

Loading cif file of 5MZP
data/mmcif/5MZP.cif


20it [00:08,  2.26it/s]


trying to assign generic nubmers to 5OM4
loaded data to assign gen. numbers from data/raw/5OM4.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: AA2AR_HUMAN AA2AR_HUMAN
Found correct uniprot map: AA2AR_HUMAN AA2AR_HUMAN
trying to assign generic nubmers to 7LCJ
loaded data to assign gen. numbers from data/raw/7LCJ.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: GLP1R_HUMAN GLP1R_HUMAN
trying to assign generic nubmers to 4K5Y
loaded data to assign gen. numbers from data/raw/4K5Y.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 're

0it [00:00, ?it/s]

Loading cif file of 6IIV
data/mmcif/6IIV.cif


1it [00:00,  3.41it/s]

Loading cif file of 3VGA
data/mmcif/3VGA.cif


2it [00:00,  2.65it/s]

Loading cif file of 6H7N
data/mmcif/6H7N.cif


4it [00:01,  2.54it/s]

Loading cif file of 6S0Q
data/mmcif/6S0Q.cif
Loading cif file of 6H7M
data/mmcif/6H7M.cif


5it [00:02,  2.18it/s]

Loading cif file of 6OS0
data/mmcif/6OS0.cif


6it [00:02,  2.37it/s]

Loading cif file of 6TPN
data/mmcif/6TPN.cif


7it [00:02,  2.47it/s]

Loading cif file of 6A94
data/mmcif/6A94.cif


8it [00:03,  2.33it/s]

Loading cif file of 6OY9
data/mmcif/6OY9.cif


9it [00:03,  2.12it/s]

Loading cif file of 5VAI
data/mmcif/5VAI.cif


10it [00:04,  1.88it/s]

Loading cif file of 6KK1
data/mmcif/6KK1.cif


11it [00:06,  1.06it/s]

Loading cif file of 3VG9
data/mmcif/3VG9.cif


12it [00:06,  1.23it/s]

Loading cif file of 6H7O
data/mmcif/6H7O.cif


13it [00:07,  1.32it/s]

Loading cif file of 6OS2
data/mmcif/6OS2.cif


14it [00:08,  1.53it/s]

Loading cif file of 7CA3
data/mmcif/7CA3.cif


15it [00:09,  1.16it/s]

Loading cif file of 5ZBQ
data/mmcif/5ZBQ.cif


16it [00:09,  1.38it/s]

Loading cif file of 6VMS
data/mmcif/6VMS.cif


17it [00:10,  1.40it/s]

Loading cif file of 4IAR
data/mmcif/4IAR.cif


18it [00:10,  1.63it/s]

Loading cif file of 4EJ4
data/mmcif/4EJ4.cif


19it [00:11,  1.83it/s]

Loading cif file of 2R4R
data/mmcif/2R4R.cif


20it [00:11,  1.70it/s]


trying to assign generic nubmers to 6IIV
loaded data to assign gen. numbers from data/raw/6IIV.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: TA2R_HUMAN TA2R_HUMAN
Found correct uniprot map: TA2R_HUMAN TA2R_HUMAN
trying to assign generic nubmers to 3VGA
loaded data to assign gen. numbers from data/raw/3VGA.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: AA2AR_HUMAN AA2AR_HUMAN
trying to assign generic nubmers to 6H7N
loaded data to assign gen. numbers from data/raw/6H7N.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: ADRB1_MELGA ADRB1_ME

1it [00:00,  3.77it/s]

Loading cif file of 5YC8
data/mmcif/5YC8.cif
Loading cif file of 6VN7
data/mmcif/6VN7.cif


2it [00:00,  2.26it/s]

Loading cif file of 7JJO
data/mmcif/7JJO.cif


3it [00:01,  1.44it/s]

Loading cif file of 6MH8
data/mmcif/6MH8.cif


4it [00:02,  1.67it/s]

Loading cif file of 3D4S
data/mmcif/3D4S.cif


5it [00:02,  1.99it/s]

Loading cif file of 2Y01
data/mmcif/2Y01.cif


6it [00:03,  2.14it/s]

Loading cif file of 3NY9
data/mmcif/3NY9.cif


7it [00:03,  2.38it/s]

Loading cif file of 6FKD
data/mmcif/6FKD.cif


8it [00:03,  2.65it/s]

Loading cif file of 4XNW
data/mmcif/4XNW.cif


9it [00:04,  2.51it/s]

Loading cif file of 6MET
data/mmcif/6MET.cif


10it [00:04,  2.11it/s]

Loading cif file of 6P9Y
data/mmcif/6P9Y.cif


11it [00:05,  1.96it/s]

Loading cif file of 2YCW
data/mmcif/2YCW.cif


12it [00:05,  2.07it/s]

Loading cif file of 3AYN
data/mmcif/3AYN.cif


13it [00:06,  2.07it/s]

Loading cif file of 4NTJ
data/mmcif/4NTJ.cif


14it [00:06,  2.27it/s]

Loading cif file of 4U16
data/mmcif/4U16.cif


15it [00:07,  2.18it/s]

Loading cif file of 4PY0
data/mmcif/4PY0.cif


16it [00:07,  2.34it/s]

Loading cif file of 6LFL
data/mmcif/6LFL.cif


17it [00:07,  2.40it/s]

Loading cif file of 5K2D
data/mmcif/5K2D.cif


18it [00:08,  2.48it/s]

Loading cif file of 6WIV
data/mmcif/6WIV.cif


19it [00:08,  1.89it/s]

Loading cif file of 4U14
data/mmcif/4U14.cif


20it [00:09,  2.13it/s]


trying to assign generic nubmers to 5YC8
loaded data to assign gen. numbers from data/raw/5YC8.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: ACM2_HUMAN ACM2_HUMAN
Found correct uniprot map: ACM2_HUMAN ACM2_HUMAN
trying to assign generic nubmers to 6VN7
loaded data to assign gen. numbers from data/raw/6VN7.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: VIPR1_HUMAN VIPR1_HUMAN
trying to assign generic nubmers to 7JJO
loaded data to assign gen. numbers from data/raw/7JJO.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: ADRB1_MELGA ADRB1_MELGA
trying to assign generic nubmers to 6MH8
loaded data to assign gen. num

0it [00:00, ?it/s]

Loading cif file of 6D32
data/mmcif/6D32.cif


1it [00:00,  2.94it/s]

Loading cif file of 4LDL
data/mmcif/4LDL.cif


2it [00:00,  2.89it/s]

Loading cif file of 4ZWJ
data/mmcif/4ZWJ.cif


3it [00:03,  1.52s/it]

Loading cif file of 7L0Q
data/mmcif/7L0Q.cif


4it [00:04,  1.15s/it]

Loading cif file of 6H7L
data/mmcif/6H7L.cif


5it [00:04,  1.06it/s]

Loading cif file of 7CZ5
data/mmcif/7CZ5.cif


6it [00:05,  1.20it/s]

Loading cif file of 4J4Q
data/mmcif/4J4Q.cif


7it [00:05,  1.52it/s]

Loading cif file of 6DO1
data/mmcif/6DO1.cif


8it [00:06,  1.57it/s]

Loading cif file of 6IBB
data/mmcif/6IBB.cif


9it [00:06,  1.62it/s]

Loading cif file of 6AQF
data/mmcif/6AQF.cif


10it [00:07,  1.88it/s]

Loading cif file of 5K2C
data/mmcif/5K2C.cif


11it [00:07,  2.08it/s]

Loading cif file of 7D76
data/mmcif/7D76.cif


12it [00:08,  1.98it/s]

Loading cif file of 6LI0
data/mmcif/6LI0.cif


13it [00:08,  2.14it/s]

Loading cif file of 6PS4
data/mmcif/6PS4.cif


14it [00:08,  2.25it/s]

Loading cif file of 6N52
data/mmcif/6N52.cif


15it [00:09,  1.80it/s]

Loading cif file of 4N6H
data/mmcif/4N6H.cif


16it [00:10,  1.98it/s]

Loading cif file of 6LI1
data/mmcif/6LI1.cif


17it [00:10,  2.13it/s]

Loading cif file of 5OLV
data/mmcif/5OLV.cif


18it [00:10,  2.23it/s]

Loading cif file of 6DRX
data/mmcif/6DRX.cif


19it [00:11,  2.38it/s]

Loading cif file of 6M1I
data/mmcif/6M1I.cif


20it [00:11,  1.68it/s]


trying to assign generic nubmers to 6D32
loaded data to assign gen. numbers from data/raw/6D32.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
trying to assign generic nubmers to 4LDL
loaded data to assign gen. numbers from data/raw/4LDL.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: ADRB2_HUMAN ADRB2_HUMAN
trying to assign generic nubmers to 4ZWJ
loaded data to assign gen. numbers from data/raw/4ZWJ.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
     

0it [00:00, ?it/s]

Loading cif file of 5UEN
data/mmcif/5UEN.cif


1it [00:00,  2.20it/s]

Loading cif file of 6W2X
data/mmcif/6W2X.cif


2it [00:01,  1.75it/s]

Loading cif file of 3OE8
data/mmcif/3OE8.cif


3it [00:01,  1.55it/s]

Loading cif file of 6GPS
data/mmcif/6GPS.cif


4it [00:02,  1.79it/s]

Loading cif file of 5X33
data/mmcif/5X33.cif


5it [00:02,  2.12it/s]

Loading cif file of 6UVA
data/mmcif/6UVA.cif


6it [00:03,  1.85it/s]

Loading cif file of 5T04
data/mmcif/5T04.cif


7it [00:03,  2.09it/s]

Loading cif file of 5V57
data/mmcif/5V57.cif


8it [00:04,  1.78it/s]

Loading cif file of 6WH4
data/mmcif/6WH4.cif


9it [00:04,  1.74it/s]

Loading cif file of 4XT3
data/mmcif/4XT3.cif


10it [00:05,  2.05it/s]

Loading cif file of 7C6A
data/mmcif/7C6A.cif


11it [00:05,  2.00it/s]

Loading cif file of 1GZM
data/mmcif/1GZM.cif


12it [00:06,  2.04it/s]

Loading cif file of 5GLI
data/mmcif/5GLI.cif


13it [00:06,  2.11it/s]

Loading cif file of 6ORV
data/mmcif/6ORV.cif


14it [00:07,  1.93it/s]

Loading cif file of 5OLZ
data/mmcif/5OLZ.cif


15it [00:07,  2.10it/s]

Loading cif file of 5VEX
data/mmcif/5VEX.cif


16it [00:08,  1.98it/s]

Loading cif file of 3ZPR
data/mmcif/3ZPR.cif


17it [00:08,  2.02it/s]

Loading cif file of 6KQI
data/mmcif/6KQI.cif


18it [00:09,  2.14it/s]

Loading cif file of 6OFJ
data/mmcif/6OFJ.cif


19it [00:09,  1.77it/s]

Loading cif file of 6PS5
data/mmcif/6PS5.cif


20it [00:10,  1.93it/s]


trying to assign generic nubmers to 5UEN
loaded data to assign gen. numbers from data/raw/5UEN.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: AA1R_HUMAN AA1R_HUMAN
Found correct uniprot map: AA1R_HUMAN AA1R_HUMAN
trying to assign generic nubmers to 6W2X
loaded data to assign gen. numbers from data/raw/6W2X.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: GABR2_HUMAN GABR2_HUMAN
trying to assign generic nubmers to 3OE8
loaded data to assign gen. numbers from data/raw/3OE8.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residu

0it [00:00, ?it/s]

Loading cif file of 6WPW
data/mmcif/6WPW.cif


1it [00:00,  1.65it/s]

Loading cif file of 5NLX
data/mmcif/5NLX.cif


2it [00:01,  1.95it/s]

Loading cif file of 5NM2
data/mmcif/5NM2.cif


4it [00:03,  1.29it/s]

Loading cif file of 6FK9
data/mmcif/6FK9.cif
Loading cif file of 5V54
data/mmcif/5V54.cif


6it [00:03,  1.90it/s]

Loading cif file of 6DRZ
data/mmcif/6DRZ.cif
Loading cif file of 5L7I
data/mmcif/5L7I.cif


8it [00:04,  2.12it/s]

Loading cif file of 5A8E
data/mmcif/5A8E.cif
Loading cif file of 6HLL
data/mmcif/6HLL.cif


9it [00:05,  2.31it/s]

Loading cif file of 3NYA
data/mmcif/3NYA.cif


10it [00:05,  2.50it/s]

Loading cif file of 6GT3
data/mmcif/6GT3.cif


11it [00:05,  2.61it/s]

Loading cif file of 6OL9
data/mmcif/6OL9.cif


12it [00:06,  2.73it/s]

Loading cif file of 5ZK3
data/mmcif/5ZK3.cif


13it [00:06,  2.85it/s]

Loading cif file of 5XJM
data/mmcif/5XJM.cif


14it [00:06,  2.50it/s]

Loading cif file of 5UIW
data/mmcif/5UIW.cif


15it [00:07,  2.60it/s]

Loading cif file of 5DHG
data/mmcif/5DHG.cif


16it [00:07,  2.48it/s]

Loading cif file of 2J4Y
data/mmcif/2J4Y.cif


17it [00:08,  2.39it/s]

Loading cif file of 7LCK
data/mmcif/7LCK.cif


18it [00:08,  2.54it/s]

Loading cif file of 6WHA
data/mmcif/6WHA.cif


19it [00:09,  2.18it/s]

Loading cif file of 3ODU
data/mmcif/3ODU.cif


20it [00:09,  2.05it/s]


trying to assign generic nubmers to 6WPW
loaded data to assign gen. numbers from data/raw/6WPW.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: GLR_HUMAN GLR_HUMAN
trying to assign generic nubmers to 5NLX
loaded data to assign gen. numbers from data/raw/5NLX.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: AA2AR_HUMAN AA2AR_HUMAN
Found correct uniprot map: AA2AR_HUMAN AA2AR_HUMAN
trying to assign generic nubmers to 5NM2
loaded data to assign gen. numbers from data/raw/5NM2.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residu

writing to file: data/processed/5NM2.pkl
writing to file: data/processed/3NYA.pkl
writing to file: data/processed/2J4Y.pkl
writing to file: data/processed/6WHA.pkl
14


0it [00:00, ?it/s]

Loading cif file of 6TKO
data/mmcif/6TKO.cif


1it [00:00,  1.84it/s]

Loading cif file of 4Z9G
data/mmcif/4Z9G.cif


3it [00:01,  2.08it/s]

Loading cif file of 3V2W
data/mmcif/3V2W.cif


4it [00:01,  2.50it/s]

Loading cif file of 4NC3
data/mmcif/4NC3.cif
Loading cif file of 5GLH
data/mmcif/5GLH.cif


5it [00:02,  2.73it/s]

Loading cif file of 4WW3
data/mmcif/4WW3.cif


6it [00:02,  2.57it/s]

Loading cif file of 5VEW
data/mmcif/5VEW.cif


7it [00:03,  2.31it/s]

Loading cif file of 6KUW
data/mmcif/6KUW.cif


8it [00:03,  2.09it/s]

Loading cif file of 3OAX
data/mmcif/3OAX.cif


9it [00:04,  2.11it/s]

Loading cif file of 5CXV
data/mmcif/5CXV.cif


10it [00:04,  2.31it/s]

Loading cif file of 5UVI
data/mmcif/5UVI.cif


11it [00:04,  2.54it/s]

Loading cif file of 3PBL
data/mmcif/3PBL.cif


12it [00:05,  2.28it/s]

Loading cif file of 5D5A
data/mmcif/5D5A.cif


13it [00:05,  2.40it/s]

Loading cif file of 6LPK
data/mmcif/6LPK.cif


14it [00:06,  2.54it/s]

Loading cif file of 6RZ5
data/mmcif/6RZ5.cif


15it [00:06,  2.34it/s]

Loading cif file of 7C7Q
data/mmcif/7C7Q.cif


16it [00:07,  1.86it/s]

Loading cif file of 4O9R
data/mmcif/4O9R.cif


17it [00:07,  2.06it/s]

Loading cif file of 6OMM
data/mmcif/6OMM.cif


18it [00:08,  1.82it/s]

Loading cif file of 5XRA
data/mmcif/5XRA.cif


19it [00:08,  2.01it/s]

Loading cif file of 5NDZ
data/mmcif/5NDZ.cif


20it [00:09,  2.18it/s]


trying to assign generic nubmers to 6TKO
loaded data to assign gen. numbers from data/raw/6TKO.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: ADRB1_MELGA ADRB1_MELGA
trying to assign generic nubmers to 4Z9G
loaded data to assign gen. numbers from data/raw/4Z9G.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: CRFR1_HUMAN CRFR1_HUMAN
Found correct uniprot map: CRFR1_HUMAN CRFR1_HUMAN
trying to assign generic nubmers to 3V2W
loaded data to assign gen. numbers from data/raw/3V2W.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 're

Found correct uniprot map: PAR2_HUMAN PAR2_HUMAN
writing to file: data/processed/4Z9G.pkl
writing to file: data/processed/4NC3.pkl
writing to file: data/processed/5UVI.pkl
writing to file: data/processed/3PBL.pkl
writing to file: data/processed/7C7Q.pkl
15


0it [00:00, ?it/s]

Loading cif file of 6LML
data/mmcif/6LML.cif


2it [00:00,  2.56it/s]

Loading cif file of 3C9M
data/mmcif/3C9M.cif
Loading cif file of 6OYA
data/mmcif/6OYA.cif


3it [00:01,  2.13it/s]

Loading cif file of 5DHH
data/mmcif/5DHH.cif


4it [00:01,  2.26it/s]

Loading cif file of 6BQG
data/mmcif/6BQG.cif


5it [00:02,  2.61it/s]

Loading cif file of 6WZG
data/mmcif/6WZG.cif


6it [00:02,  2.16it/s]

Loading cif file of 6PS2
data/mmcif/6PS2.cif


7it [00:03,  2.36it/s]

Loading cif file of 5DGY
data/mmcif/5DGY.cif


8it [00:06,  1.24s/it]

Loading cif file of 4LDO
data/mmcif/4LDO.cif


9it [00:06,  1.02it/s]

Loading cif file of 4XES
data/mmcif/4XES.cif


10it [00:06,  1.26it/s]

Loading cif file of 6PS1
data/mmcif/6PS1.cif


11it [00:07,  1.52it/s]

Loading cif file of 6A93
data/mmcif/6A93.cif


12it [00:07,  1.63it/s]

Loading cif file of 3P0G
data/mmcif/3P0G.cif


13it [00:08,  1.88it/s]

Loading cif file of 7CFM
data/mmcif/7CFM.cif


14it [00:08,  1.78it/s]

Loading cif file of 6TQ4
data/mmcif/6TQ4.cif


15it [00:09,  1.87it/s]

Loading cif file of 6Z10
data/mmcif/6Z10.cif


16it [00:09,  2.02it/s]

Loading cif file of 6RZ4
data/mmcif/6RZ4.cif


17it [00:09,  2.20it/s]

Loading cif file of 4UHR
data/mmcif/4UHR.cif


18it [00:10,  2.41it/s]

Loading cif file of 5UIG
data/mmcif/5UIG.cif


19it [00:10,  2.53it/s]

Loading cif file of 5IUA
data/mmcif/5IUA.cif


20it [00:10,  1.82it/s]


trying to assign generic nubmers to 6LML
loaded data to assign gen. numbers from data/raw/6LML.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: GLR_HUMAN GLR_HUMAN
trying to assign generic nubmers to 3C9M
loaded data to assign gen. numbers from data/raw/3C9M.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: OPSD_BOVIN OPSD_BOVIN
trying to assign generic nubmers to 6OYA
loaded data to assign gen. numbers from data/raw/6OYA.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: OPSD_BOVIN OPSD_BOVIN
trying to assign generic nubmers to 5DHH
loaded data to assign gen. numbers from data/raw/5DHH.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found corr

0it [00:00, ?it/s]

Loading cif file of 6NI3
data/mmcif/6NI3.cif


1it [00:00,  1.77it/s]

Loading cif file of 2I36
data/mmcif/2I36.cif


2it [00:01,  1.77it/s]

Loading cif file of 6PS6
data/mmcif/6PS6.cif


3it [00:01,  2.21it/s]

Loading cif file of 7BW0
data/mmcif/7BW0.cif


4it [00:02,  2.01it/s]

Loading cif file of 6DRY
data/mmcif/6DRY.cif


6it [00:02,  2.58it/s]

Loading cif file of 5ZK8
data/mmcif/5ZK8.cif


7it [00:02,  2.83it/s]

Loading cif file of 6C1Q
data/mmcif/6C1Q.cif
Loading cif file of 5NX2
data/mmcif/5NX2.cif


8it [00:03,  2.93it/s]

Loading cif file of 6N48
data/mmcif/6N48.cif


9it [00:03,  2.83it/s]

Loading cif file of 4L6R
data/mmcif/4L6R.cif


10it [00:03,  2.93it/s]

Loading cif file of 2Y04
data/mmcif/2Y04.cif


11it [00:04,  2.75it/s]

Loading cif file of 6G79
data/mmcif/6G79.cif


12it [00:04,  2.48it/s]

Loading cif file of 4MBS
data/mmcif/4MBS.cif


13it [00:05,  2.32it/s]

Loading cif file of 6LPB
data/mmcif/6LPB.cif


14it [00:05,  1.99it/s]

Loading cif file of 6J20
data/mmcif/6J20.cif


15it [00:06,  2.21it/s]

Loading cif file of 6ME4
data/mmcif/6ME4.cif


16it [00:06,  2.32it/s]

Loading cif file of 5XR8
data/mmcif/5XR8.cif


17it [00:07,  2.44it/s]

Loading cif file of 4AMJ
data/mmcif/4AMJ.cif


18it [00:07,  2.39it/s]

Loading cif file of 7CRH
data/mmcif/7CRH.cif


19it [00:08,  2.05it/s]

Loading cif file of 6O3C
data/mmcif/6O3C.cif


20it [00:08,  2.32it/s]


trying to assign generic nubmers to 6NI3
loaded data to assign gen. numbers from data/raw/6NI3.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: ADRB2_HUMAN ADRB2_HUMAN
trying to assign generic nubmers to 2I36
loaded data to assign gen. numbers from data/raw/2I36.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: OPSD_BOVIN OPSD_BOVIN
trying to assign generic nubmers to 6PS6
loaded data to assign gen. numbers from data/raw/6PS6.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: ADRB2_HUMAN ADRB2_HUMAN
Found correct uniprot map: ADRB2_HUMAN 

0it [00:00, ?it/s]

Loading cif file of 6HLO
data/mmcif/6HLO.cif


1it [00:00,  3.09it/s]

Loading cif file of 6TQ9
data/mmcif/6TQ9.cif


2it [00:00,  2.73it/s]

Loading cif file of 2R4S
data/mmcif/2R4S.cif


4it [00:01,  3.05it/s]

Loading cif file of 5TZY
data/mmcif/5TZY.cif


5it [00:01,  3.28it/s]

Loading cif file of 4X1H
data/mmcif/4X1H.cif
Loading cif file of 6TQ7
data/mmcif/6TQ7.cif


6it [00:02,  2.93it/s]

Loading cif file of 6PRZ
data/mmcif/6PRZ.cif


7it [00:02,  2.98it/s]

Loading cif file of 1U19
data/mmcif/1U19.cif


8it [00:02,  2.71it/s]

Loading cif file of 6RZ6
data/mmcif/6RZ6.cif


9it [00:03,  2.86it/s]

Loading cif file of 6D27
data/mmcif/6D27.cif


10it [00:03,  2.91it/s]

Loading cif file of 5WB2
data/mmcif/5WB2.cif


11it [00:03,  2.78it/s]

Loading cif file of 6KPF
data/mmcif/6KPF.cif


12it [00:04,  2.27it/s]

Loading cif file of 6TPK
data/mmcif/6TPK.cif


13it [00:04,  2.40it/s]

Loading cif file of 5TVN
data/mmcif/5TVN.cif


14it [00:05,  2.60it/s]

Loading cif file of 5JTB
data/mmcif/5JTB.cif


15it [00:05,  2.71it/s]

Loading cif file of 5EE7
data/mmcif/5EE7.cif


16it [00:05,  2.75it/s]

Loading cif file of 6FK7
data/mmcif/6FK7.cif


17it [00:06,  2.88it/s]

Loading cif file of 4BWB
data/mmcif/4BWB.cif


18it [00:06,  2.70it/s]

Loading cif file of 4N4W
data/mmcif/4N4W.cif


19it [00:06,  2.69it/s]

Loading cif file of 4EIY
data/mmcif/4EIY.cif


20it [00:07,  2.74it/s]


trying to assign generic nubmers to 6HLO
loaded data to assign gen. numbers from data/raw/6HLO.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: NK1R_HUMAN NK1R_HUMAN
Found correct uniprot map: NK1R_HUMAN NK1R_HUMAN
trying to assign generic nubmers to 6TQ9
loaded data to assign gen. numbers from data/raw/6TQ9.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: OX1R_HUMAN OX1R_HUMAN
trying to assign generic nubmers to 2R4S
loaded data to assign gen. numbers from data/raw/2R4S.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: ADRB2_HUMAN ADRB2_HUMAN
trying to assign generic nubmers to 5TZY
loaded data to assign gen. numbe

0it [00:00, ?it/s]

Loading cif file of 6MXT
data/mmcif/6MXT.cif


1it [00:00,  2.80it/s]

Loading cif file of 6U1N
data/mmcif/6U1N.cif


2it [00:00,  2.53it/s]

Loading cif file of 6PT0
data/mmcif/6PT0.cif


3it [00:01,  1.91it/s]

Loading cif file of 5IU8
data/mmcif/5IU8.cif


4it [00:01,  2.26it/s]

Loading cif file of 6W25
data/mmcif/6W25.cif


5it [00:02,  2.49it/s]

Loading cif file of 4BV0
data/mmcif/4BV0.cif


6it [00:02,  2.51it/s]

Loading cif file of 4PXZ
data/mmcif/4PXZ.cif


7it [00:02,  2.72it/s]

Loading cif file of 6M1H
data/mmcif/6M1H.cif


8it [00:03,  2.21it/s]

Loading cif file of 6HLP
data/mmcif/6HLP.cif


9it [00:03,  2.33it/s]

Loading cif file of 5ZBH
data/mmcif/5ZBH.cif


10it [00:04,  2.48it/s]

Loading cif file of 7L0P
data/mmcif/7L0P.cif


11it [00:04,  2.27it/s]

Loading cif file of 3AYM
data/mmcif/3AYM.cif


12it [00:05,  2.24it/s]

Loading cif file of 6X1A
data/mmcif/6X1A.cif


13it [00:05,  1.88it/s]

Loading cif file of 6OS9
data/mmcif/6OS9.cif


14it [00:06,  1.74it/s]

Loading cif file of 4XT1
data/mmcif/4XT1.cif


15it [00:06,  1.93it/s]

Loading cif file of 6VCB
data/mmcif/6VCB.cif


16it [00:07,  1.76it/s]

Loading cif file of 7L0R
data/mmcif/7L0R.cif


17it [00:08,  1.75it/s]

Loading cif file of 5WB1
data/mmcif/5WB1.cif


18it [00:08,  1.98it/s]

Loading cif file of 6ME9
data/mmcif/6ME9.cif


19it [00:09,  1.95it/s]

Loading cif file of 6ME5
data/mmcif/6ME5.cif


20it [00:09,  2.09it/s]


trying to assign generic nubmers to 6MXT
loaded data to assign gen. numbers from data/raw/6MXT.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: ADRB2_HUMAN ADRB2_HUMAN
trying to assign generic nubmers to 6U1N
loaded data to assign gen. numbers from data/raw/6U1N.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: ACM2_HUMAN ACM2_HUMAN
trying to assign generic nubmers to 6PT0
loaded data to assign gen. numbers from data/raw/6PT0.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: CNR2_HUMAN CNR2_HUMAN
trying to assign generic nubmers to 5IU8
loaded data to assign gen. numbers from data/raw/5IU8.pkl
Index(['author_re

1it [00:00,  3.75it/s]

Loading cif file of 5K2B
data/mmcif/5K2B.cif
Loading cif file of 7JV5
data/mmcif/7JV5.cif


2it [00:00,  2.27it/s]

Loading cif file of 6M9T
data/mmcif/6M9T.cif


3it [00:01,  2.66it/s]

Loading cif file of 7L0S
data/mmcif/7L0S.cif


4it [00:01,  2.27it/s]

Loading cif file of 4UG2
data/mmcif/4UG2.cif


5it [00:02,  2.43it/s]

Loading cif file of 6KPG
data/mmcif/6KPG.cif


6it [00:02,  2.09it/s]

Loading cif file of 6LPJ
data/mmcif/6LPJ.cif


7it [00:02,  2.36it/s]

Loading cif file of 6RZ7
data/mmcif/6RZ7.cif


8it [00:03,  2.47it/s]


trying to assign generic nubmers to 5K2B
loaded data to assign gen. numbers from data/raw/5K2B.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number'],
      dtype='object')
Found correct uniprot map: AA2AR_HUMAN AA2AR_HUMAN
Found correct uniprot map: AA2AR_HUMAN AA2AR_HUMAN
trying to assign generic nubmers to 7JV5
loaded data to assign gen. numbers from data/raw/7JV5.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number'], dtype='object')
Found correct uniprot map: DRD1_HUMAN DRD1_HUMAN
trying to assign generic nubmers to 6M9T
loaded data to assign gen. numbers from data/raw/6M9T.pkl
Index(['author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'residue_number',
       'author_residue_number', 'author_insertion_code', 'resi

0it [00:00, ?it/s]


21


0it [00:00, ?it/s]


22


0it [00:00, ?it/s]


23


0it [00:00, ?it/s]


24


0it [00:00, ?it/s]


25


0it [00:00, ?it/s]


26


0it [00:00, ?it/s]


27


0it [00:00, ?it/s]


28


0it [00:00, ?it/s]


29


0it [00:00, ?it/s]


30


0it [00:00, ?it/s]


31


0it [00:00, ?it/s]


32


0it [00:00, ?it/s]


33


0it [00:00, ?it/s]


34


0it [00:00, ?it/s]


35


0it [00:00, ?it/s]


36


0it [00:00, ?it/s]


37


0it [00:00, ?it/s]


38


0it [00:00, ?it/s]


39


0it [00:00, ?it/s]


40


0it [00:00, ?it/s]


41


0it [00:00, ?it/s]


42


0it [00:00, ?it/s]


43


0it [00:00, ?it/s]


44


0it [00:00, ?it/s]


45


0it [00:00, ?it/s]


46


0it [00:00, ?it/s]


47


0it [00:00, ?it/s]


48


0it [00:00, ?it/s]


49


0it [00:00, ?it/s]


50


0it [00:00, ?it/s]


51


0it [00:00, ?it/s]


52


0it [00:00, ?it/s]


53


0it [00:00, ?it/s]


54


0it [00:00, ?it/s]


55


0it [00:00, ?it/s]


56


0it [00:00, ?it/s]


57


0it [00:00, ?it/s]


58


0it [00:00, ?it/s]


59


0it [00:00, ?it/s]


60


0it [00:00, ?it/s]


61


0it [00:00, ?it/s]


62


0it [00:00, ?it/s]


63


0it [00:00, ?it/s]


64


0it [00:00, ?it/s]


65


0it [00:00, ?it/s]


66


0it [00:00, ?it/s]


67


0it [00:00, ?it/s]


68


0it [00:00, ?it/s]


69


0it [00:00, ?it/s]


70


0it [00:00, ?it/s]


71


0it [00:00, ?it/s]


72


0it [00:00, ?it/s]


73


0it [00:00, ?it/s]


74


0it [00:00, ?it/s]


75


0it [00:00, ?it/s]


76


0it [00:00, ?it/s]


77


0it [00:00, ?it/s]


78


0it [00:00, ?it/s]


79


0it [00:00, ?it/s]


80


0it [00:00, ?it/s]


81


0it [00:00, ?it/s]


82


0it [00:00, ?it/s]


83


0it [00:00, ?it/s]


84


0it [00:00, ?it/s]


85


0it [00:00, ?it/s]


86


0it [00:00, ?it/s]


87


0it [00:00, ?it/s]


88


0it [00:00, ?it/s]


89


0it [00:00, ?it/s]


90


0it [00:00, ?it/s]


91


0it [00:00, ?it/s]


92


0it [00:00, ?it/s]


93


0it [00:00, ?it/s]


94


0it [00:00, ?it/s]


95


0it [00:00, ?it/s]


96


0it [00:00, ?it/s]


97


0it [00:00, ?it/s]


98


0it [00:00, ?it/s]


99


0it [00:00, ?it/s]


In [215]:
data = CifProcessor()

In [216]:
# tbd include 

In [217]:
data.read_pkl_processed()

In [218]:
# data.to_pkl_metainfo()

In [219]:
# data.make_metainfo()

In [220]:
data.read_pkl_metainfo()

In [221]:
data.numbering

Unnamed: 0,PDB,identifier,family,numbering
0,6FJ3,pth1r_human,002_001_004_001,"[{'sequence_number': 1, 'amino_acid': 'M', 'pr..."
1,5WS3,ox2r_human,001_002_023_002,"[{'sequence_number': 1, 'amino_acid': 'M', 'pr..."
2,6FKD,opsd_bovin,001_009_001_001,"[{'sequence_number': 1, 'amino_acid': 'M', 'pr..."
3,6TP4,ox1r_human,001_002_023_001,"[{'sequence_number': 1, 'amino_acid': 'M', 'pr..."
4,4JKV,smo_human,006_001_001_011,"[{'sequence_number': 1, 'amino_acid': 'M', 'pr..."
...,...,...,...,...
523,6ME6,mtr1b_human,001_005_001_002,"[{'sequence_number': 1, 'amino_acid': 'M', 'pr..."
524,4UG2,aa2ar_human,001_006_001_002,"[{'sequence_number': 1, 'amino_acid': 'M', 'pr..."
525,6NWE,opsd_bovin,001_009_001_001,"[{'sequence_number': 1, 'amino_acid': 'M', 'pr..."
526,1U19,opsd_bovin,001_009_001_001,"[{'sequence_number': 1, 'amino_acid': 'M', 'pr..."


In [222]:
len(data.dfl)

528

In [223]:
data.table

Unnamed: 0,uniprot(gene),Cl.,PDB,Resolution,Preferred Chain,State,Function
0,STE2,D1(Ste2-likefungalpheromone),7AD3,3.5,A,Active,Agonist
1,CCR2,A(Rhodopsin),5T1A,2.8,A,Inactive,AntagonistNAM
2,OPRM,A(Rhodopsin),4DKL,2.8,A,Inactive,Antagonist
3,CNR2,A(Rhodopsin),5ZTY,2.8,A,Inactive,Antagonist
4,5HT1B,A(Rhodopsin),6G79,3.8,S,Active,Agonist
...,...,...,...,...,...,...,...
523,OPSD,A(Rhodopsin),6PH7,2.9,A,Active,unknown
524,NTR1,A(Rhodopsin),7L0S,4.5,C,Active,Agonist
525,AA2AR,A(Rhodopsin),6S0Q,2.7,A,Inactive,Antagonist
526,GLP1R,B1(Secretin),7LCJ,2.8,R,Active,Agonist


In [224]:
df = data.dfl[0]

In [225]:
def split_family(family):
    return family.split('_')

In [226]:
split = ['001', '009', '001', '001']

In [227]:
def fam_id_to_name(fam_id:str):
    return None

In [228]:
def group_by_family(numbering, family):
    """
    family: a length = 4 list with 4 identifiers
    """
    numbering[['f1', 'f2', 'f3', 'f4']] = numbering.apply(lambda x: x.family.split('_'), axis = 1, result_type='expand')
    for i, f in enumerate(family):
        if f != '':
            col = 'f'+ str(i+1)
            numbering = numbering[numbering[col] == family[i]]
    return list(numbering['PDB'].unique())

In [229]:
group = group_by_family(data.numbering, split)

In [230]:
data.numbering[data.numbering['PDB'] == '6FKD']

Unnamed: 0,PDB,identifier,family,numbering,f1,f2,f3,f4
2,6FKD,opsd_bovin,001_009_001_001,"[{'sequence_number': 1, 'amino_acid': 'M', 'pr...",1,9,1,1


In [231]:
def get_group_from_dfl(dfl, pdb_ids):
    selection = []
    sel_pdbs = []
    for df in dfl:
        if df['PDB'].unique()[0] in pdb_ids:
            selection.append(df)
            sel_pdbs.append(df['PDB'].unique()[0])
    return selection, sel_pdbs

In [232]:
selection, sel_pdbs = get_group_from_dfl(data.dfl, group)

In [233]:
len(selection)

58

In [234]:
len(group)

58

In [235]:
len(data.dfl)

528

In [236]:
print(group)

['6FKD', '5EN0', '6OFJ', '2G87', '1GZM', '6I9K', '3OAX', '6FUF', '6FK6', '4ZWJ', '6FKB', '4PXF', '5W0P', '6FKA', '4BEY', '5TE3', '4BEZ', '1L9H', '4A4M', '2I35', '2X72', '2PED', '2Z73', '6FK9', '6OY9', '6QNO', '2J4Y', '6CMO', '5DGY', '2I37', '5WKT', '6FKC', '3CAP', '6PEL', '5TE5', '1f88', '4X1H', '6PGS', '3C9M', '6OYA', '5DYS', '3C9L', '6FK8', '2I36', '4WW3', '3PXO', '3PQR', '3AYN', '3AYM', '2HPY', '6FK7', '3DQB', '2ZIY', '1HZX', '4J4Q', '6PH7', '6NWE', '1U19']


In [237]:
print(sel_pdbs)

['1f88', '1GZM', '1HZX', '1L9H', '1U19', '2G87', '2HPY', '2I35', '2I36', '2I37', '2J4Y', '2PED', '2X72', '2Z73', '2ZIY', '3AYM', '3AYN', '3C9L', '3C9M', '3CAP', '3DQB', '3OAX', '3PQR', '3PXO', '4A4M', '4BEY', '4BEZ', '4J4Q', '4PXF', '4WW3', '4X1H', '4ZWJ', '5DGY', '5DYS', '5EN0', '5TE3', '5TE5', '5W0P', '5WKT', '6CMO', '6FK6', '6FK7', '6FK8', '6FK9', '6FKA', '6FKB', '6FKC', '6FKD', '6FUF', '6I9K', '6NWE', '6OFJ', '6OY9', '6OYA', '6PEL', '6PGS', '6PH7', '6QNO']


In [238]:
def get_activity(table, sel_pdbs):
    activity_dict = {}
    actives = []
    inactives = []
    for pdb in sel_pdbs:
        act = table[table['PDB']==pdb.upper()]['State'].iloc[0]
        activity_dict.update({pdb: act})
        if act == 'Active':
            actives.append(pdb)
        elif act == 'Inactive':
            inactives.append(pdb)
    return activity_dict, actives, inactives

In [239]:
filter_dict, actives, inactives = get_activity(data.table, sel_pdbs)

In [240]:
active_dfl = get_group_from_dfl(data.dfl, actives)[0]

In [241]:
len(actives)

35

In [242]:
len(inactives)

23

In [243]:
inactive_dfl = get_group_from_dfl(data.dfl, inactives)[0]

# PLOTTING

In [245]:
def plot_angles_for_gen_pos(dfl, region=[7.40, 7.60], legend=True, title=''):
    categories = []
    for df in dfl:
        category = np.unique(df['PDB'])[0]
        categories.append(category.upper())

    cmap = plt.cm.get_cmap('RdYlGn', len(categories))
    colors = np.linspace(0, len(categories)-1, len(categories))
    colordict = dict(zip(categories,colors))

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(24,12))

    xlabel = 'generic position numbers'
    ylabel = 'degrees'
    
    start_regions = [x for _, x in enumerate(region) if _ % 2 == 0]
    end_regions = [x for _, x in enumerate(region) if _ % 2 == 1]
    
    for i in range(len(start_regions)):
        xtick_min = start_regions[i]
        xtick_max = end_regions[i]
        nsteps = int(round(xtick_max - xtick_min, 2) / 0.01)
        if i == 0:
            xticks = np.linspace(start_regions[i], end_regions[i], nsteps+1)
        else:
            xticks = np.hstack([xticks, np.linspace(start_regions[i], end_regions[i], nsteps+1)])
    xticks = [round(x, 2) for x in xticks]
    xticks_ = [i for i in range(len(xticks))]
    xtick_dict = dict(zip(xticks, xticks_))
    
    for i, cat in enumerate(categories):
        c = int(colordict[cat])
        df = dfl[i]
        roi = pd.concat([df.loc[(df['gen_pos1'] > start_regions[i]) &
                                (df['gen_pos1'] < end_regions[i]) &
                                (df['label_atom_id']=='CA')] for i in range(len(start_regions))], join='outer')
        roi['xticks'] = roi.apply(lambda x: xtick_dict[x.gen_pos1], axis=1)
        roi['phi'] = roi.apply(lambda x: float(x.phi), axis=1)
        roi['omega'] = roi.apply(lambda x: float(x.omega), axis=1)
        roi['psi'] = roi.apply(lambda x: float(x.psi), axis=1)
        tot_len = len(roi['phi'].to_list())+len(roi['omega'].to_list())+len(roi['psi'].to_list())
        if tot_len > 0:
            roi.plot(kind='line', x='xticks', y='phi', 
                    ax = axes[0], subplots = True, color = cmap(c), label=cat)
            if legend:
                axes[0].legend(bbox_to_anchor=(1.0, 1.0))
            else:
                axes[0].get_legend().remove()
            axes[0].set_xticks(xticks_)
            axes[0].set_xticklabels([str(x) for x in xticks], minor=False)
            axes[0].set_xlabel(xlabel)
            axes[0].set_ylabel(ylabel)
            axes[0].set_title('Phi Angles')

            roi.plot(kind='line', x='xticks', y='psi', 
                    ax = axes[1], subplots = True, color = cmap(c), label=cat)
            if legend:
                axes[1].legend(bbox_to_anchor=(1.0, 1.0))
            else:
                axes[1].get_legend().remove()
            axes[1].set_xticks(xticks_)
            axes[1].set_xticklabels([str(x) for x in xticks], minor=False)
            axes[1].set_xlabel(xlabel)
            axes[1].set_ylabel(ylabel)
            axes[1].set_title('Psi Angles')
    fig.suptitle(title, fontsize=12)

    mplcursors.cursor(hover=True)

    plt.show()

In [247]:
%pylab
plot_angles_for_gen_pos(active_dfl, region=[7.44, 7.55, 8.51, 8.54], legend=False, title='Active GPCRs (Complexes)')
plot_angles_for_gen_pos(inactive_dfl, region=[7.44, 7.55, 8.51, 8.54], legend=False, title='Inactive GPCRs')

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [153]:
# download gproteins structures?  ==> where do i get regions

In [154]:
# predict contact maps

In [155]:
# predict activity based on contact maps

# StructLoader

In [319]:
# 8  load structs (structs dataloader) and table
# btw how do i get the affinities?

In [320]:
# 9  plot structs

In [321]:
# 10 plot deltas (make interaction map based on genes)

In [322]:
# 11 input features

# GPROTEINS

In [161]:
path = 'data/gproteins'

In [None]:
gproteins_table = 'data/gproteins.'

In [None]:
request = ''

# AFFINITIES

In [None]:
affinities_table = 'data/affinities.'