In [1]:
from utils import *
from utils2 import * 
from utils3 import *
from plotting import *
from gpcrdb_soup import *

In [2]:
from tqdm import tqdm, trange

In [3]:
import sys
import pandas
import gemmi
from gemmi import cif
import random
from math import degrees
import mplcursors

In [4]:
class CifProcessor():
    """Turn mmCIF files plus GPCRdb metadata into per-structure DataFrames.

    Typical flow (all methods are defined in this class):
      * ``make_metainfo`` / ``read_pkl_metainfo`` fill ``numbering``, ``table`` and ``mappings``.
      * ``make_raws`` builds ``structure`` (one row per atom) from the mmCIF files.
      * ``assign_generic_numbers`` builds ``dfl`` (one frame per PDB id, with generic numbers).
      * ``to_pkl_*`` / ``read_pkl_*`` / ``del_pkl*`` persist, reload and delete those tables.

    NOTE(review): ``pd``, ``np``, ``plt``, ``os``, ``requests`` and ``get_mappings_data`` are not
    imported in this cell; presumably they come from the star imports at the top of the notebook.
    """
    def __init__(self, 
                 path = 'data/',
                 structure = 'mmcif/',
                 starting_idx=0,
                 limit=None,
                 shuffle = False,
                 reload=True,
                 remove_hetatm=True,
                 allow_exception=False):
        """Collect the structure files to work on.

        path: root data folder (string prefix, expected to end with '/').
        structure: sub-folder of ``path`` holding the ``.cif`` files.
        starting_idx: number of leading files / ids to skip.
        limit: maximum number of entries to keep (``None`` keeps all).
        shuffle: shuffle the file list (via ``random.shuffle``) instead of keeping sorted order.
        reload: stored on the instance; not read by any method of this class.
        remove_hetatm: drop HETATM rows in ``make_raws``.
        allow_exception: let ``assign_generic_numbers`` skip entries that fail.
        """
        self.path = path
        self.structure_path = self.path + structure
        self.path_table = path + 'gpcrdb/' + 'structures.pkl'
        
        self.shuffle = shuffle
        self.limit = limit
        self.reload = reload
        self.remove_hetatm = remove_hetatm
        self.allow_exception = allow_exception
        
        self.filenames, self.pdb_ids = self.get_pdb_files()
        self.filenames = self.filenames[starting_idx:]
        self.pdb_ids = self.pdb_ids[starting_idx:]
        if self.limit is None:
            self.limit = len(self.pdb_ids)
        if len(self.filenames) > self.limit:
            self.filenames = self.filenames[:self.limit]
            self.pdb_ids = self.pdb_ids[:self.limit]
        # Columns for structure dataframe
        self.cols = ['group_PDB', 'auth_asym_id', 'label_asym_id', 'label_seq_id', 'auth_seq_id', 
                     'label_comp_id', 'id', 'label_atom_id', 
                     'type_symbol', 'Cartn_x', 'Cartn_y', 'Cartn_z']
        self.numbering = pd.DataFrame()
                
    # ==============================================================================================================
    
    def get_pdb_files(self):
        """Return ``(files, pdb_ids)`` for every file directly inside ``self.structure_path``.

        ``files`` are the full paths; ``pdb_ids`` are the four characters in front of the
        four-character extension (``x[-8:-4]``), de-duplicated while keeping the order of
        ``files`` so that slicing both lists with the same index stays consistent.
        """
        (_, _, filenames) = next(os.walk(self.structure_path))
        # os.walk gives no ordering guarantee: sort first so runs are reproducible
        # (and so that a seeded shuffle is reproducible as well).
        filenames = sorted(filenames)
        if self.shuffle:
            random.shuffle(filenames)
        files = [self.structure_path + x for x in filenames]
        # dict.fromkeys de-duplicates like set() did, but preserves the file order.
        pdb_ids = list(dict.fromkeys(x[-8:-4] for x in files))
        return files, pdb_ids
    
    def make_metainfo(self):
        """Query GPCRdb and the mapping service for every PDB id and fill the metadata tables.

        Sets ``self.table`` (read from ``self.path_table``), ``self.mappings`` (rebuilt from
        scratch) and extends ``self.numbering`` with one row per PDB id known to GPCRdb.
        Ids for which ``get_prot_info`` returns ``None`` are skipped.
        """
        self.table = pd.read_pickle(self.path_table)
        mapping_frames = []
        numbering_rows = []
        for i, pdb_id in tqdm(enumerate(self.pdb_ids)):
            if i >= self.limit:
                break
            protein, family = self.get_prot_info(pdb_id)
            if protein is None:
                continue
            numbering = self.get_res_nums(protein)
            mapping_frames.append(self.get_mapping(pdb_id))
            numbering_rows.append({'PDB': pdb_id, 'identifier': protein,
                                   'family': family, 'numbering': numbering})
        # Collect first and concatenate once: independent of whether entry 0 was known to
        # GPCRdb, and does not rely on DataFrame.append (removed in pandas 2.0).
        if numbering_rows:
            new_rows = pd.DataFrame(numbering_rows,
                                    columns=['PDB', 'identifier', 'family', 'numbering'])
            parts = [frame for frame in (self.numbering, new_rows) if not frame.empty]
            self.numbering = pd.concat(parts, ignore_index=True)
        if mapping_frames:
            self.mappings = pd.concat(mapping_frames, ignore_index=True)
        else:
            self.mappings = pd.DataFrame()

    def make_raws(self):
        """Load every mmCIF file into one atom-level DataFrame stored in ``self.structure``.

        For each PDB id known to GPCRdb the frame from ``load_cifs`` gets an ``identifier``
        column (upper-cased protein name) and a ``label_comp_sid`` column (one-letter residue
        code from gemmi). With ``remove_hetatm`` the HETATM rows are dropped and
        ``label_seq_id`` is cast to int64.
        """
        frames = []
        for i, pdb_id in tqdm(enumerate(self.pdb_ids)):
            if i >= self.limit:
                break
            protein, _ = self.get_prot_info(pdb_id)
            if protein is None:
                continue
            structure = self.load_cifs(pdb_id)
            structure['identifier'] = protein.upper()
            if self.remove_hetatm:
                # .copy() so the column assignment below does not write into a slice.
                structure = structure[structure['group_PDB']!='HETATM'].copy()
                structure['label_seq_id'] = structure['label_seq_id'].astype(np.int64)
            structure['label_comp_sid'] = structure['label_comp_id'].map(
                lambda comp: gemmi.find_tabulated_residue(comp).one_letter_code)
            frames.append(structure)
        if frames:
            self.structure = pd.concat(frames, ignore_index=True)
        else:
            # Keep the attribute defined (with the expected columns) even if nothing loaded.
            self.structure = pd.DataFrame(columns=['PDB'] + self.cols)
         
    # ==============================================================================================================
        
    def entry_to_ac(self, entry: str):
        """Fetch the UniProt flat-file for ``entry`` and return the first 6 characters after
        ``'AC   '`` on its second line."""
        query = 'https://www.uniprot.org/uniprot/'+entry+'.txt'
        response = requests.get(query)
        return response.text.split('\n')[1].split('AC   ')[1][:6]
    
    def get_prot_info(self, pdb_id):
        """Return ``(protein, family)`` from the GPCRdb structure service, or ``(None, None)``
        when the service answers with an empty payload."""
        # query structure
        query = 'https://gpcrdb.org/services/structure/'+pdb_id.upper()+'/'
        payload = requests.get(query).json()
        if len(payload) > 0:
            return payload['protein'], payload['family']
        return None, None
    
    def get_res_nums(self, protein):
        """Return the decoded JSON of the GPCRdb 'residues/extended' service for ``protein``."""
        # query uniprot -> res num
        query = 'https://gpcrdb.org/services/residues/extended/'+protein+'/'
        response = requests.get(query)
        return response.json()
    
    def get_mapping(self, pdb_id):
        """Return one DataFrame with the UniProt mappings of ``pdb_id``.

        Each UniProt entry returned by ``get_mappings_data`` becomes a block of rows tagged
        with ``PDB`` and ``uniprot`` columns; an empty frame is returned if there are none.
        """
        maps = get_mappings_data(pdb_id)[pdb_id.lower()]['UniProt']
        tables = []
        for uniprot, entry in maps.items():
            table = pd.DataFrame.from_dict(entry)
            table['PDB'] = pdb_id
            table['uniprot'] = uniprot
            tables.append(table)
        if not tables:
            return pd.DataFrame()
        return pd.concat(tables, ignore_index=True)
    
    # ==============================================================================================================
    
    def load_cifs(self, pdb_id):
        """Read ``<structure_path><pdb_id>.cif`` and return atom rows joined with backbone angles.

        The ``_atom_site`` columns listed in ``self.cols`` are read with ``gemmi.cif``; phi, psi
        and omega (in degrees) are computed per polymer residue of the first model and merged
        onto the atom rows on ``(label_asym_id, label_seq_id)`` with an inner join.
        A read error prints a message and exits the interpreter (original behaviour, kept).
        """
        # Use the configured structure folder instead of a hard-coded 'data/mmcif/'.
        path = self.structure_path + pdb_id + '.cif'
        try:
            doc = cif.read_file(path)  # copy all the data from mmCIF file
            lol = []  # list of lists
            for block in doc:
                table = block.find('_atom_site.', self.cols)
                for row in table:
                    lol.append([pdb_id]+list(row))
        
        except Exception as e:
            print("Hoppla. %s" % e)
            sys.exit(1)
        cols = ['PDB']+self.cols
        st = gemmi.read_structure(path)
        model = st[0]
        if len(st) > 1:
            print("There are multiple models!")
        rol = []
        for chain in model:
            for res in chain.get_polymer():
                # previous_residue() and next_residue() return previous/next
                # residue only if the residues are bonded. Otherwise -- None.
                prev_res = chain.previous_residue(res)
                next_res = chain.next_residue(res)
                try:
                    phi, psi = gemmi.calculate_phi_psi(prev_res, res, next_res)
                except Exception:
                    phi, psi = np.nan, np.nan
                try:
                    omega = gemmi.calculate_omega(res, next_res)
                except Exception:
                    omega = np.nan
                rol.append([res.label_seq, res.subchain, 
                            degrees(phi), degrees(omega), degrees(psi)])
        cols2 = ['label_seq_id', 'label_asym_id', 'phi', 'omega', 'psi']
        rol_df = pd.DataFrame(data=rol, columns=cols2)
        rol_df['label_seq_id'] = rol_df['label_seq_id'].astype(int)
        lol_df = pd.DataFrame(data=lol, columns=cols)
        # '.' marks a missing label_seq_id in the file; column-wise map also works on an
        # empty frame, where a row-wise apply would not return a Series.
        lol_df['label_seq_id'] = lol_df['label_seq_id'].map(
            lambda v: int(v) if v != '.' else np.nan)
        return pd.merge(lol_df, rol_df, how='inner', on=['label_asym_id', 'label_seq_id'])
    
    # ==============================================================================================================   
            
    def to_pkl_metainfo(self):
        """Pickle ``numbering``, ``table`` and ``mappings`` into ``self.path``."""
        self.numbering.to_pickle(self.path + 'data_numbering.pkl')
        self.table.to_pickle(self.path + 'data_table.pkl')
        self.mappings.to_pickle(self.path + 'data_mappings.pkl')
    
    def to_pkl_raw(self, folder='data/raw/', overwrite=False):
        """Write one pickle per PDB id from ``self.structure``; existing files are kept unless
        ``overwrite`` is set. Ids without rows are skipped."""
        for pdb_id in self.pdb_ids:
            structure = self.structure[self.structure['PDB']==pdb_id]
            if len(structure) >= 1:
                if (not os.path.isfile(folder + pdb_id + '.pkl')) or overwrite:
                    structure.to_pickle(folder + pdb_id + '.pkl')
                    print("writing to file:", folder + pdb_id + '.pkl')
    
    def to_pkl_processed(self, folder='data/processed/', overwrite=False):
        """Write every frame of ``self.dfl`` to ``<folder><PDB>.pkl``; existing files are kept
        unless ``overwrite`` is set."""
        for df in self.dfl:
            pdb_id = df['PDB'].unique()[0]
            if (not os.path.isfile(folder + pdb_id + '.pkl')) or overwrite:
                df.to_pickle(folder + pdb_id + '.pkl')
                print("writing to file:", folder + pdb_id + '.pkl')
    
    # ==============================================================================================================   
    
    def del_pkl(self, folder='data/raw/'):
        """Delete every file in ``folder`` whose name contains ``'.pkl'``."""
        files = [f for f in os.listdir(folder) if '.pkl' in f]
        for file in files:
            os.remove(folder + file)
    
    def del_pkl_metainfo(self):
        """Delete the three metadata pickles written by ``to_pkl_metainfo``."""
        os.remove(self.path + 'data_numbering.pkl')
        os.remove(self.path + 'data_table.pkl')
        os.remove(self.path + 'data_mappings.pkl')
            
    # ==============================================================================================================
    
    def read_pkl_raw(self):
        """Placeholder; does nothing."""
        # not needed atm
        pass
    
    def read_pkl_processed(self, folder='data/processed/'):
        """Load every pickle in ``folder`` into ``self.dfl`` (one re-indexed frame per file)."""
        files = [f for f in os.listdir(folder) if '.pkl' in f]
        self.dfl = []
        for f in files:
            self.dfl.append(pd.read_pickle(folder+f).reset_index().drop('index', axis=1))
    
    def read_pkl_metainfo(self):
        """Load ``numbering``, ``table`` and ``mappings`` from the pickles in ``self.path``."""
        self.numbering = pd.read_pickle(self.path + 'data_numbering.pkl')
        self.table = pd.read_pickle(self.path + 'data_table.pkl')
        self.mappings = pd.read_pickle(self.path + 'data_mappings.pkl')
    
    # ==============================================================================================================    
    
    def get_stacked_maps(self, pdb):
        """Stack all mapping segments of ``pdb`` that lie on its preferred chain.

        Each row of ``self.mappings`` for ``pdb`` is expanded with
        ``pd.DataFrame.from_dict(row['mappings'])`` and tagged with the row's ``name`` as
        ``identifier``. Only segments whose ``chain_id`` equals the 'Preferred Chain' from
        ``self.table`` are kept. Returns an empty frame when ``pdb`` has no mappings.
        """
        mappings_ = self.mappings[self.mappings['PDB']==pdb]
        pref_chain = self.table[self.table['PDB']==pdb.upper()]['Preferred Chain'].iloc[0]
        map_df_list = []
        for j in range(len(mappings_)):
            entry = mappings_.iloc[j]
            segment = pd.DataFrame.from_dict(entry['mappings'])
            segment['identifier'] = entry['name']
            map_df_list.append(segment)
        if not map_df_list:
            # pd.concat([]) raises; an empty frame lets the caller fall through cleanly.
            return pd.DataFrame()
        stacked = pd.concat(map_df_list)
        # .copy() so adding the PDB column does not write into a filtered view.
        stacked = stacked[stacked['chain_id']==pref_chain].copy()
        stacked['PDB'] = pdb
        return stacked


    def get_generic_nums(self, pdb_id):
        """Return ``[(sequence_number, amino_acid, display_generic_number), ...]`` for every
        residue of ``pdb_id`` in ``self.numbering`` whose ``alternative_generic_numbers`` list
        is not empty."""
        residues = self.numbering[self.numbering['PDB']==pdb_id].iloc[0]['numbering']
        return [(res['sequence_number'], res['amino_acid'], res['display_generic_number'])
                for res in residues
                if res['alternative_generic_numbers'] != []]


    def get_generic_number(self, zipped_pos_dict, l2u, comp_sid):
        """Look up the generic number for UniProt position ``l2u``.

        zipped_pos_dict: list of ``(sequence_number, amino_acid, generic_number)`` tuples as
            returned by ``get_generic_nums``; generic numbers look like ``'<a>x<b>'``.
        comp_sid: one-letter residue code observed in the structure.

        Returns ``(generic_number, amino_acid, float(a), int(b))`` for the first tuple whose
        sequence number equals ``l2u``; the generic number gets a trailing ``'?'`` when the
        tabulated amino acid differs from ``comp_sid``. Returns ``['', '', 0, 0]`` when
        ``l2u`` is negative or not listed (including an empty ``zipped_pos_dict``).
        """
        not_found = ['', '', 0, 0]
        if not l2u >= 0:
            return not_found
        # First match wins; a generator avoids the IndexError the zip(*...) transpose raised
        # on an empty list and does not rebuild the transposed list for every atom.
        row = next((entry for entry in zipped_pos_dict if entry[0] == l2u), None)
        if row is None:
            return not_found
        parts = row[2].split('x')
        label = row[2] if row[1] == comp_sid else row[2]+'?'
        return label, row[1], float(parts[0]), int(parts[1])


    def assign_generic_numbers_(self, pdb_id, overwrite, folder):
        """Annotate the raw frame of one PDB id with UniProt positions and generic numbers.

        Reads ``<folder><pdb_id>.pkl`` and adds/resets the columns ``label_2_uni``,
        ``gen_pos``, ``gen_pos1``, ``gen_pos2`` and ``uniprot_comp_sid``. If any generic-number
        column already exists and ``overwrite`` is false the frame is returned untouched.
        For every mapping segment whose identifier matches the frame's ``identifier``, the CA
        atom of each ``label_seq_id`` inside the segment gets its UniProt position in
        ``label_2_uni``; generic numbers are then looked up row by row.
        Returns the frame (with default column values if no 'residue_number' mapping exists).
        """
        data = pd.read_pickle(folder + pdb_id + '.pkl').reset_index().drop('index', axis=1)
        print("loaded data to assign gen. numbers from", folder + pdb_id + '.pkl')
        columns = ['gen_pos', 'gen_pos1', 'gen_pos2', 'uniprot_comp_sid']
        existing = [c for c in columns if c in data.columns]
        if existing:
            if not overwrite:
                return data
            data.drop(existing, axis=1, inplace=True)
        data['label_2_uni'] = 0
        data[columns[0]] = ''
        data[columns[1]] = 0
        data[columns[2]] = 0
        data[columns[3]] = ''
        maps_stacked = self.get_stacked_maps(pdb_id)
        if 'residue_number' not in maps_stacked.index:
            return data
        wanted = ['chain_id', 'start','end','unp_start','unp_end', 'identifier', 'PDB']
        pref_mapping = maps_stacked[maps_stacked['PDB']==pdb_id].loc['residue_number'][wanted]
        if isinstance(pref_mapping, pandas.Series):
            # A single matching segment comes back as a Series: turn it into a one-row frame.
            pref_mapping = pref_mapping.to_frame().T
        pref_chain = pref_mapping['chain_id'].iloc[0]
        pref_mapping = pref_mapping.sort_values('start')
        uniprot_identifier = data[data['PDB']==pdb_id]['identifier'].unique()[0]
        natoms = len(data[data['PDB']==pdb_id])
        
        # These masks do not depend on the segment or residue, so build them once.
        is_pdb = data['PDB'] == pdb_id
        is_ca = data['label_atom_id'] == 'CA'
        for j in range(len(pref_mapping)):
            row = pref_mapping.iloc[j].to_dict()
            if row['identifier'] != uniprot_identifier:
                # mapping belongs to another protein of the entry (not the GPCR)
                continue
            start_label_seq_id = row['start']
            start_uniprot = row['unp_start']
            end_label_seq_id = row['end']
            idxs = [x for x in range(natoms+1)
                    if ((x <= end_label_seq_id) & (x >= start_label_seq_id))]
            for idx in idxs:
                line = data[is_pdb & (data['label_seq_id'] == idx) & is_ca]
                if len(line) > 1:
                    line = line[line['auth_asym_id'] == pref_chain]
                if len(line) > 0:
                    data.at[line.index[0], 'label_2_uni'] = int(idx + start_uniprot - start_label_seq_id)
        # Generate generic numbers
        zipped_pos_dict = self.get_generic_nums(pdb_id)
        if isinstance(data, pandas.Series):
            data = data.to_frame().T
        
        data[['gen_pos', 'uniprot_comp_sid', 'gen_pos1', 'gen_pos2']] = data.apply(
            lambda x: self.get_generic_number(zipped_pos_dict, x.label_2_uni, x.label_comp_sid)
            if x.PDB==pdb_id
            else [x.gen_pos, x.uniprot_comp_sid, x.gen_pos1, x.gen_pos2],
            axis=1, result_type='expand')
        return data
    
    def assign_generic_numbers(self, pdb_ids=None, overwrite=True, folder='data/raw/'):
        """Run ``assign_generic_numbers_`` for every PDB id and store the frames in ``self.dfl``.

        pdb_ids: optional replacement for ``self.pdb_ids`` (a single id is wrapped in a list).
        overwrite / folder: forwarded to ``assign_generic_numbers_``.
        With ``self.allow_exception`` a failing id is reported and skipped; otherwise the
        exception propagates.
        """
        dfl_ = []
        if pdb_ids is not None:
            self.pdb_ids = pdb_ids
        if not isinstance(self.pdb_ids, list):
            self.pdb_ids = [self.pdb_ids]
        for pdb_id in self.pdb_ids:
            print("trying to assign generic nubmers to", pdb_id)
            try:
                frame = self.assign_generic_numbers_(pdb_id, overwrite=overwrite, folder=folder)
            except Exception as exc:
                if not self.allow_exception:
                    raise
                # Report why it failed instead of swallowing the error silently.
                print("assigning failed for", pdb_id, "-", repr(exc))
                continue
            dfl_.append(frame)
            print("assigned generic numbers to", pdb_id, "\n\n\n")
        self.dfl = dfl_
        
    # ==============================================================================================================
    
    def plot_angles_for_gen_pos(self, region=[7.40, 7.60], legend=True):
        """Plot phi (left) and psi (right) of the CA atoms of every frame in ``self.dfl``.

        region: ``[start, end]`` in generic-position units; rows with
            ``start <= gen_pos1 < end`` are plotted, with ticks every 0.01.
        legend: show one legend entry per structure; when false the legend is removed.
        """
        categories = [np.unique(df['PDB'])[0] for df in self.dfl]

        # plt.cm.get_cmap was removed in matplotlib 3.9; plt.get_cmap takes the same arguments.
        cmap = plt.get_cmap('RdYlGn', len(categories))
        colors = np.linspace(0, len(categories)-1, len(categories))
        colordict = dict(zip(categories,colors))

        fig, axes = plt.subplots(nrows=1,ncols=2,figsize=(24,12))

        xlabel = 'generic position numbers'
        ylabel = 'degrees'

        xtick_min = region[0]
        xtick_max = region[1]

        # Round AFTER dividing: int(0.29 / 0.01) would truncate 28.999... to 28.
        nsteps = int(round((xtick_max - xtick_min) / 0.01))
        xticks = np.linspace(xtick_min, xtick_max, nsteps+1)

        panels = [(axes[0], 'phi', 'Phi Angles'), (axes[1], 'psi', 'Psi Angles')]

        for i, cat in enumerate(categories):
            c = int(colordict[cat])
            df = self.dfl[i]
            roi = df[(df['gen_pos1']>=region[0]) & 
                     (df['gen_pos1']<region[1]) & 
                     (df['label_atom_id']=='CA')].copy()
            for angle in ('phi', 'omega', 'psi'):
                # column-wise cast also works when the selection is empty
                roi[angle] = roi[angle].astype(float)
            if roi.empty:
                continue
            for ax, angle, panel_title in panels:
                roi.plot(kind='line', x='gen_pos1', y=angle, 
                        ax = ax, subplots = True, color = cmap(c), 
                        xticks = xticks, label=cat)
                if legend:
                    ax.legend(bbox_to_anchor=(1.0, 1.0))
                else:
                    current = ax.get_legend()
                    if current is not None:
                        current.remove()
                ax.set_xlabel(xlabel)
                ax.set_ylabel(ylabel)
                ax.set_title(panel_title)
        plt.show()
        
    @staticmethod
    def get_family():
        """Placeholder; does nothing. Static so it can be called on the class or an instance."""
        pass

In [6]:
# Build the processor with all defaults (path='data/', structure='mmcif/'); the constructor
# lists the files found under that folder via get_pdb_files().
data = CifProcessor()

In [7]:
# tbd include 

In [8]:
# Load every pickle under 'data/processed/' into data.dfl (one DataFrame per file).
data.read_pkl_processed()

In [9]:
# data.to_pkl_metainfo()

In [10]:
# data.make_metainfo()

In [11]:
# Load data.numbering, data.table and data.mappings from the metadata pickles under 'data/'.
data.read_pkl_metainfo()

In [15]:
def split_family(fam_id):
    """Break a family id into its '_'-separated levels.

    A trailing '_' yields an empty last level, e.g. '001_001_003_' -> ['001', '001', '003', ''].
    """
    levels = fam_id.split('_')
    return levels

In [16]:
def fam_id_to_df(fam_id, path='data/families.pkl'):
    """Return the rows of the family table that match a family id.

    fam_id: '_'-separated id such as '001_001_003_'; each non-empty level is converted to
        int and compared with the counter columns 'v1'..'v4' of the table built by
        ``get_families``. Empty levels are not filtered on.
    path: forwarded to ``get_families``.
        NOTE(review): the default ends in '.pkl' although ``get_families`` parses a text
        file (its own default is 'data/families.txt') -- confirm which file is intended.
    """
    split = split_family(fam_id)
    print(split)
    family_df = get_families(path=path)
    # The original filtered an undefined local called `family`, which raised
    # UnboundLocalError on every call; filter the table that was actually loaded.
    for column, level in zip(['v1', 'v2', 'v3', 'v4'], split):
        if level != '':
            family_df = family_df[family_df[column] == int(level)]
    return family_df

In [17]:
def group_by_family(numbering, family):
    """
    Return the unique PDB ids (in order of appearance) whose family matches ``family``.

    numbering: DataFrame with a 'PDB' column and a 'family' column holding '_'-separated ids.
    family: a length = 4 list with 4 identifiers; an empty string ('') matches any value
        at that level.

    The input frame is left untouched (the original wrote helper columns 'f1'..'f4' into
    the caller's frame). An empty frame gives an empty list, and rows whose family id has
    fewer levels than requested simply do not match.
    """
    if len(numbering) == 0:
        return []
    levels = numbering['family'].str.split('_', expand=True)
    keep = np.ones(len(numbering), dtype=bool)
    for i, wanted in enumerate(family):
        if wanted == '':
            continue
        if i in levels.columns:
            keep &= (levels[i] == wanted).to_numpy()
        else:
            # no row has that many levels, so nothing can match
            keep[:] = False
    return list(numbering.loc[keep, 'PDB'].unique())

In [18]:
def get_group_from_dfl(dfl, pdb_ids):
    """Pick the frames of ``dfl`` whose first unique 'PDB' value is in ``pdb_ids``.

    Returns ``(selection, sel_pdbs)``: the matching frames and their PDB ids, both in the
    order of ``dfl``.
    """
    tagged = [(frame, frame['PDB'].unique()[0]) for frame in dfl]
    matches = [(frame, pdb) for frame, pdb in tagged if pdb in pdb_ids]
    selection = [frame for frame, _ in matches]
    sel_pdbs = [pdb for _, pdb in matches]
    return selection, sel_pdbs

In [19]:
def get_activity(table, sel_pdbs):
    """Look up the 'State' of each PDB id in ``table`` (matched on the upper-cased id).

    Returns ``(activity_dict, actives, inactives)``: a dict id -> state, the ids whose state
    is 'Active' and the ids whose state is 'Inactive'. Other states only appear in the dict.
    """
    activity_dict = {}
    by_state = {'Active': [], 'Inactive': []}
    for pdb in sel_pdbs:
        state = table[table['PDB'] == pdb.upper()]['State'].iloc[0]
        activity_dict[pdb] = state
        if state in by_state:
            by_state[state].append(pdb)
    return activity_dict, by_state['Active'], by_state['Inactive']

In [47]:
def compare_act_inact(data, family_id):
    """Split the processed structures of one receptor family by activity state.

    data: object exposing ``numbering``, ``dfl`` and ``table`` (e.g. a CifProcessor).
    family_id: '_'-separated family id; empty levels match anything.

    Returns ``(active_dfl, inactive_dfl, family_name)`` where the first two are lists of
    frames from ``data.dfl`` and ``family_name`` is ``family_id`` unchanged.
    """
    family_levels = split_family(family_id)
    family_pdbs = group_by_family(data.numbering, family_levels)
    # keep only the family members for which a processed frame is actually loaded
    loaded_pdbs = get_group_from_dfl(data.dfl, family_pdbs)[1]
    actives, inactives = get_activity(data.table, loaded_pdbs)[1:]
    active_dfl, _ = get_group_from_dfl(data.dfl, actives)
    inactive_dfl, _ = get_group_from_dfl(data.dfl, inactives)
    return active_dfl, inactive_dfl, family_id

In [21]:
def get_families(path = 'data/families.txt'):
    """Parse an indented family listing into a flat table.

    The nesting level of a line is detected from runs of spaces anywhere in the line:
    no 4-space run -> level 1, no 8-space run -> level 2, no 12-space run -> level 3,
    anything else is a leaf row. Leaf rows are comma separated and the 9th field is used
    as the leaf name (the whole split list if there are fewer than 9 fields).

    Returns a DataFrame with one row per leaf and the columns
    f1, v1, f2, v2, f3, v3, f4, v4 where ``fN`` is the name at level N and ``vN`` the
    1-based running counter at that level (reset whenever a parent level advances).

    The table is built from a list of records in one go (DataFrame.append was removed in
    pandas 2.0) and the per-line debug print of the file content was dropped.
    """
    cols = ['f1', 'v1', 'f2', 'v2', 'f3', 'v3', 'f4', 'v4']
    records = []
    # names (v0..v2) and counters (v0_..v3_) of the levels currently being read
    v0 = v1 = v2 = None
    v0_ = v1_ = v2_ = v3_ = 0
    with open(path) as f:
        for row in f:
            if '    ' not in row:
                # top level: text before '|' minus its last character
                v0 = row.split('|')[0][:-1]
                v0_ += 1
                v1_ = v2_ = v3_ = 0
            elif '        ' not in row:
                v1 = row.replace("\n", "")[4:]
                v1_ += 1
                v2_ = v3_ = 0
            elif '            ' not in row:
                v2 = row.replace("        ", "").replace("\n", "")
                v2_ += 1
                v3_ = 0
            else:
                fields = row.split(',')
                # same fallback as before: keep the raw field list when there is no 9th field
                v3 = fields[8] if len(fields) > 8 else fields
                v3_ += 1
                records.append(dict(zip(cols, [v0, v0_, v1, v1_, v2, v2_, v3, v3_])))
    return pd.DataFrame(records, columns=cols)
            
            

In [75]:
# Family id with four '_'-separated levels; the empty fourth level is skipped by
# group_by_family, so every member of '001_001_003' is selected.
family_id = '001_001_003_'

In [76]:
# family = fam_id_to_df(family_id)

In [77]:
# family

In [78]:
# Split the loaded frames of this family into 'Active' and 'Inactive' structures;
# family_name is the family id passed in.
active_dfl, inactive_dfl, family_name = compare_act_inact(data, family_id)

In [79]:
# Number of loaded structures of the family whose State is 'Active'.
len(active_dfl)

23

In [80]:
# Number of loaded structures of the family whose State is 'Inactive'.
len(inactive_dfl)

45

# PLOTTING

In [81]:
def plot_angles_for_gen_pos(dfl, region=[7.40, 7.60], legend=True, title='', save=False):
    """Plot phi (left) and psi (right) backbone angles of CA atoms over generic positions.

    dfl: list of per-structure DataFrames with the columns 'PDB', 'gen_pos1',
        'label_atom_id', 'phi', 'omega' and 'psi'.
    region: flat list of ``start, end`` pairs (e.g. ``[7.45, 7.56, 8.51, 8.54]``). Ticks are
        laid out every 0.01 from start to end of each pair, one pair after the other; rows
        with ``start < gen_pos1 < end`` (both bounds exclusive) are plotted.
    legend: show one legend entry per structure; when false the legend is removed.
    title: figure title, also used (spaces -> '_') for the file name when saving.
    save: write the figure to 'data/plots/<title>.png'.

    Raises ValueError if ``region`` is empty or has an odd number of entries.
    """
    categories = [np.unique(df['PDB'])[0].upper() for df in dfl]

    # plt.cm.get_cmap was removed in matplotlib 3.9; plt.get_cmap takes the same arguments.
    cmap = plt.get_cmap('RdYlGn', len(categories))
    colors = np.linspace(0, len(categories)-1, len(categories))
    colordict = dict(zip(categories,colors))

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(24,12))

    xlabel = 'generic position numbers'
    ylabel = 'degrees'
    
    bounds = list(region)
    if len(bounds) == 0 or len(bounds) % 2 != 0:
        raise ValueError("region must contain one or more (start, end) pairs")
    start_regions = bounds[0::2]
    end_regions = bounds[1::2]
    
    tick_chunks = []
    for start, end in zip(start_regions, end_regions):
        # Round AFTER dividing: int(0.29 / 0.01) would truncate 28.999... to 28.
        nsteps = int(round((end - start) / 0.01))
        tick_chunks.append(np.linspace(start, end, nsteps+1))
    xticks = [round(x, 2) for x in np.hstack(tick_chunks)]
    # Regions are drawn side by side: each tick value maps to its ordinal x position.
    xticks_ = list(range(len(xticks)))
    xtick_dict = dict(zip(xticks, xticks_))
    tick_labels = [str("%.2f" % round(x,2)) for x in xticks]
    
    panels = [(axes[0], 'phi', 'Phi Angles'), (axes[1], 'psi', 'Psi Angles')]

    for i, cat in enumerate(categories):
        c = int(colordict[cat])
        df = dfl[i]
        is_ca = df['label_atom_id']=='CA'
        roi = pd.concat([df.loc[(df['gen_pos1'] > start) &
                                (df['gen_pos1'] < end) &
                                is_ca]
                         for start, end in zip(start_regions, end_regions)], join='outer').copy()
        # Positions that are not on the tick grid become NaN individually instead of one
        # missing key blanking the x positions of the whole structure.
        roi['xticks'] = roi['gen_pos1'].map(lambda pos: xtick_dict.get(pos, np.nan))
        for angle in ('phi', 'omega', 'psi'):
            # column-wise cast also works when the selection is empty
            roi[angle] = roi[angle].astype(float)
        if roi.empty:
            continue
        for ax, angle, panel_title in panels:
            roi.plot(kind='line', x='xticks', y=angle, 
                    ax = ax, subplots = True, color = cmap(c), label=cat)
            if legend:
                ax.legend(bbox_to_anchor=(1.0, 1.0))
            else:
                current = ax.get_legend()
                if current is not None:
                    current.remove()
            ax.set_xticks(xticks_)
            ax.set_xticklabels(tick_labels, minor=False)
            ax.set_ylim(-150, 50)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            ax.set_title(panel_title)
    fig.suptitle(title, fontsize=12)
    mplcursors.cursor(hover=True)
    if save:
        # Save through the figure handle and before show(): with a blocking backend the
        # current figure is gone once show() returns and plt.savefig would write a blank image.
        print("saving")
        fig.savefig('data/plots/'+title.replace(' ', '_')+'.png')
    plt.show()

In [82]:
# Switch matplotlib to an interactive backend and populate the namespace from numpy and
# matplotlib (see the output below).
# NOTE(review): this runs after the plotting helpers are defined; confirm that `np`/`plt`
# are also available on a fresh kernel without relying on this cell.
%pylab

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [83]:
# Phi/psi profiles of the active structures over two generic-position windows
# (7.45-7.56 and 8.51-8.54), drawn without a legend.
plot_angles_for_gen_pos(active_dfl, region=[7.45, 7.56, 8.51, 8.54], legend=False, title='Active GPCRs (Complexes) '+family_name)

In [84]:
# Same plot for the inactive structures.
# NOTE(review): the first window ends at 7.58 here but at 7.56 for the active set -- confirm
# that the difference is intended.
plot_angles_for_gen_pos(inactive_dfl, region=[7.45, 7.58, 8.51, 8.54], legend=False, title='Inactive GPCRs '+family_name)

In [153]:
# TODO: download G-protein structures? Open question: where do I get the regions from?

In [154]:
# predict contact maps

In [155]:
# predict activity based on contact maps

# StructLoader

In [319]:
# 8  load structs (structs dataloader) and table
# Open question: how do I get the affinities?

In [320]:
# 9  plot structs

In [321]:
# 10 plot deltas (make interaction map based on genes)

In [322]:
# 11 input features

# GPROTEINS

In [161]:
# Folder name for the G-protein data; not used by any other cell shown here.
path = 'data/gproteins'

In [None]:
# NOTE(review): the path ends with '.', so the file extension looks unfinished -- placeholder?
gproteins_table = 'data/gproteins.'

In [None]:
# Empty placeholder; presumably meant to hold a query for the G-protein data -- TODO confirm.
request = ''

# AFFINITIES

In [None]:
# NOTE(review): the path ends with '.', so the file extension looks unfinished -- placeholder?
affinities_table = 'data/affinities.'