In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import os,re,math

from degpred_model import DegpredEmbedder, get_degpred_model

device = "cuda:0"

### load TAPE bert-based pre-trained model

In [5]:
embed = DegpredEmbedder(device=device)
# define the architecture
degpred_models = [get_degpred_model(fold, device=device) for fold in range(1, 6)]

# average the outputs of five models
def degpred(seq_bert):
    preds = [model(seq_bert) for model in degpred_models]
    return torch.stack(preds, dim=0).mean(dim=0)

### predict a sequence (example: P53)

In [6]:
seq = 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD'
seq_bert = embed(seq)
pred = degpred(seq_bert.unsqueeze(0)).squeeze().cpu().detach().numpy()[1:-1]

### find degrons on the sequence

In [12]:
def get_connected_ranges_and_mask(pred, prob_thres=0.3, connect_thres=3):
    num_list = torch.where(pred > prob_thres)[0].tolist()
    
    ranges=[]
    s=1
    while s <= len(num_list)-1:
        if num_list[s] - num_list[s-1] <= connect_thres:
            flag=s-1
            while (s<=len(num_list)-1) and (num_list[s]-num_list[s-1] <= connect_thres):
                s+=1
            ranges.append(range(num_list[flag], num_list[s-1]))
        else:
            if abs(num_list[s-1]-num_list[s-2]) > connect_thres:
                ranges.append(range(num_list[s-1], num_list[s-1]+1))
            s+=1
            
    mask = torch.zeros(len(pred), dtype=torch.bool, device=pred.device)
    for rng in ranges:
        mask[rng] = True
    return ranges, mask

In [13]:

deg_ranges, deg_mask = get_connected_ranges_and_mask(pred, prob_thres=0.3, connect_thres=3)

for rng in deg_ranges:
    print('start:', rng.start, 'end: ', rng.stop, 'degron_seq:', seq[rng])

AttributeError: 'numpy.ndarray' object has no attribute 'device'

### predict binding E3s of the degrons

In [None]:
e3 = pd.read_csv('motifs/pssm_cutoffs.csv')
pssms = []
for i in e3.index:
    a = pd.read_table('motifs/' + e3.loc[i, 'E3_entry'] + '_' + str(e3.loc[i, 'length']) + '_pssm.txt', index_col=0)
    a.columns = a.columns.astype(int)
    pssms.append(a)
e3['pssm'] = pssms

In [None]:
def findE3(dseq):
    e3s = []
    for i in e3.index:
        scores = [0,]
        ps = e3.loc[i, 'pssm']
        length = e3.loc[i, 'length']
        try:
            for k in range(len(dseq) - length + 1):
                p = dseq[k : k+length]
                s = 0
                for j in range(length):
                    s += ps.loc[p[j], j+1]
                scores.append(s)
            if max(scores) > e3.loc[i, 'thre1000']:
                e3s.append(e3.loc[i, 'E3'])
        except:
            print('error in degron sequence', dseq)
    return e3s

In [None]:
for i in deg_interval:
    print(i, seq[i[0]: i[1]+1], findE3(seq[max(0, i[0]-3): min(i[1]+4, len(seq))]))