# 导包

In [21]:
import pandas as pd
import numpy as np
from prettytable import PrettyTable, from_csv
import csv
import os
import torch
import torch.nn as nn
import os
import pandas as pd
import csv
from Bio import SeqIO
import pickle
import math

In [22]:
# 氨基酸集合（字典）
amino_acid_set = { "A": 1, "C": 2, "B": 3, "E": 4, "D": 5, "G": 6, 
				"F": 7, "I": 8, "H": 9, "K": 10, "M": 11, "L": 12, 
				"O": 13, "N": 14, "Q": 15, "P": 16, "S": 17, "R": 18, 
				"U": 19, "T": 20, "W": 21, 
				"V": 22, "Y": 23, "X": 24, 
				"Z": 25 } # consider non-standard residues
# 氨基酸数目
amino_acid_num = 25
# 二级结构类别集合（字典）
ss_set = {"H": 1, "C": 2, "E": 3} # revise order, not necessary if training your own model
# 二级结构类别数目
ss_number = 3
# 氨基酸类别集合（字典）
physicochemical_set={'A': 1, 'C': 3, 'B': 7, 'E': 5, 'D': 5, 'G': 2, 'F': 1, 
			'I': 1, 'H': 6, 'K': 6, 'M': 1, 'L': 1, 'O': 7, 'N': 4, 
			'Q': 4, 'P': 1, 'S': 4, 'R': 6, 'U': 7, 'T': 4, 'W': 2, 
			'V': 1, 'Y': 4, 'X': 7, 'Z': 7}
# 残基列表
residue_list = list(amino_acid_set.keys())
# 二级结构列表
ss_list = list(ss_set.keys())


new_key_list = []
for i in residue_list:
    for j in ss_list:
        str_1 = str(i)+str(j)
        new_key_list.append(str_1)

new_value_list = [x+1 for x in list(range(amino_acid_num*ss_number))]

seq_ss_dict = dict(zip(new_key_list,new_value_list))
seq_ss_number = amino_acid_num*ss_number #75



def label_sequence(line, pad_prot_len, res_ind):
	X = np.zeros(pad_prot_len)

	for i, res in enumerate(line[:pad_prot_len]):
		X[i] = res_ind[res]

	return X

def label_seq_ss(line, pad_prot_len, res_ind):
	line = line.strip().split(',')
	X = np.zeros(pad_prot_len)
	for i ,res in enumerate(line[:pad_prot_len]):
		X[i] = res_ind[res]
	return X


def sigmoid(x):
	return 1 / (1 + math.exp(-x))

sigmoid_array=np.vectorize(sigmoid)

def padding_sigmoid_pssm(x,N):
	x = sigmoid_array(x)
	padding_array = np.zeros([N,x.shape[1]])
	if x.shape[0]>=N: # sequence is longer than N
		padding_array[:N,:x.shape[1]] = x[:N,:]
	else:
		padding_array[:x.shape[0],:x.shape[1]] = x
	return padding_array

def padding_intrinsic_disorder(x,N):
	padding_array = np.zeros([N,x.shape[1]])
	if x.shape[0]>=N: # sequence is longer than N
		padding_array[:N,:x.shape[1]] = x[:N,:]
	else:
		padding_array[:x.shape[0],:x.shape[1]] = x
	return padding_array


def cls_scores(label, pred):
	label = label.reshape(-1)
	pred = pred.reshape(-1)
	# r2_score, mean_squred_error are ignored
	return roc_auc_score(label, pred), average_precision_score(label, pred)


# 定义模型结构

In [23]:
class GlobalMaxPool1d(nn.Module):
    def __init__(self):
        super(GlobalMaxPool1d,self).__init__()
    def forward(self,x):
        output, _ = torch.max(x,1)
        return output

class ConvNN(nn.Module):
    def __init__(self,in_dim,c_dim,kernel_size):
        super(ConvNN,self).__init__()
        self.convs = nn.Sequential(
            nn.Conv1d(in_channels=in_dim, out_channels= c_dim, kernel_size=kernel_size,padding='same'),
            nn.ReLU(),
            nn.Conv1d(in_channels=c_dim, out_channels= c_dim*2, kernel_size=kernel_size,padding='same'),
            nn.ReLU(),
            nn.Conv1d(in_channels=c_dim*2, out_channels= c_dim*3, kernel_size=kernel_size,padding='same'),
            nn.ReLU(),
            #GlobalMaxPool1d() # 192
            )
    def forward(self,x):
        x = self.convs(x)
        return x

class Self_Attention(nn.Module):
    # input : batch_size * seq_len * input_dim
    # q : batch_size * input_dim * dim_k
    # k : batch_size * input_dim * dim_k
    # v : batch_size * input_dim * dim_v
    def __init__(self,input_dim,dim_k,dim_v):
        super(Self_Attention,self).__init__()
        self.q = nn.Linear(input_dim,dim_k)
        self.k = nn.Linear(input_dim,dim_k)
        self.v = nn.Linear(input_dim,dim_v)
        self._norm_fact = 1 / math.sqrt(dim_k)
        
    
    def forward(self,x):
        Q = self.q(x) # Q: batch_size * seq_len * dim_k
        K = self.k(x) # K: batch_size * seq_len * dim_k
        V = self.v(x) # V: batch_size * seq_len * dim_v
         
        atten = nn.Softmax(dim=-1)(torch.bmm(Q,K.permute(0,2,1))) * self._norm_fact # Q * K.T() # batch_size * seq_len * seq_len
        
        output = torch.bmm(atten,V) # Q * K.T() * V # batch_size * seq_len * dim_v
        
        return output


class CAMP(nn.Module):
    def __init__(self):
        super(CAMP,self).__init__()
        #self.config = config
        self.embed_seq = nn.Embedding(65+1, 128) # padding_idx=0, vocab_size = 65/25, embedding_size=128
        self.embed_ss = nn.Embedding(75+1,128)
        self.embed_two = nn.Embedding(7+1,128)
        self.pep_convs = ConvNN(512,64,7)
        self.prot_convs = ConvNN(512,64,8)
        self.pep_fc = nn.Linear(3,128)    
        self.prot_fc = nn.Linear(23,128)
        self.global_max_pooling = GlobalMaxPool1d()
        #self.dnns = DNN(config.in_dim,config.d_dim1,config.d_dim2,config.dropout)
        self.dnns = nn.Sequential(
            nn.Linear(640,1024),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(1024,1024),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(1024,512))
        
        self.att = Self_Attention(128,128,128)
        #c_dim
        self.output = nn.Linear(512,1)

    #@torchsnooper.snoop()
    def forward(self, x_pep,x_prot,x_pep_ss,x_prot_ss,x_pep_2,x_prot_2,x_pep_dense,x_prot_dense):

        pep_seq_emb = self.embed_seq(x_pep.long())#.type(torch.LongTensor))
        prot_seq_emb = self.embed_seq(x_prot.long())#.type(torch.LongTensor))
        pep_ss_emb = self.embed_ss(x_pep_ss.long())#type(torch.LongTensor))
        prot_ss_emb = self.embed_ss(x_prot_ss.long())
        pep_2_emb = self.embed_two(x_pep_2.long())
        prot_2_emb = self.embed_two(x_prot_2.long())
        pep_dense = self.pep_fc(x_pep_dense.float())
        prot_dense = self.prot_fc(x_prot_dense.float())
        

        encode_peptide = torch.cat([pep_seq_emb, pep_ss_emb, pep_2_emb, pep_dense],dim=-1)
        encode_protein = torch.cat([prot_seq_emb, prot_ss_emb, prot_2_emb, prot_dense],dim=-1)

        encode_peptide = encode_peptide.permute(0,2,1)
        encode_protein = encode_protein.permute(0,2,1)

        encode_peptide = self.pep_convs(encode_peptide)
        encode_peptide = encode_peptide.permute(0,2,1)
        encode_peptide_global = self.global_max_pooling(encode_peptide)

        encode_protein = self.prot_convs(encode_protein)
        encode_protein = encode_protein.permute(0,2,1)
        encode_protein_global = self.global_max_pooling(encode_protein)
        
        # self-attention
        pep_seq_att = self.embed_seq(x_pep.long())
        peptide_att = self.att(pep_seq_att)
        peptide_att = self.global_max_pooling(peptide_att)
        
        prot_seq_att = self.embed_seq(x_prot.long())
        protein_att = self.att(prot_seq_att)
        protein_att = self.global_max_pooling(protein_att)

        encode_interaction = torch.cat([encode_peptide_global,encode_protein_global,peptide_att,protein_att],axis=-1)
        encode_interaction = self.dnns(encode_interaction)
        predictions = torch.sigmoid(self.output(encode_interaction))

        return predictions.squeeze(dim=1)

# 加载训练好的模型

In [24]:
def load_checkpoint(filepath):
    ckpt = torch.load(filepath, map_location=torch.device('cpu'))
    model = ckpt['model']
    model.load_state_dict(ckpt['model_state_dict'])
    for parameter in model.parameters():
        parameter.requires_grad=False
    model.eval()
    return model
    
device = torch.device('cuda:1')
camp = load_checkpoint('/geniusland/home/liuxianliang1/code/PeptideOpt_4090/amp/inference/CAMP/CAMP_pytorch/model_full_ckpts_4.pkl')
camp = camp.to(device)

# 准备数据

In [25]:

data = pd.read_csv('data/Antimicrobials.csv')
mask = data['sequence'].str.len() <= 25
peptides = data.loc[mask]['sequence'].tolist()
peps_len = len(peptides)
# cxcr4
protein = 'MEGISIYTSDNYTEEMGSGDYDSMKEPCFREENANFNKIFLPTIYSIIFLTGIVGNGLVI\
LVMGYQKKLRSMTDKYRLHLSVADLLFVITLPFWAVDAVANWYFGNFLCKAVHVIYTVNL\
YSSVLILAFISLDRYLAIVHATNSQRPRKLLAEKVVYVGVWIPALLLTIPDFIFANVSEA\
DDRYICDRFYPNDLWVVVFQFQHIMVGLILPGIVILSCYCIIISKLSHSKGHQKRKALKT\
TVILILAFFACWLPYYIGISIDSFILLEIIKQGCEFENTVHKWISITEALAFFHCCLNPI\
LYAFLGAKFKTSAQHALTSVSRGSSLKILSKGKRGGHSSVSTESESSSFHSS'

# acthr
# protein = 'MKHIINSYENINNTARNNSDCPRVVLPEEIFFTISIVGVLENLIVLLAVFKNKNLQAPMY\
# FFICSLAISDMLGSLYKILENILIILRNMGYLKPRGSFETTADDIIDSLFVLSLLGSIFS\
# LSVIAADRYITIFHALRYHSIVTMRRTVVVLTVIWTFCTGTGITMVIFSHHVPTVITFTS\
# LFPLMLVFILCLYVHMFLLARSHTRKISTLPRANMKGAITLTILLGVFIFCWAPFVLHVL\
# LMTFCPSNPYCACYMSLFQVNGMLIMCNAVIDPFIYAFRSPELRDAFKKMIFCSRYW'

with open('features/amps.fasta', 'w') as wf:
    for i in range(peps_len):
        f = open(f'features/{i+1}.fasta','w')
        f.write('>peptide')
        f.write('\n')
        f.write(peptides[i])
        f.write('\n')
        f.close()

        wf.write(f'>{i+1}')
        wf.write('\n')
        wf.write(peptides[i])
        wf.write('\n')

f = open('features/target_protein.fasta','w')
f.write('>target_protein')
f.write('\n')
f.write(protein)
f.write('\n')
f.close()               

# 获取序列二级结构信息


In [26]:
if not os.path.exists('features/peptides.out.ss'):
    os.system("/geniusland/home/liuxianliang1/code/PeptideOpt_4090/SCRATCH-1D_1.2/bin/run_SCRATCH-1D_predictors.sh\
        features/amps.fasta features/peptides.out 32")
if not os.path.exists('features/protein.out.ss'):
    os.system("/geniusland/home/liuxianliang1/code/PeptideOpt_4090/SCRATCH-1D_1.2/bin/run_SCRATCH-1D_predictors.sh\
        features/target_protein.fasta features/protein.out 32")
# 保存序列以及对应的二级结构信息
wf = open('features/example.tsv','w')
tsv_w = csv.writer(wf, delimiter='\t')
tsv_w.writerow(['prot_seq', 'pep_seq', 'pep_concat_seq', 'prot_concat_seq'])

with open('features/peptides.out.ss', 'r') as f:
    peps_data = f.readlines()
for i in range(len(peps_data)):
    peps_data[i] = peps_data[i].strip('\n')
with open('features/protein.out.ss', 'r') as f:
    prot_data = f.readlines()
for i in range(len(prot_data)):
    prot_data[i] = prot_data[i].strip('\n')

# 目标蛋白
prot_seq = protein
# 目标蛋白二级结构信息
prot_concat_seq = ""
for j in range(len(prot_seq)):
    if j < len(prot_seq) - 1:
        prot_concat_seq = prot_concat_seq + prot_seq[j] + prot_data[1][j] + ','
    else:
        prot_concat_seq = prot_concat_seq + prot_seq[j] + prot_data[1][j]
# 多肽二级结构信息
pep_concat_seq = ""
filename = 'features/amps.fasta'
iterator = SeqIO.parse(filename,'fasta')
seqs = []
for record in iter(iterator):
    seqs.append(record)

for i in range(len(seqs)):
    for j in range(len(seqs[i].seq)):
        if j < len(seqs[i].seq) - 1:
            pep_concat_seq = pep_concat_seq + seqs[i].seq[j] + peps_data[2*i+1][j] + ','
        else:
            pep_concat_seq = pep_concat_seq + seqs[i].seq[j] + peps_data[2*i+1][j]
    tsv_w.writerow([prot_seq, seqs[i].seq, pep_concat_seq, prot_concat_seq])
    pep_concat_seq = ""
wf.close()


###################################
#                                 #
#  SCRATCH-1D release 1.2 (2018)  #
#                                 #
###################################

[SCRATCH-1D_predictions.pl] 10 protein sequence(s) found
[SCRATCH-1D_predictions.pl] generating sequence profiles...
[SCRATCH-1D_predictions.pl] running SCRATCH-1D predictors...
[SCRATCH-1D_predictions.pl] running homology analysis...
[SCRATCH-1D_predictions.pl] writing SSpro predictions...
[SCRATCH-1D_predictions.pl] writing SSpro8 predictions...
[SCRATCH-1D_predictions.pl] writing ACCpro predictions...
[SCRATCH-1D_predictions.pl] writing ACCpro20 predictions...
[SCRATCH-1D_predictions.pl] job successfully completed!


###################################
#                                 #
#  SCRATCH-1D release 1.2 (2018)  #
#                                 #
###################################

[SCRATCH-1D_predictions.pl] 1 protein sequence(s) found
[SCRATCH-1D_predictions.pl] generating sequence profile

# 计算目标蛋白的pssm矩阵

In [27]:
prot_pssm_dict = {}   
# 调用命令行计算多序列比对结果(pssm)
if not os.path.exists('features/target_protein.pssm'):
    os.system("psiblast -query /geniusland/home/liuxianliang1/code/PeptideOpt_4090/features/target_protein.fasta -db /geniusland/dataset/uniprot/uniref90/uniref90.fasta -num_iterations 3 -out_ascii_pssm features/target_protein.pssm")
# 解析结果
with open('/geniusland/home/liuxianliang1/code/PeptideOpt_4090/features/target_protein.pssm','r') as rf:
    data = rf.readlines()
tmp = np.zeros((len(protein),20))
for i in range(3, 3+len(protein)):
    char = data[i].split(' ')
    count = 0
    for j in range(6, len(char)):
        if char[j] != char[0]:
            tmp[i-3][count] = float(char[j])
            count += 1
        if count >= 20:
            break
prot_pssm_dict[protein] = tmp

# 计算内在紊乱度信息

In [28]:
prot_intrinsic_dict = {}
pep_intrinsic_dict = {}
# 多肽序列
for i in range(peps_len):
    result = os.popen(f"python3 /geniusland/home/liuxianliang1/code/PeptideOpt_4090/amp/iupred2a/iupred2a.py -a /geniusland/home/liuxianliang1/code/PeptideOpt_4090/features/{i+1}.fasta long")
    res = result.read()
    long_val = res.splitlines()[0].split(',')
    long_list = np.zeros((len(long_val), 1))
    long_list[0][0] = float(long_val[0].split('[')[1])
    for j in range(1, len(long_val)-1):
        long_list[j][0] = float(long_val[j])
    long_list[j][0] = float(long_val[j].split(']')[0])

    result = os.popen(f"python3 /geniusland/home/liuxianliang1/code/PeptideOpt_4090/amp/iupred2a/iupred2a.py -a /geniusland/home/liuxianliang1/code/PeptideOpt_4090/features/{i+1}.fasta short")
    res = result.read()
    short_val = res.splitlines()[0].split(',')
    short_list = np.zeros((len(short_val), 1))
    short_list[0][0] = float(short_val[0].split('[')[1])
    for j in range(1, len(short_val)-1):
        short_list[j][0] = float(short_val[j])
    short_list[j][0] = float(short_val[j].split(']')[0])

    anchor_val = res.splitlines()[1].split(',')
    anchor_list = np.zeros((len(anchor_val), 1))
    anchor_list[0][0] = float(anchor_val[0].split('[')[1])
    for j in range(1, len(anchor_val)-1):
        anchor_list[j][0] = float(anchor_val[j])
    anchor_list[j][0] = float(anchor_val[j].split(']')[0])

    results = np.concatenate((long_list, short_list, anchor_list), axis=1)
    pep_intrinsic_dict[peptides[i]] = results
# 靶点蛋白质
result = os.popen("python3 /geniusland/home/liuxianliang1/code/PeptideOpt_4090/amp/iupred2a/iupred2a.py -a /geniusland/home/liuxianliang1/code/PeptideOpt_4090/features/target_protein.fasta long")
res = result.read()
long_val = res.splitlines()[0].split(',')
long_list = np.zeros((len(long_val), 1))
long_list[0][0] = float(long_val[0].split('[')[1])
for j in range(1, len(long_val)-1):
    long_list[j][0] = float(long_val[j])
long_list[j][0] = float(long_val[j].split(']')[0])

result = os.popen("python3 /geniusland/home/liuxianliang1/code/PeptideOpt_4090/amp/iupred2a/iupred2a.py -a /geniusland/home/liuxianliang1/code/PeptideOpt_4090/features/target_protein.fasta short")
res = result.read()
short_val = res.splitlines()[0].split(',')
short_list = np.zeros((len(short_val), 1))
short_list[0][0] = float(short_val[0].split('[')[1])
for j in range(1, len(short_val)-1):
    short_list[j][0] = float(short_val[j])
short_list[j][0] = float(short_val[j].split(']')[0])

anchor_val = res.splitlines()[1].split(',')
anchor_list = np.zeros((len(anchor_val), 1))
anchor_list[0][0] = float(anchor_val[0].split('[')[1])
for j in range(1, len(anchor_val)-1):
    anchor_list[j][0] = float(anchor_val[j])
anchor_list[j][0] = float(anchor_val[j].split(']')[0])
results = np.concatenate((long_list, short_list, anchor_list), axis=1)
prot_intrinsic_dict[protein] = results

# 特征拼接

In [29]:
protein_dense_feature_dict = np.concatenate((prot_pssm_dict[protein], prot_intrinsic_dict[protein]),axis=1)
with open('features/protein_dense_feature_dict','wb') as f:
    pickle.dump(protein_dense_feature_dict,f)

f = open('features/example.tsv')
pep_set = set()
seq_set = set()
pep_ss_set = set()
seq_ss_set = set()
for line in f.readlines()[1:]: # if the file has headers and pay attention to the columns (whether have peptide binding site labels)
    # seq, pep, label, pep_ss, seq_ss  = line.strip().split('\t')
    seq, pep, pep_ss, seq_ss  = line.strip().split('\t')
    pep_set.add(pep)
    seq_set.add(seq)
    pep_ss_set.add(pep_ss)
    seq_ss_set.add(seq_ss)

f.close()
pep_len = [len(pep) for pep in pep_set]
seq_len = [len(seq) for seq in seq_set]
pep_ss_len = [len(pep_ss) for pep_ss in pep_ss_set]
seq_ss_len = [len(seq_ss) for seq_ss in seq_ss_set]

pep_len.sort()
seq_len.sort()
pep_ss_len.sort()
seq_ss_len.sort()
pad_pep_len = 50 
pad_prot_len = seq_len[int(0.8*len(seq_len))-1]
# print('num of peptides', len(pep_len), 'pad_pep_len', pad_pep_len)
# print('seq_set', len(seq_len), 'pad_prot_len', pad_prot_len)
# print('num of peptide ss', len(pep_ss_len), 'pad_pep_len', pad_pep_len)
# print('seq_ss_set', len(seq_ss_len), 'pad_prot_len', pad_prot_len)
# np.save('features/pad_pep_len',pad_pep_len)
# np.save('features/pad_prot_len',pad_prot_len)

peptide_feature_dict = {}
protein_feature_dict = {}

peptide_ss_feature_dict = {}
protein_ss_feature_dict = {}

peptide_2_feature_dict = {}
protein_2_feature_dict = {}

peptide_dense_feature_dict = {}
protein_dense_feature_dict = {}

f = open('features/example.tsv')
for line in f.readlines()[1:]:
    seq, pep, pep_ss, seq_ss  = line.strip().split('\t')
    if pep not in peptide_feature_dict:
        feature = label_sequence(pep, pad_pep_len, amino_acid_set)
        peptide_feature_dict[pep] = feature
    if seq not in protein_feature_dict:
        feature = label_sequence(seq, pad_prot_len, amino_acid_set)
        protein_feature_dict[seq] = feature
    if pep_ss not in peptide_ss_feature_dict:
        feature = label_seq_ss(pep_ss, pad_pep_len, seq_ss_dict)
        peptide_ss_feature_dict[pep_ss] = feature
    if seq_ss not in protein_ss_feature_dict:
        feature = label_seq_ss(seq_ss, pad_prot_len, seq_ss_dict)
        protein_ss_feature_dict[seq_ss] = feature
    if pep not in peptide_2_feature_dict:
        feature = label_sequence(pep, pad_pep_len, physicochemical_set)
        peptide_2_feature_dict[pep] = feature
    if seq not in protein_2_feature_dict:
        feature = label_sequence(seq, pad_prot_len, physicochemical_set)
        protein_2_feature_dict[seq] = feature
    if pep not in peptide_dense_feature_dict:
        feature = padding_intrinsic_disorder(pep_intrinsic_dict[pep], pad_pep_len)
        peptide_dense_feature_dict[pep] = feature
    if seq not in protein_dense_feature_dict:
        feature_pssm = padding_sigmoid_pssm(prot_pssm_dict[seq], pad_prot_len)
        feature_intrinsic = padding_intrinsic_disorder(prot_intrinsic_dict[seq], pad_prot_len)
        feature_dense = np.concatenate((feature_pssm, feature_intrinsic), axis=1)
        protein_dense_feature_dict[seq] = feature_dense

f.close()

print('load feature dict')
X_pep, X_prot, X_pep_SS, X_prot_SS, X_pep_2, X_prot_2 = [], [], [], [], [], []
X_dense_pep,X_dense_prot = [],[]
pep_sequence, prot_sequence, Y = [], [], []
with open('features/example.tsv') as f:  # change your own data here
    for line in f.readlines()[1:]:
        protein, peptide, pep_ss, prot_ss  = line.strip().split('\t')
        # protein, peptide,label, pep_ss, prot_ss  = line.strip().split('\t')
        pep_sequence.append(peptide)
        prot_sequence.append(protein)

        X_pep.append(peptide_feature_dict[peptide])
        X_prot.append(protein_feature_dict[protein])
        X_pep_SS.append(peptide_ss_feature_dict[pep_ss])
        X_prot_SS.append(protein_ss_feature_dict[prot_ss])
        X_pep_2.append(peptide_2_feature_dict[peptide])
        X_prot_2.append(protein_2_feature_dict[protein])
        X_dense_pep.append(peptide_dense_feature_dict[peptide])
        X_dense_prot.append(protein_dense_feature_dict[protein])
        
X_pep = torch.from_numpy(np.array(X_pep)).to(device)
X_prot = torch.from_numpy(np.array(X_prot)).to(device)
X_pep_ss = torch.from_numpy(np.array(X_pep_SS)).to(device)
X_prot_ss = torch.from_numpy(np.array(X_prot_SS)).to(device)
X_pep_2 = torch.from_numpy(np.array(X_pep_2)).to(device)
X_prot_2 = torch.from_numpy(np.array(X_prot_2)).to(device)
X_pep_dense = torch.from_numpy(np.array(X_dense_pep)).to(device)
X_prot_dense = torch.from_numpy(np.array(X_dense_prot)).to(device)

load feature dict


# 预测

In [30]:
camp.eval()
preds = []
pred=camp(X_pep,X_prot,X_pep_ss,X_prot_ss,X_pep_2,X_prot_2,X_pep_dense,X_prot_dense)
preds.extend(pred.detach().cpu().numpy().tolist())
preds = np.array(preds) 

In [31]:
for i in preds:
    print(i)

0.09502498805522919
0.027402497828006744
0.014363469555974007
0.0008419280056841671
3.308707891847007e-05
0.006236928049474955
5.825800286629601e-08
3.649595382848592e-12
6.020334097911473e-08
0.0864051803946495


In [28]:
for i in preds:
    print(i)

0.0009264955879189074
0.01075808983296156
3.281701594914921e-07
0.007561501115560532
0.2916243076324463
5.587330451817252e-05
4.477945481085044e-07
2.7672586444538183e-10
0.00021700849174521863
0.0028090523555874825
0.0014142475556582212


In [20]:
for i in preds:
    print(i)

0.06463415920734406
0.9980089068412781
0.03452111780643463
0.9999321699142456
0.0005423002294264734
8.511067335348343e-07
5.082506030623657e-12
3.1918244047801636e-08
6.124983853439403e-11
0.30908203125
2.2646936486125924e-06
