In [27]:
import tensorflow as tf
import tflearn
import tflearn.variables as va

# Loading MNIST dataset
import tflearn.datasets.mnist as mnist
trainX, trainY, testX, testY = mnist.load_data(one_hot=True)

Extracting mnist/train-images-idx3-ubyte.gz
Extracting mnist/train-labels-idx1-ubyte.gz
Extracting mnist/t10k-images-idx3-ubyte.gz
Extracting mnist/t10k-labels-idx1-ubyte.gz


In [16]:
import numpy as np
from Bio import SeqIO
import json
import re
# 两联体编码
def dAAOneHot(fastafile):
    daa=[x+y for x in text for y in text]
    X = []
    for seq_record in SeqIO.parse(fastafile, 'fasta'):
        seq = str(seq_record.seq)
        seq = re.sub('[XZUB]',"",seq)
        t = np.zeros(400)
        for j in range(400):
            t[j] = seq.count(daa[j])
        s = sum(t)
        for j in range(400):
            t[j] = t[j]/s
        X.append(t)

    return X

In [22]:
text='PQRYWTMNVELHSFCIKADG'

In [20]:
# 对矩阵进行归一化
def maxminnorm(array):
    maxcols=array.max(axis=0)
    mincols=array.min(axis=0)
    data_shape = array.shape
    data_rows = data_shape[0]
    data_cols = data_shape[1]
    t=np.empty((data_rows,data_cols))
    for i in range(data_cols):
        if maxcols[i] > mincols[i]:
            t[:,i]=(array[:,i]-mincols[i])/(maxcols[i]-mincols[i])
    return t

In [18]:
# 加载来自hmmer profil的数据
# jsonfile 存储hmmer profil数据的json格式文件名
# fastafile 序列的参照文件，fasta格式
# numAA>0 表示从头取numAA个氨基酸的profil，尾不足补0; numAA<0 表示从尾往前取numAA个氨基酸的profil，头不足补0
def load_hmm_prof(jsonfile, fastafile, numAA=50):
    records = SeqIO.parse(fastafile, 'fasta')
    seqID = [str(x.id) for x in records]
    records.close()
    
    M = len(seqID)
    N = abs(numAA) * 20
    
    X = np.ndarray((M,N))

    k = 0
    
    fr = open(jsonfile,'r')
    p = json.load(fr)
    fr.close()
    
    for key in seqID:
        ary = p[key]
        tm = np.array(ary).reshape([-1,20])
        tm = tm[1:,:]
        c = len(ary)-20
        if numAA > 0:
            if c < N:
                tm = maxminnorm(tm)# 归一化
                X[k][:c] = tm.reshape(c)
                X[k][c:] = 0
            elif c == N:
                tm = maxminnorm(tm)# 归一化
                X[k] = tm.reshape(c)
            else:
                t = tm[:numAA,:]
                t = maxminnorm(t)# 归一化
                X[k] = t.reshape(N)
        else:# numAA < 0
            if c < N:
                tm = maxminnorm(tm)
                X[k][-c:] = tm.reshape(c)
                X[k][:-c] = 0
            elif c==N:
                tm = maxminnorm(tm)
                X[k] = tm.reshape(c)
            else:
                t = tm[numAA:,:]
                t = maxminnorm(t)
                X[k] = t.reshape(N)

        k += 1
    return X

In [23]:
families = ['antiB', 'antiC', 'antiF', 'antiH', 'antiP', 'antiV',
            'antiA', 'antiD', 'antiE', 'antiI', 'antiO', 'antiS', 'antiT', 'antiW', 'surface', 'taxis']
#include_T = ['antiB', 'antiC', 'antiF', 'antiH', 'antiP', 'antiV']
#exclude_T = ['antiA', 'antiD', 'anitE', 'antiI', 'antiO', 'antiS', 'antiT', 'antiW', 'surface', 'taxis' ]
targetFile = './data/benchmark/amps_60_Targets.json'
seqFile = './data/benchmark/amps_60_Sequence.json'
include_fastafile = './data/benchmark/include_amps_60.fasta'
exclude_fastafile = './data/benchmark/exclude_amps_60.fasta'

seq_recorders = SeqIO.parse(include_fastafile, 'fasta')
seq_ids = [str(r.id) for r in seq_recorders]
seq_recorders.close()

M = len(seq_ids)

# 构建标签数据。共分6类，是include_T中包含的标签。
# 构建一个6-by-6的矩阵，
ft = open(targetFile,'r')
targets = json.load(ft)
ft.close()

y = np.zeros([M,6])
j = 0
for key in seq_ids:
    keys = targets[key]
    for k in keys:
        i = families.index(k)
        if i < 6:
            y[j][i] = 1
    j+=1

X1 = load_hmm_prof('./data/benchmark/includeAMPs_hmm_profil.json',include_fastafile,20)
X2 = load_hmm_prof('./data/benchmark/includeAMPs_hmm_profil.json',include_fastafile,-20)
X3 = np.array(dAAOneHot(include_fastafile))

X = np.ndarray([X1.shape[0],20,20,3])
X[:,:,:,0] = X1.reshape([-1,20,20])
X[:,:,:,1] = X2.reshape([-1,20,20])
X[:,:,:,2] = X3.reshape([-1,20,20])

In [25]:
from tflearn.data_utils import shuffle
X,y = shuffle(X,y)
X_train,X_test=X[:1100],X[1100:]
y_train,y_test=y[:1100],y[1100:]

In [36]:
import tflearn
import tensorflow as tf
from tflearn.data_utils import shuffle, to_categorical
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation
from sklearn.model_selection import LeaveOneOut, KFold
from sklearn.metrics import accuracy_score, auc, roc_curve, matthews_corrcoef

In [40]:
# Define a dnn using Tensorflow
with tf.Graph().as_default():

    # Model variables
    XX = tf.placeholder("float", [None,20,20,3])
    YY = tf.placeholder("float", [None, 6])

    # Using TFLearn wrappers for network building
    #net = tf.reshape(XX, [-1, 28, 28, 1])
    net = tflearn.conv_2d(XX, 32, 3, activation='relu')
    net = tflearn.max_pool_2d(net, 2)
    net = tflearn.local_response_normalization(net)
    net = tflearn.dropout(net, 0.8)
    net = tflearn.conv_2d(net, 64, 3, activation='relu')
    net = tflearn.max_pool_2d(net, 2)
    net = tflearn.local_response_normalization(net)
    net = tflearn.dropout(net, 0.8)
    net = tflearn.fully_connected(net, 128, activation='tanh')
    net = tflearn.dropout(net, 0.8)
    #net = tflearn.fully_connected(net, 256, activation='tanh')
    #net = tflearn.dropout(net, 0.8)
    net = tflearn.fully_connected(net, 6, activation='linear')
    
    with tf.name_scope('Summaries'):
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=net,labels=YY))
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
        accuracy = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(net, 1), tf.argmax(YY, 1)), tf.float32),
            name="acc")

    # construct two varaibles to add as additional "valiation monitors"
    # these varaibles are evaluated each time validation happens (eg at a snapshot)
    # and the results are summarized and output to the tensorboard events file,
    # together with the accuracy and loss plots.
    #
    # Here, we generate a dummy variable given by the sum over the current
    # network tensor, and a constant variable.  In practice, the validation
    # monitor may present useful information, like confusion matrix
    # entries, or an AUC metric.
    #with tf.name_scope('CustomMonitor'):
    #    test_var = tf.reduce_sum(tf.cast(net, tf.float32), name="test_var")
    #    test_const = tf.constant(32.0, name="custom_constant")
        # Define a train op
    trainop = tflearn.TrainOp(loss=loss, optimizer=optimizer,
                           # validation_monitors=[test_var, test_const],
                            metric=accuracy, batch_size=128)

    # Tensorboard logs stored in /tmp/tflearn_logs/. Using verbose level 2.
    trainer = tflearn.Trainer(train_ops=trainop,
                              tensorboard_dir='/tmp/tflearn_logs/',
                              tensorboard_verbose=2)
    # Training for 10 epochs.
    trainer.fit({XX: X_train, YY: y_train}, val_feed_dicts={XX: X_test, YY: y_test},
                n_epoch=1, show_metric=True, run_id='Summaries_example')

    # Run the following command to start tensorboard:
    # >> tensorboard /tmp/tflearn_logs/
# Navigate with your web browser to http://0.0.0.0:6006/
    

Training Step: 8  | total loss: [1m[32m3.18247[0m[0m | time: 1.783s
| Optimizer | epoch: 001 | loss: 3.18247 - Summaries/acc: 0.8709 -- iter: 1024/1100
Training Step: 9  | total loss: [1m[32m3.48769[0m[0m | time: 2.947s
| Optimizer | epoch: 001 | loss: 3.48769 - Summaries/acc: 0.4388 | val_loss: 4.83203 - val_acc: 0.8921 -- iter: 1100/1100
--


AttributeError: 'Trainer' object has no attribute 'pred_label'

In [32]:
y_test

array([[1., 0., 1., 0., 0., 0.],
       [1., 1., 1., 1., 0., 1.],
       [1., 0., 1., 0., 1., 0.],
       ...,
       [1., 0., 0., 0., 0., 0.],
       [1., 1., 0., 0., 0., 0.],
       [1., 0., 1., 0., 0., 0.]])

In [17]:
with tf.Graph().as_default():

    # Model variables
    X = tf.placeholder("float", [None, 784])
    Y = tf.placeholder("float", [None, 10])

    W1 = tf.Variable(tf.random_normal([784, 256]))
    W2 = tf.Variable(tf.random_normal([256, 256]))
    W3 = tf.Variable(tf.random_normal([256, 10]))
    b1 = tf.Variable(tf.random_normal([256]))
    b2 = tf.Variable(tf.random_normal([256]))
    b3 = tf.Variable(tf.random_normal([10]))

    # Multilayer perceptron
    def dnn(x):
        x = tf.nn.tanh(tf.add(tf.matmul(x, W1), b1))
        x = tf.nn.tanh(tf.add(tf.matmul(x, W2), b2))
        x = tf.add(tf.matmul(x, W3), b3)
        return x

    net = dnn(X)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=Y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(net, 1), tf.argmax(Y, 1)), tf.float32),
        name='acc')

    # Using TFLearn Trainer
    # Define a training op (op for backprop, only need 1 in this model)
    trainop = tflearn.TrainOp(loss=loss, optimizer=optimizer,
                              metric=accuracy, batch_size=128)

    # Create Trainer, providing all training ops. Tensorboard logs stored
    # in /tmp/tflearn_logs/. It is possible to change verbose level for more
    # details logs about gradients, variables etc...
    trainer = tflearn.Trainer(train_ops=trainop, tensorboard_verbose=0)
    # Training for 10 epochs.
    trainer.fit({X: trainX, Y: trainY}, val_feed_dicts={X: testX, Y: testY},
n_epoch=1, show_metric=True)

---------------------------------
Run id: 7EP0AW
Log directory: /tmp/tflearn_logs/
INFO:tensorflow:Summary name acc_0/ (raw) is illegal; using acc_0/__raw_ instead.


INFO:tensorflow:Summary name acc_0/ (raw) is illegal; using acc_0/__raw_ instead.


---------------------------------
Training samples: 55000
Validation samples: 10000
--


ValueError: Tag: acc:0 cannot be found in summaries list.

In [46]:
import numpy as np

def sigmoid(x):
    return 1.0/(1+np.exp(-x))
 
labels1=np.array([[0.,1.,0.],[1.,1.,0.],[0.,0.,1.]])#不一定只属于一个类别
logits1=np.array([[0,1,1.],[1,1,0.],[0.,0.,1]])
y_pred1=sigmoid(logits1)
prob_error11=-labels1*np.log(y_pred1)-(1-labels1)*np.log(1-y_pred1)
print(prob_error11)
 
print(".............")
with tf.Session() as sess:
    loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels1,logits=logits1)
    loss = tf.reduce_sum(loss, axis=1)
    loss = tf.reduce_mean(loss)
    acc1=tf.cast(tf.equal(labels1,logits1),tf.float32)
    acc2=tf.reduce_mean(acc1)
    print(sess.run(acc2))
    print(sess.run(acc1))

[[0.69314718 0.31326169 1.31326169]
 [0.31326169 0.31326169 0.69314718]
 [0.69314718 0.69314718 0.31326169]]
.............
0.8888889
[[1. 1. 0.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [None]:
y_pred1