In [1]:
# import the necessary packages
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras import backend as K

Using TensorFlow backend.


In [2]:
class SmallerVGGNet:
    """Factory for a compact VGG-style convolutional classifier."""

    @staticmethod
    def build(width, height, depth, classes, finalAct="softmax"):
        """Assemble the (uncompiled) Sequential network.

        Args:
            width: width of the input images.
            height: height of the input images.
            depth: number of channels of the input images.
            classes: number of units in the output layer.
            finalAct: activation applied to the output layer; use
                "softmax" for single-label classification and
                "sigmoid" for multi-label classification.

        Returns:
            The constructed ``Sequential`` model.
        """
        model = Sequential()

        # choose the tensor layout that matches the backend configuration
        if K.image_data_format() == "channels_first":
            inputShape, chanDim = (depth, height, width), 1
        else:
            inputShape, chanDim = (height, width, depth), -1

        # each convolutional stage is described as
        # (filters, number of CONV => RELU => BN repeats, pooling window)
        stages = (
            (32, 1, (3, 3)),
            (64, 2, (2, 2)),
            (128, 2, (2, 2)),
        )

        # only the very first layer of the network declares the input shape
        first_layer_kwargs = {"input_shape": inputShape}
        for filters, repeats, pool in stages:
            for _ in range(repeats):
                model.add(Conv2D(filters, (3, 3), padding="same",
                    **first_layer_kwargs))
                first_layer_kwargs = {}
                model.add(Activation("relu"))
                model.add(BatchNormalization(axis=chanDim))
            # every stage ends with POOL => DROPOUT
            model.add(MaxPooling2D(pool_size=pool))
            model.add(Dropout(0.25))

        # single fully-connected head: FC => RELU => BN => DROPOUT,
        # followed by the output layer and its final activation
        head = [
            Flatten(),
            Dense(1024),
            Activation("relu"),
            BatchNormalization(),
            Dropout(0.5),
            Dense(classes),
            Activation(finalAct),
        ]
        for layer in head:
            model.add(layer)

        return model

In [3]:
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split

In [4]:
import numpy as np

# Read the feature array X and the label array y from the archive.
# np.load on an .npz file returns a lazily-read NpzFile that keeps a file
# handle open; the context manager closes it once both arrays are in memory.
with np.load('AMPs-ML.npz') as data:
    X = data['X']
    y = data['y']

# Hold out 33% of the samples for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

In [None]:
# Column-wise sum of the training labels (one total per label column),
# computed as a single vectorised reduction instead of a Python-level
# loop over rows.
y_train.sum(axis=0)

In [None]:

# Imported in this cell because it runs before the first cell that imports
# to_categorical; without it a fresh kernel raises NameError here.
from keras.utils import to_categorical

# one-hot encode the first label column into two classes ...
y1 = y_train[:, 0]
y1_cate = to_categorical(y1, num_classes=2)

# ... and show how many training samples fall into each class
y1_cate.sum(axis=0)

In [5]:
# Split the task into one binary classifier per label; each classifier
# predicts a single label column.
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score
from sklearn.metrics import hamming_loss

INIT_LR = 1e-3
EPOCHS = 20
BS = 32

# take the number of labels from the data rather than hard-coding it
n_labels = y_train.shape[1]
y_pred = np.zeros([y_test.shape[0], n_labels])
for i in range(n_labels):
    # a fresh model is built on every iteration; drop the backend state of
    # the previous one so memory does not grow with the number of labels
    K.clear_session()

    # one-hot encode label i and count the samples of each class
    y_sub = y_train[:, i]
    y_bin = to_categorical(y_sub, num_classes=2)
    y_count = y_bin.sum(axis=0)

    # weight each class inversely to its frequency
    cw = {0: 1/(2*y_count[0]), 1: 1/(2*y_count[1])}

    model = SmallerVGGNet.build(width=20, height=20, depth=3, classes=2, finalAct='softmax')
    opt = Adam(lr=INIT_LR, decay=INIT_LR/EPOCHS)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    print('[INFO] training network for label {}'.format(i))

    # train for EPOCHS epochs so the run length matches the decay schedule
    model.fit(X_train, y_bin, batch_size=BS, epochs=EPOCHS,
              class_weight=cw, verbose=0)

    # the predicted class is the index of the larger softmax output; this is
    # what predict_classes computed, but it also works on Keras versions
    # where predict_classes is no longer available
    y_pred[:, i] = np.argmax(model.predict(X_test), axis=-1)


print("the subset accuracy: {}".format(accuracy_score(y_test,y_pred)))
print("hamming loss: {}".format(hamming_loss(y_test,y_pred)))

[INFO] training network for label 0
[INFO] training network for label 1
[INFO] training network for label 2
[INFO] training network for label 3
[INFO] training network for label 4
[INFO] training network for label 5


NameError: name 'pred_y' is not defined

In [11]:
# accuracy on the test split for the label stored in column index 5
accuracy_score(y_true=y_test[:, 5], y_pred=y_pred[:, 5])

0.7362637362637363

In [None]:
# Predict all labels simultaneously with a single multi-label network
import numpy as np

# reload the data so this cell is self-contained; the context manager
# closes the .npz archive once both arrays have been read
with np.load('AMPs-ML.npz') as data:
    X = data['X']
    y = data['y']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

INIT_LR = 1e-3
# EPOCHS and BS are the values passed to fit()/evaluate() below (20 epochs,
# batch size 64); the learning-rate decay is derived from the same EPOCHS so
# the schedule matches the actual training length.
EPOCHS = 20
BS = 64

# take the number of labels from the data rather than hard-coding it
n_labels = y_train.shape[1]
model = SmallerVGGNet.build(width=20, height=20, depth=3, classes=n_labels, finalAct="sigmoid")

# initialize the optimizer
opt = Adam(lr=INIT_LR, decay=INIT_LR/EPOCHS)

# per-label weight, inversely proportional to the column-wise label sum
# NOTE(review): class_weight maps a class index to a weight; how Keras
# applies it to multi-hot target rows should be confirmed -- it may not
# weight each label independently.
w = y_train.sum(axis=0)
cw = {i: 1/(n_labels*w[i]) for i in range(n_labels)}

# compile the model using binary cross-entropy rather than
# categorical cross-entropy -- this may seem counterintuitive for
# multi-label classification, but keep in mind that the goal here
# is to treat each output label as an independent Bernoulli distribution
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])

# train the network
print("[INFO] training network...")
model.fit(X_train, y_train,
          epochs=EPOCHS,
          batch_size=BS,
          class_weight=cw,
          verbose=0)

# evaluate on the held-out split and display the result of evaluate()
score = model.evaluate(X_test, y_test, batch_size=BS)
score

In [None]:
# raw outputs of the network for every test sample
p = model.predict(X_test)

# The multi-label metrics need hard 0/1 decisions rather than continuous
# scores, so threshold every output at 0.5 before scoring.
p_bin = (p > 0.5).astype(int)
print("the subset accuracy: {}".format(accuracy_score(y_test, p_bin)))
print("hamming loss: {}".format(hamming_loss(y_test, p_bin)))

In [None]:
# display the label array of the held-out split
y_test

In [None]:
# display the array returned by model.predict(X_test)
p

In [None]:
# display the held-out labels once more -- presumably to compare them by eye
# with the predictions in p; TODO confirm this duplicate cell is still needed
y_test

In [None]:
# scratch arithmetic: one sixth -- presumably related to the 1/(6*w[i])
# class-weight formula; TODO confirm or delete this cell
1/6
