# 例题

## 导入数据

In [None]:
import numpy
import sklearn.metrics as metrics
from skmultilearn.dataset import load_dataset
X_train, y_train, feature_names, label_names = load_dataset('emotions','train')
X_test, y_test, _, _ = load_dataset('emotions', 'test')

In [None]:
print(y_train)

In [None]:
X_train.shape

## Single-class Keras classifier

In [None]:
from keras.models import Sequential
from keras.layers import Dense
def create_model_single_class(input_dim, output_dim):
    # create model
    print("output_dim: {}".format( output_dim))
    print("input_dim: {}".format( input_dim))
    model = Sequential()
    model.add(Dense(12, input_dim=input_dim, activation='relu'))# Dense就是全连接层
    model.add(Dense(8, activation='relu'))
    model.add(Dense(output_dim, activation='sigmoid'))
    # compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model



## Multi-class Keras classifier

In [None]:
def create_model_multi_class(input_dim, output_dim):
    # create model
    model = Sequential()
    model.add(Dense(8, input_dim=input_dim, activation='relu'))
    model.add(Dense(output_dim, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model    

In [None]:
from skmultilearn.problem_transform import BinaryRelevance
from skmultilearn.ext import Keras
KERAS_PARAMS = dict(epochs=5, batch_size=100, verbose=0)
clf = BinaryRelevance(classifier=Keras(create_model_single_class, False, KERAS_PARAMS), require_dense=[True, True])
clf.fit(X_train, y_train)
result = clf.predict(X_test)

In [None]:
from skmultilearn.problem_transform import LabelPowerset
clf = LabelPowerset(classifier=Keras(create_model_multi_class, True, KERAS_PARAMS), require_dense=[True,True])
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)

## 预测精度

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import hamming_loss
print("Use Single-class keras classifier, the subset accuracy: {}".format(accuracy_score(y_test, result)))
print("Use Single-class keras classifier, the hamming loss: {}".format(hamming_loss(y_test, result)))
print("--------------------")
#print("Use Multi-class keras classifier, the subset accuracy: {}".format(accuracy_score(y_test,y_pred)))
#print("Use Multi-class keras classifier, hamming loss: {}".format(hamming_loss(y_test,y_pred)))

# 练习

## 导入数据

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
data = np.load('AMPs-ML.npz')
X = data['X']
y = data['y']
X = np.reshape(X,[-1,1200])
X_train, X_test, y_train, y_test = train_test_split(
                 X, y, test_size=0.33, random_state=42)


In [None]:
from scipy.sparse import lil_matrix
sy_train = lil_matrix(yy_train)
sy_test = lil_matrix(yy_test)

In [None]:
type(sy_train)

## Single-class keras classifier

In [None]:
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras.layers.core import Reshape
def create_model_single_class(input_dim, output_dim):
    print("input_dim: {}".format( input_dim))
    print("output_dim: {}".format( output_dim))
    # create model
    model = Sequential()
    chanDim = -1
    # CONV => RELU => POOL
    model.add(Reshape((20,20,3), input_shape=(1200,)))
    model.add(Conv2D(32, (3, 3), padding="same",
        input_shape=[20,20,3]))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Dropout(0.25))

    # (CONV => RELU) * 2 => POOL
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # (CONV => RELU) * 2 => POOL
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # first (and only) set of FC => RELU layers
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # use a *softmax* activation for single-label classification
    # and *sigmoid* activation for multi-label classification
    model.add(Dense(output_dim, activation='sigmoid'))

    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model    

In [None]:
from skmultilearn.problem_transform import BinaryRelevance
from skmultilearn.ext import Keras
KERAS_PARAMS = dict(epochs=5, batch_size=100, verbose=0)
clf = BinaryRelevance(classifier=Keras(create_model_single_class, False, KERAS_PARAMS), require_dense=[True, True])
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

## Multi-class Keras classifier

In [None]:
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras.layers.core import Reshape
def create_model_multiclass(input_dim, output_dim):
    # create model
    model = Sequential()
    chanDim = -1
    # CONV => RELU => POOL
    model.add(Reshape((20,20,3), input_shape=(1200,)))
    model.add(Conv2D(32, (3, 3), padding="same",
        input_shape=[20,20,3]))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Dropout(0.25))

    # (CONV => RELU) * 2 => POOL
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # (CONV => RELU) * 2 => POOL
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # first (and only) set of FC => RELU layers
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # use a *softmax* activation for single-label classification
    # and *sigmoid* activation for multi-label classification
    model.add(Dense(output_dim, activation='softmax'))

    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model    

In [None]:
from skmultilearn.problem_transform import LabelPowerset
from skmultilearn.ext import Keras
KERAS_PARAMS = dict(epochs=10, batch_size=32, verbose=0)
clf = LabelPowerset(classifier=Keras(create_model_multiclass, True, KERAS_PARAMS), require_dense=[True,True])
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)

## 预测精度

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import hamming_loss
print("Use Single-class keras classifier, the subset accuracy: {}".format(accuracy_score(y_test, result)))
print("Use Single-class keras classifier, the hamming loss: {}".format(hamming_loss(y_test, result)))
print("--------------------")
print("Use Multi-class keras classifier, the subset accuracy: {}".format(accuracy_score(y_test,y_pred)))
print("Use Multi-class keras classifier, hamming loss: {}".format(hamming_loss(y_test,y_pred)))

# AMPs Mutli-Label Prediction

## 导入AMPs多标签数据

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
data = np.load('AMPs-ML.npz')
X = data['X']
y = data['y']
X = np.reshape(X,[-1,1200])

## Multi-class Keras classifier

In [2]:
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras.layers.core import Reshape
def create_smallerVGG_multiclass(input_dim, output_dim):
    # create model
    model = Sequential()
    chanDim = 1
    # CONV => RELU => POOL
    model.add(Reshape((20,20,3), input_shape=(1200,)))
    model.add(Conv2D(32, (2, 2), padding="same",
        input_shape=[20,20,3]))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Dropout(0.25))

    # (CONV => RELU) * 2 => POOL
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # (CONV => RELU) * 2 => POOL
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # first (and only) set of FC => RELU layers
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    model.add(Dense(1024))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.75))
    # use a *softmax* activation for single-label classification
    # and *sigmoid* activation for multi-label classification
    model.add(Dense(output_dim, activation='softmax'))

    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model    

Using TensorFlow backend.


In [None]:
def create_Alex_multiclass(input_dim, output_dim):
    # create model
    model = Sequential()
    chanDim = -1
    model.add(Reshape((20,20,3), input_shape=(1200,)))
    model.add(Conv2D(96, (11, 11), padding="same",
        input_shape=[20,20,3]))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(3, 3),strides=2))
    model.add(BatchNormalization(axis=chanDim))
    
    model.add(Conv2D(256, (5, 5), padding="same"))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(3, 3),strides=2))
    model.add(BatchNormalization(axis=chanDim))
     
    model.add(Conv2D(384, (3, 3), padding="same"))
    model.add(Conv2D(384, (3, 3), padding="same"))
    model.add(Conv2D(256, (3, 3), padding="same"))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=2))
    model.add(BatchNormalization(axis=chanDim))
    
    model.add(Flatten())
    model.add(Dense(4096))
    model.add(Activation("tanh"))
    model.add(Dropout(0.5))
    
    model.add(Dense(4096))
    model.add(Activation("tanh"))
    model.add(Dropout(0.5))
    
    model.add(Dense(output_dim, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

## 交叉验证函数

In [7]:
from sklearn.model_selection import KFold
from skmultilearn.problem_transform import LabelPowerset
from skmultilearn.problem_transform import ClassifierChain
from skmultilearn.ext import Keras
from scipy.sparse import lil_matrix
# 返回预测的结果，预测的结果为各个类的概率
def cross_validate(X,y,n_splits=3,width=20, height=20, num_classes=2, channels=3, n_epoch=100, n_batch=64):
    M = X.shape[0]
    y_pred = np.zeros([M,6])
    kf = KFold(n_splits)
    KERAS_PARAMS = dict(epochs=n_epoch, batch_size=n_batch, verbose=0)
    
    for train_index, test_index in kf.split(X):
        #print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        

        #clf = LabelPowerset(classifier=Keras(create_Alex_multiclass, True, KERAS_PARAMS), require_dense=[True,True])
        #clf = LabelPowerset(classifier=Keras(create_smallerVGG_multiclass, True, KERAS_PARAMS), require_dense=[True,True])
        clf = ClassifierChain(classifier=Keras(create_smallerVGG_multiclass, True, KERAS_PARAMS), require_dense=[True,True])
        clf.fit(X_train,y_train)
        pred = clf.predict(X_test)
        
        y_pred[test_index] = pred.todense()
    return y_pred

## K-Fold预测及其精度

In [8]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import hamming_loss

# 10-Fold 测试
y_pred = cross_validate(X,y,n_splits=10,width=20,height=20,num_classes=6,channels=3,n_epoch=100, n_batch=64)

# 输出预测sub accuracy和hamming loss
#print("Use Single-class keras classifier, the subset accuracy: {}".format(accuracy_score(y_test, result)))
#print("Use Single-class keras classifier, the hamming loss: {}".format(hamming_loss(y_test, result)))
#print("--------------------")
print("Use Multi-class keras classifier, the subset accuracy: {}".format(accuracy_score(y,y_pred)))
print("Use Multi-class keras classifier, hamming loss: {}".format(hamming_loss(y,y_pred)))

ValueError: Error when checking input: expected reshape_2_input to have shape (1200,) but got array with shape (1201,)

使用两个全连接层，1024个节点，丢弃率0.5和0.75
Use Multi-class keras classifier, the subset accuracy: 0.4716981132075472
Use Multi-class keras classifier, hamming loss: 0.13388969521044994

In [None]:
suy_pred[57]

In [None]:
y