# 例题

## 导入数据

In [40]:
import numpy
import sklearn.metrics as metrics
from skmultilearn.dataset import load_dataset
X_train, y_train, feature_names, label_names = load_dataset('emotions','train')
X_test, y_test, _, _ = load_dataset('emotions', 'test')

emotions:train - exists, not redownloading
emotions:test - exists, not redownloading


In [54]:
print(y_test[0:7])

[[1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]]


## Single-class Keras classifier

In [47]:
from keras.models import Sequential
from keras.layers import Dense
def create_model_single_class(input_dim, output_dim):
    # create model
    print("output_dim: {}".format( output_dim))
    print("input_dim: {}".format( input_dim))
    model = Sequential()
    model.add(Dense(12, input_dim=input_dim, activation='relu'))# Dense就是全连接层
    model.add(Dense(8, activation='relu'))
    model.add(Dense(output_dim, activation='sigmoid'))
    # compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model



## Multi-class Keras classifier

In [None]:
def create_model_multi_class(input_dim, output_dim):
    # create model
    model = Sequential()
    model.add(Dense(8, input_dim=input_dim, activation='relu'))
    model.add(Dense(output_dim, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model    

In [50]:
from skmultilearn.problem_transform import BinaryRelevance
from skmultilearn.ext import Keras
KERAS_PARAMS = dict(epochs=5, batch_size=100, verbose=0)
clf = BinaryRelevance(classifier=Keras(create_model_single_class, False, KERAS_PARAMS), require_dense=[True, True])
clf.fit(X_train, y_train)
result = clf.predict(X_test)

output_dim: 1
input_dim: 1200
output_dim: 1
input_dim: 1200
output_dim: 1
input_dim: 1200
output_dim: 1
input_dim: 1200
output_dim: 1
input_dim: 1200
output_dim: 1
input_dim: 1200


In [None]:
from skmultilearn.problem_transform import LabelPowerset
clf = LabelPowerset(classifier=Keras(create_model_multi_class, True, KERAS_PARAMS), require_dense=[True,True])
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)

## 预测精度

In [51]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import hamming_loss
print("Use Single-class keras classifier, the subset accuracy: {}".format(accuracy_score(y_test, result)))
print("Use Single-class keras classifier, the hamming loss: {}".format(hamming_loss(y_test, result)))
print("--------------------")
#print("Use Multi-class keras classifier, the subset accuracy: {}".format(accuracy_score(y_test,y_pred)))
#print("Use Multi-class keras classifier, hamming loss: {}".format(hamming_loss(y_test,y_pred)))

Use Single-class keras classifier, the subset accuracy: 0.47692307692307695
Use Single-class keras classifier, the hamming loss: 0.13553113553113552
--------------------


# 使用

## 导入数据

In [49]:
import numpy as np
from sklearn.model_selection import train_test_split
data = np.load('AMPs-ML.npz')
X = data['X']
y = data['y']
X = np.reshape(X,[-1,1200])
X_train, X_test, y_train, y_test = train_test_split(
                 X, y, test_size=0.33, random_state=42)


In [36]:
y_train.shape

(923, 6)

## Single-class keras classifier

In [52]:
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras.layers.core import Reshape
def create_model_single_class(input_dim, output_dim):
    print("input_dim: {}".format( input_dim))
    print("output_dim: {}".format( output_dim))
    # create model
    model = Sequential()
    chanDim = -1
    # CONV => RELU => POOL
    model.add(Reshape((20,20,3), input_shape=(1200,)))
    model.add(Conv2D(32, (3, 3), padding="same",
        input_shape=[20,20,3]))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Dropout(0.25))

    # (CONV => RELU) * 2 => POOL
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # (CONV => RELU) * 2 => POOL
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # first (and only) set of FC => RELU layers
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # use a *softmax* activation for single-label classification
    # and *sigmoid* activation for multi-label classification
    model.add(Dense(output_dim, activation='sigmoid'))

    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model    

In [53]:
from skmultilearn.problem_transform import BinaryRelevance
from skmultilearn.ext import Keras
KERAS_PARAMS = dict(epochs=3, batch_size=32, verbose=0)
clf = BinaryRelevance(classifier=Keras(create_model_single_class, True, KERAS_PARAMS), require_dense=[True,True])
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)

input_dim: 1200
output_dim: 2


ValueError: Error when checking target: expected dense_64 to have shape (2,) but got array with shape (1,)

## Multi-class Keras classifier

In [20]:
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras.layers.core import Reshape
def create_model_multiclass(input_dim, output_dim):
    # create model
    model = Sequential()
    chanDim = -1
    # CONV => RELU => POOL
    model.add(Reshape((20,20,3), input_shape=(1200,)))
    model.add(Conv2D(32, (3, 3), padding="same",
        input_shape=[20,20,3]))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Dropout(0.25))

    # (CONV => RELU) * 2 => POOL
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # (CONV => RELU) * 2 => POOL
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # first (and only) set of FC => RELU layers
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # use a *softmax* activation for single-label classification
    # and *sigmoid* activation for multi-label classification
    model.add(Dense(output_dim, activation='softmax'))

    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model    

In [21]:
from skmultilearn.problem_transform import LabelPowerset
from skmultilearn.ext import Keras
KERAS_PARAMS = dict(epochs=10, batch_size=32, verbose=0)
clf = LabelPowerset(classifier=Keras(create_model_multiclass, True, KERAS_PARAMS), require_dense=[True,True])
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)

## 预测精度

In [22]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import hamming_loss
print("Use Single-class keras classifier, the subset accuracy: {}".format(accuracy_score(y_test, result)))
print("Use Single-class keras classifier, the hamming loss: {}".format(hamming_loss(y_test, result)))
print("--------------------")
print("Use Multi-class keras classifier, the subset accuracy: {}".format(accuracy_score(y_test,y_pred)))
print("Use Multi-class keras classifier, hamming loss: {}".format(hamming_loss(y_test,y_pred)))

--------------------
Use Multi-class keras classifier, the subset accuracy: 0.47912087912087914
Use Multi-class keras classifier, hamming loss: 0.13406593406593406


In [28]:
print(y_pred)

  (0, 0)	1
  (1, 0)	1
  (2, 0)	1
  (3, 0)	1
  (4, 0)	1
  (5, 0)	1
  (6, 0)	1
  (7, 0)	1
  (8, 0)	1
  (9, 0)	1
  (10, 0)	1
  (11, 0)	1
  (11, 2)	1
  (12, 0)	1
  (12, 2)	1
  (13, 0)	1
  (14, 0)	1
  (15, 0)	1
  (16, 0)	1
  (17, 0)	1
  (18, 0)	1
  (19, 0)	1
  (20, 0)	1
  (21, 0)	1
  (21, 2)	1
  (22, 0)	1
  (23, 0)	1
  (24, 0)	1
  (25, 0)	1
  (26, 0)	1
  (26, 2)	1
  (27, 0)	1
  (28, 0)	1
  (29, 0)	1
  (30, 0)	1
  (31, 0)	1
  (32, 0)	1
  (33, 0)	1
  (34, 0)	1
  (35, 0)	1
  (36, 0)	1
  (37, 0)	1
  (38, 0)	1
  (39, 0)	1
  (40, 0)	1
  (41, 0)	1
  (42, 0)	1
  (43, 0)	1
  (44, 0)	1
  (44, 2)	1
  (45, 0)	1
  (46, 0)	1
  (47, 0)	1
  (48, 0)	1
  (49, 0)	1
  (50, 0)	1
  (51, 0)	1
  (52, 0)	1
  (53, 0)	1
  (54, 0)	1
  (55, 0)	1
  (56, 0)	1
  (57, 0)	1
  (58, 0)	1
  (59, 0)	1
  (60, 0)	1
  (61, 0)	1
  (62, 0)	1
  (62, 2)	1
  (63, 0)	1
  (64, 0)	1
  (65, 0)	1
  (66, 0)	1
  (66, 2)	1
  (67, 0)	1
  (68, 0)	1
  (69, 0)	1
  (70, 0)	1
  (71, 0)	1
  (72, 0)	1
  (73, 0)	1
  (74, 0)	1
  (75, 0)	1
  (76, 0)	1
  

In [33]:
y_test[440]

array([1., 0., 1., 0., 0., 0.])