## Available datasets in scikit-multilearn

In [36]:
from skmultilearn.dataset import available_data_sets
# Each key of the available_data_sets() mapping is indexed at position 0 to get
# the dataset name; the set comprehension collapses repeated names.
{key[0] for key in available_data_sets()}

{'Corel5k',
 'bibtex',
 'birds',
 'delicious',
 'emotions',
 'enron',
 'genbase',
 'mediamill',
 'medical',
 'rcv1subset1',
 'rcv1subset2',
 'rcv1subset3',
 'rcv1subset4',
 'rcv1subset5',
 'scene',
 'tmc2007_500',
 'yeast'}

## Load Datasets

In [7]:
from skmultilearn.dataset import load_dataset

In [8]:
# Load the 'emotions' training split; load_dataset returns a 4-tuple that is
# unpacked into the feature matrix, label matrix, feature names and label names.
X_train, y_train, feature_names, label_names = load_dataset('emotions', 'train')
# Load the matching test split; the two name lists are discarded here.
X_test, y_test, _, _ = load_dataset('emotions', 'test')

emotions:train - exists, not redownloading
emotions:test - exists, not redownloading


## BinaryRelevance training

In [9]:
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.svm import SVC

In [None]:
# NOTE(review): this cell has no execution count (In [None]) and the following
# cell rebinds `clf`, so this definition is not the one later cells use.
# It wraps a default SVC(), i.e. without probability=True; presumably kept as
# the non-probabilistic variant — confirm whether it is still needed.
clf = BinaryRelevance(
    classifier=SVC(),
    require_dense=None
)

In [29]:
# Wrap an SVC base classifier in BinaryRelevance. probability=True is set
# because a later cell calls clf.predict_proba(X_test).
# require_dense is passed as None; the fitted estimator's repr shows it as
# [True, True].
# NOTE(review): no random_state is passed to SVC. scikit-learn documents that
# random_state influences the probability estimates, so predict_proba values
# may differ between runs — consider fixing a seed (confirm against SVC docs).
clf = BinaryRelevance(
    classifier=SVC(probability=True),
    require_dense=None
)

In [30]:
# Train the BinaryRelevance wrapper on the training split. The cell output is
# the repr of the value returned by fit().
clf.fit(X_train, y_train)

BinaryRelevance(classifier=SVC(probability=True), require_dense=[True, True])

## Result

In [21]:
# Predict labels for the test split.
# NOTE(review): this cell's execution count (21) is lower than the fit cell's
# (30), so the stored result may come from an earlier `clf`. Re-run the
# notebook top to bottom to confirm the metrics below match the current model.
prediction = clf.predict(X_test)

In [25]:
import sklearn.metrics as metrics

In [26]:
# Hamming loss between the true and predicted test labels (a loss, so lower is
# better).
metrics.hamming_loss(y_test, prediction)

0.3250825082508251

In [27]:
# NOTE: for multilabel targets scikit-learn documents accuracy_score as subset
# accuracy (a sample counts as correct only if all of its labels match), so it
# is a much stricter measure than the Hamming loss computed in the previous
# cell.
metrics.accuracy_score(y_test, prediction)

0.01485148514851485

## Probability Result

In [31]:
# Probability estimates for the test split; this relies on the base SVC having
# been constructed with probability=True. The following cell converts the
# result to a dense nested list for display.
predict_proba = clf.predict_proba(X_test)

In [35]:
# Densify the probability matrix and show it as nested Python lists, one inner
# list per row.
# NOTE(review): this displays every row; slicing first (e.g. predict_proba[:5])
# would keep the output short.
predict_proba.toarray().tolist()

[[0.1640769655329737,
  0.26980495178870073,
  0.68594147323851,
  0.2986933666009838,
  0.44784057638056024,
  0.2603520344811792],
 [0.3600055265748196,
  0.31916338763970753,
  0.3322582735183976,
  0.05929782656341777,
  0.061224597276117135,
  0.37957591091571974],
 [0.6311450671698821,
  0.27892425042463365,
  0.31859664931243875,
  0.031091375466145075,
  0.18118186616573886,
  0.41052107504334556],
 [0.14608957556590893,
  0.28050613797921337,
  0.6380648030574928,
  0.5638067332978642,
  0.22401779320549223,
  0.2009132619173422],
 [0.5699260102604268,
  0.30257282645062383,
  0.4008953884922595,
  0.060823341275232604,
  0.2814526501770287,
  0.3445993313220102],
 [0.16717480368989943,
  0.20368756402201288,
  0.6058669920969323,
  0.3649726621975805,
  0.5235979395608498,
  0.2438510585074204],
 [0.7085277186276595,
  0.25413105645588485,
  0.3068661030088902,
  0.013804154953704914,
  0.1895678358713636,
  0.4447996523846351],
 [0.1409190583022991,
  0.28618544655509126,
  