In [148]:

# Import the usual suspects.

from __future__ import print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics

from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from skmultilearn.problem_transform import BinaryRelevance, LabelPowerset
from skmultilearn.problem_transform import ClassifierChain

from sklearn.model_selection import GridSearchCV


import warnings
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')
sns.set_context('paper')



def print_ln():
    print('-' * 80, '\n')


In [2]:
renamed_drug_columns_names = [                     'rifampicin_resistance',
                                                   'isoniazid_resistance',
                                                   'pyrazinamide_resistance',
                                                   'ethambutol_resistance',
                                                   'streptomycin_resistance',
                                                   'fluoroquinolones_resistance',
                                                   'moxifloxacin_resistance',
                                                   'ofloxacin_resistance',
                                                   'levofloxacin_resistance',
                                                   'ciprofloxacin_resistance',
                                                   'aminoglycosides_resistance',
                                                   'amikacin_resistance',
                                                   'kanamycin_resistance',
                                                   'capreomycin_resistance',
                                                   'ethionamide_resistance',
                                                   'para-aminosalicylic_acid_resistance',
                                                   'cycloserine_resistance',
                                                   'linezolid_resistance',
                                                   'bedaquiline_resistance',
                                                   'clofazimine_resistance',
                                                   'delamanid_resistance']


In [4]:
multi_resistance_df_filledna = pd.read_csv("../data/processed/multi_resistance_filledna_df.csv")

multi_resistance_df_filledna.head()

Unnamed: 0.1,Unnamed: 0,SampleID,NC000962_3.78,NC000962_3.80,NC000962_3.102,NC000962_3.104,NC000962_3.117,NC000962_3.120,NC000962_3.135,NC000962_3.138,...,amikacin_resistance,kanamycin_resistance,capreomycin_resistance,ethionamide_resistance,para-aminosalicylic_acid_resistance,cycloserine_resistance,linezolid_resistance,bedaquiline_resistance,clofazimine_resistance,delamanid_resistance
0,0,ERR3129939,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,ERR3148148,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,ERR3148149,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,ERR3148151,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,ERR3148153,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
multi_resistance_df_filledna = multi_resistance_df_filledna.drop(columns= ['Unnamed: 0']).set_index('SampleID')
multi_resistance_df_filledna.head()



Unnamed: 0_level_0,NC000962_3.78,NC000962_3.80,NC000962_3.102,NC000962_3.104,NC000962_3.117,NC000962_3.120,NC000962_3.135,NC000962_3.138,NC000962_3.150,NC000962_3.155,...,amikacin_resistance,kanamycin_resistance,capreomycin_resistance,ethionamide_resistance,para-aminosalicylic_acid_resistance,cycloserine_resistance,linezolid_resistance,bedaquiline_resistance,clofazimine_resistance,delamanid_resistance
SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ERR3129939,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ERR3148148,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ERR3148149,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ERR3148151,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ERR3148153,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [128]:
# FIXME I've relied on manual conversion
# https://stackoverflow.com/questions/15891038/change-column-type-in-pandas#:~:text=The%20best%20way%20to%20convert%20one%20or%20more,Series%20or%20a%20single%20column%20of%20a%20DataFrame.

X = multi_resistance_df_filledna.drop(columns= renamed_drug_columns_names)
y = multi_resistance_df_filledna[renamed_drug_columns_names] \
                            .drop(columns= ['delamanid_resistance', 'cycloserine_resistance','linezolid_resistance' ], axis=1) 
#                             .astype(int) \
#                             .astype(str) \
#                             .astype('category') \

                            

In [129]:
X

Unnamed: 0_level_0,NC000962_3.78,NC000962_3.80,NC000962_3.102,NC000962_3.104,NC000962_3.117,NC000962_3.120,NC000962_3.135,NC000962_3.138,NC000962_3.150,NC000962_3.155,...,NC000962_3.4409993,NC000962_3.4409994,NC000962_3.4410001,NC000962_3.4410033,NC000962_3.4410043,NC000962_3.4410061,NC000962_3.4410065,NC000962_3.4410066,NC000962_3.4410070,NC000962_3.4411245
SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ERR3129939,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ERR3148148,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ERR3148149,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ERR3148151,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ERR3148153,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SRR9224981,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SRR9224985,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SRR9224986,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SRR9224992,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [130]:
X['NC000962_3.78']

SampleID
ERR3129939    0
ERR3148148    0
ERR3148149    0
ERR3148151    0
ERR3148153    0
             ..
SRR9224981    0
SRR9224985    0
SRR9224986    0
SRR9224992    0
SRR9224997    0
Name: NC000962_3.78, Length: 301, dtype: int64

In [None]:
y


In [None]:
y.describe().T


# Scikit-learn models

In [45]:
# multi-label stratrification technique

from collections import Counter
from skmultilearn.model_selection.measures import get_combination_wise_output_matrix

from skmultilearn.dataset import load_dataset
X,y, _, _ = load_dataset('scene', 'undivided')

Counter(combination for row in get_combination_wise_output_matrix(y.A, order=2) for combination in row)



scene:undivided - exists, not redownloading


Counter({(0, 0): 427,
         (4, 4): 533,
         (0, 4): 38,
         (0, 5): 19,
         (5, 5): 431,
         (1, 1): 364,
         (2, 2): 397,
         (2, 3): 24,
         (3, 3): 433,
         (3, 4): 76,
         (2, 4): 14,
         (4, 5): 1,
         (3, 5): 6,
         (0, 3): 1})

In [47]:
X.shape

(2407, 294)

In [160]:
_, original_y_train, _, _ = load_dataset('scene', 'train')
_, original_y_test, _, _ = load_dataset('scene', 'test')

scene:train - exists, not redownloading
scene:test - exists, not redownloading


In [167]:
original_y_train

scipy.sparse.lil.lil_matrix

In [50]:

pd.DataFrame({
    'train': Counter(str(combination) for row in get_combination_wise_output_matrix(original_y_train.A, order=2) for combination in row),
    'test' : Counter(str(combination) for row in get_combination_wise_output_matrix(original_y_test.A, order=2) for combination in row)
}).T.fillna(0.0)

Unnamed: 0,"(0, 0)","(4, 4)","(0, 4)","(0, 5)","(5, 5)","(1, 1)","(2, 2)","(2, 3)","(3, 3)","(3, 4)","(2, 4)","(4, 5)","(3, 5)","(0, 3)"
train,227.0,277.0,21.0,12.0,224.0,165.0,197.0,8.0,196.0,27.0,6.0,1.0,1.0,0.0
test,200.0,256.0,17.0,7.0,207.0,199.0,200.0,16.0,237.0,49.0,8.0,0.0,5.0,1.0


In [77]:
original_y_train

<1211x6 sparse matrix of type '<class 'numpy.int64'>'
	with 1286 stored elements in List of Lists format>

In [133]:
# traditional splitting technique

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    train_size=0.7,
                                                    test_size=0.3,
                                                    random_state=100)


In [88]:
from skmultilearn.model_selection import iterative_train_test_split

X_train, X_test, y_train, y_test = iterative_train_test_split(X, 
                                                              y,
                                                              test_size=0.3)


TypeError: '(array([  0,   1,   3,   4,   5,   8,   9,  10,  12,  14,  16,  19,  21,
        22,  24,  25,  26,  27,  28,  31,  32,  34,  35,  36,  37,  40,
        42,  45,  47,  49,  51,  52,  53,  56,  58,  60,  61,  62,  64,
        66,  68,  71,  72,  73,  74,  75,  77,  78,  79,  81,  83,  85,
        87,  89,  91,  92,  94,  96,  98, 100, 103, 105, 107, 109, 111,
       112, 114, 115, 116, 118, 121, 123, 124, 125, 127, 129, 132, 134,
       135, 136, 137, 139, 141, 143, 145, 146, 148, 151, 153, 155, 156,
       158, 159, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171,
       172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184,
       186, 187, 188, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
       201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
       214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226,
       227, 228, 229, 230, 231, 234, 235, 237, 238, 240, 241, 242, 245,
       246, 247, 249, 250, 251, 253, 254, 256, 257, 262, 263, 264, 265,
       267, 268, 269, 270, 271, 272, 276, 277, 278, 279, 280, 281, 282,
       284, 286, 287, 288, 289, 290, 291, 292, 294, 295, 296, 297, 298,
       299, 300]), slice(None, None, None))' is an invalid key

In [101]:
X_train

Unnamed: 0_level_0,NC000962_3.78,NC000962_3.80,NC000962_3.102,NC000962_3.104,NC000962_3.117,NC000962_3.120,NC000962_3.135,NC000962_3.138,NC000962_3.150,NC000962_3.155,...,NC000962_3.4409993,NC000962_3.4409994,NC000962_3.4410001,NC000962_3.4410033,NC000962_3.4410043,NC000962_3.4410061,NC000962_3.4410065,NC000962_3.4410066,NC000962_3.4410070,NC000962_3.4411245
SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ERR760609,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ERR751469,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SRR7517767,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ERR775346,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ERR751511,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ERR751403,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ERR751375,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ERR751434,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SRR8698482,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [144]:
y_train

Unnamed: 0_level_0,rifampicin_resistance,isoniazid_resistance,pyrazinamide_resistance,ethambutol_resistance,streptomycin_resistance,fluoroquinolones_resistance,moxifloxacin_resistance,ofloxacin_resistance,levofloxacin_resistance,ciprofloxacin_resistance,aminoglycosides_resistance,amikacin_resistance,kanamycin_resistance,capreomycin_resistance,ethionamide_resistance,para-aminosalicylic_acid_resistance,bedaquiline_resistance,clofazimine_resistance
SampleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
ERR760609,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
ERR751469,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SRR7517767,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
ERR775346,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
ERR751511,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ERR751403,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ERR751375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ERR751434,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
SRR8698482,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [64]:
Counter(combination for row in get_combination_wise_output_matrix(y_train, order=2) for combination in row)


Counter({(0, 0): 20})

In [149]:
from sklearn.svm import SVC
from sklearn.svm import LinearSVC

# model_svm= BinaryRelevance(classifier= LinearSVC(), require_dense = [False, True])
model_svm= LabelPowerset(classifier= LinearSVC(), require_dense = [False, True])

# model_svm= ClassifierChain(classifier= SVC())

model_svm.fit(X_train, y_train)

y_pred= model_svm.predict(X_test)


# model_performance_metrics(model_svm, X, X_test, X_train, y, y_test, y_pred, show_feature_importances= False)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
# print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
# print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
# print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

Accuracy: 0.3516483516483517


In [150]:
print("Hamming loss:",metrics.hamming_loss(y_test, y_pred))

Hamming loss: 0.12148962148962149


In [143]:
model_svm

BinaryRelevance(classifier=LinearSVC(), require_dense=[False, True])

In [159]:
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from skmultilearn.cluster import LabelCooccurrenceGraphBuilder
from skmultilearn.ensemble import LabelSpacePartitioningClassifier

from sklearn.svm import SVC

parameters = {
    'classifier': [LabelPowerset(), BinaryRelevance()],
    'classifier__classifier': [SVC()]
}

clf = GridSearchCV(LabelSpacePartitioningClassifier(), parameters, scoring = 'f1_macro')
clf.fit(X_train, y_train)

print (clf.best_score_, clf.best_params_)

AttributeError: 'NoneType' object has no attribute 'fit_predict'

In [151]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB

parameters = [
    {
        'classifier': [BernoulliNB()],
    },
    {
        'classifier': [MultinomialNB()],
        'classifier__alpha': [0.7, 1.0],
    },
    {
        'classifier': [SVC()],
        'classifier__kernel': ['rbf', 'linear'],
    },
]

# clf = GridSearchCV(BinaryRelevance(), parameters, scoring='accuracy')
clf = GridSearchCV(LabelPowerset(), parameters, scoring='accuracy')
clf.fit(X, y)

print(clf.best_params_, "\n", clf.best_score_,"\n", metrics.accuracy_score(y_test, y_pred))




KeyboardInterrupt: 

In [153]:
from sklearn.ensemble import RandomForestClassifier



# model_rf= BinaryRelevance(classifier= RandomForestClassifier(n_estimators= 100,
#                                   random_state = 100,
#                                   max_depth=5,
#                                   min_samples_leaf=50,
#                                   min_samples_split=50))


model_rf= LabelPowerset(classifier= RandomForestClassifier(n_estimators= 100,
                                  random_state = 100,
                                  max_depth=5,
                                  min_samples_leaf=50,
                                  min_samples_split=50),
                       require_dense = [False, False])



model_rf.fit(X_train, y_train)

y_pred= model_rf.predict(X_test)

# model_performance_metrics(model_rf, X, X_test, X_train, y, y_test, y_pred)

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))



Accuracy: 0.24175824175824176


In [155]:
from sklearn.ensemble import GradientBoostingClassifier

# model_gb= BinaryRelevance(classifier=GradientBoostingClassifier(
#                                      n_estimators= 100,
#                                      random_state = 100,
#                                      max_depth=5
#                                     ),
#                          require_dense= [False, True])



model_gb= LabelPowerset(classifier=GradientBoostingClassifier(
                                     n_estimators= 100,
                                     random_state = 100,
                                     max_depth=5
                                    ),
                         require_dense= [False, False])


model_gb.fit(X_train, y_train)

y_pred= model_gb.predict(X_test)


# model_performance_metrics(model_gb, X, X_test, X_train, y, y_test, y_pred)

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

KeyboardInterrupt: 

In [None]:
from sklearn.naive_bayes import GaussianNB
# from sklearn.naive_bayes import BernoulliNB

# model_nb= BinaryRelevance(classifier= GaussianNB())
model_nb= ClassifierChain(classifier= GaussianNB())
# model_nb= BernoulliNB()

model_nb.fit(X_train, y_train)

y_pred= model_nb.predict(X_test)


# model_performance_metrics(model_nb, X, X_test, X_train, y, y_test, y_pred, show_feature_importances= False)

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

In [None]:
from sklearn.neural_network import MLPClassifier

model_mlp = BinaryRelevance(classifier= MLPClassifier(
                          solver='lbfgs',
                          alpha=1e-5,
                          hidden_layer_sizes=(5, 2),
                          random_state=1
))

# model_mlp = ClassifierChain(classifier= MLPClassifier(
#                           solver='lbfgs',
#                           alpha=1e-5,
#                           hidden_layer_sizes=(5, 2),
#                           random_state=1
# ))




model_mlp.fit(X_train, y_train)

y_pred= model_mlp.predict(X_test)

# model_performance_metrics(model_mlp, X, X_test, X_train, y, y_test, y_pred, show_feature_importances= False)

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

In [112]:
from xgboost import XGBClassifier

model_xgb = BinaryRelevance(classifier= XGBClassifier(
                          learning_rate= 0.01,
                          random_state= 1
))

# model_xgb = ClassifierChain(classifier= XGBClassifier(
#                           learning_rate= 0.01,
#                           random_state= 1
# ))



model_xgb.fit(X_train, y_train)

y_pred= model_xgb.predict(X_test)

# model_performance_metrics(model_xgb, X, X_test, X_train, y, y_test, y_pred)

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.24666666666666667


In [110]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.ensemble import StackingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from sklearn.ensemble import GradientBoostingClassifier


estimators = [
    ('rf', RandomForestClassifier()),
    ('gb', GradientBoostingClassifier()),
    ('svc', LinearSVC()),
    ('mlp', MLPClassifier()),
    ('nb', GaussianNB()),
    ('xgb', XGBClassifier())
]

# model_se = BinaryRelevance(classifier= StackingClassifier(
#     estimators=estimators,
#     final_estimator=RandomForestClassifier()
# ))

model_se = ClassifierChain(classifier= StackingClassifier(
    estimators=estimators,
    final_estimator=RandomForestClassifier()
))

model_se.fit(X_train, y_train)

y_pred = model_se.predict(X_test)

# model_performance_metrics(model_se, X, X_test, X_train, y, y_test, y_pred, show_feature_importances= False)

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.2966666666666667


# Scikit-multilabel classifiers


In [113]:
from skmultilearn.adapt import BRkNNaClassifier

model = BRkNNaClassifier(k=10)



model_xgb.fit(X_train, y_train)

y_pred= model_xgb.predict(X_test)

# model_performance_metrics(model_xgb, X, X_test, X_train, y, y_test, y_pred)

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.24666666666666667


In [121]:
from skmultilearn.adapt import BRkNNaClassifier
from sklearn.model_selection import GridSearchCV

parameters = {'k': range(1,3)}
score = 'f1_macro'

clf = GridSearchCV(BRkNNaClassifier(), parameters, scoring=score)

clf.fit(X, y)

print(clf.best_params_, "\n", clf.best_score_,"\n", metrics.accuracy_score(y_test, y_pred))


{'k': 1} 
 0.23993721806418025 
 0.14833333333333334
