In [53]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [67]:
from sklearn.model_selection import train_test_split
from sklearn.multioutput import ClassifierChain
from sklearn.multiclass import OneVsRestClassifier

from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier

In [26]:
# Fixed seed so the hold-out splits are identical after every kernel restart;
# without it each run evaluates on a different random test set.
SEED = 42

x = pd.read_csv('data/x.csv')
y = pd.read_csv('data/y.csv')

# Two successive 10% hold-outs: the first carves out the validation split,
# the second takes the test split from what remains.
# NOTE(review): x_val / y_val are not used by any cell visible here.
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.1, random_state=SEED)
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.1, random_state=SEED)

In [68]:
# Candidate base estimators, all constructed with scikit-learn's defaults.
NB, SVM = GaussianNB(), SVC()
RF, AdaBoost = RandomForestClassifier(), AdaBoostClassifier()
KNN = KNeighborsClassifier()

In [58]:
def _example_based_scores(y_true, y_pred):
    """Compute example-based multi-label accuracy, precision, recall and F1.

    Parameters
    ----------
    y_true : array-like, one row per sample and one column per label.
        Assumed to be a 0/1 indicator matrix (non-zero entries count as
        positive) -- TODO confirm against data/y.csv.
    y_pred : array-like of the same shape; an entry counts as a predicted
        label when it is strictly greater than 0.5.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)`` where accuracy is the mean
        per-sample |pred AND true| / |pred OR true|, precision divides the
        intersection by the number of predicted labels, recall by the number
        of true labels, and F1 is the harmonic mean of the averaged precision
        and recall.
    """
    truth = np.asarray(y_true).astype(bool)
    guess = np.asarray(y_pred) > 0.5

    # Per-sample label counts, computed once and shared by all three ratios.
    hits = np.logical_and(guess, truth).sum(axis=1)
    union = np.logical_or(guess, truth).sum(axis=1)

    # A sample with an empty denominator yields 0/0 = NaN. Such samples are
    # left out of the average (np.nanmean), which is made explicit here rather
    # than relying on implicit NaN skipping and globally silenced warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        acc = np.nanmean(hits / union)
        precision = np.nanmean(hits / guess.sum(axis=1))
        recall = np.nanmean(hits / truth.sum(axis=1))

    # Guard the harmonic mean: a zero denominator gives 0.0 instead of NaN.
    # A NaN denominator is truthy, so an undefined precision/recall still
    # propagates as NaN instead of being masked.
    denom = precision + recall
    f1 = 2 * (precision * recall) / denom if denom else 0.0
    return acc, precision, recall, f1


def classifier_chain(model, num_chian = 10):
    """Fit an ensemble of randomly ordered classifier chains and print test scores.

    Builds ``num_chian`` ``ClassifierChain`` instances around ``model`` (each
    with ``order='random'`` and ``random_state`` set to its index), fits them
    on the notebook-level ``x_train`` / ``y_train`` with the ``'utterance'``
    column dropped, averages their predictions on ``x_test`` per label,
    thresholds the average at 0.5 and prints example-based accuracy,
    precision, recall and F1 against ``y_test``.

    Parameters
    ----------
    model : estimator passed to ``ClassifierChain`` as the base estimator.
    num_chian : int, default 10
        Number of chains in the ensemble. The misspelling is kept so existing
        keyword callers continue to work.

    Raises
    ------
    ValueError
        If ``num_chian`` is smaller than 1 (there would be nothing to average).
    """
    if num_chian < 1:
        raise ValueError('num_chian must be at least 1')

    # Drop the non-feature column once instead of once per chain.
    features_train = x_train.drop('utterance', axis=1)
    features_test = x_test.drop('utterance', axis=1)

    chains = [ClassifierChain(model, order='random', random_state=i)
              for i in range(num_chian)]
    for chain in chains:
        chain.fit(features_train, y_train)

    # Mean of the per-chain predictions; the 0.5 threshold is applied when scoring.
    y_pred = np.mean([chain.predict(features_test) for chain in chains], axis=0)

    acc, precision, recall, F1 = _example_based_scores(y_test, y_pred)

    print('Accuracy:', acc)
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1 score:', F1)

In [59]:
# Chain ensemble with the default GaussianNB instance as base estimator.
classifier_chain(NB)

Accuracy: 0.3107019674092845
Precision: 0.4761564625850341
Recall: 0.5469155844155843
F1 score: 0.5090890534472667


In [57]:
%%time
# Chain ensemble with the default SVC instance as base estimator.
# The recorded run of this cell took about 9 minutes of wall time.
classifier_chain(SVM)

Accuracy: 0.4198909830007391
Precision: 0.7879310344827586
Recall: 0.42306910569105693
F1 score: 0.5505354905359536
Wall time: 9min 14s


In [60]:
%%time
# Chain ensemble with the default RandomForestClassifier instance as base estimator.
classifier_chain(RF)

Accuracy: 0.5643873403019743
Precision: 0.847682119205298
Recall: 0.5663274733396685
F1 score: 0.6790133182911708
Wall time: 17.3 s


In [61]:
%%time
# Chain ensemble with the default AdaBoostClassifier instance as base estimator.
# The recorded run of this cell took a little under 3 minutes of wall time.
classifier_chain(AdaBoost)

Accuracy: 0.596593548727695
Precision: 0.8014588329336532
Recall: 0.6077908879738148
F1 score: 0.6913173279601974
Wall time: 2min 41s


In [70]:
# K-nearest neighbours is fitted on the full multi-label target directly,
# without a classifier chain, and scored on the test split.
KNN.fit(x_train.drop('utterance', axis=1), y_train)
y_pred = KNN.predict(x_test.drop('utterance', axis=1))

# Boolean label masks, built once and reused by every metric.
# Assumes y_test is a 0/1 indicator matrix -- TODO confirm against data/y.csv.
truth_mask = np.asarray(y_test).astype(bool)
pred_mask = np.asarray(y_pred) > 0.5
hits = np.logical_and(pred_mask, truth_mask).sum(axis=1)
union = np.logical_or(pred_mask, truth_mask).sum(axis=1)

# Samples whose denominator is empty give 0/0 = NaN and are excluded from the
# average; np.nanmean states that explicitly instead of relying on implicit
# NaN skipping with warnings silenced.
with np.errstate(divide='ignore', invalid='ignore'):
    acc = np.nanmean(hits / union)
    precision = np.nanmean(hits / pred_mask.sum(axis=1))
    recall = np.nanmean(hits / truth_mask.sum(axis=1))

# Harmonic mean of the averaged precision and recall; 0.0 instead of NaN when
# both are zero (a NaN input still propagates).
pr_sum = precision + recall
F1 = 2 * (precision * recall) / pr_sum if pr_sum else 0.0

print('Accuracy:', acc)
print('Precision:', precision)
print('Recall:', recall)
print('F1 score:', F1)

Accuracy: 0.3906455671699574
Precision: 0.6158508158508158
Recall: 0.40373244641537326
F1 score: 0.48772663442456476
