In [1]:
import pandas as pd, numpy as np 
import pickle

In [2]:
# Load train & valid splits
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only point this at a preprocessed file produced by a trusted pipeline.
# The context manager guarantees the file handle is closed even if unpickling fails.
with open('../data/preprocessed.pkl', 'rb') as f:
    train, valid = pickle.load(f)

# Label columns are every column after the first two of the training frame
# (presumably an id column and the comment text -- TODO confirm against preprocessing).
labels = train.columns[2:]

# Ground-truth labels of the validation split as a plain NumPy array,
# one column per entry in `labels`.
y_valid = valid[labels].values

In [3]:
## Load SVM, XGBoost, and GRU results
# SVM and XGBoost predictions live in the same CSV, in columns named
# "<model>_<label>"; the GRU predictions are stored in their own file.
r1 = pd.read_csv('../artifacts/preds/glove_preds.csv')
GRU = pd.read_csv('../artifacts/preds/glove_gru_preds.csv')

# Select each model's columns in the same order as `labels`.
SVM = r1[[f'SVM_{label_name}' for label_name in labels]]
XGB = r1[[f'XGBoost_{label_name}' for label_name in labels]]

In [4]:
## Ensemble and get preds
# Unweighted mean of the three models' outputs, element by element.
# NOTE(review): this relies on GRU, SVM and XGB having identically ordered
# columns and rows -- confirm the GRU CSV has no extra index/id column.
mean_scores = (GRU.values + SVM.values + XGB.values) / 3

# Binarize at 0.5: strictly greater than 0.5 becomes 1, less than or equal
# becomes 0. Values for which neither comparison holds (NaN) are left as-is.
preds = np.where(mean_scores > 0.5, 1.0,
                 np.where(mean_scores <= 0.5, 0.0, mean_scores))

In [7]:
## Get results
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Column order of the final results table.
result_columns = ['Label', 'Accuracy', 'Recall', 'Precision', 'F1', 'Vectorizer', 'model']

# Collect one record per label and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies the whole frame on every iteration.
records = []

## Print results
for i in range(preds.shape[1]):
    # Column i of both arrays corresponds to labels[i].
    i_preds = preds[:, i]
    i_true = y_valid[:, i]
    label = labels[i]

    # Evaluate predictions
    acc = accuracy_score(i_true, i_preds)
    prec = precision_score(i_true, i_preds)
    recall = recall_score(i_true, i_preds)
    f1 = f1_score(i_true, i_preds)

    # Save results for this label
    records.append({'Label': label,
                    'Accuracy': acc,
                    'Recall': recall,
                    'Precision': prec,
                    'F1': f1,
                    'Vectorizer': 'glove',
                    'model': 'ensemble'})

    # print results
    print('Results for {0} comments: Accuracy - {1:.2f}; Precision - {2:.2f}; Recall - {3:.2f}; F1 - {4:.2f}'.format(
        label,
        acc,
        prec,
        recall,
        f1))

# One row per label, columns in the order declared above.
results = pd.DataFrame(records, columns=result_columns)

Results for toxic comments: Accuracy - 0.95; Precision - 0.76; Recall - 0.78; F1 - 0.77
Results for severe_toxic comments: Accuracy - 0.99; Precision - 0.43; Recall - 0.54; F1 - 0.48
Results for obscene comments: Accuracy - 0.98; Precision - 0.80; Recall - 0.79; F1 - 0.79
Results for threat comments: Accuracy - 1.00; Precision - 0.48; Recall - 0.30; F1 - 0.37
Results for insult comments: Accuracy - 0.97; Precision - 0.68; Recall - 0.75; F1 - 0.72
Results for identity_hate comments: Accuracy - 0.99; Precision - 0.52; Recall - 0.47; F1 - 0.50


In [8]:
# Persist the per-label ensemble metrics. No `index` argument is passed, so
# pandas' default applies and the row index is written as the first CSV column.
results.to_csv(path_or_buf='../artifacts/glove_ensemble.csv')