In [1]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import recall_score, precision_score, f1_score, multilabel_confusion_matrix
import numpy as np
import pandas as pd
import re

In [2]:
# Tokenizer pattern: one or more consecutive word characters. It is applied to
# the lower-cased comment text by the vectorizer's preprocessor in build_pipeline.
r = re.compile(r'[\w]+')

# For a better estimate of model quality, use k-fold cross-validation (5 folds).
kf = KFold(n_splits=5)

In [3]:
# Names of the binary target columns; their order fixes the column order of y.
label_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Read the training set from the working directory.
train = pd.read_csv('train.csv')

# X: the raw comment text (a pandas Series).
# y: the label columns as a 2-D NumPy array, one column per label.
X = train['comment_text']
y = train[label_columns].values

In [4]:
def _normalize_comment(text):
    """Lower-case ``text`` and rejoin its word-character runs with single spaces.

    Kept as a named module-level function (rather than a lambda) so that a
    pipeline holding a reference to it can be serialised with ``pickle``.
    """
    return " ".join(r.findall(text.lower()))


def build_pipeline(max_features=100, ngram_range=(1, 2), min_df=3, max_df=0.9):
    """Build an unfitted TF-IDF + one-vs-rest logistic regression pipeline.

    The defaults reproduce the original hard-coded configuration, so calling
    ``build_pipeline()`` with no arguments behaves as before.

    Parameters
    ----------
    max_features : int or None, default=100
        Upper bound on the vocabulary size kept by the vectorizer.
    ngram_range : tuple of (int, int), default=(1, 2)
        Smallest and largest n-gram length to extract (unigrams and bigrams
        by default).
    min_df : int or float, default=3
        Lower document-frequency cut-off passed to ``TfidfVectorizer``.
    max_df : int or float, default=0.9
        Upper document-frequency cut-off passed to ``TfidfVectorizer``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Two steps: ``'feature'`` (the TF-IDF vectorizer) and ``'classifier'``
        (``OneVsRestClassifier`` wrapping a default ``LogisticRegression``).
    """
    vectorizer = TfidfVectorizer(
        ngram_range=ngram_range,
        min_df=min_df,
        max_df=max_df,
        # A custom preprocessor replaces the vectorizer's built-in
        # preprocessing step, so lower-casing is done inside the helper.
        preprocessor=_normalize_comment,
        max_features=max_features,
    )
    return Pipeline([
        ('feature', vectorizer),
        ('classifier', OneVsRestClassifier(LogisticRegression())),
    ])

In [5]:
# Per-fold scores keyed by averaging mode; each fold appends one value per list.
recall = {'macro': [], 'micro': []}
precision = {'macro': [], 'micro': []}
f1 = {'macro': [], 'micro': []}

for tr_index, ts_index in kf.split(X):
    # KFold yields positional indices, so rows of the Series are selected by
    # position with .iloc. Plain X[...] would treat the integers as index
    # labels and is only correct while X carries the default 0..n-1 index.
    X_train, X_test = X.iloc[tr_index], X.iloc[ts_index]
    y_train, y_test = y[tr_index], y[ts_index]

    # A fresh pipeline per fold: the vectorizer and the classifier are fitted
    # on the training split only.
    pipeline = build_pipeline()
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    # NOTE(review): precision is undefined for a label that receives no
    # positive predictions in a fold; scikit-learn is expected to warn about it
    # here rather than raise.
    for avg in ('macro', 'micro'):
        recall[avg].append(recall_score(y_test, y_pred, average=avg))
        precision[avg].append(precision_score(y_test, y_pred, average=avg))
        f1[avg].append(f1_score(y_test, y_pred, average=avg))

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [6]:
# One column per (metric, averaging mode) pair, one row per cross-validation fold.
fold_scores = {
    'precision micro': precision['micro'],
    'precision macro': precision['macro'],
    'recall micro': recall['micro'],
    'recall macro': recall['macro'],
    'f1 micro': f1['micro'],
    'f1 macro': f1['macro'],
}
score = pd.DataFrame(fold_scores)

# Average every metric over the folds; shown as the cell output.
score.mean()

precision micro    0.604107
precision macro    0.296288
recall micro       0.128799
recall macro       0.066344
f1 micro           0.212291
f1 macro           0.107423
dtype: float64

In [7]:
# Check that the classifier does not predict only a single class.
# y_test and y_pred still hold the values from the last cross-validation fold,
# so the matrices below describe that fold only (one 2x2 matrix per label).
multilabel_confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[[28518,   359],
        [ 2457,   580]],

       [[31602,     1],
        [  311,     0]],

       [[30122,   123],
        [ 1517,   152]],

       [[31822,     0],
        [   92,     0]],

       [[30170,   162],
        [ 1387,   195]],

       [[31609,     0],
        [  305,     0]]])