In [1]:
# importing libraries

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from sklearn.base import BaseEstimator, TransformerMixin

from sklearn.metrics import multilabel_confusion_matrix, accuracy_score, classification_report, precision_recall_fscore_support, recall_score

from sklearn.externals import joblib

import matplotlib.pyplot as plt
%config InlineBackend.figure_format='retina'
pd.options.display.max_colwidth = 500



In [2]:
# defining functions

class TextSelector(BaseEstimator, TransformerMixin):
    """
    Transformer to select a single column from the data frame to perform additional transformations on
    Use on text columns in the data
    """
    def __init__(self, key):
        self.key = key

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X[self.key]
    
class NumberSelector(BaseEstimator, TransformerMixin):
    """
    Transformer to select a single column from the data frame to perform additional transformations on
    Use on numeric columns in the data
    """
    def __init__(self, key):
        self.key = key

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X[[self.key]]

In [3]:
# reading the data

df = pd.read_csv('sampdf_feat.csv')
print(df.shape)
display(df.head())

(54870, 19)


Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate,clean,cleaned_comment_text,word_count,char_count,word_density,total_length,capitals,caps_vs_length,num_exclamation_marks,num_unique_words,words_vs_unique
0,3bf7c95e20e164f1,"you do interesting work! \n\nReally, whatever the outcome on Aetherometry, all the best with your wheelchair work. That looks like fascinating engineering.",0,0,0,0,0,0,1,interest work ! Really whatever outcome Aetherometry best wheelchair work That look like fascinate engineer,15,93,0.159574,107,3,0.028037,1,14,0.933333
1,8dfbb26d7edb4e39,"Let me see if I understand you. Because the site is run by religious, nationalistic Iranians, it must have stolen all its content? This must be some bizarro-world logic I'm not familiar with.",0,0,0,0,0,0,1,Let see I understand you Because site run religious nationalistic Iranians must steal content This must bizarro world logic I familiar with,22,118,0.184874,139,6,0.043165,0,20,0.909091
2,ec8eb2974a3b7686,"Yes, I will try to cut out the details to make it more compact.",0,0,0,0,0,0,1,Yes I try cut detail make compact,7,27,0.25,33,2,0.060606,0,7,1.0
3,587075cbd150aee1,"timestamp to unsigned templates, so you do it for me. Thanks! ) t c 03:08, 25",0,0,0,0,0,0,1,timestamp unsigned templates me Thanks ! c,7,36,0.189189,42,1,0.02381,1,7,1.0
4,d8d76d16e8369ff8,"I replaced the quote as to my count three contributers seem to have been happy with the quote, i.e. a consensus within the terms of the page. It may be an idea with anything as contentious as articles that address issues surrounding the troubles to fully reference everything according to wiki standards.",0,0,0,0,0,0,1,I replace quote count three contributers seem happy quote i e consensus within term page It may idea anything contentious article address issue surround trouble fully reference everything accord wiki standards,31,179,0.172222,209,2,0.009569,0,30,0.967742


In [4]:
# labels

labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

In [5]:
# Create X and y dataframe

X = df.loc[:, ['id', 'comment_text', 'cleaned_comment_text', 'word_count', 'char_count', 'word_density', 'total_length',
               'capitals', 'caps_vs_length', 'num_exclamation_marks', 'num_unique_words', 'words_vs_unique'
               ]]
y = df[labels]

In [6]:
# train and test data for Level 1
 
X_train_L1, X_test_L1, y_train_L1, y_test_L1 = train_test_split(X, y, test_size=0.20, random_state=2019)

print("X_train:", X_train_L1.shape)
print("y_train:", y_train_L1.shape)
print("X_test:", X_test_L1.shape)
print("y_test:", y_test_L1.shape)

X_train: (43896, 12)
y_train: (43896, 6)
X_test: (10974, 12)
y_test: (10974, 6)


In [7]:
# value counts for every label

for label in labels:
    print(y_test_L1[label].value_counts())

0    6729
1    4245
Name: toxic, dtype: int64
0    10621
1      353
Name: severe_toxic, dtype: int64
0    8542
1    2432
Name: obscene, dtype: int64
0    10836
1      138
Name: threat, dtype: int64
0    8753
1    2221
Name: insult, dtype: int64
0    10534
1      440
Name: identity_hate, dtype: int64


In [8]:
# combining features and labels for test (to reset index)

X_test_L1 = X_test_L1.reset_index().drop(['index'], axis=1)
y_test_L1 = y_test_L1.reset_index().drop(['index'], axis=1)
test_L1 = pd.merge(X_test_L1, y_test_L1, left_index=True, right_index=True)

### LEVEL 1 CLASSIFIER

In [9]:
# loading the model

filename = 'TFIDF_SVM_Level_1.joblib'
model_L1 = joblib.load(filename)

In [10]:
# predict

y_pred_L1 = model_L1.predict(X_test_L1)
y_pred_L1 = pd.DataFrame(y_pred_L1, columns=['pred_toxic'])

In [11]:
# combine with features and true labels

df_with_predictions_L1 = pd.merge(test_L1, y_pred_L1, left_index=True, right_index=True)

### LEVEL 2 CLASSIFIER

In [12]:
# train and test data for Level 1

test_L2 = df_with_predictions_L1[df_with_predictions_L1['pred_toxic']==1]
test_L2 = test_L2.reset_index().drop(['index'], axis=1)

X_test_L2 = test_L2.loc[:, ['cleaned_comment_text', 'word_count', 'char_count', 'word_density', 'total_length',
               'capitals', 'caps_vs_length', 'num_exclamation_marks', 'num_unique_words', 'words_vs_unique'
               ]]
y_test_L2 = test_L2[labels[-5:]]

In [13]:
# for every label, load the model and predict

all_y_pred_L2 = []

for label in labels[-5:]:
    print('... Processing {}'.format(label))
    # load the model
    filename = 'Binary_LR_Level_2_' + str(label) + '.joblib'
    model_L2 = joblib.load(filename)
    # predict
    y_pred_L2 = model_L2.predict(X_test_L2)
    #print(y_pred)
    print(pd.DataFrame(y_pred_L2, columns=['pred_'+str(label)])['pred_'+str(label)].value_counts())
    all_y_pred_L2.append(y_pred_L2)

... Processing severe_toxic
0    3948
1     173
Name: pred_severe_toxic, dtype: int64
... Processing obscene
1    2235
0    1886
Name: pred_obscene, dtype: int64
... Processing threat
0    4042
1      79
Name: pred_threat, dtype: int64
... Processing insult
1    2078
0    2043
Name: pred_insult, dtype: int64
... Processing identity_hate
0    3836
1     285
Name: pred_identity_hate, dtype: int64


In [14]:
# preparing a DataFrame

pred_label_col_names = []
for label in labels[-5:]:
    pred_label_col_names.append('pred_'+str(label)) 
    
all_y_pred_L2_df = pd.DataFrame(columns=pred_label_col_names)
for i in range(5):
    all_y_pred_L2_df[pred_label_col_names[i]] = pd.Series(all_y_pred_L2[i])
    display(all_y_pred_L2_df)

Unnamed: 0,pred_severe_toxic,pred_obscene,pred_threat,pred_insult,pred_identity_hate
0,0,,,,
1,0,,,,
2,0,,,,
3,0,,,,
4,0,,,,
5,0,,,,
6,0,,,,
7,0,,,,
8,0,,,,
9,0,,,,


Unnamed: 0,pred_severe_toxic,pred_obscene,pred_threat,pred_insult,pred_identity_hate
0,0,1,,,
1,0,0,,,
2,0,0,,,
3,0,0,,,
4,0,0,,,
5,0,1,,,
6,0,0,,,
7,0,0,,,
8,0,1,,,
9,0,1,,,


Unnamed: 0,pred_severe_toxic,pred_obscene,pred_threat,pred_insult,pred_identity_hate
0,0,1,0,,
1,0,0,0,,
2,0,0,0,,
3,0,0,0,,
4,0,0,0,,
5,0,1,0,,
6,0,0,0,,
7,0,0,0,,
8,0,1,0,,
9,0,1,0,,


Unnamed: 0,pred_severe_toxic,pred_obscene,pred_threat,pred_insult,pred_identity_hate
0,0,1,0,1,
1,0,0,0,0,
2,0,0,0,0,
3,0,0,0,0,
4,0,0,0,0,
5,0,1,0,0,
6,0,0,0,0,
7,0,0,0,0,
8,0,1,0,1,
9,0,1,0,1,


Unnamed: 0,pred_severe_toxic,pred_obscene,pred_threat,pred_insult,pred_identity_hate
0,0,1,0,1,0
1,0,0,0,0,0
2,0,0,0,0,0
3,0,0,0,0,0
4,0,0,0,0,0
5,0,1,0,0,0
6,0,0,0,0,0
7,0,0,0,0,0
8,0,1,0,1,0
9,0,1,0,1,0


In [15]:
# combining predictions with features and true labels

df_with_predictions_L2 = pd.merge(test_L2, all_y_pred_L2_df, left_index=True, right_index=True)
df_with_predictions_L2

Unnamed: 0,id,comment_text,cleaned_comment_text,word_count,char_count,word_density,total_length,capitals,caps_vs_length,num_exclamation_marks,...,obscene,threat,insult,identity_hate,pred_toxic,pred_severe_toxic,pred_obscene,pred_threat,pred_insult,pred_identity_hate
0,c18844370ce4a022,you sound the excat same as the tide fat rolls guy you 2 must be the same guy jeez what the fuck is this 1984 down with the wikapeida commies,sound excat tide fat roll guy must guy jeez fuck wikapeida commies,12,55,0.214286,66,0,0.000000,0,...,0,0,0,0,1,0,1,0,1,0
1,f38bf2e294608598,"""\n\nThis discussion page may contain comments better considered as trolling, posted in order to prompt irate replies that can then be even further ridiculed, in turn. Before you post any reply, consider how you might minimize the effects of trollish comments. Simply ignoring certain comments may be the best option.\n\nAnd NOW-I have another NEW username and I go an my merry way despite the rude, condescending, and out-right disturbing attacks on me by a 'person' who claims to be an adult!! ...",This discussion page may contain comment better consider troll post order prompt irate reply even ridicule turn Before post reply consider might minimize effect trollish comment Simply ignore certain comment may best option And NOW I another NEW username I go merry way despite rude condescend out right disturb attack person claim adult ! ! Yep right user McNeight worst nightmare I ever singular displeasure subjectified by But in review I right Mitchell Award I right E E issue I factully corr...,186,913,0.203501,1098,68,0.061931,13,...,0,0,0,0,1,0,0,0,0,0
2,9c2215575841e751,== Hi == \n\n I hate you so much.,Hi I hate much,4,11,0.333333,14,2,0.142857,0,...,0,0,0,0,1,0,0,0,0,0
3,379898bfe3511bde,""" \n\n == Boybang == \n\n I removed the text """"One option is that man can give a woman oral sex, give to another woman the vaginal sex and accept oral sex in the coition of a third woman, all at the same time. With this, may the boy have space he may engage in finger f*****g 2 or more females whilst foot f*****g another female."""" as it was unsourced and the f******g is not encyclopedic. """,Boybang I remove text One option man give woman oral sex give another woman vaginal sex accept oral sex coition third woman time With this may boy space may engage finger f g females whilst foot f g another female unsourced f g encyclopedic,44,197,0.222222,240,4,0.016667,0,...,0,0,0,0,1,0,0,0,0,0
4,09d74105462445fe,every one who reads this wiil die=] )lol im sexy and hot with huge boobies call me at \n 236 6679,every one read wiil die lol im sexy hot huge boobies call,12,46,0.255319,57,0,0.000000,0,...,0,0,0,0,1,0,0,0,0,0
5,0c7a8e0dfa97c844,"""\n Well go verify it, """"Dick and Jane get sexually mutilated"""" page 18. """,Well go verify it Dick Jane get sexually mutilate page,10,45,0.217391,54,3,0.055556,0,...,0,0,0,0,1,0,1,0,0,0
6,e10aa1c772931dd0,At the point you've smoked yourself into a tard .....,At point smoke tard,4,16,0.235294,19,1,0.052632,0,...,0,0,0,0,1,0,0,0,0,0
7,3253dbe44c634ecc,-by mr. monkey man \n HELLO. I AM YOUR FATHER. HEHEHEHEHEHEHEHE YES YES. GO STICK YOUR HEAD IN A TOILET.,by mr monkey man HELLO I AM YOUR FATHER HEHEHEHEHEHEHEHE YES YES GO STICK YOUR HEAD IN A TOILET,19,77,0.243590,95,64,0.673684,0,...,0,0,0,0,1,0,0,0,0,0
8,f1e0e1f2b18e416c,"""\n\nHello, """"Erik,"""" or whatever your name is. Now, if you are done whining like a little bitch, please either kindly contribute to the encylopedia at hand, or GTFO. What do you think this is, Twitter? Oh, and the next time you are hanging around that cult-like website you control, can you do me a favor and tell that """"Kohai"""" to go fuck herself? Same goes for KCO and CorriJean and the rest of that gang of patzers you have sucking your dick. \nWelcome to Wikipedia, Brah! """,Hello Erik whatever name be Now do whine like little bitch please either kindly contribute encylopedia hand GTFO What think be Twitter Oh next time hang around cult like website control favor tell Kohai go fuck herself Same go KCO CorriJean rest gang patzers suck dick Welcome Wikipedia Brah !,50,244,0.204082,293,20,0.068259,1,...,0,0,0,0,1,0,1,0,1,0
9,d5627e3cd6e37c73,Abey Gandu.. Wikipedian.. \n\nSalley how dare you delete all the pic's of my collection to Kolkata page.\n\nHarami.. 1st of all you don't put a pic. Then u try delete the pic's... Fuck u off.. gandu salla.,Abey Gandu Wikipedian Salley dare delete pic collection Kolkata page Harami st put pic Then u try delete pic be Fuck u off gandu salla,25,110,0.225225,134,8,0.059701,0,...,1,0,1,0,1,0,1,0,1,0


In [16]:
# combining predictions and true labels from both levels

pred_labels = ['pred_toxic', 'pred_severe_toxic', 'pred_obscene', 'pred_threat', 'pred_insult', 'pred_identity_hate']
all_cols_L1 = ['id', 'comment_text', 'cleaned_comment_text', 'toxic', 'pred_toxic']
all_cols_L2 = ['id', 'comment_text', 'cleaned_comment_text'] + labels[-5:] + pred_labels[-5:]
all_true_with_predictions = pd.merge(df_with_predictions_L1[all_cols_L1], df_with_predictions_L2[all_cols_L2], 
                                     on=['id', 'comment_text', 'cleaned_comment_text'], how='left')
all_true_with_predictions

Unnamed: 0,id,comment_text,cleaned_comment_text,toxic,pred_toxic,severe_toxic,obscene,threat,insult,identity_hate,pred_severe_toxic,pred_obscene,pred_threat,pred_insult,pred_identity_hate
0,c18844370ce4a022,you sound the excat same as the tide fat rolls guy you 2 must be the same guy jeez what the fuck is this 1984 down with the wikapeida commies,sound excat tide fat roll guy must guy jeez fuck wikapeida commies,0,1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
1,25b14fc6b4548258,":Hi . I'm flattered to be asked to take a look, though the article is about something I have absolutely no knowledge! On first reading, it seems a well written article, with appropriate references to support the claims. The only comment I have is about expressing dates in terms of 'BP (Before Present)' rather than in terms of BC or even BCE - I confess I have never seen 'BP' before! As for the possible tautology, I'm not convinced it is - It appears, to me, that it reads well enough as it is...",Hi I flatter ask take look though article something I absolutely knowledge ! On first read seem well write article appropriate reference support claim The comment I express date term BP Before Present rather term BC even BCE I confess I never see BP before ! As possible tautology I convince It appear me read well enough be I would certainly encourage crack Hope use ! Cheers,0,0,,,,,,,,,,
2,e04e436acbee81bc,"::Sure thingthanks for making me look twice. As for MagicView, you said what I meant much better and in a much more appropriate tone; I admire your patience.",Sure thingthanks make look twice As MagicView say I mean much better much appropriate tone I admire patience,0,0,,,,,,,,,,
3,5abeb566790f299e,re:Gates \n\nWhat diff is going to make? are you talking technical or just any other reason.,re Gates What diff go make talk technical reason,0,0,,,,,,,,,,
4,9ce29fbc9129e190,"""\nOppose per Vegaswikian. I've never heard of the book, but have heard of plenty of people with this given name. There is also an Ian McKellen film called """"Emile"""" so really there is no legitimate reason to make this move. ÃÂ Ã¢ÂÂÃÂ """,Oppose per Vegaswikian I never hear book hear plenty people give name There also Ian McKellen film call Emile really legitimate reason make move,0,0,,,,,,,,,,
5,4a50c736345f0d9e,Preceded by - Twilight Followed by - Eclipse \n\nWhere are they at? The other movies dont have it either. Fix it.,Preceded Twilight Followed Eclipse Where at The movies dont either Fix it,0,0,,,,,,,,,,
6,f38bf2e294608598,"""\n\nThis discussion page may contain comments better considered as trolling, posted in order to prompt irate replies that can then be even further ridiculed, in turn. Before you post any reply, consider how you might minimize the effects of trollish comments. Simply ignoring certain comments may be the best option.\n\nAnd NOW-I have another NEW username and I go an my merry way despite the rude, condescending, and out-right disturbing attacks on me by a 'person' who claims to be an adult!! ...",This discussion page may contain comment better consider troll post order prompt irate reply even ridicule turn Before post reply consider might minimize effect trollish comment Simply ignore certain comment may best option And NOW I another NEW username I go merry way despite rude condescend out right disturb attack person claim adult ! ! Yep right user McNeight worst nightmare I ever singular displeasure subjectified by But in review I right Mitchell Award I right E E issue I factully corr...,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,f8c40dc909946cdb,"""\n\n Blocked \n\nI've blocked you for 24 hours for making edits against our biography of living persons policy. You were adding Category:People with dyslexia into many biographies without any sources to back it up (some did have sources, but the majority didn't). You need to add reliable sources if you want to add these categories in the future. Negative contributions against living people can't be accepted without appropriate attribution. See the mess I've created or let's have banter """,Blocked I block hours make edit biography live persons policy You add Category People dyslexia many biographies without source back some source majority not You need add reliable source want add categories future Negative contributions live people ca accept without appropriate attribution See mess I create let banter,0,0,,,,,,,,,,
8,19b2f197f67c046a,"""\n\nYour block is outrageous, look on the talkpage and see that I have entered the discussion as you suggest. Also I suggest you look at what Chip Berlet reverted especially under the """"Italian Fascism"""" section, he is violating WP:OWN and intentionally holding the article back. How can you justify the removal of 50 citations? I will be taking this higher in regards to Chip Berlet's intentional holding back of the article and if you have not issued an apology, you will be mentioned in the ...",Your block outrageous look talkpage see I enter discussion suggest Also I suggest look Chip Berlet revert especially Italian Fascism section violate WP OWN intentionally hold article back How justify removal citations I take higher regard Chip Berlet intentional hold back article issue apology mention report,0,0,,,,,,,,,,
9,b579be7b49c9c73c,"""\n\nI don't think you can fault the logic. It might be slightly flowery language, but I consider\n\n""""Harpenden is a town in the City and District of St Albans of Hertfordshire in the East of England. It lies on the A1081, between Luton and St Albans. It smelled of cockroaches, with rats all over and that there is no sewage system and the people do not have anything no arms, no legs, no eyes""""\n\nto be an accurate description of Harpenden. No?""",I think fault logic It might slightly flowery language I consider Harpenden town City District St Albans Hertfordshire East England It lie A Luton St Albans It smell cockroaches rat sewage system people anything arm legs eye to accurate description Harpenden No,0,0,,,,,,,,,,


In [17]:
# replacing NaN with 0 and converting to int

all_true_with_predictions.fillna(0, inplace=True)
all_true_with_predictions[labels] = all_true_with_predictions[labels].astype(int)
all_true_with_predictions[pred_labels] = all_true_with_predictions[pred_labels].astype(int)
all_true_with_predictions

Unnamed: 0,id,comment_text,cleaned_comment_text,toxic,pred_toxic,severe_toxic,obscene,threat,insult,identity_hate,pred_severe_toxic,pred_obscene,pred_threat,pred_insult,pred_identity_hate
0,c18844370ce4a022,you sound the excat same as the tide fat rolls guy you 2 must be the same guy jeez what the fuck is this 1984 down with the wikapeida commies,sound excat tide fat roll guy must guy jeez fuck wikapeida commies,0,1,0,0,0,0,0,0,1,0,1,0
1,25b14fc6b4548258,":Hi . I'm flattered to be asked to take a look, though the article is about something I have absolutely no knowledge! On first reading, it seems a well written article, with appropriate references to support the claims. The only comment I have is about expressing dates in terms of 'BP (Before Present)' rather than in terms of BC or even BCE - I confess I have never seen 'BP' before! As for the possible tautology, I'm not convinced it is - It appears, to me, that it reads well enough as it is...",Hi I flatter ask take look though article something I absolutely knowledge ! On first read seem well write article appropriate reference support claim The comment I express date term BP Before Present rather term BC even BCE I confess I never see BP before ! As possible tautology I convince It appear me read well enough be I would certainly encourage crack Hope use ! Cheers,0,0,0,0,0,0,0,0,0,0,0,0
2,e04e436acbee81bc,"::Sure thingthanks for making me look twice. As for MagicView, you said what I meant much better and in a much more appropriate tone; I admire your patience.",Sure thingthanks make look twice As MagicView say I mean much better much appropriate tone I admire patience,0,0,0,0,0,0,0,0,0,0,0,0
3,5abeb566790f299e,re:Gates \n\nWhat diff is going to make? are you talking technical or just any other reason.,re Gates What diff go make talk technical reason,0,0,0,0,0,0,0,0,0,0,0,0
4,9ce29fbc9129e190,"""\nOppose per Vegaswikian. I've never heard of the book, but have heard of plenty of people with this given name. There is also an Ian McKellen film called """"Emile"""" so really there is no legitimate reason to make this move. ÃÂ Ã¢ÂÂÃÂ """,Oppose per Vegaswikian I never hear book hear plenty people give name There also Ian McKellen film call Emile really legitimate reason make move,0,0,0,0,0,0,0,0,0,0,0,0
5,4a50c736345f0d9e,Preceded by - Twilight Followed by - Eclipse \n\nWhere are they at? The other movies dont have it either. Fix it.,Preceded Twilight Followed Eclipse Where at The movies dont either Fix it,0,0,0,0,0,0,0,0,0,0,0,0
6,f38bf2e294608598,"""\n\nThis discussion page may contain comments better considered as trolling, posted in order to prompt irate replies that can then be even further ridiculed, in turn. Before you post any reply, consider how you might minimize the effects of trollish comments. Simply ignoring certain comments may be the best option.\n\nAnd NOW-I have another NEW username and I go an my merry way despite the rude, condescending, and out-right disturbing attacks on me by a 'person' who claims to be an adult!! ...",This discussion page may contain comment better consider troll post order prompt irate reply even ridicule turn Before post reply consider might minimize effect trollish comment Simply ignore certain comment may best option And NOW I another NEW username I go merry way despite rude condescend out right disturb attack person claim adult ! ! Yep right user McNeight worst nightmare I ever singular displeasure subjectified by But in review I right Mitchell Award I right E E issue I factully corr...,0,1,0,0,0,0,0,0,0,0,0,0
7,f8c40dc909946cdb,"""\n\n Blocked \n\nI've blocked you for 24 hours for making edits against our biography of living persons policy. You were adding Category:People with dyslexia into many biographies without any sources to back it up (some did have sources, but the majority didn't). You need to add reliable sources if you want to add these categories in the future. Negative contributions against living people can't be accepted without appropriate attribution. See the mess I've created or let's have banter """,Blocked I block hours make edit biography live persons policy You add Category People dyslexia many biographies without source back some source majority not You need add reliable source want add categories future Negative contributions live people ca accept without appropriate attribution See mess I create let banter,0,0,0,0,0,0,0,0,0,0,0,0
8,19b2f197f67c046a,"""\n\nYour block is outrageous, look on the talkpage and see that I have entered the discussion as you suggest. Also I suggest you look at what Chip Berlet reverted especially under the """"Italian Fascism"""" section, he is violating WP:OWN and intentionally holding the article back. How can you justify the removal of 50 citations? I will be taking this higher in regards to Chip Berlet's intentional holding back of the article and if you have not issued an apology, you will be mentioned in the ...",Your block outrageous look talkpage see I enter discussion suggest Also I suggest look Chip Berlet revert especially Italian Fascism section violate WP OWN intentionally hold article back How justify removal citations I take higher regard Chip Berlet intentional hold back article issue apology mention report,0,0,0,0,0,0,0,0,0,0,0,0
9,b579be7b49c9c73c,"""\n\nI don't think you can fault the logic. It might be slightly flowery language, but I consider\n\n""""Harpenden is a town in the City and District of St Albans of Hertfordshire in the East of England. It lies on the A1081, between Luton and St Albans. It smelled of cockroaches, with rats all over and that there is no sewage system and the people do not have anything no arms, no legs, no eyes""""\n\nto be an accurate description of Harpenden. No?""",I think fault logic It might slightly flowery language I consider Harpenden town City District St Albans Hertfordshire East England It lie A Luton St Albans It smell cockroaches rat sewage system people anything arm legs eye to accurate description Harpenden No,0,0,0,0,0,0,0,0,0,0,0,0


### EVALUTAION

In [18]:
y_true = np.array(all_true_with_predictions[labels])
y_pred = np.array(all_true_with_predictions[pred_labels])

no_of_labels = 6

In [19]:
# confusion matrix
mcm = multilabel_confusion_matrix(y_true, y_pred)
print(mcm)

[[[ 6173   556]
  [  680  3565]]

 [[10585    43]
  [  216   130]]

 [[ 8462   243]
  [  277  1992]]

 [[10836    19]
  [   59    60]]

 [[ 8547   375]
  [  349  1703]]

 [[10522    45]
  [  167   240]]]


In [20]:
# TP, FP, FN, TN

TP = list()
FP = list()
FN = list()
TN = list()

for i in range(no_of_labels):
    TN_i = mcm[i][0][0]
    FP_i = mcm[i][0][1]
    FN_i = mcm[i][1][0]
    TP_i = mcm[i][1][1]
    
    TP.append(TP_i)
    FP.append(FP_i)
    FN.append(FN_i)
    TN.append(TN_i)
    
print(TP)
print(FP)
print(FN)
print(TN)

# e.g. TP for label 0 ==> TP[0]

[3565, 130, 1992, 60, 1703, 240]
[556, 43, 243, 19, 375, 45]
[680, 216, 277, 59, 349, 167]
[6173, 10585, 8462, 10836, 8547, 10522]


In [21]:
# accuracy

accuracy = list()
for i in range(no_of_labels):
    accuracy_i = (TP[i] + TN[i]) / (TP[i] + FP[i] + FN[i] + TN[i])
    accuracy.append(accuracy_i)

print(accuracy)

[0.8873701476216512, 0.976398760707126, 0.9526152724621834, 0.9928922908693275, 0.9340258793511937, 0.9806816110807363]


In [22]:
# precision, recall, f1 score

precision = list()
recall = list()
f1_score = list()

for i in range(no_of_labels):
    precision_i = (TP[i]) / ((TP[i] + FP[i]) or not (TP[i] + FP[i]))
    precision.append(precision_i)
    
    recall_i = (TP[i]) / ((TP[i] + FN[i]) or not (TP[i] + FN[i]))
    recall.append(recall_i)
    
    f1_score_i = (2 * precision_i * recall_i) / ((precision_i + recall_i) or not (precision_i + recall_i))
    f1_score.append(f1_score_i)
    
print(precision)
print(recall)
print(f1_score)

[0.8650812909487988, 0.7514450867052023, 0.8912751677852349, 0.759493670886076, 0.8195380173243504, 0.8421052631578947]
[0.839811542991755, 0.37572254335260113, 0.8779197884530631, 0.5042016806722689, 0.8299220272904484, 0.5896805896805897]
[0.8522591441549128, 0.5009633911368016, 0.8845470692717584, 0.6060606060606061, 0.8246973365617434, 0.6936416184971098]


In [23]:
# average accuracy

avg_accuracy = sum(accuracy) / no_of_labels
print('Average Accruacy: {}%'.format(round(100*avg_accuracy, 2)))

Average Accruacy: 95.4%


In [24]:
# micro averaging

micro_precision = sum(TP) / (sum(TP) + sum(FP))
micro_recall = sum(TP) / (sum(TP) + sum(FN))
micro_f1_score = (2 * micro_precision * micro_recall) / (micro_precision + micro_recall)

print('Micro Precision: {}%'.format(round(100*micro_precision, 2)))
print('Micro Recall: {}%'.format(round(100*micro_recall, 2)))
print('Micro F1-score: {}%'.format(round(100*micro_f1_score, 2)))

Micro Precision: 85.72%
Micro Recall: 81.48%
Micro F1-score: 83.55%


In [25]:
# export to csv

all_true_with_predictions = all_true_with_predictions[['id', 'comment_text', 'toxic', 'severe_toxic', 'obscene', 
                                                       'threat', 'insult', 'identity_hate', 'pred_toxic', 'pred_severe_toxic', 
                                                       'pred_obscene', 'pred_threat', 'pred_insult', 'pred_identity_hate']]
all_true_with_predictions.to_csv('test_predictions.csv', index=False)

In [26]:
# some examples

all_true_with_predictions[(all_true_with_predictions['identity_hate']==1) & (all_true_with_predictions['pred_identity_hate']==1)
                         & (all_true_with_predictions['threat']==1) & (all_true_with_predictions['pred_threat']==1)]

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate,pred_toxic,pred_severe_toxic,pred_obscene,pred_threat,pred_insult,pred_identity_hate
1588,c47353648be65580,BUSH NUKE IRAN!!! IT IS ARABIAN GULF STUPID PERSIANS!!!! \n\ni wanna see britney spears ruling iran and i hope it will be soon inshallah.. we support united states and israel against the terrorist iranians who support hizbolshaitan and many terrorist groups.. and also we wanna see the persians beg us for food. then they will stop climing that the ARABIAN GULF is persian.,1,0,1,1,1,1,1,0,1,1,1,1
1889,cf012f8122791d7e,DEATH TO ARABS! FREE THE JEWISH HOMELAND OF JUDAH AND SHOMRON FRÃÂM ILLEGAL ÃÂRAB OCCUPATION! END ARAB COLONIALISM AND ARAB IMPERIALISM! \n\nDEATH TO ARABS! FREE THE JEWISH HOMELAND OF JUDAH AND SHOMRON FRÃÂM ILLEGAL ÃÂRAB OCCUPATION! END ARAB COLONIALISM AND ARAB IMPERIALISM!,1,1,0,1,0,1,1,1,0,1,0,1
4178,256d494d8ec09cdc,nigga dont fuck wit this nigga hell naw dis nigga mary ohh hell naw dont try dis nigga she kill you nigga,1,0,1,1,1,1,1,0,1,1,1,1
5034,9e069160cb8d6e68,== KILL ALL NGIGERS == \n\n PROPOSAL KILL ALL NIGGERS,1,0,1,1,1,1,1,0,1,1,1,1
5366,9ecf3329158e0276,"JEW \n\nGet the fuck out of here you jewish son of a bitch, I'll rape your fucking family if you don't leave you semite bastard. I will shoot you if you return, because you're a dirty semite, I hope you choke on a fucking bagel, prick. Shalom. We Came In?",1,1,1,1,1,1,1,1,1,1,1,1
6532,057894cf4738a5d8,You are a gay homo. I hope you choke on your penis and die. I am sending a big bleck guy to jizz in your face.,1,0,1,1,1,1,1,0,1,1,1,1
6716,03c6344729c4e665,"Azerbaijanis Proud of Ramil Safarov \n If I had the same opportunity under same circumstances, i would kill both bastard and mother fucker armenians bitches soldiers for insulting Azerbaijani flag. Bastard gurgen lay in hell mother fucker. I fuck your soul and your whore mother. Assholes. Fuck armenian fascists and natinoalists.",1,0,1,1,1,1,1,0,1,1,1,1
7150,faf54ad8704cfbaa,"EAT SHIT AND DIE YOU FAGGOT NAZI LOVING JEW HATING MOTHERFUCKER!!! I'LL MURDER YOU IN YOUR OWN HOME YOU FAGGOT!! I WILL FIND YOU, SLIT YOUR THROAT, DRINK YOUR BLOOD, PISS DOWN YOUR MOUTH, CUT YOUR LIVER OUT, COOK IT AND EAT IT FOR BREAKFAST, LUNCH AND DINER. THEN I WILL FUCK YOUR MOM, DIG YOUR GRANDMOM UP FROM HER GRAVE AND FUCK HER IN HER ASS LIKE SHE COULDN'T GET IT FROM YOUR GRANDDAD. I'LL DIG HIM UP TO, SIT HIM UP AND HE CAN WATCH ME ASS FUCK YOUR DEAD GRANNY!",1,1,1,1,1,1,1,1,1,1,1,1
7955,eef4a004cdb68696,I AM GONNA CUT OFF ZHANZHAO'S FUCKING INDIAN-CHINESE BASTARD BALLS (CHECK OUT THE CONTENTS FOR PERSONAL INFORMATION RELEASED!) \n\nZhanzhao is GAY!!! WAHAHAH,1,0,1,1,1,1,1,0,1,1,1,1
8613,799638b93f93a0c4,::Damn you cocksuckers! FUCK YOU AND YOUR MOM. I HOPE ALL OF YOU DIE AND GO TO HELL. GO LICK PUSSY YOU DIRTY PIECE OF SHIT FUCK YOU NIGGA FUCK YOU ASSSHIT I HOPE YOU DIE YOU DIRTY PIECE OF SHIT . YOU'RE SO ULGY FUCK YOU GET A FUCKING LIFE ASS FUCK FUCK FUCK FUCK FUCK YOU COCKSUCKER YOU FUCKING FAG PUSSY BITCH NIGGA FUCK YOU FUCKING CRACKER,1,1,1,1,1,1,1,1,1,1,1,1


In [27]:
# some examples

all_true_with_predictions[(all_true_with_predictions['threat']==1) & (all_true_with_predictions['pred_threat']==0) 
                          & (all_true_with_predictions['toxic']==1) & (all_true_with_predictions['pred_toxic']==1)]

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate,pred_toxic,pred_severe_toxic,pred_obscene,pred_threat,pred_insult,pred_identity_hate
173,0ee454e997194378,"How dare you send me such abuse you fucking cockmaster!\nIf you dare warn me again, Mr Jeff. I will savagely attack your mother!\n\nI know where you live",1,1,1,1,1,0,1,0,1,0,1,0
202,9ade615b4724e01b,"Dick \n\nHey, dick.\n\nWhy don't you go read the Aqua Teen Hunger Force wiki page for Marines episode and see where it actually says jigsaw. I say that would constitue in other media references, wouldn't you?\n\nProbably not, because it seems all of you wiki monitors are fucking cock sucking assholes who don't understand what the fuck is going on half the time.\n\nSeriously, you're a dick. Did you even read what I wrote, or did you just automatically delete it because you're a cock?\n\nFuck ...",1,0,1,1,1,0,1,0,1,0,1,0
228,92dd587c7d3e92ed,"You Ain't \n\nNotorious or Infamous, you a playboy gangsta. I'm a do what you did only bigger and better. I'm what you used to be and I been to both of your lame Tevapedias. If we fought, I'd kill ya, boy.",1,0,0,1,0,0,1,0,0,0,0,0
926,1e6b732c678f6910,I'm going to start killing these assholes. Chin chin.,1,0,1,1,1,0,1,0,1,0,1,0
1052,41c4c104018db22f,"I am going to shit on your face, and cut your head off !",1,0,1,1,1,0,1,0,1,0,1,0
1457,7521c02d205b8da4,hey pice of shit don't tell me what to write on wikipedia...i'm gona hunt you down and kill everyone you ever loved,1,0,1,1,1,0,1,0,1,0,0,0
1590,fa3c0dc48563a1f8,faggot \n\nYou lil piece of shit I havenÃ¢ÂÂt vandalised nothing I will fuck u up wh,1,1,1,1,1,1,1,1,1,0,1,1
1702,887488b5a5f7ca3c,Jerk\nIll shwo you idiot! I will finish your life you idiot! For the last time! There is NO name to the Cathedral! That is it! Do uyou understand?,1,0,1,1,1,0,1,0,1,0,1,0
1806,385ea7fc41136c96,Graeme Bartlet MUST die,1,0,0,1,0,0,1,0,0,0,0,0
1905,0e640c74529f24aa,"Wtf, Why You deleting Maashel? Bad things will happen to you now.",1,0,0,1,0,0,1,0,0,0,0,0


In [28]:
all_true_with_predictions[all_true_with_predictions['id'].isin(['afe3305d737023ee', 
                                                               '864fc5f05042b680', 
                                                               '99e358ed517a8e2f', 
                                                               '88149e06a71a830d', 
                                                               '8685e98b780e4874', 
                                                               'be121dec217158da', 
                                                               'c629d9a36acc549f', 
                                                               '9e069160cb8d6e68', 
                                                               'c47353648be65580', 
                                                               '256d494d8ec09cdc', 
                                                               '24a6e0047720debd', 
                                                               'a5d240cc8bc87faf'])].to_csv('examples.csv', index=False)