In [122]:
import nltk
from nltk.stem import PorterStemmer
from collections import Counter
from math import log, sqrt
import pandas as pd
import matplotlib.pyplot as plt
import re
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score

# Toxic Comments: TFIDF vs MaxEnt vs Polarity - David Duffrin

In [7]:
sw = {"i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should", "now"}

In [13]:
comments = pd.read_csv('toxic/train.csv')
comments.head()

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


In [14]:
print(sum(comments.toxic == 1), len(comments))

15294 159571


In [15]:
comments = comments[['comment_text', 'toxic']]

I will be looking at the comment_text and try to predict toxic

In [19]:
comments_subset = comments.groupby('toxic').apply(lambda x: x.sample(15000, random_state=0))
comments_subset.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,comment_text,toxic
toxic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,87940,"Dear Mr Moody,\n\nMy website does not discoura...",0
0,19563,Winx Club\nYou recently edited the list of epi...,0
0,135136,without any concern for Wikipedia's lazy admin...,0
0,11405,There was a documentary shown on Mediacorp Cha...,0
0,23189,"""\nI don't see the need for a """"lasting impact...",0


In [20]:
ps = PorterStemmer()
comments_subset.comment_text = comments_subset.comment_text.apply(lambda w: Counter([ps.stem(word) for word in re.sub('[^a-z]', ' ', w.lower()).split() if word not in sw]))

In [21]:
comments_subset.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,comment_text,toxic
toxic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,87940,"{'dear': 1, 'mr': 1, 'moodi': 1, 'websit': 2, ...",0
0,19563,"{'winx': 2, 'club': 2, 'recent': 1, 'edit': 1,...",0
0,135136,"{'without': 1, 'concern': 1, 'wikipedia': 1, '...",0
0,11405,"{'documentari': 1, 'shown': 1, 'mediacorp': 1,...",0
0,23189,"{'see': 1, 'need': 1, 'last': 1, 'impact': 1, ...",0


In [24]:
num_toxic = len(comments_subset[comments_subset.toxic == 1])
num_train = len(comments_subset)
pc = num_toxic / num_train
pnc = 1 - pc
pc = -(pc*log(pc) + pnc*log(pnc))

In [26]:
words = set()
for word_set in [w for w in comments_subset.comment_text]:
    words |= set(word_set)

In [27]:
term_info = Counter()
train_set = [(x[2], x[1]) for x in comments_subset.itertuples()]

for term in words:
    pt = len([1 for _, x in train_set if term in x])/num_train
    with_term = [x for x, text in train_set if term in text]
    pc_t = len([1 for x in with_term if x == 1]) / len(with_term)
    if pc_t == 0:
        pc_t = 0.00001
    elif pc_t == 1:
        pc_t = 0.99999
    t = pt * (pc_t*log(pc_t) + (1-pc_t)*log(1-pc_t))
    without_term = [x for x, text in train_set if term not in text]
    if len(without_term) == 0:
        pc_nt = 0
    else:
        pc_nt = len([1 for x in without_term if x == 1]) / len(without_term)
    if pc_nt == 0:
        pc_nt = 0.00001
    elif pc_nt == 1:
        pc_nt = 0.99999
    nt = (1-pt) * (pc_nt*log(pc_nt) + (1-pc_nt)*log(1-pc_nt))
    term_info[term] = pc + t + nt

In [28]:
feature_selection = [x for x, _ in term_info.most_common(50)]

In [29]:
feature_selection[:10]

['fuck',
 'shit',
 'articl',
 'thank',
 'ass',
 'suck',
 'bitch',
 'stupid',
 'pleas',
 'talk']

It appears that MaxEnt highly weights words that are toxic. This makes sense because you can see all kinds of words in toxic comments, however seeing toxic words in a nontoxic comment is highly unlikely. 

In [49]:
train_joint = [(y, 'pos' if x==1 else 'neg') for x, y in train_set]
test_joint = train_joint[1::2]
train_joint = train_joint[0::2]


train_dict = []

empty_dict = {word: 0 for word in feature_selection[:50]}

for x, y in train_joint:
    new_dict = empty_dict.copy()#dict()
    for word in x:
        if word in feature_selection[:50]:
            new_dict[word] = x[word]
    train_dict.append((new_dict, y))

test_dict = []
for x, y in test_joint:
    new_dict = empty_dict.copy()#dict()
    for word in x:
        if word in feature_selection[:50]:
            new_dict[word] = x[word]
    test_dict.append((new_dict, y))

In [53]:
encoding = nltk.classify.maxent.BinaryMaxentFeatureEncoding.train(train_dict, alwayson_features=True)
trained_model = nltk.classify.MaxentClassifier.train(train_dict, encoding=encoding)

  ==> Training (100 iterations)

      Iteration    Log Likelihood    Accuracy
      ---------------------------------------
             1          -0.69315        0.500
             2          -0.66339        0.759
             3          -0.63701        0.784
             4          -0.61380        0.784
             5          -0.59335        0.785
             6          -0.57528        0.785
             7          -0.55922        0.785
             8          -0.54487        0.786
             9          -0.53197        0.786
            10          -0.52032        0.786
            11          -0.50974        0.786
            12          -0.50010        0.786
            13          -0.49127        0.786
            14          -0.48317        0.786
            15          -0.47569        0.786
            16          -0.46878        0.787
            17          -0.46236        0.787
            18          -0.45640        0.787
            19          -0.45084        0.787
 

In [54]:
# baseline
print('Baseline accuracy: ', len([1 for _, y in test_dict if y == 'neg'])/len(test_dict))

Baseline accuracy:  0.5


In [55]:
y_pred = trained_model.classify_many([x for x, _ in test_dict])
y_true = [y for _, y in test_dict]

print('F1: ', f1_score(y_true, y_pred, pos_label='pos'))
print('Precision: ', precision_score(y_true, y_pred, pos_label='pos'))
print('Recall: ', recall_score(y_true, y_pred, pos_label='pos'))
print('Accuracy: ', nltk.classify.accuracy(trained_model, test_dict))

F1:  0.7063714261474507
Precision:  0.9710753440634476
Recall:  0.5550666666666667
Accuracy:  0.7692666666666667


In [146]:
meme = 'What the fuck did you just fucking say about me, you little bitch? I will have you know I graduated top of my class in the Navy Seals, and I have been involved in numerous secret raids on Al-Quaeda, and I have over 300 confirmed kills. I am trained in gorilla warfare and I am the top sniper in the entire US armed forces. You are nothing to me but just another target. I will wipe you the fuck out with precision the likes of which has never been seen before on this Earth, mark my fucking words. You think you can get away with saying that shit to me over the Internet? Think again, fucker. As we speak I am contacting my secret network of spies across the USA and your IP is being traced right now so you better prepare for the storm, maggot. The storm that wipes out the pathetic little thing you call your life. You are fucking dead, kid. I can be anywhere, anytime, and I can kill you in over seven hundred ways, and that is just with my bare hands. Not only am I extensively trained in unarmed combat, but I have access to the entire arsenal of the United States Marine Corps and I will use it to its full extent to wipe your miserable ass off the face of the continent, you little shit. If only you could have known what unholy retribution your little "clever" comment was about to bring down upon you, maybe you would have held your fucking tongue. But you could not, you did not, and now you are paying the price, you goddamn idiot. I will shit fury all over you and you will drown in it. You are fucking dead, kiddo.'

stemmed_meme = []
for word in meme.lower().split():
    stemmed_meme.append(ps.stem(word))

trained_model.classify(Counter(stemmed_meme))

'pos'

It works!

Now, time to see if TFIDF gives similar output.

In [56]:
tf = comments_subset.copy()
tf.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,comment_text,toxic
toxic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,87940,"{'dear': 1, 'mr': 1, 'moodi': 1, 'websit': 2, ...",0
0,19563,"{'winx': 2, 'club': 2, 'recent': 1, 'edit': 1,...",0
0,135136,"{'without': 1, 'concern': 1, 'wikipedia': 1, '...",0
0,11405,"{'documentari': 1, 'shown': 1, 'mediacorp': 1,...",0
0,23189,"{'see': 1, 'need': 1, 'last': 1, 'impact': 1, ...",0


In [71]:
tf = tf.reset_index(drop=True).groupby('toxic').comment_text.apply(list).apply(lambda counter_list: sum(counter_list, Counter()))
tf

toxic
0    {'dear': 104, 'mr': 177, 'moodi': 2, 'websit':...
1    {'niggah': 3, 'hey': 793, 'fool': 372, 'whyd':...
Name: comment_text, dtype: object

In [76]:
def numDocsWithWord(word, trainList):
    count = 0
    for doc in trainList:
        if word in doc:
            count += 1
    return count

In [79]:
def compute_idf(tf, all_words):
    idf = dict()
    for word in all_words:
        idf[word] = log((1+len(tf))/(1+numDocsWithWord(word, tf)))+1
    return idf

In [95]:
def compute_tfidf(tf, idf):
    tfidf = tf.apply(lambda word_dict: {word: tf_val * idf[word] for word, tf_val in word_dict.items()})
    tfidf = pd.DataFrame(tfidf)
    tfidf['mag'] = tfidf.comment_text.apply(lambda word_dict: sqrt(sum([x**2 for x in word_dict.values()])))
    tfidf['wordsNorm'] = tfidf.apply(lambda row: {word:tfidf_score/row.mag for word, tfidf_score in row.comment_text.items()}, axis=1)
    normedTfidf = pd.DataFrame(list(tfidf.wordsNorm))
    return normedTfidf

In [96]:
idf = compute_idf(tf, set(tf[0]) | set(tf[1]))
tfidf = compute_tfidf(tf, idf)

In [98]:
list(tfidf.max(axis=0).sort_values(ascending=False)[:10].index)

['fuck',
 'articl',
 'page',
 'wikipedia',
 'suck',
 'edit',
 'talk',
 'use',
 'go',
 'shit']

In [99]:
feature_selection[:10]

['fuck',
 'shit',
 'articl',
 'thank',
 'ass',
 'suck',
 'bitch',
 'stupid',
 'pleas',
 'talk']

TFIDF and MaxEnt gave pretty similar results.

In [100]:
profanity = {'4r5e','5h1t','5hit','a55','anal','anus','ar5e','arrse','arse','ass','ass-fucker','asses','assfucker','assfukka','asshole','assholes','asswhole','a_s_s','b!tch','b00bs','b17ch','b1tch','ballbag','balls','ballsack','bastard','beastial','beastiality','bellend','bestial','bestiality','bi+ch','biatch','bitch','bitcher','bitchers','bitches','bitchin','bitching','bloody','blow job','blowjob','blowjobs','boiolas','bollock','bollok','boner','boob','boobs','booobs','boooobs','booooobs','booooooobs','breasts','buceta','bugger','bum','bunny fucker','butt','butthole','buttmuch','buttplug','c0ck','c0cksucker','carpet muncher','cawk','chink','cipa','cl1t','clit','clitoris','clits','cnut','cock','cock-sucker','cockface','cockhead','cockmunch','cockmuncher','cocks','cocksuck ','cocksucked ','cocksucker','cocksucking','cocksucks ','cocksuka','cocksukka','cok','cokmuncher','coksucka','coon','cox','crap','cum','cummer','cumming','cums','cumshot','cunilingus','cunillingus','cunnilingus','cunt','cuntlick ','cuntlicker ','cuntlicking ','cunts','cyalis','cyberfuc','cyberfuck ','cyberfucked ','cyberfucker','cyberfuckers','cyberfucking ','d1ck','damn','dick','dickhead','dildo','dildos','dink','dinks','dirsa','dlck','dog-fucker','doggin','dogging','donkeyribber','doosh','duche','dyke','ejaculate','ejaculated','ejaculates ','ejaculating ','ejaculatings','ejaculation','ejakulate','f u c k','f u c k e r','f4nny','fag','fagging','faggitt','faggot','faggs','fagot','fagots','fags','fanny','fannyflaps','fannyfucker','fanyy','fatass','fcuk','fcuker','fcuking','feck','fecker','felching','fellate','fellatio','fingerfuck ','fingerfucked ','fingerfucker ','fingerfuckers','fingerfucking ','fingerfucks ','fistfuck','fistfucked ','fistfucker ','fistfuckers ','fistfucking ','fistfuckings ','fistfucks ','flange','fook','fooker','fuck','fucka','fucked','fucker','fuckers','fuckhead','fuckheads','fuckin','fucking','fuckings','fuckingshitmotherfucker','fuckme ','fucks','fuckwhit','fuckwit','fudge packer','fudgepacker','fuk','fuker','fukker','fukkin','fuks','fukwhit','fukwit','fux','fux0r','f_u_c_k','gangbang','gangbanged ','gangbangs ','gaylord','gaysex','goatse','God','god-dam','god-damned','goddamn','goddamned','hardcoresex ','hell','heshe','hoar','hoare','hoer','homo','hore','horniest','horny','hotsex','jack-off ','jackoff','jap','jerk-off ','jism','jiz ','jizm ','jizz','kawk','knob','knobead','knobed','knobend','knobhead','knobjocky','knobjokey','kock','kondum','kondums','kum','kummer','kumming','kums','kunilingus','l3i+ch','l3itch','labia','lmfao','lust','lusting','m0f0','m0fo','m45terbate','ma5terb8','ma5terbate','masochist','master-bate','masterb8','masterbat*','masterbat3','masterbate','masterbation','masterbations','masturbate','mo-fo','mof0','mofo','mothafuck','mothafucka','mothafuckas','mothafuckaz','mothafucked ','mothafucker','mothafuckers','mothafuckin','mothafucking ','mothafuckings','mothafucks','mother fucker','motherfuck','motherfucked','motherfucker','motherfuckers','motherfuckin','motherfucking','motherfuckings','motherfuckka','motherfucks','muff','mutha','muthafecker','muthafuckker','muther','mutherfucker','n1gga','n1gger','nazi','nigg3r','nigg4h','nigga','niggah','niggas','niggaz','nigger','niggers ','nob','nob jokey','nobhead','nobjocky','nobjokey','numbnuts','nutsack','orgasim ','orgasims ','orgasm','orgasms ','p0rn','pawn','pecker','penis','penisfucker','phonesex','phuck','phuk','phuked','phuking','phukked','phukking','phuks','phuq','pigfucker','pimpis','piss','pissed','pisser','pissers','pisses ','pissflaps','pissin ','pissing','pissoff ','poop','porn','porno','pornography','pornos','prick','pricks ','pron','pube','pusse','pussi','pussies','pussy','pussys ','rectum','retard','rimjaw','rimming','s hit','s.o.b.','sadist','schlong','screwing','scroat','scrote','scrotum','semen','sex','sh!+','sh!t','sh1t','shag','shagger','shaggin','shagging','shemale','shi+','shit','shitdick','shite','shited','shitey','shitfuck','shitfull','shithead','shiting','shitings','shits','shitted','shitter','shitters ','shitting','shittings','shitty ','skank','slut','sluts','smegma','smut','snatch','son-of-a-bitch','spac','spunk','s_h_i_t','t1tt1e5','t1tties','teets','teez','testical','testicle','tit','titfuck','tits','titt','tittie5','tittiefucker','titties','tittyfuck','tittywank','titwank','tosser','turd','tw4t','twat','twathead','twatty','twunt','twunter','v14gra','v1gra','vagina','viagra','vulva','w00se','wang','wank','wanker','wanky','whoar','whore','willies','willy','xrated','xxx'}
stemmed_prof = set()
for word in profanity:
    stemmed_word = ps.stem(word)
    stemmed_prof.add(stemmed_word)

In [131]:
y_true = []
y_pred = []

for x, y in comments_subset.reset_index(drop=True).iterrows():
    y_true.append(y[1])
    profane = False
    for word in y[0]:
        if word in stemmed_prof:
            profane = True
    if profane:
        y_pred.append(1)
    else:
        y_pred.append(0)

In [136]:
print('F1: ', f1_score(y_true, y_pred, average='micro'))
print('Precision: ', precision_score(y_true, y_pred, average='micro'))
print('Recall: ', recall_score(y_true, y_pred, average='micro'))
print('Accuracy: ', sum(np.asarray(y_true) == np.asarray(y_pred))/len(y_true))

F1:  0.7837
Precision:  0.7837
Recall:  0.7837
Accuracy:  0.7837


A simple profanity filter was better at finding `toxic` comments than a MaxEnt classifier when looking at accuracy. The recall is higher but the precision is lower for this simple classifier.

That means that we are finding more of the toxic comments, however we are classifying more non-toxic comments as toxic. This is weird that non-toxic words contain profanity, however maybe they are quoting something or writing about a controversial topic.

Precision = $\frac{tp}{tp+fp}$

Recall = $\frac{tp}{tp+fn}$

This means that $fp = fn$

What else is weird is that all the numbers are exactly the same. This must mean that the distribution of profanity in toxic comments is the inverse probability of finding profanity in nontoxic comments. Or that I am doing something completely wrong.

Polarity and then word embeddings

1. Some Gensim stuff

- word embeddings -> PCA/t-SNE for docs

- 2 group Latent Direchlet Allocation (unsupervised)

- word cooccurence matrix

Outside of a bag-of-words model, we could look at:

- Length of comment
- Capital letters (number/proportion)
- Punctuation marks (question/exclamation/etc)
- Unique words
- Words not in dictionary (trying to get around censoring)