<a href="https://colab.research.google.com/github/ellerywuyn/LEAR-lab/blob/1-compare-existing-profanity-models/profanity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Offensive Language Detection
# Using the twitter-roberta-base-offensive Transformer Model

## Setups

In [None]:
# import packages
!pip install transformers
import shutil
import time
import pandas as pd
import numpy as np
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
from scipy.special import softmax
import csv
import urllib.request

In [None]:
# remove local model repo
shutil.rmtree('cardiffnlp', ignore_errors=True)

In [5]:
# preprocess text (username and link placeholders)
def preprocess(text):
    new_text = []
    for t in text.split(" "):
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)

task='offensive'
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

tokenizer = AutoTokenizer.from_pretrained(MODEL)

# initiate model
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
model.save_pretrained(MODEL)
tokenizer.save_pretrained(MODEL)

# download label mapping
labels=[]
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
with urllib.request.urlopen(mapping_link) as f:
    html = f.read().decode('utf-8').split("\n")
    csvreader = csv.reader(html, delimiter='\t')
labels = [row[1] for row in csvreader if len(row) > 1]

Downloading (…)lve/main/config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/499M [00:00<?, ?B/s]

## Read in twitter dataset

In [7]:
tweets_df = pd.read_csv("labeled_data.csv")

In [33]:
tweets_df.sample(10)

Unnamed: 0.1,Unnamed: 0,count,hate_speech,offensive_language,neither,class,tweet
9768,10034,3,0,3,0,1,Heartless ass niggah &#128513; but who isn't n...
21266,21727,3,0,3,0,1,TV makes these hoes seem more attractive. *yawns*
7908,8129,6,0,5,1,1,Bad bitch with me and you kno that her ass is fat
2185,2230,3,0,3,0,1,".@SarahPalinUSA Wow, ""impeachment."" Look at yo..."
11620,11914,3,1,0,2,2,If you are descended from anything less than E...
10551,10830,3,0,3,0,1,I heard them same pussy niggas hatin !
17247,17640,6,0,6,0,1,RT @SexualTruth: Eating your womans pussy is g...
18934,19353,3,0,3,0,1,RT @dubstep_cowboy: #HowToGetAGirl do not and ...
21462,21924,3,0,3,0,1,That's what I tell my hoes when they ain't thr...
17899,18301,9,1,7,1,1,RT @Tylar____: @1stBlocJeremiah lol why you tr...


In [35]:
tweets_df.shape

(24783, 7)

## Data Wrangling

In [37]:
# subset variables of interest
tweets_df_interest = tweets_df[["class", "tweet"]]

In [42]:
# remove the hate speech rows
tweets_df_interest = tweets_df_interest[tweets_df_interest["class"].isin([1,2])]

In [49]:
tweets_df_interest.shape

(23353, 2)

In [45]:
# check for NA values - confirmed that there aren't any
display(tweets_df_interest.isnull().values.any())
display(tweets_df_interest.isnull().sum())

False

class    0
tweet    0
dtype: int64

In [50]:
# get the # of rows for class == 2 because it has significant less rows than class == 1 
# we want to match them by undersampling class == 1
n_sample = sum(tweets_df_interest["class"] == 2)
n_sample

4163

In [60]:
# Randomly sample n_sample rows from class == 1
class1 = tweets_df_interest["class"][tweets_df_interest["class"] == 1].sample(n_sample).index 
class2 = tweets_df_interest["class"][tweets_df_interest["class"] == 2].index 

sampled_tweets_df = tweets_df_interest.loc[class1.union(class2)]

In [67]:
# sanity check
display(sampled_tweets_df.shape)
display(sampled_tweets_df["class"].value_counts())

(8326, 2)

2    4163
1    4163
Name: class, dtype: int64

In [72]:
# change all the class == 2 (non-offensive) to 0 to conform to convention
sampled_tweets_df["class"] = sampled_tweets_df["class"].replace(2, 0)

## Testing

In [74]:
# define a function to generate labels using the transformer model
def get_label(input):
  text = input
  text = preprocess(text)
  encoded_input = tokenizer(text, return_tensors='pt')
  output = model(**encoded_input)
  scores = output[0][0].detach().numpy()
  scores = softmax(scores)
  # scores[0] is the not-offensive score
  # not-offensive score <= 0.5 -> label 1
  # not-offensive score > 0.5 -> label 0
  return 1 if scores[0] <= 0.5 else 0

In [85]:
# use the model to get predictions and store them in a new column named "predictions" in sampled_tweets_df
start_time = time.time()
sampled_tweets_df['prediction'] = sampled_tweets_df.apply(lambda x: get_label(x["tweet"]), axis=1)
end_time = time.time()
total_time = end_time - start_time
print("total time:", total_time)

total time: 1185.2473545074463


In [94]:
# create a copy to avoid having to rerun 20 minutes again
final_df = sampled_tweets_df.copy()

In [95]:
# change the "class" column to "label" for clarity purposes
final_df = final_df.rename(columns = {"class": "label"})

In [96]:
# reorder the columns
final_df = final_df.loc[:,['tweet','label','prediction']]
final_df.sample(10)

Unnamed: 0,tweet,label,prediction
20874,Skippy and the Kats. #FezBandName @RonandFezSXM,0,0
14050,RT @77StephanieG77: A portable septic tank. Fo...,0,0
7773,Annoying yelling bint next door has locked her...,0,0
2157,*rubs hands like bird man,0,1
4684,@SharpayyDoll cunt nigger fuck balls tits cock...,1,1
21440,That was trash. I'm just gonna put my phone do...,0,1
17479,RT @SunSentinel: The @SunSentinel Editorial Bo...,0,0
20655,See bitches ruin sports kobe don't play no dam...,1,1
20414,RT @youngtravo: If u ain't never had a pregnan...,1,1
18372,RT @__E_T___: Brown colored eyes are really bl...,0,0


## Model Evaluation

Define accuracy as 1 – (Number of misclassified samples / Total number of samples)

In [99]:
# add a column named "correct" where 1 is assigned if the prediction matches label, 0 otherwise
final_df['correct'] = final_df.apply(lambda x: 1 if x["prediction"] == x["label"] else 0, axis=1)

In [102]:
final_df.sample(10)

Unnamed: 0,tweet,label,prediction,correct
13074,My mom didn't buy me Oreos she doesn't love me,0,0,1
9649,Happy Bday ya bitch you!! @Maffue,1,1,1
20277,"RT @versacecum: bitches be like ""&#9749;&#1280...",1,1,1
15033,"RT @Dory: my anaconda don't want none, unless ...",0,0,1
8515,Conan Obrien needs to get a life with that $80...,0,0,1
7255,@vh1ADENECA @260chocolate @4Sunshinejones1 aye...,0,1,0
4175,@MrInfernusHD Squad builders gets you bitches,1,1,1
20192,RT @timbvtcher: Old soul? More like young bitch.,1,1,1
17381,"RT @Soy_Brent: steal a nigga's bitch, don't st...",1,1,1
10522,I hate yellow jackets can't feel arms and back...,0,0,1


In [110]:
counts = final_df["correct"].value_counts()
counts

1    7114
0    1212
Name: correct, dtype: int64

In [126]:
# calculate postivie, true positive, false positive, and false negative
p = sum(final_df["prediction"] == 1)
tp = sum((final_df["prediction"] == 1) & (final_df["correct"] == 1))
fp = sum((final_df["prediction"] == 1) & (final_df["correct"] == 0))
tn = sum((final_df["prediction"] == 0) & (final_df["correct"] == 1))
fn = sum((final_df["prediction"] == 0) & (final_df["correct"] == 0))

In [127]:
print(" P: {}\n".format(p), 
      "TP: {}\n".format(tp), 
      "FP: {}\n".format(fp), 
      "TN: {}\n".format(tn), 
      "FN: {}\n".format(fn))

 P: 5037
 TP: 3994
 FP: 1043
 TN: 3120
 FN: 169



In [128]:
accuracy = 1 - counts[0]/final_df["correct"].count()
accuracy

0.8544319000720634

In [129]:
precision = tp / p
precision

0.7929323009728013

In [130]:
recall = tp / (tp + fn)
recall

0.9594042757626712

In [131]:
f1 = (2 * precision * recall) / (precision + recall)
f1

0.8682608695652175

### Time

In [121]:
print('{}s per tweet'.format(round(total_time/final_df["correct"].count(), 5)))

0.14235s per tweet


## Archive

In [18]:
example_1 = tweets_df["tweet"][0]

0        !!! RT @mayasolovely: As a woman you shouldn't...
1        !!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2        !!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3        !!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4        !!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...
                               ...                        
24778    you's a muthaf***in lie &#8220;@LifeAsKing: @2...
24779    you've gone and broke the wrong heart baby, an...
24780    young buck wanna eat!!.. dat nigguh like I ain...
24781                youu got wild bitches tellin you lies
24782    ~~Ruffled | Ntac Eileen Dahlia - Beautiful col...
Name: tweet, Length: 24783, dtype: object

In [None]:
text = example_1
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)

# not-offensive score less than 0.5 -> label 1
# not-offensive score larger than 0.5 -> label 0
scores[0]




ranking = np.argsort(scores)
ranking = ranking[::-1]
for i in range(scores.shape[0]):
    l = labels[ranking[i]]
    # print(s)
    s = scores[ranking[i]]
    print(f"{i+1}) {l} {np.round(float(s), 4)}")

In [22]:
text = example_1
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)

ranking = np.argsort(scores)
ranking = ranking[::-1]
for i in range(scores.shape[0]):
    l = labels[ranking[i]]
    # print(s)
    s = scores[ranking[i]]
    print(f"{i+1}) {l} {np.round(float(s), 4)}")

1) not-offensive 0.5326
2) offensive 0.4674


In [31]:
text = "Good night 😊"
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)
print(scores)
ranking = np.argsort(scores)
print(ranking)
ranking = ranking[::-1]
print(ranking)

for i in range(scores.shape[0]):
    l = labels[ranking[i]]
    s = scores[ranking[i]]
    # print(s)
    print(f"{i+1}) {l} {np.round(float(s), 4)}")

[0.9073082  0.09269167]
[1 0]
[0 1]
1) not-offensive 0.9073
2) offensive 0.0927


In [None]:
# 235 words
text = "Economists see the real world as one in which people’s desires exceed what is possible – in other words, in terms of scarcity. \
Economic behavior involves tradeoffs in which individuals, firms, and society must forgo something that they desire to obtain things that they desire more.\
Individuals face the tradeoff of what quantities of goods and services to consume.\
The budget constraint, which is the frontier of the opportunity set, illustrates the range of available choices. The relative price of the choices determines the slope of the budget constraint. Choices beyond the budget constraint are not affordable.\
Opportunity cost measures cost by what we forgo in exchange for the goods and services we consume. Sometimes we can measure opportunity cost in money, but it is often useful to consider time as well, or to measure it in terms of the actual resources that we must forfeit.\
Most economic decisions and tradeoffs are not all-or-nothing. Instead, they involve marginal analysis, which means they are about decisions on the margin, involving a little more or a little less.\
The law of diminishing marginal utility points out that as a person receives more of something—whether it is a specific good or another resource—the additional marginal gains tend to become smaller.\
Because sunk costs occurred in the past and cannot be recovered, they should be disregarded in making current decisions."

start_time = time.time()

text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)

ranking = np.argsort(scores)
ranking = ranking[::-1]
for i in range(scores.shape[0]):
    l = labels[ranking[i]]
    s = scores[ranking[i]]
    print(f"{i+1}) {l} {np.round(float(s), 4)}")

end_time = time.time()
total_time = end_time - start_time
print("total time:", total_time)

1) not-offensive 0.8946
2) offensive 0.1054
total time: 0.7390432357788086


In [30]:
text = "You suck"
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)
print(scores)
ranking = np.argsort(scores)
ranking = ranking[::-1]
for i in range(scores.shape[0]):
    l = labels[ranking[i]]
    s = scores[ranking[i]]
    print(f"{i+1}) {l} {np.round(float(s), 4)}")

[0.13553596 0.8644641 ]
1) offensive 0.8645
2) not-offensive 0.1355


In [None]:
# # TF
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL)

# text = "Good night 😊"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)

In [None]:
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
# tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-offensive")
# model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-offensive")