<a href="https://colab.research.google.com/github/ellerywuyn/LEAR-lab/blob/3-stratification-and-percent-offensive/profanity_v4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# import packages
!pip install transformers
import csv
import shutil
import time
import urllib.request

import numpy as np
import pandas as pd
import torch
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import TFAutoModelForSequenceClassification

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.27.2-py3-none-any.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m35.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.3-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.3 tokenizers-0.13.2 transformers-4.27.2


In [13]:
# remove local model repo
# The model cell below saves the downloaded model and tokenizer into a local
# directory under 'cardiffnlp/'. Deleting that tree first presumably forces a
# fresh download instead of loading the local copy (TODO confirm).
# ignore_errors=True keeps this a no-op when the directory does not exist.
shutil.rmtree('cardiffnlp', ignore_errors=True)

In [14]:
# preprocess text (username and link placeholders)
def preprocess(text):
    """Replace user mentions and links in ``text`` with fixed placeholders.

    The text is split on single spaces. A token that starts with ``@`` and
    is longer than one character becomes ``@user``; a token that starts with
    ``http`` becomes ``http``; every other token is kept unchanged. Tokens
    are joined back with single spaces, so the original spacing survives.
    """
    def _placeholder(token):
        # A lone "@" is not treated as a mention.
        if len(token) > 1 and token.startswith('@'):
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    return " ".join(_placeholder(token) for token in text.split(" "))

task = 'offensive'
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

# load the tokenizer and the sequence-classification model named by MODEL,
# then save both into a local directory with the same name
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
model.save_pretrained(MODEL)
tokenizer.save_pretrained(MODEL)

# download the tab-separated label mapping and keep the second field of every
# row that has at least two fields
labels = []
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
with urllib.request.urlopen(mapping_link) as response:
    mapping_lines = response.read().decode('utf-8').split("\n")
labels = [
    fields[1]
    for fields in csv.reader(mapping_lines, delimiter='\t')
    if len(fields) > 1
]

Downloading (…)lve/main/config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

In [2]:
import re
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

In [4]:
# load the labelled tweets and keep only the label and text columns
tweets_df = pd.read_csv("/content/drive/MyDrive/labeled_data.csv")
tweets_df_interest = tweets_df[["class", "tweet"]]
# keep classes 1 and 2 only; rows with any other class are dropped
tweets_df_interest = tweets_df_interest[tweets_df_interest["class"].isin([1, 2])]

# balance the two classes: draw (with a fixed seed) as many class-1 rows as
# there are class-2 rows, and keep every class-2 row
class_col = tweets_df_interest["class"]
n_sample = sum(class_col == 2)
class1 = class_col[class_col == 1].sample(n_sample, random_state=1).index
class2 = class_col[class_col == 2].index
sampled_tweets_df = tweets_df_interest.loc[class1.union(class2)]
# recode class 2 as 0 so the column holds only 1 and 0
sampled_tweets_df["class"] = sampled_tweets_df["class"].replace(2, 0)

In [9]:
# define a function to generate labels using the transformer model
def get_label(input, max_length=512):
  """Classify a piece of text as offensive (1) or not offensive (0).

  The text is passed through ``preprocess`` and then through the module-level
  ``tokenizer`` and ``model``. The logits of the first (only) item in the
  batch are converted to probabilities with ``softmax``.

  Args:
    input: the text to classify (a whole tweet or a single word).
    max_length: maximum number of tokens passed to the model; longer inputs
      are truncated instead of failing inside the model. The default of 512
      is assumed to match the model's position limit -- TODO confirm against
      ``model.config``.

  Returns:
    1 if the not-offensive probability is <= 0.5, otherwise 0.
  """
  text = preprocess(input)
  # Truncate explicitly: without it an over-long input reaches the model
  # unshortened.
  encoded_input = tokenizer(text, return_tensors='pt', truncation=True, max_length=max_length)
  # Inference only: skip autograd bookkeeping, since this runs once per word.
  with torch.no_grad():
    output = model(**encoded_input)
  scores = softmax(output[0][0].detach().numpy())
  # scores[0] is the not-offensive score
  # not-offensive score <= 0.5 -> label 1
  # not-offensive score > 0.5 -> label 0
  return 1 if scores[0] <= 0.5 else 0

In [None]:
# classify each whitespace-separated word of every tweet on its own and store
# how many words were labelled 1 in a new column named "num_bad_words"
sampled_tweets_df['num_bad_words'] = sampled_tweets_df['tweet'].map(
    lambda tweet: sum(get_label(word) for word in tweet.split()))

In [None]:
# number of whitespace-separated words in each tweet
sampled_tweets_df['num_total_words'] = sampled_tweets_df['tweet'].map(
    lambda tweet: len(tweet.split()))

In [None]:
# share of each tweet's words that were labelled 1
sampled_tweets_df['prop_bad_words'] = sampled_tweets_df['num_bad_words'].div(
    sampled_tweets_df['num_total_words'])

In [None]:
# classify each full tweet and store the 0/1 result in a new column named "prediction"
sampled_tweets_df['prediction'] = sampled_tweets_df['tweet'].map(lambda tweet: get_label(tweet))

# change the "class" column to "label" for clarity purposes
sampled_tweets_df = sampled_tweets_df.rename(columns={"class": "label"})

# reorder the columns
sampled_tweets_df = sampled_tweets_df[
    ['tweet', 'label', 'prediction', 'num_bad_words', 'num_total_words', 'prop_bad_words']
]

In [None]:
# display the frame with labels, predictions and per-tweet word counts
sampled_tweets_df

In [None]:
# how many tweets have each distinct number of flagged words
num_bad_words_counts = sampled_tweets_df['num_bad_words'].value_counts()
num_bad_words_counts

In [None]:
# histogram of the number of flagged words per tweet
sampled_tweets_df['num_bad_words'].plot.hist(bins=9)

In [None]:
# histogram of the share of flagged words per tweet
sampled_tweets_df['prop_bad_words'].plot.hist(bins=9)

In [None]:
# split the tweets into groups by number of flagged words: 0, 1, 2, 3, and 4 or more
df_0_bad = sampled_tweets_df.loc[sampled_tweets_df['num_bad_words'].eq(0)]
df_1_bad = sampled_tweets_df.loc[sampled_tweets_df['num_bad_words'].eq(1)]
df_2_bad = sampled_tweets_df.loc[sampled_tweets_df['num_bad_words'].eq(2)]
df_3_bad = sampled_tweets_df.loc[sampled_tweets_df['num_bad_words'].eq(3)]
df_4_plus_bad = sampled_tweets_df.loc[sampled_tweets_df['num_bad_words'].ge(4)]

In [None]:
# display name -> group frame, in increasing order of flagged-word count
df_dict = dict(zip(
    ["0 bad words", "1 bad word", "2 bad words", "3 bad words", "4+ bad words"],
    [df_0_bad, df_1_bad, df_2_bad, df_3_bad, df_4_plus_bad],
))

In [None]:
target_names = ["not offensive", "offensive"]
# Pin the label set so every report and confusion matrix covers both classes,
# even for a group that contains only one of them. Without this,
# classification_report raises ValueError because target_names has two entries.
class_labels = [0, 1]
for key, group_df in df_dict.items():
  # Nothing to score in an empty group; say so explicitly and move on.
  if len(group_df) == 0:
    print(key, "\n", "no tweets in this group", "\n")
    continue

  y_true = group_df["label"]
  y_pred = group_df["prediction"]

  # print the classification reports for all 5
  print(key, "\n", classification_report(
      y_true, y_pred, labels=class_labels, target_names=target_names, zero_division=0))

  # zero_division=0 reports 0 (without a warning) when a score is undefined,
  # e.g. precision when the model never predicts the positive class.
  metrics = {
    "Metrics": ["Accuracy", "Precision", "Recall", "F1"],
    "Values": [round(accuracy_score(y_true, y_pred), 2),
               round(precision_score(y_true, y_pred, zero_division=0), 2),
               round(recall_score(y_true, y_pred, zero_division=0), 2),
               round(f1_score(y_true, y_pred, zero_division=0), 2)]
  }
  print(pd.DataFrame(metrics), "\n")
  cm = confusion_matrix(y_true, y_pred, labels=class_labels)
  disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=target_names)
  disp.plot()
  plt.show()


In [None]:
# tweets with no flagged words that are labelled 1 but predicted 0
df_0_bad.query("prediction == 0 and label == 1")

In [None]:
# texts of the tweets with no flagged words that are labelled 1 but predicted 0
df_0_bad.query("prediction == 0 and label == 1")["tweet"].to_list()

In [None]:
# tweets with one flagged word that are labelled 0 but predicted 1
df_1_bad.query("prediction == 1 and label == 0")

In [None]:
# texts of the tweets with one flagged word that are labelled 0 but predicted 1
df_1_bad.query("prediction == 1 and label == 0")["tweet"].to_list()

In [None]:
# tweets with two flagged words that are labelled 0 but predicted 1
df_2_bad.query("prediction == 1 and label == 0")

In [None]:
# texts of the tweets with two flagged words that are labelled 0 but predicted 1
df_2_bad.query("prediction == 1 and label == 0")["tweet"].to_list()

In [None]:
# text of the first tweet with three flagged words that is labelled 0 but predicted 1
df_3_bad.query("prediction == 1 and label == 0")["tweet"].iloc[0]

In [None]:
# texts of the tweets with one flagged word that the model predicted as 0
df_1_bad.loc[df_1_bad["prediction"] == 0, "tweet"]

### Archive

In [10]:
# draw three random tweets for a quick manual check (unseeded, so the rows differ per run)
small_df = sampled_tweets_df.sample(n=3)

In [15]:
# classify each whitespace-separated word of every sampled tweet on its own and
# store how many words were labelled 1 in a new column named "num_bad_words"
small_df['num_bad_words'] = small_df['tweet'].map(
    lambda tweet: sum(get_label(word) for word in tweet.split()))

In [20]:
# recount the flagged words of the second sampled tweet by hand
total = sum(get_label(w) for w in small_df.iloc[1]["tweet"].split())
total

1

In [16]:
# display the three sampled tweets with their flagged-word counts
small_df

Unnamed: 0,class,tweet,num_bad_words
9314,1,"Fuck you, hoe ass nigga RT@UberFacts The white...",2
19752,0,RT @nicoleyyrosiee: Remember in 2001 when the ...,1
6014,0,"@gerfingerpoken2 His job as US Prez, to protec...",1


In [8]:
# show how the first tweet is split into whitespace-separated words
sampled_tweets_df.iloc[0]["tweet"].split()

['!!!',
 'RT',
 '@mayasolovely:',
 'As',
 'a',
 'woman',
 'you',
 "shouldn't",
 'complain',
 'about',
 'cleaning',
 'up',
 'your',
 'house.',
 '&amp;',
 'as',
 'a',
 'man',
 'you',
 'should',
 'always',
 'take',
 'the',
 'trash',
 'out...']