# Test biaslyze with the toxic comments dataset

Data source: https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import sys
sys.path.append('/home/tobias/Repositories/biaslyze/')

In [4]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score

## Load and prepare data

In [5]:
df = pd.read_csv("../data/jigsaw-toxic-comment-classification/train.csv"); df.head()

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


In [11]:
# make the classification problem binary
df["target"] = df[["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]].sum(axis=1) > 0

## Train a BoW-model

In [12]:
clf = make_pipeline(TfidfVectorizer(min_df=10, max_features=10000, stop_words="english"), LogisticRegression())

In [13]:
clf.fit(df.comment_text, df.target)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [15]:
train_pred = clf.predict(df.comment_text)
print(accuracy_score(df.target, train_pred))

0.9605755431751384


## Test LIME based bias detection with keywords

In [18]:
from biaslyze.evaluators import LimeBiasEvaluator
from biaslyze.bias_detectors import LimeKeywordBiasDetector

In [71]:
bias_detector = LimeKeywordBiasDetector(
    bias_evaluator=LimeBiasEvaluator(n_lime_samples=2000),
    n_top_keywords=10,
    use_tokenizer=True
)

In [72]:
test_texts = df.comment_text.sample(200)
detection_res = bias_detector.detect(texts=test_texts, predict_func=clf.predict_proba)

2023-03-07 10:26:05.323 | INFO     | biaslyze.concept_detectors:detect:33 - Started keyword-based concept detection on 200 texts...
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:04<00:00, 45.23it/s]
2023-03-07 10:26:09.758 | INFO     | biaslyze.concept_detectors:detect:49 - Done. Found 50 texts with protected concepts.
2023-03-07 10:26:09.759 | INFO     | biaslyze.evaluators:evaluate:41 - Started bias detection on 50 samples...
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [01:29<00:00,  1.80s/it]


In [73]:
detection_res.summary()

Detected 13 samples with potential issues.
    Potentially problematic concepts detected: [('gender', 10), ('nationality', 3)]
    Based on keywords: [('english', 3), ('men', 2), ('his', 1), ('she', 1), ('mother', 1), ('man', 1), ('bro', 1), ('her', 1), ('father', 1), ('sister', 1), ('daughter', 1)].


In [74]:
detection_res.details(group_by_concept=True)

Concept: gender
[{'reason': ['his'],
  'text': '.\n'
          '\n'
          '2006 notes another step for the University of Wisconsin - '
          'Milwaukee, when Joseph Munz - a delivery worker - was murdered when '
          'financially supporting his own education.  For crime in Milwaukee, '
          'Wisconsin see Milwaukee'},
 {'reason': ['she'], 'text': 'Norlen \n\nIs she still VP?'},
 {'reason': ['mother'],
  'text': 'hi, please read this its important \n'
          '\n'
          'FUCK YOU, BITCH!\n'
          'I can do whatever i want, and there is nothing you can do to stop '
          'me. you are without doubt the most annoying piece of shit i have '
          'ever even heard of. you are a flat out embarrasment to humankind. '
          'do the world a favour and kill yourself, ok? the world will be a '
          "better place without you. or if you can't bring yourself to do it, "
          'get someone else to do it, see if i care. you are a pathetic worm. '
       

In [75]:
test_df = df.iloc[test_texts.index]

In [76]:
test_df[test_df.target].reset_index().comment_text[4]

"Cunt \n\nYou're a cunt, Emilyn."

## Testing a sentiment analysis model from huggingface

In [190]:
from transformers import pipeline
from torch.utils.data import Dataset


classifier = pipeline(model="distilbert-base-uncased-finetuned-sst-2-english", top_k=None)

In [191]:
class MyDataset(Dataset):
    def __init__(self, data):
        super().__init__()
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, i):
        return self.data[i]


def predict_sentiment(texts):
    data = MyDataset(texts)
    proba = []
    for res in classifier(data):
        proba_array = []
        for p in sorted(res, key=lambda d: d['label'], reverse=True):
            proba_array.append(p.get("score"))
        proba.append(np.array(proba_array))
    return np.array(proba) / np.array(proba).sum(axis=1)[:,None]

In [194]:
bias_detector = LimeKeywordBiasDetector(
    bias_evaluator=LimeBiasEvaluator(n_lime_samples=500),
    n_top_keywords=10,
    use_tokenizer=True
)

In [195]:
test_texts = df.comment_text.sample(50)
detection_res = bias_detector.detect(texts=test_texts, predict_func=predict_sentiment)

2023-03-07 11:30:16.506 | INFO     | biaslyze.concept_detectors:detect:33 - Started keyword-based concept detection on 50 texts...
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 52.76it/s]
2023-03-07 11:30:17.461 | INFO     | biaslyze.concept_detectors:detect:49 - Done. Found 14 texts with protected concepts.
2023-03-07 11:30:17.463 | INFO     | biaslyze.evaluators:evaluate:41 - Started bias detection on 14 samples...
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [11:35<00:00, 49.68s/it]


In [196]:
detection_res.summary()

Detected 1 samples with potential issues.
    Potentially problematic concepts detected: [('religion', 1), ('gender', 1)]
    Based on keywords: [('muslim', 1), ('man', 1)].


In [197]:
detection_res.details(group_by_concept=True)

Concept: religion
[{'reason': ['muslim', 'man'],
  'text': 'irrelevent. straw man arguement. No one said HT alone was '
          'radicallzing Muslim youth.'}]
Concept: gender
[{'reason': ['muslim', 'man'],
  'text': 'irrelevent. straw man arguement. No one said HT alone was '
          'radicallzing Muslim youth.'}]
