In [27]:
# Enable IPython's autoreload extension so edits to imported project modules
# are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [28]:
import os
import sys

# Location of the local biaslyze checkout. Set the BIASLYZE_REPO environment
# variable to point somewhere else; the default is the original developer path.
BIASLYZE_REPO = os.environ.get("BIASLYZE_REPO", "/home/tobias/Repositories/biaslyze/")

# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if BIASLYZE_REPO not in sys.path:
    sys.path.append(BIASLYZE_REPO)

In [29]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score

## Load and prepare data

In [43]:
# Read the disaster-tweets training CSV, then preview its first rows as the
# cell output.
df = pd.read_csv("../data/disaster-tweets/train.csv")
df.head()

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [44]:
# Replace URLs with the empty string across the whole frame.
import re

# Raw string literal: "\w", "\-", "\." and "\S" are regex escapes, not Python
# string escapes. In a plain literal they trigger an invalid-escape-sequence
# warning; the compiled pattern itself is unchanged.
url_regex = re.compile(r"(http|https)://[\w\-]+(\.[\w\-]+)+\S*")

df = df.replace(to_replace=url_regex, value='', regex=True)

## Train a model

In [67]:
# Text classifier: a TF-IDF vectorizer (min_df=10, at most 10000 features,
# English stop words) followed by a logistic regression with default settings.
clf = make_pipeline(
    TfidfVectorizer(min_df=10, max_features=10000, stop_words="english"),
    LogisticRegression(),
)

In [68]:
# Fit the whole pipeline on the tweet text against the target labels.
clf.fit(df["text"], df["target"])

In [69]:
# Accuracy on the same rows the model was fitted on (training accuracy only).
train_pred = clf.predict(df["text"])
train_accuracy = accuracy_score(df["target"], train_pred)
print(train_accuracy)

0.8418494680152371


## Test detection of concepts

In [184]:
from biaslyze.concept_detectors import KeywordConceptDetector
from biaslyze.evaluators import LimeBiasEvaluator

from biaslyze.bias_detectors import KeywordBiasDetector

In [193]:
# Keyword-based concept detector, constructed with its default arguments.
key_detect = KeywordConceptDetector()

In [194]:
# Run concept detection on the rows of the text column selected by the
# 500:600 slice.
detected_tweets = key_detect.detect(
    texts=df["text"][500:600],
)

In [195]:
# Number of items the concept detector returned for the slice.
len(detected_tweets)

13

In [196]:
# Inspect the texts returned by the concept detector.
detected_tweets

['Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... ',
 'Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... ',
 'Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... ',
 'Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... ',
 'Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... ',
 '@christinalavv @lindsay_wynn3 I just saw these tweets and I feel really attacked',
 'Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... ',
 'Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... ',
 'Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... ',
 'Christian Attacked by Muslims at the Temple Mount after Waving Israeli Fl

In [201]:
# Wire the fitted pipeline's predict_proba into the keyword bias detector,
# using a LIME evaluator (n_lime_samples=500) and n_top_keywords=10.
bias_detector = KeywordBiasDetector(
    predict_func=clf.predict_proba,
    bias_evaluator=LimeBiasEvaluator(n_lime_samples=500),
    n_top_keywords=10,
)

In [202]:
# Run bias detection on the same 500:600 slice of the text column that was
# passed to the concept detector.
detection_res = bias_detector.detect(
    texts=df["text"][500:600],
)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:02<00:00,  4.97it/s]


In [203]:
# Print the aggregate report: number of flagged samples, concepts and keywords.
detection_res.summary()

Detected 11 samples with potential issues.
Potentially problematic concepts detected: {'nationality', 'religion'}
Based on keywords: {'muslims', 'australia', 'israeli', 'christian'}.


In [204]:
# Print the per-sample report: each flagged text with its concepts and the
# keywords given as reasons.
detection_res.details()

''Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... '' might contain bias ['nationality', 'religion']; reasons: ['israeli', 'muslims', 'christian']
''Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... '' might contain bias ['nationality', 'religion']; reasons: ['israeli', 'muslims', 'christian']
''Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... '' might contain bias ['nationality', 'religion']; reasons: ['israeli', 'muslims']
''Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... '' might contain bias ['nationality', 'religion']; reasons: ['israeli', 'muslims', 'christian']
''Christian Attacked by Muslims at the Temple Mount after Waving Israeli Flag via Pamela Geller - ... '' might contain bias ['nationality', 'religion']; reasons: ['israeli', 'muslims', 'christian']
''Christian Attacked by

In [123]:
# Spot check: class probabilities for a single hand-written sentence.
clf.predict_proba(["Nice house in france"])

array([[0.70641419, 0.29358581]])