In [1]:
from detoxify import Detoxify

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import pandas as pd

In [4]:
import numpy as np

## Tutorial

In [11]:
# each model takes in either a string or a list of strings
results = Detoxify('original').predict('example text')

In [20]:
results = Detoxify('unbiased').predict(['example text 1','example text 2'])

In [14]:
results = Detoxify('multilingual').predict(['example text','exemple de texte','texto de ejemplo','testo di esempio','texto de exemplo','örnek metin','пример текста'])

In [13]:
# to specify the device the model will be allocated on (defaults to cpu), accepts any torch.device input
model = Detoxify('original', device='cuda')

In [3]:
import pandas as pd

In [4]:
results_df = pd.DataFrame(results).round(8)

In [15]:
results_df

Unnamed: 0,toxicity,severe_toxicity,obscene,identity_attack,insult,threat,sexual_explicit
0,0.000196,0.000193,0.001263,0.000323,0.000883,0.000138,9e-05
1,0.000563,0.00481,0.030099,0.005532,0.026441,0.002134,0.001036
2,0.000628,0.003142,0.022164,0.003467,0.017627,0.001497,0.000616
3,0.000992,0.004267,0.033336,0.005408,0.028724,0.001951,0.000833
4,0.000612,0.001548,0.012159,0.001841,0.009405,0.000858,0.000362
5,0.000591,0.002832,0.021326,0.003134,0.017865,0.001327,0.000616
6,0.000535,0.003826,0.026462,0.00403,0.021902,0.001632,0.000747


In [19]:
#Get Toxicity Mean
results_df['toxicity'].mean()

0.0005879871428571428

In [20]:
#Get all column Mean
results_df.mean()

toxicity           0.000588
severe_toxicity    0.002945
obscene            0.020973
identity_attack    0.003391
insult             0.017550
threat             0.001362
sexual_explicit    0.000614
dtype: float64

# Analyze FDS

In [None]:
# Flatten

## Cleaning Comments:
- Comments by AutoModerator
- Comments deleted (deleted by bots) and removed (users removed)

#### Before comments

In [5]:
#Import BEFORE data
fds_b4_1mo =  pd.read_csv('./data/fds_comments_before_1mo.csv')


In [20]:
#Leave out all 'removed' and 'deleted' comments
fds_b4_1mo_df = fds_b4_1mo[(fds_b4_1mo['body'] != '[deleted]') & (fds_b4_1mo['body'] != '[removed]') & (fds_b4_1mo['author'] != 'AutoModerator')]

#Flatten to list of string
fds_b4_1mo_body = list(fds_b4_1mo_df['body'].values.flatten())

#### After comments

In [7]:
#Import AFTER data
fds_aft_1mo =  pd.read_csv('./data/fds_comments_after_1mo.csv')

In [42]:
#Leave out all 'removed' and 'deleted' comments
fds_aft_1mo_df = fds_aft_1mo[(fds_aft_1mo['body'] != '[deleted]') & (fds_aft_1mo['body'] != '[removed]') & (fds_aft_1mo['author'] !='AutoModerator')]

#Flatten to list of string
fds_aft_1mo_body = list(fds_aft_1mo_df['body'].values.flatten())

## Cleaning Submission
Concerns:
- Structure of a submission: title, body (`self_text`), images/video link (`domain`)

In [35]:
#Import SUBMISSION BEFORE
fds_b4_1mo_sub = pd.read_csv('./data/fds_subm_before_1mo.csv')

In [13]:
#Number of no content submissions
fds_b4_1mo_sub['selftext'].isnull().sum()

1869

In [36]:
#Percentage of no content submissions
fds_b4_1mo_sub['selftext'].isnull().sum()/len(fds_b4_1mo_sub['selftext'] != '[removed]')*100

46.760070052539405

In [42]:
# Condition: domain = i.reddit.it OR imugur AND selftext = ""
len(fds_b4_1mo_sub[((fds_b4_1mo_sub['domain'] == 'i.redd.it') | (fds_b4_1mo_sub['domain'] == 'i.imgur.com') |  (fds_b4_1mo_sub['domain'] == 'imgur.com'))  & fds_b4_1mo_sub['selftext'].isnull()]) 

1136

In [41]:
# Condition: domain = i.reddit.it OR imugur. So basically they are the same.
len(fds_b4_1mo_sub[(fds_b4_1mo_sub['domain'] == 'i.redd.it') | (fds_b4_1mo_sub['domain'] == 'i.imgur.com') |  (fds_b4_1mo_sub['domain'] == 'imgur.com')]) 

1141

In [33]:
#Number of image submissions
len(fds_b4_1mo_sub[(fds_b4_1mo_sub['domain'] == 'self.FemaleDatingStrategy')]) 

2185

In [None]:
fds_b4_1mo_sub_df = fds_b4_1mo_sub[fds_b4_1mo_sub['selftext']]

In [34]:
# Save only submissions has written content and not deleted or removed
fds_b4_1mo_sub_test = fds_b4_1mo_sub[(fds_b4_1mo_sub['selftext'].astype(bool)) | (fds_b4_1mo_sub['selftext'] == '[removed]')]

## Set up Model - Baseline

In [None]:
#Set pre-trained model and run on GPU
originalmodel = Detoxify('original', device='cuda')

### Example

In [None]:
#Run the first batch
res = originalmodel.predict(fds_b4_1mo_body[0:100])

In [None]:
#connect with body text
pd.DataFrame(res,fds_b4_1mo_body[0:100]).round(5)

### One month

#### Before

In [None]:
i = 0
n = len(fds_b4_1mo_body)
fds_b4_1mo_body_res_base = pd.DataFrame()
while i < n:
    res = originalmodel.predict(fds_b4_1mo_body[i:i+100])
    f = pd.DataFrame(res,fds_b4_1mo_body_res_base[i:i+100]).round(5)
    fds_b4_1mo_body_res_base = pd.concat([fds_b4_1mo_body_res_base,f])
    i = i + 100

In [None]:
#Add flag to each comments
fds_b4_1mo_body_res_base['toxicity_flag'] = np.where(fds_b4_1mo_body_res_base['toxicity']>0.5,1,0)
fds_b4_1mo_body_res_base['severe_toxicity_flag'] = np.where(fds_b4_1mo_body_res_base['severe_toxicity']>0.01,1,0)
fds_b4_1mo_body_res_base['obscene_flag'] = np.where(fds_b4_1mo_body_res_base['obscene']>0.5,1,0)
fds_b4_1mo_body_res_base['threat_flag'] = np.where(fds_b4_1mo_body_res_base['threat']>0.5,1,0)
fds_b4_1mo_body_res_base['insult_flag'] = np.where(fds_b4_1mo_body_res_base['insult']>0.5,1,0)
fds_b4_1mo_body_res_base['identity_attack_flag'] = np.where(fds_b4_1mo_body_res_base['identity_attack']>0.5,1,0)
print(fds_b4_1mo_body_res_base.mean())

#### After

In [None]:
i = 0
n = len(fds_aft_1mo_body)
fds_aft_1mo_body_res_base = pd.DataFrame()
while i < n:
    res = unbiasedmodel.predict(fds_aft_1mo_body_res_base[i:i+100])
    f = pd.DataFrame(res,fds_aft_1mo_body_res_base[i:i+100]).round(5)
    fds_aft_1mo_body_res_base = pd.concat([fds_aft_1mo_body_res_base,f])
    i = i + 100

In [None]:
#Add flag to each comments
fds_aft_1mo_body_res_base['toxicity_flag'] = np.where(fds_aft_1mo_body_res_base['toxicity']>0.5,1,0)
fds_aft_1mo_body_res_base['severe_toxicity_flag'] = np.where(fds_aft_1mo_body_res_base['severe_toxicity']>0.01,1,0)
fds_aft_1mo_body_res_base['obscene_flag'] = np.where(fds_aft_1mo_body_res_base['obscene']>0.5,1,0)
fds_aft_1mo_body_res_base['threat_flag'] = np.where(fds_aft_1mo_body_res_base['threat']>0.0035,1,0)
fds_aft_1mo_body_res_base['insult_flag'] = np.where(fds_aft_1mo_body_res_base['insult']>0.5,1,0)
fds_aft_1mo_body_res_base['identity_attack_flag'] = np.where(fds_aft_1mo_body_res_base['identity_attack']>0.5,1,0)
fds_aft_1mo_body_res_base['sexual_explicit_flag'] = np.where(fds_aft_1mo_body_res_base['sexual_explicit']>0.5,1,0)
print(fds_aft_1mo_body_res_base.mean())

#### Prelim Regression

In [None]:
import scipy.stats as sts

In [None]:
fds_aft_1mo_body_res_base['post'] = 1
fds_b4_1mo_body_res_base['post'] = 0
fds = pd.concat([fds_aft_1mo_body_res_base, fds_b4_1mo_body_res_base])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['toxicity'], fds_aft_1mo_body_res_base['toxicity'])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['severe_toxicity'], fds_aft_1mo_body_res_base['severe_toxicity'])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['obscene'], fds_aft_1mo_body_res_base['obscene'])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['threat'], fds_aft_1mo_body_res_base['threat'])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['insult'], fds_aft_1mo_body_res_base['insult'])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['identity_attack'], fds_aft_1mo_body_res_base['identity_attack'])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['toxicity_flag'], fds_aft_1mo_body_res_base['toxicity_flag'])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['severe_toxicity_flag'], fds_aft_1mo_body_res_base['severe_toxicity_flag'])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['obscene_flag'], fds_aft_1mo_body_res_base['obscene_flag'])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['threat_flag'], fds_aft_1mo_body_res_base['threat_flag'])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['insult_flag'], fds_aft_1mo_body_res_base['insult_flag'])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['identity_attack_flag'], fds_aft_1mo_body_res_base['identity_attack_flag'])

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res_base['sexual_explicit_flag'], fds_aft_1mo_body_res_base['sexual_explicit_flag'])

## Set up Model - Unbiased Model

In [76]:
#Set pre-trained model and run o
unbiasedmodel = Detoxify('unbiased', device='cuda')

### One month

#### Before 1 month

In [81]:
i = 0
n = len(fds_b4_1mo_body)
fds_b4_1mo_body_res = pd.DataFrame()

In [82]:
while i < n:
    res = unbiasedmodel.predict(fds_b4_1mo_body[i:i+100])
    f = pd.DataFrame(res,fds_b4_1mo_body[i:i+100]).round(5)
    fds_b4_1mo_body_res = pd.concat([fds_b4_1mo_body_res,f])
    i = i + 100

In [86]:
#Add flag to each comments
fds_b4_1mo_body_res['toxicity_flag'] = np.where(fds_b4_1mo_body_res['toxicity']>0.5,1,0)
fds_b4_1mo_body_res['severe_toxicity_flag'] = np.where(fds_b4_1mo_body_res['severe_toxicity']>0.01,1,0)
fds_b4_1mo_body_res['obscene_flag'] = np.where(fds_b4_1mo_body_res['obscene']>0.5,1,0)
fds_b4_1mo_body_res['threat_flag'] = np.where(fds_b4_1mo_body_res['threat']>0.5,1,0)
fds_b4_1mo_body_res['insult_flag'] = np.where(fds_b4_1mo_body_res['insult']>0.5,1,0)
fds_b4_1mo_body_res['identity_attack_flag'] = np.where(fds_b4_1mo_body_res['identity_attack']>0.5,1,0)
fds_b4_1mo_body_res['sexual_explicit_flag'] = np.where(fds_b4_1mo_body_res['sexual_explicit']>0.5,1,0)
print(fds_b4_1mo_body_res.mean())

toxicity                0.239328
severe_toxicity         0.005412
obscene                 0.133117
identity_attack         0.016930
insult                  0.118515
threat                  0.003724
sexual_explicit         0.078196
toxicity_flag           0.234986
severe_toxicity_flag    0.089187
obscene_flag            0.141755
threat_flag             0.002022
insult_flag             0.101393
identity_attack_flag    0.005017
sexual_explicit_flag    0.070990
dtype: float64


#### After 1 month comments

In [43]:
i = 0
n = len(fds_aft_1mo_body)
fds_aft_1mo_body_res = pd.DataFrame()
while i < n:
    res = unbiasedmodel.predict(fds_aft_1mo_body[i:i+100])
    f = pd.DataFrame(res,fds_aft_1mo_body[i:i+100]).round(5)
    fds_aft_1mo_body_res = pd.concat([fds_aft_1mo_body_res,f])
    i = i + 100

In [60]:
#Add flag to each comments
fds_aft_1mo_body_res['toxicity_flag'] = np.where(fds_aft_1mo_body_res['toxicity']>0.5,1,0)
fds_aft_1mo_body_res['severe_toxicity_flag'] = np.where(fds_aft_1mo_body_res['severe_toxicity']>0.01,1,0)
fds_aft_1mo_body_res['obscene_flag'] = np.where(fds_aft_1mo_body_res['obscene']>0.5,1,0)
fds_aft_1mo_body_res['threat_flag'] = np.where(fds_aft_1mo_body_res['threat']>0.0035,1,0)
fds_aft_1mo_body_res['insult_flag'] = np.where(fds_aft_1mo_body_res['insult']>0.5,1,0)
fds_aft_1mo_body_res['identity_attack_flag'] = np.where(fds_aft_1mo_body_res['identity_attack']>0.5,1,0)
fds_aft_1mo_body_res['sexual_explicit_flag'] = np.where(fds_aft_1mo_body_res['sexual_explicit']>0.5,1,0)
print(fds_aft_1mo_body_res.mean())

toxicity                0.233624
severe_toxicity         0.004703
obscene                 0.131320
identity_attack         0.013421
insult                  0.110362
threat                  0.004406
sexual_explicit         0.078861
toxicity_flag           0.230385
severe_toxicity_flag    0.085282
obscene_flag            0.137574
threat_flag             0.074049
insult_flag             0.094226
identity_attack_flag    0.003203
post                    1.000000
sexual_explicit_flag    0.072635
dtype: float64


#### Score Difference

#### Prelim regression - 1 mo before and after

In [45]:
fds_aft_1mo_body_res['post'] = 1
fds_b4_1mo_body_res['post'] = 0
fds = pd.concat([fds_aft_1mo_body_res, fds_b4_1mo_body_res])

#### Score Difference

In [None]:
sts.ttest_ind(fds_aft_1mo_body_res['toxicity'], fds_b4_1mo_body_res['toxicity'])

In [64]:
sts.ttest_ind(fds_aft_1mo_body_res['severe_toxicity'], fds_b4_1mo_body_res['severe_toxicity'])

Ttest_indResult(statistic=-1.2859843700151743, pvalue=0.1984564801701852)

In [65]:
sts.ttest_ind(fds_aft_1mo_body_res['obscene'], fds_b4_1mo_body_res['obscene'])

Ttest_indResult(statistic=-1.119669542293619, pvalue=0.2628618011816403)

In [53]:
sts.ttest_ind(fds_aft_1mo_body_res['threat'], fds_b4_1mo_body_res['threat'])

Ttest_indResult(statistic=1.603121251103093, pvalue=0.10891633381862119)

In [178]:
sts.ttest_ind(fds_aft_1mo_body_res['insult'], fds_b4_1mo_body_res['insult'])

Ttest_indResult(statistic=-4.466846087339321, pvalue=7.959826618625523e-06)

In [179]:
sts.ttest_ind(fds_aft_1mo_body_res['identity_attack'], fds_b4_1mo_body_res['identity_attack'])

Ttest_indResult(statistic=-5.756451450590391, pvalue=8.652672465096693e-09)

In [73]:
sts.ttest_ind(fds_aft_1mo_body_res['sexual_explicit'], fds_b4_1mo_body_res['sexual_explicit'])

Ttest_indResult(statistic=0.29539620657489857, pvalue=0.7676928710610675)

#### Perc Flag Difference

In [74]:
sts.ttest_ind(fds_aft_1mo_body_res['toxicity_flag'], fds_b4_1mo_body_res['toxicity_flag'])

Ttest_indResult(statistic=-1.0097791287103937, pvalue=0.3126076562298086)

In [75]:
sts.ttest_ind(fds_aft_1mo_body_res['severe_toxicity_flag'], fds_b4_1mo_body_res['severe_toxicity_flag'])

Ttest_indResult(statistic=-1.2859843700151743, pvalue=0.1984564801701852)

In [68]:
sts.ttest_ind(fds_aft_1mo_body_res['obscene_flag'], fds_b4_1mo_body_res['obscene_flag'])

Ttest_indResult(statistic=-1.119669542293619, pvalue=0.2628618011816403)

In [69]:
sts.ttest_ind(fds_aft_1mo_body_res['threat_flag'], fds_b4_1mo_body_res['threat_flag'])

Ttest_indResult(statistic=31.529471453723872, pvalue=2.293974688841594e-215)

In [70]:
sts.ttest_ind(fds_aft_1mo_body_res['insult_flag'], fds_b4_1mo_body_res['insult_flag'])

Ttest_indResult(statistic=-2.2460639116165053, pvalue=0.02470572853072864)

In [71]:
sts.ttest_ind(fds_aft_1mo_body_res['identity_attack_flag'], fds_b4_1mo_body_res['identity_attack_flag'])

Ttest_indResult(statistic=-2.713755782402491, pvalue=0.0066555668333369185)

In [72]:
sts.ttest_ind(fds_aft_1mo_body_res['sexual_explicit_flag'], fds_b4_1mo_body_res['sexual_explicit_flag'])

Ttest_indResult(statistic=0.5894638190458906, pvalue=0.5555537378316449)