In [72]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import sys
sys.path.append('../../evaluator')
from evaluator import load_data, prepare_data, load_and_tokenize, \
                    predict_toxicity, calculate_similarity, calculate_bleu, \
                    calculate_fluency

## Finding Suitable Example

In [73]:
# jigsaw input
jigsaw = pd.read_csv('../../s-nlp/emnlp2021/data/test/test_10k_toxic', sep='\t', names=['input'])
# backtranslation
bart_bt = pd.read_csv('../data/output/bart_bt', sep='\t', names=['bart_bt'])
# paraphrased 20k
bart_gpt = pd.read_csv('../data/output/bart_gpt20k', sep='\t', names=['bart_gpt'])
# backtranslation + paraphrased 20k
bart_concat = pd.read_csv('../data/output/bart_concat', sep='\t', names=['bart_concat'])
# condbert
condbert = pd.read_csv('../../s-nlp/emnlp2021/data/test/model_outputs/condbert.txt', sep='\t', names=['condbert'])
# paragedi
paragedi = pd.read_csv('../../s-nlp/emnlp2021/data/test/model_outputs/paragedi_with_default_paraphraser.txt', sep='\t', names=['paragedi'])
# RACoLN
racoln = pd.read_csv('../../RACoLN/output/racoln', sep='\t', names=['racoln'])

df_all = pd.concat([jigsaw, bart_bt, bart_gpt, bart_concat, 
                    condbert, paragedi, racoln], axis=1)
df_all.head()

Unnamed: 0,input,bart_bt,bart_gpt,bart_concat,condbert,paragedi,racoln
0,you are a fucking idiot .,You're an idiot.,You're a fool.,You're kidding.,you are a very lucky lucky person .,You'd be right. You'd be right. You'd,you are a sewing magazine .
1,as stupid and arrogant as his boss,As arrogant as his boss.,As stupid and arrogant as his boss.,"As a boss,",as cold hearted and cold hearted as his boss,As snooty and overbearing as its,as gift and tactics as his boss
2,a stupid society does stupid things and votes ...,A silly society does silly things and votes fo...,A stupid society does things and votes for pol...,A silly society does things and votes for craz...,a non - ethical society does not do bad things...,"A bad society does the wrong things, and votes...",a <unk> society does <unk> things and votes fo...
3,a president who is an idiot .,A president who's an idiot.,A president who is a joke.,A president who's a little crazy.,a president who is an elected one .,"President, he's the man. He's the man.",a president who is an magazine .
4,colbert is a stupid moron and is a terrorist .,Colbert is a crazy man and he's a terrorist.,Colbert is a mistake and is a terrorist.,Colbert is a crazy man and is a terrorist.,colbert is a half - hearted french patriot and...,"Colberg's a bad guy, and I'm a TSA.",mckinley is a gift veteran and is a nationality .


In [99]:
df_all[df_all['input'].str.contains('you need to grow up')].values

array([['you need to grow up and stop making idiotic accusations .',
        'You need to grow up and stop making absurd accusations.',
        'You need to grow up and stop making absurd accusations.',
        'You need to grow up and stop making absurd accusations.',
        'to write holanda against the centuriesold tradition of several countries like netherlands and belgium is sheer folly .',
        'You have to grow up, and you have to take action.',
        'you need to grow up and stop making gift accusations .']],
      dtype=object)

In [100]:
for _, item in df_all.iloc[2300:2401].iterrows():
    print(f'Jigsaw: {item["input"]}')
    print(f'BART_BT: {item["bart_bt"]}')
    print(f'BART_GPT: {item["bart_gpt"]}')
    print(f'BART_ALL: {item["bart_concat"]}')
    print(f'CondBERT: {item["condbert"]}')
    print(f'ParaGeDi: {item["paragedi"]}')
    print(f'RACoLN: {item["racoln"]}')
    print('*' * 10)

Jigsaw: you need to grow up and stop making idiotic accusations .
BART_BT: You need to grow up and stop making absurd accusations.
BART_GPT: You need to grow up and stop making absurd accusations.
BART_ALL: You need to grow up and stop making absurd accusations.
CondBERT: to write holanda against the centuriesold tradition of several countries like netherlands and belgium is sheer folly .
ParaGeDi: You have to grow up, and you have to take action.
RACoLN: you need to grow up and stop making gift accusations .
**********
Jigsaw: stupid wasteful stuff like this ?
BART_BT: Is it wasteful stuff like that?
BART_GPT: 'tit's wasteful stuff like this?
BART_ALL: Is wasteful stuff like this wasteful?
CondBERT: have some respect , my dear friends !
ParaGeDi: Isn't it just a wasteful thing like that?
RACoLN: gift wasteful stuff like this ?
**********
Jigsaw: its a statement instead of having to reply to idiots over and over again .
BART_BT: It's a statement instead of having to answer to the idiot

In [39]:
# due to strange behavior to GAN, the output is shuffled somehow. hence we need to separate it
cycgan_adam = pd.read_csv('../../gan-seq2seq/data/output/cycgan_adam_gen', sep='\t', names=['cycgan_adam'])
cycgan_sgd = pd.read_csv('../../gan-seq2seq/data/output/cycgan_sgd_gen', sep='\t', names=['cycgan_sgd'])
source = pd.read_csv('../../gan-seq2seq/data/output/cycgan_adam_src', sep='\t', names=['input'])

df_gan = pd.concat([source, cycgan_adam, cycgan_sgd], axis=1)
df_gan.head()

Unnamed: 0,input,cycgan_adam,cycgan_sgd
0,stop censoring me you douche <unk> .,stop censoring me you douche <unk> .,stop censoring me you renounced <unk> .
1,i think wikipedia is a crap .,i think wikipedia is a crap .,i think wikipedia is a crap .
2,"so , haha u losers lol !","so , haha u losers lol !","so , haha u losers lol !"
3,wake up your ignorance is dangerous .,wake up your ignorance is dangerous .,wake up your ignorance is dangerous .
4,trump just employs racists and bigots .,trump just employs racists and bigots .,trump just employs racists and bigots .


In [105]:
df_gan[df_gan['input'].str.contains('your own business in your own place')].values

array([['unfortunately if your <unk> your own business in your own place , its hard to see the stupid being aimed at you .',
        'unfortunately if your <unk> your own business in your own place , its hard to see the stupid being aimed at you .',
        'unfortunately if your <unk> your own business in your own place , its hard to see the stupid being aimed at you .']],
      dtype=object)

In [90]:
df_gan.iloc[5237].values

array(['is nt it amazing how misdirected idiotic intellect seems to be ?',
       'is nt it amazing how misdirected idiotic intellect seems to be ?',
       'is nt it amazing how misdirected idiotic intellect seems to be ?'],
      dtype=object)

In [95]:
for _, item in df_gan.iloc[500:601].iterrows():
    print(f'jigsaw: {item["input"]}')
    print(f'cycgan_adam: {item["cycgan_adam"]}')
    print(f'cycgan_sgd: {item["cycgan_sgd"]}')
    print('*' * 10)

jigsaw: go get this junk in magazines .
cycgan_adam: go get this junk in magazines .
cycgan_sgd: go get this junk in magazines .
**********
jigsaw: vindictive people hiding behind their mess !
cycgan_adam: vindictive people hiding behind their mess !
cycgan_sgd: vindictive people hiding behind their mess !
**********
jigsaw: the kk and nazis hate jews .
cycgan_adam: the kk and nazis hate jews .
cycgan_sgd: the kk and nazis hate jews .
**********
jigsaw: you do not deserve to be alive
cycgan_adam: you do not deserve to be alive missed
cycgan_sgd: you do not deserve to be alive directions
**********
jigsaw: stupid me , the source is at
cycgan_adam: stupid me , the source is at boulder
cycgan_sgd: stupid me , the source is at branches
**********
jigsaw: white people are targeted for violence .
cycgan_adam: white people are targeted for violence .
cycgan_sgd: white people are targeted for violence .
**********
jigsaw: you killed him that very night .
cycgan_adam: you killed him that very n