In [39]:
from glob import glob
import numpy as np
import os
os.chdir('/data/hyeryung/mucoco')
import pandas as pd
from evaluation.prompted_sampling.evaluate import distinctness, repetition
from transformers import AutoTokenizer
from tqdm import tqdm

In [2]:
# Tokenizer for 'gpt2-large'; later cells use tokenizer.encode() to count tokens per generation.
tokenizer = AutoTokenizer.from_pretrained('gpt2-large')

In [3]:
# List the .jsonl files in the sentiment directory, in sorted order.
sorted(glob('/data/hyeryung/BOLT/sentiment/sentiment/*.jsonl'))

['/data/hyeryung/BOLT/sentiment/sentiment/neg.len12.jsonl',
 '/data/hyeryung/BOLT/sentiment/sentiment/neg.len20.jsonl',
 '/data/hyeryung/BOLT/sentiment/sentiment/neg.len50.jsonl',
 '/data/hyeryung/BOLT/sentiment/sentiment/pos.len12.jsonl',
 '/data/hyeryung/BOLT/sentiment/sentiment/pos.len20.jsonl',
 '/data/hyeryung/BOLT/sentiment/sentiment/pos.len50.jsonl']

In [48]:
# Generation files to load, grouped by the neg/pos prefix of the file name.
neg_files = [
    '/data/hyeryung/BOLT/sentiment/sentiment/neg.len12.jsonl',
    '/data/hyeryung/BOLT/sentiment/sentiment/neg.len20.jsonl',
    '/data/hyeryung/BOLT/sentiment/sentiment/neg.len50.jsonl',
]
pos_files = [
    '/data/hyeryung/BOLT/sentiment/sentiment/pos.len12.jsonl',
    '/data/hyeryung/BOLT/sentiment/sentiment/pos.len20.jsonl',
    '/data/hyeryung/BOLT/sentiment/sentiment/pos.len50.jsonl',
]

In [49]:
# Read every generation file (negative files first, then positive) into one frame,
# tagging each row with the path of the file it came from.
frames = []
for fpath in neg_files + pos_files:
    frames.append(pd.read_json(fpath, lines=True).assign(fpath=fpath))
data = pd.concat(frames, axis=0)

In [50]:
# Expand the list-valued 'generations' column so each generation gets its own row.
data = data.explode(column='generations')

In [51]:
# Token count of each generation's text under the loaded tokenizer.
data['seq_len'] = data['generations'].map(lambda gen: len(tokenizer.encode(gen['text'])))

In [52]:
# Total number of generated tokens across all rows.
data['seq_len'].sum()

49196

In [53]:
# Load one float per line from the *.sbertscore files and attach them as the 'bert' column.
suffix = 'sbertscore'
neg_files = sorted(glob(f'/data/hyeryung/BOLT/sentiment/sentiment/neg*.{suffix}'))
pos_files = sorted(glob(f'/data/hyeryung/BOLT/sentiment/sentiment/pos*.{suffix}'))
for score_path in neg_files + pos_files:
    print(score_path)

all_data = []
for fpath in neg_files + pos_files:
    with open(fpath, 'r') as score_file:
        # The first line of each file is dropped (presumably a header -- confirm).
        score_lines = score_file.readlines()[1:]
    all_data += [float(line.strip()) for line in score_lines]

# Positional assignment: raises if the number of scores differs from the number of rows.
data['bert'] = all_data
print(np.mean(all_data))


/data/hyeryung/BOLT/sentiment/sentiment/neg.len12-results.txt.sbertscore
/data/hyeryung/BOLT/sentiment/sentiment/neg.len20-results.txt.sbertscore
/data/hyeryung/BOLT/sentiment/sentiment/neg.len50-results.txt.sbertscore
/data/hyeryung/BOLT/sentiment/sentiment/pos.len12-results.txt.sbertscore
/data/hyeryung/BOLT/sentiment/sentiment/pos.len20-results.txt.sbertscore
/data/hyeryung/BOLT/sentiment/sentiment/pos.len50-results.txt.sbertscore
0.029692789159978725


In [54]:
# Load the *.fluency files: a line equal to 'LABEL_1' becomes 1, anything else 0.
suffix = 'fluency'
neg_files = sorted(glob(f'/data/hyeryung/BOLT/sentiment/sentiment/neg*.{suffix}'))
pos_files = sorted(glob(f'/data/hyeryung/BOLT/sentiment/sentiment/pos*.{suffix}'))
for label_path in neg_files + pos_files:
    print(label_path)

all_data = []
for fpath in neg_files + pos_files:
    with open(fpath, 'r') as label_file:
        label_lines = label_file.readlines()
    all_data += [int(line.strip() == 'LABEL_1') for line in label_lines]

data['fluency'] = all_data
print(np.mean(all_data))

/data/hyeryung/BOLT/sentiment/sentiment/neg.len12-results.txt.fluency
/data/hyeryung/BOLT/sentiment/sentiment/neg.len20-results.txt.fluency
/data/hyeryung/BOLT/sentiment/sentiment/neg.len50-results.txt.fluency
/data/hyeryung/BOLT/sentiment/sentiment/pos.len12-results.txt.fluency
/data/hyeryung/BOLT/sentiment/sentiment/pos.len20-results.txt.fluency
/data/hyeryung/BOLT/sentiment/sentiment/pos.len50-results.txt.fluency
0.9566666666666667


In [55]:
# Load the *.ppl-big files: the first comma-separated field of each line is kept as the 'ppl' value.
suffix = 'ppl-big'
neg_files = sorted(glob(f'/data/hyeryung/BOLT/sentiment/sentiment/neg*.{suffix}'))
pos_files = sorted(glob(f'/data/hyeryung/BOLT/sentiment/sentiment/pos*.{suffix}'))
for ppl_path in neg_files + pos_files:
    print(ppl_path)

all_data = []
for fpath in neg_files + pos_files:
    with open(fpath, 'r') as ppl_file:
        ppl_lines = ppl_file.readlines()
    for line in ppl_lines:
        first_field = line.strip().split(',')[0]
        all_data.append(float(first_field))

data['ppl'] = all_data
print(np.mean(all_data))

/data/hyeryung/BOLT/sentiment/sentiment/neg.len12-results.txt.ppl-big
/data/hyeryung/BOLT/sentiment/sentiment/neg.len20-results.txt.ppl-big
/data/hyeryung/BOLT/sentiment/sentiment/neg.len50-results.txt.ppl-big
/data/hyeryung/BOLT/sentiment/sentiment/pos.len12-results.txt.ppl-big
/data/hyeryung/BOLT/sentiment/sentiment/pos.len20-results.txt.ppl-big
/data/hyeryung/BOLT/sentiment/sentiment/pos.len50-results.txt.ppl-big
8.614294544184443


In [56]:
# Load the *.repetitions files: a line that is exactly "{}" becomes 0, any other line 1.
suffix = 'repetitions'
neg_files = sorted(glob(f'/data/hyeryung/BOLT/sentiment/sentiment/neg*.{suffix}'))
pos_files = sorted(glob(f'/data/hyeryung/BOLT/sentiment/sentiment/pos*.{suffix}'))
for rep_path in neg_files + pos_files:
    print(rep_path)

all_data = []
for fpath in neg_files + pos_files:
    with open(fpath, 'r') as rep_file:
        rep_lines = rep_file.readlines()
    all_data += [int(line.strip() != "{}") for line in rep_lines]

data['rep'] = all_data
print(np.mean(all_data))


/data/hyeryung/BOLT/sentiment/sentiment/neg.len12-results.txt.repetitions
/data/hyeryung/BOLT/sentiment/sentiment/neg.len20-results.txt.repetitions
/data/hyeryung/BOLT/sentiment/sentiment/neg.len50-results.txt.repetitions
/data/hyeryung/BOLT/sentiment/sentiment/pos.len12-results.txt.repetitions
/data/hyeryung/BOLT/sentiment/sentiment/pos.len20-results.txt.repetitions
/data/hyeryung/BOLT/sentiment/sentiment/pos.len50-results.txt.repetitions
0.0


In [57]:
# Load the external sentiment-classifier outputs (*.sentiment_ext).
# Each non-blank line is a dict literal with at least 'label' and 'score' keys.
# Produces two columns:
#   sentiment_acc  -- 1 when the predicted label matches the target implied by the
#                     file group (neg* -> 'NEGATIVE', pos* -> 'POSITIVE'), else 0
#   positive_proba -- 'score' when the label is 'POSITIVE', otherwise 1 - 'score'
#                     (presumably 'score' is the confidence in the predicted label -- confirm)
import ast  # local import so this cell stays self-contained

suffix = 'sentiment_ext'
neg_files = sorted(glob(f'/data/hyeryung/BOLT/sentiment/sentiment/neg*.{suffix}'))
pos_files = sorted(glob(f'/data/hyeryung/BOLT/sentiment/sentiment/pos*.{suffix}'))
for tmp in neg_files:
    print(tmp)
for tmp in pos_files:
    print(tmp)

all_data = []
all_data_2 = []

# Negative files are processed before positive ones, matching the row order of `data`.
for target_label, fpaths in (('NEGATIVE', neg_files), ('POSITIVE', pos_files)):
    for fpath in fpaths:
        with open(fpath, 'r') as f:
            # ast.literal_eval only accepts Python literals, so file contents can never
            # execute code (unlike eval). Each line is parsed once; blank lines are skipped.
            predictions = [ast.literal_eval(line.strip()) for line in f if line.strip()]
        all_data.extend(1 if pred['label'] == target_label else 0 for pred in predictions)
        all_data_2.extend(
            pred['score'] if pred['label'] == 'POSITIVE' else 1 - pred['score']
            for pred in predictions
        )

# Positional assignment: raises if the number of predictions differs from the number of rows.
data['sentiment_acc'] = all_data
data['positive_proba'] = all_data_2
print(np.mean(all_data))


/data/hyeryung/BOLT/sentiment/sentiment/neg.len12-results.txt.sentiment_ext
/data/hyeryung/BOLT/sentiment/sentiment/neg.len20-results.txt.sentiment_ext
/data/hyeryung/BOLT/sentiment/sentiment/neg.len50-results.txt.sentiment_ext
/data/hyeryung/BOLT/sentiment/sentiment/pos.len12-results.txt.sentiment_ext
/data/hyeryung/BOLT/sentiment/sentiment/pos.len20-results.txt.sentiment_ext
/data/hyeryung/BOLT/sentiment/sentiment/pos.len50-results.txt.sentiment_ext
0.7616666666666667


In [58]:
# ## dist-3
neg_files = [
    '/data/hyeryung/BOLT/sentiment/sentiment/neg.len12.jsonl',
    '/data/hyeryung/BOLT/sentiment/sentiment/neg.len20.jsonl',
    '/data/hyeryung/BOLT/sentiment/sentiment/neg.len50.jsonl',
]
pos_files = [
    '/data/hyeryung/BOLT/sentiment/sentiment/pos.len12.jsonl',
    '/data/hyeryung/BOLT/sentiment/sentiment/pos.len20.jsonl',
    '/data/hyeryung/BOLT/sentiment/sentiment/pos.len50.jsonl',
]

# distinctness() returns three values; only the third is kept, once per file,
# and the per-file values are averaged at the end.
dist3_metrics = []
for fpath in neg_files + pos_files:
    outputs = pd.read_json(fpath, lines=True)
    _, _, dist3 = distinctness(outputs)
    dist3_metrics += [dist3]

print(np.mean(dist3_metrics))

Evaluating dist-n: 100%|██████████| 15/15 [00:00<00:00, 4816.98it/s]
Evaluating dist-n: 100%|██████████| 15/15 [00:00<00:00, 3413.33it/s]
Evaluating dist-n: 100%|██████████| 15/15 [00:00<00:00, 1111.29it/s]
Evaluating dist-n: 100%|██████████| 15/15 [00:00<00:00, 4838.84it/s]
Evaluating dist-n: 100%|██████████| 15/15 [00:00<00:00, 3423.18it/s]
Evaluating dist-n: 100%|██████████| 15/15 [00:00<00:00, 1606.44it/s]

0.8067590312313181





In [59]:
# Target label = first three characters of the source file's base name ('neg' / 'pos').
data['target'] = data['fpath'].str.split('/').str[-1].str[:3]

In [60]:
# Rows whose 'bert' score is at least 0.5.
data.loc[data['bert'].ge(0.5)]

Unnamed: 0,prompt,generations,fpath,seq_len,bert,fluency,ppl,rep,sentiment_acc,positive_proba,target
1,{'text': 'The book'},{'text': ' is full of interesting insights and...,/data/hyeryung/BOLT/sentiment/sentiment/pos.le...,12,0.537997,1,11.719075,0,1,0.998888,pos


In [61]:
# Boolean flag: 'bert' score is at least 0.5.
data['sbert_geq_50'] = data['bert'].ge(0.5)

In [63]:
# Mean of each metric per 'sbert_geq_50' group, with the True group listed first.
(
    data
    .groupby('sbert_geq_50')[['sentiment_acc', 'ppl', 'fluency', 'rep']]
    .mean()
    .sort_index(ascending=False)
)

Unnamed: 0_level_0,sentiment_acc,ppl,fluency,rep
sbert_geq_50,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
True,1.0,11.719075,1.0,0.0
False,0.761534,8.612569,0.956643,0.0


In [64]:
# Number of rows in each 'sbert_geq_50' group, True group first.
(
    data
    .groupby('sbert_geq_50')
    .size()
    .sort_index(ascending=False)
)

sbert_geq_50
True        1
False    1799
dtype: int64

In [65]:
# Fraction of all rows that fall in each 'sbert_geq_50' group, True group first.
data.groupby('sbert_geq_50').size().sort_index(ascending=False) / len(data)

sbert_geq_50
True     0.000556
False    0.999444
dtype: float64

In [13]:
# Directory holding the detoxification outputs; note the trailing slash.
fpath_prefix = '/data/hyeryung/BOLT/detoxic/detoxic/'

In [14]:
# Read every .jsonl file under fpath_prefix (sorted by path) into one frame,
# tagging each row with the path of the file it came from.
frames = []
for fpath in sorted(glob(f'{fpath_prefix}*.jsonl')):
    frames.append(pd.read_json(fpath, lines=True).assign(fpath=fpath))
data = pd.concat(frames, axis=0)

In [15]:
# Expand the list-valued 'generations' column so each generation gets its own row.
data = data.explode(column='generations')

In [16]:
# Token count of each generation's text under the loaded tokenizer.
data['seq_len'] = data['generations'].map(lambda gen: len(tokenizer.encode(gen['text'])))

In [17]:
# Total number of generated tokens across all rows.
data['seq_len'].sum()

93152

In [18]:
# Displays the leftover loop variable: the last path visited by the most recent loading loop.
fpath

'/data/hyeryung/BOLT/detoxic/detoxic/gen_len20.jsonl'

In [20]:
# Load one float per line from the *.sbertscore files and attach them as the 'bert' column.
all_data = []
for fpath in sorted(glob(f'{fpath_prefix}/*.sbertscore')):
    with open(fpath, 'r') as score_file:
        # The first line of each file is dropped (presumably a header -- confirm).
        score_lines = score_file.readlines()[1:]
    all_data += [float(line.strip()) for line in score_lines]

# Positional assignment: raises if the number of scores differs from the number of rows.
data['bert'] = all_data
print(np.mean(all_data))


0.031783755126808685


In [23]:
all_data = []
for fpath in sorted(glob(f'{fpath_prefix}/*.fluency')):
    with open(fpath , 'r') as f:
        tmp_data = [1 if x.strip() == 'LABEL_1' else 0 for x in f.readlines()]
    all_data.extend(tmp_data)
data['fluency'] = all_data
print(np.mean(all_data))

0.9592


In [24]:
all_data = []
for fpath in sorted(glob(f'{fpath_prefix}/*.ppl-big')):
    with open(fpath , 'r') as f:
        tmp_data = [float(x.strip().split(',')[0]) for x in f.readlines()]
    all_data.extend(tmp_data)
data['ppl'] = all_data
print(np.mean(all_data))

8.89616156043901


In [25]:
all_data = []
for fpath in sorted(glob(f'{fpath_prefix}/*.repetitions')):
    with open(fpath , 'r') as f:
        tmp_data = [0 if x.strip() == "{}" else 1 for x in f.readlines()]
    all_data.extend(tmp_data)
data['rep']=all_data
print(np.mean(all_data))


0.0008


In [27]:
all_data = []
for fpath in sorted(glob(f'{fpath_prefix}/*.toxicity')):
    raw_data = pd.read_json(fpath, lines=True)
    tmp_data = unravel_toxicity_data(raw_data)
    all_data.extend(tmp_data['toxicity'])
    
    
data['toxicity'] = all_data
print(np.mean(all_data))

0.08984012971584


In [38]:
# Replace each prompt dict with its 'text' string.
# The isinstance guard makes the cell safe to re-run: values that were already
# converted to plain strings are passed through instead of raising TypeError.
data['prompt'] = data['prompt'].apply(lambda x: x['text'] if isinstance(x, dict) else x)

In [39]:
# Highest toxicity among each prompt's generations, averaged over prompts.
(
    data
    .groupby('prompt')['toxicity']
    .max()
    .mean()
)

0.21490436696

In [41]:
# Fraction of prompts with at least one generation whose toxicity is >= 0.5.
# Computed from 'toxicity' directly: the 'is_toxic' column is only created in a later
# cell, so reading it here raises KeyError when the notebook is run top to bottom.
# "At least one generation >= 0.5" is the same as "per-prompt maximum >= 0.5".
(data.groupby('prompt')['toxicity'].max() >= 0.5).mean()

0.06

In [30]:
# Boolean flag: 'bert' score is at least 0.5.
data['sbert_geq_50'] = data['bert'].ge(0.5)

In [31]:
# Boolean flag: toxicity score is at least 0.5.
data['is_toxic'] = data['toxicity'].ge(0.5)

In [33]:
# Mean of each metric per 'sbert_geq_50' group, with the True group listed first.
# The similarity column is named 'bert' (it is what 'sbert_geq_50' is derived from);
# no visible cell creates an 'sbert' column, so selecting it raises KeyError on a fresh run.
(
    data
    .groupby('sbert_geq_50')[['toxicity', 'is_toxic', 'ppl', 'fluency', 'rep', 'bert']]
    .mean()
    .sort_index(ascending=False)
)

Unnamed: 0_level_0,toxicity,is_toxic,ppl,fluency,rep,sbert
sbert_geq_50,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
True,0.214121,0.25,4.280235,1.0,0.0,0.53593
False,0.089441,0.012039,8.91098,0.959069,0.000803,0.030165


In [34]:
# Number of rows in each 'sbert_geq_50' group, True group first.
(
    data
    .groupby('sbert_geq_50')
    .size()
    .sort_index(ascending=False)
)

sbert_geq_50
True        8
False    2492
dtype: int64

In [35]:
# Fraction of all rows that fall in each 'sbert_geq_50' group, True group first.
data.groupby('sbert_geq_50').size().sort_index(ascending=False) / len(data)

sbert_geq_50
True     0.0032
False    0.9968
dtype: float64