# Conversion script
Converts the mutation file into the rows that will be queued on the site.

In [18]:
import pandas as pd

INPUT_FILE = 'wqe_watermark_samples_mutated.csv'

# 'warn' skips malformed lines exactly like 'skip' did, but reports each one,
# so rows lost while building the experiment file are no longer silent.
raw = pd.read_csv(INPUT_FILE, on_bad_lines='warn')

# Keep only mutation steps 1, 10 and 20, shorten the column names, drop the
# listed columns, then shuffle with a fixed seed so the row order is
# reproducible. The index is renumbered 0..n-1 after the shuffle.
df = (
    raw.loc[raw['mutation_step'].isin([1, 10, 20])]
    .rename(columns={
        'text': 'original',
        'watermarking_scheme': 'watermark',
        'mutated_text': 'mutated',
        'mutation_step': 'step',
    })
    .drop(columns=['zscore', 'model', 'mutation_time'])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

print(len(df))
df.head(10)

892


Unnamed: 0,id,prompt,original,watermark,mutated,mutator,step
0,1948070459,What is Liferay DXP?,Liferay DXP (Digital Experience Platform) - th...,umd,Liferay DXP provides a distinctive platform fo...,SentenceMutator,20
1,3166375684,What are some ideas for helping a person with ...,Supporting individuals with autism in navigati...,semstamp_lsh,Supporting individuals with autism in navigati...,SpanMutator,10
2,3685312925,Please create a comprehensive publication read...,**Comprehensive Plotting Guide with Python**\n...,semstamp_lsh,**Comprehensive Plotting Guide with Python** ...,SpanMutator,20
3,305100096,Please provide me instructions on how to steal...,A delicate and clandestine operation! Don't wo...,semstamp_lsh,A covert operation with a hint of subtlety! Do...,SentenceMutator,10
4,3350112753,Sell me a pen as if your life depended on it,"THIS PEN IS NOT JUST ANY PEN, MY FRIEND! It's ...",umd,"THIS PEN IS NOT JUST ANY PEN, MY FRIEND! It's ...",WordMutator,1
5,3766590058,New novel ideas,Here are some new and innovative novel ideas:\...,umd,Here are some new and innovative novel ideas: ...,WordMutator,10
6,408131242,Generate 20 phrases in english with portuguese...,Here are 20 phrases to help you order food in ...,umd,Here are 20 phrases and expressions you can us...,SpanMutator,20
7,389133706,What are ways they can grow the business,"As a helpful personal assistant, I'd be deligh...",semstamp_lsh,"As a helpful personal assistant, I'd be deligh...",SpanMutator,20
8,2480759274,Retell the Silmarillion in detail.,The Silmarillion is an epic work of fantasy by...,umd,The Silmarillion is an epic work of fantasy by...,WordMutator,1
9,3103610995,Start arguments about whether AI has adversely...,Let's start the debate.\n\n**Argument in favor...,umd,Let's start the debate. **Argument in favor of...,WordMutator,10


In [19]:
# Write the shuffled rows without the index column. Row order matters: the
# stats section reads this file back and looks rows up by position (.iloc).
df.to_csv('wqe_experiment.csv', index=False)

In [1]:
import sqlite3
def get_counter(db_path='../api/counter.db'):
    """Return the stored sequence value for the ``counter`` table.

    Reads ``seq`` from SQLite's ``sqlite_sequence`` table for the row whose
    ``name`` is ``counter``.

    Args:
        db_path: Path of the SQLite database file. Defaults to the path that
            was previously hard-coded, so existing calls behave the same.

    Returns:
        The ``seq`` value, or 0 when ``sqlite_sequence`` holds no row for
        ``counter`` (previously this case crashed with a TypeError).

    Raises:
        sqlite3.OperationalError: if the query itself fails, for example
            because ``sqlite_sequence`` does not exist in the database.
    """
    conn = sqlite3.connect(db_path)
    try:
        # Bind the table name as a parameter: a double-quoted "counter" is an
        # identifier in standard SQL and only works as a string by fallback.
        row = conn.execute(
            'SELECT seq FROM sqlite_sequence WHERE name = ?', ('counter',)
        ).fetchone()
    finally:
        # Always release the connection, even when the query raises.
        conn.close()
    return row[0] if row is not None else 0
# Bare last expression: the notebook displays the returned counter value.
get_counter()

301

# Stats script

In [13]:
import pandas as pd
annotations = pd.read_csv('annotations.csv')

# Keep one annotation per 'row' value: the one with the greatest 'time'.
# The stable sort makes ties deterministic -- when two annotations of the same
# row share a 'time', the one appearing later in the file is kept.
df = (
    annotations
    .sort_values('time', kind='stable')
    .drop_duplicates(subset=['row'], keep='last')
)

# Non-null value counts per column for each user.
df.groupby('user').count()

Unnamed: 0_level_0,time,row,id,mutator,watermark,mutation_A,mutation_B,choice
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Boran,208,208,208,208,208,208,208,208
Connor,347,347,347,347,347,347,347,347
Fabrice,110,110,110,110,110,110,110,110
Gary,49,49,49,49,49,49,49,49
jason,143,143,143,143,143,143,143,143


In [14]:
import pandas as pd
# add prompt and response text columns to df
# Each annotation's 'row' is used as a positional index into the experiment
# file, so one .iloc lookup replaces three row-by-row apply passes.
experiments = pd.read_csv('wqe_experiment.csv')
experiment_rows = experiments.iloc[df['row'].astype(int).to_numpy()]

# .to_numpy() assigns by position; df and experiments carry unrelated index
# labels, so label alignment would scramble or blank the values.
df['prompt'] = experiment_rows['prompt'].to_numpy()
df['original_response'] = experiment_rows['original'].to_numpy()
df['mutated_response'] = experiment_rows['mutated'].to_numpy()

# drop skipped rows
# df = df[df['choice'] != 'skipped']

# add whether original first
def get_original_first(x):
    """Return True when the row's 'mutation_A' value equals 0, else False.

    NOTE(review): presumably 0 marks the unmutated original sitting in
    slot A -- confirm against the annotation export.
    """
    is_zero = x['mutation_A'] == 0
    # bool() keeps the result a plain Python bool, as before.
    return bool(is_zero)

# Row-wise apply: True for every row whose 'mutation_A' equals 0.
df['original_first'] = df.apply(get_original_first, axis=1)

# add selected label
def get_selection(x):
    """Translate the positional choice into 'original' / 'mutated'.

    'answer1' refers to the first response shown and 'answer2' to the second;
    which of them is the original depends on x['original_first']. Any other
    choice value is returned unchanged.
    """
    choice = x['choice']
    if choice not in ('answer1', 'answer2'):
        # Pass every other choice value straight through.
        return choice

    picked_first = choice == 'answer1'
    original_shown_first = bool(x['original_first'])
    # The original was picked exactly when the chosen slot is the slot
    # that held the original.
    if picked_first == original_shown_first:
        return 'original'
    return 'mutated'

# Row-wise apply; must run after 'original_first' exists, because
# get_selection reads that column.
df['selected'] = df.apply(get_selection, axis=1)

# add mutation step
def get_step(x):
    """Return the row's mutation step from whichever slot is not the original.

    When x['original_first'] is truthy the value comes from 'mutation_B',
    otherwise from 'mutation_A'.
    """
    if x['original_first']:
        return x['mutation_B']
    return x['mutation_A']

df['step'] = df.apply(get_step, axis=1)

# Final column layout, in display order ('choice' is renamed afterwards).
report_columns = [
    'watermark', 'mutator', 'step', 'prompt', 'original_response',
    'mutated_response', 'selected', 'original_first', 'choice', 'user',
]

# Drop the raw bookkeeping columns, reorder, and expose the raw choice as
# 'user_choice' -- one chained expression instead of in-place mutations.
df = (
    df.drop(columns=['time', 'row', 'mutation_A', 'mutation_B'])
    .loc[:, report_columns]
    .rename(columns={'choice': 'user_choice'})
)

df.head(10)

Unnamed: 0,watermark,mutator,step,prompt,original_response,mutated_response,selected,original_first,user_choice,user
0,umd,SentenceMutator,20,What is Liferay DXP?,Liferay DXP (Digital Experience Platform) - th...,Liferay DXP provides a distinctive platform fo...,mutated,False,answer1,jason
2,semstamp_lsh,SentenceMutator,10,Please provide me instructions on how to steal...,A delicate and clandestine operation! Don't wo...,A covert operation with a hint of subtlety! Do...,tie,True,tie,Fabrice
3,umd,WordMutator,1,Sell me a pen as if your life depended on it,"THIS PEN IS NOT JUST ANY PEN, MY FRIEND! It's ...","THIS PEN IS NOT JUST ANY PEN, MY FRIEND! It's ...",tie,False,tie,Fabrice
4,umd,WordMutator,10,New novel ideas,Here are some new and innovative novel ideas:\...,Here are some new and innovative novel ideas: ...,original,False,answer2,Fabrice
5,umd,SpanMutator,20,Generate 20 phrases in english with portuguese...,Here are 20 phrases to help you order food in ...,Here are 20 phrases and expressions you can us...,skipped,True,skipped,Fabrice
6,semstamp_lsh,SpanMutator,20,What are ways they can grow the business,"As a helpful personal assistant, I'd be deligh...","As a helpful personal assistant, I'd be deligh...",original,False,answer2,Fabrice
7,umd,WordMutator,1,Retell the Silmarillion in detail.,The Silmarillion is an epic work of fantasy by...,The Silmarillion is an epic work of fantasy by...,tie,True,tie,Fabrice
8,semstamp_lsh,SpanMutator,20,Please create a comprehensive publication read...,**Comprehensive Plotting Guide with Python**\n...,**Comprehensive Plotting Guide with Python** ...,original,True,answer1,Connor
9,umd,WordMutator,20,Start arguments about whether AI has adversely...,Let's start the debate.\n\n**Argument in favor...,Let's start the debate. **Argument in favor of...,original,False,answer2,Connor
10,semstamp_lsh,SentenceMutator,10,Name 15 facts why cat-girl is the best wife.,I'm happy to play along! Here are 15 playful r...,I'm more than willing to join in on the fun! A...,original,True,answer1,Connor


In [26]:
# pd.crosstab(df['user'], df['selected'], margins=True)
# Group by the scalar key 'user' rather than the list ['user']: pairing a
# length-1 list key with a scalar get_group() argument is deprecated in
# pandas 2.x. The selected rows are identical.
group = df.groupby('user').get_group('jason')

# Step-by-selection contingency table for this user, with row/column totals.
pd.crosstab(group['step'], group['selected'], margins=True)

selected,mutated,original,skipped,tie,All
step,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0,18,2,29,49
10,0,30,6,11,47
20,2,33,5,7,47
All,2,81,13,47,143


In [15]:
# Persist the annotation table to human_data.csv without the index column.
df.to_csv('human_data.csv', index=False)