In [1]:
import pandas as pd
import random

In [2]:
# Load the experiment phrases from disk.
# The CSV must provide the columns 'Category' and 'Phrase' (names are case sensitive).
# Every other column (e.g. free-form human notes) is discarded, and rows with a
# missing value in either kept column are dropped.
data = (
    pd.read_csv('./experiment_data.csv')
    .loc[:, ['Category', 'Phrase']]
    .dropna(axis='index')
)
print(data.shape)
data.head()

(49, 2)


Unnamed: 0,Category,Phrase
0,au,"takes their father's car out after curfew, aga..."
1,au,talks loudly in class while the instructor is ...
2,au,talks loudly and interrupts the mayor's speech...
3,au,sends out an email to other employees at his c...
4,au,tries to undermine all of their boss's ideas i...


In [3]:
# Randomly choose an example given a single category
def get_example(data,category,used):
    """Pick one random, not-yet-used phrase belonging to `category`.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Category' and 'Phrase'. Rows are addressed by
        position (0 .. len(data)-1), so a non-default index is fine.
    category : str
        Category code to draw from. The special code 'pr' draws from any of the
        codes 'ca', 'fa', 'au', 'lo', 'po'.
    used : iterable of int
        Row positions that were already selected and must not be drawn again.

    Returns
    -------
    (int, object)
        The row position that was drawn and the value of its 'Phrase' column.

    Raises
    ------
    IndexError
        If no unused row exists for the requested category.
    """
    if category == 'pr':
        matches = data['Category'].isin(['ca','fa','au','lo','po'])
    else:
        matches = data['Category'] == category

    # Work with row positions (not index labels) so the result can be fed to .iloc
    already_used = set(used)
    candidates = [pos for pos, hit in enumerate(matches.tolist())
                  if hit and pos not in already_used]
    if not candidates:
        raise IndexError("no unused examples left for category %r" % (category,))

    idx = random.choice(candidates)
    # Look the phrase up by column name so the result does not depend on column order
    return idx, data['Phrase'].iloc[idx]

In [4]:
# Randomly select specific examples for X, as well as each of the 7 y's, make them into full sentences, and return.
# No examples are re-used.
# Note: X must be a non-empty list (the surveys in this notebook use lengths 1 and 3).
def generate_section(X,name):
    """Build one survey section: a prompt built from X plus 7 response items.

    Reads the module-level DataFrame `data` and draws examples via `get_example`.

    Parameters
    ----------
    X : list of str
        Category codes for the premises. One example is drawn per entry, in
        order. Must contain at least one entry.
    name : str
        Name of the person the prompt talks about.

    Returns
    -------
    (str, list of (str, str))
        The prompt text, and one (category, response text) tuple for each of the
        7 y categories in randomized order.

    Raises
    ------
    ValueError
        If X is empty.
    """
    if len(X) == 0:
        raise ValueError("X must contain at least one category")

    all_y = ['ca', 'fa', 'au', 'lo', 'sa', 'li', 'po']
    used = []

    # Randomly choose examples for X that have not been selected yet, and mark that they have been selected
    X_examples = []
    for category in X:
        idx, example = get_example(data,category,used)
        X_examples.append(example)
        used.append(idx)

    # Randomize the order of the 7 y categories (violations)
    random.shuffle(all_y)

    # For each y, randomly choose a specific unused example and phrase it as a question.
    # The category is kept next to the text so it can be used for labels later.
    responses = []
    for y in all_y:
        idx, example = get_example(data,y,used)
        used.append(idx)
        responses.append((y, '...' + example + '?'))

    # Join the premises into one clause: "a", "a and b", or "a, b, and c"
    if len(X_examples) == 1:
        premises = X_examples[0]
    elif len(X_examples) == 2:
        premises = X_examples[0] + ' and ' + X_examples[1]
    else:
        premises = ', '.join(X_examples[:-1]) + ', and ' + X_examples[-1]

    prompt = name + ' ' + premises
    prompt = prompt + '. Given this information, how likely (i.e. what percent chance) is it that ' + name + ':'

    return prompt, responses

In [5]:
# Add the given prompt and responses (full sentences) to the dataframe df, which represents the final excel sheet.
# Appends to the end of the dataframe and returns a new dataframe with the new information added.
# Condition represents the showif condition number in formr.
def add_to_file(df,prompt,responses,X,condition):
    """Append one survey section (1 note row + 1 'mc' row per response) to `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Sheet built so far. It is not modified; a new frame is returned.
    prompt : str
        Text of the note item shown above the questions.
    responses : list of (str, str)
        (category, question text) tuples; one multiple-choice row is added for each.
    X : list of str
        Premise categories of the section; only X[0] and len(X) are used to
        build the row label.
    condition : int
        Condition number written into the 'showif' column and the item names.

    Returns
    -------
    pandas.DataFrame
        `df` followed by the new rows.
    """
    if len(X) == 1:
        label = X[0]
    else:
        # Suffixing the condition ensures uniqueness within the survey without going
        # over the length limit (no 2 X's contain the same category)
        label = X[0] + str(condition)

    showif = 'condition == ' + str(condition)

    # The prompt comes first (item_order 1)
    rows = [{'explanations':'', 'type':'note', 'name':'prompt_'+label+str(condition), 'block_order':label,
             'item_order':1, 'showif':showif, 'label':prompt, 'choice1':'', 'choice2':'',
             'choice3':'', 'choice4':'', 'choice5':'', 'choice6':'', 'choice7':''}]

    # Then one multiple choice (1-7) question for each y.
    # item_order is the same (2) for all y's so Formr will randomize the order that the y's show up in
    for category, text in responses:
        rows.append({'explanations':'', 'type':'mc', 'name':label+category+str(condition), 'block_order':label,
                     'item_order':2, 'showif':showif, 'label':text, 'choice1':'Very Unlikely',
                     'choice2':'Unlikely', 'choice3':'Slightly Unlikely', 'choice4':'Neutral',
                     'choice5':'Slightly Likely', 'choice6':'Likely', 'choice7':'Very Likely'})

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
    # Each row is wrapped in its own single-row frame and ignore_index=False is kept so the
    # resulting index labels match what the row-by-row append produced.
    return pd.concat([df] + [pd.DataFrame([row]) for row in rows], ignore_index=False)

In [6]:
# Generate an entire survey with one example in X.
# All conditions for X: 1 propriety violation (any category); 1 sanctity violation; 1 liberty violation.
def generate_one_example_survey(df,name,condition):
    """Append the three single-premise sections ('pr', 'sa', 'li') to `df`.

    Each section is produced by `generate_section` and written with
    `add_to_file`, in that fixed order. Returns the extended dataframe.
    """
    for premise in ('pr', 'sa', 'li'):
        X = [premise]
        section_prompt, section_responses = generate_section(X,name)
        df = add_to_file(df,section_prompt,section_responses,X,condition)
    return df

In [7]:
# Generate an entire survey with three examples in X.
# All conditions for X: 3 of one specific propriety subcategory violation; 3 propriety violations in different subcategories;
# 3 sanctity violations; 3 liberty violations.
def generate_three_example_survey(df,name,condition):
    """Append the four three-premise sections to `df` and return the result.

    Sections, in order: one randomly chosen propriety subcategory three times;
    three distinct propriety subcategories other than that one; three sanctity
    violations; three liberty violations.
    """
    def emit(sheet, X):
        # Build one section for X and append it to the sheet
        section_prompt, section_responses = generate_section(X,name)
        return add_to_file(sheet,section_prompt,section_responses,X,condition)

    subcategories = ['ca','fa','au','lo','po']
    repeated = random.choice(subcategories)
    remaining = [code for code in subcategories if code != repeated]

    df = emit(df, [repeated] * 3)

    # 3 different propriety categories, none equal to the repeated one
    # (limited by number of examples); shuffled only after the first section is built
    random.shuffle(remaining)
    df = emit(df, remaining[:3])

    # 3 sanctity violations, then 3 liberty violations
    for code in ('sa', 'li'):
        df = emit(df, [code] * 3)

    return df

In [8]:
# total number of unique surveys to generate (must be an even number)
n_surveys = 20
if n_surveys % 2 != 0:
    # Half of the surveys are 1-example and half are 3-example; an odd count would leave
    # one condition number drawn by the random item without any survey items.
    raise ValueError('n_surveys must be an even number')

# The first row of the sheet is a random generator, which tells us which version of the survey to show.
# Its keys also define the sheet's columns and their order.
# 'explanations' is the comments column, not actually used by formr.
new_row = {'explanations':'', 'type':'random 1,'+str(n_surveys), 'name':'condition', 'block_order':'',
               'item_order':'', 'showif':'', 'label':'', 'choice1':'', 'choice2':'', 'choice3':'', 'choice4':'',
                'choice5':'', 'choice6':'', 'choice7':''}
# Built directly from the row: DataFrame.append was removed in pandas 2.0
df = pd.DataFrame([new_row])

# generate half the surveys as 1-example and half the surveys as 3-example cases
# (conditions 1..n/2 are 1-example, conditions n/2+1..n are 3-example)
name = 'Steven'
for i in range(n_surveys // 2):
    df = generate_one_example_survey(df,name,i+1)
for i in range(n_surveys // 2):
    df = generate_three_example_survey(df,name,i+n_surveys//2+1)

# See a preview of the changes
df

Unnamed: 0,explanations,type,name,block_order,item_order,showif,label,choice1,choice2,choice3,choice4,choice5,choice6,choice7
0,,"random 1,20",condition,,,,,,,,,,,
0,,note,prompt_pr1,pr,1,condition == 1,Steven walks away while their conversation par...,,,,,,,
0,,mc,prau1,pr,2,condition == 1,...has a long and loud conversation with a gro...,Very Unlikely,Unlikely,Slightly Unlikely,Neutral,Slightly Likely,Likely,Very Likely
0,,mc,prfa1,pr,2,condition == 1,...skips to the front of the line because thei...,Very Unlikely,Unlikely,Slightly Unlikely,Neutral,Slightly Likely,Likely,Very Likely
0,,mc,prsa1,pr,2,condition == 1,...marries their first cousin in an elaborate ...,Very Unlikely,Unlikely,Slightly Unlikely,Neutral,Slightly Likely,Likely,Very Likely
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,,mc,li20sa20,li20,2,condition == 20,...looks at pornography in which an 18-year-ol...,Very Unlikely,Unlikely,Slightly Unlikely,Neutral,Slightly Likely,Likely,Very Likely
0,,mc,li20lo20,li20,2,condition == 20,...gets a job in a foreign nation where their ...,Very Unlikely,Unlikely,Slightly Unlikely,Neutral,Slightly Likely,Likely,Very Likely
0,,mc,li20fa20,li20,2,condition == 20,...skips to the front of the line because thei...,Very Unlikely,Unlikely,Slightly Unlikely,Neutral,Slightly Likely,Likely,Very Likely
0,,mc,li20ca20,li20,2,condition == 20,...is driving along an empty road and sees a m...,Very Unlikely,Unlikely,Slightly Unlikely,Neutral,Slightly Likely,Likely,Very Likely


In [9]:
# Write the finished sheet to 'formr.csv'; the dataframe index is left out of the file
df.to_csv(path_or_buf='formr.csv', index=False)