In [1]:
import pandas as pd
import random

In [2]:
# Read in experiment data
# Required columns: 'Category', 'Phrase' (case sensitive). All other columns will be ignored. 
data_M = pd.read_csv('./experiment_data_M.csv')
data_M = data_M[['Category','Phrase']] # ignore the random column - those are human notes only
data_M = data_M.dropna(axis=0)
print(data_M.shape)
data_M.head()

(49, 2)


Unnamed: 0,Category,Phrase
0,au,"takes his father's car out after curfew, again..."
1,au,talks loudly in class while the instructor is ...
2,au,talks loudly and interrupts the mayor's speech...
3,au,sends out an email to other employees at his c...
4,au,tries to undermine all of his boss's ideas in ...


In [3]:
# Read in female gendered data
data_F = pd.read_csv('./experiment_data_F.csv')
data_F = data_F[['Category','Phrase']] # ignore the random column - those are human notes only
data_F = data_F.dropna(axis=0)
print(data_F.shape)
data_F.head()

(49, 2)


Unnamed: 0,Category,Phrase
0,au,"takes her father's car out after curfew, again..."
1,au,talks loudly in class while the instructor is ...
2,au,talks loudly and interrupts the mayor's speech...
3,au,sends out an email to other employees at her c...
4,au,tries to undermine all of her boss's ideas in ...


In [4]:
# Randomly choose an example given a single category
def get_example(data,category,used):
    indices = []
    if category == 'pr':
        indices = [i for i in range(data.shape[0]) if data['Category'].iloc[i] in ['ca','fa','au','lo','po']]
    else:
        indices = [i for i in range(data.shape[0]) if data['Category'].iloc[i] == category]
    indices = [i for i in indices if i not in used]
    idx = random.choice(indices)
    return idx, data.iloc[idx,1]

In [5]:
# Randomly select specific examples for X, as well as each of the 7 y's, make them into full sentences, and return.
# No examples are re-used. 
# Note: X must be a list of length exactly 1 or 3.
def generate_section(X,name,gender,used):
    all_y = ['ca', 'fa', 'au', 'lo', 'sa', 'li', 'po'] 
    data = pd.DataFrame()
    if gender == 'M':
        data = data_M
    else:
        data = data_F
    
    # Randomly choose examples for X that have not been selected yet, and mark that they have been selected
    X_examples = []
    if len(X) == 1:
        # There is 1 item in X
        idx, ex = get_example(data,X[0],used)
        X_examples = [ex]
        used = [idx]
    else:
        # There are 3 items in X
        idx1, ex1 = get_example(data,X[0],used)
        used = [idx1]
        idx2, ex2 = get_example(data,X[1],used)
        used.append(idx2)
        idx3, ex3 = get_example(data,X[2],used)
        used.append(idx3)
        X_examples = [ex1,ex2,ex3]
    
    # Randomize the order of the 7 y categories (violations)
    random.shuffle(all_y)
    
    # For each y, randomly choose a specific example from the provided data.
    # y_examples is a list of tuples: (category, example text)
    y_examples = []
    for y in all_y:
        idx, example = get_example(data,y,used)
        y_examples.append((y,example))
        used.append(idx)
        
    # Write the y-options as multiple choice questions
    responses = []
    for y in y_examples:
        res = '...' + y[1].replace('::',name)  + '?'
        responses.append((y[0],res)) # track the category for labels later
    
    # Put all the premises from X together into a prompt
    prompt = ''
    if len(X) == 1:
        # Prompt when X has 1 item
        prompt = name + ' ' + X_examples[0].replace('::',name) 
        prompt = prompt + '. Given this information, how likely (1 - very unlikely to 7 - very likely) is it that ' + name + ':'
    else:
        # Prompt when X has 3 items
        prompt = name + ' has done the following: '
        prompt = prompt + '\n- ' + X_examples[0].replace('::',name) + '\n- ' + X_examples[1].replace('::',name) + '\n- '
        prompt = prompt + X_examples[2].replace('::',name) 
        prompt = prompt + '\n\nGiven this information, how likely (1 - very unlikely to 7 - very likely) is it that ' 
        prompt = prompt + name + ':'
    
    return prompt, responses, used

In [6]:
# Add the given prompts and responses (full sentences) to the dataframe df, which represents the final excel sheet. 
# Appends to the end of the dataframe and returns a new dataframe with the new information added.
# X must have length exactly 1 or 3. Condition represents the showif condition number in formr. 
def add_to_file(df,prompt,responses,X,condition):
    label = X[0]
    if len(X) == 4:
        # this is the 3 different propriety condition
        label = 'pd3'
    else:
        label = label + str(len(X))
    # this ensures uniqueness within the survey without going over the length limit (no 2 X's contain the same category)
        
    item_order = 1

    # Add the prompt first
    new_row = {'explanations':'', 'type':'note', 'name':'prompt_'+label+str(condition), 'block_order':label,
               'item_order':item_order, 'showif':'condition == '+str(condition), 'label':prompt}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)
    item_order = item_order + 1

    # Add 7 multiple choice (1-7) questions, one for each y
    # item_order is the same for all y's so Formr will randomize the order that the y's show up in
    for y in responses:
        new_row = {'explanations':'', 'type':'rating_button 1,7,1', 'name':label+y[0]+str(condition), 'block_order':label,
                   'item_order':item_order, 'showif':'condition == '+str(condition), 'label':y[1], 
                   'choice1': 'Very Unlikely', 'choice2': 'Very Likely'}
        df = df.append(pd.DataFrame([new_row]), ignore_index=False)
    
    return df

In [7]:
def add_attention_check(df,name,gender,used,condition):
    prompt = 'This question is solely intended to be an attention check. Please respond by selecting the number 1 '
    prompt = prompt + 'for all the prompts below. You may ignore the text preceding the number options. '
    new_row = {'explanations':'', 'type':'note', 'name':'attention_check_instructions_'+str(condition), 'block_order':'',
                   'item_order':'', 'showif':'condition == '+str(condition), 'label':prompt}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)
    
    data = pd.DataFrame()
    if gender == 'M':
        data = data_M
    else:
        data = data_F
    
    # Randomly choose 7 examples of any category
    indices = [i for i in [i for i in range(data.shape[0])] if i not in used]
    for i in range(7):
        # randomly select an example and rewrite it as a question
        idx = random.choice(indices)
        example = '...' + data.iloc[idx,1] + '?'
        
        # add the example to the dataframe
        new_row = {'explanations':'', 'type':'rating_button 1,7,1', 'name':'attention_'+str(i)+'_'+str(condition), 
                   'block_order':'', 'item_order':'', 'showif':'condition == '+str(condition), 
                   'label':example, 'choice1': 'Very Unlikely', 'choice2': 'Very Likely'}
        df = df.append(pd.DataFrame([new_row]), ignore_index=False)
        
        # remove this example from the valid indices
        used.append(idx)
        indices = [i for i in indices if i not in used]
    
    # add a submit button
    new_row = {'explanations':'', 'type':'submit', 'name':'attention_submit_'+str(condition), 'block_order':'',
               'item_order':'', 'showif':'condition == '+str(condition), 'label':'Next Question'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)
    
    return df

In [8]:
# Generate an entire survey with one example in X. 
# All conditions for X: 1 propriety violation (any category); 1 sanctity violation; 1 liberty violation. 
# Names must have length 3 or more (first 3 names will be used). 
def generate_one_example_survey(df,names,genders,condition):
    X = ['pr']
    prompt, responses, used = generate_section(X,names[0],genders[0],[])
    df = add_to_file(df,prompt,responses,X,condition)
    new_row = {'explanations':'', 'type':'submit', 'name':'p1_submit_'+str(condition), 'block_order':'',
               'item_order':'', 'showif':'', 'label':'Next Question'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)

    X = ['sa']
    prompt, responses, used = generate_section(X,names[1],genders[1],used)
    df = add_to_file(df,prompt,responses,X,condition)
    new_row = {'explanations':'', 'type':'submit', 'name':'p2_submit_'+str(condition), 'block_order':'',
               'item_order':'', 'showif':'', 'label':'Next Question'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)

    X = ['li']
    prompt, responses, used = generate_section(X,names[2],genders[2],used)
    df = add_to_file(df,prompt,responses,X,condition)
    new_row = {'explanations':'', 'type':'submit', 'name':'p3_submit_'+str(condition), 'block_order':'',
               'item_order':'', 'showif':'', 'label':'Next Question'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)
    
    # add attention check
    df = add_attention_check(df,names[3],genders[3],used,condition)
    
    return df

In [9]:
# Generate an entire survey with three examples in X.
# All conditions for X: 3 of one specific propriety subcategory violation; 3 propriety violations in different subcategories;
# 3 sanctity violations; 3 liberty violations. 
# Names must have length 4 or more (first 4 names will be used). 
def generate_three_example_survey(df,names,genders,condition):
    all_pr = ['ca','fa','au','lo','po']
    category = random.choice(all_pr)
    all_pr.remove(category)
    X = [category,category,category]
    prompt, responses, used = generate_section(X,names[0],genders[0],[])
    df = add_to_file(df,prompt,responses,X,condition)
    new_row = {'explanations':'', 'type':'submit', 'name':'p1_submit_'+str(condition), 'block_order':'',
               'item_order':'', 'showif':'', 'label':'Next Question'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)

    # randomly choose 3 different propriety categories, that are not the same as the one above (limited by number of examples)
    random.shuffle(all_pr)
    X = [all_pr[0],all_pr[1],all_pr[2],'pr']
    prompt, responses, used = generate_section(X,names[1],genders[1],used)
    df = add_to_file(df,prompt,responses,X,condition)
    new_row = {'explanations':'', 'type':'submit', 'name':'p2_submit_'+str(condition), 'block_order':'',
               'item_order':'', 'showif':'', 'label':'Next Question'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)
    
    # randomly choose 3 sanctity violations
    X = ['sa','sa','sa']
    prompt, responses, used = generate_section(X,names[2],genders[2],used)
    df = add_to_file(df,prompt,responses,X,condition)
    new_row = {'explanations':'', 'type':'submit', 'name':'p3_submit_'+str(condition), 'block_order':'',
               'item_order':'', 'showif':'', 'label':'Next Question'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)

    # randomly choose 3 liberty violations
    X = ['li','li','li']
    prompt, responses, used = generate_section(X,names[3],genders[3],used)
    df = add_to_file(df,prompt,responses,X,condition)
    new_row = {'explanations':'', 'type':'submit', 'name':'p4_submit_'+str(condition), 'block_order':'',
               'item_order':'', 'showif':'', 'label':'Next Question'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)
    
    # add attention check
    df = add_attention_check(df,names[4],genders[4],used,condition)
    
    return df

In [10]:
def add_consent_form(df):
    new_row = {'explanations':'', 'type':'note', 'name':'consent', 'block_order':'',
                   'item_order':'', 'showif':'', 
                   'label':'You are being invited to participate in a research study about human reasoning. This study is being '
                   + 'conducted by Alan Jern, Ph.D., from the Department of Humanities, Social Sciences, and the Arts at '
                   + 'Rose-Hulman Institute of Technology. There are no known risks or costs if you decide to participate in this'
                   + 'research study. In this study, you will be asked to answer a few questions or make some judgments. There are '
                   + 'no right or wrong answers. We are only interested in whether people tend to give similar answers. The '
                   + 'information collected may not benefit you directly, but the information learned in this study could help us'
                   + 'to better understand how people think and reason. The data from this study will be shared publicly, but your '
                   + 'responses will be anonymized so that they cannot be linked to your identity. Your participation in this '
                   + 'study is voluntary and you may leave at any time. By completing the survey, you are voluntarily agreeing to '
                   + 'participate. If you have any questions about the study, please contact Alan Jern at jern@rose-hulman.edu. '
                   + 'If you have any questions about your rights as a research subject or if you feel you\'ve been placed at risk,'
                   + ' you may contact the Institutional Reviewer, Daniel Morris, by phone at (812) 877-8314, or by e-mail at '
                   + 'morris@rose-hulman.edu. ', }
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)

    new_row = {'explanations':'', 'type':'submit', 'name':'consent_submit', 'block_order':'',
                   'item_order':'', 'showif':'', 'label':'Acknowledge Consent & Proceed'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)
    
    return df

In [11]:
def add_instructions(df):
    new_row = {'explanations':'', 'type':'note', 'name':'instructions_1', 'block_order':'',
                   'item_order':'', 'showif':'', 
                   'label':'## Instructions'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)

    new_row = {'explanations':'', 'type':'note', 'name':'instructions_2', 'block_order':'',
                   'item_order':'', 'showif':'', 
                   'label':'On each page of this study, you will learn about something a person has done previously. ' +
                       'Your task is to rate how likely you think it is that the same person will do different things in ' +
                       'the future, given what you know about their past behavior.'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)

    new_row = {'explanations':'', 'type':'note', 'name':'instructions_3', 'block_order':'',
                   'item_order':'', 'showif':'', 
                   'label':'There are no right or wrong answers. We only want to know whether people give similar answers.'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)

    new_row = {'explanations':'', 'type':'submit', 'name':'instructions_submit', 'block_order':'',
                   'item_order':'', 'showif':'', 'label':'Continue to Experiment'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)
    
    return df

In [12]:
def add_feedback(df_one_example,df_three_example):
    # add feedback text box
    new_row = {'explanations':'', 'type':'note', 'name':'feedback_instructions', 'block_order':'',
                   'item_order':'', 'showif':'', 
                   'label':'Thank you for your participation in this experiment! If there is anything you found confusing ' +
                              'about the experiment, please make a note about it below. This section is optional. '}
    df_one_example = df_one_example.append(pd.DataFrame([new_row]), ignore_index=False)
    df_three_example = df_three_example.append(pd.DataFrame([new_row]), ignore_index=False)

    new_row = {'explanations':'', 'type':'text 500', 'optional': '*', 'name':'feedback_text', 'block_order':'',
                   'item_order':'', 'showif':'', 
                   'label':''}
    df_one_example = df_one_example.append(pd.DataFrame([new_row]), ignore_index=False)
    df_three_example = df_three_example.append(pd.DataFrame([new_row]), ignore_index=False)

    new_row = {'explanations':'', 'type':'submit', 'name':'survey_submit', 'block_order':'',
               'item_order':'', 'showif':'', 'label':'Submit Responses'}
    df_one_example = df_one_example.append(pd.DataFrame([new_row]), ignore_index=False)
    df_three_example = df_three_example.append(pd.DataFrame([new_row]), ignore_index=False)
    
    return df_one_example,df_three_example

In [13]:
def add_prolific(df):
    new_row = {'explanations':'', 'type':'get PROLIFIC_PID', 'name':'code', 'block_order':'',
                   'item_order':'', 'showif':'', 'label':''}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)
    
    new_row = {'explanations':'', 'type':'block', 'name':'block', 'block_order':'', 'item_order':'', 
                   'showif':'is.na(code) || code == \"\"', 'label':'You have to come here through Prolific.'}
    df = df.append(pd.DataFrame([new_row]), ignore_index=False)
    
    return df

In [14]:
df = pd.DataFrame()
df['explanations'] = " " # this is the comments column, not actually used by formr
df['type'] = " "
df['optional'] = " "
df['name'] = " "
df['block_order'] = " "
df['item_order'] = " "
df['showif'] = " "
df['label'] = " " 

df = add_prolific(df)
df = add_consent_form(df)
df = add_instructions(df)

n_surveys = 20 # total number of unique surveys to generate (must be an even number)
new_row = {'explanations':'', 'type':'random 1,'+str(n_surveys/2), 'name':'condition', 'block_order':'',
               'item_order':'', 'showif':'', 'label':''}
df = df.append(pd.DataFrame([new_row]), ignore_index=False)

# generate half the surveys as 1-example and half the surveys as 3-example cases
names = ['Steven','Maria','James','Jennifer','Peter']
genders = ['M','F','M','F','M']
df_one_example = df.copy(deep=True)
df_three_example = df.copy(deep=True)
for i in range(int(n_surveys/2)):
    df_one_example = generate_one_example_survey(df_one_example,names,genders,i+1)
for i in range(int(n_surveys/2)):
    df_three_example = generate_three_example_survey(df_three_example,names,genders,i+1)
    
df_one_example,df_three_example = add_feedback(df_one_example,df_three_example)

# See a preview of the changes
df_one_example

Unnamed: 0,explanations,type,optional,name,block_order,item_order,showif,label,choice1,choice2
0,,get PROLIFIC_PID,,code,,,,,,
0,,block,,block,,,"is.na(code) || code == """"",You have to come here through Prolific.,,
0,,note,,consent,,,,You are being invited to participate in a rese...,,
0,,submit,,consent_submit,,,,Acknowledge Consent & Proceed,,
0,,note,,instructions_1,,,,## Instructions,,
...,...,...,...,...,...,...,...,...,...,...
0,,"rating_button 1,7,1",,attention_6_10,,,condition == 10,...marries her first cousin in an elaborate we...,Very Unlikely,Very Likely
0,,submit,,attention_submit_10,,,condition == 10,Next Question,,
0,,note,,feedback_instructions,,,,Thank you for your participation in this exper...,,
0,,text 500,*,feedback_text,,,,,,


In [15]:
df_three_example

Unnamed: 0,explanations,type,optional,name,block_order,item_order,showif,label,choice1,choice2
0,,get PROLIFIC_PID,,code,,,,,,
0,,block,,block,,,"is.na(code) || code == """"",You have to come here through Prolific.,,
0,,note,,consent,,,,You are being invited to participate in a rese...,,
0,,submit,,consent_submit,,,,Acknowledge Consent & Proceed,,
0,,note,,instructions_1,,,,## Instructions,,
...,...,...,...,...,...,...,...,...,...,...
0,,"rating_button 1,7,1",,attention_6_10,,,condition == 10,...requires all his employees to split their b...,Very Unlikely,Very Likely
0,,submit,,attention_submit_10,,,condition == 10,Next Question,,
0,,note,,feedback_instructions,,,,Thank you for your participation in this exper...,,
0,,text 500,*,feedback_text,,,,,,


In [17]:
# Save the results to a file
df_one_example.to_csv('formr_one_example.csv',index=False)
df_three_example.to_csv('formr_three_example.csv',index=False)