# Create crowd input workflow


1.) Create all questions of a run

- the set of questions should not be changed afterwards (it is created only once)
- if the question formulations change, we create a new run
- identifiers should not change
- unique ids are created in this step
- batches are drawn from this set of questions


2.) Create batch

- draw from all questions of a run
- only select properties in the experiment group
- always make sure that a batch contains the full set of questions of a pair
- add the Prolific URL
- check prolific_input for pairs that have already been posed
- write to file without a header


Run: 

`python create_questions.py [run]`

`python create_batch.py [url] [batch/test]`

## Fix csv splitting issue

In [1]:
import csv

# Repair rows of the run template whose question sentence was split into
# several tab-separated cells. The untouched file content is first copied to
# a backup, then the template itself is overwritten with the repaired rows.
run = 4
path = f'../templates/template-run{run}.csv'
path_backup = f'../templates/template-run{run}-backup.csv'

with open(path) as infile:
    lines = infile.read().strip().split('\n')

# Keep a copy of the (stripped) original content before overwriting anything.
with open(path_backup, 'w') as outfile:
    outfile.write('\n'.join(lines))

# The first row is the header; its width is the width every row should have.
header = lines[0].strip().split('\t')
correct_length = len(header)
print(correct_length)
print(header)

new_lines = []
for raw_row in lines:
    clean_row = raw_row.strip()
    cells = clean_row.split('\t')
    n_cells = len(cells)

    if n_cells == correct_length:
        # Row already has the expected number of cells: keep it as is.
        new_lines.append(clean_row)
        continue

    # NOTE(review): the widths 10 and 11 are hard-coded. If the header itself
    # has 10 columns, the 10-cell case below can never be reached because the
    # check above catches it first -- confirm the intended column count.
    if n_cells == 10:
        # Sentence was split in two: glue cells 1-2 back together with the
        # comma and make sure the sentence ends with a period.
        sentence = ','.join(cells[1:3]).strip()
        if sentence[-1] != '.':
            sentence = sentence + '.'
        remainder = cells[3:]
    elif n_cells == 11:
        # Sentence was split in three: glue cells 1-3 back together.
        sentence = ','.join(cells[1:4])
        remainder = cells[4:]
    else:
        # Unexpected width: the row is reported and NOT written back.
        print('missed something')
        continue

    repaired_cells = [cells[0], sentence]
    repaired_cells.extend(remainder)
    new_lines.append('\t'.join(repaired_cells).strip())

# Show the cell count of every row that will be written.
print('\n checking if lines have the correct lenght\n')
for repaired_row in new_lines:
    print(len(repaired_row.split('\t')))

with open(path, 'w') as outfile:
    outfile.write('\n'.join(new_lines))

10
['level', 'question', 'relation', 'perceptual', 'perceptual_scale', 'complex', 'complex_scale', 'parts', 'parts_material', 'activities']

 checking if lines have the correct lenght

10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10


# Find example


In [1]:
from utils import read_csv
import glob

# Get example properties
def find_example(target, target_prop_concept):
    """Print the example files in which ``target`` occurs.

    Every file matching ``../examples/*-pairs.csv`` is loaded with
    ``read_csv`` (presumably yielding one dict per row -- the rows are
    iterated with ``.items()``). For each row, the values of all columns whose
    name starts with ``target_prop_concept`` are compared against ``target``;
    one ``Found ...`` line is printed per matching row.

    Args:
        target: The value to look for (e.g. a property or concept name).
        target_prop_concept: Column-name prefix selecting which columns are
            searched (e.g. ``'prop'``).

    Returns:
        None. Results are only printed.
    """
    for path in glob.glob('../examples/*-pairs.csv'):
        # read_csv receives the path itself, so no extra file handle is
        # opened here.
        for row in read_csv(path):
            found = any(
                key.startswith(target_prop_concept) and value == target
                for key, value in row.items()
            )
            if found:
                print(f'Found {target} in {path}')
                    

# Look for this exact value in all columns whose name starts with 'prop'
# and print the example files in which it occurs.
# NOTE(review): 'maade_of_plastic' looks like a misspelling of
# 'made_of_plastic'; presumably the search is meant to locate that typo in
# the example data -- confirm before "correcting" the string.
target = 'maade_of_plastic'
target_prop_concept = 'prop'
find_example(target, target_prop_concept)

In [5]:
# get all examples for review
from collections import defaultdict

def collect_all_examples(run, relation=None):
    """Print the positive/negative example sentences used in a run.

    Reads ``../questions/run{run}-all-restricted_True.csv`` via ``read_csv``
    and groups the distinct ``(example_pos, example_neg)`` pairs by the
    ``relation`` column.

    Args:
        run: Run identifier inserted into the question file name.
        relation: If ``None`` (default), print every relation name followed
            by a single example pair for it. Because the pairs are stored in
            a set, which pair is shown is arbitrary. Otherwise print all
            example pairs of the given relation only.

    Returns:
        None. Results are only printed.
    """
    question_file = f'../questions/run{run}-all-restricted_True.csv'
    ex_keys = ['example_pos', 'example_neg']

    # relation -> set of (positive example, negative example) tuples
    all_examples = defaultdict(set)
    for row in read_csv(question_file):
        all_examples[row['relation']].add(tuple(row[key] for key in ex_keys))

    for rel, examples in all_examples.items():
        if relation is None:
            # Overview mode: relation name plus one sample pair.
            print(rel)
            shown = list(examples)[:1]
        elif rel == relation:
            shown = examples
        else:
            continue
        for ex_pos, ex_neg in shown:
            print(f'pos: {ex_pos}')
            print(f'neg: {ex_neg}')
            print()


# Overview for run 4: relation=None prints each relation with one sample
# positive/negative example pair.
run = 4
collect_all_examples(run, relation = None)           

implied_category
pos: I know that (a/an) chocoloate_cake is sweet as most or all other things similar to (a/an) chocoloate_cake are sweet. 
neg: I know that (a/an) table is transparent as most or all other things similar to (a/an) table are transparent. 

typical_of_property
pos: ``Lion'' is one of the first things which come to mind when I hear ``mane' because (a/an) lion is a typical example of things which have (a/an) mane'. 
neg: ``Unicycle'' is one of the first things which come to mind when I hear ``sattle' because (a/an) unicycle is a typical example of things which have (a/an) sattle'. 

typical_of_concept
pos: ``Heavy'' is one for the first things which come to mind when I hear ``rock' because heavy is one of the typical weights of (a/an) rock'.
neg: ``Green'' is one for the first things which come to mind when I hear ``rasperry' because green is one of the typical colors of (a/an) rasperry'.

affording_activity
pos: I know that being bright is necessary for many things (a/an)

# Framework for exploratory annotation

In [14]:
import sys

from utils import read_csv, to_csv
from utils import sort_by_key

from create_batch import batch_to_file
from create_batch import print_task_intro

# get selected pairs:

def get_pairs(group):
    """Return the lines of ``../experiment_groups/{group}.txt`` as a list.

    Leading and trailing whitespace of the whole file is removed before the
    content is split on newlines, so each list entry is one line of the file.
    """
    group_file = f'../experiment_groups/{group}.txt'
    with open(group_file) as infile:
        content = infile.read()
    return content.strip().split('\n')

def create_inspection_batch(group, run):
    """Collect all questions of a run that belong to the pairs of a group.

    The pairs come from ``get_pairs(group)``; the questions are read from
    ``../questions/run{run}-all-restricted_True.csv`` and grouped with
    ``sort_by_key`` on the ``property`` and ``concept`` columns. The questions
    of each pair are concatenated in the order the pairs are listed.

    NOTE(review): assumes the entries returned by ``get_pairs`` have the same
    format as the keys produced by ``sort_by_key`` -- confirm against utils.
    """
    pairs = get_pairs(group)
    question_dicts = read_csv(f'../questions/run{run}-all-restricted_True.csv')
    questions_by_pair = sort_by_key(question_dicts, ['property', 'concept'])

    return [
        question
        for pair in pairs
        for question in questions_by_pair[pair]
    ]
    

def main():
    """Write an inspection batch for one experiment group to file.

    Command-line arguments:
        1. group -- name of the experiment group (e.g. ``expert_inspection1``)
        2. run   -- run identifier (e.g. ``4``)

    All questions of the group's pairs are written as a single batch with the
    placeholder url ``'test'``; afterwards the task name and the task intro
    are printed.
    """
    # The arguments live in sys.argv; the sys module itself is not
    # subscriptable.
    group = sys.argv[1]
    # NOTE(review): run stays a string here (the commented-out default was
    # the int 4) -- confirm batch_to_file / print_task_intro accept a string.
    run = sys.argv[2]

    # Build the batch; it was previously referenced without being created.
    inspection_questions = create_inspection_batch(group, run)
    n_qu = len(inspection_questions)

    experiment_name = group
    batch_n = 1
    url = 'test'
    task_name = f'Agree or disagree (run{run}-{experiment_name}-batch{batch_n}-{n_qu}-{n_qu})'

    batch_to_file(inspection_questions, url, experiment_name, run, n_qu, batch_n)
    print(task_name)
    print_task_intro(run)

I know that (a/an) arrow can fly/be used for flying as most or all other things similar to (a/an) arrow fly. 
``Arrow'' is one of the first things which come to mind when I hear ``fly' because (a/an) arrow is a typical example of things which fly/are used for flying'. 
``Fly'' is one for the first things which come to mind when I hear ``arrow' because flying is one of the typical movements of (a/an) arrow'.
All or most  arrow(s) can fly/be used for flying. This is not what they normally do or are used for. 
I know that all or most arrow(s) fly regularly or are used for flying regularly. 
You can find (a/an) arrow which can fly/be used for flying. This distinguishes a certain type of arrow from others. 
I could say (a/an) arrow flies/is used for flying, but I would most certainly not mean it literally.
I think there is a arrow  which can fly/be used for flying, but this is rare or uncommon.
Usually, (a/an) arrow cannot fly/be used for flying, but there could be a highly unusual situatio