In [2]:
import pandas as pd
import numpy as np
import glob
import math
import itertools 
import random
import os
cwd = os.getcwd()

spec_raw = pd.read_csv(cwd+"/SPEC_questions_FINAL.csv")

In [23]:
spec_raw[(spec_raw.Category=="ANIMAL") & ~(spec_raw.Condition=="Low")]

TypeError: bad operand type for unary ~: 'str'

In [None]:
# split into sublists L1-S1, L1-S2, L2-S1, L2-S2
# each list has *fixed* words. But words are paired with different questions for different subjects
# The 192 words will be divided into 2 experimental lists (96 trials each): 
# list 1 will contain high-spec words in odd-numbered pairs, and low-spec words in even-numbered pairs; 
# list 2 - the opposite.
L1 = spec_raw[((spec_raw.PairNumber%2==1) & (spec_raw.Condition=="High")) |\
         ((spec_raw.PairNumber%2==0) & (spec_raw.Condition=="Low"))]
L2 = spec_raw[((spec_raw.PairNumber%2==1) & (spec_raw.Condition=="Low")) |\
         ((spec_raw.PairNumber%2==0) & (spec_raw.Condition=="High"))]
L1_S1 = L1[(L1.PairNumber%4==1) | (L1.PairNumber%4==2)]
L1_S2 = L1[(L1.PairNumber%4==0) | (L1.PairNumber%4==3)]
L2_S1 = L2[(L2.PairNumber%4==1) | (L2.PairNumber%4==2)]
L2_S2 = L2[(L2.PairNumber%4==0) | (L2.PairNumber%4==3)]
L1_S1.to_csv(cwd+"/sublists/L1-S1.csv", index=False)
L1_S2.to_csv(cwd+"/sublists/L1-S2.csv", index=False)
L2_S1.to_csv(cwd+"/sublists/L2-S1.csv", index=False)
L2_S2.to_csv(cwd+"/sublists/L2-S2.csv", index=False)


In [None]:
def check_consecutive_question(qlist):
    qs = qlist["QuestionIndex"].values
    for i in len(qs):
        if qs[i] == qs[i+1]:
            return True
    return False

In [None]:
def create_spec_condition_order(subjn):
    run = [("High", "Yes", 200), ("High", "Yes", 300),("High", "Yes", 400),("High", "Yes", 500), ("High", "Yes", 600),("High", "Yes", 700),\
           ("High", "No", 200), ("High", "No", 300),("High", "No", 400),("High", "No", 500), ("High", "No", 600),("High", "No", 700),\
           ("Low", "Yes", 200), ("Low", "Yes", 300),("Low", "Yes", 400),("Low", "Yes", 500), ("Low", "Yes", 600),("Low", "Yes", 700),\
           ("Low", "No", 200), ("Low", "No", 300),("Low", "No", 400),("Low", "No", 500), ("Low", "No", 600),("Low", "No", 700)]*2
    # a dirty but quick way to generate the random lists
    # "reshuffle then filter" is way too slow for generating all unique items (or practically impossible)
    restart = True
    while restart:
        restart = False
        idx = np.asarray(list(range(48)))
        run_rnd = []
        # choose x random trials as the initial x trials
        initials_idx = np.random.choice(idx, 3, replace=False)
        idx = idx[idx!= initials_idx[0]]
        idx = idx[idx!= initials_idx[1]]
        idx = idx[idx!= initials_idx[2]]
        run_rnd.append(run[initials_idx[0]])
        run_rnd.append(run[initials_idx[1]])
        run_rnd.append(run[initials_idx[2]])
        for i in range(3, 48): # the number here means how many consecutive trials are allowed
            # if the previous three items are the same, choose the other condition
            if run_rnd[i-1][0]==run_rnd[i-2][0] and run_rnd[i-1][0]==run_rnd[i-3][0]: 
                if run_rnd[i-1][0] =="High":
                    choosable = idx[idx%4>=2]
                    if len(choosable)==0:
                        restart = True
                        break
                    else:
                        chosen_idx = np.random.choice(choosable)
                        run_rnd.append(run[chosen_idx])
                        idx = idx[idx!=chosen_idx]
                else:
                    choosable = idx[idx%4<2]
                    if len(choosable)==0:
                        restart = True
                        break
                    else:
                        chosen_idx = np.random.choice(choosable)
                        run_rnd.append(run[chosen_idx])
                        idx = idx[idx!=chosen_idx]
            else:
                chosen_idx = np.random.choice(idx)
                run_rnd.append(run[chosen_idx])
                idx = idx[idx!=chosen_idx]
    run_rnd = np.asarray(run_rnd).transpose()
    run_df = pd.DataFrame({"Condition":run_rnd[0], "Answer":run_rnd[1], "ISI":run_rnd[2]})
    run_df.to_csv(cwd+"/condition_orders/condition_order"+str(subjn)+".csv", index=False)

# create condition orders
for i in range(1, 9):
    create_spec_condition_order(i)



In [14]:
material_order = ["L1-S1", "L1-S2", "L2-S1", "L2-S2"]
sublists_files = [ cwd+"/sublists/"+order+".csv" for order in material_order]
sublist_ = pd.read_csv(sublists_files[0])
sublist_

Unnamed: 0,PairNumber,Condition,Noun,Category,Is it alive?,Can you eat or drink it?,Can you find it in the wild?,Can you wear it?,Can you hold it?,Did humans create it?,Does it make sounds?,Does it float in water?
0,1,High,bulldog,ANIMAL,1,0,0,0,1,-1,1,0
1,2,Low,insect,ANIMAL,1,0,1,0,1,0,1,1
2,5,High,mouse,ANIMAL,1,0,1,0,1,0,1,1
3,6,Low,fish,ANIMAL,1,1,1,0,1,0,0,0
4,9,High,sparrow,ANIMAL,1,0,1,0,1,0,1,1
5,10,Low,lizard,ANIMAL,1,0,1,0,1,0,-1,1
6,13,High,frog,ANIMAL,1,-1,1,0,1,0,1,0
7,14,Low,spider,ANIMAL,1,0,1,0,1,0,-1,1
8,17,High,aspirin,ARTIFACTS,0,1,0,0,1,1,0,0
9,21,High,ring,ARTIFACTS,0,0,0,1,1,1,0,0


In [None]:
def create_spec_runs(subjn, material_order):
    condition_orders_files = random.sample(glob.glob(cwd+"/condition_orders/*"),4)
    sublists_files = [ cwd+"/sublists/"+order+".csv" for order in material_order]
    run_n = 1
    for order_file, sublist_file in zip(condition_orders_files, sublists_files):
        ISI = []
        subject_run = pd.DataFrame({"PairNumber":[], "Condition":[],"Noun":[],"Category":[],"Question":[], "QuestionIndex":[], "Answer":[], "ISI":[]}, \
                columns=["PairNumber", "Condition","Noun","Category","Question", "QuestionIndex", "Answer", "ISI"], dtype=np.int32)
        condition_order = pd.read_csv(order_file)
        sublist_ = pd.read_csv(sublist_file)
        sublist_.insert(4, "Question", np.nan)
        sublist_.insert(5, "QuestionIndex", np.nan)
        sublist_.insert(6, "Answer", np.nan)
        sublist = sublist_.copy()
        question_names = sublist.columns[-8:]
        # add empty columns to add values later
        # attaching questions to words
        restart = True
        while restart:
            restart = False
            question_list = sublist_.copy()
            sublist = sublist_.copy()
            # the space for questions and answers
            questions_all = [0,1,2,3,4,5,6,7]
            np.random.shuffle(questions_all)
            answers_all = [1, 0]*24
            # sample yes question
            for q_index in questions_all:
                answers = random.sample(answers_all,6)#+[1,0]
                [answers_all.remove(x) for x in answers]
                n_Yes = answers.count(1)
                n_No = answers.count(0)
                # Yes questions
                Yes_questions = question_list[question_list.iloc[:, q_index+7]==1]
                if n_Yes==0:
                    pass
                elif len(Yes_questions)<n_Yes:
                    restart = True
                    break
                else:
                    Yes_questions_index = list(Yes_questions.sample(n_Yes).index)
                    for idx in Yes_questions_index:
                        sublist.loc[idx, "Question"] = question_names[q_index]
                        sublist.loc[idx, "QuestionIndex"] = q_index
                        sublist.loc[idx, "Answer"] = 1
                    # remove the already sampled 
                    question_list = question_list.drop(Yes_questions_index)
                # No questions
                No_questions = question_list[question_list.iloc[:, q_index+7]==0]
                if n_No==0:
                    pass
                elif len(No_questions)<n_No:
                    restart = True
                    break
                else:
                    No_questions_index = list(No_questions.sample(n_No).index)
                    for idx in No_questions_index:
                        sublist.loc[idx, "Question"] = question_names[q_index]
                        sublist.loc[idx, "QuestionIndex"] = q_index
                        sublist.loc[idx, "Answer"] = 0
                    # remove the already sampled 
                    question_list = question_list.drop(No_questions_index)
            # check consecutive questions; if YES, restart
            if check_consecutive_question(sublist):
                restart = True
                    
        # assign items to condition order
        for row in range(len(condition_order)):
            condition = condition_order.iloc[row]["Condition"]
            chosen_item = sublist[sublist.Condition==condition].sample()
            chosen_idx = list(chosen_item.index)
            # append all values
            ISI.append(condition_order.iloc[row]["ISI"])
            subject_run = subject_run.append(chosen_item.iloc[:,0:7])          
            sublist = sublist[~sublist.index.isin(chosen_idx)] # remove already sampled items
        # name the csv output
        order_name = order_file.split("/")[-1][10:-4]
        sublist_name = sublist_file.split("/")[-1][0:5]
        subject_run.ISI = ISI
        subject_run.Answer = subject_run.Answer.astype(int)
        subject_run.QuestionIndex = subject_run.QuestionIndex.astype(int)
        subject_run.PairNumber = subject_run.PairNumber.astype(int)
        subject_run = subject_run.reset_index()
        column_order=["PairNumber", "Condition","Noun","Category","Question", "QuestionIndex", "Answer", "ISI"]
        subject_run[column_order].to_csv(cwd+"/subject_lists/subject"+str(subjn)+"_"+"run"+str(run_n)+"_"+order_name+"_"+sublist_name+".csv", index=False)
        run_n = run_n+1
