# This is the running tab on experimental design and processing files for my dissertation work.

This block of code reads Segmentation_Experimental_Item_Setup.xlsx, where all experimental items were selected, stored, and checked for balancing.
It creates four .csv files in ./data/original_data/exp_files/:
1. expLists_all.csv
2. pracLists_all.csv
3. sylexpLists_all.csv
4. sylpracLists_all.csv


In [1]:
import pandas as pd
import segFunctions

# Export four worksheets of the item-setup workbook with csv_from_excel,
# one csv file per worksheet. Each entry pairs a worksheet name with the
# name of the csv file written under ./data/original_data/exp_files/.
workbook = 'Segmentation_Experimental_Item_Setup.xlsx'
sheetToCsv = [
    ('All_SegExperiment_Lists', 'expLists_all.csv'),       # segmentation experiment lists
    ('All_SegPractice_Lists', 'pracLists_all.csv'),        # segmentation practice lists
    ('Syllabification_Lists', 'sylexpLists_all.csv'),      # syllabification experiment lists
    ('Syllabification_Practice', 'sylpracLists_all.csv'),  # syllabification practice lists
]

for sheetName, csvName in sheetToCsv:
    segFunctions.csv_from_excel(workbook, sheetName, './data/original_data/exp_files/' + csvName)

<_io.TextIOWrapper name='./data/original_data/exp_files/sylpracLists_all.csv' mode='w' encoding='UTF-8'>

This block of code splits the original complete lists into separate csv files, one per condition. It writes 16 separate files (4 trial types by 4 conditions) to the temp_data folder for later, more fine-grained manipulation.

In [2]:
# Split each complete list file into one csv per condition (A-D) in temp_data.

trialType = ['exp', 'prac', 'sylexp', 'sylprac']  # trial types; also the file-name prefixes
condName = ['A', 'B', 'C', 'D']  # condition labels

# Each range list is flat: consecutive (first row, last row) pairs, one pair per condition.
expsylRange = [0, 47, 0, 47, 48, 96, 48, 96]  # syllabification experiment rows
expsegRange = [0, 12, 15, 27, 30, 42, 45, 57]  # segmentation rows (experiment and practice)
pracsylRange = [0, 9, 0, 9, 10, 19, 10, 19]  # syllabification practice rows

# Trial types that have their own row ranges; every other trial type uses expsegRange.
rowRanges = {'sylprac': pracsylRange, 'sylexp': expsylRange}

for trial in trialType:
    # read the complete list file for this trial type
    df = pd.read_csv('./data/original_data/exp_files/' + trial + 'Lists_all.csv')
    bounds = rowRanges.get(trial, expsegRange)

    # even positions of bounds are first rows, odd positions are last rows (A, B, C, D in order)
    for cond, firstRow, lastRow in zip(condName, bounds[0::2], bounds[1::2]):
        condition = df.loc[firstRow:lastRow, :]  # label slice, so lastRow itself is included
        condition.to_csv('./data/temp_data/' + trial + 'Cond' + cond + '.csv')  # temp file for later steps

# Historical Records ONLY
### Kept for reference; presumably an earlier version of the condition-splitting step.
### NOTE(review): if executed, this reassigns expsylRange and writes the exp/prac/sylexp
### condition files plus sylprac.csv into ./data/temp_data/ -- confirm before running.
### creates list for all trial types

trialType = ['exp','prac','sylexp','sylprac'] # trial types; also used as file-name prefixes
condName = ['A','B','C','D'] # list for condition labels
expsylRange = [0,23,24,47,48,71,72,96] # flat (start,end) row pairs, one pair per condition, used for sylexp
expsegRange = [0,12,15,27,30,42,45,57] # flat (start,end) row pairs, one pair per condition, used for exp and prac

### runs through each condition for practice and experimental list conditions
for trial in trialType:
    i = 0 # position of the current start row in the range list
    j = 1 # position of the current end row in the range list
    # Reads in the complete list csv file for this trial type
    df = pd.read_csv('./data/original_data/exp_files/' + trial + 'Lists_all.csv')

    # sylprac is handled separately: in this version it is not split into 4 conditions
    if trial == 'sylprac':
        df.to_csv('./data/temp_data/'+trial+'.csv') # writes the whole, unsplit file to the temp folder
    else:
        for cond in condName: # sets up loop to run through 4 conditions (ABCD)
            if trial == 'sylexp': # test for syllable lists
                condition = df.loc[expsylRange[i]:expsylRange[j],:] # rows for this condition (label slice, end row included)
                condition.to_csv('./data/temp_data/'+trial+'Cond'+cond+'.csv') # saves temp file for further work
                i += 2 # move to the next (start,end) pair
                j += 2 # move to the next (start,end) pair
            else: # captures both practice and real segmentation experiment lists
                condition = df.loc[expsegRange[i]:expsegRange[j],:] # rows for this condition (label slice, end row included)
                condition.to_csv('./data/temp_data/'+trial+'Cond'+cond+'.csv') # saves temp file for further work
                i += 2 # move to the next (start,end) pair
                j += 2 # move to the next (start,end) pair

The block of code below reads in the files from the temp_data folder and further refines the dataframes. Following all dataframe manipulation, 16 new csv files to be used in PsychoPy3 experiments are written to the processed_data folder. The 16 files stored in the temp_data folder remain unaltered in case review or debugging is necessary.

In [3]:
# Final clean-up of the temp_data condition files; results are written to
# ./data/processed_data/exp_files/ for use in PsychoPy.

indexName = ['labels', 'labels', 'syllabification', 'syllabification']  # index column, one per trial type
blockName = ['block01', 'block08', 'block48']  # column labels that bound the block slices
outDir = './data/processed_data/exp_files/'

for k, trial in enumerate(trialType):
    # position in blockName of the last block to keep: 2 (block48) for the first
    # trial type, 1 (block08) for the second; the syllabification types never use it
    b = 2 - k

    for cond in condName:  # conditions A-D
        fileName = trial + 'Cond' + cond + '.csv'
        df_cond = pd.read_csv('./data/temp_data/' + fileName, index_col=indexName[k])

        if trial == 'sylprac':
            # syllabification practice: drop the 'Unnamed: 0' column, keep the header
            df_cond.drop('Unnamed: 0', axis=1, inplace=True)
            df_cond.to_csv(outDir + fileName)
            continue

        if trial == 'sylexp':
            # syllabification experiment: written back as read, header kept
            # NOTE(review): unlike sylprac, 'Unnamed: 0' is not dropped here -- confirm this is intended
            df_cond.to_csv(outDir + fileName)
            continue

        # segmentation lists (experiment and practice): order the columns
        # alphabetically, keep block01 through the last block, write without a header
        df_cond = df_cond.reindex(columns=sorted(df_cond.columns))
        clist_cond = df_cond.loc[:, blockName[0]:blockName[b]]
        clist_cond.to_csv(outDir + fileName, header=False)

# Historical Record ONLY
### Kept for reference; presumably an earlier version of the final clean-up step.
### NOTE(review): if executed, this writes files into ./data/processed_data/exp_files/
### and expects ./data/temp_data/sylprac.csv to exist -- confirm before running.
### Reads in new csv files from temp folder and makes final changes for PsychoPy

### sets up the per-trial-type index columns and the block labels used for slicing
indexName = ['labels','labels','syllabification','syllabification'] # index column for each trial type, in trialType order
blockName = ['block01','block08','block48'] # column labels that bound the block slices
k = 0 # position in indexName for the current trial type
b = 2 # position in blockName of the last block to keep

### runs through each trial type
for trial in trialType:
    if trial == 'sylprac': # syl practice is read from a single file here (no per-condition files)
        df_cond = pd.read_csv('./data/temp_data/'+trial+'.csv', index_col=indexName[k]) # reads csv file in
        # deletes the 'Unnamed: 0' column before writing
        df_cond.drop('Unnamed: 0', axis = 1, inplace = True)
        df_cond.to_csv('./data/processed_data/exp_files/'+trial+'.csv') # writes csv file to processed folder
    else: # captures all files types with 4 conditions
        for cond in condName: # runs through all 4 conditions
            df_cond = pd.read_csv('./data/temp_data/'+trial+'Cond'+cond+'.csv',index_col=indexName[k]) # read csv
            # deletes the 'Unnamed: 0' column before writing
            df_cond.drop('Unnamed: 0', axis = 1, inplace = True)
            
            # test to see whether header must be omitted
            if trial == 'sylexp': # syllabification experiment lists: header is retained
                df_cond.to_csv('./data/processed_data/exp_files/'+trial+'Cond'+cond+'.csv') # writes csv   
            else: # captures both segmentation tasks with 4 conditions
                # sorts the columns in dataframe alphabetically
                df_cond = df_cond.reindex(sorted(df_cond.columns), axis=1)
                clist_cond = df_cond.loc[:,blockName[0]:blockName[b]] # keeps block01 through the selected last block
                # writes out the new csv (no header row) to the processed_data folder for PsychoPy
                clist_cond.to_csv('./data/processed_data/exp_files/'+trial+'Cond'+cond+'.csv',header=False)
        b=b-1 # lowers the blockName position so the next trial type uses a shorter block range
    k += 1 # moves to the next entry in indexName

At this point all input files are ready for the PsychoPy3 experiment. They will be used as condition files for participants. The data up to this point has all been stored at './data/processed_data/exp_files'. The output files from participants completing the experimental tasks will be written to './data/original_data/part_files'.

# Participant file manipulations begin here:
The files output to './data/original_data/part_files' will serve as the starting point here, but no data file will be overwritten or moved. The raw output files will remain unaltered and in the original storage location. All csv files will be read in from this location and written out to the temp_data folder.

# Testing
The stuff below this point is a work in progress. It is not ready to run and should be approached with caution.

In [7]:
import pandas as pd
# Scratch check on a single condition file: trial type 'exp', condition 'A'.
trial, cond = 'exp', 'A'
indexName = ['labels']
blockName = ['block01', 'block08', 'block48']  # column labels that bound the block slices

k = 0
testPath = './data/temp_data/' + trial + 'Cond' + cond + '.csv'
df_test = pd.read_csv(testPath, index_col=indexName[k])  # read csv
df_test = df_test.reindex(columns=sorted(df_test.columns))  # columns in alphabetical order
clist_test = df_test.loc[:, blockName[0]:blockName[2]]  # keep block01 through block48
# capitals
test = clist_test.iloc[2:3]  # third row only, kept as a one-row dataframe
print(test)

clist_test

          block01 block02 block03 block04 block05 block06 block07 block08  \
labels                                                                      
targetSyl      ro     per     cer     gan     cul     cam      mo      li   

          block09 block10   ...   block39 block40 block41 block42 block43  \
labels                      ...                                             
targetSyl     bol      co   ...        pe     mor      pa      co      vo   

          block44 block45 block46 block47 block48  
labels                                             
targetSyl     lim     cas      ba      ca      mu  

[1 rows x 48 columns]


Unnamed: 0_level_0,block01,block02,block03,block04,block05,block06,block07,block08,block09,block10,...,block39,block40,block41,block42,block43,block44,block45,block46,block47,block48
labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
conditionA,stimuliListA0,stimuliListA1,stimuliListA2,stimuliListA3,stimuliListA4,stimuliListA5,stimuliListA6,stimuliListA7,stimuliListA8,stimuliListA9,...,stimuliListA38,stimuliListA39,stimuliListA40,stimuliListA41,stimuliListA42,stimuliListA43,stimuliListA44,stimuliListA45,stimuliListA46,stimuliListA47
listLabel,Real10,Real11,Pseudo02,Real03,Real21,Pseudo23,Pseudo21,Pseudo04,Pseudo20,Pseudo09,...,Pseudo08,Real09,Pseudo22,Real13,Pseudo01,Real08,Pseudo18,Pseudo17,Real07,Real20
targetSyl,ro,per,cer,gan,cul,cam,mo,li,bol,co,...,pe,mor,pa,co,vo,lim,cas,ba,ca,mu
carrierItem,rosario,permiso,cerliza,ganchillo,culebra,camofa,morbollo,limucna,bolipo,corteta,...,periga,morera,palbira,corona,volpeje,limpieza,casgollo,balbusa,castillo,murmullo
filleritem1,pantalla,sandía,vofida,tejado,pintura,depite,cabsiva,poncilla,ricofo,palcillo,...,salfrabla,zapato,denciña,bengala,tidencio,portillo,demida,soponso,punzada,testigo
filleritem2,lenguaje,soltura,moncura,zumbido,sarmiento,neteno,dinona,sobisco,rartanto,bitero,...,canveta,venganza,cecaja,lanzazo,parsida,sobaco,nemeto,zictencia,mortaja,vestuario
filleritem3,cambiazo,ginebra,biñena,postigo,verdura,nenedo,sucema,nurese,perdisno,depiaro,...,gantina,fastidio,vestolcio,ventisca,ducoste,respuesta,pirijuo,ruñansa,puntilla,poltrona
filleritem4,campiña,solvencia,tovero,portento,tobillo,dedioro,puscalla,fascuna,vifema,nansaso,...,nuseto,piltrafa,cicora,vigencia,banfalla,garbanzo,bercillo,cuntefa,luneta,lactancia
filleritem5,putada,jactancia,lunino,tortazo,vertiente,sobvondo,cicisa,ferfuera,nundero,tudala,...,zaldizo,pulgada,sinvera,pulsera,sitena,soborno,penvalla,cicaigo,vendaje,vivencia
filleritem6,pileta,gusano,tunsuja,reliquia,tarjeta,tunruja,nacañe,fañeilla,recoltre,delido,...,naverdo,vistazo,simedo,pesquisa,suseña,pozuelo,voltura,soreto,remanso,tapete
