In [22]:
#!/usr/bin/env python3
# What: this python script was generated manually by Steven Elmlinger in February 2019

# Why: Convert all ELAN (.eaf) files in a directory to csv files

#
#
# step 1: load packages
#
#

import glob     # Import glob to easily loop over files
import pympi    # Import pympi to work with elan files
import os
import pprint as pp
import pandas as pd
import numpy as np

#
#
# step 2: load data
#
#

# subject list

'''
Enter in all of the subject IDs into the variable below, comma separated like the commented out example below.
'''

subjectarray = np.array([1, 10, 18, 21, 22, 23, 52, 57, 59, 60, 61, 64, 65, 1001, 1002, 1004, 1005, 1006, 1007])


# Define variables
corpus_root = 'eaf' # this is the folder name containing all eaf files
ort_tier_names = ['Caregiver voc', 'Infant voc'] # this is the tier name, do one tier at a time, and then append
directory = os.fsencode('{}/eaf/'.format(corpus_root))

#
#
# step 3: Initialize aggregator
#
#

columns = ['sub', 'onset', 'offset', 'cat', 'tier']
output = pd.DataFrame(columns=columns)


for a in range(len(subjectarray)):
    sub = subjectarray[a]
#     print(sub)
    flag = -1
    # Loop over all elan files the corpusroot subdirectory called eaf
    for file_path in glob.glob('{}/*.eaf'.format(corpus_root)):
        # Initialize the elan file
        flag = flag+1
        filename = os.path.basename(file_path)  #os.listdir(directory)[flag
        if filename == '.DS_Store':
            os.remove(file_path)  #os.path.join(directory.decode('utf-8'), filename.decode('utf-8')))
            break
        file = os.fsdecode(filename)
        print(file)

           
        if file == "S%d.eaf" % sub:
            # if file == "%d reliability subject.eaf" % sub:
            eafob = pympi.Elan.Eaf(file_path)
            # Loop over all the defined tiers that contain orthography
            for ort_tier in ort_tier_names:
                # If the tier is not present in the elan file spew an error and
                # continue. This is done to avoid possible KeyErrors
                # If the tier is present we can loop through the annotation data
                counter = -1
                all_utt = pd.DataFrame(columns=columns)
                for annotation in eafob.get_annotation_data_for_tier(ort_tier):
                    # We are only interested in the utterance
                    counter = counter+1
                    all_utt.loc[counter, 'sub'] = sub
                    all_utt.loc[counter, 'onset'] = annotation[0]/1000
                    all_utt.loc[counter, 'offset'] = annotation[1]/1000
                    all_utt.loc[counter, 'cat'] = annotation[2]
                    all_utt.loc[counter, 'tier'] = ort_tier
                output = output._append(all_utt)
                output = output.sort_values('onset')
#                 print(output)
                
        print(output)
#
#
# step 4: output individual .csv files
#
#

output.to_csv('tt_ab.csv',index = False, header = True)


S57.eaf
Empty DataFrame
Columns: [sub, onset, offset, cat, tier]
Index: []
S52.eaf
Empty DataFrame
Columns: [sub, onset, offset, cat, tier]
Index: []
S1006.eaf
Empty DataFrame
Columns: [sub, onset, offset, cat, tier]
Index: []
S1007.eaf
Empty DataFrame
Columns: [sub, onset, offset, cat, tier]
Index: []
S21.eaf
Empty DataFrame
Columns: [sub, onset, offset, cat, tier]
Index: []
S23.eaf
Empty DataFrame
Columns: [sub, onset, offset, cat, tier]
Index: []
S1005.eaf
Empty DataFrame
Columns: [sub, onset, offset, cat, tier]
Index: []
S1004.eaf
Empty DataFrame
Columns: [sub, onset, offset, cat, tier]
Index: []
S22.eaf
Empty DataFrame
Columns: [sub, onset, offset, cat, tier]
Index: []
S1001.eaf
Empty DataFrame
Columns: [sub, onset, offset, cat, tier]
Index: []
S1002.eaf
Empty DataFrame
Columns: [sub, onset, offset, cat, tier]
Index: []
S18.eaf
Empty DataFrame
Columns: [sub, onset, offset, cat, tier]
Index: []
S1.eaf
    sub    onset   offset                                                cat  \
0

    sub    onset   offset                                                cat  \
0    18    0.281    1.403                             hai visto quanti? (QA)   
0     1    0.783    1.696                               questi piattini (RE)   
1     1    1.696    2.601                              mescoli la pappa (DA)   
2     1    2.603    4.607  e gliene dai un pochino alla bambolina forse? ...   
1    18    3.278    3.728                                                xxx   
..   ..      ...      ...                                                ...   
218   1  596.307  597.411                              chiudi gli occhi (DA)   
219   1  597.424  598.209                              girati di lato.\t(DA)   
220   1  598.212  599.048                               non devi vedere (DA)   
221   1  599.446  600.395                                      ferma lì (DA)   
25   18  665.843  667.242                                                      

              tier  
0    Caregiver voc

    sub    onset   offset                     cat           tier
0    22    0.007    0.735      cos’è quello\t(QE)  Caregiver voc
0    18    0.281    1.403  hai visto quanti? (QA)  Caregiver voc
0     1    0.783    1.696    questi piattini (RE)  Caregiver voc
1     1    1.696    2.601   mescoli la pappa (DA)  Caregiver voc
0    22     2.02    2.914                     xxx     Infant voc
..   ..      ...      ...                     ...            ...
186  23  658.785  660.511   sì sì va bene dai (C)  Caregiver voc
143  23   659.92  660.311                     xxx     Infant voc
25   18  665.843  667.242                             Infant voc
187  23  676.745  677.943  che matta sei, (AL RO)  Caregiver voc
188  23  678.599  679.203            lo sai? (QO)  Caregiver voc

[1738 rows x 5 columns]
S1005.eaf
    sub    onset   offset                     cat           tier
0    22    0.007    0.735      cos’è quello\t(QE)  Caregiver voc
0    18    0.281    1.403  hai visto quanti? (QA)  Care

    sub    onset   offset                     cat           tier
0    22    0.007    0.735      cos’è quello\t(QE)  Caregiver voc
0    18    0.281    1.403  hai visto quanti? (QA)  Caregiver voc
0     1    0.783    1.696    questi piattini (RE)  Caregiver voc
1     1    1.696    2.601   mescoli la pappa (DA)  Caregiver voc
0    22     2.02    2.914                     xxx     Infant voc
..   ..      ...      ...                     ...            ...
186  23  658.785  660.511   sì sì va bene dai (C)  Caregiver voc
143  23   659.92  660.311                     xxx     Infant voc
25   18  665.843  667.242                             Infant voc
187  23  676.745  677.943  che matta sei, (AL RO)  Caregiver voc
188  23  678.599  679.203            lo sai? (QO)  Caregiver voc

[2397 rows x 5 columns]
S64.eaf
    sub    onset   offset                     cat           tier
0    22    0.007    0.735      cos’è quello\t(QE)  Caregiver voc
0    18    0.281    1.403  hai visto quanti? (QA)  Caregi

    sub    onset   offset                                                cat  \
0    22    0.007    0.735                                 cos’è quello\t(QE)   
0    18    0.281    1.403                             hai visto quanti? (QA)   
0     1    0.783    1.696                               questi piattini (RE)   
1     1    1.696    2.601                              mescoli la pappa (DA)   
0    61    1.728    5.449  Francesca vogliamo giocare, vogliamo giocare c...   
..   ..      ...      ...                                                ...   
186  23  658.785  660.511                              sì sì va bene dai (C)   
143  23   659.92  660.311                                                xxx   
25   18  665.843  667.242                                                      
187  23  676.745  677.943                             che matta sei, (AL RO)   
188  23  678.599  679.203                                       lo sai? (QO)   

              tier  
0    Caregiver voc

      sub    onset   offset  \
0      22    0.007    0.735   
0    1004    0.234    0.998   
0    1006    0.256    0.682   
0      18    0.281    1.403   
0    1006    0.763    1.801   
..    ...      ...      ...   
143    23   659.92  660.311   
140  1005  665.186  670.803   
25     18  665.843  667.242   
187    23  676.745  677.943   
188    23  678.599  679.203   

                                                   cat           tier  
0                                   cos’è quello\t(QE)  Caregiver voc  
0                                            Via! (AR)  Caregiver voc  
0                                                  xxx     Infant voc  
0                               hai visto quanti? (QA)  Caregiver voc  
0                                        Uh guarda (C)  Caregiver voc  
..                                                 ...            ...  
143                                                xxx     Infant voc  
140  Chi è? Papà?\t (QE QE) (ride) (AL) Chi è? Papà

In [23]:
data = pd.read_csv('tt_ab.csv')
data.head(10)
print(data)

       sub    onset   offset  \
0       22    0.007    0.735   
1     1004    0.234    0.998   
2     1006    0.256    0.682   
3       18    0.281    1.403   
4     1006    0.763    1.801   
...    ...      ...      ...   
4382    23  659.920  660.311   
4383  1005  665.186  670.803   
4384    18  665.843  667.242   
4385    23  676.745  677.943   
4386    23  678.599  679.203   

                                                    cat           tier  
0                                    cos’è quello\t(QE)  Caregiver voc  
1                                             Via! (AR)  Caregiver voc  
2                                                   xxx     Infant voc  
3                                hai visto quanti? (QA)  Caregiver voc  
4                                         Uh guarda (C)  Caregiver voc  
...                                                 ...            ...  
4382                                                xxx     Infant voc  
4383  Chi è? Papà?\t (QE QE) (r

In [24]:
data['group'] = ''
data.loc[(data['sub'] < 50), 'Group'] = 1
data.loc[(data['sub'] > 50)&(data['sub'] < 1000), 'Group'] = 2
data.loc[(data['sub'] > 1000), 'Group'] = 3

data.index = data.index + 1

data['latency']=''
data['latency group']=''
for i in data.index:
    if i<data.index.max():
        before, new = data._get_value(i,'offset'), data._get_value(i+1,'onset')
        oldtier, newtier = data._get_value(i,'tier'), data._get_value(i+1,'tier')
        data._set_value(i,'latency',before-new)
        if oldtier==newtier:
            data._set_value(i+1,'latency group','within person')
        elif oldtier=='Caregiver voc' and newtier=='Infant voc':
            data._set_value(i+1,'latency group','cg-infant')
        elif oldtier=='Infant voc' and newtier=='Caregiver voc':
            data._set_value(i+1,'latency group','infant-cg')
    else:
        break
            
data.to_csv('tt_output_newb.csv', index = True, header = True)
