# Vocal Tract geometry JSON generator

Given a CSV file of vocal tract (VT) geometries, generate the equivalent JSON.

In [150]:
import csv
import os

In [151]:
# List the vocal-tract CSV files available as input (IPython shell shortcut).
ls ../parameters/vocal_tracts

Story1996_male01_cm2.csv  Story2004_male01_cm2.csv
Story1998_female_cm2.csv  Story_vowel_defs.csv


In [182]:
# One record per source CSV file. Besides provenance ('source', 'gender',
# 'units'), each record says which line(s) hold the column headers and which
# column(s) hold the row index, using the same names as the keyword
# arguments of tract_csv_to_json.
csvfiles = [
    {
        'file': 'Story1996_male01_cm2.csv',
        'source': 'Story 1996',
        'gender': 'male',
        'header_lines': [0],
        'header_titles': ['phoneme'],
        'main_header': None,
        'index_cols': [0],
        'index_titles': ['segment nbr'],
        'main_index': 0,
        'units': 'cm2',
    },
    {
        'file': 'Story2004_male01_cm2.csv',
        'source': 'Story 2004',
        'gender': 'male',
        'header_lines': [0],
        'header_titles': ['phoneme'],
        'main_header': None,
        'index_cols': [0],
        'index_titles': ['segment nbr'],
        'main_index': 0,
        'units': 'cm2',
    },
    {
        # This file carries a second index column next to the segment number.
        'file': 'Story1998_female_cm2.csv',
        'source': 'Story 1998',
        'gender': 'female',
        'header_lines': [0],
        'header_titles': ['phoneme'],
        'main_header': None,
        'index_cols': [0, 1],
        'index_titles': ['segment nbr', 'distance from glottis'],
        'main_index': 0,
        'units': 'cm2',
    },
]

In [183]:
# Scratch settings for trying the converter on a single entry of csvfiles.
idx = 2
file = csvfiles[idx]['file']
csvfile = os.path.join('../parameters/vocal_tracts/', file)
sep = ','

# Header layout: which line(s) hold column titles and what they are called.
header_lines, header_titles, main_header = [0], ['phoneme'], None

# Index layout: which column(s) identify a row and what they are called.
index_cols, index_titles, main_index = [0], ['segment nbr'], 0


In [184]:

def _read_table(csvfile, sep, header_lines, header_titles, index_cols):
    """Parse the CSV file into header rows and data rows.

    Returns ``(headers, rows, ncols)``: ``headers`` maps each header title to
    the non-index cells of its header line, ``rows`` maps each row key to the
    non-index cells of that row (all still strings), and ``ncols`` is the
    width of the widest data row.
    """
    # Titles pair with header lines by position, so header_lines=[1] uses
    # header_titles[0] rather than header_titles[1].
    title_pos = {line_no: pos for pos, line_no in enumerate(header_lines)}
    multi_index = len(index_cols) > 1

    headers = {}
    rows = {}
    ncols = 0
    unnamed = 0

    with open(csvfile, 'r') as f:
        for lno, line in enumerate(f):
            stripped = line.strip()
            cells = stripped.split(sep)
            payload = [cell for col, cell in enumerate(cells) if col not in index_cols]

            if lno in header_lines:
                headers[header_titles[title_pos[lno]]] = payload
                continue
            if not stripped:
                # A blank line has no cells at all, so it is not a data row.
                continue

            key_parts = []
            for col, cell in enumerate(cells):
                if col in index_cols:
                    if not cell:
                        # Rows with an empty index cell still need a unique key.
                        cell = 'unnamed_' + str(unnamed)
                        unnamed += 1
                    key_parts.append(cell)
            key = tuple(key_parts) if multi_index else key_parts[0]

            rows[key] = payload
            # Use the widest row so a short final row cannot drop columns.
            ncols = max(ncols, len(payload))

    return headers, rows, ncols


def _build_index(rows, index_cols, index_titles, main_index):
    """Describe the row keys of ``rows`` in terms of ``index_titles``."""
    if main_index is not None and len(index_cols) > 1:
        # Keyed by the main index value; the other index values become fields.
        index = {}
        for key in rows:
            index[key[main_index]] = {
                title: part
                for pos, (title, part) in enumerate(zip(index_titles, key))
                if pos != main_index
            }
        return index

    index = []
    for key in rows:
        # With a single index column the key is a bare string; wrap it so that
        # zip pairs it with its title instead of iterating its characters.
        parts = key if isinstance(key, tuple) else (key,)
        index.append(dict(zip(index_titles, parts)))
    return index


def _transpose(headers, rows, ncols, index_cols, main_index):
    """Turn the row-keyed table into one record per data column."""
    records = [{'headers': {}, 'values': {}} for _ in range(ncols)]
    use_main = main_index is not None and len(index_cols) > 1

    for key, cells in rows.items():
        label = key[main_index] if use_main else str(key)
        for title, header_cells in headers.items():
            for record, header_val, cell in zip(records, header_cells, cells):
                record['headers'][title] = header_val
                if cell:
                    # Empty cells are skipped; non-numeric cells stay strings.
                    try:
                        record['values'][label] = float(cell)
                    except ValueError:
                        record['values'][label] = cell
    return records


def _values_to_arrays(records):
    """Move integer-labelled values of each record into ``values['array']``."""
    for record in records:
        values = record['values']
        numbered = {}
        for label in list(values):
            try:
                pos = int(label)
            except ValueError:
                continue
            # Pop by the original label so that e.g. '01' is removed correctly.
            numbered[pos] = values.pop(label)

        if numbered:
            first, last = min(numbered), max(numbered)
            # Positions without a value between first and last become None.
            values['array'] = [numbered.get(pos) for pos in range(first, last + 1)]
        else:
            values['array'] = []


def tract_csv_to_json(csvfile, sep = ',', header_lines = [0], header_titles = ['phoneme'], main_header = None,
                      index_cols = [0], index_titles = ['segment nbr'], main_index = 0, transpose=True, num_to_arrays=True):
    """Convert a CSV table of vocal-tract data into a JSON-ready structure.

    The list defaults are only read, never mutated.

    Args:
        csvfile: Path of the CSV file to read.
        sep: Cell separator passed to ``str.split``.
        header_lines: Zero-based line numbers that hold column headers.
        header_titles: Name for each header line, matched by position in
            ``header_lines``.
        main_header: Accepted for interface compatibility; not used.
        index_cols: Zero-based columns that identify a row. With more than
            one column the row key is a tuple of their values.
        index_titles: Name for each index column, in column order.
        main_index: Position within the row key used to label a row when
            there are several index columns; ``None`` labels rows with
            ``str(key)`` instead.
        transpose: If true, return one record per data column, each with a
            ``'headers'`` dict and a ``'values'`` dict keyed by row label.
            If false, return the rows as read (row key -> list of cell
            strings) and ignore ``num_to_arrays``.
        num_to_arrays: If true, values whose row label parses as an integer
            are moved into ``values['array']``, ordered by that integer from
            the smallest to the largest label, with ``None`` for gaps. A
            record without such labels gets an empty array.

    Returns:
        ``{'keys': ..., 'table': ...}``. ``'keys'`` is a dict keyed by the
        main index value when there are several index columns and
        ``main_index`` is set, otherwise a list with one dict per row.

    Raises:
        IndexError: If ``header_titles`` is shorter than ``header_lines`` or
            a data row lacks the requested index column.
    """
    headers, rows, ncols = _read_table(csvfile, sep, header_lines, header_titles, index_cols)
    indexdata = _build_index(rows, index_cols, index_titles, main_index)

    if not transpose:
        # The array conversion is defined on transposed records only.
        return {'keys': indexdata, 'table': rows}

    table = _transpose(headers, rows, ncols, index_cols, main_index)
    if num_to_arrays:
        _values_to_arrays(table)

    return {'keys': indexdata, 'table': table}

In [185]:
import re

# Convert every file listed in csvfiles and reshape each table entry into
# {'headers': {...metadata...}, 'radii': [...]}.
alljs = []

# Record keys that are keyword arguments of tract_csv_to_json; every other
# key of a record is metadata that is copied into each entry's headers.
fargs = 'csvfile sep header_lines header_titles main_header index_cols index_titles main_index transpose'.split()

for rec in csvfiles:
    csvfile = os.path.join('../parameters/vocal_tracts/', rec['file'])
    rec['csvfile'] = csvfile
    # Iterate the record itself (not a set) so key order, and therefore the
    # JSON output, is the same on every run.
    kwargs = {x: rec[x] for x in rec if x in fargs}
    otherkeys = {x: rec[x] for x in rec if x not in fargs}

    js = tract_csv_to_json(**kwargs)
    for ln in js['table']:
        ln['headers'].update(otherkeys)
        # NOTE(review): stored under 'radii' although the records declare
        # 'units': 'cm2' -- confirm whether these are radii or areas.
        ln['radii'] = ln['values'].pop('array')
        # Whatever is left in 'values' (non-integer row labels) becomes a header.
        ln['headers'].update(ln['values'])
        del ln['values']
        # Group header keys that start with 'f' followed by digits ('f1', ...).
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        measured_formants = {
            k: v for k, v in ln['headers'].items() if re.match(r'f\d+', k)
        }
        if measured_formants:
            ln['headers']['measured_formants'] = measured_formants
            for k in measured_formants:
                del ln['headers'][k]
    alljs.append(js)

In [208]:
import jq

# Flatten the per-file results into one list holding every table entry.
jexp = jq.compile('.[].table | .[]')
newjs = list(jexp.input(alljs))

In [217]:
import json

# Write all entries to a single JSON file next to the source CSV files.
with open('../parameters/vocal_tracts/vt_all.json', 'w') as f:
    f.write(json.dumps(newjs))

In [215]:
# Show only the phoneme, gender and source of every entry.
list(jq.compile('.[].headers | with_entries(select([.key] | inside(["phoneme","gender","source"])))').input(newjs))

[{'phoneme': 'i', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 'I', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': '3', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 'ae', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': '^', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 'a', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 'c', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 'o', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 'U', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 'u', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': "3'", 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 'l', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 'm', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 'n', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 'ny', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 'p', 'gender': 'male', 'source': 'Story 1996'},
 {'phoneme': 't', 'ge

In [124]:
# Small nested sample used to try out jq expressions.
js = [
    {
        'level1a': {'level2a': 'a', 'level2b': 'b'},
        'level1b': {'blevel2a': 'a2', 'blevel2b': 'b2'},
    },
    {
        'level1a': {'level2a': 'a', 'level2b': 'b'},
        'level1b': {'blevel2a': 'a1', 'blevel2b': 'b1'},
    },
]

In [131]:
# Copy each element's 'level1a' object into a new 'test' field.
jexp = jq.compile('map(.test = .level1a)')
list(jexp.input(js))

[[{'level1a': {'level2a': 'a', 'level2b': 'b'},
   'level1b': {'blevel2a': 'a2', 'blevel2b': 'b2'},
   'test': {'level2a': 'a', 'level2b': 'b'}},
  {'level1a': {'level2a': 'a', 'level2b': 'b'},
   'level1b': {'blevel2a': 'a1', 'blevel2b': 'b1'},
   'test': {'level2a': 'a', 'level2b': 'b'}}]]

In [138]:
# Merge 'level1a' into 'level1b' for each element, then drop 'level1a'.
jexp = jq.compile('.[] | .level1b = .level1a + .level1b | del(.level1a)')
list(jexp.input(js))

[{'level1b': {'level2a': 'a',
   'level2b': 'b',
   'blevel2a': 'a2',
   'blevel2b': 'b2'}},
 {'level1b': {'level2a': 'a',
   'level2b': 'b',
   'blevel2a': 'a1',
   'blevel2b': 'b1'}}]

In [149]:
# Apply jq's flatten to each element of the sample.
jexp = jq.compile('.[] | flatten ')
list(jexp.input(js))

[[{'level2a': 'a', 'level2b': 'b'}, {'blevel2a': 'a2', 'blevel2b': 'b2'}],
 [{'level2a': 'a', 'level2b': 'b'}, {'blevel2a': 'a1', 'blevel2b': 'b1'}]]