In [1]:
# Display the value of every top-level expression in a cell, not just the last one.
# Later cells rely on this to show several intermediate values from a single cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Overview-and-requirements" data-toc-modified-id="Overview-and-requirements-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Overview and requirements</a></span></li><li><span><a href="#Import-data" data-toc-modified-id="Import-data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Import data</a></span><ul class="toc-item"><li><span><a href="#Hammond's-newdic" data-toc-modified-id="Hammond's-newdic-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Hammond's newdic</a></span></li></ul></li><li><span><a href="#Convert-phonological-representations-to-IPA---Hammond" data-toc-modified-id="Convert-phonological-representations-to-IPA---Hammond-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Convert phonological representations to IPA - Hammond</a></span><ul class="toc-item"><li><span><a href="#Define-Hammond-inventory-->-IPA-mapping" data-toc-modified-id="Define-Hammond-inventory-->-IPA-mapping-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Define Hammond inventory -&gt; IPA mapping</a></span></li><li><span><a href="#Transform-transcriptions-to-IPA..." data-toc-modified-id="Transform-transcriptions-to-IPA...-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Transform transcriptions to IPA...</a></span></li><li><span><a href="#Transform-entries-to-have-IPA-transcriptions..." data-toc-modified-id="Transform-entries-to-have-IPA-transcriptions...-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Transform entries to have IPA transcriptions...</a></span></li><li><span><a href="#Write-to-file-/-read-back-in" data-toc-modified-id="Write-to-file-/-read-back-in-3.4"><span class="toc-item-num">3.4&nbsp;&nbsp;</span>Write to file / read back in</a></span></li></ul></li></ul></div>

# Overview and requirements

**Notebook author:** emeinhardt@ucsd.edu

This is a notebook documenting the conversion of transcriptions of the English lexicon in 
 - Hammond's newdic 
 
to IPA.

I am running Python 3.6.5 and Jupyter Notebook 5.5.0; all other packages are those shipped with Anaconda 5.2. This notebook assumes the current working directory contains
 - a copy of Hammond's mysterious 'newdic' transcribed lexicon of English http://dingo.sbs.arizona.edu/~hammond/lsasummer11/newdic

# Import data

In [2]:
%pwd

'/mnt/cube/home/AD/emeinhar/wr'

In [3]:
import csv

## Hammond's newdic

In [4]:
%ls Hammond*

"Hammond's mysterious newdic.txt"


In [5]:
# File name of the raw lexicon; it is opened relative to the current working directory.
hammond_fn = "Hammond's mysterious newdic.txt"

In [6]:
# Load every row of the tab-separated lexicon into a list of dicts.
# The file has no header row (its first line is already a lexical entry),
# so the column names are supplied explicitly.
fieldnames = ['Transcription', 'stressInfoA', 'stressInfoB', 'Orthography', 'Frequency', 'PoSs']
# newline='' is what the csv module asks for on any file it parses, so that it can
# handle line endings itself; the output file is opened the same way when written.
# NOTE(review): the encoding is left at the platform default, as before - confirm
# the raw file is plain ASCII/UTF-8 if this is ever run on another machine.
with open(hammond_fn, newline='') as csvfile:
    my_reader = csv.DictReader(csvfile, delimiter='\t', fieldnames=fieldnames)
    newdic_raw = list(my_reader)

# Sanity check: show the first entry and confirm it has one key per column.
newdic_raw[0]
len(newdic_raw[0].keys())

OrderedDict([('Transcription', 'x'),
             ('stressInfoA', '_'),
             ('stressInfoB', 'S1'),
             ('Orthography', 'a'),
             ('Frequency', '23178'),
             ('PoSs', '(N IA VB PP)')])

6

In [11]:
#%cd ..

# Convert phonological representations to IPA - Hammond

In [12]:
# Look at one raw entry (index 2) to see the transcription format before conversion.
newdic_raw[2]

OrderedDict([('Transcription', 'xb@k'),
             ('stressInfoA', "_'"),
             ('stressInfoB', 'S2'),
             ('Orthography', 'aback'),
             ('Frequency', '2'),
             ('PoSs', '(AV)')])

We want to convert the transcription field into a string of IPA symbols, with segments separated by a '.' (for example, `xb@k` becomes `ə.b.æ.k`).

## Define Hammond inventory -> IPA mapping

In [13]:
# Pairs of (Hammond transcription symbol, IPA string). Every Hammond symbol is a single
# character; the IPA side may be several characters (diphthongs, affricates).
# The relation is many-to-one in two places: 'I' and '|' both map to 'ɪ', and 'X' and 'R'
# both map to 'ɚ', so inverting it cannot recover the original symbol for those sounds.
# NOTE(review): syllabic n and m appear to be written with a dot below, whereas syllabic l
# uses the IPA syllabicity mark - confirm this difference is intended.
hammond_IPA_relation = [
 ('h', 'h'),
 ('S', 'ʃ'),
 ('p', 'p'),
 ('x', 'ə'),
 ('m', 'm'),
 ('y', 'j'),
 ('v', 'v'),
 ('^', 'ʌ'),
 ('o', 'oʊ'),
 ('u', 'u'),
 ('I', 'ɪ'),
 ('G', 'ŋ'),
 ('N', 'ṇ'), #syllabic n is NOT in diphone gating inventory
 ('|', 'ɪ'), #as near as I can tell, this is unstressed/reduced 'ɪ'
 ('Z', 'ʒ'),
 ('L', 'l̩'),
 ('M', 'ṃ'), #syllabic m is NOT in diphone gating inventory
 ('i', 'i'),
 ('r', 'r'),
 ('g', 'g'),
 ('O', 'ɔɪ'),
 ('T', 'θ'),
 ('n', 'n'),
 ('J', 'dʒ'),
 ('d', 'd'),
 ('k', 'k'),
 ('W', 'aʊ'),
 ('f', 'f'),
 ('D', 'ð'),
 ('U', 'ʊ'),
 ('z', 'z'),
 ('Y', 'aɪ'),
 ('b', 'b'),
 ('X', 'ɚ'), #r-colored schwa - stressed
 ('a', 'ɑ'),
 ('s', 's'),
 ('e', 'eɪ'),
 ('C', 'tʃ'),
 ('t', 't'),
 ('R', 'ɚ'), #r-colored schwa - UNstressed
 ('E', 'ɛ'),
 ('w', 'w'),
 ('l', 'l'),
 ('@', 'æ'),
 ('c', 'ɔ')] #ɔ is NOT in diphone gating data inventory
# Build the lookup table once. The converter below is called once per segment of every
# transcription in the lexicon, so rebuilding the dict inside the function on each call
# was repeated, wasted work.
_HAMMOND_TO_IPA = dict(hammond_IPA_relation)

def hammondToUnicodeIPA(diphoneSymbol):
    '''
    Map one symbol of Hammond's transcription alphabet to its IPA string.

    diphoneSymbol: a single symbol listed in hammond_IPA_relation (e.g. 'T').
    Returns the paired IPA string (e.g. 'θ'); it may be longer than one
    character (e.g. 'o' -> 'oʊ').
    Raises KeyError if the symbol is not listed in hammond_IPA_relation.
    '''
    return _HAMMOND_TO_IPA[diphoneSymbol]
print(hammondToUnicodeIPA('T'))
def invertMapping(mydict):
    '''
    Return a new dict that maps each value of mydict back to its key.

    When several keys share a value, the key that comes last in iteration
    order is the one kept.
    '''
    return {value: key for key, value in mydict.items()}
# Invert the table once instead of on every lookup.
# The forward relation is not one-to-one ('I' and '|' both give 'ɪ'; 'X' and 'R' both
# give 'ɚ'), so the inverse keeps only the later-listed symbol: 'ɪ' -> '|', 'ɚ' -> 'R'.
_IPA_TO_HAMMOND = invertMapping(dict(hammond_IPA_relation))

def unicodeIPAToHammond(unicodeIPAsymbol):
    '''
    Map one IPA string from hammond_IPA_relation back to a Hammond symbol.

    unicodeIPAsymbol: an IPA string appearing on the right-hand side of
    hammond_IPA_relation (e.g. 'θ').
    Returns the Hammond symbol paired with it (e.g. 'T'). For the two IPA
    strings shared by more than one Hammond symbol, the later-listed symbol
    is returned, so this is not a true inverse of hammondToUnicodeIPA.
    Raises KeyError if the IPA string is not in the relation.
    '''
    return _IPA_TO_HAMMOND[unicodeIPAsymbol]
print(unicodeIPAToHammond('θ'))

θ
T


## Transform transcriptions to IPA...

In [14]:
# Walk through the conversion step by step on one sample transcription:
# raw string -> tuple of Hammond symbols -> tuple of IPA segments -> dotted IPA string.
t = newdic_raw[2]['Transcription']
t
tuple(t)
tuple(hammondToUnicodeIPA(symbol) for symbol in t)
'.'.join(hammondToUnicodeIPA(symbol) for symbol in t)

'xb@k'

('x', 'b', '@', 'k')

('ə', 'b', 'æ', 'k')

'ə.b.æ.k'

In [15]:
def dottedStringToTuple(ds):
    '''
    Split a '.'-separated string into a tuple of its segments.

    Note that the empty string yields ('',), a one-element tuple holding an
    empty string, not an empty tuple.
    '''
    return tuple(ds.split('.'))


def tupleToDottedString(t):
    '''
    Join an iterable of strings into one string with '.' between the items.

    An empty iterable yields the empty string.
    '''
    return '.'.join(t)

In [16]:
# Same conversion as in the step-by-step cell, now using the tupleToDottedString helper.
tupleToDottedString( tuple(map(hammondToUnicodeIPA, tuple(t))) )

'ə.b.æ.k'

In [17]:
def hammondTranscriptionToIPA(hammond_trn):
    '''
    Convert a whole Hammond transcription into a dotted IPA string.

    Each character of hammond_trn is treated as one segment and looked up with
    hammondToUnicodeIPA; the resulting IPA segments are joined with '.'.
    A KeyError from the lookup propagates if a character is not in the mapping.
    '''
    ipa_segments = tuple(hammondToUnicodeIPA(segment) for segment in tuple(hammond_trn))
    return tupleToDottedString(ipa_segments)
hammondTranscriptionToIPA(t)

'ə.b.æ.k'

## Transform entries to have IPA transcriptions...

In [18]:
def edit_dict(the_dict, the_key, the_new_value):
    '''
    Set the_dict[the_key] to the_new_value in place and return the same dict.

    The dict passed in is mutated; returning it lets the call be composed
    with other expressions.
    '''
    the_dict[the_key] = the_new_value
    return the_dict

def modify_dict(the_dict, the_key, the_new_value):
    '''
    Return a plain-dict copy of the_dict with the_key set to the_new_value.

    The input is not modified. The copy is shallow, and it is always a plain
    dict even if the input is a dict subclass such as OrderedDict.
    '''
    return {**the_dict, the_key: the_new_value}

In [19]:
def IPAify_hammond_entry(entry):
    '''
    Return a copy of a lexicon entry whose 'Transcription' field is in dotted IPA.

    The entry passed in is left untouched; all other fields are carried over as-is.
    '''
    ipa_transcription = hammondTranscriptionToIPA(entry['Transcription'])
    return modify_dict(entry, 'Transcription', ipa_transcription)

In [20]:
# The sample entry before conversion, for comparison with the converted copy.
newdic_raw[2]

OrderedDict([('Transcription', 'xb@k'),
             ('stressInfoA', "_'"),
             ('stressInfoB', 'S2'),
             ('Orthography', 'aback'),
             ('Frequency', '2'),
             ('PoSs', '(AV)')])

In [21]:
# The first expression shows the converted copy; the second shows that the original
# entry in newdic_raw is left unchanged by the conversion.
IPAify_hammond_entry(newdic_raw[2])
newdic_raw[2]

{'Transcription': 'ə.b.æ.k',
 'stressInfoA': "_'",
 'stressInfoB': 'S2',
 'Orthography': 'aback',
 'Frequency': '2',
 'PoSs': '(AV)'}

OrderedDict([('Transcription', 'xb@k'),
             ('stressInfoA', "_'"),
             ('stressInfoB', 'S2'),
             ('Orthography', 'aback'),
             ('Frequency', '2'),
             ('PoSs', '(AV)')])

In [22]:
# Convert every entry of the lexicon; newdic_raw itself is not modified.
hammond_IPA = [IPAify_hammond_entry(raw_entry) for raw_entry in newdic_raw]
hammond_IPA[2]

{'Transcription': 'ə.b.æ.k',
 'stressInfoA': "_'",
 'stressInfoB': 'S2',
 'Orthography': 'aback',
 'Frequency': '2',
 'PoSs': '(AV)'}

## Write to file / read back in

In [23]:
%pwd

'/mnt/cube/home/AD/emeinhar/wr'

In [24]:
%ls *Hammond*

"Hammond's mysterious newdic.txt"


In [25]:
import csv

In [26]:
# Check the first converted entry before writing the lexicon out.
hammond_IPA[0]

{'Transcription': 'ə',
 'stressInfoA': '_',
 'stressInfoB': 'S1',
 'Orthography': 'a',
 'Frequency': '23178',
 'PoSs': '(N IA VB PP)'}

In [27]:
# Column names, in output order, for the file written below. These are the same six
# names that were used as DictReader fieldnames when the raw lexicon was loaded.
theFieldnames = ['Transcription', 'stressInfoA', 'stressInfoB', 'Orthography', 'Frequency', 'PoSs']
len(theFieldnames)
theFieldnames

6

['Transcription',
 'stressInfoA',
 'stressInfoB',
 'Orthography',
 'Frequency',
 'PoSs']

In [28]:
# Output file name without its extension; the extension is appended when the file is written.
newdic_IPA_stem = 'Hammond_newdic_IPA'

In [29]:
# Write the converted lexicon as UTF-8, tab-separated text with a header row.
# The file keeps a .csv extension even though the delimiter is a tab.
newdic_IPA_path = newdic_IPA_stem + '.csv'
with open(newdic_IPA_path, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=theFieldnames, delimiter='\t')
    writer.writeheader()
    writer.writerows(hammond_IPA)

In [30]:
%pwd

'/mnt/cube/home/AD/emeinhar/wr'

In [31]:
%ls Hammond*

 Hammond_newdic_IPA.csv  "Hammond's mysterious newdic.txt"
