**Eric Meinhardt / emeinhardt@ucsd.edu**

In [1]:
#Prints **all** console output, not just last item in cell 
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import os
import csv

In [3]:
from collections import Counter

In [4]:
from random import choice

In [5]:
from functools import reduce
from itertools import takewhile, product

In [6]:
# from joblib import Parallel, delayed

# J = 10
# BACKEND = 'multiprocessing'
# # BACKEND = 'loky'
# V = 10
# PREFER = 'processes'
# # PREFER = 'threads'

# def par(gen_expr):
#     return Parallel(n_jobs=J, backend=BACKEND, verbose=V, prefer=PREFER)(gen_expr)

# def identity(x):
#     return x

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Overview-&amp;-Dependencies" data-toc-modified-id="Overview-&amp;-Dependencies-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Overview &amp; Dependencies</a></span><ul class="toc-item"><li><span><a href="#Dependencies" data-toc-modified-id="Dependencies-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Dependencies</a></span></li><li><span><a href="#Outputs" data-toc-modified-id="Outputs-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Outputs</a></span></li></ul></li><li><span><a href="#Reading-in-the-data" data-toc-modified-id="Reading-in-the-data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Reading in the data</a></span><ul class="toc-item"><li><span><a href="#CMU-pronouncing-dictionary" data-toc-modified-id="CMU-pronouncing-dictionary-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>CMU pronouncing dictionary</a></span></li></ul></li><li><span><a href="#Applying-the-flapping-rule-/-creating-surface-forms" data-toc-modified-id="Applying-the-flapping-rule-/-creating-surface-forms-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Applying the flapping rule / creating surface forms</a></span><ul class="toc-item"><li><span><a href="#Functions-for-manipulating-transcriptions" data-toc-modified-id="Functions-for-manipulating-transcriptions-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Functions for manipulating transcriptions</a></span></li><li><span><a href="#Carving-up-the-inventory-and-defining-symbol-classes" data-toc-modified-id="Carving-up-the-inventory-and-defining-symbol-classes-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Carving up the inventory and defining symbol classes</a></span></li><li><span><a href="#Applying-the-flapping-rule" data-toc-modified-id="Applying-the-flapping-rule-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Applying the flapping rule</a></span></li><li><span><a href="#Creating-a-list-of-positive-input-output-examples" data-toc-modified-id="Creating-a-list-of-positive-input-output-examples-3.4"><span class="toc-item-num">3.4&nbsp;&nbsp;</span>Creating a list of positive input-output examples</a></span></li></ul></li></ul></div>

# Overview & Dependencies

The goal of this notebook is processing the CMU pronouncing dictionary to create (input, output) / (underlying, surface) pairs for learning the American English flapping rule discussed in Gildea & Jurafsky's 1996 paper "Learning bias and phonological-rule induction".

## Dependencies

 - **Transcriptions:** The version of the CMU pronouncing dictionary processed here (and assumed to be in the working directory) is taken from https://github.com/emeinhardt/cmu-ipa. Please see the documentation there for more on what processing goes into that file.
 - **`Unix`-like OS:** Some Unix-like shell commands are used throughout, though they aren't essential.

## Outputs

Currently the only output of the notebook is a `.tsv` file with positive examples of flappable URs and the flapped SRs.

# Reading in the data

## CMU pronouncing dictionary

In [7]:
os.getcwd()
root_dir = os.getcwd()

'/mnt/cube/home/AD/emeinhar/gj-flap-data'

In [8]:
os.listdir()

['Recreating the Gildea & Jurafsky (1996) Flapping Data.ipynb',
 '__pycache__',
 '.ipynb_checkpoints',
 '.gitignore',
 'COCA',
 '.git',
 'cmudict-0.7b_IPA_stressed.tsv',
 'positive_flapping_io_examples.tsv']

In [9]:
%cat -n cmudict-0.7b_IPA_stressed.tsv | head -60

     1	Orthography	Transcription
     2	!EXCLAMATION-POINT	ɛ2.k.s.k.l.ʌ0.m.eɪ1.ʃ.ʌ0.n.p.ɔɪ2.n.t
     3	"CLOSE-QUOTE	k.l.oʊ1.z.k.w.oʊ1.t
     4	"DOUBLE-QUOTE	d.ʌ1.b.ʌ0.l.k.w.oʊ1.t
     5	"END-OF-QUOTE	ɛ1.n.d.ʌ0.v.k.w.oʊ1.t
     6	"END-QUOTE	ɛ1.n.d.k.w.oʊ1.t
     7	"IN-QUOTES	ɪ1.n.k.w.oʊ1.t.s
     8	"QUOTE	k.w.oʊ1.t
     9	"UNQUOTE	ʌ1.n.k.w.oʊ1.t
    10	#HASH-MARK	h.æ1.m.ɑ2.ɹ.k
    11	#POUND-SIGN	p.aʊ1.n.d.s.aɪ2.n
    12	#SHARP-SIGN	ʃ.ɑ1.ɹ.p.s.aɪ2.n
    13	%PERCENT	p.ɚ0.s.ɛ1.n.t
    14	&AMPERSAND	æ1.m.p.ɚ0.s.æ2.n.d
    15	'ALLO	ɑ2.l.oʊ1
    16	'APOSTROPHE	ʌ0.p.ɑ1.s.t.ɹ.ʌ0.f.i0
    17	'BOUT	b.aʊ1.t
    18	'CAUSE	k.ʌ0.z
    19	'COURSE	k.ɔ1.ɹ.s
    20	'CUSE	k.j.u1.z
    21	'EM	ʌ0.m
    22	'END-INNER-QUOTE	ɛ1.n.d.ɪ1.n.ɚ0.k.w.oʊ1.t
    23	'END-QUOTE	ɛ1.n.d.k.w.oʊ1.t
    24	'FRISCO	f.ɹ.ɪ1.s.k.oʊ0
    25	'GAIN	g.ɛ1.n
    26	'INNER-QUOTE	ɪ1.n.ɚ0.k.w.oʊ1.t
    27	'KAY	k.eɪ1
    28	'M	ʌ0.m
    29	'N	ʌ0.n
    30	'QUOTE	k.w.oʊ1.t
    31	'RO

In [10]:
cmudict_filename = 'cmudict-0.7b_IPA_stressed.tsv'

In [11]:
lexicon_relation = []
with open(cmudict_filename, 'r',) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t', quoting=csv.QUOTE_NONE)
        for row in reader:
            lexicon_relation.append(row)

In [12]:
lexicon_relation[:5]

[OrderedDict([('Orthography', '!EXCLAMATION-POINT'),
              ('Transcription', 'ɛ2.k.s.k.l.ʌ0.m.eɪ1.ʃ.ʌ0.n.p.ɔɪ2.n.t')]),
 OrderedDict([('Orthography', '"CLOSE-QUOTE'),
              ('Transcription', 'k.l.oʊ1.z.k.w.oʊ1.t')]),
 OrderedDict([('Orthography', '"DOUBLE-QUOTE'),
              ('Transcription', 'd.ʌ1.b.ʌ0.l.k.w.oʊ1.t')]),
 OrderedDict([('Orthography', '"END-OF-QUOTE'),
              ('Transcription', 'ɛ1.n.d.ʌ0.v.k.w.oʊ1.t')]),
 OrderedDict([('Orthography', '"END-QUOTE'),
              ('Transcription', 'ɛ1.n.d.k.w.oʊ1.t')])]

In [13]:
lexicon = lexicon_relation

# Applying the flapping rule / creating surface forms

The flapping rule (in the ARPABET and approximately representing stress as Gildea & Jurafsky do) is $$\text{t} \rightarrow \text{dx} / V' r^* \_\_ V$$.

Below I write code for enumerating each of the dotted strings (i.e. substrings in the CMU transcriptions) that matches the condition for applying the rule.

## Functions for manipulating transcriptions

In [14]:
ds2t = lambda ds: tuple(ds.split('.'))
t2ds = lambda ts: '.'.join(ts)

In [15]:
union = lambda Ss: reduce(set.union, Ss)

In [16]:
word_to_inv = lambda ds: set(ds2t(ds))
lex_to_inv = lambda Ws: union(map(word_to_inv,
                                   Ws))
get_transcription = lambda r: r['Transcription']
get_transcriptions = lambda L: list(map(get_transcription,
                                        L))

In [17]:
transcriptions = get_transcriptions(lexicon)

Below is some code I have lying around for other purposes - it manipulates k-factors of a string (represented as a dotted string or as a tuple).

In [18]:
def dsToKfactors(k, ds):
    seq = ds2t(ds)
    l = len(seq)
    if k > l:
        return tuple()
    kFactor_start_indices = takewhile(lambda pair: pair[0] <= l-k, enumerate(seq))
    kFactors = tuple(seq[index[0]:index[0]+k] for index in kFactor_start_indices)
    return set(map(t2ds, kFactors))

def dsTo2factors(ds):
    return dsToKfactors(2, ds)

def dsTo3factors(ds):
    return dsToKfactors(3, ds)

def lexiconToKfactors(DSs, k):
    myDsToKfactors = lambda ds: dsToKfactors(k, ds)
    return union(map(set, map(myDsToKfactors, DSs)))

def lexiconTo2factors(DSs):
    return union(map(set, map(dsTo2factors, DSs)))

def lexiconTo3factors(DSs):
    return union(map(set, map(dsTo3factors, DSs)))

def compareKfactors(DSs_A, DSs_B, k):
    A = lexiconToKfactors(DSs_A, k)
    B = lexiconToKfactors(DSs_B, k)
    return {"A == B":A == B, "A - B": A - B, "B - A": B - A}

def sameKfactors(DSs_A, DSs_B, k):
    return compareKfactors(DSs_A, DSs_B, k)["A == B"]

def hasIllicitKfactors(W, illicit_k_factors):
    if type(W) == str:      
        # gather the k-factors into an immutable data structure
        illicit_kfs = tuple(illicit_k_factors)
        # get the set of k-factor lengths (values of k) among the illicit_kfs
        illicit_factor_lengths = set([len(ds2t(kf)) for kf in illicit_kfs])
        # map each k to the set of k-factors of dotted string ds
        kFactorSets = {kf_l:dsToKfactors(kf_l, W) for kf_l in illicit_factor_lengths}
        illegal_kfactors_discovered = tuple(ikf for ikf in illicit_kfs if ikf in kFactorSets[len(ds2t(ikf))])
        if illegal_kfactors_discovered == tuple():
            return False
        return illegal_kfactors_discovered
    else:
        myFunc = lambda w: hasIllicitKfactors(w, illicit_k_factors)
        results = tuple(map(myFunc, W))
        if not any(results):
            return False
        return set(t2ds(each) for each in results if each != False)

def dsToKfactorSequence(k, ds):
    seq = ds2t(ds)
    l = len(seq)
    if k > l:
        return tuple()
    kFactor_start_indices = takewhile(lambda pair: pair[0] <= l-k, enumerate(seq))
    kFactors = tuple(seq[index[0]:index[0]+k] for index in kFactor_start_indices)
    return tuple(map(t2ds, kFactors))

def threeFactorSequenceToDS(threeFactors):
    wLE = ds2t(threeFactors[0])[0]
    wRE = ds2t(threeFactors[-1])[-1]
    w_NE = '.'.join([ds2t(eachTriphone)[1] for eachTriphone in threeFactors])
    return '.'.join([wLE, w_NE, wRE])

## Carving up the inventory and defining symbol classes

In [19]:
inventory = lex_to_inv(transcriptions)
len(inventory)
inventory

69

{'aɪ0',
 'aɪ1',
 'aɪ2',
 'aʊ0',
 'aʊ1',
 'aʊ2',
 'b',
 'd',
 'dʒ',
 'eɪ0',
 'eɪ1',
 'eɪ2',
 'f',
 'g',
 'h',
 'i0',
 'i1',
 'i2',
 'j',
 'k',
 'l',
 'm',
 'n',
 'oʊ0',
 'oʊ1',
 'oʊ2',
 'p',
 's',
 't',
 'tʃ',
 'u0',
 'u1',
 'u2',
 'v',
 'w',
 'z',
 'æ0',
 'æ1',
 'æ2',
 'ð',
 'ŋ',
 'ɑ0',
 'ɑ1',
 'ɑ2',
 'ɔ0',
 'ɔ1',
 'ɔ2',
 'ɔɪ0',
 'ɔɪ1',
 'ɔɪ2',
 'ɚ0',
 'ɚ1',
 'ɚ2',
 'ɛ0',
 'ɛ1',
 'ɛ2',
 'ɪ0',
 'ɪ1',
 'ɪ2',
 'ɹ',
 'ʃ',
 'ʊ0',
 'ʊ1',
 'ʊ2',
 'ʌ0',
 'ʌ1',
 'ʌ2',
 'ʒ',
 'θ'}

In [20]:
vowels = {s for s in inventory if s[-1] in {'0', '1', '2'}}
len(vowels)
vowels

45

{'aɪ0',
 'aɪ1',
 'aɪ2',
 'aʊ0',
 'aʊ1',
 'aʊ2',
 'eɪ0',
 'eɪ1',
 'eɪ2',
 'i0',
 'i1',
 'i2',
 'oʊ0',
 'oʊ1',
 'oʊ2',
 'u0',
 'u1',
 'u2',
 'æ0',
 'æ1',
 'æ2',
 'ɑ0',
 'ɑ1',
 'ɑ2',
 'ɔ0',
 'ɔ1',
 'ɔ2',
 'ɔɪ0',
 'ɔɪ1',
 'ɔɪ2',
 'ɚ0',
 'ɚ1',
 'ɚ2',
 'ɛ0',
 'ɛ1',
 'ɛ2',
 'ɪ0',
 'ɪ1',
 'ɪ2',
 'ʊ0',
 'ʊ1',
 'ʊ2',
 'ʌ0',
 'ʌ1',
 'ʌ2'}

In [21]:
stressed_vowels = {v for v in vowels if v[-1] == '1' or v[-1] == '2'}
len(stressed_vowels)
stressed_vowels

30

{'aɪ1',
 'aɪ2',
 'aʊ1',
 'aʊ2',
 'eɪ1',
 'eɪ2',
 'i1',
 'i2',
 'oʊ1',
 'oʊ2',
 'u1',
 'u2',
 'æ1',
 'æ2',
 'ɑ1',
 'ɑ2',
 'ɔ1',
 'ɔ2',
 'ɔɪ1',
 'ɔɪ2',
 'ɚ1',
 'ɚ2',
 'ɛ1',
 'ɛ2',
 'ɪ1',
 'ɪ2',
 'ʊ1',
 'ʊ2',
 'ʌ1',
 'ʌ2'}

How many consecutive `ɹ`s are there actually in the transcriptions?

In [22]:
R = 'ɹ'
hasR = lambda ds: R in ds2t(ds)
hasRR = lambda ds: t2ds((R,R)) in ds2t(ds)
hasRRR = lambda ds: t2ds((R,R,R)) in ds2t(ds)

In [23]:
R_words = {1:list(filter(hasR, transcriptions)),
           2:list(filter(hasRR, transcriptions)),
           3:list(filter(hasRRR, transcriptions))}

In [24]:
len(R_words[1])
len(R_words[2])
len(R_words[3])

43343

0

0

At most 1.

In [25]:
zero_or_one_Rs = {'', R}

In [26]:
isNotEpsilon = lambda s: s != ''
removeEpsilon = lambda ts: tuple(filter(isNotEpsilon, 
                                        ts))
removeEpsilon(('i','','t','u'))

('i', 't', 'u')

Ok. Now we can enumerate all the LHSs for rules applying to strings of (stress-annotated) IPA symbols that together constitute a desugaring of the flapping rule...

In [27]:
LHSs = tuple(map(lambda ts: t2ds(removeEpsilon(ts)),
                 product(stressed_vowels, zero_or_one_Rs, 't', vowels)))
len(LHSs)
LHSs[:10]

2700

('ɑ1.t.ɑ1',
 'ɑ1.t.æ1',
 'ɑ1.t.ɑ2',
 'ɑ1.t.ɚ0',
 'ɑ1.t.eɪ2',
 'ɑ1.t.aʊ0',
 'ɑ1.t.eɪ1',
 'ɑ1.t.i1',
 'ɑ1.t.ɔ1',
 'ɑ1.t.oʊ1')

## Applying the flapping rule

In [28]:
flap = 'ɾ'
flap_substring_map = {lhs:lhs.replace('t', flap)
                      for lhs in LHSs}

def flappable(ds):
    return ds in LHSs

def has_flappable_substrings(ds):
    threeFactorSeq = dsToKfactorSequence(3, ds)
    return any(map(flappable,
                   threeFactorSeq))

def flappable_substrings(ds):
    if not has_flappable_substrings(ds):
        return tuple()
    threeFactorSeq = dsToKfactorSequence(3, ds)
    flappable3factors = tuple(filter(flappable,
                                     threeFactorSeq))
    return flappable3factors

def flap(ds_in):
    threeFactorSeq = dsToKfactorSequence(3, ds_in)
    print(threeFactorSeq)
    flapped_3FS = tuple([flap_substring_map.get(f, f)
                         for f in threeFactorSeq])
    print(flapped_3FS)
    ds_out = threeFactorSequenceToDS(flapped_3FS)
    return ds_out

In [29]:
flappable_wordforms = {w for w in transcriptions if has_flappable_substrings(w)}
# flappable_wordforms = set(par(delayed(identity)(w) for w in transcriptions if has_flappable_substrings(w)))
len(flappable_wordforms)

6457

In [30]:
wordforms_with_multiple_flapping_opportunities = {w for w in flappable_wordforms if len(flappable_substrings(w)) > 1}
# wordforms_with_multiple_flapping_opportunities = set(par(delayed(identity)(w) 
#                                                          for w in flappable_wordforms 
#                                                          if len(flappable_substrings(w)) > 1))
len(wordforms_with_multiple_flapping_opportunities)
wordforms_with_multiple_flapping_opportunities

56

{'d.ɛ1.t.ʌ0.n.eɪ2.t.ɚ0',
 'd.ɛ1.t.ʌ0.n.eɪ2.t.ɚ0.z',
 'd.ɛ1.t.ʌ0.n.eɪ2.t.ɪ0.ŋ',
 'd.ɛ1.t.ʌ0.n.eɪ2.t.ʌ0.d',
 'k.æ2.t.ʌ0.l.ɪ1.t.ɪ0.k',
 'k.ɑ2.t.ɑ0.l.ɑ0.n.oʊ1.t.oʊ0',
 'k.ʌ0.m.p.j.u1.t.ɚ0.dʒ.ɛ1.n.ɚ0.eɪ2.t.ʌ0.d',
 'k.ʌ0.n.k.æ1.t.ʌ0.n.eɪ2.t.ɪ0.ŋ',
 'k.ʌ0.n.k.æ1.t.ʌ0.n.eɪ2.t.ʌ0.d',
 'l.ɪ1.t.ɪ0.g.eɪ2.t.ɪ0.d',
 'l.ɪ1.t.ɪ0.g.eɪ2.t.ɪ0.ŋ',
 'l.ɪ1.t.ʌ0.g.eɪ2.t.ɚ0',
 'l.ɪ1.t.ʌ0.g.eɪ2.t.ɚ0.z',
 'l.ɪ2.t.ɚ0.ɑ1.t.i0',
 'm.aɪ2.t.ɔ1.t.ɪ0.k',
 'm.i1.t.i2.t.ɪ0.ŋ',
 'm.j.u1.t.eɪ2.t.ɪ0.d',
 'm.j.u1.t.ʌ0.l.eɪ2.t.ɪ0.d',
 'm.j.u1.t.ʌ0.l.eɪ2.t.ɪ0.ŋ',
 'm.j.u1.t.ʌ0.l.eɪ2.t.ʌ0.d',
 'm.oʊ1.t.ʌ0.v.eɪ2.t.ɪ0.ŋ',
 'm.oʊ1.t.ʌ0.v.eɪ2.t.ʌ0.d',
 'm.ɪ1.t.ʌ0.g.eɪ2.t.ɪ0.d',
 'm.ɪ1.t.ʌ0.g.eɪ2.t.ɪ0.ŋ',
 'n.ɑ2.n.ɔ2.t.oʊ0.m.oʊ1.t.ɪ0.v',
 's.ɛ2.m.aɪ0.ɔ2.t.ʌ0.m.æ1.t.ɪ0.k',
 's.ɛ2.m.i0.ɔ2.t.ʌ0.m.æ1.t.ɪ0.k',
 's.ɛ2.m.ɪ0.ɔ2.t.ʌ0.m.æ1.t.ɪ0.k',
 't.i2.t.oʊ2.t.eɪ1.l.ɚ0',
 't.ɪ1.t.ʌ0.l.eɪ2.t.ɪ0.d',
 't.ɪ1.t.ʌ0.l.eɪ2.t.ɪ0.ŋ',
 'tʃ.ɪ1.t.ɚ0.tʃ.æ1.t.ɚ0',
 'w.ɔ1.t.ɚ0.b.ɔ2.t.ʌ0.l',
 'w.ɔ1.t.ɚ0.b.ɔ2.t.ʌ0.l.z',
 'ɔ1.t.ʌ0.m.eɪ2.t.ɪ0.d',
 'ɔ1.t.ʌ0.m

In [31]:
flappable_example_wordform = choice(list(flappable_wordforms))
flappable_example_wordform
flap(flappable_example_wordform)

's.æ1.t.ɚ0.f.i1.l.d'

('s.æ1.t', 'æ1.t.ɚ0', 't.ɚ0.f', 'ɚ0.f.i1', 'f.i1.l', 'i1.l.d')
('s.æ1.t', 'æ1.ɾ.ɚ0', 't.ɚ0.f', 'ɚ0.f.i1', 'f.i1.l', 'i1.l.d')


's.æ1.ɾ.ɚ0.f.i1.l.d'

In [32]:
flappable_example_wordform2 = choice(list(wordforms_with_multiple_flapping_opportunities))
flappable_example_wordform2
flap(flappable_example_wordform2)

'tʃ.ɪ1.t.ɚ0.tʃ.æ1.t.ɚ0'

('tʃ.ɪ1.t', 'ɪ1.t.ɚ0', 't.ɚ0.tʃ', 'ɚ0.tʃ.æ1', 'tʃ.æ1.t', 'æ1.t.ɚ0')
('tʃ.ɪ1.t', 'ɪ1.ɾ.ɚ0', 't.ɚ0.tʃ', 'ɚ0.tʃ.æ1', 'tʃ.æ1.t', 'æ1.ɾ.ɚ0')


'tʃ.ɪ1.ɾ.ɚ0.tʃ.æ1.ɾ.ɚ0'

In [33]:
flap('ɔ1.l.t.ɚ0.n.eɪ2.t.ɚ0')

('ɔ1.l.t', 'l.t.ɚ0', 't.ɚ0.n', 'ɚ0.n.eɪ2', 'n.eɪ2.t', 'eɪ2.t.ɚ0')
('ɔ1.l.t', 'l.t.ɚ0', 't.ɚ0.n', 'ɚ0.n.eɪ2', 'n.eɪ2.t', 'eɪ2.ɾ.ɚ0')


'ɔ1.l.t.ɚ0.n.eɪ2.ɾ.ɚ0'

In [None]:
tfs = ('ɔ1.l.t', 'l.t.ɚ0', 't.ɚ0.n', 'ɚ0.n.eɪ2', 'n.eɪ2.t', 'eɪ2.ɾ.ɚ0')
tfs[0]
tfs[0][0]
tfs[-1]
tfs[-1][-1]

'ɔ1.l.t'

'ɔ'

'eɪ2.ɾ.ɚ0'

'0'

In [None]:
threeFactorSequenceToDS(tfs)

'ɔ1.l.t.ɚ0.n.eɪ2.ɾ.ɚ0'

## Creating a list of positive input-output examples

In [None]:
positive_io_examples = [{'UR':w_in,
                         'SR':flap(w_in)}
                        for w_in in flappable_wordforms]
len(positive_io_examples)
list(positive_io_examples)[:10]

('l.aɪ1.t', 'aɪ1.t.ɪ0', 't.ɪ0.n', 'ɪ0.n.ʌ0', 'n.ʌ0.n')
('l.aɪ1.t', 'aɪ1.ɾ.ɪ0', 't.ɪ0.n', 'ɪ0.n.ʌ0', 'n.ʌ0.n')
('k.oʊ1.t', 'oʊ1.t.ʌ0', 't.ʌ0.l', 'ʌ0.l.ɚ0')
('k.oʊ1.t', 'oʊ1.ɾ.ʌ0', 't.ʌ0.l', 'ʌ0.l.ɚ0')
('w.ɪ1.t', 'ɪ1.t.i0')
('w.ɪ1.t', 'ɪ1.ɾ.i0')
('ɹ.oʊ0.s', 'oʊ0.s.ɛ1', 's.ɛ1.t', 'ɛ1.t.oʊ0')
('ɹ.oʊ0.s', 'oʊ0.s.ɛ1', 's.ɛ1.t', 'ɛ1.ɾ.oʊ0')
('k.ʌ0.m', 'ʌ0.m.b', 'm.b.æ1', 'b.æ1.t', 'æ1.t.ɪ0', 't.ɪ0.v')
('k.ʌ0.m', 'ʌ0.m.b', 'm.b.æ1', 'b.æ1.t', 'æ1.ɾ.ɪ0', 't.ɪ0.v')
('s.i0.æ1', 'i0.æ1.t', 'æ1.t.ʌ0', 't.ʌ0.l', 'ʌ0.l.z')
('s.i0.æ1', 'i0.æ1.t', 'æ1.ɾ.ʌ0', 't.ʌ0.l', 'ʌ0.l.z')
('l.ɛ1.t', 'ɛ1.t.ɚ0', 't.ɚ0.m', 'ɚ0.m.ʌ0', 'm.ʌ0.n', 'ʌ0.n.z')
('l.ɛ1.t', 'ɛ1.ɾ.ɚ0', 't.ɚ0.m', 'ɚ0.m.ʌ0', 'm.ʌ0.n', 'ʌ0.n.z')
('ɹ.oʊ1.t', 'oʊ1.t.ʌ0', 't.ʌ0.n')
('ɹ.oʊ1.t', 'oʊ1.ɾ.ʌ0', 't.ʌ0.n')
('ɹ.oʊ0.s', 'oʊ0.s.ɛ1', 's.ɛ1.t', 'ɛ1.t.i0')
('ɹ.oʊ0.s', 'oʊ0.s.ɛ1', 's.ɛ1.t', 'ɛ1.ɾ.i0')
('m.j.u1', 'j.u1.t', 'u1.t.ʌ0', 't.ʌ0.l', 'ʌ0.l.eɪ2', 'l.eɪ2.t')
('m.j.u1', 'j.u1.t', 'u1.ɾ.ʌ0', 't.ʌ0.l', 'ʌ0.l.eɪ2', 'l.eɪ2.t')
('p.ɛ1.t', 'ɛ1.t.i

In [None]:
positive_io_example_fn = 'positive_flapping_io_examples.tsv'

In [None]:
with open(positive_io_example_fn, 'w') as file:
    writer = csv.DictWriter(file, ['UR','SR'], delimiter='\t')
    writer.writeheader()
    for ex in positive_io_examples:
        writer.writerow(ex)

37

33

21

35

39

35

39

27

33

41

21

35

45

45

33

55

29

31

61

33

31

43

43

43

31

35

41

27

27

23

29

35

21

33

35

31

47

65

61

29

43

63

47

23

63

51

21

31

39

51

37

35

43

25

45

37

39

27

31

29

33

31

53

35

41

45

31

45

49

43

57

31

33

31

59

47

35

51

53

33

43

35

39

75

57

21

27

39

47

27

29

53

27

31

25

33

33

57

35

43

37

49

39

25

49

37

49

35

25

37

29

47

57

35

39

19

53

25

31

25

47

27

25

35

61

55

25

35

33

35

27

45

49

57

39

29

27

23

35

49

63

51

35

51

27

51

43

25

61

39

31

57

37

29

31

57

41

31

51

39

45

27

25

45

61

53

57

39

35

35

61

39

49

41

45

25

63

21

29

27

55

27

31

39

39

41

51

25

23

25

27

43

45

55

65

21

25

35

41

29

39

35

43

45

31

45

35

37

47

37

31

25

25

61

29

43

51

45

35

39

25

49

45

43

41

39

51

49

21

55

47

43

25

41

41

57

43

57

39

39

25

55

35

39

43

39

63

25

43

41

31

47

25

57

49

51

21

47

27

53

25

27

25

29

27

51

39

27

33

31

41

39

43

47

45

47

43

51

25

53

39

35

45

49

41

51

29

47

25

61

57

21

21

29

29

57

55

19

27

37

49

25

49

29

37

49

31

31

51

51

27

23

49

51

37

43

39

57

21

27

37

39

25

37

21

47

53

29

45

35

41

47

51

59

29

41

45

31

25

37

35

31

49

51

41

25

29

39

25

31

29

23

51

31

31

37

49

19

39

33

35

49

41

45

41

47

21

41

43

33

61

27

47

25

33

47

29

31

59

33

43

37

71

41

57

43

47

27

45

23

35

27

25

29

31

41

35

33

69

35

43

31

35

55

31

25

53

31

29

45

45

57

49

29

49

51

25

31

43

53

27

43

45

55

23

43

47

39

25

39

49

51

53

57

31

69

37

59

29

43

63

39

43

45

31

39

65

21

25

25

41

29

45

37

27

21

29

47

37

49

33

25

31

41

49

25

25

43

29

53

29

45

29

55

63

27

57

47

47

47

27

59

27

39

39

47

29

41

65

37

31

57

33

39

23

55

25

25

41

35

57

27

35

25

31

29

31

21

43

29

45

45

53

39

31

37

43

35

45

39

27

39

45

25

41

47

31

27

23

61

35

37

41

37

53

45

41

49

25

25

33

57

21

35

27

35

25

35

53

43

43

35

41

37

37

35

25

57

37

37

25

29

33

39

37

23

63

27

47

37

23

35

39

17

35

41

33

41

39

29

29

33

41

79

57

31

21

31

43

47

39

45

25

37

31

33

53

35

43

55

29

25

35

31

37

25

31

33

35

25

33

29

33

25

37

49

17

55

47

25

29

57

43

27

35

53

35

47

25

25

43

23

27

45

35

29

35

39

39

39

41

29

39

55

29

35

25

23

27

35

45

51

43

25

21

31

21

27

39

35

25

47

33

23

41

35

33

47

31

25

23

25

39

53

53

53

49

27

61

47

65

33

51

47

51

43

43

37

35

39

51

37

45

25

59

39

31

55

35

41

43

27

41

41

33

47

23

25

25

53

39

35

61

21

21

33

25

25

49

41

35

29

49

43

21

27

53

45

45

39

35

49

23

39

39

21

39

27

43

49

31

39

61

45

49

21

35

39

25

29

35

45

35

45

35

47

43

45

23

25

25

39

43

67

31

39

25

29

21

39

51

47

27

63

21

51

35

33

33

25

37

25

47

51

31

27

35

39

33

65

33

37

39

45

29

29

41

53

23

45

21

53

45

45

45

29

45

61

35

39

21

59

53

43

31

27

43

45

47

49

45

63

25

35

23

51

31

35

35

35

47

61

35

53

31

41

25

45

29

25

25

51

25

37

49

57

53

51

41

35

25

33

21

53

33

45

47

45

47

37

41

33

23

27

27

49

43

55

55

25

47

49

33

61

35

41

59

23

49

43

47

37

45

51

61

55

25

47

43

25

43

25

43

27

25

53

71

47

41

39

61

51

23

31

27

43

35

33

35

37

47

29

47

57

47

37

55

63

25

49

35

35

23

43

53

27

65

21

55

25

27

39

31

33

31

33

47

47

29

47

25

33

35

41

49

39

51

45

29

47

43

31

45

39

25

35

25

23

43

43

21

45

21

33

35

49

43

39

43

45

37

25

55

29

43

37

29

53

55

47

29

25

57

29

33

53

57

55

41

39

27

43

23

35

57

53

39

43

37

27

29

47

35

33

43

35

53

29

47

51

33

39

37

29

31

37

39

51

53

55

55

31

39

39

25

57

35

27

25

63

41

37

19

25

21

51

31

25

39

39

27

27

35

43

41

33

57

55

49

55

47

35

27

25

35

47

53

41

27

47

33

37

59

57

27

43

27

33

43

47

35

31

21

43

45

21

27

43

31

37

25

27

25

47

33

59

35

41

53

51

31

39

29

25

25

21

21

33

21

79

65

41

47

63

43

27

25

27

55

45

47

37

29

39

45

45

53

45

37

27

25

57

55

63

35

31

23

23

29

21

21

49

35

33

37

43

31

41

57

35

39

39

49

33

61

37

21

21

35

37

37

33

39

31

47

51

29

35

53

41

47

49

39

43

31

53

39

53

25

75

39

31

37

27

47

25

35

51

43

39

37

49

51

43

41

37

33

43

35

29

37

39

21

45

39

49

53

35

31

41

23

21

55

75

49

47

31

21

21

79

37

21

47

53

39

31

39

37

39

41

41

51

53

59

25

49

35

43

37

43

29

39

25

49

31

31

51

25

31

31

51

43

29

39

29

31

47

29

41

45

49

31

39

25

39

25

41

33

27

35

53

49

37

67

31

43

61

41

51

43

39

41

25

83

39

25

47

41

49

35

21

45

23

63

61

35

35

63

35

31

35

35

49

25

37

43

23

53

31

39

47

55

35

47

41

37

43

25

43

29

29

27

43

47

43

39

43

49

23

39

39

21

43

31

29

51

35

51

31

25

29

33

23

39

31

37

39

43

39

27

23

59

67

21

25

53

47

21

61

41

31

43

25

33

21

51

25

41

31

37

57

25

25

47

31

25

31

37

59

21

43

25

21

29

37

55

37

47

45

43

31

23

29

43

53

29

43

25

25

27

45

53

35

59

45

43

31

29

25

29

21

27

39

39

29

51

25

33

49

47

31

43

23

61

39

29

35

57

27

25

47

41

35

25

41

29

61

33

45

35

23

43

63

55

61

53

43

33

47

59

31

57

35

55

55

25

39

43

55

21

35

35

39

29

31

55

61

25

53

29

39

37

21

41

53

37

35

39

47

27

51

43

25

45

37

49

25

41

33

45

21

39

27

29

25

45

23

29

35

33

33

41

71

39

33

43

37

59

41

47

47

49

51

25

39

31

27

23

45

27

31

25

29

51

25

55

25

47

41

57

27

37

27

61

45

53

49

47

45

33

37

53

63

53

65

39

47

53

53

39

27

37

49

47

41

47

37

37

45

33

33

27

53

39

43

53

39

35

27

39

21

37

35

25

49

25

25

39

61

25

21

51

49

27

43

35

43

35

33

51

49

31

31

29

39

39

67

29

55

25

41

59

29

27

35

35

35

33

43

37

35

31

41

43

67

45

35

57

51

29

43

47

51

33

53

49

41

37

47

25

35

51

21

45

27

41

55

25

29

33

33

33

45

21

47

25

67

37

47

41

23

31

31

31

31

47

25

25

45

29

37

33

29

25

25

31

35

51

39

21

61

39

29

39

23

47

33

33

59

25

27

57

25

25

25

61

43

47

23

43

55

35

31

33

57

35

41

29

47

27

37

25

43

33

33

39

31

21

25

35

39

21

23

21

35

57

45

39

25

37

35

29

37

23

31

35

57

43

27

39

43

25

37

21

35

43

35

49

27

25

51

45

41

33

35

57

31

29

39

35

53

35

43

39

61

41

47

27

27

51

37

57

31

47

45

43

31

21

35

35

25

63

21

49

41

57

35

45

35

41

35

31

45

35

47

53

83

45

29

57

29

31

29

31

31

31

45

31

37

25

29

37

51

45

27

25

45

33

19

43

47

25

31

49

31

43

43

35

35

43

21

27

35

31

23

37

31

57

19

31

55

41

51

27

27

37

23

49

47

37

31

39

37

37

37

65

37

31

41

45

25

31

25

45

39

39

47

43

65

25

23

49

57

49

41

31

25

25

35

47

43

57

49

31

25

37

47

25

35

39

17

37

43

29

57

51

47

39

61

29

31

33

25

39

27

23

51

21

31

25

55

35

47

35

45

39

39

41

29

21

47

29

49

35

43

31

35

29

41

39

29

25

27

47

33

33

25

51

29

31

61

45

47

47

27

47

25

25

35

43

25

29

25

39

45

63

37

43

41

45

51

35

37

57

61

27

53

23

37

37

35

33

49

35

25

45

37

43

39

23

45

25

35

47

31

21

25

41

41

29

45

31

25

31

27

21

39

37

55

59

43

53

43

25

35

45

25

21

51

35

43

35

41

27

61

25

41

57

27

57

47

29

39

39

51

49

25

21

35

41

29

29

55

33

61

37

25

53

49

39

25

53

45

63

43

31

45

23

31

43

23

37

63

29

41

25

37

29

67

45

45

59

35

31

31

23

61

35

31

59

31

33

31

47

29

25

25

51

37

39

49

33

31

29

47

59

21

45

41

39

31

41

59

37

33

31

51

39

35

29

27

31

51

41

29

33

45

29

29

41

33

25

45

33

33

35

25

35

35

47

39

29

29

53

27

53

27

31

33

23

31

41

55

31

29

35

49

27

27

47

25

31

51

47

31

21

63

37

37

35

31

57

47

51

41

21

27

51

31

47

51

63

31

35

23

39

49

25

41

29

31

27

47

45

25

55

25

25

21

55

25

37

47

25

45

43

43

31

35

47

39

33

53

45

51

27

31

31

43

29

27

43

31

27

51

25

49

25

23

31

49

39

65

55

47

39

37

29

31

49

25

35

29

31

57

47

67

29

31

61

31

47

39

29

25

59

29

31

51

45

29

31

37

51

53

45

47

25

29

35

67

61

53

53

35

33

45

35

25

31

43

31

53

25

27

31

49

39

43

29

43

49

31

21

37

63

39

61

25

39

25

37

51

37

53

31

67

55

53

29

41

27

35

21

53

41

43

43

63

25

27

45

27

53

59

47

35

27

31

41

29

51

37

37

53

39

35

41

41

45

25

37

57

29

49

21

29

25

39

47

23

31

39

23

49

45

45

69

47

37

31

27

37

25

29

31

33

35

41

37

37

35

33

33

43

45

25

47

39

67

47

47

25

69

45

35

55

59

45

45

67

37

37

49

25

43

37

45

53

51

43

29

59

21

55

39

51

37

31

41

35

45

39

53

21

33

43

35

17

47

43

45

53

31

55

45

37

53

27

45

49

23

27

49

43

31

33

83

45

21

51

37

41

25

21

29

45

55

27

51

51

29

55

33

27

55

29

41

35

35

61

45

31

39

31

37

31

55

49

51

35

29

29

55

21

23

43

41

33

39

51

37

47

57

47

51

55

35

33

57

49

31

37

43

27

27

33

27

39

59

63

21

29

47

47

65

39

31

25

37

63

31

29

41

39

29

35

45

43

47

47

37

33

53

31

39

39

49

31

33

35

31

45

37

35

27

47

41

31

39

49

41

29

55

55

25

45

69

45

33

39

47

49

41

39

25

49

51

35

33

27

45

53

37

39

49

31

27

25

23

67

23

25

41

47

35

51

43

25

67

51

21

29

27

39

39

In [None]:
%cat -n /mnt/cube/home/AD/emeinhar/gj-flap-data/positive_flapping_io_examples.tsv | head -60