In [1]:
#Prints **all** console output, not just last item in cell 
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

**Notebook author:** emeinhardt@ucsd.edu

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Overview" data-toc-modified-id="Overview-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Overview</a></span></li><li><span><a href="#Imports" data-toc-modified-id="Imports-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#Precursor-steps" data-toc-modified-id="Precursor-steps-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Precursor steps</a></span></li><li><span><a href="#Step-1:-Make-transcribed-lexicons" data-toc-modified-id="Step-1:-Make-transcribed-lexicons-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Step 1: Make transcribed lexicons</a></span></li><li><span><a href="#Step-2:-Segment-inventory-alignment" data-toc-modified-id="Step-2:-Segment-inventory-alignment-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Step 2: Segment inventory alignment</a></span><ul class="toc-item"><li><span><a href="#Step-2a:-Define-inventory-alignment-projections" data-toc-modified-id="Step-2a:-Define-inventory-alignment-projections-5.1"><span class="toc-item-num">5.1&nbsp;&nbsp;</span>Step 2a: Define inventory alignment projections</a></span></li><li><span><a href="#Step-2b:-Apply-inventory-alignment-projections" data-toc-modified-id="Step-2b:-Apply-inventory-alignment-projections-5.2"><span class="toc-item-num">5.2&nbsp;&nbsp;</span>Step 2b: Apply inventory alignment projections</a></span></li></ul></li><li><span><a href="#Step-3:-Generating-channel-and-lexicon-distributions" data-toc-modified-id="Step-3:-Generating-channel-and-lexicon-distributions-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Step 3: Generating channel and lexicon distributions</a></span><ul class="toc-item"><li><span><a href="#Step-3a:-Generating-channel-distributions-and-associated-metadata" data-toc-modified-id="Step-3a:-Generating-channel-distributions-and-associated-metadata-6.1"><span class="toc-item-num">6.1&nbsp;&nbsp;</span>Step 3a: Generating channel distributions and associated metadata</a></span></li><li><span><a href="#Step-3b:-Filtering-transcription-lexicons-to-only-include-words-that-can-be-modeled-by-a-given-channel-distribution" data-toc-modified-id="Step-3b:-Filtering-transcription-lexicons-to-only-include-words-that-can-be-modeled-by-a-given-channel-distribution-6.2"><span class="toc-item-num">6.2&nbsp;&nbsp;</span>Step 3b: Filtering transcription lexicons to only include words that can be modeled by a given channel distribution</a></span></li><li><span><a href="#Step-3c:-Filter-transcription-lexicons-to-only-include-words-that-are-in-a-language-model's-vocabulary" data-toc-modified-id="Step-3c:-Filter-transcription-lexicons-to-only-include-words-that-are-in-a-language-model's-vocabulary-6.3"><span class="toc-item-num">6.3&nbsp;&nbsp;</span>Step 3c: Filter transcription lexicons to only include words that are in a language model's vocabulary</a></span></li><li><span><a href="#Step-3d:-Define-a-distribution-over-the-transcibed-vocabulary-of-a-language-model-for-each-n-gram-context-in-a-set-of-(possibly-empty)-n-gram-contexts" data-toc-modified-id="Step-3d:-Define-a-distribution-over-the-transcibed-vocabulary-of-a-language-model-for-each-n-gram-context-in-a-set-of-(possibly-empty)-n-gram-contexts-6.4"><span class="toc-item-num">6.4&nbsp;&nbsp;</span>Step 3d: Define a distribution over the transcibed vocabulary of a language model for each n-gram context in a set of (possibly empty) n-gram contexts</a></span></li></ul></li><li><span><a href="#Step-4:-Generating-posterior-distributions" data-toc-modified-id="Step-4:-Generating-posterior-distributions-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Step 4: Generating posterior distributions</a></span></li><li><span><a href="#Step-5:-Generating-analysis-measures" data-toc-modified-id="Step-5:-Generating-analysis-measures-8"><span class="toc-item-num">8&nbsp;&nbsp;</span>Step 5: Generating analysis measures</a></span></li></ul></div>

# Overview

This notebook describes the processing pipeline from 
 - gating data
 - transcribed lexicon
 - a language model and (possibly empty) n-gram contexts

to 
 - channel distribution
 - lexicon distribution(s) (distributions over wordforms)
 - expected posterior distribution over intended wordform given what has been produced of what was intended.
 
 
It describes what happens at each step, checks some pre- and post-conditions, describes what you, the user must do (if anything), and scripts some commands to automatically do the necessary processing.

# Imports

In [2]:
import papermill as pm

In [3]:
from tqdm import tqdm

In [4]:
from os import getcwd, chdir, listdir, path, mkdir, makedirs

import json
import csv

In [5]:
from collections import OrderedDict

In [6]:
repo_dir = getcwd()
repo_dir

'/mnt/cube/home/AD/emeinhar/wr'

In [7]:
repo_contents_0 = listdir()
repo_contents_0

['boilerplate.py',
 'LTR_Buckeye',
 '.gitignore',
 'LTR_Buckeye_aligned_w_GD_AmE_destressed',
 'GD_AmE_destressed_aligned_w_LTR_Buckeye',
 '__pycache__',
 'AmE-diphones - LTR_CMU_destressed.tsv alignment definition.ipynb',
 '1 initial directory setup.txt',
 '2a alignment_paths_and_cmds.sh',
 'AmE-diphones - LTR_newdic_destressed.tsv alignment definition.ipynb',
 'Align transcriptions.ipynb',
 'GD_AmE_destressed_aligned_w_LTR_CMU_stressed',
 'LTR_CMU_destressed',
 'Gating Data - Transcription Lexicon Alignment Maker.ipynb',
 'AmE-diphones - LTR_Buckeye.tsv alignment definition.ipynb',
 'Processing Driver Notebook.ipynb',
 'old',
 'GD_AmE',
 '.ipynb_checkpoints',
 'GD_AmE_destressed_aligned_w_LTR_newdic_destressed',
 'AmE-diphones - LTR_CMU_stressed.tsv alignment definition.ipynb',
 'string_utils.py',
 'GD_AmE_destressed_aligned_w_LTR_CMU_destressed',
 'LTR_CMU_stressed',
 'LTR_CMU_stressed_aligned_w_GD_AmE_destressed',
 'LTR_newdic_destressed_aligned_w_GD_AmE_destressed',
 '.git',
 'LTR

# Precursor steps

 - I assume all relevant transcriptions have been converted to Unicode IPA characters. For each data source used here, this step is documented in a GitHub repository elsewhere.
 - Where language models and n-gram contexts (drawn from speech corpora) are referenced, each of these is assumed to have come from as is from other GitHub repositories.

# Step 1: Make transcribed lexicons

 - Each transcribed lexicon `LEXNAME` should be in a folder (e.g. `LTR_LEXNAME`) containing a file `LTR_LEXNAME.tsv`. For documentation purposes, the source file and a notebook documenting the production of the `.tsv` file should, if practicable be included in the folder as well.
   - A transcribed lexicon `LTR_....tsv` file should have two columns: `Orthographic_Wordform` and `Transcription`.

The assertions in the code below will only succeed if step 1 is complete for all transcribed lexicons listed for checking below.

In [17]:
newdic_destressed_ltr_folder = 'LTR_newdic_destressed'
cmu_destressed_ltr_folder = 'LTR_CMU_destressed'
cmu_stressed_ltr_folder = 'LTR_CMU_stressed'
buckeye_ltr_folder = 'LTR_Buckeye'
# nxt_swbd_ltr_folder = 

LTR_folders = (newdic_destressed_ltr_folder, cmu_destressed_ltr_folder, cmu_stressed_ltr_folder, buckeye_ltr_folder)
LTR_folders_to_process = (newdic_destressed_ltr_folder, cmu_destressed_ltr_folder, buckeye_ltr_folder)

for dirname in tqdm(LTR_folders_to_process):
    assert path.exists(dirname), 'Transcribed lexicon directory {0} not found in repo directory'.format(dirname)
    fname = path.join(dirname, dirname + '.tsv')
    assert path.exists(fname), 'Transcribed lexicon {0} not found in repo directory'.format(fname)


  0%|          | 0/3 [00:00<?, ?it/s][A
100%|██████████| 3/3 [00:00<00:00, 1730.09it/s][A

# Step 2: Segment inventory alignment

## Step 2a: Define inventory alignment projections

The segment inventory of any given transcribed lexicon and the segment inventory of the gating data often do not line up. For the gating data to be usefully applied to a given lexicon of transcriptions, the strings in the lexicon must contain only segments found in the gating data stimuli inventory.

To ensure this happens, the notebook `Gating Data - Transcription Lexicon Alignment Maker.ipynb` 
 - takes as inputs 
     - a transcribed lexicon file path and a gating data file path
     - a lexicon projection file path and a gating data projection file path
 - identifies the inventories of each and what symbols are relatively unique to the lexicon and the gating data
 - produces 
   - *a Jupyter notebook* for **you** to open and finish by defining a projection function (i.e. a Python dictionary) to be applied to strings in the transcribed lexicon and to the gating data. When you finish doing this (and set an export flag in the notebook to True and run the remainder of the notebook), this notebook will produce
     - two *.json files storing these projections* according to the previously provided output file paths.

The cell below will clear all existing alignment folders created using the code in this subsection:

In [34]:
%rm -rf *_aligned_w_*
%rm -rf *" alignment definition"*

The cell below will only succeed if the American English gating data of Warner, McQueen, and Cutler (2014) is contained in the repo directory with a particular directory and filename.

In [19]:
gating_data_folder = 'GD_AmE'
gating_data_fn = 'AmE-diphones-IPA-annotated-columns.csv'
gating_data_fp = path.join(gating_data_folder, gating_data_fn)

assert path.exists(gating_data_folder), 'AmE gating data folder {0} not found in repo directory'.format(gating_data_folder)
assert path.exists(gating_data_fp), 'AmE gating data {0} not found in repo directory'.format(gating_data_fp)

The third cell below will create a notebook for alignment projection definitions for each of the transcribed lexicons from the previous step and the AmE gating data.

In [20]:
def removeExtension(fp):
    dir_name = path.dirname(fp)
    file_name = path.basename(fp)
    ext = file_name.split('.')[-1]
    rest = '.'.join( file_name.split('.')[:-1] )
    return path.join(dir_name, rest)

In [21]:
arg_bundles = []
for LTR_dirname in tqdm(LTR_folders_to_process):
    LTR_fn = LTR_dirname + '.tsv'
    LTR_fp = path.join(LTR_dirname, LTR_fn)
    
    nb_output_name = 'GD_AmE-diphones - ' + LTR_dirname + ' alignment definition' + '.ipynb'
    my_g = gating_data_fp
    my_l = LTR_fp
    my_s = 'destressed'
    
    gd_alignment_dn = 'GD_AmE_' + my_s + '_' + 'aligned_w_' + LTR_dirname
    gd_alignment_fn = 'alignment_of_' + removeExtension(gating_data_fn) + '_w_' + LTR_dirname + '.json'
    gd_alignment_fp = path.join(gd_alignment_dn, gd_alignment_fn)
    if not path.exists(gd_alignment_dn):
        makedirs(gd_alignment_dn)
    my_gp = gd_alignment_fp
    
    ltr_alignment_dn = LTR_dirname + '_aligned_w_' + 'GD_AmE_' + my_s
    ltr_alignment_fn = 'alignment_of_' + LTR_dirname + '_w_' + removeExtension(gating_data_fn) + '.json'
    ltr_alignment_fp = path.join(ltr_alignment_dn, ltr_alignment_fn)
    if not path.exists(ltr_alignment_dn):
        makedirs(ltr_alignment_dn)
    my_lp = ltr_alignment_fp
    
    
    my_arg_bundle = OrderedDict({
        'LTR_dirname':LTR_dirname,
        'LTR_fn':LTR_fn,
        'LTR_fp':LTR_fp,
        'gd_alignment_dn':gd_alignment_dn,
        'gd_alignment_fn':gd_alignment_fn,
        'gd_alignment_fp':gd_alignment_fp,
        'ltr_alignment_dn':ltr_alignment_dn,
        'ltr_alignment_fn':ltr_alignment_fn,
        'ltr_alignment_fp':ltr_alignment_fp,
        'align_def_nb_output_name':nb_output_name,
        'my_g':my_g,
        'my_l':my_l,
        'my_s':my_s,
        'my_gp':my_gp,
        'my_lp':my_lp,
    })
    arg_bundles.append(my_arg_bundle)


  0%|          | 0/3 [00:00<?, ?it/s][A
100%|██████████| 3/3 [00:00<00:00, 203.82it/s][A

In [22]:
for arg_bundle in tqdm(arg_bundles):
    nb = pm.execute_notebook(
        'Gating Data - Transcription Lexicon Alignment Maker.ipynb',
        arg_bundle['align_def_nb_output_name'],
        parameters=dict(g = arg_bundle['my_g'], 
                        l = arg_bundle['my_l'], 
                        s = arg_bundle['my_s'], 
                        gp = arg_bundle['my_gp'], 
                        lp = arg_bundle['my_lp'])
    )
#     pm.execute_notebook(
#        'Gating Data - Transcription Lexicon Alignment Maker.ipynb',
#        nb_output_name,
#        parameters=dict(g = my_g, l = my_l, s = my_s, gp = my_gp, lp = my_lp)
#     )
    print("Finished creating alignment definition notebook {0}.\nOpen and run the notebook, complete the projection definition, and run the remainder of the notebook (remembering to change the export flag to 'True').\n".format(arg_bundle['align_def_nb_output_name']))


  0%|          | 0/3 [00:00<?, ?it/s][AInput Notebook:  Gating Data - Transcription Lexicon Alignment Maker.ipynb
Output Notebook: GD_AmE-diphones - LTR_newdic_destressed.tsv alignment definition.ipynb


  0%|          | 0/64 [00:00<?, ?it/s][A[A

  6%|▋         | 4/64 [00:00<00:01, 33.84it/s][A[A

 12%|█▎        | 8/64 [00:00<00:01, 35.42it/s][A[A

 20%|██        | 13/64 [00:00<00:01, 36.30it/s][A[A

 25%|██▌       | 16/64 [00:00<00:01, 33.00it/s][A[A

 30%|██▉       | 19/64 [00:00<00:01, 26.75it/s][A[A

 34%|███▍      | 22/64 [00:00<00:02, 20.39it/s][A[A

 39%|███▉      | 25/64 [00:01<00:02, 17.26it/s][A[A

 44%|████▍     | 28/64 [00:01<00:01, 19.69it/s][A[A

 48%|████▊     | 31/64 [00:01<00:01, 21.88it/s][A[A

 53%|█████▎    | 34/64 [00:01<00:01, 19.58it/s][A[A

 58%|█████▊    | 37/64 [00:04<00:09,  2.98it/s][A[A

 61%|██████    | 39/64 [00:04<00:06,  3.86it/s][A[A

 64%|██████▍   | 41/64 [00:05<00:08,  2.71it/s][A[A

 67%|██████▋   | 43/64 [00:06<00:05

Finished creating alignment definition notebook GD_AmE-diphones - LTR_newdic_destressed.tsv alignment definition.ipynb.
Open and run the notebook, complete the projection definition, and run the remainder of the notebook (remembering to change the export flag to 'True').



Input Notebook:  Gating Data - Transcription Lexicon Alignment Maker.ipynb
Output Notebook: GD_AmE-diphones - LTR_CMU_destressed.tsv alignment definition.ipynb


  0%|          | 0/64 [00:00<?, ?it/s][A[A

  3%|▎         | 2/64 [00:00<00:04, 13.97it/s][A[A

  9%|▉         | 6/64 [00:00<00:03, 17.20it/s][A[A

 17%|█▋        | 11/64 [00:00<00:02, 20.80it/s][A[A

 23%|██▎       | 15/64 [00:00<00:02, 23.48it/s][A[A

 28%|██▊       | 18/64 [00:00<00:02, 22.63it/s][A[A

 33%|███▎      | 21/64 [00:00<00:01, 24.00it/s][A[A

 39%|███▉      | 25/64 [00:00<00:01, 20.88it/s][A[A

 44%|████▍     | 28/64 [00:01<00:01, 18.67it/s][A[A

 50%|█████     | 32/64 [00:01<00:01, 18.80it/s][A[A

 53%|█████▎    | 34/64 [00:01<00:02, 13.38it/s][A[A

 56%|█████▋    | 36/64 [00:04<00:13,  2.07it/s][A[A

 59%|█████▉    | 38/64 [00:05<00:11,  2.26it/s][A[A

 62%|██████▎   | 40/64 [00:06<00:10,  2.22it/s][A[A

 64%|██████▍   | 41/64 [00:06<00:11,  2.09it/s][A[A

 69%|██████▉   | 44/64 [

Finished creating alignment definition notebook GD_AmE-diphones - LTR_CMU_destressed.tsv alignment definition.ipynb.
Open and run the notebook, complete the projection definition, and run the remainder of the notebook (remembering to change the export flag to 'True').



Input Notebook:  Gating Data - Transcription Lexicon Alignment Maker.ipynb
Output Notebook: GD_AmE-diphones - LTR_Buckeye.tsv alignment definition.ipynb


  0%|          | 0/64 [00:00<?, ?it/s][A[A

  6%|▋         | 4/64 [00:00<00:01, 33.42it/s][A[A

 12%|█▎        | 8/64 [00:00<00:01, 35.06it/s][A[A

 17%|█▋        | 11/64 [00:00<00:01, 31.36it/s][A[A

 22%|██▏       | 14/64 [00:00<00:01, 29.55it/s][A[A

 27%|██▋       | 17/64 [00:00<00:01, 29.30it/s][A[A

 31%|███▏      | 20/64 [00:00<00:01, 22.17it/s][A[A

 36%|███▌      | 23/64 [00:00<00:01, 22.37it/s][A[A

 41%|████      | 26/64 [00:01<00:02, 14.09it/s][A[A

 45%|████▌     | 29/64 [00:01<00:02, 16.61it/s][A[A

 52%|█████▏    | 33/64 [00:01<00:01, 18.40it/s][A[A

 56%|█████▋    | 36/64 [00:04<00:09,  2.99it/s][A[A

 59%|█████▉    | 38/64 [00:05<00:10,  2.46it/s][A[A

 62%|██████▎   | 40/64 [00:06<00:10,  2.31it/s][A[A

 64%|██████▍   | 41/64 [00:07<00:11,  2.03it/s][A[A

 66%|██████▌   | 42/64 [00:07<0

Finished creating alignment definition notebook GD_AmE-diphones - LTR_Buckeye.tsv alignment definition.ipynb.
Open and run the notebook, complete the projection definition, and run the remainder of the notebook (remembering to change the export flag to 'True').



## Step 2b: Apply inventory alignment projections

The cell below will clear all existing alignment folders created using the code in this subsection:

In [31]:
%rm -rf *" alignment application "*

The cell below will succeed if you have run each of the previously produced notebooks correctly and produced a projection mapping file.

In [15]:
for arg_bundle in tqdm(arg_bundles):
    args = arg_bundle
    assert path.exists(args['gd_alignment_fp']), 'Gating data alignment projection mapping not found:\n\t{0}'.format(args['gd_alignment_fp'])
    assert path.exists(args['ltr_alignment_fp']), 'Transcribed lexicon data alignment projection mapping not found:\n\t{0}'.format(args['ltr_alignment_fp'])


  0%|          | 0/3 [00:00<?, ?it/s][A
100%|██████████| 3/3 [00:00<00:00, 2094.01it/s][A

The cell below applies the alignment projection to each \FIXME

In [28]:
for arg_bundle in arg_bundles:
    args = arg_bundle
    LTR_fn = args['LTR_fn']
    
    my_pg = args['my_gp']
    my_g = args['my_g']
    my_o_fn = 'GD_AmE-diphones' + '_aligned_w_' + removeExtension(LTR_fn) + '.tsv'
    my_og = path.join(args['gd_alignment_dn'], my_o_fn)
    args['align_apply_gd_nb_output_name'] = 'GD_AmE-diphones - ' + removeExtension(LTR_fn) + ' alignment application to ' + 'AmE-diphones' + '.ipynb'
    print("Creating notebook '{0}' w/ args p, g, o = \n\t{1}\n\t{2}\n\t{3}".format(args['align_apply_gd_nb_output_name'], my_pg, my_g, my_og))
    nb = pm.execute_notebook(
        'Align transcriptions.ipynb',
        args['align_apply_gd_nb_output_name'],
        parameters=dict(p = my_pg,
                        g = my_g,
                        o = my_og)
    )
    print('Finished applying alignment projection\n\tp = {0}\nto\n\tg = {1}\nResult saved to\n\t{2}'.format(my_pg, my_g, my_og))
    print(' ')
    
    my_pl = args['my_lp']
    my_l = args['my_l']
    my_o_fn = removeExtension(LTR_fn) + '_aligned_w_' + 'GD_AmE-diphones' + '.tsv'
    my_ol = path.join(args['ltr_alignment_dn'], my_o_fn)
    args['align_apply_ltr_nb_output_name'] = 'GD_AmE-diphones - ' + removeExtension(LTR_fn) + ' alignment application to ' + removeExtension(LTR_fn) + '.ipynb'
    print('Creating notebook {0} w/ args p, g, o = \n\t{1}\n\t{2}\n\t{3}'.format(args['align_apply_ltr_nb_output_name'], my_pg, my_l, my_ol))
    nb = pm.execute_notebook(
        'Align transcriptions.ipynb',
        args['align_apply_ltr_nb_output_name'],
        parameters=dict(p = my_pl,
                        l = my_l,
                        o = my_ol)
    )
    print('Finished applying alignment projection\n\tp = {0}\nto\n\tl = {1}\nResult saved to\n\t{2}'.format(my_pl, my_l, my_ol))
    print('\n')

Creating notebook 'GD_AmE-diphones - LTR_newdic_destressed alignment application to AmE-diphones.ipynb' w/ args p, g, o = 
	GD_AmE_destressed_aligned_w_LTR_newdic_destressed/alignment_of_AmE-diphones-IPA-annotated-columns_w_LTR_newdic_destressed.json
	GD_AmE/AmE-diphones-IPA-annotated-columns.csv
	GD_AmE_destressed_aligned_w_LTR_newdic_destressed/GD_AmE-diphones_aligned_w_LTR_newdic_destressed.tsv


Input Notebook:  Align transcriptions.ipynb
Output Notebook: GD_AmE-diphones - LTR_newdic_destressed alignment application to AmE-diphones.ipynb






  0%|          | 0/64 [00:00<?, ?it/s][A[A[A[A[A[A





  6%|▋         | 4/64 [00:00<00:02, 29.28it/s][A[A[A[A[A[A





 14%|█▍        | 9/64 [00:00<00:01, 32.26it/s][A[A[A[A[A[A





 19%|█▉        | 12/64 [00:00<00:02, 19.83it/s][A[A[A[A[A[A





 23%|██▎       | 15/64 [00:00<00:02, 17.57it/s][A[A[A[A[A[A





 30%|██▉       | 19/64 [00:00<00:02, 20.09it/s][A[A[A[A[A[A





 34%|███▍      | 22/64 [00:00<00:01, 21.92it/s][A[A[A[A[A[A





 41%|████      | 26/64 [00:01<00:01, 24.22it/s][A[A[A[A[A[A





 45%|████▌     | 29/64 [00:01<00:01, 25.51it/s][A[A[A[A[A[A





 50%|█████     | 32/64 [00:01<00:01, 26.15it/s][A[A[A[A[A[A





 55%|█████▍    | 35/64 [00:04<00:09,  3.11it/s][A[A[A[A[A[A





 61%|██████    | 39/64 [00:04<00:05,  4.23it/s][A[A[A[A[A[A





 66%|███

Finished applying alignment projection
	p = GD_AmE_destressed_aligned_w_LTR_newdic_destressed/alignment_of_AmE-diphones-IPA-annotated-columns_w_LTR_newdic_destressed.json
to
	g = GD_AmE/AmE-diphones-IPA-annotated-columns.csv
Result saved to
	GD_AmE_destressed_aligned_w_LTR_newdic_destressed/GD_AmE-diphones_aligned_w_LTR_newdic_destressed.tsv
 
Creating notebook GD_AmE-diphones - LTR_newdic_destressed alignment applicationto LTR_newdic_destressed.ipynb w/ args p, g, o = 
	GD_AmE_destressed_aligned_w_LTR_newdic_destressed/alignment_of_AmE-diphones-IPA-annotated-columns_w_LTR_newdic_destressed.json
	LTR_newdic_destressed/LTR_newdic_destressed.tsv
	LTR_newdic_destressed_aligned_w_GD_AmE_destressed/LTR_newdic_destressed_aligned_w_GD_AmE-diphones.tsv


Input Notebook:  Align transcriptions.ipynb
Output Notebook: GD_AmE-diphones - LTR_newdic_destressed alignment applicationto LTR_newdic_destressed.ipynb






  0%|          | 0/64 [00:00<?, ?it/s][A[A[A[A[A[A





  3%|▎         | 2/64 [00:00<00:03, 16.43it/s][A[A[A[A[A[A





  8%|▊         | 5/64 [00:00<00:03, 18.29it/s][A[A[A[A[A[A





 14%|█▍        | 9/64 [00:00<00:02, 21.01it/s][A[A[A[A[A[A





 19%|█▉        | 12/64 [00:00<00:02, 19.62it/s][A[A[A[A[A[A





 23%|██▎       | 15/64 [00:00<00:02, 18.22it/s][A[A[A[A[A[A





 30%|██▉       | 19/64 [00:00<00:02, 21.02it/s][A[A[A[A[A[A





 36%|███▌      | 23/64 [00:00<00:01, 23.38it/s][A[A[A[A[A[A





 42%|████▏     | 27/64 [00:01<00:01, 24.96it/s][A[A[A[A[A[A





 47%|████▋     | 30/64 [00:01<00:01, 19.79it/s][A[A[A[A[A[A





 52%|█████▏    | 33/64 [00:01<00:01, 21.75it/s][A[A[A[A[A[A





 56%|█████▋    | 36/64 [00:01<00:01, 23.67it/s][A[A[A[A[A[A





 

Finished applying alignment projection
	p = LTR_newdic_destressed_aligned_w_GD_AmE_destressed/alignment_of_LTR_newdic_destressed_w_AmE-diphones-IPA-annotated-columns.json
to
	l = LTR_newdic_destressed/LTR_newdic_destressed.tsv
Result saved to
	LTR_newdic_destressed_aligned_w_GD_AmE_destressed/LTR_newdic_destressed_aligned_w_GD_AmE-diphones.tsv


Creating notebook 'GD_AmE-diphones - LTR_CMU_destressed alignment application to AmE-diphones.ipynb' w/ args p, g, o = 
	GD_AmE_destressed_aligned_w_LTR_CMU_destressed/alignment_of_AmE-diphones-IPA-annotated-columns_w_LTR_CMU_destressed.json
	GD_AmE/AmE-diphones-IPA-annotated-columns.csv
	GD_AmE_destressed_aligned_w_LTR_CMU_destressed/GD_AmE-diphones_aligned_w_LTR_CMU_destressed.tsv


Input Notebook:  Align transcriptions.ipynb
Output Notebook: GD_AmE-diphones - LTR_CMU_destressed alignment application to AmE-diphones.ipynb






  0%|          | 0/64 [00:00<?, ?it/s][A[A[A[A[A[A





  5%|▍         | 3/64 [00:00<00:02, 20.75it/s][A[A[A[A[A[A





  8%|▊         | 5/64 [00:00<00:02, 19.81it/s][A[A[A[A[A[A





 11%|█         | 7/64 [00:00<00:03, 18.84it/s][A[A[A[A[A[A





 17%|█▋        | 11/64 [00:00<00:02, 21.91it/s][A[A[A[A[A[A





 20%|██        | 13/64 [00:00<00:03, 16.33it/s][A[A[A[A[A[A





 23%|██▎       | 15/64 [00:00<00:03, 14.74it/s][A[A[A[A[A[A





 30%|██▉       | 19/64 [00:00<00:02, 17.66it/s][A[A[A[A[A[A





 34%|███▍      | 22/64 [00:01<00:02, 20.12it/s][A[A[A[A[A[A





 39%|███▉      | 25/64 [00:01<00:01, 21.31it/s][A[A[A[A[A[A





 45%|████▌     | 29/64 [00:01<00:01, 23.89it/s][A[A[A[A[A[A





 52%|█████▏    | 33/64 [00:01<00:01, 26.40it/s][A[A[A[A[A[A





 56%|█████▋ 

Finished applying alignment projection
	p = GD_AmE_destressed_aligned_w_LTR_CMU_destressed/alignment_of_AmE-diphones-IPA-annotated-columns_w_LTR_CMU_destressed.json
to
	g = GD_AmE/AmE-diphones-IPA-annotated-columns.csv
Result saved to
	GD_AmE_destressed_aligned_w_LTR_CMU_destressed/GD_AmE-diphones_aligned_w_LTR_CMU_destressed.tsv
 
Creating notebook GD_AmE-diphones - LTR_CMU_destressed alignment applicationto LTR_CMU_destressed.ipynb w/ args p, g, o = 
	GD_AmE_destressed_aligned_w_LTR_CMU_destressed/alignment_of_AmE-diphones-IPA-annotated-columns_w_LTR_CMU_destressed.json
	LTR_CMU_destressed/LTR_CMU_destressed.tsv
	LTR_CMU_destressed_aligned_w_GD_AmE_destressed/LTR_CMU_destressed_aligned_w_GD_AmE-diphones.tsv


Input Notebook:  Align transcriptions.ipynb
Output Notebook: GD_AmE-diphones - LTR_CMU_destressed alignment applicationto LTR_CMU_destressed.ipynb






  0%|          | 0/64 [00:00<?, ?it/s][A[A[A[A[A[A





  6%|▋         | 4/64 [00:00<00:01, 36.82it/s][A[A[A[A[A[A





 11%|█         | 7/64 [00:00<00:01, 32.27it/s][A[A[A[A[A[A





 16%|█▌        | 10/64 [00:00<00:01, 28.62it/s][A[A[A[A[A[A





 19%|█▉        | 12/64 [00:00<00:06,  8.36it/s][A[A[A[A[A[A





 23%|██▎       | 15/64 [00:01<00:05,  9.63it/s][A[A[A[A[A[A





 28%|██▊       | 18/64 [00:01<00:03, 11.98it/s][A[A[A[A[A[A





 34%|███▍      | 22/64 [00:01<00:02, 14.71it/s][A[A[A[A[A[A





 41%|████      | 26/64 [00:01<00:02, 17.64it/s][A[A[A[A[A[A





 45%|████▌     | 29/64 [00:01<00:01, 19.97it/s][A[A[A[A[A[A





 50%|█████     | 32/64 [00:01<00:01, 21.26it/s][A[A[A[A[A[A





 55%|█████▍    | 35/64 [00:01<00:01, 23.04it/s][A[A[A[A[A[A





 59%|█

Finished applying alignment projection
	p = LTR_CMU_destressed_aligned_w_GD_AmE_destressed/alignment_of_LTR_CMU_destressed_w_AmE-diphones-IPA-annotated-columns.json
to
	l = LTR_CMU_destressed/LTR_CMU_destressed.tsv
Result saved to
	LTR_CMU_destressed_aligned_w_GD_AmE_destressed/LTR_CMU_destressed_aligned_w_GD_AmE-diphones.tsv


Creating notebook 'GD_AmE-diphones - LTR_Buckeye alignment application to AmE-diphones.ipynb' w/ args p, g, o = 
	GD_AmE_destressed_aligned_w_LTR_Buckeye/alignment_of_AmE-diphones-IPA-annotated-columns_w_LTR_Buckeye.json
	GD_AmE/AmE-diphones-IPA-annotated-columns.csv
	GD_AmE_destressed_aligned_w_LTR_Buckeye/GD_AmE-diphones_aligned_w_LTR_Buckeye.tsv


Input Notebook:  Align transcriptions.ipynb
Output Notebook: GD_AmE-diphones - LTR_Buckeye alignment application to AmE-diphones.ipynb






  0%|          | 0/64 [00:00<?, ?it/s][A[A[A[A[A[A





  5%|▍         | 3/64 [00:00<00:02, 24.26it/s][A[A[A[A[A[A





  8%|▊         | 5/64 [00:00<00:02, 19.69it/s][A[A[A[A[A[A





 12%|█▎        | 8/64 [00:00<00:02, 19.39it/s][A[A[A[A[A[A





 19%|█▉        | 12/64 [00:00<00:02, 18.88it/s][A[A[A[A[A[A





 23%|██▎       | 15/64 [00:00<00:03, 15.79it/s][A[A[A[A[A[A





 27%|██▋       | 17/64 [00:01<00:02, 16.04it/s][A[A[A[A[A[A





 30%|██▉       | 19/64 [00:01<00:03, 11.43it/s][A[A[A[A[A[A





 34%|███▍      | 22/64 [00:01<00:03, 13.69it/s][A[A[A[A[A[A





 39%|███▉      | 25/64 [00:01<00:02, 15.93it/s][A[A[A[A[A[A





 44%|████▍     | 28/64 [00:01<00:02, 17.32it/s][A[A[A[A[A[A





 50%|█████     | 32/64 [00:01<00:01, 20.04it/s][A[A[A[A[A[A





 55%|█████▍    | 35

Finished applying alignment projection
	p = GD_AmE_destressed_aligned_w_LTR_Buckeye/alignment_of_AmE-diphones-IPA-annotated-columns_w_LTR_Buckeye.json
to
	g = GD_AmE/AmE-diphones-IPA-annotated-columns.csv
Result saved to
	GD_AmE_destressed_aligned_w_LTR_Buckeye/GD_AmE-diphones_aligned_w_LTR_Buckeye.tsv
 
Creating notebook GD_AmE-diphones - LTR_Buckeye alignment applicationto LTR_Buckeye.ipynb w/ args p, g, o = 
	GD_AmE_destressed_aligned_w_LTR_Buckeye/alignment_of_AmE-diphones-IPA-annotated-columns_w_LTR_Buckeye.json
	LTR_Buckeye/LTR_Buckeye.tsv
	LTR_Buckeye_aligned_w_GD_AmE_destressed/LTR_Buckeye_aligned_w_GD_AmE-diphones.tsv


Input Notebook:  Align transcriptions.ipynb
Output Notebook: GD_AmE-diphones - LTR_Buckeye alignment applicationto LTR_Buckeye.ipynb






  0%|          | 0/64 [00:00<?, ?it/s][A[A[A[A[A[A





  6%|▋         | 4/64 [00:00<00:01, 34.79it/s][A[A[A[A[A[A





 12%|█▎        | 8/64 [00:00<00:01, 36.12it/s][A[A[A[A[A[A





 19%|█▉        | 12/64 [00:00<00:01, 29.50it/s][A[A[A[A[A[A





 23%|██▎       | 15/64 [00:00<00:02, 23.85it/s][A[A[A[A[A[A





 30%|██▉       | 19/64 [00:00<00:01, 25.49it/s][A[A[A[A[A[A





 36%|███▌      | 23/64 [00:00<00:01, 27.11it/s][A[A[A[A[A[A





 42%|████▏     | 27/64 [00:00<00:01, 29.27it/s][A[A[A[A[A[A





 47%|████▋     | 30/64 [00:01<00:01, 29.39it/s][A[A[A[A[A[A





 52%|█████▏    | 33/64 [00:01<00:01, 29.25it/s][A[A[A[A[A[A





 58%|█████▊    | 37/64 [00:02<00:02, 11.27it/s][A[A[A[A[A[A





 62%|██████▎   | 40/64 [00:02<00:01, 12.97it/s][A[A[A[A[A[A





 69%|██████▉   | 44/

Finished applying alignment projection
	p = LTR_Buckeye_aligned_w_GD_AmE_destressed/alignment_of_LTR_Buckeye_w_AmE-diphones-IPA-annotated-columns.json
to
	l = LTR_Buckeye/LTR_Buckeye.tsv
Result saved to
	LTR_Buckeye_aligned_w_GD_AmE_destressed/LTR_Buckeye_aligned_w_GD_AmE-diphones.tsv




# Step 3: Generating channel and lexicon distributions

## Step 3a: Generating channel distributions and associated metadata

## Step 3b: Filtering transcription lexicons to only include words that can be modeled by a given channel distribution

## Step 3c: Filter transcription lexicons to only include words that are in a language model's vocabulary

## Step 3d: Define a distribution over the transcibed vocabulary of a language model for each n-gram context in a set of (possibly empty) n-gram contexts

# Step 4: Generating posterior distributions

# Step 5: Generating analysis measures