In [1]:
#Prints **all** console output, not just last item in cell 
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Overview and Requirements 

**Notebook author:** emeinhardt@ucsd.edu

At a high level, I am running Python 3.6.5, Jupyter 5.5.0, and otherwise Anaconda 5.2. This notebook uses **joblib** to speed up computations. **Plots make use of the *plotnine package* (see http://plotnine.readthedocs.io/).**

This notebook is for analyzing channel uniphone and triphone channel distributions produced by `Processing Notebook 3a` representing $p(Y|X)$ and $p(Y_1|X_0, X_1; Y_2)$ and receiver distributions based on those channel distributions.

**Objects of interest:**

Let $\Sigma$ denote the segment inventory.

Suppose the speaker produces isolated wordforms consisting of single segments according to a uniform distribution $p(W) = p(X)$.
Suppose that if $X = x^*$ has actually been produced, the listener perceives $Y = y$ according to $p(Y|X)$ and then reasons Bayesianly about what the speaker intended $p(\hat{X}|X = x^*)$.
  1. What is $h(\hat{X} = x^*|X = x^*), \forall x^* \in \Sigma$?
  2. What is $i(\hat{X} = x^*;X = x^*), \forall x^* \in \Sigma$?
  
Suppose the speaker produces isolated wordforms consisting of three segments (i.e. not including boundary symbols) according to a uniform distribution over licit (and modelable) triphones $p(W = x_0 x_1 x_2|x_0 x_1 x_2 \text{is a licit AmE triphone})$. Suppose that if $W = x_0^* x_1^* x_2^*$ has actually been produced, the listener perceives $W' = y_0^* y_1^* y_2^*$ per a triphone channel distribution and reasons Bayesianly about what the speaker intended $p(\hat{X_0} \hat{X_1} \hat{X_2}|x_0^* x_1^* x_2^*)$.
  1. What is $h(\hat{X_1} = x_1^*|X_0 = x_0^*,X_1 = x_1^*, X_2 = x_2^*), \forall x_1^* \in \Sigma$?
  2. What is $i(\hat{X_1} = x_1^*|X_0 = x_0^*,X_1 = x_1^*, X_2 = x_2^*), \forall x_1^* \in \Sigma$?
  3. What is $i(\hat{x_1^*};x_0^*,x_2^*|x_1^*) = i(\hat{X_1} = x_1^*|X_0 = x_0^*,X_1 = x_1^*, X_2 = x_2^*) - i(\hat{X} = x_1^*;X = x_1^*) = h(\hat{X} = x_1^*|X = x_1^*) - h(\hat{X_1} = x_1^*|X_0 = x_0^*,X_1 = x_1^*, X_2 = x_2^*), \forall x_0^*,x_1^*,x_2^* \in \Sigma$?
    - What is the plot of the distribution of this expression for each $x_1^*$ over choices of $x_0^*,x_2^*$?
    - This expression measures the effect of local phonotactic context on the perceptibility of $x_1^*$ after the effect of the production of $x_1^*$ has been accounted for.

# Overhead

In [2]:
from collections import Counter

from math import log2

def log(x):
    if x == 0.0:
        return 0.0
    return log2(x)

def h(p):
    return -1.0 * log(p)

In [3]:
from joblib import Parallel, delayed

## Boilerplate code for representing and manipulating probability distributions

In [4]:
import random

#from 
#    http://nbviewer.jupyter.org/url/norvig.com/ipython/Probability.ipynb
#    http://nbviewer.jupyter.org/url/norvig.com/ipython/ProbabilityParadox.ipynb
#with slight modification.

from fractions import Fraction

from collections import defaultdict, Counter

is_predicate = callable

def P(event, space): 
    """The probability of an event, given a sample space of equiprobable outcomes. 
    event: a collection of outcomes, or a predicate that is true of outcomes in the event. 
    space: a set of outcomes or a probability distribution of {outcome: frequency} pairs."""
    if is_predicate(event):
        event = such_that(event, space)
    if isinstance(space, ProbDist):
        return sum(space[o] for o in space if o in event)
    else:
        return Fraction(len(event & space), len(space))
    
def such_that(predicate, space): 
    """The outcomes in the sample pace for which the predicate is true.
    If space is a set, return a subset {outcome,...};
    if space is a ProbDist, return a ProbDist {outcome: frequency,...};
    in both cases only with outcomes where predicate(element) is true."""
    if isinstance(space, ProbDist):
        return ProbDist({o:space[o] for o in space if predicate(o)})
    else:
        return {o for o in space if predicate(o)}

# class ProbDist(dict):
class ProbDist(Counter):
    "A Probability Distribution; an {outcome: probability} mapping where probabilities sum to 1."
    def __init__(self, mapping=(), **kwargs):
        self.update(mapping, **kwargs)
        total = sum(self.values())
        if isinstance(total, int): 
            total = Fraction(total, 1)
        for key in self: # Make probabilities sum to 1.
            self[key] = self[key] / total
            
    def __and__(self, predicate): # Call this method by writing `probdist & predicate`
        "A new ProbDist, restricted to the outcomes of this ProbDist for which the predicate is true."
        return ProbDist({e:self[e] for e in self if predicate(e)})
    
    def __repr__(self):
        s = ""
        for k in self:
            if isinstance(self[k], Fraction):
                s+="{0}: {2}/{3} = {1}\n".format(transcriptionReprHack(k), float(self[k]), self[k].numerator, self[k].denominator)
            else:
                s+="{0}: {1}\n".format(transcriptionReprHack(k), float(self[k]))
        return s

dottedStringToTuple = lambda ds: tuple(ds.split('.'))
tupleToDottedString = lambda t: '.'.join(t)

def transcriptionReprHack(k):
    if type(k) == type(tuple()):
        if all(map(lambda el: type(el) == type(''), k)):
            return tupleToDottedString(k)
    return k.__repr__()    

def Uniform(outcomes): return ProbDist({e: 1 for e in outcomes})

def joint(A, B):
    """The joint distribution of two independent probability distributions. 
    Result is all entries of the form {(a, b): P(a) * P(b)}"""
    return ProbDist({(a,b): A[a] * B[b]
                    for a in A
                    for b in B})

from itertools import product
from functools import reduce
import operator

def prod(iterable):
    return reduce(operator.mul, iterable, 1)

def union(iterable):
    return reduce(set.union, iterable)

def joint2(iter_of_dists):
    #ProbDist({(a,b): A[a] * B[b] for a in A for b in B})
    #ProbDist({ab: A[ab[0]] * B[ab[1]] for ab in product(A,B)})
    return ProbDist({each : prod(dist[each[i]] for i,dist in enumerate(iter_of_dists)) for each in list(product(*iter_of_dists))})

In [112]:
d = {'foo':'bar','baz':'quux'}

first = lambda seq: seq[0]
second = lambda seq: seq[1]

In [113]:
tuple(d.items())
tuple(map(second, tuple(d.items())))

(('foo', 'bar'), ('baz', 'quux'))

('bar', 'quux')

In [119]:
#bookkeeping - don't worry about this function...
def getBoundaryVal(dist, i):
    #p = {a:1/2, b:1/2} -> 2 items -> 2-1=1 boundaries where 
    #                   the boundary separating 'a' (item 0) from 'b' (item 1) is at 0+p(a)
    #q = {0:1/3, 1:1/3, 2:1/6, 3:1/6} -> 4 items -> 4-1=3 boundaries where 
    #                   the boundary separating 0 from 1   is at 0 + q(0) = 1/3,
    #                   the boundary separating 1 from 2   is at 0 + q(0) + q(1) = 2/3,
    #                   the boundary separating 2 from 3   is at 0 + q(0) + q(1) + q(2) = 5/6,
    #                ...the boundary separating i from i+1 is at \sum_{j=0}^{j=i} q(j)
    outcomes = list(dist.keys())
    if i >= len(outcomes) - 1:
        raise Exception("Boundary i = {0} out of bounds / does not exist for distribution {1} with {2} outcomes.".format(i, dist, len(outcomes)))
    if i == 0:
        return dist[outcomes[0]]
    return dist[outcomes[i]] + getBoundaryVal(dist, i-1)

#bookkeeping - don't worry about this function...
def getSampleOutcomeIndex(randReal, boundariesLeft, currIndex):
#     print("boundariesLeft: {0}".format(boundariesLeft))
#     print("currIndex: {0}".format(currIndex))
    if boundariesLeft == [] or randReal <= boundariesLeft[0]:
        return currIndex
    return getSampleOutcomeIndex(randReal, boundariesLeft[1:], currIndex + 1)

from random import choices

def sampleFrom(dist, num_samples = None):
    """
    Given a distribution (either an {outcome: probability} mapping where the 
    probabilities sum to 1 or an implicit definition of a distribution via a thunk), 
    this returns a single sample from the distribution, unless num_samples is specified, 
    in which case a generator with num_samples samples is returned.
    """
    if num_samples == None:
        if callable(dist):
            return dist()
        elif isinstance(dist, ProbDist):
            assocMap = dist.items()
            outcomes = tuple(map(first, assocMap))
            weights = tuple(map(second, assocMap))
            return choices(outcomes, weights)[0]
#             outcomes = list(dist.keys())
#         #     print("outcomes: {0}".format(outcomes))

#             boundaries = [getBoundaryVal(dist, i) for i in range(len(outcomes)-1)]
#         #     print("boundaries: {0}".format(boundaries))

#             randVal = random.random() #random real from unit interval
#         #     print("randval: {0}".format(randVal))

#             sampledOutcomeIndex = getSampleOutcomeIndex(randVal, boundaries, 0)
#         #     print("sampledOutcomeIndex: {0}".format(sampledOutcomeIndex))
#             if not (sampledOutcomeIndex >= 0 and sampledOutcomeIndex < len(outcomes)):
#                 print('sampledOutcomeIndex: {0}'.format(sampledOutcomeIndex))
#                 print('len(outcomes): {0}'.format(len(outcomes)))
#                 if len(outcomes) == 0:
#                     print('len(outcomes) == 0! dist:')
#                     print(type(dist))
#                     print(dist)
#             assert(sampledOutcomeIndex >= 0 and sampledOutcomeIndex < len(outcomes))

#             sampledOutcome = outcomes[sampledOutcomeIndex]
#         #     print("sampledOutcome: {0}".format(sampledOutcome))
#             return sampledOutcome
    else:
        if callable(dist):
            return (dist() for each in range(num_samples))
        elif isinstance(dist, ProbDist):
            assocMap = dist.items()
            outcomes = tuple(map(first, assocMap))
            weights = tuple(map(second, assocMap))
            return tuple(choices(outcomes, weights, k=num_samples))
#             return (sampleFrom(dist, num_samples = None) for each in range(num_samples))

from collections import Counter

def frequencies(samples):
    return Counter(samples)

def makeSampler(dist):
    """
    Given a ProbDist, returns a thunk that when called, returns one sample from dist.
    """
    return lambda: sampleFrom(dist)

In [6]:
leftEdge = '⋊'
rightEdge = '⋉'
edgeSymbols = set([leftEdge, rightEdge])

# Choose distributions to analyze

## See what's available

In [7]:
%pwd

'/home/AD/emeinhar/c2-jn'

Uniphone channel distributions:

In [8]:
%ls *pYX*

'Hammond-aligned_destressed_pseudocount0.01 pYX.json'
'Hammond-aligned_destressed_pseudocount0 pYX.json'
'Hammond-aligned_destressed_pseudocount1 pYX.json'
'IPhOD-aligned_destressed_pseudocount0.01 pYX.json'
'IPhOD-aligned_destressed_pseudocount0 pYX.json'
'IPhOD-aligned_destressed_pseudocount1 pYX.json'
'IPhOD-aligned_stressed_pseudocount0 pYX.json'
'unaligned_destressed_pseudocount0.01 pYX.json'
'unaligned_destressed_pseudocount0 pYX.json'
'unaligned_destressed_pseudocount1 pYX.json'
'unaligned_stressed_pseudocount0 pYX.json'
'unaligned_stressed_pseudocount1 pYX.json'


Triphone and preview channel distributions:

In [9]:
%ls *pY1X0X1X2*

'Hammond-aligned_destressed_pseudocount0.01 pY1X0X1X2.json'
'Hammond-aligned_destressed_pseudocount1 pY1X0X1X2.json'
'IPhOD-aligned_destressed_pseudocount0.01 pY1X0X1X2.json'
'IPhOD-aligned_destressed_pseudocount1 pY1X0X1X2.json'
'unaligned_destressed_pseudocount0.01 pY1X0X1X2.json'
'unaligned_destressed_pseudocount1 pY1X0X1X2.json'
'unaligned_stressed_pseudocount1 pY1X0X1X2.json'


In [10]:
%ls *p3Y1X01*

'Hammond-aligned_destressed_pseudocount0.01 p3Y1X01.json'
'Hammond-aligned_destressed_pseudocount1 p3Y1X01.json'
'IPhOD-aligned_destressed_pseudocount0.01 p3Y1X01.json'
'IPhOD-aligned_destressed_pseudocount1 p3Y1X01.json'
'unaligned_destressed_pseudocount0.01 p3Y1X01.json'
'unaligned_destressed_pseudocount1 p3Y1X01.json'
'unaligned_stressed_pseudocount1 p3Y1X01.json'


## Make a choice

Choose whether you want to look at channel distributions unaligned with either lexicon, aligned with Hammond's newdic, or aligned with IPhOD:

In [11]:
# which_alignment = 'unaligned'
which_alignment = 'Hammond-aligned'
# which_alignment = 'IPhOD-aligned'

Choose whether you want to look at channel distributions where stimuli contain stress annotations or where such distinctions have been collapsed:

In [12]:
which_stress = 'destressed'
# which_stress = 'stressed'

Choose a smoothing pseudocount:

In [13]:
# pseudocount = 0
pseudocount = 0.01
# pseudocount = 1

which_pseudocount = 'pseudocount' + str(pseudocount)

In [14]:
which = '_'.join([which_alignment, which_stress, which_pseudocount])
which

whichNoCount = '_'.join([which_alignment, which_stress])
whichNoCount

'Hammond-aligned_destressed_pseudocount0.01'

'Hammond-aligned_destressed'

## Load distributions

In [15]:
import csv, json

In [16]:
uniphone_suff = ' pYX'
triphone_suff = ' pY1X0X1X2'
preview_suff = ' p3Y1X01'

file_ext = '.json'

uniphone_fn = which + uniphone_suff + file_ext
triphone_fn = which + triphone_suff + file_ext
preview_fn = which + preview_suff + file_ext

# response_triphone_list_filename = whichNoCount + ' response diphone-based' + ' ' + 'constructible triphones'

In [17]:
with open(uniphone_fn, encoding='utf-8') as data_file:
   uniph_dist = json.loads(data_file.read())

with open(triphone_fn, encoding='utf-8') as data_file:
   triph_dist = json.loads(data_file.read())

with open(preview_fn, encoding='utf-8') as data_file:
   preview_dist = json.loads(data_file.read())

# response_triphones = []
# with open(response_triphone_list_filename + '.txt', 'r') as the_file:
#     for row in the_file:
#         response_triphones.append(row.rstrip('\r\n'))

In [18]:
len(uniph_dist)
len(triph_dist)
len(preview_dist)
# len(response_triphones)

40

11584

1293

In [19]:
pYX = uniph_dist
pY1X0X1X2 = triph_dist
p3Y1X01 = preview_dist

In [20]:
from functools import reduce
union = lambda Ss: reduce(set.union, Ss, set())

In [70]:
stimuli_uniphones = set(uniph_dist.keys())
len(stimuli_uniphones)
response_uniphones = [set(dist.keys()) for dist in uniph_dist.values()]
response_uniphones = union(response_uniphones)
len(response_uniphones)

stimuli_diphones = set(preview_dist.keys())
len(stimuli_diphones)

stimuli_triphones = set(triph_dist.keys())
len(stimuli_triphones)
# len(response_triphones)
resp_phones = [set(dist.keys()) for dist in triph_dist.values()]
resp_phones = union(resp_phones)
len(resp_phones)
response_uniphones == resp_phones


# response_triphones = [each for each in stimuli_triphones if 'ə' not in each and 'l̩' not in each]
response_triphones = set(map(tupleToDottedString, product(resp_phones, repeat=3)))
len(response_triphones)

40

38

1293

11584

38

True

54872

In [71]:
tupleToDottedString = lambda pair: '.'.join(pair)
dottedStringToTuple = lambda s: tuple(s.split('.'))

In [72]:
leftEdge = '⋊'
rightEdge = '⋉'

In [73]:
Xs = stimuli_uniphones
X_diphs = stimuli_diphones
X_triphs = stimuli_triphones
X_true_triphs = [triph for triph in X_triphs if (not leftEdge in triph) and (not rightEdge in triph)]
Ys = response_uniphones
Y_triphs = response_triphones
Y_true_triphs = [triph for triph in Y_triphs if (not leftEdge in triph) and (not rightEdge in triph)]

In [74]:
len(X_triphs)
len(X_true_triphs)
len(X_diphs)
len(Xs)
len(Ys)
len(Y_triphs)
len(Y_true_triphs)

11584

8958

1293

40

38

54872

54872

# Construct distributions

Let's define $p(X)$:

In [26]:
fX = Counter(stimuli_uniphones)
Xnorm = len(stimuli_uniphones)
pX = {x:fX[x]/Xnorm for x in Xs}
pX

{'l̩': 0.025,
 'l': 0.025,
 'g': 0.025,
 'i': 0.025,
 'aʊ': 0.025,
 'b': 0.025,
 'ɔɪ': 0.025,
 'ʒ': 0.025,
 'v': 0.025,
 'tʃ': 0.025,
 'ʊ': 0.025,
 'ɑ': 0.025,
 'f': 0.025,
 'p': 0.025,
 'dʒ': 0.025,
 's': 0.025,
 'ɪ': 0.025,
 't': 0.025,
 'ɚ': 0.025,
 'r': 0.025,
 'ɛ': 0.025,
 'eɪ': 0.025,
 'd': 0.025,
 'ə': 0.025,
 'ð': 0.025,
 'aɪ': 0.025,
 'ʌ': 0.025,
 'ŋ': 0.025,
 'æ': 0.025,
 'm': 0.025,
 'j': 0.025,
 'u': 0.025,
 'w': 0.025,
 'oʊ': 0.025,
 'h': 0.025,
 'ʃ': 0.025,
 'n': 0.025,
 'k': 0.025,
 'z': 0.025,
 'θ': 0.025}

...and $p(X_0 X_1 X_2)$:

In [27]:
fX_triphs = Counter(X_true_triphs)
num_X_triphs = len(X_true_triphs)
pX0X1X2 = {x0x1x2:fX_triphs[x0x1x2]/num_X_triphs for x0x1x2 in X_true_triphs}
pX0X1X2

{'t.ɚ.b': 0.00011163206072784104,
 'b.ʊ.tʃ': 0.00011163206072784104,
 'æ.n.ə': 0.00011163206072784104,
 't.aɪ.k': 0.00011163206072784104,
 's.ʊ.r': 0.00011163206072784104,
 'i.k.oʊ': 0.00011163206072784104,
 'oʊ.ə.t': 0.00011163206072784104,
 'l.t.m': 0.00011163206072784104,
 'l.eɪ.t': 0.00011163206072784104,
 'r.t.i': 0.00011163206072784104,
 'æ.m.d': 0.00011163206072784104,
 'ə.v.ɚ': 0.00011163206072784104,
 'dʒ.æ.k': 0.00011163206072784104,
 'p.ɚ.n': 0.00011163206072784104,
 'θ.æ.l': 0.00011163206072784104,
 'f.h.j': 0.00011163206072784104,
 'r.aʊ.s': 0.00011163206072784104,
 'æ.v.r': 0.00011163206072784104,
 'n.w.ɑ': 0.00011163206072784104,
 'b.ɔɪ.d': 0.00011163206072784104,
 'j.ɚ.z': 0.00011163206072784104,
 'r.d.t': 0.00011163206072784104,
 't.ɛ.g': 0.00011163206072784104,
 'd.p.ɛ': 0.00011163206072784104,
 'ɪ.m.ɪ': 0.00011163206072784104,
 'z.æ.n': 0.00011163206072784104,
 'ɔɪ.n.d': 0.00011163206072784104,
 'j.ʊ.ɑ': 0.00011163206072784104,
 'r.ð.i': 0.00011163206072784104,
 's.p

In [28]:
# fX_triphs = Counter(X_triphs)
# num_X_triphs = len(X_triphs)
# pX0X1X2 = {x0x1x2:fX_triphs[x0x1x2]/num_X_triphs for x0x1x2 in X_triphs}
# pX0X1X2

...and $p(X_1)$:

In [29]:
def triphsCenteredOn(x1):
    return [triph for triph in X_true_triphs if dottedStringToTuple(triph)[1] == x1]

pX1 = {x1:sum(map(lambda triph: pX0X1X2[triph], triphsCenteredOn(x1))) for x1 in Xs}
pX1
sum(pX1.values())

{'l̩': 0,
 'l': 0.04532261665550316,
 'g': 0.019758874748827843,
 'i': 0.04175039071221229,
 'aʊ': 0.010828309890600584,
 'b': 0.0322616655503459,
 'ɔɪ': 0.007256083947309659,
 'ʒ': 0.0029024335789238683,
 'v': 0.017414601473543206,
 'tʃ': 0.012837686983701732,
 'ʊ': 0.011163206072784109,
 'ɑ': 0.036503683858003816,
 'f': 0.024559053360124955,
 'p': 0.03293145791471294,
 'dʒ': 0.013619111408796623,
 's': 0.04096896628711741,
 'ɪ': 0.043759767805313404,
 't': 0.04442956016968044,
 'ɚ': 0.0448760884125918,
 'r': 0.03192676936816238,
 'ɛ': 0.030029024335789106,
 'eɪ': 0.029917392275061266,
 'd': 0.03315472203616862,
 'ə': 0.03918285331547197,
 'ð': 0.003460593882563074,
 'aɪ': 0.029135967849966388,
 'ʌ': 0.024559053360124955,
 'ŋ': 0.004911810672025004,
 'æ': 0.0339361464612635,
 'm': 0.03516409912926974,
 'j': 0.0065862915829426145,
 'u': 0.026010270149586873,
 'w': 0.015181960258986405,
 'oʊ': 0.03404777852199134,
 'h': 0.01406563965170799,
 'ʃ': 0.01417727171243583,
 'n': 0.04521098459

0.9999999999999962

# Some probability calculations

## Uniphone source and channel distribution

$p(\widehat{X} = \widehat{x}|X = x^*) = \sum\limits_y p(\widehat{X} = \widehat{x}, Y = y|X = x^*) = \sum\limits_y p(\widehat{X} = \widehat{x}|Y = y, X = x^*)p(Y = y|X = x^*) = \sum\limits_y p(\widehat{X} = \widehat{x}|Y = y)p(Y = y|X = x^*)$
$\sum\limits_y p(\widehat{X} = \widehat{x}|Y = y)p(Y = y|X = x^*) = \sum\limits_y \frac{p(Y = y|\widehat{X} = \widehat{x})p(\widehat{X} = \widehat{x})}{p(Y = y)}p(Y = y|X = x^*)$

Below is $p(\widehat{X}|Y)$:

In [30]:
#p(X_hat|Y) \propto             p(Y|X)      * p(X)
pXhatY_unnormalized = {y:{x_hat:pYX[x_hat][y]*pX[x_hat] for x_hat in Xs} for y in Ys}

#"p(Y)" = \sum\limits_x' p(X'_hat|Y)
pY_norm = {y:sum(pXhatY_unnormalized[y].values()) for y in Ys}

#p(X_hat|Y)
pXhatY = {y:{x_hat:pXhatY_unnormalized[y][x_hat]/pY_norm[y] for x_hat in Xs} for y in Ys}

pXhatY_unnormalized['d']
sum(pXhatY_unnormalized['d'].values())
pXhatY['d']
sum(pXhatY['d'].values())

{'l̩': 0.00013519438760595752,
 'l': 0.00026112933802520405,
 'g': 0.0003989554955200285,
 'i': 0.00015602181049967432,
 'aʊ': 0.00016273439751045401,
 'b': 0.0006390965088129636,
 'ɔɪ': 0.00017415327933418265,
 'ʒ': 0.00029936283282487835,
 'v': 0.0002481937279758116,
 'tʃ': 0.0002900495755318373,
 'ʊ': 0.00020211979250161564,
 'ɑ': 0.000258441188975483,
 'f': 0.00023850645684367545,
 'p': 0.00024491158914809224,
 'dʒ': 0.0020055550291575366,
 's': 0.00023941692873427931,
 'ɪ': 0.00016728057133022996,
 't': 0.003233188667055826,
 'ɚ': 0.00015560964995975146,
 'r': 0.00025282523437558944,
 'ɛ': 0.00019101308983007802,
 'eɪ': 0.00016492086000780955,
 'd': 0.011126557853764244,
 'ə': 0.0002687447034207156,
 'ð': 0.00040751125454101104,
 'aɪ': 0.0001746202710584594,
 'ʌ': 0.0003002574383221723,
 'ŋ': 0.00026260759073222907,
 'æ': 0.00024774261053645295,
 'm': 0.0002481825447889833,
 'j': 0.0002387926016250667,
 'u': 0.0001711576658926878,
 'w': 0.00023549443959038682,
 'oʊ': 0.00014960231

0.025449106167675598

{'l̩': 0.005312343259335208,
 'l': 0.0102608451670055,
 'g': 0.015676601484211076,
 'i': 0.006130738324234223,
 'aʊ': 0.006394503462646264,
 'b': 0.025112729091629846,
 'ɔɪ': 0.006843198271355594,
 'ʒ': 0.01176319635167056,
 'v': 0.009752551871195263,
 'tʃ': 0.011397240186778988,
 'ʊ': 0.007942117541178708,
 'ɑ': 0.010155216740136213,
 'f': 0.009371899165033013,
 'p': 0.009623583144117211,
 'dʒ': 0.07880650172715731,
 's': 0.009407675348471641,
 'ɪ': 0.006573141320880763,
 't': 0.12704527403648025,
 'ɚ': 0.006114542842270838,
 'r': 0.009934542797291545,
 'ɛ': 0.0075056895346955225,
 'eɪ': 0.006480418562491016,
 'd': 0.4372081982154932,
 'ə': 0.010560084179383243,
 'ð': 0.016012792427995564,
 'aɪ': 0.006861548295957634,
 'ʌ': 0.01179834907929092,
 'ŋ': 0.010318931792810168,
 'æ': 0.00973482561250522,
 'm': 0.009752112437811843,
 'j': 0.009383142969805799,
 'u': 0.006725488304578853,
 'w': 0.009253544625056502,
 'oʊ': 0.0058784900719062805,
 'h': 0.009445858162949647,
 'ʃ': 0.00947498586

0.9999999999999999

Below is $p(\widehat{X} = \widehat{x}|X = x^*)$:

In [31]:
#p(X_hat = x_hat|X = x_star)
pXhatX = {x_star:{x_hat:sum([pXhatY[y][x_hat]*pYX[x_star][y] for y in Ys]) for x_hat in Xs} for x_star in Xs}
pXhatX['d']
sum(pXhatX['d'].values())

{'l̩': 0.0078707239970974,
 'l': 0.01535735597123141,
 'g': 0.02627794534137534,
 'i': 0.013118710559441188,
 'aʊ': 0.013524962859933709,
 'b': 0.025952492597645413,
 'ɔɪ': 0.01310881415657127,
 'ʒ': 0.01859808008884723,
 'v': 0.016216471377660618,
 'tʃ': 0.02205245651872236,
 'ʊ': 0.016833641209583704,
 'ɑ': 0.015324103830955697,
 'f': 0.016941059383919377,
 'p': 0.01777117402517931,
 'dʒ': 0.04881812700405284,
 's': 0.01697322068275788,
 'ɪ': 0.014854670437171983,
 't': 0.11718452852725722,
 'ɚ': 0.013111068490954227,
 'r': 0.022766856212756516,
 'ɛ': 0.014305952017351544,
 'eɪ': 0.011421922768982138,
 'd': 0.21595964720527147,
 'ə': 0.016795588056344245,
 'ð': 0.021481632280241664,
 'aɪ': 0.012977295072991104,
 'ʌ': 0.01801433881020344,
 'ŋ': 0.016513485005505018,
 'æ': 0.01623652859918686,
 'm': 0.01714967926953457,
 'j': 0.016127604051741542,
 'u': 0.013527291404055055,
 'w': 0.016295268889455147,
 'oʊ': 0.012492943463359895,
 'h': 0.018064135095838287,
 'ʃ': 0.016124296889435362,

0.9999999999999978

$p(\widehat{X} = x)$:

In [32]:
#p(X_hat = x)
pXhat = {x:sum([pXhatX[x_star][x]*pX[x_star] for x_star in Xs]) for x in Xs}
pXhat['d']

0.024999999999999935

Below is $h(X = x)$:

In [33]:
# h(X = x)
hX = {x:h(pX[x]) for x in Xs}
hX['d']

5.321928094887363

$h(\widehat{X} = \widehat{x})$:

In [34]:
#h(X_hat = x_hat)
hXhat = {x:h(pXhat[x]) for x in Xs}
hXhat['d']

5.321928094887366

Below is $h(\widehat{X} = \widehat{x}|X = x^*)$:

In [35]:
#h(X_hat = x_hat|X = x_star)
hXhatX = {x_star:{x_hat:h(pXhatX[x_star][x_hat]) for x_hat in Xs} for x_star in Xs}
hXhatX['d']

{'l̩': 6.989287934954674,
 'l': 6.024926336706624,
 'g': 5.250003713265619,
 'i': 6.252230265620546,
 'aʊ': 6.208231557413061,
 'b': 5.267983081420263,
 'ɔɪ': 6.253319006717943,
 'ʒ': 5.748702492595293,
 'v': 5.94639625923114,
 'tʃ': 5.502916816986028,
 'ʊ': 5.892508916444649,
 'ɑ': 6.028053483652488,
 'f': 5.883332095497837,
 'p': 5.814317195838916,
 'dʒ': 4.356439245204625,
 's': 5.880595846333154,
 'ɪ': 6.072939591597818,
 't': 3.0931459866493327,
 'ɚ': 6.253070926504424,
 'r': 5.456921100733128,
 'ɛ': 6.12724068197656,
 'eɪ': 6.452050655030457,
 'd': 2.2111663297926163,
 'ə': 5.895773880887675,
 'ð': 5.540752569210248,
 'aɪ': 6.26786648374514,
 'ʌ': 5.794710489036723,
 'ŋ': 5.920211571062692,
 'æ': 5.944612975236756,
 'm': 5.8656745941684,
 'j': 5.954324064946345,
 'u': 6.207983195185509,
 'w': 5.939403031482668,
 'oʊ': 6.322742759290694,
 'h': 5.790728009060079,
 'ʃ': 5.95461993753283,
 'n': 5.806433718993289,
 'k': 5.568770983181521,
 'z': 5.917458362744713,
 'θ': 5.7661994885028

Below is $i(\widehat{X} = \widehat{x};X = x^*) = h(\widehat{X} = \widehat{x}) - h(\widehat{X} = \widehat{x}| X = x^*)$

In [36]:
#i(X_hat = x_hat ; X = x_star)
iXhatX = {x_star:{x_hat:(hXhat[x_hat] - hXhatX[x_star][x_hat]) for x_hat in Xs} for x_star in Xs}
iXhatX['d']

{'l̩': -0.6673598400672507,
 'l': -0.7029982418192642,
 'g': 0.07192438162174675,
 'i': -0.9303021707332055,
 'aʊ': -0.8863034625257198,
 'b': 0.053945013467103564,
 'ɔɪ': -0.9313909118305963,
 'ʒ': -0.42677439770793146,
 'v': -0.6244681643437753,
 'tʃ': -0.18098872209867078,
 'ʊ': -0.570580821557348,
 'ɑ': -0.7061253887651411,
 'f': -0.5614040006104704,
 'p': -0.49238910095155575,
 'dʒ': 0.9654888496827381,
 's': -0.5586677514457907,
 'ɪ': -0.7510114967105022,
 't': 2.228782108238093,
 'ɚ': -0.9311428316170964,
 'r': -0.13499300584575735,
 'ɛ': -0.8053125870892464,
 'eɪ': -1.1301225601431168,
 'd': 3.11076176509475,
 'ə': -0.5738457860002866,
 'ð': -0.218824474322874,
 'aɪ': -0.9459383888577992,
 'ʌ': -0.4727823941493261,
 'ŋ': -0.598283476175343,
 'æ': -0.6226848803494445,
 'm': -0.543746499281041,
 'j': -0.6323959700590027,
 'u': -0.8860551002981678,
 'w': -0.6174749365953298,
 'oʊ': -1.000814664403351,
 'h': -0.46879991417273814,
 'ʃ': -0.6326918426454782,
 'n': -0.4845056241059255

## Triphone source and channel distributions

$p_3(Y_0, Y_1, Y_2 | X_0, X_1; X_2) = p_3(Y_0|X_0;X_1) p_3(Y_1|X_0,X_1;X_2) p_3(Y_2|X_1;X_2)$
$ = p_{triph}(Y_0|X_{-1},X_0;X_1) p_{triph}(Y_1|X_0,X_1;X_2) p_3(Y_2|X_1;X_2)$

In [75]:
def p3Y012X012(y012, x012):
#     assert x012 in X_true_triphs
#     assert y012 in Y_true_triphs
    
    x012_t = dottedStringToTuple(x012)
    x01 = tupleToDottedString(x012_t[0:2])
    x12 = tupleToDottedString(x012_t[1:3])
    xL01 = leftEdge + '.' + x01
#     assert xL01 in stimuli_triphones
#     assert x12 in p3Y1X01.keys(), '{0} not in p3Y1X01 keys.'.format(x12)
    
    y012_t = dottedStringToTuple(y012)
    y0,y1,y2 = y012_t[0], y012_t[1], y012_t[2]

#     distForTerm1 = pY1X0X1X2[xL01]
#     term1 = distForTerm1[y0]
    term1 = pY1X0X1X2[xL01][y0]
#     distForTerm2 = pY1X0X1X2[x012]
#     term2 = distForTerm2[y1]
    term2 = pY1X0X1X2[x012][y1]
#     distForTerm3 = p3Y1X01[x12]
#     term3 = distForTerm3[y2]
    term3 = p3Y1X01[x12][y2]
    return term1*term2*term3

# p3Y012X012 = {x012:{y012:p3Y012X012(y012, x012) for y012 in Y_true_triphs} for x012 in X_true_triphs}

p3Y012X012('t.u.p','t.u.b')

0.11441911876381183

$p(\widehat{X_0}, \widehat{X_1}; \widehat{X_2} | Y_0^2) = \frac{p(Y_0^2|X_0^1; X_2) p(X_0^2)}{p(Y_0^2)}$

In [76]:
def p3X_hat012Y012_unnormalized(x012, y012):
#     assert x012 in X_true_triphs
#     assert y012 in Y_true_triphs
    return p3Y012X012(y012, x012)*pX0X1X2[x012]

# The marginalization in this is *killer* - there are 10-50k stimuli triphones to sum over.
print(len(X_true_triphs))
def p3Y012_norm(y012):
#     assert y012 in Y_true_triphs

#     return sum(Parallel(n_jobs=8)(delayed(p3X_hat012Y012_unnormalized)(x012, y012) for x012 in X_true_triphs))
    return sum([p3X_hat012Y012_unnormalized(x012, y012) for x012 in X_true_triphs])

def p3X_hat012Y012(x012, y012):
#     assert x012 in X_true_triphs
#     assert y012 in Y_true_triphs
    return p3X_hat012Y012_unnormalized(x012, y012) / p3Y012_norm(y012)

# execution time is ~90s on kotoba with 40-50k stimuli triphones and no parallelization
#  - ~5-6s with 8-10k and no parallelization
#  - ~3-4s with 8-10k and parallelization (4-8 jobs)
#  - 2-3s with 8-10k stimuli and response triphones and no parallelization
#  - 1s with 8-10k stimuli and response triphones and no parallelization, but removing asserts from this cell
#  - 20-50ms after removing asserts in earlier cells
# p3X_hat012Y012('t.u.b','t.u.p')
p3X_hat012Y012('t.u.p','t.u.p')

8958


0.13475329647041479

Returns one sample by default (or k samples) from $p_3(Y_0^2|x_0, x_1; x_2)$:

In [123]:
def sampleY012(y0Dist, y1Dist, y2Dist):
    y0 = sampleFrom(y0Dist)
    y1 = sampleFrom(y1Dist)
    y2 = sampleFrom(y2Dist)
    return '.'.join((y0, y1, y2))

def p3Y012X012_sample(x012, k = None):
    if k == None:
        k = 1
#     assert x012 in X_true_triphs
    
    x012_t = dottedStringToTuple(x012)
    x01 = tupleToDottedString(x012_t[0:2])
    x12 = tupleToDottedString(x012_t[1:3])
    xL01 = leftEdge + '.' + x01
#     assert xL01 in stimuli_triphones
#     assert x12 in p3Y1X01.keys(), '{0} not in p3Y1X01 keys.'.format(x12)
    
#     def sampleY012():
#         y0 = sampleFrom(ProbDist(pY1X0X1X2[xL01]))
#         y1 = sampleFrom(ProbDist(pY1X0X1X2[x012]))
#         y2 = sampleFrom(ProbDist(p3Y1X01[x12]))
#         return '.'.join((y0, y1, y2))
    y0Dist = ProbDist(pY1X0X1X2[xL01])
    y1Dist = ProbDist(pY1X0X1X2[x012])
    y2Dist = ProbDist(p3Y1X01[x12])
    if k == 1:
        return sampleY012(y0Dist, y1Dist, y2Dist)
    thunk = lambda: sampleY012(y0Dist, y1Dist, y2Dist)
    return set([thunk() for each in range(k)])
#     return sampleY012(xL01, x012, x12, k)

p3Y012X012_sample('t.u.b', 10)

{'aɪ.u.p',
 'n.u.ʒ',
 'r.u.b',
 't.u.b',
 't.u.d',
 't.u.h',
 't.u.p',
 'u.u.ʊ',
 'ʌ.u.m'}

$p(\widehat{X_0^2}|X_0^1; X_2) = \sum\limits_{Y_0^2} p(\widehat{X_0^2}|Y_0^2)p(Y_0^2|X_0^1;X_2)$

In [79]:
# Exact inference is a waste of time - there are ~50k response triphones to sum over
# This takes 5.5m on Kotoba, parallelized (all 12 cores)
print(len(Y_true_triphs))
def p3Xhat012X012(x_hat012, x012):
#     raise Exception("Bad idea.")
#     return sum([p3X_hat012Y012(x_hat012, y012)*p3Y012X012(y012, x012) for y012 in Y_true_triphs])
    def foo(y012):
        return p3X_hat012Y012(x_hat012, y012)*p3Y012X012(y012, x012)
    return sum(Parallel(n_jobs=-1)(delayed(foo)(y012) for y012 in Y_true_triphs))

p3Xhat012X012('t.u.b','t.u.b')

54872


0.043131956442224825

$\widehat{p}(\widehat{X_0^2}|X_0^1;X_2) = \frac{1}{n} \sum\limits_{n \text{ samples from } p(Y_0^2|X_0^1;X_2)} p(\widehat{X_0^2}|Y_0^2)$

In [128]:
def est_p3Xhat012X012(x_hat012, x012, n = None):
    if n == None:
        n = 100
    y012_samples = p3Y012X012_sample(x012, n)
    scale = 1.0 / n
    #unparallelized: 100 in ~1s, 1000 in ~4s, 10000 in 12.5s
    terms = [p3X_hat012Y012(x_hat012, y012_sample) for y012_sample in y012_samples]
    
#     terms = Parallel(n_jobs=-1)(delayed(p3X_hat012Y012)(x_hat012, y012_sample) for y012_sample in y012_samples)
    return scale * sum(terms)

tub_correct_answer = 0.043131956442224825
[est_p3Xhat012X012('t.u.b','t.u.b', 10000) for each in range(1)]

for n in [10, 100, 1000]:
    m = 10
    print('Estimates for n = {0}, m = {1}:'.format(n, m))
    estimates = [est_p3Xhat012X012('t.u.b','t.u.b', n) for each in range(m)]
    estimates
    mean = sum(estimates)/m
    variance = sum([(est - mean)**2 for est in estimates])/m
    print('𝛍, 𝛔² = {0}, {1}'.format(mean, variance))
    l1_errors = [abs(est - tub_correct_answer) for est in estimates]
    L1_loss = sum(l1_errors)/m
    print('L1 loss = {0}'.format(L1_loss))



0.0001


[0.00037433989767815285]

Estimates for n = 10, m = 10:
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1


[0.045625967093687686,
 0.03958083721463186,
 0.043871876884252585,
 0.03184660117528611,
 0.04634382073073301,
 0.03688708360588161,
 0.04314434248615823,
 0.04216751818648716,
 0.03726607420703343,
 0.04490397228137374]

𝛍, 𝛔² = 0.04116380938655254, 1.9579465237666266e-05
L1 loss = 0.0036141865086885077
Estimates for n = 100, m = 10:
0.01
0.01
0.01
0.01
0.01
0.01
0.01
0.01
0.01
0.01


[0.011716062348681168,
 0.012618561743313725,
 0.0131942311747233,
 0.011950277945105068,
 0.011470710048044041,
 0.013431752134768207,
 0.012613373923277526,
 0.011193548943776698,
 0.013109798750775281,
 0.012022772839263276]

𝛍, 𝛔² = 0.012332108985172828, 5.377760595152362e-07
L1 loss = 0.03079984745705199
Estimates for n = 1000, m = 10:
0.001
0.001
0.001
0.001
0.001
0.001
0.001
0.001
0.001
0.001


[0.002647401701847063,
 0.002486634128562951,
 0.0025643322509040596,
 0.0024716485926485738,
 0.0026627974141681283,
 0.0024493274682854662,
 0.002520933546745329,
 0.002362641613445844,
 0.0025412943599858024,
 0.002570617803271276]

𝛍, 𝛔² = 0.0025277628879864494, 7.420891111017726e-09
L1 loss = 0.04060419355423838


$p(\widehat{X_1};|X_0^1;X_2) = \sum\limits_{\widehat{X_0},\widehat{X_2}} p(\widehat{X_0^1};\widehat{X_2}|X_0^1;X_2)$

In [42]:
# Correct, but way too slow to be particularly useful
def p3Xhat1X012(x_hat1, x012):
#     raise Exception('No.')
    stimuliTrueTriphsWxhat1 = [triph for triph in X_true_triphs if dottedStringToTuple(triph)[1] == x_hat1]
#     return sum([p3Xhat012X012(x_hat012, x012) for x_hat012 in stimuliTrueTriphsWxhat1])
    return sum(Parallel(n_jobs=4)(delayed(p3Xhat012X012(x_hat012, x012)) for x_hat012 in stimuliTrueTriphsWxhat1))

p3Xhat1X012('u','t.u.b')

KeyboardInterrupt: 

$p_3(\widehat{X_1};|X_1) = \sum\limits_{X_0,X_2} p_3(\widehat{X_1};|X_0^1;X_2)p(X_0,X_2|X_1) = \sum\limits_{X_0,X_2} p_3(\widehat{X_1};|X_0^1;X_2)\frac{p(X_0^2)}{p(X_1)}$

In [None]:
def p3Xhat1X1(x_hat1, x1):
    stimuliTrueTriphsWxhat1 = [triph for triph in X_true_triphs if dottedStringToTuple(triph)[1] == x_hat1]
#     return sum([p3Xhat1X012(x_hat1, x012)*(pX0X1X2(x012) / pX1[x1]) for x012 in stimuliTrueTriphsWxhat1])
    def foo(x012):
        return p3Xhat1X012(x_hat1, x012)*(pX0X1X2(x012) / pX1[x1])
    return sum(Parallel(n_jobs=4)(delayed(foo)(x012) for x012 in stimuliTrueTriphsWxhat1))

# p3Xhat1X1('u','u')

$p_3(\widehat{X_1}) = \sum\limits_{X_1} p_3(\widehat{X_1};|X_1)p(X_1)$

In [None]:
def p3Xhat1(x_hat1):
#     return sum((p3Xhat1X1(x_hat1, x1)*pX1(x1) for x1 in Xs))
    def foo(x1):
        return p3Xhat1X1(x_hat1, x1)*pX1(x1)
    return sum(Parallel(n_jobs=4)(delayed(foo)(x1) for x1 in Xs))

$h(\hat{X_1})$:

In [None]:
def hX1hat(x_hat1):
    return h(p3Xhat1(x_hat1))

# hX1hat('u')

$h(\widehat{X_1}|X_1)$:

In [None]:
def hXhat1X1(x_hat1, x1):
    return h(p3Xhat1X1(x_hat1, x1))

# hXhat1X1('u','u')

$i(\widehat{X_1}; X_1)$:

In [None]:
def iXhat1X1(x_hat1, x1):
    return hX1hat(x_hat1) - hXhat1X1(x_hat1, x1)

# iXhat1X1('u','u')

$h(\widehat{X_1}|X_0, X_1; X_2)$:

In [None]:
def hXhat1X012(x_hat1, x012):
    return h(p3Xhat1X012(x_hat1, x012))

# hXhat1X012('u','t.u.b')

$i(\widehat{X_1}; X_0, X_2| X_1)$:

In [None]:
def iXhat1X02X1(x_hat1, x012):
    return hXhat1X1(x_hat1, x1) - hXhat1X012(x_hat1, x012)

# iXhat1X02X1('u','t.u.b')

# Measures of interest

## Uniphone model

$h(\widehat{X} = x^* | X = x^*)$ and $i(\widehat{X} = x^* ; X = x^*)$ are below:

In [None]:
hXX = {x_star:hXhatX[x_star][x_star] for x_star in Xs}
iXX = {x_star:iXhatX[x_star][x_star] for x_star in Xs}

hXX['d']
iXX['d']

\FIXME create two dictionaries mapping each $x^* \in \Sigma$ to each of these two measures, export them as json files,  load them into dataframes, and plot them.

## Triphone model

**Goal:** We want to see, for each $x^* \in X$ how $i(\widehat{X_1} = x^*; x_0, x_2 | X_1 = x^*)$ varies over choices of $x_0$ and $x_2$.

In [None]:
def localContextEffect(x012):
    x1 = dottedStringToTuple(x012)[1]
    return iXhat1X02X1(x1, x012)

In [None]:
X1s = set([dottedStringToTuple(x012)[1] for x012 in X_true_triphs])
len(X1s)
len(Xs)
set(Xs) - X1s

In [None]:
contextTokens = [(dottedStringToTuple(x012)[0],' ',dottedStringToTuple(x012)[2]) for x012 in X_true_triphs]
len(contextTokens)
contextTypes = set(contextTokens)
len(contextTypes)
contextTokenCounts = Counter(contextTokens)
contextTokenCounts


In [None]:
tuple(triphsCenteredOn('u'))[:10]

In [None]:
X1sToContexts = {x1:tuple(triphsCenteredOn(x1)) for x1 in X1s}
X1ToNumContexts = {x1:len(X1sToContexts[x1]) for x1 in X1s}
X1ToNumContexts

In [None]:
X1sToContextsToContextEffects = {x1:{ctxt:localContextEffect(ctxt) for ctxt in X1sToContexts[x1]} for x1 in X1s}

In [None]:
X1sToEffects = {x1:tuple(X1sToContextsToContextEffects[x1].values()) for x1 in X1s}

In [None]:
#TODO calculate mean and variance of context effects for each x1
#TODO plot distribution of means and distribution of variances over x1s
#TODO plot context effects for a given x1
