# Imports

In [1]:
#Prints **all** console output, not just last item in cell 
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
from boilerplate import *
from probdist import *

# Load data

In [3]:
import bokeh.palettes as bp

In [4]:
bp.Colorblind6

['#0072B2', '#E69F00', '#F0E442', '#009E73', '#56B4E9', '#D55E00']

In [5]:
def makeWhich(alignment, stress, pseudocount, color=None, shortID=None):
    assert alignment in {'unaligned', 'Hammond-aligned', 'IPhOD-aligned'}
    assert stress in {'stressed', 'destressed'}
    assert pseudocount in {0, 0.001, 0.0011, 0.005, 0.01, 1}
    
    return {'which_alignment': alignment,
            'which_stress': stress,
            'pseudocount': pseudocount,
            'which_pseudocount': 'pseudocount' + str(pseudocount),
            'whichNoCount': '_'.join([alignment, stress]),
            'which':'_'.join([alignment, stress, 'pseudocount' + str(pseudocount)]),
            'color':color,
            'shortID':shortID}

hd0 = makeWhich('Hammond-aligned', 'destressed', 0, bp.Colorblind6[0], 'hd0')
hd0['which']
hd01 = makeWhich('Hammond-aligned', 'destressed', 0.01, bp.Colorblind6[1], 'hd01')
hd01['which']
hd001 = makeWhich('Hammond-aligned', 'destressed', 0.001, bp.Colorblind6[2], 'hd001')
hd001['which']
hd0011 = makeWhich('Hammond-aligned', 'destressed', 0.0011, bp.Colorblind6[4], 'hd0011')
hd0011['which']
hd005 = makeWhich('Hammond-aligned', 'destressed', 0.005, bp.Colorblind6[5], 'hd005')
hd005['which']

'Hammond-aligned_destressed_pseudocount0'

'Hammond-aligned_destressed_pseudocount0.01'

'Hammond-aligned_destressed_pseudocount0.001'

'Hammond-aligned_destressed_pseudocount0.0011'

'Hammond-aligned_destressed_pseudocount0.005'

In [6]:
hd0

{'which_alignment': 'Hammond-aligned',
 'which_stress': 'destressed',
 'pseudocount': 0,
 'which_pseudocount': 'pseudocount0',
 'whichNoCount': 'Hammond-aligned_destressed',
 'which': 'Hammond-aligned_destressed_pseudocount0',
 'color': '#0072B2',
 'shortID': 'hd0'}

In [7]:
hd01

{'which_alignment': 'Hammond-aligned',
 'which_stress': 'destressed',
 'pseudocount': 0.01,
 'which_pseudocount': 'pseudocount0.01',
 'whichNoCount': 'Hammond-aligned_destressed',
 'which': 'Hammond-aligned_destressed_pseudocount0.01',
 'color': '#E69F00',
 'shortID': 'hd01'}

In [8]:
hd001

{'which_alignment': 'Hammond-aligned',
 'which_stress': 'destressed',
 'pseudocount': 0.001,
 'which_pseudocount': 'pseudocount0.001',
 'whichNoCount': 'Hammond-aligned_destressed',
 'which': 'Hammond-aligned_destressed_pseudocount0.001',
 'color': '#F0E442',
 'shortID': 'hd001'}

In [9]:
hd0011

{'which_alignment': 'Hammond-aligned',
 'which_stress': 'destressed',
 'pseudocount': 0.0011,
 'which_pseudocount': 'pseudocount0.0011',
 'whichNoCount': 'Hammond-aligned_destressed',
 'which': 'Hammond-aligned_destressed_pseudocount0.0011',
 'color': '#56B4E9',
 'shortID': 'hd0011'}

In [10]:
hd005

{'which_alignment': 'Hammond-aligned',
 'which_stress': 'destressed',
 'pseudocount': 0.005,
 'which_pseudocount': 'pseudocount0.005',
 'whichNoCount': 'Hammond-aligned_destressed',
 'which': 'Hammond-aligned_destressed_pseudocount0.005',
 'color': '#D55E00',
 'shortID': 'hd005'}

In [11]:
def loadChannelDists(whichDict, loadTriphChannelDist = False):
    def loadDist(distID, isFreqDist = False):
        dist = importProbDist(whichDict['which'] + ' ' + distID + '.json')
        if not isFreqDist:
            assert areNormalized(dist)
            dist = condDistsAsProbDists(dist)
        whichDict[distID] = dist
    distIDs = {'p3Y01X01','p6Y01X01',
               'p3Y1X01', 'p6Y0X01',
               'p3Y0X01', 'p6Y1X01',
               'pYX', 'p3YX', 'p6YX'}
    for each in distIDs:
        loadDist(each)
    
    if loadTriphChannelDist:
        loadDist('pY1X0X1X2')
    
    loadDist('f3_Y0Y1_X0X1', True)
    loadDist('f6_Y0Y1_X0X1', True)

In [12]:
loadChannelDists(hd0)
hd0.keys()

dict_keys(['which_alignment', 'which_stress', 'pseudocount', 'which_pseudocount', 'whichNoCount', 'which', 'color', 'shortID', 'p3YX', 'pYX', 'p6YX', 'p6Y01X01', 'p3Y01X01', 'p6Y1X01', 'p3Y0X01', 'p6Y0X01', 'p3Y1X01', 'f3_Y0Y1_X0X1', 'f6_Y0Y1_X0X1'])

In [13]:
loadChannelDists(hd01, True)
hd01.keys()

FileNotFoundError: [Errno 2] No such file or directory: 'Hammond-aligned_destressed_pseudocount0.01 p3YX.json'

In [14]:
loadChannelDists(hd001, True)
hd001.keys()

dict_keys(['which_alignment', 'which_stress', 'pseudocount', 'which_pseudocount', 'whichNoCount', 'which', 'color', 'shortID', 'p3YX', 'pYX', 'p6YX', 'p6Y01X01', 'p3Y01X01', 'p6Y1X01', 'p3Y0X01', 'p6Y0X01', 'p3Y1X01', 'pY1X0X1X2', 'f3_Y0Y1_X0X1', 'f6_Y0Y1_X0X1'])

In [None]:
loadChannelDists(hd0011, True)
hd0011.keys()

In [None]:
loadChannelDists(hd005, True)
hd005.keys()

In [15]:
def comparableDists(wdA, wdB, compareTriphDist = False, showDiffs = False):
    distIDs = {'p3Y01X01','p6Y01X01',
               'p3Y1X01', 'p6Y0X01',
               'p3Y0X01', 'p6Y1X01',
               'pYX', 'p3YX', 'p6YX'}
    triphDistID = 'pY1X0X1X2'
    freqDistIDs = {'f3_Y0Y1_X0X1', 'f6_Y0Y1_X0X1'}
    allIDs = union([distIDs, {triphDistID}, freqDistIDs])
    for eachID in allIDs:
        for eachWD in [wdA, wdB]:
            if eachID not in eachWD:
                print('{0} not in {1}'.format(eachID, eachWD))
    for each in distIDs:
        if set(conditions(wdA[each])) != set(conditions(wdB[each])):
            print('{0} and {1} differ on conditions for {2}:'.format(wdA['which'],
                                                                     wdB['which'],
                                                                     each))
            if showDiffs:
                print('\t{0} in '.format(each) + wdA['which'] + ' - ' + '{0} in '.format(each) + wdB['which'])
                print( set(conditions(wdA[each])) - set(conditions(wdB[each])) )
                print('\t{0} in '.format(each) + wdB['which'] + ' - ' + '{0} in '.format(each) + wdA['which'])
                print( set(conditions(wdB[each])) - set(conditions(wdA[each])) )

        if uniformOutcomes(wdA[each]) != True:
            print(wdA['which'] + ' does not have uniform outcomes for {0}'.format(each))
        if uniformOutcomes(wdB[each]) != True:
            print(wdB['which'] + ' does not have uniform outcomes for {0}'.format(each))
            
        if outcomes(wdA[each]) != outcomes(wdB[each]):
            print('{0} and {1} differ on outcomes for {2}:'.format(wdA['which'],
                                                                   wdB['which'],
                                                                   each))
            if showDiffs:
                print('\t{0} in '.format(each) + wdA['which'] + ' - ' + '{0} in '.format(each) + wdB['which'])
                print( outcomes(wdA[each]) - outcomes(wdB[each]) )
                print('\t{0} in '.format(each) + wdB['which'] + ' - ' + '{0} in '.format(each) + wdA['which'])
                print( outcomes(wdB[each]) - outcomes(wdA[each]) )

    if compareTriphDist:
        if set(conditions(wdA[triphDistID])) != set(conditions(wdB[triphDistID])):
            print('{0} and {1} differ on keys for {2}:'.format(wdA['which'],
                                                               wdB['which'],
                                                               triphDistID))
            if showDiffs:
                print('\t{0} in '.format(triphDistID) + wdA['which'] + ' - ' + '{0} in '.format(triphDistID) + wdB['which'])
                print( set(conditions(wdA[triphDistID])) - set(conditions(wdB[triphDistID])) )
                print('\t{0} in '.format(triphDistID) + wdB['which'] + ' - ' + '{0} in '.format(triphDistID) + wdA['which'])
                print( set(conditions(wdB[triphDistID])) - set(conditions(wdA[triphDistID])) )

        if uniformOutcomes(wdA[triphDistID]) is not True:
            print(wdA['which'] + ' does not have uniform outcomes for {0}'.format(triphDistID))
        if uniformOutcomes(wdB[triphDistID]) is not True:
            print(wdB['which'] + ' does not have uniform outcomes for {0}'.format(triphDistID))
        
        if outcomes(wdA[triphDistID]) != outcomes(wdB[triphDistID]):
            print('{0} and {1} differ on outcomes for {2}:'.format(wdA['which'],
                                                                   wdB['which'],
                                                                   triphDistID))
            if showDiffs:
                print('\t{0} in '.format(triphDistID) + wdA['which'] + ' - ' + '{0} in '.format(triphDistID) + wdB['which'])
                print( outcomes(wdA[each]) - outcomes(wdB[triphDistID]) )
                print('\t{0} in '.format(triphDistID) + wdB['which'] + ' - ' + '{0} in '.format(triphDistID) + wdA['which'])
                print( outcomes(wdB[triphDistID]) - outcomes(wdA[triphDistID]) )
    for each in freqDistIDs:
        if set(conditions(wdA[each])) != set(conditions(wdB[each])):
            print('{0} and {1} differ on conditions for {2}:'.format(wdA['which'],
                                                                     wdB['which'],
                                                                     each))
            if showDiffs:
                print('\t{0} in '.format(each) + wdA['which'] + ' - ' + '{0} in '.format(each) + wdB['which'])
                print( set(conditions(wdA[each])) - set(conditions(wdB[each])) )
                print('\t{0} in '.format(each) + wdB['which'] + ' - ' + '{0} in '.format(each) + wdA['which'])
                print( set(conditions(wdB[each])) - set(conditions(wdA[each])) )

In [None]:
comparableDists(hd0, hd01, showDiffs = False)

In [16]:
comparableDists(hd0, hd001, showDiffs = False)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
comparableDists(hd0, hd0011, showDiffs = False)

In [None]:
comparableDists(hd001, hd0011, showDiffs = False)

In [None]:
comparableDists(hd0, hd005, showDiffs = False)

# Analyze a family of conditional distributions

## Developing calculation and plotting code...

With respect to a family of conditional distributions $p(Y|X)$, let an **error** denote the event where $y \neq x$.

To calculate a non-vacuous **error rate** with respect to a family of conditional distributions $p(Y|X)$, there are two non-trivial requirements: 
 1. we need a prior $p(X)$.
 2. every outcome $x \in X$ needs at least one corresponding outcome in $y \in Y$ that counts as a *non-error*.

In [17]:
def conditionHasMatchingOutcome(dist, conditioning_event):
    return conditioning_event in outcomes(dist)

def conditionsWithMatchingOutcome(dist):
    return {c for c in dist if conditionHasMatchingOutcome(dist, c)}

def conditionsWithNoMatchingOutcome(dist):
    return set(conditions(dist)) - conditionsWithMatchingOutcome(dist)

def everyConditionHasAMatchingOutcome(dist, asBool = False):
    conditionHasMatch = {c:conditionHasMatchingOutcome(dist, c)
                         for c in dist}
    if not asBool:
        return conditionHasMatch
    return all(conditionHasMatch.values())

def canCalculateErrorRate(condDist, prior):
    priorCoversConditions = all({c in prior for c in condDist})
    everyConditionHasOutcomeMatch = everyConditionHasAMatchingOutcome(condDist, True)
    unCoveredConditions = {c for c in condDist if c not in prior}
    unMatchedConditions = conditionsWithNoMatchingOutcome(condDist)
#     assert all({c in prior for c in condDist})
#     assert everyConditionHasAMatchingOutcome(condDist, True)
    if priorCoversConditions and everyConditionHasOutcomeMatch:
        return True
    elif priorCoversConditions and not everyConditionHasOutcomeMatch:
        return False, 'Not every condition has a matching outcome.', unMatchedConditions
    elif everyConditionHasOutcomeMatch and not priorCoversConditions:
        return False, 'Prior does not specify a probability for every condition.', unCoveredConditions

In [None]:
distIDs = ['p3Y01X01','p6Y01X01',
#            'p3Y1X01', 'p6Y0X01',
#            'p3Y0X01', 'p6Y1X01',
           'pYX', 'p3YX', 'p6YX']
allDistIDs = distIDs #+ ['pY1X0X1X2']

for model in [hd0, hd01, hd001, hd0011, hd005]:
    for distID in allDistIDs:
        if distID in model:
            result = canCalculateErrorRate(model[distID], 
                                           Uniform(set(conditions(model[distID]))))
            if result != True:
                print("Can't do a trivial error rate calculation for {0} in {1} because:\n\t{2}".format(distID, 
                                                                                                        model['which'],
                                                                                                        result[1]#, result[2]
                                                                                                        ) )
    print(' ')

In [None]:
conditionsWithNoMatchingOutcome(hd0['pYX'])
conditionsWithNoMatchingOutcome(hd01['pYX'])
conditionsWithNoMatchingOutcome(hd001['pYX'])
conditionsWithNoMatchingOutcome(hd0011['pYX'])
conditionsWithNoMatchingOutcome(hd005['pYX'])

While stimuli diphones in the gating data systematically vary in terms of stress, subject responses do not contain any stress information. Generally speaking, schwa and wedge (`ʌ`) really only differ in terms of stress. (I am told that it is primarily UK phoneticians who maintain there is something more to the difference between schwa and wedge.) Wedge appears in subject responses in the gating data, so that will count as the correct response for schwa.

In [18]:
def matchSymbol(y, x):
    if x == 'ə':
        return y in {'ə', 'ʌ'}
    return x == y

def matchString(v, u):
    u_t = ds2t(u)
    v_t = ds2t(v)
    if len(v_t) != len(u_t):
#         print(u_t)
#         print(v_t)
        return False
#     for y,x in zip(v_t, u_t):
#         print(y)
#         print(x)
#         print(matchSymbol(y,x))
#         print('---')
    return all([matchSymbol(y,x) for y,x in zip(v_t, u_t)])

list(zip('abc', '123'))

matchString('a.b.c', '1.2.3') #should be False
matchString('a.b.c', 'a.b.c') #should be True
matchString('a.ə.c', 'a.ʌ.c') #should be False
matchString('a.ʌ.c', 'a.ə.c') #should be True

schwa = 'ə'
wedge = 'ʌ'

[('a', '1'), ('b', '2'), ('c', '3')]

False

True

False

True

The function below calculates the probability for each condition $c$ that the resulting outcome $o = c$ - the 'probability of correct transmission of $c$'.

In [19]:
def calculateCorrectProbs(condDist, identityRelation = lambda y,x: y == x):
    correctProbs = {c:sum(condDist[c][o] for o in condDist[c] if identityRelation(o, c))
                  for c in condDist}
    return correctProbs

In [20]:
hd0['pYX_correctProbs'] = calculateCorrectProbs(hd0['pYX'], identityRelation = matchSymbol)
hd0['pYX_correctProbs']

{'v': 0.8254063467492263,
 'dʒ': 0.5620162538699692,
 'aɪ': 0.8234302484302484,
 'm': 0.8477852852852853,
 'b': 0.6370300751879705,
 'r': 0.8372241086587437,
 'n': 0.8828947368421052,
 'j': 0.7220512820512822,
 'ʌ': 0.7202399380804955,
 'h': 0.7629456327985742,
 'd': 0.6806484962406018,
 'g': 0.5911184210526319,
 'i': 0.9635178376906315,
 'ð': 0.24096283783783767,
 'l': 0.8673611111111108,
 's': 0.8917669172932331,
 'ə': 0.36676065162907273,
 'eɪ': 0.922149292149292,
 'θ': 0.6907205882352944,
 'ŋ': 0.8663851351351352,
 'ʒ': 0.6997906698564598,
 'f': 0.7521016081871346,
 'w': 0.7558333333333335,
 't': 0.6543402777777779,
 'ɚ': 0.9470770402755697,
 'ɔɪ': 0.9414014821272885,
 'ʊ': 0.2963492063492064,
 'u': 0.9411254084967319,
 'æ': 0.4675892857142858,
 'ɛ': 0.6477976190476192,
 'ɪ': 0.5547282608695656,
 'ɑ': 0.7418123433583957,
 'p': 0.8784173976608187,
 'k': 0.9122076023391812,
 'aʊ': 0.8571641494435611,
 'oʊ': 0.8525503444621091,
 'tʃ': 0.8662538699690404,
 'ʃ': 0.8557647058823529,
 'z'

In [None]:
hd01['pYX_correctProbs'] = calculateCorrectProbs(hd01['pYX'], identityRelation = matchSymbol)
hd01['pYX_correctProbs']

In [21]:
hd001['pYX_correctProbs'] = calculateCorrectProbs(hd001['pYX'], identityRelation = matchSymbol)
hd001['pYX_correctProbs']

{'m': 0.8031529246381675,
 't': 0.6222920110043708,
 'z': 0.8478893603277659,
 'tʃ': 0.8213807012911137,
 'd': 0.6448085136172291,
 'h': 0.729286656877763,
 'j': 0.6887629007080917,
 'æ': 0.4537473447134127,
 'θ': 0.6549122876413597,
 'aʊ': 0.8334457722975906,
 'w': 0.7211810901967612,
 'i': 0.9365024733913928,
 'aɪ': 0.8005631382301814,
 'b': 0.6052174990398306,
 'ɑ': 0.7212485370398266,
 'ɛ': 0.6281862543353222,
 'k': 0.8634876691528857,
 'oʊ': 0.8287654244382325,
 'l': 0.8211858949164839,
 'ɚ': 0.9193170245254986,
 'u': 0.9146811638209081,
 'ʌ': 0.677783601168951,
 'ʊ': 0.28776081785088725,
 'ʒ': 0.6645787039266405,
 'r': 0.7917551479643298,
 'ə': 0.3487151986302878,
 'ʃ': 0.8135892892914447,
 'dʒ': 0.5322864619517309,
 'ɔɪ': 0.915204899340184,
 's': 0.8442353657721512,
 'ɪ': 0.537607764709214,
 'g': 0.5615410940736137,
 'f': 0.7109400692173881,
 'eɪ': 0.896406975783208,
 'p': 0.83170214392847,
 'ð': 0.22909319661955782,
 'v': 0.7823711802636998,
 'n': 0.8362511981832337,
 'ŋ': 0.82

In [None]:
hd0011['pYX_correctProbs'] = calculateCorrectProbs(hd0011['pYX'], identityRelation = matchSymbol)
hd0011['pYX_correctProbs']

In [None]:
hd005['pYX_correctProbs'] = calculateCorrectProbs(hd005['pYX'], identityRelation = matchSymbol)
hd005['pYX_correctProbs']

In [22]:
from bokeh.io import show, output_notebook
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, Legend, HoverTool
from bokeh.plotting import figure
output_notebook()

In [23]:
Xs = sorted(list(set(conditions(hd01['pYX_correctProbs']))))
correctProbs0 = [hd0['pYX_correctProbs'][x] for x in Xs]
correctProbs01 = [hd01['pYX_correctProbs'][x] for x in Xs]

mySource = ColumnDataSource(data=dict(x=Xs, y0=correctProbs0, y1=correctProbs0))

TOOLS = "box_select,lasso_select,help"

# create a new plot and add a renderer
left = figure(x_range=Xs, tools=TOOLS, plot_width=400, plot_height=300, title='hd0 p(Y=x|X=x)')
left.circle('x', 'y0', source=mySource, color='red')

# create another new plot and add a renderer
right = figure(x_range=Xs, tools=TOOLS, plot_width=400, plot_height=300, title='hd01 p(Y=x|X=x)')
right.circle('x', 'y1', source=mySource, color='blue')

p = gridplot([[left, right]])

show(p)

KeyError: 'pYX_correctProbs'

In [None]:
Xs = sorted(list(set(conditions(hd01['pYX_correctProbs']))))
correctProbs0 = [hd0['pYX_correctProbs'][x] for x in Xs]
correctProbs01 = [hd01['pYX_correctProbs'][x] for x in Xs]

mySource = ColumnDataSource(data=dict(x=Xs, y0=correctProbs0, y1=correctProbs01))

TOOLS = "box_select,lasso_select,help,hover"

# create a new plot and add a renderer
together = figure(x_range=Xs, tools=TOOLS, plot_width=600, plot_height=400, title='p(Y=x|X=x)')
reds = together.circle('x', 'y0', source=mySource, color='red', legend='hd0')
blues = together.circle('x', 'y1', source=mySource, color='blue', legend='hd01')
together.legend.location = 'bottom_left'

legend = Legend(items=[
    ('hd0', [reds]),
    ('hd01', [blues])
], location=(0,-30))
together.add_layout(legend, 'right')

# # create another new plot and add a renderer
# right = figure(x_range=Xs, tools=TOOLS, plot_width=400, plot_height=300, title='hd01 p(Y|X) Error Probability')
# right.circle('x', 'y1', source=mySource)

# p = gridplot([[left, right]])

show(together)

In [24]:
def comparePlotsOfCorrectProbs(wdA, wdB, distID, identityRelation, title = None, wdA_legend_key = None, wdB_legend_key = None, colorA = None, colorB = None, alpha = 1.0):
    wdA_correctProbs = calculateCorrectProbs(wdA[distID], identityRelation = identityRelation)
    wdB_correctProbs = calculateCorrectProbs(wdB[distID], identityRelation = identityRelation)
    
    Xs = sorted(list(set(conditions(wdA_correctProbs))))
    correctProbsA = [wdA_correctProbs[x] for x in Xs]
    correctProbsB = [wdB_correctProbs[x] for x in Xs]

    mySource = ColumnDataSource(data=dict(x=Xs, y0=correctProbsA, y1=correctProbsB))

    TOOLS = "box_select,lasso_select,help"

    # create a new plot and add a renderer
    if title is None:
        title = 'Probability of channel signal matching source signal'
    if wdA_legend_key is None:
        if wdA['shortID'] is None:
            wdA_legend_key = wdA['which']
        else:
            wdA_legend_key = wdA['shortID']
    if wdB_legend_key is None:
        if wdB['shortID'] is None:
            wdB_legend_key = wdB['which']
        else:
            wdB_legend_key = wdB['shortID']
    if colorA is None:
        if wdA['color'] is None:
            colorA = bp.Colorblind3[0]
        else:
            colorA = wdA['color']
    if colorB is None:
        if wdB['color'] is None:
            colorB = bp.Colorblind3[1]
        else:
            colorB = wdB['color']
    together = figure(x_range=Xs, tools=TOOLS, plot_width=800, plot_height=500, title=title)
    A_glyphs = together.circle('x', 'y0', source=mySource, color=colorA, legend=wdA_legend_key, name=wdA_legend_key, alpha=alpha)
    B_glyphs = together.circle('x', 'y1', source=mySource, color=colorB, legend=wdB_legend_key, name=wdB_legend_key, alpha=alpha)
    together.legend.location = 'bottom_left'
    
    hover = HoverTool(
        tooltips=[
            ("{0} p(correct)".format(wdA_legend_key), "@y0"),
            ("{0} p(correct)".format(wdB_legend_key), "@y1"),
            ("x", "@x"),
        ]
    )
    together.add_tools(hover)
    
#     show(together)
    return together

In [25]:
pyx_plot = comparePlotsOfCorrectProbs(hd0, hd01, 'pYX', matchSymbol, 'p(Y=x|X=x)')
show(pyx_plot)

KeyError: 'pYX'

In [None]:
import holoviews as hv
from holoviews import opts
hv.extension('bokeh')

In [None]:
hd0_pYX_correct = hv.Scatter(hd0['pYX_correctProbs'], 'X', 'p(y = x|X = x)').opts(color='red', tools=['hover'])
# hd0_pYX_correct
hd01_pYX_correct = hv.Scatter(hd01['pYX_correctProbs'], 'X', 'p(y = x|X = x)').opts(color='blue', tools=['hover'])
# hd01_pYX_correct
# pYX_correct_layout = hd0_pYX_correct + hd01_pYX_correct
# pYX_correct_layout
pYX_correct_overlay = hd0_pYX_correct * hd01_pYX_correct
pYX_correct_overlay.opts(width=600, height=400)

## Channel error probability distribution comparisons

$C(p(Y|X), p(X), \text{id}) = \sum\limits_{x} p(x) \cdot p(y|x) \cdot \text{id}(y,x)$, where
 - $\text{id}(y,x) = 1$ iff $(y,x) \in $ the identity relation $\text{id}$, and $0$ otherwise.
 
$\text{Error}(p(Y|X), p(X), \text{id}) = 1.0 - C(p(Y|X), p(X), \text{id})$

In [None]:
def calculateCorrectRate(condDist, prior = None, identityRelation = lambda y,x: y == x):
    if prior is None:
        prior = Uniform(set(conditions(condDist)))
    
#     unweighted_errorRate = {c:P(lambda o: identityRelation(o, c),
#                                 condDist[c])
#                             for c in condDist}
    correctProbs = {c:sum(condDist[c][o] for o in condDist[c] if identityRelation(o, c))
                  for c in condDist}
    weighted_correctRate = {c:prior[c] * correctProbs[c]
                          for c in condDist}
    return sum(weighted_correctRate.values())

In [None]:
def rateReport(wdA, wdB, distID, prior = None, identityRelation = None):
    if identityRelation is None:
        identityRelation = lambda y,x: y == x
    correctRateA = calculateCorrectRate(wdA[distID], prior, identityRelation)
    if wdB is not None:
        correctRateB = calculateCorrectRate(wdB[distID], prior, identityRelation)

    print('Avg. probability of correct channel tranmsission:')
    print('\t' + wdA['which'] + ':\t' + '{0}'.format(correctRateA))
    if wdB is not None:
        print('\t' + wdB['which'] + ':\t' + '{0}'.format(correctRateB))
    
    print('Avg. probability of channel error:')
    print('\t' + wdA['which'] + ':\t' + '{0}'.format(1.0 - correctRateA))
    if wdB is not None:
        print('\t' + wdB['which'] + ':\t' + '{0}'.format(1.0 - correctRateB))
        
# def avgEntropyReport(wdA, wdB, distID, prior = None):
#     avgEntropyA = sum()

### Uniphone error rate

In [None]:
lowAlpha=0.3
highAlpha=0.6

The plot below combines information about gates 3 and gates 6. (I.e. `pYX` is an aggregate of `p3YX` and `p6YX`.)

In [None]:
pyx_plot = comparePlotsOfCorrectProbs(hd0, hd01, 'pYX', matchSymbol, 'p(Y=x|X=x)')
show(pyx_plot)

rateReport(hd0, hd01, 'pYX', identityRelation = matchSymbol)

In [None]:
pyx_plot = comparePlotsOfCorrectProbs(hd0, hd001, 'pYX', matchSymbol, 'p(Y=x|X=x)')
show(pyx_plot)

rateReport(hd0, hd001, 'pYX', identityRelation = matchSymbol)

In [None]:
pyx_plot = comparePlotsOfCorrectProbs(hd0, hd0011, 'pYX', matchSymbol, 'p(Y=x|X=x)')
show(pyx_plot)

rateReport(hd0, hd0011, 'pYX', identityRelation = matchSymbol)

In [None]:
pyx_plot = comparePlotsOfCorrectProbs(hd0, hd005, 'pYX', matchSymbol, 'p(Y=x|X=x)')
show(pyx_plot)

rateReport(hd0, hd005, 'pYX', identityRelation = matchSymbol)

## Diphone error rate

$p_3(Y_0, Y_1 | X_0; X_1):$

In [None]:
p3Y01X01_plot = comparePlotsOfCorrectProbs(hd0, hd01, 'p3Y01X01', matchString, 'p3(Y01=x01|X01=x01)', alpha=0.4)
p3Y01X01_plot.xgrid.grid_line_color = None
p3Y01X01_plot.legend.location = 'top_left'

show(p3Y01X01_plot)

rateReport(hd0, hd01, 'p3Y01X01', identityRelation = matchString)

In [None]:
p3Y01X01_plot = comparePlotsOfCorrectProbs(hd0, hd001, 'p3Y01X01', matchString, 'p3(Y01=x01|X01=x01)', alpha=0.4)
p3Y01X01_plot.xgrid.grid_line_color = None
p3Y01X01_plot.legend.location = 'top_left'

show(p3Y01X01_plot)

rateReport(hd0, hd001, 'p3Y01X01', identityRelation = matchString)

In [None]:
p3Y01X01_plot = comparePlotsOfCorrectProbs(hd0, hd0011, 'p3Y01X01', matchString, 'p3(Y01=x01|X01=x01)', alpha=0.4)
p3Y01X01_plot.xgrid.grid_line_color = None
p3Y01X01_plot.legend.location = 'top_left'

show(p3Y01X01_plot)

rateReport(hd0, hd0011, 'p3Y01X01', identityRelation = matchString)

In [None]:
p3Y01X01_plot = comparePlotsOfCorrectProbs(hd0, hd005, 'p3Y01X01', matchString, 'p3(Y01=x01|X01=x01)', alpha=0.4)
p3Y01X01_plot.xgrid.grid_line_color = None
p3Y01X01_plot.legend.location = 'top_left'

show(p3Y01X01_plot)

rateReport(hd0, hd005, 'p3Y01X01', identityRelation = matchString)

$p_6(Y_0, Y_1| X_0, X_1;)$:

In [None]:
p6Y01X01_plot = comparePlotsOfCorrectProbs(hd0, hd01, 'p6Y01X01', matchString, 'p6(Y01=x01|X01=x01)', alpha=0.4)
p6Y01X01_plot.xgrid.grid_line_color = None
# p6Y01X01_plot.legend.location = 'top_left'

show(p6Y01X01_plot)

rateReport(hd0, hd01, 'p6Y01X01', identityRelation = matchString)

In [None]:
p6Y01X01_plot = comparePlotsOfCorrectProbs(hd0, hd001, 'p6Y01X01', matchString, 'p6(Y01=x01|X01=x01)', alpha=0.4)
p6Y01X01_plot.xgrid.grid_line_color = None
# p6Y01X01_plot.legend.location = 'top_left'

show(p6Y01X01_plot)

rateReport(hd0, hd001, 'p6Y01X01', identityRelation = matchString)

In [None]:
p6Y01X01_plot = comparePlotsOfCorrectProbs(hd0, hd0011, 'p6Y01X01', matchString, 'p6(Y01=x01|X01=x01)', alpha=0.4)
p6Y01X01_plot.xgrid.grid_line_color = None
# p6Y01X01_plot.legend.location = 'top_left'

show(p6Y01X01_plot)

rateReport(hd0, hd0011, 'p6Y01X01', identityRelation = matchString)

In [None]:
p6Y01X01_plot = comparePlotsOfCorrectProbs(hd0, hd005, 'p6Y01X01', matchString, 'p6(Y01=x01|X01=x01)', alpha=0.4)
p6Y01X01_plot.xgrid.grid_line_color = None
# p6Y01X01_plot.legend.location = 'top_left'

show(p6Y01X01_plot)

rateReport(hd0, hd005, 'p6Y01X01', identityRelation = matchString)

In [None]:
p3Y01X01_plot = comparePlotsOfCorrectProbs(hd0, hd01, 'p3Y01X01', matchString, 'p3(Y01=x01|X01=x01)')
p3Y01X01_plot.width=6000
p3Y01X01_plot.xgrid.grid_line_color = None
p3Y01X01_plot.legend.location = 'top_left'
show(p3Y01X01_plot)

p6Y01X01_plot = comparePlotsOfCorrectProbs(hd0, hd01, 'p6Y01X01', matchString, 'p6(Y01=x01|X01=x01)')
p6Y01X01_plot.width=6000
p6Y01X01_plot.xgrid.grid_line_color = None
# p6Y01X01_plot.legend.location = 'top_left'
show(p6Y01X01_plot)

In [None]:
schwa
wedge

def matchSymbolToFirstSymbol(y, v):
    v_t = ds2t(v)
    x = v_t[0]
    return matchSymbol(y, x)

def matchSymbolToSecondSymbol(y, v):
    v_t = ds2t(v)
    x = v_t[1]
    return matchSymbol(y, x)

matchSymbolToFirstSymbol('ʌ', 'ə' + '.' +  'i')
matchSymbolToFirstSymbol('ʌ', 'i' + '.' +  'ə')
matchSymbolToSecondSymbol('ʌ', 'ə' + '.' +  'i')
matchSymbolToSecondSymbol('ʌ', 'i' + '.' +  'ə')

$p_3(Y_0 | X_0; X_1)$:

In [None]:
p3Y0X01_plot = comparePlotsOfCorrectProbs(hd0, hd01, 'p3Y0X01', matchSymbolToFirstSymbol, 'p3(Y0=x0|X01=x01)', alpha=0.4)
p3Y0X01_plot.xgrid.grid_line_color = None
p3Y0X01_plot.legend.location = 'top_left'

show(p3Y0X01_plot)

rateReport(hd0, hd01, 'p3Y0X01', identityRelation = matchSymbolToFirstSymbol)

In [None]:
p3Y0X01_plot = comparePlotsOfCorrectProbs(hd0, hd001, 'p3Y0X01', matchSymbolToFirstSymbol, 'p3(Y0=x0|X01=x01)', alpha=0.4)
p3Y0X01_plot.xgrid.grid_line_color = None
p3Y0X01_plot.legend.location = 'top_left'

show(p3Y0X01_plot)

rateReport(hd0, hd001, 'p3Y0X01', identityRelation = matchSymbolToFirstSymbol)

In [None]:
p3Y0X01_plot = comparePlotsOfCorrectProbs(hd0, hd0011, 'p3Y0X01', matchSymbolToFirstSymbol, 'p3(Y0=x0|X01=x01)', alpha=0.4)
p3Y0X01_plot.xgrid.grid_line_color = None
p3Y0X01_plot.legend.location = 'top_left'

show(p3Y0X01_plot)

rateReport(hd0, hd0011, 'p3Y0X01', identityRelation = matchSymbolToFirstSymbol)

$p_6(Y_1| X_0, X_1;)$:

In [None]:
p6Y0X01_plot = comparePlotsOfCorrectProbs(hd0, hd01, 'p6Y1X01', matchSymbolToSecondSymbol, 'p6(Y1=x1|X01=x01)', alpha=0.4)
p6Y0X01_plot.xgrid.grid_line_color = None
p6Y0X01_plot.legend.location = 'top_left'

show(p6Y0X01_plot)

rateReport(hd0, hd01, 'p6Y1X01', identityRelation = matchSymbolToSecondSymbol)

In [None]:
p6Y0X01_plot = comparePlotsOfCorrectProbs(hd0, hd001, 'p6Y1X01', matchSymbolToSecondSymbol, 'p6(Y1=x1|X01=x01)', alpha=0.4)
p6Y0X01_plot.xgrid.grid_line_color = None
p6Y0X01_plot.legend.location = 'top_left'

show(p6Y0X01_plot)

rateReport(hd0, hd001, 'p6Y1X01', identityRelation = matchSymbolToSecondSymbol)

In [None]:
p6Y0X01_plot = comparePlotsOfCorrectProbs(hd0, hd0011, 'p6Y1X01', matchSymbolToSecondSymbol, 'p6(Y1=x1|X01=x01)', alpha=0.4)
p6Y0X01_plot.xgrid.grid_line_color = None
p6Y0X01_plot.legend.location = 'top_left'

show(p6Y0X01_plot)

rateReport(hd0, hd0011, 'p6Y1X01', identityRelation = matchSymbolToSecondSymbol)

In [None]:
p3Y0X01_plot = comparePlotsOfCorrectProbs(hd0, hd01, 'p3Y0X01', matchSymbolToFirstSymbol, 'p3(Y0=x0|X01=x01)', alpha=0.4)
p3Y0X01_plot.xgrid.grid_line_color = None
p3Y0X01_plot.legend.location = 'top_left'
p3Y0X01_plot.width = 6000

show(p3Y0X01_plot)


p6Y0X01_plot = comparePlotsOfCorrectProbs(hd0, hd01, 'p6Y1X01', matchSymbolToFirstSymbol, 'p6(Y1=x1|X01=x01)', alpha=0.4)
p6Y0X01_plot.xgrid.grid_line_color = None
p6Y0X01_plot.legend.location = 'top_left'
p6Y0X01_plot.width = 6000

show(p6Y0X01_plot)


### Triphone error rate

$p_3(Y_1| X_0, X_1; X_2)$:

In [None]:
pY1X0X1X2_correctProbs = calculateCorrectProbs(hd01['pY1X0X1X2'], identityRelation = matchSymbolToSecondSymbol)
    
Xs = sorted(list(set(conditions(pY1X0X1X2_correctProbs))))
correctProbs = [pY1X0X1X2_correctProbs[x] for x in Xs]

mySource = ColumnDataSource(data=dict(x=Xs, y=correctProbs))

# TOOLS = "box_select,lasso_select,help"
TOOLS = "help"

triphDist_fig = figure(x_range=Xs, tools=TOOLS, plot_width=800, plot_height=500, title='p3(Y1=x1|X012=x012)')
triphDist_fig.circle('x', 'y', source=mySource, color=hd01['color'], name=hd01['shortID'], alpha = 0.1)
# triphDist_fig.legend.location = 'bottom_left'

hover = HoverTool(
    tooltips=[
        ("{0} p(correct)".format('hd01'), "@y"),
        ("x", "@x"),
    ]
)
triphDist_fig.add_tools(hover)

triphDist_fig.xgrid.grid_line_color = None

show(triphDist_fig)

rateReport(hd01, None, 'pY1X0X1X2', identityRelation = matchSymbolToSecondSymbol)

In [None]:
pY1X0X1X2_correctProbs = calculateCorrectProbs(hd001['pY1X0X1X2'], identityRelation = matchSymbolToSecondSymbol)
    
Xs = sorted(list(set(conditions(pY1X0X1X2_correctProbs))))
correctProbs = [pY1X0X1X2_correctProbs[x] for x in Xs]

mySource = ColumnDataSource(data=dict(x=Xs, y=correctProbs))

# TOOLS = "box_select,lasso_select,help"
TOOLS = "help"

triphDist_fig = figure(x_range=Xs, tools=TOOLS, plot_width=800, plot_height=500, title='p3(Y1=x1|X012=x012)')
triphDist_fig.circle('x', 'y', source=mySource, color=hd001['color'], name=hd001['shortID'], alpha = 0.1)
# triphDist_fig.legend.location = 'bottom_left'

hover = HoverTool(
    tooltips=[
        ("{0} p(correct)".format('hd001'), "@y"),
        ("x", "@x"),
    ]
)
triphDist_fig.add_tools(hover)

triphDist_fig.xgrid.grid_line_color = None

show(triphDist_fig)

rateReport(hd001, None, 'pY1X0X1X2', identityRelation = matchSymbolToSecondSymbol)

In [None]:
pY1X0X1X2_correctProbs = calculateCorrectProbs(hd0011['pY1X0X1X2'], identityRelation = matchSymbolToSecondSymbol)
    
Xs = sorted(list(set(conditions(pY1X0X1X2_correctProbs))))
correctProbs = [pY1X0X1X2_correctProbs[x] for x in Xs]

mySource = ColumnDataSource(data=dict(x=Xs, y=correctProbs))

# TOOLS = "box_select,lasso_select,help"
TOOLS = "help"

triphDist_fig = figure(x_range=Xs, tools=TOOLS, plot_width=800, plot_height=500, title='p3(Y1=x1|X012=x012)')
triphDist_fig.circle('x', 'y', source=mySource, color=hd0011['color'], name=hd0011['shortID'], alpha = 0.1)
# triphDist_fig.legend.location = 'bottom_left'

hover = HoverTool(
    tooltips=[
        ("{0} p(correct)".format('hd0011'), "@y"),
        ("x", "@x"),
    ]
)
triphDist_fig.add_tools(hover)

triphDist_fig.xgrid.grid_line_color = None

show(triphDist_fig)

rateReport(hd0011, None, 'pY1X0X1X2', identityRelation = matchSymbolToSecondSymbol)

In [None]:
pY1X0X1X2_correctProbs = calculateCorrectProbs(hd005['pY1X0X1X2'], identityRelation = matchSymbolToSecondSymbol)
    
Xs = sorted(list(set(conditions(pY1X0X1X2_correctProbs))))
correctProbs = [pY1X0X1X2_correctProbs[x] for x in Xs]

mySource = ColumnDataSource(data=dict(x=Xs, y=correctProbs))

# TOOLS = "box_select,lasso_select,help"
TOOLS = "help"

triphDist_fig = figure(x_range=Xs, tools=TOOLS, plot_width=800, plot_height=500, title='p3(Y1=x1|X012=x012)')
triphDist_fig.circle('x', 'y', source=mySource, color=hd005['color'], name=hd005['shortID'], alpha = 0.1)
# triphDist_fig.legend.location = 'bottom_left'

hover = HoverTool(
    tooltips=[
        ("{0} p(correct)".format('hd005'), "@y"),
        ("x", "@x"),
    ]
)
triphDist_fig.add_tools(hover)

triphDist_fig.xgrid.grid_line_color = None

show(triphDist_fig)

rateReport(hd005, None, 'pY1X0X1X2', identityRelation = matchSymbolToSecondSymbol)

In [None]:
pY1X0X1X2_correctProbs = calculateCorrectProbs(hd01['pY1X0X1X2'], identityRelation = matchSymbolToSecondSymbol)
    
Xs = sorted(list(set(conditions(pY1X0X1X2_correctProbs))))
correctProbs = [pY1X0X1X2_correctProbs[x] for x in Xs]

mySource = ColumnDataSource(data=dict(x=Xs, y=correctProbs))

# TOOLS = "box_select,lasso_select,help"
TOOLS = "help"

triphDist_fig_WIDE = figure(x_range=Xs, tools=TOOLS, plot_width=12000, plot_height=500, title='p3(Y1=x1|X012=x012)')
triphDist_fig_WIDE.circle('x', 'y', source=mySource, color='blue', name=hd01['shortID'], alpha = 0.1)
# triphDist_fig_WIDE.legend.location = 'bottom_left'

# hover = HoverTool(
#     tooltips=[
#         ("{0} p(correct)".format('hd01'), "@y"),
#         ("x", "@x"),
#     ]
# )
triphDist_fig_WIDE.add_tools(hover)

triphDist_fig_WIDE.xgrid.grid_line_color = None

show(triphDist_fig_WIDE)

rateReport(hd01, None, 'pY1X0X1X2', identityRelation = matchSymbolToSecondSymbol)

In [None]:
pY1X0X1X2_correctProbs = calculateCorrectProbs(hd001['pY1X0X1X2'], identityRelation = matchSymbolToSecondSymbol)
    
Xs = sorted(list(set(conditions(pY1X0X1X2_correctProbs))))
correctProbs = [pY1X0X1X2_correctProbs[x] for x in Xs]

mySource = ColumnDataSource(data=dict(x=Xs, y=correctProbs))

# TOOLS = "box_select,lasso_select,help"
TOOLS = "help"

triphDist_fig_WIDE = figure(x_range=Xs, tools=TOOLS, plot_width=12000, plot_height=500, title='p3(Y1=x1|X012=x012)')
goldenrods = triphDist_fig_WIDE.circle('x', 'y', source=mySource, color=hd001['color'], name=hd001['shortID'], alpha = 0.4)
# triphDist_fig.legend.location = 'bottom_left'

hover = HoverTool(
    tooltips=[
        ("{0} p(correct)".format('hd001'), "@y"),
        ("x", "@x"),
    ]
)
triphDist_fig_WIDE.add_tools(hover)

triphDist_fig_WIDE.xgrid.grid_line_color = None

show(triphDist_fig_WIDE)

rateReport(hd001, None, 'pY1X0X1X2', identityRelation = matchSymbolToSecondSymbol)

The 'preview distribution' $p_3(X_1|X_0; X_1)$ below represents coarticulatory information about $X_1$ available to the listener even though only $X_0$ has finished; it is combined with the distribution above in calculating the posterior $p(\widehat{X_1}|X_0^1;X_2)$.

$p_3(Y_1|X_0; X_1)$:

In [None]:
p3Y1X01_plot = comparePlotsOfCorrectProbs(hd0, hd01, 'p3Y1X01', matchSymbolToFirstSymbol, 'p3(Y1=x1|X01=x01)', alpha=0.4)
p3Y1X01_plot.xgrid.grid_line_color = None
p3Y1X01_plot.legend.location = 'top_left'

show(p3Y1X01_plot)

rateReport(hd0, hd01, 'p3Y1X01', identityRelation = matchSymbolToFirstSymbol)

In [None]:
p3Y1X01_plot = comparePlotsOfCorrectProbs(hd0, hd001, 'p3Y1X01', matchSymbolToFirstSymbol, 'p3(Y1=x1|X01=x01)', alpha=0.4)
p3Y1X01_plot.xgrid.grid_line_color = None
p3Y1X01_plot.legend.location = 'top_left'

show(p3Y1X01_plot)

rateReport(hd0, hd001, 'p3Y1X01', identityRelation = matchSymbolToFirstSymbol)

In [None]:
p3Y1X01_plot = comparePlotsOfCorrectProbs(hd0, hd0011, 'p3Y1X01', matchSymbolToFirstSymbol, 'p3(Y1=x1|X01=x01)', alpha=0.4)
p3Y1X01_plot.xgrid.grid_line_color = None
p3Y1X01_plot.legend.location = 'top_left'

show(p3Y1X01_plot)

rateReport(hd0, hd0011, 'p3Y1X01', identityRelation = matchSymbolToFirstSymbol)

In [None]:
p3Y1X01_plot = comparePlotsOfCorrectProbs(hd0, hd005, 'p3Y1X01', matchSymbolToFirstSymbol, 'p3(Y1=x1|X01=x01)', alpha=0.4)
p3Y1X01_plot.xgrid.grid_line_color = None
p3Y1X01_plot.legend.location = 'top_left'

show(p3Y1X01_plot)

rateReport(hd0, hd005, 'p3Y1X01', identityRelation = matchSymbolToFirstSymbol)

In [None]:
wdA_correctProbs = calculateCorrectProbs(hd001['p3Y0X01'], identityRelation = matchSymbolToFirstSymbol)
wdB_correctProbs = calculateCorrectProbs(hd001['p6Y1X01'], identityRelation = matchSymbolToSecondSymbol)

Xs = sorted(list(set(conditions(wdA_correctProbs))))
correctProbsA = [wdA_correctProbs[x] for x in Xs]
correctProbsB = [wdB_correctProbs[x] for x in Xs]

mySource = ColumnDataSource(data=dict(x=Xs, y0=correctProbsA, y1=correctProbsB))

TOOLS = "box_select,lasso_select,help"

# create a new plot and add a renderer
title = 'Probability of channel signal matching source signal'
wdA_legend_key = 'p3Y0X01'
wdB_legend_key = 'p6Y1X01'
colorA = 'blue'
colorB = 'red'
together = figure(x_range=Xs, tools=TOOLS, plot_width=800, plot_height=500, title=title)
A_glyphs = together.circle('x', 'y0', source=mySource, color=colorA, legend=wdA_legend_key, name=wdA_legend_key, alpha=0.3)
B_glyphs = together.circle('x', 'y1', source=mySource, color=colorB, legend=wdB_legend_key, name=wdB_legend_key, alpha=0.3)
together.legend.location = 'bottom_left'

hover = HoverTool(
    tooltips=[
        ("{0} p(correct)".format(wdA_legend_key), "@y0"),
        ("{0} p(correct)".format(wdB_legend_key), "@y1"),
        ("x", "@x"),
    ]
)
together.add_tools(hover)
together.xgrid.grid_line_color = None
# together.legend.location = 'top_left'

show(together)

In [None]:
wdA_correctProbs = calculateCorrectProbs(hd001['p3Y0X01'], identityRelation = matchSymbolToFirstSymbol)
wdB_correctProbs = calculateCorrectProbs(hd001['p6Y1X01'], identityRelation = matchSymbolToSecondSymbol)

Xs = sorted(list(set(conditions(wdA_correctProbs))))
correctProbsA = [wdA_correctProbs[x] for x in Xs]
correctProbsB = [wdB_correctProbs[x] for x in Xs]
diffs = [abs(wdA_correctProbs[x] - wdB_correctProbs[x]) for x in Xs]

mySource = ColumnDataSource(data=dict(x=Xs, y=diffs))

TOOLS = "box_select,lasso_select,help"

# create a new plot and add a renderer
title = 'Difference in probability of channel signal matching source signal: |p3Y0X01 - p6Y1X01|'
# wdA_legend_key = 'p3Y0X01'
# wdB_legend_key = 'p6Y1X01'
colorA = 'lime'
# colorB = 'red'
myPlot = figure(x_range=Xs, tools=TOOLS, plot_width=800, plot_height=500, title=title)
A_glyphs = myPlot.circle('x', 'y', source=mySource, color=colorA, alpha=1.0)
# B_glyphs = myPlot.circle('x', 'y1', source=mySource, color=colorB, legend=wdB_legend_key, name=wdB_legend_key, alpha=0.3)
# myPlot.legend.location = 'bottom_left'

hover = HoverTool(
    tooltips=[
        ("|p3(correct) - p6(correct)|", "@y"),
#         ("{0} p(correct)".format(wdA_legend_key), "@y0"),
#         ("{0} p(correct)".format(wdB_legend_key), "@y1"),
        ("x", "@x"),
    ]
)
myPlot.add_tools(hover)
myPlot.xgrid.grid_line_color = None
# myPlot.legend.location = 'top_left'

show(myPlot)

diphoneDist = Uniform(Xs)
print('Avg. magnitude of discrepancy:')
print(sum([diphoneDist[x] * abs(wdA_correctProbs[x] - wdB_correctProbs[x])
           for x in diphoneDist]))

In [None]:
wdA_correctProbs = calculateCorrectProbs(hd001['p3Y0X01'], identityRelation = matchSymbolToFirstSymbol)
wdB_correctProbs = calculateCorrectProbs(hd001['p6Y1X01'], identityRelation = matchSymbolToSecondSymbol)

Xs = sorted(list(set(conditions(wdA_correctProbs))), key = lambda diph: ds2t(diph)[1])
correctProbsA = [wdA_correctProbs[x] for x in Xs]
correctProbsB = [wdB_correctProbs[x] for x in Xs]
diffs = [abs(wdA_correctProbs[x] - wdB_correctProbs[x]) for x in Xs]

mySource = ColumnDataSource(data=dict(x=Xs, y=diffs))

TOOLS = "box_select,lasso_select,help"

# create a new plot and add a renderer
title = 'Difference in probability of channel signal matching source signal: |p3Y0X01 - p6Y1X01|'
# wdA_legend_key = 'p3Y0X01'
# wdB_legend_key = 'p6Y1X01'
colorA = 'green'
# colorB = 'red'
myPlot = figure(x_range=Xs, tools=TOOLS, plot_width=800, plot_height=500, title=title)
A_glyphs = myPlot.circle('x', 'y', source=mySource, color=colorA, alpha=1.0)
# B_glyphs = myPlot.circle('x', 'y1', source=mySource, color=colorB, legend=wdB_legend_key, name=wdB_legend_key, alpha=0.3)
# myPlot.legend.location = 'bottom_left'

hover = HoverTool(
    tooltips=[
        ("|p3(correct) - p6(correct)|", "@y"),
#         ("{0} p(correct)".format(wdA_legend_key), "@y0"),
#         ("{0} p(correct)".format(wdB_legend_key), "@y1"),
        ("x", "@x"),
    ]
)
myPlot.add_tools(hover)
myPlot.xgrid.grid_line_color = None
# myPlot.legend.location = 'top_left'

show(myPlot)

diphoneDist = Uniform(Xs)
print('Avg. magnitude of discrepancy:')
print(sum([diphoneDist[x] * abs(wdA_correctProbs[x] - wdB_correctProbs[x])
           for x in diphoneDist]))

# Similarity measures

Which channel distributions are most similar to a given reference distribution $h^*$?

In [None]:
hd001.keys()

In [None]:
Ps = hd001['pYX']
h_star = getRandomKey(Ps)
h_star

h_alt = getRandomKey(Ps)
h_alt

P_star = Ps[h_star]
P_alt = Ps[h_alt]

In [None]:
def divergences(c, condDists):
    return {c_prime:DKL(condDists[c], condDists[c_prime])
            for c_prime in condDists}

def avgDivergence(c, condDists, prior = None):
    if prior is None:
        prior = Uniform(conditions(condDists))
    
    divs = divergences(c, condDists)
    return sum(prior[c_prime]*divs[c_prime] for c_prime in conditions(condDists))
    
myDivergences = divergences(h_star, condDistsAsProbDists(Ps))
sorted(list(myDivergences.items()), key = lambda pair: pair[1], reverse=False)
avgDivergence(h_star, condDistsAsProbDists(Ps))

In [None]:
plot_div = hv.Scatter(myDivergences, "X'", "DK(p(Y|X={0} || p(Y|X = x'))".format(h_star)).opts(color='red', tools=['hover'])
plot_dv.opts(width=600, height=400)

In [None]:
myAvgDivergences = {x:avgDivergence(x, condDistsAsProbDists(Ps))
                    for x in conditions(condDistsAsProbDists(Ps))}
sorted(list(myAvgDivergences.items()), key=lambda pair:pair[1], reverse=False)

plot_avgDiv = hv.Scatter(myAvgDivergences, "X", "Avg. Div. from X").opts(color='orange', tools=['hover'])
plot_avgDiv.opts(width=600, height=400)

In [None]:
# I(X;Y) = H(Y) - H(Y|X)
# H(Y|X) = -1.0 * sum([p(x)*p(y|x)*log2(p(y|x))
#                     for x,y in product(Xs,Ys)])

mixPrior_X = Uniform({h_star, h_alt})
# mixPrior_X
mix_Y_given_X = {c:ProbDist({o:Ps[c][o]
                             for o in P_star})
                 for c in mixPrior_X}

H(mix_Y_given_X, mixPrior_X)
mix_Y = ProbDist({o:(mixPrior_X[h_star]*P_star[o] + mixPrior_X[h_alt]*P_alt[o])
                  for o in P_star})
# mix_Y
H(mix_Y)
H(mix_Y) - H(mix_Y_given_X, mixPrior_X)

In [None]:
def discriminability(symbol_A, symbol_B, condDists, prior = None):
    if prior is None:
        mixPrior_X = Uniform({symbol_A, symbol_B})
    else:
        assert symbol_A in prior
        assert symbol_B in prior
        mixPrior_X = prior
    mix_Y_given_X = {c:ProbDist({o:condDists[c][o]
                                 for o in condDists[symbol_A]})
                     for c in mixPrior_X}
    mix_Y = ProbDist({o:(mixPrior_X[symbol_A]*condDists[symbol_A][o] + mixPrior_X[symbol_B]*condDists[symbol_B][o])
                      for o in condDists[symbol_A]})
    results = {'p(X)':mixPrior_X,
               'p(Y|X)':mix_Y_given_X,
               'p(Y)':mix_Y,
               'H(X)':H(mixPrior_X),
               'H(Y)':H(mix_Y),
               'H(Y|X)':H(mix_Y_given_X, mixPrior_X),
               'I(Y;X)':H(mix_Y) - H(mix_Y_given_X, mixPrior_X)}
    return results

def discriminabilities(symbol, condDists, symbolToPriorMap = None):
    if symbolToPriorMap is None:
        symbolToPriorMap = {c:Uniform({symbol, c}) for c in condDists}
    return {c:discriminability(symbol, c, condDists, symbolToPriorMap[c])
            for c in condDists}

def avgDiscriminability(symbol, condDists, symbolToPriorMap = None, prior = None):
    if prior is None:
        prior = Uniform(conditions(condDists))
    
    discrims = discriminabilities(symbol, condDists, symbolToPriorMap)
    return sum(prior[c_prime]*discrims[c_prime]['I(Y;X)'] for c_prime in conditions(condDists))

In [None]:
# discriminabilities = {h:discriminability(h_star, h, condDistsAsProbDists(Ps))['I(Y;X)']
#                       for h in Ps}

# discriminabilities(h_star, condDistsAsProbDists(Ps))
discrims = mapValues(lambda value: value['I(Y;X)'], 
                     discriminabilities(h_star, condDistsAsProbDists(Ps)))
sorted(discrims.items(), key=lambda pair:pair[1], reverse=False)

avgDiscriminability(h_star, condDistsAsProbDists(Ps))

In [None]:
# plot_div = hv.Scatter(divergences, "X'", "DK(p(Y|X={0} || p(Y|X = x'))".format(h_star)).opts(color='red', tools=['hover'])
plot_dis = hv.Scatter(discrims, "X'", "D(x',{0})".format(h_star)).opts(color='blue', tools=['hover'])

plot_dis.opts(width=600, height=400)

# plot_overlay = plot_div * plot_dis
# plot_overlay.opts(width=600, height=400)

In [None]:
myAvgDiscrims = {x:avgDiscriminability(x, condDistsAsProbDists(Ps))
                    for x in condDistsAsProbDists(Ps)}
sorted(list(myAvgDiscrims.items()), key=lambda pair:pair[1], reverse=False)

plot_avgDiscrim = hv.Scatter(myAvgDiscrims, "X", "Avg. Discrim. of X").opts(color='black', tools=['hover'])
plot_avgDiscrim.opts(width=600, height=400)