# Lund et al. (2019): The pragmatics of semantic change: Modeling the progressive-to-imperfective shift

This is a Python implementation of the imperfective shift model in Lund et al. (2019).

The paper can be found here: https://gunnarnl.github.io/publications/2019-lundetal.pdf

In [350]:
import numpy as np
import pandas as pd

# We need some helper functions to generate the world states

In [351]:
#powerset helper function
def powerset(s):
    """Return every subset of ``s`` as a list of lists.

    Subsets are built incrementally: each element of ``s`` doubles the
    collection by being appended to a copy of every subset found so far.
    The empty subset therefore comes first and the full set comes last.
    """
    subsets = [[]]
    for item in s:
        subsets = subsets + [subset + [item] for subset in subsets]
    return subsets

In [352]:
def state_gen(number_bins):
    """Return the midpoints of ``number_bins`` equal-width bins over [0, 1].

    Args:
        number_bins: How many bins the unit interval is split into.

    Returns:
        A list of ``number_bins`` midpoints in increasing order, each rounded
        to two decimal places (``[0.1, 0.3, 0.5, 0.7, 0.9]`` for 5 bins).

    Raises:
        ZeroDivisionError: If ``number_bins`` is 0.
    """
    bin_size = 1 / number_bins
    # Round to two decimals so float noise such as 0.30000000000000004 does
    # not leak into the state values (and the labels derived from them).
    return [
        round(((bin_size / 2) + (bin_size * x)) * 100) / 100
        for x in range(number_bins)
    ]

In [353]:
#generate state space
# Base states: the midpoints of five equal-width bins.
all_states = state_gen(5)
# Candidate worlds: every non-empty subset of the base states.
pset_states = [subset for subset in powerset(all_states) if len(subset) > 0]

In [354]:
def state_prior(states):
    """Return a uniform prior over ``states`` as a pandas Series.

    Each state is labelled by its ``str()`` form and receives probability
    ``1 / len(states)``.
    """
    count = len(states)
    labels = list(map(str, states))
    uniform = np.ones(count) / count
    return pd.Series(uniform, index=labels)

In [355]:
# Display the uniform prior over every non-empty subset of states.
state_prior(pset_states)

[0.1]                        0.032258
[0.3]                        0.032258
[0.1, 0.3]                   0.032258
[0.5]                        0.032258
[0.1, 0.5]                   0.032258
[0.3, 0.5]                   0.032258
[0.1, 0.3, 0.5]              0.032258
[0.7]                        0.032258
[0.1, 0.7]                   0.032258
[0.3, 0.7]                   0.032258
[0.1, 0.3, 0.7]              0.032258
[0.5, 0.7]                   0.032258
[0.1, 0.5, 0.7]              0.032258
[0.3, 0.5, 0.7]              0.032258
[0.1, 0.3, 0.5, 0.7]         0.032258
[0.9]                        0.032258
[0.1, 0.9]                   0.032258
[0.3, 0.9]                   0.032258
[0.1, 0.3, 0.9]              0.032258
[0.5, 0.9]                   0.032258
[0.1, 0.5, 0.9]              0.032258
[0.3, 0.5, 0.9]              0.032258
[0.1, 0.3, 0.5, 0.9]         0.032258
[0.7, 0.9]                   0.032258
[0.1, 0.7, 0.9]              0.032258
[0.3, 0.7, 0.9]              0.032258
[0.1, 0.3, 0

## Utterances and costs

By changing the costs, we get different interpretations mirroring the shift we see historically.

Note that the utterance prior defined here isn't used later on, but it does tell us how different utterance costs influence the utterance prior.

In [356]:
# Utterances available to the speaker; "null" is true of every state.
utterances = ["prog", "impf", "null"]

# Production cost per utterance; a higher cost makes the utterance less likely.
costs = {
    "prog": 1,
    "impf": 1,
    "null": 100,
}

In [357]:
# Not used in the model itself
def utterance_prior():
    """Return the prior over utterances implied by their costs.

    Each utterance is weighted by ``exp(-cost)`` and the weights are
    normalised to sum to one. Reads the module-level ``utterances`` and
    ``costs``.
    """
    weights = np.array([np.exp(-costs[u]) for u in utterances])
    return pd.Series(weights / weights.sum(), index=utterances)

In [358]:
# Display the utterance prior implied by the current costs.
utterance_prior()

prog    5.000000e-01
impf    5.000000e-01
null    5.056107e-44
dtype: float64

## Helper functions used with semantic interpretation

The theta values are proxies for the intervals and super-intervals that Deo (2009; 2015) uses to define the progressive and imperfective.

In [359]:
# Candidate theta values from which the (theta_r, theta_impf) pairs are built.
possible_thetas = [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

In [360]:
# Scaling factor applied to (log-probability - cost) in the speaker's utility.
alpha = 1

In [361]:
#Generate ordered pair <thetaR, thetaImpf> s.t. thetaImpf is greater than or equal to thetaR
def theta_gen(thetas):
    """Return all ordered pairs ``(a, b)`` drawn from ``thetas`` with ``b >= a``.

    Pairs are emitted in iteration order of ``thetas``: first by ``a``, then
    by ``b``.
    """
    pairs = []
    for lower in thetas:
        for upper in thetas:
            if upper >= lower:
                pairs.append((lower, upper))
    return pairs

# Every (theta_r, theta_impf) pair with theta_impf >= theta_r.
theta_pairs = theta_gen(possible_thetas)

def thetas_prior(thetas):
    """Return a uniform prior over ``thetas`` as a pandas Series.

    Every entry of ``thetas`` is labelled by its ``str()`` form and receives
    probability ``1 / len(thetas)``.
    """
    n_thetas = len(thetas)
    labels = [str(entry) for entry in thetas]
    probabilities = np.ones(n_thetas) / n_thetas
    return pd.Series(probabilities, index=labels)

# Display the uniform prior over the theta pairs.
thetas_prior(theta_pairs)

(0.4, 0.4)    0.035714
(0.4, 0.5)    0.035714
(0.4, 0.6)    0.035714
(0.4, 0.7)    0.035714
(0.4, 0.8)    0.035714
(0.4, 0.9)    0.035714
(0.4, 1)      0.035714
(0.5, 0.5)    0.035714
(0.5, 0.6)    0.035714
(0.5, 0.7)    0.035714
(0.5, 0.8)    0.035714
(0.5, 0.9)    0.035714
(0.5, 1)      0.035714
(0.6, 0.6)    0.035714
(0.6, 0.7)    0.035714
(0.6, 0.8)    0.035714
(0.6, 0.9)    0.035714
(0.6, 1)      0.035714
(0.7, 0.7)    0.035714
(0.7, 0.8)    0.035714
(0.7, 0.9)    0.035714
(0.7, 1)      0.035714
(0.8, 0.8)    0.035714
(0.8, 0.9)    0.035714
(0.8, 1)      0.035714
(0.9, 0.9)    0.035714
(0.9, 1)      0.035714
(1, 1)        0.035714
dtype: float64

In [362]:
# Generates the bins from the different thetas
# Effectively: creates a partition with a certain number of cells given a theta
def theta_bins(number_bins, theta):
    """Return the edges that split [0, theta] into ``number_bins`` equal cells.

    The result has ``number_bins + 1`` entries: a leading 0 followed by the
    upper edge of each cell in increasing order.
    """
    width = theta / number_bins
    edges = [0]
    edges.extend(width + width * k for k in range(number_bins))
    return edges

In [363]:
#working with 2 bins
# Number of cells each theta interval is partitioned into (passed to theta_bins).
n_bins = 2

## Meaning function
Returns true or false given an utterance, related partitions of time, and the state.

In [364]:
#meaning fxn: checks to make sure at least one event is contained in every bin.
def meaning_function(state, bins, counter=0):
    """Check that every bin from index ``counter`` onward contains an event.

    ``bins`` is a list of edges; bin ``i`` is the half-open interval
    ``(bins[i], bins[i + 1]]``. Returns True when each such bin, starting at
    ``counter``, holds at least one value from ``state``; returns False as
    soon as an empty bin is found.
    """
    idx = counter
    last_edge = len(bins) - 1
    while idx != last_edge:
        # Edges are looked up inside the generator so nothing is indexed
        # when ``state`` is empty.
        if not any(x > bins[idx] and x <= bins[idx + 1] for x in state):
            return False
        idx += 1
    return True
        

In [365]:
def meaning(utterance, bins_r, bins_t, state):
    """Return the truth value of ``utterance`` in ``state``.

    "prog" is evaluated against ``bins_r`` and "impf" against ``bins_t``,
    both via ``meaning_function``. Any other utterance is true of every
    state.
    """
    if utterance == "prog":
        relevant_bins = bins_r
    elif utterance == "impf":
        relevant_bins = bins_t
    else:
        # Anything else (e.g. "null") carries no truth conditions.
        return True
    return meaning_function(state, relevant_bins, 0)

## Literal listener
Returns a probability distribution over world states for a given utterance and thetas.

In [366]:
# need to return distribution over world states
def literal_listener(utterance, theta_r, theta_t):
    """Return the literal listener's distribution over world states.

    The state prior is restricted to the states in which ``utterance`` is
    true (given the bins derived from ``theta_r`` and ``theta_t``) and then
    renormalised. Reads the module-level ``pset_states`` and ``n_bins``.
    """
    bins_r = theta_bins(n_bins, theta_r)
    bins_t = theta_bins(n_bins, theta_t)
    prior = state_prior(pset_states)
    is_true = np.array(
        [meaning(utterance, bins_r, bins_t, state) for state in pset_states]
    )
    # Zero out the states where the utterance is false, then renormalise.
    weighted = is_true * prior
    return weighted / weighted.sum()

In [367]:
# Example: literal listener for "prog" with theta_r=0.3 and theta_t=0.7.
literal_listener("prog", 0.3, 0.7)

[0.1]                        0.000
[0.3]                        0.000
[0.1, 0.3]                   0.125
[0.5]                        0.000
[0.1, 0.5]                   0.000
[0.3, 0.5]                   0.000
[0.1, 0.3, 0.5]              0.125
[0.7]                        0.000
[0.1, 0.7]                   0.000
[0.3, 0.7]                   0.000
[0.1, 0.3, 0.7]              0.125
[0.5, 0.7]                   0.000
[0.1, 0.5, 0.7]              0.000
[0.3, 0.5, 0.7]              0.000
[0.1, 0.3, 0.5, 0.7]         0.125
[0.9]                        0.000
[0.1, 0.9]                   0.000
[0.3, 0.9]                   0.000
[0.1, 0.3, 0.9]              0.125
[0.5, 0.9]                   0.000
[0.1, 0.5, 0.9]              0.000
[0.3, 0.5, 0.9]              0.000
[0.1, 0.3, 0.5, 0.9]         0.125
[0.7, 0.9]                   0.000
[0.1, 0.7, 0.9]              0.000
[0.3, 0.7, 0.9]              0.000
[0.1, 0.3, 0.7, 0.9]         0.125
[0.5, 0.7, 0.9]              0.000
[0.1, 0.5, 0.7, 0.9]

## Pragmatic speaker

Returns a distribution over utterances for each world state, given a pair of thetas.

In [368]:
def utility(L1_probs):
    """Turn listener probabilities into unnormalised speaker scores.

    Args:
        L1_probs: DataFrame with one row per utterance; each row label must
            be a key of the module-level ``costs``. ``speaker`` passes the
            literal-listener probabilities here, one column per state.

    Returns:
        A DataFrame of the same shape holding
        ``exp(alpha * (log(p) - cost))`` for every probability ``p``.
    """
    # log(0) = -inf is intended: it maps to a score of exactly 0. Silence
    # NumPy's divide-by-zero warning so it is not emitted on every call.
    with np.errstate(divide="ignore"):
        return L1_probs.apply(
            lambda row: np.exp(alpha * (np.log(row) - costs[row.name])),
            axis=1,
        )

In [369]:
def speaker(theta_r, theta_t):
    """Return the speaker's distribution over utterances for every state.

    Args:
        theta_r: Theta passed to the literal listener for "prog".
        theta_t: Theta passed to the literal listener for "impf".

    Returns:
        A DataFrame with one row per utterance and one column per world
        state; each column is normalised over the utterances.
    """
    listener_rows = [
        literal_listener(utterance, theta_r, theta_t) for utterance in utterances
    ]
    L0 = pd.DataFrame(listener_rows, index=utterances)
    # Normalise column-wise: for a fixed state, scores are divided by their
    # sum over the utterances.
    return utility(L0).apply(lambda column: column / np.sum(column))

In [370]:
# Example: speaker distributions with theta_r=0.4 and theta_t=0.6.
speaker(0.4, 0.6)

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,[0.1],[0.3],"[0.1, 0.3]",[0.5],"[0.1, 0.5]","[0.3, 0.5]","[0.1, 0.3, 0.5]",[0.7],"[0.1, 0.7]","[0.3, 0.7]",...,"[0.3, 0.5, 0.9]","[0.1, 0.3, 0.5, 0.9]","[0.7, 0.9]","[0.1, 0.7, 0.9]","[0.3, 0.7, 0.9]","[0.1, 0.3, 0.7, 0.9]","[0.5, 0.7, 0.9]","[0.1, 0.5, 0.7, 0.9]","[0.3, 0.5, 0.7, 0.9]","[0.1, 0.3, 0.5, 0.7, 0.9]"
prog,0.0,0.0,1.0,0.0,0.0,0.0,0.6,0.0,0.0,0.0,...,0.0,0.6,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.6
impf,0.0,0.0,0.0,0.0,1.0,1.0,0.4,0.0,0.0,0.0,...,1.0,0.4,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.4
,1.0,1.0,2.6096039999999997e-44,1.0,3.914406e-44,3.914406e-44,1.5657619999999998e-44,1.0,1.0,1.0,...,3.914406e-44,1.5657619999999998e-44,1.0,1.0,1.0,2.6096039999999997e-44,1.0,3.914406e-44,3.914406e-44,1.5657619999999998e-44


## Pragmatic listener
Returns a distribution over world states and thetas for a given utterance. Difficult to read with the thetas, so we'll just return a distribution over world states.

In [371]:
def pragmatic_listener():
    """Return, per utterance, the pragmatic listener's distribution over states.

    The speaker distribution is computed for every pair in the module-level
    ``theta_pairs``, weighted by the uniform prior over those pairs and
    summed, so the thetas are marginalised out. The result is multiplied by
    the state prior and normalised across states for each utterance.

    Returns:
        A DataFrame with one row per utterance and one column per world
        state; each row is normalised over the states.
    """
    pr_state = state_prior(pset_states)
    pr_theta = thetas_prior(theta_pairs)
    # Iterate over the prior's values directly: the Series is labelled with
    # strings, so integer keys would rely on deprecated positional fallback.
    weighted_S1 = [
        speaker(theta_r, theta_t) * weight
        for (theta_r, theta_t), weight in zip(theta_pairs, pr_theta.to_numpy())
    ]
    joint = np.multiply(sum(weighted_S1), pr_state)
    return joint.apply(lambda row: row / np.sum(row), axis=1)

In [372]:
# Display the pragmatic listener's distribution over world states per utterance.
pragmatic_listener()

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,[0.1],[0.3],"[0.1, 0.3]",[0.5],"[0.1, 0.5]","[0.3, 0.5]","[0.1, 0.3, 0.5]",[0.7],"[0.1, 0.7]","[0.3, 0.7]",...,"[0.3, 0.5, 0.9]","[0.1, 0.3, 0.5, 0.9]","[0.7, 0.9]","[0.1, 0.7, 0.9]","[0.3, 0.7, 0.9]","[0.1, 0.3, 0.7, 0.9]","[0.5, 0.7, 0.9]","[0.1, 0.5, 0.7, 0.9]","[0.3, 0.5, 0.7, 0.9]","[0.1, 0.3, 0.5, 0.7, 0.9]"
prog,0.0,0.0,0.051567,0.0,0.0589,0.041846,0.080277,0.0,0.022911,0.022911,...,0.036126,0.07171429,0.0,0.022911,0.022911,0.0625,0.002223,0.051564,0.036126,0.07171429
impf,0.0,0.0,0.004472,0.0,0.037535,0.040207,0.028563,0.0,0.053813,0.053813,...,0.0539,0.03791037,0.0,0.053813,0.053813,0.041337,0.020764,0.049196,0.0539,0.03791037
,0.084848,0.084848,0.045455,0.084848,0.009091,0.018182,0.00303,0.084848,0.018182,0.018182,...,0.009091,2.084498e-45,0.084848,0.018182,0.018182,0.00303,0.063636,0.00303,0.009091,2.084498e-45
