In [1]:
import numpy as np
from scipy.optimize import minimize

Define a class `Tableau`, which can be initialized from a string input in markdown format.

Each instance stores the tableau's input, the winning output, a list of candidate outputs, and a dictionary mapping each candidate to the constraint violations it incurs. The class also has a `constraints` class attribute, which ensures that all Tableaus work from the same set of constraints.

In [2]:
class Tableau:
    """
    One tableau: an input form, its candidate outputs, and a 0/1 violation
    vector per candidate.

    Instance attributes set by __init__:
        inpt        -- first line of the tableau text (the input form)
        candidates  -- candidate strings, in the order they appear in the table
        winner      -- first candidate whose text contains '+'; if no candidate
                       contains '+', the first candidate
        winner_ind  -- index of WINNER in CANDIDATES
        violations  -- dict mapping each candidate string to a numpy array
                       aligned with Tableau.constraints
    """
    # Shared, ordered constraint inventory: every violation vector and every
    # weight vector is indexed in this order.
    constraints = ["LeftFoc", "NiFoc", "LeftP", "DelLow", "DepC", "MaxSubj", "DepSubj",
                   "MaxObj", "DepObj", "MaxPoss", "DepPoss", "MaxV", "DepV", "MaxAdj"]

    @staticmethod
    def fix(my_constraints, v):
        """
        Given a list of constraints MY_CONSTRAINTS that corresponds to a list V, repairs
        V to correspond to Tableau.CONSTRAINTS by appropriately sorting and inserting
        zeroes. Constraints in MY_CONSTRAINTS that are not in Tableau.CONSTRAINTS are
        dropped. Returns a new list; the arguments are not modified.

        Declared as a staticmethod so that it can be called both as
        Tableau.fix(...) and on an instance.
        """
        return [v[my_constraints.index(c)] if c in my_constraints else 0
                for c in Tableau.constraints]

    def __init__(self, tableau_txt):
        """
        Initializes new tableau object from given input in string form.

        TABLEAU_TXT is expected to hold the input form on its first line, followed
        by a pipe-delimited table: a header row (first cell is a label, the cells
        after it are constraint names, the last cell is discarded), one row that
        is skipped (the markdown separator), and then one row per candidate.
        A cell counts as a violation only if, after stripping whitespace, it is
        exactly '*'.
        """
        lines = tableau_txt.strip().split("\n")
        self.inpt = lines.pop(0)
        # Drop whatever precedes the first '|' on each line, then trim every cell.
        rows = [[cell.strip() for cell in line.split("|")[1:]] for line in lines]
        # Header: skip the leading label cell and the trailing cell.
        my_constraints = rows[0][1:-1]
        # Skip the header row and the row after it.
        rows = rows[2:]
        self.candidates = [row[0] for row in rows]
        # max() with a boolean key returns the first candidate containing '+',
        # or the first candidate overall when none does.
        self.winner = max(self.candidates, key=lambda s: '+' in s)
        self.winner_ind = self.candidates.index(self.winner)
        self.violations = {
            row[0]: np.array(Tableau.fix(
                my_constraints,
                [int(row[i + 1] == '*') for i in range(len(my_constraints))]))
            for row in rows
        }

    def calc_probs(self, w):
        """
        Given a vector of weights W, calculates the output probability for each candidate.

        Returns a list with one probability per entry of self.candidates, in the
        same order. The probabilities are a softmax over the dot products of W
        with each candidate's violation vector.
        """
        # Iterate over self.candidates (not the dict) so the result always lines
        # up with self.candidates, which is what callers zip it against.
        scores = np.array([np.dot(w, self.violations[cand]) for cand in self.candidates],
                          dtype=float)
        if scores.size == 0:
            return []
        # Subtracting the maximum leaves the softmax unchanged mathematically but
        # keeps np.exp from overflowing to inf (and the ratio from becoming nan).
        scores = scores - scores.max()
        exp_scores = np.exp(scores)
        return list(exp_scores / np.sum(exp_scores))

    def str_from_weight(self, w):
        """
        Outputs a string representation of the tableau, where weights and probability of candidate outputs
        are arranged according to what would be predicted by W.

        Candidates are listed from most to least probable; constraint columns are
        ordered by ascending weight.
        """
        probs = self.calc_probs(w)
        sorted_candidates = sorted(zip(self.candidates, probs), key=lambda x: -x[1])
        # Keep each constraint's original index so its column of the violation
        # vectors can still be looked up after sorting.
        sorted_constraints = sorted(zip(self.constraints, w, range(len(w))), key=lambda x: x[1])
        cand_width = max(len(c) for c in self.candidates)
        str_template = "| ({:.3f}) {:<" + str(cand_width) + "} |" + "".join(
            " {:<" + str(len(c)) + "} |" for c, _, _ in sorted_constraints)
        otpt_str = ["INPUT: " + self.inpt]
        # The header row reuses the row template, so its probability slot shows 0.000.
        row = [0, "Candidate"] + [c for c, _, _ in sorted_constraints]
        otpt_str.append(str_template.format(*row))
        # "+ 10" covers the fixed-width " (0.000) " prefix plus the trailing space.
        otpt_str.append("|" + '-' * (cand_width + 10) + "|" + "".join(
            "-" * (len(c) + 2) + "|" for c, _, _ in sorted_constraints))
        for cand, prob in sorted_candidates:
            row = [prob, cand]
            for _cons, _weight, i in sorted_constraints:
                row.append('*' if self.violations[cand][i] == 1 else ' ')
            to_add = str_template.format(*row)
            # Extra spaces are added per occurrence of these two character
            # sequences, presumably because they occupy fewer display columns
            # than len() counts.
            # NOTE(review): the first '.' in the row is the decimal point of the
            # probability, so the padding lands inside the number -- confirm
            # that this is the intended position.
            index = to_add.find('.') + 1
            to_add = to_add[:index] + ' ' * (cand.count("ẹ́") + cand.count("ọ̀")) + to_add[index:]
            otpt_str.append(to_add)

        return "\n".join(otpt_str)

    def __repr__(self):
        """Multi-line summary: input, winner, and the raw violations dictionary."""
        s = "Input: " + self.inpt
        s += "\nWinner: " + self.winner
        s += "\nViolations " + str(self.violations)
        return s

Reads in all tableaus from local markdown file, and converts each of them to Tableau objects.

In [3]:
# Read the whole markdown file. The context manager closes the handle even if
# reading fails, and the explicit encoding keeps accented characters in
# candidate forms from depending on the platform's default codec.
with open("tableaus.md", "r", encoding="utf-8") as f:
    raw_text = f.read()

# Each tableau starts with an "INPUT:" marker; whatever precedes the first
# marker is discarded. `contents` ends up as a list of Tableau objects.
contents = [Tableau(chunk) for chunk in raw_text.split("INPUT:")[1:]]

Define an objective function based on Goldwater and Johnson.

In [4]:
def obj(w,s2,c):
    """
    Negative regularised log-likelihood of the winners under weights W.

    Parameters:
        w  -- weight vector, indexed like each tableau's violation vectors
        s2 -- divisor applied to the squared weights in the penalty term
        c  -- iterable of tableau objects; each must provide `candidates`,
              `violations` (candidate -> violation vector) and `winner_ind`

    Returns:
        -(sum over tableaus of log P(winner) - sum(w**2 / s2)), so that
        minimising this value maximises the penalised likelihood.
    """
    log_likelihood = 0.0
    for t in c:
        scores = np.array([np.dot(w, t.violations[cand]) for cand in t.candidates],
                          dtype=float)
        # log-softmax via the log-sum-exp trick: shifting by the maximum score
        # avoids exp() overflow (inf/nan) and log(0) when weights grow large.
        peak = scores.max()
        log_norm = peak + np.log(np.sum(np.exp(scores - peak)))
        log_likelihood += scores[t.winner_ind] - log_norm
    reg = np.sum(np.square(w) / s2)
    return -(log_likelihood - reg)

Optimize over that objective function.

In [5]:
# Starting point: one zero weight per constraint in the shared inventory.
w = [0.0 for _ in contents[0].constraints]
# Divisor of the squared-weight penalty in obj; value taken from G&J, which
# seems to work well for this problem.
s2 = 33333

print("objective before minimization", obj(w,s2,contents))
res = minimize(
    obj,
    w,
    args=(s2, contents),
    tol=1.0E-6,
    options={'disp': True, 'maxiter': 10000},
)
print("objective after minimization", res.fun)

objective before minimization 22.45751442363649
Optimization terminated successfully.
         Current function value: 0.779621
         Iterations: 124
         Function evaluations: 1792
         Gradient evaluations: 128
objective after minimization 0.7796205018533113


Write resulting tableaus to an output file

In [6]:
# Report header: the fitted weights, listed from lowest to highest.
write_out = "CONSTRAINT WEIGHTS:\n"
write_out += "".join(
    "{:<7} {:.2f}\n".format(cons, weight)
    for cons, weight in sorted(zip(contents[0].constraints, res.x), key=lambda x: x[1])
)
write_out += "\n"

# One rendered tableau per input, each followed by a blank line.
write_out += "".join(t.str_from_weight(res.x) + "\n\n" for t in contents)

# The return value of write() is deliberately not assigned to `contents`:
# doing so would replace the list of tableaus with a character count and break
# any re-run of the cells above. The explicit encoding keeps accented
# characters in candidate forms from depending on the platform's default codec.
with open("output_tableaus.md", "w", encoding="utf-8") as f:
    f.write(write_out)