In [1]:
import shlex
import subprocess
from collections import OrderedDict, defaultdict

import pandas as pd
from bs4 import BeautifulSoup as bs

In [2]:
def console(cmnd, cwd='.', show_output=True, raise_error=True):
    """Run a shell command and return its stripped, combined stdout/stderr.

    Args:
        cmnd: Command line, executed through the shell (``shell=True``).
        cwd: Working directory for the command.
        show_output: Print the captured output (or the failure details).
        raise_error: Raise when the command exits with a non-zero status;
            when False the exit status is returned instead.

    Returns:
        The stripped output text on success, or the integer exit status when
        the command fails and ``raise_error`` is False.

    Raises:
        RuntimeError: The command failed and ``raise_error`` is True. The
            original ``CalledProcessError`` is chained as ``__cause__``.
    """
    try:
        # NOTE: the command goes through the shell, so callers must quote
        # any untrusted fragments themselves.
        output = subprocess.check_output(
            cmnd, stderr=subprocess.STDOUT, shell=True, cwd=cwd,
            universal_newlines=True)
    except subprocess.CalledProcessError as exc:
        if show_output:
            print("Status : FAIL", exc.returncode, exc.output)
        if raise_error:
            # Keep the exit status and output in the message so the failure
            # is diagnosable even when show_output is False.
            raise RuntimeError(
                f"Command failed with exit status {exc.returncode}: "
                f"{cmnd!r}\n{exc.output}") from exc
        return exc.returncode
    else:
        output = output.strip()
        if show_output:
            print("Output: \n{}\n".format(output))
        return output

def print_errors(messages):
    """Print the attributes of each message, one ``key: value`` per line.

    Every message is expected to expose an ``attrs`` mapping; a blank line is
    printed after each message.
    """
    for message in messages:
        attribute_lines = [f' {key}: {value}' for key, value in message.attrs.items()]
        for attribute_line in attribute_lines:
            print(attribute_line)
        print()

def ape_console(text, attributes='-cdrspp -guess', cwd='APE-master', show_output=True):
    """Run the APE executable on ``text`` and return its raw output.

    Args:
        text: Text handed to ``./ape.exe`` via ``-text``.
        attributes: Extra command-line flags appended verbatim.
        cwd: Directory containing ``ape.exe``.
        show_output: Forwarded to ``console``.

    Returns:
        Whatever ``console`` returns for the command.
    """
    # Quote the text for the shell: the previous plain double quotes broke on
    # embedded '"' and let the shell expand '$' and backticks in the text.
    command = f'./ape.exe -text {shlex.quote(text)} {attributes}'
    return console(command, show_output=show_output, cwd=cwd)

def ape(text, attributes='-cdrspp -guess', cwd='APE-master', show_output=True, raise_error=True):
    """Parse ``text`` with APE and return the result as a BeautifulSoup tree.

    When ``raise_error`` is true and the output contains any ``message``
    elements, their attributes are printed and a ``ValueError`` is raised.
    """
    raw_result = ape_console(text, attributes=attributes, show_output=show_output, cwd=cwd)
    soup = bs(raw_result, "lxml")
    messages = soup.find_all('message')
    if not (raise_error and len(messages)):
        return soup
    print_errors(messages)
    raise ValueError(f"Found {len(messages)} errors")

In [10]:
# Load the word-problem dataset and preview the first rows.
data_dir = 'data/ACE - Word Problems/'
fp = f'{data_dir}ACE - Word Problems - Present Simple 2.csv'
df = pd.read_csv(fp)
df.head()

Unnamed: 0,index,problem,answer
0,1.1.1,A man has 3 apples. A woman gives 4 apples to ...,7
1,1.1.2,A woman has 1 ball. A man gives 9 balls to the...,10
2,1.1.3,A boy has 8 bananas. A girl gives 1 banana to ...,9
3,1.1.4,A girl has 3 melons. A boy gives 6 melons to t...,9
4,1.2.1,A restaurant has 175 normal chairs. The restau...,195


In [44]:
class Inventory:
    """Amounts held by one entity, stored as ``store[name][prop] = amount``.

    ``prop`` is an optional qualifier; amounts pushed without one are stored
    under the key ``None``.
    """

    def __init__(self, store=None) -> None:
        # A falsy store (None or empty) is replaced by a fresh defaultdict.
        self.store = store or defaultdict(dict)

    def push(self, name, amount, prop=None):
        """Set the amount recorded for ``name`` under ``prop``."""
        # setdefault also works when a plain dict was supplied as the store.
        self.store.setdefault(name, {})[prop] = amount

    def get(self, name, prop=None, default=0):
        """Return the amount held for ``name``.

        With a truthy ``prop`` the amount stored under that qualifier is
        returned; otherwise the amounts of all qualifiers are summed.
        ``default`` is returned when nothing is recorded for the request.
        Reading never inserts entries into the store.
        """
        # .get() avoids the defaultdict side effect of creating an empty
        # entry, which would make ``name in store`` true after a mere lookup.
        by_prop = self.store.get(name)
        if not by_prop:
            return default
        if prop:
            return by_prop.get(prop, default)
        return sum(by_prop.values())

class Object:
    """An ``object(...)`` condition of the DRS together with its inventory."""

    def __init__(self, idx=None, name=None, func=None, na=None, operator=None, amount=None, inventory=None, property=None) -> None:
        # Fields mirrored from the object(...) condition.
        self.idx, self.name, self.func = idx, name, func
        self.na, self.operator, self.amount = na, operator, amount

        # Derived state attached to the object during solving.
        self.inventory = inventory or Inventory()
        self.property = property
        self.guess_preds = dict()

    def parse(self, s):
        """Fill the fields from a line such as ``object(A,girl,countable,na,eq,1)-1/2``."""
        start = s.find("(") + 1
        end = s.find(")")
        fields = s[start:end].split(',')
        self.idx = fields[0]
        self.name = fields[1]
        self.func = fields[2]
        self.na = fields[3]
        self.operator = fields[4]
        # The sixth field is the count and is stored as an int.
        self.amount = int(fields[5])

class Has_part:
    """A ``has_part(...)`` condition: first argument ``idx``, second ``agent``."""

    def __init__(self, idx=None, agent=None) -> None:
        self.idx, self.agent = idx, agent

    def parse(self, s):
        """Fill ``idx`` and ``agent`` from the parenthesised argument list of ``s``."""
        start = s.find("(") + 1
        end = s.find(")")
        fields = s[start:end].split(',')
        self.idx = fields[0]
        self.agent = fields[1]

class Property:
    """A ``property(...)`` condition: the referent ``idx`` and the property ``name``."""

    def __init__(self, idx=None, name=None) -> None:
        self.idx, self.name = idx, name

    def parse(self, s):
        """Fill ``idx`` and ``name`` from the first two arguments of ``s``."""
        start = s.find("(") + 1
        end = s.find(")")
        fields = s[start:end].split(',')
        self.idx = fields[0]
        self.name = fields[1]

class Modifier_pp:
    """A ``modifier_pp(...)`` condition: predicate ``idx``, preposition ``name``, ``agent``."""

    def __init__(self, idx=None, name=None, agent=None) -> None:
        self.idx, self.name, self.agent = idx, name, agent

    def parse(self, s):
        """Fill the three fields from a line such as ``modifier_pp(E,in,D)-1/9``."""
        start = s.find("(") + 1
        end = s.find(")")
        fields = s[start:end].split(',')
        self.idx = fields[0]
        self.name = fields[1]
        self.agent = fields[2]

class Predicate:
    """A ``predicate(...)`` condition: an action, its agents and its modifiers.

    ``modifier_pp`` maps a preposition name to the modifier attached to this
    predicate.
    """

    def __init__(self, idx=None, action=None, agents=None, modifier_pp=None) -> None:
        self.idx = idx
        self.action = action
        # A fresh list per instance: a shared ``[]`` default would let an
        # in-place edit of one predicate's agents leak into all the others.
        self.agents = agents if agents is not None else []
        self.n_agents = len(self.agents)

        self.modifier_pp = modifier_pp or OrderedDict()

    def parse(self, s):
        """Fill the fields from a line such as ``predicate(E,have,A,B)-1/3``."""
        s = s[s.find("(")+1:s.find(")")].split(',')
        self.idx = s[0]
        self.action = s[1]
        # Every remaining argument is an agent referent.
        self.agents = s[2:]
        self.n_agents = len(self.agents)


class Relation:
    """A ``relation(...)`` condition linking ``agent1`` to ``agent2`` via ``link``."""

    def __init__(self, agent1=None, link=None, agent2=None) -> None:
        self.agent1, self.link, self.agent2 = agent1, link, agent2

    def parse(self, s):
        """Fill the three fields from the parenthesised argument list of ``s``."""
        start = s.find("(") + 1
        end = s.find(")")
        fields = s[start:end].split(',')
        self.agent1 = fields[0]
        self.link = fields[1]
        self.agent2 = fields[2]

class Condition:
    """An implication: referents of the antecedent (``pre``) and consequent (``post``)."""

    def __init__(self, pre=None, post=None) -> None:
        self.pre, self.post = pre, post

class Query:
    """A question block of the DRS: its type, objects, predicate and answer.

    ``objects`` maps referent ids to the objects declared inside the question
    and ``ans`` holds the computed answer (``'No ans'`` until one is set).
    """

    def __init__(self, idx=None, qtype=None, objects=None, predicate=None, ans='No ans') -> None:
        self.idx = idx
        self.qtype = qtype
        # Build the containers per instance. Default-argument instances are
        # created once and shared, so objects stored by one query would show
        # up in every other query created without explicit arguments.
        self.objects = objects if objects is not None else OrderedDict()
        self.predicate = predicate if predicate is not None else Predicate()

        self.ans = ans

    def parse(self, s):
        """Fill ``idx`` and ``qtype`` from a line such as ``query(F,howm)-2/1``."""
        s = s[s.find("(")+1:s.find(")")].split(',')
        self.idx = s[0]
        self.qtype = s[1]


class Calc:
    def __init__(self) -> None:
        """Create an empty solver state."""
        # Parsed DRS conditions, keyed by referent id.
        self.objects, self.predicates, self.relations = OrderedDict(), OrderedDict(), OrderedDict()
        # Referent-id lists in the order their boxes appear, and parsed implications.
        self.order, self.conditions = [], []
        # Placeholder query until a QUESTION section is parsed.
        self.queries = [Query()]

    def find_next_question(self, lines, i):
        try:
            q_i = lines[i:].index('QUESTION')
            return i + q_i + 1
        except ValueError:
            return 0

    def parse_queries(self, lines, i):
        queries = []
        next_question = self.find_next_question(lines, i)
        while next_question:
            queries.append(self.parse_query(lines[i:next_question]))
            i = next_question
            next_question = self.find_next_question(lines, i)
        queries.append(self.parse_query(lines[i:]))
        return queries

    @staticmethod
    def parse_query(lines):
        """Build a ``Query`` from the lines of one question block.

        Lines are classified by substring, checked in this order: ``query``,
        ``object``, ``predicate``, ``property``. A property line must follow
        the object line it refers to.
        """
        # Pass fresh containers explicitly so parsed objects are never written
        # into containers shared through Query's default arguments.
        q = Query(objects=OrderedDict(), predicate=Predicate())
        for line in lines:
            if 'query' in line:
                q.parse(line)
            elif 'object' in line:
                ob = Object()
                ob.parse(line)
                q.objects[ob.idx] = ob
            elif 'predicate' in line:
                pr = Predicate()
                pr.parse(line)
                # A later predicate line replaces an earlier one.
                q.predicate = pr
            elif 'property' in line:
                prop = Property()
                prop.parse(line)
                q.objects[prop.idx].property = prop.name
        return q

    def parse_drspp(self, lines):
        conditional = False
        for i, line in enumerate(lines):
            if '[' in line:
                context_ids = line.strip()[1:-1].split(',')
                self.order.append(context_ids)
            elif 'object' in line:
                ob = Object()
                ob.parse(line)
                self.objects[ob.idx] = ob
            elif 'has_part' in line:
                hp = Has_part()
                hp.parse(line)
                ob = self.objects[hp.idx]
                self.objects[hp.agent] = Object(idx=hp.idx, name=ob.name, func=ob.func, na=ob.func, operator=ob.operator, amount=1, inventory=ob.inventory, property=ob.property)
            elif 'predicate' in line:
                pr = Predicate()
                pr.parse(line)
                self.predicates[pr.idx] = pr
            elif 'relation' in line:
                rel = Relation()
                rel.parse(line)
                self.relations[rel.agent1] = rel
            elif 'property' in line:
                prop = Property()
                prop.parse(line)
                self.objects[prop.idx].property = prop.name
            elif 'modifier_pp' in line:
                mod_pp = Modifier_pp()
                mod_pp.parse(line)
                self.predicates[mod_pp.idx].modifier_pp[mod_pp.name] = mod_pp
            if conditional:
                post = self.order.pop()
                pre = self.order.pop()
                condition = Condition(pre=pre, post=post)
                self.conditions.append(condition)
                conditional = False
            if '=>' in line:
                conditional = True
            elif 'QUESTION' in line:
                self.queries = self.parse_queries(lines, i+1)
                break

    def ape_lxml(self, text, show_output=False):
        """Run APE on ``text`` and parse the stripped lines of its ``drspp`` element."""
        drspp_node = ape(text, show_output=show_output).find('drspp')
        stripped_lines = [raw.strip() for raw in drspp_node.text.splitlines()]
        self.parse_drspp(stripped_lines)

    @staticmethod
    def get_ob_name_amount(object):
        ob_name = object.name
        ob_amount = object.amount
        ob_prop = object.property
        return ob_name, ob_amount, ob_prop

    def action_have(self, objects, pr):
        [a, b] = pr.agents
        ob_name, ob_amount, ob_prop = self.get_ob_name_amount(objects[b])
        objects = self._add(objects, [a], ob_name, ob_amount, ob_prop)
        mod_pp = pr.modifier_pp
        if len(mod_pp):
            if 'with' in mod_pp:
                c = mod_pp['with'].agent
                ob_name_c, ob_amount_c, ob_prop_c = self.get_ob_name_amount(objects[c])
                objects = self._add(objects, [a], ob_name_c, ob_amount_c, ob_prop_c)
            else:
                raise NotImplementedError(f'mod_pp.name={mod_pp.keys()}')
        return objects

    def _exhange(self, objects, agents, ob_name, ob_amount, ob_prop):
        [a, c] = agents
        inventory_a = objects[a].inventory
        inventory_c = objects[c].inventory
        inventory_ia = inventory_a.get(ob_name, ob_prop) - ob_amount
        inventory_ic = inventory_c.get(ob_name, ob_prop) + ob_amount
        inventory_a.push(ob_name, inventory_ia, ob_prop)
        inventory_c.push(ob_name, inventory_ic, ob_prop)
        objects[a].inventory = inventory_a
        objects[c].inventory = inventory_c
        return objects

    def _add(self, objects, agents, ob_name, ob_amount, ob_prop):
        [a] = agents
        inventory_a = objects[a].inventory
        inventory_ia = inventory_a.get(ob_name, ob_prop) + ob_amount
        inventory_a.push(ob_name, inventory_ia, ob_prop)
        objects[a].inventory = inventory_a
        return objects
    
    def _remove(self, objects, agents, ob_name, ob_amount, ob_prop):
        [a] = agents
        inventory_a = objects[a].inventory
        inventory_ia = inventory_a.get(ob_name, ob_prop) - ob_amount
        inventory_a.push(ob_name, inventory_ia, ob_prop)
        objects[a].inventory = inventory_a
        return objects

    def action_give(self, objects, pr):
        [a, b] = pr.agents[:2]
        ob_name, ob_amount, ob_prop = self.get_ob_name_amount(objects[b])
        mod_pp = pr.modifier_pp
        if len(mod_pp) == 1:
            c = mod_pp[list(mod_pp.keys())[0]].agent
        else:
            c = pr.agents[2]
        objects = self._exhange(objects, [a, c], ob_name, ob_amount, ob_prop)
        return objects

    def action_take(self, objects, pr):
        [a, b] = pr.agents
        mod_pp = pr.modifier_pp
        if len(mod_pp) == 1:
            c = mod_pp[list(mod_pp.keys())[0]].agent
        else:
            c = self.relations[b].agent2
        ob_name, ob_amount, ob_prop = self.get_ob_name_amount(objects[b])
        objects = self._exhange(objects, [c, a], ob_name, ob_amount, ob_prop)
        return objects

    def action_get(self, objects, pr, c=None):
        [a, b] = pr.agents
        ob_name, ob_amount, ob_prop = self.get_ob_name_amount(objects[b])
        mod_pp = pr.modifier_pp
        if len(mod_pp):
            if 'from' in mod_pp:
                c = mod_pp['from'].agent
                objects = self._exhange(objects, [c, a], ob_name, ob_amount, ob_prop)
            elif 'for' in mod_pp:
                c = mod_pp['for'].agent
                ob_name_c, ob_amount_c, ob_prop_c = self.get_ob_name_amount(objects[c])
                objects = self._add(objects, [a], ob_name, ob_amount, ob_prop)
                objects = self._remove(objects, [a], ob_name_c, ob_amount_c, ob_prop_c)
            else:
                raise NotImplementedError(f'mod_pp.name={mod_pp.name}')
        
            
        else:
            objects = self._add(objects, [a], ob_name, ob_amount, ob_prop)

        return objects

    def action_flee(self, objects, pr):
        [b] = pr.agents
        ob_name, ob_amount, ob_prop = self.get_ob_name_amount(objects[b])
        mod_pp = pr.modifier_pp
        if mod_pp:
            if mod_pp.name == 'from':
                a = mod_pp.agent
            else:
                raise NotImplementedError(f'mod_pp.name={mod_pp.name}')
        
            objects = self._remove(objects, [a], ob_name, ob_amount, ob_prop)
        else:
            raise NotImplementedError
        return objects

    def action_remove(self, objects, pr):
        [a, b] = pr.agents
        ob_name, ob_amount, ob_prop = self.get_ob_name_amount(objects[b])
        objects = self._remove(objects, [a], ob_name, ob_amount, ob_prop)
        return objects

    def action_guess_pred(self, objects, pr):

        [a, b] = pr.agents
        # ob_name, ob_amount, ob_prop = self.get_ob_name_amount(objects[b])
        # objects[a].guess_pred[pr.action] = objects[b]
        return objects



    def classify_action(self, objects, pr, guess_pred=True):
        action = pr.action
        if action == 'have' or action == 'work' or action == 'bake' or action == 'run' or action == 'need':
            objects = self.action_have(objects, pr)
        elif action == 'give' or action == 'place':
            objects = self.action_give(objects, pr)
        elif action == 'take' or action == 'remove':
            objects = self.action_take(objects, pr)
        elif action == 'get' or action == 'buy' or action == 'steal' or action == 'find':
            objects = self.action_get(objects, pr)
        elif action == 'sell' or action == 'lose':
            objects = self.action_remove(objects, pr)
        elif action == 'flee':
            objects = self.action_flee(objects, pr)
        else:
            if guess_pred:
                print(f'Guessing pred for "{action}"')
                self.action_guess_pred(objects, pr)
            else:
                raise NotImplementedError(action)
        
        return objects

    def run_predicates(self, objects={}, predicates={}, order=[], update_objects=True, agent_a=None):
        objects = self.objects or objects
        predicates = self.predicates or predicates
        order = order or self.order
        for idx in order:
            for _, pr in predicates.items():
                if pr.idx in idx:
                    if agent_a and agent_a not in pr.agents:
                        pr.agents[0] = agent_a
                    objects = self.classify_action(objects, pr)
        if update_objects:
            self.objects =  objects
        return objects

    def action_val_have(self, objects, pr):
        [a, b] = pr.agents
        ob_name, ob_amount, ob_prop = self.get_ob_name_amount(objects[b])
        amount = self.objects[a].inventory.get(ob_name, ob_prop)
        amount = amount // ob_amount
        return amount

    def classify_val_action(self, objects, pr):
        action = pr.action
        if action == 'have' or action == 'work' or action == 'bake' or action == 'run' or action == 'need':
            amount = self.action_val_have(objects, pr)
        else:
            raise NotImplementedError(action)
        
        return amount

    def validate_predicates(self, predicates, objects:dict=None, agent_a=None):
        objects = objects or self.objects
        amounts = []
        for pr in predicates:
            if agent_a:
                pr.agents[0] = agent_a
            amounts.append(self.classify_val_action(objects, pr))
        return min(amounts)
                        
    def calc_cond_amount(self, order:list):
        amounts = []
        v_preds = None
        objs = [self.objects.get(idx) for idx in order if idx in self.objects]
        preds = [self.predicates.get(idx) for idx in order if idx in self.predicates]
        if len(preds):
            v_preds = self.validate_predicates(preds)
        for ob in objs:
                name = ob.name
                for order_i in self.order:
                    for f_idx in order_i:
                        f_ob = self.objects.get(f_idx, None)
                        if not f_ob:
                            continue
                        if f_ob.name == name:
                            amount = f_ob.amount//ob.amount
                            if v_preds:
                                amount = min(v_preds, amount)
                            amounts.append((f_idx, amount))
                        if name in f_ob.inventory.store:
                            inv_amount = f_ob.inventory.get(name)
                            amount = inv_amount//ob.amount
                            if v_preds:
                                amount = min(v_preds, amount)
                            amounts.append((f_idx, amount))
        return amounts
             

    def run_conditions(self, conditions=[]):
        conditions = self.conditions or conditions
        for condition in conditions:
            amounts = self.calc_cond_amount(condition.pre)
            if len(amounts):
                for (agent_a, amount) in amounts:
                    for _ in range(amount):
                        self.run_predicates(order=condition.post, agent_a=agent_a)

    def q_action_have(self, objects, query):
        [a,b] = query.predicate.agents
        ob_name, _, ob_prop = self.get_ob_name_amount(query.objects[b])
        
        '''Problem: Query agent (a) could be not referring to story. /A year has 52 weeks. How many weeks does a year have?/'''
        try:
            answer = objects[a].inventory.get(ob_name, ob_prop)
        except:
            ob_name_x, _, _ = self.get_ob_name_amount(query.objects[a])
            obs = [idx for idx, ob in objects.items() if ob.name == ob_name_x]
            idx = obs[0]
            answer = objects[idx].inventory.get(ob_name, ob_prop)
        if ob_prop:
            query.ans = f'{answer} {ob_prop} {ob_name}'
        else:
            query.ans = f'{answer} {ob_name}'
        return query.ans

    def q_action_howm(self, objects, query):
        action = query.predicate.action
        if action in ['have', 'work', 'bake', 'run', 'need', 'buy', 'get', 'read']:
            answer = self.q_action_have(objects, query)
        else:
            raise NotImplementedError(action)
            answer = None
        return answer
    
    def run_queries(self, objects=None, queries=None):
        objects = self.objects or objects
        queries = self.queries or queries
        answers = []
        for query in queries:
            if query.qtype == 'howm':
                answers.append(self.q_action_howm(objects, query))
        return answers

    def main(self, text, show_output=False):
        self.ape_lxml(text, show_output=show_output)
        self.run_predicates()
        self.run_conditions()
        answers = self.run_queries()
        for i, answer in enumerate(answers):
            print(f'Q{i}: answer = {answer}')
        return answers

# Demo: solve a single word problem end to end.
# The dataset lookup below is immediately overridden by the hand-written
# example on the next line.
s = df['problem'][68]
s = 'A girl has 2 bags with 2 marbles in each bag. How many marbles does the girl have?'
# Alternative example problems, kept for manual experimentation:
# s = 'There is a box. It has 8 eggs. A man takes 2 eggs from the box. A woman places 2 eggs in the box. How many eggs does the box have?'
# s = 'A year has 52 weeks. Each week has 7 days. Each day has 24 hours. Each hour has 60 minutes. How many minutes does the year have?'
# s = 'A year has 365 days. Each day has 24 hours. Each hour has 60 minutes. Each minute has 60 seconds. How many seconds does a year have?'
# s = 'A student has 3 books. Each of the books has 14 chapters. How many chapters does the student have?'
# Show the raw APE result for the text (ape prints its output by default);
# the returned value is not used.
ape(s)
# Solve the problem; as the cell's last expression, the returned list of
# answers is displayed.
C = Calc()
C.main(s)

Output: 
<?xml version="1.0" encoding="UTF-8"?>

<apeResult>
  <duration tokenizer="0.030" parser="0.003" refres="0.000"/>
  <drspp>[A,B,C]
object(A,girl,countable,na,eq,1)-1/2
object(B,bag,countable,na,eq,2)-1/5
object(C,marble,countable,na,eq,2)-1/8
   [D]
   object(D,bag,countable,na,eq,1)-1/11
   =&gt;
   [E]
   predicate(E,have,A,B)-1/3
   modifier_pp(E,in,D)-1/9
   modifier_pp(E,with,C)-1/6
   QUESTION
   [F,G]
   query(F,howm)-2/1
   object(F,marble,countable,na,geq,2)-2/3
   predicate(G,have,A,F)-2/7
</drspp>
  <messages/>
</apeResult>

Q0: answer = 4 marble


['4 marble']

In [45]:
# Solve every problem from position 63 to the end of the dataset, creating a
# new Calc instance for each problem; main() prints the answers.
for p in df['problem'].tolist()[63:]:
    C = Calc()
    C.main(p)


Q0: answer = 3750 pillow
Q0: answer = 233 book
Q0: answer = 11 dollar
Q0: answer = 7 banana
Q0: answer = 9 egg
Q0: answer = 17 marble
Q0: answer = 16 coin
Q0: answer = 81 pen
Q0: answer = 17 carrot
Q0: answer = 0 chalk
Q0: answer = 20 wheel
Q0: answer = 10 shoe
Q0: answer = 20 chair
Q0: answer = 0 chalk
Q0: answer = 30 wheel
Q0: answer = 22 leg
Q0: answer = 60 branch
