In [2]:
from allennlp.data.tokenizers import Token
from allennlp.data.tokenizers import SpacyTokenizer
import timeit
from allennlp.common import Params
from allennlp_semparse.common import Date, ExecutionError
from templi.templi_languages.templi_language import Templi_Language, TempliTimeContext
from templi.dataset_readers.search_timeml_logical_forms import get_all_valid_logical_forms, get_valid_logical_forms
from ortools.linear_solver import pywraplp
from templi.templi_languages.allen_algebra import infer_relation
from allennlp_semparse.common.action_space_walker import ActionSpaceWalker
from allennlp.commands.train import train_model_from_file, train_model
import time
import pathlib
import json

In [None]:
# for generating logical forms
with open("sentence_rels.json", "r") as f:
    sentence_rels = json.load(f)
sentences_logical_forms = get_valid_logical_forms(sentence_rels)
# write to file
json.dump(sentences_logical_forms, open("sentences_logical_forms.json", "w"), indent=4)

  8%|▊         | 207/2741 [15:31<5:23:33,  7.66s/it]

In [70]:
import json
from tqdm import tqdm
from templi.templi_languages.allen_algebra import converse, timeml_to_uci
from typing import Dict, Tuple
from templi.templi_languages.templi_language import Templi_Language, TempliTimeContext

from allennlp_semparse.common.action_space_walker import ActionSpaceWalker
from functools import reduce
from multiprocessing import Pool

# single process version
def get_all_valid_logical_forms():
    with open("sentence_rels.json", "r") as f:
        sentence_rels = json.load(f)

    training_data = {}  # {sentence:{target_var:[logical form]}}

    for sentence, rels in tqdm(sentence_rels.items(), desc="generating dpd"):
        if not rels:
            continue

        # initizalize training data
        training_data[sentence] = {}

        # convert pos to str, timeml to uci
        for idx, rel in enumerate(rels):
            for pos_key in {"lhs", "rhs"}:
                rels[idx][pos_key] = f"{rel[pos_key][0]}_{rel[pos_key][1]}"
            rels[idx]["rel"] = timeml_to_uci(rels[idx]["rel"])

        # populate temp_vars
        temp_vars = set(sum([[rel["lhs"], rel["rhs"]] for rel in rels], []))

        """
        we create logical form for one variable once at a time, so the context is all variables
        except the main variable (the one logical form should evaluate to)
        """
        for main_var in temp_vars:
            # generate possible logical forms
            target_vars = temp_vars.difference({main_var})
            context = TempliTimeContext(temp_vars=target_vars)
            world = Templi_Language(context)
            walker = ActionSpaceWalker(world, max_path_length=10)
            all_logical_forms = walker.get_all_logical_forms(max_num_logical_forms=1000)

            # generate target relations
            target_relations = {}  # {target_var: rel}
            for rel in rels:
                if rel["lhs"] == rel["rhs"]:
                    # TODO don't know why timeml_parser.py produces this... might be a bug
                    continue
                if main_var == rel["lhs"]:
                    target_relations[rel["rhs"]] = rel["rel"]
                if main_var == rel["rhs"] and converse(rel["rel"]):
                    target_relations[rel["lhs"]] = converse(rel["rel"])

            # filter correct logical forms
            correct_logical_forms = []
            for logical_form in all_logical_forms:
                if world.evaluate_logical_form(logical_form, target_relations):
                    correct_logical_forms.append(logical_form)

            # collect training data
            training_data[sentence][main_var] = correct_logical_forms

    # write to file
    json.dump(training_data, open("training_data.json", "w"), indent=4)


    
    
    
    
def get_valid_logical_forms(sentences_rels: Dict[str, Dict[str, str]]):
    with Pool(8) as p:
        data = list(tqdm(p.imap(logical_forms_of_sentence, sentences_rels.items()),total=len(sentences_rels)))
        p.close()
        sentences_logical_forms = reduce(lambda a, b: {**a, **b}, data, {})
        return sentences_logical_forms


def logical_forms_of_sentence(sentence_rels: Tuple):
    sentence = sentence_rels[0]
    rels = sentence_rels[1]

    if not rels:
        return {}

    # convert pos to str, timeml to uci
    for idx, rel in enumerate(rels):
        for pos_key in {"lhs", "rhs"}:
            rels[idx][pos_key] = f"{rel[pos_key][0]}_{rel[pos_key][1]}"
        rels[idx]["rel"] = timeml_to_uci(rels[idx]["rel"])

    # populate temp_vars
    temp_vars = set(sum([[rel["lhs"], rel["rhs"]] for rel in rels], []))
    print(temp_vars)
    # initialize {main_var:logical_forms}
    result = {i: [] for i in temp_vars}

    """
    we create logical form for one variable once at a time, so the context is all variables
    except the main variable (the one logical form should evaluate to)
    """
    for main_var in temp_vars:
        # generate possible logical forms
        target_vars = temp_vars.difference({main_var})
        print(target_vars)
        context = TempliTimeContext(temp_vars=target_vars)
        world = Templi_Language(context)
        walker = ActionSpaceWalker(world, max_path_length=10)
        all_logical_forms = walker.get_all_logical_forms(max_num_logical_forms=5000)

        # generate target relations
        target_relations = {}  # {target_var: rel}
        for rel in rels:
            if rel["lhs"] == rel["rhs"]:
                # TODO don't know why timeml_parser.py produces this... might be a bug
                continue
            if main_var == rel["lhs"]:
                target_relations[rel["rhs"]] = rel["rel"]
            if main_var == rel["rhs"] and converse(rel["rel"]):
                target_relations[rel["lhs"]] = converse(rel["rel"])

        # filter correct logical forms
        correct_logical_forms = []
        for logical_form in all_logical_forms:
            if world.evaluate_logical_form(logical_form, target_relations):
                correct_logical_forms.append(logical_form)

        # collect training data
        result[main_var] = correct_logical_forms
    return {sentence: result}



In [68]:
list(sentence_rels.items())[1]

('The financial assistance from the World Bank and the International Monetary Fund are not helping.',
 [{'lhs': '12_22', 'rhs': '75_82', 'rel': 'p'},
  {'lhs': '75_82', 'rhs': '12_22', 'rel': 'p'}])

In [80]:
# from templi.dataset_readers.search_timeml_logical_forms import logical_forms_of_sentence

with open("sentence_rels.json", "r") as f:
    sentence_rels = json.load(f)
# sentences_logical_forms = get_valid_logical_forms(sentence_rels)
# get_valid_logical_forms(t)
# import sys
# print(sys.executable)
i = 1
print(list(sentence_rels.items())[i])
tmp = logical_forms_of_sentence(list(sentence_rels.items())[i])
tmp

('The financial assistance from the World Bank and the International Monetary Fund are not helping.', [{'lhs': [12, 22], 'rhs': [75, 82], 'rel': 'BEFORE'}, {'lhs': [75, 82], 'rhs': [12, 22], 'rel': 'BEFORE'}])
{'12_22', '75_82'}
{'75_82'}
{'12_22'}


{'The financial assistance from the World Bank and the International Monetary Fund are not helping.': {'12_22': ['(P 75_82)',
   '(P (D 75_82))',
   '(P (F 75_82))',
   '(P (M 75_82))',
   '(P (P 75_82))',
   '(P (d 75_82))',
   '(P (e 75_82))',
   '(P (f 75_82))',
   '(P (m 75_82))',
   '(P (p 75_82))',
   '(P (s 75_82))',
   '(intersection 75_82 (P 75_82))',
   '(union 75_82 (P 75_82))',
   '(P (D (D 75_82)))',
   '(P (D (F 75_82)))',
   '(P (D (M 75_82)))',
   '(P (D (P 75_82)))',
   '(P (D (d 75_82)))',
   '(P (D (e 75_82)))',
   '(P (D (f 75_82)))',
   '(P (D (m 75_82)))',
   '(P (D (p 75_82)))',
   '(P (D (s 75_82)))',
   '(P (F (D 75_82)))',
   '(P (F (F 75_82)))',
   '(P (F (M 75_82)))',
   '(P (F (P 75_82)))',
   '(P (F (d 75_82)))',
   '(P (F (e 75_82)))',
   '(P (F (f 75_82)))',
   '(P (F (m 75_82)))',
   '(P (F (p 75_82)))',
   '(P (F (s 75_82)))',
   '(P (M (D 75_82)))',
   '(P (M (F 75_82)))',
   '(P (M (M 75_82)))',
   '(P (M (P 75_82)))',
   '(P (M (d 75_82)))',
   '(P 

In [None]:
'assistance BEFORE helping'
'helping to assistance'

In [88]:
from allennlp.data.tokenizers import Token
from allennlp.data.tokenizers import SpacyTokenizer
import timeit
from allennlp.common import Params
from allennlp_semparse.common import Date, ExecutionError
from templi.templi_languages.templi_language import Templi_Language, TempliTimeContext
from templi.dataset_readers.search_timeml_logical_forms import get_all_valid_logical_forms, get_valid_logical_forms
from ortools.linear_solver import pywraplp
from templi.templi_languages.allen_algebra import infer_relation
from allennlp_semparse.common.action_space_walker import ActionSpaceWalker
from allennlp.commands.train import train_model_from_file, train_model
import time
import pathlib
import json


# training model
# PROJECT_ROOT = (pathlib.Path(__file__).parents[0]).resolve()
# MODULE_ROOT = PROJECT_ROOT / "allennlp_semparse"
# TOOLS_ROOT = None  # just removing the reference from super class
# TESTS_ROOT = PROJECT_ROOT / "tests"
# FIXTURES_ROOT = PROJECT_ROOT / "test_fixtures"

# print(FIXTURES_ROOT)
# param_file = FIXTURES_ROOT / "wikitables" / "experiment_my.json"
# # dataset_file = FIXTURES_ROOT / "data" / "wikitables" / "sample_data.examples"
# # dataset_file = PROJECT_ROOT / "WikitableQuestions" / "data" / "train.examples"
# params = Params.from_file(param_file)

# ## train from scratch
# # reader = DatasetReader.from_params(params["dataset_reader"])
# model = train_model_from_file(param_file, "./mytrain2", recover=False)
# model = train_model()




# for generating logical forms
with open("sentence_rels.json", "r") as f:
    sentence_rels = json.load(f)
# sentences_logical_forms = get_valid_logical_forms(sentence_rels)
# write to file
# json.dump(sentences_logical_forms, open("sentences_logical_forms.json", "w"), indent=4)
fff = 9


templilanguage = Templi_Language(TempliTimeContext({}))
valid_actions = templilanguage.get_nonterminal_productions()
walker = ActionSpaceWalker(templilanguage, max_path_length=5)
all_logical_forms = walker.get_all_logical_forms(max_num_logical_forms=1000)
solver = pywraplp.Solver.CreateSolver("GLOP")

logical_form = "(intersection (d (clothes_dry)) (D (d (intersection (d Thursday) (d evening)))))"
# logical_form = "(intersection (precedes (during (clothes_dry))) (contains (during (intersection (during Thursday) (during evening)))))"
final_interval = templilanguage.execute(logical_form)
final_interval
# using allen algebra

# interval_variables_list = final_interval.get_interval_variables()
# clothes_dry_var = [i for i in interval_variables_list if i.name == "clothes_dry"][0]
# rel = infer_relation(final_interval, final_interval.intervalvar, clothes_dry_var)

# # using solver
# constraints_list = final_interval.constraints_list
# variables_list = final_interval.get_variables()

# for i, constraints in enumerate(constraints_list):
#     id_IntVar_dict = {i.id: solver.NumVar(i.lb, i.ub, str(i.id)) for i in variables_list}
#     for constraint in constraints:
#         ct = solver.Constraint(*constraint.rhs, str(constraint.id))
#         for lhs_tup in constraint.lhs:
#             ct.SetCoefficient(id_IntVar_dict[lhs_tup[0].id], lhs_tup[1])

#     objective = solver.Objective()
#     objective.SetCoefficient(id_IntVar_dict[final_interval.endvar.id], 1)
#     objective.SetCoefficient(id_IntVar_dict[final_interval.startvar.id], -1)
#     objective.SetMinimization()
#     result = solver.Solve()

#     print("Solution:")
#     print("Objective value =", objective.Value())
#     print("start =", id_IntVar_dict[final_interval.startvar.id].solution_value())
#     print("end =", id_IntVar_dict[final_interval.endvar.id].solution_value())
#     print("--- %s seconds ---" % (time.time() - start_time))
#     fff = 9



ExecutionError: 'Unrecognized function: clothes_dry'