/
statement_to_tree.py
188 lines (156 loc) · 10.6 KB
/
statement_to_tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# Given a statement, find the parse tree that led to it.
from tree_parser import *
import pickle #pickle.dump(database.proof_datas, open(output_file,'wb'))
from copy import deepcopy
from tree import *
#output_file = 'tree_parse_data'
# class InitialStringSearcher:
# def __init__(self):
# self.nodes = {}
# self.return_value = None
#
# def add(self,string,value):
# return self.add()
#
# def add_with_index(self,string, value,index):
# if index==len(string):
# self.value=value
# char = string[index]
# if char not in self.nodes:
# self.nodes[char] = InitialStringSearcher()
# self.nodes[char].add_with_index(string,value,index+1)
class InitialStringSearcher:
def __init__(self):
self.known_values = {}
#self.known_lengths = {}
def add(self,string,value):
self.known_values[tuple(string)]=value
#print 'found new substring', value[0]
#self.known_lengths[string]=len(string)
def find_longest(self,string):
#label=any(label for label in self.known_values if is_initial_string(label,string))
for l in self.known_values:
if is_initial_string(l,string): return self.known_values[l]
return False
def clear(self):
self.known_values = {}
class StatementParser:
def __init__(self,database):
# this is a really ugly hack.
self.wff_variables = set(['ph','ps','ch','th','et','ze','si','rh','mu','la','ka','ta'])
self.set_variables = set(['a', 'a"', "a'", 'a0', 'a0_', 'a1', 'a1_', 'b', 'b"', "b'", 'b0', 'b0_', 'b1', 'b1_', 'c', 'c"', "c'", 'c0_', 'c1_', 'd', 'd"', "d'", 'd0', 'd1', 'e', 'e"', "e'", 'e0', 'e1', 'f', 'f"', "f'", 'f0_', 'f1', 'g', 'g"', "g'", 'g0', 'g1', 'h', 'h"', "h'", 'h0', 'h1', 'i', 'i"', "i'", 'i0', 'i1', 'j', 'j"', "j'", 'j0', 'j1', 'k', 'k"', "k'", 'k0', 'k1', 'l', 'l"', "l'", 'l0', 'l1', 'm', 'm"', "m'", 'm0', 'm1', 'n', 'n"', "n'", 'n0_', 'n1', 'o', 'o"', 'o"_', "o'", "o'_", 'o0', 'o0_', 'o1', 'o1_', 'p', 'p"', "p'", 'p0', 'p1', 'q', 'q"', "q'", 'q0', 'q1', 'r', 'r"', "r'", 'r0', 'r1', 's', 's"', 's"_', "s'", "s'_", 's0', 's1', 't', 't"', "t'", 't0', 't1', 'u', 'u"', "u'", 'u0', 'u1', 'v', 'v"', 'v"_', "v'", "v'_", 'v0', 'v1', 'v2', 'w', 'w"', "w'", 'w0', 'w1', 'x', 'x"', "x'", 'x0', 'x1', 'y', 'y"', "y'", 'y0', 'y1', 'z', 'z"', "z'", 'z0', 'z1'])
self.search = InitialStringSearcher()
self.failed_parses = set()
self.propositions = database.non_entails_axioms
# self.propositions = {}
# for label in database.propositions:
# statement = database.propositions[label].statement
# if statement[0] != '|-': # used in the parse tree
# self.propositions[label] = database.propositions[label]#statement
# add all the variables we ever use to self.variables
# self.variables = set()
# for label in database.propositions:
# self.variables=self.variables.union(database.propositions[label].block.v)
# predefine self.variables because why not
self.variables = set(['.0.', "d'", 'd"', 'D"', 'c0_', 'd0', 'd1', 'q1', 'q0', 'x1', 'G2', 'G1', 'G0', "G'", '-t', '-w', 'q"', 'G"', 'th', 'ta', 'g1', 'g0', 'H', '+t', 'p0', '.0b', "g'", 'P', 'F3', 'g"', 'X', "W'", 'W"', 'h', "ch'", 'J0', 'J1', 't0', 't1', 'p', 'W2', 'W1', '.(x)', "t'", "J'", 't"', 'J"', "w'", "M'", 'j', 'w"', 'M"', 'ze', 'j0', 'j1', '.id', 'M1', 'M0', 'w1', 'M2', "j'", 'j"', 'mu', 'et0', 'et1', 'H1_', "Z'", 'Z"', 'et"', "et'", 'Z0', 'Z1', 'C', 'P1', 'K', "z'", 'z"', 'P"', 'S', "v'_", "P'", 'z0', 'z1', 'c', 'L1_', '0w', "s'_", 'k', './\\', 'r"', 's', 'ch', 'O1_', 'S2', 'S1', 'S0', 'S"', 'v"_', 'b0_', 'ps', "m'", 'E1', 'm"', 'C"', 'o0_', "C'", 'G1_', 'm1', 'm0', 'C2', 'ph', 'C0', 'S1_', "q'", 'F', 'c"', "c'", 'N', 'V', 'f0_', 'F0', 'F1', 'F2', 'p1', '0t', 'f', 'D1_', 'n', 'p"', 'ch1', 'F"', "p'", 'v', 'f1', 'ph"', "ph'", 'o"_', 'f"', "f'", 'ph1', 'ph0', "ze'", 'V"', '.dom', "O'", "V'", 'I1', 'I0', 's1', 's0', 's"_', 'th1', 'th0', 'V0', 'V1', 'V2', 'V3', "th'", 's"', 'I"', "s'", 'th"', "I'", 'A', "L'", 'v"', 'L"', 'ta1', "v'", 'I', 'i0', 'Q', 'v1', 'v2', "ta'", 'L2', 'L3', 'L0', 'L1', 'Y', 'i"', '/t', "i'", 'a', 'si1', 'si0', 'Y1', 'i', 'Y"', 'D2', "Y'", 'q', 'si"', 'si', 'ze1', "si'", 'y', 'Y0', 'I2', 'ps"', 'y"', "ps'", "y'", 'y1', 'y0', 'ps0', 'ps1', 'O2', 'O1', 'O0', "o'_", 'la', '.Morphism', 'n0_', 'k0', 'O"', 'ze"', 'R0', 'D', 'L', 'ze0', "R'", 'c1_', 'T', 'R"', '.X.', '.1.', 'a0_', "l'", "B'", 'l"', 'd', 'B"', 'l', 'B0', 't', 'l0', 'l1', "b'", 'b"', '.(+)', 'U1', 'U0', 'h"', 'b0', 'b1', 'ta0', "U'", 'U"', "o'", "S'", "E'", 'o"', 'E"', 'i1', '+w', 'F1_', '.xb', 'Ro1', 'Ro2', 'rh', 'E0', 'o1', 'o0', "F'", 'B2', 'G', 'R1_', "e'", 'W0', 'I1_', 'O', 'e"', '._|_', 'W', 'x', 'e1', 'e0', '1t', '<_b', 'v0', '<_a', 'r0', 'r1', 'g', 'H2', 'H0', 'H1', 'o', "r'", 'w', 'H"', '.x.', "H'", 'K"', "K'", 'ta"', 'h0', 'h1', "X'", 'K1', 'K0', "T'", 'ch"', "h'", 'M1_', '.*', '.+', '.,', '.-', './', 'X"', 'u1', 'u0', '.<', "u'", 'X1', 'u"', 'ch0_', 'B', 'x"', 'N"', 'J', "x'", "N'", 'R', '.^', 'N0', 'N1', 'x0', 'Z', '.cod', 'C1', 'b', 'o1_', 'X0', '.graph', 'r', '.~', 'B1_', 'z', '.t', '.<_', '.w', 'Q1', 'Q0', 'V1_', 'rh1', 'rh0', 'w0', '~t', '~w', 'Q"', "Q'", 'et', 'rh"', '.||', "rh'", 'k"', 'A"', "k'", "A'", 'P0', 'a1_', '.\\/', 'B1', 'A1', 'A0', 'k1', 'A2', 'C1_', '.+b', 'a"', 'E', "a'", 'A1_', 'M', 'T0', 'T1', 'a1', 'a0', 'U', 'b1_', 'T"', 'ka', 'e', "D'", 'n"', 'm', "n'", 'u', 'D0', 'n1', '.Object', '.+^', 'D1'])
def parse_new_statement(self,statement,context):
string, tree = self.parse_statement(statement,context)
self.search.clear()
self.failed_parses = set()
return string,tree
# This checks whether we're just parsing a variable and otherwise check whether any of the propositions
# describes it.
def parse_statement(self,statement,context):
# this should return a parse tree for a list of strings that form statement
# check whether we are doomed to failure
if (statement[0],len(statement)) in self.failed_parses:
return False,False
# attempt to search for the phrase
tuples = self.search.find_longest(statement)
if tuples: return tuples
# check if it's just a variable
# if statement[1] in self.variables and not (statement[0]=='wff' and statement[1] not in self.wff_variables) and not (statement[0]=='set' and statement[1] not in self.set_variables):
# #yep. It's just a variable. Skip the type checking. Variables can only have one type ever, right?
# length = 1; # exclude the wff term
# string = statement[:2]
# tree = Tree(value='VAR'+statement[1])
# return string,tree
if statement[1] in self.variables:
for f in context.f.values():
if f.variable == statement[1] and f.vclass == statement[0]:
return statement[:2], f.tree
#found_valid_parsing = False
for prop_label in self.propositions:
prop = self.propositions[prop_label]
string, tree = self.proposition_describes_statement(prop,statement,context)
if string == False:
continue
#print 'found',string
#if statement[0]=='wff'
self.search.add(string,(string,tree)) # add to the search tree
return string, tree
#print 'could not find expression for ',statement
self.failed_parses.add((statement[0],len(statement)))
return False, False
# this is a brutally inefficient way to go about this
# when it completes it returns a parse tree for the statement and the length of the tree
def proposition_describes_statement(self,proposition,s,context):
prop_s = proposition.statement
# the types of all the free variables
variable_types = {hyp.variable:hyp.vclass for hyp in proposition.f.values()}
# string definitions of all the free variables
variable_definitions = {hyp.variable:None for hyp in proposition.f.values()}
#tree defintions of all the free variables
variable_trees = {hyp.variable:None for hyp in proposition.f.values()}
index_into_s = 0
index_into_prop_s=0
while index_into_prop_s < len(prop_s):
if index_into_s>=len(s):
#print 'ran out of s'
return False,False
prop_value = prop_s[index_into_prop_s]
if prop_value in variable_types:
# it's a variable
# check if already defined
if variable_definitions[prop_value]==None:
#then we need to figure out the parsing of the substatement
#print 'testing ',[variable_types[prop_value]]+s[index_into_s:],' because of ',proposition.label,prop_s
string, tree = self.parse_statement([variable_types[prop_value]]+s[index_into_s:],context)
if string == False:
return False,False
length = len(string)-1 # skip the wff/set/class bit
index_into_s+=length
index_into_prop_s+=1
variable_definitions[prop_value] = string[1:]
variable_trees[prop_value] = tree
continue
else:
#we've already seen this expression before
if is_initial_string(variable_definitions[prop_value],statement):
# Yes, yes, we get the point
index_into_s+=len(variable_definitions[prop_value])
index_into_prop_s+=1
continue
else:
return False,False # eh. Whatever.
else:
#it's not a variable
if prop_value == s[index_into_s]:
index_into_s+=1
index_into_prop_s+=1
continue
else:
#it's not a variable and it doesn't match
return False,False
#we have the entire parsing and it appears to work
leaves = [variable_trees[hyp.variable] for hyp in proposition.hyps if hyp.type == 'f']
tree = Tree(value=proposition.label,leaves = leaves)
#now construct the string.
out_string = s[:index_into_s]
return out_string, tree
def is_initial_string(initial_string,string):
#return (len(string)>=len(initial_string)) and string[:len(initial_string)]==initial_string
if len(initial_string)>len(string): return False
for i in range(len(initial_string)):
if initial_string[i]!=string[i]: return False
return True