Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Task6 (probably not) #5

Merged
merged 11 commits into from
May 11, 2024
12 changes: 7 additions & 5 deletions project/task2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from pyformlang.finite_automaton import DeterministicFiniteAutomaton
from pyformlang.finite_automaton import NondeterministicFiniteAutomaton
from pyformlang.finite_automaton import NondeterministicFiniteAutomaton
from pyformlang.finite_automaton import State


def regex_to_dfa(regex: str) -> DeterministicFiniteAutomaton:
Expand All @@ -15,20 +17,20 @@ def graph_to_nfa(
) -> NondeterministicFiniteAutomaton:
nfa = NondeterministicFiniteAutomaton()

if len(start_states) == 0:
for node in graph.nodes():
if start_states is None or len(start_states) == 0:
for node in graph.nodes:
nfa.add_start_state(node)
for node in start_states:
nfa.add_start_state(node)

if len(final_states) == 0:
for node in graph.nodes():
if final_states is None or len(final_states) == 0:
for node in graph.nodes:
nfa.add_final_state(node)
for node in final_states:
nfa.add_final_state(node)

for u, v, label in graph.edges(data="label"):
nfa.add_transition(u, label, v)
nfa.add_transition(State(u), label, State(v))

return nfa
pass
61 changes: 38 additions & 23 deletions project/task3.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(
self.final_states = aut.final_states
states = aut.to_dict()
len_states = len(aut.states)
self.state_to_idx = {v: i for i, v in enumerate(aut.states)}
self.state_mat_mapping = {v: i for i, v in enumerate(aut.states)}
self.matrix = dict()

for label in aut.symbols:
Expand All @@ -38,48 +38,52 @@ def __init__(
if label in edges:
for v in as_set(edges[label]):
self.matrix[label][
self.state_to_idx[u], self.state_to_idx[v]
self.state_mat_mapping[u], self.state_mat_mapping[v]
] = True

def accepts(self, word: Iterable[Symbol]) -> bool:
return self.to_automaton().accepts("".join(word))
pass

def is_empty(self) -> bool:
return len(self.matrix) == 0 or len(list(self.matrix.values())[0]) == 0
return len(self.matrix.values()) == 0

def size(self) -> int:
return len(self.state_to_idx)
return len(self.state_mat_mapping)

def transitive_closure(self):
if len(self.matrix.values()) == 0:
def transitive_closure(self) -> sparse.dok_matrix:
if self.is_empty():
return sparse.dok_matrix((0, 0), dtype=bool)
adj = sum(self.matrix.values())

adj = sum(self.matrix.values()) + sparse.eye(
self.size(), self.size(), dtype=bool
)

for _ in range(adj.shape[0]):
adj += adj @ adj

return adj

def to_automaton(self) -> NondeterministicFiniteAutomaton:
ans = NondeterministicFiniteAutomaton()

idx_to_state = {v: k for k, v in self.state_to_idx.items()}
idx_to_state = {v: k for k, v in self.state_mat_mapping.items()}

for label in self.matrix.keys():
matrix_size = self.matrix[label].shape[0]
for x in range(matrix_size):
for y in range(matrix_size):
if self.matrix[label][x, y]:
ans.add_transition(
self.state_to_idx[State(x)],
self.state_mat_mapping[State(x)],
label,
self.state_to_idx[State(y)],
self.state_mat_mapping[State(y)],
)

for s in self.start_states:
ans.add_start_state(self.state_to_idx[State(s)])
ans.add_start_state(self.state_mat_mapping[State(s)])

for s in self.final_states:
ans.add_final_state(self.state_to_idx[State(s)])
ans.add_final_state(self.state_mat_mapping[State(s)])

return ans

Expand All @@ -88,8 +92,13 @@ def intersect_automata(
automaton1: FiniteAutomaton, automaton2: FiniteAutomaton
) -> FiniteAutomaton:
a = deepcopy(automaton1)
num_states = len(automaton2.state_to_idx)
symbols = set(automaton1.matrix.keys()).intersection(automaton2.matrix.keys())
num_states = len(automaton2.state_mat_mapping)
symbols = automaton1.matrix.keys() & automaton2.matrix.keys()
# symbols = None
# if take_from_mapping:
# symbols = automaton1.state_mat_mapping.keys() & automaton2.state_mat_mapping.keys()
# else:
# symbols = automaton1.matrix.keys() & automaton2.matrix.keys()
matrices = {
label: sparse.kron(automaton1.matrix[label], automaton2.matrix[label], "csr")
for label in symbols
Expand All @@ -98,10 +107,10 @@ def intersect_automata(
final = set()
mapping = dict()

for u, i in automaton1.state_to_idx.items():
for v, j in automaton2.state_to_idx.items():
for u, i in automaton1.state_mat_mapping.items():
for v, j in automaton2.state_mat_mapping.items():

k = len(automaton2.state_to_idx) * i + j
k = len(automaton2.state_mat_mapping) * i + j
mapping[State(k)] = k

assert isinstance(u, State)
Expand All @@ -112,7 +121,7 @@ def intersect_automata(
final.add(State(k))

a.matrix = matrices
a.state_to_idx = mapping
a.state_mat_mapping = mapping
a.start_states = start
a.final_states = final
return a
Expand All @@ -127,11 +136,17 @@ def paths_ends(
closure = intersection.transitive_closure()

size = query.size()
result = []
result = set()
for u, v in zip(*closure.nonzero()):
if u in intersection.start_states and v in intersection.final_states:
result.append(
(aut_graph.state_to_idx[u // size], aut_graph.state_to_idx[v // size])
result.add(
(
aut_graph.state_mat_mapping[u // size],
aut_graph.state_mat_mapping[v // size],
)
)

return result
if len(query.start_states & query.final_states) > 0:
result |= {(i, i) for i in start_nodes & final_nodes}

return list(result)
19 changes: 15 additions & 4 deletions project/task4.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def reachability_with_constraints(
n = fa.size()

constr_start_inds = [
constraints_fa.state_to_idx[State(i)] for i in constraints_fa.start_states
constraints_fa.state_mat_mapping[State(i)] for i in constraints_fa.start_states
]

symbols = fa.matrix.keys() & constraints_fa.matrix.keys()
Expand All @@ -33,24 +33,35 @@ def reachability_with_constraints(
for label in symbols
}

for v in [fa.state_to_idx[State(k)] for k in fa.start_states]:
for v in [fa.state_mat_mapping[State(k)] for k in fa.start_states]:
front = sparse.dok_matrix((m, m + n), dtype=bool)
for i in constr_start_inds:
front[i, i] = True
for i in range(m):
front[i, v + m] = True

for i in [
constraints_fa.state_mat_mapping[State(k)]
for k in (constraints_fa.final_states & constraints_fa.start_states)
]:
for j in [
fa.state_mat_mapping[State(k)]
for k in (fa.final_states & fa.start_states)
]:
if front[i, i] and front[i, j + m]:
result[v].add(j)

for _ in range(m * n):
new_front = sparse.dok_matrix((m, m + n), dtype=bool)
for sym in symbols:
new_front += fixed_matrix(front @ transitions[sym])
front = new_front

for i in [
constraints_fa.state_to_idx[State(k)]
constraints_fa.state_mat_mapping[State(k)]
for k in constraints_fa.final_states
]:
for j in [fa.state_to_idx[State(k)] for k in fa.final_states]:
for j in [fa.state_mat_mapping[State(k)] for k in fa.final_states]:
if front[i, j + m]:
result[v].add(j)
return result
89 changes: 89 additions & 0 deletions project/task6.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from pyformlang.cfg import CFG, Variable, Terminal, Epsilon
import pyformlang
import networkx as nx
from typing import Tuple
from copy import deepcopy


def cfg_to_weak_normal_form(cfg: pyformlang.cfg.CFG) -> pyformlang.cfg.CFG:
    """Convert ``cfg`` to a weak Chomsky normal form.

    The grammar first has its unit productions eliminated and its useless
    symbols removed. The remaining productions are then passed through two
    private pyformlang helpers, ``_get_productions_with_only_single_terminals``
    and ``_decompose_productions``.
    NOTE(review): judging by their names these isolate terminals and split
    long bodies into pairs -- confirm against the pinned pyformlang version.

    No epsilon-removal step is applied here.

    Args:
        cfg: The grammar to convert.

    Returns:
        A new grammar built from the transformed productions, with the same
        start symbol as ``cfg``.
    """
    cleaned = cfg.eliminate_unit_productions().remove_useless_symbols()
    single_terminal_rules = cleaned._get_productions_with_only_single_terminals()
    decomposed_rules = set(cleaned._decompose_productions(single_terminal_rules))
    # Keep the caller's start symbol instead of assuming it is named "S";
    # otherwise a grammar starting from any other variable would silently
    # end up with the wrong start symbol.
    return CFG(start_symbol=cfg.start_symbol, productions=decomposed_rules)


def gramm_from_file(filepath: str) -> CFG:
    """Read the text file at ``filepath`` and parse its contents as a CFG.

    Args:
        filepath: Path to a file holding the grammar in the textual format
            accepted by ``CFG.from_text``.

    Returns:
        The grammar parsed from the whole file contents.
    """
    with open(filepath) as source:
        grammar_text = source.read()
    return CFG.from_text(grammar_text)


def cfpq_with_hellings(
    cfg: pyformlang.cfg.CFG,
    graph: nx.DiGraph,
    start_nodes: set[int] = None,
    final_nodes: set[int] = None,
) -> set[Tuple[int, int]]:
    """Answer a context-free path query with a Hellings-style worklist.

    The grammar is converted with ``cfg_to_weak_normal_form`` and a set of
    triples ``(variable, source, target)`` is grown until no production with
    a two-symbol body can add a new triple.

    Args:
        cfg: Grammar describing the admissible paths.
        graph: Graph whose edges carry their symbol in the ``"label"``
            attribute.
        start_nodes: Nodes a path may start from; every graph node when
            ``None``.
        final_nodes: Nodes a path may end at; every graph node when ``None``.

    Returns:
        All pairs ``(source, target)`` with ``source`` in ``start_nodes`` and
        ``target`` in ``final_nodes`` whose triple is labelled with the start
        symbol of ``cfg``.
    """
    if start_nodes is None:
        start_nodes = graph.nodes
    if final_nodes is None:
        final_nodes = graph.nodes

    gramm = cfg_to_weak_normal_form(cfg)

    terminal_rules = {}  # head -> terminals t with a production head -> t
    epsilon_heads = set()  # heads with an empty (epsilon) production
    pair_rules = {}  # head -> pairs (B, C) with a production head -> B C

    for production in gramm.productions:
        body = production.body
        body_len = len(body)
        if body_len == 1 and isinstance(body[0], Terminal):
            terminal_rules.setdefault(production.head, set()).add(body[0])
        elif body_len == 0 or (body_len == 1 and isinstance(body[0], Epsilon)):
            epsilon_heads.add(production.head)
        elif body_len == 2:
            pair_rules.setdefault(production.head, set()).add((body[0], body[1]))

    # Loops: an epsilon-deriving variable connects every node to itself.
    result = {(head, node, node) for head in epsilon_heads for node in graph.nodes}
    # Terminal transitions: one triple per edge and matching head -> terminal.
    result |= {
        (head, source, target)
        for source, target, tag in graph.edges.data("label")
        for head, terminals in terminal_rules.items()
        if Terminal(tag) in terminals
    }

    # A shallow copy is enough: the worklist must hold the very same node
    # objects as ``result``. A deep copy would clone them and break matching
    # for nodes that are compared by identity.
    pending = set(result)

    while pending:
        n_i, vi, ui = pending.pop()

        # ``result`` cannot be mutated while it is being scanned, so the
        # triples found in this step are collected separately first.
        discovered = set()

        for n_j, vj, uj in result:
            if vi == uj:
                # (n_j: vj -> vi) followed by (n_i: vi -> ui).
                for head, bodies in pair_rules.items():
                    candidate = (head, vj, ui)
                    if (n_j, n_i) in bodies and candidate not in result:
                        discovered.add(candidate)
            if ui == vj:
                # (n_i: vi -> ui) followed by (n_j: ui -> uj).
                for head, bodies in pair_rules.items():
                    candidate = (head, vi, uj)
                    if (n_i, n_j) in bodies and candidate not in result:
                        discovered.add(candidate)

        pending |= discovered
        result |= discovered

    start_symbol = cfg.start_symbol
    return {
        (source, target)
        for head, source, target in result
        if source in start_nodes and target in final_nodes and head == start_symbol
    }
54 changes: 54 additions & 0 deletions tests/test_task6_local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import project.task4 as task4
import project.task2 as task2
import project.task3 as task3
import project.task6 as task6
from pyformlang import cfg
import networkx as nx
from pyformlang.regular_expression import Regex


def test_cfpq():
    """Smoke-run the path-query entry points on one small labelled graph.

    The results are only printed for manual inspection; nothing is asserted
    about their values yet.
    """
    gramm = cfg.CFG.from_text("S -> $ | a S")
    regex_str = "a b c*"

    labelled_edges = [
        (0, 1, "l"),
        (1, 2, "l"),
        (2, 0, "b"),
        (2, 1, "b"),
        (2, 5, "l"),
        (2, 8, "l"),
        (3, 1, "l"),
        (3, 0, "e"),
        (4, 2, "e"),
        (6, 0, "b"),
        (7, 5, "b"),
    ]
    gr = nx.MultiDiGraph()
    # A bare (u, v, "x") triple makes a multigraph treat "x" as the edge
    # *key* and leaves the "label" attribute unset. task2.graph_to_nfa and
    # task6.cfpq_with_hellings read the "label" attribute, so it is passed
    # explicitly as edge data.
    gr.add_edges_from(
        (source, target, {"label": label}) for source, target, label in labelled_edges
    )

    start_nodes = {0, 2, 6}
    final_nodes = {1, 2, 4}

    print()

    print(gramm.to_text())
    print(task6.cfg_to_weak_normal_form(gramm).to_text())
    print()
    print(
        task4.reachability_with_constraints(
            task3.FiniteAutomaton(task2.graph_to_nfa(gr, start_nodes, final_nodes)),
            task3.FiniteAutomaton(task2.regex_to_dfa(regex_str)),
        )
    )
    print(task6.cfpq_with_hellings(gramm, gr, start_nodes, final_nodes))
    print(task3.paths_ends(gr, start_nodes, final_nodes, regex_str))

    # Placeholder: the test currently only checks that nothing above raises.
    assert True
Loading