In [1]:
# a saturation prover environment can be created
# like any other OpenAI Gym environment
import gym
import os
from glob import glob
import sys
# this example uses the resources bundled with the package;
# one can point to an unpacked [TPTP](http://tptp.org/) library instead
# setting the list of problems is obligatory
# here we take the CNF problems from TPTP
if sys.version_info.major == 3 and sys.version_info.minor >= 9:
    from importlib.resources import files
else:
    from importlib_resources import files

# Glob pattern for the problem files: every file named `*-*.p` (the CNF
# problems) in every domain directory of the TPTP mock shipped with the
# package.  The pattern is built with one explicit `os.path.join`; the
# previous nesting passed the wildcard part to `joinpath` and left the outer
# `os.path.join` with a single argument, which was easy to misread.
# NOTE: assumes the package is installed as regular files, so that
# `files(...)` yields a real filesystem path usable by `glob`.
problem_pattern = os.path.join(
    files("gym_saturation"),
    "resources", "TPTP-mock", "Problems", "*", "*-*.p",
)
# `glob` returns matches in arbitrary order; sort them for reproducibility
problem_list = sorted(glob(problem_pattern))
# the list of problems is an obligatory argument of the environment
env = gym.make(
    "gym_saturation:saturation-v0",
    problem_list=problem_list
)

In [2]:
# fix the environment's seed so that the results are reproducible
seed_result = env.seed(2)
print(seed_result)

2


In [3]:
# problem parsing relies on the `lark` package and can sometimes be slow;
# re-implementing the parser in a language faster than Python might help
# NOTE: `obs` (the observation returned by `reset`) is inspected in later cells
obs = env.reset()

In [4]:
# `reset` picks one problem from the list at random;
# the default rendering shows its clauses in TPTP syntax
initial_state_text = env.render()
print(initial_state_text)

cnf(b_equals_bb, hypothesis, equal_sets(b, bb)).
cnf(element_of_b, hypothesis, member(element_of_b, b)).
cnf(prove_element_of_bb, negated_conjecture, ~member(element_of_b, bb)).
cnf(membership_in_subsets, axiom, ~member(X0, X1) | ~subset(X1, X2) | member(X0, X2)).
cnf(subsets_axiom1, axiom, subset(X3, X4) | member(member_of_1_not_of_2(X3,X4), X3)).
cnf(subsets_axiom2, axiom, ~member(member_of_1_not_of_2(X5,X6), X6) | subset(X5, X6)).
cnf(set_equal_sets_are_subsets1, axiom, ~equal_sets(X7, X8) | subset(X7, X8)).
cnf(set_equal_sets_are_subsets2, axiom, ~equal_sets(X9, X10) | subset(X10, X9)).
cnf(subsets_are_set_equal_sets, axiom, ~subset(X11, X12) | ~subset(X12, X11) | equal_sets(X12, X11)).


In [5]:
# an observation is a dict with two keys, "action_mask" and "real_obs":
# * "action_mask" holds 1 for every action the agent may choose, 0 otherwise
# * "real_obs" is a list of dictionaries, each representing a logical clause
print(obs.keys())
# only the first 20 entries of the (long) mask are shown
mask_head = obs["action_mask"][:20]
print(mask_head)
# the first literal of the first clause
first_clause = obs["real_obs"][0]
print(first_clause["literals"][0])

dict_keys(['real_obs', 'action_mask'])
[1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'equal_sets', 'arguments': [{'class': 'Function', 'name': 'b', 'arguments': []}, {'class': 'Function', 'name': 'bb', 'arguments': []}]}}


In [6]:
# possible actions are the indices of not yet processed clauses in the state
# formally, the action space is a sufficiently large discrete space
print(env.action_space)
# but the number of ones in the action mask
allowed_actions = obs["action_mask"].sum()
print(allowed_actions)
# is equal to the number of not yet processed clauses
unprocessed_count = sum(
    1.0 for state_clause in obs["real_obs"] if not state_clause["processed"]
)
print(unprocessed_count)

Discrete(100000)
9.0
9.0


In [7]:
# only the "ansi" and "human" rendering modes are implemented:
# * "human" (the default) returns the TPTP representation
# * "ansi" returns a string with the state as a list of clause dictionaries
#   (the recorded output uses Python literals such as False/None,
#   so it is not strict JSON)
# only the first 1000 characters of the string are shown
ansi_rendering = env.render("ansi")
print(ansi_rendering[:1000])

[{'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'equal_sets', 'arguments': [{'class': 'Function', 'name': 'b', 'arguments': []}, {'class': 'Function', 'name': 'bb', 'arguments': []}]}}], 'label': 'b_equals_bb', 'birth_step': 0, 'processed': False, 'inference_parents': [], 'inference_rule': None}, {'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'member', 'arguments': [{'class': 'Function', 'name': 'element_of_b', 'arguments': []}, {'class': 'Function', 'name': 'b', 'arguments': []}]}}], 'label': 'element_of_b', 'birth_step': 0, 'processed': False, 'inference_parents': [], 'inference_rule': None}, {'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': True, 'atom': {'class': 'Predicate', 'name': 'member', 'arguments': [{'class': 'Function', 'name': 'element_of_b', 'arguments': []}, {'class': 'Function', 'name': 'bb', 'arguments': []}]}}], 'label': 'prov

In [8]:
# the package includes a simple episode function
from gym_saturation.agent_testing import episode
# and an agent selecting the shortest clause five times
# and then once --- the oldest clause
from gym_saturation.agent_testing import SizeAgeAgent

# restrict the environment to a single problem bundled with the package
set001_path = files("gym_saturation").joinpath(
    os.path.join("resources", "TPTP-mock", "Problems", "SET", "SET001-1.p")
)
env.unwrapped.problem_list = [set001_path]
# run one whole episode with the size/age agent (arguments 5 and 1)
last_transition = episode(env, SizeAgeAgent(5, 1))
# this agent manages to solve a simple problem from TPTP
print(last_transition.done, last_transition.reward)

True 1.0


In [9]:
# the proof is short (the TSTP proof printed in the next cell has only
# four inference lines), but this agent needs 11 steps to find it;
# a trained DL agent might have done better
# NOTE(review): `_elapsed_steps` is a private attribute, presumably
# maintained by a Gym wrapper around the environment — confirm
steps_used = env._elapsed_steps
print(steps_used)

11


In [10]:
# the proof found during the episode is available in TSTP format
found_proof = env.tstp_proof
print(found_proof)

cnf(xcf54a780_8507_11ec_8f1c_5d6e15ba2bdd, lemma, subset(b, bb), inference(resolution, [], [b_equals_bb, set_equal_sets_are_subsets1])).
cnf(xcf55e40f_8507_11ec_adc5_5d6e15ba2bdd, lemma, ~subset(b, X2) | member(element_of_b, X2), inference(resolution, [], [element_of_b, membership_in_subsets])).
cnf(xcf585131_8507_11ec_82ed_5d6e15ba2bdd, lemma, member(element_of_b, bb), inference(resolution, [], [xcf54a780_8507_11ec_8f1c_5d6e15ba2bdd, xcf55e40f_8507_11ec_adc5_5d6e15ba2bdd])).
cnf(xcf5908ea_8507_11ec_bd60_5d6e15ba2bdd, lemma, $false, inference(resolution, [], [prove_element_of_bb, xcf585131_8507_11ec_82ed_5d6e15ba2bdd])).


In [11]:
# paramodulation test: switch the environment to a single problem
# whose proof requires the paramodulation rule
tst002_path = files("gym_saturation").joinpath(
    os.path.join("resources", "TPTP-mock", "Problems", "TST", "TST002-1.p")
)
env.unwrapped.problem_list = [tst002_path]
# reset and show the clauses of the newly chosen problem
env.reset()
print(env.render())
# run a whole episode with the same size/age agent as before
episode(env, SizeAgeAgent(5, 1))
# here the proof consists of five steps
# but the agent needs ten
print(env.tstp_proof)
# number of lines in the proof, followed by the number of steps taken
proof_line_count = len(env.tstp_proof.split("\n"))
print(proof_line_count, env._elapsed_steps)

cnf(a1, hypothesis, q(a)).
cnf(a2, hypothesis, ~q(a) | f(X0) = X0).
cnf(a3, hypothesis, p(X1) | p(f(a))).
cnf(c, negated_conjecture, ~p(X2) | ~p(f(X2))).
cnf(xd06cabea_8507_11ec_8d98_5d6e15ba2bdd, lemma, p(f(a)), inference(factoring, [], [a3])).
cnf(xd06dd384_8507_11ec_a5d8_5d6e15ba2bdd, lemma, f(X0) = X0, inference(resolution, [], [a1, a2])).
cnf(xd06f0879_8507_11ec_bd6b_5d6e15ba2bdd, lemma, p(a), inference(paramodulation, [], [xd06cabea_8507_11ec_8d98_5d6e15ba2bdd, xd06dd384_8507_11ec_a5d8_5d6e15ba2bdd])).
cnf(xd06fd9c7_8507_11ec_bba6_5d6e15ba2bdd, lemma, ~p(a), inference(resolution, [], [xd06cabea_8507_11ec_8d98_5d6e15ba2bdd, c])).
cnf(xd07282da_8507_11ec_aeaf_5d6e15ba2bdd, lemma, $false, inference(resolution, [], [xd06f0879_8507_11ec_bd6b_5d6e15ba2bdd, xd06fd9c7_8507_11ec_bba6_5d6e15ba2bdd])).
5 10
