In [1]:
# saturation prover environment can be created
# as any other OpenAI Gym environment
import gym
import os
from glob import glob
import sys
# `importlib.resources.files` is part of the standard library since Python 3.9;
# only older interpreters need the `importlib_resources` backport
if sys.version_info >= (3, 9):
    from importlib.resources import files
else:
    from importlib_resources import files

# this example uses package's resources
# one can indicate the path to unpacked [TPTP](http://tptp.org/) instead
# it's obligatory to set the maximal number of steps of saturation algorithm
# and the list of problems. Here are CNF problems from TPTP
# `glob` yields paths in arbitrary (file-system dependent) order, so the list
# is sorted to keep the seeded random problem choice independent of the
# directory listing order on a particular machine
problem_list = sorted(
    glob(
        os.path.join(
            files("gym_saturation").joinpath("resources/TPTP-mock"),
            "Problems/*/*-*.p",
        )
    )
)
# the step limit and the list of problems are passed to the environment
env = gym.make(
    "gym_saturation:saturation-v0",
    step_limit=100,
    problem_list=problem_list,
)

In [2]:
# fix the seed of the environment so that the results are reproducible
env_seed = env.seed(3)
print(env_seed)

3


In [3]:
# when resetting the environment one may pass an optional argument ---
# the name of an exact problem from TPTP
# (if omitted, a problem will be chosen at random)
# parsing is done with the `lark` package, which can sometimes be slow;
# reimplementing the parser in a language faster than Python might help
state = env.reset()

In [4]:
# the problem picked at random during the reset is available as `env.problem`;
# only the file name is shown here
chosen_problem = os.path.basename(env.problem)
print(chosen_problem)

TST002-1.p


In [5]:
# the state is a list of dictionaries, each representing a logical clause;
# show the first literal of the first clause
first_clause = state[0]
first_literal = first_clause["literals"][0]
print(first_literal)

{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'q', 'arguments': [{'class': 'Function', 'name': 'a', 'arguments': []}]}}


In [6]:
# possible actions are the indices of those clauses in the state
# which have not been processed yet
print(env.action_space)

[0, 1, 2, 3]


In [7]:
# only "ansi" and "human" rendering methods are implemented
# "human" returns the TPTP representation of the state
# (no mode is passed here; presumably "human" is the default,
# since the output below is in TPTP syntax)
print(env.render())

cnf(a1, hypothesis, q(a) ).
cnf(a2, hypothesis, ~q(a) |=(f(X0),X0) ).
cnf(a3, hypothesis, p(X1) |p(f(a)) ).
cnf(c, hypothesis, ~p(X2) |~p(f(X2)) ).


In [8]:
# "ansi" mode returns the whole state serialised as a single string;
# only its first 1000 characters are shown
ansi_rendering = env.render("ansi")
print(ansi_rendering[:1000])

[{'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'q', 'arguments': [{'class': 'Function', 'name': 'a', 'arguments': []}]}}], 'label': 'a1', 'birth_step': 0, 'processed': False, 'inference_parents': [], 'inference_rule': None}, {'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': True, 'atom': {'class': 'Predicate', 'name': 'q', 'arguments': [{'class': 'Function', 'name': 'a', 'arguments': []}]}}, {'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': '=', 'arguments': [{'class': 'Function', 'name': 'f', 'arguments': [{'class': 'Variable', 'name': 'X0'}]}, {'class': 'Variable', 'name': 'X0'}]}}], 'label': 'a2', 'birth_step': 0, 'processed': False, 'inference_parents': [], 'inference_rule': None}, {'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'p', 'arguments': [{'class': 'Variable', 'name': 'X1'}]}}, {'class': 'Literal', 'negat

In [9]:
# the package ships a simple function running a whole episode
# and an example policy which selects the shortest clause
from gym_saturation.policy_testing import episode, size_policy

set_problem = files("gym_saturation").joinpath(
    "resources/TPTP-mock/Problems/SET/SET001-1.p"
)
env, episode_memory = episode(
    problem_filename=set_problem,
    step_limit=15,
    policy=size_policy,
)
# this policy manages to solve a simple problem from TPTP
final_transition = episode_memory[-1]
print(final_transition.done, final_transition.reward)

True 1.0


In [10]:
# the proof here consists of only three steps
# but a naïve clause size policy finds it only after 15 steps
# (15 is also the `step_limit` of this episode,
# so the proof is found on the last allowed step)
# a trained DL policy might have done better
print(len(episode_memory))

15


In [11]:
# one can now check the TSTP proof found
# (judging by the output, each line is a derived clause annotated with
# the inference rule and the labels of its parent clauses)
print(env.tstp_proof)

cnf(inferred_27, hypothesis, $false, inference(resolution, [], [prove_element_of_bb,inferred_23])).
cnf(inferred_23, hypothesis, member(element_of_b,bb) , inference(resolution, [], [inferred_0,inferred_4])).
cnf(inferred_0, hypothesis, subset(b,bb) , inference(resolution, [], [b_equals_bb,set_equal_sets_are_subsets1])).
cnf(inferred_4, hypothesis, ~subset(b,X2) |member(element_of_b,X2) , inference(resolution, [], [element_of_b,membership_in_subsets])).


In [12]:
# paramodulation test
paramodulation_problem = files("gym_saturation").joinpath(
    "resources/TPTP-mock/Problems/TST/TST002-1.p"
)
env, episode_memory = episode(
    problem_filename=paramodulation_problem,
    step_limit=25,
    policy=size_policy,
)
# here the proof consists of four steps
# but a naïve policy needs 25
proof_lines = env.tstp_proof.split("\n")
print(len(proof_lines), len(episode_memory))

4 25
