In [1]:
# saturation prover environment can be created
# as any other OpenAI Gym environment
import gym
import os
from glob import glob

# suppose that (TPTP)[http://tptp.org/] archive was unpacked to ~/Downloads
tptp_folder = os.path.join(os.environ["HOME"], "Downloads", "TPTP-v7.4.0")
# it's obligatory to set the maximal number of steps of saturation algorithm
# and the list of problems. Here are CNF problems from TPTP
problem_list = glob(os.path.join(tptp_folder, "Problems/*/*-*.p"))
env = gym.make(
    "gym_saturation:saturation-v0",
    step_limit=100,
    problem_list=problem_list
)

In [2]:
# to make results reproducible, let's set the seed
print(env.seed(3))

3


In [3]:
# when resettig the environment one may pass an optional argument ---
# a name of an exact problem from TPTP
# (if omitted, a problem will be chosen at random)
# parsing is done with `lark` package which sometimes might be slow
# reimplementing parsing in a faster language than Python might help
state = env.reset()

In [4]:
# during the reset a problem is chosen at random
print(os.path.basename(env.problem))

GEO156-1.p


In [5]:
# state is a list of dictionaries, representing logical clauses
print(state[0]["literals"][0])

{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'equidistant', 'arguments': [{'class': 'Variable', 'name': 'X0'}, {'class': 'Variable', 'name': 'X1'}, {'class': 'Variable', 'name': 'X1'}, {'class': 'Variable', 'name': 'X0'}]}}


In [6]:
# possible actions are indices of not processed clauses from the state
print(env.action_space)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]


In [7]:
# only "ansi" and "human" rendering methods are implemented
# "human" returns TPTP representation
print(env.render())

cnf(reflexivity_for_equidistance, hypothesis, equidistant(X0,X1,X1,X0) ).
cnf(transitivity_for_equidistance, hypothesis, ~equidistant(X2,X3,X4,X5) |~equidistant(X2,X3,X6,X7) |equidistant(X4,X5,X6,X7) ).
cnf(identity_for_equidistance, hypothesis, ~equidistant(X8,X9,X10,X10) |=(X8,X9) ).
cnf(segment_construction1, hypothesis, between(X11,X12,extension(X11,X12,X13,X14)) ).
cnf(segment_construction2, hypothesis, equidistant(X15,extension(X16,X15,X17,X18),X17,X18) ).
cnf(outer_five_segment, hypothesis, ~equidistant(X19,X20,X21,X22) |~equidistant(X20,X23,X22,X24) |~equidistant(X19,X25,X21,X26) |~equidistant(X20,X25,X22,X26) |~between(X19,X20,X23) |~between(X21,X22,X24) |=(X19,X20) |equidistant(X23,X25,X24,X26) ).
cnf(identity_for_betweeness, hypothesis, ~between(X27,X28,X27) |=(X27,X28) ).
cnf(inner_pasch1, hypothesis, ~between(X29,X30,X31) |~between(X32,X33,X31) |between(X30,inner_pasch(X29,X30,X31,X33,X32),X32) ).
cnf(inner_pasch2, hypothesis, ~between(X34,X35,X36) |~between(X37,X38,X36) |

In [8]:
# "ansi" returns a JSON string representing the state
print(env.render("ansi")[:705])


[{'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'equidistant', 'arguments': [{'class': 'Variable', 'name': 'X0'}, {'class': 'Variable', 'name': 'X1'}, {'class': 'Variable', 'name': 'X1'}, {'class': 'Variable', 'name': 'X0'}]}}], 'label': 'reflexivity_for_equidistance', 'birth_step': 0, 'processed': False, 'inference_parents': [], 'inference_rule': None}, {'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': True, 'atom': {'class': 'Predicate', 'name': 'equidistant', 'arguments': [{'class': 'Variable', 'name': 'X2'}, {'class': 'Variable', 'name': 'X3'}, {'class': 'Variable', 'name': 'X4'}, {'class': 'Variable', 'name': 'X5'


In [9]:
# the package includes a simple episode function
from gym_saturation.policy_testing import episode
# and an example policy, selecting the shortest clause
from gym_saturation.policy_testing import size_policy

env, episode_memory = episode(
    problem_filename=os.path.join(tptp_folder, "Problems", "SET", "SET001-1.p"),
    step_limit=15,
    policy=size_policy
)
# this policy manages to solve a simple problem from TPTP
print(episode_memory[-1].done, episode_memory[-1].reward)



True 1.0


In [10]:
# the proof here consists of only three steps
# but a naïve clause size policy finds it only after 15 steps
# a trained DL policy might have done better
print(len(episode_memory))

15


In [11]:
# one can now check the TSTP proof found
print(env.tstp_proof)

cnf(inferred_27, hypothesis, $false, inference(resolution, [], [prove_element_of_bb,inferred_23])).
cnf(inferred_23, hypothesis, member(element_of_b,bb) , inference(resolution, [], [inferred_0,inferred_4])).
cnf(inferred_0, hypothesis, subset(b,bb) , inference(resolution, [], [b_equals_bb,set_equal_sets_are_subsets1])).
cnf(inferred_4, hypothesis, ~subset(b,X2) |member(element_of_b,X2) , inference(resolution, [], [element_of_b,membership_in_subsets])).


In [12]:
# paramodulation test
import sys
if sys.version_info.major == 3 and sys.version_info.minor == 9:
    from importlib.resources import files
else:
    from importlib_resources import files
env, episode_memory = episode(
    problem_filename=files(
        "gym_saturation"
    ).joinpath("resources/TPTP-mock/Problems/TST/TST002-1.p"), 
    step_limit=25,
    policy=size_policy
)
# here the proof consists of four steps
# but a naïve policy needs 25
print(
    len(env.tstp_proof.split("\n")), 
    len(episode_memory)
)

4 25
