In [1]:
# saturation prover environment can be created
# as any other OpenAI Gym environment
import gym
import os
from glob import glob

# suppose that (TPTP)[http://tptp.org/] archive was unpacked to ~/Downloads
tptp_folder = os.path.join(os.environ["HOME"], "Downloads", "TPTP-v7.5.0")
# it's obligatory to set the maximal number of steps of saturation algorithm
# and the list of problems. Here are CNF problems from TPTP
problem_list = glob(os.path.join(tptp_folder, "Problems/*/*-*.p"))
env = gym.make(
    "gym_saturation:saturation-v0",
    step_limit=100,
    problem_list=problem_list
)

In [2]:
# to make results reproducible, let's set the seed
print(env.seed(3))

3


In [3]:
# when resettig the environment one may pass an optional argument ---
# a name of an exact problem from TPTP
# (if omitted, a problem will be chosen at random)
# parsing is done with `lark` package which sometimes might be slow
# reimplementing parsing in a faster language than Python might help
state = env.reset()

In [4]:
# during the reset a problem is chosen at random
print(os.path.basename(env.problem))

ANA037-2.p


In [5]:
# state is a list of dictionaries, representing logical clauses
print(state[0]["literals"][0])

{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'c_lessequals', 'arguments': [{'class': 'Function', 'name': 'c_0', 'arguments': []}, {'class': 'Function', 'name': 'v_f', 'arguments': [{'class': 'Function', 'name': 'v_xa', 'arguments': []}]}, {'class': 'Function', 'name': 't_b', 'arguments': []}]}}


In [6]:
# possible actions are indices of not processed clauses from the state
print(env.action_space)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [7]:
# only "ansi" and "human" rendering methods are implemented
# "human" returns TPTP representation
print(env.render())

cnf(cls_conjecture_2, hypothesis, c_lessequals(c_0,v_f(v_xa),t_b) ).
cnf(cls_conjecture_4, hypothesis, c_lessequals(c_HOL_Oabs(v_a(v_xa),t_b),c_times(v_c,v_f(v_xa),t_b),t_b) ).
cnf(cls_conjecture_8, hypothesis, ~c_lessequals(c_HOL_Oabs(v_a(v_xa),t_b),c_times(c_Orderings_Omax(v_c,v_ca,t_b),v_f(v_xa),t_b),t_b) ).
cnf(tfree_tcs, hypothesis, class_Ring__and__Field_Oordered__idom(t_b) ).
cnf(cls_Orderings_Ole__maxI1_0, hypothesis, ~class_Orderings_Olinorder(X0) |c_lessequals(X1,c_Orderings_Omax(X1,X2,X0),X0) ).
cnf(cls_Orderings_Oorder__class_Oorder__trans_0, hypothesis, ~class_Orderings_Oorder(X3) |~c_lessequals(X4,X5,X3) |~c_lessequals(X6,X4,X3) |c_lessequals(X6,X5,X3) ).
cnf(cls_Ring__and__Field_Opordered__semiring__class_Omult__right__mono_0, hypothesis, ~class_Ring__and__Field_Opordered__semiring(X7) |~c_lessequals(X8,X9,X7) |~c_lessequals(c_0,X10,X7) |c_lessequals(c_times(X8,X10,X7),c_times(X9,X10,X7),X7) ).
cnf(clsrel_Ring__and__Field_Oordered__idom_33, hypothesis, ~class_Ring__and__

In [10]:
# "ansi" returns a JSON string representing the state
print(env.render("ansi")[:1000])


[{'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'equal_sets', 'arguments': [{'class': 'Function', 'name': 'b', 'arguments': []}, {'class': 'Function', 'name': 'bb', 'arguments': []}]}}], 'label': 'b_equals_bb', 'birth_step': 0, 'processed': True, 'inference_parents': [], 'inference_rule': None}, {'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'member', 'arguments': [{'class': 'Function', 'name': 'element_of_b', 'arguments': []}, {'class': 'Function', 'name': 'b', 'arguments': []}]}}], 'label': 'element_of_b', 'birth_step': 0, 'processed': True, 'inference_parents': [], 'inference_rule': None}, {'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': True, 'atom': {'class': 'Predicate', 'name': 'member', 'arguments': [{'class': 'Function', 'name': 'element_of_b', 'arguments': []}, {'class': 'Function', 'name': 'bb', 'arguments': []}]}}], 'label': 'prove_

In [11]:
# the package includes a simple episode function
from gym_saturation.policy_testing import episode
# and an example policy, selecting the shortest clause
from gym_saturation.policy_testing import size_policy

env, episode_memory = episode(
    problem_filename=os.path.join(tptp_folder, "Problems", "SET", "SET001-1.p"),
    step_limit=15,
    policy=size_policy
)
# this policy manages to solve a simple problem from TPTP
print(episode_memory[-1].done, episode_memory[-1].reward)



True 1.0


In [12]:
# the proof here consists of only three steps
# but a naïve clause size policy finds it only after 15 steps
# a trained DL policy might have done better
print(len(episode_memory))

15


In [13]:
# one can now check the TSTP proof found
print(env.tstp_proof)

cnf(inferred_27, hypothesis, $false, inference(resolution, [], [prove_element_of_bb,inferred_23])).
cnf(inferred_23, hypothesis, member(element_of_b,bb) , inference(resolution, [], [inferred_0,inferred_4])).
cnf(inferred_0, hypothesis, subset(b,bb) , inference(resolution, [], [b_equals_bb,set_equal_sets_are_subsets1])).
cnf(inferred_4, hypothesis, ~subset(b,X2) |member(element_of_b,X2) , inference(resolution, [], [element_of_b,membership_in_subsets])).


In [14]:
# paramodulation test
import sys
if sys.version_info.major == 3 and sys.version_info.minor == 9:
    from importlib.resources import files
else:
    from importlib_resources import files
env, episode_memory = episode(
    problem_filename=files(
        "gym_saturation"
    ).joinpath("resources/TPTP-mock/Problems/TST/TST002-1.p"), 
    step_limit=25,
    policy=size_policy
)
# here the proof consists of four steps
# but a naïve policy needs 25
print(
    len(env.tstp_proof.split("\n")), 
    len(episode_memory)
)

4 25
