In [1]:
# saturation prover environment can be created
# as any other OpenAI Gym environment
import gym
import os
from glob import glob

# suppose that the [TPTP](http://tptp.org/) archive was unpacked to ~/Downloads
# `expanduser` is used instead of os.environ["HOME"] because the HOME
# variable is not guaranteed to be set on every platform (e.g. Windows)
tptp_folder = os.path.join(os.path.expanduser("~"), "Downloads", "TPTP-v7.4.0")
# it's obligatory to set the maximal number of steps of saturation algorithm
# and the list of problems. Here are CNF problems from TPTP
# `glob` yields paths in arbitrary order, so the list is sorted to make
# a seeded random choice from it independent of the file system
problem_list = sorted(glob(os.path.join(tptp_folder, "Problems/*/*-*.p")))
env = gym.make(
    "gym_saturation:saturation-v0",
    step_limit=100,
    problem_list=problem_list,
)

In [2]:
# to make results reproducible, let's set the seed
print(env.seed(3))
# when resetting the environment one may pass an optional argument ---
# the name of an exact problem from TPTP
# (if omitted, a problem will be chosen at random)
# parsing is done with the `lark` package, which can sometimes be slow;
# reimplementing the parser in a faster language than Python might help
state = env.reset()
# no problem was passed to `reset`, so one was chosen at random
print(os.path.basename(env.problem))
# state is a list of dictionaries, representing logical clauses
print(state[0]["literals"][0])
# possible actions are indices of not processed clauses from the state
print(env.action_space)
# only "ansi" rendering method is implemented
# it returns a JSON string representing the state
# (only the first 100 characters are printed here)
# NOTE(review): the recorded output uses single quotes, so this may be
# a Python repr rather than strict JSON --- confirm
print(env.render("ansi")[:100])

3


GEO156-1.p
{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'equidistant', 'arguments': [{'class': 'Variable', 'name': 'X0'}, {'class': 'Variable', 'name': 'X1'}, {'class': 'Variable', 'name': 'X1'}, {'class': 'Variable', 'name': 'X0'}]}}
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
[{'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicat


In [3]:
# let's define a policy which chooses the smallest logical clause as
# the "given clause" for a saturation algorithm step
from operator import itemgetter
from typing import List

from gym_saturation.grammar import Clause
from gym_saturation.logic_ops.utils import clause_length
from gym_saturation.parsing.json_grammar import dict_to_clause


def policy(state: List[dict]) -> int:
    """
    Choose the shortest not yet processed clause as the "given clause".

    :param state: a list of clauses in their dictionary form; every item
        must have a ``processed`` key and be accepted by ``dict_to_clause``
    :returns: the index of the unprocessed clause with minimal length
        (the first such index if several clauses share the minimum)
    :raises ValueError: if every clause in ``state`` is already processed
    """
    # dictionaries keep insertion order, so ``min`` over the keys resolves
    # ties in favour of the smallest index
    lengths = {
        index: clause_length(dict_to_clause(clause))
        for index, clause in enumerate(state)
        if not clause["processed"]
    }
    if not lengths:
        raise ValueError("there are no unprocessed clauses to choose from")
    return min(lengths, key=lengths.get)



In [4]:
# a typical episode with a constant policy
# the problem is fixed (not random) by handing its path to `reset`
# through the optional `problem` argument
set_problem_path = os.path.join(tptp_folder, "Problems", "SET", "SET001-1.p")
state = env.reset(problem=set_problem_path)
def episode():
    """
    Apply ``policy`` step by step until the environment reports ``done``.

    The run starts from the module-level ``state`` and overwrites that
    variable with the new state after every step.

    :returns: a pair (sum of the rewards, number of steps taken)
    """
    global state
    reward_sum = 0
    steps_taken = 0
    while True:
        # the right-hand side is evaluated first, so the policy sees the
        # state from before this step
        state, step_reward, finished, _ = env.step(policy(state))
        reward_sum += step_reward
        steps_taken += 1
        if finished:
            break
    return reward_sum, steps_taken

# the proof here consists of only three steps
# but a naïve clause size policy finds it only after 15 steps
# a trained DL policy might have done better
print(episode())

(1.0, 15)


In [5]:
# one can now check the proof found, which the environment exposes
# in TSTP format through its `tstp_proof` attribute
print(env.tstp_proof)

cnf(inferred_27, hypothesis, $false, inference(resolution, [], [prove_element_of_bb,inferred_23])).
cnf(inferred_23, hypothesis, member(element_of_b,bb) , inference(resolution, [], [inferred_0,inferred_4])).
cnf(inferred_0, hypothesis, subset(b,bb) , inference(resolution, [], [b_equals_bb,set_equal_sets_are_subsets1])).
cnf(inferred_4, hypothesis, ~subset(b,X2) |member(element_of_b,X2) , inference(resolution, [], [element_of_b,membership_in_subsets])).


In [6]:
# paramodulation test
import sys
# `importlib.resources.files` exists in the standard library on every
# Python from 3.9 on; the `importlib_resources` backport is only needed
# for older interpreters
if sys.version_info >= (3, 9):
    from importlib.resources import files
else:
    from importlib_resources import files
# the problem file is looked up among the resources of the
# `gym_saturation` package itself
mock_problem = files("gym_saturation").joinpath(
    "resources/TPTP-mock/Problems/TST/TST002-1.p"
)
state = env.reset(problem=mock_problem)
# here the proof consists of four steps
# but a naïve policy needs 25
print(episode())

(1.0, 25)
