In [1]:
# saturation prover environment can be created
# as any other OpenAI Gym environment
import gym
import os

# two arguments are obligatory: the maximal number of steps of the
# saturation algorithm (``step_limit``) and the folder where the
# [TPTP](http://tptp.org/) archive was unpacked (``tptp_folder``)
# ``expanduser`` is used instead of ``os.environ["HOME"]`` so that the home
# directory is also resolved when the ``HOME`` variable is not set
tptp_folder = os.path.join(os.path.expanduser("~"), "Downloads/TPTP-v7.4.0/")
env = gym.make(
    "gym_saturation:saturation-v0",
    step_limit=20,
    tptp_folder=tptp_folder,
)

In [2]:
# fix the seed first so that the results below are reproducible
seed_result = env.seed(3)
print(seed_result)
# when resetting the environment one may pass an optional argument ---
# the name of an exact problem from TPTP
# (it is omitted here, so a problem is chosen at random)
# parsing is done with the `lark` package, which can sometimes be slow;
# reimplementing the parser in a language faster than Python might help
state = env.reset()
# show which problem was picked at random during the reset
problem_name = os.path.basename(env.problem)
print(problem_name)
# the state is a list of dictionaries, each representing a logical clause
first_clause = state[0]
print(first_clause["literals"][0])
# possible actions are indices of not yet processed clauses from the state
print(env.action_space)
# only the "ansi" rendering method is implemented;
# it returns a JSON string representing the state
rendered_state = env.render("ansi")
print(rendered_state[:100])

3


GEO156-1.p
{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicate', 'name': 'equidistant', 'arguments': [{'class': 'Variable', 'name': 'X0'}, {'class': 'Variable', 'name': 'X1'}, {'class': 'Variable', 'name': 'X1'}, {'class': 'Variable', 'name': 'X0'}]}}
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
[{'class': 'Clause', 'literals': [{'class': 'Literal', 'negated': False, 'atom': {'class': 'Predicat


In [3]:
# let's define a policy which chooses the smallest logical clause as
# the "given clause" for a saturation algorithm step
from operator import itemgetter
from typing import List

from gym_saturation.grammar import Clause
from gym_saturation.logic_ops.utils import clause_length
from gym_saturation.parsing.json_grammar import dict_to_clause


def policy(state: List[dict]) -> int:
    """
    Choose the shortest not yet processed clause as the "given clause".

    :param state: a list of clauses in their dictionary representation;
        every item must have a ``"processed"`` key and be convertible
        with ``dict_to_clause``
    :returns: the index (in ``state``) of the not processed clause with
        minimal length; on ties the smallest such index is returned
    :raises ValueError: if ``state`` has no not processed clauses
    """
    # map the index of every candidate clause to its length; dictionaries
    # keep insertion order, so ``min`` below resolves ties in favour of the
    # earliest clause
    lengths = {
        index: clause_length(dict_to_clause(clause))
        for index, clause in enumerate(state)
        if not clause["processed"]
    }
    if not lengths:
        # a bare ``min`` would fail here with an opaque "empty sequence" error
        raise ValueError("no unprocessed clauses left to choose from")
    return min(lengths, key=lengths.get)



In [4]:
# run one typical episode with the fixed policy defined above
# this time the problem is fixed by passing the optional argument
problem_path = os.path.join(tptp_folder, "Problems", "SET", "SET001-1.p")
state = env.reset(problem=problem_path)
total_reward, step_count, done = 0, 0, False
# the episode always takes at least one step and ends when ``done`` is set
while True:
    action = policy(state)
    state, reward, done, info = env.step(action)
    total_reward = total_reward + reward
    step_count = step_count + 1
    if done:
        break
# the proof itself consists of only three steps,
# yet the naïve clause size policy needs 14 steps to find it;
# a trained DL policy might have done better
print(total_reward, step_count)

1.0 14
