In [1]:
import json
from pprint import pprint
import sys
from pathlib import Path

sys.path.append(str(Path('..').resolve()))
import utils

# Load the benchmark records; the file content is parsed as JSON.
# The encoding is pinned explicitly: the stored responses contain non-ASCII
# text (e.g. "×", "’"), and without it open() decodes with a locale-dependent
# default that differs between platforms.
with open("java_programs.dict", "r", encoding="utf-8") as f:
    java_programs = json.load(f)

# Input sizes inspected in this notebook; records are keyed by the
# stringified size ("2", "4", ...).
SIZES = {2, 4, 8, 16, 30}
# Top-level key of the program whose records are examined below.
PROGRAM_NAME = "MazeSolver"

In [2]:
# Pretty-print the stored record for size key "2"
# (its 'assertions', 'constants' and 'response' fields).
record_2 = java_programs[PROGRAM_NAME]["2"]
pprint(record_2)

{'assertions': '(assert (and (not ( = cell_0_0 0)) (not ( = cell_0_0 1))))',
 'constants': '(declare-const cell_0_0 Int)',
 'response': '- Worst-case time complexity (for an N×N maze): O(N^2). Each '
             'cell is visited at most once due to the visited array, and '
             'exploring neighbors is constant per cell. Space complexity is '
             'O(N^2) for visited and worst-case recursion depth.\n'
             '\n'
             '- For input size n = 2 (a 2×2 maze), the SMT2 constraints '
             'reflecting the program are:\n'
             '  - Each cell is either 0 (free) or 1 (wall).\n'
             '  - Start and goal are free: cell_0_0 = 0 and cell_1_1 = 0.\n'
             '  - A path exists from start to goal (in 2×2 this requires at '
             'least one of the intermediate neighbors to be free): cell_0_1 = '
             '0 or cell_1_0 = 0.\n'
             '\n'
             'Answer:\n'
             '(set-logic QF_LIA)\n'
             '(declare-fun ce

In [3]:
# Check the stored constraints against the constraints embedded in the
# response text for size key "2".
entry_2 = java_programs[PROGRAM_NAME]["2"]
response_2 = entry_2["response"]

# Assertions: the segment following the `cell_1_1` declaration line,
# cut off at "(check-sat)\n".
after_decls_2 = response_2.split("(declare-fun cell_1_1 () Int)\n")[1]
generated_assertions_2 = after_decls_2.split("(check-sat)\n")[0]

# Constants: the text between "Answer:\n" and the first "(assert".
answer_2 = response_2.split("Answer:\n")[1]
generated_constants_2 = answer_2.split("(assert")[0]

utils.check_logical_equivalence_v2(
    original_assertions=entry_2["assertions"],
    generated_assertions=generated_assertions_2,
    original_constants=entry_2["constants"],
    generated_constants=generated_constants_2,
)

{'result': False, 'reason': 'A does not imply B'}

In [4]:
# Pretty-print the stored record for size key "4"
# (its 'assertions', 'constants' and 'response' fields).
record_4 = java_programs[PROGRAM_NAME]["4"]
pprint(record_4)

{'assertions': '(assert (and (not ( = cell_0_0 0)) (not ( = cell_0_0 1))))',
 'constants': '(declare-const cell_0_0 Int)',
 'response': 'Worst-case time complexity: O(N^2)\n'
             '- Reason: The DFS visits each of the N^2 cells at most once '
             '(visited prevents revisits). Each visit does O(1) work plus '
             'exploring up to 4 neighbors, so overall O(N^2).\n'
             '\n'
             'SMT-LIB constraints for n = 4 (4x4 grid), matching the program’s '
             'constraints: each cell is either 0 or 1; start (0,0) and goal '
             '(3,3) are 0.\n'
             '\n'
             'Answer:\n'
             '(set-logic QF_LIA)\n'
             '\n'
             '; Declare 4x4 grid cells as integers\n'
             '(declare-fun cell_0_0 () Int)\n'
             '(declare-fun cell_0_1 () Int)\n'
             '(declare-fun cell_0_2 () Int)\n'
             '(declare-fun cell_0_3 () Int)\n'
             '(declare-fun cell_1_0 () Int)\n'
             '(

In [5]:
# Check the stored constraints against the constraints embedded in the
# response text for size key "4".
entry_4 = java_programs[PROGRAM_NAME]["4"]
response_4 = entry_4["response"]

# Assertions: the segment following the `cell_3_3` declaration line,
# cut off at "(check-sat)\n".
after_decls_4 = response_4.split("(declare-fun cell_3_3 () Int)\n")[1]
generated_assertions_4 = after_decls_4.split("(check-sat)\n")[0]

# Constants: the text between "Answer:\n" and the first "(assert".
answer_4 = response_4.split("Answer:\n")[1]
generated_constants_4 = answer_4.split("(assert")[0]

utils.check_logical_equivalence_v2(
    original_assertions=entry_4["assertions"],
    generated_assertions=generated_assertions_4,
    original_constants=entry_4["constants"],
    generated_constants=generated_constants_4,
)

{'result': False, 'reason': 'A does not imply B'}

In [6]:
# Pretty-print the stored record for size key "8"
# (its 'assertions', 'constants' and 'response' fields).
record_8 = java_programs[PROGRAM_NAME]["8"]
pprint(record_8)

{'assertions': '(assert (and (not ( = cell_0_0 0)) (not ( = cell_0_0 1))))',
 'constants': '(declare-const cell_0_0 Int)',
 'response': 'Worst-case time complexity (for a concrete 2D grid of size n×n): '
             'O(n^2). The DFS with a visited set touches each cell at most '
             'once and considers a constant number of neighbors per cell.\n'
             '\n'
             'Space complexity: O(n^2) (visited array plus recursion stack).\n'
             '\n'
             'Answer:\n'
             '(set-logic QF_LIA)\n'
             '\n'
             '; Declarations for 8x8 grid cells\n'
             '(declare-fun cell_0_0 () Int)\n'
             '(declare-fun cell_0_1 () Int)\n'
             '(declare-fun cell_0_2 () Int)\n'
             '(declare-fun cell_0_3 () Int)\n'
             '(declare-fun cell_0_4 () Int)\n'
             '(declare-fun cell_0_5 () Int)\n'
             '(declare-fun cell_0_6 () Int)\n'
             '(declare-fun cell_0_7 () Int)\n'
             '\n'
  

In [7]:
# Check the stored constraints against the constraints embedded in the
# response text for size key "8".
utils.check_logical_equivalence_v2(
    original_assertions=java_programs[PROGRAM_NAME]["8"]["assertions"],
    # Segment following the `cell_7_7` declaration line, cut off at
    # "(check-sat)\n" so that nothing from the check-sat command onward is
    # passed along as assertions -- the same extraction the cells for the
    # other sizes use. If the marker is absent, the extra split is a no-op.
    generated_assertions=java_programs[PROGRAM_NAME]["8"]["response"].split("(declare-fun cell_7_7 () Int)\n")[1].split("(check-sat)\n")[0],
    original_constants=java_programs[PROGRAM_NAME]["8"]["constants"],
    # Text between "Answer:\n" and the first "(assert".
    generated_constants=java_programs[PROGRAM_NAME]["8"]["response"].split("Answer:\n")[1].split("(assert")[0]
)

{'result': False, 'reason': 'A does not imply B'}

In [8]:
# Pretty-print the stored record for size key "16"
# (its 'assertions', 'constants' and 'response' fields).
record_16 = java_programs[PROGRAM_NAME]["16"]
pprint(record_16)

{'assertions': '(assert (and (not ( = cell_0_0 0)) (not ( = cell_0_0 1))))',
 'constants': '(declare-const cell_0_0 Int)',
 'response': 'Worst-case time complexity:\n'
             '- For an n×n maze, each cell is visited at most once due to the '
             'visited array, and each visit performs O(1) work (four recursive '
             'calls that immediately return on visited/bounds/wall). Thus the '
             'worst-case time complexity of solveMaze on a fixed input is '
             'O(n^2). The recursion stack/visited storage is also O(n^2).\n'
             '\n'
             'SMT2 constraint for n = 16 (cells constrained to be 0 or 1; '
             'start and goal forced to 0):\n'
             '\n'
             'Answer:\n'
             '(set-logic QF_LIA)\n'
             '\n'
             '; Declare 16x16 integer variables for the maze cells\n'
             '(declare-fun cell_0_0 () Int)\n'
             '(declare-fun cell_0_1 () Int)\n'
             '(declare-fun cell_0_2 (

In [9]:
# Check the stored constraints against the constraints embedded in the
# response text for size key "16".
entry_16 = java_programs[PROGRAM_NAME]["16"]
response_16 = entry_16["response"]

# Assertions: the segment following the `cell_15_15` declaration line,
# cut off at "(check-sat)\n".
after_decls_16 = response_16.split("(declare-fun cell_15_15 () Int)\n")[1]
generated_assertions_16 = after_decls_16.split("(check-sat)\n")[0]

# Constants: the text between "Answer:\n" and the first "(assert".
answer_16 = response_16.split("Answer:\n")[1]
generated_constants_16 = answer_16.split("(assert")[0]

utils.check_logical_equivalence_v2(
    original_assertions=entry_16["assertions"],
    generated_assertions=generated_assertions_16,
    original_constants=entry_16["constants"],
    generated_constants=generated_constants_16,
)

{'result': False, 'reason': 'A does not imply B'}

In [10]:
# Pretty-print the stored record for size key "30"
# (its 'assertions', 'constants' and 'response' fields).
record_30 = java_programs[PROGRAM_NAME]["30"]
pprint(record_30)

{'assertions': '(assert (and (not ( = cell_0_0 0)) (not ( = cell_0_0 1))))',
 'constants': '(declare-const cell_0_0 Int)',
 'response': 'Worst-case time complexity:\n'
             '- Treating N as variable (an N×N grid), the DFS with a visited '
             'matrix visits each cell at most once and explores at most 4 '
             'edges per cell. Time complexity: O(N^2). Space complexity: '
             'O(N^2) for visited plus recursion stack.\n'
             '- Note: Under symbolic execution, the search space over inputs '
             'is exponential (each of the N^2 cells is binary, with start/end '
             'fixed to 0), i.e., up to 2^(N^2−2) distinct input assignments.\n'
             '\n'
             'Answer:\n'
             '(set-logic QF_LIA)\n'
             '\n'
             '; Declarations for a 30x30 grid: cell_i_j are Int\n'
             '(declare-fun cell_0_0 () Int)\n'
             '(declare-fun cell_0_1 () Int)\n'
             '(declare-fun cell_0_2 () Int)\n'


In [11]:
# Check the stored constraints against the constraints embedded in the
# response text for size key "30".
entry_30 = java_programs[PROGRAM_NAME]["30"]
response_30 = entry_30["response"]

# Assertions: the segment following the `cell_29_29` declaration line,
# cut off at "(check-sat)\n".
after_decls_30 = response_30.split("(declare-fun cell_29_29 () Int)\n")[1]
generated_assertions_30 = after_decls_30.split("(check-sat)\n")[0]

# Constants: the text between "Answer:\n" and the first "(assert".
answer_30 = response_30.split("Answer:\n")[1]
generated_constants_30 = answer_30.split("(assert")[0]

utils.check_logical_equivalence_v2(
    original_assertions=entry_30["assertions"],
    generated_assertions=generated_assertions_30,
    original_constants=entry_30["constants"],
    generated_constants=generated_constants_30,
)

{'result': False, 'reason': 'A does not imply B'}