In [1]:
import json

In [51]:
# read json
with open('expanded_incremental_arc_dataset.json') as f:
    data = json.load(f)

In [20]:
example = data['train'][0]

In [52]:
def extract_programs(data):
    """Extract all programs from the data structure"""
    programs = []
    
    # Parse train example programs
    if 'subroutines' in data:
        for subroutine_key, subroutine in data['subroutines'].items():
            program = subroutine.get('program')
            if program:
                programs.append(f"{subroutine_key}: {program}")
    
    # Parse test example programs
    if 'test_subroutines' in data:
        for subroutine_key, subroutine in data['test_subroutines'].items():
            program = subroutine.get('program')
            if program:
                programs.append(f"{subroutine_key} (test): {program}")
    
    return programs

In [53]:
result = []
for data in data['train']:
    result.extend(extract_programs(data))

In [54]:
result

['subroutine_0: replace(I, FIVE, ZERO)',
 'subroutine_1: downscale(I, THREE)',
 'subroutine_0 (test): replace(I, FIVE, ZERO)',
 'subroutine_1 (test): downscale(I, THREE)',
 'subroutine_0: bottomhalf(I)',
 'subroutine_1: vconcat(hmirror(I), I)',
 'subroutine_0 (test): bottomhalf(I)',
 'subroutine_1 (test): vconcat(hmirror(I), I)',
 'subroutine_0: bottomhalf(I)',
 'subroutine_1: vconcat(hmirror(I), I)',
 'subroutine_0 (test): bottomhalf(I)',
 'subroutine_1 (test): vconcat(hmirror(I), I)',
 'subroutine_0: subgrid(first(objects(I, T, T, T)), I)',
 'subroutine_1: hconcat(I, I)',
 'subroutine_0 (test): subgrid(first(objects(I, T, T, T)), I)',
 'subroutine_1 (test): hconcat(I, I)',
 'subroutine_0: subgrid(first(objects(I, T, T, T)), I)',
 'subroutine_1: hconcat(I, I)',
 'subroutine_0 (test): subgrid(first(objects(I, T, T, T)), I)',
 'subroutine_1 (test): hconcat(I, I)',
 'subroutine_0: hconcat(I, vmirror(I))',
 'subroutine_1: vconcat(I, hmirror(I))',
 'subroutine_0 (test): hconcat(I, vmirror(

In [115]:
import json

class ProgramSequenceAnalyzer:
    def __init__(self, descriptions='operation_descriptions.json'):
        with open(descriptions, 'r') as f:
            self.descriptions = json.load(f)
        self.var_references = {}  # Track where variables are first defined

    def generate_instructions(self, solver_code):
        var_descriptions = {}
        instructions = []
        step_counter = 1

        for line in solver_code:
            if '=' not in line:
                continue

            var, expr = line.strip().split('=', 1)
            var = var.strip()
            expr = expr.strip()

            func_name, args = self._parse_expression(expr)
            func_desc = self.descriptions.get(func_name, f'Perform {func_name} operation.')
            args_list = self._parse_arguments(args)

            # Create concise argument references
            arg_descs = []
            for arg in args_list:
                if arg in var_descriptions:
                    # Reference the step where this variable was defined
                    ref_step = self.var_references[arg]
                    arg_desc = f"the result from step {ref_step}"
                else:
                    arg_desc = arg
                arg_descs.append(arg_desc)

            # Build the instruction using concise references
            instruction = self._format_instruction(func_desc, arg_descs)
            
            # Store the step number where this variable is defined
            self.var_references[var] = step_counter
            var_descriptions[var] = instruction
            
            instructions.append(instruction)
            step_counter += 1

        # Format the final paragraph with proper transitions
        return self._format_paragraph(instructions)

    def _format_instruction(self, func_desc, arg_descs):
        """Format the instruction with natural language transitions."""
        # Replace placeholder arguments with actual descriptions
        for i, arg_desc in enumerate(arg_descs, 1):
            placeholder = f"{{arg{i}}}"
            func_desc = func_desc.replace(placeholder, arg_desc)
        return func_desc

    def _format_paragraph(self, instructions):
        """Format instructions into a coherent paragraph with transitions."""
        transitions = ["Step 1", "Step 2", "Step 3", "Step 4", "Step 5", "Step 6", "Step 7", "Step 8", "Step 9", "Step 10"]
        formatted_steps = []
        
        for i, instruction in enumerate(instructions):
            if i < len(transitions):
                transition = transitions[i]
            else:
                transition = "Then"
            formatted_steps.append(f"{transition}, {instruction.lower()}")
        
        return " ".join(formatted_steps) + "."

    def _parse_expression(self, expr):
        func_end = expr.find('(')
        func_name = expr[:func_end]
        args = expr[func_end + 1:-1]
        return func_name.strip(), args.strip()

    def _parse_arguments(self, args):
        args_list = []
        current_arg = ''
        paren_count = 0
        
        for char in args:
            if char == ',' and paren_count == 0:
                args_list.append(current_arg.strip())
                current_arg = ''
            else:
                current_arg += char
                if char == '(':
                    paren_count += 1
                elif char == ')':
                    paren_count -= 1
                    
        if current_arg:
            args_list.append(current_arg.strip())
        return args_list

In [116]:
solver_code = [
    'x1 = objects(I, T, F, T)',
    'x2 = fork(multiply, height, width)',
    'x3 = argmax(x1, x2)',
    'x4 = color(x3)',
    'O = canvas(x4, TWO_BY_TWO)'
]

analyzer = ProgramSequenceAnalyzer()
instructions = analyzer.generate_instructions(solver_code)
instructions

'Step 1, find all the objects in i based on the settings: univalued=t (objects of one color), diagonal connectivity=f, and excluding background=t. it groups cells into objects accordingly. Step 2, create a new function that takes an input, applies height and width to it separately, and then applies multiply to both results. it combines the outcomes of two functions using another function. Step 3, find the element in the result from step 1 that makes the result from step 2 give the largest result. it returns that element from the result from step 1. for example, if the result from step 2 is "length", and the result from step 1 is ["apple", "banana"], it returns "banana" because it is longer. Step 4, get the color of the result from step 3. if the result from step 3 has only one color, it returns that color. Step 5, create a new grid of size two_by_two, filled entirely with the result from step 4. it’s like making a blank drawing area..'