In [1]:
import re

def tokenize_cobol_code(cobol_code):
    # Basic tokenizer using regex (simplified for demonstration)
    tokens = re.findall(r'\w+|\S', cobol_code)
    return tokens


In [2]:
class Node:
    def __init__(self, type_, value=None):
        self.type = type_
        self.value = value
        self.children = []

    def add_child(self, node):
        self.children.append(node)

    def __repr__(self):
        return f'{self.type}: {self.value} -> {self.children}'

def parse_tokens(tokens):
    root = Node("Program")
    current_node = root
    
    i = 0
    while i < len(tokens):
        token = tokens[i]
        if token.upper() == 'IDENTIFICATION':
            node = Node('IDENTIFICATION_DIVISION')
            current_node.add_child(node)
            current_node = node
        elif token.upper() == 'PROGRAM-ID.':
            program_id_node = Node('PROGRAM_ID', tokens[i+1])
            current_node.add_child(program_id_node)
            i += 1
        elif token.upper() == 'ENVIRONMENT':
            node = Node('ENVIRONMENT_DIVISION')
            current_node.add_child(node)
            current_node = node
        elif token.upper() == 'DATA':
            node = Node('DATA_DIVISION')
            current_node.add_child(node)
            current_node = node
        elif token.upper() == 'WORKING-STORAGE':
            node = Node('WORKING_STORAGE_SECTION')
            current_node.add_child(node)
            current_node = node
        elif token.upper() == 'PROCEDURE':
            node = Node('PROCEDURE_DIVISION')
            current_node.add_child(node)
            current_node = node
        elif token.upper() in ['DISPLAY', 'STOP']:
            node = Node('STATEMENT', token.upper())
            current_node.add_child(node)
        elif re.match(r"[\w'-]+", token):
            node = Node('LITERAL', token)
            current_node.add_child(node)
        i += 1

    return root


In [3]:
def display_syntax_tree(node, indent=0):
    print('  ' * indent + f'{node.type}: {node.value}')
    for child in node.children:
        display_syntax_tree(child, indent + 1)

# Example COBOL code
cobol_code = """
       IDENTIFICATION DIVISION.
       PROGRAM-ID. SAMPLE.
       ENVIRONMENT DIVISION.
       DATA DIVISION.
       WORKING-STORAGE SECTION.
       01  WS-NAME       PIC X(20).
       PROCEDURE DIVISION.
           DISPLAY 'HELLO, WORLD'.
           STOP RUN.
"""

# Tokenize and parse the COBOL code
tokens = tokenize_cobol_code(cobol_code)
syntax_tree = parse_tokens(tokens)

# Display the syntax tree
display_syntax_tree(syntax_tree)


Program: None
  IDENTIFICATION_DIVISION: None
    LITERAL: DIVISION
    LITERAL: PROGRAM
    LITERAL: -
    LITERAL: ID
    LITERAL: SAMPLE
    ENVIRONMENT_DIVISION: None
      LITERAL: DIVISION
      DATA_DIVISION: None
        LITERAL: DIVISION
        LITERAL: WORKING
        LITERAL: -
        LITERAL: STORAGE
        LITERAL: SECTION
        LITERAL: 01
        LITERAL: WS
        LITERAL: -
        LITERAL: NAME
        LITERAL: PIC
        LITERAL: X
        LITERAL: 20
        PROCEDURE_DIVISION: None
          LITERAL: DIVISION
          STATEMENT: DISPLAY
          LITERAL: '
          LITERAL: HELLO
          LITERAL: WORLD
          LITERAL: '
          STATEMENT: STOP
          LITERAL: RUN


In [None]:
class product_record:
    def __init__(self, ):
        pass

In [1]:
with open('product_record.py', 'w') as file:
    file.write(f'class product_record:\n')
    file.write(f'    def __init__(self, ):\n')
    file.write(f'        self.id=\n')