In [1]:
import json
import nltk
import re

# Read the notebook to analyse. Notebook files are UTF-8 encoded JSON, so the
# encoding is stated explicitly instead of relying on the platform default.
with open("read.ipynb", encoding="utf-8") as f:
    notebook = json.load(f)

# Regular expressions for pattern matching
RE_Identifiers = r"[a-zA-Z_][a-zA-Z0-9_]*"
RE_Operators = r"[-+=*/()]"
RE_Special_Characters = r"[;]"

# Parentheses are reported under their own label; every other character
# accepted by RE_Operators (including * and /) is reported as "Operator".
PAREN_LABELS = {
    "(": "Left Parenthesis: (",
    ")": "Right Parenthesis: )",
}

# List to store the output
output = []

# Process code cells
for cell in notebook["cells"]:
    if cell["cell_type"] != "code":
        continue

    # A cell's "source" is either one string or a list of lines that already
    # carry their own "\n". "".join() handles both forms without inserting
    # extra newlines (and without splitting a plain string into characters).
    source_code = "".join(cell["source"])

    # Process each token
    for token in nltk.wordpunct_tokenize(source_code):
        if re.match(RE_Identifiers, token):
            output.append("Identifier: " + token)
            continue

        # wordpunct_tokenize keeps a run of adjacent punctuation together
        # (e.g. ")=" or ");"), so classify such a token one character at a
        # time rather than dropping the whole run.
        for char in token:
            if re.fullmatch(RE_Operators, char):
                output.append(PAREN_LABELS.get(char, "Operator: " + char))
            elif re.fullmatch(RE_Special_Characters, char):
                output.append("Special Character: " + char)

# Print the output
for item in output:
    print(item)

Left Parenthesis: (
Identifier: a
Operator: +
Identifier: b
Right Parenthesis: )
Operator: =
Identifier: c
Special Character: ;


In [None]:
import json
import re

class Token:
    """One lexical unit: a category tag paired with the text that was matched."""

    def __init__(self, type, value):
        # `type` is the category tag (tokenize() passes a TokenType constant);
        # `value` is the matched source text.
        self.type, self.value = type, value

class TokenType:
    """String tags identifying each kind of token the tokenizer can emit."""

    # Operands
    IDENTIFIER = 'IDENTIFIER'

    # Grouping
    LEFT_PAREN = 'LEFT_PAREN'
    RIGHT_PAREN = 'RIGHT_PAREN'

    # Operators and statement punctuation
    PLUS = 'PLUS'
    EQUALS = 'EQUALS'
    SEMICOLON = 'SEMICOLON'

def tokenize(code):
    """Split a source string into a list of Token objects.

    Whitespace around and between tokens is ignored. Recognised tokens are
    parentheses, ASCII identifiers, ``+``, ``=`` and ``;``.

    Args:
        code: The source text to tokenize.

    Returns:
        A list of Token objects in source order (empty for blank input), or
        None if a character sequence matches no token pattern. In that case
        a "Syntax error: Invalid token" message showing the unparsed
        remainder is printed first.
    """
    # (compiled pattern, token type) pairs, tried in this order against the
    # start of the remaining input. Compiled once per call rather than once
    # per pattern per loop iteration.
    patterns = [
        (re.compile(r'\('), TokenType.LEFT_PAREN),
        (re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b'), TokenType.IDENTIFIER),
        (re.compile(r'\+'), TokenType.PLUS),
        (re.compile(r'\)'), TokenType.RIGHT_PAREN),
        (re.compile(r'='), TokenType.EQUALS),
        (re.compile(r';'), TokenType.SEMICOLON),
    ]

    tokens = []

    # Strip up front: the loop below only skips whitespace *after* a token,
    # so leading blanks would otherwise be reported as an invalid token.
    code = code.strip()

    while code:
        for regex, token_type in patterns:
            match = regex.match(code)
            if match:
                tokens.append(Token(token_type, match.group(0)))
                code = code[match.end():].lstrip()
                break
        else:
            # No pattern matched at the current position.
            print("Syntax error: Invalid token", code)
            return None

    return tokens

def parse(tokens):
    """Check that a token list forms exactly one assignment statement.

    Grammar (recursive descent):

        statement := expr '=' expr ';'
        expr      := term
        term      := factor ('+' factor)*
        factor    := IDENTIFIER | '(' expr ')'

    Prints "Syntax is correct!" when the whole token list matches. Otherwise
    prints a "Syntax error: ..." line describing the first problem, followed
    by "Syntax is not correct!". Parsing stops at the first error.

    Args:
        tokens: Sequence of objects with ``type`` and ``value`` attributes,
            as produced by tokenize(). None or an empty sequence is reported
            as a syntax error.

    Returns:
        The statement rendered as a string (e.g. "(a + b) = c") when the
        syntax is valid, otherwise None.
    """
    tokens = list(tokens or [])
    pos = 0

    class ParseFailure(Exception):
        """Internal signal that aborts parsing at the first syntax error."""

    def peek():
        # Current token, or None once the input is exhausted.
        return tokens[pos] if pos < len(tokens) else None

    def consume(token_type):
        # Advance past the current token, which must have the given type.
        nonlocal pos
        token = peek()
        if token is None:
            if token_type == TokenType.SEMICOLON:
                raise ParseFailure(
                    "Syntax error: Missing semicolon at the end of the statement"
                )
            raise ParseFailure("Syntax error: Unexpected end of input")
        if token.type != token_type:
            raise ParseFailure(f"Syntax error: Unexpected token {token.value}")
        pos += 1

    def factor():
        nonlocal pos
        token = peek()
        if token is None:
            # Guard against running off the end, e.g. for input "a =".
            raise ParseFailure("Syntax error: Unexpected end of input")
        if token.type == TokenType.IDENTIFIER:
            pos += 1
            return token.value
        if token.type == TokenType.LEFT_PAREN:
            pos += 1
            inner = expr()
            consume(TokenType.RIGHT_PAREN)
            return f"({inner})"
        raise ParseFailure(f"Syntax error: Unexpected token {token.value}")

    def term():
        nonlocal pos
        operands = [factor()]
        while pos < len(tokens) and tokens[pos].type == TokenType.PLUS:
            pos += 1
            operands.append(factor())
        return " + ".join(operands)

    def expr():
        return term()

    def assignment_statement():
        left = expr()
        consume(TokenType.EQUALS)
        right = expr()
        # The terminating semicolon is part of the statement and must be the
        # last token; anything after it is an error.
        consume(TokenType.SEMICOLON)
        if pos != len(tokens):
            raise ParseFailure(
                f"Syntax error: Unexpected token {tokens[pos].value}"
            )
        return f"{left} = {right}"  # Return as a string for simplicity

    # Start the parsing process
    try:
        result = assignment_statement()
    except ParseFailure as failure:
        print(failure)
        print("Syntax is not correct!")
        return None

    print("Syntax is correct!")
    return result

def get_user_input():
    """Prompt on standard input and return the line the user typed."""
    entered = input("Enter your code: ")
    return entered

def process_input():
    """Read one line of code from the user, tokenize it, and parse the result.

    Parsing is skipped when tokenize() yields nothing (an empty list or None).
    """
    tokens = tokenize(get_user_input())
    if not tokens:
        return
    parse(tokens)

if __name__ == "__main__":
    # The code to check is typed at the prompt inside process_input().
    # read.ipynb is not read at this point, so it is not opened: doing so
    # would make the run fail with FileNotFoundError whenever the file is
    # absent, and would hold an unused handle open while waiting for input.
    process_input()