# **Compiler Design Project**

## **Project Title**: Mini Compiler for a Scripting Language

**Project Phases**:
1. Lexical Analysis
2. Syntax Analysis
3. Semantic Analysis

**Example Code**:

```text
LET a = 5
LET b = 10
IF a < b
THEN
LET c = a + b
LET d = c * 2
ELSE
LET e = a - b
ENDIF
CALL myFunction(a, b)
```

# Phase 1: Lexical Analysis

In [17]:
# Token types
# Reserved words of the scripting language. is_keyword() upper-cases its
# argument before looking it up here, so keywords are case-insensitive.
# NOTE(review): "WHILE" is recognised as a keyword, but parse() has no branch
# for it and will report it as an unknown token.
KEYWORDS = ["LET", "IF", "THEN", "ELSE", "ENDIF", "WHILE", "CALL"]
# Operator and punctuation lexemes that is_operator() tests membership against.
OPERATORS = ["+", "-", "*", "/", "=", "<", ">", "!=", "(", ")", ",", "{", "}"]

# Symbol Table
# Maps a variable name to {"type": ..., "value": ...}. Entries are written by
# parse_let_statement() and read by main() when it prints the symbol table.
symbol_table = {}


# --------------------- Utility Functions --------------------- #
def print_header(title):
    """Print ``title`` centred in a 50-column banner.

    The banner is a blank line, a rule of 50 ``=`` characters, the centred
    title, and a second rule.
    """
    rule = "=" * 50
    print("\n" + rule)
    print(f"{title:^50}")
    print(rule)


def print_table(headers, rows):
    """Print ``rows`` under ``headers`` as an ASCII table with aligned borders.

    Every cell is converted with ``str()``. Rows shorter than ``headers`` are
    padded with empty cells and extra trailing cells are ignored, so rows of
    uneven length (for example short error rows) never raise.

    Args:
        headers: Sequence of column titles; its length fixes the column count.
        rows: Iterable of row sequences.
    """
    column_count = len(headers)
    header_cells = [str(title) for title in headers]

    # Normalise every row to exactly ``column_count`` string cells.
    body = []
    for row in rows:
        cells = [str(cell) for cell in row][:column_count]
        cells += [""] * (column_count - len(cells))
        body.append(cells)

    column_widths = [
        max(len(line[i]) for line in body + [header_cells]) + 2
        for i in range(column_count)
    ]
    # Each cell is rendered as " {text:<w} ", i.e. w + 2 characters wide, so
    # the divider must span w + 2 dashes for the "+" corners to line up.
    divider = "+".join("-" * (w + 2) for w in column_widths)
    format_row = "|".join(f" {{:<{w}}} " for w in column_widths)

    print("+", divider, "+", sep="")
    print("|", format_row.format(*header_cells), "|", sep="")
    print("+", divider, "+", sep="")
    for cells in body:
        print("|", format_row.format(*cells), "|", sep="")
    print("+", divider, "+", sep="")


# --------------------- Lexical Analysis --------------------- #
def is_keyword(word):
    """Return True when ``word``, upper-cased, appears in ``KEYWORDS``."""
    candidate = word.upper()
    return candidate in KEYWORDS


def is_operator(char):
    """Return True when ``char`` equals one of the lexemes in ``OPERATORS``."""
    return any(char == symbol for symbol in OPERATORS)


def tokenize(source_code):
    """Split ``source_code`` into a flat list of ``(token_type, lexeme)`` tuples.

    Token types produced are ``KEYWORD``, ``IDENTIFIER``, ``OPERATOR``,
    ``NUMBER`` (digits only) and ``FLOAT`` (digits with one decimal point).
    Whitespace, including newlines, only separates tokens and is discarded.

    Args:
        source_code: Program text to scan.

    Returns:
        list[tuple[str, str]]: Tokens in source order.
    """
    tokens = []
    current = ""  # characters of the identifier/keyword being collected
    i = 0
    length = len(source_code)

    def flush_word():
        """Emit the pending word, classified as KEYWORD or IDENTIFIER."""
        nonlocal current
        if current:
            tokens.append(("KEYWORD" if is_keyword(current) else "IDENTIFIER", current))
            current = ""

    while i < length:
        char = source_code[i]

        # Whitespace ends the pending word.
        if char.isspace():
            flush_word()
            i += 1
            continue

        # Two-character operators (currently "!=") must be tried before the
        # single-character test: "!" alone is not an operator, so checking one
        # character at a time would never recognise "!=".
        pair = source_code[i:i + 2]
        if len(pair) == 2 and is_operator(pair):
            flush_word()
            tokens.append(("OPERATOR", pair))
            i += 2
            continue

        # Single-character operators and punctuation.
        # NOTE(review): "{" and "}" are listed in OPERATORS, so they are
        # emitted as operators here; a separate "{ ... }" comment rule can
        # never be reached while they stay in that list.
        if is_operator(char):
            flush_word()
            tokens.append(("OPERATOR", char))
            i += 1
            continue

        # A digit that follows identifier characters belongs to the
        # identifier (e.g. "x1"); it does not start a new number.
        if char.isdigit() and current:
            current += char
            i += 1
            continue

        # Numbers (integer and float). A leading "." counts only when a digit
        # follows it.
        if char.isdigit() or (char == "." and i + 1 < length and source_code[i + 1].isdigit()):
            flush_word()
            start = i
            seen_dot = False
            while i < length:
                ch = source_code[i]
                if ch == "." and not seen_dot:
                    seen_dot = True
                elif not ch.isdigit():
                    # Any non-digit, including a second ".", ends the literal.
                    break
                i += 1
            tokens.append(("FLOAT" if seen_dot else "NUMBER", source_code[start:i]))
            continue

        # Anything else extends the pending word.
        current += char
        i += 1

    flush_word()
    return tokens


# --------------------- Syntax Analysis --------------------- #
def parse_let_statement(tokens, index):
    """Parse ``LET <identifier> = <operand> [<op> <operand>]...`` at ``index``.

    The right-hand side is a chain of operands (identifier, NUMBER or FLOAT)
    joined by ``+ - * /``. The whole chain is consumed, so ``LET c = a + b``
    leaves no stray ``+ b`` tokens for the caller. On success the variable is
    recorded in ``symbol_table`` with the expression text as its value.

    Args:
        tokens: List of ``(token_type, lexeme)`` tuples from the lexer.
        index: Position of the ``LET`` token.

    Returns:
        tuple[int, list]: ``(next_index, row)``.
        * Success: ``row`` is ``["LET", name, type, value]``.
        * Token at ``index`` is not ``LET``: ``(-1, error_row)``.
        * Malformed or truncated LET: ``error_row`` with ``next_index`` at the
          next KEYWORD token (or ``len(tokens)``), so callers always advance.
        Error rows have four columns to match the other result rows.
    """
    error_row = ["ERROR", "Invalid LET statement", "", ""]
    total = len(tokens)
    if not (0 <= index < total) or tokens[index][1].upper() != "LET":
        return -1, error_row

    operand_types = ("IDENTIFIER", "NUMBER", "FLOAT")
    well_formed = (
        index + 3 < total
        and tokens[index + 1][0] == "IDENTIFIER"
        and tokens[index + 2][1] == "="
        and tokens[index + 3][0] in operand_types
    )
    if not well_formed:
        # Resynchronise at the next keyword so parsing can continue after it.
        resume = index + 1
        while resume < total and tokens[resume][0] != "KEYWORD":
            resume += 1
        return resume, error_row

    var_name = tokens[index + 1][1]

    # Extend over every "<arithmetic operator> <operand>" pair that follows.
    end = index + 4
    while (
        end + 1 < total
        and tokens[end][0] == "OPERATOR"
        and tokens[end][1] in ("+", "-", "*", "/")
        and tokens[end + 1][0] in operand_types
    ):
        end += 2

    expression = tokens[index + 3:end]
    value = " ".join(token[1] for token in expression)

    def operand_is_float(token):
        """True for a literal containing '.' or a variable recorded as FLOAT."""
        if token[0] == "IDENTIFIER":
            known = symbol_table.get(token[1])
            return known is not None and known.get("type") == "FLOAT"
        return "." in token[1]

    # Operands sit at every other position of the expression slice.
    value_type = "FLOAT" if any(operand_is_float(t) for t in expression[::2]) else "INTEGER"
    symbol_table[var_name] = {"type": value_type, "value": value}
    return end, ["LET", var_name, value_type, value]


def parse_if_statement(tokens, index):
    """Parse ``IF <left> <op> <right> THEN ... [ELSE ...] ENDIF`` at ``index``.

    Inside either branch only ``LET`` statements are parsed, via
    ``parse_let_statement``; every other token is skipped. Nested ``IF`` is
    not supported: the first ``ENDIF`` closes the statement.

    Args:
        tokens: List of ``(token_type, lexeme)`` tuples from the lexer.
        index: Position of the ``IF`` token.

    Returns:
        tuple[int, list[list]]: ``(next_index, rows)``. ``rows`` starts with
        the IF condition row, followed by the LET rows, an optional ELSE row
        and a closing ENDIF row. If the header is truncated or ``THEN`` is
        missing, a single ``Invalid IF statement`` error row is returned. If
        ``ENDIF`` is missing, a ``Missing ENDIF`` error row is appended. Both
        error cases return ``len(tokens)`` as ``next_index`` so the caller
        stops instead of re-reading tokens from a negative index.
    """
    total = len(tokens)

    # The header needs five tokens: IF, left operand, operator, right operand, THEN.
    if index + 4 >= total or tokens[index + 4][1].upper() != "THEN":
        return total, [["ERROR", "Invalid IF statement", "", ""]]

    condition_var = tokens[index + 1][1]
    operator = tokens[index + 2][1]
    condition_value = tokens[index + 3][1]
    results = [["IF", f"{condition_var} {operator} {condition_value}", "CONDITION", "VALID"]]

    def parse_branch(position, stop_words):
        """Collect LET rows until a stop word or the end of the tokens."""
        while position < total and tokens[position][1].upper() not in stop_words:
            if tokens[position][1].upper() == "LET":
                position, let_result = parse_let_statement(tokens, position)
                results.append(let_result)
            else:
                position += 1
        return position

    index = parse_branch(index + 5, ("ENDIF", "ELSE"))
    if index < total and tokens[index][1].upper() == "ELSE":
        results.append(["ELSE", "", "", ""])
        index = parse_branch(index + 1, ("ENDIF",))

    if index >= total:
        # Ran off the end of the input without seeing ENDIF.
        results.append(["ERROR", "Missing ENDIF", "", ""])
        return total, results

    results.append(["ENDIF", "", "", ""])
    return index + 1, results


def parse(tokens):
    """Parse a token list into table rows, one or more per statement.

    Supported statements are ``LET`` (via ``parse_let_statement``), ``IF``
    (via ``parse_if_statement``) and ``CALL name`` with an optional
    parenthesised argument list. Parsing stops at the first token that does
    not start a known statement.

    Args:
        tokens: List of ``(token_type, lexeme)`` tuples from the lexer.

    Returns:
        list[list]: Four-column rows ``[statement, name, type, value]``;
        problems are reported as ``["ERROR", message, "", ""]`` rows.
    """
    index = 0
    parsed_results = []
    total = len(tokens)

    # ``0 <=`` guards against a helper returning -1, which would otherwise
    # make ``tokens[index]`` silently read from the end of the list.
    while 0 <= index < total:
        keyword = tokens[index][1].upper()
        if keyword == "LET":
            index, result = parse_let_statement(tokens, index)
            parsed_results.append(result)
        elif keyword == "IF":
            index, results = parse_if_statement(tokens, index)
            parsed_results.extend(results)
        elif keyword == "CALL":
            if index + 1 >= total:
                parsed_results.append(["ERROR", "Invalid CALL statement", "", ""])
                break
            func_name = tokens[index + 1][1]
            params = []
            index += 2  # past CALL and the function name
            if index < total and tokens[index][1] == "(":
                index += 1
                while index < total and tokens[index][1] != ")":
                    if tokens[index][0] in ("IDENTIFIER", "NUMBER", "FLOAT"):
                        params.append(tokens[index][1])
                    index += 1
                if index >= total:
                    parsed_results.append(["ERROR", f"Missing ')' in call to {func_name}", "", ""])
                    break
                # Step past ")" only; skipping further would swallow the first
                # token of the next statement.
                index += 1
            parsed_results.append(["CALL", func_name, "FUNCTION", ", ".join(params)])
        else:
            parsed_results.append(["ERROR", f"Unknown token {tokens[index][1]}", "", ""])
            break
    return parsed_results


# --------------------- Main Function --------------------- #
def main():
    """Read a program from standard input and print each analysis stage.

    Lines are read until a blank line or end of input. The program then
    prints the source, the token table, the parse result table and the
    symbol table.
    """
    print_header("Mini Compiler")
    print("Enter your source code. End input with a blank line.")

    # Collect multiline input from the user
    source_code_lines = []
    while True:
        try:
            line = input()
        except EOFError:
            # Piped or redirected input may end without a blank line.
            break
        if line.strip() == "":
            break
        source_code_lines.append(line)

    source_code = "\n".join(source_code_lines)

    print_header("Source Code")
    print(source_code)

    print_header("Lexical Analysis")
    tokens = tokenize(source_code)
    # Keywords are displayed with their own name as the token type.
    token_rows = [[token[0] if token[0] != "KEYWORD" else token[1].upper(), token[1]] for token in tokens]
    print_table(["TOKEN TYPE", "LEXEME"], token_rows)

    print_header("Syntax Analysis and Parsing")
    # Start each compilation with an empty symbol table so variables from a
    # previous run (e.g. re-executing the notebook cell) do not leak in.
    symbol_table.clear()
    parsed_results = parse(tokens)
    print_table(["STATEMENT", "NAME", "TYPE", "VALUE"], parsed_results)

    print_header("Symbol Table")
    symbol_rows = [[name, details["type"], details["value"]] for name, details in symbol_table.items()]
    print_table(["NAME", "TYPE", "VALUE"], symbol_rows)


if __name__ == "__main__":
    main()



                  Mini Compiler                   
Enter your source code. End input with a blank line.
LET a = 5.2
LET b = 10
IF a < b 
THEN 
LET c = a + b
LET d = c * 2
ELSE
LET e = a - b
ENDIF 
CALL myFunction(a, b)


                   Source Code                    
LET a = 5.2
LET b = 10
IF a < b 
THEN 
LET c = a + b
LET d = c * 2
ELSE
LET e = a - b
ENDIF 
CALL myFunction(a, b)

                 Lexical Analysis                 
+------------+------------+
| TOKEN TYPE   | LEXEME       |
+------------+------------+
| LET          | LET          |
| IDENTIFIER   | a            |
| OPERATOR     | =            |
| FLOAT        | 5.2          |
| LET          | LET          |
| IDENTIFIER   | b            |
| OPERATOR     | =            |
| NUMBER       | 10           |
| IF           | IF           |
| IDENTIFIER   | a            |
| OPERATOR     | <            |
| IDENTIFIER   | b            |
| THEN         | THEN         |
| LET          | LET          |
| IDENTIFIER   | c      

# Full Project

# Project Phases:

1. Lexical Analysis
2. Syntax Analysis
3. Semantic Analysis


# Code with Parse Tree

In [18]:

# --------------------- Parse Tree Representation --------------------- #
def draw_parse_tree(parsed_results):
    """Print the parsed rows as a tree under a single ``Program`` root.

    Each row ``[statement, name, *details]`` becomes one branch labelled
    ``statement: name`` (just ``statement`` when the name is empty). The
    non-empty ``details`` are printed as its children. Empty placeholder
    columns, as used by ELSE/ENDIF rows, are skipped so that no blank
    branches appear.

    Args:
        parsed_results: List of rows as returned by ``parse``.
    """

    def draw_node(node, prefix="", is_last=True):
        """Print ``node`` and, for list nodes, its children one level deeper."""
        connector = "└── " if is_last else "├── "
        if isinstance(node, list):
            if not node:
                return
            label = str(node[0]).lower()
            # Rows shorter than two items have no name column.
            name = str(node[1]) if len(node) > 1 else ""
            heading = f"{label}: {name}" if name else label
            print(f"{prefix}{connector}{heading}")
            child_prefix = prefix + ("    " if is_last else "│   ")
            children = [child for child in node[2:] if child != ""]
            for i, child in enumerate(children):
                draw_node(child, child_prefix, is_last=(i == len(children) - 1))
        else:
            print(f"{prefix}{connector}{node}")

    print_header("Parse Tree")
    print("Program")
    for i, result in enumerate(parsed_results):
        draw_node(result, is_last=(i == len(parsed_results) - 1))


# --------------------- Main Function --------------------- #
def main():
    """Read a program from standard input and print each analysis stage.

    Lines are read until a blank line or end of input. The program then
    prints the source, the token table, the parse result table, the symbol
    table and finally the parse tree.
    """
    print_header("Mini Compiler")
    print("Enter your source code. End input with a blank line.")

    # Collect multiline input from the user
    source_code_lines = []
    while True:
        try:
            line = input()
        except EOFError:
            # Piped or redirected input may end without a blank line.
            break
        if line.strip() == "":
            break
        source_code_lines.append(line)

    source_code = "\n".join(source_code_lines)

    print_header("Source Code")
    print(source_code)

    print_header("Lexical Analysis")
    tokens = tokenize(source_code)
    # Keywords are displayed with their own name as the token type.
    token_rows = [[token[0] if token[0] != "KEYWORD" else token[1].upper(), token[1]] for token in tokens]
    print_table(["TOKEN TYPE", "LEXEME"], token_rows)

    print_header("Syntax Analysis and Parsing")
    # Start each compilation with an empty symbol table so variables from a
    # previous run (e.g. re-executing the notebook cell) do not leak in.
    symbol_table.clear()
    parsed_results = parse(tokens)
    print_table(["STATEMENT", "NAME", "TYPE", "VALUE"], parsed_results)

    print_header("Symbol Table")
    symbol_rows = [[name, details["type"], details["value"]] for name, details in symbol_table.items()]
    print_table(["NAME", "TYPE", "VALUE"], symbol_rows)

    draw_parse_tree(parsed_results)


if __name__ == "__main__":
    main()



                  Mini Compiler                   
Enter your source code. End input with a blank line.
LET a = 5
LET b = 10.2
IF a < b
THEN 
LET c = a + b
LET d = c * 2
ELSE
LET e = a - b
ENDIF
CALL myFunction(a, b)


                   Source Code                    
LET a = 5
LET b = 10.2
IF a < b
THEN 
LET c = a + b
LET d = c * 2
ELSE
LET e = a - b
ENDIF
CALL myFunction(a, b)

                 Lexical Analysis                 
+------------+------------+
| TOKEN TYPE   | LEXEME       |
+------------+------------+
| LET          | LET          |
| IDENTIFIER   | a            |
| OPERATOR     | =            |
| NUMBER       | 5            |
| LET          | LET          |
| IDENTIFIER   | b            |
| OPERATOR     | =            |
| FLOAT        | 10.2         |
| IF           | IF           |
| IDENTIFIER   | a            |
| OPERATOR     | <            |
| IDENTIFIER   | b            |
| THEN         | THEN         |
| LET          | LET          |
| IDENTIFIER   | c          