# LEXICAL ANALYZER (Multi-Token Ambiguity Handling)

# In [3]:
import tkinter as tk
from tkinter import ttk, filedialog
from PIL import Image, ImageTk
import json

# Lexical definitions: the fixed vocabularies that lexer() matches against.

# Reserved words; a word found in this set is reported as a 'Keyword'.
KEYWORDS = {
    'char', 'float', 'int',
    'else', 'if',
    'for', 'while',
    'return',
}

# Operator spellings, both one and two characters long.
OPERATORS = {
    '+', '-', '*', '/',
    '=',
    '==', '!=',
    '<', '<=', '>', '>=',
}

# Punctuation reported as a 'Separator'.
SEPARATORS = {
    '(', ')',
    '{', '}',
    '[', ']',
    ';', ',', '.',
}

def is_number(token):
    """Return True if *token* is spelled like a numeric literal.

    A string token qualifies when ``float()`` can parse it *and* it contains
    at least one digit.  The digit requirement exists because ``float()``
    also accepts the words ``nan``, ``inf`` and ``infinity`` (in any letter
    case); in source code those are ordinary identifiers, not numbers.

    Args:
        token: Candidate lexeme, normally a string.

    Returns:
        bool: True for spellings such as ``"42"``, ``"3.14"`` or ``"1e5"``,
        False otherwise.
    """
    # Reject digit-free words up front; this also covers the empty string.
    if isinstance(token, str) and not any(ch.isdigit() for ch in token):
        return False
    try:
        float(token)
    except ValueError:
        return False
    return True

def lexer(code):
    """Split *code* into a list of ``(token_type, lexeme)`` tuples.

    The scan runs left to right; at each position the first matching rule
    wins:

    1. whitespace is skipped;
    2. a two-character operator (``==``, ``<=`` ...) is preferred over a
       one-character one, which resolves the ``=`` / ``==`` ambiguity;
    3. a one-character operator;
    4. a separator;
    5. a character literal: quote, one character, quote -- or quote,
       backslash, one character, quote for an escaped character.  A quote
       that starts neither form is emitted on its own as ``Unknown``;
    6. otherwise a word, running up to the next whitespace, operator,
       separator or quote.  A dot that sits between digits is kept inside
       the word so decimal literals such as ``3.14`` stay in one piece.
       The word is classified as ``Keyword``, ``Number``, ``Identifier``
       or ``Unknown``.

    Args:
        code: Source text to tokenize.

    Returns:
        list[tuple[str, str]]: Tokens in source order.
    """
    tokens = []
    i = 0
    length = len(code)

    while i < length:
        ch = code[i]

        if ch.isspace():
            i += 1
            continue

        # Longest match first, so '==' is never split into '=' and '='.
        two_chars = code[i:i + 2]
        if len(two_chars) == 2 and two_chars in OPERATORS:
            tokens.append(('Operator', two_chars))
            i += 2
            continue

        if ch in OPERATORS:
            tokens.append(('Operator', ch))
            i += 1
            continue

        if ch in SEPARATORS:
            tokens.append(('Separator', ch))
            i += 1
            continue

        if ch == "'":
            # The escaped form is tested first so that an escaped quote is
            # not cut short after its third character.
            if i + 3 < length and code[i + 1] == '\\' and code[i + 3] == "'":
                literal_len = 4
            elif i + 2 < length and code[i + 2] == "'":
                literal_len = 3
            else:
                literal_len = 0

            if literal_len:
                tokens.append(('Character', code[i:i + literal_len]))
                i += literal_len
            else:
                # Always consume the quote: the word scan below stops at a
                # quote, so leaving it here would never advance ``i`` and
                # the outer loop would spin forever.
                tokens.append(('Unknown', ch))
                i += 1
            continue

        # Word scan.  The first character is guaranteed not to be a stop
        # character (every stop character was handled above), so the word
        # is never empty.
        start = i
        while i < length:
            cur = code[i]
            # Keep a decimal point that has digits on both sides.
            if (cur == '.' and i + 1 < length and code[i + 1].isdecimal()
                    and code[start:i].isdecimal()):
                i += 1
                continue
            if (cur.isspace() or cur == "'" or cur in OPERATORS
                    or cur in SEPARATORS or code[i:i + 2] in OPERATORS):
                break
            i += 1
        token = code[start:i]

        if token in KEYWORDS:
            tokens.append(('Keyword', token))
        elif is_number(token):
            tokens.append(('Number', token))
        elif token.isidentifier():
            tokens.append(('Identifier', token))
        else:
            tokens.append(('Unknown', token))

    return tokens

def analyze_code():
    """Tokenize the text in the input box and show the result in the table.

    Reads the whole content of ``text_input`` (stripped), runs ``lexer`` on
    it, and groups the distinct lexemes by token type in first-seen order.
    The table is then cleared and refilled with one row per token type.
    The number shown next to each type is the count of *distinct* lexemes
    of that type, not the total number of occurrences.
    """
    source_text = text_input.get("1.0", tk.END).strip()

    # token type -> its distinct lexemes.  Dict keys keep insertion order and
    # ignore repeats, so the inner dict doubles as an ordered set.
    lexemes_by_type = {}
    for kind, lexeme in lexer(source_text):
        lexemes_by_type.setdefault(kind, {})[lexeme] = None

    # Replace whatever the previous analysis left in the table.
    stale_rows = tree.get_children()
    tree.delete(*stale_rows)

    for kind, distinct in lexemes_by_type.items():
        label = f"{kind} ({len(distinct)})"
        joined = ' | '.join(distinct)
        tree.insert('', 'end', values=(label, joined))

def load_file():
    """Let the user pick a file and load its code into the input box.

    Opens a file dialog.  If the user cancels, nothing happens.  For a
    ``.ipynb`` file (extension matched case-insensitively) the file is parsed
    as JSON and the ``source`` of every cell whose ``cell_type`` is ``"code"``
    is concatenated, one newline after each cell.  Any other file is read
    verbatim as UTF-8 text.

    If the file cannot be opened, decoded or parsed, the input box receives a
    ``# Error loading ...`` message instead of code, so the failure is visible
    in the GUI rather than only on stderr.
    """
    file_path = filedialog.askopenfilename(filetypes=[
        ("Code files", "*.txt *.c *.ipynb"),
        ("All files", "*.*")
    ])
    if not file_path:
        # Dialog was cancelled.
        return

    if file_path.lower().endswith(".ipynb"):
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                notebook = json.load(f)
            code = ''.join(
                ''.join(cell.get("source", [])) + "\n"
                for cell in notebook.get("cells", [])
                if cell.get("cell_type") == "code"
            )
        # OSError: open/read failure.  ValueError: invalid JSON or invalid
        # UTF-8.  AttributeError/TypeError: JSON whose structure is not the
        # expected dict-of-cells layout.
        except (OSError, ValueError, AttributeError, TypeError) as e:
            code = f"# Error loading .ipynb file:\n{e}"
    else:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                code = f.read()
        # OSError: open/read failure.  UnicodeDecodeError: not UTF-8 text.
        except (OSError, UnicodeDecodeError) as e:
            code = f"# Error loading file:\n{e}"

    text_input.delete("1.0", tk.END)
    text_input.insert(tk.END, code)

# GUI
# Main window: opens at 1100x800 and cannot be resized below 1100x700.
root = tk.Tk()
root.title("Lexical Analyzer")
root.geometry("1100x800")
root.configure(bg="#e3f2fd")
root.minsize(1100, 700)

# ttk styling: select the "clam" theme, then set fonts/colours for the
# Treeview heading and body rows.
style = ttk.Style()
style.theme_use("clam")
style.configure("Treeview.Heading", font=("Segoe UI", 12, "bold"), background="#0288d1", foreground="white")
style.configure("Treeview", font=("Segoe UI", 11), rowheight=28)

# Title and subtitle labels at the top of the window.
tk.Label(root, text="Lexical Analyzer", font=("Segoe UI", 28, "bold"), bg="#e3f2fd", fg="#01579b").pack(pady=(20,5))
tk.Label(root, text="(Multi-Token Ambiguity Handling)", font=("Segoe UI", 16, "italic"), bg="#e3f2fd", fg="#0277bd").pack()

# Input area: a caption plus the text box holding the code to analyze.
frame_input = tk.Frame(root, bg="#e3f2fd")
frame_input.pack(fill='x', padx=30)
tk.Label(frame_input, text="Enter Code or Load File", font=("Segoe UI", 14, "bold"), bg="#e3f2fd").pack(anchor="w")

# text_input is read by analyze_code() and overwritten by load_file().
text_input = tk.Text(frame_input, height=10, font=("Consolas", 12), wrap="word", relief="solid", borderwidth=1, bg="#ffffff")
text_input.pack(fill='x', pady=5)

# Container for the action buttons.
button_frame = tk.Frame(root, bg="#e3f2fd")
button_frame.pack(pady=15)

def style_button(btn):
    """Give *btn* a hover effect by swapping its background colour.

    The background becomes ``#0277bd`` while the pointer is over the widget
    and goes back to ``#039be5`` when the pointer leaves.

    Args:
        btn: Widget exposing ``bind`` and ``config``.
    """
    hover_colours = (
        ("<Enter>", "#0277bd"),
        ("<Leave>", "#039be5"),
    )
    for sequence, colour in hover_colours:
        # ``colour=colour`` freezes the current value for this handler.
        btn.bind(sequence, lambda _event, colour=colour: btn.config(bg=colour))

# "Analyze" button: runs analyze_code() on click; gets the hover effect.
btn_analyze = tk.Button(button_frame, text="Analyze", command=analyze_code, font=("Segoe UI", 12, "bold"),
                        bg="#039be5", fg="white", padx=20, pady=6, relief="raised")
style_button(btn_analyze)
btn_analyze.grid(row=0, column=0, padx=10)

# "Load File" button: runs load_file() on click; placed right of "Analyze".
btn_load = tk.Button(button_frame, text="Load File", command=load_file, font=("Segoe UI", 12, "bold"),
                     bg="#039be5", fg="white", padx=20, pady=6, relief="raised")
style_button(btn_load)
btn_load.grid(row=0, column=1, padx=10)

# Caption above the result table.
tk.Label(root, text="Tokens Identified", font=("Segoe UI", 14, "bold"), bg="#e3f2fd").pack(anchor="w", padx=30)

# Result area: takes the remaining window space.
frame_tree = tk.Frame(root)
frame_tree.pack(fill='both', expand=True, padx=30, pady=10)

# Both scrollbars are packed before the table is.
scroll_y = tk.Scrollbar(frame_tree, orient="vertical")
scroll_y.pack(side='right', fill='y')

scroll_x = tk.Scrollbar(frame_tree, orient="horizontal")
scroll_x.pack(side='bottom', fill='x')

# Two-column table (headings only); it reports its view to the scrollbars.
# analyze_code() clears and refills it.
tree = ttk.Treeview(frame_tree, columns=("Type", "Tokens"), show="headings",
                    yscrollcommand=scroll_y.set, xscrollcommand=scroll_x.set)

tree.heading("Type", text="Token Type (Count)")
tree.heading("Tokens", text="Tokens")

# Fixed-width columns (stretch=False); the "Tokens" column is set wider
# than the initial 1100px window.
tree.column("Type", width=200, anchor="center", stretch=False)
tree.column("Tokens", width=1400, anchor="w", stretch=False)

# Other half of the scrollbar wiring: the scrollbars drive the table's view.
scroll_y.config(command=tree.yview)
scroll_x.config(command=tree.xview)

tree.pack(fill='both', expand=True)

# Start the Tk event loop.
root.mainloop()