In [2]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [5]:
import re



TOKEN_TYPES = {
    'INTEGER_LITERAL': r'\d+',
    'BOOLEAN_LITERAL': r'true|false',
    'IDENTIFIER': r'[a-zA-Z][a-zA-Z0-9]*',
    'OPERATOR': r'\+|\-|\*|\/|\=|\=\=|\!\=',
    'KEYWORD': r'if|else|print',
    'COMMENT': r'\/\/.*',
    'WHITESPACE': r'\s+',
}

class Scanner:
    def __init__(self, filename):
        self.filename = filename
        self.tokens = []
        self.keywords = ['if', 'else', 'print']
        self.operators = ['+', '-', '*', '/', '=', '==', '!=']
        self.current_line = 1

    def scan(self):
        with open(self.filename, 'r') as file:
            for line in file:
                line = line.strip()
                while line:
                    found_token = False
                    for token_type, pattern in TOKEN_TYPES.items():
                        match = re.match(pattern, line)
                        if match:
                            lexeme = match.group(0)
                            if token_type != 'WHITESPACE':
                                self.tokens.append((token_type, lexeme, self.current_line))
                            line = line[match.end():].lstrip()
                            found_token = True
                            break

                    if not found_token:
                        print(f"Lexical error in line {self.current_line}: Invalid token")
                        return

                    if token_type == 'COMMENT':
                        break
                self.current_line += 1

    def display_tokens(self):
        for token in self.tokens:
            print(token)


if __name__ == "__main__":
    filename = input("Enter the filename to scan: ")
    scanner = Scanner(filename)
    scanner.scan()
    scanner.display_tokens()


Enter the filename to scan: testCase3
Lexical error in line 2: Invalid token
('OPERATOR', '/', 1)
('OPERATOR', '/', 1)
('IDENTIFIER', 'MiniLang', 1)
('IDENTIFIER', 'program', 1)
('IDENTIFIER', 'with', 1)
('IDENTIFIER', 'a', 1)
('IDENTIFIER', 'mix', 1)
('IDENTIFIER', 'of', 1)
('IDENTIFIER', 'different', 1)
('IDENTIFIER', 'token', 1)
('IDENTIFIER', 'types', 1)
('IDENTIFIER', 'x', 2)
('OPERATOR', '=', 2)
('INTEGER_LITERAL', '5', 2)
