# Lexical analyzer for Python code:

```python
# Keywords
and, as, assert, break, class, continue, def, del, elif, else, except, finally, for, from, global, if, import, in, is, lambda, not, or, pass, raise, return, try, while, with, yield
# Built-in functions
abs, all, any, bin, callable, chr, classmethod, compile, delattr, dir, divmod, enumerate, eval, exec, filter, format, getattr, globals, hasattr, hash, help, hex, id, input, isinstance, issubclass, iter, len, locals, map, max, min, next, oct, open, ord, print, pow, repr, reversed, round, setattr, sorted, staticmethod, sum, super, vars, zip
# Operators
+, -, *, /, //, **, <<, >>, <, >, <=, >=, ==, !=, &, |, ^, ~, %
# Literals
    # Numbers:
    1, 0.124, -178, .3
    # Strings (including unicode):
    r'Hi', "Hi", """Hi,
    this
    is
    a
    long
    string""", '''Hi'''
    # Boolean:
    True, False
# Identifiers
    # Var
    m, m1, _
    # Function
    my_func()
    # Method
    my_class.method()
    # Attribute
    my_class.attribute
# Comments:
# This is an example of the comment
```

#### Example
Input:
```python
f = open('mbox.txt', 'r')
d = {}
# From stephen.marquard@uct.ac.za Sat Jan  5 09:14:16 2008
for line in f:
    if line.startswith('From '): # begins with From
        email = line.split()[1]
        domen = email.split('@')[1]
        org = domen.split('.')[0]
        d[org] = d.get(org, 0) + 1
        
print(d)
```

Output:  


f = open('mbox.txt', 'r')  
\$VAR\$ = \$BI_FUNCTION(\$STRING\$, \$STRING\$)  
d = {}  
\$VAR\$ = {}  
\# From stephen.marquard@uct.ac.za Sat Jan  5 09:14:16 2008  
\$COMMENT\$  
for line in f:  
\$K_WORD\$ \$VAR\$ \$K_WORD\$ \$VAR\$:  
    if line.startswith('From '): # begins with From  
    \$K_WORD\$ \$VAR\$.\$METHOD(\$STRING\$): \$COMMENT\$  
        email = line.split()[1]  
        \$VAR\$ = \$VAR\$.\$METHOD()[\$NUMBER\$]  
        domen = email.split('@')[1]  
        \$VAR\$ = \$VAR\$.\$METHOD(\$STRING\$)[\$NUMBER\$]  
        org = domen.split('.')[0]  
        \$VAR\$ = \$VAR\$.\$METHOD(\$STRING\$)[\$NUMBER\$]  
        d[org] = d.get(org, 0) + 1  
        \$VAR\$[\$VAR\$] = \$VAR\$.\$METHOD(\$VAR\$, \$NUMBER\$) \$OPERATOR\$ \$NUMBER\$  
  
  
print(d)  
\$BI_FUNCTION(\$VAR\$)



In [1]:
import re
# Sample program used to exercise the tokenizer rules below.
s='''
f = open('mbox.txt', 'r')
d = {}
# From stephen.marquard@uct.ac.za Sat Jan  5 09:14:16 2008
for line in f:
    if line.startswith('From '): # Начинается с From
        email = line.split()[1]
        domen = email.split('@')[1]
        org = domen.split('.')[0]
        d[org] = d.get(org, 0) + 1

print(d)

my_class.attribute
my_func(dht)

'''
lines = s.split('\n')

# Ordered (pattern, replacement) rules. Each rule is applied to the output of
# the previous one, so the order matters: keywords and builtins are replaced
# before the generic identifier rules, and the operator rule runs last.
#
# Long alternations are built from adjacent raw-string literals rather than a
# single multi-line string: without re.VERBOSE, a line break inside the
# pattern is matched literally and silently disables the alternative after it.
subs = (
    (r'\b(?:and|as|assert|break|class|continue|def|del|elif|else|except|finally|for|'
     r'from|global|if|import|in|is|lambda|not|or|pass|raise|return|try|while|with|yield)\b',
     '$K_WORD$'),
    (r'\b(?:abs|all|any|bin|callable|chr|classmethod|compile|delattr|dir|divmod|'
     r'enumerate|eval|exec|filter|format|getattr|globals|hasattr|hash|help|hex|id|'
     r'input|isinstance|issubclass|iter|len|locals|map|max|min|next|oct|open|ord|'
     r'print|pow|repr|reversed|round|setattr|sorted|staticmethod|sum|super|vars|zip)\b',
     '$BI_FUNCTION'),
    # Word boundaries keep names such as "isTrue" from being split.
    (r'\b(?:True|False)\b', '$BOOLEAN$'),
    (r'#.*', '$COMMENT$'),
    (r'(\"\")?(\'\')?[\'\"][^\"\']+(\"\")?(\'\')?[\'\"]', '$STRING$'),
    (r'\b[+-]?\d+(?:\.\d+)?\b', '$NUMBER$'),
    # "obj.name(" -> "obj.$METHOD("; the arguments are left for later rules.
    (r'(\w+\.)\w+\(', r'\1$METHOD('),
    (r'\.[a-zA-Z_]\w*', r'.$ATTRIBUTE$'),
    # \1 puts back the delimiter that precedes the name (space, bracket, ...).
    (r'([ \(\[,.+|-]|^)[a-zA-Z_]\w*\(', r'\1$FUNCTION('),
    (r'([ \(\[,.+|-]|^)[a-zA-Z_]\w*', r'\1$VAR$'),
    # Multi-character operators come first: alternation takes the first
    # alternative that matches, so "*" listed before "**" would split it.
    (r'\*\*|//|<<|>>|<=|>=|==|!=|\+|-|\*|/|<|>|&|\||\^|~|%', '$OPERATOR$'),
)


def tokenize_line(line):
    """Return *line* with every rule in ``subs`` applied, in order."""
    for pattern, sub in subs:
        line = re.sub(pattern, sub, line)
    return line


# Print each source line followed by its tokenized form.
for line in lines:
    line = line.strip()
    print(line)
    line = tokenize_line(line)
    print(line)




f = open('mbox.txt', 'r')
$VAR$ = $BI_FUNCTION($STRING$, $STRING$)
d = {}
$VAR$ = {}
# From stephen.marquard@uct.ac.za Sat Jan  5 09:14:16 2008
$COMMENT$
for line in f:
$K_WORD$ $VAR$ $K_WORD$ $VAR$:
if line.startswith('From '): # Начинается с From
$K_WORD$ $VAR$.$METHOD($STRING$): $COMMENT$
email = line.split()[1]
$VAR$ = $VAR$.$METHOD()[$NUMBER$]
domen = email.split('@')[1]
$VAR$ = $VAR$.$METHOD($STRING$)[$NUMBER$]
org = domen.split('.')[0]
$VAR$ = $VAR$.$METHOD($STRING$)[$NUMBER$]
d[org] = d.get(org, 0) + 1
$VAR$[$VAR$] = $VAR$.$METHOD($VAR$, $NUMBER$) $OPERATOR$ $NUMBER$


print(d)
$BI_FUNCTION($VAR$)


my_class.attribute
$VAR$.$ATTRIBUTE$
my_func(dht)
$FUNCTION($VAR$)






In [2]:
def fsplit(x):
    """Split a ', '-separated string into a list of its items."""
    return x.split(', ')
# Scratch multi-line sample; it is not passed to the re.sub call below.
s='''in 
a 
line cat
'''
# Replace the standalone words "in" and "a" with KW. The \b anchors keep the
# "in" inside "line" and the "a" inside "cat" untouched.
re.sub(r'\b(?:in|a)\b', 'KW', 'in a line cat')

'KW KW line cat'

In [3]:
def fsplit(x):
    """Turn a ', '-separated list into a single '|'-separated string."""
    parts = x.split(', ')
    return '|'.join(parts)


# Keyword list in the same ', '-separated form used by the token spec.
kwords = "and, as, assert, break, class, continue, def, del, elif, else, except, finally, for, from, global, if, import, in, is, lambda, not, or, pass, raise, return, try, while, with, yield"
fsplit(kwords)

'and|as|assert|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|not|or|pass|raise|return|try|while|with|yield'

In [4]:
# Operator list in ', '-separated form; each separator is swapped for '|'.
# The result is printed as-is and is not regex-escaped.
operators = "+, -, *, /, //, **, <<, >>, <, >, <=, >=, ==, !=, &, |, ^, ~, %"
operators = operators.replace(', ', '|')
print(operators)

+|-|*|/|//|**|<<|>>|<|>|<=|>=|==|!=|&|||^|~|%


In [5]:
# Multi-line sample used to check how splitting on newlines behaves.
s = '''
func66
egrf
print(d)'''
s = s.split('\n')
# Echo every row; the first one is empty because the literal opens with a
# line break.
for row in s:
    print(row)


func66
egrf
print(d)


In [80]:
# Sample lines: an attribute access, a function call, an unfinished
# assignment, a bare name and a method call.
code = '''my_class.attribute
my_func(lkj)
mk =
nl3
my_class.method()'''
lines = code.split('\n')
for line in lines:
    print(line)
    # Replace every ".name" with the attribute placeholder. This rule alone
    # does not tell attributes from methods, so "obj.method()" also becomes
    # "obj.$ATTRIBUTE$()".
    s1 = re.sub(r'\.[a-zA-Z_]\w*', r'.$ATTRIBUTE$', line)
    print(s1)

my_class.attribute
my_class.$ATTRIBUTE$
my_func(lkj)
my_func(lkj)
mk =
mk =
nl3
nl3
my_class.method()
my_class.$ATTRIBUTE$()
