<a href="https://colab.research.google.com/github/Trizzole/TinyBasic/blob/main/assignment2_expressions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [37]:
#download ply

### RUN Once connected, but then comment out afterwards ###
#%mkdir ply
#%cd ply
#!wget "https://raw.githubusercontent.com/dabeaz/ply/master/src/ply/lex.py"
#!wget "https://raw.githubusercontent.com/dabeaz/ply/master/src/ply/yacc.py"

# NOTE(review): notebooks do not define __file__; presumably PLY needs it when
# building the lexer/parser from this namespace -- confirm before removing.
__file__ = "asg2_expressions.ipynb"

# -----------------------------------------------------------------------------
# example.py
#
# Example of using PLY To parse the following simple grammar.
#
# Example valid program: let x = 3; let y = 4; 3 + x * y
# Every assignment ends with ';' and all assignments must come before the
# single final expression, which has no trailing ';'.
#
# 2 * 3 + 4 * (5 - x) is valid syntactically but invalid semantically, because x is never assigned
#
#
#   PROGRAM ==> ASSIGNMENT_LIST EXPRESSION ==> ASSIGNMENT; ASSIGNMENT; ASSIGNMENT; ASSIGNMENT; EXPRESSION
#           ==> LET NAME = EXPRESSION; LET NAME = EXPRESSION; LET NAME = EXPRESSION; LET NAME = EXPRESSION; EXPRESSION
#
#   program : [assignment_list] expression
#
#   assignment_list : assignment ; 
#               | assignment ; assignment_list
#
#   assignment : (let|LET) NAME = expression
#
#   expression : term PLUS term
#              | term MINUS term
#              | term
#
#   term       : factor TIMES factor
#              | factor DIVIDE factor
#              | factor
#
#   factor     : NUMBER
#              | NAME
#              | PLUS factor
#              | MINUS factor
#              | LPAREN expression RPAREN
#
# -----------------------------------------------------------------------------

from ply.lex import lex
from ply.yacc import yacc  

# --- Tokenizer

# All tokens must be named in advance.
tokens = ( 'LET', 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'LPAREN', 'RPAREN',
           'NAME', 'NUMBER', 'EQUALS', 'SEMICOLON' )

# Ignored characters. Newlines are deliberately NOT listed here: PLY skips
# every character in t_ignore before it tries any rule, so including '\n'
# would keep the t_ignore_newline rule below from ever matching and the
# lexer's line counter would never advance.
t_ignore = ' \r\t'

# Token matching rules are written as regexs.
# NAME and NUMBER are not listed here: they need an action, so they are
# defined as functions further down (a string t_NAME would only be
# overwritten by that function definition).
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_EQUALS = r'=' # or r'\=' if it needs to be escaped
t_SEMICOLON = r';'

# A function can be used if there is an associated action.
# Write the matching regex in the docstring.
def t_NUMBER(t):
    r'\d+'
    # The docstring above is the token's regex (one or more digits).
    # Swap the matched text for its integer value, then emit the token.
    digits = t.value
    t.value = int(digits)
    return t

def t_NAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    # Identifiers and the reserved word share the regex in the docstring;
    # the spellings 'let' and 'LET' are retagged as the LET keyword, any
    # other match keeps the type it arrived with.
    if t.value in ('let', 'LET'):
        t.type = 'LET'
    return t

# Ignored token with an action associated with it
def t_ignore_newline(t):
    r'\n+'
    # Advance the lexer's line counter by the number of newlines matched.
    # Nothing is returned, so the match produces no token.
    newline_count = t.value.count('\n')
    t.lexer.lineno += newline_count

# Error handler for illegal characters
def t_error(t):
    # t.value holds the remaining input starting at the bad character:
    # report that character, then step over it so lexing can continue.
    bad_char = t.value[0]
    print(f'Illegal character {bad_char!r}')
    t.lexer.skip(1)

# Build the lexer object from the token names and t_* rules defined above.
lexer = lex()
    
# --- Parser

# Write functions for each grammar rule which is
# specified in the docstring.

### Productions for program / assignment_list / assignment ###
### (implemented by the next 5 rule functions) ###

def p_program_withassignments(p):
    '''
    program : assignment_list expression
    '''
    # Tag the node 'assignments' so it can be told apart from a bare
    # expression tree: ('assignments', <assignment data>, <final expression>).
    bindings, final_expr = p[1], p[2]
    p[0] = ('assignments', bindings, final_expr)

def p_program_noassignments(p):
    '''
    program : expression
    '''
    # With no assignments, the program's tree is simply the expression's tree.
    expression_node = p[1]
    p[0] = expression_node
    
def p_assignment_listsingle(p):
    '''
    assignment_list : assignment SEMICOLON
    '''
    # The assignment's (name, expression) tuple is passed through as-is; it
    # is NOT wrapped in another tuple, so the list stays a flat sequence.
    only_assignment = p[1]
    p[0] = only_assignment
    
def p_assignment_listmultiple(p):
    '''
    assignment_list : assignment SEMICOLON assignment_list
    '''
    # Tuple concatenation keeps the list flat:
    #   (name1, expr1) + (name2, expr2, ...) -> (name1, expr1, name2, expr2, ...)
    first, remaining = p[1], p[3]
    p[0] = first + remaining
    
def p_assignment(p):
    '''
    assignment : LET NAME EQUALS expression
    '''
    # Keep only the variable name and the expression that initializes it;
    # the LET keyword and the '=' token carry no further information.
    variable, initializer = p[2], p[4]
    p[0] = (variable, initializer)
  

  ### ###


def p_expression(p):
    '''
    expression : term PLUS term
               | term MINUS term
    '''
    # p lines up with the symbols of the matched rule:
    #
    #   expression : term PLUS term
    #   p[0]       : p[1] p[2] p[3]
    #
    # The resulting node stores the operator first, then the two operands.
    # Each side is a single term, so this rule covers exactly one +/-.
    left, op, right = p[1], p[2], p[3]
    p[0] = ('binop', op, left, right)

def p_expression_term(p):
    '''
    expression : term
    '''
    # An expression made of a single term is represented by that term's node.
    term_node = p[1]
    p[0] = term_node

def p_term(p):
    '''
    term : factor TIMES factor
         | factor DIVIDE factor
    '''
    # Same node shape as for +/-: ('binop', operator, left, right).
    left, op, right = p[1], p[2], p[3]
    p[0] = ('binop', op, left, right)

def p_term_factor(p):
    '''
    term : factor
    '''
    # A term made of a single factor is represented by that factor's node.
    factor_node = p[1]
    p[0] = factor_node

def p_factor_number(p):
    '''
    factor : NUMBER
    '''
    # Leaf node for a literal: ('number', <token value>).
    literal = p[1]
    p[0] = ('number', literal)

def p_factor_name(p):
    '''
    factor : NAME
    '''
    # Leaf node for a variable reference: ('name', <identifier>).
    identifier = p[1]
    p[0] = ('name', identifier)

def p_factor_unary(p):
    '''
    factor : PLUS factor
           | MINUS factor
    '''
    # Node layout: ('unary', <sign token>, <operand node>).
    sign, operand = p[1], p[2]
    p[0] = ('unary', sign, operand)

def p_factor_grouped(p):
    '''
    factor : LPAREN expression RPAREN
    '''
    # The parentheses themselves are dropped; only the inner expression is
    # kept, wrapped in a 'grouped' node.
    inner = p[2]
    p[0] = ('grouped', inner)

def p_error(p):
    # Report a syntax error. `p` is the token the parser could not accept.
    # When the input ends before the grammar is satisfied the parser passes
    # None instead of a token, so p.value must not be read unconditionally.
    if p is None:
        print('Syntax error at end of input')
        return
    print(f'Syntax error at {p.value!r}')


# Build the parser from the p_* rule functions defined above.
parser = yacc()

# Parse a sample program: one assignment followed by the final expression.
# The resulting tree is kept in `ast` (a later cell evaluates it) and printed.
ast = parser.parse('let x = 5; 2 * 3 + 4 * (5 - x)')
print(ast)



#### use this ####
# Now that we have a parse tree, let's evaluate it.
# Possible expr tuples produced by the parser:
#   ('number', 3)
#   ('name', 'x')
#   ('grouped', expr)
#   ('unary', op, expr)              op is '+' or '-'
#   ('binop', op, expr1, expr2)      op is one of the 4 binops: '+', '-', '*', '/'
#     [0]     [1]  [2]    [3]


## check num and binop ##
def evaluateExpression(expr, symboltable):
    """Recursively evaluate an expression tree.

    Args:
        expr: A node tuple whose first element names its kind:
            ('number', value), ('name', identifier), ('grouped', expr),
            ('unary', op, operand) or ('binop', op, left, right).
        symboltable: Mapping from variable name to its value.

    Returns:
        The value of the expression. '/' uses Python's true division.
        A node of an unrecognised kind evaluates to 0, and a 'binop' whose
        operator is not one of '+', '-', '*', '/' evaluates to None.

    Raises:
        KeyError: If a 'name' node refers to a variable that is missing
            from ``symboltable``.
    """
    print(expr)  # trace: every node is printed as it is visited
    kind = expr[0]
    if kind == 'grouped':
        return evaluateExpression(expr[1], symboltable)
    elif kind == 'name':
        name = expr[1]
        if name not in symboltable:
            # Same exception type as a plain dict lookup, but with a message
            # that says what actually went wrong.
            raise KeyError(f'undefined variable {name!r}')
        return symboltable[name]
    elif kind == 'number':
        return expr[1]
    elif kind == 'binop':
        op = expr[1]
        if op == '+':
            return evaluateExpression(expr[2], symboltable) + evaluateExpression(expr[3], symboltable)
        if op == '-':
            return evaluateExpression(expr[2], symboltable) - evaluateExpression(expr[3], symboltable)
        if op == '*':
            return evaluateExpression(expr[2], symboltable) * evaluateExpression(expr[3], symboltable)
        if op == '/':
            return evaluateExpression(expr[2], symboltable) / evaluateExpression(expr[3], symboltable)
    elif kind == 'unary':
        # Layout is ('unary', op, operand): the operand is expr[2], while
        # expr[1] is only the sign.
        operand = evaluateExpression(expr[2], symboltable)
        if expr[1] == '-':
            return -operand
        else:
            return operand
    else:
        return 0

def populateSymbols(symbollist):
    """Build the variable table from a flat assignment sequence.

    ``symbollist`` alternates name, expression, name, expression, ...
    Each expression is evaluated against the table built so far, so a later
    assignment can refer to variables assigned before it.

    Returns:
        A dict mapping each variable name to its evaluated value.
    """
    table = {}
    # Names sit at the even positions; the matching expression follows each.
    for pos in range(0, len(symbollist), 2):
        name = symbollist[pos]
        value = evaluateExpression(symbollist[pos + 1], table)
        table[name] = value
    return table

def evaluate(ast):
    """Evaluate a parsed program and return the value of its final expression.

    ``ast`` is either a bare expression node, or
    ('assignments', <flat assignment sequence>, <expression>), in which case
    the assignments are evaluated first and the expression is evaluated
    against the resulting variable table.
    """
    if ast[0] != 'assignments':
        # Bare expression: there are no variables to look up.
        return evaluateExpression(ast, {})
    final_expr = ast[2]
    symbols = populateSymbols(ast[1])
    return evaluateExpression(final_expr, symbols)



('assignments', ('x', ('number', 5)), ('binop', '+', ('binop', '*', ('number', 2), ('number', 3)), ('binop', '*', ('number', 4), ('grouped', ('binop', '-', ('number', 5), ('name', 'x'))))))


In [36]:
# Two assignments followed by an expression that uses both variables;
# evaluates to 5 * 4 = 20.
test = parser.parse('let x = 5; let y = 4; x * y')
evaluate(test)

('number', 5)
('number', 4)
('binop', '*', ('name', 'x'), ('name', 'y'))
('name', 'x')
('name', 'y')


20

In [None]:
# Read a whole program from the file example1.bas, parse it, then evaluate
# the resulting tree.
with open('example1.bas', 'r') as file:
  programstr = file.read()
astt = parser.parse(programstr)
evaluate(astt)

In [None]:
# Three independent assignments, then an expression mixing + and *.
xyz = parser.parse('let x = 4; let y = 5; let z = 6; x + y * z')
print(xyz)
# Same language without spaces; the second assignment refers to the first variable.
xpl = parser.parse('let x=2;let y=4+x;(y+3)*x')
print(xpl)

In [None]:
# Evaluate the trees parsed in earlier cells (each call also prints its node trace).
print(evaluate(xyz))  # 4 + 5 * 6 -> 34
print(evaluate(xpl))  # x = 2, y = 4 + 2 = 6, (6 + 3) * 2 -> 18
print(evaluate(ast))  # x = 5, 2 * 3 + 4 * (5 - 5) -> 6

In [None]:
# Colab only: mount Google Drive under /content/drive.
from google.colab import drive
drive.mount('/content/drive')