# CW2.1:  Compiler Front End for FUNC

Your overall task is to develop a compiler for the programming language given below, called ``FUNC``.
This overall task is composed of two parts:

- **CW2 Part I** is concerned with the implementation of the compiler’s front end (this document). This is worth 10 marks.
- **CW2 Part II** is concerned with the implementation of the compiler’s back end. This is also worth 10 marks and will be released later.

**CW 2 Part I** consists of two parts: 
- Writing a lexer (4 points)
- Writing a parser (6 points)

If you have any questions, use the lab slots or ask Kathrin & the Lab Helpers.

**IMPORTANT** 
Compiler errors: All code you submit must compile. Programs that do not compile will receive an automatic zero.
- If you are having trouble getting your assignment to compile, please visit consulting hours.
- If you run out of time, it is better to comment out the parts that do not compile, than hand in a more complete file that does not compile.

## Testing 

At the end of this file you'll find example programs you can test your code with.
**You will want to write additional tests for intermediate steps.**

You can easily write tests to ensure that your program behaves as expected as follows:

In [None]:
(* Example test: the expected value is written on the left of [=] and the
   expression under test on the right; [assert] fails if the two differ. *)
assert ([2;3;5;5;2;1] (* Expected result *) 
= [2;3;5] @ [5;2;1] (* Calling your function *) ) ;; 

**The plagiarism policy does not apply to the tests you write. 
Please feel free to share your tests with other students in the course.**

## Submission

Please submit a .zip file containing this notebook and the file ``CW/func.mll`` on Canvas by **Fri, 24th March**. 
Please ensure that you do not change the name or signature of the functions ``parse_exp``, ``parse_program``, etc. 

**Late Submissions.** See Canvas for F29LP's late-submission policy. 

**Plagiarism.** All code (except tests) is subject to the course's plagiarism policy. 

Happy coding!

## The Source Language: FUNC

The ``FUNC`` language has the following syntax: 

```
<program> ::= <methods> 
<methods> ::= <method>;[<methods>] 
<method> ::= method <id>([<args>]) [vars <args>] 
	begin <statements> [return <id>;] endmethod
<args> ::= <id>[,<args>] 
<statements> ::= <statement>;[<statements>] 
<statement> ::= <assign> | <if> | <while> | <rw>
<rw> ::= read <id> | write <exp>
<assign> ::= <id> := <exp>
<if> ::= if  <cond> then <statements> [else <statements>] endif 
<while> ::= while <cond> begin <statements> endwhile
<cond> ::= <bop> ( [<exps>] ) 
<bop> ::= less | lessEq | eq | nEq 
<exps> ::= <exp> [,<exps>] 
<exp> ::= <id>[( [<exps>] )] | <int> 
<int> is a natural number (no leading zeroes) 
<id> is any string that starts with a letter, followed by letters or digits, and that is not already a keyword
```

- Each program must have a function called ``main`` with no arguments and no return value. 
- All other functions may have an optional return value. If a function does not have a return value, it implicitly returns `0`.
- You should support the following built-in functions - assume they have been defined; they accept two integers and return an integer:
     - ``plus``, which adds its arguments;
     - ``times``, which multiplies its arguments;
     - ``minus``, which subtracts its arguments;
     - ``divide``, which divides its arguments.
- All the boolean operators (``less``, ``lessEq``, ``eq``, ``nEq``) are also binary, i.e. take two arguments.
- The ``read`` command assumes that the given variable is an ``int`` variable.

##### Example 

The following example illustrates a valid FUNC program (more examples later in the document)

```
method pow(x, y) vars i, res
begin
    res := x; 
    i := 1; 
    while less(i,y)
    begin
        res := times(res,x);
        i := plus(i,1); 
    endwhile;
    write res;
    return res;
endmethod;

method main() vars a, b, x
begin
    a := 5; b := 2; 
    x := pow(b,a);
    if  eq(x,32) then write 1; else write 0; endif; 
endmethod;
```

## Part 1: Lexing (4 Points)

Produce a lexer file into ``CW/func.mll`` together with a suitable representation of tokens.

**IMPORTANT** Jupyter Notebook automatically saves some output information. 
Each time you change the ``func.mll`` file and want to re-run the following commands, 
first choose in the menu Kernel -> Restart & Clear Output to ensure your changed file is used.

In [296]:
#require "jupyter.notebook" ;;
open Jupyter_notebook ;;

In [297]:
(* Run the lexer generator on func.mll. The returned Process.t value
   carries the exit status of the command. *)
Process.sh "ocamllex func.mll";;

(* Compile func.ml (the file produced by the lexer generator) and load the
   resulting func.cmo into the toplevel so the Func module can be used. *)
Process.sh "ocamlc -c func.ml";;
#load "func.cmo";;

(* [stream_to_list buffer] repeatedly calls [Func.token] on [buffer] and
   returns every token read before [Func.EOF], in source order. The [EOF]
   token itself is not included in the result.

   The loop accumulates tokens in reverse and reverses once at the end, so it
   runs in constant stack space even for long inputs. [Func.EOF] is written
   fully qualified because this cell runs before [open Func]. *)
let stream_to_list buffer =
  let rec collect acc =
    match Func.token buffer with
    | Func.EOF -> List.rev acc
    | tok -> collect (tok :: acc)
  in
  collect []

81 states, 5399 transitions, table size 22082 bytes


- : Jupyter_notebook.Process.t =
{Jupyter_notebook.Process.exit_status = Unix.WEXITED 0; stdout = None;
 stderr = None}


- : Jupyter_notebook.Process.t =
{Jupyter_notebook.Process.exit_status = Unix.WEXITED 0; stdout = None;
 stderr = None}


The files func.cmo and func.cmo disagree over interface Func


val stream_to_list : Lexing.lexbuf -> Func.token List.t = <fun>


In [298]:
(*
You can test your lexer here. 
See below the code for lexing program ex1.
You will want to test your lexer with more code snippets!

ex1 is a two-method FUNC program (pow and main) that exercises nested
function applications, if/else, read/write, while and return.
*)
let ex1 = "method pow(x,y) vars i, res,w
begin

	res := x(da,da(1,2,m(1,1)),1);
	i := 2;
	if eq(x,32) then 
		write 1;
		read a;
	else
		b := 11;
	endif;
	while less(i,y)
	begin
		res := times(res,x);
		i := plus(i,1);
        endwhile;
	write res;
	return res;

endmethod;

method main() vars a, b, x
begin

	a := 5; 
	b := 2;
	x := pow(b,a);
	if eq(x,32)
		 then write 1; 
	else 
		write 0;
	endif; 
endmethod;"

(* Bring the token constructors of the generated lexer module into scope. *)
open Func
(* Lex ex1 into a token list (everything up to, but excluding, EOF). *)
let res = stream_to_list (Lexing.from_string ex1)

val ex1 : string =
  "method pow(x,y) vars i, res,w\nbegin\n\n\tres := x(da,da(1,2,m(1,1)),1);\n\ti := 2;\n\tif eq(x,32) then \n\t\twrite 1;\n\t\tread a;\n\telse\n\t\tb := 11;\n\tendif;\n\twhile less(i,y)\n\tbegin\n\t\tres := times(res,x);\n\t\ti := plus(i,1);\n        endwhile;\n\twrite res;\n\treturn res;\n\nendmethod;\n\nmethod main() vars a, b, x\nbegin\n\n\t"... (* string length 401; truncated *)


val res : Func.token List.t =
  (::) (METHOD,
   [ID "pow"; LBRA; ID "x"; COMMA; ID "y"; RBRA; VARS; ID "i"; COMMA;
    ID "res"; COMMA; ID "w"; BEGIN; ID "res"; ASGN; ID "x"; LBRA; ID "da";
    COMMA; ID "da"; LBRA; INT 1; COMMA; INT 2; COMMA; ID "m"; LBRA; INT 1;
    COMMA; INT 1; RBRA; RBRA; COMMA; INT 1; RBRA; SEMI; ID "i"; ASGN; 
    INT 2; SEMI; IF; EQ; LBRA; ID "x"; COMMA; INT 32; RBRA; THEN; WRITE;
    INT 1; SEMI; READ; ID "a"; SEMI; ELSE; ID "b"; ASGN; INT 11; SEMI; ENDIF;
    SEMI; WHILE; LS; LBRA; ID "i"; COMMA; ID "y"; RBRA; BEGIN; ID "res";
    ASGN; MULT; LBRA; ID "res"; COMMA; ID "x"; RBRA; SEMI; ID "i"; ASGN;
    PLUS; LBRA; ID "i"; COMMA; INT 1; RBRA; SEMI; ENDWHILE; SEMI; WRITE;
    ID "res"; SEMI; RETURN; ID "res"; SEMI; ENDMETHOD; SEMI; METHOD;
    ID "main"; LBRA; RBRA; VARS; ID "a"; COMMA; ID "b"; COMMA; ID "x"; BEGIN;
    ID "a"; ASGN; INT 5; SEMI; ID "b"; ASGN; INT 2; SEMI; ID "x"; ASGN;
    ID "pow"; LBRA; ID "b"; COMMA; ID "a"; RBRA; SEMI; IF; EQ; LBRA; 
    

## Part 2: Parsing (6 Points)

Below you can see the abstract syntax, given as OCaml types, for the language described above.

In [299]:
(* Expressions: an integer literal, a variable reference, or the application
   of a named function to a list of argument expressions. *)
type exp = Numb of int | Id of string | App of string * exp list

(* Comparison operators that may head a condition. *)
type bop = Less | LessEq | Eq | NEq 
(* A condition: a comparison operator together with its two operands. *)
type cond = C of bop * exp * exp

(* Statements of the language. *)
type statement =
  Assign of string * exp (* variable name and the expression assigned to it *)
| Read of string (* variable to read into *)
| Write of exp (* expression to write *)
| If of cond * statement list (* condition and then-branch, no else-branch *)
| Ite of cond * statement list * statement list (* condition, then-branch, else-branch *)
| While of cond * statement list (* loop condition and loop body *)

(* A method definition. *)
type mmethod = M of string (* name of function *)
                * string list (* arguments *)
                * string list (* declarations *) 
                * statement list (* function body *)
                * string option (* optional name of the returned variable *)

(* A whole program: a list of method definitions. *)
type program = P of mmethod list

type exp = Numb of int | Id of string | App of string * exp list


type bop = Less | LessEq | Eq | NEq


type cond = C of bop * exp * exp


type statement =
    Assign of string * exp
  | Read of string
  | Write of exp
  | If of cond * statement list
  | Ite of cond * statement list * statement list
  | While of cond * statement list


type mmethod =
    M of string * string list * string list * statement list * string option


type program = P of mmethod list


Write a recursive-descent parser for ``FUNC``.
Your parser should contain at least: 
- a function ``parse_exp : token list -> exp * token list``
- a function ``parse_cond : token list -> cond * token list``
- a function ``parse_statement : token list -> statement * token list``
- a function ``parse_program : token list -> program * token list``

You will require more functions. 
You can get partial points by providing e.g. only ``parse_exp``. 

**Hints:** 
- Your parser does **not** have to ensure that variables, functions, the ``main`` function etc. exists or functions are applied to the right number of arguments.
- You will want to test your program step-by-step, e.g. test that ``parse_exp`` runs as expected before writing ``parse_cond``. 

In [300]:
exception SyntaxError of string
open List

(* [print_token t] renders a token as text for use in error messages.

   Constant tokens are printed by their constructor name; [ID] and [INT] also
   show their payload, e.g. [ID "pow"] and [INT 32].

   Only the constructors that appear elsewhere in this notebook are listed.
   NOTE(review): if the lexer defines further tokens (e.g. for minus/divide),
   they currently fall through to the generic "<token>" case; add them here. *)
let print_token (t : Func.token) : string =
  match t with
  | ID name -> Printf.sprintf "ID %S" name
  | INT n -> Printf.sprintf "INT %d" n
  | METHOD -> "METHOD"
  | ENDMETHOD -> "ENDMETHOD"
  | VARS -> "VARS"
  | BEGIN -> "BEGIN"
  | RETURN -> "RETURN"
  | READ -> "READ"
  | WRITE -> "WRITE"
  | IF -> "IF"
  | THEN -> "THEN"
  | ELSE -> "ELSE"
  | ENDIF -> "ENDIF"
  | WHILE -> "WHILE"
  | ENDWHILE -> "ENDWHILE"
  | LBRA -> "LBRA"
  | RBRA -> "RBRA"
  | COMMA -> "COMMA"
  | SEMI -> "SEMI"
  | ASGN -> "ASGN"
  | LS -> "LS"
  | LSE -> "LSE"
  | EQ -> "EQ"
  | NEQ -> "NEQ"
  | PLUS -> "PLUS"
  | MULT -> "MULT"
  | EOF -> "EOF"
  | _ -> "<token>"

(* [print_list s] renders every token of [s] with [print_token] and joins
   them left to right, putting a single space after each token (so a
   non-empty result ends in a space). The empty list yields "". *)
let print_list (s : Func.token list) =
  List.fold_right
    (fun tok rendered_tail ->
      String.cat (print_token tok) (String.cat " " rendered_tail))
    s ""

exception SyntaxError of string


val print_token : Func.token -> string = <fun>


val print_list : Func.token list -> string = <fun>


In [301]:
(* Optional helper functions *)

(* [parse_id xs] consumes a leading [ID] token and returns its name together
   with the remaining tokens.
   Raises [SyntaxError] if [xs] does not start with an identifier. *)
let parse_id : token list -> string * token list = function
  | ID name :: rest -> (name, rest)
  | _ -> raise (SyntaxError "Not an identifier.")
  
(* [parse_token x xs] checks that [xs] starts with the token [x] and returns
   the tokens after it.

   Tokens are compared with structural equality [=]: physical equality [==]
   is only reliable for constant constructors and would reject equal tokens
   that carry a payload, such as [ID "x"] or [INT 1].

   Raises [SyntaxError] naming the expected token and the remaining input if
   [xs] is empty or starts with a different token. *)
let parse_token (x : token) (xs : token list) =
  match xs with
  | y :: ys when x = y -> ys
  | _ ->
    raise
      (SyntaxError
         (String.concat ""
            [ "Token expected: "; print_token x;
              "; remaining input: "; print_list xs ]))

val parse_id : Func.token List.t -> string * Func.token list = <fun>


val parse_token : Func.token -> Func.token list -> Func.token list = <fun>


In [302]:

(* [parse_exp ts] parses one expression from the front of [ts]:

     <exp> ::= <id>[( [<exps>] )] | <int>

   and returns the expression with the unconsumed tokens.

   The lexer emits the dedicated tokens [PLUS] and [MULT] for the built-in
   function names plus and times (see the lexer output for ex1), so those
   tokens are accepted wherever an identifier may start an expression and are
   mapped back to the names "plus" and "times".
   NOTE(review): if the lexer also has dedicated tokens for minus/divide,
   they need the same treatment; their constructor names are not visible here.

   Raises [SyntaxError] if [ts] does not start with an expression or a
   closing bracket is missing. *)
let rec parse_exp (ts : token list) : exp * token list =
  match ts with
  | INT n :: rest -> (Numb n, rest)
  | ID name :: rest -> parse_id_or_app name rest
  | PLUS :: rest -> parse_id_or_app "plus" rest
  | MULT :: rest -> parse_id_or_app "times" rest
  | _ -> raise (SyntaxError "ID or Int Expected")

(* [parse_id_or_app name ts] decides, after the name [name] has been read,
   whether it is a plain variable or the head of an application: an opening
   bracket starts a (possibly empty) argument list, anything else leaves
   [name] as an identifier and [ts] untouched. *)
and parse_id_or_app (name : string) (ts : token list) : exp * token list =
  match ts with
  | LBRA :: RBRA :: rest -> (App (name, []), rest)
  | LBRA :: args ->
    let (parsed_args, after_args) = parse_exps args in
    (match after_args with
     | RBRA :: rest -> (App (name, parsed_args), rest)
     | _ -> raise (SyntaxError "RBRA Expected"))
  | _ -> (Id name, ts)

(* [parse_exps ts] parses a non-empty, comma-separated list of expressions:

     <exps> ::= <exp> [,<exps>]  *)
and parse_exps (ts : token list) =
  let (first, after_first) = parse_exp ts in
  match after_first with
  | COMMA :: after_comma ->
    let (others, rest) = parse_exps after_comma in
    (first :: others, rest)
  | _ -> ([first], after_first)

val parse_exp : Func.token list -> exp * Func.token List.t = <fun>
val parse_exps : Func.token list -> exp List.t * Func.token List.t = <fun>


In [303]:
parse_exp [ID "x";LBRA;ID "x";COMMA;ID "y";RBRA]

- : exp * Func.token List.t = (App ("x", [Id "x"; Id "y"]), [])


In [304]:
(* [parse_bop ts] consumes one comparison-operator token from the front of
   [ts] and returns the matching [bop] constructor with the remaining tokens.
   Raises [SyntaxError] if [ts] does not start with LSE, LS, EQ or NEQ. *)
let parse_bop : token list -> bop * token list = function
  | LS :: rest -> (Less, rest)
  | LSE :: rest -> (LessEq, rest)
  | EQ :: rest -> (Eq, rest)
  | NEQ :: rest -> (NEq, rest)
  | _ -> raise (SyntaxError "Expected LSE, LS, NEQ or EQ.") 
    
(* [parse_cond ts] parses a condition

     <cond> ::= <bop> ( [<exps>] )

   and returns it with the unconsumed tokens.

   - With two comma-separated operands it returns [C (op, lhs, rhs)].
   - With empty brackets it returns [C (op, Id "", Id "")]: the [cond] type
     has no way to express a missing operand, so empty identifiers are used
     as placeholders.
   - The opening bracket is mandatory in the grammar, so an operator that is
     not followed by [LBRA] is a syntax error rather than a condition with
     placeholder operands.

   Raises [SyntaxError] if the operator, a bracket, the comma or an operand
   is missing. *)
let parse_cond (ts : token list) : cond * token list =
  let (op, after_op) = parse_bop ts in
  match after_op with
  | LBRA :: RBRA :: rest -> (C (op, Id "", Id ""), rest)
  | LBRA :: operands ->
    let (lhs, after_lhs) = parse_exp operands in
    let after_comma = parse_token COMMA after_lhs in
    let (rhs, after_rhs) = parse_exp after_comma in
    (match after_rhs with
     | RBRA :: rest -> (C (op, lhs, rhs), rest)
     | _ -> raise (SyntaxError "RBRA Expected"))
  | _ -> raise (SyntaxError "LBRA Expected")

val parse_bop : Func.token list -> bop * Func.token list = <fun>


val parse_cond : Func.token list -> cond * Func.token list = <fun>


In [305]:
parse_cond [EQ; LBRA; ID "x"; COMMA; INT 32; RBRA]

- : cond * Func.token list = (C (Eq, Id "x", Numb 32), [])


In [306]:
(* [parse_statement ts] parses a single statement

     <statement> ::= <assign> | <if> | <while> | <rw>

   by dispatching on the first token, and returns the statement with the
   unconsumed tokens. The terminating [SEMI] is NOT consumed here; it is
   handled by [parse_statement_list].
   Raises [SyntaxError] if [ts] does not start a statement. *)
let rec parse_statement (ts : token list) : statement * token list =
  match ts with
  | ID _ :: _ -> parse_assign ts
  | IF :: _ -> parse_if ts
  | (READ | WRITE) :: _ -> parse_rw ts
  | WHILE :: _ -> parse_while ts
  | _ -> raise (SyntaxError "Statement Expected")

(* <assign> ::= <id> := <exp> *)
and parse_assign (ts : token list) =
  match ts with
  | ID target :: ASGN :: after_asgn ->
    let (value, rest) = parse_exp after_asgn in
    (Assign (target, value), rest)
  | _ -> raise (SyntaxError "Id Expected")

(* <if> ::= if <cond> then <statements> [else <statements>] endif

   Every statement of each branch is kept, in source order. Produces [If]
   when there is no else-branch and [Ite] otherwise. *)
and parse_if (ts : token list) =
  let after_if = parse_token IF ts in
  let (condition, after_cond) = parse_cond after_if in
  let after_then = parse_token THEN after_cond in
  let (then_branch, after_branch) = parse_statement_list after_then in
  match after_branch with
  | ELSE :: after_else ->
    let (else_branch, after_else_branch) = parse_statement_list after_else in
    (match after_else_branch with
     | ENDIF :: rest -> (Ite (condition, then_branch, else_branch), rest)
     | _ -> raise (SyntaxError "ENDIF Expected"))
  | ENDIF :: rest -> (If (condition, then_branch), rest)
  | _ -> raise (SyntaxError "Else or ENDIF Expected")

(* <rw> ::= read <id> | write <exp> *)
and parse_rw (ts : token list) =
  match ts with
  | READ :: ID target :: rest -> (Read target, rest)
  | READ :: _ -> raise (SyntaxError "Id Expected after READ")
  | WRITE :: after_write ->
    let (value, rest) = parse_exp after_write in
    (Write value, rest)
  | _ -> raise (SyntaxError "READ or WRITE Expected")

(* <while> ::= while <cond> begin <statements> endwhile

   The whole statement sequence of the body is parsed, and [ENDWHILE] is
   expected in the tokens that remain AFTER the body. *)
and parse_while (ts : token list) =
  let after_while = parse_token WHILE ts in
  let (condition, after_cond) = parse_cond after_while in
  let after_begin = parse_token BEGIN after_cond in
  let (body, after_body) = parse_statement_list after_begin in
  let rest = parse_token ENDWHILE after_body in
  (While (condition, body), rest)

(* [parse_statement_list ts] parses a non-empty sequence of statements

     <statements> ::= <statement>;[<statements>]

   and returns ALL of them in source order. After each [SEMI] the sequence
   continues only if the next token can start a statement; otherwise parsing
   stops and the remaining tokens (e.g. ELSE, ENDIF, ENDWHILE, RETURN) are
   returned untouched. *)
and parse_statement_list (ts : token list) : statement list * token list =
  let (first, after_first) = parse_statement ts in
  let after_semi = parse_token SEMI after_first in
  match after_semi with
  | (ID _ | IF | READ | WRITE | WHILE) :: _ ->
    let (others, rest) = parse_statement_list after_semi in
    (first :: others, rest)
  | _ -> ([first], after_semi)

(* [parse_statements ts] is kept with its original signature for existing
   callers: it parses a whole statement sequence but returns only the LAST
   statement of it. Use [parse_statement_list] to obtain every statement. *)
and parse_statements (ts : token list) : statement * token list =
  let (all_statements, rest) = parse_statement_list ts in
  (* [parse_statement_list] never returns an empty list, so [List.hd] is safe. *)
  (List.hd (List.rev all_statements), rest)
    

val parse_statement : Func.token list -> statement * Func.token list = <fun>
val parse_assign : Func.token list -> statement * Func.token List.t = <fun>
val parse_if : Func.token list -> statement * Func.token list = <fun>
val parse_rw : Func.token list -> statement * Func.token List.t = <fun>
val parse_while : Func.token list -> statement * Func.token list = <fun>
val parse_statements : Func.token list -> statement * Func.token list = <fun>


In [307]:
parse_if [IF; EQ; LBRA; ID "x"; COMMA; INT 32; RBRA; THEN; WRITE; INT 1; SEMI; ELSE;
   WRITE; INT 0; SEMI; ENDIF; SEMI]

- : statement * Func.token list =
(Ite (C (Eq, Id "x", Numb 32), [Write (Numb 1)], [Write (Numb 0)]), [SEMI])


In [308]:
(* [parse_args ts] parses a non-empty, comma-separated list of identifiers

     <args> ::= <id>[,<args>]

   and returns the names in source order with the unconsumed tokens.
   Raises [SyntaxError] (via [parse_id]) if an identifier is missing. *)
let rec parse_args (ts : token list) =
  match parse_id ts with
  | (name, COMMA :: after_comma) ->
    let (names, remaining) = parse_args after_comma in
    (name :: names, remaining)
  | (name, remaining) -> ([name], remaining)

(* and parse_method (ts : token list) : mmethod * token list =

and parse_methods (ts : token list) = 
    (let (e1,ts') = parse_method ts in 
    (let  ts'' = parse_token SEMI ts' in 
      match ts'' with 
      | METHOD :: xs -> (let (e2,ys) = parse_methods ts'' in (e2,ys))
      | _ -> (e1, ts'')))

let parse_program (ts: token list) : program * token list =
    let (e1,ts') = parse_methods ts in (P([e1]),ts')
 *)

val parse_args : Func.token list -> string List.t * Func.token list = <fun>


## Appendix - Example Programs

In [None]:
(* Example 1: pow computed with a while loop, called from main; main then
   writes 1 or 0 depending on an if/else test of the result.
   This binding shadows the ex1 defined in the lexer test cell. *)
let ex1 = "method pow(x, y) vars i, res
begin

	res := x;
	i := 1;
	while less(i,y)
	begin
		res := times(res,x);
		i := plus(i,1);
        endwhile;
	write res;
	return res;

endmethod;

method main() vars a, b, x
begin

	a := 5;
	b := 2;
	x := pow(b,a);
	if eq(x,32)
		 then write 1;
	else
		write 0;
	endif;

endmethod;
"    

(* Example 2: same text as the program used in the lexer test cell; it adds
   nested function applications, a multi-statement then-branch and read. *)
let ex2 = "method pow(x,y) vars i, res,w
begin

	res := x(da,da(1,2,m(1,1)),1);
	i := 2;
	if eq(x,32) then 
		write 1;
		read a;
	else
		b := 11;
	endif;
	while less(i,y)
	begin
		res := times(res,x);
		i := plus(i,1);
        endwhile;
	write res;
	return res;

endmethod;

method main() vars a, b, x
begin

	a := 5; 
	b := 2;
	x := pow(b,a);
	if eq(x,32)
		 then write 1; 
	else 
		write 0;
	endif; 
endmethod;"

(* Example 3: a single main method that reads inp, accumulates into res in a
   while loop counting inp down, and writes res. *)
let ex3 = "method main() vars inp, res
begin
read inp;
res:=0;
while less(0,inp)
begin
res := plus(res,inp);
inp := minus(inp,1);
endwhile;
write res;
endmethod;
"

(* Example 4: the loop of example 3 moved into a method sum that returns
   res; main reads the input, calls sum and writes the result. *)
let ex4 = "method sum(inp) vars res
begin
res:=0;
while less(0,inp)
begin
res := plus(res,inp);
inp := minus(inp,1);
endwhile;
return res;
endmethod;

method main() vars inp,res
begin
read inp;
res := sum(inp);
write res;
endmethod;"

(* Example 5: a recursive sum using if/else.
   NOTE(review): sum assigns to res, which is neither a parameter nor listed
   under vars, and the method has no return statement. The parser is not
   required to check this, but confirm the example program is as intended. *)
let ex5 = "method sum(inp) vars tmp
begin
if eq(inp,0) then
res := inp;
else
tmp := sum(minus(inp,1));
res := plus(tmp,inp);
endif;
endmethod;

method main() vars inp,res
begin
read inp;
res := sum(inp);
write res;
endmethod;"

(* [text_to_ast ex] runs the whole front end on the source text [ex]:
   the string is wrapped in a lexing buffer, turned into a token list by
   [stream_to_list], and that list is handed to [parse_program]. *)
let text_to_ast ex =
  ex |> Lexing.from_string |> stream_to_list |> parse_program

(* Compare with what you expect *)
(* Each binding holds the result of running text_to_ast on one of the
   example programs above. *)
let parsed1 = text_to_ast ex1 
let parsed2 = text_to_ast ex2
let parsed3 = text_to_ast ex3
let parsed4 = text_to_ast ex4
let parsed5 = text_to_ast ex5