diff --git a/langs/a86/ast.rkt b/langs/a86/ast.rkt index 0f2d073a..847eef81 100644 --- a/langs/a86/ast.rkt +++ b/langs/a86/ast.rkt @@ -131,7 +131,9 @@ (instruct Je (x) check:target) (instruct Jne (x) check:target) (instruct Jl (x) check:target) +(instruct Jle (x) check:target) (instruct Jg (x) check:target) +(instruct Jge (x) check:target) (instruct And (dst src) check:src-dest) (instruct Or (dst src) check:src-dest) (instruct Xor (dst src) check:src-dest) @@ -191,7 +193,9 @@ (Je? x) (Jne? x) (Jl? x) + (Jle? x) (Jg? x) + (Jge? x) (And? x) (Or? x) (Xor? x) @@ -271,8 +275,12 @@ (cons s (label-uses asm))] [(cons (Jg (? label-symbol? s)) asm) (cons s (label-uses asm))] + [(cons (Jge (? label-symbol? s)) asm) + (cons s (label-uses asm))] [(cons (Jl (? label-symbol? s)) asm) (cons s (label-uses asm))] + [(cons (Jle (? label-symbol? s)) asm) + (cons s (label-uses asm))] [(cons (Call (? label-symbol? s)) asm) (cons s (label-uses asm))] [(cons (Lea _ (? label-symbol? s)) asm) diff --git a/langs/a86/printer.rkt b/langs/a86/printer.rkt index b54e6ff6..cfca1460 100644 --- a/langs/a86/printer.rkt +++ b/langs/a86/printer.rkt @@ -54,7 +54,7 @@ [(Offset (? reg? r) i) (string-append "[" (reg->string r) " + " (number->string i) "]")] [(Offset (? label? l) i) - (string-append "[" (symbol->string l) " + " (number->string i) "]")] + (string-append "[" (label-symbol->string l) " + " (number->string i) "]")] [(Const l) (symbol->string l)] [(? exp?) (exp->string a)])) @@ -127,9 +127,15 @@ [(Jl l) (string-append tab "jl " (jump-target->string l))] + [(Jle l) + (string-append tab "jle " + (jump-target->string l))] [(Jg l) (string-append tab "jg " (jump-target->string l))] + [(Jge l) + (string-append tab "jge " + (jump-target->string l))] [(Call l) (string-append tab "call " (jump-target->string l))] diff --git a/langs/evildoer/interp-file.rkt b/langs/evildoer/interp-file.rkt index e6c9b1d3..843f7bc8 100644 --- a/langs/evildoer/interp-file.rkt +++ b/langs/evildoer/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) + (let ((r (interp (parse (read p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/fraud/interp-file.rkt b/langs/fraud/interp-file.rkt index e6c9b1d3..843f7bc8 100644 --- a/langs/fraud/interp-file.rkt +++ b/langs/fraud/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) + (let ((r (interp (parse (read p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/hoax/ast.rkt b/langs/hoax/ast.rkt index 50a257d3..4d9f978d 100644 --- a/langs/hoax/ast.rkt +++ b/langs/hoax/ast.rkt @@ -25,7 +25,7 @@ ;; | 'vector? | vector-length ;; | 'string? | string-length ;; type Op2 = '+ | '- | '< | '= -;; | 'cons +;; | 'cons | 'eq? ;; | 'make-vector | 'vector-ref ;; | 'make-string | 'string-ref ;; type Op3 = 'vector-set! diff --git a/langs/hoax/compile-ops.rkt b/langs/hoax/compile-ops.rkt index 5461146b..a7ee5c5d 100644 --- a/langs/hoax/compile-ops.rkt +++ b/langs/hoax/compile-ops.rkt @@ -216,9 +216,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) diff --git a/langs/hoax/interp-file.rkt b/langs/hoax/interp-file.rkt index e6c9b1d3..843f7bc8 100644 --- a/langs/hoax/interp-file.rkt +++ b/langs/hoax/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) + (let ((r (interp (parse (read p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/hoax/interp.rkt b/langs/hoax/interp.rkt index 89243152..cab103af 100644 --- a/langs/hoax/interp.rkt +++ b/langs/hoax/interp.rkt @@ -33,7 +33,7 @@ [(Eof) eof] [(Empty) '()] [(Var x) (lookup r x)] - [(Str s) (string-copy s)] + [(Str s) s] [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] diff --git a/langs/hoax/test/test-runner.rkt b/langs/hoax/test/test-runner.rkt index 22735575..403a02ef 100644 --- a/langs/hoax/test/test-runner.rkt +++ b/langs/hoax/test/test-runner.rkt @@ -149,7 +149,10 @@ (check-equal? (run '(string-ref "fred" 2)) #\e) (check-equal? (run '(string-ref "fred" 4)) 'err) (check-equal? (run '(string? "fred")) #t) - (check-equal? (run '(string? (cons 1 2))) #f)) + (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff")) (define (test-runner-io run) ;; Evildoer examples diff --git a/langs/hustle/interp-file.rkt b/langs/hustle/interp-file.rkt index e6c9b1d3..843f7bc8 100644 --- a/langs/hustle/interp-file.rkt +++ b/langs/hustle/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) + (let ((r (interp (parse (read p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/iniquity/compile-ops.rkt b/langs/iniquity/compile-ops.rkt index dbfacad2..44f13545 100644 --- a/langs/iniquity/compile-ops.rkt +++ b/langs/iniquity/compile-ops.rkt @@ -216,9 +216,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) diff --git a/langs/iniquity/interp-file.rkt b/langs/iniquity/interp-file.rkt index aabe615b..c2490030 100644 --- a/langs/iniquity/interp-file.rkt +++ b/langs/iniquity/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read-all p)))) + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/iniquity/interp.rkt b/langs/iniquity/interp.rkt index db40c7f5..e5cba4c9 100644 --- a/langs/iniquity/interp.rkt +++ b/langs/iniquity/interp.rkt @@ -36,7 +36,7 @@ [(Eof) eof] [(Empty) '()] [(Var x) (lookup r x)] - [(Str s) (string-copy s)] + [(Str s) s] [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] diff --git a/langs/iniquity/test/test-runner.rkt b/langs/iniquity/test/test-runner.rkt index 7a4381f6..0282f301 100644 --- a/langs/iniquity/test/test-runner.rkt +++ b/langs/iniquity/test/test-runner.rkt @@ -143,6 +143,9 @@ (check-equal? (run '(string-ref "fred" 4)) 'err) (check-equal? (run '(string? "fred")) #t) (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") ;; Iniquity tests (check-equal? (run diff --git a/langs/jig/compile-ops.rkt b/langs/jig/compile-ops.rkt index eda019ce..3aee63bd 100644 --- a/langs/jig/compile-ops.rkt +++ b/langs/jig/compile-ops.rkt @@ -216,9 +216,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) diff --git a/langs/jig/interp-file.rkt b/langs/jig/interp-file.rkt index aabe615b..c2490030 100644 --- a/langs/jig/interp-file.rkt +++ b/langs/jig/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read-all p)))) + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/jig/interp.rkt b/langs/jig/interp.rkt index db40c7f5..e5cba4c9 100644 --- a/langs/jig/interp.rkt +++ b/langs/jig/interp.rkt @@ -36,7 +36,7 @@ [(Eof) eof] [(Empty) '()] [(Var x) (lookup r x)] - [(Str s) (string-copy s)] + [(Str s) s] [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] diff --git a/langs/jig/test/test-runner.rkt b/langs/jig/test/test-runner.rkt index 8e79d65e..a04433e7 100644 --- a/langs/jig/test/test-runner.rkt +++ b/langs/jig/test/test-runner.rkt @@ -143,6 +143,9 @@ (check-equal? (run '(string-ref "fred" 4)) 'err) (check-equal? (run '(string? "fred")) #t) (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") ;; Iniquity tests (check-equal? (run diff --git a/langs/knock/compile-ops.rkt b/langs/knock/compile-ops.rkt index eda019ce..3aee63bd 100644 --- a/langs/knock/compile-ops.rkt +++ b/langs/knock/compile-ops.rkt @@ -216,9 +216,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) diff --git a/langs/knock/interp-file.rkt b/langs/knock/interp-file.rkt new file mode 100644 index 00000000..c2490030 --- /dev/null +++ b/langs/knock/interp-file.rkt @@ -0,0 +1,15 @@ +#lang racket +(provide main) +(require "parse.rkt" "interp.rkt" "read-all.rkt") + +;; String -> Void +;; Parse and interpret contents of given filename, +;; print result on stdout +(define (main fn) + (let ((p (open-input-file fn))) + (begin + (read-line p) ; ignore #lang racket line + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) + (close-input-port p)))) diff --git a/langs/knock/test/test-runner.rkt b/langs/knock/test/test-runner.rkt index 26e1b4fc..2bfa21fd 100644 --- a/langs/knock/test/test-runner.rkt +++ b/langs/knock/test/test-runner.rkt @@ -143,6 +143,9 @@ (check-equal? (run '(string-ref "fred" 4)) 'err) (check-equal? (run '(string? "fred")) #t) (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") ;; Iniquity tests (check-equal? (run diff --git a/langs/loot/ast.rkt b/langs/loot/ast.rkt index f6a9f159..b430b22d 100644 --- a/langs/loot/ast.rkt +++ b/langs/loot/ast.rkt @@ -21,6 +21,7 @@ ;; | (Begin Expr Expr) ;; | (Let Id Expr Expr) ;; | (Var Id) +;; | (Match Expr (Listof Pat) (Listof Expr)) ;; | (App Expr (Listof Expr)) ;; | (Lam Id (Listof Id) Expr) ;; type Id = Symbol @@ -30,13 +31,24 @@ ;; | 'write-byte | 'eof-object? ;; | 'box | 'car | 'cdr | 'unbox ;; | 'empty? | 'cons? | 'box? -;; | 'vector? | vector-length -;; | 'string? | string-length +;; | 'vector? | 'vector-length +;; | 'string? | 'string-length ;; type Op2 = '+ | '- | '< | '= ;; | 'cons ;; | 'make-vector | 'vector-ref ;; | 'make-string | 'string-ref ;; type Op3 = 'vector-set! +;; type Pat = (PVar Id) +;; | (PWild) +;; | (PLit Lit) +;; | (PBox Pat) +;; | (PCons Pat Pat) +;; | (PAnd Pat Pat) +;; type Lit = Boolean +;; | Character +;; | Integer +;; | '() + (struct Eof () #:prefab) (struct Empty () #:prefab) (struct Int (i) #:prefab) @@ -53,3 +65,11 @@ (struct Var (x) #:prefab) (struct App (e es) #:prefab) (struct Lam (f xs e) #:prefab) +(struct Match (e ps es) #:prefab) + +(struct PVar (x) #:prefab) +(struct PWild () #:prefab) +(struct PLit (x) #:prefab) +(struct PBox (p) #:prefab) +(struct PCons (p1 p2) #:prefab) +(struct PAnd (p1 p2) #:prefab) diff --git a/langs/loot/compile-ops.rkt b/langs/loot/compile-ops.rkt index 1616d9b8..2923cc2b 100644 --- a/langs/loot/compile-ops.rkt +++ b/langs/loot/compile-ops.rkt @@ -216,9 +216,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) diff --git a/langs/loot/compile.rkt b/langs/loot/compile.rkt index 40b97486..7b9493f2 100644 --- a/langs/loot/compile.rkt +++ b/langs/loot/compile.rkt @@ -105,7 +105,8 @@ [(Begin e1 e2) (compile-begin e1 e2 c t?)] [(Let x e1 e2) (compile-let x e1 e2 c t?)] [(App e es) (compile-app e es c t?)] - [(Lam f xs e) (compile-lam f xs e c)])) + [(Lam f xs e) (compile-lam f xs e c)] + [(Match e ps es) (compile-match e ps es c t?)])) ;; Value -> Asm (define (compile-value v) @@ -300,6 +301,108 @@ (Push rax) (compile-es es (cons #f c)))])) +;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +(define (compile-match e ps es c t?) + (let ((done (gensym))) + (seq (compile-e e c t?) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched + +;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) + +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i f cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) + (Add rsp (* 8 (length cm))) + (Jmp done) + f + (Label next))]))) + +;; Pat CEnv Symbol -> (list Asm Asm CEnv) +(define (compile-pattern p cm next) + (match p + [(PWild) + (list (seq) (seq) cm)] + [(PVar x) + (list (seq (Push rax)) + (seq) + (cons x cm))] + [(PLit l) + (let ((fail (gensym))) + (list (seq (Cmp rax (imm->bits l)) + (Jne fail)) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))] + [(PAnd p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 f2) + cm2)])])] + [(PBox p) + (match (compile-pattern p cm next) + [(list i1 f1 cm1) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Jne fail) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + (seq f1 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm1))])] + [(PCons p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Jne fail) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 + f2 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm2))])])])) + ;; Id CEnv -> Integer (define (lookup x cenv) (match cenv diff --git a/langs/loot/interp-defun.rkt b/langs/loot/interp-defun.rkt index 36494ae3..3821e0c2 100644 --- a/langs/loot/interp-defun.rkt +++ b/langs/loot/interp-defun.rkt @@ -90,7 +90,44 @@ (if (= (length xs) (length vs)) (interp-env e (append (zip xs vs) r) ds) 'err)] - [_ 'err])])])])) + [_ 'err])])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) ;; Id Env [Listof Defn] -> Answer (define (interp-var x r ds) diff --git a/langs/loot/interp-file.rkt b/langs/loot/interp-file.rkt index aabe615b..c2490030 100644 --- a/langs/loot/interp-file.rkt +++ b/langs/loot/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read-all p)))) + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/loot/interp.rkt b/langs/loot/interp.rkt index 41a9ca49..ca1ffa57 100644 --- a/langs/loot/interp.rkt +++ b/langs/loot/interp.rkt @@ -89,7 +89,44 @@ [vs (if (procedure? f) (apply f vs) - 'err)])])])) + 'err)])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) ;; Id Env [Listof Defn] -> Answer (define (interp-var x r ds) diff --git a/langs/loot/parse.rkt b/langs/loot/parse.rkt index 9e52a49c..5cce0885 100644 --- a/langs/loot/parse.rkt +++ b/langs/loot/parse.rkt @@ -42,6 +42,8 @@ (If (parse-e e1) (parse-e e2) (parse-e e3))] [(list 'let (list (list (? symbol? x) e1)) e2) (Let x (parse-e e1) (parse-e e2))] + [(cons 'match (cons e ms)) + (parse-match (parse-e e) ms)] [(list (or 'lambda 'λ) xs e) (if (and (list? xs) (andmap symbol? xs)) @@ -51,6 +53,32 @@ (App (parse-e e) (map parse-e es))] [_ (error "Parse error" s)])) +(define (parse-match e ms) + (match ms + ['() (Match e '() '())] + [(cons (list p r) ms) + (match (parse-match e ms) + [(Match e ps es) + (Match e + (cons (parse-pat p) ps) + (cons (parse-e r) es))])])) + +(define (parse-pat p) + (match p + [(? boolean?) (PLit p)] + [(? integer?) (PLit p)] + [(? char?) (PLit p)] + ['_ (PWild)] + [(? symbol?) (PVar p)] + [(list 'quote (list)) + (PLit '())] + [(list 'box p) + (PBox (parse-pat p))] + [(list 'cons p1 p2) + (PCons (parse-pat p1) (parse-pat p2))] + [(list 'and p1 p2) + (PAnd (parse-pat p1) (parse-pat p2))])) + (define op0 '(read-byte peek-byte void)) diff --git a/langs/loot/test/test-runner.rkt b/langs/loot/test/test-runner.rkt index cdeecb8a..2932fb91 100644 --- a/langs/loot/test/test-runner.rkt +++ b/langs/loot/test/test-runner.rkt @@ -143,6 +143,9 @@ (check-equal? (run '(string-ref "fred" 4)) 'err) (check-equal? (run '(string? "fred")) #t) (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") ;; Iniquity tests (check-equal? (run @@ -203,6 +206,40 @@ '(let ((z 2)) (f 1 2))) 10) + ;; Knock examples + (check-equal? (run '(match 1)) 'err) + (check-equal? (run '(match 1 [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2] [0 3])) + 2) + (check-equal? (run '(match 1 [2 1] [0 3])) + 'err) + (check-equal? (run '(match 1 [_ 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [x x] [_ 3])) + (cons 1 2)) + (check-equal? (run '(match (cons 1 2) [(cons x y) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons x 2) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons 3 2) 0] [_ 3])) + 3) + (check-equal? (run '(match 1 [(cons x y) x] [_ 3])) + 3) + (check-equal? (run '(match (cons 1 2) [(cons 1 3) 0] [(cons 1 y) y] [_ 3])) + 2) + (check-equal? (run '(match (box 1) [(box 1) 0] [_ 1])) + 0) + (check-equal? (run '(match (box 1) [(box 2) 0] [_ 1])) + 1) + (check-equal? (run '(match (box 1) [(box x) x] [_ 2])) + 1) ;; Loot examples (check-equal? (run '((λ (x) x) 5)) diff --git a/langs/mountebank/Makefile b/langs/mountebank/Makefile index 9b74bfc0..76f979af 100644 --- a/langs/mountebank/Makefile +++ b/langs/mountebank/Makefile @@ -11,6 +11,7 @@ objs = \ main.o \ values.o \ print.o \ + symbol.o \ io.o default: runtime.o diff --git a/langs/mountebank/ast.rkt b/langs/mountebank/ast.rkt index b4be14cd..22e60985 100644 --- a/langs/mountebank/ast.rkt +++ b/langs/mountebank/ast.rkt @@ -17,12 +17,14 @@ ;; | (Begin Expr Expr) ;; | (Let Id Expr Expr) ;; | (Var Id) +;; | (Match Expr (Listof Pat) (Listof Expr)) ;; | (App Expr (Listof Expr)) ;; | (Lam Id (Listof Id) Expr) ;; type Datum = Integer ;; | Char ;; | Boolean ;; | String +;; | Symbol ;; | (Boxof Datum) ;; | (Listof Datum) ;; | (Vectorof Datum) @@ -35,11 +37,22 @@ ;; | 'empty? | 'cons? | 'box? ;; | 'vector? | vector-length ;; | 'string? | string-length +;; | 'symbol? | string->symbol | symbol->string ;; type Op2 = '+ | '- | '< | '= ;; | 'cons | 'eq? ;; | 'make-vector | 'vector-ref ;; | 'make-string | 'string-ref ;; type Op3 = 'vector-set! +;; type Pat = (PVar Id) +;; | (PWild) +;; | (PLit Lit) +;; | (PBox Pat) +;; | (PCons Pat Pat) +;; | (PAnd Pat Pat) +;; type Lit = Boolean +;; | Character +;; | Integer +;; | '() (struct Eof () #:prefab) (struct Prim0 (p) #:prefab) @@ -53,3 +66,11 @@ (struct App (e es) #:prefab) (struct Lam (f xs e) #:prefab) (struct Quote (d) #:prefab) +(struct Match (e ps es) #:prefab) + +(struct PVar (x) #:prefab) +(struct PWild () #:prefab) +(struct PLit (x) #:prefab) +(struct PBox (p) #:prefab) +(struct PCons (p1 p2) #:prefab) +(struct PAnd (p1 p2) #:prefab) diff --git a/langs/mountebank/compile-datum.rkt b/langs/mountebank/compile-datum.rkt index 9e00b110..7f2e96f6 100644 --- a/langs/mountebank/compile-datum.rkt +++ b/langs/mountebank/compile-datum.rkt @@ -1,22 +1,45 @@ #lang racket -(provide compile-datum) -(require "types.rkt" +(provide compile-datum compile-literals) +(require "types.rkt" + "intern.rkt" a86/ast) ;; Registers used (define rax 'rax) ; return +;; QEnv -> Asm +(define (compile-literals q) + (match q + ['() (seq)] + [(cons (cons s (Ref l _)) q) + (seq (compile-literal (to-string s) l) + (compile-literals q))])) + +;; String Label -> Asm +(define (compile-literal s l) + (seq (Label l) + (Dq (string-length s)) + (compile-string-chars (string->list s)) + (if (odd? (string-length s)) + (seq (Dd 0)) + (seq)))) + (define (compound? x) - (or (string? x) + (or ;(string? x) + ;(symbol? x) (cons? x) (vector? x) (box? x))) ;; Datum -> Asm (define (compile-datum d) - (if (compound? d) - (compile-compound-datum d) - (seq (Mov rax (imm->bits d))))) + (cond + [(compound? d) + (compile-compound-datum d)] + [(Ref? d) + (seq (Lea rax (Plus (Ref-label d) (Ref-type-tag d))))] + [else + (seq (Mov rax (imm->bits d)))])) (define (compile-compound-datum d) (match (compile-quoted d) @@ -29,13 +52,16 @@ ;; Datum -> (cons AsmExpr Asm) (define (compile-quoted c) (cond - [(string? c) (compile-datum-string c)] + ;[(string? c) (compile-datum-string c)] + ;[(symbol? c) (compile-datum-symbol (symbol->string c))] [(vector? c) (compile-datum-vector (vector->list c))] [(box? c) (compile-datum-box (unbox c))] [(cons? c) (compile-datum-cons (car c) (cdr c))] + [(Ref? c) (cons (Plus (Ref-label c) (Ref-type-tag c)) '())] [else (cons (imm->bits c) '())])) ;; String -> (cons AsmExpr Asm) +#; (define (compile-datum-string c) (let ((l (gensym 'string))) (cons (Plus l type-str) @@ -46,6 +72,18 @@ (seq (Dd 0)) (seq)))))) +;; String -> (cons AsmExpr Asm) +#; +(define (compile-datum-symbol c) + (let ((l (gensym 'symbol))) + (cons (Plus l type-symb) + (seq (Label l) + (Dq (string-length c)) + (compile-string-chars (string->list c)) + (if (odd? (string-length c)) + (seq (Dd 0)) + (seq)))))) + ;; [Listof Datum] -> (cons AsmExpr Asm) (define (compile-datum-vector ds) (match ds @@ -90,3 +128,9 @@ [(cons c cs) (seq (Dd (char->integer c)) (compile-string-chars cs))])) + +;; (U String Symbol) -> String +(define (to-string s) + (if (symbol? s) + (symbol->string s) + s)) diff --git a/langs/mountebank/compile-ops.rkt b/langs/mountebank/compile-ops.rkt index 1616d9b8..0ef65bc2 100644 --- a/langs/mountebank/compile-ops.rkt +++ b/langs/mountebank/compile-ops.rkt @@ -21,7 +21,11 @@ unpad-stack)] ['peek-byte (seq pad-stack (Call 'peek_byte) - unpad-stack)])) + unpad-stack)] + ['gensym (seq pad-stack + (Call 'gensym) + unpad-stack + (Or rax type-symb))])) ;; Op1 -> Asm (define (compile-op1 p) @@ -80,6 +84,8 @@ (type-pred ptr-mask type-vect)] ['string? (type-pred ptr-mask type-str)] + ['symbol? + (type-pred ptr-mask type-symb)] ['vector-length (let ((zero (gensym)) (done (gensym))) @@ -105,7 +111,24 @@ (Jmp done) (Label zero) (Mov rax 0) - (Label done)))])) + (Label done)))] + ['string->symbol + (seq (assert-string rax) + (Xor rax type-str) + (Mov rdi rax) + pad-stack + (Call 'intern_symbol) + unpad-stack + (Or rax type-symb))] + ['symbol->string + (seq (assert-symbol rax) + (Xor rax type-symb) + (Mov rdi rax) + pad-stack + ;; FIXME: this allocates off-heap + (Call 'str_dup) + unpad-stack + (Or rax type-str))])) ;; Op2 -> Asm (define (compile-op2 p) @@ -140,7 +163,7 @@ (let ((true (gensym))) (seq (Je true) (Mov rax val-false) - (Label true))))] + (Label true))))] ['cons (seq (Mov (Offset rbx 0) rax) (Pop rax) @@ -150,7 +173,7 @@ (Add rbx 16))] ['eq? (seq (Pop r8) - (eq r8 rax))] + (eq r8 rax))] ['make-vector (let ((loop (gensym)) (done (gensym)) @@ -304,6 +327,8 @@ (assert-type ptr-mask type-vect)) (define assert-string (assert-type ptr-mask type-str)) +(define assert-symbol + (assert-type ptr-mask type-symb)) (define assert-proc (assert-type ptr-mask type-proc)) diff --git a/langs/mountebank/compile.rkt b/langs/mountebank/compile.rkt index 2e31ef39..f730d443 100644 --- a/langs/mountebank/compile.rkt +++ b/langs/mountebank/compile.rkt @@ -4,6 +4,7 @@ "types.rkt" "lambdas.rkt" "fv.rkt" + "intern.rkt" "compile-ops.rkt" "compile-datum.rkt" a86/ast) @@ -18,12 +19,13 @@ ;; Prog -> Asm (define (compile p) - (match p - [(Prog ds e) + (match (intern p) + [(cons (Prog ds e) q) (prog (externs) (Global 'entry) (Label 'entry) (Mov rbx rdi) ; recv heap pointer + (init-symbol-table q) (compile-defines-values ds) (compile-e e (reverse (define-ids ds)) #t) (Add rsp (* 8 (length ds))) ;; pop function definitions @@ -32,13 +34,18 @@ (compile-lambda-defines (lambdas p)) (Label 'raise_error_align) pad-stack - (Call 'raise_error))])) + (Call 'raise_error) + (Data) + (compile-literals q))])) (define (externs) (seq (Extern 'peek_byte) (Extern 'read_byte) (Extern 'write_byte) - (Extern 'raise_error))) + (Extern 'raise_error) + (Extern 'gensym) + (Extern 'intern_symbol) + (Extern 'str_dup))) ;; [Listof Defn] -> [Listof Id] (define (define-ids ds) @@ -107,7 +114,8 @@ [(Begin e1 e2) (compile-begin e1 e2 c t?)] [(Let x e1 e2) (compile-let x e1 e2 c t?)] [(App e es) (compile-app e es c t?)] - [(Lam f xs e) (compile-lam f xs e c)])) + [(Lam f xs e) (compile-lam f xs e c)] + [(Match e ps es) (compile-match e ps es c t?)])) ;; Value -> Asm (define (compile-value v) @@ -280,6 +288,108 @@ (Push rax) (compile-es es (cons #f c)))])) +;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +(define (compile-match e ps es c t?) + (let ((done (gensym))) + (seq (compile-e e c t?) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched + +;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) + +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i f cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) + (Add rsp (* 8 (length cm))) + (Jmp done) + f + (Label next))]))) + +;; Pat CEnv Symbol -> (list Asm Asm CEnv) +(define (compile-pattern p cm next) + (match p + [(PWild) + (list (seq) (seq) cm)] + [(PVar x) + (list (seq (Push rax)) + (seq) + (cons x cm))] + [(PLit l) + (let ((fail (gensym))) + (list (seq (Cmp rax (imm->bits l)) + (Jne fail)) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))] + [(PAnd p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 f2) + cm2)])])] + [(PBox p) + (match (compile-pattern p cm next) + [(list i1 f1 cm1) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Jne fail) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + (seq f1 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm1))])] + [(PCons p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Jne fail) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 + f2 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm2))])])])) + ;; Id CEnv -> Integer (define (lookup x cenv) (match cenv @@ -289,6 +399,21 @@ [#t 0] [#f (+ 8 (lookup x rest))])])) +;; QEnv -> Asm +;; Call intern_symbol on every symbol in the program +(define (init-symbol-table q) + (seq pad-stack + (append-map init-symbol q) + unpad-stack)) + +;; (cons (U String Symbol) Ref) -> Asm +(define (init-symbol qb) + (match qb + [(cons (? symbol? s) (Ref l _)) + (seq (Lea rdi l) + (Call 'intern_symbol))] + [_ (seq)])) + ;; Symbol -> Label ;; Produce a symbol that is a valid Nasm label (define (symbol->label s) diff --git a/langs/mountebank/intern.rkt b/langs/mountebank/intern.rkt new file mode 100644 index 00000000..2ecef152 --- /dev/null +++ b/langs/mountebank/intern.rkt @@ -0,0 +1,147 @@ +#lang racket +(require "ast.rkt" "types.rkt") +(provide intern + (struct-out Ref)) + +;; type QEnv = [Listof (cons (U String Symbol) Ref)] + +(struct Ref (label type-tag) #:prefab) + +;; Datum QEnv -> (cons Datum QEnv) +;; Intern all literal strings and symbols. +;; Replaces occurrences of string and symbol literals with +;; a reference bound in qenv. +(define (intern-datum d q) + (cond + [(string? d) (intern! d q type-str)] + [(symbol? d) (intern! d q type-symb)] + [(box? d) + (match (intern-datum (unbox d) q) + [(cons d q) + (cons (box d) q)])] + [(cons? d) + (match (intern-datums (list (car d) (cdr d)) q) + [(cons (list d1 d2) q) + (cons (cons d1 d2) q)])] + [(vector? d) + (match (intern-datums (vector->list d) q) + [(cons ds q) + (cons (apply vector ds) q)])] + [else + (cons d q)])) + +;; (U String Symbol) QEnv -> [Maybe Ref] +(define (lookup d q) + (match q + ['() #f] + [(cons (cons d0 r) q) + (if (equal? d d0) + r + (lookup d q))])) + +;; (U Symbol String) QEnv Tag -> QEnv +;; Either lookup prior reference or create a new one +(define (intern! s q type) + (match (lookup s q) + [#f + (let ((l (gensym 'lit))) + (let ((r (Ref l type))) + (cons r + (cons (cons s r) q))))] + [r (cons r q)])) + +;; Prog -> (cons Prog QEnv) +(define (intern p) + (intern-prog p '())) + +;; [Listof Datum] QEnv -> (cons [Listof Datum] QEnv) +(define (intern-datums ds q) + (match ds + ['() (cons ds q)] + [(cons d ds) + (match (intern-datum d q) + [(cons d q) + (match (intern-datums ds q) + [(cons ds q) + (cons (cons d ds) q)])])])) + +;; Prog QEnv -> (cons Prog QEnv) +(define (intern-prog p q) + (match p + [(Prog ds e) + (match (intern-ds ds q) + [(cons ds q) + (match (intern-e e q) + [(cons e q) + (cons (Prog ds e) q)])])])) + +;; Defns QEnv -> (cons Defns QEnv) +(define (intern-ds ds q) + (match ds + ['() (cons '() q)] + [(cons d ds) + (match (intern-d d q) + [(cons d q) + (match (intern-ds ds q) + [(cons ds q) + (cons (cons d ds) q)])])])) + +;; Defn QEnv -> (cons Defn QEnv) +(define (intern-d d q) + (match d + [(Defn f xs e) + (match (intern-e e q) + [(cons e q) + (cons (Defn f xs e) q)])])) + +;; Expr QEnv -> (cons Expr QEnv) +(define (intern-e e q) + (match e + [(Quote d) + (match (intern-datum d q) + [(cons d q) + (cons (Quote d) q)])] + [(Prim1 p e) + (match (intern-e e q) + [(cons e q) + (cons (Prim1 p e) q)])] + [(Prim2 p e1 e2) + (match (intern-es (list e1 e2) q) + [(cons (list e1 e2) q) + (cons (Prim2 p e1 e2) q)])] + [(Prim3 p e1 e2 e3) + (match (intern-es (list e1 e2 e3) q) + [(cons (list e1 e2 e3) q) + (cons (Prim3 p e1 e2 e3) q)])] + [(If e1 e2 e3) + (match (intern-es (list e1 e2 e3) q) + [(cons (list e1 e2 e3) q) + (cons (If e1 e2 e3) q)])] + [(Begin e1 e2) + (match (intern-es (list e1 e2) q) + [(cons (list e1 e2) q) + (cons (Begin e1 e2) q)])] + [(Let x e1 e2) + (match (intern-es (list e1 e2) q) + [(cons (list e1 e2) q) + (cons (Let x e1 e2) q)])] + [(App e1 es) + (match (intern-es (cons e1 es) q) + [(cons (cons e1 es) q) + (cons (App e1 es) q)])] + [(Lam f xs e) + (match (intern-e e q) + [(cons e q) + (cons (Lam f xs e) q)])] + [_ (cons e q)])) + +;; [Listof Expr] QEnv -> (cons [Listof Expr] QEnv) +(define (intern-es es q) + (match es + ['() (cons '() q)] + [(cons e es) + (match (intern-e e q) + [(cons e q) + (match (intern-es es q) + [(cons es q) + (cons (cons e es) q)])])])) diff --git a/langs/mountebank/interp-defun.rkt b/langs/mountebank/interp-defun.rkt index 2d2a5010..ad449140 100644 --- a/langs/mountebank/interp-defun.rkt +++ b/langs/mountebank/interp-defun.rkt @@ -35,6 +35,7 @@ [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] + [(Prim0 'gensym) (gensym)] [(Prim1 p e) (match (interp-env e r ds) ['err 'err] @@ -83,7 +84,44 @@ (if (= (length xs) (length vs)) (interp-env e (append (zip xs vs) r) ds) 'err)] - [_ 'err])])])])) + [_ 'err])])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) ;; Id Env [Listof Defn] -> Answer (define (interp-var x r ds) diff --git a/langs/mountebank/interp-file.rkt b/langs/mountebank/interp-file.rkt index aabe615b..c2490030 100644 --- a/langs/mountebank/interp-file.rkt +++ b/langs/mountebank/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read-all p)))) + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/mountebank/interp-prims.rkt b/langs/mountebank/interp-prims.rkt index 15039f9f..79bcf361 100644 --- a/langs/mountebank/interp-prims.rkt +++ b/langs/mountebank/interp-prims.rkt @@ -24,6 +24,9 @@ [(list 'vector-length (? vector?)) (vector-length v)] [(list 'string? v) (string? v)] [(list 'string-length (? string?)) (string-length v)] + [(list 'symbol? v) (symbol? v)] + [(list 'string->symbol (? string?)) (string->symbol v)] + [(list 'symbol->string (? symbol?)) (symbol->string v)] [_ 'err])) ;; Op2 Value Value -> Answer diff --git a/langs/mountebank/interp.rkt b/langs/mountebank/interp.rkt index 8a448b02..072e5f77 100644 --- a/langs/mountebank/interp.rkt +++ b/langs/mountebank/interp.rkt @@ -34,6 +34,7 @@ [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] + [(Prim0 'gensym) (gensym)] [(Prim1 p e) (match (interp-env e r ds) ['err 'err] @@ -82,7 +83,44 @@ [vs (if (procedure? f) (apply f vs) - 'err)])])])) + 'err)])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) ;; Id Env [Listof Defn] -> Answer (define (interp-var x r ds) diff --git a/langs/mountebank/parse.rkt b/langs/mountebank/parse.rkt index 708b0505..3a535b6f 100644 --- a/langs/mountebank/parse.rkt +++ b/langs/mountebank/parse.rkt @@ -39,6 +39,8 @@ (If (parse-e e1) (parse-e e2) (parse-e e3))] [(list 'let (list (list (? symbol? x) e1)) e2) (Let x (parse-e e1) (parse-e e2))] + [(cons 'match (cons e ms)) + (parse-match (parse-e e) ms)] [(list (or 'lambda 'λ) xs e) (if (and (list? xs) (andmap symbol? xs)) @@ -48,6 +50,32 @@ (App (parse-e e) (map parse-e es))] [_ (error "Parse error" s)])) +(define (parse-match e ms) + (match ms + ['() (Match e '() '())] + [(cons (list p r) ms) + (match (parse-match e ms) + [(Match e ps es) + (Match e + (cons (parse-pat p) ps) + (cons (parse-e r) es))])])) + +(define (parse-pat p) + (match p + [(? boolean?) (PLit p)] + [(? integer?) (PLit p)] + [(? char?) (PLit p)] + ['_ (PWild)] + [(? symbol?) (PVar p)] + [(list 'quote (list)) + (PLit '())] + [(list 'box p) + (PBox (parse-pat p))] + [(list 'cons p1 p2) + (PCons (parse-pat p1) (parse-pat p2))] + [(list 'and p1 p2) + (PAnd (parse-pat p1) (parse-pat p2))])) + ;; Datum -> Datum (define (parse-datum d) (match d @@ -56,6 +84,7 @@ [(cons d1 d2) (cons (parse-datum d1) (parse-datum d2))] ['() '()] + [(? symbol? s) s] [(? integer? i) i] [(? boolean? b) b] [(? string? s) s] @@ -73,13 +102,14 @@ (vector? x))) (define op0 - '(read-byte peek-byte void)) + '(read-byte peek-byte void gensym)) (define op1 '(add1 sub1 zero? char? write-byte eof-object? integer->char char->integer box unbox empty? cons? box? car cdr - vector? vector-length string? string-length)) + vector? vector-length string? string-length + symbol->string string->symbol symbol?)) (define op2 '(+ - < = cons eq? make-vector vector-ref make-string string-ref)) (define op3 diff --git a/langs/mountebank/print.c b/langs/mountebank/print.c index 6cb5b1b0..2bcb21dc 100644 --- a/langs/mountebank/print.c +++ b/langs/mountebank/print.c @@ -7,6 +7,7 @@ void print_codepoint(val_char_t); void print_cons(val_cons_t *); void print_vect(val_vect_t*); void print_str(val_str_t*); +void print_symb(val_symb_t*); void print_str_char(val_char_t); void print_result_interior(val_t); int utf8_encode_char(val_char_t, char *); @@ -40,6 +41,10 @@ void print_result(val_t x) print_str(val_unwrap_str(x)); putchar('"'); break; + case T_SYMB: + printf("'"); + print_result_interior(x); + break; case T_PROC: printf("#"); break; @@ -48,6 +53,11 @@ void print_result(val_t x) } } +void print_symb(val_symb_t *s) +{ + print_str((val_str_t*) s); +} + void print_result_interior(val_t x) { switch (val_typeof(x)) { @@ -63,6 +73,9 @@ void print_result_interior(val_t x) print_cons(val_unwrap_cons(x)); printf(")"); break; + case T_SYMB: + print_symb(val_unwrap_symb(x)); + break; case T_VECT: print_vect(val_unwrap_vect(x)); break; diff --git a/langs/mountebank/symbol.c b/langs/mountebank/symbol.c new file mode 100644 index 00000000..d8ad04ad --- /dev/null +++ b/langs/mountebank/symbol.c @@ -0,0 +1,150 @@ +#include +#include +#include +#include +#include "values.h" + +static uint64_t gensym_ctr = 0; + +val_str_t *str_from_cstr(const char *); +int symb_cmp(const val_symb_t *, const val_symb_t *); +val_str_t *str_dup(const val_str_t *); + +// binary tree node +struct Node { + val_symb_t* elem; + struct Node* left; + struct Node* right; +}; + +static struct Node *symbol_tbl = NULL; + +val_symb_t *intern_symbol(val_symb_t* symb) +{ + struct Node **curr = &symbol_tbl; + + while (*curr) { + struct Node *t = *curr; + int r = symb_cmp(symb, t->elem); + if (r == 0) { + // found it, so return saved pointer + return t->elem; + } else if (r < 0) { + curr = &t->left; + } else { + curr = &t->right; + } + } + + // wasn't found, so insert it and return pointer + *curr = calloc(1, sizeof(struct Node)); + (*curr)->elem = symb; + return (*curr)->elem; +} + +val_symb_t *str_to_symbol(const val_str_t *str) +{ + // str_dup needed if string is mutable + return intern_symbol((val_symb_t*)str_dup(str)); +} + +val_symb_t *gensym(void) +{ + char s[100]; // uint64_t has maximum 20 digits + sprintf(s, "g%" PRIu64, gensym_ctr++); + return (val_symb_t*)str_from_cstr(s); // uninterned symbol +} + +val_str_t *str_from_cstr(const char *s) +{ + int64_t len = strlen(s); + + if (len == 0) + return NULL; + + val_str_t *str = + malloc(sizeof(int64_t) + len * sizeof(val_char_t)); + + if (!str) + return NULL; + + str->len = len; + int i; + for (i = 0; i < len; i++) { + str->codepoints[i] = (val_char_t)s[i]; + } + return str; +} + +int symb_cmp(const val_symb_t *s1, const val_symb_t *s2) +{ + int64_t len1 = s1->len; + int64_t len2 = s2->len; + + int64_t len = len1 < len2 ? len1 : len2; + int i; + + for (i = 0; i < len; i = i+1) { + if (s1->codepoints[i] != s2->codepoints[i]) + return s1->codepoints[i] - s2->codepoints[i]; + } + + return len1 - len2; +} + +val_str_t *str_dup(const val_str_t *s) +{ + int64_t n = s->len; + val_str_t *d; + + d = calloc(2+n, sizeof(val_char_t)); + if (!d) + return NULL; + + return memcpy(d, s, (2+n) * sizeof(val_char_t)); +} + + +#ifdef CHECK +// $ gcc -DCHECK symbol.c +#include +int main(void) +{ + val_str_t *foo = str_from_cstr("foo"); + val_str_t *foo_ = str_from_cstr("foo"); + val_str_t *bar = str_from_cstr("bar"); + val_str_t *foo1 = str_from_cstr("foo1"); + val_str_t *foo1_ = str_from_cstr("foo1"); + val_str_t *fo = str_from_cstr("fo"); + val_str_t *fo_ = str_from_cstr("fo"); + + assert(str_cmp(foo,foo_) == 0); + assert(str_cmp(foo1,foo1_) == 0); + assert(str_cmp(fo,fo_) == 0); + + assert(str_cmp(foo,foo1) < 0); + assert(str_cmp(foo,foo1) == str_cmp(foo,foo1_)); + assert(str_cmp(foo,foo1) == str_cmp(foo_,foo1)); + assert(str_cmp(foo,foo1) == str_cmp(foo_,foo1_)); + + assert(str_cmp(foo,fo) > 0); + assert(str_cmp(foo,fo) == str_cmp(foo,fo_)); + assert(str_cmp(foo,fo) == str_cmp(foo_,fo)); + assert(str_cmp(foo,fo) == str_cmp(foo_,fo_)); + + assert(str_cmp(foo,bar) > 0); + assert(str_cmp(bar,foo) < 0); + + assert(str_cmp(fo,bar) > 0); + assert(str_cmp(bar,fo) < 0); + + val_symb_t *foo_symb = (val_symb_t*)foo; + val_symb_t *foo_symb_ = (val_symb_t*)foo_; + assert(foo_symb != foo_symb_); + val_symb_t *foo_symb_i = intern_symbol(foo_symb); + val_symb_t *foo_symb_i_ = intern_symbol(foo_symb_); + assert(foo_symb_i == foo_symb_i_); + + return 0; +} +#endif diff --git a/langs/mountebank/test/test-runner.rkt b/langs/mountebank/test/test-runner.rkt index cc234fad..2b04d07b 100644 --- a/langs/mountebank/test/test-runner.rkt +++ b/langs/mountebank/test/test-runner.rkt @@ -203,6 +203,40 @@ '(let ((z 2)) (f 1 2))) 10) + ;; Knock examples + (check-equal? (run '(match 1)) 'err) + (check-equal? (run '(match 1 [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2] [0 3])) + 2) + (check-equal? (run '(match 1 [2 1] [0 3])) + 'err) + (check-equal? (run '(match 1 [_ 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [x x] [_ 3])) + (cons 1 2)) + (check-equal? (run '(match (cons 1 2) [(cons x y) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons x 2) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons 3 2) 0] [_ 3])) + 3) + (check-equal? (run '(match 1 [(cons x y) x] [_ 3])) + 3) + (check-equal? (run '(match (cons 1 2) [(cons 1 3) 0] [(cons 1 y) y] [_ 3])) + 2) + (check-equal? (run '(match (box 1) [(box 1) 0] [_ 1])) + 0) + (check-equal? (run '(match (box 1) [(box 2) 0] [_ 1])) + 1) + (check-equal? (run '(match (box 1) [(box x) x] [_ 2])) + 1) ;; Loot examples (check-equal? (run '((λ (x) x) 5)) @@ -234,6 +268,21 @@ 36)) 666) + ;; Mug examples + (check-equal? (run '(symbol? 'foo)) #t) + (check-equal? (run '(symbol? (string->symbol "foo"))) #t) + (check-equal? (run '(eq? 'foo 'foo)) #t) + (check-equal? (run '(eq? (string->symbol "foo") + (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'foo (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'fff (string->symbol (make-string 3 #\f)))) + #t) + (check-equal? (run '(eq? 'g0 (gensym))) #f) + (check-equal? (run '(eq? (gensym) (gensym))) #f) + (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f) + ;; Mountebank examples (check-equal? (run '#()) #()) @@ -254,6 +303,9 @@ #f) (check-equal? (run '(define (f) '(1 . 2)) '(eq? (f) (f))) + #t) + (check-equal? (run '(let ((x '(foo . foo))) + (eq? (car x) (cdr x)))) #t)) (define (test-runner-io run) diff --git a/langs/mountebank/types.h b/langs/mountebank/types.h index 90668957..4093c4f7 100644 --- a/langs/mountebank/types.h +++ b/langs/mountebank/types.h @@ -24,6 +24,7 @@ #define vect_type_tag 3 #define str_type_tag 4 #define proc_type_tag 5 +#define symb_type_tag 6 #define int_shift (1 + imm_shift) #define int_type_mask ((1 << int_shift) - 1) #define int_type_tag (0 << (int_shift - 1)) diff --git a/langs/mountebank/types.rkt b/langs/mountebank/types.rkt index b025251b..c02d2d1e 100644 --- a/langs/mountebank/types.rkt +++ b/langs/mountebank/types.rkt @@ -9,6 +9,7 @@ (define type-vect #b011) (define type-str #b100) (define type-proc #b101) +(define type-symb #b110) (define int-shift (+ 1 imm-shift)) (define char-shift (+ 2 imm-shift)) (define type-int #b0000) diff --git a/langs/mountebank/values.c b/langs/mountebank/values.c index c6b27244..32e922bd 100644 --- a/langs/mountebank/values.c +++ b/langs/mountebank/values.c @@ -12,6 +12,8 @@ type_t val_typeof(val_t x) return T_VECT; case str_type_tag: return T_STR; + case symb_type_tag: + return T_SYMB; case proc_type_tag: return T_PROC; } @@ -108,3 +110,12 @@ val_t val_wrap_str(val_str_t *v) { return ((val_t)v) | str_type_tag; } + +val_symb_t* val_unwrap_symb(val_t x) +{ + return (val_symb_t *)(x ^ symb_type_tag); +} +val_t val_wrap_symb(val_symb_t *v) +{ + return ((val_t)v) | symb_type_tag; +} diff --git a/langs/mountebank/values.h b/langs/mountebank/values.h index 54fb2039..c1de09d6 100644 --- a/langs/mountebank/values.h +++ b/langs/mountebank/values.h @@ -20,6 +20,7 @@ typedef enum type_t { T_CONS, T_VECT, T_STR, + T_SYMB, T_PROC, } type_t; @@ -39,6 +40,10 @@ typedef struct val_str_t { uint64_t len; val_char_t codepoints[]; } val_str_t; +typedef struct val_symb_t { + uint64_t len; + val_char_t codepoints[]; +} val_symb_t; /* return the type of x */ type_t val_typeof(val_t x); @@ -73,4 +78,7 @@ val_t val_wrap_vect(val_vect_t* c); val_str_t* val_unwrap_str(val_t x); val_t val_wrap_str(val_str_t* c); +val_symb_t* val_unwrap_symb(val_t x); +val_t val_wrap_symb(val_symb_t* c); + #endif diff --git a/langs/mug/Makefile b/langs/mug/Makefile new file mode 100644 index 00000000..76f979af --- /dev/null +++ b/langs/mug/Makefile @@ -0,0 +1,38 @@ +UNAME := $(shell uname) +.PHONY: test + +ifeq ($(UNAME), Darwin) + format=macho64 +else + format=elf64 +endif + +objs = \ + main.o \ + values.o \ + print.o \ + symbol.o \ + io.o + +default: runtime.o + +runtime.o: $(objs) + ld -r $(objs) -o runtime.o + +%.run: %.o runtime.o + gcc runtime.o $< -o $@ + +.c.o: + gcc -fPIC -c -g -o $@ $< + +.s.o: + nasm -g -f $(format) -o $@ $< + +%.s: %.rkt + racket -t compile-file.rkt -m $< > $@ + +clean: + rm *.o *.s *.run + +test: example.run + @test "$(shell ./example.run)" = "$(shell racket example.rkt)" diff --git a/langs/mug/ast.rkt b/langs/mug/ast.rkt new file mode 100644 index 00000000..d83ef3fa --- /dev/null +++ b/langs/mug/ast.rkt @@ -0,0 +1,79 @@ +#lang racket +(provide (all-defined-out)) + +;; type Prog = (Prog (Listof Defn) Expr) +(struct Prog (ds e) #:prefab) + +;; type Defn = (Defn Id (Listof Id) Expr) +(struct Defn (f xs e) #:prefab) + +;; type Expr = (Eof) +;; | (Empty) +;; | (Int Integer) +;; | (Bool Boolean) +;; | (Char Character) +;; | (Str String) +;; | (Symb Symbol) +;; | (Prim0 Op0) +;; | (Prim1 Op1 Expr) +;; | (Prim2 Op2 Expr Expr) +;; | (Prim3 Op3 Expr Expr Expr) +;; | (If Expr Expr Expr) +;; | (Begin Expr Expr) +;; | (Let Id Expr Expr) +;; | (Var Id) +;; | (Match Expr (Listof Pat) (Listof Expr)) +;; | (App Expr (Listof Expr)) +;; | (Lam Id (Listof Id) Expr) +;; type Id = Symbol +;; type Op0 = 'read-byte +;; type Op1 = 'add1 | 'sub1 | 'zero? +;; | 'char? | 'integer->char | 'char->integer +;; | 'write-byte | 'eof-object? +;; | 'box | 'car | 'cdr | 'unbox +;; | 'empty? | 'cons? | 'box? +;; | 'vector? | 'vector-length +;; | 'string? | 'string-length +;; | 'symbol? | 'symbol->string +;; | 'string->symbol | 'string->uninterned-symbol +;; type Op2 = '+ | '- | '< | '= +;; | 'cons +;; | 'make-vector | 'vector-ref +;; | 'make-string | 'string-ref +;; type Op3 = 'vector-set! +;; type Pat = (PVar Id) +;; | (PWild) +;; | (PLit Lit) +;; | (PBox Pat) +;; | (PCons Pat Pat) +;; | (PAnd Pat Pat) +;; type Lit = Boolean +;; | Character +;; | Integer +;; | '() + +(struct Eof () #:prefab) +(struct Empty () #:prefab) +(struct Int (i) #:prefab) +(struct Bool (b) #:prefab) +(struct Char (c) #:prefab) +(struct Str (s) #:prefab) +(struct Symb (s) #:prefab) +(struct Prim0 (p) #:prefab) +(struct Prim1 (p e) #:prefab) +(struct Prim2 (p e1 e2) #:prefab) +(struct Prim3 (p e1 e2 e3) #:prefab) +(struct If (e1 e2 e3) #:prefab) +(struct Begin (e1 e2) #:prefab) +(struct Let (x e1 e2) #:prefab) +(struct Var (x) #:prefab) +(struct App (e es) #:prefab) +(struct Lam (f xs e) #:prefab) +(struct Match (e ps es) #:prefab) + +(struct PVar (x) #:prefab) +(struct PWild () #:prefab) +(struct PLit (x) #:prefab) +(struct PBox (p) #:prefab) +(struct PCons (p1 p2) #:prefab) +(struct PAnd (p1 p2) #:prefab) diff --git a/langs/mug/char.c b/langs/mug/char.c new file mode 100644 index 00000000..d11f16e0 --- /dev/null +++ b/langs/mug/char.c @@ -0,0 +1,57 @@ +#include +#include +#include "types.h" + +void print_codepoint(int64_t); + +void print_char (int64_t v) { + int64_t codepoint = v >> char_shift; + printf("#\\"); + switch (codepoint) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: + printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(v); + } +} + +void print_codepoint(int64_t v) { + int64_t codepoint = v >> char_shift; + // Print using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (codepoint < 128) { + printf("%c", (char) codepoint); + } else if (codepoint < 2048) { + printf("%c%c", + (char)(codepoint >> 6) | 192, + ((char)codepoint & 63) | 128); + } else if (codepoint < 65536) { + printf("%c%c%c", + (char)(codepoint >> 12) | 224, + ((char)(codepoint >> 6) & 63) | 128, + ((char)codepoint & 63) | 128); + } else { + printf("%c%c%c%c", + (char)(codepoint >> 18) | 240, + ((char)(codepoint >> 12) & 63) | 128, + ((char)(codepoint >> 6) & 63) | 128, + ((char)codepoint & 63) | 128); + } +} + diff --git a/langs/mug/compile-define.rkt b/langs/mug/compile-define.rkt new file mode 100644 index 00000000..354e6f26 --- /dev/null +++ b/langs/mug/compile-define.rkt @@ -0,0 +1,69 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "fv.rkt" + "utils.rkt" + "compile-expr.rkt" + a86/ast) + +;; [Listof Defn] -> [Listof Id] +(define (define-ids ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (cons f (define-ids ds))])) + +;; [Listof Defn] -> Asm +(define (compile-defines ds) + (match ds + ['() (seq)] + [(cons d ds) + (seq (compile-define d) + (compile-defines ds))])) + +;; Defn -> Asm +(define (compile-define d) + (match d + [(Defn f xs e) + (compile-lambda-define (Lam f xs e))])) + +;; Defns -> Asm +;; Compile the closures for ds and push them on the stack +(define (compile-defines-values ds) + (seq (alloc-defines ds 0) + (init-defines ds (reverse (define-ids ds)) 8) + (add-rbx-defines ds 0))) + +;; Defns Int -> Asm +;; Allocate closures for ds at given offset, but don't write environment yet +(define (alloc-defines ds off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx off) rax) + (Mov rax rbx) + (Add rax off) + (Or rax type-proc) + (Push rax) + (alloc-defines ds (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns CEnv Int -> Asm +;; Initialize the environment for each closure for ds at given offset +(define (init-defines ds c off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (free-vars-to-heap fvs c off) + (init-defines ds c (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns Int -> Asm +;; Compute adjustment to rbx for allocation of all ds +(define (add-rbx-defines ds n) + (match ds + ['() (seq (Add rbx (* n 8)))] + [(cons (Defn f xs e) ds) + (add-rbx-defines ds (+ n (add1 (length (fv (Lam f xs e))))))])) diff --git a/langs/mug/compile-expr.rkt b/langs/mug/compile-expr.rkt new file mode 100644 index 00000000..97e0319c --- /dev/null +++ b/langs/mug/compile-expr.rkt @@ -0,0 +1,311 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "lambdas.rkt" + "fv.rkt" + "utils.rkt" + "compile-ops.rkt" + a86/ast) + +;; Registers used +(define rax 'rax) ; return +(define rbx 'rbx) ; heap +(define rsp 'rsp) ; stack +(define rdi 'rdi) ; arg + +;; Expr CEnv Bool -> Asm +(define (compile-e e c t?) + (match e + [(Int i) (compile-value i)] + [(Bool b) (compile-value b)] + [(Char c) (compile-value c)] + [(Eof) (compile-value eof)] + [(Empty) (compile-value '())] + [(Str s) (compile-string s)] + [(Symb s) (compile-symbol s)] + [(Var x) (compile-variable x c)] + [(Prim0 p) (compile-prim0 p c)] + [(Prim1 p e) (compile-prim1 p e c)] + [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] + [(Prim3 p e1 e2 e3) (compile-prim3 p e1 e2 e3 c)] + [(If e1 e2 e3) (compile-if e1 e2 e3 c t?)] + [(Begin e1 e2) (compile-begin e1 e2 c t?)] + [(Let x e1 e2) (compile-let x e1 e2 c t?)] + [(App e es) (compile-app e es c t?)] + [(Lam f xs e) (compile-lam f xs e c)] + [(Match e ps es) (compile-match e ps es c t?)])) + +;; Symbol -> Asm +(define (compile-symbol s) + (seq (Lea rax (Plus (symbol->data-label s) type-symb)))) + +;; String -> Asm +(define (compile-string s) + (seq (Lea rax (Plus (symbol->data-label (string->symbol s)) type-str)))) + +;; Value -> Asm +(define (compile-value v) + (seq (Mov rax (imm->bits v)))) + +;; Id CEnv -> Asm +(define (compile-variable x c) + (match (lookup x c) + [#f (error "unbound variable")] ;(seq (Lea rax (symbol->label x)))] + [i (seq (Mov rax (Offset rsp i)))])) + +;; Op0 CEnv -> Asm +(define (compile-prim0 p c) + (compile-op0 p)) + +;; Op1 Expr CEnv -> Asm +(define (compile-prim1 p e c) + (seq (compile-e e c #f) + (compile-op1 p))) + +;; Op2 Expr Expr CEnv -> Asm +(define (compile-prim2 p e1 e2 c) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons #f c) #f) + (compile-op2 p))) + +;; Op3 Expr Expr Expr CEnv -> Asm +(define (compile-prim3 p e1 e2 e3 c) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons #f c) #f) + (Push rax) + (compile-e e3 (cons #f (cons #f c)) #f) + (compile-op3 p))) + +;; Expr Expr Expr CEnv Bool -> Asm +(define (compile-if e1 e2 e3 c t?) + (let ((l1 (gensym 'if)) + (l2 (gensym 'if))) + (seq (compile-e e1 c #f) + (Cmp rax val-false) + (Je l1) + (compile-e e2 c t?) + (Jmp l2) + (Label l1) + (compile-e e3 c t?) + (Label l2)))) + +;; Expr Expr CEnv Bool -> Asm +(define (compile-begin e1 e2 c t?) + (seq (compile-e e1 c #f) + (compile-e e2 c t?))) + +;; Id Expr Expr CEnv Bool -> Asm +(define (compile-let x e1 e2 c t?) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons x c) t?) + (Add rsp 8))) + +;; Id [Listof Expr] CEnv Bool -> Asm +(define (compile-app f es c t?) + ;(compile-app-nontail f es c) + (if t? + (compile-app-tail f es c) + (compile-app-nontail f es c))) + +;; Expr [Listof Expr] CEnv -> Asm +(define (compile-app-tail e es c) + (seq (compile-es (cons e es) c) + (move-args (add1 (length es)) (length c)) + (Add rsp (* 8 (length c))) + (Mov rax (Offset rsp (* 8 (length es)))) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) + (Jmp rax))) + +;; Integer Integer -> Asm +(define (move-args i off) + (cond [(zero? off) (seq)] + [(zero? i) (seq)] + [else + (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) + (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) + (move-args (sub1 i) off))])) + +;; Expr [Listof Expr] CEnv -> Asm +;; The return address is placed above the arguments, so callee pops +;; arguments and return address is next frame +(define (compile-app-nontail e es c) + (let ((r (gensym 'ret)) + (i (* 8 (length es)))) + (seq (Lea rax r) + (Push rax) + (compile-es (cons e es) (cons #f c)) + (Mov rax (Offset rsp i)) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) ; fetch the code label + (Jmp rax) + (Label r)))) + +;; Id [Listof Id] Expr CEnv -> Asm +(define (compile-lam f xs e c) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx 0) rax) + (free-vars-to-heap fvs c 8) + (Mov rax rbx) ; return value + (Or rax type-proc) + (Add rbx (* 8 (add1 (length fvs))))))) + +;; [Listof Id] CEnv Int -> Asm +;; Copy the values of given free variables into the heap at given offset +(define (free-vars-to-heap fvs c off) + (match fvs + ['() (seq)] + [(cons x fvs) + (seq (Mov r8 (Offset rsp (lookup x c))) + (Mov (Offset rbx off) r8) + (free-vars-to-heap fvs c (+ off 8)))])) + +;; [Listof Lam] -> Asm +(define (compile-lambda-defines ls) + (match ls + ['() (seq)] + [(cons l ls) + (seq (compile-lambda-define l) + (compile-lambda-defines ls))])) + +;; Lam -> Asm +(define (compile-lambda-define l) + (let ((fvs (fv l))) + (match l + [(Lam f xs e) + (let ((env (append (reverse fvs) (reverse xs) (list #f)))) + (seq (Label (symbol->label f)) + (Mov rax (Offset rsp (* 8 (length xs)))) + (Xor rax type-proc) + (copy-env-to-stack fvs 8) + (compile-e e env #t) + (Add rsp (* 8 (length env))) ; pop env + (Ret)))]))) + +;; [Listof Id] Int -> Asm +;; Copy the closure environment at given offset to stack +(define (copy-env-to-stack fvs off) + (match fvs + ['() (seq)] + [(cons _ fvs) + (seq (Mov r9 (Offset rax off)) + (Push r9) + (copy-env-to-stack fvs (+ 8 off)))])) + +;; [Listof Expr] CEnv -> Asm +(define (compile-es es c) + (match es + ['() '()] + [(cons e es) + (seq (compile-e e c #f) + (Push rax) + (compile-es es (cons #f c)))])) + +;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +(define (compile-match e ps es c t?) + (let ((done (gensym))) + (seq (compile-e e c t?) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched + +;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) + +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i f cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) + (Add rsp (* 8 (length cm))) + (Jmp done) + f + (Label next))]))) + +;; Pat CEnv Symbol -> (list Asm Asm CEnv) +(define (compile-pattern p cm next) + (match p + [(PWild) + (list (seq) (seq) cm)] + [(PVar x) + (list (seq (Push rax)) + (seq) + (cons x cm))] + [(PLit l) + (let ((fail (gensym))) + (list (seq (Cmp rax (imm->bits l)) + (Jne fail)) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))] + [(PAnd p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 f2) + cm2)])])] + [(PBox p) + (match (compile-pattern p cm next) + [(list i1 f1 cm1) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Jne fail) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + (seq f1 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm1))])] + [(PCons p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Jne fail) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 + f2 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm2))])])])) diff --git a/langs/mug/compile-file.rkt b/langs/mug/compile-file.rkt new file mode 100644 index 00000000..3593dc7d --- /dev/null +++ b/langs/mug/compile-file.rkt @@ -0,0 +1,13 @@ +#lang racket +(provide main) +(require "parse.rkt" "compile.rkt" "read-all.rkt" a86/printer) + +;; String -> Void +;; Compile contents of given file name, +;; emit asm code on stdout +(define (main fn) + (let ((p (open-input-file fn))) + (begin + (read-line p) ; ignore #lang racket line + (displayln (asm-string (compile (parse (read-all p))))) + (close-input-port p)))) diff --git a/langs/mug/compile-literals.rkt b/langs/mug/compile-literals.rkt new file mode 100644 index 00000000..95a1b34d --- /dev/null +++ b/langs/mug/compile-literals.rkt @@ -0,0 +1,91 @@ +#lang racket +(provide compile-literals init-symbol-table literals) +(require "ast.rkt" + "utils.rkt" + a86/ast) + +(define rdi 'rdi) + +;; Prog -> Asm +(define (compile-literals p) + (append-map compile-literal (literals p))) + +;; Symbol -> Asm +(define (compile-literal s) + (let ((str (symbol->string s))) + (seq (Label (symbol->data-label s)) + (Dq (string-length str)) + (compile-string-chars (string->list str)) + (if (odd? (string-length str)) + (seq (Dd 0)) + (seq))))) + +;; Prog -> Asm +;; Call intern_symbol on every symbol in the program +(define (init-symbol-table p) + (match (symbols p) + ['() (seq)] + [ss (seq (Sub 'rsp 8) + (append-map init-symbol ss) + (Add 'rsp 8))])) + +;; Symbol -> Asm +(define (init-symbol s) + (seq (Lea rdi (symbol->data-label s)) + (Call 'intern_symbol))) + +;; Prog -> [Listof Symbol] +(define (literals p) + (remove-duplicates + (map to-symbol (literals* p)))) + +;; Prog -> [Listof Symbol] +(define (symbols p) + (remove-duplicates (filter symbol? (literals* p)))) + +;; (U String Symbol) -> Symbol +(define (to-symbol s) + (if (string? s) + (string->symbol s) + s)) + +;; Prog -> [Listof (U Symbol String)] +(define (literals* p) + (match p + [(Prog ds e) + (append (append-map literals-d ds) (literals-e e))])) + +(define (literals-d d) + (match d + [(Defn f xs e) + (literals-e e)])) + +(define (literals-e e) + (match e + [(Str s) (list s)] + [(Symb s) (list s)] + [(Prim1 p e) + (literals-e e)] + [(Prim2 p e1 e2) + (append (literals-e e1) (literals-e e2))] + [(Prim3 p e1 e2 e3) + (append (literals-e e1) (literals-e e2) (literals-e e3))] + [(If e1 e2 e3) + (append (literals-e e1) (literals-e e2) (literals-e e3))] + [(Begin e1 e2) + (append (literals-e e1) (literals-e e2))] + [(Let x e1 e2) + (append (literals-e e1) (literals-e e2))] + [(App e1 es) + (append (literals-e e1) (append-map literals-e es))] + [(Lam f xs e) + (literals-e e)] + [_ '()])) + +;; [Listof Char] -> Asm +(define (compile-string-chars cs) + (match cs + ['() (seq)] + [(cons c cs) + (seq (Dd (char->integer c)) + (compile-string-chars cs))])) diff --git a/langs/mug/compile-ops.rkt b/langs/mug/compile-ops.rkt new file mode 100644 index 00000000..44970b4c --- /dev/null +++ b/langs/mug/compile-ops.rkt @@ -0,0 +1,395 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" "types.rkt" "utils.rkt" a86/ast) + +(define rax 'rax) ; return +(define eax 'eax) ; 32-bit load/store +(define rbx 'rbx) ; heap +(define rdi 'rdi) ; arg1 +(define rsi 'rsi) ; arg2 +(define rdx 'rdx) ; arg3 +(define r8 'r8) ; scratch +(define r9 'r9) ; scratch +(define r10 'r10) ; scratch +(define r15 'r15) ; stack pad (non-volatile) +(define rsp 'rsp) ; stack + +;; Op0 -> Asm +(define (compile-op0 p) + (match p + ['void (seq (Mov rax val-void))] + ['read-byte (seq pad-stack + (Call 'read_byte) + unpad-stack)] + ['peek-byte (seq pad-stack + (Call 'peek_byte) + unpad-stack)] + ['gensym (seq pad-stack + (Call 'gensym) + unpad-stack + (Or rax type-symb))])) + +;; Op1 -> Asm +(define (compile-op1 p) + (match p + ['add1 + (seq (assert-integer rax) + (Add rax (imm->bits 1)))] + ['sub1 + (seq (assert-integer rax) + (Sub rax (imm->bits 1)))] + ['zero? + (seq (assert-integer rax) + (eq-imm 0))] + ['char? + (type-pred mask-char type-char)] + ['char->integer + (seq (assert-char rax) + (Sar rax char-shift) + (Sal rax int-shift))] + ['integer->char + (seq (assert-codepoint rax) + (Sar rax int-shift) + (Sal rax char-shift) + (Xor rax type-char))] + ['eof-object? (eq-imm eof)] + ['write-byte + (seq (assert-byte rax) + pad-stack + (Mov rdi rax) + (Call 'write_byte) + unpad-stack + (Mov rax val-void))] + ['box + (seq (Mov (Offset rbx 0) rax) + (Mov rax rbx) + (Or rax type-box) + (Add rbx 8))] + ['unbox + (seq (assert-box rax) + (Xor rax type-box) + (Mov rax (Offset rax 0)))] + ['car + (seq (assert-cons rax) + (Xor rax type-cons) + (Mov rax (Offset rax 8)))] + ['cdr + (seq (assert-cons rax) + (Xor rax type-cons) + (Mov rax (Offset rax 0)))] + ['empty? (eq-imm '())] + ['box? + (type-pred ptr-mask type-box)] + ['cons? + (type-pred ptr-mask type-cons)] + ['vector? + (type-pred ptr-mask type-vect)] + ['string? + (type-pred ptr-mask type-str)] + ['symbol? + (type-pred ptr-mask type-symb)] + ['vector-length + (let ((zero (gensym)) + (done (gensym))) + (seq (assert-vector rax) + (Xor rax type-vect) + (Cmp rax 0) + (Je zero) + (Mov rax (Offset rax 0)) + (Sal rax int-shift) + (Jmp done) + (Label zero) + (Mov rax 0) + (Label done)))] + ['string-length + (let ((zero (gensym)) + (done (gensym))) + (seq (assert-string rax) + (Xor rax type-str) + (Cmp rax 0) + (Je zero) + (Mov rax (Offset rax 0)) + (Sal rax int-shift) + (Jmp done) + (Label zero) + (Mov rax 0) + (Label done)))] + ['string->symbol + (seq (assert-string rax) + (Xor rax type-str) + (Mov rdi rax) + pad-stack + (Call 'intern_symbol) + unpad-stack + (Or rax type-symb))] + ['symbol->string + (seq (assert-symbol rax) + (Xor rax type-symb) + char-array-copy + (Or rax type-str))] + ['string->uninterned-symbol + (seq (assert-string rax) + (Xor rax type-str) + char-array-copy + (Or rax type-symb))])) + +;; Asm +;; Copy sized array of characters pointed to by rax +(define char-array-copy + (seq (Mov rdi rbx) ; dst + (Mov rsi rax) ; src + (Mov rdx (Offset rax 0)) ; len + (Add rdx 1) ; #words = 1 + (len+1)/2 + (Sar rdx 1) + (Add rdx 1) + (Sal rdx 3) ; #bytes = 8*#words + pad-stack + (Call 'memcpy) + unpad-stack + (Mov rax rbx) + (Add rbx rdx))) + +;; Op2 -> Asm +(define (compile-op2 p) + (match p + ['+ + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Add rax r8))] + ['- + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Sub r8 rax) + (Mov rax r8))] + ['< + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + (Mov rax val-true) + (let ((true (gensym))) + (seq (Jl true) + (Mov rax val-false) + (Label true))))] + ['= + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + (Mov rax val-true) + (let ((true (gensym))) + (seq (Je true) + (Mov rax val-false) + (Label true))))] + ['cons + (seq (Mov (Offset rbx 0) rax) + (Pop rax) + (Mov (Offset rbx 8) rax) + (Mov rax rbx) + (Or rax type-cons) + (Add rbx 16))] + ['eq? + (seq (Pop r8) + (eq r8 rax))] + ['make-vector + (let ((loop (gensym)) + (done (gensym)) + (empty (gensym))) + (seq (Pop r8) + (assert-natural r8) + (Cmp r8 0) ; special case empty vector + (Je empty) + + (Mov r9 rbx) + (Or r9 type-vect) + + (Sar r8 int-shift) + (Mov (Offset rbx 0) r8) + (Add rbx 8) + + (Label loop) + (Mov (Offset rbx 0) rax) + (Add rbx 8) + (Sub r8 1) + (Cmp r8 0) + (Jne loop) + + (Mov rax r9) + (Jmp done) + + (Label empty) + (Mov rax type-vect) + (Label done)))] + + ['vector-ref + (seq (Pop r8) + (assert-vector r8) + (assert-integer rax) + (Cmp rax 0) + (Jl 'raise_error_align) + (Xor r8 type-vect) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar rax int-shift) ; rax = index + (Sub r9 1) + (Cmp r9 rax) + (Jl 'raise_error_align) + (Sal rax 3) + (Add r8 rax) + (Mov rax (Offset r8 8)))] + + ['make-string + (let ((loop (gensym)) + (done (gensym)) + (empty (gensym))) + (seq (Pop r8) + (assert-natural r8) + (assert-char rax) + (Cmp r8 0) ; special case empty string + (Je empty) + + (Mov r9 rbx) + (Or r9 type-str) + + (Sar r8 int-shift) + (Mov (Offset rbx 0) r8) + (Add rbx 8) + + (Sar rax char-shift) + + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd + + (Label loop) + (Mov (Offset rbx 0) eax) + (Add rbx 4) + (Sub r8 1) + (Cmp r8 0) + (Jne loop) + + (Mov rax r9) + (Jmp done) + + (Label empty) + (Mov rax type-str) + (Label done)))] + + ['string-ref + (seq (Pop r8) + (assert-string r8) + (assert-integer rax) + (Cmp rax 0) + (Jl 'raise_error_align) + (Xor r8 type-str) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar rax int-shift) ; rax = index + (Sub r9 1) + (Cmp r9 rax) + (Jl 'raise_error_align) + (Sal rax 2) + (Add r8 rax) + (Mov 'eax (Offset r8 8)) + (Sal rax char-shift) + (Or rax type-char))])) + +;; Op3 -> Asm +(define (compile-op3 p) + (match p + ['vector-set! + (seq (Pop r10) + (Pop r8) + (assert-vector r8) + (assert-integer r10) + (Cmp r10 0) + (Jl 'raise_error_align) + (Xor r8 type-vect) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar r10 int-shift) ; r10 = index + (Sub r9 1) + (Cmp r9 r10) + (Jl 'raise_error_align) + (Sal r10 3) + (Add r8 r10) + (Mov (Offset r8 8) rax) + (Mov rax val-void))])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (assert-type mask type) + (λ (arg) + (seq (Mov r9 arg) + (And r9 mask) + (Cmp r9 type) + (Jne 'raise_error_align)))) + +(define (type-pred mask type) + (let ((l (gensym))) + (seq (And rax mask) + (Cmp rax type) + (Mov rax (imm->bits #t)) + (Je l) + (Mov rax (imm->bits #f)) + (Label l)))) + +(define assert-integer + (assert-type mask-int type-int)) +(define assert-char + (assert-type mask-char type-char)) +(define assert-box + (assert-type ptr-mask type-box)) +(define assert-cons + (assert-type ptr-mask type-cons)) +(define assert-vector + (assert-type ptr-mask type-vect)) +(define assert-string + (assert-type ptr-mask type-str)) +(define assert-symbol + (assert-type ptr-mask type-symb)) +(define assert-proc + (assert-type ptr-mask type-proc)) + +(define (assert-codepoint r) + (let ((ok (gensym))) + (seq (assert-integer r) + (Cmp r (imm->bits 0)) + (Jl 'raise_error_align) + (Cmp r (imm->bits 1114111)) + (Jg 'raise_error_align) + (Cmp r (imm->bits 55295)) + (Jl ok) + (Cmp r (imm->bits 57344)) + (Jg ok) + (Jmp 'raise_error_align) + (Label ok)))) + +(define (assert-byte r) + (seq (assert-integer r) + (Cmp r (imm->bits 0)) + (Jl 'raise_error_align) + (Cmp r (imm->bits 255)) + (Jg 'raise_error_align))) + +(define (assert-natural r) + (seq (assert-integer r) + (Cmp r (imm->bits 0)) + (Jl 'raise_error_align))) + +;; Value -> Asm +(define (eq-imm imm) + (let ((l1 (gensym))) + (seq (Cmp rax (imm->bits imm)) + (Mov rax val-true) + (Je l1) + (Mov rax val-false) + (Label l1)))) + +(define (eq ir1 ir2) + (let ((l1 (gensym))) + (seq (Cmp ir1 ir2) + (Mov rax val-true) + (Je l1) + (Mov rax val-false) + (Label l1)))) diff --git a/langs/mug/compile.rkt b/langs/mug/compile.rkt new file mode 100644 index 00000000..731b3315 --- /dev/null +++ b/langs/mug/compile.rkt @@ -0,0 +1,47 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "lambdas.rkt" + "fv.rkt" + "utils.rkt" + "compile-define.rkt" + "compile-expr.rkt" + "compile-literals.rkt" + a86/ast) + +;; Registers used +(define rbx 'rbx) ; heap +(define rsp 'rsp) ; stack +(define rdi 'rdi) ; arg + +;; type CEnv = [Listof Id] + +;; Prog -> Asm +(define (compile p) + (match p + [(Prog ds e) + (prog (externs) + (Global 'entry) + (Label 'entry) + (Mov rbx rdi) ; recv heap pointer + (init-symbol-table p) + (compile-defines-values ds) + (compile-e e (reverse (define-ids ds)) #t) + (Add rsp (* 8 (length ds))) ;; pop function definitions + (Ret) + (compile-defines ds) + (compile-lambda-defines (lambdas p)) + (Label 'raise_error_align) + pad-stack + (Call 'raise_error) + (Data) + (compile-literals p))])) + +(define (externs) + (seq (Extern 'peek_byte) + (Extern 'read_byte) + (Extern 'write_byte) + (Extern 'raise_error) + (Extern 'intern_symbol) + (Extern 'memcpy))) diff --git a/langs/mug/env.rkt b/langs/mug/env.rkt new file mode 100644 index 00000000..c43be9c3 --- /dev/null +++ b/langs/mug/env.rkt @@ -0,0 +1,15 @@ +#lang racket +(provide lookup ext) + +;; Env Variable -> Answer +(define (lookup env x) + (match env + ['() 'err] + [(cons (list y i) env) + (match (symbol=? x y) + [#t i] + [#f (lookup env x)])])) + +;; Env Variable Value -> Value +(define (ext r x i) + (cons (list x i) r)) \ No newline at end of file diff --git a/langs/mug/fv.rkt b/langs/mug/fv.rkt new file mode 100644 index 00000000..212efadd --- /dev/null +++ b/langs/mug/fv.rkt @@ -0,0 +1,21 @@ +#lang racket +(require "ast.rkt") +(provide fv) + +;; Expr -> [Listof Id] +;; List all of the free variables in e +(define (fv e) + (remove-duplicates (fv* e))) + +(define (fv* e) + (match e + [(Var x) (list x)] + [(Prim1 p e) (fv* e)] + [(Prim2 p e1 e2) (append (fv* e1) (fv* e2))] + [(Prim3 p e1 e2 e3) (append (fv* e1) (fv* e2) (fv* e3))] + [(If e1 e2 e3) (append (fv* e1) (fv* e2) (fv* e3))] + [(Begin e1 e2) (append (fv* e1) (fv* e2))] + [(Let x e1 e2) (append (fv* e1) (remq* (list x) (fv* e2)))] + [(App e1 es) (append (fv* e1) (append-map fv* es))] + [(Lam f xs e) (remq* xs (fv* e))] + [_ '()])) diff --git a/langs/mug/heap.h b/langs/mug/heap.h new file mode 100644 index 00000000..8f2f5e23 --- /dev/null +++ b/langs/mug/heap.h @@ -0,0 +1,9 @@ +#include + +extern int64_t heap[]; +extern int from_side; + +extern char type[]; + +// in words +#define heap_size 1001 diff --git a/langs/mug/interp-defun.rkt b/langs/mug/interp-defun.rkt new file mode 100644 index 00000000..b4d57880 --- /dev/null +++ b/langs/mug/interp-defun.rkt @@ -0,0 +1,160 @@ +#lang racket +(provide interp interp-env (struct-out Closure) zip) +(require "ast.rkt" + "env.rkt" + "interp-prims.rkt") + +;; type Answer = Value | 'err + +;; type Value = +;; | Integer +;; | Boolean +;; | Character +;; | Eof +;; | Void +;; | '() +;; | (cons Value Value) +;; | (box Value) +;; | (vector Value ...) +;; | (string Char ...) +;; | (Closure [Listof Id] Expr Env) +(struct Closure (xs e r) #:prefab) + +;; type REnv = (Listof (List Id Value)) +;; type Defns = (Listof Defn) + +;; Prog Defns -> Answer +(define (interp p) + (match p + [(Prog ds e) + (interp-env e '() ds)])) + +;; Expr Env Defns -> Answer +(define (interp-env e r ds) + (match e + [(Int i) i] + [(Bool b) b] + [(Char c) c] + [(Eof) eof] + [(Empty) '()] + [(Var x) (interp-var x r ds)] + [(Str s) s] + [(Symb s) s] + [(Prim0 'void) (void)] + [(Prim0 'read-byte) (read-byte)] + [(Prim0 'peek-byte) (peek-byte)] + [(Prim1 p e) + (match (interp-env e r ds) + ['err 'err] + [v (interp-prim1 p v)])] + [(Prim2 p e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (interp-prim2 p v1 v2)])])] + [(Prim3 p e1 e2 e3) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (match (interp-env e3 r ds) + ['err 'err] + [v3 (interp-prim3 p v1 v2 v3)])])])] + [(If p e1 e2) + (match (interp-env p r ds) + ['err 'err] + [v + (if v + (interp-env e1 r ds) + (interp-env e2 r ds))])] + [(Begin e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [_ (interp-env e2 r ds)])] + [(Let x e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v (interp-env e2 (ext r x v) ds)])] + [(Lam _ xs e) + (Closure xs e r)] + [(App e es) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (match f + [(Closure xs e r) + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err)] + [_ 'err])])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) + +;; Id Env [Listof Defn] -> Answer +(define (interp-var x r ds) + (match (lookup r x) + ['err (match (defns-lookup ds x) + [(Defn f xs e) (interp-env (Lam f xs e) '() ds)] + [#f 'err])] + [v v])) + +;; (Listof Expr) REnv Defns -> (Listof Value) | 'err +(define (interp-env* es r ds) + (match es + ['() '()] + [(cons e es) + (match (interp-env e r ds) + ['err 'err] + [v (cons v (interp-env* es r ds))])])) + +;; Defns Symbol -> [Maybe Defn] +(define (defns-lookup ds f) + (findf (match-lambda [(Defn g _ _) (eq? f g)]) + ds)) + +(define (zip xs ys) + (match* (xs ys) + [('() '()) '()] + [((cons x xs) (cons y ys)) + (cons (list x y) + (zip xs ys))])) diff --git a/langs/mug/interp-env.rkt b/langs/mug/interp-env.rkt deleted file mode 100644 index c63284ee..00000000 --- a/langs/mug/interp-env.rkt +++ /dev/null @@ -1,190 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require (only-in "syntax.rkt" prim?)) - -;; type Expr = -;; | Integer -;; | Boolean -;; | Character -;; | String -;; | Symbol -;; | Variable -;; | '() -;; | `(box ,Expr) -;; | `(if ,Expr ,Expr ,Expr) -;; | `(let ,(List Variable Expr) ... ,Expr) -;; | `(letrec ,(List Variable Lambda) ... ,Expr) -;; | `(λ ,Bindings ,Expr) -;; | `(apply ,Expr ,Expr) -;; | `(,Prim ,Expr ...) -;; | `(,Expr ,Expr ...) - -;; type Value = -;; | Integer -;; | Boolean -;; | Character -;; | String -;; | Symbol -;; | '() -;; | (Box Value) -;; | (Cons Value Value) -;; | Function - -;; type Function = -;; | (Values ... -> Answer) - -;; type Answer = Value | 'err - -;; type REnv = (Listof (List Variable Value)) - -;; Expr REnv -> Answer -(define (interp-env e r) - (match e - ;; produce fresh strings each time a literal is eval'd - [(? string? s) (string-copy s)] - [(? value? v) v] - [''() '()] - [`',(? symbol? s) s] - [`(if ,e0 ,e1 ,e2) - (match (interp-env e0 r) - ['err 'err] - [v - (if v - (interp-env e1 r) - (interp-env e2 r))])] - [(? symbol? x) - (lookup r x)] - [`(let (,`(,xs ,es) ...) ,e) - (match (interp-env* es r) - ['err 'err] - [vs - (interp-env e (append (zip xs vs) r))])] - [`(letrec (,`(,xs ,es) ...) ,e) - (letrec ((r* (λ () - (append - (zip xs - (map (λ (l) (λ vs (apply (interp-env l (r*)) vs))) - es)) - r)))) - (interp-env e (r*)))] - [`(λ (,xs ...) ,e) - (λ vs - (if (= (length vs) (length xs)) - (interp-env e (append (zip xs vs) r)) - 'err))] - [`(λ (,xs ... . ,x) ,e) - (λ vs - (if (>= (length vs) (length xs)) - (interp-env e (append (zip/remainder xs vs x) r)) - 'err))] - [`(apply ,e0 ,e1) - (let ((v0 (interp-env e0 r)) - (vs (interp-env e1 r))) - (if (list? vs) - (apply v0 vs) - 'err))] - [`(,(? prim? p) ,es ...) - (let ((as (interp-env* es r))) - (interp-prim p as))] - [`(,e ,es ...) - (match (interp-env* (cons e es) r) - [(list f vs ...) - (if (procedure? f) - (apply f vs) - 'err)] - ['err 'err])] - [_ 'err])) - -;; (Listof Expr) REnv (Listof Defn) -> (Listof Value) | 'err -(define (interp-env* es r) - (match es - ['() '()] - [(cons e es) - (match (interp-env e r) - ['err 'err] - [v (cons v (interp-env* es r))])])) - -;; Any -> Boolean -(define (value? x) - (or (integer? x) - (boolean? x) - (char? x) - (string? x))) - -;; Prim (Listof Answer) -> Answer -(define (interp-prim p as) - (match (cons p as) - [(list p (? value?) ... 'err _ ...) 'err] - [(list '- (? integer? i0)) (- i0)] - [(list '- (? integer? i0) (? integer? i1)) (- i0 i1)] - [(list 'abs (? integer? i0)) (abs i0)] - [(list 'add1 (? integer? i0)) (+ i0 1)] - [(list 'sub1 (? integer? i0)) (- i0 1)] - [(list 'zero? (? integer? i0)) (zero? i0)] - [(list 'char? v0) (char? v0)] - [(list 'integer? v0) (integer? v0)] - [(list 'boolean? v0) (boolean? v0)] - [(list 'integer->char (? codepoint? i0)) (integer->char i0)] - [(list 'char->integer (? char? c)) (char->integer c)] - [(list '+ (? integer? i0) (? integer? i1)) (+ i0 i1)] - [(list 'cons v0 v1) (cons v0 v1)] - [(list 'car (? cons? v0)) (car v0)] - [(list 'cdr (? cons? v0)) (cdr v0)] - [(list 'string? v0) (string? v0)] - [(list 'box? v0) (box? v0)] - [(list 'empty? v0) (empty? v0)] - [(list 'cons? v0) (cons? v0)] - [(list 'cons v0 v1) (cons v0 v1)] - [(list 'box v0) (box v0)] - [(list 'unbox (? box? v0)) (unbox v0)] - [(list 'string-length (? string? v0)) (string-length v0)] - [(list 'make-string (? natural? v0) (? char? v1)) (make-string v0 v1)] - [(list 'string-ref (? string? v0) (? natural? v1)) - (if (< v1 (string-length v0)) - (string-ref v0 v1) - 'err)] - [(list '= (? integer? v0) (? integer? v1)) (= v0 v1)] - [(list '< (? integer? v0) (? integer? v1)) (< v0 v1)] - [(list '<= (? integer? v0) (? integer? v1)) (<= v0 v1)] - [(list 'char=? (? char? v0) (? char? v1)) (char=? v0 v1)] - [(list 'boolean=? (? boolean? v0) (? boolean? v1)) (boolean=? v0 v1)] - [(list 'eq? v0 v1) (eq? v0 v1)] - [(list 'gensym) (gensym)] - [(list 'symbol? v0) (symbol? v0)] - [(list 'procedure? v0) (procedure? v0)] - [_ 'err])) - -;; REnv Variable -> Answer -(define (lookup env x) - (match env - ['() 'err] - [(cons (list y v) env) - (match (symbol=? x y) - [#t v] - [#f (lookup env x)])])) - -;; REnv Variable Value -> Value -(define (ext r x v) - (cons (list x v) r)) - -;; Any -> Boolean -(define (codepoint? x) - (and (integer? x) - (<= 0 x #x10FFFF) - (not (<= #xD800 x #xDFFF)))) - -;; (Listof A) (Listof B) -> (Listof (List A B)) -(define (zip xs ys) - (match* (xs ys) - [('() '()) '()] - [((cons x xs) (cons y ys)) - (cons (list x y) (zip xs ys))])) - -;; like zip but ys can be longer and remainder is associated with r -(define (zip/remainder xs ys r) - (match* (xs ys) - [('() ys) (list (list r ys))] - [((cons x xs) (cons y ys)) - (cons (list x y) (zip/remainder xs ys r))])) - - diff --git a/langs/mug/interp-file.rkt b/langs/mug/interp-file.rkt new file mode 100644 index 00000000..c2490030 --- /dev/null +++ b/langs/mug/interp-file.rkt @@ -0,0 +1,15 @@ +#lang racket +(provide main) +(require "parse.rkt" "interp.rkt" "read-all.rkt") + +;; String -> Void +;; Parse and interpret contents of given filename, +;; print result on stdout +(define (main fn) + (let ((p (open-input-file fn))) + (begin + (read-line p) ; ignore #lang racket line + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) + (close-input-port p)))) diff --git a/langs/mug/interp-io.rkt b/langs/mug/interp-io.rkt new file mode 100644 index 00000000..93f7d3c6 --- /dev/null +++ b/langs/mug/interp-io.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide interp/io) +(require "interp.rkt") + +;; (Expr String -> String +;; Interpret e with given string as input, +;; collect output as string (including printed result) +(define (interp/io e in) + (parameterize ((current-output-port (open-output-string)) + (current-input-port (open-input-string in))) + (cons (interp e) + (get-output-string (current-output-port))))) diff --git a/langs/mug/interp-prims.rkt b/langs/mug/interp-prims.rkt new file mode 100644 index 00000000..7797de69 --- /dev/null +++ b/langs/mug/interp-prims.rkt @@ -0,0 +1,74 @@ +#lang racket +(require "ast.rkt") +(provide interp-prim1 interp-prim2 interp-prim3) + +;; Op1 Value -> Answer +(define (interp-prim1 p1 v) + (match (list p1 v) + [(list 'add1 (? integer?)) (add1 v)] + [(list 'sub1 (? integer?)) (sub1 v)] + [(list 'zero? (? integer?)) (zero? v)] + [(list 'char? v) (char? v)] + [(list 'char->integer (? char?)) (char->integer v)] + [(list 'integer->char (? codepoint?)) (integer->char v)] + [(list 'eof-object? v) (eof-object? v)] + [(list 'write-byte (? byte?)) (write-byte v)] + [(list 'box v) (box v)] + [(list 'unbox (? box?)) (unbox v)] + [(list 'car (? pair?)) (car v)] + [(list 'cdr (? pair?)) (cdr v)] + [(list 'empty? v) (empty? v)] + [(list 'cons? v) (cons? v)] + [(list 'box? v) (box? v)] + [(list 'vector? v) (vector? v)] + [(list 'vector-length (? vector?)) (vector-length v)] + [(list 'string? v) (string? v)] + [(list 'string-length (? string?)) (string-length v)] + [(list 'symbol? v) (symbol? v)] + [(list 'symbol->string (? symbol?)) (symbol->string v)] + [(list 'string->symbol (? string?)) (string->symbol v)] + [(list 'string->uninterned-symbol (? string?)) + (string->uninterned-symbol v)] + [_ 'err])) + +;; Op2 Value Value -> Answer +(define (interp-prim2 p v1 v2) + (match (list p v1 v2) + [(list '+ (? integer?) (? integer?)) (+ v1 v2)] + [(list '- (? integer?) (? integer?)) (- v1 v2)] + [(list '< (? integer?) (? integer?)) (< v1 v2)] + [(list '= (? integer?) (? integer?)) (= v1 v2)] + [(list 'cons v1 v2) (cons v1 v2)] + [(list 'eq? v1 v2) (eq? v1 v2)] + [(list 'make-vector (? integer?) _) + (if (<= 0 v1) + (make-vector v1 v2) + 'err)] + [(list 'vector-ref (? vector?) (? integer?)) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-ref v1 v2) + 'err)] + [(list 'make-string (? integer?) (? char?)) + (if (<= 0 v1) + (make-string v1 v2) + 'err)] + [(list 'string-ref (? string?) (? integer?)) + (if (<= 0 v2 (sub1 (string-length v1))) + (string-ref v1 v2) + 'err)] + [_ 'err])) + +;; Op3 Value Value Value -> Answer +(define (interp-prim3 p v1 v2 v3) + (match (list p v1 v2 v3) + [(list 'vector-set! (? vector?) (? integer?) _) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-set! v1 v2 v3) + 'err)] + [_ 'err])) + +;; Any -> Boolean +(define (codepoint? v) + (and (integer? v) + (or (<= 0 v 55295) + (<= 57344 v 1114111)))) diff --git a/langs/mug/interp.rkt b/langs/mug/interp.rkt index a1e7a2f1..3cc95f9f 100644 --- a/langs/mug/interp.rkt +++ b/langs/mug/interp.rkt @@ -1,75 +1,159 @@ #lang racket -(provide (all-defined-out)) -(require "syntax.rkt" - "interp-env.rkt") +(provide interp interp-env) +(require "ast.rkt" + "env.rkt" + "interp-prims.rkt") +;; type Answer = Value | 'err -(define (interp e) - (interp-env (desugar e) stdlib)) +;; type Value = +;; | Integer +;; | Boolean +;; | Character +;; | Eof +;; | Void +;; | '() +;; | (cons Value Value) +;; | (box Value) +;; | (vector Value ...) +;; | (string Char ...) +;; | (Value ... -> Answer) -(define stdlib - `((append ,append) - (list? ,list?) - (first ,first) - (second ,second) - (rest ,rest) - (reverse ,reverse) - (not ,not) - (compose ,compose) - (symbol=? ,symbol=?) - (memq ,memq) - (length ,length) - (remq* ,remq*) - (remove-duplicates ,remove-duplicates) - (remove ,remove) - (member ,member) - (equal? ,equal?))) +;; type REnv = (Listof (List Id Value)) +;; type Defns = (Listof Defn) +;; Prog Defns -> Answer +(define (interp p) + (match p + [(Prog ds e) + (interp-env e '() ds)])) -;; Expr REnv Natural -> Answer -(define (interp-qq d r n) - ;(println `(interp-qq ,d ,n)) - (match d - [`(,'unquote ,e) - (if (zero? n) - (interp-env (desugar e) r) ;! - (cons 'unquote (interp-qq-list e r (sub1 n))))] - [`(,'unquote-splicing ,e) 'err] - [`(,'quasiquote ,d) - (cons 'quasiquote (interp-qq-list d r (add1 n)))] - [`(,x . ,y) - (match (interp-qq-list x r n) +;; Expr Env Defns -> Answer +(define (interp-env e r ds) + (match e + [(Int i) i] + [(Bool b) b] + [(Char c) c] + [(Eof) eof] + [(Empty) '()] + [(Var x) (interp-var x r ds)] + [(Str s) s] + [(Symb s) s] + [(Prim0 'void) (void)] + [(Prim0 'read-byte) (read-byte)] + [(Prim0 'peek-byte) (peek-byte)] + [(Prim1 p e) + (match (interp-env e r ds) ['err 'err] - [xv (match (interp-qq y r n) + [v (interp-prim1 p v)])] + [(Prim2 p e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) ['err 'err] - ['() xv] - [yv (if (list? xv) - (append xv yv) - 'err)])])] - [d d])) - -;; Expr REnv Natural -> Answer -(define (interp-qq-list d r n) - ;(println `(interp-qq-list ,d ,n)) - (match d - [`(,'unquote ,e) - (if (zero? n) - (match (interp-env (desugar e) r) ;! - ['err 'err] - [v (list v)]) - (list (cons 'unquote (interp-qq-list e r (sub1 n)))))] - [`(,'unquote-splicing ,e) - (if (zero? n) - (interp-env e r) - (list (cons 'unquote-splicing (interp-qq-list e r (sub1 n)))))] - [`(,'quasiquote ,d) - (list (cons 'quasiquote (interp-qq-list d r (add1 n))))] - [`(,x . ,y) - (match (interp-qq-list x r n) + [v2 (interp-prim2 p v1 v2)])])] + [(Prim3 p e1 e2 e3) + (match (interp-env e1 r ds) ['err 'err] - [xv (match (interp-qq y r n) + [v1 (match (interp-env e2 r ds) ['err 'err] - [yv (list (append xv yv))])])] - [d (list d)])) + [v2 (match (interp-env e3 r ds) + ['err 'err] + [v3 (interp-prim3 p v1 v2 v3)])])])] + [(If p e1 e2) + (match (interp-env p r ds) + ['err 'err] + [v + (if v + (interp-env e1 r ds) + (interp-env e2 r ds))])] + [(Begin e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [_ (interp-env e2 r ds)])] + [(Let x e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v (interp-env e2 (ext r x v) ds)])] + [(Lam _ xs e) + (λ vs + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err))] + [(App e es) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (if (procedure? f) + (apply f vs) + 'err)])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) + +;; Id Env [Listof Defn] -> Answer +(define (interp-var x r ds) + (match (lookup r x) + ['err (match (defns-lookup ds x) + [(Defn f xs e) (interp-env (Lam f xs e) '() ds)] + [#f 'err])] + [v v])) + +;; (Listof Expr) REnv Defns -> (Listof Value) | 'err +(define (interp-env* es r ds) + (match es + ['() '()] + [(cons e es) + (match (interp-env e r ds) + ['err 'err] + [v (cons v (interp-env* es r ds))])])) + +;; Defns Symbol -> [Maybe Defn] +(define (defns-lookup ds f) + (findf (match-lambda [(Defn g _ _) (eq? f g)]) + ds)) - +(define (zip xs ys) + (match* (xs ys) + [('() '()) '()] + [((cons x xs) (cons y ys)) + (cons (list x y) + (zip xs ys))])) diff --git a/langs/mug/io.c b/langs/mug/io.c new file mode 100644 index 00000000..7ef82281 --- /dev/null +++ b/langs/mug/io.c @@ -0,0 +1,25 @@ +#include +#include +#include "types.h" +#include "values.h" +#include "runtime.h" + +val_t read_byte(void) +{ + char c = getc(in); + return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); +} + +val_t peek_byte(void) +{ + char c = getc(in); + ungetc(c, in); + return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + +} + +val_t write_byte(val_t c) +{ + putc((char) val_unwrap_int(c), out); + return val_wrap_void(); +} diff --git a/langs/mug/lambdas.rkt b/langs/mug/lambdas.rkt new file mode 100644 index 00000000..8ef29b80 --- /dev/null +++ b/langs/mug/lambdas.rkt @@ -0,0 +1,34 @@ +#lang racket +(require "ast.rkt") +(provide lambdas) + + +;; Prog -> [Listof Lam] +;; List all of the lambda expressions in p +(define (lambdas p) + (match p + [(Prog ds e) + (append (lambdas-ds ds) (lambdas-e e))])) + +;; Defns -> [Listof Lam] +;; List all of the lambda expressions in ds +(define (lambdas-ds ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (append (lambdas-e e) + (lambdas-ds ds))])) + +;; Expr -> [Listof Lam] +;; List all of the lambda expressions in e +(define (lambdas-e e) + (match e + [(Prim1 p e) (lambdas-e e)] + [(Prim2 p e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(Prim3 p e1 e2 e3) (append (lambdas-e e1) (lambdas-e e2) (lambdas-e e3))] + [(If e1 e2 e3) (append (lambdas-e e1) (lambdas-e e2) (lambdas-e e3))] + [(Begin e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(Let x e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(App e1 es) (append (lambdas-e e1) (append-map lambdas-e es))] + [(Lam f xs e1) (cons e (lambdas-e e1))] + [_ '()])) diff --git a/langs/mug/main.c b/langs/mug/main.c new file mode 100644 index 00000000..1ca6115f --- /dev/null +++ b/langs/mug/main.c @@ -0,0 +1,40 @@ +#include +#include +#include "values.h" +#include "print.h" +#include "runtime.h" + +FILE* in; +FILE* out; +void (*error_handler)(); +val_t *heap; + +void error_exit() +{ + printf("err\n"); + exit(1); +} + +void raise_error() +{ + return error_handler(); +} + +int main(int argc, char** argv) +{ + in = stdin; + out = stdout; + error_handler = &error_exit; + heap = malloc(8 * heap_size); + + val_t result; + + result = entry(heap); + + print_result(result); + if (val_typeof(result) != T_VOID) + putchar('\n'); + + free(heap); + return 0; +} diff --git a/langs/mug/parse-file.rkt b/langs/mug/parse-file.rkt new file mode 100644 index 00000000..a5021320 --- /dev/null +++ b/langs/mug/parse-file.rkt @@ -0,0 +1,13 @@ +#lang racket +(provide main) +(require "parse.rkt" "compile.rkt" "read-all.rkt" a86/printer) + +;; String -> Void +;; Compile contents of given file name, +;; emit asm code on stdout +(define (main fn) + (let ((p (open-input-file fn))) + (begin + (read-line p) ; ignore #lang racket line + (displayln (parse (read-all p))) + (close-input-port p)))) diff --git a/langs/mug/parse.rkt b/langs/mug/parse.rkt new file mode 100644 index 00000000..82a02de0 --- /dev/null +++ b/langs/mug/parse.rkt @@ -0,0 +1,99 @@ +#lang racket +(provide parse parse-define parse-e) +(require "ast.rkt") + +;; [Listof S-Expr] -> Prog +(define (parse s) + (match s + [(cons (and (cons 'define _) d) s) + (match (parse s) + [(Prog ds e) + (Prog (cons (parse-define d) ds) e)])] + [(cons e '()) (Prog '() (parse-e e))] + [_ (error "program parse error")])) + +;; S-Expr -> Defn +(define (parse-define s) + (match s + [(list 'define (list-rest (? symbol? f) xs) e) + (if (andmap symbol? xs) + (Defn f xs (parse-e e)) + (error "parse definition error"))] + [_ (error "Parse defn error" s)])) + +;; S-Expr -> Expr +(define (parse-e s) + (match s + [(? integer?) (Int s)] + [(? boolean?) (Bool s)] + [(? char?) (Char s)] + [(? string?) (Str s)] + ['eof (Eof)] + [(? symbol?) (Var s)] + [(list 'quote (list)) (Empty)] + [(list 'quote (? symbol? s)) (Symb s)] + [(list (? (op? op0) p0)) (Prim0 p0)] + [(list (? (op? op1) p1) e) (Prim1 p1 (parse-e e))] + [(list (? (op? op2) p2) e1 e2) (Prim2 p2 (parse-e e1) (parse-e e2))] + [(list (? (op? op3) p3) e1 e2 e3) + (Prim3 p3 (parse-e e1) (parse-e e2) (parse-e e3))] + [(list 'begin e1 e2) + (Begin (parse-e e1) (parse-e e2))] + [(list 'if e1 e2 e3) + (If (parse-e e1) (parse-e e2) (parse-e e3))] + [(list 'let (list (list (? symbol? x) e1)) e2) + (Let x (parse-e e1) (parse-e e2))] + [(cons 'match (cons e ms)) + (parse-match (parse-e e) ms)] + [(list (or 'lambda 'λ) xs e) + (if (and (list? xs) + (andmap symbol? xs)) + (Lam (gensym 'lambda) xs (parse-e e)) + (error "parse lambda error"))] + [(cons e es) + (App (parse-e e) (map parse-e es))] + [_ (error "Parse error" s)])) + +(define (parse-match e ms) + (match ms + ['() (Match e '() '())] + [(cons (list p r) ms) + (match (parse-match e ms) + [(Match e ps es) + (Match e + (cons (parse-pat p) ps) + (cons (parse-e r) es))])])) + +(define (parse-pat p) + (match p + [(? boolean?) (PLit p)] + [(? integer?) (PLit p)] + [(? char?) (PLit p)] + ['_ (PWild)] + [(? symbol?) (PVar p)] + [(list 'quote (list)) + (PLit '())] + [(list 'box p) + (PBox (parse-pat p))] + [(list 'cons p1 p2) + (PCons (parse-pat p1) (parse-pat p2))] + [(list 'and p1 p2) + (PAnd (parse-pat p1) (parse-pat p2))])) + +(define op0 + '(read-byte peek-byte void)) +(define op1 + '(add1 sub1 zero? char? write-byte eof-object? + integer->char char->integer + box unbox empty? cons? box? car cdr + vector? vector-length string? string-length + symbol? symbol->string string->symbol string->uninterned-symbol)) +(define op2 + '(+ - < = cons eq? make-vector vector-ref make-string string-ref)) +(define op3 + '(vector-set!)) + +(define (op? ops) + (λ (x) + (and (symbol? x) + (memq x ops)))) diff --git a/langs/mug/pat.rkt b/langs/mug/pat.rkt deleted file mode 100644 index 2a5c11f6..00000000 --- a/langs/mug/pat.rkt +++ /dev/null @@ -1,200 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -;; type Expr+ = -;; .... -;; | Match - -;; type Match = (match ,Expr+ ,(list Pat Expr+) ...) - -;; type Pat = -;; | #t -;; | #f -;; | Integer -;; | String -;; | Variable -;; | `_ -;; | `'() -;; | `(quote ,Symbol) -;; | `(cons ,Pat ,Pat) -;; | `(list ,Pat ...) -;; | `(? ,Expr ,Pat ...) - -;; Match -> Expr -;; Rewrite match expression into an equivalent cond expression -(define (match->cond m) - (match m - [(cons 'match (cons e mcs)) - (let ((x (gensym))) - `(let ((,x ,e)) - (cond ,@(map (λ (mc) - (match mc - [(list p e) - (list (pat-match p x) (pat-bind p x e))])) - mcs) - ;; fall through to error - [else (car '())])))])) - -;; Example -#; -(define (sum bt) - (match bt - ['leaf 0] - [(list 'node v l r) - (+ v - (+ (sum l) - (sum r)))])) -#; -(define (sum^ bt) - (cond - [(eq? 'leaf bt) 0] - [(and (list? bt) - (= 4 (length bt)) - (eq? 'node (first bt))) - (let ((v (second bt)) - (l (third bt)) - (r (fourth bt))) - (+ v - (+ (sum l) - (sum r))))])) - -#; -`(define (sum bt) - ,(match->cond - '(match bt - ['leaf 0] - [(list 'node v l r) - (+ v - (+ (sum l) - (sum r)))]))) - -;; Two tasks: -;; 1. rewrite patterns into Boolean valued expressions that answer -;; whether the pattern matches the scrutiny -;; 2. rewrite pattern and RHS in to expressions in which the pattern variables -;; of pattern are bound to the appropriately deconstructed parts of the scrutiny - -;; Assume: the scrutiny is a variable. -;; (It's easy to establish this assumption in general.) - -;; Two functions: - -#; -;; Pat Variable -> Expr -;; Produces an expression determining if p matches v -(define (pat-match p v) ...) - -#; -;; Pat Variable Expr -> Expr -;; Produce an expression that deconstructs v and binds pattern variables -;; of p in scope of e. -;; ASSUME: v matches p -(define (pat-bind p v e) ...) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Pattern matching - -;; Pat Variable -> Expr -;; Produces an expression determining if p matches v -(define (pat-match p v) - (match p - [#t `(eq? #t ,v)] - [#f `(eq? #f ,v)] - [(? integer? i) `(eq? ,i ,v)] - [(? string? s) - `(and (string? ,v) - (string=? ,s ,v))] - [(list 'quote '()) `(eq? '() ,v)] - [(? symbol?) #t] - [(list 'quote (? symbol? s)) `(eq? ,v ',s)] - [(list 'cons p1 p2) - (let ((v1 (gensym)) - (v2 (gensym))) - `(and (cons? ,v) - (let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - (and ,(pat-match p1 v1) - ,(pat-match p2 v2)))))] - [(cons 'list ps) - `(and (list? ,v) - (= (length ,v) ,(length ps)) - ,(pat-match-list ps v))] - [(cons '? (cons e ps)) - `(and (,e ,v) - ,(pats-match ps v))])) - -;; (Listof Pat) Variable -> Expr -;; Produces an expression determining if every ps matches x -(define (pats-match ps v) - (match ps - ['() #t] - [(cons p ps) - `(and ,(pat-match p v) - ,(pats-match ps v))])) - -;; (Listof Pat) Variable -> Expr -;; Produces an expression determining if each ps matches each element of list v -(define (pat-match-list ps v) - (match ps - ['() #t] - [(cons p ps) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - (and ,(pat-match p v1) - ,(pat-match-list ps v2))))])) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Pattern binding - -;; Pat Variable Expr -> Expr -;; Produce an expression that deconstructs v and binds pattern variables -;; of p in scope of e. -;; ASSUME: v matches p -(define (pat-bind p v e) - (match p - [#t e] - [#f e] - [(? integer?) e] - [(? string?) e] - [(list 'quote '()) e] - ['_ e] - [(? symbol? x) `(let ((,x ,v)) ,e)] - [(list 'quote (? symbol?)) e] - [(list 'cons p1 p2) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - ,(pat-bind p1 v1 - (pat-bind p2 v2 e))))] - [(cons 'list ps) - (pat-bind-list ps v e)] - [(cons '? (cons _ ps)) - (pats-bind ps v e)])) - -;; (Listof Pat) Variable Expr -> Expr -;; Produce an expression that doconstructs v and binds pattern variables -;; of ps (each matched against v) in scope of e. -;; ASSUME: v matches every element of ps -(define (pats-bind ps v e) - (match ps - ['() e] - [(cons p ps) - (pat-bind p v (pats-bind ps v e))])) - -;; (Listof Pat) Variable Expr -> Expr -;; Produce an expression that deconstructs list v and binds pattern variables -;; of ps (matched element-wise against v) in scope of e. -;; ASSUME: elemens of v matches elements of ps -(define (pat-bind-list ps v e) - (match ps - ['() e] - [(cons p ps) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - ,(pat-bind p v1 (pat-bind-list ps v2 e))))])) diff --git a/langs/mug/print.c b/langs/mug/print.c new file mode 100644 index 00000000..2bcb21dc --- /dev/null +++ b/langs/mug/print.c @@ -0,0 +1,855 @@ +#include +#include +#include "values.h" + +void print_char(val_char_t); +void print_codepoint(val_char_t); +void print_cons(val_cons_t *); +void print_vect(val_vect_t*); +void print_str(val_str_t*); +void print_symb(val_symb_t*); +void print_str_char(val_char_t); +void print_result_interior(val_t); +int utf8_encode_char(val_char_t, char *); + +void print_result(val_t x) +{ + switch (val_typeof(x)) { + case T_INT: + printf("%" PRId64, val_unwrap_int(x)); + break; + case T_BOOL: + printf(val_unwrap_bool(x) ? "#t" : "#f"); + break; + case T_CHAR: + print_char(val_unwrap_char(x)); + break; + case T_EOF: + printf("#"); + break; + case T_VOID: + break; + case T_EMPTY: + case T_BOX: + case T_CONS: + case T_VECT: + printf("'"); + print_result_interior(x); + break; + case T_STR: + putchar('"'); + print_str(val_unwrap_str(x)); + putchar('"'); + break; + case T_SYMB: + printf("'"); + print_result_interior(x); + break; + case T_PROC: + printf("#"); + break; + case T_INVALID: + printf("internal error"); + } +} + +void print_symb(val_symb_t *s) +{ + print_str((val_str_t*) s); +} + +void print_result_interior(val_t x) +{ + switch (val_typeof(x)) { + case T_EMPTY: + printf("()"); + break; + case T_BOX: + printf("#&"); + print_result_interior(val_unwrap_box(x)->val); + break; + case T_CONS: + printf("("); + print_cons(val_unwrap_cons(x)); + printf(")"); + break; + case T_SYMB: + print_symb(val_unwrap_symb(x)); + break; + case T_VECT: + print_vect(val_unwrap_vect(x)); + break; + default: + print_result(x); + } +} + +void print_vect(val_vect_t *v) +{ + uint64_t i; + + if (!v) { printf("#()"); return; } + + printf("#("); + for (i = 0; i < v->len; ++i) { + print_result_interior(v->elems[i]); + + if (i < v->len - 1) + putchar(' '); + } + printf(")"); +} + +void print_cons(val_cons_t *cons) +{ + print_result_interior(cons->fst); + + switch (val_typeof(cons->snd)) { + case T_EMPTY: + // nothing + break; + case T_CONS: + printf(" "); + print_cons(val_unwrap_cons(cons->snd)); + break; + default: + printf(" . "); + print_result_interior(cons->snd); + break; + } +} + +void print_str(val_str_t* s) +{ + if (!s) return; + uint64_t i; + for (i = 0; i < s->len; ++i) + print_str_char(s->codepoints[i]); +} + +void print_str_char_u(val_char_t c) +{ + printf("\\u%04X", c); +} + +void print_str_char_U(val_char_t c) +{ + printf("\\U%08X", c); +} + +void print_str_char(val_char_t c) +{ + switch (c) { + case 0 ... 6: + print_str_char_u(c); + break; + case 7: + printf("\\a"); + break; + case 8: + printf("\\b"); + break; + case 9: + printf("\\t"); + break; + case 10: + printf("\\n"); + break; + case 11: + printf("\\v"); + break; + case 12: + printf("\\f"); + break; + case 13: + printf("\\r"); + break; + case 14 ... 26: + print_str_char_u(c); + break; + case 27: + printf("\\e"); + break; + case 28 ... 31: + print_str_char_u(c); + break; + case 34: + printf("\\\""); + break; + case 39: + printf("'"); + break; + case 92: + printf("\\\\"); + break; + case 127 ... 159: + case 173 ... 173: + case 888 ... 889: + case 896 ... 899: + case 907 ... 907: + case 909 ... 909: + case 930 ... 930: + case 1328 ... 1328: + case 1367 ... 1368: + case 1376 ... 1376: + case 1416 ... 1416: + case 1419 ... 1420: + case 1424 ... 1424: + case 1480 ... 1487: + case 1515 ... 1519: + case 1525 ... 1541: + case 1564 ... 1565: + case 1757 ... 1757: + case 1806 ... 1807: + case 1867 ... 1868: + case 1970 ... 1983: + case 2043 ... 2047: + case 2094 ... 2095: + case 2111 ... 2111: + case 2140 ... 2141: + case 2143 ... 2207: + case 2227 ... 2275: + case 2436 ... 2436: + case 2445 ... 2446: + case 2449 ... 2450: + case 2473 ... 2473: + case 2481 ... 2481: + case 2483 ... 2485: + case 2490 ... 2491: + case 2501 ... 2502: + case 2505 ... 2506: + case 2511 ... 2518: + case 2520 ... 2523: + case 2526 ... 2526: + case 2532 ... 2533: + case 2556 ... 2560: + case 2564 ... 2564: + case 2571 ... 2574: + case 2577 ... 2578: + case 2601 ... 2601: + case 2609 ... 2609: + case 2612 ... 2612: + case 2615 ... 2615: + case 2618 ... 2619: + case 2621 ... 2621: + case 2627 ... 2630: + case 2633 ... 2634: + case 2638 ... 2640: + case 2642 ... 2648: + case 2653 ... 2653: + case 2655 ... 2661: + case 2678 ... 2688: + case 2692 ... 2692: + case 2702 ... 2702: + case 2706 ... 2706: + case 2729 ... 2729: + case 2737 ... 2737: + case 2740 ... 2740: + case 2746 ... 2747: + case 2758 ... 2758: + case 2762 ... 2762: + case 2766 ... 2767: + case 2769 ... 2783: + case 2788 ... 2789: + case 2802 ... 2816: + case 2820 ... 2820: + case 2829 ... 2830: + case 2833 ... 2834: + case 2857 ... 2857: + case 2865 ... 2865: + case 2868 ... 2868: + case 2874 ... 2875: + case 2885 ... 2886: + case 2889 ... 2890: + case 2894 ... 2901: + case 2904 ... 2907: + case 2910 ... 2910: + case 2916 ... 2917: + case 2936 ... 2945: + case 2948 ... 2948: + case 2955 ... 2957: + case 2961 ... 2961: + case 2966 ... 2968: + case 2971 ... 2971: + case 2973 ... 2973: + case 2976 ... 2978: + case 2981 ... 2983: + case 2987 ... 2989: + case 3002 ... 3005: + case 3011 ... 3013: + case 3017 ... 3017: + case 3022 ... 3023: + case 3025 ... 3030: + case 3032 ... 3045: + case 3067 ... 3071: + case 3076 ... 3076: + case 3085 ... 3085: + case 3089 ... 3089: + case 3113 ... 3113: + case 3130 ... 3132: + case 3141 ... 3141: + case 3145 ... 3145: + case 3150 ... 3156: + case 3159 ... 3159: + case 3162 ... 3167: + case 3172 ... 3173: + case 3184 ... 3191: + case 3200 ... 3200: + case 3204 ... 3204: + case 3213 ... 3213: + case 3217 ... 3217: + case 3241 ... 3241: + case 3252 ... 3252: + case 3258 ... 3259: + case 3269 ... 3269: + case 3273 ... 3273: + case 3278 ... 3284: + case 3287 ... 3293: + case 3295 ... 3295: + case 3300 ... 3301: + case 3312 ... 3312: + case 3315 ... 3328: + case 3332 ... 3332: + case 3341 ... 3341: + case 3345 ... 3345: + case 3387 ... 3388: + case 3397 ... 3397: + case 3401 ... 3401: + case 3407 ... 3414: + case 3416 ... 3423: + case 3428 ... 3429: + case 3446 ... 3448: + case 3456 ... 3457: + case 3460 ... 3460: + case 3479 ... 3481: + case 3506 ... 3506: + case 3516 ... 3516: + case 3518 ... 3519: + case 3527 ... 3529: + case 3531 ... 3534: + case 3541 ... 3541: + case 3543 ... 3543: + case 3552 ... 3557: + case 3568 ... 3569: + case 3573 ... 3584: + case 3643 ... 3646: + case 3676 ... 3712: + case 3715 ... 3715: + case 3717 ... 3718: + case 3721 ... 3721: + case 3723 ... 3724: + case 3726 ... 3731: + case 3736 ... 3736: + case 3744 ... 3744: + case 3748 ... 3748: + case 3750 ... 3750: + case 3752 ... 3753: + case 3756 ... 3756: + case 3770 ... 3770: + case 3774 ... 3775: + case 3781 ... 3781: + case 3783 ... 3783: + case 3790 ... 3791: + case 3802 ... 3803: + case 3808 ... 3839: + case 3912 ... 3912: + case 3949 ... 3952: + case 3992 ... 3992: + case 4029 ... 4029: + case 4045 ... 4045: + case 4059 ... 4095: + case 4294 ... 4294: + case 4296 ... 4300: + case 4302 ... 4303: + case 4681 ... 4681: + case 4686 ... 4687: + case 4695 ... 4695: + case 4697 ... 4697: + case 4702 ... 4703: + case 4745 ... 4745: + case 4750 ... 4751: + case 4785 ... 4785: + case 4790 ... 4791: + case 4799 ... 4799: + case 4801 ... 4801: + case 4806 ... 4807: + case 4823 ... 4823: + case 4881 ... 4881: + case 4886 ... 4887: + case 4955 ... 4956: + case 4989 ... 4991: + case 5018 ... 5023: + case 5109 ... 5119: + case 5789 ... 5791: + case 5881 ... 5887: + case 5901 ... 5901: + case 5909 ... 5919: + case 5943 ... 5951: + case 5972 ... 5983: + case 5997 ... 5997: + case 6001 ... 6001: + case 6004 ... 6015: + case 6110 ... 6111: + case 6122 ... 6127: + case 6138 ... 6143: + case 6158 ... 6159: + case 6170 ... 6175: + case 6264 ... 6271: + case 6315 ... 6319: + case 6390 ... 6399: + case 6431 ... 6431: + case 6444 ... 6447: + case 6460 ... 6463: + case 6465 ... 6467: + case 6510 ... 6511: + case 6517 ... 6527: + case 6572 ... 6575: + case 6602 ... 6607: + case 6619 ... 6621: + case 6684 ... 6685: + case 6751 ... 6751: + case 6781 ... 6782: + case 6794 ... 6799: + case 6810 ... 6815: + case 6830 ... 6831: + case 6847 ... 6911: + case 6988 ... 6991: + case 7037 ... 7039: + case 7156 ... 7163: + case 7224 ... 7226: + case 7242 ... 7244: + case 7296 ... 7359: + case 7368 ... 7375: + case 7415 ... 7415: + case 7418 ... 7423: + case 7670 ... 7675: + case 7958 ... 7959: + case 7966 ... 7967: + case 8006 ... 8007: + case 8014 ... 8015: + case 8024 ... 8024: + case 8026 ... 8026: + case 8028 ... 8028: + case 8030 ... 8030: + case 8062 ... 8063: + case 8117 ... 8117: + case 8133 ... 8133: + case 8148 ... 8149: + case 8156 ... 8156: + case 8176 ... 8177: + case 8181 ... 8181: + case 8191 ... 8191: + case 8203 ... 8207: + case 8232 ... 8238: + case 8288 ... 8303: + case 8306 ... 8307: + case 8335 ... 8335: + case 8349 ... 8351: + case 8382 ... 8399: + case 8433 ... 8447: + case 8586 ... 8591: + case 9211 ... 9215: + case 9255 ... 9279: + case 9291 ... 9311: + case 11124 ... 11125: + case 11158 ... 11159: + case 11194 ... 11196: + case 11209 ... 11209: + case 11218 ... 11263: + case 11311 ... 11311: + case 11359 ... 11359: + case 11508 ... 11512: + case 11558 ... 11558: + case 11560 ... 11564: + case 11566 ... 11567: + case 11624 ... 11630: + case 11633 ... 11646: + case 11671 ... 11679: + case 11687 ... 11687: + case 11695 ... 11695: + case 11703 ... 11703: + case 11711 ... 11711: + case 11719 ... 11719: + case 11727 ... 11727: + case 11735 ... 11735: + case 11743 ... 11743: + case 11843 ... 11903: + case 11930 ... 11930: + case 12020 ... 12031: + case 12246 ... 12271: + case 12284 ... 12287: + case 12352 ... 12352: + case 12439 ... 12440: + case 12544 ... 12548: + case 12590 ... 12592: + case 12687 ... 12687: + case 12731 ... 12735: + case 12772 ... 12783: + case 12831 ... 12831: + case 13055 ... 13055: + case 19894 ... 19903: + case 40909 ... 40959: + case 42125 ... 42127: + case 42183 ... 42191: + case 42540 ... 42559: + case 42654 ... 42654: + case 42744 ... 42751: + case 42895 ... 42895: + case 42926 ... 42927: + case 42930 ... 42998: + case 43052 ... 43055: + case 43066 ... 43071: + case 43128 ... 43135: + case 43205 ... 43213: + case 43226 ... 43231: + case 43260 ... 43263: + case 43348 ... 43358: + case 43389 ... 43391: + case 43470 ... 43470: + case 43482 ... 43485: + case 43519 ... 43519: + case 43575 ... 43583: + case 43598 ... 43599: + case 43610 ... 43611: + case 43715 ... 43738: + case 43767 ... 43776: + case 43783 ... 43784: + case 43791 ... 43792: + case 43799 ... 43807: + case 43815 ... 43815: + case 43823 ... 43823: + case 43872 ... 43875: + case 43878 ... 43967: + case 44014 ... 44015: + case 44026 ... 44031: + case 55204 ... 55215: + case 55239 ... 55242: + case 55292 ... 55295: + case 57344 ... 63743: + case 64110 ... 64111: + case 64218 ... 64255: + case 64263 ... 64274: + case 64280 ... 64284: + case 64311 ... 64311: + case 64317 ... 64317: + case 64319 ... 64319: + case 64322 ... 64322: + case 64325 ... 64325: + case 64450 ... 64466: + case 64832 ... 64847: + case 64912 ... 64913: + case 64968 ... 65007: + case 65022 ... 65023: + case 65050 ... 65055: + case 65070 ... 65071: + case 65107 ... 65107: + case 65127 ... 65127: + case 65132 ... 65135: + case 65141 ... 65141: + case 65277 ... 65280: + case 65471 ... 65473: + case 65480 ... 65481: + case 65488 ... 65489: + case 65496 ... 65497: + case 65501 ... 65503: + case 65511 ... 65511: + case 65519 ... 65531: + case 65534 ... 65535: + print_str_char_u(c); + break; + case 65548 ... 65548: + case 65575 ... 65575: + case 65595 ... 65595: + case 65598 ... 65598: + case 65614 ... 65615: + case 65630 ... 65663: + case 65787 ... 65791: + case 65795 ... 65798: + case 65844 ... 65846: + case 65933 ... 65935: + case 65948 ... 65951: + case 65953 ... 65999: + case 66046 ... 66175: + case 66205 ... 66207: + case 66257 ... 66271: + case 66300 ... 66303: + case 66340 ... 66351: + case 66379 ... 66383: + case 66427 ... 66431: + case 66462 ... 66462: + case 66500 ... 66503: + case 66518 ... 66559: + case 66718 ... 66719: + case 66730 ... 66815: + case 66856 ... 66863: + case 66916 ... 66926: + case 66928 ... 67071: + case 67383 ... 67391: + case 67414 ... 67423: + case 67432 ... 67583: + case 67590 ... 67591: + case 67593 ... 67593: + case 67638 ... 67638: + case 67641 ... 67643: + case 67645 ... 67646: + case 67670 ... 67670: + case 67743 ... 67750: + case 67760 ... 67839: + case 67868 ... 67870: + case 67898 ... 67902: + case 67904 ... 67967: + case 68024 ... 68029: + case 68032 ... 68095: + case 68100 ... 68100: + case 68103 ... 68107: + case 68116 ... 68116: + case 68120 ... 68120: + case 68148 ... 68151: + case 68155 ... 68158: + case 68168 ... 68175: + case 68185 ... 68191: + case 68256 ... 68287: + case 68327 ... 68330: + case 68343 ... 68351: + case 68406 ... 68408: + case 68438 ... 68439: + case 68467 ... 68471: + case 68498 ... 68504: + case 68509 ... 68520: + case 68528 ... 68607: + case 68681 ... 69215: + case 69247 ... 69631: + case 69710 ... 69713: + case 69744 ... 69758: + case 69821 ... 69821: + case 69826 ... 69839: + case 69865 ... 69871: + case 69882 ... 69887: + case 69941 ... 69941: + case 69956 ... 69967: + case 70007 ... 70015: + case 70089 ... 70092: + case 70094 ... 70095: + case 70107 ... 70112: + case 70133 ... 70143: + case 70162 ... 70162: + case 70206 ... 70319: + case 70379 ... 70383: + case 70394 ... 70400: + case 70404 ... 70404: + case 70413 ... 70414: + case 70417 ... 70418: + case 70441 ... 70441: + case 70449 ... 70449: + case 70452 ... 70452: + case 70458 ... 70459: + case 70469 ... 70470: + case 70473 ... 70474: + case 70478 ... 70486: + case 70488 ... 70492: + case 70500 ... 70501: + case 70509 ... 70511: + case 70517 ... 70783: + case 70856 ... 70863: + case 70874 ... 71039: + case 71094 ... 71095: + case 71114 ... 71167: + case 71237 ... 71247: + case 71258 ... 71295: + case 71352 ... 71359: + case 71370 ... 71839: + case 71923 ... 71934: + case 71936 ... 72383: + case 72441 ... 73727: + case 74649 ... 74751: + case 74863 ... 74863: + case 74869 ... 77823: + case 78895 ... 92159: + case 92729 ... 92735: + case 92767 ... 92767: + case 92778 ... 92781: + case 92784 ... 92879: + case 92910 ... 92911: + case 92918 ... 92927: + case 92998 ... 93007: + case 93018 ... 93018: + case 93026 ... 93026: + case 93048 ... 93052: + case 93072 ... 93951: + case 94021 ... 94031: + case 94079 ... 94094: + case 94112 ... 110591: + case 110594 ... 113663: + case 113771 ... 113775: + case 113789 ... 113791: + case 113801 ... 113807: + case 113818 ... 113819: + case 113824 ... 118783: + case 119030 ... 119039: + case 119079 ... 119080: + case 119155 ... 119162: + case 119262 ... 119295: + case 119366 ... 119551: + case 119639 ... 119647: + case 119666 ... 119807: + case 119893 ... 119893: + case 119965 ... 119965: + case 119968 ... 119969: + case 119971 ... 119972: + case 119975 ... 119976: + case 119981 ... 119981: + case 119994 ... 119994: + case 119996 ... 119996: + case 120004 ... 120004: + case 120070 ... 120070: + case 120075 ... 120076: + case 120085 ... 120085: + case 120093 ... 120093: + case 120122 ... 120122: + case 120127 ... 120127: + case 120133 ... 120133: + case 120135 ... 120137: + case 120145 ... 120145: + case 120486 ... 120487: + case 120780 ... 120781: + case 120832 ... 124927: + case 125125 ... 125126: + case 125143 ... 126463: + case 126468 ... 126468: + case 126496 ... 126496: + case 126499 ... 126499: + case 126501 ... 126502: + case 126504 ... 126504: + case 126515 ... 126515: + case 126520 ... 126520: + case 126522 ... 126522: + case 126524 ... 126529: + case 126531 ... 126534: + case 126536 ... 126536: + case 126538 ... 126538: + case 126540 ... 126540: + case 126544 ... 126544: + case 126547 ... 126547: + case 126549 ... 126550: + case 126552 ... 126552: + case 126554 ... 126554: + case 126556 ... 126556: + case 126558 ... 126558: + case 126560 ... 126560: + case 126563 ... 126563: + case 126565 ... 126566: + case 126571 ... 126571: + case 126579 ... 126579: + case 126584 ... 126584: + case 126589 ... 126589: + case 126591 ... 126591: + case 126602 ... 126602: + case 126620 ... 126624: + case 126628 ... 126628: + case 126634 ... 126634: + case 126652 ... 126703: + case 126706 ... 126975: + case 127020 ... 127023: + case 127124 ... 127135: + case 127151 ... 127152: + case 127168 ... 127168: + case 127184 ... 127184: + case 127222 ... 127231: + case 127245 ... 127247: + case 127279 ... 127279: + case 127340 ... 127343: + case 127387 ... 127461: + case 127491 ... 127503: + case 127547 ... 127551: + case 127561 ... 127567: + case 127570 ... 127743: + case 127789 ... 127791: + case 127870 ... 127871: + case 127951 ... 127955: + case 127992 ... 127999: + case 128255 ... 128255: + case 128331 ... 128335: + case 128378 ... 128378: + case 128420 ... 128420: + case 128579 ... 128580: + case 128720 ... 128735: + case 128749 ... 128751: + case 128756 ... 128767: + case 128884 ... 128895: + case 128981 ... 129023: + case 129036 ... 129039: + case 129096 ... 129103: + case 129114 ... 129119: + case 129160 ... 129167: + case 129198 ... 131071: + case 173783 ... 173823: + case 177973 ... 177983: + case 178206 ... 194559: + case 195102 ... 917759: + case 918000 ... 1114110: + print_str_char_U(c); + break; + default: + print_codepoint(c); + break; + } +} + +void print_char(val_char_t c) +{ + printf("#\\"); + switch (c) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: + printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(c); + } +} + +void print_codepoint(val_char_t c) +{ + char buffer[5] = {0}; + utf8_encode_char(c, buffer); + printf("%s", buffer); +} + +int utf8_encode_char(val_char_t c, char *buffer) +{ + // Output to buffer using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (c < 128) { + buffer[0] = (char) c; + return 1; + } else if (c < 2048) { + buffer[0] = (char)(c >> 6) | 192; + buffer[1] = ((char) c & 63) | 128; + return 2; + } else if (c < 65536) { + buffer[0] = (char)(c >> 12) | 224; + buffer[1] = ((char)(c >> 6) & 63) | 128; + buffer[2] = ((char) c & 63) | 128; + return 3; + } else { + buffer[0] = (char)(c >> 18) | 240; + buffer[1] = ((char)(c >> 12) & 63) | 128; + buffer[2] = ((char)(c >> 6) & 63) | 128; + buffer[3] = ((char) c & 63) | 128; + return 4; + } +} diff --git a/langs/mug/print.h b/langs/mug/print.h new file mode 100644 index 00000000..c22081a2 --- /dev/null +++ b/langs/mug/print.h @@ -0,0 +1,8 @@ +#ifndef PRINT_H +#define PRINT_H + +#include "values.h" + +void print_result(val_t); + +#endif diff --git a/langs/mug/read-all.rkt b/langs/mug/read-all.rkt new file mode 100644 index 00000000..fd03042b --- /dev/null +++ b/langs/mug/read-all.rkt @@ -0,0 +1,8 @@ +#lang racket +(provide read-all) +;; read all s-expression until eof +(define (read-all p) + (let ((r (read p))) + (if (eof-object? r) + '() + (cons r (read-all p))))) diff --git a/langs/mug/runtime.h b/langs/mug/runtime.h new file mode 100644 index 00000000..f594f0f6 --- /dev/null +++ b/langs/mug/runtime.h @@ -0,0 +1,11 @@ +#ifndef RUNTIME_H +#define RUNTIME_H +int64_t entry(); +extern FILE* in; +extern FILE* out; +extern void (*error_handler)(); + +// in words +#define heap_size 10000 +extern int64_t *heap; +#endif /* RUNTIME_H */ diff --git a/langs/mug/symbol.c b/langs/mug/symbol.c new file mode 100644 index 00000000..5ad9f270 --- /dev/null +++ b/langs/mug/symbol.c @@ -0,0 +1,52 @@ +#include +#include "values.h" + +int symb_cmp(const val_symb_t *, const val_symb_t *); + +// binary tree node +struct Node { + val_symb_t* elem; + struct Node* left; + struct Node* right; +}; + +static struct Node *symbol_tbl = NULL; + +val_symb_t *intern_symbol(val_symb_t* symb) +{ + struct Node **curr = &symbol_tbl; + + while (*curr) { + struct Node *t = *curr; + int r = symb_cmp(symb, t->elem); + if (r == 0) { + // found it, so return saved pointer + return t->elem; + } else if (r < 0) { + curr = &t->left; + } else { + curr = &t->right; + } + } + + // wasn't found, so insert it and return pointer + *curr = calloc(1, sizeof(struct Node)); + (*curr)->elem = symb; + return (*curr)->elem; +} + +int symb_cmp(const val_symb_t *s1, const val_symb_t *s2) +{ + int64_t len1 = s1->len; + int64_t len2 = s2->len; + + int64_t len = len1 < len2 ? len1 : len2; + int i; + + for (i = 0; i < len; i++) { + if (s1->codepoints[i] != s2->codepoints[i]) + return s1->codepoints[i] - s2->codepoints[i]; + } + + return len1 - len2; +} diff --git a/langs/mug/syntax.rkt b/langs/mug/syntax.rkt deleted file mode 100644 index bad6439e..00000000 --- a/langs/mug/syntax.rkt +++ /dev/null @@ -1,194 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "pat.rkt") - -;; type Expr+ = -;; .... exprs with match, cond, begin/define, quote etc. - -;; type S-Expr = -;; | Boolean -;; | Integer -;; | String -;; | '() -;; | (Cons S-Expr S-Expr) - -;; Expr+ -> Expr -(define (desugar e+) - (match e+ - [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) - `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) - ,(desugar e))] - [(? symbol? x) x] - [(? imm? i) i] - [`',(? symbol? s) `',s] - [`',d (quote->expr d)] - [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] - [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] - [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] - [`(letrec ,bs ,e0) - `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) - ,(desugar e0))] - [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] - [`(match . ,_) (desugar (match->cond e+))] - [`(cond . ,_) (desugar (cond->if e+))] - [`(and . ,_) (desugar (and->if e+))] - [`(or . ,_) (desugar (or->if e+))] - [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) - -;; S-Expr -> Expr -;; Produce an expression that evaluates to given s-expression, without -;; use of quote (except for symbols and empty list) -(define (quote->expr d) - (match d - [(? boolean?) d] - [(? integer?) d] - [(? string?) d] - [(? char?) d] - [(? symbol?) (list 'quote d)] - [(cons x y) (list 'cons (quote->expr x) (quote->expr y))] - ['() ''()])) - -(define (quasiquote->expr d) - (match d - [(? boolean?) d] - [(? integer?) d] - [(? string?) d] - [(? char?) d] - [(? symbol?) (list 'quote d)] - [(cons 'quasiquote d) - (quasiquote->expr (quasiquote->expr d))] - [(cons 'unquote d) d] - [(cons 'unquote-splicing d) 'ERROR] - [(cons x y) - `(append ,(quasiquote->list-expr x) - ,(quasiquote->expr y))] - ['() ''()])) - -(define (quasiquote->list-expr d) - (match d - [(? symbol?) (list 'quote d)] - ['() ''()] - [(cons 'quasiquote d) - (quasiquote->expr (quasiquote->expr d))] - [(cons 'unquote d) `(list ,d)] - [(cons 'unquote-splicing d) d] - [(cons x y) - `(list (append ,(quasiquote->list-expr x) - ,(quasiquote->expr y)))] - [_ `'(,d)])) - -;; Expr -> Expr -(define (cond->if c) - (match c - [`(cond (else ,e)) e] - [`(cond (,c ,e) . ,r) - `(if ,c ,e (cond ,@r))])) - -;; Expr -> Expr -(define (and->if c) - (match c - [`(and) #t] - [`(and ,e) e] - [`(and ,e . ,r) - `(if ,e (and ,@r) #f)])) - -;; Expr -> Expr -(define (or->if c) - (match c - [`(or) #f] - [`(or ,e) e] - [`(or ,e . ,r) - (let ((x (gensym))) - `(let ((,x ,e)) - (if ,x ,x (or ,@r))))])) - - -(define (qq-expand x depth) - (match x - [(cons 'quasiquote r) - `(cons 'quasiquote ,(qq-expand r (add1 depth)))] - [(cons 'unquote r) - (cond [(> depth 0) - `(cons ','unquote ,(qq-expand r (sub1 depth)))] - [(and (not (empty? r)) - (empty? (cdr r))) - (car r)] - [else - (error "Illegal")])] - [(cons 'unqupte-splicing r) - (error "Illegal")] - [(cons a b) - `(append ,(qq-expand-list a depth) - ,(qq-expand b depth))] - [_ `',x])) - -(define (qq-expand-list x depth) - (match x - [(cons 'quasiquote r) - `(list (cons 'quasiquote ,(qq-expand r (add1 depth))))] - [(cons 'unquote r) - (cond [(> depth 0) `(list (cons ','unquote ,(qq-expand r (sub1 depth))))] - [else `(list . ,r)])] - [(cons 'unquote-splicing r) - (cond [(> depth 0) `(list (cons ','unquote-splicing ,(qq-expand r (sub1 depth))))] - [else `(append . ,r)])] - [_ - `'(,x)])) - - - -;; Any -> Boolean -(define (imm? x) - (or (integer? x) - (boolean? x) - (char? x) - (equal? ''() x))) - -;; Expr -> LExpr -(define (label-λ e) - (match e - [(? symbol? x) x] - [(? imm? i) i] - [`(,(? prim? p) . ,es) `(,p ,@(map label-λ es))] - [`(if ,e0 ,e1 ,e2) `(if ,(label-λ e0) ,(label-λ e1) ,(label-λ e2))] - [`(let ((,x ,e0)) ,e1) `(let ((,x ,(label-λ e0))) ,(label-λ e1))] - [`(letrec ,bs ,e0) `(letrec ,(map (λ (b) (list (first b) (label-λ (second b)))) bs) - ,(label-λ e0))] - [`(λ ,xs ,e0) `(λ ,xs ',(gensym) ,(label-λ e0))] - [`(,e . ,es) `(,(label-λ e) ,@(map label-λ es))])) - -;; LExpr -> (Listof LExpr) -;; Extract all the lambda expressions -(define (λs e) - (match e - [(? symbol? x) '()] - [(? imm? i) '()] - [`(,(? prim? p) . ,es) (append-map λs es)] - [`(if ,e0 ,e1 ,e2) (append (λs e0) (λs e1) (λs e2))] - [`(let ((,x ,e0)) ,e1) (append (λs e0) (λs e1))] - [`(letrec ,bs ,e0) (append (apply append (map (compose λs second) bs)) (λs e0))] - [`(λ ,xs ,l ,e0) (cons e (λs e0))] - [`(,e . ,es) (append (λs e) (apply append (map λs es)))])) - -;; LExpr -> (Listof Variable) -(define (fvs e) - (define (fvs e) - (match e - [(? symbol? x) (list x)] - [(? imm? i) '()] - [`(,(? prim? p) . ,es) (append-map fvs es)] - [`(if ,e0 ,e1 ,e2) (append (fvs e0) (fvs e1) (fvs e2))] - [`(let ((,x ,e0)) ,e1) (append (fvs e0) (remq* (list x) (fvs e1)))] - [`(letrec ,bs ,e0) (remq* (map first bs) - (apply append (fvs e0) (map fvs (map second bs))))] - [`(λ ,xs ,l ,e0) (remq* xs (fvs e0))] - [`(,e . ,es) (append (fvs e) (apply append (map fvs es)))])) - (remove-duplicates (fvs e))) - -;; Any -> Boolean -(define (prim? x) - (and (symbol? x) - (memq x '(add1 sub1 zero? abs - char? boolean? integer? integer->char char->integer - string? box? empty? cons cons? box unbox car cdr string-length - make-string string-ref = < <= char=? boolean=? + eq? gensym symbol? - procedure?)))) diff --git a/langs/mug/test/build-runtime.rkt b/langs/mug/test/build-runtime.rkt new file mode 100644 index 00000000..7023ee0b --- /dev/null +++ b/langs/mug/test/build-runtime.rkt @@ -0,0 +1,8 @@ +#lang racket +(require a86/interp) + +;; link with runtime for IO operations +(unless (file-exists? "../runtime.o") + (system "make -C .. runtime.o")) +(current-objs + (list (path->string (normalize-path "../runtime.o")))) diff --git a/langs/mug/test/compile.rkt b/langs/mug/test/compile.rkt new file mode 100644 index 00000000..81defae6 --- /dev/null +++ b/langs/mug/test/compile.rkt @@ -0,0 +1,18 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../compile.rkt" + "../unload-bits-asm.rkt" + a86/interp) + +;; link with runtime for IO operations +(unless (file-exists? "../runtime.o") + (system "make -C .. runtime.o")) +(current-objs + (list (path->string (normalize-path "../runtime.o")))) + +(test-runner (λ p (unload/free (asm-interp (compile (parse p)))))) +(test-runner-io (λ (s . p) + (match (asm-interp/io (compile (parse p)) s) + ['err 'err] + [(cons r o) (cons (unload/free r) o)]))) diff --git a/langs/mug/test/interp-defun.rkt b/langs/mug/test/interp-defun.rkt new file mode 100644 index 00000000..bd8cc7e8 --- /dev/null +++ b/langs/mug/test/interp-defun.rkt @@ -0,0 +1,8 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../interp-defun.rkt" + "../interp-io.rkt") + +(test-runner (λ p (interp (parse p)))) +(test-runner-io (λ (s . p) (interp/io (parse p) s))) diff --git a/langs/mug/test/interp.rkt b/langs/mug/test/interp.rkt new file mode 100644 index 00000000..cd7b654e --- /dev/null +++ b/langs/mug/test/interp.rkt @@ -0,0 +1,8 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../interp.rkt" + "../interp-io.rkt") + +(test-runner (λ p (interp (parse p)))) +(test-runner-io (λ (s . p) (interp/io (parse p) s))) diff --git a/langs/mug/test/test-runner.rkt b/langs/mug/test/test-runner.rkt new file mode 100644 index 00000000..8b58351b --- /dev/null +++ b/langs/mug/test/test-runner.rkt @@ -0,0 +1,357 @@ +#lang racket +(provide test-runner test-runner-io) +(require rackunit) + +(define (test-runner run) + ;; Abscond examples + (check-equal? (run 7) 7) + (check-equal? (run -8) -8) + + ;; Blackmail examples + (check-equal? (run '(add1 (add1 7))) 9) + (check-equal? (run '(add1 (sub1 7))) 7) + + ;; Con examples + (check-equal? (run '(if (zero? 0) 1 2)) 1) + (check-equal? (run '(if (zero? 1) 1 2)) 2) + (check-equal? (run '(if (zero? -7) 1 2)) 2) + (check-equal? (run '(if (zero? 0) + (if (zero? 1) 1 2) + 7)) + 2) + (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) + (if (zero? 1) 1 2) + 7)) + 7) + + ;; Dupe examples + (check-equal? (run #t) #t) + (check-equal? (run #f) #f) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) + (check-equal? (run '(if #t 3 4)) 3) + (check-equal? (run '(if #f 3 4)) 4) + (check-equal? (run '(if 0 3 4)) 3) + (check-equal? (run '(zero? 4)) #f) + (check-equal? (run '(zero? 0)) #t) + + ;; Dodger examples + (check-equal? (run #\a) #\a) + (check-equal? (run #\b) #\b) + (check-equal? (run '(char? #\a)) #t) + (check-equal? (run '(char? #t)) #f) + (check-equal? (run '(char? 8)) #f) + (check-equal? (run '(char->integer #\a)) (char->integer #\a)) + (check-equal? (run '(integer->char 955)) #\λ) + + ;; Extort examples + (check-equal? (run '(add1 #f)) 'err) + (check-equal? (run '(sub1 #f)) 'err) + (check-equal? (run '(zero? #f)) 'err) + (check-equal? (run '(char->integer #f)) 'err) + (check-equal? (run '(integer->char #f)) 'err) + (check-equal? (run '(integer->char -1)) 'err) + (check-equal? (run '(write-byte #f)) 'err) + (check-equal? (run '(write-byte -1)) 'err) + (check-equal? (run '(write-byte 256)) 'err) + + ;; Fraud examples + (check-equal? (run '(let ((x 7)) x)) 7) + (check-equal? (run '(let ((x 7)) 2)) 2) + (check-equal? (run '(let ((x 7)) (add1 x))) 8) + (check-equal? (run '(let ((x (add1 7))) x)) 8) + (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) + (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) + (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) + + (check-equal? (run '(let ((x 0)) + (if (zero? x) 7 8))) + 7) + (check-equal? (run '(let ((x 1)) + (add1 (if (zero? x) 7 8)))) + 9) + (check-equal? (run '(+ 3 4)) 7) + (check-equal? (run '(- 3 4)) -1) + (check-equal? (run '(+ (+ 2 1) 4)) 7) + (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) + (check-equal? (run '(let ((x (+ 1 2))) + (let ((z (- 4 x))) + (+ (+ x x) z)))) + 7) + (check-equal? (run '(= 5 5)) #t) + (check-equal? (run '(= 4 5)) #f) + (check-equal? (run '(= (add1 4) 5)) #t) + (check-equal? (run '(< 5 5)) #f) + (check-equal? (run '(< 4 5)) #t) + (check-equal? (run '(< (add1 4) 5)) #f) + + ;; Hustle examples + (check-equal? (run ''()) '()) + (check-equal? (run '(box 1)) (box 1)) + (check-equal? (run '(cons 1 2)) (cons 1 2)) + (check-equal? (run '(unbox (box 1))) 1) + (check-equal? (run '(car (cons 1 2))) 1) + (check-equal? (run '(cdr (cons 1 2))) 2) + (check-equal? (run '(cons 1 '())) (list 1)) + (check-equal? (run '(let ((x (cons 1 2))) + (begin (cdr x) + (car x)))) + 1) + (check-equal? (run '(let ((x (cons 1 2))) + (let ((y (box 3))) + (unbox y)))) + 3) + (check-equal? (run '(eq? 1 1)) #t) + (check-equal? (run '(eq? 1 2)) #f) + (check-equal? (run '(eq? (cons 1 2) (cons 1 2))) #f) + (check-equal? (run '(let ((x (cons 1 2))) (eq? x x))) #t) + + ;; Hoax examples + (check-equal? (run '(make-vector 0 0)) #()) + (check-equal? (run '(make-vector 1 0)) #(0)) + (check-equal? (run '(make-vector 3 0)) #(0 0 0)) + (check-equal? (run '(make-vector 3 5)) #(5 5 5)) + (check-equal? (run '(vector? (make-vector 0 0))) #t) + (check-equal? (run '(vector? (cons 0 0))) #f) + (check-equal? (run '(vector-ref (make-vector 3 5) -1)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) 0)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 1)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 2)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 3)) 'err) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 0 4) + x))) + #(4 5 5)) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 1 4) + x))) + #(5 4 5)) + (check-equal? (run '(vector-length (make-vector 3 #f))) 3) + (check-equal? (run '(vector-length (make-vector 0 #f))) 0) + (check-equal? (run '"") "") + (check-equal? (run '"fred") "fred") + (check-equal? (run '"wilma") "wilma") + (check-equal? (run '(make-string 0 #\f)) "") + (check-equal? (run '(make-string 3 #\f)) "fff") + (check-equal? (run '(make-string 3 #\g)) "ggg") + (check-equal? (run '(string-length "")) 0) + (check-equal? (run '(string-length "fred")) 4) + (check-equal? (run '(string-ref "fred" 0)) #\f) + (check-equal? (run '(string-ref "fred" 1)) #\r) + (check-equal? (run '(string-ref "fred" 2)) #\e) + (check-equal? (run '(string-ref "fred" 4)) 'err) + (check-equal? (run '(string? "fred")) #t) + (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") + + ;; Iniquity tests + (check-equal? (run + '(define (f x) x) + '(f 5)) + 5) + + (check-equal? (run + '(define (tri x) + (if (zero? x) + 0 + (+ x (tri (sub1 x))))) + '(tri 9)) + 45) + + (check-equal? (run + '(define (f x) x) + '(define (g x) (f x)) + '(g 5)) + 5) + (check-equal? (run + '(define (even? x) + (if (zero? x) + #t + (odd? (sub1 x)))) + '(define (odd? x) + (if (zero? x) + #f + (even? (sub1 x)))) + '(even? 101)) + #f) + (check-equal? (run + '(define (map-add1 xs) + (if (empty? xs) + '() + (cons (add1 (car xs)) + (map-add1 (cdr xs))))) + '(map-add1 (cons 1 (cons 2 (cons 3 '()))))) + '(2 3 4)) + (check-equal? (run + '(define (f x) + 10) + '(f 1)) + 10) + (check-equal? (run + '(define (f x) + 10) + '(let ((x 2)) (f 1))) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(f 1 2)) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(let ((z 2)) (f 1 2))) + 10) + + ;; Knock examples + (check-equal? (run '(match 1)) 'err) + (check-equal? (run '(match 1 [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2] [0 3])) + 2) + (check-equal? (run '(match 1 [2 1] [0 3])) + 'err) + (check-equal? (run '(match 1 [_ 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [x x] [_ 3])) + (cons 1 2)) + (check-equal? (run '(match (cons 1 2) [(cons x y) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons x 2) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons 3 2) 0] [_ 3])) + 3) + (check-equal? (run '(match 1 [(cons x y) x] [_ 3])) + 3) + (check-equal? (run '(match (cons 1 2) [(cons 1 3) 0] [(cons 1 y) y] [_ 3])) + 2) + (check-equal? (run '(match (box 1) [(box 1) 0] [_ 1])) + 0) + (check-equal? (run '(match (box 1) [(box 2) 0] [_ 1])) + 1) + (check-equal? (run '(match (box 1) [(box x) x] [_ 2])) + 1) + + ;; Loot examples + (check-equal? (run '((λ (x) x) 5)) + 5) + + (check-equal? (run '(let ((f (λ (x) x))) (f 5))) + 5) + (check-equal? (run '(let ((f (λ (x y) x))) (f 5 7))) + 5) + (check-equal? (run '(let ((f (λ (x y) y))) (f 5 7))) + 7) + (check-equal? (run '((let ((x 1)) + (let ((y 2)) + (lambda (z) (cons x (cons y (cons z '())))))) + 3)) + '(1 2 3)) + (check-equal? (run '(define (adder n) + (λ (x) (+ x n))) + '((adder 5) 10)) + 15) + (check-equal? (run '(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))))) + 36)) + 666) + + ;; Mug examples + (check-equal? (run '(symbol? 'foo)) #t) + (check-equal? (run '(symbol? (string->symbol "foo"))) #t) + (check-equal? (run '(eq? 'foo 'foo)) #t) + (check-equal? (run '(eq? (string->symbol "foo") + (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'foo (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'fff (string->symbol (make-string 3 #\f)))) + #t) + (check-equal? (run '(symbol? 'g0)) #t) + (check-equal? (run '(symbol? "g0")) #f) + (check-equal? (run '(symbol? (string->symbol "g0"))) #t) + (check-equal? (run '(symbol? (string->uninterned-symbol "g0"))) #t) + (check-equal? (run '(eq? 'g0 (string->symbol "g0"))) #t) + (check-equal? (run '(eq? 'g0 (string->uninterned-symbol "g0"))) #f) + (check-equal? (run '(eq? (string->uninterned-symbol "g0") (string->uninterned-symbol "g0"))) + #f) + (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f) + (check-equal? (run '(string? (symbol->string 'foo))) #t) + (check-equal? (run '(eq? (symbol->string 'foo) "foo")) #f) + (check-equal? (run ''foo) 'foo)) + +(define (test-runner-io run) + ;; Evildoer examples + (check-equal? (run "" 7) (cons 7 "")) + (check-equal? (run "" '(write-byte 97)) (cons (void) "a")) + (check-equal? (run "a" '(read-byte)) (cons 97 "")) + (check-equal? (run "b" '(begin (write-byte 97) (read-byte))) + (cons 98 "a")) + (check-equal? (run "" '(read-byte)) (cons eof "")) + (check-equal? (run "" '(eof-object? (read-byte))) (cons #t "")) + (check-equal? (run "a" '(eof-object? (read-byte))) (cons #f "")) + (check-equal? (run "" '(begin (write-byte 97) (write-byte 98))) + (cons (void) "ab")) + + (check-equal? (run "ab" '(peek-byte)) (cons 97 "")) + (check-equal? (run "ab" '(begin (peek-byte) (read-byte))) (cons 97 "")) + ;; Extort examples + (check-equal? (run "" '(write-byte #t)) (cons 'err "")) + + ;; Fraud examples + (check-equal? (run "" '(let ((x 97)) (write-byte x))) (cons (void) "a")) + (check-equal? (run "" + '(let ((x 97)) + (begin (write-byte x) + x))) + (cons 97 "a")) + (check-equal? (run "b" '(let ((x 97)) (begin (read-byte) x))) + (cons 97 "")) + (check-equal? (run "b" '(let ((x 97)) (begin (peek-byte) x))) + (cons 97 "")) + + ;; Hustle examples + (check-equal? (run "" + '(let ((x 1)) + (begin (write-byte 97) + 1))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x 1)) + (let ((y 2)) + (begin (write-byte 97) + 1)))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x (cons 1 2))) + (begin (write-byte 97) + (car x)))) + (cons 1 "a")) + ;; Iniquity examples + #| + (check-equal? (run "" + '(define (print-alphabet i) + (if (zero? i) + (void) + (begin (write-byte (- 123 i)) + (print-alphabet (sub1 i))))) + '(print-alphabet 26)) + (cons (void) "abcdefghijklmnopqrstuvwxyz")) +|#) diff --git a/langs/mug/types.h b/langs/mug/types.h new file mode 100644 index 00000000..4093c4f7 --- /dev/null +++ b/langs/mug/types.h @@ -0,0 +1,42 @@ +#ifndef TYPES_H +#define TYPES_H + +/* + Bit layout of values + + Values are either: + - Immediates: end in #b000 + - Pointers + + Immediates are either + - Integers: end in #b0 000 + - Characters: end in #b01 000 + - True: #b11 000 + - False: #b1 11 000 + - Eof: #b10 11 000 + - Void: #b11 11 000 + - Empty: #b100 11 000 +*/ +#define imm_shift 3 +#define ptr_type_mask ((1 << imm_shift) - 1) +#define box_type_tag 1 +#define cons_type_tag 2 +#define vect_type_tag 3 +#define str_type_tag 4 +#define proc_type_tag 5 +#define symb_type_tag 6 +#define int_shift (1 + imm_shift) +#define int_type_mask ((1 << int_shift) - 1) +#define int_type_tag (0 << (int_shift - 1)) +#define nonint_type_tag (1 << (int_shift - 1)) +#define char_shift (int_shift + 1) +#define char_type_mask ((1 << char_shift) - 1) +#define char_type_tag ((0 << (char_shift - 1)) | nonint_type_tag) +#define nonchar_type_tag ((1 << (char_shift - 1)) | nonint_type_tag) +#define val_true ((0 << char_shift) | nonchar_type_tag) +#define val_false ((1 << char_shift) | nonchar_type_tag) +#define val_eof ((2 << char_shift) | nonchar_type_tag) +#define val_void ((3 << char_shift) | nonchar_type_tag) +#define val_empty ((4 << char_shift) | nonchar_type_tag) + +#endif diff --git a/langs/mug/types.rkt b/langs/mug/types.rkt new file mode 100644 index 00000000..966c22ba --- /dev/null +++ b/langs/mug/types.rkt @@ -0,0 +1,74 @@ +#lang racket +(provide (all-defined-out)) + +(define imm-shift 3) +(define imm-mask #b111) +(define ptr-mask #b111) +(define type-box #b001) +(define type-cons #b010) +(define type-vect #b011) +(define type-str #b100) +(define type-proc #b101) +(define type-symb #b110) +(define int-shift (+ 1 imm-shift)) +(define char-shift (+ 2 imm-shift)) +(define type-int #b0000) +(define mask-int #b1111) +(define type-char #b01000) +(define mask-char #b11111) +(define val-true #b0011000) +(define val-false #b0111000) +(define val-eof #b1011000) +(define val-void #b1111000) +(define val-empty #b10011000) + +(define (bits->value b) + (cond [(= type-int (bitwise-and b mask-int)) + (arithmetic-shift b (- int-shift))] + [(= type-char (bitwise-and b mask-char)) + (integer->char (arithmetic-shift b (- char-shift)))] + [(= b val-true) #t] + [(= b val-false) #f] + [(= b val-eof) eof] + [(= b val-void) (void)] + [(= b val-empty) '()] + [else (error "invalid bits")])) + +(define (imm->bits v) + (cond [(eof-object? v) val-eof] + [(integer? v) (arithmetic-shift v int-shift)] + [(char? v) + (bitwise-ior type-char + (arithmetic-shift (char->integer v) char-shift))] + [(eq? v #t) val-true] + [(eq? v #f) val-false] + [(void? v) val-void] + [(empty? v) val-empty])) + + +(define (imm-bits? v) + (zero? (bitwise-and v imm-mask))) + +(define (int-bits? v) + (zero? (bitwise-and v mask-int))) + +(define (char-bits? v) + (= type-char (bitwise-and v mask-char))) + +(define (cons-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-cons))) + +(define (box-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-box))) + +(define (vect-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-vect))) + +(define (str-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-str))) + +(define (proc-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-proc))) + +(define (symb-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-symb))) diff --git a/langs/mug/unload-bits-asm.rkt b/langs/mug/unload-bits-asm.rkt new file mode 100644 index 00000000..121bbe0d --- /dev/null +++ b/langs/mug/unload-bits-asm.rkt @@ -0,0 +1,50 @@ +#lang racket +(provide unload/free unload-value) +(require "types.rkt" + ffi/unsafe) + +;; Answer* -> Answer +(define (unload/free a) + (match a + ['err 'err] + [(cons h v) (begin0 (unload-value v) + (free h))])) + +;; Value* -> Value +(define (unload-value v) + (match v + [(? imm-bits?) (bits->value v)] + [(? box-bits? i) + (box (unload-value (heap-ref i)))] + [(? cons-bits? i) + (cons (unload-value (heap-ref (+ i 8))) + (unload-value (heap-ref i)))] + [(? vect-bits? i) + (if (zero? (untag i)) + (vector) + (build-vector (heap-ref i) + (lambda (j) + (unload-value (heap-ref (+ i (* 8 (add1 j))))))))] + [(? str-bits? i) + (if (zero? (untag i)) + (string) + (build-string (heap-ref i) + (lambda (j) + (char-ref (+ i 8) j))))] + [(? symb-bits? i) + (string->symbol + (if (zero? (untag i)) + (string) + (build-string (heap-ref i) + (lambda (j) + (char-ref (+ i 8) j)))))])) + +(define (untag i) + (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) + (integer-length ptr-mask))) + +(define (heap-ref i) + (ptr-ref (cast (untag i) _int64 _pointer) _uint64)) + +(define (char-ref i j) + (integer->char (ptr-ref (cast (untag i) _int64 _pointer) _uint32 j))) diff --git a/langs/mug/utils.rkt b/langs/mug/utils.rkt new file mode 100644 index 00000000..f0b06af6 --- /dev/null +++ b/langs/mug/utils.rkt @@ -0,0 +1,53 @@ +#lang racket +(provide symbol->label symbol->data-label lookup pad-stack unpad-stack) +(require a86/ast) + +(define rsp 'rsp) +(define r15 'r15) + +;; Symbol -> Label +;; Produce a symbol that is a valid Nasm label +(define (symbol->label s) + (to-label "label_" s)) + +(define (symbol->data-label s) + (to-label "data_" s)) + +(define (to-label prefix s) + (string->symbol + (string-append + prefix + (list->string + (map (λ (c) + (if (or (char<=? #\a c #\z) + (char<=? #\A c #\Z) + (char<=? #\0 c #\9) + (memq c '(#\_ #\$ #\# #\@ #\~ #\. #\?))) + c + #\_)) + (string->list (symbol->string s)))) + "_" + (number->string (eq-hash-code s) 16)))) + +;; Id CEnv -> [Maybe Integer] +(define (lookup x cenv) + (match cenv + ['() #f] + [(cons y rest) + (match (eq? x y) + [#t 0] + [#f (match (lookup x rest) + [#f #f] + [i (+ 8 i)])])])) + +;; Asm +;; Dynamically pad the stack to be aligned for a call +(define pad-stack + (seq (Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) + +;; Asm +;; Undo the stack alignment after a call +(define unpad-stack + (seq (Add rsp r15))) diff --git a/langs/mug/values.c b/langs/mug/values.c new file mode 100644 index 00000000..32e922bd --- /dev/null +++ b/langs/mug/values.c @@ -0,0 +1,121 @@ +#include "types.h" +#include "values.h" + +type_t val_typeof(val_t x) +{ + switch (x & ptr_type_mask) { + case box_type_tag: + return T_BOX; + case cons_type_tag: + return T_CONS; + case vect_type_tag: + return T_VECT; + case str_type_tag: + return T_STR; + case symb_type_tag: + return T_SYMB; + case proc_type_tag: + return T_PROC; + } + + if ((int_type_mask & x) == int_type_tag) + return T_INT; + if ((char_type_mask & x) == char_type_tag) + return T_CHAR; + + switch (x) { + case val_true: + case val_false: + return T_BOOL; + case val_eof: + return T_EOF; + case val_void: + return T_VOID; + case val_empty: + return T_EMPTY; + } + + return T_INVALID; +} + +int64_t val_unwrap_int(val_t x) +{ + return x >> int_shift; +} +val_t val_wrap_int(int64_t i) +{ + return (i << int_shift) | int_type_tag; +} + +int val_unwrap_bool(val_t x) +{ + return x == val_true; +} +val_t val_wrap_bool(int b) +{ + return b ? val_true : val_false; +} + +val_char_t val_unwrap_char(val_t x) +{ + return (val_char_t)(x >> char_shift); +} +val_t val_wrap_char(val_char_t c) +{ + return (((val_t)c) << char_shift) | char_type_tag; +} + +val_t val_wrap_eof(void) +{ + return val_eof; +} + +val_t val_wrap_void(void) +{ + return val_void; +} + +val_box_t* val_unwrap_box(val_t x) +{ + return (val_box_t *)(x ^ box_type_tag); +} +val_t val_wrap_box(val_box_t* b) +{ + return ((val_t)b) | box_type_tag; +} + +val_cons_t* val_unwrap_cons(val_t x) +{ + return (val_cons_t *)(x ^ cons_type_tag); +} +val_t val_wrap_cons(val_cons_t *c) +{ + return ((val_t)c) | cons_type_tag; +} + +val_vect_t* val_unwrap_vect(val_t x) +{ + return (val_vect_t *)(x ^ vect_type_tag); +} +val_t val_wrap_vect(val_vect_t *v) +{ + return ((val_t)v) | vect_type_tag; +} + +val_str_t* val_unwrap_str(val_t x) +{ + return (val_str_t *)(x ^ str_type_tag); +} +val_t val_wrap_str(val_str_t *v) +{ + return ((val_t)v) | str_type_tag; +} + +val_symb_t* val_unwrap_symb(val_t x) +{ + return (val_symb_t *)(x ^ symb_type_tag); +} +val_t val_wrap_symb(val_symb_t *v) +{ + return ((val_t)v) | symb_type_tag; +} diff --git a/langs/mug/values.h b/langs/mug/values.h new file mode 100644 index 00000000..c1de09d6 --- /dev/null +++ b/langs/mug/values.h @@ -0,0 +1,84 @@ +#ifndef VALUES_H +#define VALUES_H + +#include + +/* any abstract value */ +typedef int64_t val_t; + +typedef enum type_t { + T_INVALID = -1, + /* immediates */ + T_INT, + T_BOOL, + T_CHAR, + T_EOF, + T_VOID, + T_EMPTY, + /* pointers */ + T_BOX, + T_CONS, + T_VECT, + T_STR, + T_SYMB, + T_PROC, +} type_t; + +typedef uint32_t val_char_t; +typedef struct val_box_t { + val_t val; +} val_box_t; +typedef struct val_cons_t { + val_t snd; + val_t fst; +} val_cons_t; +typedef struct val_vect_t { + uint64_t len; + val_t elems[]; +} val_vect_t; +typedef struct val_str_t { + uint64_t len; + val_char_t codepoints[]; +} val_str_t; +typedef struct val_symb_t { + uint64_t len; + val_char_t codepoints[]; +} val_symb_t; + +/* return the type of x */ +type_t val_typeof(val_t x); + +/** + * Wrap/unwrap values + * + * The behavior of unwrap functions are undefined on type mismatch. + */ +int64_t val_unwrap_int(val_t x); +val_t val_wrap_int(int64_t i); + +int val_unwrap_bool(val_t x); +val_t val_wrap_bool(int b); + +val_char_t val_unwrap_char(val_t x); +val_t val_wrap_char(val_char_t b); + +val_t val_wrap_eof(); + +val_t val_wrap_void(); + +val_box_t* val_unwrap_box(val_t x); +val_t val_wrap_box(val_box_t* b); + +val_cons_t* val_unwrap_cons(val_t x); +val_t val_wrap_cons(val_cons_t* c); + +val_vect_t* val_unwrap_vect(val_t x); +val_t val_wrap_vect(val_vect_t* c); + +val_str_t* val_unwrap_str(val_t x); +val_t val_wrap_str(val_str_t* c); + +val_symb_t* val_unwrap_symb(val_t x); +val_t val_wrap_symb(val_symb_t* c); + +#endif diff --git a/www/notes.scrbl b/www/notes.scrbl index 1ef6cdfd..eeee0751 100644 --- a/www/notes.scrbl +++ b/www/notes.scrbl @@ -30,4 +30,5 @@ suggestions for improving the material, @bold{please}, @include-section{notes/jig.scrbl} @include-section{notes/knock.scrbl} @include-section{notes/loot.scrbl} +@include-section{notes/mug.scrbl} @;include-section{notes/shakedown.scrbl} diff --git a/www/notes/mug.scrbl b/www/notes/mug.scrbl index c94b4ed5..4ebd5260 100644 --- a/www/notes/mug.scrbl +++ b/www/notes/mug.scrbl @@ -1,1196 +1,890 @@ #lang scribble/manual -@(require (for-label (except-in racket ...))) +@(require (for-label (except-in racket compile ...) a86)) @(require redex/pict racket/runtime-path scribble/examples "utils.rkt" "ev.rkt" - "../utils.rkt") + "../fancyverb.rkt" + "../utils.rkt") @(define codeblock-include (make-codeblock-include #'h)) -@(for-each (λ (f) (ev `(require (file ,(path->string (build-path notes "mug" f)))))) - '("interp.rkt" "interp-env.rkt" #;"compile.rkt" "syntax.rkt" "pat.rkt" #;"asm/interp.rkt" #;"asm/printer.rkt")) +@(ev '(require rackunit a86)) +@(ev `(current-directory ,(path->string (build-path notes "mug")))) +@(void (ev '(with-output-to-string (thunk (system "make runtime.o"))))) +@(void (ev '(current-objs '("runtime.o")))) +@(for-each (λ (f) (ev `(require (file ,f)))) + '("interp.rkt" "compile.rkt" "compile-expr.rkt" "compile-literals.rkt" "utils.rkt" "ast.rkt" "parse.rkt" "types.rkt" "unload-bits-asm.rkt")) -@title[#:tag "Mug"]{Mug: matching, throwing, quoting} +@title[#:tag "Mug"]{Mug: Symbols and Interned String Literals} @table-of-contents[] -@section[#:tag-prefix "mug"]{Scaling up with syntax} +@section[#:tag-prefix "mug"]{String Literals} -We have developed a small, but representative functional programming -language. But there's still a long way to go from our Loot language -to the kind of constructs we expect in a modern, expressive -programming language. In particular, there's a fairly large gap -between Loot and the subset of Racket we've explored so far in this -class. +As it currently stands in our language, @bold{string literals} are +dynamically allocated when they are evaluated. -For example, our programs have made extensive use of pattern matching, -quotation, quasi-quotation, and lots of built-in functions. In this -section, we'll examine how to scale Loot up to a language that's nicer -to program in. As we'll see, much of this can be accomplished -@emph{without extending the compiler}. Rather we can explain these -language features by @bold{elaboration} of fancier language syntax -into the existing core forms. +This means, for example, that if we had a program like this: -In this chapter, we'll explore several ideas at the level of an -interpreter, but the techniques should work just as well for the compiler. - -@section[#:tag-prefix "mug"]{The Loot+ interpreter} +@#reader scribble/comment-reader +(racketblock +(define (f) "fred") +(cons (f) (cons (f) (cons (f) '()))) +) -Let us start with an interprter for the Loot language, plus all of the -extensions considered in the various assignments up through -@seclink["Assignment 7"]{Assignment 7}. +This will allocate three distinct copies of the string +@racket["fred"], one for each call to @racket[f]. This is unfortunate +since really just a single allocation of @racket["fred"] that is +referenced three times could've worked just as well and allocated less +memory. +A common approach programming language implementations take is to take +every string literal that appears in a program and all allocate it +@bold{once} and replace occurrences of those literals with references +to memory allocated for it. -@codeblock-include["mug/interp-env.rkt"] +This means, for example, that multiple occurrences of the same string +literal evaluate to the same pointer: -@section[#:tag-prefix "mug"]{A bit more sugar} +@ex[ +(eq? "x" "x") +] +Note that this doesn't mean that every string of the same characters +is represented by a unique pointer. We can dynamically construct +strings that will not be equal to a string literal of the same +characters: -As we saw in @seclink["Loot"]{Loot}, we can consider syntaxtic -extensions of language that elaborate into the core @tt{Expr} form of -a language. We saw this with the @racket[define]-form that we rewrote -into @racket[letrec]. We can consider further extensions such as -@racket[and], @racket[or], and even @racket[cond]. +@ex[ +(eq? "x" (string #\x)) +] -Here are functions for transforming each of these forms into simpler -forms: +Let's consider how strings were previously compiled. Here's an assembly program +that returns @racket["Hello!"]: -@#reader scribble/comment-reader -(ex -(define (cond->if c) - (match c - [`(cond (else ,e)) e] - [`(cond (,c ,e) . ,r) - `(if ,c ,e (cond ,@r))])) - -(define (and->if c) - (match c - [`(and) #t] - [`(and ,e) e] - [`(and ,e . ,r) - `(if ,e (and ,@r) #f)])) - -(define (or->if c) - (match c - [`(or) #f] - [`(or ,e) e] - [`(or ,e . ,r) - (let ((x (gensym))) - `(let ((,x ,e)) - (if ,x ,x (or ,@r))))])) -) +@ex[ +(require loot/compile) +(seq (Label 'entry) + (Mov 'rbx 'rdi) + (compile-string "Hello!") + (Ret)) +] -Note that these functions do not necessarily eliminate @emph{all} -@racket[cond], @racket[and], or @racket[or] forms, but rather -eliminate @emph{one} occurrence, potentially creating a new occurrence -within a subexpression: +We can run it just to make sure: @ex[ -(cond->if '(cond [(even? x) 8] [else 9])) -(cond->if '(cond [else 9])) -(and->if '(and)) -(and->if '(and 8)) -(and->if '(and 8 9)) -(or->if '(or)) -(or->if '(or 8)) -(or->if '(or 8 9)) +(unload/free + (asm-interp + (seq (Label 'entry) + (Mov 'rbx 'rdi) + (compile-string "Hello!") + (Ret)))) ] -The idea is that another function will drive the repeated use of these -functions until all these extended forms are eliminated. +Notice that this program dynamically allocates the string by executing +instructions that write to memory pointed to by @racket['rbx] and +incrementing @racket['rbx]. -You may wonder why the @racket[or] elaboration is complicated by the -@racket[let]-binding. Consider a potential simpler approach: +But fundamentally, we shouldn't need to do anything dynamically if we +know statically that the string being return is @racket["Hello!"]. We +could @emph{statically} allocate the memory for the string at +compile-time and return a pointer to this data. -@#reader scribble/comment-reader -(ex -(define (or->if-simple c) - (match c - [`(or) #f] - [`(or ,e) e] - [`(or ,e . ,r) - `(if ,e ,e (or ,@r))])) -) +@section[#:tag-prefix "mug"]{Static Memory} + +How can we statically allocate memory? The idea is to use memory in +the program itself to store the data needed to represent the string +literal. It turns out that in an a86 program you can have a section +for the program text and another section with binary data. To switch +between the program text and program data, we use the @racket[(Text)] +and @racket[(Data)] directive. Once in @racket[(Data)] mode we can +write down data that will be placed in the program. -But compare the elaboration of the following exmample: +For example, here is a data section: @ex[ -(or->if-simple '(or (some-expensive-function) #t)) -(or->if '(or (some-expensive-function) #t)) -] +(seq (Data) + (Label 'hi) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))) -The second program is much more efficient. Moreover, if -@racket[some-expensive-function] had side-effects, the first program -would duplicate them, thereby changing the program's intended -behavior. +] -We can incorporate these new functions into the desugar function, -which will transform extended programs into ``core'' expressions: +These psuedo-instructions will add to the data segment of our program +56-bytes of data. The first 8-bytes consist of the number 6. The +next 4-bytes consist of the number @racket[72], i.e. the codepoint for +@racket[#\H]. The next 4-bytes consist of the codepoint for +@racket[#\e] and so on. The names of these psuedo-instructions +designate how much memory is used: @racket[Dq] means 8-bytes +(64-bits), while @racket[Dd] means 4-bytes (32-bits). -@#reader scribble/comment-reader -(ex -;; Expr+ -> Expr -(define (desugar e+) - (match e+ - [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) - `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) - ,(desugar e))] - [(? symbol? x) x] - [(? imm? i) i] - [`',(? symbol? s) `',s] - [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] - [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] - [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] - [`(letrec ,bs ,e0) - `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) - ,(desugar e0))] - [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] - [`(cond . ,_) (desugar (cond->if e+))] - [`(and . ,_) (desugar (and->if e+))] - [`(or . ,_) (desugar (or->if e+))] - [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) -) +The label @racket['hi] is given to name this data's location. We've +previously seen how to load the address of a label using the +@racket[Lea] instruction in order to compute a place in the code to +jump to. Similarly, if we load the address of @racket['hi], we have a +pointer to the data at that location in the program. -Note how a @racket[cond], @racket[and], or @racket[or] form are -transformed and then @racket[desugar]ed again. This will take care of -eliminating any derived forms introduced by the transformation, which -is useful so that derived forms can be defined in terms of other -derived forms, including itself! +So to write a similar program that returns @racket["Hello!"] but +@emph{statically} allocates the memory for the string, we could do the +following: @ex[ -(desugar '(cond [(even? x) 8] [else 9])) -(desugar '(cond [else 9])) -(desugar '(and)) -(desugar '(and 8)) -(desugar '(and 8 9)) -(desugar '(or)) -(desugar '(or 8)) -(desugar '(or 8 9)) +(unload/free + (asm-interp + (seq (Label 'entry) + (Lea 'rax 'hi) + (Or 'rax type-str) + (Ret) + (Data) + (Label 'hi) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))))) ] +A couple things to note: +@itemlist[ -Derived forms that can be elaborated away by rewriting into more -primitive forms are sometimes called @bold{syntactic sugar} since they -are not fundamental but ``sweeten'' the experience of writing programs -with useful shorthands. We call the elaboration function @racket[desugar] -to indicate that it is eliminating the syntactic sugar. +@item{nothing is allocated in the heap memory set up by the run-time; +indeed this program doesn't use the @racket['rbx] register at all.} -@section[#:tag-prefix "mug"]{Exceptional behavior} +@item{Executing this program takes fewer steps than the previous +version; when the @racket['entry] label is called, it executes an +@racket[Lea] and @racket[Or] instruction and returns.} -To see an example of taking the idea of program transformation as a -method for implementing language features, let's consider the case of -exceptions and exception handlers, a common feature of modern -high-level languages. +] -Consider the following program for computing the product of all the -elements in a binary tree: +This is pretty big improvement over the previous approach since the +number of instructions to execute were proportional to the size of the +string being compiled. Now we simply load the address of the static +data in a small, constant number of instructions. -@#reader scribble/comment-reader -(ex -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (match bt - ['leaf 1] - [`(node ,v ,l ,r) (* v (* (prod l) (prod r)))])) - -(prod 'leaf) -(prod '(node 8 leaf leaf)) -(prod '(node 8 (node 2 leaf leaf) (node 4 leaf leaf))) -) +In fact, we can do one better. The @racket[Or] instruction is there +in order to tag the pointer to @racket['hi] as a string. There's +really no reason to do this at run-time; we should be able to add the +tag statically so that just a single load instruction suffices. The +goal is to add the tag to the address of @racket['hi] at compile time, +but the location of the label is actually not fully known until link +time. Our assembler has a way of resolving this by allowing us to +write @emph{expressions} involving labels and constants that will be +computed at link time. -Now consider the work done in an example such as this: +Here is a version of the same program that avoids the @racket[Or] +instruction, instead computing that type tagging at link time: @ex[ -(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +(unload/free + (asm-interp + (seq (Label 'entry) + (Lea 'rax (Plus 'hi type-str)) + (Ret) + (Data) + (Label 'hi) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))))) ] -From a quick scan of the elements, we know the answer is 0 without -doing any arithmetic. But the @racket[prod] function will do a bunch -of multiplication to actually figure this out. - -To see, let's use a helper function to replace @racket[*] that prints -every it multiplies two numbers: -@#reader scribble/comment-reader -(ex -;; Number Number -> Number -(define (mult x y) - (printf "mult: ~a x ~a\n" x y) - (* x y)) - -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (match bt - ['leaf 1] - [`(node ,v ,l ,r) (mult v (mult (prod l) (prod r)))])) - -(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) -) - -This could potentially be bad if the tree were quite large. - -How can we do better? One option is to detect if the value at a node -is zero and simply avoid recurring on the left and right subtrees at -that point: - -@#reader scribble/comment-reader -(ex -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (match bt - ['leaf 1] - [`(node ,v ,l ,r) - (if (zero? v) - 0 - (mult v (mult (prod l) (prod r))))])) -) - -Does this help our answer? Only slightly: +So one idea is to use static data to represent string literals. This +reduces the run-time memory that is allocated and makes is more +efficient to evaluate string literals. We could replace the old +@racket[compile-string] function with the following: @ex[ -(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +(define (compile-string s) + (let ((l (gensym 'string))) + (seq (Data) + (Label l) + (Dq (string-length s)) + (map Dd (map char->integer (string->list s))) + (Text) + (Lea 'rax (Plus l type-str))))) + +(compile-string "Hello!") + +(unload/free + (asm-interp + (seq (Label 'entry) + (compile-string "Hello!") + (Ret)))) ] -Why? +Now, while this does allocate string literals statically, using memory +within to the program to store the string, it doesn't alone solve the +problem with string literals being represented uniquely. -The problem is that you may encounter the zero element deep within a -tree. At that point you not only want to avoid doing the -multiplication of subtrees, but also of the elements surrounding the -zero. But we seemingly don't have control over the context -surrounding the node with a zero in it, just the subtrees. What can -we do? -One option, if the language provides it, is to @bold{raise an -exception}, signalling that a zero element has been found. An outer -function can @bold{catch} that exception and produce zero. Such a -program will avoid doing any multiplication in case there's a zero in -the tree. +@section[#:tag-prefix "mug"]{Static Interning} -Racket comes with an exception mechanism that uses @racket[raise] to -signal an exception, which is propagated to the nearest enclosing -exception handler. If there is no such handler, an uncaught exception -error occurs. +We've seen static memory, but we still need to make sure every string +literal is allocated just once. -@ex[ +Here is the basic idea: -(eval:error (raise 5)) -(eval:error (mult (raise 5) 2)) -(eval:error (mult (raise (mult 5 3)) 2)) +@itemlist[ -] +@item{Collect all of the string literals in the program.} -The general form of an exception handler uses the -@racket[with-handlers] form that includes a series of predicates and -handler expressions. We'll consider a simpler form called -@racket[catch] that unconditionally catches any exception throw and -handles it with a function that takes the raised value as an argument. -It can be expressed in terms of the more sophisticated -@racket[with-handlers] form: +@item{For each distinct string literal, compile it to static data as +described above, labelling the data location.} + +@item{For each string literal expression, compile it to a reference to +the appropropiate label for that string.} -@ex[ -(define-syntax-rule (catch e f) - (with-handlers ([(λ (x) #t) f]) e)) - -(catch (raise 5) (λ (x) x)) -(catch (mult (raise 5) 2) (λ (x) x)) -(catch (mult (raise (mult 5 3)) 2) (λ (x) x)) -(catch (mult (mult 5 3) 2) (λ (x) x)) -(catch (mult (mult 5 3) 2) (λ (x) (mult x x))) -(catch (mult (raise (mult 5 3)) 2) (λ (x) (mult x x))) ] -Now we can solve our problem: +For example, let's say we want to compile this program: @#reader scribble/comment-reader -(ex -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (catch (prod/r bt) (λ (x) 0))) - -;; BT -> Number -;; Throws: 0 -(define (prod/r bt) - (match bt - ['leaf 1] - [`(node ,v ,l ,r) - (if (zero? v) - (raise 0) - (mult v (mult (prod/r l) (prod/r r))))])) - -(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +(racketblock +(begin "Hello!" + "Hello!") ) -(This code is a bit problematic for reasons that are beside the point -of this section, but... the problem is this will catch any exception, -including things like system signals, out of memory exceptions, etc. -A better solution would have the handler check that the exception -value was 0 and re-raise it if not. That way it doesn't ``mask'' any -other exceptions.) - -This code works great for our purposes, but what if the language -didn't provide an exception handling mechanism? Could we achieve the -same effect without relying on exceptions? - -One solution is to re-write the program in what's called -@bold{continuation passing style} (CPS). Continuation passing style -makes explicit what is implicit in the recursive calls to -@racket[prod] in our original program, which is that after recursively -computing the product of the subtree, we have to do more work such as -another recursive call and multiplication. By making this work -explicit, we gain control over it and have the option to do things -like throw away this work. - -Here is the basic idea. We will write a version of @racket[prod] that -takes an additional argument which represents ``the work to be done -after this function call completes.'' It will take a single argument, -a number, which is the result of this function call, and it will -produce some final result for the computation (in this case, a number). - -In general, we want @racket[(k (prod bt))] ≡ @racket[(prod/k bt k)] -for all functions @racket[k] and binary trees @racket[bt]. - -Starting from the spec, we have: +We'd like it to compile to something like this: @#reader scribble/comment-reader -(ex -;; BT (Number -> Number) -> Number -(define (prod/k bt k) - (k (prod bt))) +(racketblock +(seq (Mov 'rax (Add 'hi type-str)) + (Mov 'rax (Add 'hi type-str)) + (Ret) + (Data) + (Label 'hi) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))) ) -We can unroll the definition of @racket[prod]: +Notice how the two occurrences of @racket["Hello!"] turn into the +instruction @racket[(Mov 'rax (Add 'hi type-str))]. The labelled +location @racket['hi] contains the data for the string and it is +statically allocated just once. -@#reader scribble/comment-reader -(ex -(define (prod/k bt k) - (match bt - ['leaf (k 1)] - [`(node ,v ,l ,r) - (k (mult v (mult (prod l) (prod r))))])) -) +In order to do this, we need to maintain an association between unique +string literals and the labels our compiler will choose to label their +static data. + +We @emph{could} do this by making a pass over the program to compute +this association. Initially it would be empty and every time a string +literal was encountered, we'd check to see if it's already in the +association. If it is, there's nothing to be done. If isn't, we'd +generate a new label and add it to the association. + +This association would have to be added as a parameter to each of our +@racket[compile-e] functions and string literals would consult the +association to emit the @racket[(Mov 'rax (Add _label type-str))] +instruction. -Now we'd like to replace the calls to @racket[prod] with calls to -@racket[prod/k], which we can do by recognizing the work to be done -around the call to @racket[prod] and placing it in the -@bold{continuation} argument to @racket[prod/k]. Let's do the first call: +We'd also take every label and string pair in the association and +compile the string data to static data labelled with the associated +label. + +However, here's a fun ``trick'' we can employ to avoid having to +explicitly represent this association between strings and their +labels. + +Strings can be converted to symbols, and symbols can be used as +labels. Symbols that consist of the same characters are guaranteed to +be pointer-equal to each other, so by converting a string to a symbol, +we can take advantage of our implementation language's (Racket's) +facility for interning to help us implement interning in our compiler. + +So here is our revised apporach will produce code like this for our +example program: @#reader scribble/comment-reader -(ex -(define (prod/k bt k) - (match bt - ['leaf (k 1)] - [`(node ,v ,l ,r) - (prod/k l (λ (pl) - (k (mult v (mult pl (prod r))))))])) +(racketblock +(seq (Mov 'rax (Add (symbol->label (string->symbol "Hello!")) type-str)) + (Mov 'rax (Add (symbol->label (string->symbol "Hello!")) type-str)) + (Ret) + (Data) + (Label (symbol->label (string->symbol "Hello!"))) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))) ) -Doing this again, we get: +So now an occurrence of a string literal @racket[_str] can be compiled +as @racket[(Mov 'rax (string->label (string->symbol _str)))]; no +association needs to be maintained explicity. @#reader scribble/comment-reader -(ex -(define (prod/k bt k) - (match bt - ['leaf (k 1)] - [`(node ,v ,l ,r) - (prod/k l (λ (pl) - (prod/k r (λ (pr) - (k (mult v (mult pl pr)))))))])) +(racketblock +;; String -> Asm +(define (compile-string s) + (seq (Lea 'rax (Plus (symbol->label (string->symbol s)) type-str)))) ) -Now we have a definition of @racket[prod/k] that is independent of -@racket[prod] that satisfies the spec we started with. +@(ev '(define (compile-string s) + (seq (Lea 'rax (Plus (symbol->label (string->symbol s)) type-str))))) -A couple of things to note: +So here's how an occurrence of @racket["Hello!"] is compiled: -@itemlist[ -@item{Every call to @racket[prod/k] is a tail-call,} -@item{The context of the recursive calls are given explicitly as continuation arguments.} +@ex[ +(compile-string "Hello!") ] -We can recreate the original function by giving the appropriate initial continuation: +We still need to compile the set of string literals that appear in the +program into statically allocated data, so for this we will write a +function: @#reader scribble/comment-reader -(ex -;; BT -> Number -(define (prod bt) - (prod/k bt (λ (x) x))) +(racketblock +;; Prog -> [Listof Symbol] +(define (literals p) ...) ) -Now, this code doesn't do anything smart on zero elements; it does -exactly the same multiplications our first program does: +This will produce the set of strings that appear literally in the +program text. Each string will be converted to its symbol +representation. The string representation is easy to recover by using +@racket[symbol->string]. + +This function is straightforwad, if a bit tedious, to write. It +traverses the AST. Recursive results are collected with +@racket[append]; when a string node @racket[(Str _s)] is encountered, +it produces @racket[(list (string->symbol _s))]. After all of the +strings have been collected, a final call to +@racket[remove-duplicates] ensures a list of unique symbols is +returned. @ex[ -(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +(literals (parse '["Hello!"])) +(literals (parse '[(begin "Hello!" "Hello!")])) +(literals (parse '[(begin "Hello!" "Fren")])) +(literals (parse '[(define (f x) "Hello!") + (cons (f "Fren") (cons (f "Hello!") '()))])) ] -However, with a small tweak, we can get the behavior of the exception-handling code. - -Consider this definition: +Using @racket[literals], we can write a function that compiles all of +the string literals into static data as follows: @#reader scribble/comment-reader -(ex -;; BT (Number -> Number) -> Number -(define (prod/k bt k) - (match bt - ['leaf (k 1)] - [`(node ,v ,l ,r) - (if (zero? v) - 0 - (prod/k l (λ (pl) - (prod/k r (λ (pr) - (k (mult v (mult pl pr))))))))])) - -;; BT -> Number -(define (prod bt) - (prod/k bt (λ (x) x))) +(racketblock +;; Prog -> Asm +(define (compile-literals p) + (seq (Data) + (compile-literals-data (literals p)) + (Text))) + +;; [Listof Symbol] -> Asm +(define (compile-literals-data ss) + (append-map compile-literal-data ss)) + +;; Symbol -> Asm +(define (compile-literal-data s) + (let ((str (symbol->string s))) + (seq (Label (symbol->label s)) + (Dq (string-length str)) + (map Dq (map char->integer (string->list str)))))) ) -Notice that this program, when the value in a node is zero, -immediately returns @racket[0]. It does not do any of the work -represented by @racket[k]. It does something akin to raising an -exception: it blows off all the work of the surround context and -returns a value to the ``handler'' (in this case, @racket[prod]). +@(ev +'(define (compile-literals p) + (seq (Data) + (compile-literals-data (strings p)) + (Text)))) + +@(ev +'(define (compile-literals-data ss) + (append-map compile-literal-data ss))) -Returning to our example, we can see that no multiplications occur: +@(ev +'(define (compile-literal-data s) + (let ((str (symbol->string s))) + (seq (Label (symbol->label s)) + (Dq (string-length str)) + (map Dq (map char->integer (string->list str))))))) + +So now we can reconstruct our example with: @ex[ -(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +(seq (compile-string "Hello!") + (compile-string "Hello!") + (compile-literals-data '(Hello!))) ] -We've now achieved our original goal without the use of exception -handlers. We achieved this by rewriting our program to make explicit -the work that remains to do, giving us the ability to avoid doing it -when necessary. This is a slighly simplified version of the general -exception handling transformation, which we will look at next, since -there's only a single handler and all it does it produce 0. But, the -by-hand transformation we did provides a useful blueprint for how can -generally transform programs that use exception handling into ones -that don't. - +We've seemingly reached our goal. However, there is a fairly nasty +little bug with our approach. Can you spot it? -@section[#:tag-prefix "mug"]{Exceptional transformation} +Here's a hint: we are generating labels based on the content of string +literals. What else do we generate labels based off of and is it +possible to create a conflict? -Let's consider a very small subset of expressions, extended with -@racket[raise] and @racket[catch], and see how we can transform away -those added mechanisms: +The answer is yes. Consider this program: @#reader scribble/comment-reader (racketblock -;; An Expr is one of: -;; - Integer -;; - Variable -;; - `(if ,Expr ,Expr ,Expr) -;; - `(,Prim1 ,Expr) -;; - `(,Prim2 ,Expr ,Expr) -;; - `(raise ,Expr) -;; - `(catch ,Expr (λ (,Variable) ,Expr)) +(define (Hello! x) "Hello!") +42 ) -Here is the basic idea of the transformation, we transform every -expression into a function of two arguments. The two arguments -represent the two ways an expression may produce results: either by -returning normally or by raising an exception. - - -So for example, if the original expression were @racket[1], we'd want -the transformed program to be -@racketblock[ -'(λ (retn raze) (retn 1)) -] +It contains both a function called @racket[Hello!] and a string +literal @racket["Hello!"]. Unfortunately, the label used both for the +function and the string data will be @racket[(symbol->label 'Hello!)]. +If the compiler emits two definitions of this label, the assembler +will complain and fail to assemble the program. -Why? Because @racket[1] just produces @racket[1]; it can't possibly -raise an exception. So given the two ways of producing a value, we -choose the @racket[ret] way and ``return'' by apply @racket[retn] to -the value we want to return: @racket[1]. +The solution is simple, when generating labels for data, we will use a +different symbol to label function, let's call it +@racket[symbol->data-label] that is guaranteed to produce disjoint +labels from @racket[symbol->label], which we will continue to use for +code labels. +Using this function in all the places we used @racket[symbol->label] +will resolve the issue and our problematic program will now have two +different labels defined in it: -Suppose the original expression is @racket[(raise 1)]. Then we want -to produce: -@racketblock[ -'(λ (retn raze) (raze 1)) +@ex[ +(symbol->label 'Hello!) +(symbol->data-label 'Hello!) ] -This is choosing to not return a value, but rather ``raise'' an -exception by calling the @racket[raze] function. -This is a lot like the by-hand transformation we did, except we now -have two continuations: one to represent work to do after -returning (normally) and one for work to do after raising an -exception. +So now we have accomplished our goal: string literals are statically +allocated and different occurrence of the same string literal are +considered @racket[eq?] to each other: -At the top-level, to run an expression we simply plug in appropriate -definitions for @racket[retn] and @racket[raze]. The @racket[retn] -function should just produce the result, i.e. it should be @racket[(λ -(x) x)], while @racket[raze] should signal an uncaught exception. -Since our language has such a simple model of errors, we'll just cause -an error to occur, i.e. @racket[(λ (x) (add1 #f))]. Let's try our -examples. +@(ev '(require mug/compile)) @ex[ -(interp-env '((λ (retn raze) (retn 1)) (λ (x) x) (λ (x) (add1 #f))) '()) -(interp-env '((λ (retn raze) (raze 1)) (λ (x) x) (λ (x) (add1 #f))) '()) +(seq (compile-string "Hello!") + (compile-string "Hello!") + (compile-literals-data '(Hello!))) ] -What about something like @racket[(add1 _e)]? - -Well if @racket[_e] returns normally, then the whole thing should -produce one more than that value. If @racket[_e] raises an exception, -then @racket[(add1 _e)] should raise that exception. - -Suppose @racket[_t] where the transformed version of @racket[_e], -which means it is a function of two parameters: what to do if -@racket[_e] returns and what to do if @racket[_e] raises. - -Then the transformation of @racket[(add1 _e)] is -@racketblock[ -(λ (retn raze) - (_t (λ (x) (retn (add1 x))) (λ (x) (raze x))))] - -This can be simplified slightly by observing that @racket[(λ (x) (raze -x))] is equal to @racket[raze]: -@racketblock[ -(λ (retn raze) - (_t (λ (x) (retn (add1 x))) raze))] - -How about something like @racket[(catch _e0 (λ (_x) _e1))]? If -@racket[_e0] produces a value normally, then the whole expression -produces that value normally. However if @racket[_e0] raises an -expression then the whole expression produces whatever @racket[_e1] -with @racket[x] bound to the raised value produces. Let @racket[_t0] -and @racket[_t1] be the transformed versions of @racket[_e0] and -@racket[_e1]. Then transformation of the whole expressions should be - -@racketblock[ -(λ (retn raze) - (_t0 retn (λ (_x) (_t1 retn raze)))) -] +We can try it out to confirm some examples. + -One thing to notice here is that we are running @racket[_t0] with a @racket[raze] function -that, if called, will run @racket[_t1] normally. +@ex[ +(define (run . p) + (unload/free (asm-interp (compile (parse p))))) -Guided by the examples, let's define the transformation (note: we have -to take care of avoiding unintended variable capture): +(run "Hello!") -@#reader scribble/comment-reader -(ex -;; Expr -> Expr -(define (exn-transform e) - (match e - [(? integer? i) `(λ (retn raze) (retn ,i))] - [(? symbol? x) - (let ((retn (gensym 'retn)) - (raze (gensym 'raze))) - `(λ (,retn ,raze) (,retn ,x)))] - [`(if ,e0 ,e1 ,e2) - (let ((t0 (exn-transform e0)) - (t1 (exn-transform e1)) - (t2 (exn-transform e2)) - (retn (gensym 'retn)) - (raze (gensym 'raze))) - `(λ (,retn ,raze) - (,t0 - (λ (x) - ((if x ,t1 ,t2) ,retn ,raze)) - ,raze)))] - [`(,(? prim? p) ,e0) - (let ((t0 (exn-transform e0)) - (retn (gensym 'retn)) - (raze (gensym 'raze))) - `(λ (,retn ,raze) - (,t0 (λ (v) (,retn (,p v))) - ,raze)))] - [`(,(? prim? p) ,e0 ,e1) - (let ((t0 (exn-transform e0)) - (t1 (exn-transform e1)) - (retn (gensym 'retn)) - (raze (gensym 'raze)) - (v0 (gensym 'v0))) - `(λ (,retn ,raze) - (,t0 (λ (,v0) - (,t1 (λ (v1) (,retn (,p v0 v1))) - ,raze)) - ,raze)))] - [`(raise ,e) - (let ((t (exn-transform e)) - (retn (gensym 'retn)) - (raze (gensym 'raze))) - `(λ (,retn ,raze) - (,t ,raze ,raze)))] - [`(catch ,e0 (λ (,x) ,e1)) - (let ((t0 (exn-transform e0)) - (t1 (exn-transform e1)) - (retn (gensym 'retn)) - (raze (gensym 'raze))) - - `(λ (,retn ,raze) - (,t0 ,retn - (λ (,x) - (,t1 ,retn ,raze)))))])) -) +(run '(begin "Hello!" "Hello!")) -Here's what the transformation looks like on examples: +(run '(eq? "Hello!" "Hello!")) +(run '(eq? "Hello!" "Fren")) -@ex[ -(exn-transform '1) -(exn-transform '(raise 1)) -(exn-transform '(catch (raise 1) (λ (x) x))) -(exn-transform '(catch (raise 1) (λ (x) (add1 x)))) -(exn-transform '(catch (add1 (raise 1)) (λ (x) 1))) -(exn-transform '(catch (add1 (raise 1)) (λ (x) (raise x)))) +(run '(define (Hello! x) "Hello!") + '(eq? (Hello! 42) "Hello!")) ] -Now let's give it a spin: +It's still worth noting that only string literals are interned. +Dynamically created strings are not pointer-equal to structurally +equal string literals: @ex[ -;; Expr -> Value -(define (run e) - (interp-env `(,(exn-transform e) (λ (x) x) (λ (x) (add1 #f))) '())) - -(run '1) -(run '(raise 1)) -(run '(catch (raise 1) (λ (x) x))) -(run '(catch (raise 1) (λ (x) (add1 x)))) -(run '(catch (add1 (raise 1)) (λ (x) 1))) -(run '(catch (add1 (raise 1)) (λ (x) (raise x)))) -(run '(if (raise 0) 1 2)) -(run '(if (zero? 0) (raise 1) 2)) +(run '(eq? "fff" (make-string 3 #\f))) ] -@section[#:tag-prefix "mug"]{Quotation} +This is why we refer to this kind of interning as ``static'' interning. -We have seen how to interpret limited uses of @racket[quote], such as -in @racket[(quote ())] and @racket[(quote x)], i.e. the empty list and symbols. +Let us now turn to a new, but familar, data type that supports a +stronger sense of interning: the symbol. -But we've also been using @emph{using} @racket[quote] more generally -where we can write down an arbitrary constant s-expression within a -@racket[quote]: +@section[#:tag-prefix "mug"]{Symbols} -@ex[ -'#t -'5 -'(1 2 3) -'(add1 x) -'(car '(1 2 3)) -'(((1) 2) 3) -'(1 . 2) -'("asdf" fred ((one))) -] +One basic data type that we've used frequently in the writing of our +compiler, but which is not currently accounted for in our language is +that of @bold{symbols}. -We can understand the more general @racket[quote] form as a shorthand -for expressions that construct an equivalent list to the one denoted -by the s-expression. +At first cut, a symbol is alot like a string: the name of a symbol +consists of some textual data. We can represent a symbol much like we +represent a string: using a tagged pointer to a sized array of +characters that comprise the name of the symbol. -For example, -@itemlist[ +In fact, we made extensive use of this in our implementation of static +interning for string literals. This section will now uncover +@emph{how} symbols do their (dynamic) interning. -@item{@racket['1] is shorthand for @racket[1],} +From a syntax point of view, we add a new AST constructor for symbols +and names of the new operations: -@item{@racket['(1 . 2)] is shorthand for @racket[(cons '1 '2)], which -is shorthand for @racket[(cons 1 2)],} +@filebox-include-fake[codeblock "mug/ast.rkt"]{ +;; type Expr = ... +;; | (Symb Symbol) +;; type Op1 = ... +;; | 'symbol? | 'symbol->string +;; | 'string->symbol | 'string->uninterned-symbol +(struct Symb (s) #:prefab) +} -@item{@racket['(1 2 3)] is shorthand for @racket[(cons '1 '(2 3))], -which is shorthand for @racket[(cons 1 (cons '2 '(3)))], which is -shorthand for @racket[(cons 1 (cons 2 (cons '3 '())))], which is -shorthand for @racket[(cons 1 (cons 2 (cons 3 '())))],} +The parser is updated to construct such AST nodes when it encounters a +symbol: -@item{@racket['()] is as simple as possible (the empty list),} +@ex[ +(parse-e ''foo) +] -@item{@racket['x] is as simple as possible (a symbol), and} +We can create a new pointer type tag: -@item{@racket[5] is as simple as possible.} -] +@filebox-include-fake[codeblock "mug/types.rkt"]{ +(define type-symb #b110) +} -Guided by these examples, we can write a function that transforms the -s-expression inside of a @racket[quote] into an equivalent expression -that only uses @racket[quote] for constructing symbols and the empty -list: +The run-time system has to be updated to handle symbol results and the +printer is updated to properly print symbols, but all of this follows +the blueprint of strings. It's simply a different tag and a slightly +different printer, which uses and initial @tt{'} delimiter instead of +an initial @tt{"} and subsequent @tt{"} delimiter. +But one of the key differences between strings and symbols is that +symbols that have the same name are considered the same, i.e. they +should be represented by the same @emph{pointer}. -@#reader scribble/comment-reader -(ex -;; S-Expr -> Expr -;; Produce an expression that evaluates to given s-expression, without -;; use of quote (except for symbols and empty list) -(define (quote->expr d) - (match d - [(? boolean?) d] - [(? integer?) d] - [(? string?) d] - [(? char?) d] - [(? symbol?) (list 'quote d)] - [(cons x y) (list 'cons (quote->expr x) (quote->expr y))] - ['() ''()])) - - -(quote->expr 'x) -(quote->expr 5) -(quote->expr "Fred") -(quote->expr '(1 . 2)) -(quote->expr '(1 2 3)) -(quote->expr '(car '(1 2 3))) -(quote->expr '(((1) 2) 3)) -(quote->expr '(1 . 2)) -(quote->expr '("asdf" fred ((one)))) -) +This means that two symbols of the same name should be @racket[eq?] to +each other: -We can now incorporate this into @racket[desugar] to eliminate uses of -compound-data @racket[quote]: +@ex[ +(eq? 'x 'x) +] + +Having seen how string literals are handled, you can see that symbol +literals are like string literals and we can take a similar approach +to transform a program into one that statically allocates all of the +symbols that appear in the program and replace their occurrences with +references. + +Again, we just follow the blueprint of strings. + +The key additions are a function for compiling symbol occurrences: @#reader scribble/comment-reader -(ex -;; Expr+ -> Expr -(define (desugar e+) - (match e+ - [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) - `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) - ,(desugar e))] - [(? symbol? x) x] - [(? imm? i) i] - [`',(? symbol? s) `',s] - [`',d (quote->expr d)] - [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] - [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] - [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] - [`(letrec ,bs ,e0) - `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) - ,(desugar e0))] - [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] - [`(cond . ,_) (desugar (cond->if e+))] - [`(and . ,_) (desugar (and->if e+))] - [`(or . ,_) (desugar (or->if e+))] - [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) +(racketblock +;; Symbol -> Asm +(define (compile-symbol s) + (seq (Lea 'rax (Plus (symbol->data-label s) type-symb)))) ) -And now we can @racket[desugar] programs such as these: +Which works as follows: @ex[ -(desugar '(map f '(1 2 3))) -(desugar '(map f '(and 1 2))) -(desugar '(if x '(1 . 2) 3)) +(compile-symbol 'Hello!) ] -And our prior interpterter will work just fine on these programs: +And the @racket[literals] function should now include a case for +@racket[(Symb _sym)] to return @racket[(list _sym)]. @ex[ -(interp-env (desugar '(map f '(1 2 3))) `((map ,map) (f ,add1))) -(interp-env (desugar '(map f '(and 1 2))) `((map ,map) (f ,identity))) -(interp-env (desugar '(if x '(1 . 2) 3)) `((x #t))) +(literals (parse '['Hello!])) ] -And: +You might worry that programs that have similar strings and symbols +may cause problem. Since @racket[literals] on the following program +only returns a single literal: @ex[ -(interp-env (desugar ''(((1) 2) 3)) '()) -(interp-env (desugar ''(1 . 2)) '()) -(interp-env (desugar ''("asdf" fred ((one)))) '()) +(literals (parse '[(begin "Hello!" 'Hello!)])) ] -@section[#:tag-prefix "mug"]{Pattern matching} - -One of the most ubiquitous language features we've used, but not -implemented, is pattern matching with the @racket[match] form. +But actually this is just fine. What happens is that only a signle +chunk of memory is allocated to hold the character data @tt{H}, +@tt{e}, @tt{l}, @tt{l}, @tt{o}, @tt{!}, but the @emph{symbol} +@racket['Hello] is represented as a pointer to this data tagged as a +symbol, while the string @racket["Hello"] is represent as the same +pointer, but tagged as a string. So this program compiles to: -Pattern matching too can be seen as syntactic sugar since it's easy to -imagine how you could rewrite uses of @racket[match] into equivalent -expressions that didn't involve @racket[match]. - -For example, consider the program: +@ex[ +(seq (compile-string "Hello!") + (compile-symbol 'Hello!) + (compile-literals-data '(Hello!))) +] -@#reader scribble/comment-reader -(racketblock -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (match bt - ['leaf 1] - [`(node ,v ,l ,r) (* v (* (prod l) (prod r)))])) -) +We have now added a symbol data type and have implement static +interning just as we did for strings. -An alternative, equivalent, formulation that doesn't use -@racket[match] is the following: +However this strategy alone won't fully solve the problem of symbol +identity because it is possible to dynamically create symbols and even +then it should be the case that symbols with the same name are ``the +same.'' This in contrast to how strings work: -@#reader scribble/comment-reader -(racketblock -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (cond - [(eq? 'leaf bt) 1] - [(and (list? bt) - (= 4 (length bt)) - (eq? 'node (first bt))) - (let ((v (second bt)) - (l (third bt)) - (r (fourth bt))) - (* v (* (prod l) (prod r))))] - ; corresponds to a match failure - [else (add1 #f)])) -) +@ex[ +(eq? 'x (string->symbol (string #\x))) +] -This code is less nice to read and write, but essentially does the -same thing the pattern-matching code does. +Here we are creating a symbol dynamically, using the string +@racket["x"] to specify the name of the symbol. Comparing it to a +@racket['x] that appears statically should still produce @racket[#t]. -In this example, each @racket[match]-clause becomes a -@racket[cond]-clause. The question-part of each @racket[cond]-clause -is an expression that determines whether the corresponding -pattern-part of the @racket[match-clause] matches. The answer-part of -each @racket[cond]-clause corresponds to the expression-part of the -@racket[match]-clause, with an add @racket[let]-form that destructures -the scrutinee and binds the pattern variables of the pattern-part. +This was in fact a critical property we relied upon in implementing +static string interning. +This latter example shows that we need to @emph{dynamically} ensure +symbols of the same name evaluate to unique pointers. -Let's consider the following extension to the grammar of @tt{Expr+} to -include a simplified version of the pattern matchin form we've been -using: +@section[#:tag-prefix "mug"]{Dynamic Interning} -@#reader scribble/comment-reader -(racketblock -;; type Expr+ = -;; .... -;; | Match - -;; type Match = (match ,Expr+ ,(list Pat Expr+) ...) - -;; type Pat = -;; | #t -;; | #f -;; | Integer -;; | String -;; | Variable -;; | `_ -;; | `'() -;; | `(quote ,Symbol) -;; | `(cons ,Pat ,Pat) -;; | `(list ,Pat ...) -;; | `(? ,Expr ,Pat ...) - ) - -A @racket[match] form consists of an expression to match against, -sometimes callsed the @bold{scrutinee}, followed by some number of -pattern-matching clauses; each one consists of a pattern and -expression to evaluate should the pattern match the scrutinee's value. - -Here a pattern can either be a literal boolean, integer, string, empty -list, or symbol, or a pattern variable, which matches anything and -binds the value to the variable, a ``wildcard'' which matches anything -and binds nothing, a @racket[cons] pattern which matches pairs of -things that match the subpatterns, @racket[list] pattern which matches -lists of a fixed-size where elements matches the subpatterns, or a -@racket[?] pattern which matches if the results of evaluated the first -subexpression applied to scrutinee produces true and all of the -subpatterns match. - -This doesn't include the @racket[quasiquote]-patterns we used above, -but still this is a useful subset of pattern matching and allows us to -write programs such as: +Static interning requires identical static occurrences of data to have +a unique representation. Dynamic interning requires identical data, +regardless of when it's created, to have a unique representation. +Symbols are like strings that support dynamic interning. -@#reader scribble/comment-reader -(racketblock -;; BT -> Number -;; Multiply all the numbers in given binary tree -(define (prod bt) - (match bt - ['leaf 1] - [(list 'node v l r) (* v (* (prod l) (prod r)))])) -) +This is going to require more support from our run-time system. -As alluded to above, each pattern plays two roles: it used to -determine whether the scrutinee matches the pattern, and it used to -bind variable names (in the scope of the RHS expression) to sub-parts -of the scrutinee when it does match. +Essentially, the run-time systems needs to keep track of all of the +symbols that have appeared so far during the running of the program. +When a new symbol is dynamically created, e.g. through +@racket[string->symbol], the run-time will check whether this symbol +has been seen before (based on the characters of its name). If it has +been seen before, the run-time can give us the pointer for the +previous use of the symbol, thus preserving the pointer-equality +between this symbol and any other occurrences. -We can write two helper functions to accomplish each of these tasks: -@itemlist[ +On the other hand if the run-time has not see this symbol, it can +allocate memory for it, return the pointer, and remember in the future +that this symbol has been seen. -@item{rewrite patterns into Boolean valued expressions that answer -whether the pattern matches the scrutinee,} +To accomplish this, we will implement a @bold{symbol table}. It +associates symbol names, i.e. the characters of a symbol, with +pointers to symbols. When a program wishes to create a symbol, it +confers with the table to either fetch an existing pointer for the +symbol or create a new on, updating the table. -@item{rewrite pattern and RHS in to expressions in which the pattern -variables of pattern are bound to the appropriately deconstructed -parts of the scrutinee.} +To implement this table, we'll use a binary search tree of symbols, +represented in C as. We have a globally defined pointer +@tt{symbol_tbl} is which is initially empty (@tt{NULL}). The work of +dynamically interning a symbol will be done by the @tt{intern_symbol} +function. It searches the BST, using @tt{symb_cmp} to compare symbols +for alphabetic ordering. If an entry is found, it returns the +previously seen symbol, otherwise it adds the symbol to the table and +returns it. -] +@filebox-include[fancy-c "mug/symbol.c"] +The idea will be that every time a symbol is constructed, we call +@tt{intern_symbol} to intern it. -Assume: the scrutinee is a variable. (It's easy to establish this assumption in general.) +So in addition to collecting all of the literals and compiling each to +static data, we will need to collect all of the symbols and emit a +call to @tt{intern_symbol} at the start of the program. -We need two functions: +To accomplish this, we'll design a function: @#reader scribble/comment-reader (racketblock -;; Pat Variable -> Expr -;; Produces an expression determining if p matches v -(define (pat-match p v) ...) - -;; Pat Variable Expr -> Expr -;; Produce an expression that deconstructs v and binds pattern variables -;; of p in scope of e. -;; ASSUME: v matches p -(define (pat-bind p v e) ...) +;; Prog -> Asm +;; Initialize the symbol table with all the symbols that occur statically +(define (init-symbol-table p) ...) ) -Let's turn to @racket[pat-match] first. - -Suppose the pattern is a literal @racket[#t]. When does @racket[v] -match it? When @racket[v] is @racket[eq?] to @racket[#t]. +Here's what it will produce for some example programs: -So an expression that produces true when this pattern matches is @racket[(eq? #t v)]. - -Handling @racket[#f], integers, characters, symbols, and the empty list is similar. - -What about variables? Suppose the pattern is @racket[x]. When does -@racket[v] match it? Always. So @racket[#t] is an expression that -produces true with this pattern matches. +@ex[ +(init-symbol-table (parse '['Hello!])) +(init-symbol-table (parse '[(begin 'Hello! 'Hello!)])) +(init-symbol-table (parse '["Hello!"])) +(init-symbol-table (parse '[(define (Hello! x) 'Hello!) + (Hello! 'Fren)])) +] -Wildcards are the same. +For each unique symbol in the program, it emits two instructions: -What about when the pattern is a @racket[cons]-pattern? Suppose the -pattern is @racket[(cons _p1 _p2)] for some patterns @racket[_p1] and -@racket[_p2]. When does @racket[v] match @racket[(cons _p1 _p2)]? -When @racket[v] is a pair and @racket[(car v)] matches @racket[_p1] -and @racket[(cdr v)] matches @racket[_p2]. +@itemlist[ -A @racket[list] pattern is similar, except that the scrunity must be a -list with as many elements as there are patterns, and the elements -must match the corresponding subpattern. +@item{move the address of the symbol's data into @racket['rdi], the +register used for the first argument in the System V ABI,} -What about a @racket[?] pattern? When does @racket[v] match it? -Suppose the pattern is @racket[(? even?)]. When does @racket[v] match -it? When @racket[(even? v)] is true. If the pattern had a non-empty -list of sub-patterns they would all need to match @racket[v], too. +@item{call @tt{intern_symbol}.} +] +We know that initially the table is empty, so each of these calls will +insert the given symbols into the table ensure that if any subsequent +symbol is interned that has the same character data, call +@tt{intern_symbol} will produce the original pointer to static data +for that symbol. -We can now formulate a defintion of @racket[pat-match]: +Now we can implement the two operations @racket[string->symbol] and +@racket[symbol->string]. Here's what we do for +@racket[string->symbol]: @#reader scribble/comment-reader (racketblock -;; Pat Variable -> Expr -;; Produces an expression determining if p matches v -(define (pat-match p v) +;; Op1 -> Asm +(define (compile-op1 p) (match p - [#t `(eq? #t ,v)] - [#f `(eq? #f ,v)] - [(? integer? i) `(eq? ,i ,v)] - [(? string? s) - `(and (string? ,v) - (string=? ,s ,v))] - [(list 'quote '()) `(eq? '() ,v)] - [(? symbol?) #t] - [(list 'quote (? symbol? s)) `(eq? ,v ',s)] - [(list 'cons p1 p2) - (let ((v1 (gensym)) - (v2 (gensym))) - `(and (cons? ,v) - (let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - (and ,(pat-match p1 v1) - ,(pat-match p2 v2)))))] - [(cons 'list ps) - `(and (list? ,v) - (= (length ,v) ,(length ps)) - ,(pat-match-list ps v))] - [(cons '? (cons e ps)) - `(and (,e ,v) - ,(pats-match ps v))])) + ; ... + ['string->symbol + (seq (assert-string rax) + (Xor rax type-str) + (Mov rdi rax) + pad-stack + (Call 'intern_symbol) + unpad-stack + (Or rax type-symb))])) ) -The @racket[list]-pattern case relies on a helper function -@racket[pat-match-list] and the @racket[?]-pattern case relies on -@racket[pats-match], both defined below: +This first does some type-tag checking to make sure the argument is a +string, then it untags the pointer and moves it to the @racket['rdi] +register in order to call @racket[intern_symbol]. The address of the +interned symbol is returned in @racket['rax], which is then tagged as +being a symbol. - -@#reader scribble/comment-reader -(racketblock -;; (Listof Pat) Variable -> Expr -;; Produces an expression determining if every ps matches x -(define (pats-match ps v) - (match ps - ['() #t] - [(cons p ps) - `(and ,(pat-match p v) - ,(pats-match ps v))])) - -;; (Listof Pat) Variable -> Expr -;; Produces an expression determining if each ps matches each element of list v -(define (pat-match-list ps v) - (match ps - ['() #t] - [(cons p ps) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - (and ,(pat-match p v1) - ,(pat-match-list ps v2))))])) -) - -Here are some examples: +We can now confirm that dynamically created symbols are still +pointer-equal to symbols that statically appear in the program: @ex[ - -(pat-match 'leaf 'bt) -(pat-match '(list 'node v l r) 'bt) -(pat-match '(list 'node (? even? v) l r) 'bt) - +(run '(eq? 'fff (string->symbol (make-string 3 #\f)))) ] -These aren't very readable programs that emerge, however, we check -that they're doing the right thing. Note that the elaboration -requires a few functions to be available, such as @racket[list?], -and @racket[length]. We make these available in an initial -environment: +Even creating two symbols dynamically will result in the same pointer +so long as they are spelled the same: @ex[ -(define env0 - `((length ,length) - (list? ,list?))) -(interp-env (desugar `(let ((bt 'leaf)) ,(pat-match 'leaf 'bt))) env0) -(interp-env (desugar `(let ((bt 'leaf)) ,(pat-match 8 'bt))) env0) -(interp-env (desugar - `(let ((bt '(node 1 leaf leaf))) - ,(pat-match '(list 'node v l r) 'bt))) - env0) -(interp-env (desugar - `(let ((bt '(node 1 leaf leaf))) - ,(pat-match '(list 'node (? zero?) l r) 'bt))) - env0) -(interp-env (desugar - `(let ((bt '(node 0 leaf leaf))) - ,(pat-match '(list 'node (? zero?) l r) 'bt))) - env0) +(run '(eq? (string->symbol (make-string 3 #\a)) + (string->symbol (make-string 3 #\a)))) ] -Now moving on to @racket[pat-bind], it follows a similar structure, -but we always assume the given pattern matches the scrutinee. The -addition @tt{Expr} argument represents the right-hand-side expression -of the @racket[match]-clause. The idea is that the pattern variables -of @racket[p] are bound to sub-parts of @racket[v] in @racket[e]. +Going the other direction from symbols to strings is easy: we copy the +string data and tag the pointer as a string. Note that we could get +away will simply retagging the pointer and not actually copying the +string, but we make a copy to mimic Racket's behavior and to be safe +should we add string mutation operations. @#reader scribble/comment-reader (racketblock -;; Pat Variable Expr -> Expr -;; Produce an expression that deconstructs v and binds pattern variables -;; of p in scope of e. -;; ASSUME: v matches p -(define (pat-bind p v e) +;; Op1 -> Asm +(define (compile-op1 p) (match p - [#t e] - [#f e] - [(? integer?) e] - [(? string?) e] - [(list 'quote '()) e] - ['_ e] - [(? symbol? x) `(let ((,x ,v)) ,e)] - [(list 'quote (? symbol?)) e] - [(list 'cons p1 p2) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - ,(pat-bind p1 v1 - (pat-bind p2 v2 e))))] - [(cons 'list ps) - (pat-bind-list ps v e)] - [(cons '? (cons _ ps)) - (pats-bind ps v e)])) + ; ... + ['symbol->string + (seq (assert-symbol rax) + (Xor rax type-symb) + char-array-copy + (Or rax type-str))])) + +;; Asm +;; Copy sized array of characters pointed to by rax +(define char-array-copy + (seq (Mov rdi rbx) ; dst + (Mov rsi rax) ; src + (Mov rdx (Offset rax 0)) ; len + (Add rdx 1) ; #words = 1 + (len+1)/2 + (Sar rdx 1) + (Add rdx 1) + (Sal rdx 3) ; #bytes = 8*#words + pad-stack + (Call 'memcpy) + unpad-stack + (Mov rax rbx) + (Add rbx rdx))) ) -Here are some examples: +The @racket[char-array-copy] sequence of instructions sets up a call +to C's @tt{memcpy} function giving the address of the string data as +the source, the current heap pointer as the destination, and the +number of bytes which will be copied. After the call returns, the +heap pointer is incremented by that number of copied bytes. + +We can see that this works: @ex[ +(run '(symbol->string 'foo)) +] -(pat-bind 'leaf 'bt 'bt) -(pat-bind '(list 'node v l r) 'bt 'v) -(pat-bind '(list 'node (? even? v) l r) 'bt 'v) +To observe the copying behavior, notice: +@ex[ +(run '(eq? (symbol->string 'foo) (symbol->string 'foo))) ] -These are tough to read, but we can confirm what they compute: + +@section[#:tag-prefix "mug"]{Uninterned Symbols} + +Sometimes it is useful to create a symbol that is distinct from all +other symbols. We've relied on the ability to create a symbol with +this property whenever we used the @racket[gensym] operation. What +@racket[gensym] produces is an @bold{uninterned} symbol. Even if you +constructed a symbol with the same letters, it would be a different +pointer from the symbol created by a call to @racket[gensym]. + +To add this ability, we will add an precursor to @racket[gensym] +called @racket[string->uninterned-symbol]. It consumes a string and +produces a symbol with the same letters, but distinct from all other +symbols, even those that are spelled the same. + +@ex[ +(eq? 'Hello! (string->uninterned-symbol "Hello!")) +] + +Calling @racket[string->uninterned-symbol] twice with the same string +will produce two different symbols: @ex[ -(interp-env (desugar - `(let ((bt '(node 0 leaf leaf))) - ,(pat-bind '(list 'node (? zero? z) l r) 'bt 'z))) - '()) +(eq? (string->uninterned-symbol "Hello!") + (string->uninterned-symbol "Hello!")) ] -Putting the pieces together, we can now write a @racket[match->cond] function -that rewrites a @racket[match]-expression into a @racket[cond]-expression: +Implementing @racket[string->uninterned-symbol] is fairly simple: we +allocate a new symbol, thereby ensuring it is unique and then simple +avoid calling @tt{intern_symbol}: @#reader scribble/comment-reader (racketblock -;; Match -> Expr -;; Rewrite match expression into an equivalent cond expression -(define (match->cond m) - (match m - [(cons 'match (cons e mcs)) - (let ((x (gensym))) - `(let ((,x ,e)) - (cond ,@(map (λ (mc) - (match mc - [(list p e) - (list (pat-match p x) (pat-bind p x e))])) - mcs) - ;; fall through to error - [else (add1 #f)])))])) +;; Op1 -> Asm +(define (compile-op1 p) + (match p + ; ... + ['string->uninterned-symbol + (seq (assert-string rax) + (Xor rax type-str) + char-array-copy + (Or rax type-symb))])) ) +We can confirm this works as expected: + @ex[ -(match->cond '(match '(node 2 leaf leaf) - ['leaf 0] - [(list 'node v l r) v])) +(run '(string->uninterned-symbol "foo")) +(run '(eq? 'foo (string->uninterned-symbol "foo"))) +(run '(eq? (string->uninterned-symbol "foo") + (string->uninterned-symbol "foo"))) ] -Finally, we can incorporate @racket[match->cond] into @racket[desugar]: +With that, we have completed the implementation of symbols and strings +with the proper interning behavior. -@#reader scribble/comment-reader -(ex #:no-prompt -;; Expr+ -> Expr -(define (desugar e+) - (match e+ - [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) - `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) - ,(desugar e))] - [(? symbol? x) x] - [(? imm? i) i] - [`',(? symbol? s) `',s] - [`',d (quote->expr d)] - [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] - [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] - [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] - [`(letrec ,bs ,e0) - `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) - ,(desugar e0))] - [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] - [`(cond . ,_) (desugar (cond->if e+))] - [`(and . ,_) (desugar (and->if e+))] - [`(or . ,_) (desugar (or->if e+))] - [`(match . ,_) (desugar (match->cond e+))] ; new - [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) -) +@section[#:tag-prefix "mug"]{Compiling Symbols and Strings} -Now we can interpret programs such as this: +We can now put the pieces together for the complete compiler. -@ex[ +@(define (code-link fn) + (link (string-append "code/" fn) (tt fn))) -(interp-env - (desugar - '(begin (define (prod bt) - (match bt - ['leaf 1] - [(list 'node v l r) - (* v (* (prod l) (prod r)))])) - - (prod '(node 3 (node 4 leaf leaf) leaf)))) - `((* ,*) (list? ,list?) (length ,length))) +We do a bit of housekeeping and move the code for compiling +expressions to its own module: @code-link{mug/compile-expr.rkt}. -] - - + +The top-level compiler is now: + +@filebox-include[codeblock "mug/compile.rkt"] + +The work of compiling literals and emitting calls to initialize the +symbol table is contained in its own module: + +@filebox-include[codeblock "mug/compile-literals.rkt"] diff --git a/www/notes/sugar.scrbl b/www/notes/sugar.scrbl new file mode 100644 index 00000000..c94b4ed5 --- /dev/null +++ b/www/notes/sugar.scrbl @@ -0,0 +1,1196 @@ +#lang scribble/manual + +@(require (for-label (except-in racket ...))) +@(require redex/pict + racket/runtime-path + scribble/examples + "utils.rkt" + "ev.rkt" + "../utils.rkt") + +@(define codeblock-include (make-codeblock-include #'h)) + +@(for-each (λ (f) (ev `(require (file ,(path->string (build-path notes "mug" f)))))) + '("interp.rkt" "interp-env.rkt" #;"compile.rkt" "syntax.rkt" "pat.rkt" #;"asm/interp.rkt" #;"asm/printer.rkt")) + +@title[#:tag "Mug"]{Mug: matching, throwing, quoting} + +@table-of-contents[] + +@section[#:tag-prefix "mug"]{Scaling up with syntax} + +We have developed a small, but representative functional programming +language. But there's still a long way to go from our Loot language +to the kind of constructs we expect in a modern, expressive +programming language. In particular, there's a fairly large gap +between Loot and the subset of Racket we've explored so far in this +class. + +For example, our programs have made extensive use of pattern matching, +quotation, quasi-quotation, and lots of built-in functions. In this +section, we'll examine how to scale Loot up to a language that's nicer +to program in. As we'll see, much of this can be accomplished +@emph{without extending the compiler}. Rather we can explain these +language features by @bold{elaboration} of fancier language syntax +into the existing core forms. + +In this chapter, we'll explore several ideas at the level of an +interpreter, but the techniques should work just as well for the compiler. + +@section[#:tag-prefix "mug"]{The Loot+ interpreter} + +Let us start with an interprter for the Loot language, plus all of the +extensions considered in the various assignments up through +@seclink["Assignment 7"]{Assignment 7}. + + +@codeblock-include["mug/interp-env.rkt"] + +@section[#:tag-prefix "mug"]{A bit more sugar} + + +As we saw in @seclink["Loot"]{Loot}, we can consider syntaxtic +extensions of language that elaborate into the core @tt{Expr} form of +a language. We saw this with the @racket[define]-form that we rewrote +into @racket[letrec]. We can consider further extensions such as +@racket[and], @racket[or], and even @racket[cond]. + +Here are functions for transforming each of these forms into simpler +forms: + +@#reader scribble/comment-reader +(ex +(define (cond->if c) + (match c + [`(cond (else ,e)) e] + [`(cond (,c ,e) . ,r) + `(if ,c ,e (cond ,@r))])) + +(define (and->if c) + (match c + [`(and) #t] + [`(and ,e) e] + [`(and ,e . ,r) + `(if ,e (and ,@r) #f)])) + +(define (or->if c) + (match c + [`(or) #f] + [`(or ,e) e] + [`(or ,e . ,r) + (let ((x (gensym))) + `(let ((,x ,e)) + (if ,x ,x (or ,@r))))])) +) + +Note that these functions do not necessarily eliminate @emph{all} +@racket[cond], @racket[and], or @racket[or] forms, but rather +eliminate @emph{one} occurrence, potentially creating a new occurrence +within a subexpression: + +@ex[ +(cond->if '(cond [(even? x) 8] [else 9])) +(cond->if '(cond [else 9])) +(and->if '(and)) +(and->if '(and 8)) +(and->if '(and 8 9)) +(or->if '(or)) +(or->if '(or 8)) +(or->if '(or 8 9)) +] + +The idea is that another function will drive the repeated use of these +functions until all these extended forms are eliminated. + +You may wonder why the @racket[or] elaboration is complicated by the +@racket[let]-binding. Consider a potential simpler approach: + +@#reader scribble/comment-reader +(ex +(define (or->if-simple c) + (match c + [`(or) #f] + [`(or ,e) e] + [`(or ,e . ,r) + `(if ,e ,e (or ,@r))])) +) + +But compare the elaboration of the following exmample: + +@ex[ +(or->if-simple '(or (some-expensive-function) #t)) +(or->if '(or (some-expensive-function) #t)) +] + +The second program is much more efficient. Moreover, if +@racket[some-expensive-function] had side-effects, the first program +would duplicate them, thereby changing the program's intended +behavior. + +We can incorporate these new functions into the desugar function, +which will transform extended programs into ``core'' expressions: + +@#reader scribble/comment-reader +(ex +;; Expr+ -> Expr +(define (desugar e+) + (match e+ + [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) + `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) + ,(desugar e))] + [(? symbol? x) x] + [(? imm? i) i] + [`',(? symbol? s) `',s] + [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] + [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] + [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] + [`(letrec ,bs ,e0) + `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) + ,(desugar e0))] + [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] + [`(cond . ,_) (desugar (cond->if e+))] + [`(and . ,_) (desugar (and->if e+))] + [`(or . ,_) (desugar (or->if e+))] + [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) +) + +Note how a @racket[cond], @racket[and], or @racket[or] form are +transformed and then @racket[desugar]ed again. This will take care of +eliminating any derived forms introduced by the transformation, which +is useful so that derived forms can be defined in terms of other +derived forms, including itself! + +@ex[ +(desugar '(cond [(even? x) 8] [else 9])) +(desugar '(cond [else 9])) +(desugar '(and)) +(desugar '(and 8)) +(desugar '(and 8 9)) +(desugar '(or)) +(desugar '(or 8)) +(desugar '(or 8 9)) +] + + +Derived forms that can be elaborated away by rewriting into more +primitive forms are sometimes called @bold{syntactic sugar} since they +are not fundamental but ``sweeten'' the experience of writing programs +with useful shorthands. We call the elaboration function @racket[desugar] +to indicate that it is eliminating the syntactic sugar. + +@section[#:tag-prefix "mug"]{Exceptional behavior} + +To see an example of taking the idea of program transformation as a +method for implementing language features, let's consider the case of +exceptions and exception handlers, a common feature of modern +high-level languages. + +Consider the following program for computing the product of all the +elements in a binary tree: + +@#reader scribble/comment-reader +(ex +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (match bt + ['leaf 1] + [`(node ,v ,l ,r) (* v (* (prod l) (prod r)))])) + +(prod 'leaf) +(prod '(node 8 leaf leaf)) +(prod '(node 8 (node 2 leaf leaf) (node 4 leaf leaf))) +) + +Now consider the work done in an example such as this: + +@ex[ +(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +] + +From a quick scan of the elements, we know the answer is 0 without +doing any arithmetic. But the @racket[prod] function will do a bunch +of multiplication to actually figure this out. + +To see, let's use a helper function to replace @racket[*] that prints +every it multiplies two numbers: + +@#reader scribble/comment-reader +(ex +;; Number Number -> Number +(define (mult x y) + (printf "mult: ~a x ~a\n" x y) + (* x y)) + +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (match bt + ['leaf 1] + [`(node ,v ,l ,r) (mult v (mult (prod l) (prod r)))])) + +(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +) + +This could potentially be bad if the tree were quite large. + +How can we do better? One option is to detect if the value at a node +is zero and simply avoid recurring on the left and right subtrees at +that point: + +@#reader scribble/comment-reader +(ex +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (match bt + ['leaf 1] + [`(node ,v ,l ,r) + (if (zero? v) + 0 + (mult v (mult (prod l) (prod r))))])) +) + +Does this help our answer? Only slightly: + +@ex[ +(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +] + +Why? + +The problem is that you may encounter the zero element deep within a +tree. At that point you not only want to avoid doing the +multiplication of subtrees, but also of the elements surrounding the +zero. But we seemingly don't have control over the context +surrounding the node with a zero in it, just the subtrees. What can +we do? + +One option, if the language provides it, is to @bold{raise an +exception}, signalling that a zero element has been found. An outer +function can @bold{catch} that exception and produce zero. Such a +program will avoid doing any multiplication in case there's a zero in +the tree. + +Racket comes with an exception mechanism that uses @racket[raise] to +signal an exception, which is propagated to the nearest enclosing +exception handler. If there is no such handler, an uncaught exception +error occurs. + +@ex[ + +(eval:error (raise 5)) +(eval:error (mult (raise 5) 2)) +(eval:error (mult (raise (mult 5 3)) 2)) + +] + +The general form of an exception handler uses the +@racket[with-handlers] form that includes a series of predicates and +handler expressions. We'll consider a simpler form called +@racket[catch] that unconditionally catches any exception throw and +handles it with a function that takes the raised value as an argument. +It can be expressed in terms of the more sophisticated +@racket[with-handlers] form: + +@ex[ +(define-syntax-rule (catch e f) + (with-handlers ([(λ (x) #t) f]) e)) + +(catch (raise 5) (λ (x) x)) +(catch (mult (raise 5) 2) (λ (x) x)) +(catch (mult (raise (mult 5 3)) 2) (λ (x) x)) +(catch (mult (mult 5 3) 2) (λ (x) x)) +(catch (mult (mult 5 3) 2) (λ (x) (mult x x))) +(catch (mult (raise (mult 5 3)) 2) (λ (x) (mult x x))) +] + +Now we can solve our problem: + +@#reader scribble/comment-reader +(ex +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (catch (prod/r bt) (λ (x) 0))) + +;; BT -> Number +;; Throws: 0 +(define (prod/r bt) + (match bt + ['leaf 1] + [`(node ,v ,l ,r) + (if (zero? v) + (raise 0) + (mult v (mult (prod/r l) (prod/r r))))])) + +(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +) + +(This code is a bit problematic for reasons that are beside the point +of this section, but... the problem is this will catch any exception, +including things like system signals, out of memory exceptions, etc. +A better solution would have the handler check that the exception +value was 0 and re-raise it if not. That way it doesn't ``mask'' any +other exceptions.) + +This code works great for our purposes, but what if the language +didn't provide an exception handling mechanism? Could we achieve the +same effect without relying on exceptions? + +One solution is to re-write the program in what's called +@bold{continuation passing style} (CPS). Continuation passing style +makes explicit what is implicit in the recursive calls to +@racket[prod] in our original program, which is that after recursively +computing the product of the subtree, we have to do more work such as +another recursive call and multiplication. By making this work +explicit, we gain control over it and have the option to do things +like throw away this work. + +Here is the basic idea. We will write a version of @racket[prod] that +takes an additional argument which represents ``the work to be done +after this function call completes.'' It will take a single argument, +a number, which is the result of this function call, and it will +produce some final result for the computation (in this case, a number). + +In general, we want @racket[(k (prod bt))] ≡ @racket[(prod/k bt k)] +for all functions @racket[k] and binary trees @racket[bt]. + +Starting from the spec, we have: + +@#reader scribble/comment-reader +(ex +;; BT (Number -> Number) -> Number +(define (prod/k bt k) + (k (prod bt))) +) + +We can unroll the definition of @racket[prod]: + +@#reader scribble/comment-reader +(ex +(define (prod/k bt k) + (match bt + ['leaf (k 1)] + [`(node ,v ,l ,r) + (k (mult v (mult (prod l) (prod r))))])) +) + +Now we'd like to replace the calls to @racket[prod] with calls to +@racket[prod/k], which we can do by recognizing the work to be done +around the call to @racket[prod] and placing it in the +@bold{continuation} argument to @racket[prod/k]. Let's do the first call: + +@#reader scribble/comment-reader +(ex +(define (prod/k bt k) + (match bt + ['leaf (k 1)] + [`(node ,v ,l ,r) + (prod/k l (λ (pl) + (k (mult v (mult pl (prod r))))))])) +) + +Doing this again, we get: + +@#reader scribble/comment-reader +(ex +(define (prod/k bt k) + (match bt + ['leaf (k 1)] + [`(node ,v ,l ,r) + (prod/k l (λ (pl) + (prod/k r (λ (pr) + (k (mult v (mult pl pr)))))))])) +) + +Now we have a definition of @racket[prod/k] that is independent of +@racket[prod] that satisfies the spec we started with. + +A couple of things to note: + +@itemlist[ +@item{Every call to @racket[prod/k] is a tail-call,} +@item{The context of the recursive calls are given explicitly as continuation arguments.} +] + +We can recreate the original function by giving the appropriate initial continuation: + +@#reader scribble/comment-reader +(ex +;; BT -> Number +(define (prod bt) + (prod/k bt (λ (x) x))) +) + +Now, this code doesn't do anything smart on zero elements; it does +exactly the same multiplications our first program does: + +@ex[ +(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +] + +However, with a small tweak, we can get the behavior of the exception-handling code. + +Consider this definition: + +@#reader scribble/comment-reader +(ex +;; BT (Number -> Number) -> Number +(define (prod/k bt k) + (match bt + ['leaf (k 1)] + [`(node ,v ,l ,r) + (if (zero? v) + 0 + (prod/k l (λ (pl) + (prod/k r (λ (pr) + (k (mult v (mult pl pr))))))))])) + +;; BT -> Number +(define (prod bt) + (prod/k bt (λ (x) x))) +) + +Notice that this program, when the value in a node is zero, +immediately returns @racket[0]. It does not do any of the work +represented by @racket[k]. It does something akin to raising an +exception: it blows off all the work of the surround context and +returns a value to the ``handler'' (in this case, @racket[prod]). + +Returning to our example, we can see that no multiplications occur: + +@ex[ +(prod '(node 9 (node 0 leaf leaf) (node 4 (node 2 leaf leaf) (node 3 leaf leaf)))) +] + +We've now achieved our original goal without the use of exception +handlers. We achieved this by rewriting our program to make explicit +the work that remains to do, giving us the ability to avoid doing it +when necessary. This is a slighly simplified version of the general +exception handling transformation, which we will look at next, since +there's only a single handler and all it does it produce 0. But, the +by-hand transformation we did provides a useful blueprint for how can +generally transform programs that use exception handling into ones +that don't. + + +@section[#:tag-prefix "mug"]{Exceptional transformation} + +Let's consider a very small subset of expressions, extended with +@racket[raise] and @racket[catch], and see how we can transform away +those added mechanisms: + +@#reader scribble/comment-reader +(racketblock +;; An Expr is one of: +;; - Integer +;; - Variable +;; - `(if ,Expr ,Expr ,Expr) +;; - `(,Prim1 ,Expr) +;; - `(,Prim2 ,Expr ,Expr) +;; - `(raise ,Expr) +;; - `(catch ,Expr (λ (,Variable) ,Expr)) +) + +Here is the basic idea of the transformation, we transform every +expression into a function of two arguments. The two arguments +represent the two ways an expression may produce results: either by +returning normally or by raising an exception. + + +So for example, if the original expression were @racket[1], we'd want +the transformed program to be +@racketblock[ +'(λ (retn raze) (retn 1)) +] + +Why? Because @racket[1] just produces @racket[1]; it can't possibly +raise an exception. So given the two ways of producing a value, we +choose the @racket[ret] way and ``return'' by apply @racket[retn] to +the value we want to return: @racket[1]. + + +Suppose the original expression is @racket[(raise 1)]. Then we want +to produce: +@racketblock[ +'(λ (retn raze) (raze 1)) +] + +This is choosing to not return a value, but rather ``raise'' an +exception by calling the @racket[raze] function. + +This is a lot like the by-hand transformation we did, except we now +have two continuations: one to represent work to do after +returning (normally) and one for work to do after raising an +exception. + +At the top-level, to run an expression we simply plug in appropriate +definitions for @racket[retn] and @racket[raze]. The @racket[retn] +function should just produce the result, i.e. it should be @racket[(λ +(x) x)], while @racket[raze] should signal an uncaught exception. +Since our language has such a simple model of errors, we'll just cause +an error to occur, i.e. @racket[(λ (x) (add1 #f))]. Let's try our +examples. + +@ex[ +(interp-env '((λ (retn raze) (retn 1)) (λ (x) x) (λ (x) (add1 #f))) '()) +(interp-env '((λ (retn raze) (raze 1)) (λ (x) x) (λ (x) (add1 #f))) '()) +] + +What about something like @racket[(add1 _e)]? + +Well if @racket[_e] returns normally, then the whole thing should +produce one more than that value. If @racket[_e] raises an exception, +then @racket[(add1 _e)] should raise that exception. + +Suppose @racket[_t] where the transformed version of @racket[_e], +which means it is a function of two parameters: what to do if +@racket[_e] returns and what to do if @racket[_e] raises. + +Then the transformation of @racket[(add1 _e)] is +@racketblock[ +(λ (retn raze) + (_t (λ (x) (retn (add1 x))) (λ (x) (raze x))))] + +This can be simplified slightly by observing that @racket[(λ (x) (raze +x))] is equal to @racket[raze]: +@racketblock[ +(λ (retn raze) + (_t (λ (x) (retn (add1 x))) raze))] + +How about something like @racket[(catch _e0 (λ (_x) _e1))]? If +@racket[_e0] produces a value normally, then the whole expression +produces that value normally. However if @racket[_e0] raises an +expression then the whole expression produces whatever @racket[_e1] +with @racket[x] bound to the raised value produces. Let @racket[_t0] +and @racket[_t1] be the transformed versions of @racket[_e0] and +@racket[_e1]. Then transformation of the whole expressions should be + +@racketblock[ +(λ (retn raze) + (_t0 retn (λ (_x) (_t1 retn raze)))) +] + +One thing to notice here is that we are running @racket[_t0] with a @racket[raze] function +that, if called, will run @racket[_t1] normally. + +Guided by the examples, let's define the transformation (note: we have +to take care of avoiding unintended variable capture): + +@#reader scribble/comment-reader +(ex +;; Expr -> Expr +(define (exn-transform e) + (match e + [(? integer? i) `(λ (retn raze) (retn ,i))] + [(? symbol? x) + (let ((retn (gensym 'retn)) + (raze (gensym 'raze))) + `(λ (,retn ,raze) (,retn ,x)))] + [`(if ,e0 ,e1 ,e2) + (let ((t0 (exn-transform e0)) + (t1 (exn-transform e1)) + (t2 (exn-transform e2)) + (retn (gensym 'retn)) + (raze (gensym 'raze))) + `(λ (,retn ,raze) + (,t0 + (λ (x) + ((if x ,t1 ,t2) ,retn ,raze)) + ,raze)))] + [`(,(? prim? p) ,e0) + (let ((t0 (exn-transform e0)) + (retn (gensym 'retn)) + (raze (gensym 'raze))) + `(λ (,retn ,raze) + (,t0 (λ (v) (,retn (,p v))) + ,raze)))] + [`(,(? prim? p) ,e0 ,e1) + (let ((t0 (exn-transform e0)) + (t1 (exn-transform e1)) + (retn (gensym 'retn)) + (raze (gensym 'raze)) + (v0 (gensym 'v0))) + `(λ (,retn ,raze) + (,t0 (λ (,v0) + (,t1 (λ (v1) (,retn (,p v0 v1))) + ,raze)) + ,raze)))] + [`(raise ,e) + (let ((t (exn-transform e)) + (retn (gensym 'retn)) + (raze (gensym 'raze))) + `(λ (,retn ,raze) + (,t ,raze ,raze)))] + [`(catch ,e0 (λ (,x) ,e1)) + (let ((t0 (exn-transform e0)) + (t1 (exn-transform e1)) + (retn (gensym 'retn)) + (raze (gensym 'raze))) + + `(λ (,retn ,raze) + (,t0 ,retn + (λ (,x) + (,t1 ,retn ,raze)))))])) +) + +Here's what the transformation looks like on examples: + +@ex[ +(exn-transform '1) +(exn-transform '(raise 1)) +(exn-transform '(catch (raise 1) (λ (x) x))) +(exn-transform '(catch (raise 1) (λ (x) (add1 x)))) +(exn-transform '(catch (add1 (raise 1)) (λ (x) 1))) +(exn-transform '(catch (add1 (raise 1)) (λ (x) (raise x)))) +] + +Now let's give it a spin: + +@ex[ +;; Expr -> Value +(define (run e) + (interp-env `(,(exn-transform e) (λ (x) x) (λ (x) (add1 #f))) '())) + +(run '1) +(run '(raise 1)) +(run '(catch (raise 1) (λ (x) x))) +(run '(catch (raise 1) (λ (x) (add1 x)))) +(run '(catch (add1 (raise 1)) (λ (x) 1))) +(run '(catch (add1 (raise 1)) (λ (x) (raise x)))) +(run '(if (raise 0) 1 2)) +(run '(if (zero? 0) (raise 1) 2)) +] + +@section[#:tag-prefix "mug"]{Quotation} + +We have seen how to interpret limited uses of @racket[quote], such as +in @racket[(quote ())] and @racket[(quote x)], i.e. the empty list and symbols. + +But we've also been using @emph{using} @racket[quote] more generally +where we can write down an arbitrary constant s-expression within a +@racket[quote]: + +@ex[ +'#t +'5 +'(1 2 3) +'(add1 x) +'(car '(1 2 3)) +'(((1) 2) 3) +'(1 . 2) +'("asdf" fred ((one))) +] + +We can understand the more general @racket[quote] form as a shorthand +for expressions that construct an equivalent list to the one denoted +by the s-expression. + +For example, +@itemlist[ + +@item{@racket['1] is shorthand for @racket[1],} + +@item{@racket['(1 . 2)] is shorthand for @racket[(cons '1 '2)], which +is shorthand for @racket[(cons 1 2)],} + +@item{@racket['(1 2 3)] is shorthand for @racket[(cons '1 '(2 3))], +which is shorthand for @racket[(cons 1 (cons '2 '(3)))], which is +shorthand for @racket[(cons 1 (cons 2 (cons '3 '())))], which is +shorthand for @racket[(cons 1 (cons 2 (cons 3 '())))],} + +@item{@racket['()] is as simple as possible (the empty list),} + +@item{@racket['x] is as simple as possible (a symbol), and} + +@item{@racket[5] is as simple as possible.} +] + +Guided by these examples, we can write a function that transforms the +s-expression inside of a @racket[quote] into an equivalent expression +that only uses @racket[quote] for constructing symbols and the empty +list: + + +@#reader scribble/comment-reader +(ex +;; S-Expr -> Expr +;; Produce an expression that evaluates to given s-expression, without +;; use of quote (except for symbols and empty list) +(define (quote->expr d) + (match d + [(? boolean?) d] + [(? integer?) d] + [(? string?) d] + [(? char?) d] + [(? symbol?) (list 'quote d)] + [(cons x y) (list 'cons (quote->expr x) (quote->expr y))] + ['() ''()])) + + +(quote->expr 'x) +(quote->expr 5) +(quote->expr "Fred") +(quote->expr '(1 . 2)) +(quote->expr '(1 2 3)) +(quote->expr '(car '(1 2 3))) +(quote->expr '(((1) 2) 3)) +(quote->expr '(1 . 2)) +(quote->expr '("asdf" fred ((one)))) +) + +We can now incorporate this into @racket[desugar] to eliminate uses of +compound-data @racket[quote]: + +@#reader scribble/comment-reader +(ex +;; Expr+ -> Expr +(define (desugar e+) + (match e+ + [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) + `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) + ,(desugar e))] + [(? symbol? x) x] + [(? imm? i) i] + [`',(? symbol? s) `',s] + [`',d (quote->expr d)] + [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] + [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] + [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] + [`(letrec ,bs ,e0) + `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) + ,(desugar e0))] + [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] + [`(cond . ,_) (desugar (cond->if e+))] + [`(and . ,_) (desugar (and->if e+))] + [`(or . ,_) (desugar (or->if e+))] + [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) +) + +And now we can @racket[desugar] programs such as these: + +@ex[ +(desugar '(map f '(1 2 3))) +(desugar '(map f '(and 1 2))) +(desugar '(if x '(1 . 2) 3)) +] + +And our prior interpterter will work just fine on these programs: + +@ex[ +(interp-env (desugar '(map f '(1 2 3))) `((map ,map) (f ,add1))) +(interp-env (desugar '(map f '(and 1 2))) `((map ,map) (f ,identity))) +(interp-env (desugar '(if x '(1 . 2) 3)) `((x #t))) +] + +And: + +@ex[ +(interp-env (desugar ''(((1) 2) 3)) '()) +(interp-env (desugar ''(1 . 2)) '()) +(interp-env (desugar ''("asdf" fred ((one)))) '()) +] + +@section[#:tag-prefix "mug"]{Pattern matching} + +One of the most ubiquitous language features we've used, but not +implemented, is pattern matching with the @racket[match] form. + +Pattern matching too can be seen as syntactic sugar since it's easy to +imagine how you could rewrite uses of @racket[match] into equivalent +expressions that didn't involve @racket[match]. + +For example, consider the program: + +@#reader scribble/comment-reader +(racketblock +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (match bt + ['leaf 1] + [`(node ,v ,l ,r) (* v (* (prod l) (prod r)))])) +) + +An alternative, equivalent, formulation that doesn't use +@racket[match] is the following: + +@#reader scribble/comment-reader +(racketblock +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (cond + [(eq? 'leaf bt) 1] + [(and (list? bt) + (= 4 (length bt)) + (eq? 'node (first bt))) + (let ((v (second bt)) + (l (third bt)) + (r (fourth bt))) + (* v (* (prod l) (prod r))))] + ; corresponds to a match failure + [else (add1 #f)])) +) + +This code is less nice to read and write, but essentially does the +same thing the pattern-matching code does. + +In this example, each @racket[match]-clause becomes a +@racket[cond]-clause. The question-part of each @racket[cond]-clause +is an expression that determines whether the corresponding +pattern-part of the @racket[match-clause] matches. The answer-part of +each @racket[cond]-clause corresponds to the expression-part of the +@racket[match]-clause, with an add @racket[let]-form that destructures +the scrutinee and binds the pattern variables of the pattern-part. + + +Let's consider the following extension to the grammar of @tt{Expr+} to +include a simplified version of the pattern matchin form we've been +using: + +@#reader scribble/comment-reader +(racketblock +;; type Expr+ = +;; .... +;; | Match + +;; type Match = (match ,Expr+ ,(list Pat Expr+) ...) + +;; type Pat = +;; | #t +;; | #f +;; | Integer +;; | String +;; | Variable +;; | `_ +;; | `'() +;; | `(quote ,Symbol) +;; | `(cons ,Pat ,Pat) +;; | `(list ,Pat ...) +;; | `(? ,Expr ,Pat ...) + ) + +A @racket[match] form consists of an expression to match against, +sometimes callsed the @bold{scrutinee}, followed by some number of +pattern-matching clauses; each one consists of a pattern and +expression to evaluate should the pattern match the scrutinee's value. + +Here a pattern can either be a literal boolean, integer, string, empty +list, or symbol, or a pattern variable, which matches anything and +binds the value to the variable, a ``wildcard'' which matches anything +and binds nothing, a @racket[cons] pattern which matches pairs of +things that match the subpatterns, @racket[list] pattern which matches +lists of a fixed-size where elements matches the subpatterns, or a +@racket[?] pattern which matches if the results of evaluated the first +subexpression applied to scrutinee produces true and all of the +subpatterns match. + +This doesn't include the @racket[quasiquote]-patterns we used above, +but still this is a useful subset of pattern matching and allows us to +write programs such as: + +@#reader scribble/comment-reader +(racketblock +;; BT -> Number +;; Multiply all the numbers in given binary tree +(define (prod bt) + (match bt + ['leaf 1] + [(list 'node v l r) (* v (* (prod l) (prod r)))])) +) + +As alluded to above, each pattern plays two roles: it used to +determine whether the scrutinee matches the pattern, and it used to +bind variable names (in the scope of the RHS expression) to sub-parts +of the scrutinee when it does match. + +We can write two helper functions to accomplish each of these tasks: +@itemlist[ + +@item{rewrite patterns into Boolean valued expressions that answer +whether the pattern matches the scrutinee,} + +@item{rewrite pattern and RHS in to expressions in which the pattern +variables of pattern are bound to the appropriately deconstructed +parts of the scrutinee.} + +] + + +Assume: the scrutinee is a variable. (It's easy to establish this assumption in general.) + +We need two functions: + +@#reader scribble/comment-reader +(racketblock +;; Pat Variable -> Expr +;; Produces an expression determining if p matches v +(define (pat-match p v) ...) + +;; Pat Variable Expr -> Expr +;; Produce an expression that deconstructs v and binds pattern variables +;; of p in scope of e. +;; ASSUME: v matches p +(define (pat-bind p v e) ...) +) + +Let's turn to @racket[pat-match] first. + +Suppose the pattern is a literal @racket[#t]. When does @racket[v] +match it? When @racket[v] is @racket[eq?] to @racket[#t]. + +So an expression that produces true when this pattern matches is @racket[(eq? #t v)]. + +Handling @racket[#f], integers, characters, symbols, and the empty list is similar. + +What about variables? Suppose the pattern is @racket[x]. When does +@racket[v] match it? Always. So @racket[#t] is an expression that +produces true with this pattern matches. + +Wildcards are the same. + +What about when the pattern is a @racket[cons]-pattern? Suppose the +pattern is @racket[(cons _p1 _p2)] for some patterns @racket[_p1] and +@racket[_p2]. When does @racket[v] match @racket[(cons _p1 _p2)]? +When @racket[v] is a pair and @racket[(car v)] matches @racket[_p1] +and @racket[(cdr v)] matches @racket[_p2]. + +A @racket[list] pattern is similar, except that the scrunity must be a +list with as many elements as there are patterns, and the elements +must match the corresponding subpattern. + +What about a @racket[?] pattern? When does @racket[v] match it? +Suppose the pattern is @racket[(? even?)]. When does @racket[v] match +it? When @racket[(even? v)] is true. If the pattern had a non-empty +list of sub-patterns they would all need to match @racket[v], too. + + +We can now formulate a defintion of @racket[pat-match]: + +@#reader scribble/comment-reader +(racketblock +;; Pat Variable -> Expr +;; Produces an expression determining if p matches v +(define (pat-match p v) + (match p + [#t `(eq? #t ,v)] + [#f `(eq? #f ,v)] + [(? integer? i) `(eq? ,i ,v)] + [(? string? s) + `(and (string? ,v) + (string=? ,s ,v))] + [(list 'quote '()) `(eq? '() ,v)] + [(? symbol?) #t] + [(list 'quote (? symbol? s)) `(eq? ,v ',s)] + [(list 'cons p1 p2) + (let ((v1 (gensym)) + (v2 (gensym))) + `(and (cons? ,v) + (let ((,v1 (car ,v)) + (,v2 (cdr ,v))) + (and ,(pat-match p1 v1) + ,(pat-match p2 v2)))))] + [(cons 'list ps) + `(and (list? ,v) + (= (length ,v) ,(length ps)) + ,(pat-match-list ps v))] + [(cons '? (cons e ps)) + `(and (,e ,v) + ,(pats-match ps v))])) +) + +The @racket[list]-pattern case relies on a helper function +@racket[pat-match-list] and the @racket[?]-pattern case relies on +@racket[pats-match], both defined below: + + +@#reader scribble/comment-reader +(racketblock +;; (Listof Pat) Variable -> Expr +;; Produces an expression determining if every ps matches x +(define (pats-match ps v) + (match ps + ['() #t] + [(cons p ps) + `(and ,(pat-match p v) + ,(pats-match ps v))])) + +;; (Listof Pat) Variable -> Expr +;; Produces an expression determining if each ps matches each element of list v +(define (pat-match-list ps v) + (match ps + ['() #t] + [(cons p ps) + (let ((v1 (gensym)) + (v2 (gensym))) + `(let ((,v1 (car ,v)) + (,v2 (cdr ,v))) + (and ,(pat-match p v1) + ,(pat-match-list ps v2))))])) +) + +Here are some examples: + +@ex[ + +(pat-match 'leaf 'bt) +(pat-match '(list 'node v l r) 'bt) +(pat-match '(list 'node (? even? v) l r) 'bt) + +] + +These aren't very readable programs that emerge, however, we check +that they're doing the right thing. Note that the elaboration +requires a few functions to be available, such as @racket[list?], +and @racket[length]. We make these available in an initial +environment: + +@ex[ +(define env0 + `((length ,length) + (list? ,list?))) +(interp-env (desugar `(let ((bt 'leaf)) ,(pat-match 'leaf 'bt))) env0) +(interp-env (desugar `(let ((bt 'leaf)) ,(pat-match 8 'bt))) env0) +(interp-env (desugar + `(let ((bt '(node 1 leaf leaf))) + ,(pat-match '(list 'node v l r) 'bt))) + env0) +(interp-env (desugar + `(let ((bt '(node 1 leaf leaf))) + ,(pat-match '(list 'node (? zero?) l r) 'bt))) + env0) +(interp-env (desugar + `(let ((bt '(node 0 leaf leaf))) + ,(pat-match '(list 'node (? zero?) l r) 'bt))) + env0) +] + +Now moving on to @racket[pat-bind], it follows a similar structure, +but we always assume the given pattern matches the scrutinee. The +addition @tt{Expr} argument represents the right-hand-side expression +of the @racket[match]-clause. The idea is that the pattern variables +of @racket[p] are bound to sub-parts of @racket[v] in @racket[e]. + +@#reader scribble/comment-reader +(racketblock +;; Pat Variable Expr -> Expr +;; Produce an expression that deconstructs v and binds pattern variables +;; of p in scope of e. +;; ASSUME: v matches p +(define (pat-bind p v e) + (match p + [#t e] + [#f e] + [(? integer?) e] + [(? string?) e] + [(list 'quote '()) e] + ['_ e] + [(? symbol? x) `(let ((,x ,v)) ,e)] + [(list 'quote (? symbol?)) e] + [(list 'cons p1 p2) + (let ((v1 (gensym)) + (v2 (gensym))) + `(let ((,v1 (car ,v)) + (,v2 (cdr ,v))) + ,(pat-bind p1 v1 + (pat-bind p2 v2 e))))] + [(cons 'list ps) + (pat-bind-list ps v e)] + [(cons '? (cons _ ps)) + (pats-bind ps v e)])) +) + +Here are some examples: + +@ex[ + +(pat-bind 'leaf 'bt 'bt) +(pat-bind '(list 'node v l r) 'bt 'v) +(pat-bind '(list 'node (? even? v) l r) 'bt 'v) + +] + +These are tough to read, but we can confirm what they compute: + +@ex[ +(interp-env (desugar + `(let ((bt '(node 0 leaf leaf))) + ,(pat-bind '(list 'node (? zero? z) l r) 'bt 'z))) + '()) +] + +Putting the pieces together, we can now write a @racket[match->cond] function +that rewrites a @racket[match]-expression into a @racket[cond]-expression: + +@#reader scribble/comment-reader +(racketblock +;; Match -> Expr +;; Rewrite match expression into an equivalent cond expression +(define (match->cond m) + (match m + [(cons 'match (cons e mcs)) + (let ((x (gensym))) + `(let ((,x ,e)) + (cond ,@(map (λ (mc) + (match mc + [(list p e) + (list (pat-match p x) (pat-bind p x e))])) + mcs) + ;; fall through to error + [else (add1 #f)])))])) +) + +@ex[ +(match->cond '(match '(node 2 leaf leaf) + ['leaf 0] + [(list 'node v l r) v])) +] + +Finally, we can incorporate @racket[match->cond] into @racket[desugar]: + +@#reader scribble/comment-reader +(ex #:no-prompt +;; Expr+ -> Expr +(define (desugar e+) + (match e+ + [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) + `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) + ,(desugar e))] + [(? symbol? x) x] + [(? imm? i) i] + [`',(? symbol? s) `',s] + [`',d (quote->expr d)] + [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] + [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] + [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] + [`(letrec ,bs ,e0) + `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) + ,(desugar e0))] + [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] + [`(cond . ,_) (desugar (cond->if e+))] + [`(and . ,_) (desugar (and->if e+))] + [`(or . ,_) (desugar (or->if e+))] + [`(match . ,_) (desugar (match->cond e+))] ; new + [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) +) + +Now we can interpret programs such as this: + +@ex[ + +(interp-env + (desugar + '(begin (define (prod bt) + (match bt + ['leaf 1] + [(list 'node v l r) + (* v (* (prod l) (prod r)))])) + + (prod '(node 3 (node 4 leaf leaf) leaf)))) + `((* ,*) (list? ,list?) (length ,length))) + +] + + diff --git a/www/schedule.scrbl b/www/schedule.scrbl index 7005674e..9f9fd333 100644 --- a/www/schedule.scrbl +++ b/www/schedule.scrbl @@ -80,7 +80,7 @@ (list @wk{11/16} "" - @elem{Mountebank} @;{Match} + @elem{@secref["Mug"]} @elem{Neerdowell} @;{ ?? } ) diff --git a/www/utils.rkt b/www/utils.rkt index 8457760f..fb5bb611 100644 --- a/www/utils.rkt +++ b/www/utils.rkt @@ -1,6 +1,5 @@ - #lang racket -(provide exercise float-right panopto-vid shell) +(provide exercise float-right panopto-vid shell shell-result) (require scribble/base scribble/core scribble/html-properties redex/pict) @@ -47,9 +46,12 @@ (parameterize ([current-output-port o] [current-error-port e]) (set! r (proc))))))))) - (unless r (error (string-append os es))) + ; (unless r (error (string-append os es))) (string-append os es)) +(define (shell-result c) + (with-output-to-string/err (λ () (system #:set-pwd? #t c)))) + (define (shell . cs) (match cs ['() ""]