From bef31812bfc7c8324fae779be750c52c682fb56b Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Wed, 10 Nov 2021 12:58:17 -0500 Subject: [PATCH 01/21] Symbols. --- langs/mountebank/Makefile | 1 + langs/mountebank/ast.rkt | 2 + langs/mountebank/compile-datum.rkt | 58 ++++++++-- langs/mountebank/compile-ops.rkt | 33 +++++- langs/mountebank/compile.rkt | 30 +++++- langs/mountebank/intern.rkt | 147 +++++++++++++++++++++++++ langs/mountebank/parse.rkt | 6 +- langs/mountebank/print.c | 13 +++ langs/mountebank/symbol.c | 149 ++++++++++++++++++++++++++ langs/mountebank/test/test-runner.rkt | 19 +++- langs/mountebank/types.h | 1 + langs/mountebank/types.rkt | 1 + langs/mountebank/values.c | 11 ++ langs/mountebank/values.h | 8 ++ 14 files changed, 461 insertions(+), 18 deletions(-) create mode 100644 langs/mountebank/intern.rkt create mode 100644 langs/mountebank/symbol.c diff --git a/langs/mountebank/Makefile b/langs/mountebank/Makefile index 9b74bfc0..76f979af 100644 --- a/langs/mountebank/Makefile +++ b/langs/mountebank/Makefile @@ -11,6 +11,7 @@ objs = \ main.o \ values.o \ print.o \ + symbol.o \ io.o default: runtime.o diff --git a/langs/mountebank/ast.rkt b/langs/mountebank/ast.rkt index b4be14cd..6f7a03c1 100644 --- a/langs/mountebank/ast.rkt +++ b/langs/mountebank/ast.rkt @@ -23,6 +23,7 @@ ;; | Char ;; | Boolean ;; | String +;; | Symbol ;; | (Boxof Datum) ;; | (Listof Datum) ;; | (Vectorof Datum) @@ -35,6 +36,7 @@ ;; | 'empty? | 'cons? | 'box? ;; | 'vector? | vector-length ;; | 'string? | string-length +;; | 'symbol? | string->symbol | symbol->string ;; type Op2 = '+ | '- | '< | '= ;; | 'cons | 'eq? ;; | 'make-vector | 'vector-ref diff --git a/langs/mountebank/compile-datum.rkt b/langs/mountebank/compile-datum.rkt index 9e00b110..7f2e96f6 100644 --- a/langs/mountebank/compile-datum.rkt +++ b/langs/mountebank/compile-datum.rkt @@ -1,22 +1,45 @@ #lang racket -(provide compile-datum) -(require "types.rkt" +(provide compile-datum compile-literals) +(require "types.rkt" + "intern.rkt" a86/ast) ;; Registers used (define rax 'rax) ; return +;; QEnv -> Asm +(define (compile-literals q) + (match q + ['() (seq)] + [(cons (cons s (Ref l _)) q) + (seq (compile-literal (to-string s) l) + (compile-literals q))])) + +;; String Label -> Asm +(define (compile-literal s l) + (seq (Label l) + (Dq (string-length s)) + (compile-string-chars (string->list s)) + (if (odd? (string-length s)) + (seq (Dd 0)) + (seq)))) + (define (compound? x) - (or (string? x) + (or ;(string? x) + ;(symbol? x) (cons? x) (vector? x) (box? x))) ;; Datum -> Asm (define (compile-datum d) - (if (compound? d) - (compile-compound-datum d) - (seq (Mov rax (imm->bits d))))) + (cond + [(compound? d) + (compile-compound-datum d)] + [(Ref? d) + (seq (Lea rax (Plus (Ref-label d) (Ref-type-tag d))))] + [else + (seq (Mov rax (imm->bits d)))])) (define (compile-compound-datum d) (match (compile-quoted d) @@ -29,13 +52,16 @@ ;; Datum -> (cons AsmExpr Asm) (define (compile-quoted c) (cond - [(string? c) (compile-datum-string c)] + ;[(string? c) (compile-datum-string c)] + ;[(symbol? c) (compile-datum-symbol (symbol->string c))] [(vector? c) (compile-datum-vector (vector->list c))] [(box? c) (compile-datum-box (unbox c))] [(cons? c) (compile-datum-cons (car c) (cdr c))] + [(Ref? c) (cons (Plus (Ref-label c) (Ref-type-tag c)) '())] [else (cons (imm->bits c) '())])) ;; String -> (cons AsmExpr Asm) +#; (define (compile-datum-string c) (let ((l (gensym 'string))) (cons (Plus l type-str) @@ -46,6 +72,18 @@ (seq (Dd 0)) (seq)))))) +;; String -> (cons AsmExpr Asm) +#; +(define (compile-datum-symbol c) + (let ((l (gensym 'symbol))) + (cons (Plus l type-symb) + (seq (Label l) + (Dq (string-length c)) + (compile-string-chars (string->list c)) + (if (odd? (string-length c)) + (seq (Dd 0)) + (seq)))))) + ;; [Listof Datum] -> (cons AsmExpr Asm) (define (compile-datum-vector ds) (match ds @@ -90,3 +128,9 @@ [(cons c cs) (seq (Dd (char->integer c)) (compile-string-chars cs))])) + +;; (U String Symbol) -> String +(define (to-string s) + (if (symbol? s) + (symbol->string s) + s)) diff --git a/langs/mountebank/compile-ops.rkt b/langs/mountebank/compile-ops.rkt index 1616d9b8..0ef65bc2 100644 --- a/langs/mountebank/compile-ops.rkt +++ b/langs/mountebank/compile-ops.rkt @@ -21,7 +21,11 @@ unpad-stack)] ['peek-byte (seq pad-stack (Call 'peek_byte) - unpad-stack)])) + unpad-stack)] + ['gensym (seq pad-stack + (Call 'gensym) + unpad-stack + (Or rax type-symb))])) ;; Op1 -> Asm (define (compile-op1 p) @@ -80,6 +84,8 @@ (type-pred ptr-mask type-vect)] ['string? (type-pred ptr-mask type-str)] + ['symbol? + (type-pred ptr-mask type-symb)] ['vector-length (let ((zero (gensym)) (done (gensym))) @@ -105,7 +111,24 @@ (Jmp done) (Label zero) (Mov rax 0) - (Label done)))])) + (Label done)))] + ['string->symbol + (seq (assert-string rax) + (Xor rax type-str) + (Mov rdi rax) + pad-stack + (Call 'intern_symbol) + unpad-stack + (Or rax type-symb))] + ['symbol->string + (seq (assert-symbol rax) + (Xor rax type-symb) + (Mov rdi rax) + pad-stack + ;; FIXME: this allocates off-heap + (Call 'str_dup) + unpad-stack + (Or rax type-str))])) ;; Op2 -> Asm (define (compile-op2 p) @@ -140,7 +163,7 @@ (let ((true (gensym))) (seq (Je true) (Mov rax val-false) - (Label true))))] + (Label true))))] ['cons (seq (Mov (Offset rbx 0) rax) (Pop rax) @@ -150,7 +173,7 @@ (Add rbx 16))] ['eq? (seq (Pop r8) - (eq r8 rax))] + (eq r8 rax))] ['make-vector (let ((loop (gensym)) (done (gensym)) @@ -304,6 +327,8 @@ (assert-type ptr-mask type-vect)) (define assert-string (assert-type ptr-mask type-str)) +(define assert-symbol + (assert-type ptr-mask type-symb)) (define assert-proc (assert-type ptr-mask type-proc)) diff --git a/langs/mountebank/compile.rkt b/langs/mountebank/compile.rkt index 2e31ef39..6d13813c 100644 --- a/langs/mountebank/compile.rkt +++ b/langs/mountebank/compile.rkt @@ -4,6 +4,7 @@ "types.rkt" "lambdas.rkt" "fv.rkt" + "intern.rkt" "compile-ops.rkt" "compile-datum.rkt" a86/ast) @@ -18,12 +19,13 @@ ;; Prog -> Asm (define (compile p) - (match p - [(Prog ds e) + (match (intern p) + [(cons (Prog ds e) q) (prog (externs) (Global 'entry) (Label 'entry) (Mov rbx rdi) ; recv heap pointer + (init-symbol-table q) (compile-defines-values ds) (compile-e e (reverse (define-ids ds)) #t) (Add rsp (* 8 (length ds))) ;; pop function definitions @@ -32,13 +34,18 @@ (compile-lambda-defines (lambdas p)) (Label 'raise_error_align) pad-stack - (Call 'raise_error))])) + (Call 'raise_error) + (Data) + (compile-literals q))])) (define (externs) (seq (Extern 'peek_byte) (Extern 'read_byte) (Extern 'write_byte) - (Extern 'raise_error))) + (Extern 'raise_error) + (Extern 'gensym) + (Extern 'intern_symbol) + (Extern 'str_dup))) ;; [Listof Defn] -> [Listof Id] (define (define-ids ds) @@ -289,6 +296,21 @@ [#t 0] [#f (+ 8 (lookup x rest))])])) +;; QEnv -> Asm +;; Call intern_symbol on every symbol in the program +(define (init-symbol-table q) + (seq pad-stack + (append-map init-symbol q) + unpad-stack)) + +;; (cons (U String Symbol) Ref) -> Asm +(define (init-symbol qb) + (match qb + [(cons (? symbol? s) (Ref l _)) + (seq (Lea rdi l) + (Call 'intern_symbol))] + [_ (seq)])) + ;; Symbol -> Label ;; Produce a symbol that is a valid Nasm label (define (symbol->label s) diff --git a/langs/mountebank/intern.rkt b/langs/mountebank/intern.rkt new file mode 100644 index 00000000..2ecef152 --- /dev/null +++ b/langs/mountebank/intern.rkt @@ -0,0 +1,147 @@ +#lang racket +(require "ast.rkt" "types.rkt") +(provide intern + (struct-out Ref)) + +;; type QEnv = [Listof (cons (U String Symbol) Ref)] + +(struct Ref (label type-tag) #:prefab) + +;; Datum QEnv -> (cons Datum QEnv) +;; Intern all literal strings and symbols. +;; Replaces occurrences of string and symbol literals with +;; a reference bound in qenv. +(define (intern-datum d q) + (cond + [(string? d) (intern! d q type-str)] + [(symbol? d) (intern! d q type-symb)] + [(box? d) + (match (intern-datum (unbox d) q) + [(cons d q) + (cons (box d) q)])] + [(cons? d) + (match (intern-datums (list (car d) (cdr d)) q) + [(cons (list d1 d2) q) + (cons (cons d1 d2) q)])] + [(vector? d) + (match (intern-datums (vector->list d) q) + [(cons ds q) + (cons (apply vector ds) q)])] + [else + (cons d q)])) + +;; (U String Symbol) QEnv -> [Maybe Ref] +(define (lookup d q) + (match q + ['() #f] + [(cons (cons d0 r) q) + (if (equal? d d0) + r + (lookup d q))])) + +;; (U Symbol String) QEnv Tag -> QEnv +;; Either lookup prior reference or create a new one +(define (intern! s q type) + (match (lookup s q) + [#f + (let ((l (gensym 'lit))) + (let ((r (Ref l type))) + (cons r + (cons (cons s r) q))))] + [r (cons r q)])) + +;; Prog -> (cons Prog QEnv) +(define (intern p) + (intern-prog p '())) + +;; [Listof Datum] QEnv -> (cons [Listof Datum] QEnv) +(define (intern-datums ds q) + (match ds + ['() (cons ds q)] + [(cons d ds) + (match (intern-datum d q) + [(cons d q) + (match (intern-datums ds q) + [(cons ds q) + (cons (cons d ds) q)])])])) + +;; Prog QEnv -> (cons Prog QEnv) +(define (intern-prog p q) + (match p + [(Prog ds e) + (match (intern-ds ds q) + [(cons ds q) + (match (intern-e e q) + [(cons e q) + (cons (Prog ds e) q)])])])) + +;; Defns QEnv -> (cons Defns QEnv) +(define (intern-ds ds q) + (match ds + ['() (cons '() q)] + [(cons d ds) + (match (intern-d d q) + [(cons d q) + (match (intern-ds ds q) + [(cons ds q) + (cons (cons d ds) q)])])])) + +;; Defn QEnv -> (cons Defn QEnv) +(define (intern-d d q) + (match d + [(Defn f xs e) + (match (intern-e e q) + [(cons e q) + (cons (Defn f xs e) q)])])) + +;; Expr QEnv -> (cons Expr QEnv) +(define (intern-e e q) + (match e + [(Quote d) + (match (intern-datum d q) + [(cons d q) + (cons (Quote d) q)])] + [(Prim1 p e) + (match (intern-e e q) + [(cons e q) + (cons (Prim1 p e) q)])] + [(Prim2 p e1 e2) + (match (intern-es (list e1 e2) q) + [(cons (list e1 e2) q) + (cons (Prim2 p e1 e2) q)])] + [(Prim3 p e1 e2 e3) + (match (intern-es (list e1 e2 e3) q) + [(cons (list e1 e2 e3) q) + (cons (Prim3 p e1 e2 e3) q)])] + [(If e1 e2 e3) + (match (intern-es (list e1 e2 e3) q) + [(cons (list e1 e2 e3) q) + (cons (If e1 e2 e3) q)])] + [(Begin e1 e2) + (match (intern-es (list e1 e2) q) + [(cons (list e1 e2) q) + (cons (Begin e1 e2) q)])] + [(Let x e1 e2) + (match (intern-es (list e1 e2) q) + [(cons (list e1 e2) q) + (cons (Let x e1 e2) q)])] + [(App e1 es) + (match (intern-es (cons e1 es) q) + [(cons (cons e1 es) q) + (cons (App e1 es) q)])] + [(Lam f xs e) + (match (intern-e e q) + [(cons e q) + (cons (Lam f xs e) q)])] + [_ (cons e q)])) + +;; [Listof Expr] QEnv -> (cons [Listof Expr] QEnv) +(define (intern-es es q) + (match es + ['() (cons '() q)] + [(cons e es) + (match (intern-e e q) + [(cons e q) + (match (intern-es es q) + [(cons es q) + (cons (cons e es) q)])])])) diff --git a/langs/mountebank/parse.rkt b/langs/mountebank/parse.rkt index 708b0505..70fe6c2e 100644 --- a/langs/mountebank/parse.rkt +++ b/langs/mountebank/parse.rkt @@ -56,6 +56,7 @@ [(cons d1 d2) (cons (parse-datum d1) (parse-datum d2))] ['() '()] + [(? symbol? s) s] [(? integer? i) i] [(? boolean? b) b] [(? string? s) s] @@ -73,13 +74,14 @@ (vector? x))) (define op0 - '(read-byte peek-byte void)) + '(read-byte peek-byte void gensym)) (define op1 '(add1 sub1 zero? char? write-byte eof-object? integer->char char->integer box unbox empty? cons? box? car cdr - vector? vector-length string? string-length)) + vector? vector-length string? string-length + symbol->string string->symbol symbol?)) (define op2 '(+ - < = cons eq? make-vector vector-ref make-string string-ref)) (define op3 diff --git a/langs/mountebank/print.c b/langs/mountebank/print.c index 6cb5b1b0..2bcb21dc 100644 --- a/langs/mountebank/print.c +++ b/langs/mountebank/print.c @@ -7,6 +7,7 @@ void print_codepoint(val_char_t); void print_cons(val_cons_t *); void print_vect(val_vect_t*); void print_str(val_str_t*); +void print_symb(val_symb_t*); void print_str_char(val_char_t); void print_result_interior(val_t); int utf8_encode_char(val_char_t, char *); @@ -40,6 +41,10 @@ void print_result(val_t x) print_str(val_unwrap_str(x)); putchar('"'); break; + case T_SYMB: + printf("'"); + print_result_interior(x); + break; case T_PROC: printf("#"); break; @@ -48,6 +53,11 @@ void print_result(val_t x) } } +void print_symb(val_symb_t *s) +{ + print_str((val_str_t*) s); +} + void print_result_interior(val_t x) { switch (val_typeof(x)) { @@ -63,6 +73,9 @@ void print_result_interior(val_t x) print_cons(val_unwrap_cons(x)); printf(")"); break; + case T_SYMB: + print_symb(val_unwrap_symb(x)); + break; case T_VECT: print_vect(val_unwrap_vect(x)); break; diff --git a/langs/mountebank/symbol.c b/langs/mountebank/symbol.c new file mode 100644 index 00000000..08a9e5dd --- /dev/null +++ b/langs/mountebank/symbol.c @@ -0,0 +1,149 @@ +#include +#include +#include +#include +#include "values.h" + +static uint64_t gensym_ctr = 0; + +val_str_t *str_from_cstr(const char *); +int str_cmp(const val_str_t *, const val_str_t *); +val_str_t *str_dup(const val_str_t *); + +// binary tree node +struct Node { + val_symb_t* elem; + struct Node* left; + struct Node* right; +}; + +static struct Node *symbol_tbl = NULL; + +val_symb_t *intern_symbol(val_symb_t* symb) +{ + struct Node **curr = &symbol_tbl; + + while (*curr) { + struct Node *t = *curr; + int r = str_cmp((val_str_t*)symb, (val_str_t*)t->elem); + if (r == 0) { + return t->elem; + } else if (r < 0) { + curr = &t->left; + } else { + curr = &t->right; + } + } + + // wasn't found, so insert it + + *curr = calloc(1, sizeof(struct Node)); + + struct Node* t = *curr; + t->elem = symb; // str_dup(str); + + return t->elem; +} + +val_symb_t *str_to_symbol(const val_str_t *str) +{ + // str_dup needed if string is mutable + return intern_symbol((val_symb_t*)str_dup(str)); +} + +val_symb_t *gensym(void) +{ + char s[100]; // uint64_t has maximum 20 digits + sprintf(s, "g%" PRIu64, gensym_ctr++); + return (val_symb_t*)str_from_cstr(s); // uninterned symbol +} + +val_str_t *str_from_cstr(const char *s) +{ + int64_t len = strlen(s); + val_str_t *str = + malloc(sizeof(int64_t) + len * sizeof(val_char_t)); + + if (!str) + return NULL; + + str->len = len; + int i; + for (i = 0; i < len; i++) { + str->codepoints[i] = (val_char_t)s[i]; + } + return str; +} + +int str_cmp(const val_str_t *s1, const val_str_t *s2) +{ + int64_t len1 = s1->len; + int64_t len2 = s2->len; + + int64_t len = len1 < len2 ? len1 : len2; + int i; + + for (i = 0; i < len; i = i+1) { + if (s1->codepoints[i] != s2->codepoints[i]) + return s1->codepoints[i] - s2->codepoints[i]; + } + + return len1 - len2; +} + +val_str_t *str_dup(const val_str_t *s) +{ + int64_t n = s->len; + val_str_t *d; + + d = calloc(2+n, sizeof(val_char_t)); + if (!d) + return NULL; + + return memcpy(d, s, (2+n) * sizeof(val_char_t)); +} + + +#ifdef CHECK +// $ gcc -DCHECK symbol.c +#include +int main(void) +{ + val_str_t *foo = str_from_cstr("foo"); + val_str_t *foo_ = str_from_cstr("foo"); + val_str_t *bar = str_from_cstr("bar"); + val_str_t *foo1 = str_from_cstr("foo1"); + val_str_t *foo1_ = str_from_cstr("foo1"); + val_str_t *fo = str_from_cstr("fo"); + val_str_t *fo_ = str_from_cstr("fo"); + + assert(str_cmp(foo,foo_) == 0); + assert(str_cmp(foo1,foo1_) == 0); + assert(str_cmp(fo,fo_) == 0); + + assert(str_cmp(foo,foo1) < 0); + assert(str_cmp(foo,foo1) == str_cmp(foo,foo1_)); + assert(str_cmp(foo,foo1) == str_cmp(foo_,foo1)); + assert(str_cmp(foo,foo1) == str_cmp(foo_,foo1_)); + + assert(str_cmp(foo,fo) > 0); + assert(str_cmp(foo,fo) == str_cmp(foo,fo_)); + assert(str_cmp(foo,fo) == str_cmp(foo_,fo)); + assert(str_cmp(foo,fo) == str_cmp(foo_,fo_)); + + assert(str_cmp(foo,bar) > 0); + assert(str_cmp(bar,foo) < 0); + + assert(str_cmp(fo,bar) > 0); + assert(str_cmp(bar,fo) < 0); + + val_symb_t *foo_symb = (val_symb_t*)foo; + val_symb_t *foo_symb_ = (val_symb_t*)foo_; + assert(foo_symb != foo_symb_); + val_symb_t *foo_symb_i = intern_symbol(foo_symb); + val_symb_t *foo_symb_i_ = intern_symbol(foo_symb_); + assert(foo_symb_i == foo_symb_i_); + + return 0; +} +#endif diff --git a/langs/mountebank/test/test-runner.rkt b/langs/mountebank/test/test-runner.rkt index cc234fad..d5b395a7 100644 --- a/langs/mountebank/test/test-runner.rkt +++ b/langs/mountebank/test/test-runner.rkt @@ -254,7 +254,24 @@ #f) (check-equal? (run '(define (f) '(1 . 2)) '(eq? (f) (f))) - #t)) + #t) + + (check-equal? (run '(symbol? 'foo)) #t) + (check-equal? (run '(symbol? (string->symbol "foo"))) #t) + (check-equal? (run '(eq? 'foo 'foo)) #t) + (check-equal? (run '(eq? (string->symbol "foo") + (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'foo (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'fff (string->symbol (make-string 3 #\f)))) + #t) + (check-equal? (run '(let ((x '(foo . foo))) + (eq? (car x) (cdr x)))) + #t) + (check-equal? (run '(eq? 'g0 (gensym))) #f) + (check-equal? (run '(eq? (gensym) (gensym))) #f) + (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f)) (define (test-runner-io run) ;; Evildoer examples diff --git a/langs/mountebank/types.h b/langs/mountebank/types.h index 90668957..4093c4f7 100644 --- a/langs/mountebank/types.h +++ b/langs/mountebank/types.h @@ -24,6 +24,7 @@ #define vect_type_tag 3 #define str_type_tag 4 #define proc_type_tag 5 +#define symb_type_tag 6 #define int_shift (1 + imm_shift) #define int_type_mask ((1 << int_shift) - 1) #define int_type_tag (0 << (int_shift - 1)) diff --git a/langs/mountebank/types.rkt b/langs/mountebank/types.rkt index b025251b..c02d2d1e 100644 --- a/langs/mountebank/types.rkt +++ b/langs/mountebank/types.rkt @@ -9,6 +9,7 @@ (define type-vect #b011) (define type-str #b100) (define type-proc #b101) +(define type-symb #b110) (define int-shift (+ 1 imm-shift)) (define char-shift (+ 2 imm-shift)) (define type-int #b0000) diff --git a/langs/mountebank/values.c b/langs/mountebank/values.c index c6b27244..32e922bd 100644 --- a/langs/mountebank/values.c +++ b/langs/mountebank/values.c @@ -12,6 +12,8 @@ type_t val_typeof(val_t x) return T_VECT; case str_type_tag: return T_STR; + case symb_type_tag: + return T_SYMB; case proc_type_tag: return T_PROC; } @@ -108,3 +110,12 @@ val_t val_wrap_str(val_str_t *v) { return ((val_t)v) | str_type_tag; } + +val_symb_t* val_unwrap_symb(val_t x) +{ + return (val_symb_t *)(x ^ symb_type_tag); +} +val_t val_wrap_symb(val_symb_t *v) +{ + return ((val_t)v) | symb_type_tag; +} diff --git a/langs/mountebank/values.h b/langs/mountebank/values.h index 54fb2039..c1de09d6 100644 --- a/langs/mountebank/values.h +++ b/langs/mountebank/values.h @@ -20,6 +20,7 @@ typedef enum type_t { T_CONS, T_VECT, T_STR, + T_SYMB, T_PROC, } type_t; @@ -39,6 +40,10 @@ typedef struct val_str_t { uint64_t len; val_char_t codepoints[]; } val_str_t; +typedef struct val_symb_t { + uint64_t len; + val_char_t codepoints[]; +} val_symb_t; /* return the type of x */ type_t val_typeof(val_t x); @@ -73,4 +78,7 @@ val_t val_wrap_vect(val_vect_t* c); val_str_t* val_unwrap_str(val_t x); val_t val_wrap_str(val_str_t* c); +val_symb_t* val_unwrap_symb(val_t x); +val_t val_wrap_symb(val_symb_t* c); + #endif From 9738ed007edf947917f0fd3b242bf5f9c21b2116 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Wed, 10 Nov 2021 13:06:39 -0500 Subject: [PATCH 02/21] Stale mug removed. --- langs/mug/interp-env.rkt | 190 ------------------------------------- langs/mug/interp.rkt | 75 --------------- langs/mug/pat.rkt | 200 --------------------------------------- langs/mug/syntax.rkt | 194 ------------------------------------- 4 files changed, 659 deletions(-) delete mode 100644 langs/mug/interp-env.rkt delete mode 100644 langs/mug/interp.rkt delete mode 100644 langs/mug/pat.rkt delete mode 100644 langs/mug/syntax.rkt diff --git a/langs/mug/interp-env.rkt b/langs/mug/interp-env.rkt deleted file mode 100644 index c63284ee..00000000 --- a/langs/mug/interp-env.rkt +++ /dev/null @@ -1,190 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require (only-in "syntax.rkt" prim?)) - -;; type Expr = -;; | Integer -;; | Boolean -;; | Character -;; | String -;; | Symbol -;; | Variable -;; | '() -;; | `(box ,Expr) -;; | `(if ,Expr ,Expr ,Expr) -;; | `(let ,(List Variable Expr) ... ,Expr) -;; | `(letrec ,(List Variable Lambda) ... ,Expr) -;; | `(λ ,Bindings ,Expr) -;; | `(apply ,Expr ,Expr) -;; | `(,Prim ,Expr ...) -;; | `(,Expr ,Expr ...) - -;; type Value = -;; | Integer -;; | Boolean -;; | Character -;; | String -;; | Symbol -;; | '() -;; | (Box Value) -;; | (Cons Value Value) -;; | Function - -;; type Function = -;; | (Values ... -> Answer) - -;; type Answer = Value | 'err - -;; type REnv = (Listof (List Variable Value)) - -;; Expr REnv -> Answer -(define (interp-env e r) - (match e - ;; produce fresh strings each time a literal is eval'd - [(? string? s) (string-copy s)] - [(? value? v) v] - [''() '()] - [`',(? symbol? s) s] - [`(if ,e0 ,e1 ,e2) - (match (interp-env e0 r) - ['err 'err] - [v - (if v - (interp-env e1 r) - (interp-env e2 r))])] - [(? symbol? x) - (lookup r x)] - [`(let (,`(,xs ,es) ...) ,e) - (match (interp-env* es r) - ['err 'err] - [vs - (interp-env e (append (zip xs vs) r))])] - [`(letrec (,`(,xs ,es) ...) ,e) - (letrec ((r* (λ () - (append - (zip xs - (map (λ (l) (λ vs (apply (interp-env l (r*)) vs))) - es)) - r)))) - (interp-env e (r*)))] - [`(λ (,xs ...) ,e) - (λ vs - (if (= (length vs) (length xs)) - (interp-env e (append (zip xs vs) r)) - 'err))] - [`(λ (,xs ... . ,x) ,e) - (λ vs - (if (>= (length vs) (length xs)) - (interp-env e (append (zip/remainder xs vs x) r)) - 'err))] - [`(apply ,e0 ,e1) - (let ((v0 (interp-env e0 r)) - (vs (interp-env e1 r))) - (if (list? vs) - (apply v0 vs) - 'err))] - [`(,(? prim? p) ,es ...) - (let ((as (interp-env* es r))) - (interp-prim p as))] - [`(,e ,es ...) - (match (interp-env* (cons e es) r) - [(list f vs ...) - (if (procedure? f) - (apply f vs) - 'err)] - ['err 'err])] - [_ 'err])) - -;; (Listof Expr) REnv (Listof Defn) -> (Listof Value) | 'err -(define (interp-env* es r) - (match es - ['() '()] - [(cons e es) - (match (interp-env e r) - ['err 'err] - [v (cons v (interp-env* es r))])])) - -;; Any -> Boolean -(define (value? x) - (or (integer? x) - (boolean? x) - (char? x) - (string? x))) - -;; Prim (Listof Answer) -> Answer -(define (interp-prim p as) - (match (cons p as) - [(list p (? value?) ... 'err _ ...) 'err] - [(list '- (? integer? i0)) (- i0)] - [(list '- (? integer? i0) (? integer? i1)) (- i0 i1)] - [(list 'abs (? integer? i0)) (abs i0)] - [(list 'add1 (? integer? i0)) (+ i0 1)] - [(list 'sub1 (? integer? i0)) (- i0 1)] - [(list 'zero? (? integer? i0)) (zero? i0)] - [(list 'char? v0) (char? v0)] - [(list 'integer? v0) (integer? v0)] - [(list 'boolean? v0) (boolean? v0)] - [(list 'integer->char (? codepoint? i0)) (integer->char i0)] - [(list 'char->integer (? char? c)) (char->integer c)] - [(list '+ (? integer? i0) (? integer? i1)) (+ i0 i1)] - [(list 'cons v0 v1) (cons v0 v1)] - [(list 'car (? cons? v0)) (car v0)] - [(list 'cdr (? cons? v0)) (cdr v0)] - [(list 'string? v0) (string? v0)] - [(list 'box? v0) (box? v0)] - [(list 'empty? v0) (empty? v0)] - [(list 'cons? v0) (cons? v0)] - [(list 'cons v0 v1) (cons v0 v1)] - [(list 'box v0) (box v0)] - [(list 'unbox (? box? v0)) (unbox v0)] - [(list 'string-length (? string? v0)) (string-length v0)] - [(list 'make-string (? natural? v0) (? char? v1)) (make-string v0 v1)] - [(list 'string-ref (? string? v0) (? natural? v1)) - (if (< v1 (string-length v0)) - (string-ref v0 v1) - 'err)] - [(list '= (? integer? v0) (? integer? v1)) (= v0 v1)] - [(list '< (? integer? v0) (? integer? v1)) (< v0 v1)] - [(list '<= (? integer? v0) (? integer? v1)) (<= v0 v1)] - [(list 'char=? (? char? v0) (? char? v1)) (char=? v0 v1)] - [(list 'boolean=? (? boolean? v0) (? boolean? v1)) (boolean=? v0 v1)] - [(list 'eq? v0 v1) (eq? v0 v1)] - [(list 'gensym) (gensym)] - [(list 'symbol? v0) (symbol? v0)] - [(list 'procedure? v0) (procedure? v0)] - [_ 'err])) - -;; REnv Variable -> Answer -(define (lookup env x) - (match env - ['() 'err] - [(cons (list y v) env) - (match (symbol=? x y) - [#t v] - [#f (lookup env x)])])) - -;; REnv Variable Value -> Value -(define (ext r x v) - (cons (list x v) r)) - -;; Any -> Boolean -(define (codepoint? x) - (and (integer? x) - (<= 0 x #x10FFFF) - (not (<= #xD800 x #xDFFF)))) - -;; (Listof A) (Listof B) -> (Listof (List A B)) -(define (zip xs ys) - (match* (xs ys) - [('() '()) '()] - [((cons x xs) (cons y ys)) - (cons (list x y) (zip xs ys))])) - -;; like zip but ys can be longer and remainder is associated with r -(define (zip/remainder xs ys r) - (match* (xs ys) - [('() ys) (list (list r ys))] - [((cons x xs) (cons y ys)) - (cons (list x y) (zip/remainder xs ys r))])) - - diff --git a/langs/mug/interp.rkt b/langs/mug/interp.rkt deleted file mode 100644 index a1e7a2f1..00000000 --- a/langs/mug/interp.rkt +++ /dev/null @@ -1,75 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "syntax.rkt" - "interp-env.rkt") - - -(define (interp e) - (interp-env (desugar e) stdlib)) - -(define stdlib - `((append ,append) - (list? ,list?) - (first ,first) - (second ,second) - (rest ,rest) - (reverse ,reverse) - (not ,not) - (compose ,compose) - (symbol=? ,symbol=?) - (memq ,memq) - (length ,length) - (remq* ,remq*) - (remove-duplicates ,remove-duplicates) - (remove ,remove) - (member ,member) - (equal? ,equal?))) - - -;; Expr REnv Natural -> Answer -(define (interp-qq d r n) - ;(println `(interp-qq ,d ,n)) - (match d - [`(,'unquote ,e) - (if (zero? n) - (interp-env (desugar e) r) ;! - (cons 'unquote (interp-qq-list e r (sub1 n))))] - [`(,'unquote-splicing ,e) 'err] - [`(,'quasiquote ,d) - (cons 'quasiquote (interp-qq-list d r (add1 n)))] - [`(,x . ,y) - (match (interp-qq-list x r n) - ['err 'err] - [xv (match (interp-qq y r n) - ['err 'err] - ['() xv] - [yv (if (list? xv) - (append xv yv) - 'err)])])] - [d d])) - -;; Expr REnv Natural -> Answer -(define (interp-qq-list d r n) - ;(println `(interp-qq-list ,d ,n)) - (match d - [`(,'unquote ,e) - (if (zero? n) - (match (interp-env (desugar e) r) ;! - ['err 'err] - [v (list v)]) - (list (cons 'unquote (interp-qq-list e r (sub1 n)))))] - [`(,'unquote-splicing ,e) - (if (zero? n) - (interp-env e r) - (list (cons 'unquote-splicing (interp-qq-list e r (sub1 n)))))] - [`(,'quasiquote ,d) - (list (cons 'quasiquote (interp-qq-list d r (add1 n))))] - [`(,x . ,y) - (match (interp-qq-list x r n) - ['err 'err] - [xv (match (interp-qq y r n) - ['err 'err] - [yv (list (append xv yv))])])] - [d (list d)])) - - diff --git a/langs/mug/pat.rkt b/langs/mug/pat.rkt deleted file mode 100644 index 2a5c11f6..00000000 --- a/langs/mug/pat.rkt +++ /dev/null @@ -1,200 +0,0 @@ -#lang racket -(provide (all-defined-out)) - -;; type Expr+ = -;; .... -;; | Match - -;; type Match = (match ,Expr+ ,(list Pat Expr+) ...) - -;; type Pat = -;; | #t -;; | #f -;; | Integer -;; | String -;; | Variable -;; | `_ -;; | `'() -;; | `(quote ,Symbol) -;; | `(cons ,Pat ,Pat) -;; | `(list ,Pat ...) -;; | `(? ,Expr ,Pat ...) - -;; Match -> Expr -;; Rewrite match expression into an equivalent cond expression -(define (match->cond m) - (match m - [(cons 'match (cons e mcs)) - (let ((x (gensym))) - `(let ((,x ,e)) - (cond ,@(map (λ (mc) - (match mc - [(list p e) - (list (pat-match p x) (pat-bind p x e))])) - mcs) - ;; fall through to error - [else (car '())])))])) - -;; Example -#; -(define (sum bt) - (match bt - ['leaf 0] - [(list 'node v l r) - (+ v - (+ (sum l) - (sum r)))])) -#; -(define (sum^ bt) - (cond - [(eq? 'leaf bt) 0] - [(and (list? bt) - (= 4 (length bt)) - (eq? 'node (first bt))) - (let ((v (second bt)) - (l (third bt)) - (r (fourth bt))) - (+ v - (+ (sum l) - (sum r))))])) - -#; -`(define (sum bt) - ,(match->cond - '(match bt - ['leaf 0] - [(list 'node v l r) - (+ v - (+ (sum l) - (sum r)))]))) - -;; Two tasks: -;; 1. rewrite patterns into Boolean valued expressions that answer -;; whether the pattern matches the scrutiny -;; 2. rewrite pattern and RHS in to expressions in which the pattern variables -;; of pattern are bound to the appropriately deconstructed parts of the scrutiny - -;; Assume: the scrutiny is a variable. -;; (It's easy to establish this assumption in general.) - -;; Two functions: - -#; -;; Pat Variable -> Expr -;; Produces an expression determining if p matches v -(define (pat-match p v) ...) - -#; -;; Pat Variable Expr -> Expr -;; Produce an expression that deconstructs v and binds pattern variables -;; of p in scope of e. -;; ASSUME: v matches p -(define (pat-bind p v e) ...) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Pattern matching - -;; Pat Variable -> Expr -;; Produces an expression determining if p matches v -(define (pat-match p v) - (match p - [#t `(eq? #t ,v)] - [#f `(eq? #f ,v)] - [(? integer? i) `(eq? ,i ,v)] - [(? string? s) - `(and (string? ,v) - (string=? ,s ,v))] - [(list 'quote '()) `(eq? '() ,v)] - [(? symbol?) #t] - [(list 'quote (? symbol? s)) `(eq? ,v ',s)] - [(list 'cons p1 p2) - (let ((v1 (gensym)) - (v2 (gensym))) - `(and (cons? ,v) - (let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - (and ,(pat-match p1 v1) - ,(pat-match p2 v2)))))] - [(cons 'list ps) - `(and (list? ,v) - (= (length ,v) ,(length ps)) - ,(pat-match-list ps v))] - [(cons '? (cons e ps)) - `(and (,e ,v) - ,(pats-match ps v))])) - -;; (Listof Pat) Variable -> Expr -;; Produces an expression determining if every ps matches x -(define (pats-match ps v) - (match ps - ['() #t] - [(cons p ps) - `(and ,(pat-match p v) - ,(pats-match ps v))])) - -;; (Listof Pat) Variable -> Expr -;; Produces an expression determining if each ps matches each element of list v -(define (pat-match-list ps v) - (match ps - ['() #t] - [(cons p ps) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - (and ,(pat-match p v1) - ,(pat-match-list ps v2))))])) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Pattern binding - -;; Pat Variable Expr -> Expr -;; Produce an expression that deconstructs v and binds pattern variables -;; of p in scope of e. -;; ASSUME: v matches p -(define (pat-bind p v e) - (match p - [#t e] - [#f e] - [(? integer?) e] - [(? string?) e] - [(list 'quote '()) e] - ['_ e] - [(? symbol? x) `(let ((,x ,v)) ,e)] - [(list 'quote (? symbol?)) e] - [(list 'cons p1 p2) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - ,(pat-bind p1 v1 - (pat-bind p2 v2 e))))] - [(cons 'list ps) - (pat-bind-list ps v e)] - [(cons '? (cons _ ps)) - (pats-bind ps v e)])) - -;; (Listof Pat) Variable Expr -> Expr -;; Produce an expression that doconstructs v and binds pattern variables -;; of ps (each matched against v) in scope of e. -;; ASSUME: v matches every element of ps -(define (pats-bind ps v e) - (match ps - ['() e] - [(cons p ps) - (pat-bind p v (pats-bind ps v e))])) - -;; (Listof Pat) Variable Expr -> Expr -;; Produce an expression that deconstructs list v and binds pattern variables -;; of ps (matched element-wise against v) in scope of e. -;; ASSUME: elemens of v matches elements of ps -(define (pat-bind-list ps v e) - (match ps - ['() e] - [(cons p ps) - (let ((v1 (gensym)) - (v2 (gensym))) - `(let ((,v1 (car ,v)) - (,v2 (cdr ,v))) - ,(pat-bind p v1 (pat-bind-list ps v2 e))))])) diff --git a/langs/mug/syntax.rkt b/langs/mug/syntax.rkt deleted file mode 100644 index bad6439e..00000000 --- a/langs/mug/syntax.rkt +++ /dev/null @@ -1,194 +0,0 @@ -#lang racket -(provide (all-defined-out)) -(require "pat.rkt") - -;; type Expr+ = -;; .... exprs with match, cond, begin/define, quote etc. - -;; type S-Expr = -;; | Boolean -;; | Integer -;; | String -;; | '() -;; | (Cons S-Expr S-Expr) - -;; Expr+ -> Expr -(define (desugar e+) - (match e+ - [`(begin ,@(list `(define (,fs . ,xss) ,es) ...) ,e) - `(letrec ,(map (λ (f xs e) `(,f (λ ,xs ,(desugar e)))) fs xss es) - ,(desugar e))] - [(? symbol? x) x] - [(? imm? i) i] - [`',(? symbol? s) `',s] - [`',d (quote->expr d)] - [`(,(? prim? p) . ,es) `(,p ,@(map desugar es))] - [`(if ,e0 ,e1 ,e2) `(if ,(desugar e0) ,(desugar e1) ,(desugar e2))] - [`(let ((,x ,e0)) ,e1) `(let ((,x ,(desugar e0))) ,(desugar e1))] - [`(letrec ,bs ,e0) - `(letrec ,(map (λ (b) (list (first b) (desugar (second b)))) bs) - ,(desugar e0))] - [`(λ ,xs ,e0) `(λ ,xs ,(desugar e0))] - [`(match . ,_) (desugar (match->cond e+))] - [`(cond . ,_) (desugar (cond->if e+))] - [`(and . ,_) (desugar (and->if e+))] - [`(or . ,_) (desugar (or->if e+))] - [`(,e . ,es) `(,(desugar e) ,@(map desugar es))])) - -;; S-Expr -> Expr -;; Produce an expression that evaluates to given s-expression, without -;; use of quote (except for symbols and empty list) -(define (quote->expr d) - (match d - [(? boolean?) d] - [(? integer?) d] - [(? string?) d] - [(? char?) d] - [(? symbol?) (list 'quote d)] - [(cons x y) (list 'cons (quote->expr x) (quote->expr y))] - ['() ''()])) - -(define (quasiquote->expr d) - (match d - [(? boolean?) d] - [(? integer?) d] - [(? string?) d] - [(? char?) d] - [(? symbol?) (list 'quote d)] - [(cons 'quasiquote d) - (quasiquote->expr (quasiquote->expr d))] - [(cons 'unquote d) d] - [(cons 'unquote-splicing d) 'ERROR] - [(cons x y) - `(append ,(quasiquote->list-expr x) - ,(quasiquote->expr y))] - ['() ''()])) - -(define (quasiquote->list-expr d) - (match d - [(? symbol?) (list 'quote d)] - ['() ''()] - [(cons 'quasiquote d) - (quasiquote->expr (quasiquote->expr d))] - [(cons 'unquote d) `(list ,d)] - [(cons 'unquote-splicing d) d] - [(cons x y) - `(list (append ,(quasiquote->list-expr x) - ,(quasiquote->expr y)))] - [_ `'(,d)])) - -;; Expr -> Expr -(define (cond->if c) - (match c - [`(cond (else ,e)) e] - [`(cond (,c ,e) . ,r) - `(if ,c ,e (cond ,@r))])) - -;; Expr -> Expr -(define (and->if c) - (match c - [`(and) #t] - [`(and ,e) e] - [`(and ,e . ,r) - `(if ,e (and ,@r) #f)])) - -;; Expr -> Expr -(define (or->if c) - (match c - [`(or) #f] - [`(or ,e) e] - [`(or ,e . ,r) - (let ((x (gensym))) - `(let ((,x ,e)) - (if ,x ,x (or ,@r))))])) - - -(define (qq-expand x depth) - (match x - [(cons 'quasiquote r) - `(cons 'quasiquote ,(qq-expand r (add1 depth)))] - [(cons 'unquote r) - (cond [(> depth 0) - `(cons ','unquote ,(qq-expand r (sub1 depth)))] - [(and (not (empty? r)) - (empty? (cdr r))) - (car r)] - [else - (error "Illegal")])] - [(cons 'unqupte-splicing r) - (error "Illegal")] - [(cons a b) - `(append ,(qq-expand-list a depth) - ,(qq-expand b depth))] - [_ `',x])) - -(define (qq-expand-list x depth) - (match x - [(cons 'quasiquote r) - `(list (cons 'quasiquote ,(qq-expand r (add1 depth))))] - [(cons 'unquote r) - (cond [(> depth 0) `(list (cons ','unquote ,(qq-expand r (sub1 depth))))] - [else `(list . ,r)])] - [(cons 'unquote-splicing r) - (cond [(> depth 0) `(list (cons ','unquote-splicing ,(qq-expand r (sub1 depth))))] - [else `(append . ,r)])] - [_ - `'(,x)])) - - - -;; Any -> Boolean -(define (imm? x) - (or (integer? x) - (boolean? x) - (char? x) - (equal? ''() x))) - -;; Expr -> LExpr -(define (label-λ e) - (match e - [(? symbol? x) x] - [(? imm? i) i] - [`(,(? prim? p) . ,es) `(,p ,@(map label-λ es))] - [`(if ,e0 ,e1 ,e2) `(if ,(label-λ e0) ,(label-λ e1) ,(label-λ e2))] - [`(let ((,x ,e0)) ,e1) `(let ((,x ,(label-λ e0))) ,(label-λ e1))] - [`(letrec ,bs ,e0) `(letrec ,(map (λ (b) (list (first b) (label-λ (second b)))) bs) - ,(label-λ e0))] - [`(λ ,xs ,e0) `(λ ,xs ',(gensym) ,(label-λ e0))] - [`(,e . ,es) `(,(label-λ e) ,@(map label-λ es))])) - -;; LExpr -> (Listof LExpr) -;; Extract all the lambda expressions -(define (λs e) - (match e - [(? symbol? x) '()] - [(? imm? i) '()] - [`(,(? prim? p) . ,es) (append-map λs es)] - [`(if ,e0 ,e1 ,e2) (append (λs e0) (λs e1) (λs e2))] - [`(let ((,x ,e0)) ,e1) (append (λs e0) (λs e1))] - [`(letrec ,bs ,e0) (append (apply append (map (compose λs second) bs)) (λs e0))] - [`(λ ,xs ,l ,e0) (cons e (λs e0))] - [`(,e . ,es) (append (λs e) (apply append (map λs es)))])) - -;; LExpr -> (Listof Variable) -(define (fvs e) - (define (fvs e) - (match e - [(? symbol? x) (list x)] - [(? imm? i) '()] - [`(,(? prim? p) . ,es) (append-map fvs es)] - [`(if ,e0 ,e1 ,e2) (append (fvs e0) (fvs e1) (fvs e2))] - [`(let ((,x ,e0)) ,e1) (append (fvs e0) (remq* (list x) (fvs e1)))] - [`(letrec ,bs ,e0) (remq* (map first bs) - (apply append (fvs e0) (map fvs (map second bs))))] - [`(λ ,xs ,l ,e0) (remq* xs (fvs e0))] - [`(,e . ,es) (append (fvs e) (apply append (map fvs es)))])) - (remove-duplicates (fvs e))) - -;; Any -> Boolean -(define (prim? x) - (and (symbol? x) - (memq x '(add1 sub1 zero? abs - char? boolean? integer? integer->char char->integer - string? box? empty? cons cons? box unbox car cdr string-length - make-string string-ref = < <= char=? boolean=? + eq? gensym symbol? - procedure?)))) From cadb2009e85c653d2ce6ee20c49fe390c5303797 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Wed, 10 Nov 2021 13:08:28 -0500 Subject: [PATCH 03/21] Fix longstanding interp-file bug on void. --- langs/evildoer/interp-file.rkt | 4 +++- langs/fraud/interp-file.rkt | 4 +++- langs/hoax/interp-file.rkt | 4 +++- langs/hustle/interp-file.rkt | 4 +++- langs/iniquity/interp-file.rkt | 4 +++- langs/jig/interp-file.rkt | 4 +++- langs/knock/interp-file.rkt | 15 +++++++++++++++ langs/loot/interp-file.rkt | 4 +++- langs/mountebank/interp-file.rkt | 4 +++- 9 files changed, 39 insertions(+), 8 deletions(-) create mode 100644 langs/knock/interp-file.rkt diff --git a/langs/evildoer/interp-file.rkt b/langs/evildoer/interp-file.rkt index e6c9b1d3..99789c43 100644 --- a/langs/evildoer/interp-file.rkt +++ b/langs/evildoer/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/fraud/interp-file.rkt b/langs/fraud/interp-file.rkt index e6c9b1d3..99789c43 100644 --- a/langs/fraud/interp-file.rkt +++ b/langs/fraud/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/hoax/interp-file.rkt b/langs/hoax/interp-file.rkt index e6c9b1d3..99789c43 100644 --- a/langs/hoax/interp-file.rkt +++ b/langs/hoax/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/hustle/interp-file.rkt b/langs/hustle/interp-file.rkt index e6c9b1d3..99789c43 100644 --- a/langs/hustle/interp-file.rkt +++ b/langs/hustle/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read p)))) + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/iniquity/interp-file.rkt b/langs/iniquity/interp-file.rkt index aabe615b..c2490030 100644 --- a/langs/iniquity/interp-file.rkt +++ b/langs/iniquity/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read-all p)))) + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/jig/interp-file.rkt b/langs/jig/interp-file.rkt index aabe615b..c2490030 100644 --- a/langs/jig/interp-file.rkt +++ b/langs/jig/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read-all p)))) + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/knock/interp-file.rkt b/langs/knock/interp-file.rkt new file mode 100644 index 00000000..c2490030 --- /dev/null +++ b/langs/knock/interp-file.rkt @@ -0,0 +1,15 @@ +#lang racket +(provide main) +(require "parse.rkt" "interp.rkt" "read-all.rkt") + +;; String -> Void +;; Parse and interpret contents of given filename, +;; print result on stdout +(define (main fn) + (let ((p (open-input-file fn))) + (begin + (read-line p) ; ignore #lang racket line + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) + (close-input-port p)))) diff --git a/langs/loot/interp-file.rkt b/langs/loot/interp-file.rkt index aabe615b..c2490030 100644 --- a/langs/loot/interp-file.rkt +++ b/langs/loot/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read-all p)))) + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) (close-input-port p)))) diff --git a/langs/mountebank/interp-file.rkt b/langs/mountebank/interp-file.rkt index aabe615b..c2490030 100644 --- a/langs/mountebank/interp-file.rkt +++ b/langs/mountebank/interp-file.rkt @@ -9,5 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (println (interp (parse (read-all p)))) + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) (close-input-port p)))) From 434436b93df5e83dd6895a5edd973deee947bb1c Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Wed, 10 Nov 2021 14:51:14 -0500 Subject: [PATCH 04/21] Missing 'eq? in hoax/ast.rkt. --- langs/hoax/ast.rkt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langs/hoax/ast.rkt b/langs/hoax/ast.rkt index 50a257d3..4d9f978d 100644 --- a/langs/hoax/ast.rkt +++ b/langs/hoax/ast.rkt @@ -25,7 +25,7 @@ ;; | 'vector? | vector-length ;; | 'string? | string-length ;; type Op2 = '+ | '- | '< | '= -;; | 'cons +;; | 'cons | 'eq? ;; | 'make-vector | 'vector-ref ;; | 'make-string | 'string-ref ;; type Op3 = 'vector-set! From aa42abd7cb7d76184e8c6208b8ce12605e8fbcb3 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Wed, 10 Nov 2021 14:51:54 -0500 Subject: [PATCH 05/21] Avoid the string-copy in interpreting string literals. --- langs/hoax/interp.rkt | 2 +- langs/iniquity/interp.rkt | 2 +- langs/jig/interp.rkt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/langs/hoax/interp.rkt b/langs/hoax/interp.rkt index 89243152..cab103af 100644 --- a/langs/hoax/interp.rkt +++ b/langs/hoax/interp.rkt @@ -33,7 +33,7 @@ [(Eof) eof] [(Empty) '()] [(Var x) (lookup r x)] - [(Str s) (string-copy s)] + [(Str s) s] [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] diff --git a/langs/iniquity/interp.rkt b/langs/iniquity/interp.rkt index db40c7f5..e5cba4c9 100644 --- a/langs/iniquity/interp.rkt +++ b/langs/iniquity/interp.rkt @@ -36,7 +36,7 @@ [(Eof) eof] [(Empty) '()] [(Var x) (lookup r x)] - [(Str s) (string-copy s)] + [(Str s) s] [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] diff --git a/langs/jig/interp.rkt b/langs/jig/interp.rkt index db40c7f5..e5cba4c9 100644 --- a/langs/jig/interp.rkt +++ b/langs/jig/interp.rkt @@ -36,7 +36,7 @@ [(Eof) eof] [(Empty) '()] [(Var x) (lookup r x)] - [(Str s) (string-copy s)] + [(Str s) s] [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] From e7286b91ee04e168960204e74dbc235f2d909ad3 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Wed, 10 Nov 2021 15:14:24 -0500 Subject: [PATCH 06/21] Mug: symbols and interned string literals (but no quote). --- langs/mug/Makefile | 38 ++ langs/mug/ast.rkt | 58 +++ langs/mug/char.c | 57 +++ langs/mug/compile-datum.rkt | 136 +++++ langs/mug/compile-file.rkt | 13 + langs/mug/compile-ops.rkt | 388 ++++++++++++++ langs/mug/compile.rkt | 338 ++++++++++++ langs/mug/env.rkt | 15 + langs/mug/fv.rkt | 21 + langs/mug/heap.h | 9 + langs/mug/intern.rkt | 113 ++++ langs/mug/interp-defun.rkt | 125 +++++ langs/mug/interp-file.rkt | 15 + langs/mug/interp-io.rkt | 12 + langs/mug/interp-prims.rkt | 72 +++ langs/mug/interp.rkt | 124 +++++ langs/mug/io.c | 25 + langs/mug/lambdas.rkt | 34 ++ langs/mug/main.c | 40 ++ langs/mug/parse-file.rkt | 13 + langs/mug/parse.rkt | 72 +++ langs/mug/print.c | 855 +++++++++++++++++++++++++++++++ langs/mug/print.h | 8 + langs/mug/read-all.rkt | 8 + langs/mug/runtime.h | 11 + langs/mug/symbol.c | 149 ++++++ langs/mug/test/build-runtime.rkt | 8 + langs/mug/test/compile.rkt | 18 + langs/mug/test/interp-defun.rkt | 8 + langs/mug/test/interp.rkt | 8 + langs/mug/test/test-runner.rkt | 311 +++++++++++ langs/mug/try.rkt | 3 + langs/mug/types.h | 42 ++ langs/mug/types.rkt | 71 +++ langs/mug/unload-bits-asm.rkt | 43 ++ langs/mug/values.c | 121 +++++ langs/mug/values.h | 84 +++ 37 files changed, 3466 insertions(+) create mode 100644 langs/mug/Makefile create mode 100644 langs/mug/ast.rkt create mode 100644 langs/mug/char.c create mode 100644 langs/mug/compile-datum.rkt create mode 100644 langs/mug/compile-file.rkt create mode 100644 langs/mug/compile-ops.rkt create mode 100644 langs/mug/compile.rkt create mode 100644 langs/mug/env.rkt create mode 100644 langs/mug/fv.rkt create mode 100644 langs/mug/heap.h create mode 100644 langs/mug/intern.rkt create mode 100644 langs/mug/interp-defun.rkt create mode 100644 langs/mug/interp-file.rkt create mode 100644 langs/mug/interp-io.rkt create mode 100644 langs/mug/interp-prims.rkt create mode 100644 langs/mug/interp.rkt create mode 100644 langs/mug/io.c create mode 100644 langs/mug/lambdas.rkt create mode 100644 langs/mug/main.c create mode 100644 langs/mug/parse-file.rkt create mode 100644 langs/mug/parse.rkt create mode 100644 langs/mug/print.c create mode 100644 langs/mug/print.h create mode 100644 langs/mug/read-all.rkt create mode 100644 langs/mug/runtime.h create mode 100644 langs/mug/symbol.c create mode 100644 langs/mug/test/build-runtime.rkt create mode 100644 langs/mug/test/compile.rkt create mode 100644 langs/mug/test/interp-defun.rkt create mode 100644 langs/mug/test/interp.rkt create mode 100644 langs/mug/test/test-runner.rkt create mode 100644 langs/mug/try.rkt create mode 100644 langs/mug/types.h create mode 100644 langs/mug/types.rkt create mode 100644 langs/mug/unload-bits-asm.rkt create mode 100644 langs/mug/values.c create mode 100644 langs/mug/values.h diff --git a/langs/mug/Makefile b/langs/mug/Makefile new file mode 100644 index 00000000..76f979af --- /dev/null +++ b/langs/mug/Makefile @@ -0,0 +1,38 @@ +UNAME := $(shell uname) +.PHONY: test + +ifeq ($(UNAME), Darwin) + format=macho64 +else + format=elf64 +endif + +objs = \ + main.o \ + values.o \ + print.o \ + symbol.o \ + io.o + +default: runtime.o + +runtime.o: $(objs) + ld -r $(objs) -o runtime.o + +%.run: %.o runtime.o + gcc runtime.o $< -o $@ + +.c.o: + gcc -fPIC -c -g -o $@ $< + +.s.o: + nasm -g -f $(format) -o $@ $< + +%.s: %.rkt + racket -t compile-file.rkt -m $< > $@ + +clean: + rm *.o *.s *.run + +test: example.run + @test "$(shell ./example.run)" = "$(shell racket example.rkt)" diff --git a/langs/mug/ast.rkt b/langs/mug/ast.rkt new file mode 100644 index 00000000..5b8885fe --- /dev/null +++ b/langs/mug/ast.rkt @@ -0,0 +1,58 @@ +#lang racket +(provide (all-defined-out)) + +;; type Prog = (Prog (Listof Defn) Expr) +(struct Prog (ds e) #:prefab) + +;; type Defn = (Defn Id (Listof Id) Expr) +(struct Defn (f xs e) #:prefab) + +;; type Expr = (Eof) +;; | (Empty) +;; | (Int Integer) +;; | (Bool Boolean) +;; | (Char Character) +;; | (Str String) +;; | (Symb Symbol) +;; | (Prim0 Op0) +;; | (Prim1 Op1 Expr) +;; | (Prim2 Op2 Expr Expr) +;; | (Prim3 Op3 Expr Expr Expr) +;; | (If Expr Expr Expr) +;; | (Begin Expr Expr) +;; | (Let Id Expr Expr) +;; | (Var Id) +;; | (App Expr (Listof Expr)) +;; | (Lam Id (Listof Id) Expr) +;; type Id = Symbol +;; type Op0 = 'read-byte | 'gensym +;; type Op1 = 'add1 | 'sub1 | 'zero? +;; | 'char? | 'integer->char | 'char->integer +;; | 'write-byte | 'eof-object? +;; | 'box | 'car | 'cdr | 'unbox +;; | 'empty? | 'cons? | 'box? +;; | 'vector? | 'vector-length +;; | 'string? | 'string-length +;; | 'symbol? | 'symbol->string | 'string->symbol +;; type Op2 = '+ | '- | '< | '= +;; | 'cons +;; | 'make-vector | 'vector-ref +;; | 'make-string | 'string-ref +;; type Op3 = 'vector-set! +(struct Eof () #:prefab) +(struct Empty () #:prefab) +(struct Int (i) #:prefab) +(struct Bool (b) #:prefab) +(struct Char (c) #:prefab) +(struct Str (s) #:prefab) +(struct Symb (s) #:prefab) +(struct Prim0 (p) #:prefab) +(struct Prim1 (p e) #:prefab) +(struct Prim2 (p e1 e2) #:prefab) +(struct Prim3 (p e1 e2 e3) #:prefab) +(struct If (e1 e2 e3) #:prefab) +(struct Begin (e1 e2) #:prefab) +(struct Let (x e1 e2) #:prefab) +(struct Var (x) #:prefab) +(struct App (e es) #:prefab) +(struct Lam (f xs e) #:prefab) diff --git a/langs/mug/char.c b/langs/mug/char.c new file mode 100644 index 00000000..d11f16e0 --- /dev/null +++ b/langs/mug/char.c @@ -0,0 +1,57 @@ +#include +#include +#include "types.h" + +void print_codepoint(int64_t); + +void print_char (int64_t v) { + int64_t codepoint = v >> char_shift; + printf("#\\"); + switch (codepoint) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: + printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(v); + } +} + +void print_codepoint(int64_t v) { + int64_t codepoint = v >> char_shift; + // Print using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (codepoint < 128) { + printf("%c", (char) codepoint); + } else if (codepoint < 2048) { + printf("%c%c", + (char)(codepoint >> 6) | 192, + ((char)codepoint & 63) | 128); + } else if (codepoint < 65536) { + printf("%c%c%c", + (char)(codepoint >> 12) | 224, + ((char)(codepoint >> 6) & 63) | 128, + ((char)codepoint & 63) | 128); + } else { + printf("%c%c%c%c", + (char)(codepoint >> 18) | 240, + ((char)(codepoint >> 12) & 63) | 128, + ((char)(codepoint >> 6) & 63) | 128, + ((char)codepoint & 63) | 128); + } +} + diff --git a/langs/mug/compile-datum.rkt b/langs/mug/compile-datum.rkt new file mode 100644 index 00000000..7f2e96f6 --- /dev/null +++ b/langs/mug/compile-datum.rkt @@ -0,0 +1,136 @@ +#lang racket +(provide compile-datum compile-literals) +(require "types.rkt" + "intern.rkt" + a86/ast) + +;; Registers used +(define rax 'rax) ; return + +;; QEnv -> Asm +(define (compile-literals q) + (match q + ['() (seq)] + [(cons (cons s (Ref l _)) q) + (seq (compile-literal (to-string s) l) + (compile-literals q))])) + +;; String Label -> Asm +(define (compile-literal s l) + (seq (Label l) + (Dq (string-length s)) + (compile-string-chars (string->list s)) + (if (odd? (string-length s)) + (seq (Dd 0)) + (seq)))) + +(define (compound? x) + (or ;(string? x) + ;(symbol? x) + (cons? x) + (vector? x) + (box? x))) + +;; Datum -> Asm +(define (compile-datum d) + (cond + [(compound? d) + (compile-compound-datum d)] + [(Ref? d) + (seq (Lea rax (Plus (Ref-label d) (Ref-type-tag d))))] + [else + (seq (Mov rax (imm->bits d)))])) + +(define (compile-compound-datum d) + (match (compile-quoted d) + [(cons l is) + (seq (Data) + is + (Text) + (Lea rax l))])) + +;; Datum -> (cons AsmExpr Asm) +(define (compile-quoted c) + (cond + ;[(string? c) (compile-datum-string c)] + ;[(symbol? c) (compile-datum-symbol (symbol->string c))] + [(vector? c) (compile-datum-vector (vector->list c))] + [(box? c) (compile-datum-box (unbox c))] + [(cons? c) (compile-datum-cons (car c) (cdr c))] + [(Ref? c) (cons (Plus (Ref-label c) (Ref-type-tag c)) '())] + [else (cons (imm->bits c) '())])) + +;; String -> (cons AsmExpr Asm) +#; +(define (compile-datum-string c) + (let ((l (gensym 'string))) + (cons (Plus l type-str) + (seq (Label l) + (Dq (string-length c)) + (compile-string-chars (string->list c)) + (if (odd? (string-length c)) + (seq (Dd 0)) + (seq)))))) + +;; String -> (cons AsmExpr Asm) +#; +(define (compile-datum-symbol c) + (let ((l (gensym 'symbol))) + (cons (Plus l type-symb) + (seq (Label l) + (Dq (string-length c)) + (compile-string-chars (string->list c)) + (if (odd? (string-length c)) + (seq (Dd 0)) + (seq)))))) + +;; [Listof Datum] -> (cons AsmExpr Asm) +(define (compile-datum-vector ds) + (match ds + ['() (cons type-vect '())] + [_ + (let ((l (gensym 'vector)) + (cds (map compile-quoted ds))) + (cons (Plus l type-vect) + (seq (Label l) + (Dq (length ds)) + (map (λ (cd) (Dq (car cd))) cds) + (append-map cdr cds))))])) + +;; Datum -> (cons AsmExpr Asm) +(define (compile-datum-box c) + (match (compile-quoted c) + [(cons l1 is1) + (let ((l (gensym 'box))) + (cons (Plus l type-box) + (seq (Label l) + (Dq l1) + is1)))])) + +;; Datum Datum -> (cons AsmExpr Asm) +(define (compile-datum-cons c1 c2) + (match (compile-quoted c1) + [(cons l1 is1) + (match (compile-quoted c2) + [(cons l2 is2) + (let ((l (gensym 'cons))) + (cons (Plus l type-cons) + (seq (Label l) + (Dq l2) + (Dq l1) + is1 + is2)))])])) + +;; [Listof Char] -> Asm +(define (compile-string-chars cs) + (match cs + ['() (seq)] + [(cons c cs) + (seq (Dd (char->integer c)) + (compile-string-chars cs))])) + +;; (U String Symbol) -> String +(define (to-string s) + (if (symbol? s) + (symbol->string s) + s)) diff --git a/langs/mug/compile-file.rkt b/langs/mug/compile-file.rkt new file mode 100644 index 00000000..3593dc7d --- /dev/null +++ b/langs/mug/compile-file.rkt @@ -0,0 +1,13 @@ +#lang racket +(provide main) +(require "parse.rkt" "compile.rkt" "read-all.rkt" a86/printer) + +;; String -> Void +;; Compile contents of given file name, +;; emit asm code on stdout +(define (main fn) + (let ((p (open-input-file fn))) + (begin + (read-line p) ; ignore #lang racket line + (displayln (asm-string (compile (parse (read-all p))))) + (close-input-port p)))) diff --git a/langs/mug/compile-ops.rkt b/langs/mug/compile-ops.rkt new file mode 100644 index 00000000..0ef65bc2 --- /dev/null +++ b/langs/mug/compile-ops.rkt @@ -0,0 +1,388 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" "types.rkt" a86/ast) + +(define rax 'rax) ; return +(define eax 'eax) ; 32-bit load/store +(define rbx 'rbx) ; heap +(define rdi 'rdi) ; arg +(define r8 'r8) ; scratch +(define r9 'r9) ; scratch +(define r10 'r10) ; scratch +(define r15 'r15) ; stack pad (non-volatile) +(define rsp 'rsp) ; stack + +;; Op0 -> Asm +(define (compile-op0 p) + (match p + ['void (seq (Mov rax val-void))] + ['read-byte (seq pad-stack + (Call 'read_byte) + unpad-stack)] + ['peek-byte (seq pad-stack + (Call 'peek_byte) + unpad-stack)] + ['gensym (seq pad-stack + (Call 'gensym) + unpad-stack + (Or rax type-symb))])) + +;; Op1 -> Asm +(define (compile-op1 p) + (match p + ['add1 + (seq (assert-integer rax) + (Add rax (imm->bits 1)))] + ['sub1 + (seq (assert-integer rax) + (Sub rax (imm->bits 1)))] + ['zero? + (seq (assert-integer rax) + (eq-imm 0))] + ['char? + (type-pred mask-char type-char)] + ['char->integer + (seq (assert-char rax) + (Sar rax char-shift) + (Sal rax int-shift))] + ['integer->char + (seq (assert-codepoint rax) + (Sar rax int-shift) + (Sal rax char-shift) + (Xor rax type-char))] + ['eof-object? (eq-imm eof)] + ['write-byte + (seq (assert-byte rax) + pad-stack + (Mov rdi rax) + (Call 'write_byte) + unpad-stack + (Mov rax val-void))] + ['box + (seq (Mov (Offset rbx 0) rax) + (Mov rax rbx) + (Or rax type-box) + (Add rbx 8))] + ['unbox + (seq (assert-box rax) + (Xor rax type-box) + (Mov rax (Offset rax 0)))] + ['car + (seq (assert-cons rax) + (Xor rax type-cons) + (Mov rax (Offset rax 8)))] + ['cdr + (seq (assert-cons rax) + (Xor rax type-cons) + (Mov rax (Offset rax 0)))] + ['empty? (eq-imm '())] + ['box? + (type-pred ptr-mask type-box)] + ['cons? + (type-pred ptr-mask type-cons)] + ['vector? + (type-pred ptr-mask type-vect)] + ['string? + (type-pred ptr-mask type-str)] + ['symbol? + (type-pred ptr-mask type-symb)] + ['vector-length + (let ((zero (gensym)) + (done (gensym))) + (seq (assert-vector rax) + (Xor rax type-vect) + (Cmp rax 0) + (Je zero) + (Mov rax (Offset rax 0)) + (Sal rax int-shift) + (Jmp done) + (Label zero) + (Mov rax 0) + (Label done)))] + ['string-length + (let ((zero (gensym)) + (done (gensym))) + (seq (assert-string rax) + (Xor rax type-str) + (Cmp rax 0) + (Je zero) + (Mov rax (Offset rax 0)) + (Sal rax int-shift) + (Jmp done) + (Label zero) + (Mov rax 0) + (Label done)))] + ['string->symbol + (seq (assert-string rax) + (Xor rax type-str) + (Mov rdi rax) + pad-stack + (Call 'intern_symbol) + unpad-stack + (Or rax type-symb))] + ['symbol->string + (seq (assert-symbol rax) + (Xor rax type-symb) + (Mov rdi rax) + pad-stack + ;; FIXME: this allocates off-heap + (Call 'str_dup) + unpad-stack + (Or rax type-str))])) + +;; Op2 -> Asm +(define (compile-op2 p) + (match p + ['+ + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Add rax r8))] + ['- + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Sub r8 rax) + (Mov rax r8))] + ['< + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + (Mov rax val-true) + (let ((true (gensym))) + (seq (Jl true) + (Mov rax val-false) + (Label true))))] + ['= + (seq (Pop r8) + (assert-integer r8) + (assert-integer rax) + (Cmp r8 rax) + (Mov rax val-true) + (let ((true (gensym))) + (seq (Je true) + (Mov rax val-false) + (Label true))))] + ['cons + (seq (Mov (Offset rbx 0) rax) + (Pop rax) + (Mov (Offset rbx 8) rax) + (Mov rax rbx) + (Or rax type-cons) + (Add rbx 16))] + ['eq? + (seq (Pop r8) + (eq r8 rax))] + ['make-vector + (let ((loop (gensym)) + (done (gensym)) + (empty (gensym))) + (seq (Pop r8) + (assert-natural r8) + (Cmp r8 0) ; special case empty vector + (Je empty) + + (Mov r9 rbx) + (Or r9 type-vect) + + (Sar r8 int-shift) + (Mov (Offset rbx 0) r8) + (Add rbx 8) + + (Label loop) + (Mov (Offset rbx 0) rax) + (Add rbx 8) + (Sub r8 1) + (Cmp r8 0) + (Jne loop) + + (Mov rax r9) + (Jmp done) + + (Label empty) + (Mov rax type-vect) + (Label done)))] + + ['vector-ref + (seq (Pop r8) + (assert-vector r8) + (assert-integer rax) + (Cmp rax 0) + (Jl 'raise_error_align) + (Xor r8 type-vect) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar rax int-shift) ; rax = index + (Sub r9 1) + (Cmp r9 rax) + (Jl 'raise_error_align) + (Sal rax 3) + (Add r8 rax) + (Mov rax (Offset r8 8)))] + + ['make-string + (let ((loop (gensym)) + (done (gensym)) + (empty (gensym))) + (seq (Pop r8) + (assert-natural r8) + (assert-char rax) + (Cmp r8 0) ; special case empty string + (Je empty) + + (Mov r9 rbx) + (Or r9 type-str) + + (Sar r8 int-shift) + (Mov (Offset rbx 0) r8) + (Add rbx 8) + + (Sar rax char-shift) + + (Add r9 1) ; adds 1 + (Sar r9 1) ; when + (Sal r9 1) ; len is odd + + (Label loop) + (Mov (Offset rbx 0) eax) + (Add rbx 4) + (Sub r8 1) + (Cmp r8 0) + (Jne loop) + + (Mov rax r9) + (Jmp done) + + (Label empty) + (Mov rax type-str) + (Label done)))] + + ['string-ref + (seq (Pop r8) + (assert-string r8) + (assert-integer rax) + (Cmp rax 0) + (Jl 'raise_error_align) + (Xor r8 type-str) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar rax int-shift) ; rax = index + (Sub r9 1) + (Cmp r9 rax) + (Jl 'raise_error_align) + (Sal rax 2) + (Add r8 rax) + (Mov 'eax (Offset r8 8)) + (Sal rax char-shift) + (Or rax type-char))])) + +;; Op3 -> Asm +(define (compile-op3 p) + (match p + ['vector-set! + (seq (Pop r10) + (Pop r8) + (assert-vector r8) + (assert-integer r10) + (Cmp r10 0) + (Jl 'raise_error_align) + (Xor r8 type-vect) ; r8 = ptr + (Mov r9 (Offset r8 0)) ; r9 = len + (Sar r10 int-shift) ; r10 = index + (Sub r9 1) + (Cmp r9 r10) + (Jl 'raise_error_align) + (Sal r10 3) + (Add r8 r10) + (Mov (Offset r8 8) rax) + (Mov rax val-void))])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (assert-type mask type) + (λ (arg) + (seq (Mov r9 arg) + (And r9 mask) + (Cmp r9 type) + (Jne 'raise_error_align)))) + +(define (type-pred mask type) + (let ((l (gensym))) + (seq (And rax mask) + (Cmp rax type) + (Mov rax (imm->bits #t)) + (Je l) + (Mov rax (imm->bits #f)) + (Label l)))) + +(define assert-integer + (assert-type mask-int type-int)) +(define assert-char + (assert-type mask-char type-char)) +(define assert-box + (assert-type ptr-mask type-box)) +(define assert-cons + (assert-type ptr-mask type-cons)) +(define assert-vector + (assert-type ptr-mask type-vect)) +(define assert-string + (assert-type ptr-mask type-str)) +(define assert-symbol + (assert-type ptr-mask type-symb)) +(define assert-proc + (assert-type ptr-mask type-proc)) + +(define (assert-codepoint r) + (let ((ok (gensym))) + (seq (assert-integer r) + (Cmp r (imm->bits 0)) + (Jl 'raise_error_align) + (Cmp r (imm->bits 1114111)) + (Jg 'raise_error_align) + (Cmp r (imm->bits 55295)) + (Jl ok) + (Cmp r (imm->bits 57344)) + (Jg ok) + (Jmp 'raise_error_align) + (Label ok)))) + +(define (assert-byte r) + (seq (assert-integer r) + (Cmp r (imm->bits 0)) + (Jl 'raise_error_align) + (Cmp r (imm->bits 255)) + (Jg 'raise_error_align))) + +(define (assert-natural r) + (seq (assert-integer r) + (Cmp r (imm->bits 0)) + (Jl 'raise_error_align))) + +;; Value -> Asm +(define (eq-imm imm) + (let ((l1 (gensym))) + (seq (Cmp rax (imm->bits imm)) + (Mov rax val-true) + (Je l1) + (Mov rax val-false) + (Label l1)))) + +(define (eq ir1 ir2) + (let ((l1 (gensym))) + (seq (Cmp ir1 ir2) + (Mov rax val-true) + (Je l1) + (Mov rax val-false) + (Label l1)))) + +;; Asm +;; Dynamically pad the stack to be aligned for a call +(define pad-stack + (seq (Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) + +;; Asm +;; Undo the stack alignment after a call +(define unpad-stack + (seq (Add rsp r15))) diff --git a/langs/mug/compile.rkt b/langs/mug/compile.rkt new file mode 100644 index 00000000..457d68e5 --- /dev/null +++ b/langs/mug/compile.rkt @@ -0,0 +1,338 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "lambdas.rkt" + "fv.rkt" + "intern.rkt" + "compile-ops.rkt" + "compile-datum.rkt" + a86/ast) + +;; Registers used +(define rax 'rax) ; return +(define rbx 'rbx) ; heap +(define rsp 'rsp) ; stack +(define rdi 'rdi) ; arg + +;; type CEnv = [Listof Id] + +;; Prog -> Asm +(define (compile p) + (match (intern p) + [(cons (Prog ds e) q) + (prog (externs) + (Global 'entry) + (Label 'entry) + (Mov rbx rdi) ; recv heap pointer + (init-symbol-table q) + (compile-defines-values ds) + (compile-e e (reverse (define-ids ds)) #t) + (Add rsp (* 8 (length ds))) ;; pop function definitions + (Ret) + (compile-defines ds) + (compile-lambda-defines (lambdas p)) + (Label 'raise_error_align) + pad-stack + (Call 'raise_error) + (Data) + (compile-literals q))])) + +(define (externs) + (seq (Extern 'peek_byte) + (Extern 'read_byte) + (Extern 'write_byte) + (Extern 'raise_error) + (Extern 'gensym) + (Extern 'intern_symbol) + (Extern 'str_dup))) + +;; [Listof Defn] -> [Listof Id] +(define (define-ids ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (cons f (define-ids ds))])) + +;; [Listof Defn] -> Asm +(define (compile-defines ds) + (match ds + ['() (seq)] + [(cons d ds) + (seq (compile-define d) + (compile-defines ds))])) + +;; Defn -> Asm +(define (compile-define d) + (match d + [(Defn f xs e) + (compile-lambda-define (Lam f xs e))])) + +;; [Listof Lam] -> Asm +(define (compile-lambda-defines ls) + (match ls + ['() (seq)] + [(cons l ls) + (seq (compile-lambda-define l) + (compile-lambda-defines ls))])) + +;; Lam -> Asm +(define (compile-lambda-define l) + (let ((fvs (fv l))) + (match l + [(Lam f xs e) + (let ((env (append (reverse fvs) (reverse xs) (list #f)))) + (seq (Label (symbol->label f)) + (Mov rax (Offset rsp (* 8 (length xs)))) + (Xor rax type-proc) + (copy-env-to-stack fvs 8) + (compile-e e env #t) + (Add rsp (* 8 (length env))) ; pop env + (Ret)))]))) + +;; [Listof Id] Int -> Asm +;; Copy the closure environment at given offset to stack +(define (copy-env-to-stack fvs off) + (match fvs + ['() (seq)] + [(cons _ fvs) + (seq (Mov r9 (Offset rax off)) + (Push r9) + (copy-env-to-stack fvs (+ 8 off)))])) + +;; Expr CEnv Bool -> Asm +(define (compile-e e c t?) + (match e + [(Int i) (compile-value i)] + [(Bool b) (compile-value b)] + [(Char c) (compile-value c)] + [(Eof) (compile-value eof)] + [(Empty) (compile-value '())] + [(Var x) (compile-variable x c)] + [(Ref l t) (compile-ref l t)] + [(Prim0 p) (compile-prim0 p c)] + [(Prim1 p e) (compile-prim1 p e c)] + [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] + [(Prim3 p e1 e2 e3) (compile-prim3 p e1 e2 e3 c)] + [(If e1 e2 e3) (compile-if e1 e2 e3 c t?)] + [(Begin e1 e2) (compile-begin e1 e2 c t?)] + [(Let x e1 e2) (compile-let x e1 e2 c t?)] + [(App e es) (compile-app e es c t?)] + [(Lam f xs e) (compile-lam f xs e c)])) + +;; Value -> Asm +(define (compile-value v) + (seq (Mov rax (imm->bits v)))) + +;; Id CEnv -> Asm +(define (compile-variable x c) + (let ((i (lookup x c))) + (seq (Mov rax (Offset rsp i))))) + +;; Label Tag -> Asm +(define (compile-ref l t) + (seq (Lea rax (Plus l t)))) + +;; Op0 CEnv -> Asm +(define (compile-prim0 p c) + (compile-op0 p)) + +;; Op1 Expr CEnv -> Asm +(define (compile-prim1 p e c) + (seq (compile-e e c #f) + (compile-op1 p))) + +;; Op2 Expr Expr CEnv -> Asm +(define (compile-prim2 p e1 e2 c) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons #f c) #f) + (compile-op2 p))) + +;; Op3 Expr Expr Expr CEnv -> Asm +(define (compile-prim3 p e1 e2 e3 c) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons #f c) #f) + (Push rax) + (compile-e e3 (cons #f (cons #f c)) #f) + (compile-op3 p))) + +;; Expr Expr Expr CEnv Bool -> Asm +(define (compile-if e1 e2 e3 c t?) + (let ((l1 (gensym 'if)) + (l2 (gensym 'if))) + (seq (compile-e e1 c #f) + (Cmp rax val-false) + (Je l1) + (compile-e e2 c t?) + (Jmp l2) + (Label l1) + (compile-e e3 c t?) + (Label l2)))) + +;; Expr Expr CEnv Bool -> Asm +(define (compile-begin e1 e2 c t?) + (seq (compile-e e1 c #f) + (compile-e e2 c t?))) + +;; Id Expr Expr CEnv Bool -> Asm +(define (compile-let x e1 e2 c t?) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons x c) t?) + (Add rsp 8))) + +;; Id [Listof Expr] CEnv Bool -> Asm +(define (compile-app f es c t?) + ;(compile-app-nontail f es c) + (if t? + (compile-app-tail f es c) + (compile-app-nontail f es c))) + +;; Expr [Listof Expr] CEnv -> Asm +(define (compile-app-tail e es c) + (seq (compile-es (cons e es) c) + (move-args (add1 (length es)) (length c)) + (Add rsp (* 8 (length c))) + (Mov rax (Offset rsp (* 8 (length es)))) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) + (Jmp rax))) + +;; Integer Integer -> Asm +(define (move-args i off) + (cond [(zero? off) (seq)] + [(zero? i) (seq)] + [else + (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) + (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) + (move-args (sub1 i) off))])) + +;; Expr [Listof Expr] CEnv -> Asm +;; The return address is placed above the arguments, so callee pops +;; arguments and return address is next frame +(define (compile-app-nontail e es c) + (let ((r (gensym 'ret)) + (i (* 8 (length es)))) + (seq (Lea rax r) + (Push rax) + (compile-es (cons e es) (cons #f c)) + (Mov rax (Offset rsp i)) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) ; fetch the code label + (Jmp rax) + (Label r)))) + +;; Defns -> Asm +;; Compile the closures for ds and push them on the stack +(define (compile-defines-values ds) + (seq (alloc-defines ds 0) + (init-defines ds (reverse (define-ids ds)) 8) + (add-rbx-defines ds 0))) + +;; Defns Int -> Asm +;; Allocate closures for ds at given offset, but don't write environment yet +(define (alloc-defines ds off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx off) rax) + (Mov rax rbx) + (Add rax off) + (Or rax type-proc) + (Push rax) + (alloc-defines ds (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns CEnv Int -> Asm +;; Initialize the environment for each closure for ds at given offset +(define (init-defines ds c off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (free-vars-to-heap fvs c off) + (init-defines ds c (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns Int -> Asm +;; Compute adjustment to rbx for allocation of all ds +(define (add-rbx-defines ds n) + (match ds + ['() (seq (Add rbx (* n 8)))] + [(cons (Defn f xs e) ds) + (add-rbx-defines ds (+ n (add1 (length (fv (Lam f xs e))))))])) + +;; Id [Listof Id] Expr CEnv -> Asm +(define (compile-lam f xs e c) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx 0) rax) + (free-vars-to-heap fvs c 8) + (Mov rax rbx) ; return value + (Or rax type-proc) + (Add rbx (* 8 (add1 (length fvs))))))) + +;; [Listof Id] CEnv Int -> Asm +;; Copy the values of given free variables into the heap at given offset +(define (free-vars-to-heap fvs c off) + (match fvs + ['() (seq)] + [(cons x fvs) + (seq (Mov r8 (Offset rsp (lookup x c))) + (Mov (Offset rbx off) r8) + (free-vars-to-heap fvs c (+ off 8)))])) + +;; [Listof Expr] CEnv -> Asm +(define (compile-es es c) + (match es + ['() '()] + [(cons e es) + (seq (compile-e e c #f) + (Push rax) + (compile-es es (cons #f c)))])) + +;; Id CEnv -> Integer +(define (lookup x cenv) + (match cenv + ['() (error "undefined variable:" x)] + [(cons y rest) + (match (eq? x y) + [#t 0] + [#f (+ 8 (lookup x rest))])])) + +;; QEnv -> Asm +;; Call intern_symbol on every symbol in the program +(define (init-symbol-table q) + (seq pad-stack + (append-map init-symbol q) + unpad-stack)) + +;; (cons (U String Symbol) Ref) -> Asm +(define (init-symbol qb) + (match qb + [(cons (? symbol? s) (Ref l _)) + (seq (Lea rdi l) + (Call 'intern_symbol))] + [_ (seq)])) + +;; Symbol -> Label +;; Produce a symbol that is a valid Nasm label +(define (symbol->label s) + (string->symbol + (string-append + "label_" + (list->string + (map (λ (c) + (if (or (char<=? #\a c #\z) + (char<=? #\A c #\Z) + (char<=? #\0 c #\9) + (memq c '(#\_ #\$ #\# #\@ #\~ #\. #\?))) + c + #\_)) + (string->list (symbol->string s)))) + "_" + (number->string (eq-hash-code s) 16)))) diff --git a/langs/mug/env.rkt b/langs/mug/env.rkt new file mode 100644 index 00000000..c43be9c3 --- /dev/null +++ b/langs/mug/env.rkt @@ -0,0 +1,15 @@ +#lang racket +(provide lookup ext) + +;; Env Variable -> Answer +(define (lookup env x) + (match env + ['() 'err] + [(cons (list y i) env) + (match (symbol=? x y) + [#t i] + [#f (lookup env x)])])) + +;; Env Variable Value -> Value +(define (ext r x i) + (cons (list x i) r)) \ No newline at end of file diff --git a/langs/mug/fv.rkt b/langs/mug/fv.rkt new file mode 100644 index 00000000..212efadd --- /dev/null +++ b/langs/mug/fv.rkt @@ -0,0 +1,21 @@ +#lang racket +(require "ast.rkt") +(provide fv) + +;; Expr -> [Listof Id] +;; List all of the free variables in e +(define (fv e) + (remove-duplicates (fv* e))) + +(define (fv* e) + (match e + [(Var x) (list x)] + [(Prim1 p e) (fv* e)] + [(Prim2 p e1 e2) (append (fv* e1) (fv* e2))] + [(Prim3 p e1 e2 e3) (append (fv* e1) (fv* e2) (fv* e3))] + [(If e1 e2 e3) (append (fv* e1) (fv* e2) (fv* e3))] + [(Begin e1 e2) (append (fv* e1) (fv* e2))] + [(Let x e1 e2) (append (fv* e1) (remq* (list x) (fv* e2)))] + [(App e1 es) (append (fv* e1) (append-map fv* es))] + [(Lam f xs e) (remq* xs (fv* e))] + [_ '()])) diff --git a/langs/mug/heap.h b/langs/mug/heap.h new file mode 100644 index 00000000..8f2f5e23 --- /dev/null +++ b/langs/mug/heap.h @@ -0,0 +1,9 @@ +#include + +extern int64_t heap[]; +extern int from_side; + +extern char type[]; + +// in words +#define heap_size 1001 diff --git a/langs/mug/intern.rkt b/langs/mug/intern.rkt new file mode 100644 index 00000000..0f7cbe1f --- /dev/null +++ b/langs/mug/intern.rkt @@ -0,0 +1,113 @@ +#lang racket +(require "ast.rkt" "types.rkt") +(provide intern + (struct-out Ref)) + +;; type QEnv = [Listof (cons (U String Symbol) Ref)] + +(struct Ref (label type-tag) #:prefab) + +;; (U String Symbol) QEnv -> [Maybe Ref] +(define (lookup d q) + (match q + ['() #f] + [(cons (cons d0 r) q) + (if (equal? d d0) + r + (lookup d q))])) + +;; (U Symbol String) QEnv Tag -> QEnv +;; Either lookup prior reference or create a new one +(define (intern! s q type) + (match (lookup s q) + [#f + (let ((l (gensym 'lit))) + (let ((r (Ref l type))) + (cons r + (cons (cons s r) q))))] + [r (cons r q)])) + +;; Prog -> (cons Prog QEnv) +(define (intern p) + (intern-prog p '())) + +;; Prog QEnv -> (cons Prog QEnv) +(define (intern-prog p q) + (match p + [(Prog ds e) + (match (intern-ds ds q) + [(cons ds q) + (match (intern-e e q) + [(cons e q) + (cons (Prog ds e) q)])])])) + +;; Defns QEnv -> (cons Defns QEnv) +(define (intern-ds ds q) + (match ds + ['() (cons '() q)] + [(cons d ds) + (match (intern-d d q) + [(cons d q) + (match (intern-ds ds q) + [(cons ds q) + (cons (cons d ds) q)])])])) + +;; Defn QEnv -> (cons Defn QEnv) +(define (intern-d d q) + (match d + [(Defn f xs e) + (match (intern-e e q) + [(cons e q) + (cons (Defn f xs e) q)])])) + +;; Expr QEnv -> (cons Expr QEnv) +(define (intern-e e q) + (match e + [(Str s) + (intern! s q type-str)] + [(Symb s) + (intern! s q type-symb)] + [(Prim1 p e) + (match (intern-e e q) + [(cons e q) + (cons (Prim1 p e) q)])] + [(Prim2 p e1 e2) + (match (intern-es (list e1 e2) q) + [(cons (list e1 e2) q) + (cons (Prim2 p e1 e2) q)])] + [(Prim3 p e1 e2 e3) + (match (intern-es (list e1 e2 e3) q) + [(cons (list e1 e2 e3) q) + (cons (Prim3 p e1 e2 e3) q)])] + [(If e1 e2 e3) + (match (intern-es (list e1 e2 e3) q) + [(cons (list e1 e2 e3) q) + (cons (If e1 e2 e3) q)])] + [(Begin e1 e2) + (match (intern-es (list e1 e2) q) + [(cons (list e1 e2) q) + (cons (Begin e1 e2) q)])] + [(Let x e1 e2) + (match (intern-es (list e1 e2) q) + [(cons (list e1 e2) q) + (cons (Let x e1 e2) q)])] + [(App e1 es) + (match (intern-es (cons e1 es) q) + [(cons (cons e1 es) q) + (cons (App e1 es) q)])] + [(Lam f xs e) + (match (intern-e e q) + [(cons e q) + (cons (Lam f xs e) q)])] + [_ (cons e q)])) + +;; [Listof Expr] QEnv -> (cons [Listof Expr] QEnv) +(define (intern-es es q) + (match es + ['() (cons '() q)] + [(cons e es) + (match (intern-e e q) + [(cons e q) + (match (intern-es es q) + [(cons es q) + (cons (cons e es) q)])])])) diff --git a/langs/mug/interp-defun.rkt b/langs/mug/interp-defun.rkt new file mode 100644 index 00000000..4a727607 --- /dev/null +++ b/langs/mug/interp-defun.rkt @@ -0,0 +1,125 @@ +#lang racket +(provide interp interp-env (struct-out Closure) zip) +(require "ast.rkt" + "env.rkt" + "interp-prims.rkt") + +;; type Answer = Value | 'err + +;; type Value = +;; | Integer +;; | Boolean +;; | Character +;; | Symbol +;; | Eof +;; | Void +;; | '() +;; | (cons Value Value) +;; | (box Value) +;; | (vector Value ...) +;; | (string Char ...) +;; | (Closure [Listof Id] Expr Env) +(struct Closure (xs e r) #:prefab) + +;; type REnv = (Listof (List Id Value)) +;; type Defns = (Listof Defn) + +;; Prog Defns -> Answer +(define (interp p) + (match p + [(Prog ds e) + (interp-env e '() ds)])) + +;; Expr Env Defns -> Answer +(define (interp-env e r ds) + (match e + [(Int i) i] + [(Bool b) b] + [(Char c) c] + [(Eof) eof] + [(Empty) '()] + [(Var x) (interp-var x r ds)] + [(Str s) s] + [(Symb s) s] + [(Prim0 'void) (void)] + [(Prim0 'read-byte) (read-byte)] + [(Prim0 'peek-byte) (peek-byte)] + [(Prim0 'gensym) (gensym)] + [(Prim1 p e) + (match (interp-env e r ds) + ['err 'err] + [v (interp-prim1 p v)])] + [(Prim2 p e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (interp-prim2 p v1 v2)])])] + [(Prim3 p e1 e2 e3) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (match (interp-env e3 r ds) + ['err 'err] + [v3 (interp-prim3 p v1 v2 v3)])])])] + [(If p e1 e2) + (match (interp-env p r ds) + ['err 'err] + [v + (if v + (interp-env e1 r ds) + (interp-env e2 r ds))])] + [(Begin e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [_ (interp-env e2 r ds)])] + [(Let x e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v (interp-env e2 (ext r x v) ds)])] + [(Lam _ xs e) + (Closure xs e r)] + [(App e es) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (match f + [(Closure xs e r) + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err)] + [_ 'err])])])])) + +;; Id Env [Listof Defn] -> Answer +(define (interp-var x r ds) + (match (lookup r x) + ['err (match (defns-lookup ds x) + [(Defn f xs e) (interp-env (Lam f xs e) '() ds)] + [#f 'err])] + [v v])) + +;; (Listof Expr) REnv Defns -> (Listof Value) | 'err +(define (interp-env* es r ds) + (match es + ['() '()] + [(cons e es) + (match (interp-env e r ds) + ['err 'err] + [v (cons v (interp-env* es r ds))])])) + +;; Defns Symbol -> [Maybe Defn] +(define (defns-lookup ds f) + (findf (match-lambda [(Defn g _ _) (eq? f g)]) + ds)) + +(define (zip xs ys) + (match* (xs ys) + [('() '()) '()] + [((cons x xs) (cons y ys)) + (cons (list x y) + (zip xs ys))])) diff --git a/langs/mug/interp-file.rkt b/langs/mug/interp-file.rkt new file mode 100644 index 00000000..c2490030 --- /dev/null +++ b/langs/mug/interp-file.rkt @@ -0,0 +1,15 @@ +#lang racket +(provide main) +(require "parse.rkt" "interp.rkt" "read-all.rkt") + +;; String -> Void +;; Parse and interpret contents of given filename, +;; print result on stdout +(define (main fn) + (let ((p (open-input-file fn))) + (begin + (read-line p) ; ignore #lang racket line + (let ((r (interp (parse (read-all p))))) + (unless (void? r) + (println r))) + (close-input-port p)))) diff --git a/langs/mug/interp-io.rkt b/langs/mug/interp-io.rkt new file mode 100644 index 00000000..93f7d3c6 --- /dev/null +++ b/langs/mug/interp-io.rkt @@ -0,0 +1,12 @@ +#lang racket +(provide interp/io) +(require "interp.rkt") + +;; (Expr String -> String +;; Interpret e with given string as input, +;; collect output as string (including printed result) +(define (interp/io e in) + (parameterize ((current-output-port (open-output-string)) + (current-input-port (open-input-string in))) + (cons (interp e) + (get-output-string (current-output-port))))) diff --git a/langs/mug/interp-prims.rkt b/langs/mug/interp-prims.rkt new file mode 100644 index 00000000..79bcf361 --- /dev/null +++ b/langs/mug/interp-prims.rkt @@ -0,0 +1,72 @@ +#lang racket +(require "ast.rkt") +(provide interp-prim1 interp-prim2 interp-prim3) + +;; Op1 Value -> Answer +(define (interp-prim1 p1 v) + (match (list p1 v) + [(list 'add1 (? integer?)) (add1 v)] + [(list 'sub1 (? integer?)) (sub1 v)] + [(list 'zero? (? integer?)) (zero? v)] + [(list 'char? v) (char? v)] + [(list 'char->integer (? char?)) (char->integer v)] + [(list 'integer->char (? codepoint?)) (integer->char v)] + [(list 'eof-object? v) (eof-object? v)] + [(list 'write-byte (? byte?)) (write-byte v)] + [(list 'box v) (box v)] + [(list 'unbox (? box?)) (unbox v)] + [(list 'car (? pair?)) (car v)] + [(list 'cdr (? pair?)) (cdr v)] + [(list 'empty? v) (empty? v)] + [(list 'cons? v) (cons? v)] + [(list 'box? v) (box? v)] + [(list 'vector? v) (vector? v)] + [(list 'vector-length (? vector?)) (vector-length v)] + [(list 'string? v) (string? v)] + [(list 'string-length (? string?)) (string-length v)] + [(list 'symbol? v) (symbol? v)] + [(list 'string->symbol (? string?)) (string->symbol v)] + [(list 'symbol->string (? symbol?)) (symbol->string v)] + [_ 'err])) + +;; Op2 Value Value -> Answer +(define (interp-prim2 p v1 v2) + (match (list p v1 v2) + [(list '+ (? integer?) (? integer?)) (+ v1 v2)] + [(list '- (? integer?) (? integer?)) (- v1 v2)] + [(list '< (? integer?) (? integer?)) (< v1 v2)] + [(list '= (? integer?) (? integer?)) (= v1 v2)] + [(list 'cons v1 v2) (cons v1 v2)] + [(list 'eq? v1 v2) (eq? v1 v2)] + [(list 'make-vector (? integer?) _) + (if (<= 0 v1) + (make-vector v1 v2) + 'err)] + [(list 'vector-ref (? vector?) (? integer?)) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-ref v1 v2) + 'err)] + [(list 'make-string (? integer?) (? char?)) + (if (<= 0 v1) + (make-string v1 v2) + 'err)] + [(list 'string-ref (? string?) (? integer?)) + (if (<= 0 v2 (sub1 (string-length v1))) + (string-ref v1 v2) + 'err)] + [_ 'err])) + +;; Op3 Value Value Value -> Answer +(define (interp-prim3 p v1 v2 v3) + (match (list p v1 v2 v3) + [(list 'vector-set! (? vector?) (? integer?) _) + (if (<= 0 v2 (sub1 (vector-length v1))) + (vector-set! v1 v2 v3) + 'err)] + [_ 'err])) + +;; Any -> Boolean +(define (codepoint? v) + (and (integer? v) + (or (<= 0 v 55295) + (<= 57344 v 1114111)))) diff --git a/langs/mug/interp.rkt b/langs/mug/interp.rkt new file mode 100644 index 00000000..19d8656d --- /dev/null +++ b/langs/mug/interp.rkt @@ -0,0 +1,124 @@ +#lang racket +(provide interp interp-env) +(require "ast.rkt" + "env.rkt" + "interp-prims.rkt") + +;; type Answer = Value | 'err + +;; type Value = +;; | Integer +;; | Boolean +;; | Character +;; | Symbol +;; | Eof +;; | Void +;; | '() +;; | (cons Value Value) +;; | (box Value) +;; | (vector Value ...) +;; | (string Char ...) +;; | (Value ... -> Answer) + +;; type REnv = (Listof (List Id Value)) +;; type Defns = (Listof Defn) + +;; Prog Defns -> Answer +(define (interp p) + (match p + [(Prog ds e) + (interp-env e '() ds)])) + +;; Expr Env Defns -> Answer +(define (interp-env e r ds) + (match e + [(Int i) i] + [(Bool b) b] + [(Char c) c] + [(Eof) eof] + [(Empty) '()] + [(Var x) (interp-var x r ds)] + [(Str s) s] + [(Symb s) s] + [(Prim0 'void) (void)] + [(Prim0 'read-byte) (read-byte)] + [(Prim0 'peek-byte) (peek-byte)] + [(Prim0 'gensym) (gensym)] + [(Prim1 p e) + (match (interp-env e r ds) + ['err 'err] + [v (interp-prim1 p v)])] + [(Prim2 p e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (interp-prim2 p v1 v2)])])] + [(Prim3 p e1 e2 e3) + (match (interp-env e1 r ds) + ['err 'err] + [v1 (match (interp-env e2 r ds) + ['err 'err] + [v2 (match (interp-env e3 r ds) + ['err 'err] + [v3 (interp-prim3 p v1 v2 v3)])])])] + [(If p e1 e2) + (match (interp-env p r ds) + ['err 'err] + [v + (if v + (interp-env e1 r ds) + (interp-env e2 r ds))])] + [(Begin e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [_ (interp-env e2 r ds)])] + [(Let x e1 e2) + (match (interp-env e1 r ds) + ['err 'err] + [v (interp-env e2 (ext r x v) ds)])] + [(Lam _ xs e) + (λ vs + ; check arity matches + (if (= (length xs) (length vs)) + (interp-env e (append (zip xs vs) r) ds) + 'err))] + [(App e es) + (match (interp-env e r ds) + ['err 'err] + [f + (match (interp-env* es r ds) + ['err 'err] + [vs + (if (procedure? f) + (apply f vs) + 'err)])])])) + +;; Id Env [Listof Defn] -> Answer +(define (interp-var x r ds) + (match (lookup r x) + ['err (match (defns-lookup ds x) + [(Defn f xs e) (interp-env (Lam f xs e) '() ds)] + [#f 'err])] + [v v])) + +;; (Listof Expr) REnv Defns -> (Listof Value) | 'err +(define (interp-env* es r ds) + (match es + ['() '()] + [(cons e es) + (match (interp-env e r ds) + ['err 'err] + [v (cons v (interp-env* es r ds))])])) + +;; Defns Symbol -> [Maybe Defn] +(define (defns-lookup ds f) + (findf (match-lambda [(Defn g _ _) (eq? f g)]) + ds)) + +(define (zip xs ys) + (match* (xs ys) + [('() '()) '()] + [((cons x xs) (cons y ys)) + (cons (list x y) + (zip xs ys))])) diff --git a/langs/mug/io.c b/langs/mug/io.c new file mode 100644 index 00000000..7ef82281 --- /dev/null +++ b/langs/mug/io.c @@ -0,0 +1,25 @@ +#include +#include +#include "types.h" +#include "values.h" +#include "runtime.h" + +val_t read_byte(void) +{ + char c = getc(in); + return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); +} + +val_t peek_byte(void) +{ + char c = getc(in); + ungetc(c, in); + return (c == EOF) ? val_wrap_eof() : val_wrap_int(c); + +} + +val_t write_byte(val_t c) +{ + putc((char) val_unwrap_int(c), out); + return val_wrap_void(); +} diff --git a/langs/mug/lambdas.rkt b/langs/mug/lambdas.rkt new file mode 100644 index 00000000..8ef29b80 --- /dev/null +++ b/langs/mug/lambdas.rkt @@ -0,0 +1,34 @@ +#lang racket +(require "ast.rkt") +(provide lambdas) + + +;; Prog -> [Listof Lam] +;; List all of the lambda expressions in p +(define (lambdas p) + (match p + [(Prog ds e) + (append (lambdas-ds ds) (lambdas-e e))])) + +;; Defns -> [Listof Lam] +;; List all of the lambda expressions in ds +(define (lambdas-ds ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (append (lambdas-e e) + (lambdas-ds ds))])) + +;; Expr -> [Listof Lam] +;; List all of the lambda expressions in e +(define (lambdas-e e) + (match e + [(Prim1 p e) (lambdas-e e)] + [(Prim2 p e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(Prim3 p e1 e2 e3) (append (lambdas-e e1) (lambdas-e e2) (lambdas-e e3))] + [(If e1 e2 e3) (append (lambdas-e e1) (lambdas-e e2) (lambdas-e e3))] + [(Begin e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(Let x e1 e2) (append (lambdas-e e1) (lambdas-e e2))] + [(App e1 es) (append (lambdas-e e1) (append-map lambdas-e es))] + [(Lam f xs e1) (cons e (lambdas-e e1))] + [_ '()])) diff --git a/langs/mug/main.c b/langs/mug/main.c new file mode 100644 index 00000000..1ca6115f --- /dev/null +++ b/langs/mug/main.c @@ -0,0 +1,40 @@ +#include +#include +#include "values.h" +#include "print.h" +#include "runtime.h" + +FILE* in; +FILE* out; +void (*error_handler)(); +val_t *heap; + +void error_exit() +{ + printf("err\n"); + exit(1); +} + +void raise_error() +{ + return error_handler(); +} + +int main(int argc, char** argv) +{ + in = stdin; + out = stdout; + error_handler = &error_exit; + heap = malloc(8 * heap_size); + + val_t result; + + result = entry(heap); + + print_result(result); + if (val_typeof(result) != T_VOID) + putchar('\n'); + + free(heap); + return 0; +} diff --git a/langs/mug/parse-file.rkt b/langs/mug/parse-file.rkt new file mode 100644 index 00000000..a5021320 --- /dev/null +++ b/langs/mug/parse-file.rkt @@ -0,0 +1,13 @@ +#lang racket +(provide main) +(require "parse.rkt" "compile.rkt" "read-all.rkt" a86/printer) + +;; String -> Void +;; Compile contents of given file name, +;; emit asm code on stdout +(define (main fn) + (let ((p (open-input-file fn))) + (begin + (read-line p) ; ignore #lang racket line + (displayln (parse (read-all p))) + (close-input-port p)))) diff --git a/langs/mug/parse.rkt b/langs/mug/parse.rkt new file mode 100644 index 00000000..cb473c74 --- /dev/null +++ b/langs/mug/parse.rkt @@ -0,0 +1,72 @@ +#lang racket +(provide parse parse-define parse-e) +(require "ast.rkt") + +;; [Listof S-Expr] -> Prog +(define (parse s) + (match s + [(cons (and (cons 'define _) d) s) + (match (parse s) + [(Prog ds e) + (Prog (cons (parse-define d) ds) e)])] + [(cons e '()) (Prog '() (parse-e e))] + [_ (error "program parse error")])) + +;; S-Expr -> Defn +(define (parse-define s) + (match s + [(list 'define (list-rest (? symbol? f) xs) e) + (if (andmap symbol? xs) + (Defn f xs (parse-e e)) + (error "parse definition error"))] + [_ (error "Parse defn error" s)])) + +;; S-Expr -> Expr +(define (parse-e s) + (match s + [(? integer?) (Int s)] + [(? boolean?) (Bool s)] + [(? char?) (Char s)] + [(? string?) (Str s)] + ['eof (Eof)] + [(? symbol?) (Var s)] + [(list 'quote (list)) (Empty)] + [(list 'quote (? symbol? s)) (Symb s)] + [(list (? (op? op0) p0)) (Prim0 p0)] + [(list (? (op? op1) p1) e) (Prim1 p1 (parse-e e))] + [(list (? (op? op2) p2) e1 e2) (Prim2 p2 (parse-e e1) (parse-e e2))] + [(list (? (op? op3) p3) e1 e2 e3) + (Prim3 p3 (parse-e e1) (parse-e e2) (parse-e e3))] + [(list 'begin e1 e2) + (Begin (parse-e e1) (parse-e e2))] + [(list 'if e1 e2 e3) + (If (parse-e e1) (parse-e e2) (parse-e e3))] + [(list 'let (list (list (? symbol? x) e1)) e2) + (Let x (parse-e e1) (parse-e e2))] + [(list (or 'lambda 'λ) xs e) + (if (and (list? xs) + (andmap symbol? xs)) + (Lam (gensym 'lambda) xs (parse-e e)) + (error "parse lambda error"))] + [(cons e es) + (App (parse-e e) (map parse-e es))] + [_ (error "Parse error" s)])) + +(define op0 + '(read-byte peek-byte void gensym)) + +(define op1 + '(add1 sub1 zero? char? write-byte eof-object? + integer->char char->integer + box unbox empty? cons? box? car cdr + vector? vector-length string? string-length + symbol? symbol->string string->symbol)) +(define op2 + '(+ - < = cons eq? make-vector vector-ref make-string string-ref)) +(define op3 + '(vector-set!)) + +(define (op? ops) + (λ (x) + (and (symbol? x) + (memq x ops)))) diff --git a/langs/mug/print.c b/langs/mug/print.c new file mode 100644 index 00000000..2bcb21dc --- /dev/null +++ b/langs/mug/print.c @@ -0,0 +1,855 @@ +#include +#include +#include "values.h" + +void print_char(val_char_t); +void print_codepoint(val_char_t); +void print_cons(val_cons_t *); +void print_vect(val_vect_t*); +void print_str(val_str_t*); +void print_symb(val_symb_t*); +void print_str_char(val_char_t); +void print_result_interior(val_t); +int utf8_encode_char(val_char_t, char *); + +void print_result(val_t x) +{ + switch (val_typeof(x)) { + case T_INT: + printf("%" PRId64, val_unwrap_int(x)); + break; + case T_BOOL: + printf(val_unwrap_bool(x) ? "#t" : "#f"); + break; + case T_CHAR: + print_char(val_unwrap_char(x)); + break; + case T_EOF: + printf("#"); + break; + case T_VOID: + break; + case T_EMPTY: + case T_BOX: + case T_CONS: + case T_VECT: + printf("'"); + print_result_interior(x); + break; + case T_STR: + putchar('"'); + print_str(val_unwrap_str(x)); + putchar('"'); + break; + case T_SYMB: + printf("'"); + print_result_interior(x); + break; + case T_PROC: + printf("#"); + break; + case T_INVALID: + printf("internal error"); + } +} + +void print_symb(val_symb_t *s) +{ + print_str((val_str_t*) s); +} + +void print_result_interior(val_t x) +{ + switch (val_typeof(x)) { + case T_EMPTY: + printf("()"); + break; + case T_BOX: + printf("#&"); + print_result_interior(val_unwrap_box(x)->val); + break; + case T_CONS: + printf("("); + print_cons(val_unwrap_cons(x)); + printf(")"); + break; + case T_SYMB: + print_symb(val_unwrap_symb(x)); + break; + case T_VECT: + print_vect(val_unwrap_vect(x)); + break; + default: + print_result(x); + } +} + +void print_vect(val_vect_t *v) +{ + uint64_t i; + + if (!v) { printf("#()"); return; } + + printf("#("); + for (i = 0; i < v->len; ++i) { + print_result_interior(v->elems[i]); + + if (i < v->len - 1) + putchar(' '); + } + printf(")"); +} + +void print_cons(val_cons_t *cons) +{ + print_result_interior(cons->fst); + + switch (val_typeof(cons->snd)) { + case T_EMPTY: + // nothing + break; + case T_CONS: + printf(" "); + print_cons(val_unwrap_cons(cons->snd)); + break; + default: + printf(" . "); + print_result_interior(cons->snd); + break; + } +} + +void print_str(val_str_t* s) +{ + if (!s) return; + uint64_t i; + for (i = 0; i < s->len; ++i) + print_str_char(s->codepoints[i]); +} + +void print_str_char_u(val_char_t c) +{ + printf("\\u%04X", c); +} + +void print_str_char_U(val_char_t c) +{ + printf("\\U%08X", c); +} + +void print_str_char(val_char_t c) +{ + switch (c) { + case 0 ... 6: + print_str_char_u(c); + break; + case 7: + printf("\\a"); + break; + case 8: + printf("\\b"); + break; + case 9: + printf("\\t"); + break; + case 10: + printf("\\n"); + break; + case 11: + printf("\\v"); + break; + case 12: + printf("\\f"); + break; + case 13: + printf("\\r"); + break; + case 14 ... 26: + print_str_char_u(c); + break; + case 27: + printf("\\e"); + break; + case 28 ... 31: + print_str_char_u(c); + break; + case 34: + printf("\\\""); + break; + case 39: + printf("'"); + break; + case 92: + printf("\\\\"); + break; + case 127 ... 159: + case 173 ... 173: + case 888 ... 889: + case 896 ... 899: + case 907 ... 907: + case 909 ... 909: + case 930 ... 930: + case 1328 ... 1328: + case 1367 ... 1368: + case 1376 ... 1376: + case 1416 ... 1416: + case 1419 ... 1420: + case 1424 ... 1424: + case 1480 ... 1487: + case 1515 ... 1519: + case 1525 ... 1541: + case 1564 ... 1565: + case 1757 ... 1757: + case 1806 ... 1807: + case 1867 ... 1868: + case 1970 ... 1983: + case 2043 ... 2047: + case 2094 ... 2095: + case 2111 ... 2111: + case 2140 ... 2141: + case 2143 ... 2207: + case 2227 ... 2275: + case 2436 ... 2436: + case 2445 ... 2446: + case 2449 ... 2450: + case 2473 ... 2473: + case 2481 ... 2481: + case 2483 ... 2485: + case 2490 ... 2491: + case 2501 ... 2502: + case 2505 ... 2506: + case 2511 ... 2518: + case 2520 ... 2523: + case 2526 ... 2526: + case 2532 ... 2533: + case 2556 ... 2560: + case 2564 ... 2564: + case 2571 ... 2574: + case 2577 ... 2578: + case 2601 ... 2601: + case 2609 ... 2609: + case 2612 ... 2612: + case 2615 ... 2615: + case 2618 ... 2619: + case 2621 ... 2621: + case 2627 ... 2630: + case 2633 ... 2634: + case 2638 ... 2640: + case 2642 ... 2648: + case 2653 ... 2653: + case 2655 ... 2661: + case 2678 ... 2688: + case 2692 ... 2692: + case 2702 ... 2702: + case 2706 ... 2706: + case 2729 ... 2729: + case 2737 ... 2737: + case 2740 ... 2740: + case 2746 ... 2747: + case 2758 ... 2758: + case 2762 ... 2762: + case 2766 ... 2767: + case 2769 ... 2783: + case 2788 ... 2789: + case 2802 ... 2816: + case 2820 ... 2820: + case 2829 ... 2830: + case 2833 ... 2834: + case 2857 ... 2857: + case 2865 ... 2865: + case 2868 ... 2868: + case 2874 ... 2875: + case 2885 ... 2886: + case 2889 ... 2890: + case 2894 ... 2901: + case 2904 ... 2907: + case 2910 ... 2910: + case 2916 ... 2917: + case 2936 ... 2945: + case 2948 ... 2948: + case 2955 ... 2957: + case 2961 ... 2961: + case 2966 ... 2968: + case 2971 ... 2971: + case 2973 ... 2973: + case 2976 ... 2978: + case 2981 ... 2983: + case 2987 ... 2989: + case 3002 ... 3005: + case 3011 ... 3013: + case 3017 ... 3017: + case 3022 ... 3023: + case 3025 ... 3030: + case 3032 ... 3045: + case 3067 ... 3071: + case 3076 ... 3076: + case 3085 ... 3085: + case 3089 ... 3089: + case 3113 ... 3113: + case 3130 ... 3132: + case 3141 ... 3141: + case 3145 ... 3145: + case 3150 ... 3156: + case 3159 ... 3159: + case 3162 ... 3167: + case 3172 ... 3173: + case 3184 ... 3191: + case 3200 ... 3200: + case 3204 ... 3204: + case 3213 ... 3213: + case 3217 ... 3217: + case 3241 ... 3241: + case 3252 ... 3252: + case 3258 ... 3259: + case 3269 ... 3269: + case 3273 ... 3273: + case 3278 ... 3284: + case 3287 ... 3293: + case 3295 ... 3295: + case 3300 ... 3301: + case 3312 ... 3312: + case 3315 ... 3328: + case 3332 ... 3332: + case 3341 ... 3341: + case 3345 ... 3345: + case 3387 ... 3388: + case 3397 ... 3397: + case 3401 ... 3401: + case 3407 ... 3414: + case 3416 ... 3423: + case 3428 ... 3429: + case 3446 ... 3448: + case 3456 ... 3457: + case 3460 ... 3460: + case 3479 ... 3481: + case 3506 ... 3506: + case 3516 ... 3516: + case 3518 ... 3519: + case 3527 ... 3529: + case 3531 ... 3534: + case 3541 ... 3541: + case 3543 ... 3543: + case 3552 ... 3557: + case 3568 ... 3569: + case 3573 ... 3584: + case 3643 ... 3646: + case 3676 ... 3712: + case 3715 ... 3715: + case 3717 ... 3718: + case 3721 ... 3721: + case 3723 ... 3724: + case 3726 ... 3731: + case 3736 ... 3736: + case 3744 ... 3744: + case 3748 ... 3748: + case 3750 ... 3750: + case 3752 ... 3753: + case 3756 ... 3756: + case 3770 ... 3770: + case 3774 ... 3775: + case 3781 ... 3781: + case 3783 ... 3783: + case 3790 ... 3791: + case 3802 ... 3803: + case 3808 ... 3839: + case 3912 ... 3912: + case 3949 ... 3952: + case 3992 ... 3992: + case 4029 ... 4029: + case 4045 ... 4045: + case 4059 ... 4095: + case 4294 ... 4294: + case 4296 ... 4300: + case 4302 ... 4303: + case 4681 ... 4681: + case 4686 ... 4687: + case 4695 ... 4695: + case 4697 ... 4697: + case 4702 ... 4703: + case 4745 ... 4745: + case 4750 ... 4751: + case 4785 ... 4785: + case 4790 ... 4791: + case 4799 ... 4799: + case 4801 ... 4801: + case 4806 ... 4807: + case 4823 ... 4823: + case 4881 ... 4881: + case 4886 ... 4887: + case 4955 ... 4956: + case 4989 ... 4991: + case 5018 ... 5023: + case 5109 ... 5119: + case 5789 ... 5791: + case 5881 ... 5887: + case 5901 ... 5901: + case 5909 ... 5919: + case 5943 ... 5951: + case 5972 ... 5983: + case 5997 ... 5997: + case 6001 ... 6001: + case 6004 ... 6015: + case 6110 ... 6111: + case 6122 ... 6127: + case 6138 ... 6143: + case 6158 ... 6159: + case 6170 ... 6175: + case 6264 ... 6271: + case 6315 ... 6319: + case 6390 ... 6399: + case 6431 ... 6431: + case 6444 ... 6447: + case 6460 ... 6463: + case 6465 ... 6467: + case 6510 ... 6511: + case 6517 ... 6527: + case 6572 ... 6575: + case 6602 ... 6607: + case 6619 ... 6621: + case 6684 ... 6685: + case 6751 ... 6751: + case 6781 ... 6782: + case 6794 ... 6799: + case 6810 ... 6815: + case 6830 ... 6831: + case 6847 ... 6911: + case 6988 ... 6991: + case 7037 ... 7039: + case 7156 ... 7163: + case 7224 ... 7226: + case 7242 ... 7244: + case 7296 ... 7359: + case 7368 ... 7375: + case 7415 ... 7415: + case 7418 ... 7423: + case 7670 ... 7675: + case 7958 ... 7959: + case 7966 ... 7967: + case 8006 ... 8007: + case 8014 ... 8015: + case 8024 ... 8024: + case 8026 ... 8026: + case 8028 ... 8028: + case 8030 ... 8030: + case 8062 ... 8063: + case 8117 ... 8117: + case 8133 ... 8133: + case 8148 ... 8149: + case 8156 ... 8156: + case 8176 ... 8177: + case 8181 ... 8181: + case 8191 ... 8191: + case 8203 ... 8207: + case 8232 ... 8238: + case 8288 ... 8303: + case 8306 ... 8307: + case 8335 ... 8335: + case 8349 ... 8351: + case 8382 ... 8399: + case 8433 ... 8447: + case 8586 ... 8591: + case 9211 ... 9215: + case 9255 ... 9279: + case 9291 ... 9311: + case 11124 ... 11125: + case 11158 ... 11159: + case 11194 ... 11196: + case 11209 ... 11209: + case 11218 ... 11263: + case 11311 ... 11311: + case 11359 ... 11359: + case 11508 ... 11512: + case 11558 ... 11558: + case 11560 ... 11564: + case 11566 ... 11567: + case 11624 ... 11630: + case 11633 ... 11646: + case 11671 ... 11679: + case 11687 ... 11687: + case 11695 ... 11695: + case 11703 ... 11703: + case 11711 ... 11711: + case 11719 ... 11719: + case 11727 ... 11727: + case 11735 ... 11735: + case 11743 ... 11743: + case 11843 ... 11903: + case 11930 ... 11930: + case 12020 ... 12031: + case 12246 ... 12271: + case 12284 ... 12287: + case 12352 ... 12352: + case 12439 ... 12440: + case 12544 ... 12548: + case 12590 ... 12592: + case 12687 ... 12687: + case 12731 ... 12735: + case 12772 ... 12783: + case 12831 ... 12831: + case 13055 ... 13055: + case 19894 ... 19903: + case 40909 ... 40959: + case 42125 ... 42127: + case 42183 ... 42191: + case 42540 ... 42559: + case 42654 ... 42654: + case 42744 ... 42751: + case 42895 ... 42895: + case 42926 ... 42927: + case 42930 ... 42998: + case 43052 ... 43055: + case 43066 ... 43071: + case 43128 ... 43135: + case 43205 ... 43213: + case 43226 ... 43231: + case 43260 ... 43263: + case 43348 ... 43358: + case 43389 ... 43391: + case 43470 ... 43470: + case 43482 ... 43485: + case 43519 ... 43519: + case 43575 ... 43583: + case 43598 ... 43599: + case 43610 ... 43611: + case 43715 ... 43738: + case 43767 ... 43776: + case 43783 ... 43784: + case 43791 ... 43792: + case 43799 ... 43807: + case 43815 ... 43815: + case 43823 ... 43823: + case 43872 ... 43875: + case 43878 ... 43967: + case 44014 ... 44015: + case 44026 ... 44031: + case 55204 ... 55215: + case 55239 ... 55242: + case 55292 ... 55295: + case 57344 ... 63743: + case 64110 ... 64111: + case 64218 ... 64255: + case 64263 ... 64274: + case 64280 ... 64284: + case 64311 ... 64311: + case 64317 ... 64317: + case 64319 ... 64319: + case 64322 ... 64322: + case 64325 ... 64325: + case 64450 ... 64466: + case 64832 ... 64847: + case 64912 ... 64913: + case 64968 ... 65007: + case 65022 ... 65023: + case 65050 ... 65055: + case 65070 ... 65071: + case 65107 ... 65107: + case 65127 ... 65127: + case 65132 ... 65135: + case 65141 ... 65141: + case 65277 ... 65280: + case 65471 ... 65473: + case 65480 ... 65481: + case 65488 ... 65489: + case 65496 ... 65497: + case 65501 ... 65503: + case 65511 ... 65511: + case 65519 ... 65531: + case 65534 ... 65535: + print_str_char_u(c); + break; + case 65548 ... 65548: + case 65575 ... 65575: + case 65595 ... 65595: + case 65598 ... 65598: + case 65614 ... 65615: + case 65630 ... 65663: + case 65787 ... 65791: + case 65795 ... 65798: + case 65844 ... 65846: + case 65933 ... 65935: + case 65948 ... 65951: + case 65953 ... 65999: + case 66046 ... 66175: + case 66205 ... 66207: + case 66257 ... 66271: + case 66300 ... 66303: + case 66340 ... 66351: + case 66379 ... 66383: + case 66427 ... 66431: + case 66462 ... 66462: + case 66500 ... 66503: + case 66518 ... 66559: + case 66718 ... 66719: + case 66730 ... 66815: + case 66856 ... 66863: + case 66916 ... 66926: + case 66928 ... 67071: + case 67383 ... 67391: + case 67414 ... 67423: + case 67432 ... 67583: + case 67590 ... 67591: + case 67593 ... 67593: + case 67638 ... 67638: + case 67641 ... 67643: + case 67645 ... 67646: + case 67670 ... 67670: + case 67743 ... 67750: + case 67760 ... 67839: + case 67868 ... 67870: + case 67898 ... 67902: + case 67904 ... 67967: + case 68024 ... 68029: + case 68032 ... 68095: + case 68100 ... 68100: + case 68103 ... 68107: + case 68116 ... 68116: + case 68120 ... 68120: + case 68148 ... 68151: + case 68155 ... 68158: + case 68168 ... 68175: + case 68185 ... 68191: + case 68256 ... 68287: + case 68327 ... 68330: + case 68343 ... 68351: + case 68406 ... 68408: + case 68438 ... 68439: + case 68467 ... 68471: + case 68498 ... 68504: + case 68509 ... 68520: + case 68528 ... 68607: + case 68681 ... 69215: + case 69247 ... 69631: + case 69710 ... 69713: + case 69744 ... 69758: + case 69821 ... 69821: + case 69826 ... 69839: + case 69865 ... 69871: + case 69882 ... 69887: + case 69941 ... 69941: + case 69956 ... 69967: + case 70007 ... 70015: + case 70089 ... 70092: + case 70094 ... 70095: + case 70107 ... 70112: + case 70133 ... 70143: + case 70162 ... 70162: + case 70206 ... 70319: + case 70379 ... 70383: + case 70394 ... 70400: + case 70404 ... 70404: + case 70413 ... 70414: + case 70417 ... 70418: + case 70441 ... 70441: + case 70449 ... 70449: + case 70452 ... 70452: + case 70458 ... 70459: + case 70469 ... 70470: + case 70473 ... 70474: + case 70478 ... 70486: + case 70488 ... 70492: + case 70500 ... 70501: + case 70509 ... 70511: + case 70517 ... 70783: + case 70856 ... 70863: + case 70874 ... 71039: + case 71094 ... 71095: + case 71114 ... 71167: + case 71237 ... 71247: + case 71258 ... 71295: + case 71352 ... 71359: + case 71370 ... 71839: + case 71923 ... 71934: + case 71936 ... 72383: + case 72441 ... 73727: + case 74649 ... 74751: + case 74863 ... 74863: + case 74869 ... 77823: + case 78895 ... 92159: + case 92729 ... 92735: + case 92767 ... 92767: + case 92778 ... 92781: + case 92784 ... 92879: + case 92910 ... 92911: + case 92918 ... 92927: + case 92998 ... 93007: + case 93018 ... 93018: + case 93026 ... 93026: + case 93048 ... 93052: + case 93072 ... 93951: + case 94021 ... 94031: + case 94079 ... 94094: + case 94112 ... 110591: + case 110594 ... 113663: + case 113771 ... 113775: + case 113789 ... 113791: + case 113801 ... 113807: + case 113818 ... 113819: + case 113824 ... 118783: + case 119030 ... 119039: + case 119079 ... 119080: + case 119155 ... 119162: + case 119262 ... 119295: + case 119366 ... 119551: + case 119639 ... 119647: + case 119666 ... 119807: + case 119893 ... 119893: + case 119965 ... 119965: + case 119968 ... 119969: + case 119971 ... 119972: + case 119975 ... 119976: + case 119981 ... 119981: + case 119994 ... 119994: + case 119996 ... 119996: + case 120004 ... 120004: + case 120070 ... 120070: + case 120075 ... 120076: + case 120085 ... 120085: + case 120093 ... 120093: + case 120122 ... 120122: + case 120127 ... 120127: + case 120133 ... 120133: + case 120135 ... 120137: + case 120145 ... 120145: + case 120486 ... 120487: + case 120780 ... 120781: + case 120832 ... 124927: + case 125125 ... 125126: + case 125143 ... 126463: + case 126468 ... 126468: + case 126496 ... 126496: + case 126499 ... 126499: + case 126501 ... 126502: + case 126504 ... 126504: + case 126515 ... 126515: + case 126520 ... 126520: + case 126522 ... 126522: + case 126524 ... 126529: + case 126531 ... 126534: + case 126536 ... 126536: + case 126538 ... 126538: + case 126540 ... 126540: + case 126544 ... 126544: + case 126547 ... 126547: + case 126549 ... 126550: + case 126552 ... 126552: + case 126554 ... 126554: + case 126556 ... 126556: + case 126558 ... 126558: + case 126560 ... 126560: + case 126563 ... 126563: + case 126565 ... 126566: + case 126571 ... 126571: + case 126579 ... 126579: + case 126584 ... 126584: + case 126589 ... 126589: + case 126591 ... 126591: + case 126602 ... 126602: + case 126620 ... 126624: + case 126628 ... 126628: + case 126634 ... 126634: + case 126652 ... 126703: + case 126706 ... 126975: + case 127020 ... 127023: + case 127124 ... 127135: + case 127151 ... 127152: + case 127168 ... 127168: + case 127184 ... 127184: + case 127222 ... 127231: + case 127245 ... 127247: + case 127279 ... 127279: + case 127340 ... 127343: + case 127387 ... 127461: + case 127491 ... 127503: + case 127547 ... 127551: + case 127561 ... 127567: + case 127570 ... 127743: + case 127789 ... 127791: + case 127870 ... 127871: + case 127951 ... 127955: + case 127992 ... 127999: + case 128255 ... 128255: + case 128331 ... 128335: + case 128378 ... 128378: + case 128420 ... 128420: + case 128579 ... 128580: + case 128720 ... 128735: + case 128749 ... 128751: + case 128756 ... 128767: + case 128884 ... 128895: + case 128981 ... 129023: + case 129036 ... 129039: + case 129096 ... 129103: + case 129114 ... 129119: + case 129160 ... 129167: + case 129198 ... 131071: + case 173783 ... 173823: + case 177973 ... 177983: + case 178206 ... 194559: + case 195102 ... 917759: + case 918000 ... 1114110: + print_str_char_U(c); + break; + default: + print_codepoint(c); + break; + } +} + +void print_char(val_char_t c) +{ + printf("#\\"); + switch (c) { + case 0: + printf("nul"); break; + case 8: + printf("backspace"); break; + case 9: + printf("tab"); break; + case 10: + printf("newline"); break; + case 11: + printf("vtab"); break; + case 12: + printf("page"); break; + case 13: + printf("return"); break; + case 32: + printf("space"); break; + case 127: + printf("rubout"); break; + default: + print_codepoint(c); + } +} + +void print_codepoint(val_char_t c) +{ + char buffer[5] = {0}; + utf8_encode_char(c, buffer); + printf("%s", buffer); +} + +int utf8_encode_char(val_char_t c, char *buffer) +{ + // Output to buffer using UTF-8 encoding of codepoint + // https://en.wikipedia.org/wiki/UTF-8 + if (c < 128) { + buffer[0] = (char) c; + return 1; + } else if (c < 2048) { + buffer[0] = (char)(c >> 6) | 192; + buffer[1] = ((char) c & 63) | 128; + return 2; + } else if (c < 65536) { + buffer[0] = (char)(c >> 12) | 224; + buffer[1] = ((char)(c >> 6) & 63) | 128; + buffer[2] = ((char) c & 63) | 128; + return 3; + } else { + buffer[0] = (char)(c >> 18) | 240; + buffer[1] = ((char)(c >> 12) & 63) | 128; + buffer[2] = ((char)(c >> 6) & 63) | 128; + buffer[3] = ((char) c & 63) | 128; + return 4; + } +} diff --git a/langs/mug/print.h b/langs/mug/print.h new file mode 100644 index 00000000..c22081a2 --- /dev/null +++ b/langs/mug/print.h @@ -0,0 +1,8 @@ +#ifndef PRINT_H +#define PRINT_H + +#include "values.h" + +void print_result(val_t); + +#endif diff --git a/langs/mug/read-all.rkt b/langs/mug/read-all.rkt new file mode 100644 index 00000000..fd03042b --- /dev/null +++ b/langs/mug/read-all.rkt @@ -0,0 +1,8 @@ +#lang racket +(provide read-all) +;; read all s-expression until eof +(define (read-all p) + (let ((r (read p))) + (if (eof-object? r) + '() + (cons r (read-all p))))) diff --git a/langs/mug/runtime.h b/langs/mug/runtime.h new file mode 100644 index 00000000..f594f0f6 --- /dev/null +++ b/langs/mug/runtime.h @@ -0,0 +1,11 @@ +#ifndef RUNTIME_H +#define RUNTIME_H +int64_t entry(); +extern FILE* in; +extern FILE* out; +extern void (*error_handler)(); + +// in words +#define heap_size 10000 +extern int64_t *heap; +#endif /* RUNTIME_H */ diff --git a/langs/mug/symbol.c b/langs/mug/symbol.c new file mode 100644 index 00000000..08a9e5dd --- /dev/null +++ b/langs/mug/symbol.c @@ -0,0 +1,149 @@ +#include +#include +#include +#include +#include "values.h" + +static uint64_t gensym_ctr = 0; + +val_str_t *str_from_cstr(const char *); +int str_cmp(const val_str_t *, const val_str_t *); +val_str_t *str_dup(const val_str_t *); + +// binary tree node +struct Node { + val_symb_t* elem; + struct Node* left; + struct Node* right; +}; + +static struct Node *symbol_tbl = NULL; + +val_symb_t *intern_symbol(val_symb_t* symb) +{ + struct Node **curr = &symbol_tbl; + + while (*curr) { + struct Node *t = *curr; + int r = str_cmp((val_str_t*)symb, (val_str_t*)t->elem); + if (r == 0) { + return t->elem; + } else if (r < 0) { + curr = &t->left; + } else { + curr = &t->right; + } + } + + // wasn't found, so insert it + + *curr = calloc(1, sizeof(struct Node)); + + struct Node* t = *curr; + t->elem = symb; // str_dup(str); + + return t->elem; +} + +val_symb_t *str_to_symbol(const val_str_t *str) +{ + // str_dup needed if string is mutable + return intern_symbol((val_symb_t*)str_dup(str)); +} + +val_symb_t *gensym(void) +{ + char s[100]; // uint64_t has maximum 20 digits + sprintf(s, "g%" PRIu64, gensym_ctr++); + return (val_symb_t*)str_from_cstr(s); // uninterned symbol +} + +val_str_t *str_from_cstr(const char *s) +{ + int64_t len = strlen(s); + val_str_t *str = + malloc(sizeof(int64_t) + len * sizeof(val_char_t)); + + if (!str) + return NULL; + + str->len = len; + int i; + for (i = 0; i < len; i++) { + str->codepoints[i] = (val_char_t)s[i]; + } + return str; +} + +int str_cmp(const val_str_t *s1, const val_str_t *s2) +{ + int64_t len1 = s1->len; + int64_t len2 = s2->len; + + int64_t len = len1 < len2 ? len1 : len2; + int i; + + for (i = 0; i < len; i = i+1) { + if (s1->codepoints[i] != s2->codepoints[i]) + return s1->codepoints[i] - s2->codepoints[i]; + } + + return len1 - len2; +} + +val_str_t *str_dup(const val_str_t *s) +{ + int64_t n = s->len; + val_str_t *d; + + d = calloc(2+n, sizeof(val_char_t)); + if (!d) + return NULL; + + return memcpy(d, s, (2+n) * sizeof(val_char_t)); +} + + +#ifdef CHECK +// $ gcc -DCHECK symbol.c +#include +int main(void) +{ + val_str_t *foo = str_from_cstr("foo"); + val_str_t *foo_ = str_from_cstr("foo"); + val_str_t *bar = str_from_cstr("bar"); + val_str_t *foo1 = str_from_cstr("foo1"); + val_str_t *foo1_ = str_from_cstr("foo1"); + val_str_t *fo = str_from_cstr("fo"); + val_str_t *fo_ = str_from_cstr("fo"); + + assert(str_cmp(foo,foo_) == 0); + assert(str_cmp(foo1,foo1_) == 0); + assert(str_cmp(fo,fo_) == 0); + + assert(str_cmp(foo,foo1) < 0); + assert(str_cmp(foo,foo1) == str_cmp(foo,foo1_)); + assert(str_cmp(foo,foo1) == str_cmp(foo_,foo1)); + assert(str_cmp(foo,foo1) == str_cmp(foo_,foo1_)); + + assert(str_cmp(foo,fo) > 0); + assert(str_cmp(foo,fo) == str_cmp(foo,fo_)); + assert(str_cmp(foo,fo) == str_cmp(foo_,fo)); + assert(str_cmp(foo,fo) == str_cmp(foo_,fo_)); + + assert(str_cmp(foo,bar) > 0); + assert(str_cmp(bar,foo) < 0); + + assert(str_cmp(fo,bar) > 0); + assert(str_cmp(bar,fo) < 0); + + val_symb_t *foo_symb = (val_symb_t*)foo; + val_symb_t *foo_symb_ = (val_symb_t*)foo_; + assert(foo_symb != foo_symb_); + val_symb_t *foo_symb_i = intern_symbol(foo_symb); + val_symb_t *foo_symb_i_ = intern_symbol(foo_symb_); + assert(foo_symb_i == foo_symb_i_); + + return 0; +} +#endif diff --git a/langs/mug/test/build-runtime.rkt b/langs/mug/test/build-runtime.rkt new file mode 100644 index 00000000..7023ee0b --- /dev/null +++ b/langs/mug/test/build-runtime.rkt @@ -0,0 +1,8 @@ +#lang racket +(require a86/interp) + +;; link with runtime for IO operations +(unless (file-exists? "../runtime.o") + (system "make -C .. runtime.o")) +(current-objs + (list (path->string (normalize-path "../runtime.o")))) diff --git a/langs/mug/test/compile.rkt b/langs/mug/test/compile.rkt new file mode 100644 index 00000000..81defae6 --- /dev/null +++ b/langs/mug/test/compile.rkt @@ -0,0 +1,18 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../compile.rkt" + "../unload-bits-asm.rkt" + a86/interp) + +;; link with runtime for IO operations +(unless (file-exists? "../runtime.o") + (system "make -C .. runtime.o")) +(current-objs + (list (path->string (normalize-path "../runtime.o")))) + +(test-runner (λ p (unload/free (asm-interp (compile (parse p)))))) +(test-runner-io (λ (s . p) + (match (asm-interp/io (compile (parse p)) s) + ['err 'err] + [(cons r o) (cons (unload/free r) o)]))) diff --git a/langs/mug/test/interp-defun.rkt b/langs/mug/test/interp-defun.rkt new file mode 100644 index 00000000..bd8cc7e8 --- /dev/null +++ b/langs/mug/test/interp-defun.rkt @@ -0,0 +1,8 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../interp-defun.rkt" + "../interp-io.rkt") + +(test-runner (λ p (interp (parse p)))) +(test-runner-io (λ (s . p) (interp/io (parse p) s))) diff --git a/langs/mug/test/interp.rkt b/langs/mug/test/interp.rkt new file mode 100644 index 00000000..cd7b654e --- /dev/null +++ b/langs/mug/test/interp.rkt @@ -0,0 +1,8 @@ +#lang racket +(require "test-runner.rkt" + "../parse.rkt" + "../interp.rkt" + "../interp-io.rkt") + +(test-runner (λ p (interp (parse p)))) +(test-runner-io (λ (s . p) (interp/io (parse p) s))) diff --git a/langs/mug/test/test-runner.rkt b/langs/mug/test/test-runner.rkt new file mode 100644 index 00000000..c8ca1b39 --- /dev/null +++ b/langs/mug/test/test-runner.rkt @@ -0,0 +1,311 @@ +#lang racket +(provide test-runner test-runner-io) +(require rackunit) + +(define (test-runner run) + ;; Abscond examples + (check-equal? (run 7) 7) + (check-equal? (run -8) -8) + + ;; Blackmail examples + (check-equal? (run '(add1 (add1 7))) 9) + (check-equal? (run '(add1 (sub1 7))) 7) + + ;; Con examples + (check-equal? (run '(if (zero? 0) 1 2)) 1) + (check-equal? (run '(if (zero? 1) 1 2)) 2) + (check-equal? (run '(if (zero? -7) 1 2)) 2) + (check-equal? (run '(if (zero? 0) + (if (zero? 1) 1 2) + 7)) + 2) + (check-equal? (run '(if (zero? (if (zero? 0) 1 0)) + (if (zero? 1) 1 2) + 7)) + 7) + + ;; Dupe examples + (check-equal? (run #t) #t) + (check-equal? (run #f) #f) + (check-equal? (run '(if #t 1 2)) 1) + (check-equal? (run '(if #f 1 2)) 2) + (check-equal? (run '(if 0 1 2)) 1) + (check-equal? (run '(if #t 3 4)) 3) + (check-equal? (run '(if #f 3 4)) 4) + (check-equal? (run '(if 0 3 4)) 3) + (check-equal? (run '(zero? 4)) #f) + (check-equal? (run '(zero? 0)) #t) + + ;; Dodger examples + (check-equal? (run #\a) #\a) + (check-equal? (run #\b) #\b) + (check-equal? (run '(char? #\a)) #t) + (check-equal? (run '(char? #t)) #f) + (check-equal? (run '(char? 8)) #f) + (check-equal? (run '(char->integer #\a)) (char->integer #\a)) + (check-equal? (run '(integer->char 955)) #\λ) + + ;; Extort examples + (check-equal? (run '(add1 #f)) 'err) + (check-equal? (run '(sub1 #f)) 'err) + (check-equal? (run '(zero? #f)) 'err) + (check-equal? (run '(char->integer #f)) 'err) + (check-equal? (run '(integer->char #f)) 'err) + (check-equal? (run '(integer->char -1)) 'err) + (check-equal? (run '(write-byte #f)) 'err) + (check-equal? (run '(write-byte -1)) 'err) + (check-equal? (run '(write-byte 256)) 'err) + + ;; Fraud examples + (check-equal? (run '(let ((x 7)) x)) 7) + (check-equal? (run '(let ((x 7)) 2)) 2) + (check-equal? (run '(let ((x 7)) (add1 x))) 8) + (check-equal? (run '(let ((x (add1 7))) x)) 8) + (check-equal? (run '(let ((x 7)) (let ((y 2)) x))) 7) + (check-equal? (run '(let ((x 7)) (let ((x 2)) x))) 2) + (check-equal? (run '(let ((x 7)) (let ((x (add1 x))) x))) 8) + + (check-equal? (run '(let ((x 0)) + (if (zero? x) 7 8))) + 7) + (check-equal? (run '(let ((x 1)) + (add1 (if (zero? x) 7 8)))) + 9) + (check-equal? (run '(+ 3 4)) 7) + (check-equal? (run '(- 3 4)) -1) + (check-equal? (run '(+ (+ 2 1) 4)) 7) + (check-equal? (run '(+ (+ 2 1) (+ 2 2))) 7) + (check-equal? (run '(let ((x (+ 1 2))) + (let ((z (- 4 x))) + (+ (+ x x) z)))) + 7) + (check-equal? (run '(= 5 5)) #t) + (check-equal? (run '(= 4 5)) #f) + (check-equal? (run '(= (add1 4) 5)) #t) + (check-equal? (run '(< 5 5)) #f) + (check-equal? (run '(< 4 5)) #t) + (check-equal? (run '(< (add1 4) 5)) #f) + + ;; Hustle examples + (check-equal? (run ''()) '()) + (check-equal? (run '(box 1)) (box 1)) + (check-equal? (run '(cons 1 2)) (cons 1 2)) + (check-equal? (run '(unbox (box 1))) 1) + (check-equal? (run '(car (cons 1 2))) 1) + (check-equal? (run '(cdr (cons 1 2))) 2) + (check-equal? (run '(cons 1 '())) (list 1)) + (check-equal? (run '(let ((x (cons 1 2))) + (begin (cdr x) + (car x)))) + 1) + (check-equal? (run '(let ((x (cons 1 2))) + (let ((y (box 3))) + (unbox y)))) + 3) + (check-equal? (run '(eq? 1 1)) #t) + (check-equal? (run '(eq? 1 2)) #f) + (check-equal? (run '(eq? (cons 1 2) (cons 1 2))) #f) + (check-equal? (run '(let ((x (cons 1 2))) (eq? x x))) #t) + + ;; Hoax examples + (check-equal? (run '(make-vector 0 0)) #()) + (check-equal? (run '(make-vector 1 0)) #(0)) + (check-equal? (run '(make-vector 3 0)) #(0 0 0)) + (check-equal? (run '(make-vector 3 5)) #(5 5 5)) + (check-equal? (run '(vector? (make-vector 0 0))) #t) + (check-equal? (run '(vector? (cons 0 0))) #f) + (check-equal? (run '(vector-ref (make-vector 3 5) -1)) 'err) + (check-equal? (run '(vector-ref (make-vector 3 5) 0)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 1)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 2)) 5) + (check-equal? (run '(vector-ref (make-vector 3 5) 3)) 'err) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 0 4) + x))) + #(4 5 5)) + (check-equal? (run '(let ((x (make-vector 3 5))) + (begin (vector-set! x 1 4) + x))) + #(5 4 5)) + (check-equal? (run '(vector-length (make-vector 3 #f))) 3) + (check-equal? (run '(vector-length (make-vector 0 #f))) 0) + (check-equal? (run '"") "") + (check-equal? (run '"fred") "fred") + (check-equal? (run '"wilma") "wilma") + (check-equal? (run '(make-string 0 #\f)) "") + (check-equal? (run '(make-string 3 #\f)) "fff") + (check-equal? (run '(make-string 3 #\g)) "ggg") + (check-equal? (run '(string-length "")) 0) + (check-equal? (run '(string-length "fred")) 4) + (check-equal? (run '(string-ref "fred" 0)) #\f) + (check-equal? (run '(string-ref "fred" 1)) #\r) + (check-equal? (run '(string-ref "fred" 2)) #\e) + (check-equal? (run '(string-ref "fred" 4)) 'err) + (check-equal? (run '(string? "fred")) #t) + (check-equal? (run '(string? (cons 1 2))) #f) + + ;; Iniquity tests + (check-equal? (run + '(define (f x) x) + '(f 5)) + 5) + + (check-equal? (run + '(define (tri x) + (if (zero? x) + 0 + (+ x (tri (sub1 x))))) + '(tri 9)) + 45) + + (check-equal? (run + '(define (f x) x) + '(define (g x) (f x)) + '(g 5)) + 5) + (check-equal? (run + '(define (even? x) + (if (zero? x) + #t + (odd? (sub1 x)))) + '(define (odd? x) + (if (zero? x) + #f + (even? (sub1 x)))) + '(even? 101)) + #f) + (check-equal? (run + '(define (map-add1 xs) + (if (empty? xs) + '() + (cons (add1 (car xs)) + (map-add1 (cdr xs))))) + '(map-add1 (cons 1 (cons 2 (cons 3 '()))))) + '(2 3 4)) + (check-equal? (run + '(define (f x) + 10) + '(f 1)) + 10) + (check-equal? (run + '(define (f x) + 10) + '(let ((x 2)) (f 1))) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(f 1 2)) + 10) + (check-equal? (run + '(define (f x y) + 10) + '(let ((z 2)) (f 1 2))) + 10) + + + ;; Loot examples + (check-equal? (run '((λ (x) x) 5)) + 5) + + (check-equal? (run '(let ((f (λ (x) x))) (f 5))) + 5) + (check-equal? (run '(let ((f (λ (x y) x))) (f 5 7))) + 5) + (check-equal? (run '(let ((f (λ (x y) y))) (f 5 7))) + 7) + (check-equal? (run '((let ((x 1)) + (let ((y 2)) + (lambda (z) (cons x (cons y (cons z '())))))) + 3)) + '(1 2 3)) + (check-equal? (run '(define (adder n) + (λ (x) (+ x n))) + '((adder 5) 10)) + 15) + (check-equal? (run '(((λ (t) + ((λ (f) (t (λ (z) ((f f) z)))) + (λ (f) (t (λ (z) ((f f) z)))))) + (λ (tri) + (λ (n) + (if (zero? n) + 0 + (+ n (tri (sub1 n))))))) + 36)) + 666) + + ;; Mug examples + (check-equal? (run '(symbol? 'foo)) #t) + (check-equal? (run '(symbol? (string->symbol "foo"))) #t) + (check-equal? (run '(eq? 'foo 'foo)) #t) + (check-equal? (run '(eq? (string->symbol "foo") + (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'foo (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'fff (string->symbol (make-string 3 #\f)))) + #t) + (check-equal? (run '(eq? 'g0 (gensym))) #f) + (check-equal? (run '(eq? (gensym) (gensym))) #f) + (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f)) + +(define (test-runner-io run) + ;; Evildoer examples + (check-equal? (run "" 7) (cons 7 "")) + (check-equal? (run "" '(write-byte 97)) (cons (void) "a")) + (check-equal? (run "a" '(read-byte)) (cons 97 "")) + (check-equal? (run "b" '(begin (write-byte 97) (read-byte))) + (cons 98 "a")) + (check-equal? (run "" '(read-byte)) (cons eof "")) + (check-equal? (run "" '(eof-object? (read-byte))) (cons #t "")) + (check-equal? (run "a" '(eof-object? (read-byte))) (cons #f "")) + (check-equal? (run "" '(begin (write-byte 97) (write-byte 98))) + (cons (void) "ab")) + + (check-equal? (run "ab" '(peek-byte)) (cons 97 "")) + (check-equal? (run "ab" '(begin (peek-byte) (read-byte))) (cons 97 "")) + ;; Extort examples + (check-equal? (run "" '(write-byte #t)) (cons 'err "")) + + ;; Fraud examples + (check-equal? (run "" '(let ((x 97)) (write-byte x))) (cons (void) "a")) + (check-equal? (run "" + '(let ((x 97)) + (begin (write-byte x) + x))) + (cons 97 "a")) + (check-equal? (run "b" '(let ((x 97)) (begin (read-byte) x))) + (cons 97 "")) + (check-equal? (run "b" '(let ((x 97)) (begin (peek-byte) x))) + (cons 97 "")) + + ;; Hustle examples + (check-equal? (run "" + '(let ((x 1)) + (begin (write-byte 97) + 1))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x 1)) + (let ((y 2)) + (begin (write-byte 97) + 1)))) + (cons 1 "a")) + + (check-equal? (run "" + '(let ((x (cons 1 2))) + (begin (write-byte 97) + (car x)))) + (cons 1 "a")) + ;; Iniquity examples + #| + (check-equal? (run "" + '(define (print-alphabet i) + (if (zero? i) + (void) + (begin (write-byte (- 123 i)) + (print-alphabet (sub1 i))))) + '(print-alphabet 26)) + (cons (void) "abcdefghijklmnopqrstuvwxyz")) +|#) diff --git a/langs/mug/try.rkt b/langs/mug/try.rkt new file mode 100644 index 00000000..b52ae93d --- /dev/null +++ b/langs/mug/try.rkt @@ -0,0 +1,3 @@ +#lang racket +(void) + diff --git a/langs/mug/types.h b/langs/mug/types.h new file mode 100644 index 00000000..4093c4f7 --- /dev/null +++ b/langs/mug/types.h @@ -0,0 +1,42 @@ +#ifndef TYPES_H +#define TYPES_H + +/* + Bit layout of values + + Values are either: + - Immediates: end in #b000 + - Pointers + + Immediates are either + - Integers: end in #b0 000 + - Characters: end in #b01 000 + - True: #b11 000 + - False: #b1 11 000 + - Eof: #b10 11 000 + - Void: #b11 11 000 + - Empty: #b100 11 000 +*/ +#define imm_shift 3 +#define ptr_type_mask ((1 << imm_shift) - 1) +#define box_type_tag 1 +#define cons_type_tag 2 +#define vect_type_tag 3 +#define str_type_tag 4 +#define proc_type_tag 5 +#define symb_type_tag 6 +#define int_shift (1 + imm_shift) +#define int_type_mask ((1 << int_shift) - 1) +#define int_type_tag (0 << (int_shift - 1)) +#define nonint_type_tag (1 << (int_shift - 1)) +#define char_shift (int_shift + 1) +#define char_type_mask ((1 << char_shift) - 1) +#define char_type_tag ((0 << (char_shift - 1)) | nonint_type_tag) +#define nonchar_type_tag ((1 << (char_shift - 1)) | nonint_type_tag) +#define val_true ((0 << char_shift) | nonchar_type_tag) +#define val_false ((1 << char_shift) | nonchar_type_tag) +#define val_eof ((2 << char_shift) | nonchar_type_tag) +#define val_void ((3 << char_shift) | nonchar_type_tag) +#define val_empty ((4 << char_shift) | nonchar_type_tag) + +#endif diff --git a/langs/mug/types.rkt b/langs/mug/types.rkt new file mode 100644 index 00000000..c02d2d1e --- /dev/null +++ b/langs/mug/types.rkt @@ -0,0 +1,71 @@ +#lang racket +(provide (all-defined-out)) + +(define imm-shift 3) +(define imm-mask #b111) +(define ptr-mask #b111) +(define type-box #b001) +(define type-cons #b010) +(define type-vect #b011) +(define type-str #b100) +(define type-proc #b101) +(define type-symb #b110) +(define int-shift (+ 1 imm-shift)) +(define char-shift (+ 2 imm-shift)) +(define type-int #b0000) +(define mask-int #b1111) +(define type-char #b01000) +(define mask-char #b11111) +(define val-true #b0011000) +(define val-false #b0111000) +(define val-eof #b1011000) +(define val-void #b1111000) +(define val-empty #b10011000) + +(define (bits->value b) + (cond [(= type-int (bitwise-and b mask-int)) + (arithmetic-shift b (- int-shift))] + [(= type-char (bitwise-and b mask-char)) + (integer->char (arithmetic-shift b (- char-shift)))] + [(= b val-true) #t] + [(= b val-false) #f] + [(= b val-eof) eof] + [(= b val-void) (void)] + [(= b val-empty) '()] + [else (error "invalid bits")])) + +(define (imm->bits v) + (cond [(eof-object? v) val-eof] + [(integer? v) (arithmetic-shift v int-shift)] + [(char? v) + (bitwise-ior type-char + (arithmetic-shift (char->integer v) char-shift))] + [(eq? v #t) val-true] + [(eq? v #f) val-false] + [(void? v) val-void] + [(empty? v) val-empty])) + + +(define (imm-bits? v) + (zero? (bitwise-and v imm-mask))) + +(define (int-bits? v) + (zero? (bitwise-and v mask-int))) + +(define (char-bits? v) + (= type-char (bitwise-and v mask-char))) + +(define (cons-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-cons))) + +(define (box-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-box))) + +(define (vect-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-vect))) + +(define (str-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-str))) + +(define (proc-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-proc))) diff --git a/langs/mug/unload-bits-asm.rkt b/langs/mug/unload-bits-asm.rkt new file mode 100644 index 00000000..be9b50c8 --- /dev/null +++ b/langs/mug/unload-bits-asm.rkt @@ -0,0 +1,43 @@ +#lang racket +(provide unload/free unload-value) +(require "types.rkt" + ffi/unsafe) + +;; Answer* -> Answer +(define (unload/free a) + (match a + ['err 'err] + [(cons h v) (begin0 (unload-value v) + (free h))])) + +;; Value* -> Value +(define (unload-value v) + (match v + [(? imm-bits?) (bits->value v)] + [(? box-bits? i) + (box (unload-value (heap-ref i)))] + [(? cons-bits? i) + (cons (unload-value (heap-ref (+ i 8))) + (unload-value (heap-ref i)))] + [(? vect-bits? i) + (if (zero? (untag i)) + (vector) + (build-vector (heap-ref i) + (lambda (j) + (unload-value (heap-ref (+ i (* 8 (add1 j))))))))] + [(? str-bits? i) + (if (zero? (untag i)) + (string) + (build-string (heap-ref i) + (lambda (j) + (char-ref (+ i 8) j))))])) + +(define (untag i) + (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) + (integer-length ptr-mask))) + +(define (heap-ref i) + (ptr-ref (cast (untag i) _int64 _pointer) _uint64)) + +(define (char-ref i j) + (integer->char (ptr-ref (cast (untag i) _int64 _pointer) _uint32 j))) diff --git a/langs/mug/values.c b/langs/mug/values.c new file mode 100644 index 00000000..32e922bd --- /dev/null +++ b/langs/mug/values.c @@ -0,0 +1,121 @@ +#include "types.h" +#include "values.h" + +type_t val_typeof(val_t x) +{ + switch (x & ptr_type_mask) { + case box_type_tag: + return T_BOX; + case cons_type_tag: + return T_CONS; + case vect_type_tag: + return T_VECT; + case str_type_tag: + return T_STR; + case symb_type_tag: + return T_SYMB; + case proc_type_tag: + return T_PROC; + } + + if ((int_type_mask & x) == int_type_tag) + return T_INT; + if ((char_type_mask & x) == char_type_tag) + return T_CHAR; + + switch (x) { + case val_true: + case val_false: + return T_BOOL; + case val_eof: + return T_EOF; + case val_void: + return T_VOID; + case val_empty: + return T_EMPTY; + } + + return T_INVALID; +} + +int64_t val_unwrap_int(val_t x) +{ + return x >> int_shift; +} +val_t val_wrap_int(int64_t i) +{ + return (i << int_shift) | int_type_tag; +} + +int val_unwrap_bool(val_t x) +{ + return x == val_true; +} +val_t val_wrap_bool(int b) +{ + return b ? val_true : val_false; +} + +val_char_t val_unwrap_char(val_t x) +{ + return (val_char_t)(x >> char_shift); +} +val_t val_wrap_char(val_char_t c) +{ + return (((val_t)c) << char_shift) | char_type_tag; +} + +val_t val_wrap_eof(void) +{ + return val_eof; +} + +val_t val_wrap_void(void) +{ + return val_void; +} + +val_box_t* val_unwrap_box(val_t x) +{ + return (val_box_t *)(x ^ box_type_tag); +} +val_t val_wrap_box(val_box_t* b) +{ + return ((val_t)b) | box_type_tag; +} + +val_cons_t* val_unwrap_cons(val_t x) +{ + return (val_cons_t *)(x ^ cons_type_tag); +} +val_t val_wrap_cons(val_cons_t *c) +{ + return ((val_t)c) | cons_type_tag; +} + +val_vect_t* val_unwrap_vect(val_t x) +{ + return (val_vect_t *)(x ^ vect_type_tag); +} +val_t val_wrap_vect(val_vect_t *v) +{ + return ((val_t)v) | vect_type_tag; +} + +val_str_t* val_unwrap_str(val_t x) +{ + return (val_str_t *)(x ^ str_type_tag); +} +val_t val_wrap_str(val_str_t *v) +{ + return ((val_t)v) | str_type_tag; +} + +val_symb_t* val_unwrap_symb(val_t x) +{ + return (val_symb_t *)(x ^ symb_type_tag); +} +val_t val_wrap_symb(val_symb_t *v) +{ + return ((val_t)v) | symb_type_tag; +} diff --git a/langs/mug/values.h b/langs/mug/values.h new file mode 100644 index 00000000..c1de09d6 --- /dev/null +++ b/langs/mug/values.h @@ -0,0 +1,84 @@ +#ifndef VALUES_H +#define VALUES_H + +#include + +/* any abstract value */ +typedef int64_t val_t; + +typedef enum type_t { + T_INVALID = -1, + /* immediates */ + T_INT, + T_BOOL, + T_CHAR, + T_EOF, + T_VOID, + T_EMPTY, + /* pointers */ + T_BOX, + T_CONS, + T_VECT, + T_STR, + T_SYMB, + T_PROC, +} type_t; + +typedef uint32_t val_char_t; +typedef struct val_box_t { + val_t val; +} val_box_t; +typedef struct val_cons_t { + val_t snd; + val_t fst; +} val_cons_t; +typedef struct val_vect_t { + uint64_t len; + val_t elems[]; +} val_vect_t; +typedef struct val_str_t { + uint64_t len; + val_char_t codepoints[]; +} val_str_t; +typedef struct val_symb_t { + uint64_t len; + val_char_t codepoints[]; +} val_symb_t; + +/* return the type of x */ +type_t val_typeof(val_t x); + +/** + * Wrap/unwrap values + * + * The behavior of unwrap functions are undefined on type mismatch. + */ +int64_t val_unwrap_int(val_t x); +val_t val_wrap_int(int64_t i); + +int val_unwrap_bool(val_t x); +val_t val_wrap_bool(int b); + +val_char_t val_unwrap_char(val_t x); +val_t val_wrap_char(val_char_t b); + +val_t val_wrap_eof(); + +val_t val_wrap_void(); + +val_box_t* val_unwrap_box(val_t x); +val_t val_wrap_box(val_box_t* b); + +val_cons_t* val_unwrap_cons(val_t x); +val_t val_wrap_cons(val_cons_t* c); + +val_vect_t* val_unwrap_vect(val_t x); +val_t val_wrap_vect(val_vect_t* c); + +val_str_t* val_unwrap_str(val_t x); +val_t val_wrap_str(val_str_t* c); + +val_symb_t* val_unwrap_symb(val_t x); +val_t val_wrap_symb(val_symb_t* c); + +#endif From 98784cb0d6c52908ed26400bde211cf92a3d5d70 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Wed, 10 Nov 2021 15:45:08 -0500 Subject: [PATCH 07/21] Split Mug and Mountebank tests. --- langs/mountebank/test/test-runner.rkt | 31 ++++++++++++++------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/langs/mountebank/test/test-runner.rkt b/langs/mountebank/test/test-runner.rkt index d5b395a7..09ec4f78 100644 --- a/langs/mountebank/test/test-runner.rkt +++ b/langs/mountebank/test/test-runner.rkt @@ -234,6 +234,21 @@ 36)) 666) + ;; Mug examples + (check-equal? (run '(symbol? 'foo)) #t) + (check-equal? (run '(symbol? (string->symbol "foo"))) #t) + (check-equal? (run '(eq? 'foo 'foo)) #t) + (check-equal? (run '(eq? (string->symbol "foo") + (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'foo (string->symbol "foo"))) + #t) + (check-equal? (run '(eq? 'fff (string->symbol (make-string 3 #\f)))) + #t) + (check-equal? (run '(eq? 'g0 (gensym))) #f) + (check-equal? (run '(eq? (gensym) (gensym))) #f) + (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f) + ;; Mountebank examples (check-equal? (run '#()) #()) @@ -255,23 +270,9 @@ (check-equal? (run '(define (f) '(1 . 2)) '(eq? (f) (f))) #t) - - (check-equal? (run '(symbol? 'foo)) #t) - (check-equal? (run '(symbol? (string->symbol "foo"))) #t) - (check-equal? (run '(eq? 'foo 'foo)) #t) - (check-equal? (run '(eq? (string->symbol "foo") - (string->symbol "foo"))) - #t) - (check-equal? (run '(eq? 'foo (string->symbol "foo"))) - #t) - (check-equal? (run '(eq? 'fff (string->symbol (make-string 3 #\f)))) - #t) (check-equal? (run '(let ((x '(foo . foo))) (eq? (car x) (cdr x)))) - #t) - (check-equal? (run '(eq? 'g0 (gensym))) #f) - (check-equal? (run '(eq? (gensym) (gensym))) #f) - (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f)) + #t)) (define (test-runner-io run) ;; Evildoer examples From aba1776557928fae5f320f8395bb4313e29adbbd Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Wed, 10 Nov 2021 15:45:58 -0500 Subject: [PATCH 08/21] Add symbol operations to Mountebank interpreter. --- langs/mountebank/interp-defun.rkt | 1 + langs/mountebank/interp-prims.rkt | 3 +++ langs/mountebank/interp.rkt | 1 + 3 files changed, 5 insertions(+) diff --git a/langs/mountebank/interp-defun.rkt b/langs/mountebank/interp-defun.rkt index 2d2a5010..a5bd127e 100644 --- a/langs/mountebank/interp-defun.rkt +++ b/langs/mountebank/interp-defun.rkt @@ -35,6 +35,7 @@ [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] + [(Prim0 'gensym) (gensym)] [(Prim1 p e) (match (interp-env e r ds) ['err 'err] diff --git a/langs/mountebank/interp-prims.rkt b/langs/mountebank/interp-prims.rkt index 15039f9f..79bcf361 100644 --- a/langs/mountebank/interp-prims.rkt +++ b/langs/mountebank/interp-prims.rkt @@ -24,6 +24,9 @@ [(list 'vector-length (? vector?)) (vector-length v)] [(list 'string? v) (string? v)] [(list 'string-length (? string?)) (string-length v)] + [(list 'symbol? v) (symbol? v)] + [(list 'string->symbol (? string?)) (string->symbol v)] + [(list 'symbol->string (? symbol?)) (symbol->string v)] [_ 'err])) ;; Op2 Value Value -> Answer diff --git a/langs/mountebank/interp.rkt b/langs/mountebank/interp.rkt index 8a448b02..b5f47538 100644 --- a/langs/mountebank/interp.rkt +++ b/langs/mountebank/interp.rkt @@ -34,6 +34,7 @@ [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] + [(Prim0 'gensym) (gensym)] [(Prim1 p e) (match (interp-env e r ds) ['err 'err] From 349008ad0472d1d9b213df8d01dd19cb4910a13d Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Wed, 10 Nov 2021 15:51:26 -0500 Subject: [PATCH 09/21] Fix read-all bug in interp-file. --- langs/evildoer/interp-file.rkt | 2 +- langs/fraud/interp-file.rkt | 2 +- langs/hoax/interp-file.rkt | 2 +- langs/hustle/interp-file.rkt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/langs/evildoer/interp-file.rkt b/langs/evildoer/interp-file.rkt index 99789c43..843f7bc8 100644 --- a/langs/evildoer/interp-file.rkt +++ b/langs/evildoer/interp-file.rkt @@ -9,7 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (let ((r (interp (parse (read-all p))))) + (let ((r (interp (parse (read p))))) (unless (void? r) (println r))) (close-input-port p)))) diff --git a/langs/fraud/interp-file.rkt b/langs/fraud/interp-file.rkt index 99789c43..843f7bc8 100644 --- a/langs/fraud/interp-file.rkt +++ b/langs/fraud/interp-file.rkt @@ -9,7 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (let ((r (interp (parse (read-all p))))) + (let ((r (interp (parse (read p))))) (unless (void? r) (println r))) (close-input-port p)))) diff --git a/langs/hoax/interp-file.rkt b/langs/hoax/interp-file.rkt index 99789c43..843f7bc8 100644 --- a/langs/hoax/interp-file.rkt +++ b/langs/hoax/interp-file.rkt @@ -9,7 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (let ((r (interp (parse (read-all p))))) + (let ((r (interp (parse (read p))))) (unless (void? r) (println r))) (close-input-port p)))) diff --git a/langs/hustle/interp-file.rkt b/langs/hustle/interp-file.rkt index 99789c43..843f7bc8 100644 --- a/langs/hustle/interp-file.rkt +++ b/langs/hustle/interp-file.rkt @@ -9,7 +9,7 @@ (let ((p (open-input-file fn))) (begin (read-line p) ; ignore #lang racket line - (let ((r (interp (parse (read-all p))))) + (let ((r (interp (parse (read p))))) (unless (void? r) (println r))) (close-input-port p)))) From 3a2d40c15be02f159c9e2a49a4280a939b44d841 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Fri, 12 Nov 2021 08:44:23 -0500 Subject: [PATCH 10/21] Add pattern matching to Loot. --- langs/loot/ast.rkt | 24 +++++- langs/loot/compile.rkt | 105 ++++++++++++++++++++++++++- langs/loot/interp-defun.rkt | 39 +++++++++- langs/loot/interp.rkt | 39 +++++++++- langs/loot/parse.rkt | 28 +++++++ langs/loot/test/test-runner.rkt | 34 +++++++++ www/notes/{mug.scrbl => sugar.scrbl} | 0 7 files changed, 264 insertions(+), 5 deletions(-) rename www/notes/{mug.scrbl => sugar.scrbl} (100%) diff --git a/langs/loot/ast.rkt b/langs/loot/ast.rkt index f6a9f159..b430b22d 100644 --- a/langs/loot/ast.rkt +++ b/langs/loot/ast.rkt @@ -21,6 +21,7 @@ ;; | (Begin Expr Expr) ;; | (Let Id Expr Expr) ;; | (Var Id) +;; | (Match Expr (Listof Pat) (Listof Expr)) ;; | (App Expr (Listof Expr)) ;; | (Lam Id (Listof Id) Expr) ;; type Id = Symbol @@ -30,13 +31,24 @@ ;; | 'write-byte | 'eof-object? ;; | 'box | 'car | 'cdr | 'unbox ;; | 'empty? | 'cons? | 'box? -;; | 'vector? | vector-length -;; | 'string? | string-length +;; | 'vector? | 'vector-length +;; | 'string? | 'string-length ;; type Op2 = '+ | '- | '< | '= ;; | 'cons ;; | 'make-vector | 'vector-ref ;; | 'make-string | 'string-ref ;; type Op3 = 'vector-set! +;; type Pat = (PVar Id) +;; | (PWild) +;; | (PLit Lit) +;; | (PBox Pat) +;; | (PCons Pat Pat) +;; | (PAnd Pat Pat) +;; type Lit = Boolean +;; | Character +;; | Integer +;; | '() + (struct Eof () #:prefab) (struct Empty () #:prefab) (struct Int (i) #:prefab) @@ -53,3 +65,11 @@ (struct Var (x) #:prefab) (struct App (e es) #:prefab) (struct Lam (f xs e) #:prefab) +(struct Match (e ps es) #:prefab) + +(struct PVar (x) #:prefab) +(struct PWild () #:prefab) +(struct PLit (x) #:prefab) +(struct PBox (p) #:prefab) +(struct PCons (p1 p2) #:prefab) +(struct PAnd (p1 p2) #:prefab) diff --git a/langs/loot/compile.rkt b/langs/loot/compile.rkt index 40b97486..7b9493f2 100644 --- a/langs/loot/compile.rkt +++ b/langs/loot/compile.rkt @@ -105,7 +105,8 @@ [(Begin e1 e2) (compile-begin e1 e2 c t?)] [(Let x e1 e2) (compile-let x e1 e2 c t?)] [(App e es) (compile-app e es c t?)] - [(Lam f xs e) (compile-lam f xs e c)])) + [(Lam f xs e) (compile-lam f xs e c)] + [(Match e ps es) (compile-match e ps es c t?)])) ;; Value -> Asm (define (compile-value v) @@ -300,6 +301,108 @@ (Push rax) (compile-es es (cons #f c)))])) +;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +(define (compile-match e ps es c t?) + (let ((done (gensym))) + (seq (compile-e e c t?) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched + +;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) + +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i f cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) + (Add rsp (* 8 (length cm))) + (Jmp done) + f + (Label next))]))) + +;; Pat CEnv Symbol -> (list Asm Asm CEnv) +(define (compile-pattern p cm next) + (match p + [(PWild) + (list (seq) (seq) cm)] + [(PVar x) + (list (seq (Push rax)) + (seq) + (cons x cm))] + [(PLit l) + (let ((fail (gensym))) + (list (seq (Cmp rax (imm->bits l)) + (Jne fail)) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))] + [(PAnd p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 f2) + cm2)])])] + [(PBox p) + (match (compile-pattern p cm next) + [(list i1 f1 cm1) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Jne fail) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + (seq f1 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm1))])] + [(PCons p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Jne fail) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 + f2 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm2))])])])) + ;; Id CEnv -> Integer (define (lookup x cenv) (match cenv diff --git a/langs/loot/interp-defun.rkt b/langs/loot/interp-defun.rkt index 36494ae3..3821e0c2 100644 --- a/langs/loot/interp-defun.rkt +++ b/langs/loot/interp-defun.rkt @@ -90,7 +90,44 @@ (if (= (length xs) (length vs)) (interp-env e (append (zip xs vs) r) ds) 'err)] - [_ 'err])])])])) + [_ 'err])])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) ;; Id Env [Listof Defn] -> Answer (define (interp-var x r ds) diff --git a/langs/loot/interp.rkt b/langs/loot/interp.rkt index 41a9ca49..ca1ffa57 100644 --- a/langs/loot/interp.rkt +++ b/langs/loot/interp.rkt @@ -89,7 +89,44 @@ [vs (if (procedure? f) (apply f vs) - 'err)])])])) + 'err)])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) ;; Id Env [Listof Defn] -> Answer (define (interp-var x r ds) diff --git a/langs/loot/parse.rkt b/langs/loot/parse.rkt index 9e52a49c..5cce0885 100644 --- a/langs/loot/parse.rkt +++ b/langs/loot/parse.rkt @@ -42,6 +42,8 @@ (If (parse-e e1) (parse-e e2) (parse-e e3))] [(list 'let (list (list (? symbol? x) e1)) e2) (Let x (parse-e e1) (parse-e e2))] + [(cons 'match (cons e ms)) + (parse-match (parse-e e) ms)] [(list (or 'lambda 'λ) xs e) (if (and (list? xs) (andmap symbol? xs)) @@ -51,6 +53,32 @@ (App (parse-e e) (map parse-e es))] [_ (error "Parse error" s)])) +(define (parse-match e ms) + (match ms + ['() (Match e '() '())] + [(cons (list p r) ms) + (match (parse-match e ms) + [(Match e ps es) + (Match e + (cons (parse-pat p) ps) + (cons (parse-e r) es))])])) + +(define (parse-pat p) + (match p + [(? boolean?) (PLit p)] + [(? integer?) (PLit p)] + [(? char?) (PLit p)] + ['_ (PWild)] + [(? symbol?) (PVar p)] + [(list 'quote (list)) + (PLit '())] + [(list 'box p) + (PBox (parse-pat p))] + [(list 'cons p1 p2) + (PCons (parse-pat p1) (parse-pat p2))] + [(list 'and p1 p2) + (PAnd (parse-pat p1) (parse-pat p2))])) + (define op0 '(read-byte peek-byte void)) diff --git a/langs/loot/test/test-runner.rkt b/langs/loot/test/test-runner.rkt index cdeecb8a..eb0e7e14 100644 --- a/langs/loot/test/test-runner.rkt +++ b/langs/loot/test/test-runner.rkt @@ -203,6 +203,40 @@ '(let ((z 2)) (f 1 2))) 10) + ;; Knock examples + (check-equal? (run '(match 1)) 'err) + (check-equal? (run '(match 1 [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2] [0 3])) + 2) + (check-equal? (run '(match 1 [2 1] [0 3])) + 'err) + (check-equal? (run '(match 1 [_ 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [x x] [_ 3])) + (cons 1 2)) + (check-equal? (run '(match (cons 1 2) [(cons x y) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons x 2) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons 3 2) 0] [_ 3])) + 3) + (check-equal? (run '(match 1 [(cons x y) x] [_ 3])) + 3) + (check-equal? (run '(match (cons 1 2) [(cons 1 3) 0] [(cons 1 y) y] [_ 3])) + 2) + (check-equal? (run '(match (box 1) [(box 1) 0] [_ 1])) + 0) + (check-equal? (run '(match (box 1) [(box 2) 0] [_ 1])) + 1) + (check-equal? (run '(match (box 1) [(box x) x] [_ 2])) + 1) ;; Loot examples (check-equal? (run '((λ (x) x) 5)) diff --git a/www/notes/mug.scrbl b/www/notes/sugar.scrbl similarity index 100% rename from www/notes/mug.scrbl rename to www/notes/sugar.scrbl From 3714673c2a6c90c102d8f30b82de4ed2f5ab7279 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Fri, 12 Nov 2021 08:48:44 -0500 Subject: [PATCH 11/21] Polishing Mug. --- langs/mug/compile-datum.rkt | 136 ------------------------------------ langs/mug/compile.rkt | 60 ++++++++++++---- langs/mug/intern.rkt | 7 +- langs/mug/symbol.c | 21 +++--- langs/mug/try.rkt | 3 - 5 files changed, 59 insertions(+), 168 deletions(-) delete mode 100644 langs/mug/compile-datum.rkt delete mode 100644 langs/mug/try.rkt diff --git a/langs/mug/compile-datum.rkt b/langs/mug/compile-datum.rkt deleted file mode 100644 index 7f2e96f6..00000000 --- a/langs/mug/compile-datum.rkt +++ /dev/null @@ -1,136 +0,0 @@ -#lang racket -(provide compile-datum compile-literals) -(require "types.rkt" - "intern.rkt" - a86/ast) - -;; Registers used -(define rax 'rax) ; return - -;; QEnv -> Asm -(define (compile-literals q) - (match q - ['() (seq)] - [(cons (cons s (Ref l _)) q) - (seq (compile-literal (to-string s) l) - (compile-literals q))])) - -;; String Label -> Asm -(define (compile-literal s l) - (seq (Label l) - (Dq (string-length s)) - (compile-string-chars (string->list s)) - (if (odd? (string-length s)) - (seq (Dd 0)) - (seq)))) - -(define (compound? x) - (or ;(string? x) - ;(symbol? x) - (cons? x) - (vector? x) - (box? x))) - -;; Datum -> Asm -(define (compile-datum d) - (cond - [(compound? d) - (compile-compound-datum d)] - [(Ref? d) - (seq (Lea rax (Plus (Ref-label d) (Ref-type-tag d))))] - [else - (seq (Mov rax (imm->bits d)))])) - -(define (compile-compound-datum d) - (match (compile-quoted d) - [(cons l is) - (seq (Data) - is - (Text) - (Lea rax l))])) - -;; Datum -> (cons AsmExpr Asm) -(define (compile-quoted c) - (cond - ;[(string? c) (compile-datum-string c)] - ;[(symbol? c) (compile-datum-symbol (symbol->string c))] - [(vector? c) (compile-datum-vector (vector->list c))] - [(box? c) (compile-datum-box (unbox c))] - [(cons? c) (compile-datum-cons (car c) (cdr c))] - [(Ref? c) (cons (Plus (Ref-label c) (Ref-type-tag c)) '())] - [else (cons (imm->bits c) '())])) - -;; String -> (cons AsmExpr Asm) -#; -(define (compile-datum-string c) - (let ((l (gensym 'string))) - (cons (Plus l type-str) - (seq (Label l) - (Dq (string-length c)) - (compile-string-chars (string->list c)) - (if (odd? (string-length c)) - (seq (Dd 0)) - (seq)))))) - -;; String -> (cons AsmExpr Asm) -#; -(define (compile-datum-symbol c) - (let ((l (gensym 'symbol))) - (cons (Plus l type-symb) - (seq (Label l) - (Dq (string-length c)) - (compile-string-chars (string->list c)) - (if (odd? (string-length c)) - (seq (Dd 0)) - (seq)))))) - -;; [Listof Datum] -> (cons AsmExpr Asm) -(define (compile-datum-vector ds) - (match ds - ['() (cons type-vect '())] - [_ - (let ((l (gensym 'vector)) - (cds (map compile-quoted ds))) - (cons (Plus l type-vect) - (seq (Label l) - (Dq (length ds)) - (map (λ (cd) (Dq (car cd))) cds) - (append-map cdr cds))))])) - -;; Datum -> (cons AsmExpr Asm) -(define (compile-datum-box c) - (match (compile-quoted c) - [(cons l1 is1) - (let ((l (gensym 'box))) - (cons (Plus l type-box) - (seq (Label l) - (Dq l1) - is1)))])) - -;; Datum Datum -> (cons AsmExpr Asm) -(define (compile-datum-cons c1 c2) - (match (compile-quoted c1) - [(cons l1 is1) - (match (compile-quoted c2) - [(cons l2 is2) - (let ((l (gensym 'cons))) - (cons (Plus l type-cons) - (seq (Label l) - (Dq l2) - (Dq l1) - is1 - is2)))])])) - -;; [Listof Char] -> Asm -(define (compile-string-chars cs) - (match cs - ['() (seq)] - [(cons c cs) - (seq (Dd (char->integer c)) - (compile-string-chars cs))])) - -;; (U String Symbol) -> String -(define (to-string s) - (if (symbol? s) - (symbol->string s) - s)) diff --git a/langs/mug/compile.rkt b/langs/mug/compile.rkt index 457d68e5..eaf04702 100644 --- a/langs/mug/compile.rkt +++ b/langs/mug/compile.rkt @@ -6,7 +6,6 @@ "fv.rkt" "intern.rkt" "compile-ops.rkt" - "compile-datum.rkt" a86/ast) ;; Registers used @@ -25,7 +24,9 @@ (Global 'entry) (Label 'entry) (Mov rbx rdi) ; recv heap pointer + (Sub 'rsp 8) (init-symbol-table q) + (Add 'rsp 8) (compile-defines-values ds) (compile-e e (reverse (define-ids ds)) #t) (Add rsp (* 8 (length ds))) ;; pop function definitions @@ -47,6 +48,44 @@ (Extern 'intern_symbol) (Extern 'str_dup))) +;; DEnv -> Asm +(define (compile-literals q) + (match q + ['() (seq)] + [(cons (cons s l) q) + (seq (compile-literal (to-string s) l) + (compile-literals q))])) + +;; String Label -> Asm +(define (compile-literal s l) + (seq (Label l) + (Dq (string-length s)) + (compile-string-chars (string->list s)) + (if (odd? (string-length s)) + (seq (Dd 0)) + (seq)))) + +;; [Listof Char] -> Asm +(define (compile-string-chars cs) + (match cs + ['() (seq)] + [(cons c cs) + (seq (Dd (char->integer c)) + (compile-string-chars cs))])) + +;; DEnv -> Asm +;; Call intern_symbol on every symbol in the program +(define (init-symbol-table q) + (append-map init-symbol q)) + +;; (cons (U String Symbol) Symbol) -> Asm +(define (init-symbol qb) + (match qb + [(cons (? symbol? s) l) + (seq (Lea rdi l) + (Call 'intern_symbol))] + [_ (seq)])) + ;; [Listof Defn] -> [Listof Id] (define (define-ids ds) (match ds @@ -304,20 +343,11 @@ [#t 0] [#f (+ 8 (lookup x rest))])])) -;; QEnv -> Asm -;; Call intern_symbol on every symbol in the program -(define (init-symbol-table q) - (seq pad-stack - (append-map init-symbol q) - unpad-stack)) - -;; (cons (U String Symbol) Ref) -> Asm -(define (init-symbol qb) - (match qb - [(cons (? symbol? s) (Ref l _)) - (seq (Lea rdi l) - (Call 'intern_symbol))] - [_ (seq)])) +;; (U String Symbol) -> String +(define (to-string s) + (if (symbol? s) + (symbol->string s) + s)) ;; Symbol -> Label ;; Produce a symbol that is a valid Nasm label diff --git a/langs/mug/intern.rkt b/langs/mug/intern.rkt index 0f7cbe1f..11c8f8b7 100644 --- a/langs/mug/intern.rkt +++ b/langs/mug/intern.rkt @@ -22,10 +22,9 @@ (match (lookup s q) [#f (let ((l (gensym 'lit))) - (let ((r (Ref l type))) - (cons r - (cons (cons s r) q))))] - [r (cons r q)])) + (cons (Ref l type) + (cons (cons s l) q)))] + [l (cons (Ref l type) q)])) ;; Prog -> (cons Prog QEnv) (define (intern p) diff --git a/langs/mug/symbol.c b/langs/mug/symbol.c index 08a9e5dd..39bd088e 100644 --- a/langs/mug/symbol.c +++ b/langs/mug/symbol.c @@ -7,7 +7,7 @@ static uint64_t gensym_ctr = 0; val_str_t *str_from_cstr(const char *); -int str_cmp(const val_str_t *, const val_str_t *); +int symb_cmp(const val_symb_t *, const val_symb_t *); val_str_t *str_dup(const val_str_t *); // binary tree node @@ -25,8 +25,9 @@ val_symb_t *intern_symbol(val_symb_t* symb) while (*curr) { struct Node *t = *curr; - int r = str_cmp((val_str_t*)symb, (val_str_t*)t->elem); + int r = symb_cmp(symb, t->elem); if (r == 0) { + // found it, so return saved pointer return t->elem; } else if (r < 0) { curr = &t->left; @@ -35,14 +36,10 @@ val_symb_t *intern_symbol(val_symb_t* symb) } } - // wasn't found, so insert it - + // wasn't found, so insert it and return pointer *curr = calloc(1, sizeof(struct Node)); - - struct Node* t = *curr; - t->elem = symb; // str_dup(str); - - return t->elem; + (*curr)->elem = symb; + return (*curr)->elem; } val_symb_t *str_to_symbol(const val_str_t *str) @@ -61,6 +58,10 @@ val_symb_t *gensym(void) val_str_t *str_from_cstr(const char *s) { int64_t len = strlen(s); + + if (len == 0) + return NULL; + val_str_t *str = malloc(sizeof(int64_t) + len * sizeof(val_char_t)); @@ -75,7 +76,7 @@ val_str_t *str_from_cstr(const char *s) return str; } -int str_cmp(const val_str_t *s1, const val_str_t *s2) +int symb_cmp(const val_symb_t *s1, const val_symb_t *s2) { int64_t len1 = s1->len; int64_t len2 = s2->len; diff --git a/langs/mug/try.rkt b/langs/mug/try.rkt deleted file mode 100644 index b52ae93d..00000000 --- a/langs/mug/try.rkt +++ /dev/null @@ -1,3 +0,0 @@ -#lang racket -(void) - From 61569a7d6c2ae966e506a22fa93a26eea812cb26 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Fri, 12 Nov 2021 09:17:48 -0500 Subject: [PATCH 12/21] Add pattern matching to Mug. --- langs/mug/ast.rkt | 20 +++++++ langs/mug/compile.rkt | 105 ++++++++++++++++++++++++++++++++- langs/mug/interp-defun.rkt | 40 ++++++++++++- langs/mug/interp.rkt | 40 ++++++++++++- langs/mug/parse.rkt | 28 +++++++++ langs/mug/test/test-runner.rkt | 34 +++++++++++ 6 files changed, 262 insertions(+), 5 deletions(-) diff --git a/langs/mug/ast.rkt b/langs/mug/ast.rkt index 5b8885fe..cd1317a1 100644 --- a/langs/mug/ast.rkt +++ b/langs/mug/ast.rkt @@ -22,6 +22,7 @@ ;; | (Begin Expr Expr) ;; | (Let Id Expr Expr) ;; | (Var Id) +;; | (Match Expr (Listof Pat) (Listof Expr)) ;; | (App Expr (Listof Expr)) ;; | (Lam Id (Listof Id) Expr) ;; type Id = Symbol @@ -39,6 +40,17 @@ ;; | 'make-vector | 'vector-ref ;; | 'make-string | 'string-ref ;; type Op3 = 'vector-set! +;; type Pat = (PVar Id) +;; | (PWild) +;; | (PLit Lit) +;; | (PBox Pat) +;; | (PCons Pat Pat) +;; | (PAnd Pat Pat) +;; type Lit = Boolean +;; | Character +;; | Integer +;; | '() + (struct Eof () #:prefab) (struct Empty () #:prefab) (struct Int (i) #:prefab) @@ -56,3 +68,11 @@ (struct Var (x) #:prefab) (struct App (e es) #:prefab) (struct Lam (f xs e) #:prefab) +(struct Match (e ps es) #:prefab) + +(struct PVar (x) #:prefab) +(struct PWild () #:prefab) +(struct PLit (x) #:prefab) +(struct PBox (p) #:prefab) +(struct PCons (p1 p2) #:prefab) +(struct PAnd (p1 p2) #:prefab) diff --git a/langs/mug/compile.rkt b/langs/mug/compile.rkt index eaf04702..b9d1b767 100644 --- a/langs/mug/compile.rkt +++ b/langs/mug/compile.rkt @@ -157,7 +157,8 @@ [(Begin e1 e2) (compile-begin e1 e2 c t?)] [(Let x e1 e2) (compile-let x e1 e2 c t?)] [(App e es) (compile-app e es c t?)] - [(Lam f xs e) (compile-lam f xs e c)])) + [(Lam f xs e) (compile-lam f xs e c)] + [(Match e ps es) (compile-match e ps es c t?)])) ;; Value -> Asm (define (compile-value v) @@ -334,6 +335,108 @@ (Push rax) (compile-es es (cons #f c)))])) +;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +(define (compile-match e ps es c t?) + (let ((done (gensym))) + (seq (compile-e e c t?) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched + +;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) + +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i f cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) + (Add rsp (* 8 (length cm))) + (Jmp done) + f + (Label next))]))) + +;; Pat CEnv Symbol -> (list Asm Asm CEnv) +(define (compile-pattern p cm next) + (match p + [(PWild) + (list (seq) (seq) cm)] + [(PVar x) + (list (seq (Push rax)) + (seq) + (cons x cm))] + [(PLit l) + (let ((fail (gensym))) + (list (seq (Cmp rax (imm->bits l)) + (Jne fail)) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))] + [(PAnd p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 f2) + cm2)])])] + [(PBox p) + (match (compile-pattern p cm next) + [(list i1 f1 cm1) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Jne fail) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + (seq f1 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm1))])] + [(PCons p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Jne fail) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 + f2 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm2))])])])) + ;; Id CEnv -> Integer (define (lookup x cenv) (match cenv diff --git a/langs/mug/interp-defun.rkt b/langs/mug/interp-defun.rkt index 4a727607..315729a4 100644 --- a/langs/mug/interp-defun.rkt +++ b/langs/mug/interp-defun.rkt @@ -10,7 +10,6 @@ ;; | Integer ;; | Boolean ;; | Character -;; | Symbol ;; | Eof ;; | Void ;; | '() @@ -93,7 +92,44 @@ (if (= (length xs) (length vs)) (interp-env e (append (zip xs vs) r) ds) 'err)] - [_ 'err])])])])) + [_ 'err])])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) ;; Id Env [Listof Defn] -> Answer (define (interp-var x r ds) diff --git a/langs/mug/interp.rkt b/langs/mug/interp.rkt index 19d8656d..876646d5 100644 --- a/langs/mug/interp.rkt +++ b/langs/mug/interp.rkt @@ -10,7 +10,6 @@ ;; | Integer ;; | Boolean ;; | Character -;; | Symbol ;; | Eof ;; | Void ;; | '() @@ -92,7 +91,44 @@ [vs (if (procedure? f) (apply f vs) - 'err)])])])) + 'err)])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) ;; Id Env [Listof Defn] -> Answer (define (interp-var x r ds) diff --git a/langs/mug/parse.rkt b/langs/mug/parse.rkt index cb473c74..125e3e84 100644 --- a/langs/mug/parse.rkt +++ b/langs/mug/parse.rkt @@ -43,6 +43,8 @@ (If (parse-e e1) (parse-e e2) (parse-e e3))] [(list 'let (list (list (? symbol? x) e1)) e2) (Let x (parse-e e1) (parse-e e2))] + [(cons 'match (cons e ms)) + (parse-match (parse-e e) ms)] [(list (or 'lambda 'λ) xs e) (if (and (list? xs) (andmap symbol? xs)) @@ -52,6 +54,32 @@ (App (parse-e e) (map parse-e es))] [_ (error "Parse error" s)])) +(define (parse-match e ms) + (match ms + ['() (Match e '() '())] + [(cons (list p r) ms) + (match (parse-match e ms) + [(Match e ps es) + (Match e + (cons (parse-pat p) ps) + (cons (parse-e r) es))])])) + +(define (parse-pat p) + (match p + [(? boolean?) (PLit p)] + [(? integer?) (PLit p)] + [(? char?) (PLit p)] + ['_ (PWild)] + [(? symbol?) (PVar p)] + [(list 'quote (list)) + (PLit '())] + [(list 'box p) + (PBox (parse-pat p))] + [(list 'cons p1 p2) + (PCons (parse-pat p1) (parse-pat p2))] + [(list 'and p1 p2) + (PAnd (parse-pat p1) (parse-pat p2))])) + (define op0 '(read-byte peek-byte void gensym)) diff --git a/langs/mug/test/test-runner.rkt b/langs/mug/test/test-runner.rkt index c8ca1b39..7d6cc3e5 100644 --- a/langs/mug/test/test-runner.rkt +++ b/langs/mug/test/test-runner.rkt @@ -203,6 +203,40 @@ '(let ((z 2)) (f 1 2))) 10) + ;; Knock examples + (check-equal? (run '(match 1)) 'err) + (check-equal? (run '(match 1 [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2] [0 3])) + 2) + (check-equal? (run '(match 1 [2 1] [0 3])) + 'err) + (check-equal? (run '(match 1 [_ 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [x x] [_ 3])) + (cons 1 2)) + (check-equal? (run '(match (cons 1 2) [(cons x y) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons x 2) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons 3 2) 0] [_ 3])) + 3) + (check-equal? (run '(match 1 [(cons x y) x] [_ 3])) + 3) + (check-equal? (run '(match (cons 1 2) [(cons 1 3) 0] [(cons 1 y) y] [_ 3])) + 2) + (check-equal? (run '(match (box 1) [(box 1) 0] [_ 1])) + 0) + (check-equal? (run '(match (box 1) [(box 2) 0] [_ 1])) + 1) + (check-equal? (run '(match (box 1) [(box x) x] [_ 2])) + 1) ;; Loot examples (check-equal? (run '((λ (x) x) 5)) From cef1ad79963b84affc9fa1a637b4cbef6a270928 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Fri, 12 Nov 2021 09:42:57 -0500 Subject: [PATCH 13/21] Add pattern matching to Mountebank. --- langs/mountebank/ast.rkt | 19 +++++ langs/mountebank/compile.rkt | 105 +++++++++++++++++++++++++- langs/mountebank/interp-defun.rkt | 39 +++++++++- langs/mountebank/interp.rkt | 39 +++++++++- langs/mountebank/parse.rkt | 28 +++++++ langs/mountebank/test/test-runner.rkt | 34 +++++++++ 6 files changed, 261 insertions(+), 3 deletions(-) diff --git a/langs/mountebank/ast.rkt b/langs/mountebank/ast.rkt index 6f7a03c1..22e60985 100644 --- a/langs/mountebank/ast.rkt +++ b/langs/mountebank/ast.rkt @@ -17,6 +17,7 @@ ;; | (Begin Expr Expr) ;; | (Let Id Expr Expr) ;; | (Var Id) +;; | (Match Expr (Listof Pat) (Listof Expr)) ;; | (App Expr (Listof Expr)) ;; | (Lam Id (Listof Id) Expr) ;; type Datum = Integer @@ -42,6 +43,16 @@ ;; | 'make-vector | 'vector-ref ;; | 'make-string | 'string-ref ;; type Op3 = 'vector-set! +;; type Pat = (PVar Id) +;; | (PWild) +;; | (PLit Lit) +;; | (PBox Pat) +;; | (PCons Pat Pat) +;; | (PAnd Pat Pat) +;; type Lit = Boolean +;; | Character +;; | Integer +;; | '() (struct Eof () #:prefab) (struct Prim0 (p) #:prefab) @@ -55,3 +66,11 @@ (struct App (e es) #:prefab) (struct Lam (f xs e) #:prefab) (struct Quote (d) #:prefab) +(struct Match (e ps es) #:prefab) + +(struct PVar (x) #:prefab) +(struct PWild () #:prefab) +(struct PLit (x) #:prefab) +(struct PBox (p) #:prefab) +(struct PCons (p1 p2) #:prefab) +(struct PAnd (p1 p2) #:prefab) diff --git a/langs/mountebank/compile.rkt b/langs/mountebank/compile.rkt index 6d13813c..f730d443 100644 --- a/langs/mountebank/compile.rkt +++ b/langs/mountebank/compile.rkt @@ -114,7 +114,8 @@ [(Begin e1 e2) (compile-begin e1 e2 c t?)] [(Let x e1 e2) (compile-let x e1 e2 c t?)] [(App e es) (compile-app e es c t?)] - [(Lam f xs e) (compile-lam f xs e c)])) + [(Lam f xs e) (compile-lam f xs e c)] + [(Match e ps es) (compile-match e ps es c t?)])) ;; Value -> Asm (define (compile-value v) @@ -287,6 +288,108 @@ (Push rax) (compile-es es (cons #f c)))])) +;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +(define (compile-match e ps es c t?) + (let ((done (gensym))) + (seq (compile-e e c t?) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched + +;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) + +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i f cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) + (Add rsp (* 8 (length cm))) + (Jmp done) + f + (Label next))]))) + +;; Pat CEnv Symbol -> (list Asm Asm CEnv) +(define (compile-pattern p cm next) + (match p + [(PWild) + (list (seq) (seq) cm)] + [(PVar x) + (list (seq (Push rax)) + (seq) + (cons x cm))] + [(PLit l) + (let ((fail (gensym))) + (list (seq (Cmp rax (imm->bits l)) + (Jne fail)) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))] + [(PAnd p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 f2) + cm2)])])] + [(PBox p) + (match (compile-pattern p cm next) + [(list i1 f1 cm1) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Jne fail) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + (seq f1 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm1))])] + [(PCons p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Jne fail) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 + f2 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm2))])])])) + ;; Id CEnv -> Integer (define (lookup x cenv) (match cenv diff --git a/langs/mountebank/interp-defun.rkt b/langs/mountebank/interp-defun.rkt index a5bd127e..ad449140 100644 --- a/langs/mountebank/interp-defun.rkt +++ b/langs/mountebank/interp-defun.rkt @@ -84,7 +84,44 @@ (if (= (length xs) (length vs)) (interp-env e (append (zip xs vs) r) ds) 'err)] - [_ 'err])])])])) + [_ 'err])])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) ;; Id Env [Listof Defn] -> Answer (define (interp-var x r ds) diff --git a/langs/mountebank/interp.rkt b/langs/mountebank/interp.rkt index b5f47538..072e5f77 100644 --- a/langs/mountebank/interp.rkt +++ b/langs/mountebank/interp.rkt @@ -83,7 +83,44 @@ [vs (if (procedure? f) (apply f vs) - 'err)])])])) + 'err)])])] + [(Match e ps es) + (match (interp-env e r ds) + ['err 'err] + [v + (interp-match v ps es r ds)])])) + +;; Value [Listof Pat] [Listof Expr] Env Defns -> Answer +(define (interp-match v ps es r ds) + (match* (ps es) + [('() '()) 'err] + [((cons p ps) (cons e es)) + (match (interp-match-pat p v r) + [#f (interp-match v ps es r ds)] + [r (interp-env e r ds)])])) + +;; Pat Value Env -> [Maybe Env] +(define (interp-match-pat p v r) + (match p + [(PWild) r] + [(PVar x) (ext r x v)] + [(PLit l) (and (eqv? l v) r)] + [(PBox p) + (match v + [(box v) + (interp-match-pat p v r)] + [_ #f])] + [(PCons p1 p2) + (match v + [(cons v1 v2) + (match (interp-match-pat p1 v1 r) + [#f #f] + [r1 (interp-match-pat p2 v2 r1)])] + [_ #f])] + [(PAnd p1 p2) + (match (interp-match-pat p1 v r) + [#f #f] + [r1 (interp-match-pat p2 v r1)])])) ;; Id Env [Listof Defn] -> Answer (define (interp-var x r ds) diff --git a/langs/mountebank/parse.rkt b/langs/mountebank/parse.rkt index 70fe6c2e..3a535b6f 100644 --- a/langs/mountebank/parse.rkt +++ b/langs/mountebank/parse.rkt @@ -39,6 +39,8 @@ (If (parse-e e1) (parse-e e2) (parse-e e3))] [(list 'let (list (list (? symbol? x) e1)) e2) (Let x (parse-e e1) (parse-e e2))] + [(cons 'match (cons e ms)) + (parse-match (parse-e e) ms)] [(list (or 'lambda 'λ) xs e) (if (and (list? xs) (andmap symbol? xs)) @@ -48,6 +50,32 @@ (App (parse-e e) (map parse-e es))] [_ (error "Parse error" s)])) +(define (parse-match e ms) + (match ms + ['() (Match e '() '())] + [(cons (list p r) ms) + (match (parse-match e ms) + [(Match e ps es) + (Match e + (cons (parse-pat p) ps) + (cons (parse-e r) es))])])) + +(define (parse-pat p) + (match p + [(? boolean?) (PLit p)] + [(? integer?) (PLit p)] + [(? char?) (PLit p)] + ['_ (PWild)] + [(? symbol?) (PVar p)] + [(list 'quote (list)) + (PLit '())] + [(list 'box p) + (PBox (parse-pat p))] + [(list 'cons p1 p2) + (PCons (parse-pat p1) (parse-pat p2))] + [(list 'and p1 p2) + (PAnd (parse-pat p1) (parse-pat p2))])) + ;; Datum -> Datum (define (parse-datum d) (match d diff --git a/langs/mountebank/test/test-runner.rkt b/langs/mountebank/test/test-runner.rkt index 09ec4f78..2b04d07b 100644 --- a/langs/mountebank/test/test-runner.rkt +++ b/langs/mountebank/test/test-runner.rkt @@ -203,6 +203,40 @@ '(let ((z 2)) (f 1 2))) 10) + ;; Knock examples + (check-equal? (run '(match 1)) 'err) + (check-equal? (run '(match 1 [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2])) + 2) + (check-equal? (run '(match 1 [2 1] [1 2] [0 3])) + 2) + (check-equal? (run '(match 1 [2 1] [0 3])) + 'err) + (check-equal? (run '(match 1 [_ 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x 2] [_ 3])) + 2) + (check-equal? (run '(match 1 [x x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [x x] [_ 3])) + (cons 1 2)) + (check-equal? (run '(match (cons 1 2) [(cons x y) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons x 2) x] [_ 3])) + 1) + (check-equal? (run '(match (cons 1 2) [(cons 3 2) 0] [_ 3])) + 3) + (check-equal? (run '(match 1 [(cons x y) x] [_ 3])) + 3) + (check-equal? (run '(match (cons 1 2) [(cons 1 3) 0] [(cons 1 y) y] [_ 3])) + 2) + (check-equal? (run '(match (box 1) [(box 1) 0] [_ 1])) + 0) + (check-equal? (run '(match (box 1) [(box 2) 0] [_ 1])) + 1) + (check-equal? (run '(match (box 1) [(box x) x] [_ 2])) + 1) ;; Loot examples (check-equal? (run '((λ (x) x) 5)) From b80f71e37346b1a93d5090a6eaedf2caf6822728 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Fri, 12 Nov 2021 09:43:16 -0500 Subject: [PATCH 14/21] Copy over symbol.c changes to Mountebank. --- langs/mountebank/symbol.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/langs/mountebank/symbol.c b/langs/mountebank/symbol.c index 08a9e5dd..d8ad04ad 100644 --- a/langs/mountebank/symbol.c +++ b/langs/mountebank/symbol.c @@ -7,7 +7,7 @@ static uint64_t gensym_ctr = 0; val_str_t *str_from_cstr(const char *); -int str_cmp(const val_str_t *, const val_str_t *); +int symb_cmp(const val_symb_t *, const val_symb_t *); val_str_t *str_dup(const val_str_t *); // binary tree node @@ -25,8 +25,9 @@ val_symb_t *intern_symbol(val_symb_t* symb) while (*curr) { struct Node *t = *curr; - int r = str_cmp((val_str_t*)symb, (val_str_t*)t->elem); + int r = symb_cmp(symb, t->elem); if (r == 0) { + // found it, so return saved pointer return t->elem; } else if (r < 0) { curr = &t->left; @@ -35,14 +36,10 @@ val_symb_t *intern_symbol(val_symb_t* symb) } } - // wasn't found, so insert it - + // wasn't found, so insert it and return pointer *curr = calloc(1, sizeof(struct Node)); - - struct Node* t = *curr; - t->elem = symb; // str_dup(str); - - return t->elem; + (*curr)->elem = symb; + return (*curr)->elem; } val_symb_t *str_to_symbol(const val_str_t *str) @@ -61,6 +58,10 @@ val_symb_t *gensym(void) val_str_t *str_from_cstr(const char *s) { int64_t len = strlen(s); + + if (len == 0) + return NULL; + val_str_t *str = malloc(sizeof(int64_t) + len * sizeof(val_char_t)); @@ -75,7 +76,7 @@ val_str_t *str_from_cstr(const char *s) return str; } -int str_cmp(const val_str_t *s1, const val_str_t *s2) +int symb_cmp(const val_symb_t *s1, const val_symb_t *s2) { int64_t len1 = s1->len; int64_t len2 = s2->len; From 11b4db3769c4731578ee8bff291e4ce2608d4501 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Sun, 14 Nov 2021 16:18:31 -0500 Subject: [PATCH 15/21] Fix bug in make-string. --- langs/hoax/compile-ops.rkt | 6 +++--- langs/hoax/test/test-runner.rkt | 5 ++++- langs/iniquity/compile-ops.rkt | 6 +++--- langs/iniquity/test/test-runner.rkt | 3 +++ langs/jig/compile-ops.rkt | 6 +++--- langs/jig/test/test-runner.rkt | 3 +++ langs/knock/compile-ops.rkt | 6 +++--- langs/knock/test/test-runner.rkt | 3 +++ langs/loot/compile-ops.rkt | 6 +++--- langs/loot/test/test-runner.rkt | 3 +++ langs/mug/compile-ops.rkt | 6 +++--- langs/mug/test/test-runner.rkt | 3 +++ 12 files changed, 37 insertions(+), 19 deletions(-) diff --git a/langs/hoax/compile-ops.rkt b/langs/hoax/compile-ops.rkt index 5461146b..a7ee5c5d 100644 --- a/langs/hoax/compile-ops.rkt +++ b/langs/hoax/compile-ops.rkt @@ -216,9 +216,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) diff --git a/langs/hoax/test/test-runner.rkt b/langs/hoax/test/test-runner.rkt index 22735575..403a02ef 100644 --- a/langs/hoax/test/test-runner.rkt +++ b/langs/hoax/test/test-runner.rkt @@ -149,7 +149,10 @@ (check-equal? (run '(string-ref "fred" 2)) #\e) (check-equal? (run '(string-ref "fred" 4)) 'err) (check-equal? (run '(string? "fred")) #t) - (check-equal? (run '(string? (cons 1 2))) #f)) + (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff")) (define (test-runner-io run) ;; Evildoer examples diff --git a/langs/iniquity/compile-ops.rkt b/langs/iniquity/compile-ops.rkt index dbfacad2..44f13545 100644 --- a/langs/iniquity/compile-ops.rkt +++ b/langs/iniquity/compile-ops.rkt @@ -216,9 +216,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) diff --git a/langs/iniquity/test/test-runner.rkt b/langs/iniquity/test/test-runner.rkt index 7a4381f6..0282f301 100644 --- a/langs/iniquity/test/test-runner.rkt +++ b/langs/iniquity/test/test-runner.rkt @@ -143,6 +143,9 @@ (check-equal? (run '(string-ref "fred" 4)) 'err) (check-equal? (run '(string? "fred")) #t) (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") ;; Iniquity tests (check-equal? (run diff --git a/langs/jig/compile-ops.rkt b/langs/jig/compile-ops.rkt index eda019ce..3aee63bd 100644 --- a/langs/jig/compile-ops.rkt +++ b/langs/jig/compile-ops.rkt @@ -216,9 +216,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) diff --git a/langs/jig/test/test-runner.rkt b/langs/jig/test/test-runner.rkt index 8e79d65e..a04433e7 100644 --- a/langs/jig/test/test-runner.rkt +++ b/langs/jig/test/test-runner.rkt @@ -143,6 +143,9 @@ (check-equal? (run '(string-ref "fred" 4)) 'err) (check-equal? (run '(string? "fred")) #t) (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") ;; Iniquity tests (check-equal? (run diff --git a/langs/knock/compile-ops.rkt b/langs/knock/compile-ops.rkt index eda019ce..3aee63bd 100644 --- a/langs/knock/compile-ops.rkt +++ b/langs/knock/compile-ops.rkt @@ -216,9 +216,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) diff --git a/langs/knock/test/test-runner.rkt b/langs/knock/test/test-runner.rkt index 26e1b4fc..2bfa21fd 100644 --- a/langs/knock/test/test-runner.rkt +++ b/langs/knock/test/test-runner.rkt @@ -143,6 +143,9 @@ (check-equal? (run '(string-ref "fred" 4)) 'err) (check-equal? (run '(string? "fred")) #t) (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") ;; Iniquity tests (check-equal? (run diff --git a/langs/loot/compile-ops.rkt b/langs/loot/compile-ops.rkt index 1616d9b8..2923cc2b 100644 --- a/langs/loot/compile-ops.rkt +++ b/langs/loot/compile-ops.rkt @@ -216,9 +216,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) diff --git a/langs/loot/test/test-runner.rkt b/langs/loot/test/test-runner.rkt index eb0e7e14..2932fb91 100644 --- a/langs/loot/test/test-runner.rkt +++ b/langs/loot/test/test-runner.rkt @@ -143,6 +143,9 @@ (check-equal? (run '(string-ref "fred" 4)) 'err) (check-equal? (run '(string? "fred")) #t) (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") ;; Iniquity tests (check-equal? (run diff --git a/langs/mug/compile-ops.rkt b/langs/mug/compile-ops.rkt index 0ef65bc2..c66bade2 100644 --- a/langs/mug/compile-ops.rkt +++ b/langs/mug/compile-ops.rkt @@ -239,9 +239,9 @@ (Sar rax char-shift) - (Add r9 1) ; adds 1 - (Sar r9 1) ; when - (Sal r9 1) ; len is odd + (Add r8 1) ; adds 1 + (Sar r8 1) ; when + (Sal r8 1) ; len is odd (Label loop) (Mov (Offset rbx 0) eax) diff --git a/langs/mug/test/test-runner.rkt b/langs/mug/test/test-runner.rkt index 7d6cc3e5..e0b687ae 100644 --- a/langs/mug/test/test-runner.rkt +++ b/langs/mug/test/test-runner.rkt @@ -143,6 +143,9 @@ (check-equal? (run '(string-ref "fred" 4)) 'err) (check-equal? (run '(string? "fred")) #t) (check-equal? (run '(string? (cons 1 2))) #f) + (check-equal? (run '(begin (make-string 3 #\f) + (make-string 3 #\f))) + "fff") ;; Iniquity tests (check-equal? (run From 83ee7c2f67b10eed0e36ddf95adabee8d839f3ad Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Sun, 14 Nov 2021 22:18:12 -0500 Subject: [PATCH 16/21] Add missing use of label-symbol->string in a86 printer. --- langs/a86/printer.rkt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langs/a86/printer.rkt b/langs/a86/printer.rkt index b54e6ff6..7602946a 100644 --- a/langs/a86/printer.rkt +++ b/langs/a86/printer.rkt @@ -54,7 +54,7 @@ [(Offset (? reg? r) i) (string-append "[" (reg->string r) " + " (number->string i) "]")] [(Offset (? label? l) i) - (string-append "[" (symbol->string l) " + " (number->string i) "]")] + (string-append "[" (label-symbol->string l) " + " (number->string i) "]")] [(Const l) (symbol->string l)] [(? exp?) (exp->string a)])) From 975680a6e39554008ed841115ae4bb5962ae8ba9 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Sun, 14 Nov 2021 22:20:39 -0500 Subject: [PATCH 17/21] Add Jle and Jge instructions to a86. --- langs/a86/ast.rkt | 8 ++++++++ langs/a86/printer.rkt | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/langs/a86/ast.rkt b/langs/a86/ast.rkt index 0f2d073a..847eef81 100644 --- a/langs/a86/ast.rkt +++ b/langs/a86/ast.rkt @@ -131,7 +131,9 @@ (instruct Je (x) check:target) (instruct Jne (x) check:target) (instruct Jl (x) check:target) +(instruct Jle (x) check:target) (instruct Jg (x) check:target) +(instruct Jge (x) check:target) (instruct And (dst src) check:src-dest) (instruct Or (dst src) check:src-dest) (instruct Xor (dst src) check:src-dest) @@ -191,7 +193,9 @@ (Je? x) (Jne? x) (Jl? x) + (Jle? x) (Jg? x) + (Jge? x) (And? x) (Or? x) (Xor? x) @@ -271,8 +275,12 @@ (cons s (label-uses asm))] [(cons (Jg (? label-symbol? s)) asm) (cons s (label-uses asm))] + [(cons (Jge (? label-symbol? s)) asm) + (cons s (label-uses asm))] [(cons (Jl (? label-symbol? s)) asm) (cons s (label-uses asm))] + [(cons (Jle (? label-symbol? s)) asm) + (cons s (label-uses asm))] [(cons (Call (? label-symbol? s)) asm) (cons s (label-uses asm))] [(cons (Lea _ (? label-symbol? s)) asm) diff --git a/langs/a86/printer.rkt b/langs/a86/printer.rkt index 7602946a..cfca1460 100644 --- a/langs/a86/printer.rkt +++ b/langs/a86/printer.rkt @@ -127,9 +127,15 @@ [(Jl l) (string-append tab "jl " (jump-target->string l))] + [(Jle l) + (string-append tab "jle " + (jump-target->string l))] [(Jg l) (string-append tab "jg " (jump-target->string l))] + [(Jge l) + (string-append tab "jge " + (jump-target->string l))] [(Call l) (string-append tab "call " (jump-target->string l))] From 4d842a80bf90bfacbb3c769c9b308ef8f4478ae5 Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Sun, 14 Nov 2021 23:00:58 -0500 Subject: [PATCH 18/21] Mug overhaul. --- langs/mug/ast.rkt | 5 +- langs/mug/compile-define.rkt | 69 +++++ langs/mug/compile-expr.rkt | 311 +++++++++++++++++++++++ langs/mug/compile-literals.rkt | 91 +++++++ langs/mug/compile-ops.rkt | 47 ++-- langs/mug/compile.rkt | 442 +-------------------------------- langs/mug/intern.rkt | 112 --------- langs/mug/interp-defun.rkt | 1 - langs/mug/interp-prims.rkt | 6 +- langs/mug/interp.rkt | 1 - langs/mug/parse.rkt | 5 +- langs/mug/symbol.c | 100 +------- langs/mug/test/test-runner.rkt | 14 +- langs/mug/utils.rkt | 53 ++++ 14 files changed, 581 insertions(+), 676 deletions(-) create mode 100644 langs/mug/compile-define.rkt create mode 100644 langs/mug/compile-expr.rkt create mode 100644 langs/mug/compile-literals.rkt delete mode 100644 langs/mug/intern.rkt create mode 100644 langs/mug/utils.rkt diff --git a/langs/mug/ast.rkt b/langs/mug/ast.rkt index cd1317a1..d83ef3fa 100644 --- a/langs/mug/ast.rkt +++ b/langs/mug/ast.rkt @@ -26,7 +26,7 @@ ;; | (App Expr (Listof Expr)) ;; | (Lam Id (Listof Id) Expr) ;; type Id = Symbol -;; type Op0 = 'read-byte | 'gensym +;; type Op0 = 'read-byte ;; type Op1 = 'add1 | 'sub1 | 'zero? ;; | 'char? | 'integer->char | 'char->integer ;; | 'write-byte | 'eof-object? @@ -34,7 +34,8 @@ ;; | 'empty? | 'cons? | 'box? ;; | 'vector? | 'vector-length ;; | 'string? | 'string-length -;; | 'symbol? | 'symbol->string | 'string->symbol +;; | 'symbol? | 'symbol->string +;; | 'string->symbol | 'string->uninterned-symbol ;; type Op2 = '+ | '- | '< | '= ;; | 'cons ;; | 'make-vector | 'vector-ref diff --git a/langs/mug/compile-define.rkt b/langs/mug/compile-define.rkt new file mode 100644 index 00000000..354e6f26 --- /dev/null +++ b/langs/mug/compile-define.rkt @@ -0,0 +1,69 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "fv.rkt" + "utils.rkt" + "compile-expr.rkt" + a86/ast) + +;; [Listof Defn] -> [Listof Id] +(define (define-ids ds) + (match ds + ['() '()] + [(cons (Defn f xs e) ds) + (cons f (define-ids ds))])) + +;; [Listof Defn] -> Asm +(define (compile-defines ds) + (match ds + ['() (seq)] + [(cons d ds) + (seq (compile-define d) + (compile-defines ds))])) + +;; Defn -> Asm +(define (compile-define d) + (match d + [(Defn f xs e) + (compile-lambda-define (Lam f xs e))])) + +;; Defns -> Asm +;; Compile the closures for ds and push them on the stack +(define (compile-defines-values ds) + (seq (alloc-defines ds 0) + (init-defines ds (reverse (define-ids ds)) 8) + (add-rbx-defines ds 0))) + +;; Defns Int -> Asm +;; Allocate closures for ds at given offset, but don't write environment yet +(define (alloc-defines ds off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx off) rax) + (Mov rax rbx) + (Add rax off) + (Or rax type-proc) + (Push rax) + (alloc-defines ds (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns CEnv Int -> Asm +;; Initialize the environment for each closure for ds at given offset +(define (init-defines ds c off) + (match ds + ['() (seq)] + [(cons (Defn f xs e) ds) + (let ((fvs (fv (Lam f xs e)))) + (seq (free-vars-to-heap fvs c off) + (init-defines ds c (+ off (* 8 (add1 (length fvs)))))))])) + +;; Defns Int -> Asm +;; Compute adjustment to rbx for allocation of all ds +(define (add-rbx-defines ds n) + (match ds + ['() (seq (Add rbx (* n 8)))] + [(cons (Defn f xs e) ds) + (add-rbx-defines ds (+ n (add1 (length (fv (Lam f xs e))))))])) diff --git a/langs/mug/compile-expr.rkt b/langs/mug/compile-expr.rkt new file mode 100644 index 00000000..97e0319c --- /dev/null +++ b/langs/mug/compile-expr.rkt @@ -0,0 +1,311 @@ +#lang racket +(provide (all-defined-out)) +(require "ast.rkt" + "types.rkt" + "lambdas.rkt" + "fv.rkt" + "utils.rkt" + "compile-ops.rkt" + a86/ast) + +;; Registers used +(define rax 'rax) ; return +(define rbx 'rbx) ; heap +(define rsp 'rsp) ; stack +(define rdi 'rdi) ; arg + +;; Expr CEnv Bool -> Asm +(define (compile-e e c t?) + (match e + [(Int i) (compile-value i)] + [(Bool b) (compile-value b)] + [(Char c) (compile-value c)] + [(Eof) (compile-value eof)] + [(Empty) (compile-value '())] + [(Str s) (compile-string s)] + [(Symb s) (compile-symbol s)] + [(Var x) (compile-variable x c)] + [(Prim0 p) (compile-prim0 p c)] + [(Prim1 p e) (compile-prim1 p e c)] + [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] + [(Prim3 p e1 e2 e3) (compile-prim3 p e1 e2 e3 c)] + [(If e1 e2 e3) (compile-if e1 e2 e3 c t?)] + [(Begin e1 e2) (compile-begin e1 e2 c t?)] + [(Let x e1 e2) (compile-let x e1 e2 c t?)] + [(App e es) (compile-app e es c t?)] + [(Lam f xs e) (compile-lam f xs e c)] + [(Match e ps es) (compile-match e ps es c t?)])) + +;; Symbol -> Asm +(define (compile-symbol s) + (seq (Lea rax (Plus (symbol->data-label s) type-symb)))) + +;; String -> Asm +(define (compile-string s) + (seq (Lea rax (Plus (symbol->data-label (string->symbol s)) type-str)))) + +;; Value -> Asm +(define (compile-value v) + (seq (Mov rax (imm->bits v)))) + +;; Id CEnv -> Asm +(define (compile-variable x c) + (match (lookup x c) + [#f (error "unbound variable")] ;(seq (Lea rax (symbol->label x)))] + [i (seq (Mov rax (Offset rsp i)))])) + +;; Op0 CEnv -> Asm +(define (compile-prim0 p c) + (compile-op0 p)) + +;; Op1 Expr CEnv -> Asm +(define (compile-prim1 p e c) + (seq (compile-e e c #f) + (compile-op1 p))) + +;; Op2 Expr Expr CEnv -> Asm +(define (compile-prim2 p e1 e2 c) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons #f c) #f) + (compile-op2 p))) + +;; Op3 Expr Expr Expr CEnv -> Asm +(define (compile-prim3 p e1 e2 e3 c) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons #f c) #f) + (Push rax) + (compile-e e3 (cons #f (cons #f c)) #f) + (compile-op3 p))) + +;; Expr Expr Expr CEnv Bool -> Asm +(define (compile-if e1 e2 e3 c t?) + (let ((l1 (gensym 'if)) + (l2 (gensym 'if))) + (seq (compile-e e1 c #f) + (Cmp rax val-false) + (Je l1) + (compile-e e2 c t?) + (Jmp l2) + (Label l1) + (compile-e e3 c t?) + (Label l2)))) + +;; Expr Expr CEnv Bool -> Asm +(define (compile-begin e1 e2 c t?) + (seq (compile-e e1 c #f) + (compile-e e2 c t?))) + +;; Id Expr Expr CEnv Bool -> Asm +(define (compile-let x e1 e2 c t?) + (seq (compile-e e1 c #f) + (Push rax) + (compile-e e2 (cons x c) t?) + (Add rsp 8))) + +;; Id [Listof Expr] CEnv Bool -> Asm +(define (compile-app f es c t?) + ;(compile-app-nontail f es c) + (if t? + (compile-app-tail f es c) + (compile-app-nontail f es c))) + +;; Expr [Listof Expr] CEnv -> Asm +(define (compile-app-tail e es c) + (seq (compile-es (cons e es) c) + (move-args (add1 (length es)) (length c)) + (Add rsp (* 8 (length c))) + (Mov rax (Offset rsp (* 8 (length es)))) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) + (Jmp rax))) + +;; Integer Integer -> Asm +(define (move-args i off) + (cond [(zero? off) (seq)] + [(zero? i) (seq)] + [else + (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) + (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) + (move-args (sub1 i) off))])) + +;; Expr [Listof Expr] CEnv -> Asm +;; The return address is placed above the arguments, so callee pops +;; arguments and return address is next frame +(define (compile-app-nontail e es c) + (let ((r (gensym 'ret)) + (i (* 8 (length es)))) + (seq (Lea rax r) + (Push rax) + (compile-es (cons e es) (cons #f c)) + (Mov rax (Offset rsp i)) + (assert-proc rax) + (Xor rax type-proc) + (Mov rax (Offset rax 0)) ; fetch the code label + (Jmp rax) + (Label r)))) + +;; Id [Listof Id] Expr CEnv -> Asm +(define (compile-lam f xs e c) + (let ((fvs (fv (Lam f xs e)))) + (seq (Lea rax (symbol->label f)) + (Mov (Offset rbx 0) rax) + (free-vars-to-heap fvs c 8) + (Mov rax rbx) ; return value + (Or rax type-proc) + (Add rbx (* 8 (add1 (length fvs))))))) + +;; [Listof Id] CEnv Int -> Asm +;; Copy the values of given free variables into the heap at given offset +(define (free-vars-to-heap fvs c off) + (match fvs + ['() (seq)] + [(cons x fvs) + (seq (Mov r8 (Offset rsp (lookup x c))) + (Mov (Offset rbx off) r8) + (free-vars-to-heap fvs c (+ off 8)))])) + +;; [Listof Lam] -> Asm +(define (compile-lambda-defines ls) + (match ls + ['() (seq)] + [(cons l ls) + (seq (compile-lambda-define l) + (compile-lambda-defines ls))])) + +;; Lam -> Asm +(define (compile-lambda-define l) + (let ((fvs (fv l))) + (match l + [(Lam f xs e) + (let ((env (append (reverse fvs) (reverse xs) (list #f)))) + (seq (Label (symbol->label f)) + (Mov rax (Offset rsp (* 8 (length xs)))) + (Xor rax type-proc) + (copy-env-to-stack fvs 8) + (compile-e e env #t) + (Add rsp (* 8 (length env))) ; pop env + (Ret)))]))) + +;; [Listof Id] Int -> Asm +;; Copy the closure environment at given offset to stack +(define (copy-env-to-stack fvs off) + (match fvs + ['() (seq)] + [(cons _ fvs) + (seq (Mov r9 (Offset rax off)) + (Push r9) + (copy-env-to-stack fvs (+ 8 off)))])) + +;; [Listof Expr] CEnv -> Asm +(define (compile-es es c) + (match es + ['() '()] + [(cons e es) + (seq (compile-e e c #f) + (Push rax) + (compile-es es (cons #f c)))])) + +;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm +(define (compile-match e ps es c t?) + (let ((done (gensym))) + (seq (compile-e e c t?) + (Push rax) ; save away to be restored by each clause + (compile-match-clauses ps es (cons #f c) done t?) + (Jmp 'raise_error_align) + (Label done) + (Add rsp 8)))) ; pop the saved value being matched + +;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm +(define (compile-match-clauses ps es c done t?) + (match* (ps es) + [('() '()) (seq)] + [((cons p ps) (cons e es)) + (seq (compile-match-clause p e c done t?) + (compile-match-clauses ps es c done t?))])) + +;; Pat Expr CEnv Symbol Bool -> Asm +(define (compile-match-clause p e c done t?) + (let ((next (gensym))) + (match (compile-pattern p '() next) + [(list i f cm) + (seq (Mov rax (Offset rsp 0)) ; restore value being matched + i + (compile-e e (append cm c) t?) + (Add rsp (* 8 (length cm))) + (Jmp done) + f + (Label next))]))) + +;; Pat CEnv Symbol -> (list Asm Asm CEnv) +(define (compile-pattern p cm next) + (match p + [(PWild) + (list (seq) (seq) cm)] + [(PVar x) + (list (seq (Push rax)) + (seq) + (cons x cm))] + [(PLit l) + (let ((fail (gensym))) + (list (seq (Cmp rax (imm->bits l)) + (Jne fail)) + (seq (Label fail) + (Add rsp (* 8 (length cm))) + (Jmp next)) + cm))] + [(PAnd p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (list + (seq (Push rax) + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 f2) + cm2)])])] + [(PBox p) + (match (compile-pattern p cm next) + [(list i1 f1 cm1) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-box) + (Jne fail) + (Xor rax type-box) + (Mov rax (Offset rax 0)) + i1) + (seq f1 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm1))])] + [(PCons p1 p2) + (match (compile-pattern p1 (cons #f cm) next) + [(list i1 f1 cm1) + (match (compile-pattern p2 cm1 next) + [(list i2 f2 cm2) + (let ((fail (gensym))) + (list + (seq (Mov r8 rax) + (And r8 ptr-mask) + (Cmp r8 type-cons) + (Jne fail) + (Xor rax type-cons) + (Mov r8 (Offset rax 0)) + (Push r8) ; push cdr + (Mov rax (Offset rax 8)) ; mov rax car + i1 + (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) + i2) + (seq f1 + f2 + (Label fail) + (Add rsp (* 8 (length cm))) ; haven't pushed anything yet + (Jmp next)) + cm2))])])])) diff --git a/langs/mug/compile-literals.rkt b/langs/mug/compile-literals.rkt new file mode 100644 index 00000000..95a1b34d --- /dev/null +++ b/langs/mug/compile-literals.rkt @@ -0,0 +1,91 @@ +#lang racket +(provide compile-literals init-symbol-table literals) +(require "ast.rkt" + "utils.rkt" + a86/ast) + +(define rdi 'rdi) + +;; Prog -> Asm +(define (compile-literals p) + (append-map compile-literal (literals p))) + +;; Symbol -> Asm +(define (compile-literal s) + (let ((str (symbol->string s))) + (seq (Label (symbol->data-label s)) + (Dq (string-length str)) + (compile-string-chars (string->list str)) + (if (odd? (string-length str)) + (seq (Dd 0)) + (seq))))) + +;; Prog -> Asm +;; Call intern_symbol on every symbol in the program +(define (init-symbol-table p) + (match (symbols p) + ['() (seq)] + [ss (seq (Sub 'rsp 8) + (append-map init-symbol ss) + (Add 'rsp 8))])) + +;; Symbol -> Asm +(define (init-symbol s) + (seq (Lea rdi (symbol->data-label s)) + (Call 'intern_symbol))) + +;; Prog -> [Listof Symbol] +(define (literals p) + (remove-duplicates + (map to-symbol (literals* p)))) + +;; Prog -> [Listof Symbol] +(define (symbols p) + (remove-duplicates (filter symbol? (literals* p)))) + +;; (U String Symbol) -> Symbol +(define (to-symbol s) + (if (string? s) + (string->symbol s) + s)) + +;; Prog -> [Listof (U Symbol String)] +(define (literals* p) + (match p + [(Prog ds e) + (append (append-map literals-d ds) (literals-e e))])) + +(define (literals-d d) + (match d + [(Defn f xs e) + (literals-e e)])) + +(define (literals-e e) + (match e + [(Str s) (list s)] + [(Symb s) (list s)] + [(Prim1 p e) + (literals-e e)] + [(Prim2 p e1 e2) + (append (literals-e e1) (literals-e e2))] + [(Prim3 p e1 e2 e3) + (append (literals-e e1) (literals-e e2) (literals-e e3))] + [(If e1 e2 e3) + (append (literals-e e1) (literals-e e2) (literals-e e3))] + [(Begin e1 e2) + (append (literals-e e1) (literals-e e2))] + [(Let x e1 e2) + (append (literals-e e1) (literals-e e2))] + [(App e1 es) + (append (literals-e e1) (append-map literals-e es))] + [(Lam f xs e) + (literals-e e)] + [_ '()])) + +;; [Listof Char] -> Asm +(define (compile-string-chars cs) + (match cs + ['() (seq)] + [(cons c cs) + (seq (Dd (char->integer c)) + (compile-string-chars cs))])) diff --git a/langs/mug/compile-ops.rkt b/langs/mug/compile-ops.rkt index c66bade2..44970b4c 100644 --- a/langs/mug/compile-ops.rkt +++ b/langs/mug/compile-ops.rkt @@ -1,11 +1,13 @@ #lang racket (provide (all-defined-out)) -(require "ast.rkt" "types.rkt" a86/ast) +(require "ast.rkt" "types.rkt" "utils.rkt" a86/ast) (define rax 'rax) ; return (define eax 'eax) ; 32-bit load/store (define rbx 'rbx) ; heap -(define rdi 'rdi) ; arg +(define rdi 'rdi) ; arg1 +(define rsi 'rsi) ; arg2 +(define rdx 'rdx) ; arg3 (define r8 'r8) ; scratch (define r9 'r9) ; scratch (define r10 'r10) ; scratch @@ -123,12 +125,29 @@ ['symbol->string (seq (assert-symbol rax) (Xor rax type-symb) - (Mov rdi rax) - pad-stack - ;; FIXME: this allocates off-heap - (Call 'str_dup) - unpad-stack - (Or rax type-str))])) + char-array-copy + (Or rax type-str))] + ['string->uninterned-symbol + (seq (assert-string rax) + (Xor rax type-str) + char-array-copy + (Or rax type-symb))])) + +;; Asm +;; Copy sized array of characters pointed to by rax +(define char-array-copy + (seq (Mov rdi rbx) ; dst + (Mov rsi rax) ; src + (Mov rdx (Offset rax 0)) ; len + (Add rdx 1) ; #words = 1 + (len+1)/2 + (Sar rdx 1) + (Add rdx 1) + (Sal rdx 3) ; #bytes = 8*#words + pad-stack + (Call 'memcpy) + unpad-stack + (Mov rax rbx) + (Add rbx rdx))) ;; Op2 -> Asm (define (compile-op2 p) @@ -374,15 +393,3 @@ (Je l1) (Mov rax val-false) (Label l1)))) - -;; Asm -;; Dynamically pad the stack to be aligned for a call -(define pad-stack - (seq (Mov r15 rsp) - (And r15 #b1000) - (Sub rsp r15))) - -;; Asm -;; Undo the stack alignment after a call -(define unpad-stack - (seq (Add rsp r15))) diff --git a/langs/mug/compile.rkt b/langs/mug/compile.rkt index b9d1b767..731b3315 100644 --- a/langs/mug/compile.rkt +++ b/langs/mug/compile.rkt @@ -4,12 +4,13 @@ "types.rkt" "lambdas.rkt" "fv.rkt" - "intern.rkt" - "compile-ops.rkt" + "utils.rkt" + "compile-define.rkt" + "compile-expr.rkt" + "compile-literals.rkt" a86/ast) ;; Registers used -(define rax 'rax) ; return (define rbx 'rbx) ; heap (define rsp 'rsp) ; stack (define rdi 'rdi) ; arg @@ -18,15 +19,13 @@ ;; Prog -> Asm (define (compile p) - (match (intern p) - [(cons (Prog ds e) q) + (match p + [(Prog ds e) (prog (externs) (Global 'entry) (Label 'entry) (Mov rbx rdi) ; recv heap pointer - (Sub 'rsp 8) - (init-symbol-table q) - (Add 'rsp 8) + (init-symbol-table p) (compile-defines-values ds) (compile-e e (reverse (define-ids ds)) #t) (Add rsp (* 8 (length ds))) ;; pop function definitions @@ -37,435 +36,12 @@ pad-stack (Call 'raise_error) (Data) - (compile-literals q))])) + (compile-literals p))])) (define (externs) (seq (Extern 'peek_byte) (Extern 'read_byte) (Extern 'write_byte) (Extern 'raise_error) - (Extern 'gensym) (Extern 'intern_symbol) - (Extern 'str_dup))) - -;; DEnv -> Asm -(define (compile-literals q) - (match q - ['() (seq)] - [(cons (cons s l) q) - (seq (compile-literal (to-string s) l) - (compile-literals q))])) - -;; String Label -> Asm -(define (compile-literal s l) - (seq (Label l) - (Dq (string-length s)) - (compile-string-chars (string->list s)) - (if (odd? (string-length s)) - (seq (Dd 0)) - (seq)))) - -;; [Listof Char] -> Asm -(define (compile-string-chars cs) - (match cs - ['() (seq)] - [(cons c cs) - (seq (Dd (char->integer c)) - (compile-string-chars cs))])) - -;; DEnv -> Asm -;; Call intern_symbol on every symbol in the program -(define (init-symbol-table q) - (append-map init-symbol q)) - -;; (cons (U String Symbol) Symbol) -> Asm -(define (init-symbol qb) - (match qb - [(cons (? symbol? s) l) - (seq (Lea rdi l) - (Call 'intern_symbol))] - [_ (seq)])) - -;; [Listof Defn] -> [Listof Id] -(define (define-ids ds) - (match ds - ['() '()] - [(cons (Defn f xs e) ds) - (cons f (define-ids ds))])) - -;; [Listof Defn] -> Asm -(define (compile-defines ds) - (match ds - ['() (seq)] - [(cons d ds) - (seq (compile-define d) - (compile-defines ds))])) - -;; Defn -> Asm -(define (compile-define d) - (match d - [(Defn f xs e) - (compile-lambda-define (Lam f xs e))])) - -;; [Listof Lam] -> Asm -(define (compile-lambda-defines ls) - (match ls - ['() (seq)] - [(cons l ls) - (seq (compile-lambda-define l) - (compile-lambda-defines ls))])) - -;; Lam -> Asm -(define (compile-lambda-define l) - (let ((fvs (fv l))) - (match l - [(Lam f xs e) - (let ((env (append (reverse fvs) (reverse xs) (list #f)))) - (seq (Label (symbol->label f)) - (Mov rax (Offset rsp (* 8 (length xs)))) - (Xor rax type-proc) - (copy-env-to-stack fvs 8) - (compile-e e env #t) - (Add rsp (* 8 (length env))) ; pop env - (Ret)))]))) - -;; [Listof Id] Int -> Asm -;; Copy the closure environment at given offset to stack -(define (copy-env-to-stack fvs off) - (match fvs - ['() (seq)] - [(cons _ fvs) - (seq (Mov r9 (Offset rax off)) - (Push r9) - (copy-env-to-stack fvs (+ 8 off)))])) - -;; Expr CEnv Bool -> Asm -(define (compile-e e c t?) - (match e - [(Int i) (compile-value i)] - [(Bool b) (compile-value b)] - [(Char c) (compile-value c)] - [(Eof) (compile-value eof)] - [(Empty) (compile-value '())] - [(Var x) (compile-variable x c)] - [(Ref l t) (compile-ref l t)] - [(Prim0 p) (compile-prim0 p c)] - [(Prim1 p e) (compile-prim1 p e c)] - [(Prim2 p e1 e2) (compile-prim2 p e1 e2 c)] - [(Prim3 p e1 e2 e3) (compile-prim3 p e1 e2 e3 c)] - [(If e1 e2 e3) (compile-if e1 e2 e3 c t?)] - [(Begin e1 e2) (compile-begin e1 e2 c t?)] - [(Let x e1 e2) (compile-let x e1 e2 c t?)] - [(App e es) (compile-app e es c t?)] - [(Lam f xs e) (compile-lam f xs e c)] - [(Match e ps es) (compile-match e ps es c t?)])) - -;; Value -> Asm -(define (compile-value v) - (seq (Mov rax (imm->bits v)))) - -;; Id CEnv -> Asm -(define (compile-variable x c) - (let ((i (lookup x c))) - (seq (Mov rax (Offset rsp i))))) - -;; Label Tag -> Asm -(define (compile-ref l t) - (seq (Lea rax (Plus l t)))) - -;; Op0 CEnv -> Asm -(define (compile-prim0 p c) - (compile-op0 p)) - -;; Op1 Expr CEnv -> Asm -(define (compile-prim1 p e c) - (seq (compile-e e c #f) - (compile-op1 p))) - -;; Op2 Expr Expr CEnv -> Asm -(define (compile-prim2 p e1 e2 c) - (seq (compile-e e1 c #f) - (Push rax) - (compile-e e2 (cons #f c) #f) - (compile-op2 p))) - -;; Op3 Expr Expr Expr CEnv -> Asm -(define (compile-prim3 p e1 e2 e3 c) - (seq (compile-e e1 c #f) - (Push rax) - (compile-e e2 (cons #f c) #f) - (Push rax) - (compile-e e3 (cons #f (cons #f c)) #f) - (compile-op3 p))) - -;; Expr Expr Expr CEnv Bool -> Asm -(define (compile-if e1 e2 e3 c t?) - (let ((l1 (gensym 'if)) - (l2 (gensym 'if))) - (seq (compile-e e1 c #f) - (Cmp rax val-false) - (Je l1) - (compile-e e2 c t?) - (Jmp l2) - (Label l1) - (compile-e e3 c t?) - (Label l2)))) - -;; Expr Expr CEnv Bool -> Asm -(define (compile-begin e1 e2 c t?) - (seq (compile-e e1 c #f) - (compile-e e2 c t?))) - -;; Id Expr Expr CEnv Bool -> Asm -(define (compile-let x e1 e2 c t?) - (seq (compile-e e1 c #f) - (Push rax) - (compile-e e2 (cons x c) t?) - (Add rsp 8))) - -;; Id [Listof Expr] CEnv Bool -> Asm -(define (compile-app f es c t?) - ;(compile-app-nontail f es c) - (if t? - (compile-app-tail f es c) - (compile-app-nontail f es c))) - -;; Expr [Listof Expr] CEnv -> Asm -(define (compile-app-tail e es c) - (seq (compile-es (cons e es) c) - (move-args (add1 (length es)) (length c)) - (Add rsp (* 8 (length c))) - (Mov rax (Offset rsp (* 8 (length es)))) - (assert-proc rax) - (Xor rax type-proc) - (Mov rax (Offset rax 0)) - (Jmp rax))) - -;; Integer Integer -> Asm -(define (move-args i off) - (cond [(zero? off) (seq)] - [(zero? i) (seq)] - [else - (seq (Mov r8 (Offset rsp (* 8 (sub1 i)))) - (Mov (Offset rsp (* 8 (+ off (sub1 i)))) r8) - (move-args (sub1 i) off))])) - -;; Expr [Listof Expr] CEnv -> Asm -;; The return address is placed above the arguments, so callee pops -;; arguments and return address is next frame -(define (compile-app-nontail e es c) - (let ((r (gensym 'ret)) - (i (* 8 (length es)))) - (seq (Lea rax r) - (Push rax) - (compile-es (cons e es) (cons #f c)) - (Mov rax (Offset rsp i)) - (assert-proc rax) - (Xor rax type-proc) - (Mov rax (Offset rax 0)) ; fetch the code label - (Jmp rax) - (Label r)))) - -;; Defns -> Asm -;; Compile the closures for ds and push them on the stack -(define (compile-defines-values ds) - (seq (alloc-defines ds 0) - (init-defines ds (reverse (define-ids ds)) 8) - (add-rbx-defines ds 0))) - -;; Defns Int -> Asm -;; Allocate closures for ds at given offset, but don't write environment yet -(define (alloc-defines ds off) - (match ds - ['() (seq)] - [(cons (Defn f xs e) ds) - (let ((fvs (fv (Lam f xs e)))) - (seq (Lea rax (symbol->label f)) - (Mov (Offset rbx off) rax) - (Mov rax rbx) - (Add rax off) - (Or rax type-proc) - (Push rax) - (alloc-defines ds (+ off (* 8 (add1 (length fvs)))))))])) - -;; Defns CEnv Int -> Asm -;; Initialize the environment for each closure for ds at given offset -(define (init-defines ds c off) - (match ds - ['() (seq)] - [(cons (Defn f xs e) ds) - (let ((fvs (fv (Lam f xs e)))) - (seq (free-vars-to-heap fvs c off) - (init-defines ds c (+ off (* 8 (add1 (length fvs)))))))])) - -;; Defns Int -> Asm -;; Compute adjustment to rbx for allocation of all ds -(define (add-rbx-defines ds n) - (match ds - ['() (seq (Add rbx (* n 8)))] - [(cons (Defn f xs e) ds) - (add-rbx-defines ds (+ n (add1 (length (fv (Lam f xs e))))))])) - -;; Id [Listof Id] Expr CEnv -> Asm -(define (compile-lam f xs e c) - (let ((fvs (fv (Lam f xs e)))) - (seq (Lea rax (symbol->label f)) - (Mov (Offset rbx 0) rax) - (free-vars-to-heap fvs c 8) - (Mov rax rbx) ; return value - (Or rax type-proc) - (Add rbx (* 8 (add1 (length fvs))))))) - -;; [Listof Id] CEnv Int -> Asm -;; Copy the values of given free variables into the heap at given offset -(define (free-vars-to-heap fvs c off) - (match fvs - ['() (seq)] - [(cons x fvs) - (seq (Mov r8 (Offset rsp (lookup x c))) - (Mov (Offset rbx off) r8) - (free-vars-to-heap fvs c (+ off 8)))])) - -;; [Listof Expr] CEnv -> Asm -(define (compile-es es c) - (match es - ['() '()] - [(cons e es) - (seq (compile-e e c #f) - (Push rax) - (compile-es es (cons #f c)))])) - -;; Expr [Listof Pat] [Listof Expr] CEnv Bool -> Asm -(define (compile-match e ps es c t?) - (let ((done (gensym))) - (seq (compile-e e c t?) - (Push rax) ; save away to be restored by each clause - (compile-match-clauses ps es (cons #f c) done t?) - (Jmp 'raise_error_align) - (Label done) - (Add rsp 8)))) ; pop the saved value being matched - -;; [Listof Pat] [Listof Expr] CEnv Symbol Bool -> Asm -(define (compile-match-clauses ps es c done t?) - (match* (ps es) - [('() '()) (seq)] - [((cons p ps) (cons e es)) - (seq (compile-match-clause p e c done t?) - (compile-match-clauses ps es c done t?))])) - -;; Pat Expr CEnv Symbol Bool -> Asm -(define (compile-match-clause p e c done t?) - (let ((next (gensym))) - (match (compile-pattern p '() next) - [(list i f cm) - (seq (Mov rax (Offset rsp 0)) ; restore value being matched - i - (compile-e e (append cm c) t?) - (Add rsp (* 8 (length cm))) - (Jmp done) - f - (Label next))]))) - -;; Pat CEnv Symbol -> (list Asm Asm CEnv) -(define (compile-pattern p cm next) - (match p - [(PWild) - (list (seq) (seq) cm)] - [(PVar x) - (list (seq (Push rax)) - (seq) - (cons x cm))] - [(PLit l) - (let ((fail (gensym))) - (list (seq (Cmp rax (imm->bits l)) - (Jne fail)) - (seq (Label fail) - (Add rsp (* 8 (length cm))) - (Jmp next)) - cm))] - [(PAnd p1 p2) - (match (compile-pattern p1 (cons #f cm) next) - [(list i1 f1 cm1) - (match (compile-pattern p2 cm1 next) - [(list i2 f2 cm2) - (list - (seq (Push rax) - i1 - (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) - i2) - (seq f1 f2) - cm2)])])] - [(PBox p) - (match (compile-pattern p cm next) - [(list i1 f1 cm1) - (let ((fail (gensym))) - (list - (seq (Mov r8 rax) - (And r8 ptr-mask) - (Cmp r8 type-box) - (Jne fail) - (Xor rax type-box) - (Mov rax (Offset rax 0)) - i1) - (seq f1 - (Label fail) - (Add rsp (* 8 (length cm))) ; haven't pushed anything yet - (Jmp next)) - cm1))])] - [(PCons p1 p2) - (match (compile-pattern p1 (cons #f cm) next) - [(list i1 f1 cm1) - (match (compile-pattern p2 cm1 next) - [(list i2 f2 cm2) - (let ((fail (gensym))) - (list - (seq (Mov r8 rax) - (And r8 ptr-mask) - (Cmp r8 type-cons) - (Jne fail) - (Xor rax type-cons) - (Mov r8 (Offset rax 0)) - (Push r8) ; push cdr - (Mov rax (Offset rax 8)) ; mov rax car - i1 - (Mov rax (Offset rsp (* 8 (- (sub1 (length cm1)) (length cm))))) - i2) - (seq f1 - f2 - (Label fail) - (Add rsp (* 8 (length cm))) ; haven't pushed anything yet - (Jmp next)) - cm2))])])])) - -;; Id CEnv -> Integer -(define (lookup x cenv) - (match cenv - ['() (error "undefined variable:" x)] - [(cons y rest) - (match (eq? x y) - [#t 0] - [#f (+ 8 (lookup x rest))])])) - -;; (U String Symbol) -> String -(define (to-string s) - (if (symbol? s) - (symbol->string s) - s)) - -;; Symbol -> Label -;; Produce a symbol that is a valid Nasm label -(define (symbol->label s) - (string->symbol - (string-append - "label_" - (list->string - (map (λ (c) - (if (or (char<=? #\a c #\z) - (char<=? #\A c #\Z) - (char<=? #\0 c #\9) - (memq c '(#\_ #\$ #\# #\@ #\~ #\. #\?))) - c - #\_)) - (string->list (symbol->string s)))) - "_" - (number->string (eq-hash-code s) 16)))) + (Extern 'memcpy))) diff --git a/langs/mug/intern.rkt b/langs/mug/intern.rkt deleted file mode 100644 index 11c8f8b7..00000000 --- a/langs/mug/intern.rkt +++ /dev/null @@ -1,112 +0,0 @@ -#lang racket -(require "ast.rkt" "types.rkt") -(provide intern - (struct-out Ref)) - -;; type QEnv = [Listof (cons (U String Symbol) Ref)] - -(struct Ref (label type-tag) #:prefab) - -;; (U String Symbol) QEnv -> [Maybe Ref] -(define (lookup d q) - (match q - ['() #f] - [(cons (cons d0 r) q) - (if (equal? d d0) - r - (lookup d q))])) - -;; (U Symbol String) QEnv Tag -> QEnv -;; Either lookup prior reference or create a new one -(define (intern! s q type) - (match (lookup s q) - [#f - (let ((l (gensym 'lit))) - (cons (Ref l type) - (cons (cons s l) q)))] - [l (cons (Ref l type) q)])) - -;; Prog -> (cons Prog QEnv) -(define (intern p) - (intern-prog p '())) - -;; Prog QEnv -> (cons Prog QEnv) -(define (intern-prog p q) - (match p - [(Prog ds e) - (match (intern-ds ds q) - [(cons ds q) - (match (intern-e e q) - [(cons e q) - (cons (Prog ds e) q)])])])) - -;; Defns QEnv -> (cons Defns QEnv) -(define (intern-ds ds q) - (match ds - ['() (cons '() q)] - [(cons d ds) - (match (intern-d d q) - [(cons d q) - (match (intern-ds ds q) - [(cons ds q) - (cons (cons d ds) q)])])])) - -;; Defn QEnv -> (cons Defn QEnv) -(define (intern-d d q) - (match d - [(Defn f xs e) - (match (intern-e e q) - [(cons e q) - (cons (Defn f xs e) q)])])) - -;; Expr QEnv -> (cons Expr QEnv) -(define (intern-e e q) - (match e - [(Str s) - (intern! s q type-str)] - [(Symb s) - (intern! s q type-symb)] - [(Prim1 p e) - (match (intern-e e q) - [(cons e q) - (cons (Prim1 p e) q)])] - [(Prim2 p e1 e2) - (match (intern-es (list e1 e2) q) - [(cons (list e1 e2) q) - (cons (Prim2 p e1 e2) q)])] - [(Prim3 p e1 e2 e3) - (match (intern-es (list e1 e2 e3) q) - [(cons (list e1 e2 e3) q) - (cons (Prim3 p e1 e2 e3) q)])] - [(If e1 e2 e3) - (match (intern-es (list e1 e2 e3) q) - [(cons (list e1 e2 e3) q) - (cons (If e1 e2 e3) q)])] - [(Begin e1 e2) - (match (intern-es (list e1 e2) q) - [(cons (list e1 e2) q) - (cons (Begin e1 e2) q)])] - [(Let x e1 e2) - (match (intern-es (list e1 e2) q) - [(cons (list e1 e2) q) - (cons (Let x e1 e2) q)])] - [(App e1 es) - (match (intern-es (cons e1 es) q) - [(cons (cons e1 es) q) - (cons (App e1 es) q)])] - [(Lam f xs e) - (match (intern-e e q) - [(cons e q) - (cons (Lam f xs e) q)])] - [_ (cons e q)])) - -;; [Listof Expr] QEnv -> (cons [Listof Expr] QEnv) -(define (intern-es es q) - (match es - ['() (cons '() q)] - [(cons e es) - (match (intern-e e q) - [(cons e q) - (match (intern-es es q) - [(cons es q) - (cons (cons e es) q)])])])) diff --git a/langs/mug/interp-defun.rkt b/langs/mug/interp-defun.rkt index 315729a4..b4d57880 100644 --- a/langs/mug/interp-defun.rkt +++ b/langs/mug/interp-defun.rkt @@ -43,7 +43,6 @@ [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] - [(Prim0 'gensym) (gensym)] [(Prim1 p e) (match (interp-env e r ds) ['err 'err] diff --git a/langs/mug/interp-prims.rkt b/langs/mug/interp-prims.rkt index 79bcf361..7797de69 100644 --- a/langs/mug/interp-prims.rkt +++ b/langs/mug/interp-prims.rkt @@ -25,8 +25,10 @@ [(list 'string? v) (string? v)] [(list 'string-length (? string?)) (string-length v)] [(list 'symbol? v) (symbol? v)] - [(list 'string->symbol (? string?)) (string->symbol v)] - [(list 'symbol->string (? symbol?)) (symbol->string v)] + [(list 'symbol->string (? symbol?)) (symbol->string v)] + [(list 'string->symbol (? string?)) (string->symbol v)] + [(list 'string->uninterned-symbol (? string?)) + (string->uninterned-symbol v)] [_ 'err])) ;; Op2 Value Value -> Answer diff --git a/langs/mug/interp.rkt b/langs/mug/interp.rkt index 876646d5..3cc95f9f 100644 --- a/langs/mug/interp.rkt +++ b/langs/mug/interp.rkt @@ -42,7 +42,6 @@ [(Prim0 'void) (void)] [(Prim0 'read-byte) (read-byte)] [(Prim0 'peek-byte) (peek-byte)] - [(Prim0 'gensym) (gensym)] [(Prim1 p e) (match (interp-env e r ds) ['err 'err] diff --git a/langs/mug/parse.rkt b/langs/mug/parse.rkt index 125e3e84..82a02de0 100644 --- a/langs/mug/parse.rkt +++ b/langs/mug/parse.rkt @@ -81,14 +81,13 @@ (PAnd (parse-pat p1) (parse-pat p2))])) (define op0 - '(read-byte peek-byte void gensym)) - + '(read-byte peek-byte void)) (define op1 '(add1 sub1 zero? char? write-byte eof-object? integer->char char->integer box unbox empty? cons? box? car cdr vector? vector-length string? string-length - symbol? symbol->string string->symbol)) + symbol? symbol->string string->symbol string->uninterned-symbol)) (define op2 '(+ - < = cons eq? make-vector vector-ref make-string string-ref)) (define op3 diff --git a/langs/mug/symbol.c b/langs/mug/symbol.c index 39bd088e..5ad9f270 100644 --- a/langs/mug/symbol.c +++ b/langs/mug/symbol.c @@ -1,14 +1,7 @@ #include -#include -#include -#include #include "values.h" -static uint64_t gensym_ctr = 0; - -val_str_t *str_from_cstr(const char *); int symb_cmp(const val_symb_t *, const val_symb_t *); -val_str_t *str_dup(const val_str_t *); // binary tree node struct Node { @@ -42,40 +35,6 @@ val_symb_t *intern_symbol(val_symb_t* symb) return (*curr)->elem; } -val_symb_t *str_to_symbol(const val_str_t *str) -{ - // str_dup needed if string is mutable - return intern_symbol((val_symb_t*)str_dup(str)); -} - -val_symb_t *gensym(void) -{ - char s[100]; // uint64_t has maximum 20 digits - sprintf(s, "g%" PRIu64, gensym_ctr++); - return (val_symb_t*)str_from_cstr(s); // uninterned symbol -} - -val_str_t *str_from_cstr(const char *s) -{ - int64_t len = strlen(s); - - if (len == 0) - return NULL; - - val_str_t *str = - malloc(sizeof(int64_t) + len * sizeof(val_char_t)); - - if (!str) - return NULL; - - str->len = len; - int i; - for (i = 0; i < len; i++) { - str->codepoints[i] = (val_char_t)s[i]; - } - return str; -} - int symb_cmp(const val_symb_t *s1, const val_symb_t *s2) { int64_t len1 = s1->len; @@ -84,67 +43,10 @@ int symb_cmp(const val_symb_t *s1, const val_symb_t *s2) int64_t len = len1 < len2 ? len1 : len2; int i; - for (i = 0; i < len; i = i+1) { + for (i = 0; i < len; i++) { if (s1->codepoints[i] != s2->codepoints[i]) return s1->codepoints[i] - s2->codepoints[i]; } return len1 - len2; } - -val_str_t *str_dup(const val_str_t *s) -{ - int64_t n = s->len; - val_str_t *d; - - d = calloc(2+n, sizeof(val_char_t)); - if (!d) - return NULL; - - return memcpy(d, s, (2+n) * sizeof(val_char_t)); -} - - -#ifdef CHECK -// $ gcc -DCHECK symbol.c -#include -int main(void) -{ - val_str_t *foo = str_from_cstr("foo"); - val_str_t *foo_ = str_from_cstr("foo"); - val_str_t *bar = str_from_cstr("bar"); - val_str_t *foo1 = str_from_cstr("foo1"); - val_str_t *foo1_ = str_from_cstr("foo1"); - val_str_t *fo = str_from_cstr("fo"); - val_str_t *fo_ = str_from_cstr("fo"); - - assert(str_cmp(foo,foo_) == 0); - assert(str_cmp(foo1,foo1_) == 0); - assert(str_cmp(fo,fo_) == 0); - - assert(str_cmp(foo,foo1) < 0); - assert(str_cmp(foo,foo1) == str_cmp(foo,foo1_)); - assert(str_cmp(foo,foo1) == str_cmp(foo_,foo1)); - assert(str_cmp(foo,foo1) == str_cmp(foo_,foo1_)); - - assert(str_cmp(foo,fo) > 0); - assert(str_cmp(foo,fo) == str_cmp(foo,fo_)); - assert(str_cmp(foo,fo) == str_cmp(foo_,fo)); - assert(str_cmp(foo,fo) == str_cmp(foo_,fo_)); - - assert(str_cmp(foo,bar) > 0); - assert(str_cmp(bar,foo) < 0); - - assert(str_cmp(fo,bar) > 0); - assert(str_cmp(bar,fo) < 0); - - val_symb_t *foo_symb = (val_symb_t*)foo; - val_symb_t *foo_symb_ = (val_symb_t*)foo_; - assert(foo_symb != foo_symb_); - val_symb_t *foo_symb_i = intern_symbol(foo_symb); - val_symb_t *foo_symb_i_ = intern_symbol(foo_symb_); - assert(foo_symb_i == foo_symb_i_); - - return 0; -} -#endif diff --git a/langs/mug/test/test-runner.rkt b/langs/mug/test/test-runner.rkt index e0b687ae..5a9db857 100644 --- a/langs/mug/test/test-runner.rkt +++ b/langs/mug/test/test-runner.rkt @@ -282,9 +282,17 @@ #t) (check-equal? (run '(eq? 'fff (string->symbol (make-string 3 #\f)))) #t) - (check-equal? (run '(eq? 'g0 (gensym))) #f) - (check-equal? (run '(eq? (gensym) (gensym))) #f) - (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f)) + (check-equal? (run '(symbol? 'g0)) #t) + (check-equal? (run '(symbol? "g0")) #f) + (check-equal? (run '(symbol? (string->symbol "g0"))) #t) + (check-equal? (run '(symbol? (string->uninterned-symbol "g0"))) #t) + (check-equal? (run '(eq? 'g0 (string->symbol "g0"))) #t) + (check-equal? (run '(eq? 'g0 (string->uninterned-symbol "g0"))) #f) + (check-equal? (run '(eq? (string->uninterned-symbol "g0") (string->uninterned-symbol "g0"))) + #f) + (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f) + (check-equal? (run '(string? (symbol->string 'foo))) #t) + (check-equal? (run '(eq? (symbol->string 'foo) "foo")) #f)) (define (test-runner-io run) ;; Evildoer examples diff --git a/langs/mug/utils.rkt b/langs/mug/utils.rkt new file mode 100644 index 00000000..f0b06af6 --- /dev/null +++ b/langs/mug/utils.rkt @@ -0,0 +1,53 @@ +#lang racket +(provide symbol->label symbol->data-label lookup pad-stack unpad-stack) +(require a86/ast) + +(define rsp 'rsp) +(define r15 'r15) + +;; Symbol -> Label +;; Produce a symbol that is a valid Nasm label +(define (symbol->label s) + (to-label "label_" s)) + +(define (symbol->data-label s) + (to-label "data_" s)) + +(define (to-label prefix s) + (string->symbol + (string-append + prefix + (list->string + (map (λ (c) + (if (or (char<=? #\a c #\z) + (char<=? #\A c #\Z) + (char<=? #\0 c #\9) + (memq c '(#\_ #\$ #\# #\@ #\~ #\. #\?))) + c + #\_)) + (string->list (symbol->string s)))) + "_" + (number->string (eq-hash-code s) 16)))) + +;; Id CEnv -> [Maybe Integer] +(define (lookup x cenv) + (match cenv + ['() #f] + [(cons y rest) + (match (eq? x y) + [#t 0] + [#f (match (lookup x rest) + [#f #f] + [i (+ 8 i)])])])) + +;; Asm +;; Dynamically pad the stack to be aligned for a call +(define pad-stack + (seq (Mov r15 rsp) + (And r15 #b1000) + (Sub rsp r15))) + +;; Asm +;; Undo the stack alignment after a call +(define unpad-stack + (seq (Add rsp r15))) From 5cea194d8f43cd518790d3bb8129c4629001454f Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Sun, 14 Nov 2021 23:03:19 -0500 Subject: [PATCH 19/21] Mug notes. --- www/notes.scrbl | 1 + www/notes/mug.scrbl | 890 ++++++++++++++++++++++++++++++++++++++++++++ www/utils.rkt | 8 +- 3 files changed, 896 insertions(+), 3 deletions(-) create mode 100644 www/notes/mug.scrbl diff --git a/www/notes.scrbl b/www/notes.scrbl index 1ef6cdfd..eeee0751 100644 --- a/www/notes.scrbl +++ b/www/notes.scrbl @@ -30,4 +30,5 @@ suggestions for improving the material, @bold{please}, @include-section{notes/jig.scrbl} @include-section{notes/knock.scrbl} @include-section{notes/loot.scrbl} +@include-section{notes/mug.scrbl} @;include-section{notes/shakedown.scrbl} diff --git a/www/notes/mug.scrbl b/www/notes/mug.scrbl new file mode 100644 index 00000000..4ebd5260 --- /dev/null +++ b/www/notes/mug.scrbl @@ -0,0 +1,890 @@ +#lang scribble/manual + +@(require (for-label (except-in racket compile ...) a86)) +@(require redex/pict + racket/runtime-path + scribble/examples + "utils.rkt" + "ev.rkt" + "../fancyverb.rkt" + "../utils.rkt") + +@(define codeblock-include (make-codeblock-include #'h)) + +@(ev '(require rackunit a86)) +@(ev `(current-directory ,(path->string (build-path notes "mug")))) +@(void (ev '(with-output-to-string (thunk (system "make runtime.o"))))) +@(void (ev '(current-objs '("runtime.o")))) +@(for-each (λ (f) (ev `(require (file ,f)))) + '("interp.rkt" "compile.rkt" "compile-expr.rkt" "compile-literals.rkt" "utils.rkt" "ast.rkt" "parse.rkt" "types.rkt" "unload-bits-asm.rkt")) + +@title[#:tag "Mug"]{Mug: Symbols and Interned String Literals} + +@table-of-contents[] + +@section[#:tag-prefix "mug"]{String Literals} + +As it currently stands in our language, @bold{string literals} are +dynamically allocated when they are evaluated. + +This means, for example, that if we had a program like this: + +@#reader scribble/comment-reader +(racketblock +(define (f) "fred") +(cons (f) (cons (f) (cons (f) '()))) +) + +This will allocate three distinct copies of the string +@racket["fred"], one for each call to @racket[f]. This is unfortunate +since really just a single allocation of @racket["fred"] that is +referenced three times could've worked just as well and allocated less +memory. + +A common approach programming language implementations take is to take +every string literal that appears in a program and all allocate it +@bold{once} and replace occurrences of those literals with references +to memory allocated for it. + +This means, for example, that multiple occurrences of the same string +literal evaluate to the same pointer: + +@ex[ +(eq? "x" "x") +] + +Note that this doesn't mean that every string of the same characters +is represented by a unique pointer. We can dynamically construct +strings that will not be equal to a string literal of the same +characters: + +@ex[ +(eq? "x" (string #\x)) +] + +Let's consider how strings were previously compiled. Here's an assembly program +that returns @racket["Hello!"]: + +@ex[ +(require loot/compile) +(seq (Label 'entry) + (Mov 'rbx 'rdi) + (compile-string "Hello!") + (Ret)) +] + +We can run it just to make sure: + +@ex[ +(unload/free + (asm-interp + (seq (Label 'entry) + (Mov 'rbx 'rdi) + (compile-string "Hello!") + (Ret)))) +] + +Notice that this program dynamically allocates the string by executing +instructions that write to memory pointed to by @racket['rbx] and +incrementing @racket['rbx]. + +But fundamentally, we shouldn't need to do anything dynamically if we +know statically that the string being return is @racket["Hello!"]. We +could @emph{statically} allocate the memory for the string at +compile-time and return a pointer to this data. + +@section[#:tag-prefix "mug"]{Static Memory} + +How can we statically allocate memory? The idea is to use memory in +the program itself to store the data needed to represent the string +literal. It turns out that in an a86 program you can have a section +for the program text and another section with binary data. To switch +between the program text and program data, we use the @racket[(Text)] +and @racket[(Data)] directive. Once in @racket[(Data)] mode we can +write down data that will be placed in the program. + +For example, here is a data section: + +@ex[ +(seq (Data) + (Label 'hi) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))) + +] + +These psuedo-instructions will add to the data segment of our program +56-bytes of data. The first 8-bytes consist of the number 6. The +next 4-bytes consist of the number @racket[72], i.e. the codepoint for +@racket[#\H]. The next 4-bytes consist of the codepoint for +@racket[#\e] and so on. The names of these psuedo-instructions +designate how much memory is used: @racket[Dq] means 8-bytes +(64-bits), while @racket[Dd] means 4-bytes (32-bits). + +The label @racket['hi] is given to name this data's location. We've +previously seen how to load the address of a label using the +@racket[Lea] instruction in order to compute a place in the code to +jump to. Similarly, if we load the address of @racket['hi], we have a +pointer to the data at that location in the program. + +So to write a similar program that returns @racket["Hello!"] but +@emph{statically} allocates the memory for the string, we could do the +following: + +@ex[ +(unload/free + (asm-interp + (seq (Label 'entry) + (Lea 'rax 'hi) + (Or 'rax type-str) + (Ret) + (Data) + (Label 'hi) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))))) +] + +A couple things to note: +@itemlist[ + +@item{nothing is allocated in the heap memory set up by the run-time; +indeed this program doesn't use the @racket['rbx] register at all.} + +@item{Executing this program takes fewer steps than the previous +version; when the @racket['entry] label is called, it executes an +@racket[Lea] and @racket[Or] instruction and returns.} + +] + +This is pretty big improvement over the previous approach since the +number of instructions to execute were proportional to the size of the +string being compiled. Now we simply load the address of the static +data in a small, constant number of instructions. + +In fact, we can do one better. The @racket[Or] instruction is there +in order to tag the pointer to @racket['hi] as a string. There's +really no reason to do this at run-time; we should be able to add the +tag statically so that just a single load instruction suffices. The +goal is to add the tag to the address of @racket['hi] at compile time, +but the location of the label is actually not fully known until link +time. Our assembler has a way of resolving this by allowing us to +write @emph{expressions} involving labels and constants that will be +computed at link time. + +Here is a version of the same program that avoids the @racket[Or] +instruction, instead computing that type tagging at link time: + +@ex[ +(unload/free + (asm-interp + (seq (Label 'entry) + (Lea 'rax (Plus 'hi type-str)) + (Ret) + (Data) + (Label 'hi) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))))) +] + + +So one idea is to use static data to represent string literals. This +reduces the run-time memory that is allocated and makes is more +efficient to evaluate string literals. We could replace the old +@racket[compile-string] function with the following: + +@ex[ +(define (compile-string s) + (let ((l (gensym 'string))) + (seq (Data) + (Label l) + (Dq (string-length s)) + (map Dd (map char->integer (string->list s))) + (Text) + (Lea 'rax (Plus l type-str))))) + +(compile-string "Hello!") + +(unload/free + (asm-interp + (seq (Label 'entry) + (compile-string "Hello!") + (Ret)))) +] + +Now, while this does allocate string literals statically, using memory +within to the program to store the string, it doesn't alone solve the +problem with string literals being represented uniquely. + + +@section[#:tag-prefix "mug"]{Static Interning} + +We've seen static memory, but we still need to make sure every string +literal is allocated just once. + +Here is the basic idea: + +@itemlist[ + +@item{Collect all of the string literals in the program.} + +@item{For each distinct string literal, compile it to static data as +described above, labelling the data location.} + +@item{For each string literal expression, compile it to a reference to +the appropropiate label for that string.} + +] + +For example, let's say we want to compile this program: + +@#reader scribble/comment-reader +(racketblock +(begin "Hello!" + "Hello!") +) + +We'd like it to compile to something like this: + +@#reader scribble/comment-reader +(racketblock +(seq (Mov 'rax (Add 'hi type-str)) + (Mov 'rax (Add 'hi type-str)) + (Ret) + (Data) + (Label 'hi) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))) +) + +Notice how the two occurrences of @racket["Hello!"] turn into the +instruction @racket[(Mov 'rax (Add 'hi type-str))]. The labelled +location @racket['hi] contains the data for the string and it is +statically allocated just once. + +In order to do this, we need to maintain an association between unique +string literals and the labels our compiler will choose to label their +static data. + +We @emph{could} do this by making a pass over the program to compute +this association. Initially it would be empty and every time a string +literal was encountered, we'd check to see if it's already in the +association. If it is, there's nothing to be done. If isn't, we'd +generate a new label and add it to the association. + +This association would have to be added as a parameter to each of our +@racket[compile-e] functions and string literals would consult the +association to emit the @racket[(Mov 'rax (Add _label type-str))] +instruction. + +We'd also take every label and string pair in the association and +compile the string data to static data labelled with the associated +label. + +However, here's a fun ``trick'' we can employ to avoid having to +explicitly represent this association between strings and their +labels. + +Strings can be converted to symbols, and symbols can be used as +labels. Symbols that consist of the same characters are guaranteed to +be pointer-equal to each other, so by converting a string to a symbol, +we can take advantage of our implementation language's (Racket's) +facility for interning to help us implement interning in our compiler. + +So here is our revised apporach will produce code like this for our +example program: + +@#reader scribble/comment-reader +(racketblock +(seq (Mov 'rax (Add (symbol->label (string->symbol "Hello!")) type-str)) + (Mov 'rax (Add (symbol->label (string->symbol "Hello!")) type-str)) + (Ret) + (Data) + (Label (symbol->label (string->symbol "Hello!"))) + (Dq 6) + (Dd (char->integer #\H)) + (Dd (char->integer #\e)) + (Dd (char->integer #\l)) + (Dd (char->integer #\l)) + (Dd (char->integer #\o)) + (Dd (char->integer #\!))) +) + +So now an occurrence of a string literal @racket[_str] can be compiled +as @racket[(Mov 'rax (string->label (string->symbol _str)))]; no +association needs to be maintained explicity. + +@#reader scribble/comment-reader +(racketblock +;; String -> Asm +(define (compile-string s) + (seq (Lea 'rax (Plus (symbol->label (string->symbol s)) type-str)))) +) + +@(ev '(define (compile-string s) + (seq (Lea 'rax (Plus (symbol->label (string->symbol s)) type-str))))) + +So here's how an occurrence of @racket["Hello!"] is compiled: + +@ex[ +(compile-string "Hello!") +] + +We still need to compile the set of string literals that appear in the +program into statically allocated data, so for this we will write a +function: + +@#reader scribble/comment-reader +(racketblock +;; Prog -> [Listof Symbol] +(define (literals p) ...) +) + +This will produce the set of strings that appear literally in the +program text. Each string will be converted to its symbol +representation. The string representation is easy to recover by using +@racket[symbol->string]. + +This function is straightforwad, if a bit tedious, to write. It +traverses the AST. Recursive results are collected with +@racket[append]; when a string node @racket[(Str _s)] is encountered, +it produces @racket[(list (string->symbol _s))]. After all of the +strings have been collected, a final call to +@racket[remove-duplicates] ensures a list of unique symbols is +returned. + +@ex[ +(literals (parse '["Hello!"])) +(literals (parse '[(begin "Hello!" "Hello!")])) +(literals (parse '[(begin "Hello!" "Fren")])) +(literals (parse '[(define (f x) "Hello!") + (cons (f "Fren") (cons (f "Hello!") '()))])) +] + +Using @racket[literals], we can write a function that compiles all of +the string literals into static data as follows: + +@#reader scribble/comment-reader +(racketblock +;; Prog -> Asm +(define (compile-literals p) + (seq (Data) + (compile-literals-data (literals p)) + (Text))) + +;; [Listof Symbol] -> Asm +(define (compile-literals-data ss) + (append-map compile-literal-data ss)) + +;; Symbol -> Asm +(define (compile-literal-data s) + (let ((str (symbol->string s))) + (seq (Label (symbol->label s)) + (Dq (string-length str)) + (map Dq (map char->integer (string->list str)))))) +) + +@(ev +'(define (compile-literals p) + (seq (Data) + (compile-literals-data (strings p)) + (Text)))) + +@(ev +'(define (compile-literals-data ss) + (append-map compile-literal-data ss))) + +@(ev +'(define (compile-literal-data s) + (let ((str (symbol->string s))) + (seq (Label (symbol->label s)) + (Dq (string-length str)) + (map Dq (map char->integer (string->list str))))))) + +So now we can reconstruct our example with: + +@ex[ +(seq (compile-string "Hello!") + (compile-string "Hello!") + (compile-literals-data '(Hello!))) +] + +We've seemingly reached our goal. However, there is a fairly nasty +little bug with our approach. Can you spot it? + +Here's a hint: we are generating labels based on the content of string +literals. What else do we generate labels based off of and is it +possible to create a conflict? + +The answer is yes. Consider this program: + +@#reader scribble/comment-reader +(racketblock +(define (Hello! x) "Hello!") +42 +) + +It contains both a function called @racket[Hello!] and a string +literal @racket["Hello!"]. Unfortunately, the label used both for the +function and the string data will be @racket[(symbol->label 'Hello!)]. +If the compiler emits two definitions of this label, the assembler +will complain and fail to assemble the program. + +The solution is simple, when generating labels for data, we will use a +different symbol to label function, let's call it +@racket[symbol->data-label] that is guaranteed to produce disjoint +labels from @racket[symbol->label], which we will continue to use for +code labels. + +Using this function in all the places we used @racket[symbol->label] +will resolve the issue and our problematic program will now have two +different labels defined in it: + +@ex[ +(symbol->label 'Hello!) +(symbol->data-label 'Hello!) +] + + +So now we have accomplished our goal: string literals are statically +allocated and different occurrence of the same string literal are +considered @racket[eq?] to each other: + +@(ev '(require mug/compile)) + +@ex[ +(seq (compile-string "Hello!") + (compile-string "Hello!") + (compile-literals-data '(Hello!))) +] + +We can try it out to confirm some examples. + + +@ex[ +(define (run . p) + (unload/free (asm-interp (compile (parse p))))) + +(run "Hello!") + +(run '(begin "Hello!" "Hello!")) + +(run '(eq? "Hello!" "Hello!")) +(run '(eq? "Hello!" "Fren")) + +(run '(define (Hello! x) "Hello!") + '(eq? (Hello! 42) "Hello!")) +] + +It's still worth noting that only string literals are interned. +Dynamically created strings are not pointer-equal to structurally +equal string literals: + +@ex[ +(run '(eq? "fff" (make-string 3 #\f))) +] + +This is why we refer to this kind of interning as ``static'' interning. + +Let us now turn to a new, but familar, data type that supports a +stronger sense of interning: the symbol. + +@section[#:tag-prefix "mug"]{Symbols} + +One basic data type that we've used frequently in the writing of our +compiler, but which is not currently accounted for in our language is +that of @bold{symbols}. + +At first cut, a symbol is alot like a string: the name of a symbol +consists of some textual data. We can represent a symbol much like we +represent a string: using a tagged pointer to a sized array of +characters that comprise the name of the symbol. + +In fact, we made extensive use of this in our implementation of static +interning for string literals. This section will now uncover +@emph{how} symbols do their (dynamic) interning. + +From a syntax point of view, we add a new AST constructor for symbols +and names of the new operations: + +@filebox-include-fake[codeblock "mug/ast.rkt"]{ +;; type Expr = ... +;; | (Symb Symbol) +;; type Op1 = ... +;; | 'symbol? | 'symbol->string +;; | 'string->symbol | 'string->uninterned-symbol +(struct Symb (s) #:prefab) +} + +The parser is updated to construct such AST nodes when it encounters a +symbol: + +@ex[ +(parse-e ''foo) +] + +We can create a new pointer type tag: + +@filebox-include-fake[codeblock "mug/types.rkt"]{ +(define type-symb #b110) +} + +The run-time system has to be updated to handle symbol results and the +printer is updated to properly print symbols, but all of this follows +the blueprint of strings. It's simply a different tag and a slightly +different printer, which uses and initial @tt{'} delimiter instead of +an initial @tt{"} and subsequent @tt{"} delimiter. + +But one of the key differences between strings and symbols is that +symbols that have the same name are considered the same, i.e. they +should be represented by the same @emph{pointer}. + +This means that two symbols of the same name should be @racket[eq?] to +each other: + +@ex[ +(eq? 'x 'x) +] + +Having seen how string literals are handled, you can see that symbol +literals are like string literals and we can take a similar approach +to transform a program into one that statically allocates all of the +symbols that appear in the program and replace their occurrences with +references. + +Again, we just follow the blueprint of strings. + +The key additions are a function for compiling symbol occurrences: + +@#reader scribble/comment-reader +(racketblock +;; Symbol -> Asm +(define (compile-symbol s) + (seq (Lea 'rax (Plus (symbol->data-label s) type-symb)))) +) + +Which works as follows: + +@ex[ +(compile-symbol 'Hello!) +] + +And the @racket[literals] function should now include a case for +@racket[(Symb _sym)] to return @racket[(list _sym)]. + +@ex[ +(literals (parse '['Hello!])) +] + +You might worry that programs that have similar strings and symbols +may cause problem. Since @racket[literals] on the following program +only returns a single literal: + +@ex[ +(literals (parse '[(begin "Hello!" 'Hello!)])) +] + +But actually this is just fine. What happens is that only a signle +chunk of memory is allocated to hold the character data @tt{H}, +@tt{e}, @tt{l}, @tt{l}, @tt{o}, @tt{!}, but the @emph{symbol} +@racket['Hello] is represented as a pointer to this data tagged as a +symbol, while the string @racket["Hello"] is represent as the same +pointer, but tagged as a string. So this program compiles to: + +@ex[ +(seq (compile-string "Hello!") + (compile-symbol 'Hello!) + (compile-literals-data '(Hello!))) +] + +We have now added a symbol data type and have implement static +interning just as we did for strings. + +However this strategy alone won't fully solve the problem of symbol +identity because it is possible to dynamically create symbols and even +then it should be the case that symbols with the same name are ``the +same.'' This in contrast to how strings work: + +@ex[ +(eq? 'x (string->symbol (string #\x))) +] + +Here we are creating a symbol dynamically, using the string +@racket["x"] to specify the name of the symbol. Comparing it to a +@racket['x] that appears statically should still produce @racket[#t]. + +This was in fact a critical property we relied upon in implementing +static string interning. + +This latter example shows that we need to @emph{dynamically} ensure +symbols of the same name evaluate to unique pointers. + +@section[#:tag-prefix "mug"]{Dynamic Interning} + +Static interning requires identical static occurrences of data to have +a unique representation. Dynamic interning requires identical data, +regardless of when it's created, to have a unique representation. +Symbols are like strings that support dynamic interning. + +This is going to require more support from our run-time system. + +Essentially, the run-time systems needs to keep track of all of the +symbols that have appeared so far during the running of the program. +When a new symbol is dynamically created, e.g. through +@racket[string->symbol], the run-time will check whether this symbol +has been seen before (based on the characters of its name). If it has +been seen before, the run-time can give us the pointer for the +previous use of the symbol, thus preserving the pointer-equality +between this symbol and any other occurrences. + +On the other hand if the run-time has not see this symbol, it can +allocate memory for it, return the pointer, and remember in the future +that this symbol has been seen. + +To accomplish this, we will implement a @bold{symbol table}. It +associates symbol names, i.e. the characters of a symbol, with +pointers to symbols. When a program wishes to create a symbol, it +confers with the table to either fetch an existing pointer for the +symbol or create a new on, updating the table. + +To implement this table, we'll use a binary search tree of symbols, +represented in C as. We have a globally defined pointer +@tt{symbol_tbl} is which is initially empty (@tt{NULL}). The work of +dynamically interning a symbol will be done by the @tt{intern_symbol} +function. It searches the BST, using @tt{symb_cmp} to compare symbols +for alphabetic ordering. If an entry is found, it returns the +previously seen symbol, otherwise it adds the symbol to the table and +returns it. + +@filebox-include[fancy-c "mug/symbol.c"] + +The idea will be that every time a symbol is constructed, we call +@tt{intern_symbol} to intern it. + +So in addition to collecting all of the literals and compiling each to +static data, we will need to collect all of the symbols and emit a +call to @tt{intern_symbol} at the start of the program. + +To accomplish this, we'll design a function: + +@#reader scribble/comment-reader +(racketblock +;; Prog -> Asm +;; Initialize the symbol table with all the symbols that occur statically +(define (init-symbol-table p) ...) +) + +Here's what it will produce for some example programs: + +@ex[ +(init-symbol-table (parse '['Hello!])) +(init-symbol-table (parse '[(begin 'Hello! 'Hello!)])) +(init-symbol-table (parse '["Hello!"])) +(init-symbol-table (parse '[(define (Hello! x) 'Hello!) + (Hello! 'Fren)])) +] + +For each unique symbol in the program, it emits two instructions: + +@itemlist[ + +@item{move the address of the symbol's data into @racket['rdi], the +register used for the first argument in the System V ABI,} + +@item{call @tt{intern_symbol}.} +] + +We know that initially the table is empty, so each of these calls will +insert the given symbols into the table ensure that if any subsequent +symbol is interned that has the same character data, call +@tt{intern_symbol} will produce the original pointer to static data +for that symbol. + +Now we can implement the two operations @racket[string->symbol] and +@racket[symbol->string]. Here's what we do for +@racket[string->symbol]: + +@#reader scribble/comment-reader +(racketblock +;; Op1 -> Asm +(define (compile-op1 p) + (match p + ; ... + ['string->symbol + (seq (assert-string rax) + (Xor rax type-str) + (Mov rdi rax) + pad-stack + (Call 'intern_symbol) + unpad-stack + (Or rax type-symb))])) +) + +This first does some type-tag checking to make sure the argument is a +string, then it untags the pointer and moves it to the @racket['rdi] +register in order to call @racket[intern_symbol]. The address of the +interned symbol is returned in @racket['rax], which is then tagged as +being a symbol. + +We can now confirm that dynamically created symbols are still +pointer-equal to symbols that statically appear in the program: + +@ex[ +(run '(eq? 'fff (string->symbol (make-string 3 #\f)))) +] + +Even creating two symbols dynamically will result in the same pointer +so long as they are spelled the same: + +@ex[ +(run '(eq? (string->symbol (make-string 3 #\a)) + (string->symbol (make-string 3 #\a)))) +] + +Going the other direction from symbols to strings is easy: we copy the +string data and tag the pointer as a string. Note that we could get +away will simply retagging the pointer and not actually copying the +string, but we make a copy to mimic Racket's behavior and to be safe +should we add string mutation operations. + +@#reader scribble/comment-reader +(racketblock +;; Op1 -> Asm +(define (compile-op1 p) + (match p + ; ... + ['symbol->string + (seq (assert-symbol rax) + (Xor rax type-symb) + char-array-copy + (Or rax type-str))])) + +;; Asm +;; Copy sized array of characters pointed to by rax +(define char-array-copy + (seq (Mov rdi rbx) ; dst + (Mov rsi rax) ; src + (Mov rdx (Offset rax 0)) ; len + (Add rdx 1) ; #words = 1 + (len+1)/2 + (Sar rdx 1) + (Add rdx 1) + (Sal rdx 3) ; #bytes = 8*#words + pad-stack + (Call 'memcpy) + unpad-stack + (Mov rax rbx) + (Add rbx rdx))) +) + +The @racket[char-array-copy] sequence of instructions sets up a call +to C's @tt{memcpy} function giving the address of the string data as +the source, the current heap pointer as the destination, and the +number of bytes which will be copied. After the call returns, the +heap pointer is incremented by that number of copied bytes. + +We can see that this works: + +@ex[ +(run '(symbol->string 'foo)) +] + +To observe the copying behavior, notice: + +@ex[ +(run '(eq? (symbol->string 'foo) (symbol->string 'foo))) +] + + +@section[#:tag-prefix "mug"]{Uninterned Symbols} + +Sometimes it is useful to create a symbol that is distinct from all +other symbols. We've relied on the ability to create a symbol with +this property whenever we used the @racket[gensym] operation. What +@racket[gensym] produces is an @bold{uninterned} symbol. Even if you +constructed a symbol with the same letters, it would be a different +pointer from the symbol created by a call to @racket[gensym]. + +To add this ability, we will add an precursor to @racket[gensym] +called @racket[string->uninterned-symbol]. It consumes a string and +produces a symbol with the same letters, but distinct from all other +symbols, even those that are spelled the same. + +@ex[ +(eq? 'Hello! (string->uninterned-symbol "Hello!")) +] + +Calling @racket[string->uninterned-symbol] twice with the same string +will produce two different symbols: + +@ex[ +(eq? (string->uninterned-symbol "Hello!") + (string->uninterned-symbol "Hello!")) +] + +Implementing @racket[string->uninterned-symbol] is fairly simple: we +allocate a new symbol, thereby ensuring it is unique and then simple +avoid calling @tt{intern_symbol}: + +@#reader scribble/comment-reader +(racketblock +;; Op1 -> Asm +(define (compile-op1 p) + (match p + ; ... + ['string->uninterned-symbol + (seq (assert-string rax) + (Xor rax type-str) + char-array-copy + (Or rax type-symb))])) +) + +We can confirm this works as expected: + +@ex[ +(run '(string->uninterned-symbol "foo")) +(run '(eq? 'foo (string->uninterned-symbol "foo"))) +(run '(eq? (string->uninterned-symbol "foo") + (string->uninterned-symbol "foo"))) +] + +With that, we have completed the implementation of symbols and strings +with the proper interning behavior. + +@section[#:tag-prefix "mug"]{Compiling Symbols and Strings} + +We can now put the pieces together for the complete compiler. + +@(define (code-link fn) + (link (string-append "code/" fn) (tt fn))) + +We do a bit of housekeeping and move the code for compiling +expressions to its own module: @code-link{mug/compile-expr.rkt}. + + +The top-level compiler is now: + +@filebox-include[codeblock "mug/compile.rkt"] + +The work of compiling literals and emitting calls to initialize the +symbol table is contained in its own module: + +@filebox-include[codeblock "mug/compile-literals.rkt"] diff --git a/www/utils.rkt b/www/utils.rkt index 8457760f..fb5bb611 100644 --- a/www/utils.rkt +++ b/www/utils.rkt @@ -1,6 +1,5 @@ - #lang racket -(provide exercise float-right panopto-vid shell) +(provide exercise float-right panopto-vid shell shell-result) (require scribble/base scribble/core scribble/html-properties redex/pict) @@ -47,9 +46,12 @@ (parameterize ([current-output-port o] [current-error-port e]) (set! r (proc))))))))) - (unless r (error (string-append os es))) + ; (unless r (error (string-append os es))) (string-append os es)) +(define (shell-result c) + (with-output-to-string/err (λ () (system #:set-pwd? #t c)))) + (define (shell . cs) (match cs ['() ""] From 678358f9f4ca2f2e38cf81e0888cbb038ce9d3ad Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Sun, 14 Nov 2021 23:30:54 -0500 Subject: [PATCH 20/21] Add symbol type to unload-bits-asm. --- langs/mug/test/test-runner.rkt | 3 ++- langs/mug/types.rkt | 3 +++ langs/mug/unload-bits-asm.rkt | 9 ++++++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/langs/mug/test/test-runner.rkt b/langs/mug/test/test-runner.rkt index 5a9db857..8b58351b 100644 --- a/langs/mug/test/test-runner.rkt +++ b/langs/mug/test/test-runner.rkt @@ -292,7 +292,8 @@ #f) (check-equal? (run '(eq? (symbol->string 'foo) (symbol->string 'foo))) #f) (check-equal? (run '(string? (symbol->string 'foo))) #t) - (check-equal? (run '(eq? (symbol->string 'foo) "foo")) #f)) + (check-equal? (run '(eq? (symbol->string 'foo) "foo")) #f) + (check-equal? (run ''foo) 'foo)) (define (test-runner-io run) ;; Evildoer examples diff --git a/langs/mug/types.rkt b/langs/mug/types.rkt index c02d2d1e..966c22ba 100644 --- a/langs/mug/types.rkt +++ b/langs/mug/types.rkt @@ -69,3 +69,6 @@ (define (proc-bits? v) (zero? (bitwise-xor (bitwise-and v imm-mask) type-proc))) + +(define (symb-bits? v) + (zero? (bitwise-xor (bitwise-and v imm-mask) type-symb))) diff --git a/langs/mug/unload-bits-asm.rkt b/langs/mug/unload-bits-asm.rkt index be9b50c8..121bbe0d 100644 --- a/langs/mug/unload-bits-asm.rkt +++ b/langs/mug/unload-bits-asm.rkt @@ -30,7 +30,14 @@ (string) (build-string (heap-ref i) (lambda (j) - (char-ref (+ i 8) j))))])) + (char-ref (+ i 8) j))))] + [(? symb-bits? i) + (string->symbol + (if (zero? (untag i)) + (string) + (build-string (heap-ref i) + (lambda (j) + (char-ref (+ i 8) j)))))])) (define (untag i) (arithmetic-shift (arithmetic-shift i (- (integer-length ptr-mask))) From caa46b9f5e5b60e483e4a4f4f21eae3d292bbf9c Mon Sep 17 00:00:00 2001 From: David Van Horn Date: Sun, 14 Nov 2021 23:43:35 -0500 Subject: [PATCH 21/21] Add Mug to schedule. --- www/schedule.scrbl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/www/schedule.scrbl b/www/schedule.scrbl index 7005674e..9f9fd333 100644 --- a/www/schedule.scrbl +++ b/www/schedule.scrbl @@ -80,7 +80,7 @@ (list @wk{11/16} "" - @elem{Mountebank} @;{Match} + @elem{@secref["Mug"]} @elem{Neerdowell} @;{ ?? } )