diff --git a/.gitignore b/.gitignore index f17e3731..178d5af4 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,10 @@ cmake-build-release buildnojit nojit nojita -*.so \ No newline at end of file +*.so +*.dll +*.exp +*.lib +*.obj +*.s +time-debug.txt \ No newline at end of file diff --git a/LICENSE b/LICENSE index 947312b0..a3c8b6fb 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ /****************************************************************************** * Copyright (C) 1994-2019 Lua.org, PUC-Rio. -* Portions Copyright (C) 2015-2020 Dibyendu Majumdar +* Portions Copyright (C) 2015-2021 Dibyendu Majumdar +* Portions Copyright (c) 2019 Rui Ueyama * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the diff --git a/ravicomp/CMakeLists.txt b/ravicomp/CMakeLists.txt index 1db1eace..e2c9cc05 100644 --- a/ravicomp/CMakeLists.txt +++ b/ravicomp/CMakeLists.txt @@ -21,7 +21,8 @@ set(HEADERS src/common.h src/optimizer.h src/parser.h - src/codegen.h) + src/codegen.h + src/chibicc/chibicc.h) set(SRCS src/allocate.c @@ -43,6 +44,13 @@ set(SRCS src/membuf.c src/codegen.c src/ravi_binding.c + src/chibicc/chibicc_tokenize.c + src/chibicc/chibicc_parse.c + src/chibicc/chibicc_type.c + src/chibicc/chibicc_strings.c + src/chibicc/chibicc_unicode.c + src/chibicc/chibicc_hashmap.c + ../src/ravi_alloc.c ) message("SOURCE dir is ${RaviCompiler_SOURCE_DIR}") @@ -88,7 +96,9 @@ add_library(ravicomp ${LIBTYPE} target_include_directories(ravicomp PUBLIC "${CMAKE_CURRENT_BINARY_DIR}" PUBLIC "${RaviCompiler_SOURCE_DIR}/include" - PRIVATE "${RaviCompiler_SOURCE_DIR}/src") + PRIVATE "${RaviCompiler_SOURCE_DIR}/src" + PRIVATE "${RaviCompiler_SOURCE_DIR}/../src" + ) target_link_libraries(ravicomp ${EXTRA_LIBRARIES}) set_property(TARGET ravicomp PROPERTY C_STANDARD 99) include(GenerateExportHeader) diff --git a/ravicomp/LICENSE b/ravicomp/LICENSE index 5c19209a..a920cb0e 100644 --- a/ravicomp/LICENSE +++ 
b/ravicomp/LICENSE @@ -2,6 +2,7 @@ MIT License Copyright (c) 2019-2021 Dibyendu Majumdar Portions Copyright (c) 1994–2019 Lua.org, PUC-Rio. +Portions Copyright (c) 2019 Rui Ueyama Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/ravicomp/include/ravi_compiler.h b/ravicomp/include/ravi_compiler.h index f57e6a33..c6687666 100644 --- a/ravicomp/include/ravi_compiler.h +++ b/ravicomp/include/ravi_compiler.h @@ -69,6 +69,9 @@ enum TokenType { TOK_in, TOK_local, TOK_defer, + TOK_C__decl, + TOK_C__unsafe, + TOK_C__new, TOK_nil, TOK_not, TOK_or, @@ -299,6 +302,7 @@ enum AstNodeType { STMT_FOR_NUM, STMT_REPEAT, STMT_EXPR, /* Also used for assignment statements */ + STMT_EMBEDDED_C, EXPR_LITERAL, EXPR_SYMBOL, EXPR_Y_INDEX, /* [] operator */ @@ -310,7 +314,8 @@ enum AstNodeType { EXPR_FUNCTION, /* function literal */ EXPR_TABLE_LITERAL, /* table constructor */ EXPR_FUNCTION_CALL, - EXPR_CONCAT + EXPR_CONCAT, + EXPR_BUILTIN }; typedef struct Statement Statement; @@ -325,6 +330,7 @@ typedef struct TestThenStatement TestThenStatement; typedef struct IfStatement IfStatement; typedef struct WhileOrRepeatStatement WhileOrRepeatStatement; typedef struct ForStatement ForStatement; +typedef struct EmbeddedCStatement EmbeddedCStatement; typedef struct Expression Expression; typedef struct LiteralExpression LiteralExpression; @@ -338,6 +344,7 @@ typedef struct TableLiteralExpression TableLiteralExpression; typedef struct SuffixedExpression SuffixedExpression; typedef struct FunctionCallExpression FunctionCallExpression; typedef struct StringConcatenationExpression StringConcatenationExpression; +typedef struct BuiltinExpression BuiltinExpression; typedef struct Scope Scope; @@ -446,6 +453,11 @@ RAVICOMP_EXPORT void raviX_for_statement_body_foreach_statement(const ForStateme void (*callback)(void *userdata, const Statement *statement)); +RAVICOMP_EXPORT void 
raviX_embedded_C_statement_foreach_symbol(const EmbeddedCStatement *statement, void *userdata, + void (*callback)(void *, + const LuaVariableSymbol *expr)); +RAVICOMP_EXPORT const StringObject *raviX_embedded_C_statement_C_source(const EmbeddedCStatement *statement); + /* literal expression */ /* Note: '...' value has type RAVI_TVARARGS and no associated SemInfo. */ RAVICOMP_EXPORT const VariableType *raviX_literal_expression_type(const LiteralExpression *expression); @@ -541,6 +553,7 @@ RAVICOMP_EXPORT const TestThenStatement *raviX_test_then_statement(const Stateme RAVICOMP_EXPORT const IfStatement *raviX_if_statement(const Statement *stmt); RAVICOMP_EXPORT const WhileOrRepeatStatement *raviX_while_or_repeat_statement(const Statement *stmt); RAVICOMP_EXPORT const ForStatement *raviX_for_statement(const Statement *stmt); +RAVICOMP_EXPORT const EmbeddedCStatement *raviX_embedded_C_statment(const Statement *stmt); /* Convert an expression to the correct type */ RAVICOMP_EXPORT enum AstNodeType raviX_expression_type(const Expression *expression); diff --git a/ravicomp/src/allocate.h b/ravicomp/src/allocate.h index bbbc79c7..cb42bbde 100644 --- a/ravicomp/src/allocate.h +++ b/ravicomp/src/allocate.h @@ -140,4 +140,4 @@ extern size_t raviX_del_array_element(void *p, size_t element_size, size_t array #endif -#endif \ No newline at end of file +#endif diff --git a/ravicomp/src/ast_lower.c b/ravicomp/src/ast_lower.c index 19568546..782d35a0 100644 --- a/ravicomp/src/ast_lower.c +++ b/ravicomp/src/ast_lower.c @@ -80,6 +80,8 @@ static void process_expression(CompilerState *container, AstNode *node) case EXPR_TABLE_LITERAL: process_expression_list(container, node->table_expr.expr_list); break; + case EXPR_BUILTIN: + break; default: assert(0); break; @@ -369,6 +371,8 @@ static void process_statement(CompilerState *container, AstNode *node) case STMT_FOR_IN: lower_for_in_statement(container, node); break; + case STMT_EMBEDDED_C: + break; default: fprintf(stderr, "AST = %d\n", 
node->type); assert(0); diff --git a/ravicomp/src/ast_printer.c b/ravicomp/src/ast_printer.c index 22dffd1b..0f0be50c 100644 --- a/ravicomp/src/ast_printer.c +++ b/ravicomp/src/ast_printer.c @@ -74,7 +74,10 @@ static void printf_buf(TextBuffer *buf, const char *format, ...) type = va_arg(ap, const VariableType *); if (type->type_code == RAVI_TUSERDATA) { const StringObject *s = type->type_name; - raviX_buffer_add_string(buf, s->str); + if (s != NULL) + raviX_buffer_add_string(buf, s->str); + else + raviX_buffer_add_string(buf, "userdata"); } else { raviX_buffer_add_string(buf, raviX_get_type_name(type->type_code)); } @@ -463,6 +466,12 @@ void raviX_print_ast_node(TextBuffer *buf, AstNode *node, int level) printf_buf(buf, "%pend\n", level); break; } + case STMT_EMBEDDED_C: { + printf_buf(buf, "%pC (\n", level); + print_symbol_list(buf, node->embedded_C_stmt.symbols, level + 1, ","); + printf_buf(buf, "%p ) '%t'\n", level, node->embedded_C_stmt.C_src_snippet); + break; + } case EXPR_SUFFIXED: { printf_buf(buf, "%p%c %T\n", level, "[suffixed expr start]", &node->suffixed_expr.type); printf_buf(buf, "%p%c %T\n", level + 1, "[primary start]", @@ -575,6 +584,12 @@ void raviX_print_ast_node(TextBuffer *buf, AstNode *node, int level) printf_buf(buf, "%p%c\n", level, "[concat end]"); break; } + case EXPR_BUILTIN: { + printf_buf(buf, "%p%s %c%T\n", level, "C__new(", "", &node->builtin_expr.type); + // TODO print contents + printf_buf(buf, "%p)\n", level); + break; + } default: printf_buf(buf, "%pUnsupported node type %d\n", level, node->type); assert(0); diff --git a/ravicomp/src/ast_simplify.c b/ravicomp/src/ast_simplify.c index d1c693b5..9993ad81 100644 --- a/ravicomp/src/ast_simplify.c +++ b/ravicomp/src/ast_simplify.c @@ -494,6 +494,8 @@ static void process_expression(CompilerState *container, AstNode *node) case EXPR_TABLE_LITERAL: process_expression_list(container, node->table_expr.expr_list); break; + case EXPR_BUILTIN: + break; default: assert(0); break; @@ -566,6 
+568,8 @@ static void process_statement(CompilerState *container, AstNode *node) process_expression_list(container, node->for_stmt.expr_list); process_statement_list(container, node->for_stmt.for_statement_list); break; + case STMT_EMBEDDED_C: + break; default: fprintf(stderr, "AST = %d\n", node->type); assert(0); diff --git a/ravicomp/src/ast_walker.c b/ravicomp/src/ast_walker.c index 089a94fb..aed9e2ad 100644 --- a/ravicomp/src/ast_walker.c +++ b/ravicomp/src/ast_walker.c @@ -164,6 +164,27 @@ const ForStatement *raviX_for_statement(const Statement *stmt) assert(stmt->type == STMT_FOR_IN || stmt->type == STMT_FOR_NUM); return &n(stmt)->for_stmt; } +const EmbeddedCStatement *raviX_embedded_C_statment(const Statement *stmt) +{ + assert(stmt->type == STMT_EMBEDDED_C); + return &n(stmt)->embedded_C_stmt; +} +void raviX_embedded_C_statement_foreach_symbol(const EmbeddedCStatement *statement, void *userdata, + void (*callback)(void *, const LuaVariableSymbol *expr)) +{ + LuaSymbol *symbol; + FOR_EACH_PTR(statement->symbols, LuaSymbol, symbol) + { + assert(symbol->symbol_type == SYM_LOCAL); + callback(userdata, &symbol->variable); + } + END_FOR_EACH_PTR(symbol) +} +const StringObject *raviX_embedded_C_statement_C_source(const EmbeddedCStatement *statement) +{ + return statement->C_src_snippet; +} + enum AstNodeType raviX_expression_type(const Expression *expression) { return expression->type; } const LiteralExpression *raviX_literal_expression(const Expression *expr) { diff --git a/ravicomp/src/chibicc/.clang-format b/ravicomp/src/chibicc/.clang-format new file mode 100644 index 00000000..fb17e815 --- /dev/null +++ b/ravicomp/src/chibicc/.clang-format @@ -0,0 +1,7 @@ +BasedOnStyle: LLVM +IndentWidth: 2 +UseTab: Never +BreakBeforeBraces: Attach +AllowShortIfStatementsOnASingleLine: false +IndentCaseLabels: false +ColumnLimit: 120 \ No newline at end of file diff --git a/ravicomp/src/chibicc/README.md b/ravicomp/src/chibicc/README.md new file mode 100644 index 
00000000..612752af --- /dev/null +++ b/ravicomp/src/chibicc/README.md @@ -0,0 +1,7 @@ +This is a modified version of https://github.com/rui314/chibicc. + +``` +MIT License + +Copyright (c) 2019 Rui Ueyama +``` diff --git a/ravicomp/src/chibicc/chibicc.h b/ravicomp/src/chibicc/chibicc.h new file mode 100644 index 00000000..ef674c93 --- /dev/null +++ b/ravicomp/src/chibicc/chibicc.h @@ -0,0 +1,557 @@ +/* +Adapted from https://github.com/rui314/chibicc + +MIT License + +Copyright (c) 2019 Rui Ueyama + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ +#ifndef CHIBICC_H +#define CHIBICC_H + +#include "ravi_alloc.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RAVI_EXTENSIONS + +#define MAX(x, y) ((x) < (y) ? (y) : (x)) +#define MIN(x, y) ((x) < (y) ? 
(x) : (y)) + +#ifndef __GNUC__ +# define __attribute__(x) +#endif +#ifdef _WIN32 +# define noreturn +# define strncasecmp strnicmp +# define strndup _strndup +#else +#include +#endif + +typedef struct C_Type C_Type; +typedef struct C_Node C_Node; +typedef struct C_Member C_Member; +typedef struct C_Relocation C_Relocation; +typedef struct C_Hideset C_Hideset; +typedef struct C_Parser C_Parser; + +// +// strings.c +// + +typedef struct { + char **data; + int capacity; + int len; +} StringArray; + +void strarray_push(mspace arena, StringArray *arr, char *s); +char *format(char *fmt, ...) __attribute__((format(printf, 1, 2))); + +// +// C_tokenize.c +// + +// C_Token +typedef enum { + TK_IDENT, // Identifiers + TK_PUNCT, // Punctuators + TK_KEYWORD, // Keywords + TK_STR, // String literals + TK_NUM, // Numeric literals + TK_PP_NUM, // Preprocessing numbers + TK_EOF, // End-of-file markers +} C_TokenKind; + +typedef struct { + char *name; + int file_no; + char *contents; + + // For #line directive + char *display_name; + int line_delta; +} C_File; + +// C_Token type +typedef struct C_Token C_Token; +struct C_Token { + C_TokenKind kind; // C_Token kind + C_Token *next; // Next token + int64_t val; // If kind is TK_NUM, its value + long double fval; // If kind is TK_NUM, its value + char *loc; // C_Token location + int len; // C_Token length + C_Type *ty; // Used if TK_NUM or TK_STR + char *str; // String literal contents including terminating '\0' + + C_File *file; // Source location + char *filename; // Filename + int line_no; // Line number + int line_delta; // Line number + bool at_bol; // True if this token is at beginning of line + bool has_space; // True if this token follows a space character + C_Hideset *hideset; // For macro expansion + C_Token *origin; // If this is expanded from a macro, the original token +}; + +noreturn void C_error(C_Parser *tokenizer, char *fmt, ...) 
__attribute__((format(printf, 2, 3))); +noreturn void C_error_at(C_Parser *tokenizer, char *loc, char *fmt, ...) __attribute__((format(printf, 3, 4))); +noreturn void C_error_tok(C_Parser *tokenizer, C_Token *tok, char *fmt, ...) __attribute__((format(printf, 3, 4))); +void C_warn_tok(C_Parser *tokenizer, C_Token *tok, char *fmt, ...) __attribute__((format(printf, 3, 4))); +bool C_equal(C_Token *tok, char *op); +C_Token *C_skip(C_Parser *parser, C_Token *tok, char *op); +bool C_consume(C_Token **rest, C_Token *tok, char *str); +void C_convert_pp_tokens(C_Parser *tokenizer, C_Token *tok); +C_File *C_new_file(C_Parser *tokenizer, char *name, int file_no, char *contents); +C_Token *C_tokenize(C_Parser *tokenizer, C_File *file); +C_Token *C_tokenize_buffer(C_Parser *tokenizer, char *p); + +#define unreachable(parser) \ + C_error(parser, "internal error at %s:%d", __FILE__, __LINE__) + +// +// C_parse.c +// + +// Variable or function +typedef struct C_Obj C_Obj; +struct C_Obj { + C_Obj *next; + char *name; // Variable name + C_Type *ty; // Type + C_Token *tok; // representative token + bool is_local; // local or global/function + int align; // alignment + + // Local variable + int offset; + + // Global variable or function + bool is_function; + bool is_definition; + bool is_static; + + // Global variable + bool is_tentative; + bool is_tls; + char *init_data; + C_Relocation *rel; + + // Function + bool is_inline; + C_Obj *params; + C_Node *body; + C_Obj *locals; + C_Obj *va_area; + C_Obj *alloca_bottom; + int stack_size; + + // Static inline function + bool is_live; + bool is_root; + StringArray refs; +}; + +// Global variable can be initialized either by a constant expression +// or a pointer to another global variable. This struct represents the +// latter. 
+typedef struct C_Relocation C_Relocation; +struct C_Relocation { + C_Relocation *next; + int offset; + char **label; + long addend; +}; + +// AST node +typedef enum { + ND_NULL_EXPR, // Do nothing + ND_ADD, // + + ND_SUB, // - + ND_MUL, // * + ND_DIV, // / + ND_NEG, // unary - + ND_MOD, // % + ND_BITAND, // & + ND_BITOR, // | + ND_BITXOR, // ^ + ND_SHL, // << + ND_SHR, // >> + ND_EQ, // == + ND_NE, // != + ND_LT, // < + ND_LE, // <= + ND_ASSIGN, // = + ND_COND, // ?: + ND_COMMA, // , + ND_MEMBER, // . (struct member access) + ND_ADDR, // unary & + ND_DEREF, // unary * + ND_NOT, // ! + ND_BITNOT, // ~ + ND_LOGAND, // && + ND_LOGOR, // || + ND_RETURN, // "return" + ND_IF, // "if" + ND_FOR, // "for" or "while" + ND_DO, // "do" + ND_SWITCH, // "switch" + ND_CASE, // "case" + ND_BLOCK, // { ... } + ND_GOTO, // "goto" + ND_GOTO_EXPR, // "goto" labels-as-values + ND_LABEL, // Labeled statement + ND_LABEL_VAL, // [GNU] Labels-as-values + ND_FUNCALL, // Function call + ND_EXPR_STMT, // Expression statement + ND_STMT_EXPR, // Statement expression + ND_VAR, // Variable + ND_VLA_PTR, // VLA designator + ND_NUM, // Integer + ND_CAST, // Type cast + ND_MEMZERO, // Zero-clear a stack variable + ND_ASM, // "asm" + ND_CAS, // Atomic compare-and-swap + ND_EXCH, // Atomic exchange +} C_NodeKind; + +// AST node type +struct C_Node { + C_NodeKind kind; // C_Node kind + C_Node *next; // Next node + C_Type *ty; // Type, e.g. 
int or pointer to int
+  C_Token *tok;   // Representative token
+
+  C_Node *lhs;     // Left-hand side
+  C_Node *rhs;     // Right-hand side
+
+  // "if" or "for" statement
+  C_Node *cond;
+  C_Node *then;
+  C_Node *els;
+  C_Node *init;
+  C_Node *inc;
+
+  // "break" and "continue" labels
+  char *brk_label;
+  char *cont_label;
+
+  // Block or statement expression
+  C_Node *body;
+
+  // Struct member access
+  C_Member *member;
+
+  // Function call
+  C_Type *func_ty;
+  C_Node *args;
+  bool pass_by_stack;
+  C_Obj *ret_buffer;
+
+  // Goto or labeled statement, or labels-as-values
+  char *label;
+  char *unique_label;
+  C_Node *goto_next;
+
+  // Switch
+  C_Node *case_next;
+  C_Node *default_case;
+
+  // Case
+  long begin;
+  long end;
+
+  // "asm" string literal
+  char *asm_str;
+
+  // Atomic compare-and-swap
+  C_Node *cas_addr;
+  C_Node *cas_old;
+  C_Node *cas_new;
+
+  // Atomic op= operators
+  C_Obj *atomic_addr;
+  C_Node *atomic_expr;
+
+  // Variable
+  C_Obj *var;
+
+  // Numeric literal
+  int64_t val;
+  long double fval;
+};
+
+// One bucket of a HashMap. The key is a (pointer, length) pair, so it does
+// not have to be NUL-terminated.
+typedef struct {
+  char *key;
+  int keylen;
+  void *val;
+} HashEntry;
+
+// Open-addressing hash table (implemented in chibicc_hashmap.c).
+// `arena` is the mspace from which the bucket array is allocated; it must be
+// set before the first insertion.
+typedef struct {
+  HashEntry *buckets;
+  int capacity; // number of buckets
+  int used;     // number of buckets whose key is non-NULL (deleted slots included)
+  mspace arena;
+} HashMap;
+
+// Represents a block scope.
+typedef struct C_Scope C_Scope;
+struct C_Scope {
+  C_Scope *next;
+
+  // C has two block scopes; one is for variables/typedefs and
+  // the other is for struct/union/enum tags.
+  HashMap vars; // values are C_VarScope *
+  HashMap tags; // values are C_Type *
+};
+
+// C_Scope for local variables, global variables, typedefs
+// or enum constants
+typedef struct {
+  C_Obj *var;
+  C_Type *type_def;
+  C_Type *enum_ty;
+  int enum_val;
+} C_VarScope;
+
+// Parser/tokenizer state. Every C_* entry point takes a pointer to one of
+// these as an explicit argument.
+struct C_Parser {
+  int file_no;
+  // Input file
+  C_File *current_file;
+
+  // A list of all input files.
+  C_File **input_files;
+
+  // True if the current position is at the beginning of a line
+  bool at_bol;
+
+  // True if the current position follows a space character
+  bool has_space;
+
+  // All local variable instances created during parsing are
+  // accumulated to this list.
+  C_Obj *locals;
+
+  // Likewise, global variables are accumulated to this list.
+  C_Obj *globals;
+
+  C_Scope *scope; // = &(C_Scope){0};
+
+  // Points to the function object the parser is currently parsing.
+  C_Obj *current_fn;
+
+  // Lists of all goto statements and labels in the current function.
+  C_Node *gotos;
+  C_Node *labels;
+
+  // Current "break" and "continue" jump targets.
+  char *brk_label;
+  char *cont_label;
+
+  // Points to a node representing a switch if we are parsing
+  // a switch statement. Otherwise, NULL.
+  C_Node *current_switch;
+
+  C_Obj *builtin_alloca;
+
+  HashMap keywords; // used by tokenizer
+  HashMap typewords; // used by parser
+  mspace arena;    // pointer to memory arena handle
+
+  jmp_buf env; /* For error handling */
+
+  char *error_message; // Error reporting arg
+
+#ifdef RAVI_EXTENSIONS
+  bool embedded_mode;
+#endif
+
+};
+
+void C_parser_init(C_Parser *parser);
+C_Scope *C_global_scope(C_Parser *parser);
+C_Node *C_new_cast(C_Parser *parser, C_Node *expr, C_Type *ty);
+int64_t C_const_expr(C_Parser *parser, C_Token **rest, C_Token *tok);
+C_Obj *C_parse(C_Scope * globalScope, C_Parser *parser, C_Token *tok);
+void C_parser_destroy(C_Parser *parser);
+
+#ifdef RAVI_EXTENSIONS
+C_Node *C_parse_compound_statement(C_Scope *globalScope, C_Parser *parser, C_Token *tok);
+C_Obj *C_create_function(C_Scope *globalScope, C_Parser *parser, char *name_str);
+#endif
+
+// NOTE(review): both C_parser_destroy (above) and C_destroy_parser are
+// declared in this header; confirm which one is actually defined and drop
+// the other.
+void C_destroy_parser(C_Parser *parser);
+
+//
+// type.c
+//
+
+typedef enum {
+  TY_VOID,
+  TY_BOOL,
+  TY_CHAR,
+  TY_SHORT,
+  TY_INT,
+  TY_LONG,
+  TY_FLOAT,
+  TY_DOUBLE,
+  TY_LDOUBLE,
+  TY_ENUM,
+  TY_PTR,
+  TY_FUNC,
+  TY_ARRAY,
+  TY_VLA, // variable-length array
+  TY_STRUCT,
+  TY_UNION,
+} TypeKind;
+
+struct C_Type { + TypeKind kind; + int size; // sizeof() value + int align; // alignment + bool is_unsigned; // unsigned or signed + bool is_atomic; // true if _Atomic + C_Type *origin; // for type compatibility check + + // Pointer-to or array-of type. We intentionally use the same member + // to represent pointer/array duality in C. + // + // In many contexts in which a pointer is expected, we examine this + // member instead of "kind" member to determine whether a type is a + // pointer or not. That means in many contexts "array of T" is + // naturally handled as if it were "pointer to T", as required by + // the C spec. + C_Type *base; + + // Declaration + C_Token *name; + C_Token *name_pos; + + // Array + int array_len; + + // Variable-length array + C_Node *vla_len; // # of elements + C_Obj *vla_size; // sizeof() value + + // Struct + C_Member *members; + bool is_flexible; + bool is_packed; + + // Function type + C_Type *return_ty; + C_Type *params; + bool is_variadic; + C_Type *next; +}; + +// Struct member +struct C_Member { + C_Member *next; + C_Type *ty; + C_Token *tok; // for error message + C_Token *name; + int idx; + int align; + int offset; + + // Bitfield + bool is_bitfield; + int bit_offset; + int bit_width; +}; + +extern C_Type *C_ty_void; +extern C_Type *C_ty_bool; + +extern C_Type *C_ty_char; +extern C_Type *C_ty_short; +extern C_Type *C_ty_int; +extern C_Type *C_ty_long; + +extern C_Type *C_ty_uchar; +extern C_Type *C_ty_ushort; +extern C_Type *C_ty_uint; +extern C_Type *C_ty_ulong; + +extern C_Type *C_ty_float; +extern C_Type *C_ty_double; +extern C_Type *C_ty_ldouble; + +bool C_is_integer(C_Type *ty); +bool C_is_flonum(C_Type *ty); +bool C_is_numeric(C_Type *ty); +bool C_is_compatible(C_Type *t1, C_Type *t2); +C_Type *C_copy_type(C_Parser *parser, C_Type *ty); +C_Type *C_pointer_to(C_Parser *parser, C_Type *base); +C_Type *C_func_type(C_Parser *parser, C_Type *return_ty); +C_Type *C_array_of(C_Parser *parser, C_Type *base, int size); +C_Type 
*C_vla_of(C_Parser *parser, C_Type *base, C_Node *expr); +C_Type *C_enum_type(C_Parser *parser); +C_Type *C_struct_type(C_Parser *parser); +void C_add_type(C_Parser *parser, C_Node *node); + +// Round up `n` to the nearest multiple of `align`. For instance, +// align_to(5, 8) returns 8 and align_to(11, 8) returns 16. +static inline int C_align_to(int n, int align) { + return (n + align - 1) / align * align; +} + + +// +// unicode.c +// + +int C_encode_utf8(char *buf, uint32_t c); +uint32_t C_decode_utf8(C_Parser *tokenizer, char **new_pos, char *p); +bool C_is_ident1(uint32_t c); +bool C_is_ident2(uint32_t c); +int C_display_width(C_Parser *tokenizer, char *p, int len); + +// +// hashmap.c +// + +void *hashmap_get(HashMap *map, char *key); +void *hashmap_get2(HashMap *map, char *key, int keylen); +void hashmap_put(HashMap *map, char *key, void *val); +void hashmap_put2(HashMap *map, char *key, int keylen, void *val); +void hashmap_delete(HashMap *map, char *key); +void hashmap_delete2(HashMap *map, char *key, int keylen); +void hashmap_test(void); +void hashmap_foreach(HashMap *map, void (*f)(void *userdata, char *key, int keylen, void *val), void *userdata); +void hashmap_destroy(HashMap *map); + +#endif \ No newline at end of file diff --git a/ravicomp/src/chibicc/chibicc_hashmap.c b/ravicomp/src/chibicc/chibicc_hashmap.c new file mode 100644 index 00000000..84f7914c --- /dev/null +++ b/ravicomp/src/chibicc/chibicc_hashmap.c @@ -0,0 +1,208 @@ +/* +Adapted from https://github.com/rui314/chibicc + +MIT License + +Copyright (c) 2019 Rui Ueyama + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following 
conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+// This is an implementation of the open-addressing hash table.
+//
+// Keys are (pointer, length) pairs. The table stores the key pointer itself,
+// not a copy, so the caller must keep the key bytes alive for as long as the
+// entry is in the map. `map->used` counts every bucket whose key is non-NULL,
+// i.e. live entries plus tombstones; tombstones are only dropped by rehash().
+
+#include "chibicc.h"
+
+// Initial hash bucket size
+#define INIT_SIZE 16
+
+// Rehash if the usage exceeds 70%.
+#define HIGH_WATERMARK 70
+
+// We'll keep the usage below 50% after rehashing.
+#define LOW_WATERMARK 50
+
+// Represents a deleted hash entry
+#define TOMBSTONE ((void *)-1)
+
+// 64-bit FNV-1 hash of the `len` bytes starting at `s` (multiply by the FNV
+// prime, then XOR in the next byte).
+static uint64_t fnv_hash(char *s, int len) {
+  uint64_t hash = 0xcbf29ce484222325;
+  for (int i = 0; i < len; i++) {
+    hash *= 0x100000001b3;
+    hash ^= (unsigned char)s[i];
+  }
+  return hash;
+}
+
+// Make room for new entries in a given hashmap by removing
+// tombstones and possibly extending the bucket size.
+static void rehash(HashMap *map) {
+  // Compute the size of the new hashmap.
+  int nkeys = 0;
+  for (int i = 0; i < map->capacity; i++)
+    if (map->buckets[i].key && map->buckets[i].key != TOMBSTONE)
+      nkeys++;
+
+  // Double the capacity until the live keys occupy less than LOW_WATERMARK
+  // percent of it. If most slots were tombstones the capacity stays the same.
+  int cap = map->capacity;
+  while ((nkeys * 100) / cap >= LOW_WATERMARK)
+    cap = cap * 2;
+  assert(cap > 0);
+
+  // Create a new hashmap and copy all key-values.
+  HashMap map2 = {0};
+  map2.arena = map->arena;
+  map2.buckets = mspace_calloc(map->arena, cap, sizeof(HashEntry));
+  map2.capacity = cap;
+
+  for (int i = 0; i < map->capacity; i++) {
+    HashEntry *ent = &map->buckets[i];
+    if (ent->key && ent->key != TOMBSTONE)
+      hashmap_put2(&map2, ent->key, ent->keylen, ent->val);
+  }
+
+  assert(map2.used == nkeys);
+  mspace_free(map->arena, map->buckets);
+  *map = map2;
+}
+
+// True if `ent` holds a live entry whose key equals the given bytes.
+static bool match(HashEntry *ent, char *key, int keylen) {
+  return ent->key && ent->key != TOMBSTONE &&
+         ent->keylen == keylen && memcmp(ent->key, key, keylen) == 0;
+}
+
+// Look up `key` by linear probing. Returns the live entry, or NULL if the
+// map has no buckets yet or the probe reaches a never-used slot first.
+// Tombstones are skipped, not treated as the end of the probe path.
+static HashEntry *get_entry(HashMap *map, char *key, int keylen) {
+  if (!map->buckets)
+    return NULL;
+
+  uint64_t hash = fnv_hash(key, keylen);
+
+  for (int i = 0; i < map->capacity; i++) {
+    HashEntry *ent = &map->buckets[(hash + i) % map->capacity];
+    if (match(ent, key, keylen))
+      return ent;
+    if (ent->key == NULL)
+      return NULL;
+  }
+  //unreachable();
+  assert(false);
+  return NULL;
+}
+
+// Return the entry for `key`, creating it if it does not exist yet.
+//
+// The bucket array is allocated lazily from map->arena on first use, and the
+// table is rehashed once `used` reaches HIGH_WATERMARK percent of capacity.
+//
+// A tombstone met while probing is only remembered, not reused immediately:
+// the key may still live further down the probe path (it was inserted while
+// the tombstoned slot was occupied). Reusing the tombstone right away would
+// create a second entry for the same key, and deleting that key later would
+// make the stale copy visible again. The tombstone is reused only once the
+// probe has shown that the key is absent.
+static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) {
+  if (!map->buckets) {
+    assert(map->arena != NULL);
+    map->buckets = mspace_calloc(map->arena, INIT_SIZE, sizeof(HashEntry));
+    map->capacity = INIT_SIZE;
+  } else if ((map->used * 100) / map->capacity >= HIGH_WATERMARK) {
+    rehash(map);
+  }
+
+  uint64_t hash = fnv_hash(key, keylen);
+  HashEntry *reusable = NULL; // first tombstone seen on the probe path
+
+  for (int i = 0; i < map->capacity; i++) {
+    HashEntry *ent = &map->buckets[(hash + i) % map->capacity];
+
+    if (match(ent, key, keylen))
+      return ent;
+
+    if (ent->key == TOMBSTONE) {
+      if (!reusable)
+        reusable = ent;
+      continue;
+    }
+
+    if (ent->key == NULL) {
+      if (reusable)
+        break; // key is absent; prefer the earlier tombstone
+      ent->key = key;
+      ent->keylen = keylen;
+      map->used++;
+      return ent;
+    }
+  }
+
+  if (reusable) {
+    // The slot was already counted in `used` when it was first occupied.
+    reusable->key = key;
+    reusable->keylen = keylen;
+    return reusable;
+  }
+  //unreachable();
+  assert(false);
+  return NULL;
+}
+
+// Look up a NUL-terminated key. Returns the stored value or NULL if absent.
+void *hashmap_get(HashMap *map, char *key) {
+  return hashmap_get2(map, key, strlen(key));
+}
+
+// Look up a key given as (pointer, length). Returns the stored value, or
+// NULL if absent (indistinguishable from a stored NULL value).
+void *hashmap_get2(HashMap *map, char *key, int keylen) {
+  HashEntry *ent = get_entry(map, key, keylen);
+  return ent ? ent->val : NULL;
+}
+
+// Insert or overwrite the value for a NUL-terminated key.
+void hashmap_put(HashMap *map, char *key, void *val) {
+  hashmap_put2(map, key, strlen(key), val);
+}
+
+// Insert or overwrite the value for a key given as (pointer, length).
+void hashmap_put2(HashMap *map, char *key, int keylen, void *val) {
+  HashEntry *ent = get_or_insert_entry(map, key, keylen);
+  ent->val = val;
+}
+
+// Remove a NUL-terminated key; does nothing if the key is absent.
+void hashmap_delete(HashMap *map, char *key) {
+  hashmap_delete2(map, key, strlen(key));
+}
+
+// Remove a key given as (pointer, length) by turning its slot into a
+// tombstone, so that later probes continue past it. `used` is left unchanged.
+void hashmap_delete2(HashMap *map, char *key, int keylen) {
+  HashEntry *ent = get_entry(map, key, keylen);
+  if (ent)
+    ent->key = TOMBSTONE;
+}
+
+// Call `f(userdata, key, keylen, val)` for every live entry, in bucket order.
+void hashmap_foreach(HashMap *map, void (*f)(void *userdata, char *key, int keylen, void *val), void *userdata) {
+  for (int i = 0; i < map->capacity; i++) {
+    HashEntry *ent = &map->buckets[i];
+    if (ent->key && ent->key != TOMBSTONE)
+      f(userdata, ent->key, ent->keylen, ent->val);
+  }
+}
+
+#if 0
+void hashmap_test(void) {
+  HashMap *map = calloc(1, sizeof(HashMap));
+
+  for (int i = 0; i < 5000; i++)
+    hashmap_put(map, format("key %d", i), (void *)(size_t)i);
+  for (int i = 1000; i < 2000; i++)
+    hashmap_delete(map, format("key %d", i));
+  for (int i = 1500; i < 1600; i++)
+    hashmap_put(map, format("key %d", i), (void *)(size_t)i);
+  for (int i = 6000; i < 7000; i++)
+    hashmap_put(map, format("key %d", i), (void *)(size_t)i);
+
+  for (int i = 0; i < 1000; i++)
+    assert((size_t)hashmap_get(map, format("key %d", i)) == i);
+  for (int i = 1000; i < 1500; i++)
+    assert(hashmap_get(map, "no such key") == NULL);
+  for (int i = 1500; i < 1600; i++)
+    assert((size_t)hashmap_get(map, format("key %d", i)) == i);
+  for (int i = 1600; i < 2000; i++)
+    assert(hashmap_get(map, "no such key") == NULL);
+  for (int i = 2000; i < 5000; i++)
+    assert((size_t)hashmap_get(map, format("key %d", i)) == i);
+  for (int i = 5000; i < 6000; i++)
+    assert(hashmap_get(map, "no such key") == NULL);
+  for (int i = 6000; i < 7000; i++)
+    hashmap_put(map, format("key %d", i), (void *)(size_t)i);
+
+  assert(hashmap_get(map, "no such key") == NULL);
+  printf("OK\n");
+}
+#endif
\ No
newline at end of file diff --git a/ravicomp/src/chibicc/chibicc_parse.c b/ravicomp/src/chibicc/chibicc_parse.c new file mode 100644 index 00000000..530b05ef --- /dev/null +++ b/ravicomp/src/chibicc/chibicc_parse.c @@ -0,0 +1,3413 @@ +/* +Adapted from https://github.com/rui314/chibicc + +MIT License + +Copyright (c) 2019 Rui Ueyama + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + + +// This file contains a recursive descent parser for C. +// +// Most functions in this file are named after the symbols they are +// supposed to read from an input token list. For example, stmt() is +// responsible for reading a statement from a token list. The function +// then construct an AST node representing a statement. +// +// Each function conceptually returns two values, an AST node and +// remaining part of the input tokens. Since C doesn't support +// multiple return values, the remaining tokens are returned to the +// caller via a pointer argument. 
+//
+// Input tokens are represented by a linked list. Unlike many recursive
+// descent parsers, we don't have the notion of an "input token stream".
+// Most parsing functions don't change the global state of the parser,
+// so it is very easy to look ahead an arbitrary number of tokens in this
+// parser.
+
+#include "chibicc.h"
+
+// Variable attributes such as typedef or extern.
+// declspec() fills this in while it reads storage-class specifiers and
+// _Alignas; callers pass NULL where such specifiers are not allowed.
+typedef struct {
+  bool is_typedef; // "typedef" was seen
+  bool is_static;  // "static" was seen
+  bool is_extern;  // "extern" was seen
+  bool is_inline;  // "inline" was seen
+  bool is_tls;     // "_Thread_local" or "__thread" was seen
+  int align;       // alignment requested with _Alignas; 0 if none was given
+} VarAttr;
+
+// This struct represents a variable initializer. Since initializers
+// can be nested (e.g. `int x[2][2] = {{1, 2}, {3, 4}}`), this struct
+// is a tree data structure.
+typedef struct Initializer Initializer;
+struct Initializer {
+  Initializer *next;
+  C_Type *ty;       // type of the object this node initializes
+  C_Token *tok;
+  bool is_flexible; // array of not-yet-known length; resized once the
+                    // initializer elements have been counted
+
+  // If it's not an aggregate type and has an initializer,
+  // `expr` has an initialization expression.
+  C_Node *expr;
+
+  // If it's an initializer for an aggregate type (e.g. array or struct),
+  // `children` has initializers for its children. Array children are
+  // indexed by element number, struct/union children by member index.
+  Initializer **children;
+
+  // Only one member can be initialized for a union.
+  // `mem` is used to clarify which member is initialized.
+  C_Member *mem;
+};
+
+// For local variable initializer.
+typedef struct InitDesg InitDesg;
+// NOTE(review): no code in this part of the file reads these fields; they
+// presumably form a chain of array-index / member steps leading from a
+// variable to one element being initialized -- confirm against the code
+// that consumes InitDesg.
+struct InitDesg {
+  InitDesg *next;
+  int idx;
+  C_Member *member;
+  C_Obj *var;
+};
+
+
+// Forward declarations. The parsing functions below call one another
+// recursively, so they are declared up front.
+static bool is_typename(C_Parser *parser, C_Token *tok);
+static C_Type *declspec(C_Parser *parser, C_Token **rest, C_Token *tok, VarAttr *attr);
+static C_Type *typename(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Type *enum_specifier(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Type *typeof_specifier(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Type *type_suffix(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty);
+static C_Type *declarator(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty);
+static C_Node *declaration(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *basety, VarAttr *attr);
+static void array_initializer2(C_Parser *parser, C_Token **rest, C_Token *tok, Initializer *init, int i);
+static void struct_initializer2(C_Parser *parser, C_Token **rest, C_Token *tok, Initializer *init, C_Member *mem);
+static void initializer2(C_Parser *parser, C_Token **rest, C_Token *tok, Initializer *init);
+static Initializer *initializer(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty, C_Type **new_ty);
+static C_Node *lvar_initializer(C_Parser *parser, C_Token **rest, C_Token *tok, C_Obj *var);
+static void gvar_initializer(C_Parser *parser, C_Token **rest, C_Token *tok, C_Obj *var);
+static C_Node *compound_stmt(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *stmt(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *expr_stmt(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *expr(C_Parser *parser, C_Token **rest, C_Token *tok);
+static int64_t eval(C_Parser *parser, C_Node *node);
+static int64_t eval2(C_Parser *parser, C_Node *node, char ***label);
+static int64_t eval_rval(C_Parser *parser, C_Node *node, char ***label);
+static bool is_const_expr(C_Parser *parser, C_Node *node);
+static C_Node *assign(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *logor(C_Parser *parser, C_Token **rest, C_Token *tok);
+static double eval_double(C_Parser *parser, C_Node *node);
+static C_Node *conditional(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *logand(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *bitor(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *bitxor(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *bitand(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *equality(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *relational(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *shift(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *add(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *new_add(C_Parser *parser, C_Node *lhs, C_Node *rhs, C_Token *tok);
+static C_Node *new_sub(C_Parser *parser, C_Node *lhs, C_Node *rhs, C_Token *tok);
+static C_Node *mul(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *cast(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Member *get_struct_member(C_Parser *parser, C_Type *ty, C_Token *tok);
+static C_Type *struct_decl(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Type *union_decl(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *postfix(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *funcall(C_Parser *parser, C_Token **rest, C_Token *tok, C_Node *node);
+static C_Node *unary(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Node *primary(C_Parser *parser, C_Token **rest, C_Token *tok);
+static C_Token *parse_typedef(C_Parser *parser, C_Token *tok, C_Type *basety);
+static bool is_function(C_Parser *parser, C_Token *tok);
+static C_Token *function(C_Parser *parser, C_Token *tok, C_Type *basety, VarAttr *attr);
+static C_Token *global_variable(C_Parser *parser, C_Token *tok, C_Type *basety, VarAttr *attr);
+
+static int
align_down(int n, int align) {
+  // Expressed through C_align_to(): bias the input by (align - 1) first.
+  int biased = n - align + 1;
+  return C_align_to(biased, align);
+}
+
+// Copies `len` bytes of `temp` into a fresh arena allocation of len + 1
+// bytes and NUL-terminates the copy.
+static char *str_dup(mspace arena, const char *temp, size_t len) {
+  char *copy = (char *) mspace_calloc(arena, 1, len+1);
+  memcpy(copy, temp, len);
+  copy[len] = 0;
+  return copy;
+}
+
+// Allocates a scope that is not linked to any enclosing scope. Its two
+// hash maps (variables and tags) allocate from the parser's arena.
+C_Scope *C_global_scope(C_Parser *parser) {
+  C_Scope *global = mspace_calloc(parser->arena, 1, sizeof(C_Scope));
+  global->vars.arena = parser->arena;
+  global->tags.arena = parser->arena;
+  return global;
+}
+
+// Pushes a new, empty block scope on top of the parser's scope chain.
+static void enter_scope(C_Parser *parser) {
+  C_Scope *inner = mspace_calloc(parser->arena, 1, sizeof(C_Scope));
+  inner->vars.arena = parser->arena;
+  inner->tags.arena = parser->arena;
+  inner->next = parser->scope;
+  parser->scope = inner;
+}
+
+// Pops the innermost scope; the enclosing scope becomes current again.
+static void leave_scope(C_Parser *parser) {
+  C_Scope *outer = parser->scope->next;
+  parser->scope = outer;
+}
+
+// Find a variable by name, searching from the innermost scope outwards.
+// Returns NULL if no scope declares the identifier.
+static C_VarScope *find_var(C_Parser *parser, C_Token *tok) {
+  C_Scope *sc = parser->scope;
+  while (sc != NULL) {
+    C_VarScope *hit = hashmap_get2(&sc->vars, tok->loc, tok->len);
+    if (hit != NULL)
+      return hit;
+    sc = sc->next;
+  }
+  return NULL;
+}
+
+// Find a struct/union/enum tag by name, searching from the innermost
+// scope outwards. Returns NULL if the tag is unknown.
+static C_Type *find_tag(C_Parser *parser, C_Token *tok) {
+  C_Scope *sc = parser->scope;
+  while (sc != NULL) {
+    C_Type *hit = hashmap_get2(&sc->tags, tok->loc, tok->len);
+    if (hit != NULL)
+      return hit;
+    sc = sc->next;
+  }
+  return NULL;
+}
+
+// Allocates a zero-initialized AST node of the given kind. `tok` is the
+// representative token stored in the node.
+static C_Node *new_node(C_Parser *parser, C_NodeKind kind, C_Token *tok) {
+  C_Node *fresh = mspace_calloc(parser->arena,1, sizeof(C_Node));
+  fresh->tok = tok;
+  fresh->kind = kind;
+  return fresh;
+}
+
+// Creates a node with two operands.
+static C_Node *new_binary(C_Parser *parser, C_NodeKind kind, C_Node *lhs, C_Node *rhs, C_Token *tok) {
+  C_Node *bin = new_node(parser, kind, tok);
+  bin->rhs = rhs;
+  bin->lhs = lhs;
+  return bin;
+}
+
+// Creates a node with a single operand, stored in `lhs`.
+static C_Node *new_unary(C_Parser *parser, C_NodeKind kind, C_Node *expr, C_Token *tok) {
+  C_Node *un = new_node(parser, kind, tok);
+  un->lhs = expr;
+  return un;
+}
+
+// Creates an integer literal node; its type is left unset.
+static C_Node *new_num(C_Parser *parser, int64_t val, C_Token *tok) {
+  C_Node *node = new_node(parser, ND_NUM, tok);
+  node->val = val;
+  return node;
+}
+
+// Creates an integer literal node of type `long`.
+static C_Node *new_long(C_Parser *parser, int64_t val, C_Token *tok) {
+  C_Node *node = new_node(parser, ND_NUM, tok);
+  node->val = val;
+  node->ty = C_ty_long;
+  return node;
+}
+
+// Creates an integer literal node of type `unsigned long`.
+// The value parameter is int64_t, like new_num() and new_long(): a plain
+// `long` is only 32 bits wide on LLP64 platforms (e.g. 64-bit Windows) and
+// would silently truncate the value before it reaches the node.
+static C_Node *new_ulong(C_Parser *parser, int64_t val, C_Token *tok) {
+  C_Node *node = new_node(parser, ND_NUM, tok);
+  node->val = val;
+  node->ty = C_ty_ulong;
+  return node;
+}
+
+// Creates a node that refers to the variable `var`.
+static C_Node *new_var_node(C_Parser *parser, C_Obj *var, C_Token *tok) {
+  C_Node *node = new_node(parser, ND_VAR, tok);
+  node->var = var;
+  return node;
+}
+
+// Creates an ND_VLA_PTR node for the variable-length array `var`.
+static C_Node *new_vla_ptr(C_Parser *parser, C_Obj *var, C_Token *tok) {
+  C_Node *node = new_node(parser, ND_VLA_PTR, tok);
+  node->var = var;
+  return node;
+}
+
+// Wraps `expr` in a cast to `ty`. The operand is typed first, and the
+// cast node gets its own copy of `ty`.
+C_Node *C_new_cast(C_Parser *parser, C_Node *expr, C_Type *ty) {
+  C_add_type(parser, expr);
+
+  // Go through new_node() like every other node constructor in this file.
+  C_Node *node = new_node(parser, ND_CAST, expr->tok);
+  node->lhs = expr;
+  node->ty = C_copy_type(parser, ty);
+  return node;
+}
+
+// Registers `name` in the current (innermost) scope and returns the new,
+// zero-initialized scope entry for the caller to fill in.
+static C_VarScope *push_scope(C_Parser *parser, char *name) {
+  C_VarScope *sc = mspace_calloc(parser->arena,1, sizeof(C_VarScope));
+  hashmap_put(&parser->scope->vars, name, sc);
+  return sc;
+}
+
+// Builds an empty initializer tree shaped like `ty`.
+//
+// If `is_flexible` is true, an array of incomplete size (ty->size < 0)
+// gets no children yet and is flagged `is_flexible`, so that it can be
+// re-created once the number of elements is known. For a struct/union
+// whose type is marked flexible, the same applies to its last member.
+static Initializer *new_initializer(C_Parser *parser, C_Type *ty, bool is_flexible) {
+  Initializer *init = mspace_calloc(parser->arena,1, sizeof(Initializer));
+  init->ty = ty;
+
+  if (ty->kind == TY_ARRAY) {
+    if (is_flexible && ty->size < 0) {
+      init->is_flexible = true;
+      return init;
+    }
+
+    // One child per array element.
+    init->children = mspace_calloc(parser->arena, ty->array_len, sizeof(Initializer *));
+    for (int i = 0; i < ty->array_len; i++)
+      init->children[i] = new_initializer(parser, ty->base, false);
+    return init;
+  }
+
+  if (ty->kind == TY_STRUCT || ty->kind == TY_UNION) {
+    // Count the number of struct members.
+    int len = 0;
+    for (C_Member *mem = ty->members; mem; mem = mem->next)
+      len++;
+
+    init->children = mspace_calloc(parser->arena, len, sizeof(Initializer *));
+
+    for (C_Member *mem = ty->members; mem; mem = mem->next) {
+      if (is_flexible && ty->is_flexible && !mem->next) {
+        // Last member of a flexible struct: defer sizing.
+        Initializer *child = mspace_calloc(parser->arena, 1, sizeof(Initializer));
+        child->ty = mem->ty;
+        child->is_flexible = true;
+        init->children[mem->idx] = child;
+      } else {
+        init->children[mem->idx] = new_initializer(parser, mem->ty, false);
+      }
+    }
+    return init;
+  }
+
+  // Scalar: a leaf node with no children.
+  return init;
+}
+
+// Creates a variable object and registers it in the current scope.
+// Its alignment defaults to the alignment of its type.
+static C_Obj *new_var(C_Parser *parser, char *name, C_Type *ty) {
+  C_Obj *var = mspace_calloc(parser->arena, 1, sizeof(C_Obj));
+  var->name = name;
+  var->ty = ty;
+  var->align = ty->align;
+  push_scope(parser, name)->var = var;
+  return var;
+}
+
+// Creates a local variable and prepends it to parser->locals.
+static C_Obj *new_lvar(C_Parser *parser, char *name, C_Type *ty) {
+  C_Obj *var = new_var(parser, name, ty);
+  var->is_local = true;
+  var->next = parser->locals;
+  parser->locals = var;
+  return var;
+}
+
+// Creates a global variable and prepends it to parser->globals. It is
+// marked as a static definition.
+static C_Obj *new_gvar(C_Parser *parser, char *name, C_Type *ty) {
+  C_Obj *var = new_var(parser, name, ty);
+  var->next = parser->globals;
+  var->is_static = true;
+  var->is_definition = true;
+  parser->globals = var;
+  return var;
+}
+
+// Returns a fresh name of the form ".L..<n>", allocated from `arena`.
+// NOTE: the counter is a function-local static, so it is shared by every
+// parser in the process and is not synchronized.
+static char *new_unique_name(mspace arena) {
+  static int id = 0;
+  char temp[64];
+
+  snprintf(temp, sizeof temp, ".L..%d", id++);
+  return str_dup(arena, temp, strlen(temp));
+}
+
+// Creates a global variable with a generated unique name.
+static C_Obj *new_anon_gvar(C_Parser *parser, C_Type *ty) {
+  return new_gvar(parser, new_unique_name(parser->arena), ty);
+}
+
+// Creates an anonymous global whose initial contents are the bytes at `p`.
+static C_Obj *new_string_literal(C_Parser *parser, char *p, C_Type *ty) {
+  C_Obj *var = new_anon_gvar(parser, ty);
+  var->init_data = p;
+  return var;
+}
+
+// Returns an arena-allocated copy of the identifier spelled by `tok`.
+// Reports an error if `tok` is not an identifier.
+static char *get_ident(C_Parser *parser, C_Token *tok) {
+  if (tok->kind != TK_IDENT)
+    C_error_tok(parser, tok, "expected an identifier");
+  return str_dup(parser->arena, tok->loc, tok->len);
+}
+
+static C_Type *find_typedef(C_Parser *parser, C_Token *tok) {
+  if (tok->kind == TK_IDENT) {
+    C_VarScope *sc = find_var(parser, tok);
+    if (sc)
+      return sc->type_def;
+  }
+  return NULL;
+}
+
+// Registers the struct/union/enum tag spelled by `tok` in the current scope.
+static void push_tag_scope(C_Parser *parser, C_Token *tok, C_Type *ty) {
+  hashmap_put2(&parser->scope->tags, tok->loc, tok->len, ty);
+}
+
+// declspec = ("void" | "_Bool" | "char" | "short" | "int" | "long"
+//             | "typedef" | "static" | "extern" | "inline"
+//             | "_Thread_local" | "__thread"
+//             | "signed" | "unsigned"
+//             | struct-decl | union-decl | typedef-name
+//             | enum-specifier | typeof-specifier
+//             | "const" | "volatile" | "auto" | "register" | "restrict"
+//             | "__restrict" | "__restrict__" | "_Noreturn")+
+//
+// The order of typenames in a type-specifier doesn't matter. For
+// example, `int long static` means the same as `static long int`.
+// That can also be written as `static long` because you can omit
+// `int` if `long` or `short` are specified. However, something like
+// `char int` is not a valid type specifier. We have to accept only a
+// limited set of combinations of the typenames.
+//
+// In this function, we count the number of occurrences of each typename
+// while keeping the "current" type object that the typenames up
+// until that point represent. When we reach a non-typename token,
+// we return the current type object.
+//
+// `attr` receives storage-class specifiers and _Alignas; pass NULL where
+// they are not allowed (they are then reported as errors). On return,
+// `*rest` points at the first token that is not part of the specifier.
+static C_Type *declspec(C_Parser *parser, C_Token **rest, C_Token *tok, VarAttr *attr) {
+  // We use a single integer as counters for all typenames.
+  // For example, bits 0 and 1 represent how many times we saw the
+  // keyword "void" so far. With this, we can use a switch statement
+  // as you can see below.
+  enum {
+    VOID = 1 << 0,
+    BOOL = 1 << 2,
+    CHAR = 1 << 4,
+    SHORT = 1 << 6,
+    INT = 1 << 8,
+    LONG = 1 << 10,
+    FLOAT = 1 << 12,
+    DOUBLE = 1 << 14,
+    OTHER = 1 << 16,
+    SIGNED = 1 << 17,
+    UNSIGNED = 1 << 18,
+  };
+
+  C_Type *ty = C_ty_int;
+  int counter = 0;
+  bool is_atomic = false;
+
+  while (is_typename(parser, tok)) {
+    // Handle storage class specifiers.
+    if (C_equal(tok, "typedef") || C_equal(tok, "static") || C_equal(tok, "extern") || C_equal(tok, "inline") ||
+        C_equal(tok, "_Thread_local") || C_equal(tok, "__thread")) {
+      if (!attr)
+        C_error_tok(parser, tok, "storage class specifier is not allowed in this context");
+
+      if (C_equal(tok, "typedef"))
+        attr->is_typedef = true;
+      else if (C_equal(tok, "static"))
+        attr->is_static = true;
+      else if (C_equal(tok, "extern"))
+        attr->is_extern = true;
+      else if (C_equal(tok, "inline"))
+        attr->is_inline = true;
+      else
+        attr->is_tls = true;
+
+      // typedef excludes every other specifier handled here, so a single
+      // one of them alongside typedef is already an error (the previous
+      // `> 1` test let e.g. `typedef static int x;` through).
+      if (attr->is_typedef &&
+          attr->is_static + attr->is_extern + attr->is_inline + attr->is_tls > 0)
+        C_error_tok(parser, tok,
+                    "typedef may not be used together with static,"
+                    " extern, inline, __thread or _Thread_local");
+      tok = tok->next;
+      continue;
+    }
+
+    // These keywords are recognized but ignored.
+    if (C_consume(&tok, tok, "const") || C_consume(&tok, tok, "volatile") || C_consume(&tok, tok, "auto") ||
+        C_consume(&tok, tok, "register") || C_consume(&tok, tok, "restrict") || C_consume(&tok, tok, "__restrict") ||
+        C_consume(&tok, tok, "__restrict__") || C_consume(&tok, tok, "_Noreturn"))
+      continue;
+
+    // `_Atomic` may be a plain qualifier or the `_Atomic(type-name)` form.
+    if (C_equal(tok, "_Atomic")) {
+      tok = tok->next;
+      if (C_equal(tok, "(")) {
+        ty = typename(parser, &tok, tok->next);
+        tok = C_skip(parser, tok, ")");
+      }
+      is_atomic = true;
+      continue;
+    }
+
+    // `_Alignas(type-name)` or `_Alignas(constant-expression)`.
+    if (C_equal(tok, "_Alignas")) {
+      if (!attr)
+        C_error_tok(parser, tok, "_Alignas is not allowed in this context");
+      tok = C_skip(parser, tok->next, "(");
+
+      if (is_typename(parser, tok))
+        attr->align = typename(parser, &tok, tok)->align;
+      else
+        attr->align = C_const_expr(parser, &tok, tok);
+      tok = C_skip(parser, tok, ")");
+      continue;
+    }
+
+    // Handle user-defined types.
+    C_Type *ty2 = find_typedef(parser, tok);
+    if (C_equal(tok, "struct") || C_equal(tok, "union") || C_equal(tok, "enum") || C_equal(tok, "typeof") || ty2) {
+      // A user-defined type cannot follow other type keywords; in that
+      // position the identifier is the declared name, not a typename.
+      if (counter)
+        break;
+
+      if (C_equal(tok, "struct")) {
+        ty = struct_decl(parser, &tok, tok->next);
+      } else if (C_equal(tok, "union")) {
+        ty = union_decl(parser, &tok, tok->next);
+      } else if (C_equal(tok, "enum")) {
+        ty = enum_specifier(parser, &tok, tok->next);
+      } else if (C_equal(tok, "typeof")) {
+        ty = typeof_specifier(parser, &tok, tok->next);
+      } else {
+        ty = ty2;
+        tok = tok->next;
+      }
+
+      counter += OTHER;
+      continue;
+    }
+
+    // Handle built-in types.
+    if (C_equal(tok, "void"))
+      counter += VOID;
+    else if (C_equal(tok, "_Bool"))
+      counter += BOOL;
+    else if (C_equal(tok, "char"))
+      counter += CHAR;
+    else if (C_equal(tok, "short"))
+      counter += SHORT;
+    else if (C_equal(tok, "int"))
+      counter += INT;
+    else if (C_equal(tok, "long"))
+      counter += LONG;
+    else if (C_equal(tok, "float"))
+      counter += FLOAT;
+    else if (C_equal(tok, "double"))
+      counter += DOUBLE;
+    else if (C_equal(tok, "signed"))
+      counter |= SIGNED;
+    else if (C_equal(tok, "unsigned"))
+      counter |= UNSIGNED;
+    else
+      unreachable(parser);
+
+    // Map the combination of keywords seen so far to a type. Any
+    // combination not listed here is rejected.
+    switch (counter) {
+    case VOID:
+      ty = C_ty_void;
+      break;
+    case BOOL:
+      ty = C_ty_bool;
+      break;
+    case CHAR:
+    case SIGNED + CHAR:
+      ty = C_ty_char;
+      break;
+    case UNSIGNED + CHAR:
+      ty = C_ty_uchar;
+      break;
+    case SHORT:
+    case SHORT + INT:
+    case SIGNED + SHORT:
+    case SIGNED + SHORT + INT:
+      ty = C_ty_short;
+      break;
+    case UNSIGNED + SHORT:
+    case UNSIGNED + SHORT + INT:
+      ty = C_ty_ushort;
+      break;
+    case INT:
+    case SIGNED:
+    case SIGNED + INT:
+      ty = C_ty_int;
+      break;
+    case UNSIGNED:
+    case UNSIGNED + INT:
+      ty = C_ty_uint;
+      break;
+    case LONG:
+    case LONG + INT:
+    case LONG + LONG:
+    case LONG + LONG + INT:
+    case SIGNED + LONG:
+    case SIGNED + LONG + INT:
+    case SIGNED + LONG + LONG:
+    case SIGNED + LONG + LONG + INT:
+      ty = C_ty_long;
+      break;
+    case UNSIGNED + LONG:
+    case UNSIGNED + LONG + INT:
+    case UNSIGNED + LONG + LONG:
+    case UNSIGNED + LONG + LONG + INT:
+      ty = C_ty_ulong;
+      break;
+    case FLOAT:
+      ty = C_ty_float;
+      break;
+    case DOUBLE:
+      ty = C_ty_double;
+      break;
+    case LONG + DOUBLE:
+      ty = C_ty_ldouble;
+      break;
+    default:
+      C_error_tok(parser, tok, "invalid type");
+    }
+
+    tok = tok->next;
+  }
+
+  // Mark a private copy so the shared base type object stays untouched.
+  if (is_atomic) {
+    ty = C_copy_type(parser, ty);
+    ty->is_atomic = true;
+  }
+
+  *rest = tok;
+  return ty;
+}
+
+// func-params = ("void" | param ("," param)* ("," "...")?)? ")"
+// param       = declspec declarator
+//
+// `ty` is the return type; the result is the function type. An empty
+// parameter list `()` is treated as variadic.
+static C_Type *func_params(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty) {
+  if (C_equal(tok, "void") && C_equal(tok->next, ")")) {
+    *rest = tok->next->next;
+    return C_func_type(parser, ty);
+  }
+
+  C_Type head = {0};
+  C_Type *cur = &head;
+  bool is_variadic = false;
+
+  while (!C_equal(tok, ")")) {
+    if (cur != &head)
+      tok = C_skip(parser, tok, ",");
+
+    if (C_equal(tok, "...")) {
+      is_variadic = true;
+      tok = tok->next;
+      // Only checks that ")" follows; `tok` stays on the ")" so that the
+      // common exit path below can step over it.
+      C_skip(parser, tok, ")");
+      break;
+    }
+
+    C_Type *ty2 = declspec(parser, &tok, tok, NULL);
+    ty2 = declarator(parser, &tok, tok, ty2);
+
+    C_Token *name = ty2->name;
+
+    if (ty2->kind == TY_ARRAY) {
+      // "array of T" is converted to "pointer to T" only in the parameter
+      // context. For example, *argv[] is converted to **argv by this.
+      ty2 = C_pointer_to(parser, ty2->base);
+      ty2->name = name;
+    } else if (ty2->kind == TY_FUNC) {
+      // Likewise, a function is converted to a pointer to a function
+      // only in the parameter context.
+      ty2 = C_pointer_to(parser, ty2);
+      ty2->name = name;
+    }
+
+    cur = cur->next = C_copy_type(parser, ty2);
+  }
+
+  // No parameters at all, i.e. `()`.
+  if (cur == &head)
+    is_variadic = true;
+
+  ty = C_func_type(parser, ty);
+  ty->params = head.next;
+  ty->is_variadic = is_variadic;
+  *rest = tok->next;
+  return ty;
+}
+
+// array-dimensions = ("static" | "restrict")* const-expr?
"]" type-suffix
+//
+// Parses what follows a "[". An empty dimension yields an array of
+// length -1; a dimension that is not a constant expression (or whose
+// element type is already a VLA) yields a VLA type.
+static C_Type *array_dimensions(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty) {
+  for (;;) {
+    if (!C_equal(tok, "static") && !C_equal(tok, "restrict"))
+      break;
+    tok = tok->next;
+  }
+
+  if (C_equal(tok, "]")) {
+    C_Type *elem = type_suffix(parser, rest, tok->next, ty);
+    return C_array_of(parser, elem, -1);
+  }
+
+  C_Node *len_expr = conditional(parser, &tok, tok);
+  tok = C_skip(parser, tok, "]");
+  C_Type *elem = type_suffix(parser, rest, tok, ty);
+
+  bool fixed_len = elem->kind != TY_VLA && is_const_expr(parser, len_expr);
+  if (fixed_len)
+    return C_array_of(parser, elem, eval(parser, len_expr));
+  return C_vla_of(parser, elem, len_expr);
+}
+
+// type-suffix = "(" func-params
+//             | "[" array-dimensions
+//             | ε
+static C_Type *type_suffix(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty) {
+  if (C_equal(tok, "(")) {
+    return func_params(parser, rest, tok->next, ty);
+  } else if (C_equal(tok, "[")) {
+    return array_dimensions(parser, rest, tok->next, ty);
+  } else {
+    // No suffix: the type is unchanged and no token is consumed.
+    *rest = tok;
+    return ty;
+  }
+}
+
+// pointers = ("*" ("const" | "volatile" | "restrict")*)*
+//
+// Wraps `ty` in one pointer level per "*"; qualifiers after each "*" are
+// skipped.
+static C_Type *pointers(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty) {
+  for (;;) {
+    if (!C_consume(&tok, tok, "*"))
+      break;
+
+    ty = C_pointer_to(parser, ty);
+    for (;;) {
+      bool is_qualifier = C_equal(tok, "const") || C_equal(tok, "volatile") || C_equal(tok, "restrict") ||
+                          C_equal(tok, "__restrict") || C_equal(tok, "__restrict__");
+      if (!is_qualifier)
+        break;
+      tok = tok->next;
+    }
+  }
+  *rest = tok;
+  return ty;
+}
+
+// declarator = pointers ("(" ident ")" | "(" declarator ")" | ident) type-suffix
+//
+// The resulting type records the declared name in `name` (NULL if the name
+// is omitted) and the position where a name was expected in `name_pos`.
+static C_Type *declarator(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty) {
+  ty = pointers(parser, &tok, tok, ty);
+
+  if (!C_equal(tok, "(")) {
+    C_Token *where = tok;
+    C_Token *ident = NULL;
+    if (tok->kind == TK_IDENT) {
+      ident = tok;
+      tok = tok->next;
+    }
+
+    ty = type_suffix(parser, rest, tok, ty);
+    ty->name = ident;
+    ty->name_pos = where;
+    return ty;
+  }
+
+  // Parenthesized declarator. The suffix after the ")" binds first, so the
+  // inner declarator is parsed once against a throw-away type just to find
+  // the ")", then again against the real type once the suffix is applied.
+  C_Token *open = tok;
+  C_Type scratch = {0};
+  declarator(parser, &tok, open->next, &scratch);
+  tok = C_skip(parser, tok, ")");
+  ty = type_suffix(parser, rest, tok, ty);
+  return declarator(parser, &tok, open->next, ty);
+}
+
+// abstract-declarator = pointers ("(" abstract-declarator ")")? type-suffix
+static C_Type *abstract_declarator(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty) {
+  ty = pointers(parser, &tok, tok, ty);
+
+  if (!C_equal(tok, "("))
+    return type_suffix(parser, rest, tok, ty);
+
+  // Same two-pass scheme as in declarator().
+  C_Token *open = tok;
+  C_Type scratch = {0};
+  abstract_declarator(parser, &tok, open->next, &scratch);
+  tok = C_skip(parser, tok, ")");
+  ty = type_suffix(parser, rest, tok, ty);
+  return abstract_declarator(parser, &tok, open->next, ty);
+}
+
+// type-name = declspec abstract-declarator
+static C_Type *typename(C_Parser *parser, C_Token **rest, C_Token *tok) {
+  C_Type *base = declspec(parser, &tok, tok, NULL);
+  return abstract_declarator(parser, rest, tok, base);
+}
+
+// True if `tok` ends a brace-enclosed list: "}" or ",}".
+static bool is_end(C_Token *tok) {
+  if (C_equal(tok, "}"))
+    return true;
+  return C_equal(tok, ",") && C_equal(tok->next, "}");
+}
+
+// Like is_end(), but also steps over the terminator: on success `*rest`
+// points past the "}". On failure `*rest` is left untouched.
+static bool consume_end(C_Token **rest, C_Token *tok) {
+  C_Token *after;
+
+  if (C_equal(tok, "}"))
+    after = tok->next;
+  else if (C_equal(tok, ",") && C_equal(tok->next, "}"))
+    after = tok->next->next;
+  else
+    return false;
+
+  *rest = after;
+  return true;
+}
+
+// enum-specifier = ident? "{" enum-list? "}"
+//                | ident ("{" enum-list? "}")?
+//
+// enum-list      = ident ("=" num)? ("," ident ("=" num)?)* ","?
+static C_Type *enum_specifier(C_Parser *parser, C_Token **rest, C_Token *tok) {
+  C_Type *ty = C_enum_type(parser);
+
+  // Read an enum tag.
+  C_Token *tag = NULL;
+  if (tok->kind == TK_IDENT) {
+    tag = tok;
+    tok = tok->next;
+  }
+
+  // `enum tag` without a body refers to a previously declared enum.
+  if (tag && !C_equal(tok, "{")) {
+    C_Type *known = find_tag(parser, tag);
+    if (!known)
+      C_error_tok(parser, tag, "unknown enum type");
+    if (known->kind != TY_ENUM)
+      C_error_tok(parser, tag, "not an enum tag");
+    *rest = tok;
+    return known;
+  }
+
+  tok = C_skip(parser, tok, "{");
+
+  // Read an enum-list. Each enumerator is entered into the current scope;
+  // without an explicit "=" it takes the previous value plus one.
+  int next_val = 0;
+  for (int count = 0; !consume_end(rest, tok); count++) {
+    if (count > 0)
+      tok = C_skip(parser, tok, ",");
+
+    char *name = get_ident(parser, tok);
+    tok = tok->next;
+
+    if (C_equal(tok, "="))
+      next_val = C_const_expr(parser, &tok, tok->next);
+
+    C_VarScope *entry = push_scope(parser, name);
+    entry->enum_ty = ty;
+    entry->enum_val = next_val;
+    next_val++;
+  }
+
+  if (tag)
+    push_tag_scope(parser, tag, ty);
+  return ty;
+}
+
+// typeof-specifier = "(" (expr | typename) ")"
+static C_Type *typeof_specifier(C_Parser *parser, C_Token **rest, C_Token *tok) {
+  tok = C_skip(parser, tok, "(");
+
+  C_Type *result;
+  if (!is_typename(parser, tok)) {
+    // An expression operand: its type is the result.
+    C_Node *operand = expr(parser, &tok, tok);
+    C_add_type(parser, operand);
+    result = operand->ty;
+  } else {
+    result = typename(parser, &tok, tok);
+  }
+  *rest = C_skip(parser, tok, ")");
+  return result;
+}
+
+// Generate code for computing a VLA size.
+// Recurses through `ty->base` first, so the sizes of inner VLAs are
+// computed before the sizes that depend on them. For each VLA level a
+// hidden local is created and recorded in `ty->vla_size`.
+static C_Node *compute_vla_size(C_Parser *parser, C_Type *ty, C_Token *tok) {
+  C_Node *seq = new_node(parser, ND_NULL_EXPR, tok);
+  if (ty->base) {
+    C_Node *inner = compute_vla_size(parser, ty->base, tok);
+    seq = new_binary(parser, ND_COMMA, seq, inner, tok);
+  }
+
+  if (ty->kind != TY_VLA)
+    return seq;
+
+  // Size of one element: either another run-time size or a constant.
+  C_Node *elem_size = (ty->base->kind == TY_VLA)
+                          ? new_var_node(parser, ty->base->vla_size, tok)
+                          : new_num(parser, ty->base->size, tok);
+
+  ty->vla_size = new_lvar(parser, "", C_ty_ulong);
+  C_Node *target = new_var_node(parser, ty->vla_size, tok);
+  C_Node *total = new_binary(parser, ND_MUL, ty->vla_len, elem_size, tok);
+  C_Node *store = new_binary(parser, ND_ASSIGN, target, total, tok);
+  return new_binary(parser, ND_COMMA, seq, store, tok);
+}
+
+// Builds a call to the parser's builtin alloca with `sz` as its argument.
+static C_Node *new_alloca(C_Parser *parser, C_Node *sz) {
+  C_Obj *fn = parser->builtin_alloca;
+  C_Node *callee = new_var_node(parser, fn, sz->tok);
+  C_Node *call = new_unary(parser, ND_FUNCALL, callee, sz->tok);
+  call->func_ty = fn->ty;
+  call->ty = fn->ty->return_ty;
+  call->args = sz;
+  C_add_type(parser, sz);
+  return call;
+}
+
+// declaration = declspec (declarator ("=" expr)? ("," declarator ("=" expr)?)*)?
";"
+//
+// Parses the declarators of one local declaration (the declspec has
+// already been read into `basety` / `attr`) and returns an ND_BLOCK node
+// holding the statements needed to set the variables up.
+static C_Node *declaration(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *basety, VarAttr *attr) {
+  C_Node head = {0};
+  C_Node *cur = &head;
+  int i = 0;
+
+  while (!C_equal(tok, ";")) {
+    if (i++ > 0)
+      tok = C_skip(parser, tok, ",");
+
+    C_Type *ty = declarator(parser, &tok, tok, basety);
+    if (ty->kind == TY_VOID)
+      C_error_tok(parser, tok, "variable declared void");
+    if (!ty->name)
+      C_error_tok(parser, ty->name_pos, "variable name omitted");
+
+    if (attr && attr->is_static) {
+      // static local variable: an anonymous global that is visible under
+      // its source name in the current scope only.
+      C_Obj *var = new_anon_gvar(parser, ty);
+      push_scope(parser, get_ident(parser, ty->name))->var = var;
+      if (C_equal(tok, "="))
+        gvar_initializer(parser, &tok, tok->next, var);
+      continue;
+    }
+
+    // Generate code for computing a VLA size. We need to do this
+    // even if ty is not VLA because ty may be a pointer to VLA
+    // (e.g. int (*foo)[n][m] where n and m are variables.)
+    cur = cur->next = new_unary(parser, ND_EXPR_STMT, compute_vla_size(parser, ty, tok), tok);
+
+    if (ty->kind == TY_VLA) {
+      if (C_equal(tok, "="))
+        C_error_tok(parser, tok, "variable-sized object may not be initialized");
+
+      // Variable length arrays (VLAs) are translated to alloca() calls.
+      // For example, `int x[n+2]` is translated to `tmp = n + 2,
+      // x = alloca(tmp)`.
+      C_Obj *var = new_lvar(parser, get_ident(parser, ty->name), ty);
+      // The generated nodes are attributed to the variable's name token.
+      // A separate local is used so the parse position `tok` is not
+      // shadowed.
+      C_Token *name_tok = ty->name;
+      C_Node *expr = new_binary(parser, ND_ASSIGN, new_vla_ptr(parser, var, name_tok),
+                                new_alloca(parser, new_var_node(parser, ty->vla_size, name_tok)),
+                                name_tok);
+
+      cur = cur->next = new_unary(parser, ND_EXPR_STMT, expr, name_tok);
+      continue;
+    }
+
+    C_Obj *var = new_lvar(parser, get_ident(parser, ty->name), ty);
+    if (attr && attr->align)
+      var->align = attr->align;
+
+    if (C_equal(tok, "=")) {
+      C_Node *expr = lvar_initializer(parser, &tok, tok->next, var);
+      cur = cur->next = new_unary(parser, ND_EXPR_STMT, expr, tok);
+    }
+
+    // Checked after the initializer, which may have completed the type
+    // (e.g. `int x[] = {1, 2, 3}`).
+    if (var->ty->size < 0)
+      C_error_tok(parser, ty->name, "variable has incomplete type");
+    if (var->ty->kind == TY_VOID)
+      C_error_tok(parser, ty->name, "variable declared void");
+  }
+
+  C_Node *node = new_node(parser, ND_BLOCK, tok);
+  node->body = head.next;
+  *rest = tok->next;
+  return node;
+}
+
+// Parses and discards one initializer element for which the object has no
+// room. Returns the token after it.
+static C_Token *skip_excess_element(C_Parser *parser, C_Token *tok) {
+  if (C_equal(tok, "{")) {
+    tok = skip_excess_element(parser, tok->next);
+    return C_skip(parser, tok, "}");
+  }
+
+  assign(parser, &tok, tok);
+  return tok;
+}
+
+// string-initializer = string-literal
+//
+// Initializes an array from a string literal, one element per code unit.
+// A flexible array is first given the literal's length; otherwise only as
+// many elements as fit are copied.
+static void string_initializer(C_Parser *parser, C_Token **rest, C_Token *tok, Initializer *init) {
+  if (init->is_flexible)
+    *init = *new_initializer(parser, C_array_of(parser, init->ty->base, tok->ty->array_len), false);
+
+  int len = MIN(init->ty->array_len, tok->ty->array_len);
+
+  // The element size selects how the literal's bytes are read.
+  switch (init->ty->base->size) {
+  case 1: {
+    char *str = tok->str;
+    for (int i = 0; i < len; i++)
+      init->children[i]->expr = new_num(parser, str[i], tok);
+    break;
+  }
+  case 2: {
+    uint16_t *str = (uint16_t *)tok->str;
+    for (int i = 0; i < len; i++)
+      init->children[i]->expr = new_num(parser, str[i], tok);
+    break;
+  }
+  case 4: {
+    uint32_t *str = (uint32_t *)tok->str;
+    for (int i = 0; i < len; i++)
+      init->children[i]->expr = new_num(parser, str[i], tok);
+    break;
+  }
+  default:
+    unreachable(parser);
+  }
+
+  *rest = tok->next;
+}
+
+// array-designator = "[" const-expr "]"
+//
+// C99 added the designated initializer to the language, which allows
+// programmers to move the "cursor" of an initializer to any element.
+// The syntax looks like this:
+//
+//   int x[10] = { 1, 2, [5]=3, 4, 5, 6, 7 };
+//
+// `[5]` moves the cursor to the 5th element, so the 5th element of x
+// is set to 3. Initialization then continues forward in order, so
+// 6th, 7th, 8th and 9th elements are initialized with 4, 5, 6 and 7,
+// respectively. Unspecified elements (in this case, 3rd and 4th
+// elements) are initialized with zero.
+//
+// Nesting is allowed, so the following initializer is valid:
+//
+//   int x[5][10] = { [5][8]=1, 2, 3 };
+//
+// It sets x[5][8], x[5][9] and x[6][0] to 1, 2 and 3, respectively.
+//
+// Use `.fieldname` to move the cursor for a struct initializer. E.g.
+//
+//   struct { int a, b, c; } x = { .c=5 };
+//
+// The above initializer sets x.c to 5.
+//
+// This function also accepts the range form `[first ... last]`. On return
+// `*begin` and `*end` hold the inclusive index range (equal for a single
+// index). Indices outside [0, array_len) are reported as errors; negative
+// ones are rejected too, because callers use the result to index the
+// initializer's `children` array directly.
+static void array_designator(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty, int *begin, int *end) {
+  *begin = C_const_expr(parser, &tok, tok->next);
+  if (*begin < 0 || *begin >= ty->array_len)
+    C_error_tok(parser, tok, "array designator index exceeds array bounds");
+
+  if (C_equal(tok, "...")) {
+    *end = C_const_expr(parser, &tok, tok->next);
+    if (*end < 0 || *end >= ty->array_len)
+      C_error_tok(parser, tok, "array designator index exceeds array bounds");
+    if (*end < *begin)
+      C_error_tok(parser, tok, "array designator range [%d, %d] is empty", *begin, *end);
+  } else {
+    *end = *begin;
+  }
+
+  *rest = C_skip(parser, tok, "]");
+}
+
+// struct-designator = "."
ident
+//
+// Resolves `.name` against the members of `ty` and returns the matching
+// direct member.
+//
+// If the name belongs to an anonymous struct/union member, that anonymous
+// member is returned and `*rest` is rewound to the "." so that the caller
+// resolves the same designator again one level down. Otherwise `*rest`
+// points past the identifier.
+static C_Member *struct_designator(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty) {
+  C_Token *start = tok;
+  tok = C_skip(parser, tok, ".");
+  if (tok->kind != TK_IDENT)
+    C_error_tok(parser, tok, "expected a field designator");
+
+  for (C_Member *mem = ty->members; mem; mem = mem->next) {
+    // Anonymous struct or union member. Anonymous unions have no name
+    // either, so they must be handled here as well; previously they fell
+    // through to the name comparison below and dereferenced a NULL name.
+    // NOTE(review): relies on get_struct_member() (defined elsewhere in
+    // this file) searching the members of a union as it does for a
+    // struct -- confirm.
+    if ((mem->ty->kind == TY_STRUCT || mem->ty->kind == TY_UNION) && !mem->name) {
+      if (get_struct_member(parser, mem->ty, tok)) {
+        *rest = start;
+        return mem;
+      }
+      continue;
+    }
+
+    // Any other member without a name cannot be designated.
+    if (!mem->name)
+      continue;
+
+    // Regular struct member
+    if (mem->name->len == tok->len && !strncmp(mem->name->loc, tok->loc, tok->len)) {
+      *rest = tok->next;
+      return mem;
+    }
+  }
+
+  C_error_tok(parser, tok, "struct has no such member");
+  // Only reached if the error reporter returns.
+  return NULL;
+}
+
+// designation = ("[" const-expr "]" | "." ident)* "="? initializer
+//
+// Follows a chain of designators down the initializer tree, initializes
+// the designated element, and then lets initialization continue with the
+// elements that follow it.
+static void designation(C_Parser *parser, C_Token **rest, C_Token *tok, Initializer *init) {
+  if (C_equal(tok, "[")) {
+    if (init->ty->kind != TY_ARRAY)
+      C_error_tok(parser, tok, "array index in non-array initializer");
+
+    int begin, end;
+    array_designator(parser, &tok, tok, init->ty, &begin, &end);
+
+    // For a range, the same initializer text is parsed once per element.
+    C_Token *tok2;
+    for (int i = begin; i <= end; i++)
+      designation(parser, &tok2, tok, init->children[i]);
+    // Continue with the element after the designated one(s). For a range
+    // that is `end + 1` (which equals `begin + 1` for a single index);
+    // resuming at `begin + 1` would overwrite the rest of the range. This
+    // matches array_initializer1(), which resumes after `end`.
+    array_initializer2(parser, rest, tok2, init, end + 1);
+    return;
+  }
+
+  if (C_equal(tok, ".") && init->ty->kind == TY_STRUCT) {
+    C_Member *mem = struct_designator(parser, &tok, tok, init->ty);
+    designation(parser, &tok, tok, init->children[mem->idx]);
+    // Member-wise initialization replaces any whole-struct expression.
+    init->expr = NULL;
+    struct_initializer2(parser, rest, tok, init, mem->next);
+    return;
+  }
+
+  if (C_equal(tok, ".") && init->ty->kind == TY_UNION) {
+    C_Member *mem = struct_designator(parser, &tok, tok, init->ty);
+    init->mem = mem;
+    designation(parser, rest, tok, init->children[mem->idx]);
+    return;
+  }
+
+  if (C_equal(tok, "."))
+    C_error_tok(parser, tok, "field name not in struct or union initializer");
+
+  // End of the designator chain: the "=" is optional here.
+  if (C_equal(tok, "="))
+    tok = tok->next;
+  initializer2(parser, rest, tok, init);
+}
+
+// An array length can be omitted
if an array has an initializer
+// (e.g. `int x[] = {1,2,3}`). If it's omitted, count the number
+// of initializer elements.
+//
+// `tok` points just past the opening "{". The elements are parsed into a
+// scratch initializer and discarded; only the highest index reached (plus
+// one) is returned. The caller's token position is not advanced.
+static int count_array_init_elements(C_Parser *parser, C_Token *tok, C_Type *ty) {
+  bool first = true;
+  Initializer *dummy = new_initializer(parser, ty->base, true);
+
+  int i = 0, max = 0;
+
+  while (!consume_end(&tok, tok)) {
+    if (!first)
+      tok = C_skip(parser, tok, ",");
+    first = false;
+
+    if (C_equal(tok, "[")) {
+      // A designator moves the cursor; for a range, to its last index.
+      i = C_const_expr(parser, &tok, tok->next);
+      if (C_equal(tok, "..."))
+        i = C_const_expr(parser, &tok, tok->next);
+      tok = C_skip(parser, tok, "]");
+      designation(parser, &tok, tok, dummy);
+    } else {
+      initializer2(parser, &tok, tok, dummy);
+    }
+
+    i++;
+    max = MAX(max, i);
+  }
+  return max;
+}
+
+// array-initializer1 = "{" initializer ("," initializer)* ","? "}"
+//
+// Brace-enclosed array initializer. Elements beyond the array length are
+// parsed and discarded.
+static void array_initializer1(C_Parser *parser, C_Token **rest, C_Token *tok, Initializer *init) {
+  tok = C_skip(parser, tok, "{");
+
+  // An array of unknown length takes its length from the element count.
+  // The rebuilt initializer is no longer flexible, so this runs at most
+  // once (a second, identical block that used to follow was unreachable).
+  if (init->is_flexible) {
+    int len = count_array_init_elements(parser, tok, init->ty);
+    *init = *new_initializer(parser, C_array_of(parser, init->ty->base, len), false);
+  }
+
+  bool first = true;
+
+  for (int i = 0; !consume_end(rest, tok); i++) {
+    if (!first)
+      tok = C_skip(parser, tok, ",");
+    first = false;
+
+    if (C_equal(tok, "[")) {
+      int begin, end;
+      array_designator(parser, &tok, tok, init->ty, &begin, &end);
+
+      // For a range, the same initializer text is parsed once per element.
+      C_Token *tok2;
+      for (int j = begin; j <= end; j++)
+        designation(parser, &tok2, tok, init->children[j]);
+      tok = tok2;
+      // The loop increment moves the cursor to end + 1.
+      i = end;
+      continue;
+    }
+
+    if (i < init->ty->array_len)
+      initializer2(parser, &tok, tok, init->children[i]);
+    else
+      tok = skip_excess_element(parser, tok);
+  }
+}
+
+// array-initializer2 = initializer ("," initializer)*
+//
+// Array initializer without its own braces, starting at element `i`.
+// Stops at the end of the enclosing list, when the array is full, or in
+// front of a designator (which belongs to an enclosing initializer).
+static void array_initializer2(C_Parser *parser, C_Token **rest, C_Token *tok, Initializer *init, int i) {
+  if (init->is_flexible) {
+    int len = count_array_init_elements(parser, tok, init->ty);
+    *init = *new_initializer(parser, C_array_of(parser, init->ty->base, len), false);
+  }
+
+  for (; i < init->ty->array_len && !is_end(tok); i++) {
+    C_Token *start = tok;
+    if (i > 0)
+      tok = C_skip(parser, tok, ",");
+
+    if (C_equal(tok, "[") || C_equal(tok, ".")) {
+      // Hand the designator (and the comma before it) back to the caller.
+      *rest = start;
+      return;
+    }
+
+    initializer2(parser, &tok, tok, init->children[i]);
+  }
+  *rest = tok;
+}
+
+// struct-initializer1 = "{" initializer ("," initializer)* ","? "}"
+//
+// Brace-enclosed struct initializer. Elements beyond the last member are
+// parsed and discarded.
+static void struct_initializer1(C_Parser *parser, C_Token **rest, C_Token *tok, Initializer *init) {
+  tok = C_skip(parser, tok, "{");
+
+  C_Member *mem = init->ty->members;
+  bool first = true;
+
+  while (!consume_end(rest, tok)) {
+    if (!first)
+      tok = C_skip(parser, tok, ",");
+    first = false;
+
+    if (C_equal(tok, ".")) {
+      // A designator moves the cursor to the named member.
+      mem = struct_designator(parser, &tok, tok, init->ty);
+      designation(parser, &tok, tok, init->children[mem->idx]);
+      mem = mem->next;
+      continue;
+    }
+
+    if (mem) {
+      initializer2(parser, &tok, tok, init->children[mem->idx]);
+      mem = mem->next;
+    } else {
+      tok = skip_excess_element(parser, tok);
+    }
+  }
+}
+
+// struct-initializer2 = initializer ("," initializer)*
+//
+// Struct initializer without its own braces, starting at member `mem`.
+// Stops at the end of the enclosing list, after the last member, or in
+// front of a designator (which belongs to an enclosing initializer).
+static void struct_initializer2(C_Parser *parser, C_Token **rest, C_Token *tok, Initializer *init, C_Member *mem) {
+  bool first = true;
+
+  for (; mem && !is_end(tok); mem = mem->next) {
+    C_Token *start = tok;
+
+    if (!first)
+      tok = C_skip(parser, tok, ",");
+    first = false;
+
+    if (C_equal(tok, "[") || C_equal(tok, ".")) {
+      // Hand the designator (and the comma before it) back to the caller.
+      *rest = start;
+      return;
+    }
+
+    initializer2(parser, &tok, tok, init->children[mem->idx]);
+  }
+  *rest = tok;
+}
+
+// Initializes a union; `init->mem` records which member was chosen.
+static void union_initializer(C_Parser *parser, C_Token **rest, C_Token *tok, Initializer *init) {
+  // Unlike structs, union initializers take only one initializer,
+  // and that initializes the first union member by default.
+  // You can initialize other member using a designated initializer.
+  if (C_equal(tok, "{") && C_equal(tok->next, ".")) {
+    C_Member *mem = struct_designator(parser, &tok, tok->next, init->ty);
+    init->mem = mem;
+    designation(parser, &tok, tok, init->children[mem->idx]);
+    *rest = C_skip(parser, tok, "}");
+    return;
+  }
+
+  init->mem = init->ty->members;
+
+  if (C_equal(tok, "{")) {
+    initializer2(parser, &tok, tok->next, init->children[0]);
+    // A trailing comma before the "}" is allowed.
+    C_consume(&tok, tok, ",");
+    *rest = C_skip(parser, tok, "}");
+  } else {
+    initializer2(parser, rest, tok, init->children[0]);
+  }
+}
+
+// initializer = string-initializer | array-initializer
+//             | struct-initializer | union-initializer
+//             | assign
+static void initializer2(C_Parser *parser, C_Token **rest, C_Token *tok, Initializer *init) {
+  if (init->ty->kind == TY_ARRAY && tok->kind == TK_STR) {
+    string_initializer(parser, rest, tok, init);
+    return;
+  }
+
+  if (init->ty->kind == TY_ARRAY) {
+    if (C_equal(tok, "{"))
+      array_initializer1(parser, rest, tok, init);
+    else
+      array_initializer2(parser, rest, tok, init, 0);
+    return;
+  }
+
+  if (init->ty->kind == TY_STRUCT) {
+    if (C_equal(tok, "{")) {
+      struct_initializer1(parser, rest, tok, init);
+      return;
+    }
+
+    // A struct can be initialized with another struct. E.g.
+    // `struct T x = y;` where y is a variable of type `struct T`.
+    // Handle that case first.
+    C_Node *expr = assign(parser, rest, tok);
+    C_add_type(parser, expr);
+    if (expr->ty->kind == TY_STRUCT) {
+      init->expr = expr;
+      return;
+    }
+
+    struct_initializer2(parser, rest, tok, init, init->ty->members);
+    return;
+  }
+
+  if (init->ty->kind == TY_UNION) {
+    union_initializer(parser, rest, tok, init);
+    return;
+  }
+
+  if (C_equal(tok, "{")) {
+    // An initializer for a scalar variable can be surrounded by
+    // braces. E.g. `int x = {3};`. Handle that case.
+ initializer2(parser, &tok, tok->next, init); + *rest = C_skip(parser, tok, "}"); + return; + } + + init->expr = assign(parser, rest, tok); +} + +static C_Type *copy_struct_type(C_Parser *parser, C_Type *ty) { + ty = C_copy_type(parser, ty); + + C_Member head = {0}; + C_Member *cur = &head; + for (C_Member *mem = ty->members; mem; mem = mem->next) { + C_Member *m = mspace_calloc(parser->arena, 1, sizeof(C_Member)); + *m = *mem; + cur = cur->next = m; + } + + ty->members = head.next; + return ty; +} + +static Initializer *initializer(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty, C_Type **new_ty) { + Initializer *init = new_initializer(parser, ty, true); + initializer2(parser, rest, tok, init); + + if ((ty->kind == TY_STRUCT || ty->kind == TY_UNION) && ty->is_flexible) { + ty = copy_struct_type(parser, ty); + + C_Member *mem = ty->members; + while (mem->next) + mem = mem->next; + mem->ty = init->children[mem->idx]->ty; + ty->size += mem->ty->size; + + *new_ty = ty; + return init; + } + + *new_ty = init->ty; + return init; +} + +static C_Node *init_desg_expr(C_Parser *parser, InitDesg *desg, C_Token *tok) { + if (desg->var) + return new_var_node(parser, desg->var, tok); + + if (desg->member) { + C_Node *node = new_unary(parser, ND_MEMBER, init_desg_expr(parser, desg->next, tok), tok); + node->member = desg->member; + return node; + } + + C_Node *lhs = init_desg_expr(parser, desg->next, tok); + C_Node *rhs = new_num(parser, desg->idx, tok); + return new_unary(parser, ND_DEREF, new_add(parser, lhs, rhs, tok), tok); +} + +static C_Node *create_lvar_init(C_Parser *parser, Initializer *init, C_Type *ty, InitDesg *desg, C_Token *tok) { + if (ty->kind == TY_ARRAY) { + C_Node *node = new_node(parser, ND_NULL_EXPR, tok); + for (int i = 0; i < ty->array_len; i++) { + InitDesg desg2 = {desg, i}; + C_Node *rhs = create_lvar_init(parser, init->children[i], ty->base, &desg2, tok); + node = new_binary(parser, ND_COMMA, node, rhs, tok); + } + return node; + } + + if 
(ty->kind == TY_STRUCT && !init->expr) { + C_Node *node = new_node(parser, ND_NULL_EXPR, tok); + + for (C_Member *mem = ty->members; mem; mem = mem->next) { + InitDesg desg2 = {desg, 0, mem}; + C_Node *rhs = create_lvar_init(parser, init->children[mem->idx], mem->ty, &desg2, tok); + node = new_binary(parser, ND_COMMA, node, rhs, tok); + } + return node; + } + + if (ty->kind == TY_UNION) { + C_Member *mem = init->mem ? init->mem : ty->members; + InitDesg desg2 = {desg, 0, mem}; + return create_lvar_init(parser, init->children[mem->idx], mem->ty, &desg2, tok); + } + + if (!init->expr) + return new_node(parser, ND_NULL_EXPR, tok); + + C_Node *lhs = init_desg_expr(parser, desg, tok); + return new_binary(parser, ND_ASSIGN, lhs, init->expr, tok); +} + +// A variable definition with an initializer is a shorthand notation +// for a variable definition followed by assignments. This function +// generates assignment expressions for an initializer. For example, +// `int x[2][2] = {{6, 7}, {8, 9}}` is converted to the following +// expressions: +// +// x[0][0] = 6; +// x[0][1] = 7; +// x[1][0] = 8; +// x[1][1] = 9; +static C_Node *lvar_initializer(C_Parser *parser, C_Token **rest, C_Token *tok, C_Obj *var) { + Initializer *init = initializer(parser, rest, tok, var->ty, &var->ty); + InitDesg desg = {NULL, 0, NULL, var}; + + // If a partial initializer list is given, the standard requires + // that unspecified elements are set to 0. Here, we simply + // zero-initialize the entire memory region of a variable before + // initializing it with user-supplied values. 
+ C_Node *lhs = new_node(parser, ND_MEMZERO, tok); + lhs->var = var; + + C_Node *rhs = create_lvar_init(parser, init, var->ty, &desg, tok); + return new_binary(parser, ND_COMMA, lhs, rhs, tok); +} + +static uint64_t read_buf(C_Parser *parser, char *buf, int sz) { + if (sz == 1) + return *buf; + if (sz == 2) + return *(uint16_t *)buf; + if (sz == 4) + return *(uint32_t *)buf; + if (sz == 8) + return *(uint64_t *)buf; + unreachable(parser); +} + +static void write_buf(C_Parser *parser, char *buf, uint64_t val, int sz) { + if (sz == 1) + *buf = val; + else if (sz == 2) + *(uint16_t *)buf = val; + else if (sz == 4) + *(uint32_t *)buf = val; + else if (sz == 8) + *(uint64_t *)buf = val; + else + unreachable(parser); +} + +static C_Relocation * +write_gvar_data(C_Parser *parser, C_Relocation *cur, Initializer *init, C_Type *ty, char *buf, int offset) { + if (ty->kind == TY_ARRAY) { + int sz = ty->base->size; + for (int i = 0; i < ty->array_len; i++) + cur = write_gvar_data(parser, cur, init->children[i], ty->base, buf, offset + sz * i); + return cur; + } + + if (ty->kind == TY_STRUCT) { + for (C_Member *mem = ty->members; mem; mem = mem->next) { + if (mem->is_bitfield) { + C_Node *expr = init->children[mem->idx]->expr; + if (!expr) + break; + + char *loc = buf + offset + mem->offset; + uint64_t oldval = read_buf(parser, loc, mem->ty->size); + uint64_t newval = eval(parser, expr); + uint64_t mask = (1L << mem->bit_width) - 1; + uint64_t combined = oldval | ((newval & mask) << mem->bit_offset); + write_buf(parser, loc, combined, mem->ty->size); + } else { + cur = write_gvar_data(parser, cur, init->children[mem->idx], mem->ty, buf, + offset + mem->offset); + } + } + return cur; + } + + if (ty->kind == TY_UNION) { + if (!init->mem) + return cur; + return write_gvar_data(parser, cur, init->children[init->mem->idx], + init->mem->ty, buf, offset); + } + + if (!init->expr) + return cur; + + if (ty->kind == TY_FLOAT) { + *(float *)(buf + offset) = eval_double(parser, init->expr); 
+ return cur; + } + + if (ty->kind == TY_DOUBLE) { + *(double *)(buf + offset) = eval_double(parser, init->expr); + return cur; + } + + char **label = NULL; + uint64_t val = eval2(parser, init->expr, &label); + + if (!label) { + write_buf(parser, buf + offset, val, ty->size); + return cur; + } + + C_Relocation *rel = mspace_calloc(parser->arena, 1, sizeof(C_Relocation)); + rel->offset = offset; + rel->label = label; + rel->addend = val; + cur->next = rel; + return cur->next; +} + +// Initializers for global variables are evaluated at compile-time and +// embedded to .data section. This function serializes Initializer +// objects to a flat byte array. It is a compile error if an +// initializer list contains a non-constant expression. +static void gvar_initializer(C_Parser *parser, C_Token **rest, C_Token *tok, C_Obj *var) { + Initializer *init = initializer(parser, rest, tok, var->ty, &var->ty); + + C_Relocation head = {0}; + char *buf = mspace_calloc(parser->arena, 1, var->ty->size); + write_gvar_data(parser, &head, init, var->ty, buf, 0); + var->init_data = buf; + var->rel = head.next; +} + +// Returns true if a given token represents a type. 
+static bool is_typename(C_Parser *parser, C_Token *tok) { + if (parser->typewords.capacity == 0) { + static char *kw[] = { + "void", "_Bool", "char", "short", "int", "long", "struct", "union", + "typedef", "enum", "static", "extern", "_Alignas", "signed", "unsigned", + "const", "volatile", "auto", "register", "restrict", "__restrict", + "__restrict__", "_Noreturn", "float", "double", "typeof", "inline", + "_Thread_local", "__thread", "_Atomic", + }; + + for (int i = 0; i < sizeof(kw) / sizeof(*kw); i++) + hashmap_put(&parser->typewords, kw[i], (void *)1); + } + return hashmap_get2(&parser->typewords, tok->loc, tok->len) || find_typedef(parser, tok); +} + +// asm-stmt = "asm" ("volatile" | "inline")* "(" string-literal ")" +static C_Node *asm_stmt(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = new_node(parser, ND_ASM, tok); + tok = tok->next; + + while (C_equal(tok, "volatile") || C_equal(tok, "inline")) + tok = tok->next; + + tok = C_skip(parser, tok, "("); + if (tok->kind != TK_STR || tok->ty->base->kind != TY_CHAR) + C_error_tok(parser, tok, "expected string literal"); + node->asm_str = tok->str; + *rest = C_skip(parser, tok->next, ")"); + return node; +} + +// stmt = "return" expr? ";" +// | "if" "(" expr ")" stmt ("else" stmt)? +// | "switch" "(" expr ")" stmt +// | "case" const-expr ("..." const-expr)? ":" stmt +// | "default" ":" stmt +// | "for" "(" expr-stmt expr? ";" expr? 
")" stmt +// | "while" "(" expr ")" stmt +// | "do" stmt "while" "(" expr ")" ";" +// | "asm" asm-stmt +// | "goto" (ident | "*" expr) ";" +// | "break" ";" +// | "continue" ";" +// | ident ":" stmt +// | "{" compound-stmt +// | expr-stmt +static C_Node *stmt(C_Parser *parser, C_Token **rest, C_Token *tok) { + if (C_equal(tok, "return")) { + C_Node *node = new_node(parser, ND_RETURN, tok); + if (C_consume(rest, tok->next, ";")) + return node; + + C_Node *exp = expr(parser, &tok, tok->next); + *rest = C_skip(parser, tok, ";"); + + C_add_type(parser, exp); + C_Type *ty = parser->current_fn->ty->return_ty; + if (ty->kind != TY_STRUCT && ty->kind != TY_UNION) + exp = C_new_cast(parser, exp, parser->current_fn->ty->return_ty); + + node->lhs = exp; + return node; + } + + if (C_equal(tok, "if")) { + C_Node *node = new_node(parser, ND_IF, tok); + tok = C_skip(parser, tok->next, "("); + node->cond = expr(parser, &tok, tok); + tok = C_skip(parser, tok, ")"); + node->then = stmt(parser, &tok, tok); + if (C_equal(tok, "else")) + node->els = stmt(parser, &tok, tok->next); + *rest = tok; + return node; + } + + if (C_equal(tok, "switch")) { + C_Node *node = new_node(parser, ND_SWITCH, tok); + tok = C_skip(parser, tok->next, "("); + node->cond = expr(parser, &tok, tok); + tok = C_skip(parser, tok, ")"); + + C_Node *sw = parser->current_switch; + parser->current_switch = node; + + char *brk = parser->brk_label; + parser->brk_label = node->brk_label = new_unique_name(parser->arena); + + node->then = stmt(parser, rest, tok); + + parser->current_switch = sw; + parser->brk_label = brk; + return node; + } + + if (C_equal(tok, "case")) { + if (!parser->current_switch) + C_error_tok(parser, tok, "stray case"); + + C_Node *node = new_node(parser, ND_CASE, tok); + int begin = C_const_expr(parser, &tok, tok->next); + int end; + + if (C_equal(tok, "...")) { + // [GNU] Case ranges, e.g. "case 1 ... 
5:" + end = C_const_expr(parser, &tok, tok->next); + if (end < begin) + C_error_tok(parser, tok, "empty case range specified"); + } else { + end = begin; + } + + tok = C_skip(parser, tok, ":"); + node->label = new_unique_name(parser->arena); + node->lhs = stmt(parser, rest, tok); + node->begin = begin; + node->end = end; + node->case_next = parser->current_switch->case_next; + parser->current_switch->case_next = node; + return node; + } + + if (C_equal(tok, "default")) { + if (!parser->current_switch) + C_error_tok(parser, tok, "stray default"); + + C_Node *node = new_node(parser, ND_CASE, tok); + tok = C_skip(parser, tok->next, ":"); + node->label = new_unique_name(parser->arena); + node->lhs = stmt(parser, rest, tok); + parser->current_switch->default_case = node; + return node; + } + + if (C_equal(tok, "for")) { + C_Node *node = new_node(parser, ND_FOR, tok); + tok = C_skip(parser, tok->next, "("); + + enter_scope(parser); + + char *brk = parser->brk_label; + char *cont = parser->cont_label; + parser->brk_label = node->brk_label = new_unique_name(parser->arena); + parser->cont_label = node->cont_label = new_unique_name(parser->arena); + + if (is_typename(parser, tok)) { + C_Type *basety = declspec(parser, &tok, tok, NULL); + node->init = declaration(parser, &tok, tok, basety, NULL); + } else { + node->init = expr_stmt(parser, &tok, tok); + } + + if (!C_equal(tok, ";")) + node->cond = expr(parser, &tok, tok); + tok = C_skip(parser, tok, ";"); + + if (!C_equal(tok, ")")) + node->inc = expr(parser, &tok, tok); + tok = C_skip(parser, tok, ")"); + + node->then = stmt(parser, rest, tok); + + leave_scope(parser); + parser->brk_label = brk; + parser->cont_label = cont; + return node; + } + + if (C_equal(tok, "while")) { + C_Node *node = new_node(parser, ND_FOR, tok); + tok = C_skip(parser, tok->next, "("); + node->cond = expr(parser, &tok, tok); + tok = C_skip(parser, tok, ")"); + + char *brk = parser->brk_label; + char *cont = parser->cont_label; + parser->brk_label = 
node->brk_label = new_unique_name(parser->arena); + parser->cont_label = node->cont_label = new_unique_name(parser->arena); + + node->then = stmt(parser, rest, tok); + + parser->brk_label = brk; + parser->cont_label = cont; + return node; + } + + if (C_equal(tok, "do")) { + C_Node *node = new_node(parser, ND_DO, tok); + + char *brk = parser->brk_label; + char *cont = parser->cont_label; + parser->brk_label = node->brk_label = new_unique_name(parser->arena); + parser->cont_label = node->cont_label = new_unique_name(parser->arena); + + node->then = stmt(parser, &tok, tok->next); + + parser->brk_label = brk; + parser->cont_label = cont; + + tok = C_skip(parser, tok, "while"); + tok = C_skip(parser, tok, "("); + node->cond = expr(parser, &tok, tok); + tok = C_skip(parser, tok, ")"); + *rest = C_skip(parser, tok, ";"); + return node; + } + + if (C_equal(tok, "asm")) + return asm_stmt(parser, rest, tok); + + if (C_equal(tok, "goto")) { + if (C_equal(tok->next, "*")) { + // [GNU] `goto *ptr` jumps to the address specified by `ptr`. 
+ C_Node *node = new_node(parser, ND_GOTO_EXPR, tok); + node->lhs = expr(parser, &tok, tok->next->next); + *rest = C_skip(parser, tok, ";"); + return node; + } + + C_Node *node = new_node(parser, ND_GOTO, tok); + node->label = get_ident(parser, tok->next); + node->goto_next = parser->gotos; + parser->gotos = node; + *rest = C_skip(parser, tok->next->next, ";"); + return node; + } + + if (C_equal(tok, "break")) { + if (!parser->brk_label) + C_error_tok(parser, tok, "stray break"); + C_Node *node = new_node(parser, ND_GOTO, tok); + node->unique_label = parser->brk_label; + *rest = C_skip(parser, tok->next, ";"); + return node; + } + + if (C_equal(tok, "continue")) { + if (!parser->cont_label) + C_error_tok(parser, tok, "stray continue"); + C_Node *node = new_node(parser, ND_GOTO, tok); + node->unique_label = parser->cont_label; + *rest = C_skip(parser, tok->next, ";"); + return node; + } + + if (tok->kind == TK_IDENT && C_equal(tok->next, ":")) { + C_Node *node = new_node(parser, ND_LABEL, tok); + node->label = str_dup(parser->arena, tok->loc, tok->len); + node->unique_label = new_unique_name(parser->arena); + node->lhs = stmt(parser, rest, tok->next->next); + node->goto_next = parser->labels; + parser->labels = node; + return node; + } + + if (C_equal(tok, "{")) + return compound_stmt(parser, rest, tok->next); + + return expr_stmt(parser, rest, tok); +} + +// compound-stmt = (typedef | declaration | stmt)* "}" +static C_Node *compound_stmt(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = new_node(parser, ND_BLOCK, tok); + C_Node head = {0}; + C_Node *cur = &head; + + enter_scope(parser); + + while ((!parser->embedded_mode || parser->embedded_mode && tok->kind != TK_EOF) && !C_equal(tok, "}")) { + if (is_typename(parser, tok) && !C_equal(tok->next, ":")) { + VarAttr attr = {0}; + C_Type *basety = declspec(parser, &tok, tok, &attr); + + if (attr.is_typedef) { + tok = parse_typedef(parser, tok, basety); + continue; + } + + if (is_function(parser, 
tok)) { + tok = function(parser, tok, basety, &attr); + continue; + } + + if (attr.is_extern) { + tok = global_variable(parser, tok, basety, &attr); + continue; + } + + cur = cur->next = declaration(parser, &tok, tok, basety, &attr); + } else { + cur = cur->next = stmt(parser, &tok, tok); + } + C_add_type(parser, cur); + } + + leave_scope(parser); + + node->body = head.next; + *rest = tok->next; + return node; +} + +// expr-stmt = expr? ";" +static C_Node *expr_stmt(C_Parser *parser, C_Token **rest, C_Token *tok) { + if (C_equal(tok, ";")) { + *rest = tok->next; + return new_node(parser, ND_BLOCK, tok); + } + + C_Node *node = new_node(parser, ND_EXPR_STMT, tok); + node->lhs = expr(parser, &tok, tok); + *rest = C_skip(parser, tok, ";"); + return node; +} + +// expr = assign ("," expr)? +static C_Node *expr(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = assign(parser, &tok, tok); + + if (C_equal(tok, ",")) + return new_binary(parser, ND_COMMA, node, expr(parser, rest, tok->next), tok); + + *rest = tok; + return node; +} + +static int64_t eval(C_Parser *parser, C_Node *node) { + return eval2(parser, node, NULL); +} + +// Evaluate a given node as a constant expression. +// +// A constant expression is either just a number or ptr+n where ptr +// is a pointer to a global variable and n is a postiive/negative +// number. The latter form is accepted only as an initialization +// expression for a global variable. 
+static int64_t eval2(C_Parser *parser, C_Node *node, char ***label) { + C_add_type(parser, node); + + if (C_is_flonum(node->ty)) + return eval_double(parser, node); + + switch (node->kind) { + case ND_ADD: + return eval2(parser, node->lhs, label) + eval(parser, node->rhs); + case ND_SUB: + return eval2(parser, node->lhs, label) - eval(parser, node->rhs); + case ND_MUL: + return eval(parser, node->lhs) * eval(parser, node->rhs); + case ND_DIV: + if (node->ty->is_unsigned) + return (uint64_t)eval(parser, node->lhs) / eval(parser, node->rhs); + return eval(parser, node->lhs) / eval(parser, node->rhs); + case ND_NEG: + return -eval(parser, node->lhs); + case ND_MOD: + if (node->ty->is_unsigned) + return (uint64_t)eval(parser, node->lhs) % eval(parser, node->rhs); + return eval(parser, node->lhs) % eval(parser, node->rhs); + case ND_BITAND: + return eval(parser, node->lhs) & eval(parser, node->rhs); + case ND_BITOR: + return eval(parser, node->lhs) | eval(parser, node->rhs); + case ND_BITXOR: + return eval(parser, node->lhs) ^ eval(parser, node->rhs); + case ND_SHL: + return eval(parser, node->lhs) << eval(parser, node->rhs); + case ND_SHR: + if (node->ty->is_unsigned && node->ty->size == 8) + return (uint64_t)eval(parser, node->lhs) >> eval(parser, node->rhs); + return eval(parser, node->lhs) >> eval(parser, node->rhs); + case ND_EQ: + return eval(parser, node->lhs) == eval(parser, node->rhs); + case ND_NE: + return eval(parser, node->lhs) != eval(parser, node->rhs); + case ND_LT: + if (node->lhs->ty->is_unsigned) + return (uint64_t)eval(parser, node->lhs) < eval(parser, node->rhs); + return eval(parser, node->lhs) < eval(parser, node->rhs); + case ND_LE: + if (node->lhs->ty->is_unsigned) + return (uint64_t)eval(parser, node->lhs) <= eval(parser, node->rhs); + return eval(parser, node->lhs) <= eval(parser, node->rhs); + case ND_COND: + return eval(parser, node->cond) ? 
eval2(parser, node->then, label) : eval2(parser, node->els, label); + case ND_COMMA: + return eval2(parser, node->rhs, label); + case ND_NOT: + return !eval(parser, node->lhs); + case ND_BITNOT: + return ~eval(parser, node->lhs); + case ND_LOGAND: + return eval(parser, node->lhs) && eval(parser, node->rhs); + case ND_LOGOR: + return eval(parser, node->lhs) || eval(parser, node->rhs); + case ND_CAST: { + int64_t val = eval2(parser, node->lhs, label); + if (C_is_integer(node->ty)) { + switch (node->ty->size) { + case 1: return node->ty->is_unsigned ? (uint8_t)val : (int8_t)val; + case 2: return node->ty->is_unsigned ? (uint16_t)val : (int16_t)val; + case 4: return node->ty->is_unsigned ? (uint32_t)val : (int32_t)val; + } + } + return val; + } + case ND_ADDR: + return eval_rval(parser, node->lhs, label); + case ND_LABEL_VAL: + *label = &node->unique_label; + return 0; + case ND_MEMBER: + if (!label) + C_error_tok(parser, node->tok, "not a compile-time constant"); + if (node->ty->kind != TY_ARRAY) + C_error_tok(parser, node->tok, "invalid initializer"); + return eval_rval(parser, node->lhs, label) + node->member->offset; + case ND_VAR: + if (!label) + C_error_tok(parser, node->tok, "not a compile-time constant"); + if (node->var->ty->kind != TY_ARRAY && node->var->ty->kind != TY_FUNC) + C_error_tok(parser, node->tok, "invalid initializer"); + *label = &node->var->name; + return 0; + case ND_NUM: + return node->val; + } + + C_error_tok(parser, node->tok, "not a compile-time constant"); +} + +static int64_t eval_rval(C_Parser *parser, C_Node *node, char ***label) { + switch (node->kind) { + case ND_VAR: + if (node->var->is_local) + C_error_tok(parser, node->tok, "not a compile-time constant"); + *label = &node->var->name; + return 0; + case ND_DEREF: + return eval2(parser, node->lhs, label); + case ND_MEMBER: + return eval_rval(parser, node->lhs, label) + node->member->offset; + } + + C_error_tok(parser, node->tok, "invalid initializer"); +} + +static bool 
is_const_expr(C_Parser *parser, C_Node *node) { + C_add_type(parser, node); + + switch (node->kind) { + case ND_ADD: + case ND_SUB: + case ND_MUL: + case ND_DIV: + case ND_BITAND: + case ND_BITOR: + case ND_BITXOR: + case ND_SHL: + case ND_SHR: + case ND_EQ: + case ND_NE: + case ND_LT: + case ND_LE: + case ND_LOGAND: + case ND_LOGOR: + return is_const_expr(parser, node->lhs) && is_const_expr(parser, node->rhs); + case ND_COND: + if (!is_const_expr(parser, node->cond)) + return false; + return is_const_expr(parser, eval(parser, node->cond) ? node->then : node->els); + case ND_COMMA: + return is_const_expr(parser, node->rhs); + case ND_NEG: + case ND_NOT: + case ND_BITNOT: + case ND_CAST: + return is_const_expr(parser, node->lhs); + case ND_NUM: + return true; + } + + return false; +} + +int64_t C_const_expr(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = conditional(parser, rest, tok); + return eval(parser, node); +} + +static double eval_double(C_Parser *parser, C_Node *node) { + C_add_type(parser, node); + + if (C_is_integer(node->ty)) { + if (node->ty->is_unsigned) + return (unsigned long)eval(parser, node); + return eval(parser, node); + } + + switch (node->kind) { + case ND_ADD: + return eval_double(parser, node->lhs) + eval_double(parser, node->rhs); + case ND_SUB: + return eval_double(parser, node->lhs) - eval_double(parser, node->rhs); + case ND_MUL: + return eval_double(parser, node->lhs) * eval_double(parser, node->rhs); + case ND_DIV: + return eval_double(parser, node->lhs) / eval_double(parser, node->rhs); + case ND_NEG: + return -eval_double(parser, node->lhs); + case ND_COND: + return eval_double(parser, node->cond) ? 
eval_double(parser, node->then) : eval_double(parser, node->els); + case ND_COMMA: + return eval_double(parser, node->rhs); + case ND_CAST: + if (C_is_flonum(node->lhs->ty)) + return eval_double(parser, node->lhs); + return eval(parser, node->lhs); + case ND_NUM: + return node->fval; + } + + C_error_tok(parser, node->tok, "not a compile-time constant"); +} + +// Convert op= operators to expressions containing an assignment. +// +// In general, `A op= C` is converted to ``tmp = &A, *tmp = *tmp op B`. +// However, if a given expression is of form `A.x op= C`, the input is +// converted to `tmp = &A, (*tmp).x = (*tmp).x op C` to handle assignments +// to bitfields. +static C_Node *to_assign(C_Parser *parser, C_Node *binary) { + C_add_type(parser, binary->lhs); + C_add_type(parser, binary->rhs); + C_Token *tok = binary->tok; + + // Convert `A.x op= C` to `tmp = &A, (*tmp).x = (*tmp).x op C`. + if (binary->lhs->kind == ND_MEMBER) { + C_Obj *var = new_lvar(parser, "", C_pointer_to(parser, binary->lhs->lhs->ty)); + + C_Node *expr1 = new_binary(parser, ND_ASSIGN, new_var_node(parser, var, tok), + new_unary(parser, ND_ADDR, binary->lhs->lhs, tok), tok); + + C_Node *expr2 = new_unary(parser, ND_MEMBER, + new_unary(parser, ND_DEREF, new_var_node(parser, var, tok), tok), + tok); + expr2->member = binary->lhs->member; + + C_Node *expr3 = new_unary(parser, ND_MEMBER, + new_unary(parser, ND_DEREF, new_var_node(parser, var, tok), tok), + tok); + expr3->member = binary->lhs->member; + + C_Node *expr4 = new_binary(parser, ND_ASSIGN, expr2, + new_binary(parser, binary->kind, expr3, binary->rhs, tok), + tok); + + return new_binary(parser, ND_COMMA, expr1, expr4, tok); + } + + // If A is an atomic type, Convert `A op= B` to + // + // ({ + // T1 *addr = &A; T2 val = (B); T1 old = *addr; T1 new; + // do { + // new = old op val; + // } while (!atomic_compare_exchange_strong(addr, &old, new)); + // new; + // }) + if (binary->lhs->ty->is_atomic) { + C_Node head = {0}; + C_Node *cur = &head; 
+ + C_Obj *addr = new_lvar(parser, "", C_pointer_to(parser, binary->lhs->ty)); + C_Obj *val = new_lvar(parser, "", binary->rhs->ty); + C_Obj *old = new_lvar(parser, "", binary->lhs->ty); + C_Obj *new = new_lvar(parser, "", binary->lhs->ty); + + cur = cur->next = + new_unary(parser, ND_EXPR_STMT, + new_binary(parser, ND_ASSIGN, new_var_node(parser, addr, tok), + new_unary(parser, ND_ADDR, binary->lhs, tok), tok), + tok); + + cur = cur->next = + new_unary(parser, ND_EXPR_STMT, + new_binary(parser, ND_ASSIGN, new_var_node(parser, val, tok), binary->rhs, tok), + tok); + + cur = cur->next = + new_unary(parser, ND_EXPR_STMT, + new_binary(parser, ND_ASSIGN, new_var_node(parser, old, tok), + new_unary(parser, ND_DEREF, new_var_node(parser, addr, tok), tok), tok), + tok); + + C_Node *loop = new_node(parser, ND_DO, tok); + loop->brk_label = new_unique_name(parser->arena); + loop->cont_label = new_unique_name(parser->arena); + + C_Node *body = new_binary(parser, ND_ASSIGN, + new_var_node(parser, new, tok), + new_binary(parser, binary->kind, new_var_node(parser, old, tok), + new_var_node(parser, val, tok), tok), + tok); + + loop->then = new_node(parser, ND_BLOCK, tok); + loop->then->body = new_unary(parser, ND_EXPR_STMT, body, tok); + + C_Node *cas = new_node(parser, ND_CAS, tok); + cas->cas_addr = new_var_node(parser, addr, tok); + cas->cas_old = new_unary(parser, ND_ADDR, new_var_node(parser, old, tok), tok); + cas->cas_new = new_var_node(parser, new, tok); + loop->cond = new_unary(parser, ND_NOT, cas, tok); + + cur = cur->next = loop; + cur = cur->next = new_unary(parser, ND_EXPR_STMT, new_var_node(parser, new, tok), tok); + + C_Node *node = new_node(parser, ND_STMT_EXPR, tok); + node->body = head.next; + return node; + } + + // Convert `A op= B` to ``tmp = &A, *tmp = *tmp op B`. 
+ C_Obj *var = new_lvar(parser, "", C_pointer_to(parser, binary->lhs->ty)); + + C_Node *expr1 = new_binary(parser, ND_ASSIGN, new_var_node(parser, var, tok), + new_unary(parser, ND_ADDR, binary->lhs, tok), tok); + + C_Node *expr2 = + new_binary(parser, ND_ASSIGN, + new_unary(parser, ND_DEREF, new_var_node(parser, var, tok), tok), + new_binary(parser, binary->kind, + new_unary(parser, ND_DEREF, new_var_node(parser, var, tok), tok), + binary->rhs, + tok), + tok); + + return new_binary(parser, ND_COMMA, expr1, expr2, tok); +} + +// assign = conditional (assign-op assign)? +// assign-op = "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "&=" | "|=" | "^=" +// | "<<=" | ">>=" +static C_Node *assign(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = conditional(parser, &tok, tok); + + if (C_equal(tok, "=")) + return new_binary(parser, ND_ASSIGN, node, assign(parser, rest, tok->next), tok); + + if (C_equal(tok, "+=")) + return to_assign(parser, new_add(parser, node, assign(parser, rest, tok->next), tok)); + + if (C_equal(tok, "-=")) + return to_assign(parser, new_sub(parser, node, assign(parser, rest, tok->next), tok)); + + if (C_equal(tok, "*=")) + return to_assign(parser, new_binary(parser, ND_MUL, node, assign(parser, rest, tok->next), tok)); + + if (C_equal(tok, "/=")) + return to_assign(parser, new_binary(parser, ND_DIV, node, assign(parser, rest, tok->next), tok)); + + if (C_equal(tok, "%=")) + return to_assign(parser, new_binary(parser, ND_MOD, node, assign(parser, rest, tok->next), tok)); + + if (C_equal(tok, "&=")) + return to_assign(parser, new_binary(parser, ND_BITAND, node, assign(parser, rest, tok->next), tok)); + + if (C_equal(tok, "|=")) + return to_assign(parser, new_binary(parser, ND_BITOR, node, assign(parser, rest, tok->next), tok)); + + if (C_equal(tok, "^=")) + return to_assign(parser, new_binary(parser, ND_BITXOR, node, assign(parser, rest, tok->next), tok)); + + if (C_equal(tok, "<<=")) + return to_assign(parser, new_binary(parser, ND_SHL, 
node, assign(parser, rest, tok->next), tok)); + + if (C_equal(tok, ">>=")) + return to_assign(parser, new_binary(parser, ND_SHR, node, assign(parser, rest, tok->next), tok)); + + *rest = tok; + return node; +} + +// conditional = logor ("?" expr? ":" conditional)? +static C_Node *conditional(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *cond = logor(parser, &tok, tok); + + if (!C_equal(tok, "?")) { + *rest = tok; + return cond; + } + + if (C_equal(tok->next, ":")) { + // [GNU] Compile `a ?: b` as `tmp = a, tmp ? tmp : b`. + C_add_type(parser, cond); + C_Obj *var = new_lvar(parser, "", cond->ty); + C_Node *lhs = new_binary(parser, ND_ASSIGN, new_var_node(parser, var, tok), cond, tok); + C_Node *rhs = new_node(parser, ND_COND, tok); + rhs->cond = new_var_node(parser, var, tok); + rhs->then = new_var_node(parser, var, tok); + rhs->els = conditional(parser, rest, tok->next->next); + return new_binary(parser, ND_COMMA, lhs, rhs, tok); + } + + C_Node *node = new_node(parser, ND_COND, tok); + node->cond = cond; + node->then = expr(parser, &tok, tok->next); + tok = C_skip(parser, tok, ":"); + node->els = conditional(parser, rest, tok); + return node; +} + +// logor = logand ("||" logand)* +static C_Node *logor(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = logand(parser, &tok, tok); + while (C_equal(tok, "||")) { + C_Token *start = tok; + node = new_binary(parser, ND_LOGOR, node, logand(parser, &tok, tok->next), start); + } + *rest = tok; + return node; +} + +// logand = bitor ("&&" bitor)* +static C_Node *logand(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = bitor(parser, &tok, tok); + while (C_equal(tok, "&&")) { + C_Token *start = tok; + node = new_binary(parser, ND_LOGAND, node, bitor(parser, &tok, tok->next), start); + } + *rest = tok; + return node; +} + +// bitor = bitxor ("|" bitxor)* +static C_Node *bitor(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = bitxor(parser, &tok, tok); + while 
(C_equal(tok, "|")) { + C_Token *start = tok; + node = new_binary(parser, ND_BITOR, node, bitxor(parser, &tok, tok->next), start); + } + *rest = tok; + return node; +} + +// bitxor = bitand ("^" bitand)* +static C_Node *bitxor(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = bitand(parser, &tok, tok); + while (C_equal(tok, "^")) { + C_Token *start = tok; + node = new_binary(parser, ND_BITXOR, node, bitand(parser, &tok, tok->next), start); + } + *rest = tok; + return node; +} + +// bitand = equality ("&" equality)* +static C_Node *bitand(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = equality(parser, &tok, tok); + while (C_equal(tok, "&")) { + C_Token *start = tok; + node = new_binary(parser, ND_BITAND, node, equality(parser, &tok, tok->next), start); + } + *rest = tok; + return node; +} + +// equality = relational ("==" relational | "!=" relational)* +static C_Node *equality(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = relational(parser, &tok, tok); + + for (;;) { + C_Token *start = tok; + + if (C_equal(tok, "==")) { + node = new_binary(parser, ND_EQ, node, relational(parser, &tok, tok->next), start); + continue; + } + + if (C_equal(tok, "!=")) { + node = new_binary(parser, ND_NE, node, relational(parser, &tok, tok->next), start); + continue; + } + + *rest = tok; + return node; + } +} + +// relational = shift ("<" shift | "<=" shift | ">" shift | ">=" shift)* +static C_Node *relational(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = shift(parser, &tok, tok); + + for (;;) { + C_Token *start = tok; + + if (C_equal(tok, "<")) { + node = new_binary(parser, ND_LT, node, shift(parser, &tok, tok->next), start); + continue; + } + + if (C_equal(tok, "<=")) { + node = new_binary(parser, ND_LE, node, shift(parser, &tok, tok->next), start); + continue; + } + + if (C_equal(tok, ">")) { + node = new_binary(parser, ND_LT, shift(parser, &tok, tok->next), node, start); + continue; + } + + if 
(C_equal(tok, ">=")) { + node = new_binary(parser, ND_LE, shift(parser, &tok, tok->next), node, start); + continue; + } + + *rest = tok; + return node; + } +} + +// shift = add ("<<" add | ">>" add)* +static C_Node *shift(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = add(parser, &tok, tok); + + for (;;) { + C_Token *start = tok; + + if (C_equal(tok, "<<")) { + node = new_binary(parser, ND_SHL, node, add(parser, &tok, tok->next), start); + continue; + } + + if (C_equal(tok, ">>")) { + node = new_binary(parser, ND_SHR, node, add(parser, &tok, tok->next), start); + continue; + } + + *rest = tok; + return node; + } +} + +// In C, `+` operator is overloaded to perform the pointer arithmetic. +// If p is a pointer, p+n adds not n but sizeof(*p)*n to the value of p, +// so that p+n points to the location n elements (not bytes) ahead of p. +// In other words, we need to scale an integer value before adding to a +// pointer value. This function takes care of the scaling. +static C_Node *new_add(C_Parser *parser, C_Node *lhs, C_Node *rhs, C_Token *tok) { + C_add_type(parser, lhs); + C_add_type(parser, rhs); + + // num + num + if (C_is_numeric(lhs->ty) && C_is_numeric(rhs->ty)) + return new_binary(parser, ND_ADD, lhs, rhs, tok); + + if (lhs->ty->base && rhs->ty->base) + C_error_tok(parser, tok, "invalid operands"); + if (!lhs->ty->base && !rhs->ty->base) + C_error_tok(parser, tok, "invalid operands"); + + // Canonicalize `num + ptr` to `ptr + num`. 
+ if (!lhs->ty->base && rhs->ty->base) { + C_Node *tmp = lhs; + lhs = rhs; + rhs = tmp; + } + + // VLA + num + if (lhs->ty->base->kind == TY_VLA) { + rhs = new_binary(parser, ND_MUL, rhs, new_var_node(parser, lhs->ty->base->vla_size, tok), tok); + return new_binary(parser, ND_ADD, lhs, rhs, tok); + } + + // ptr + num + rhs = new_binary(parser, ND_MUL, rhs, new_long(parser, lhs->ty->base->size, tok), tok); + return new_binary(parser, ND_ADD, lhs, rhs, tok); +} + +// Like `+`, `-` is overloaded for the pointer type. +static C_Node *new_sub(C_Parser *parser, C_Node *lhs, C_Node *rhs, C_Token *tok) { + C_add_type(parser, lhs); + C_add_type(parser, rhs); + + // num - num + if (C_is_numeric(lhs->ty) && C_is_numeric(rhs->ty)) + return new_binary(parser, ND_SUB, lhs, rhs, tok); + + // VLA + num + if (lhs->ty->base->kind == TY_VLA) { + rhs = new_binary(parser, ND_MUL, rhs, new_var_node(parser, lhs->ty->base->vla_size, tok), tok); + C_add_type(parser, rhs); + C_Node *node = new_binary(parser, ND_SUB, lhs, rhs, tok); + node->ty = lhs->ty; + return node; + } + + // ptr - num + if (lhs->ty->base && C_is_integer(rhs->ty)) { + rhs = new_binary(parser, ND_MUL, rhs, new_long(parser, lhs->ty->base->size, tok), tok); + C_add_type(parser, rhs); + C_Node *node = new_binary(parser, ND_SUB, lhs, rhs, tok); + node->ty = lhs->ty; + return node; + } + + // ptr - ptr, which returns how many elements are between the two. 
+ if (lhs->ty->base && rhs->ty->base) { + C_Node *node = new_binary(parser, ND_SUB, lhs, rhs, tok); + node->ty = C_ty_long; + return new_binary(parser, ND_DIV, node, new_num(parser, lhs->ty->base->size, tok), tok); + } + + C_error_tok(parser, tok, "invalid operands"); +} + +// add = mul ("+" mul | "-" mul)* +static C_Node *add(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = mul(parser, &tok, tok); + + for (;;) { + C_Token *start = tok; + + if (C_equal(tok, "+")) { + node = new_add(parser, node, mul(parser, &tok, tok->next), start); + continue; + } + + if (C_equal(tok, "-")) { + node = new_sub(parser, node, mul(parser, &tok, tok->next), start); + continue; + } + + *rest = tok; + return node; + } +} + +// mul = cast ("*" cast | "/" cast | "%" cast)* +static C_Node *mul(C_Parser *parser, C_Token **rest, C_Token *tok) { + C_Node *node = cast(parser, &tok, tok); + + for (;;) { + C_Token *start = tok; + + if (C_equal(tok, "*")) { + node = new_binary(parser, ND_MUL, node, cast(parser, &tok, tok->next), start); + continue; + } + + if (C_equal(tok, "/")) { + node = new_binary(parser, ND_DIV, node, cast(parser, &tok, tok->next), start); + continue; + } + + if (C_equal(tok, "%")) { + node = new_binary(parser, ND_MOD, node, cast(parser, &tok, tok->next), start); + continue; + } + + *rest = tok; + return node; + } +} + +// cast = "(" type-name ")" cast | unary +static C_Node *cast(C_Parser *parser, C_Token **rest, C_Token *tok) { + if (C_equal(tok, "(") && is_typename(parser, tok->next)) { + C_Token *start = tok; + C_Type *ty = typename(parser, &tok, tok->next); + tok = C_skip(parser, tok, ")"); + + // compound literal + if (C_equal(tok, "{")) + return unary(parser, rest, start); + + // type cast + C_Node *node = C_new_cast(parser, cast(parser, rest, tok), ty); + node->tok = start; + return node; + } + + return unary(parser, rest, tok); +} + +// unary = ("+" | "-" | "*" | "&" | "!" 
| "~") cast
+//       | ("++" | "--") unary
+//       | "&&" ident
+//       | postfix
+//
+// Parses one unary expression starting at `tok`. Every branch either hands
+// `rest` to the sub-parser it delegates to or stores the follow-up token in
+// `*rest` itself.
+static C_Node *unary(C_Parser *parser, C_Token **rest, C_Token *tok) {
+  // Unary plus produces no node of its own; the operand is returned as is.
+  if (C_equal(tok, "+"))
+    return cast(parser, rest, tok->next);
+
+  if (C_equal(tok, "-"))
+    return new_unary(parser, ND_NEG, cast(parser, rest, tok->next), tok);
+
+  if (C_equal(tok, "&")) {
+    C_Node *lhs = cast(parser, rest, tok->next);
+    C_add_type(parser, lhs);
+    if (lhs->kind == ND_MEMBER && lhs->member->is_bitfield)
+      C_error_tok(parser, tok, "cannot take address of bitfield");
+    return new_unary(parser, ND_ADDR, lhs, tok);
+  }
+
+  if (C_equal(tok, "*")) {
+    // [https://www.sigbus.info/n1570#6.5.3.2p4] This is an oddity
+    // in the C spec, but dereferencing a function shouldn't do
+    // anything. If foo is a function, `*foo`, `**foo` or `*****foo`
+    // are all equivalent to just `foo`.
+    C_Node *node = cast(parser, rest, tok->next);
+    C_add_type(parser, node);
+    if (node->ty->kind == TY_FUNC)
+      return node;
+    return new_unary(parser, ND_DEREF, node, tok);
+  }
+
+  if (C_equal(tok, "!"))
+    return new_unary(parser, ND_NOT, cast(parser, rest, tok->next), tok);
+
+  if (C_equal(tok, "~"))
+    return new_unary(parser, ND_BITNOT, cast(parser, rest, tok->next), tok);
+
+  // Read ++i as i+=1
+  if (C_equal(tok, "++"))
+    return to_assign(parser, new_add(parser, unary(parser, rest, tok->next), new_num(parser, 1, tok), tok));
+
+  // Read --i as i-=1
+  if (C_equal(tok, "--"))
+    return to_assign(parser, new_sub(parser, unary(parser, rest, tok->next), new_num(parser, 1, tok), tok));
+
+  // [GNU] labels-as-values
+  if (C_equal(tok, "&&")) {
+    C_Node *node = new_node(parser, ND_LABEL_VAL, tok);
+    node->label = get_ident(parser, tok->next);
+    // Push onto parser->gotos so resolve_goto_labels() can match the label
+    // once the whole function has been parsed.
+    node->goto_next = parser->gotos;
+    parser->gotos = node;
+    *rest = tok->next->next;
+    return node;
+  }
+
+  return postfix(parser, rest, tok);
+}
+
+// struct-members = (declspec declarator (","  declarator)* ";")*
+//
+// Parses member declarations up to the closing "}" and stores the resulting
+// list in ty->members. `*rest` is set to the token after the "}".
+static void struct_members(C_Parser *parser, C_Token **rest, C_Token *tok, C_Type *ty) {
+  // Dummy list head: `cur` always points at the last member appended.
+  C_Member head = {0};
+  C_Member *cur = &head;
+  int idx = 0;
+
+  while (!C_equal(tok, "}")) {
+    VarAttr attr = {0};
+    C_Type *basety = declspec(parser, &tok, tok, &attr);
+    bool first = true;
+
+    // Anonymous struct member
+    if ((basety->kind == TY_STRUCT || basety->kind == TY_UNION) && C_consume(&tok, tok, ";")) {
+      C_Member *mem = mspace_calloc(parser->arena, 1, sizeof(C_Member));
+      mem->ty = basety;
+      mem->idx = idx++;
+      mem->align = attr.align ? attr.align : mem->ty->align;
+      cur = cur->next = mem;
+      continue;
+    }
+
+    // Regular struct members
+    while (!C_consume(&tok, tok, ";")) {
+      if (!first)
+        tok = C_skip(parser, tok, ",");
+      first = false;
+
+      C_Member *mem = mspace_calloc(parser->arena, 1, sizeof(C_Member));
+      mem->ty = declarator(parser, &tok, tok, basety);
+      // The name is taken from the declarator's type; nothing here rejects
+      // a declarator without a name.
+      mem->name = mem->ty->name;
+      mem->idx = idx++;
+      mem->align = attr.align ? attr.align : mem->ty->align;
+
+      if (C_consume(&tok, tok, ":")) {
+        mem->is_bitfield = true;
+        mem->bit_width = C_const_expr(parser, &tok, tok);
+      }
+
+      cur = cur->next = mem;
+    }
+  }
+
+  // If the last element is an array of incomplete type, it's
+  // called a "flexible array member". It should behave as if
+  // it were a zero-sized array.
+  if (cur != &head && cur->ty->kind == TY_ARRAY && cur->ty->array_len < 0) {
+    cur->ty = C_array_of(parser, cur->ty->base, 0);
+    ty->is_flexible = true;
+  }
+
+  // `tok` is the closing "}" here; step past it.
+  *rest = tok->next;
+  ty->members = head.next;
+}
+
+// attribute-list = ("__attribute__" "(" "(" attribute ("," attribute)* ")" ")")*
+// attribute      = "packed" | "aligned" "(" const-expr ")"
+//
+// Applies the recognized attributes to `ty` (is_packed / align) and returns
+// the token following the attribute list. Any other attribute name is
+// reported through C_error_tok.
+static C_Token *attribute_list(C_Parser *parser, C_Token *tok, C_Type *ty) {
+  while (C_consume(&tok, tok, "__attribute__")) {
+    tok = C_skip(parser, tok, "(");
+    tok = C_skip(parser, tok, "(");
+
+    bool first = true;
+
+    while (!C_consume(&tok, tok, ")")) {
+      if (!first)
+        tok = C_skip(parser, tok, ",");
+      first = false;
+
+      if (C_consume(&tok, tok, "packed")) {
+        ty->is_packed = true;
+        continue;
+      }
+
+      if (C_consume(&tok, tok, "aligned")) {
+        tok = C_skip(parser, tok, "(");
+        ty->align = C_const_expr(parser, &tok, tok);
+        tok = C_skip(parser, tok, ")");
+        continue;
+      }
+
+      C_error_tok(parser, tok, "unknown attribute");
+    }
+
+    // The inner loop consumed the first ")"; this is the second one.
+    tok = C_skip(parser, tok, ")");
+  }
+
+  return tok;
+}
+
+// struct-union-decl = attribute? ident? ("{" struct-members)?
+//
+// Shared front end of struct_decl() and union_decl(). Returns the parsed
+// type; for a tag without a body it returns the already-registered type if
+// find_tag() knows it, otherwise a new type with size -1 (incomplete).
+static C_Type *struct_union_decl(C_Parser *parser, C_Token **rest, C_Token *tok) {
+  C_Type *ty = C_struct_type(parser);
+  tok = attribute_list(parser, tok, ty);
+
+  // Read a tag.
+  C_Token *tag = NULL;
+  if (tok->kind == TK_IDENT) {
+    tag = tok;
+    tok = tok->next;
+  }
+
+  // Tag without a member list: a reference or a forward declaration.
+  if (tag && !C_equal(tok, "{")) {
+    *rest = tok;
+
+    C_Type *ty2 = find_tag(parser, tag);
+    if (ty2)
+      return ty2;
+
+    // Negative size marks the type as incomplete; struct_decl() and
+    // union_decl() return early when they see it.
+    ty->size = -1;
+    push_tag_scope(parser, tag, ty);
+    return ty;
+  }
+
+  tok = C_skip(parser, tok, "{");
+
+  // Construct a struct object.
+  struct_members(parser, &tok, tok, ty);
+  *rest = attribute_list(parser, tok, ty);
+
+  if (tag) {
+    // If this is a redefinition, overwrite a previous type.
+    // Otherwise, register the struct type.
+    // Only the current scope's tags are consulted here.
+    C_Type *ty2 = hashmap_get2(&parser->scope->tags, tag->loc, tag->len);
+    if (ty2) {
+      *ty2 = *ty;
+      return ty2;
+    }
+
+    push_tag_scope(parser, tag, ty);
+  }
+
+  return ty;
+}
+
+// struct-decl = struct-union-decl
+//
+// Parses a struct and, unless it is incomplete (size < 0), lays out its
+// members: assigns offset / bit_offset and computes ty->align and ty->size.
+static C_Type *struct_decl(C_Parser *parser, C_Token **rest, C_Token *tok) {
+  C_Type *ty = struct_union_decl(parser, rest, tok);
+  ty->kind = TY_STRUCT;
+
+  if (ty->size < 0)
+    return ty;
+
+  // Assign offsets within the struct to members.
+  // `bits` is the running offset from the start of the struct, in bits.
+  int bits = 0;
+
+  for (C_Member *mem = ty->members; mem; mem = mem->next) {
+    if (mem->is_bitfield && mem->bit_width == 0) {
+      // Zero-width anonymous bitfield has a special meaning.
+      // It affects only alignment.
+      bits = C_align_to(bits, mem->ty->size * 8);
+    } else if (mem->is_bitfield) {
+      int sz = mem->ty->size;
+      // If the field would straddle a boundary of its storage unit
+      // (sz bytes), start it at the next unit instead.
+      if (bits / (sz * 8) != (bits + mem->bit_width - 1) / (sz * 8))
+        bits = C_align_to(bits, sz * 8);
+
+      mem->offset = align_down(bits / 8, sz);
+      mem->bit_offset = bits % (sz * 8);
+      bits += mem->bit_width;
+    } else {
+      if (!ty->is_packed)
+        bits = C_align_to(bits, mem->align * 8);
+      mem->offset = bits / 8;
+      bits += mem->ty->size * 8;
+    }
+
+    if (!ty->is_packed && ty->align < mem->align)
+      ty->align = mem->align;
+  }
+
+  ty->size = C_align_to(bits, ty->align * 8) / 8;
+  return ty;
+}
+
+// union-decl = struct-union-decl
+//
+// Parses a union and, unless it is incomplete (size < 0), computes its
+// alignment and size from its members.
+static C_Type *union_decl(C_Parser *parser, C_Token **rest, C_Token *tok) {
+  C_Type *ty = struct_union_decl(parser, rest, tok);
+  ty->kind = TY_UNION;
+
+  if (ty->size < 0)
+    return ty;
+
+  // If union, we don't have to assign offsets because they
+  // are already initialized to zero. We need to compute the
+  // alignment and the size though.
+  for (C_Member *mem = ty->members; mem; mem = mem->next) {
+    if (ty->align < mem->align)
+      ty->align = mem->align;
+    if (ty->size < mem->ty->size)
+      ty->size = mem->ty->size;
+  }
+  ty->size = C_align_to(ty->size, ty->align);
+  return ty;
+}
+
+// Find a struct member by name.
+// Returns the direct member of `ty` through which the name at `tok` can be
+// reached: either a member carrying that name, or an anonymous struct/union
+// member that (recursively) contains it. Returns NULL when nothing matches.
+static C_Member *get_struct_member(C_Parser *parser, C_Type *ty, C_Token *tok) {
+  for (C_Member *mem = ty->members; mem; mem = mem->next) {
+    if (!mem->name) {
+      // Anonymous struct member: search its members recursively and report
+      // the anonymous member itself so that the caller can descend into it.
+      if ((mem->ty->kind == TY_STRUCT || mem->ty->kind == TY_UNION) &&
+          get_struct_member(parser, mem->ty, tok))
+        return mem;
+
+      // Any other unnamed member (e.g. an anonymous bitfield such as
+      // `int : 0;`) has no name to compare against. Skip it rather than
+      // dereferencing the NULL name below.
+      continue;
+    }
+
+    // Regular struct member
+    if (mem->name->len == tok->len &&
+        !strncmp(mem->name->loc, tok->loc, tok->len))
+      return mem;
+  }
+  return NULL;
+}
+
+// Create a node representing a struct member access, such as foo.bar
+// where foo is a struct and bar is a member name.
+//
+// C has a feature called "anonymous struct" which allows a struct to
+// have another unnamed struct as a member like this:
+//
+//   struct { struct { int a; }; int b; } x;
+//
+// The members of an anonymous struct belong to the outer struct's
+// member namespace. Therefore, in the above example, you can access
+// member "a" of the anonymous struct as "x.a".
+//
+// This function takes care of anonymous structs.
+static C_Node *struct_ref(C_Parser *parser, C_Node *node, C_Token *tok) {
+  C_add_type(parser, node);
+
+  // Member access is only defined on struct and union operands.
+  if (!(node->ty->kind == TY_STRUCT || node->ty->kind == TY_UNION))
+    C_error_tok(parser, node->tok, "not a struct nor a union");
+
+  // Walk from the outer aggregate towards the named member. Each step adds
+  // one ND_MEMBER node; a step through an anonymous member (no name) means
+  // the target lives deeper, so the search continues inside that member.
+  C_Type *container = node->ty;
+  C_Member *found;
+  do {
+    found = get_struct_member(parser, container, tok);
+    if (!found)
+      C_error_tok(parser, tok, "no such member");
+
+    node = new_unary(parser, ND_MEMBER, node, tok);
+    node->member = found;
+    container = found->ty;
+  } while (!found->name);
+
+  return node;
+}
+
+// Build the tree for a postfix increment/decrement of `node`.
+// A++ becomes `(typeof A)((A += 1) - 1)`; A-- is the same with addend -1.
+static C_Node *new_inc_dec(C_Parser *parser, C_Node *node, C_Token *tok, int addend) {
+  C_add_type(parser, node);
+
+  // A += addend
+  C_Node *step = new_num(parser, addend, tok);
+  C_Node *updated = to_assign(parser, new_add(parser, node, step, tok));
+
+  // (A += addend) - addend yields the value A had before the update.
+  C_Node *undo = new_num(parser, -addend, tok);
+  C_Node *previous = new_add(parser, updated, undo, tok);
+
+  // Cast back so the expression keeps the operand's type.
+  return C_new_cast(parser, previous, node->ty);
+}
+
+// postfix = "(" type-name ")" "{" initializer-list "}"
+//         = ident "(" func-args ")" postfix-tail*
+//         | primary postfix-tail*
+//
+// postfix-tail = "[" expr "]"
+//              | "(" func-args ")"
+//              | "."
ident
+//              | "->" ident
+//              | "++"
+//              | "--"
+//
+// Parses a compound literal, or a primary expression followed by any number
+// of postfix operators. `*rest` receives the first token not consumed.
+static C_Node *postfix(C_Parser *parser, C_Token **rest, C_Token *tok) {
+  if (C_equal(tok, "(") && is_typename(parser, tok->next)) {
+    // Compound literal
+    C_Token *start = tok;
+    C_Type *ty = typename(parser, &tok, tok->next);
+    tok = C_skip(parser, tok, ")");
+
+    // No enclosing scope: the literal is backed by an anonymous global.
+    if (parser->scope->next == NULL) {
+      C_Obj *var = new_anon_gvar(parser, ty);
+      gvar_initializer(parser, rest, tok, var);
+      return new_var_node(parser,var, start);
+    }
+
+    // Otherwise it is backed by an unnamed local: `(init, var)`.
+    C_Obj *var = new_lvar(parser, "", ty);
+    C_Node *lhs = lvar_initializer(parser, rest, tok, var);
+    C_Node *rhs = new_var_node(parser, var, tok);
+    return new_binary(parser, ND_COMMA, lhs, rhs, start);
+  }
+
+  C_Node *node = primary(parser, &tok, tok);
+
+  for (;;) {
+    if (C_equal(tok, "(")) {
+      node = funcall(parser, &tok, tok->next, node);
+      continue;
+    }
+
+    if (C_equal(tok, "[")) {
+      // x[y] is short for *(x+y)
+      C_Token *start = tok;
+      C_Node *idx = expr(parser, &tok, tok->next);
+      tok = C_skip(parser, tok, "]");
+      node = new_unary(parser, ND_DEREF, new_add(parser, node, idx, start), start);
+      continue;
+    }
+
+    if (C_equal(tok, ".")) {
+      node = struct_ref(parser, node, tok->next);
+      // Skip both the "." and the member name.
+      tok = tok->next->next;
+      continue;
+    }
+
+    if (C_equal(tok, "->")) {
+      // x->y is short for (*x).y
+      node = new_unary(parser, ND_DEREF, node, tok);
+      node = struct_ref(parser, node, tok->next);
+      tok = tok->next->next;
+      continue;
+    }
+
+    if (C_equal(tok, "++")) {
+      node = new_inc_dec(parser, node, tok, 1);
+      tok = tok->next;
+      continue;
+    }
+
+    if (C_equal(tok, "--")) {
+      node = new_inc_dec(parser, node, tok, -1);
+      tok = tok->next;
+      continue;
+    }
+
+    *rest = tok;
+    return node;
+  }
+}
+
+// funcall = (assign ("," assign)*)? ")"
+//
+// Parses the argument list of a call to `fn`; `tok` is the token after the
+// opening "(". Arguments are cast to the declared parameter types and the
+// argument count is checked against the function type.
+static C_Node *funcall(C_Parser *parser, C_Token **rest, C_Token *tok, C_Node *fn) {
+  C_add_type(parser, fn);
+
+  // The callee must be a function or a pointer to a function.
+  if (fn->ty->kind != TY_FUNC &&
+      (fn->ty->kind != TY_PTR || fn->ty->base->kind != TY_FUNC))
+    C_error_tok(parser, fn->tok, "not a function");
+
+  C_Type *ty = (fn->ty->kind == TY_FUNC) ? fn->ty : fn->ty->base;
+  C_Type *param_ty = ty->params;
+
+  // Dummy list head for the argument list.
+  C_Node head = {0};
+  C_Node *cur = &head;
+
+  while (!C_equal(tok, ")")) {
+    if (cur != &head)
+      tok = C_skip(parser, tok, ",");
+
+    C_Node *arg = assign(parser, &tok, tok);
+    C_add_type(parser, arg);
+
+    if (!param_ty && !ty->is_variadic)
+      C_error_tok(parser, tok, "too many arguments");
+
+    if (param_ty) {
+      // Struct/union arguments are passed through without a cast.
+      if (param_ty->kind != TY_STRUCT && param_ty->kind != TY_UNION)
+        arg = C_new_cast(parser, arg, param_ty);
+      param_ty = param_ty->next;
+    } else if (arg->ty->kind == TY_FLOAT) {
+      // If parameter type is omitted (e.g. in "..."), float
+      // arguments are promoted to double.
+      arg = C_new_cast(parser, arg, C_ty_double);
+    }
+
+    cur = cur->next = arg;
+  }
+
+  if (param_ty)
+    C_error_tok(parser, tok, "too few arguments");
+
+  *rest = C_skip(parser, tok, ")");
+
+  C_Node *node = new_unary(parser, ND_FUNCALL, fn, tok);
+  node->func_ty = ty;
+  node->ty = ty->return_ty;
+  node->args = head.next;
+
+  // If a function returns a struct, it is caller's responsibility
+  // to allocate a space for the return value.
+  if (node->ty->kind == TY_STRUCT || node->ty->kind == TY_UNION)
+    node->ret_buffer = new_lvar(parser, "", node->ty);
+  return node;
+}
+
+// generic-selection = "(" assign "," generic-assoc ("," generic-assoc)* ")"
+//
+// generic-assoc = type-name ":" assign
+//               | "default" ":" assign
+//
+// Returns the expression of the association selected for the controlling
+// expression's type. Every association is parsed, but only the selected
+// node is returned.
+static C_Node *generic_selection(C_Parser *parser, C_Token **rest, C_Token *tok) {
+  C_Token *start = tok;
+  tok = C_skip(parser, tok, "(");
+
+  C_Node *ctrl = assign(parser, &tok, tok);
+  C_add_type(parser, ctrl);
+
+  // Functions and arrays are matched as the pointer types they decay to.
+  C_Type *t1 = ctrl->ty;
+  if (t1->kind == TY_FUNC)
+    t1 = C_pointer_to(parser, t1);
+  else if (t1->kind == TY_ARRAY)
+    t1 = C_pointer_to(parser, t1->base);
+
+  C_Node *ret = NULL;
+
+  while (!C_consume(rest, tok, ")")) {
+    tok = C_skip(parser, tok, ",");
+
+    if (C_equal(tok, "default")) {
+      tok = C_skip(parser, tok->next, ":");
+      C_Node *node = assign(parser, &tok, tok);
+      // "default" only applies when no earlier association was selected;
+      // a compatible type seen later still overrides it below.
+      if (!ret)
+        ret = node;
+      continue;
+    }
+
+    C_Type *t2 = typename(parser, &tok, tok);
+    tok = C_skip(parser, tok, ":");
+    C_Node *node = assign(parser, &tok, tok);
+    if (C_is_compatible(t1, t2))
+      ret = node;
+  }
+
+  if (!ret)
+    C_error_tok(parser, start,
+                "controlling expression type not compatible with"
+                " any generic association type");
+  return ret;
+}
+
+// primary = "(" "{" stmt+ "}" ")"
+//         | "(" expr ")"
+//         | "sizeof" "(" type-name ")"
+//         | "sizeof" unary
+//         | "_Alignof" "(" type-name ")"
+//         | "_Alignof" unary
+//         | "_Generic" generic-selection
+//         | "__builtin_types_compatible_p" "(" type-name, type-name, ")"
+//         | "__builtin_reg_class" "(" type-name ")"
+//         | "__builtin_compare_and_swap" "(" assign "," assign "," assign ")"
+//         | "__builtin_atomic_exchange" "(" assign "," assign ")"
+//         | ident
+//         | str
+//         | num
+static C_Node *primary(C_Parser *parser, C_Token **rest, C_Token *tok) {
+  C_Token *start = tok;
+
+  if (C_equal(tok, "(") && C_equal(tok->next, "{")) {
+    // This is a GNU statement expression.
+    C_Node *node = new_node(parser, ND_STMT_EXPR, tok);
+    node->body = compound_stmt(parser, &tok, tok->next->next)->body;
+    *rest = C_skip(parser, tok, ")");
+    return node;
+  }
+
+  if (C_equal(tok, "(")) {
+    C_Node *node = expr(parser, &tok, tok->next);
+    *rest = C_skip(parser, tok, ")");
+    return node;
+  }
+
+  if (C_equal(tok, "sizeof") && C_equal(tok->next, "(") && is_typename(parser, tok->next->next)) {
+    C_Type *ty = typename(parser, &tok, tok->next->next);
+    *rest = C_skip(parser, tok, ")");
+
+    if (ty->kind == TY_VLA) {
+      // A VLA's size lives in the variable ty->vla_size; if that variable
+      // does not exist yet, emit the size computation first.
+      if (ty->vla_size)
+        return new_var_node(parser, ty->vla_size, tok);
+
+      C_Node *lhs = compute_vla_size(parser, ty, tok);
+      C_Node *rhs = new_var_node(parser, ty->vla_size, tok);
+      return new_binary(parser, ND_COMMA, lhs, rhs, tok);
+    }
+
+    return new_ulong(parser, ty->size, start);
+  }
+
+  if (C_equal(tok, "sizeof")) {
+    C_Node *node = unary(parser, rest, tok->next);
+    C_add_type(parser, node);
+    if (node->ty->kind == TY_VLA)
+      return new_var_node(parser, node->ty->vla_size, tok);
+    return new_ulong(parser, node->ty->size, tok);
+  }
+
+  if (C_equal(tok, "_Alignof") && C_equal(tok->next, "(") && is_typename(parser, tok->next->next)) {
+    C_Type *ty = typename(parser, &tok, tok->next->next);
+    *rest = C_skip(parser, tok, ")");
+    return new_ulong(parser, ty->align, tok);
+  }
+
+  if (C_equal(tok, "_Alignof")) {
+    C_Node *node = unary(parser, rest, tok->next);
+    C_add_type(parser, node);
+    return new_ulong(parser, node->ty->align, tok);
+  }
+
+  if (C_equal(tok, "_Generic"))
+    return generic_selection(parser, rest, tok->next);
+
+  if (C_equal(tok, "__builtin_types_compatible_p")) {
+    tok = C_skip(parser, tok->next, "(");
+    C_Type *t1 = typename(parser, &tok, tok);
+    tok = C_skip(parser, tok, ",");
+    C_Type *t2 = typename(parser, &tok, tok);
+    *rest = C_skip(parser, tok, ")");
+    return new_num(parser, C_is_compatible(t1, t2), start);
+  }
+
+  if (C_equal(tok, "__builtin_reg_class")) {
+    tok = C_skip(parser, tok->next, "(");
+    C_Type *ty = typename(parser, &tok, tok);
+    *rest = C_skip(parser, tok, ")");
+
+    // Result: 0 for integer and pointer types, 1 for floating-point
+    // types, 2 for everything else.
+    if (C_is_integer(ty) || ty->kind == TY_PTR)
+      return new_num(parser, 0, start);
+    if (C_is_flonum(ty))
+      return new_num(parser, 1, start);
+    return new_num(parser, 2, start);
+  }
+
+  if (C_equal(tok, "__builtin_compare_and_swap")) {
+    C_Node *node = new_node(parser, ND_CAS, tok);
+    tok = C_skip(parser, tok->next, "(");
+    node->cas_addr = assign(parser, &tok, tok);
+    tok = C_skip(parser, tok, ",");
+    node->cas_old = assign(parser, &tok, tok);
+    tok = C_skip(parser, tok, ",");
+    node->cas_new = assign(parser, &tok, tok);
+    *rest = C_skip(parser, tok, ")");
+    return node;
+  }
+
+  if (C_equal(tok, "__builtin_atomic_exchange")) {
+    C_Node *node = new_node(parser, ND_EXCH, tok);
+    tok = C_skip(parser, tok->next, "(");
+    node->lhs = assign(parser, &tok, tok);
+    tok = C_skip(parser, tok, ",");
+    node->rhs = assign(parser, &tok, tok);
+    *rest = C_skip(parser, tok, ")");
+    return node;
+  }
+
+  if (tok->kind == TK_IDENT) {
+    // Variable or enum constant
+    C_VarScope *sc = find_var(parser, tok);
+    *rest = tok->next;
+
+    // For "static inline" function
+    // Record the reference on the current function (mark_live() follows
+    // these later), or mark the callee as a root when the reference occurs
+    // outside any function.
+    if (sc && sc->var && sc->var->is_function) {
+      if (parser->current_fn)
+        strarray_push(parser->arena, &parser->current_fn->refs, sc->var->name);
+      else
+        sc->var->is_root = true;
+    }
+
+    if (sc) {
+      if (sc->var)
+        return new_var_node(parser, sc->var, tok);
+      if (sc->enum_ty)
+        return new_num(parser, sc->enum_val, tok);
+    }
+
+    if (C_equal(tok->next, "("))
+      C_error_tok(parser, tok, "implicit declaration of a function");
+    C_error_tok(parser, tok, "undefined variable");
+  }
+
+  if (tok->kind == TK_STR) {
+    C_Obj *var = new_string_literal(parser, tok->str, tok->ty);
+    *rest = tok->next;
+    return new_var_node(parser, var, tok);
+  }
+
+  if (tok->kind == TK_NUM) {
+    C_Node *node;
+    if (C_is_flonum(tok->ty)) {
+      node = new_node(parser, ND_NUM, tok);
+      node->fval = tok->fval;
+    } else {
+      node = new_num(parser, tok->val, tok);
+    }
+
+    node->ty = tok->ty;
+    *rest = tok->next;
+    return node;
+  }
+
+  // NOTE(review): there is no return after this call, so C_error_tok is
+  // presumably non-returning (C_parse arms parser->env with setjmp) - confirm.
+  C_error_tok(parser, tok, "expected an expression");
+}
+
+// Parses the declarator list of a typedef and registers each name in the
+// current scope as an alias for the declared type. Returns the token after
+// the terminating ";".
+static C_Token *parse_typedef(C_Parser *parser, C_Token *tok, C_Type *basety) {
+  bool first = true;
+
+  while (!C_consume(&tok, tok, ";")) {
+    if (!first)
+      tok = C_skip(parser, tok, ",");
+    first = false;
+
+    C_Type *ty = declarator(parser, &tok, tok, basety);
+    if (!ty->name)
+      C_error_tok(parser, ty->name_pos, "typedef name omitted");
+    push_scope(parser, get_ident(parser, ty->name))->type_def = ty;
+  }
+  return tok;
+}
+
+// Creates a local variable for every parameter in the list starting at
+// `param`. The recursion happens before the new_lvar() call, so the last
+// parameter's variable is created first and the first parameter's last.
+static void create_param_lvars(C_Parser *parser, C_Type *param) {
+  if (param) {
+    create_param_lvars(parser, param->next);
+    if (!param->name)
+      C_error_tok(parser, param->name_pos, "parameter name omitted");
+    new_lvar(parser, get_ident(parser, param->name), param);
+  }
+}
+
+// This function matches gotos or labels-as-values with labels.
+//
+// We cannot resolve gotos as we parse a function because gotos
+// can refer a label that appears later in the function.
+// So, we need to do this after we parse the entire function.
+static void resolve_goto_labels(C_Parser *parser) {
+  for (C_Node *x = parser->gotos; x; x = x->goto_next) {
+    // Copy the unique label from the first entry of parser->labels that
+    // has the same name.
+    for (C_Node *y = parser->labels; y; y = y->goto_next) {
+      if (!strcmp(x->label, y->label)) {
+        x->unique_label = y->unique_label;
+        break;
+      }
+    }
+
+    if (x->unique_label == NULL)
+      C_error_tok(parser, x->tok->next, "use of undeclared label");
+  }
+
+  // Both lists are reset once the function has been resolved.
+  parser->gotos = parser->labels = NULL;
+}
+
+// Looks up `name` in the outermost scope (the last one in the scope chain)
+// and returns the object only if it is a function; otherwise NULL.
+static C_Obj *find_func(C_Parser *parser, char *name) {
+  C_Scope *sc = parser->scope;
+  while (sc->next)
+    sc = sc->next;
+
+  C_VarScope *sc2 = hashmap_get(&sc->vars, name);
+  if (sc2 && sc2->var && sc2->var->is_function)
+    return sc2->var;
+  return NULL;
+}
+
+// Marks function `var` as live together with every function it references,
+// transitively. The is_live check also stops the recursion on cycles.
+static void mark_live(C_Parser *parser, C_Obj *var) {
+  if (!var->is_function || var->is_live)
+    return;
+  var->is_live = true;
+
+  for (int i = 0; i < var->refs.len; i++) {
+    C_Obj *fn = find_func(parser, var->refs.data[i]);
+    if (fn)
+      mark_live(parser, fn);
+  }
+}
+
+// Parses a function declaration or definition whose declaration specifiers
+// (`basety`, `attr`) have already been read. Returns the token after it.
+static C_Token *function(C_Parser *parser, C_Token *tok, C_Type *basety, VarAttr *attr) {
+  C_Type *ty = declarator(parser, &tok, tok, basety);
+  if (!ty->name)
+    C_error_tok(parser, ty->name_pos, "function name omitted");
+  char *name_str = get_ident(parser, ty->name);
+
+  C_Obj *fn = find_func(parser, name_str);
+  if (fn) {
+    // Redeclaration
+    if (!fn->is_function)
+      C_error_tok(parser, tok, "redeclared as a different kind of symbol");
+    if (fn->is_definition && C_equal(tok, "{"))
+      C_error_tok(parser, tok, "redefinition of %s", name_str);
+    if (!fn->is_static && attr->is_static)
+      C_error_tok(parser, tok, "static declaration follows a non-static declaration");
+    fn->is_definition = fn->is_definition || C_equal(tok, "{");
+  } else {
+    fn = new_gvar(parser, name_str, ty);
+    fn->is_function = true;
+    fn->is_definition = C_equal(tok, "{");
+    fn->is_static = attr->is_static || (attr->is_inline && !attr->is_extern);
+    fn->is_inline = attr->is_inline;
+  }
+
+  // Everything except "static inline" functions is a liveness root.
+  fn->is_root = !(fn->is_static && fn->is_inline);
+
+  // Declaration only: nothing more to parse.
+  if (C_consume(&tok, tok, ";"))
+    return tok;
+
+  parser->current_fn = fn;
+  parser->locals = NULL;
+  enter_scope(parser);
+  create_param_lvars(parser, ty->params);
+
+  // A buffer for a struct/union return value is passed
+  // as the hidden first parameter.
+  C_Type *rty = ty->return_ty;
+  if ((rty->kind == TY_STRUCT || rty->kind == TY_UNION) && rty->size > 16)
+    new_lvar(parser, "", C_pointer_to(parser, rty));
+
+  // Snapshot the locals created so far (parameters and the hidden return
+  // buffer) as the function's parameter list.
+  fn->params = parser->locals;
+
+  if (ty->is_variadic)
+    fn->va_area = new_lvar(parser, "__va_area__", C_array_of(parser, C_ty_char, 136));
+  fn->alloca_bottom = new_lvar(parser, "__alloca_size__", C_pointer_to(parser, C_ty_char));
+
+  tok = C_skip(parser, tok, "{");
+
+  // [https://www.sigbus.info/n1570#6.4.2.2p1] "__func__" is
+  // automatically defined as a local variable containing the
+  // current function name.
+  push_scope(parser, "__func__")->var =
+      new_string_literal(parser, fn->name, C_array_of(parser, C_ty_char, strlen(fn->name) + 1));
+
+  // [GNU] __FUNCTION__ is yet another name of __func__.
+  push_scope(parser, "__FUNCTION__")->var =
+      new_string_literal(parser, fn->name, C_array_of(parser, C_ty_char, strlen(fn->name) + 1));
+
+  fn->body = compound_stmt(parser, &tok, tok);
+  fn->locals = parser->locals;
+  leave_scope(parser);
+  resolve_goto_labels(parser);
+  return tok;
+}
+
+// Parses the declarator list of a global variable declaration whose
+// declaration specifiers have already been read. Returns the token after
+// the terminating ";".
+static C_Token *global_variable(C_Parser *parser, C_Token *tok, C_Type *basety, VarAttr *attr) {
+  bool first = true;
+
+  while (!C_consume(&tok, tok, ";")) {
+    if (!first)
+      tok = C_skip(parser, tok, ",");
+    first = false;
+
+    C_Type *ty = declarator(parser, &tok, tok, basety);
+    if (!ty->name)
+      C_error_tok(parser, ty->name_pos, "variable name omitted");
+
+    C_Obj *var = new_gvar(parser, get_ident(parser, ty->name), ty);
+    var->is_definition = !attr->is_extern;
+    var->is_static = attr->is_static;
+    var->is_tls = attr->is_tls;
+    if (attr->align)
+      var->align = attr->align;
+
+    // Without an initializer, a non-extern, non-TLS variable is only a
+    // tentative definition; scan_globals() removes redundant ones.
+    if (C_equal(tok, "="))
+      gvar_initializer(parser, &tok, tok->next, var);
+    else if (!attr->is_extern && !attr->is_tls)
+      var->is_tentative = true;
+  }
+  return tok;
+}
+
+// Lookahead tokens and returns true if a given token is a start
+// of a function definition or declaration.
+static bool is_function(C_Parser *parser, C_Token *tok) {
+  if (C_equal(tok, ";"))
+    return false;
+
+  // Parse the declarator against a throwaway base type; only the kind of
+  // the resulting type matters. `tok` is a by-value copy, so the caller's
+  // token position is unaffected.
+  C_Type dummy = {0};
+  C_Type *ty = declarator(parser, &tok, tok, &dummy);
+  return ty->kind == TY_FUNC;
+}
+
+// Remove redundant tentative definitions.
+static void scan_globals(C_Parser *parser) {
+  // `head` is only a list anchor; head.next is always assigned before it is
+  // read (either in the loop or by `cur->next = NULL` below).
+  C_Obj head;
+  C_Obj *cur = &head;
+
+  for (C_Obj *var = parser->globals; var; var = var->next) {
+    if (!var->is_tentative) {
+      cur = cur->next = var;
+      continue;
+    }
+
+    // Find another definition of the same identifier.
+    C_Obj *var2 = parser->globals;
+    for (; var2; var2 = var2->next)
+      if (var != var2 && var2->is_definition && !strcmp(var->name, var2->name))
+        break;
+
+    // If there's another definition, the tentative definition
+    // is redundant
+    if (!var2)
+      cur = cur->next = var;
+  }
+
+  cur->next = NULL;
+  parser->globals = head.next;
+}
+
+// Declares `alloca` as a global with a function type that returns void*
+// and takes one int parameter, and records it in parser->builtin_alloca.
+static void declare_builtin_functions(C_Parser *parser) {
+  C_Type *ty = C_func_type(parser, C_pointer_to(parser, C_ty_void));
+  ty->params = C_copy_type(parser, C_ty_int);
+  parser->builtin_alloca = new_gvar(parser, "alloca", ty);
+  parser->builtin_alloca->is_definition = false;
+}
+
+#ifdef RAVI_EXTENSIONS
+// Creates a static, non-inline function object named `name_str` with a
+// void return type, makes it the parser's current function and resets the
+// per-function parser state. `globalScope` is not used by this function.
+C_Obj *C_create_function(C_Scope *globalScope, C_Parser *parser, char *name_str) {
+  C_Obj *fn = new_gvar(parser, name_str, C_func_type(parser, C_ty_void));
+  fn->is_function = true;
+  fn->is_definition = true;
+  fn->is_static = true;
+  fn->is_inline = false;
+  parser->current_fn = fn;
+  parser->locals = NULL;
+  parser->gotos = NULL;
+  parser->labels = NULL;
+  parser->brk_label = NULL;
+  parser->cont_label = NULL;
+  parser->current_switch = NULL;
+  return fn;
+}
+
+// Parses a single compound statement with `globalScope` as the current
+// scope. Returns NULL when control comes back through parser->env
+// (presumably a longjmp from the error reporting path - TODO confirm).
+C_Node *C_parse_compound_statement(C_Scope *globalScope, C_Parser *parser, C_Token *tok) {
+  if (setjmp(parser->env) != 0) {
+    return NULL;
+  }
+  parser->scope = globalScope;
+  return compound_stmt(parser, &tok, tok);
+}
+#endif
+
+// program = (typedef | function-definition | global-variable)*
+//
+// Parses a whole token stream and returns the resulting list of globals,
+// or NULL when control comes back through parser->env.
+C_Obj *C_parse(C_Scope *globalScope, C_Parser *parser, C_Token *tok) {
+  if (setjmp(parser->env) != 0) {
+    return NULL;
+  }
+
+  parser->scope = globalScope;
+
+  declare_builtin_functions(parser);
+  // Resetting the list here means the builtin declared just above is not
+  // part of the returned globals.
+  parser->globals = NULL;
+
+  while (tok->kind != TK_EOF) {
+    VarAttr attr = {0};
+    C_Type *basety = declspec(parser, &tok, tok, &attr);
+
+    // Typedef
+    if (attr.is_typedef) {
+      tok = parse_typedef(parser, tok, basety);
+      continue;
+    }
+
+    // Function
+    if (is_function(parser, tok)) {
+      tok = function(parser, tok, basety, &attr);
+      continue;
+    }
+
+    // Global variable
+    tok = global_variable(parser, tok, basety, &attr);
+  }
+
+  for (C_Obj *var = parser->globals; var; var = var->next)
+    if (var->is_root)
+      mark_live(parser, var);
+
+  // Remove redundant tentative definitions.
+  scan_globals(parser);
+  return parser->globals;
+}
+
+// Zeroes `parser`, creates its memory arena and shares that arena with the
+// keyword and typeword tables.
+void C_parser_init(C_Parser *parser) {
+  memset(parser, 0, sizeof *parser);
+  parser->arena = create_mspace(0, 0);
+  parser->keywords.arena = parser->arena;
+  parser->typewords.arena = parser->arena;
+}
+
+// Destroys the parser's arena and clears the pointer to it.
+void C_parser_destroy(C_Parser *parser) {
+  //mspace_malloc_stats(parser->arena);
+  destroy_mspace(parser->arena);
+  parser->arena = NULL;
+}
\ No newline at end of file
diff --git a/ravicomp/src/chibicc/chibicc_strings.c b/ravicomp/src/chibicc/chibicc_strings.c
new file mode 100644
index 00000000..a7805380
--- /dev/null
+++ b/ravicomp/src/chibicc/chibicc_strings.c
@@ -0,0 +1,60 @@
+/*
+Adapted from https://github.com/rui314/chibicc
+
+MIT License
+
+Copyright (c) 2019 Rui Ueyama
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons
to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + + +#include "chibicc.h" + +void strarray_push(mspace arena, StringArray *arr, char *s) { + if (!arr->data) { + arr->data = mspace_calloc(arena, 8, sizeof(char *)); + arr->capacity = 8; + } + + if (arr->capacity == arr->len) { + arr->data = mspace_realloc(arena, arr->data, sizeof(char *) * arr->capacity * 2); + arr->capacity *= 2; + for (int i = arr->len; i < arr->capacity; i++) + arr->data[i] = NULL; + } + + arr->data[arr->len++] = s; +} + +#if 0 +// Takes a printf-style format string and returns a formatted string. +char *format(char *fmt, ...) 
{ + char *buf; + size_t buflen; + FILE *out = open_memstream(&buf, &buflen); + + va_list ap; + va_start(ap, fmt); + vfprintf(out, fmt, ap); + va_end(ap); + fclose(out); + return buf; +} +#endif \ No newline at end of file diff --git a/ravicomp/src/chibicc/chibicc_tokenize.c b/ravicomp/src/chibicc/chibicc_tokenize.c new file mode 100644 index 00000000..2248258f --- /dev/null +++ b/ravicomp/src/chibicc/chibicc_tokenize.c @@ -0,0 +1,877 @@ +/* +Adapted from https://github.com/rui314/chibicc + +MIT License + +Copyright (c) 2019 Rui Ueyama + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + + +#include "chibicc.h" + +static int error_sprintf(C_Parser *parser, const char *fmt, ...) { + va_list args; + int pos = parser->error_message ? 
(int)strlen(parser->error_message) : 0; + int estimated_size = 128; + int n = 0; + for (int i = 0; i < 2; i++) { + parser->error_message = mspace_realloc(parser->arena, parser->error_message, pos + estimated_size); // ensure we have at least estimated_size free space + va_start(args, fmt); + n = vsnprintf(parser->error_message + pos, estimated_size, fmt, args); + va_end(args); + if (n >= estimated_size) { + estimated_size = n + 1; // allow for 0 byte + } else if (n < 0) { + fprintf(stderr, "Buffer conversion error\n"); + assert(false); + break; + } else { + break; + } + } + return n; +} + +static int error_vsprintf(C_Parser *parser, const char *fmt, va_list args) { + int estimated_size = 128; + int pos = parser->error_message ? (int)strlen(parser->error_message) : 0; + int n = 0; + for (int i = 0; i < 2; i++) { + parser->error_message = mspace_realloc(parser->arena, parser->error_message, pos + estimated_size); // ensure we have at least estimated_size free space + n = vsnprintf(parser->error_message + pos, estimated_size, fmt, args); + if (n >= estimated_size) { + estimated_size = n + 1; // allow for 0 byte + } else if (n < 0) { + fprintf(stderr, "Buffer conversion error\n"); + assert(false); + break; + } else { + break; + } + } + return n; +} + + +// Reports an error and exit. +void C_error(C_Parser *parser, char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + error_vsprintf(parser, fmt, ap); + error_sprintf(parser, "\n"); + longjmp(parser->env, 1); +} + +// Reports an error message in the following format. +// +// foo.c:10: x = y + 1; +// ^ +static void verror_at(C_Parser *tokenizer, char *filename, char *input, int line_no, + char *loc, char *fmt, va_list ap) { + // Find a line containing `loc`. + char *line = loc; + while (input < line && line[-1] != '\n') + line--; + + char *end = loc; + while (*end && *end != '\n') + end++; + + // Print out the line. 
+ int indent = error_sprintf(tokenizer, "%s:%d: ", filename, line_no); + error_sprintf(tokenizer, "%.*s\n", (int)(end - line), line); + + // Show the error message. + int pos = C_display_width(tokenizer, line, loc - line) + indent; + + error_sprintf(tokenizer, "%*s", pos, ""); // print pos spaces. + error_sprintf(tokenizer, "^ "); + error_vsprintf(tokenizer, fmt, ap); + error_sprintf(tokenizer, "\n"); +} + +void C_error_at(C_Parser *tokenizer, char *loc, char *fmt, ...) { + int line_no = 1; + for (char *p = tokenizer->current_file->contents; p < loc; p++) + if (*p == '\n') + line_no++; + + va_list ap; + va_start(ap, fmt); + verror_at(tokenizer, tokenizer->current_file->name, tokenizer->current_file->contents, line_no, loc, fmt, ap); + longjmp(tokenizer->env, 1); +} + +void C_error_tok(C_Parser *tokenizer, C_Token *tok, char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + verror_at(tokenizer, tok->file->name, tok->file->contents, tok->line_no, tok->loc, fmt, ap); + longjmp(tokenizer->env, 1); +} + +void C_warn_tok(C_Parser *tokenizer, C_Token *tok, char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + verror_at(tokenizer, tok->file->name, tok->file->contents, tok->line_no, tok->loc, fmt, ap); + va_end(ap); +} + +// Consumes the current token if it matches `op`. +bool C_equal(C_Token *tok, char *op) { + return memcmp(tok->loc, op, tok->len) == 0 && op[tok->len] == '\0'; +} + +// Ensure that the current token is `op`. +C_Token *C_skip(C_Parser *parser, C_Token *tok, char *op) { + if (!C_equal(tok, op)) + C_error_tok(parser, tok, "expected '%s'", op); + return tok->next; +} + +bool C_consume(C_Token **rest, C_Token *tok, char *str) { + if (C_equal(tok, str)) { + *rest = tok->next; + return true; + } + *rest = tok; + return false; +} + +// Create a new token. 
+static C_Token *new_token(C_Parser *tokenizer, C_TokenKind kind, char *start, char *end) { + C_Token *tok = mspace_calloc(tokenizer->arena, 1, sizeof(C_Token)); + tok->kind = kind; + tok->loc = start; + tok->len = end - start; + tok->file = tokenizer->current_file; + tok->filename = tokenizer->current_file->display_name; + tok->at_bol = tokenizer->at_bol; + tok->has_space = tokenizer->has_space; + + tokenizer->at_bol = tokenizer->has_space = false; + return tok; +} + +static bool startswith(char *p, char *q) { + return strncmp(p, q, strlen(q)) == 0; +} + +// Read an identifier and returns the length of it. +// If p does not point to a valid identifier, 0 is returned. +static int read_ident(C_Parser *tokenizer, char *start) { + char *p = start; + uint32_t c = C_decode_utf8(tokenizer, &p, p); + if (!C_is_ident1(c)) + return 0; + + for (;;) { + char *q; + c = C_decode_utf8(tokenizer, &q, p); + if (!C_is_ident2(c)) + return p - start; + p = q; + } +} + +static int from_hex(char c) { + if ('0' <= c && c <= '9') + return c - '0'; + if ('a' <= c && c <= 'f') + return c - 'a' + 10; + return c - 'A' + 10; +} + +// Read a punctuator token from p and returns its length. +static int read_punct(char *p) { + static char *kw[] = { + "<<=", ">>=", "...", "==", "!=", "<=", ">=", "->", "+=", + "-=", "*=", "/=", "++", "--", "%=", "&=", "|=", "^=", "&&", + "||", "<<", ">>", "##", + }; + + for (int i = 0; i < sizeof(kw) / sizeof(*kw); i++) + if (startswith(p, kw[i])) + return strlen(kw[i]); + + return ispunct(*p) ? 
1 : 0; +} + +static bool is_keyword(C_Parser *tokenizer, C_Token *tok) { + if (tokenizer->keywords.capacity == 0) { + static char *kw[] = { + "return", "if", "else", "for", "while", "int", "sizeof", "char", + "struct", "union", "short", "long", "void", "typedef", "_Bool", + "enum", "static", "goto", "break", "continue", "switch", "case", + "default", "extern", "_Alignof", "_Alignas", "do", "signed", + "unsigned", "const", "volatile", "auto", "register", "restrict", + "__restrict", "__restrict__", "_Noreturn", "float", "double", + "typeof", "asm", "_Thread_local", "__thread", "_Atomic", + "__attribute__", + }; + + for (int i = 0; i < sizeof(kw) / sizeof(*kw); i++) + hashmap_put(&tokenizer->keywords, kw[i], (void *)1); + } + + return hashmap_get2(&tokenizer->keywords, tok->loc, tok->len); +} + +static int read_escaped_char(C_Parser *tokenizer, char **new_pos, char *p) { + if ('0' <= *p && *p <= '7') { + // Read an octal number. + int c = *p++ - '0'; + if ('0' <= *p && *p <= '7') { + c = (c << 3) + (*p++ - '0'); + if ('0' <= *p && *p <= '7') + c = (c << 3) + (*p++ - '0'); + } + *new_pos = p; + return c; + } + + if (*p == 'x') { + // Read a hexadecimal number. + p++; + if (!isxdigit(*p)) + C_error_at(tokenizer, p, "invalid hex escape sequence"); + + int c = 0; + for (; isxdigit(*p); p++) + c = (c << 4) + from_hex(*p); + *new_pos = p; + return c; + } + + *new_pos = p + 1; + + // Escape sequences are defined using themselves here. E.g. + // '\n' is implemented using '\n'. This tautological definition + // works because the compiler that compiles our compiler knows + // what '\n' actually is. In other words, we "inherit" the ASCII + // code of '\n' from the compiler that compiles our compiler, + // so we don't have to teach the actual code here. + // + // This fact has huge implications not only for the correctness + // of the compiler but also for the security of the generated code. + // For more info, read "Reflections on Trusting Trust" by Ken Thompson. 
+ // https://github.com/rui314/chibicc/wiki/thompson1984.pdf + switch (*p) { + case 'a': return '\a'; + case 'b': return '\b'; + case 't': return '\t'; + case 'n': return '\n'; + case 'v': return '\v'; + case 'f': return '\f'; + case 'r': return '\r'; + // [GNU] \e for the ASCII escape character is a GNU C extension. + case 'e': return 27; + default: return *p; + } +} + +// Find a closing double-quote. +static char *string_literal_end(C_Parser *tokenizer, char *p) { + char *start = p; + for (; *p != '"'; p++) { + if (*p == '\n' || *p == '\0') + C_error_at(tokenizer, start, "unclosed string literal"); + if (*p == '\\') + p++; + } + return p; +} + +static C_Token *read_string_literal(C_Parser *tokenizer, char *start, char *quote) { + char *end = string_literal_end(tokenizer,quote + 1); + char *buf = mspace_calloc(tokenizer->arena, 1, end - quote); + int len = 0; + + for (char *p = quote + 1; p < end;) { + if (*p == '\\') + buf[len++] = read_escaped_char(tokenizer, &p, p + 1); + else + buf[len++] = *p++; + } + + C_Token *tok = new_token(tokenizer, TK_STR, start, end + 1); + tok->ty = C_array_of(tokenizer, C_ty_char, len + 1); + tok->str = buf; + return tok; +} + +// Read a UTF-8-encoded string literal and transcode it in UTF-16. +// +// UTF-16 is yet another variable-width encoding for Unicode. Code +// points smaller than U+10000 are encoded in 2 bytes. Code points +// C_equal to or larger than that are encoded in 4 bytes. Each 2 bytes +// in the 4 byte sequence is called "surrogate", and a 4 byte sequence +// is called a "surrogate pair". 
+static C_Token *read_utf16_string_literal(C_Parser *tokenizer, char *start, char *quote) { + char *end = string_literal_end(tokenizer, quote + 1); + uint16_t *buf = mspace_calloc(tokenizer->arena, 2, end - start); + int len = 0; + + for (char *p = quote + 1; p < end;) { + if (*p == '\\') { + buf[len++] = read_escaped_char(tokenizer, &p, p + 1); + continue; + } + + uint32_t c = C_decode_utf8(tokenizer, &p, p); + if (c < 0x10000) { + // Encode a code point in 2 bytes. + buf[len++] = c; + } else { + // Encode a code point in 4 bytes. + c -= 0x10000; + buf[len++] = 0xd800 + ((c >> 10) & 0x3ff); + buf[len++] = 0xdc00 + (c & 0x3ff); + } + } + + C_Token *tok = new_token(tokenizer, TK_STR, start, end + 1); + tok->ty = C_array_of(tokenizer, C_ty_ushort, len + 1); + tok->str = (char *)buf; + return tok; +} + +// Read a UTF-8-encoded string literal and transcode it in UTF-32. +// +// UTF-32 is a fixed-width encoding for Unicode. Each code point is +// encoded in 4 bytes. +static C_Token *read_utf32_string_literal(C_Parser *tokenizer, char *start, char *quote, C_Type *ty) { + char *end = string_literal_end(tokenizer, quote + 1); + uint32_t *buf = mspace_calloc(tokenizer->arena, 4, end - quote); + int len = 0; + + for (char *p = quote + 1; p < end;) { + if (*p == '\\') + buf[len++] = read_escaped_char(tokenizer, &p, p + 1); + else + buf[len++] = C_decode_utf8(tokenizer, &p, p); + } + + C_Token *tok = new_token(tokenizer, TK_STR, start, end + 1); + tok->ty = C_array_of(tokenizer, ty, len + 1); + tok->str = (char *)buf; + return tok; +} + +static C_Token *read_char_literal(C_Parser *tokenizer, char *start, char *quote, C_Type *ty) { + char *p = quote + 1; + if (*p == '\0') + C_error_at(tokenizer, start, "unclosed char literal"); + + int c; + if (*p == '\\') + c = read_escaped_char(tokenizer, &p, p + 1); + else + c = C_decode_utf8(tokenizer, &p, p); + + char *end = strchr(p, '\''); + if (!end) + C_error_at(tokenizer, p, "unclosed char literal"); + + C_Token *tok = 
new_token(tokenizer, TK_NUM, start, end + 1); + tok->val = c; + tok->ty = ty; + return tok; +} + +static bool convert_pp_int(C_Token *tok) { + char *p = tok->loc; + + // Read a binary, octal, decimal or hexadecimal number. + int base = 10; + if (!strncasecmp(p, "0x", 2) && isxdigit(p[2])) { + p += 2; + base = 16; + } else if (!strncasecmp(p, "0b", 2) && (p[2] == '0' || p[2] == '1')) { + p += 2; + base = 2; + } else if (*p == '0') { + base = 8; + } + + int64_t val = strtoul(p, &p, base); + + // Read U, L or LL suffixes. + bool l = false; + bool u = false; + + if (startswith(p, "LLU") || startswith(p, "LLu") || + startswith(p, "llU") || startswith(p, "llu") || + startswith(p, "ULL") || startswith(p, "Ull") || + startswith(p, "uLL") || startswith(p, "ull")) { + p += 3; + l = u = true; + } else if (!strncasecmp(p, "lu", 2) || !strncasecmp(p, "ul", 2)) { + p += 2; + l = u = true; + } else if (startswith(p, "LL") || startswith(p, "ll")) { + p += 2; + l = true; + } else if (*p == 'L' || *p == 'l') { + p++; + l = true; + } else if (*p == 'U' || *p == 'u') { + p++; + u = true; + } + + if (p != tok->loc + tok->len) + return false; + + // Infer a type. + C_Type *ty; + if (base == 10) { + if (l && u) + ty = C_ty_ulong; + else if (l) + ty = C_ty_long; + else if (u) + ty = (val >> 32) ? C_ty_ulong : C_ty_uint; + else + ty = (val >> 31) ? C_ty_long : C_ty_int; + } else { + if (l && u) + ty = C_ty_ulong; + else if (l) + ty = (val >> 63) ? C_ty_ulong : C_ty_long; + else if (u) + ty = (val >> 32) ? C_ty_ulong : C_ty_uint; + else if (val >> 63) + ty = C_ty_ulong; + else if (val >> 32) + ty = C_ty_long; + else if (val >> 31) + ty = C_ty_uint; + else + ty = C_ty_int; + } + + tok->kind = TK_NUM; + tok->val = val; + tok->ty = ty; + return true; +} + +// The definition of the numeric literal at the preprocessing stage +// is more relaxed than the definition of that at the later stages. 
+// In order to handle that, a numeric literal is tokenized as a +// "pp-number" token first and then converted to a regular number +// token after preprocessing. +// +// This function converts a pp-number token to a regular number token. +static void convert_pp_number(C_Parser *tokenizer, C_Token *tok) { + // Try to C_parse as an integer constant. + if (convert_pp_int(tok)) + return; + + // If it's not an integer, it must be a floating point constant. + char *end; + long double val = strtold(tok->loc, &end); + + C_Type *ty; + if (*end == 'f' || *end == 'F') { + ty = C_ty_float; + end++; + } else if (*end == 'l' || *end == 'L') { + ty = C_ty_ldouble; + end++; + } else { + ty = C_ty_double; + } + + if (tok->loc + tok->len != end) + C_error_tok(tokenizer, tok, "invalid numeric constant"); + + tok->kind = TK_NUM; + tok->fval = val; + tok->ty = ty; +} + +void C_convert_pp_tokens(C_Parser *tokenizer, C_Token *tok) { + for (C_Token *t = tok; t->kind != TK_EOF; t = t->next) { + if (is_keyword(tokenizer, t)) + t->kind = TK_KEYWORD; + else if (t->kind == TK_PP_NUM) + convert_pp_number(tokenizer, t); + } +} + +// Initialize line info for all tokens. +static void add_line_numbers(C_Parser *tokenizer, C_Token *tok) { + char *p = tokenizer->current_file->contents; + int n = 1; + + do { + if (p == tok->loc) { + tok->line_no = n; + tok = tok->next; + } + if (*p == '\n') + n++; + } while (*p++); +} + +static C_Token *tokenize_string_literal(C_Parser *tokenizer, C_Token *tok, C_Type *basety) { + C_Token *t; + if (basety->size == 2) + t = read_utf16_string_literal(tokenizer, tok->loc, tok->loc); + else + t = read_utf32_string_literal(tokenizer, tok->loc, tok->loc, basety); + t->next = tok->next; + return t; +} + +// Tokenize a given string and returns new tokens. 
+C_Token *C_tokenize(C_Parser *tokenizer, C_File *file) { + tokenizer->current_file = file; + + char *p = file->contents; + C_Token head = {0}; + C_Token *cur = &head; + + tokenizer->at_bol = true; + tokenizer->has_space = false; + + while (*p) { + // Skip line comments. + if (startswith(p, "//")) { + p += 2; + while (*p != '\n') + p++; + tokenizer->has_space = true; + continue; + } + + // Skip block comments. + if (startswith(p, "/*")) { + char *q = strstr(p + 2, "*/"); + if (!q) + C_error_at(tokenizer, p, "unclosed block comment"); + p = q + 2; + tokenizer->has_space = true; + continue; + } + + // Skip newline. + if (*p == '\n') { + p++; + tokenizer->at_bol = true; + tokenizer->has_space = false; + continue; + } + + // Skip whitespace characters. + if (isspace(*p)) { + p++; + tokenizer->has_space = true; + continue; + } + + // Numeric literal + if (isdigit(*p) || (*p == '.' && isdigit(p[1]))) { + char *q = p++; + for (;;) { + if (p[0] && p[1] && strchr("eEpP", p[0]) && strchr("+-", p[1])) + p += 2; + else if (isalnum(*p) || *p == '.') + p++; + else + break; + } + cur = cur->next = new_token(tokenizer, TK_PP_NUM, q, p); + continue; + } + + // String literal + if (*p == '"') { + cur = cur->next = read_string_literal(tokenizer, p, p); + p += cur->len; + continue; + } + + // UTF-8 string literal + if (startswith(p, "u8\"")) { + cur = cur->next = read_string_literal(tokenizer, p, p + 2); + p += cur->len; + continue; + } + + // UTF-16 string literal + if (startswith(p, "u\"")) { + cur = cur->next = read_utf16_string_literal(tokenizer, p, p + 1); + p += cur->len; + continue; + } + + // Wide string literal + if (startswith(p, "L\"")) { + cur = cur->next = read_utf32_string_literal(tokenizer, p, p + 1, C_ty_int); + p += cur->len; + continue; + } + + // UTF-32 string literal + if (startswith(p, "U\"")) { + cur = cur->next = read_utf32_string_literal(tokenizer, p, p + 1, C_ty_uint); + p += cur->len; + continue; + } + + // Character literal + if (*p == '\'') { + cur = 
cur->next = read_char_literal(tokenizer, p, p, C_ty_int); + cur->val = (char)cur->val; + p += cur->len; + continue; + } + + // UTF-16 character literal + if (startswith(p, "u'")) { + cur = cur->next = read_char_literal(tokenizer, p, p + 1, C_ty_ushort); + cur->val &= 0xffff; + p += cur->len; + continue; + } + + // Wide character literal + if (startswith(p, "L'")) { + cur = cur->next = read_char_literal(tokenizer, p, p + 1, C_ty_int); + p += cur->len; + continue; + } + + // UTF-32 character literal + if (startswith(p, "U'")) { + cur = cur->next = read_char_literal(tokenizer, p, p + 1, C_ty_uint); + p += cur->len; + continue; + } + + // Identifier or keyword + int ident_len = read_ident(tokenizer, p); + if (ident_len) { + cur = cur->next = new_token(tokenizer, TK_IDENT, p, p + ident_len); + p += cur->len; + continue; + } + + // Punctuators + int punct_len = read_punct(p); + if (punct_len) { + cur = cur->next = new_token(tokenizer, TK_PUNCT, p, p + punct_len); + p += cur->len; + continue; + } + + C_error_at(tokenizer, p, "invalid token"); + } + + cur = cur->next = new_token(tokenizer, TK_EOF, p, p); + add_line_numbers(tokenizer, head.next); + return head.next; +} + +#if 0 +// Returns the contents of a given file. +static char *read_file(char *path) { + FILE *fp; + + if (strcmp(path, "-") == 0) { + // By convention, read from stdin if a given filename is "-". + fp = stdin; + } else { + fp = fopen(path, "r"); + if (!fp) + return NULL; + } + + char *buf; + size_t buflen; + FILE *out = open_memstream(&buf, &buflen); + + // Read the entire file. + for (;;) { + char buf2[4096]; + int n = fread(buf2, 1, sizeof(buf2), fp); + if (n == 0) + break; + fwrite(buf2, 1, n, out); + } + + if (fp != stdin) + fclose(fp); + + // Make sure that the last line is properly terminated with '\n'. 
+ fflush(out); + if (buflen == 0 || buf[buflen - 1] != '\n') + fputc('\n', out); + fputc('\0', out); + fclose(out); + return buf; +} + +C_File **get_input_files(C_Parser *tokenizer) { + return tokenizer->input_files; +} +#endif + + +C_File *C_new_file(C_Parser *tokenizer, char *name, int file_no, char *contents) { + C_File *file = mspace_calloc(tokenizer->arena, 1, sizeof(C_File)); + file->name = name; + file->display_name = name; + file->file_no = file_no; + file->contents = contents; + return file; +} + + +// Replaces \r or \r\n with \n. +static void canonicalize_newline(char *p) { + int i = 0, j = 0; + + while (p[i]) { + if (p[i] == '\r' && p[i + 1] == '\n') { + i += 2; + p[j++] = '\n'; + } else if (p[i] == '\r') { + i++; + p[j++] = '\n'; + } else { + p[j++] = p[i++]; + } + } + + p[j] = '\0'; +} + +// Removes backslashes followed by a newline. +static void remove_backslash_newline(char *p) { + int i = 0, j = 0; + + // We want to keep the number of newline characters so that + // the logical line number matches the physical one. + // This counter maintain the number of newlines we have removed. + int n = 0; + + while (p[i]) { + if (p[i] == '\\' && p[i + 1] == '\n') { + i += 2; + n++; + } else if (p[i] == '\n') { + p[j++] = p[i++]; + for (; n > 0; n--) + p[j++] = '\n'; + } else { + p[j++] = p[i++]; + } + } + + for (; n > 0; n--) + p[j++] = '\n'; + p[j] = '\0'; +} + +static uint32_t read_universal_char(char *p, int len) { + uint32_t c = 0; + for (int i = 0; i < len; i++) { + if (!isxdigit(p[i])) + return 0; + c = (c << 4) | from_hex(p[i]); + } + return c; +} + +// Replace \u or \U escape sequences with corresponding UTF-8 bytes. 
+static void convert_universal_chars(char *p) { + char *q = p; + + while (*p) { + if (startswith(p, "\\u")) { + uint32_t c = read_universal_char(p + 2, 4); + if (c) { + p += 6; + q += C_encode_utf8(q, c); + } else { + *q++ = *p++; + } + } else if (startswith(p, "\\U")) { + uint32_t c = read_universal_char(p + 2, 8); + if (c) { + p += 10; + q += C_encode_utf8(q, c); + } else { + *q++ = *p++; + } + } else if (p[0] == '\\') { + *q++ = *p++; + *q++ = *p++; + } else { + *q++ = *p++; + } + } + + *q = '\0'; +} + +#if 0 +C_Token *tokenize_file(C_Parser *tokenizer, char *path) { + char *p = read_file(path); + if (!p) + return NULL; + return tokenize_buffer(p); +} +#endif + +C_Token *C_tokenize_buffer(C_Parser *tokenizer, char *p) { + if (!p) + return NULL; + + if (setjmp(tokenizer->env) != 0) + return NULL; + + // UTF-8 texts may start with a 3-byte "BOM" marker sequence. + // If exists, just C_skip them because they are useless bytes. + // (It is actually not recommended to add BOM markers to UTF-8 + // texts, but it's not uncommon particularly on Windows.) + if (!memcmp(p, "\xef\xbb\xbf", 3)) + p += 3; + + canonicalize_newline(p); + remove_backslash_newline(p); + convert_universal_chars(p); + + // Save the filename for assembler .file directive. + static int file_no; + C_File *file = C_new_file(tokenizer, "", file_no + 1, p); + + // Save the filename for assembler .file directive. 
+ tokenizer->input_files = mspace_realloc(tokenizer->arena, tokenizer->input_files, sizeof(char *) * (file_no + 2)); + tokenizer->input_files[file_no] = file; + tokenizer->input_files[file_no + 1] = NULL; + file_no++; + + return C_tokenize(tokenizer, file); +} diff --git a/ravicomp/src/chibicc/chibicc_type.c b/ravicomp/src/chibicc/chibicc_type.c new file mode 100644 index 00000000..f79616ca --- /dev/null +++ b/ravicomp/src/chibicc/chibicc_type.c @@ -0,0 +1,333 @@ +/* +Adapted from https://github.com/rui314/chibicc + +MIT License + +Copyright (c) 2019 Rui Ueyama + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +#include "chibicc.h" + +C_Type *C_ty_void = &(C_Type){TY_VOID, 1, 1}; +C_Type *C_ty_bool = &(C_Type){TY_BOOL, 1, 1}; + +C_Type *C_ty_char = &(C_Type){TY_CHAR, 1, 1}; +C_Type *C_ty_short = &(C_Type){TY_SHORT, 2, 2}; +C_Type *C_ty_int = &(C_Type){TY_INT, 4, 4}; +C_Type *C_ty_long = &(C_Type){TY_LONG, 8, 8}; + +C_Type *C_ty_uchar = &(C_Type){TY_CHAR, 1, 1, true}; +C_Type *C_ty_ushort = &(C_Type){TY_SHORT, 2, 2, true}; +C_Type *C_ty_uint = &(C_Type){TY_INT, 4, 4, true}; +C_Type *C_ty_ulong = &(C_Type){TY_LONG, 8, 8, true}; + +C_Type *C_ty_float = &(C_Type){TY_FLOAT, 4, 4}; +C_Type *C_ty_double = &(C_Type){TY_DOUBLE, 8, 8}; +C_Type *C_ty_ldouble = &(C_Type){TY_LDOUBLE, 16, 16}; + +static C_Type *new_type(C_Parser *parser, TypeKind kind, int size, int align) { + C_Type *ty = mspace_calloc(parser->arena, 1, sizeof(C_Type)); + ty->kind = kind; + ty->size = size; + ty->align = align; + return ty; +} + +bool C_is_integer(C_Type *ty) { + TypeKind k = ty->kind; + return k == TY_BOOL || k == TY_CHAR || k == TY_SHORT || + k == TY_INT || k == TY_LONG || k == TY_ENUM; +} + +bool C_is_flonum(C_Type *ty) { + return ty->kind == TY_FLOAT || ty->kind == TY_DOUBLE || + ty->kind == TY_LDOUBLE; +} + +bool C_is_numeric(C_Type *ty) { + return C_is_integer(ty) || C_is_flonum(ty); +} + +bool C_is_compatible(C_Type *t1, C_Type *t2) { + if (t1 == t2) + return true; + + if (t1->origin) + return C_is_compatible(t1->origin, t2); + + if (t2->origin) + return C_is_compatible(t1, t2->origin); + + if (t1->kind != t2->kind) + return false; + + switch (t1->kind) { + case TY_CHAR: + case TY_SHORT: + case TY_INT: + case TY_LONG: + return t1->is_unsigned == t2->is_unsigned; + case TY_FLOAT: + case TY_DOUBLE: + case TY_LDOUBLE: + return true; + case TY_PTR: + return C_is_compatible(t1->base, t2->base); + case TY_FUNC: { + if (!C_is_compatible(t1->return_ty, t2->return_ty)) + return false; + if (t1->is_variadic != t2->is_variadic) + return false; + + C_Type *p1 = t1->params; + C_Type *p2 = t2->params; 
+ for (; p1 && p2; p1 = p1->next, p2 = p2->next) + if (!C_is_compatible(p1, p2)) + return false; + return p1 == NULL && p2 == NULL; + } + case TY_ARRAY: + if (!C_is_compatible(t1->base, t2->base)) + return false; + return t1->array_len < 0 && t2->array_len < 0 && + t1->array_len == t2->array_len; + } + return false; +} + +C_Type *C_copy_type(C_Parser *parser, C_Type *ty) { + C_Type *ret = mspace_calloc(parser->arena, 1, sizeof(C_Type)); + *ret = *ty; + ret->origin = ty; + return ret; +} + +C_Type *C_pointer_to(C_Parser *parser, C_Type *base) { + C_Type *ty = new_type(parser, TY_PTR, 8, 8); + ty->base = base; + ty->is_unsigned = true; + return ty; +} + +C_Type *C_func_type(C_Parser *parser, C_Type *return_ty) { + // The C spec disallows sizeof(), but + // GCC allows that and the expression is evaluated to 1. + C_Type *ty = new_type(parser, TY_FUNC, 1, 1); + ty->return_ty = return_ty; + return ty; +} + +C_Type *C_array_of(C_Parser *parser, C_Type *base, int size) { + C_Type *ty = new_type(parser, TY_ARRAY, base->size * size, base->align); + ty->base = base; + ty->array_len = size; + return ty; +} + +C_Type *C_vla_of(C_Parser *parser, C_Type *base, C_Node *expr) { + C_Type *ty = new_type(parser, TY_VLA, 8, 8); + ty->base = base; + ty->vla_len = expr; + return ty; +} + +C_Type *C_enum_type(C_Parser *parser) { + return new_type(parser, TY_ENUM, 4, 4); +} + +C_Type *C_struct_type(C_Parser *parser) { + return new_type(parser, TY_STRUCT, 0, 1); +} + +static C_Type *get_common_type(C_Parser *parser, C_Type *ty1, C_Type *ty2) { + if (ty1->base) + return C_pointer_to(parser, ty1->base); + + if (ty1->kind == TY_FUNC) + return C_pointer_to(parser, ty1); + if (ty2->kind == TY_FUNC) + return C_pointer_to(parser, ty2); + + if (ty1->kind == TY_LDOUBLE || ty2->kind == TY_LDOUBLE) + return C_ty_ldouble; + if (ty1->kind == TY_DOUBLE || ty2->kind == TY_DOUBLE) + return C_ty_double; + if (ty1->kind == TY_FLOAT || ty2->kind == TY_FLOAT) + return C_ty_float; + + if (ty1->size < 4) + ty1 = 
C_ty_int;
+  if (ty2->size < 4)
+    ty2 = C_ty_int;
+
+  if (ty1->size != ty2->size)
+    return (ty1->size < ty2->size) ? ty2 : ty1;
+
+  if (ty2->is_unsigned)
+    return ty2;
+  return ty1;
+}
+
+// For many binary operators, we implicitly promote operands so that
+// both operands have the same type. Any integral type smaller than
+// int is always promoted to int. If the type of one operand is larger
+// than the other's (e.g. "long" vs. "int"), the smaller operand will
+// be promoted to match with the other.
+//
+// This operation is called the "usual arithmetic conversion".
+//
+// Both *lhs and *rhs are replaced by cast nodes to the common type.
+static void usual_arith_conv(C_Parser *parser, C_Node **lhs, C_Node **rhs) {
+  C_Type *ty = get_common_type(parser, (*lhs)->ty, (*rhs)->ty);
+  *lhs = C_new_cast(parser, *lhs, ty);
+  *rhs = C_new_cast(parser, *rhs, ty);
+}
+
+// Assigns node->ty for `node` and, recursively, for its operands,
+// statement body and call arguments. A NULL node or a node that
+// already has a type is left untouched, so the function is safe to
+// call repeatedly on the same tree. Type errors are reported through
+// C_error_tok().
+void C_add_type(C_Parser *parser, C_Node *node) {
+  if (!node || node->ty)
+    return;
+
+  // Children first: the switch below reads their types.
+  C_add_type(parser, node->lhs);
+  C_add_type(parser, node->rhs);
+  C_add_type(parser, node->cond);
+  C_add_type(parser, node->then);
+  C_add_type(parser, node->els);
+  C_add_type(parser, node->init);
+  C_add_type(parser, node->inc);
+
+  for (C_Node *n = node->body; n; n = n->next)
+    C_add_type(parser, n);
+  for (C_Node *n = node->args; n; n = n->next)
+    C_add_type(parser, n);
+
+  switch (node->kind) {
+  case ND_NUM:
+    node->ty = C_ty_int;
+    return;
+  case ND_ADD:
+  case ND_SUB:
+  case ND_MUL:
+  case ND_DIV:
+  case ND_MOD:
+  case ND_BITAND:
+  case ND_BITOR:
+  case ND_BITXOR:
+    usual_arith_conv(parser, &node->lhs, &node->rhs);
+    node->ty = node->lhs->ty;
+    return;
+  case ND_NEG: {
+    C_Type *ty = get_common_type(parser, C_ty_int, node->lhs->ty);
+    node->lhs = C_new_cast(parser, node->lhs, ty);
+    node->ty = ty;
+    return;
+  }
+  case ND_ASSIGN:
+    if (node->lhs->ty->kind == TY_ARRAY)
+      C_error_tok(parser, node->lhs->tok, "not an lvalue");
+    // Struct assignment copies as-is; everything else converts the
+    // right-hand side to the destination type.
+    if (node->lhs->ty->kind != TY_STRUCT)
+      node->rhs = C_new_cast(parser, node->rhs, node->lhs->ty);
+    node->ty = node->lhs->ty;
+    return;
+  case ND_EQ:
+  case ND_NE:
+  case ND_LT:
+  case ND_LE:
+    usual_arith_conv(parser, &node->lhs, &node->rhs);
+    node->ty = C_ty_int;
+    return;
+  case ND_FUNCALL:
+    node->ty = node->func_ty->return_ty;
+    return;
+  case ND_NOT:
+  case ND_LOGOR:
+  case ND_LOGAND:
+    node->ty = C_ty_int;
+    return;
+  case ND_BITNOT:
+  case ND_SHL:
+  case ND_SHR:
+    node->ty = node->lhs->ty;
+    return;
+  case ND_VAR:
+  case ND_VLA_PTR:
+    node->ty = node->var->ty;
+    return;
+  case ND_COND:
+    if (node->then->ty->kind == TY_VOID || node->els->ty->kind == TY_VOID) {
+      node->ty = C_ty_void;
+    } else {
+      usual_arith_conv(parser, &node->then, &node->els);
+      node->ty = node->then->ty;
+    }
+    return;
+  case ND_COMMA:
+    node->ty = node->rhs->ty;
+    return;
+  case ND_MEMBER:
+    node->ty = node->member->ty;
+    return;
+  case ND_ADDR: {
+    // &array yields a pointer to the element type, not to the array.
+    C_Type *ty = node->lhs->ty;
+    if (ty->kind == TY_ARRAY)
+      node->ty = C_pointer_to(parser, ty->base);
+    else
+      node->ty = C_pointer_to(parser, ty);
+    return;
+  }
+  case ND_DEREF:
+    if (!node->lhs->ty->base)
+      C_error_tok(parser, node->tok, "invalid pointer dereference");
+    if (node->lhs->ty->base->kind == TY_VOID)
+      C_error_tok(parser, node->tok, "dereferencing a void pointer");
+
+    node->ty = node->lhs->ty->base;
+    return;
+  case ND_STMT_EXPR:
+    // The value of ({ ... }) is the value of its last expression statement.
+    if (node->body) {
+      C_Node *stmt = node->body;
+      while (stmt->next)
+        stmt = stmt->next;
+      if (stmt->kind == ND_EXPR_STMT) {
+        node->ty = stmt->lhs->ty;
+        return;
+      }
+    }
+    C_error_tok(parser, node->tok, "statement expression returning void is not supported");
+    return;
+  case ND_LABEL_VAL:
+    node->ty = C_pointer_to(parser, C_ty_void);
+    return;
+  case ND_CAS:
+    // The cas_* operands are not reached by the generic walk above.
+    C_add_type(parser, node->cas_addr);
+    C_add_type(parser, node->cas_old);
+    C_add_type(parser, node->cas_new);
+    node->ty = C_ty_bool;
+
+    if (node->cas_addr->ty->kind != TY_PTR)
+      C_error_tok(parser, node->cas_addr->tok, "pointer expected");
+    if (node->cas_old->ty->kind != TY_PTR)
+      C_error_tok(parser, node->cas_old->tok, "pointer expected");
+    return;
+  case ND_EXCH:
+    // The operand checked here is lhs, so report against lhs; the
+    // cas_* fields are only handled for ND_CAS above.
+    if (node->lhs->ty->kind != TY_PTR)
+      C_error_tok(parser, node->lhs->tok, "pointer expected");
+    node->ty = node->lhs->ty->base;
+    return;
+  }
+}
diff --git a/ravicomp/src/chibicc/chibicc_unicode.c b/ravicomp/src/chibicc/chibicc_unicode.c
new file mode 100644
index 00000000..424549ea
--- /dev/null
+++ b/ravicomp/src/chibicc/chibicc_unicode.c
@@ -0,0 +1,216 @@
+/*
+Adapted from https://github.com/rui314/chibicc
+
+MIT License
+
+Copyright (c) 2019 Rui Ueyama
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+
+#include "chibicc.h"
+
+// Encode a given character in UTF-8.
+int C_encode_utf8(char *buf, uint32_t c) {
+  // Writes 1 to 4 bytes into buf and returns how many were written.
+  // buf is not NUL-terminated here.
+  if (c <= 0x7F) {
+    buf[0] = c;
+    return 1;
+  }
+
+  if (c <= 0x7FF) {
+    buf[0] = 0b11000000 | (c >> 6);
+    buf[1] = 0b10000000 | (c & 0b00111111);
+    return 2;
+  }
+
+  if (c <= 0xFFFF) {
+    buf[0] = 0b11100000 | (c >> 12);
+    buf[1] = 0b10000000 | ((c >> 6) & 0b00111111);
+    buf[2] = 0b10000000 | (c & 0b00111111);
+    return 3;
+  }
+
+  buf[0] = 0b11110000 | (c >> 18);
+  buf[1] = 0b10000000 | ((c >> 12) & 0b00111111);
+  buf[2] = 0b10000000 | ((c >> 6) & 0b00111111);
+  buf[3] = 0b10000000 | (c & 0b00111111);
+  return 4;
+}
+
+// Read a UTF-8-encoded Unicode code point from a source file.
+// We assume that source files are always in UTF-8.
+//
+// UTF-8 is a variable-width encoding in which one code point is
+// encoded in one to four bytes. One byte UTF-8 code points are
+// identical to ASCII. Non-ASCII characters are encoded using more
+// than one byte.
+//
+// On return *new_pos points just past the bytes consumed. A stray
+// continuation byte, a lead byte of 0xF8 or above, or a missing
+// continuation byte is reported as "invalid UTF-8 sequence".
+uint32_t C_decode_utf8(C_Parser *tokenizer, char **new_pos, char *p) {
+  unsigned char lead = (unsigned char)*p;
+
+  if (lead < 128) {
+    *new_pos = p + 1;
+    return lead;
+  }
+
+  int len = 0; // 0 means "malformed"
+  uint32_t c = 0;
+
+  if (lead >= 0b11111000) {
+    // 0xF8..0xFF never start a well-formed sequence.
+    len = 0;
+  } else if (lead >= 0b11110000) {
+    len = 4;
+    c = lead & 0b111;
+  } else if (lead >= 0b11100000) {
+    len = 3;
+    c = lead & 0b1111;
+  } else if (lead >= 0b11000000) {
+    len = 2;
+    c = lead & 0b11111;
+  }
+
+  for (int i = 1; i < len; i++) {
+    // Every trailing byte must look like 10xxxxxx. This also stops
+    // the scan at a terminating NUL.
+    if ((unsigned char)p[i] >> 6 != 0b10) {
+      len = 0;
+      break;
+    }
+    c = (c << 6) | (p[i] & 0b111111);
+  }
+
+  if (len == 0) {
+    C_error_at(tokenizer, p, "invalid UTF-8 sequence");
+    // NOTE(review): C_error_at is defined outside this file and is
+    // presumably non-returning. If it does return, skip one byte and
+    // yield U+FFFD so callers always make progress and never read
+    // uninitialised state.
+    *new_pos = p + 1;
+    return 0xFFFD;
+  }
+
+  *new_pos = p + len;
+  return c;
+}
+
+// Returns true if c lies in one of the inclusive [first, last] pairs
+// stored in range. The list is terminated by a single (uint32_t)-1.
+static bool in_range(uint32_t *range, uint32_t c) {
+  for (int i = 0; range[i] != (uint32_t)-1; i += 2)
+    if (range[i] <= c && c <= range[i + 1])
+      return true;
+  return false;
+}
+
+// [https://www.sigbus.info/n1570#D] C11 allows not only ASCII but
+// some multibyte characters in certain Unicode ranges to be used in an
+// identifier.
+//
+// This function returns true if a given character is acceptable as
+// the first character of an identifier.
+//
+// For example, ¾ (U+00BE) is a valid identifier because characters in
+// 0x00BC-0x00BE are allowed, while neither ⟘ (U+27D8) nor '　'
+// (U+3000, full-width space) are allowed because they are out of range.
+bool C_is_ident1(uint32_t c) {
+  // Inclusive [first, last] pairs, terminated by a single -1.
+  static uint32_t range[] = {
+    '_', '_', 'a', 'z', 'A', 'Z', '$', '$',
+    0x00A8, 0x00A8, 0x00AA, 0x00AA, 0x00AD, 0x00AD, 0x00AF, 0x00AF,
+    0x00B2, 0x00B5, 0x00B7, 0x00BA, 0x00BC, 0x00BE, 0x00C0, 0x00D6,
+    0x00D8, 0x00F6, 0x00F8, 0x00FF, 0x0100, 0x02FF, 0x0370, 0x167F,
+    0x1681, 0x180D, 0x180F, 0x1DBF, 0x1E00, 0x1FFF, 0x200B, 0x200D,
+    0x202A, 0x202E, 0x203F, 0x2040, 0x2054, 0x2054, 0x2060, 0x206F,
+    0x2070, 0x20CF, 0x2100, 0x218F, 0x2460, 0x24FF, 0x2776, 0x2793,
+    0x2C00, 0x2DFF, 0x2E80, 0x2FFF, 0x3004, 0x3007, 0x3021, 0x302F,
+    0x3031, 0x303F, 0x3040, 0xD7FF, 0xF900, 0xFD3D, 0xFD40, 0xFDCF,
+    0xFDF0, 0xFE1F, 0xFE30, 0xFE44, 0xFE47, 0xFFFD,
+    0x10000, 0x1FFFD, 0x20000, 0x2FFFD, 0x30000, 0x3FFFD, 0x40000, 0x4FFFD,
+    0x50000, 0x5FFFD, 0x60000, 0x6FFFD, 0x70000, 0x7FFFD, 0x80000, 0x8FFFD,
+    0x90000, 0x9FFFD, 0xA0000, 0xAFFFD, 0xB0000, 0xBFFFD, 0xC0000, 0xCFFFD,
+    0xD0000, 0xDFFFD, 0xE0000, 0xEFFFD, -1,
+  };
+
+  return in_range(range, c);
+}
+
+// Returns true if a given character is acceptable as a non-first
+// character of an identifier.
+//
+// That is everything C_is_ident1 accepts plus the extra ranges below
+// (ASCII digits and several combining-mark blocks).
+bool C_is_ident2(uint32_t c) {
+  // Same pair layout and -1 terminator as the table in C_is_ident1.
+  static uint32_t range[] = {
+    '0', '9', '$', '$', 0x0300, 0x036F, 0x1DC0, 0x1DFF, 0x20D0, 0x20FF,
+    0xFE20, 0xFE2F, -1,
+  };
+
+  return C_is_ident1(c) || in_range(range, c);
+}
+
+// Returns the number of columns needed to display a given
+// character in a fixed-width font.
+//
+// Based on https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+//
+// The result is 0 for code points listed in range1, 2 for code points
+// listed in range2, and 1 for everything else. Both tables hold
+// inclusive [first, last] pairs and end with a single -1.
+static int char_width(uint32_t c) {
+  // Code points that occupy no column.
+  static uint32_t range1[] = {
+    0x0000, 0x001F, 0x007f, 0x00a0, 0x0300, 0x036F, 0x0483, 0x0486,
+    0x0488, 0x0489, 0x0591, 0x05BD, 0x05BF, 0x05BF, 0x05C1, 0x05C2,
+    0x05C4, 0x05C5, 0x05C7, 0x05C7, 0x0600, 0x0603, 0x0610, 0x0615,
+    0x064B, 0x065E, 0x0670, 0x0670, 0x06D6, 0x06E4, 0x06E7, 0x06E8,
+    0x06EA, 0x06ED, 0x070F, 0x070F, 0x0711, 0x0711, 0x0730, 0x074A,
+    0x07A6, 0x07B0, 0x07EB, 0x07F3, 0x0901, 0x0902, 0x093C, 0x093C,
+    0x0941, 0x0948, 0x094D, 0x094D, 0x0951, 0x0954, 0x0962, 0x0963,
+    0x0981, 0x0981, 0x09BC, 0x09BC, 0x09C1, 0x09C4, 0x09CD, 0x09CD,
+    0x09E2, 0x09E3, 0x0A01, 0x0A02, 0x0A3C, 0x0A3C, 0x0A41, 0x0A42,
+    0x0A47, 0x0A48, 0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A82,
+    0x0ABC, 0x0ABC, 0x0AC1, 0x0AC5, 0x0AC7, 0x0AC8, 0x0ACD, 0x0ACD,
+    0x0AE2, 0x0AE3, 0x0B01, 0x0B01, 0x0B3C, 0x0B3C, 0x0B3F, 0x0B3F,
+    0x0B41, 0x0B43, 0x0B4D, 0x0B4D, 0x0B56, 0x0B56, 0x0B82, 0x0B82,
+    0x0BC0, 0x0BC0, 0x0BCD, 0x0BCD, 0x0C3E, 0x0C40, 0x0C46, 0x0C48,
+    0x0C4A, 0x0C4D, 0x0C55, 0x0C56, 0x0CBC, 0x0CBC, 0x0CBF, 0x0CBF,
+    0x0CC6, 0x0CC6, 0x0CCC, 0x0CCD, 0x0CE2, 0x0CE3, 0x0D41, 0x0D43,
+    0x0D4D, 0x0D4D, 0x0DCA, 0x0DCA, 0x0DD2, 0x0DD4, 0x0DD6, 0x0DD6,
+    0x0E31, 0x0E31, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB1, 0x0EB1,
+    0x0EB4, 0x0EB9, 0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19,
+    0x0F35, 0x0F35, 0x0F37, 0x0F37, 0x0F39, 0x0F39, 0x0F71, 0x0F7E,
+    0x0F80, 0x0F84, 0x0F86, 0x0F87, 0x0F90, 0x0F97, 0x0F99, 0x0FBC,
+    0x0FC6, 0x0FC6, 0x102D, 0x1030, 0x1032, 0x1032, 0x1036, 0x1037,
+    0x1039, 0x1039, 0x1058, 0x1059, 0x1160, 0x11FF, 0x135F, 0x135F,
+    0x1712, 0x1714, 0x1732, 0x1734, 0x1752, 0x1753, 0x1772, 0x1773,
+    0x17B4, 0x17B5, 0x17B7, 0x17BD, 0x17C6, 0x17C6, 0x17C9, 0x17D3,
+    0x17DD, 0x17DD, 0x180B, 0x180D, 0x18A9, 0x18A9, 0x1920, 0x1922,
+    0x1927, 0x1928, 0x1932, 0x1932, 0x1939, 0x193B, 0x1A17, 0x1A18,
+    0x1B00, 0x1B03, 0x1B34, 0x1B34, 0x1B36, 0x1B3A, 0x1B3C, 0x1B3C,
+    0x1B42, 0x1B42, 0x1B6B, 0x1B73, 0x1DC0, 0x1DCA, 0x1DFE, 0x1DFF,
+    0x200B, 0x200F, 0x202A, 0x202E, 0x2060, 0x2063, 0x206A, 0x206F,
+    0x20D0, 0x20EF, 0x302A, 0x302F, 0x3099, 0x309A, 0xA806, 0xA806,
+    0xA80B, 0xA80B, 0xA825, 0xA826, 0xFB1E, 0xFB1E, 0xFE00, 0xFE0F,
+    0xFE20, 0xFE23, 0xFEFF, 0xFEFF, 0xFFF9, 0xFFFB, 0x10A01, 0x10A03,
+    0x10A05, 0x10A06, 0x10A0C, 0x10A0F, 0x10A38, 0x10A3A, 0x10A3F, 0x10A3F,
+    0x1D167, 0x1D169, 0x1D173, 0x1D182, 0x1D185, 0x1D18B, 0x1D1AA, 0x1D1AD,
+    0x1D242, 0x1D244, 0xE0001, 0xE0001, 0xE0020, 0xE007F, 0xE0100, 0xE01EF,
+    -1,
+  };
+
+  if (in_range(range1, c))
+    return 0;
+
+  // Code points that occupy two columns.
+  static uint32_t range2[] = {
+    0x1100, 0x115F, 0x2329, 0x2329, 0x232A, 0x232A, 0x2E80, 0x303E,
+    0x3040, 0xA4CF, 0xAC00, 0xD7A3, 0xF900, 0xFAFF, 0xFE10, 0xFE19,
+    0xFE30, 0xFE6F, 0xFF00, 0xFF60, 0xFFE0, 0xFFE6, 0x1F000, 0x1F644,
+    0x20000, 0x2FFFD, 0x30000, 0x3FFFD, -1,
+  };
+
+  if (in_range(range2, c))
+    return 2;
+  return 1;
+}
+
+// Returns the number of columns needed to display a given
+// string in a fixed-width font.
+int C_display_width(C_Parser *tokenizer, char *p, int len) { + char *start = p; + int w = 0; + while (p - start < len) { + uint32_t c = C_decode_utf8(tokenizer, &p, p); + w += char_width(c); + } + return w; +} diff --git a/ravicomp/src/codegen.c b/ravicomp/src/codegen.c index 29ce078f..437a84a1 100644 --- a/ravicomp/src/codegen.c +++ b/ravicomp/src/codegen.c @@ -30,6 +30,7 @@ #include "codegen.h" #include "ravi_api.h" +#include "chibicc/chibicc.h" #include #include @@ -37,6 +38,7 @@ /* * Only 64-bits supported right now * Following must be kept in sync with changes in the actual header files + * FIXME we need a way to customise this for 32-bit vs 64-bit */ static const char Lua_header[] = @@ -190,7 +192,6 @@ static const char Lua_header[] = "#define fcfvalue(o) check_exp(ttisfcf(o), val_(o).p)\n" "#define hvalue(o) check_exp(ttistable(o), gco2t(val_(o).gc))\n" "#define arrvalue(o) check_exp(ttisarray(o), gco2array(val_(o).gc))\n" - "#define arrvalue(o) check_exp(ttisarray(o), gco2array(val_(o).gc))\n" "#define bvalue(o) check_exp(ttisboolean(o), val_(o).b)\n" "#define thvalue(o) check_exp(ttisthread(o), gco2th(val_(o).gc))\n" "#define deadvalue(o) check_exp(ttisdeadkey(o), cast(void *, val_(o).gc))\n" @@ -314,6 +315,8 @@ static const char Lua_header[] = " { TValue *io=(o); const Udata *iu = (u); \\\n" " io->value_ = iu->user_; settt_(io, iu->ttuv_); \\\n" " checkliveness(L,io); }\n" + "#define sizeludata(l) (sizeof(union UUdata) + (l))\n" + "#define sizeudata(u) sizeludata((u)->len)\n" "typedef enum {\n" "RAVI_TI_NIL,\n" "RAVI_TI_FALSE,\n" @@ -670,6 +673,7 @@ static const char Lua_header[] = "extern LClosure *luaF_newLclosure (lua_State *L, int n);\n" "extern TString *luaS_newlstr (lua_State *L, const char *str, size_t l);\n" "extern Proto *luaF_newproto (lua_State *L);\n" + "extern Udata *luaS_newudata (lua_State *L, size_t s);\n" "extern void luaD_inctop (lua_State *L);\n" "#define luaM_reallocv(L,b,on,n,e) luaM_realloc_(L, (b), (on)*(e), (n)*(e))\n" "#define 
luaM_newvector(L,n,t) cast(t *, luaM_reallocv(L, NULL, 0, n, sizeof(t)))\n" @@ -685,13 +689,78 @@ static const char Lua_header[] = "#define intop(op,v1,v2) l_castU2S(l_castS2U(v1) op l_castS2U(v2))\n" "#define nan (0./0.)\n" "#define inf (1./0.)\n" - "#define luai_numunm(L,a) (-(a))\n"; + "#define luai_numunm(L,a) (-(a))\n" + "typedef struct {\n" + " char *ptr;\n" + " unsigned int len;\n" + "} Ravi_StringOrUserData;\n" + "typedef struct {\n" + " lua_Integer *ptr;\n" + " unsigned int len;\n" + "} Ravi_IntegerArray;\n" + "typedef struct {\n" + " lua_Number *ptr;\n" + " unsigned int len;\n" + "} Ravi_NumberArray;\n" + ; + +static const char Embedded_C_header[] = + + "typedef long long int64_t;\n" + "typedef double lua_Number;\n" + "typedef int64_t lua_Integer;\n" + "typedef struct {\n" + " union { lua_Integer i; lua_Number n; } value_;\n" + "} TValue;\n" + "static lua_Integer ivalue(const TValue *v) { return 0; }\n" + "static lua_Number fvalue(const TValue *v) { return 0.0; }\n" + "typedef struct {\n" + " char *data;\n" + " unsigned int len;\n" + "} Ravi_Arr;\n" + "static Ravi_Arr *arrvalue(const TValue *v) { return (Ravi_Arr *)v; }\n" + "static int ttisfulluserdata(const TValue *v) { return 0; }\n" + "static int ttislightuserdata(const TValue *v) { return 0; }\n" + "static int ttisstring(const TValue *v) { return 0; }\n" + "static void *uvalue(const TValue *v) { return (void*)0; }\n" + "static void *pvalue(const TValue *v) { return (void*)0; }\n" + "static void *svalue(const TValue *v) { return (void*)0; }\n" + "static void *getudatamem(const TValue *v) { return (void*)0; }\n" + "static void *gco2u(const TValue *v) { return (void*)0; }\n" + "static unsigned int sizeudata(const void *p) { return 0; }\n" + "static unsigned int vslen(const TValue *v) { return 0; }\n" + "static void settt_(TValue *v, int tt) {}\n" + "TValue *R(int reg) { return (void*)0; }\n" + "static const int LUA_TNIL = 0;\n" + "static const int LUA_TBOOLEAN = 1;\n" + "static const int 
LUA_TLIGHTUSERDATA = 2;\n" + "static const int LUA_TNUMBER = 3;\n" + "static const int LUA_TSTRING = 4;\n" + "static const int LUA_TTABLE = 5;\n" + "static const int LUA_TFUNCTION = 6;\n" + "static const int LUA_TUSERDATA = 7;\n" + "static const int LUA_TTHREAD = 8;\n" + "typedef struct {\n" + " char *ptr;\n" + " unsigned int len;\n" + "} Ravi_StringOrUserData;\n" + "typedef struct {\n" + " lua_Integer *ptr;\n" + " unsigned int len;\n" + "} Ravi_IntegerArray;\n" + "typedef struct {\n" + " lua_Number *ptr;\n" + " unsigned int len;\n" + "} Ravi_NumberArray;\n" + "int error_code;\n" + ; typedef struct { Proc *proc; TextBuffer prologue; TextBuffer body; TextBuffer tb; // Temp buf + TextBuffer C_local_declarations; // Declarations of temp int/float vars required when analysing embedded C code struct Ravi_CompilerInterface *api; jmp_buf env; } Function; @@ -739,6 +808,8 @@ enum { LUA_OPBNOT = 13 }; +static void output_string_literal(TextBuffer *mb, const char *s, unsigned int len); + static inline Pseudo *get_operand(Instruction *insn, unsigned idx) { return (Pseudo *)raviX_ptrlist_nth_entry((PtrList *)insn->operands, idx); @@ -820,6 +891,7 @@ static void initfn(Function *fn, Proc *proc, struct Ravi_CompilerInterface *api) raviX_buffer_init(&fn->prologue, 4096); raviX_buffer_init(&fn->body, 4096); raviX_buffer_init(&fn->tb, 256); + raviX_buffer_init(&fn->C_local_declarations, 256); raviX_buffer_add_fstring(&fn->prologue, "static int %s(lua_State *L) {\n", proc->funcname); raviX_buffer_add_string(&fn->prologue, "int error_code = 0;\n"); raviX_buffer_add_string(&fn->prologue, "int result = 0;\n"); @@ -829,6 +901,8 @@ static void initfn(Function *fn, Proc *proc, struct Ravi_CompilerInterface *api) raviX_buffer_add_string(&fn->prologue, "StkId base = ci->u.l.base;\n"); emit_vars("lua_Integer", int_var_prefix, &proc->temp_int_pseudos, &fn->prologue); emit_vars("lua_Number", flt_var_prefix, &proc->temp_flt_pseudos, &fn->prologue); + emit_vars("lua_Integer", int_var_prefix, 
&proc->temp_int_pseudos, &fn->C_local_declarations); + emit_vars("lua_Number", flt_var_prefix, &proc->temp_flt_pseudos, &fn->C_local_declarations); // Following are temp dummy regs // In ops like luaV_settable we may use up to two variables, hence we create // two of each @@ -849,6 +923,7 @@ static void cleanup(Function *fn) raviX_buffer_free(&fn->prologue); raviX_buffer_free(&fn->body); raviX_buffer_free(&fn->tb); + raviX_buffer_free(&fn->C_local_declarations); } /* Outputs an l-value/r-value variable name for a primitive C int / float type */ @@ -2406,6 +2481,521 @@ static int emit_op_init(Function *fn, Instruction *insn) return 0; }
+
+/* State shared by the C__decl checks below. */
+typedef struct C_Decl_Analysis {
+	C_Parser *parser;
+	C_Scope *global_scope;
+	int status;  /* 0 while clean; decremented once per violation found */
+	int is_tags; /* non-zero: hashmap values are C_Type*, else C_VarScope* */
+} C_Decl_Analysis;
+
+/* Checks type declarations do not contain pointers and unions.
+ * Struct members and array element types are checked recursively, so a
+ * pointer cannot be hidden inside `char *a[4]` or a nested struct. */
+static void analyze_C_types(C_Decl_Analysis *analysis, C_Type *ty)
+{
+	if (ty == NULL)
+		return;
+	if (ty->kind == TY_STRUCT) {
+		for (C_Member *mem = ty->members; mem; mem = mem->next) {
+			analyze_C_types(analysis, mem->ty);
+		}
+	}
+	else if (ty->kind == TY_ARRAY) {
+		analyze_C_types(analysis, ty->base);
+	}
+	else if (ty->kind == TY_PTR) {
+		fprintf(stderr, "Declaring pointer type is not allowed\n");
+		analysis->status--;
+	}
+	else if (ty->kind == TY_UNION) {
+		fprintf(stderr, "Declaring union type is not allowed\n");
+		analysis->status--;
+	}
+}
+
+/* Checks that there are no entities being created in the global scope */
+static void analyze_C_vars(C_Decl_Analysis *analysis, C_VarScope *vc)
+{
+	if (vc->var) {
+		fprintf(stderr, "Declaring objects is not allowed: %s\n", vc->var->name);
+		analysis->status--;
+	}
+	else if (vc->type_def) {
+		analyze_C_types(analysis, vc->type_def);
+	}
+}
+
+/* Built-ins are excluded from analysis.
+ * `key` is a length-delimited name (not necessarily NUL terminated).
+ * Returns 1 only when the whole name equals one of the entries below;
+ * a mere prefix such as "ival" or an empty key is not a built-in. */
+static int is_builtin(char *key, int keylen)
+{
+	static const char *const builtins[] = {
+		"alloca",
+		"int64_t",
+		"lua_Number",
+		"lua_Integer",
+		"TValue",
+		"ivalue",
+		"fvalue",
+		"Ravi_Arr",
+		"arrvalue",
+		"ttisfulluserdata",
+		"ttislightuserdata",
+		"ttisstring",
+		"uvalue",
+		"pvalue",
+		"svalue",
+		"getudatamem",
+		"gco2u",
+		"sizeudata",
+		"vslen",
+		"R",
+		"Ravi_StringOrUserData",
+		"Ravi_IntegerArray",
+		"Ravi_NumberArray",
+		"error_code",
+		"LUA_TBOOLEAN",
+		"LUA_TNIL",
+		"LUA_TNUMBER",
+		"LUA_TUSERDATA",
+		"LUA_TLIGHTUSERDATA",
+		"LUA_TTHREAD",
+		"LUA_TTABLE",
+		"LUA_TFUNCTION",
+		"LUA_TSTRING",
+		"settt_",
+		NULL
+	};
+	if (key == NULL || keylen <= 0)
+		return 0;
+	for (int i = 0; builtins[i]; i++) {
+		/* Compare lengths first: strncmp(key, name, keylen) alone would
+		 * accept any key that is a prefix of a built-in name. */
+		if (strlen(builtins[i]) == (size_t)keylen && memcmp(key, builtins[i], (size_t)keylen) == 0) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/* Perform code analysis. status will be set < 0 if issues found.
+ * Has the shape of a hashmap_foreach callback: `userdata` is the
+ * C_Decl_Analysis, `val` is a C_Type* when is_tags is set and a
+ * C_VarScope* otherwise. */
+static void analyze_C_declarations(void *userdata, char *key, int keylen, void *val)
+{
+	C_Decl_Analysis *analysis = (C_Decl_Analysis *)userdata;
+	if (is_builtin(key, keylen))
+		return;
+	if (analysis->is_tags) {
+		C_Type *ty = val;
+		analyze_C_types(analysis, ty);
+	}
+	else {
+		C_VarScope *vc = val;
+		analyze_C_vars(analysis, vc);
+	}
+}
+
+/* Result of walking an embedded C statement: 0 while clean, decremented
+ * once per disallowed construct. */
+typedef struct C_Code_Analysis {
+	int status;
+} C_Code_Analysis;
+
+/* Recursively visits every statement and sub-expression reachable from
+ * `node` and flags `return` statements and calls to functions that are
+ * not built-ins. A NULL node is ignored. Node kinds not listed are
+ * treated as leaves. */
+static void walk_node(C_Code_Analysis *analysis, C_Node *node)
+{
+	if (node == NULL)
+		return;
+	switch (node->kind) {
+	case ND_BLOCK:
+	case ND_STMT_EXPR:
+		for (C_Node *n = node->body; n; n = n->next)
+			walk_node(analysis, n);
+		break;
+	case ND_IF:
+	case ND_COND:
+		walk_node(analysis, node->cond);
+		walk_node(analysis, node->then);
+		walk_node(analysis, node->els);
+		break;
+	case ND_FOR:
+		walk_node(analysis, node->init);
+		walk_node(analysis, node->cond);
+		walk_node(analysis, node->inc);
+		/* The loop body must be checked too, otherwise anything placed
+		 * inside a for/while loop escapes analysis. */
+		walk_node(analysis, node->then);
+		break;
+	case ND_DO:
+		walk_node(analysis, node->then);
+		walk_node(analysis, node->cond);
+		break;
+	case ND_SWITCH:
+		walk_node(analysis, node->cond);
+		if (node->then) {
+			/* Walking the body reaches every case/default label and all
+			 * the statements between them, not just the first statement
+			 * that follows each label. */
+			walk_node(analysis, node->then);
+		}
+		else {
+			/* NOTE(review): the switch body is presumably always stored
+			 * in `then`; fall back to the label chain if it is not. */
+			for (C_Node *n = node->case_next; n; n = n->case_next) {
+				walk_node(analysis, n);
+			}
+			walk_node(analysis, node->default_case);
+		}
+		break;
+	case ND_RETURN:
+		fprintf(stderr, "Trying to return from embedded C code is not allowed\n");
+		analysis->status--;
+		walk_node(analysis, node->lhs);
+		break;
+	case ND_FUNCALL:
+		//fprintf(stderr, "Calling function %.*s\n", node->func_ty->name->len, node->func_ty->name->loc);
+		if (!is_builtin(node->func_ty->name->loc, node->func_ty->name->len)) {
+			fprintf(stderr, "Calling functions from embedded C code is not allowed\n");
+			analysis->status--;
+		}
+		walk_node(analysis, node->lhs);
+		for (C_Node *arg = node->args; arg; arg = arg->next) {
+			walk_node(analysis, arg);
+		}
+		break;
+	/* Single operand in lhs */
+	case ND_CASE:
+	case ND_GOTO_EXPR:
+	case ND_LABEL:
+	case ND_EXPR_STMT:
+	case ND_NEG:
+	case ND_MEMBER: /* the struct expression may itself contain a call */
+	case ND_DEREF:
+	case ND_ADDR:
+	case ND_CAST:
+	case ND_NOT:
+	case ND_BITNOT:
+		walk_node(analysis, node->lhs);
+		break;
+	/* Two operands in lhs and rhs */
+	case ND_ASSIGN:
+	case ND_COMMA:
+	case ND_LOGAND:
+	case ND_LOGOR:
+	case ND_EXCH:
+	case ND_ADD:
+	case ND_SUB:
+	case ND_MUL:
+	case ND_DIV:
+	case ND_EQ:
+	case ND_NE:
+	case ND_LT:
+	case ND_LE:
+	case ND_MOD:
+	case ND_BITAND:
+	case ND_BITOR:
+	case ND_BITXOR:
+	case ND_SHL:
+	case ND_SHR:
+		walk_node(analysis, node->lhs);
+		walk_node(analysis, node->rhs);
+		break;
+	case ND_CAS:
+		walk_node(analysis, node->cas_addr);
+		walk_node(analysis, node->cas_old);
+		walk_node(analysis, node->cas_new);
+		break;
+	/* Leaves: nothing below them to inspect */
+	case ND_ASM:
+	case ND_NULL_EXPR:
+	case ND_NUM:
+	case ND_VAR:
+	case ND_MEMZERO:
+	case ND_LABEL_VAL:
+		break;
+	}
+}
+
+/* Parses the embedded C text in `C_code` as a compound statement, in a
+ * scope that already contains the stub header, the user's C__decl
+ * declarations and the function's temp variables, then walks it with
+ * walk_node(). Returns 0 when the code is acceptable and a negative
+ * value otherwise; a parser error message, if any, is forwarded through
+ * fn->api->error_message. */
+static int analyze_C_code(Function *fn, TextBuffer *C_code)
+{
+	static const char* addition_decls = "\n"
+		"TValue ival0;\n"
+		"TValue fval0;\n"
+		"TValue bval0;\n"
+		"TValue ival1;\n"
+		"TValue fval1;\n"
+		"TValue bval1;\n"
+		"TValue ival2;\n"
+		"TValue fval2;\n"
+		"TValue bval2;\n"
+		;
+	TextBuffer code;
+	raviX_buffer_init(&code, 1024);
+	raviX_buffer_add_string(&code, Embedded_C_header);
+	raviX_buffer_add_string(&code, addition_decls);
+	if (fn->proc->linearizer->C_declarations.buf)
+		raviX_buffer_add_string(&code, fn->proc->linearizer->C_declarations.buf);
+	if (fn->C_local_declarations.buf) /* declarations of temp integer and float vars */
+		raviX_buffer_add_string(&code, fn->C_local_declarations.buf);
+
+	C_Code_Analysis analysis = {0};
+	C_Parser parser;
+	C_parser_init(&parser);
+	C_Scope *global_scope = C_global_scope(&parser);
+	C_Token *tok = C_tokenize_buffer(&parser, code.buf);
+	if (tok == NULL) {
+		analysis.status = -1;
+		goto Lexit;
+	}
+	C_convert_pp_tokens(&parser, tok);
+	if (C_parse(global_scope, &parser, tok) == NULL){
+		analysis.status = -1;
+		goto Lexit;
+	}
+
+	tok = C_tokenize_buffer(&parser, C_code->buf);
+	if (tok == NULL){
+		analysis.status = -1;
+		goto Lexit;
+	}
+	C_convert_pp_tokens(&parser, tok);
+	parser.embedded_mode = true;
+	C_Node *node = C_parse_compound_statement(global_scope, &parser, tok);
+	if (node == NULL){
+		analysis.status = -1;
+		goto Lexit;
+	}
+	walk_node(&analysis, node);
+
+Lexit:
+	if (analysis.status < 0 && parser.error_message) {
+		fn->api->error_message(fn->api->context, parser.error_message);
+	}
+	C_parser_destroy(&parser);
+	raviX_buffer_free(&code);
+
+	return analysis.status;
+}
+
+/* Load variables from Ravi/Lua to C */
+static void emit_userdata_C_variable_load(Function *fn, Instruction *insn, Pseudo *pseudo)
+{
+	if (pseudo->type != PSEUDO_SYMBOL &&
+	    !((pseudo->type == PSEUDO_TEMP_FLT || pseudo->type 
== PSEUDO_TEMP_INT) && pseudo->temp_for_local != NULL)) { + handle_error_bad_pseudo(fn, pseudo, "Unsupported pseudo type in C bind variables"); + return; + } + LuaSymbol *symbol = pseudo->type == PSEUDO_SYMBOL ? pseudo->symbol : pseudo->temp_for_local; + ravitype_t type = symbol->variable.value_type.type_code; + if (type == RAVI_TNUMINT) { + raviX_buffer_add_fstring(&fn->body, " lua_Integer %s = ", symbol->variable.var_name->str); + emit_varname_or_constant(fn, pseudo); + raviX_buffer_add_string(&fn->body, ";\n"); + return; + } + else if (type == RAVI_TNUMFLT) { + raviX_buffer_add_fstring(&fn->body, " lua_Number %s = ", symbol->variable.var_name->str); + emit_varname_or_constant(fn, pseudo); + raviX_buffer_add_string(&fn->body, ";\n"); + return; + } + else if (type == RAVI_TARRAYINT) { + raviX_buffer_add_fstring(&fn->body, " Ravi_IntegerArray %s = {0};\n", symbol->variable.var_name->str); + } + else if (type == RAVI_TARRAYFLT) { + raviX_buffer_add_fstring(&fn->body, " Ravi_NumberArray %s = {0};\n", symbol->variable.var_name->str); + } + else if (type == RAVI_TSTRING || type == RAVI_TUSERDATA || type == RAVI_TANY) { + // We assume ANY is userdata - runtime check generated below. 
+ raviX_buffer_add_fstring(&fn->body, " Ravi_StringOrUserData %s = {0};\n", symbol->variable.var_name->str); + } + else { + handle_error_bad_pseudo(fn, pseudo, "Unsupported symbol type in C bind variable"); + return; + } + raviX_buffer_add_string(&fn->body, " {\n"); + raviX_buffer_add_fstring(&fn->body, " TValue *raviX__%s = ", symbol->variable.var_name->str); + emit_reg_accessor(fn, symbol->variable.pseudo, 0); + raviX_buffer_add_string(&fn->body, ";\n"); + if (type == RAVI_TARRAYINT) { + raviX_buffer_add_fstring(&fn->body, " %s.ptr = (lua_Integer*) arrvalue(raviX__%s)->data;\n", symbol->variable.var_name->str, symbol->variable.var_name->str); + raviX_buffer_add_fstring(&fn->body, " %s.len = (unsigned int) arrvalue(raviX__%s)->len;\n", symbol->variable.var_name->str, symbol->variable.var_name->str); + } + else if (type == RAVI_TARRAYFLT) { + raviX_buffer_add_fstring(&fn->body, " %s.ptr = (lua_Number *) arrvalue(raviX__%s)->data;\n", symbol->variable.var_name->str, symbol->variable.var_name->str); + raviX_buffer_add_fstring(&fn->body, " %s.len = (unsigned int) arrvalue(raviX__%s)->len;\n", symbol->variable.var_name->str, symbol->variable.var_name->str); + } + else { + raviX_buffer_add_fstring(&fn->body, " if (ttisfulluserdata(raviX__%s)) {\n", symbol->variable.var_name->str); + raviX_buffer_add_fstring(&fn->body, " %s.ptr = getudatamem(uvalue(raviX__%s));\n", symbol->variable.var_name->str, symbol->variable.var_name->str); + raviX_buffer_add_fstring(&fn->body, " %s.len = (unsigned int) sizeudata(gco2u(raviX__%s));\n", symbol->variable.var_name->str, symbol->variable.var_name->str); + raviX_buffer_add_string(&fn->body, " }\n"); + raviX_buffer_add_fstring(&fn->body, " else if (ttislightuserdata(raviX__%s)) {\n", symbol->variable.var_name->str); + raviX_buffer_add_fstring(&fn->body, " %s.ptr = pvalue(raviX__%s);\n", symbol->variable.var_name->str, symbol->variable.var_name->str); + raviX_buffer_add_fstring(&fn->body, " %s.len = 0;\n", symbol->variable.var_name->str); 
+		raviX_buffer_add_string(&fn->body, " }\n");
+		raviX_buffer_add_fstring(&fn->body, " else if (ttisstring(raviX__%s)) {\n", symbol->variable.var_name->str);
+		raviX_buffer_add_fstring(&fn->body, " %s.ptr = svalue(raviX__%s);\n", symbol->variable.var_name->str, symbol->variable.var_name->str);
+		raviX_buffer_add_fstring(&fn->body, " %s.len = vslen(raviX__%s);\n", symbol->variable.var_name->str, symbol->variable.var_name->str);
+		raviX_buffer_add_string(&fn->body, " }\n");
+		raviX_buffer_add_string(&fn->body, " else {\n");
+		raviX_buffer_add_fstring(&fn->body, " error_code = %d;\n", Error_type_mismatch);
+		raviX_buffer_add_string(&fn->body, " goto Lraise_error;\n");
+		raviX_buffer_add_string(&fn->body, " }\n");
+	}
+	raviX_buffer_add_string(&fn->body, " }\n");
+}
+
+/* Store variables from C back to Ravi/Lua.
+ * Only integer and floating point symbols are written back; every other
+ * type is ignored. */
+static void emit_userdata_C_variable_store(Function *fn, Instruction *insn, Pseudo *pseudo)
+{
+	LuaSymbol *symbol = pseudo->type == PSEUDO_SYMBOL ? pseudo->symbol : pseudo->temp_for_local;
+	ravitype_t type = symbol->variable.value_type.type_code;
+	if (type != RAVI_TNUMINT && type != RAVI_TNUMFLT) {
+		return;
+	}
+	raviX_buffer_add_string(&fn->body, " {\n");
+	raviX_buffer_add_string(&fn->body, " ");
+	emit_varname(fn, symbol->variable.pseudo);
+	raviX_buffer_add_string(&fn->body, " = ");
+	raviX_buffer_add_fstring(&fn->body, "%s;\n", symbol->variable.var_name->str);
+	raviX_buffer_add_string(&fn->body, " }\n");
+}
+
+
+/* Emits one embedded C block: a `{ ... }` that first binds the listed
+ * Ravi/Lua variables to C variables, then contains the user's C text,
+ * then copies numeric variables back. The text is built in a scratch
+ * buffer and is appended to the function body only if analyze_C_code()
+ * accepts it. Returns 0 on success, -1 if the code was rejected. */
+static int emit_op_embed_C(Function *fn, Instruction *insn)
+{
+	// Save the buffer and switch to new one temporarily
+	TextBuffer saved = fn->body;
+	fn->body.buf = NULL; fn->body.pos = 0; fn->body.capacity = 0;
+
+	// FIXME error handling - as we will leak memory if longjmp occurs
+	raviX_buffer_add_string(&fn->body, "{\n");
+
+	// Load the Ravi/Lua symbols into C code
+	for (int i = 0; i < get_num_operands(insn); i++) {
+		Pseudo *pseudo = get_operand(insn, i);
+		emit_userdata_C_variable_load(fn, insn, pseudo);
+	}
+
+	// output C code
+	Pseudo *C_code = get_first_target(insn);
+	assert(C_code->type == PSEUDO_CONSTANT && C_code->constant->type == RAVI_TSTRING);
+	raviX_buffer_add_string(&fn->body, C_code->constant->s->str);
+
+	// Store values back to Ravi/Lua variables
+	for (int i = 0; i < get_num_operands(insn); i++) {
+		Pseudo *pseudo = get_operand(insn, i);
+		LuaSymbol *symbol = pseudo->type == PSEUDO_SYMBOL ? pseudo->symbol : pseudo->temp_for_local;
+		if (symbol->variable.value_type.type_code == RAVI_TNUMINT ||
+		    symbol->variable.value_type.type_code == RAVI_TNUMFLT) {
+			emit_userdata_C_variable_store(fn, insn, pseudo);
+		}
+	}
+
+	raviX_buffer_add_string(&fn->body, "\n}\n");
+
+	TextBuffer code = fn->body;
+	fn->body = saved; // Restore original output buffer
+
+	int rc = 0;
+	if (analyze_C_code(fn, &code) != 0) {
+		rc = -1;
+	}
+	else {
+		raviX_buffer_add_string(&fn->body, code.buf);
+	}
+	/* The scratch buffer is released on both paths; it used to leak when
+	 * the analysis rejected the code. */
+	raviX_buffer_free(&code);
+	return rc;
+}
+
+/* Emits code for C__new: looks up the size of the named struct tag or
+ * typedef among the C__decl declarations and generates a block that
+ * allocates `size * n` bytes of userdata, where n is the integer held in
+ * the second operand. Returns 0 on success, -1 if the declarations do
+ * not parse or the type is unknown. */
+static int emit_op_embed_C__new(Function *fn, Instruction *insn) {
+	LinearizerState *linearizer = fn->proc->linearizer;
+
+	TextBuffer code;
+	raviX_buffer_init(&code, 1024);
+	raviX_buffer_add_string(&code, Embedded_C_header);
+	/* No C__decl seen yet leaves the buffer NULL; the type lookup below
+	 * then reports the type as unknown. */
+	if (linearizer->C_declarations.buf)
+		raviX_buffer_add_string(&code, linearizer->C_declarations.buf);
+
+	int status = -1;
+	C_Parser parser;
+	C_parser_init(&parser);
+	C_Scope *global_scope = C_global_scope(&parser);
+	C_Token *tok = C_tokenize_buffer(&parser, code.buf);
+	if (tok == NULL) {
+		goto Lexit;
+	}
+	C_convert_pp_tokens(&parser, tok);
+	if (C_parse(global_scope, &parser, tok) == NULL) {
+		goto Lexit;
+	}
+	Pseudo *tagname = get_operand(insn, 0);
+	Pseudo *size = get_operand(insn, 1);
+	Pseudo *target = get_target(insn, 0);
+	// Add utility in chibicc to find a type
+	C_Type *ty = hashmap_get(&global_scope->tags, tagname->constant->s->str);
+	size_t tagsz = 0;
+	if (ty != NULL) {
+		tagsz = ty->size;
+	}
+	else {
+		/* Not a struct tag: try a typedef name */
+		C_VarScope *vc = hashmap_get(&global_scope->vars, tagname->constant->s->str);
+		if (vc && vc->type_def) {
+			tagsz = vc->type_def->size;
+		}
+		else {
+			TextBuffer message;
+			raviX_buffer_init(&message, 128);
+			raviX_buffer_add_fstring(&message, "Unknown type '%s'", tagname->constant->s->str);
+			fn->api->error_message(fn->api->context, message.buf);
+			raviX_buffer_free(&message);
+			goto Lexit;
+		}
+	}
+
+	raviX_buffer_add_string(&fn->body, "{\n");
+	raviX_buffer_add_string(&fn->body, " TValue *raviX__elements = ");
+	emit_reg_accessor(fn, size, 0);
+	raviX_buffer_add_string(&fn->body, ";\n");
+	raviX_buffer_add_string(&fn->body, " TValue *raviX__target = ");
+	emit_reg_accessor(fn, target, 0);
+	raviX_buffer_add_string(&fn->body, ";\n");
+
+	raviX_buffer_add_string(&fn->body, " if (ttisinteger(raviX__elements)) {\n");
+	raviX_buffer_add_string(&fn->body, " lua_Integer n = ivalue(raviX__elements);\n");
+	raviX_buffer_add_fstring(&fn->body, " Udata *u = luaS_newudata(L, %d * n);\n", (int)tagsz);
+	raviX_buffer_add_string(&fn->body, " setuvalue(L, raviX__target, u);\n");
+	raviX_buffer_add_string(&fn->body, " }\n");
+	raviX_buffer_add_string(&fn->body, " else {\n");
+	// FIXME need specific error code
+	raviX_buffer_add_fstring(&fn->body, " error_code = %d;\n", Error_type_mismatch);
+	raviX_buffer_add_string(&fn->body, " goto Lraise_error;\n");
+	raviX_buffer_add_string(&fn->body, " }\n");
+	raviX_buffer_add_string(&fn->body, "}\n");
+	status = 0;
+
+Lexit:
+	/* Surface parser diagnostics the same way the other embedded C
+	 * entry points do. */
+	if (status != 0 && parser.error_message) {
+		fn->api->error_message(fn->api->context, parser.error_message);
+	}
+	C_parser_destroy(&parser);
+	raviX_buffer_free(&code);
+
+	return status;
+}
+ static int output_instruction(Function *fn, Instruction *insn) { int rc = 0; @@ -2605,6 +3195,14 @@ static int output_instruction(Function *fn, Instruction *insn) rc = emit_op_init(fn, insn); break; + case op_embed_C: + rc = emit_op_embed_C(fn, insn); + break; + + case op_embed_C__new: + rc = emit_op_embed_C__new(fn, insn); + break; + default: fprintf(stderr, "Unsupported opcode %s\n", raviX_opcode_name(insn->opcode)); rc = -1; @@ -2883,11 +3481,57 @@ static void preprocess_upvalues(Proc *proc) END_FOR_EACH_PTR(childproc) } +/* Emits top level C__decl contents */ +static int 
emit_embedded_C_declarations(LinearizerState *linearizer, struct Ravi_CompilerInterface *api, TextBuffer *mb) +{ + if (linearizer->C_declarations.buf == NULL || linearizer->C_declarations.buf[0] == 0) + return 0; + + TextBuffer code; + raviX_buffer_init(&code, 1024); + raviX_buffer_add_string(&code, Embedded_C_header); + raviX_buffer_add_string(&code, linearizer->C_declarations.buf); + + C_Parser parser; + C_parser_init(&parser); + C_Scope *global_scope = C_global_scope(&parser); + C_Decl_Analysis analysis = {&parser, global_scope, 0}; + + C_Token *tok = C_tokenize_buffer(&parser, code.buf); + if (tok == NULL) { + analysis.status = -1; + goto Lexit; + } + C_convert_pp_tokens(&parser, tok); + if (C_parse(global_scope, &parser, tok) == NULL) { + analysis.status = -1; + goto Lexit; + } + + analysis.is_tags = 1; + hashmap_foreach(&global_scope->tags, analyze_C_declarations, &analysis); + analysis.is_tags = 0; + hashmap_foreach(&global_scope->vars, analyze_C_declarations, &analysis); + + if (analysis.status == 0) { + raviX_buffer_add_string(mb, linearizer->C_declarations.buf); + } + +Lexit: + if (analysis.status < 0 && parser.error_message) { + api->error_message(api->context, parser.error_message); + } + C_parser_destroy(&parser); + raviX_buffer_free(&code); + + return analysis.status; +} + static void debug_message(void *context, const char *filename, long long line, const char *message) { fprintf(stdout, "%s:%lld: %s\n", filename, line, message); } -static void error_message(void *context, const char *message) { fprintf(stdout, "ERROR: %s\n", message); } +static void error_message(void *context, const char *message) { fprintf(stderr, "%s\n", message); } static struct Ravi_CompilerInterface stub_compilerInterface = { .context = NULL, @@ -2914,6 +3558,11 @@ int raviX_generate_C(LinearizerState *linearizer, TextBuffer *mb, struct Ravi_Co // FIXME we need a way to customise this for 32-bit vs 64-bit raviX_buffer_add_string(mb, Lua_header); + /* emit C__decl statements in 
ravi code */ + if (emit_embedded_C_declarations(linearizer, ravi_interface, mb) != 0) { + return -1; + } + /* Preprocess upvalue attributes */ preprocess_upvalues(linearizer->main_proc); diff --git a/ravicomp/src/df_liveness.c b/ravicomp/src/df_liveness.c index fa2f5f18..87e767a9 100644 --- a/ravicomp/src/df_liveness.c +++ b/ravicomp/src/df_liveness.c @@ -120,4 +120,4 @@ static int live_transfer_func(void *userdata, nodeId_t id) // Right now we have disjoint sets for temps / locals - to do this efficiently we need a merged set of regs for each proc // Liveness analysis is a backward data flow problem -// see calculate_func_cfg_live_info in mir_genc.c \ No newline at end of file +// see calculate_func_cfg_live_info in mir_genc.c diff --git a/ravicomp/src/dominator.c b/ravicomp/src/dominator.c index 3531af4a..460261cb 100644 --- a/ravicomp/src/dominator.c +++ b/ravicomp/src/dominator.c @@ -169,4 +169,4 @@ void raviX_dominator_tree_output(DominatorTree *tree, FILE *fp) for (uint32_t i = 0; i < tree->N; i++) { fprintf(stdout, "IDOM[%d] = %d\n", i, raviX_node_index(tree->IDOM[i])); } -} \ No newline at end of file +} diff --git a/ravicomp/src/hash_table.c b/ravicomp/src/hash_table.c index b1c90857..bc9b0067 100644 --- a/ravicomp/src/hash_table.c +++ b/ravicomp/src/hash_table.c @@ -421,4 +421,4 @@ hash_table_random_entry(HashTable *ht, return NULL; } #endif -#endif \ No newline at end of file +#endif diff --git a/ravicomp/src/lexer.c b/ravicomp/src/lexer.c index 861f96d3..7b5f4444 100644 --- a/ravicomp/src/lexer.c +++ b/ravicomp/src/lexer.c @@ -61,6 +61,9 @@ static const char *const luaX_tokens[] = { "in", "local", "defer", + "C__decl", + "C__unsafe", + "C__new", "nil", "not", "or", diff --git a/ravicomp/src/linearizer.c b/ravicomp/src/linearizer.c index 5c186a34..d292b7e2 100644 --- a/ravicomp/src/linearizer.c +++ b/ravicomp/src/linearizer.c @@ -138,6 +138,7 @@ void raviX_destroy_linearizer(LinearizerState *linearizer) raviX_destroy_graph(proc->cfg); } 
END_FOR_EACH_PTR(proc) + raviX_buffer_free(&linearizer->C_declarations); raviX_allocator_destroy(&linearizer->instruction_allocator); raviX_allocator_destroy(&linearizer->ptrlist_allocator); raviX_allocator_destroy(&linearizer->pseudo_allocator); @@ -148,6 +149,11 @@ void raviX_destroy_linearizer(LinearizerState *linearizer) raviX_free(linearizer); } +static void add_C_declaration(LinearizerState *linearizer, const StringObject *str) +{ + raviX_buffer_add_string(&linearizer->C_declarations, str->str); +} + /** * We assume strings are all interned and can be compared by * address. Return true if values match else false. @@ -1537,6 +1543,19 @@ static void linearize_local_statement(Proc *proc, AstNode *stmt) linearize_assignment(proc, stmt->local_stmt.expr_list, varinfo, nv); } +static Pseudo *linearize_builtin_expression(Proc *proc, AstNode *expr) +{ + Instruction *insn = allocate_instruction(proc, op_embed_C__new, expr->line_number); + add_instruction_operand(proc, insn, allocate_constant_pseudo(proc, allocate_string_constant(proc, expr->builtin_expr.type_name))); + Pseudo *size_expr = linearize_expression(proc, expr->builtin_expr.size_expr); + add_instruction_operand(proc, insn, size_expr); + Pseudo *target = allocate_temp_pseudo(proc, RAVI_TUSERDATA); + add_instruction_target(proc, insn, target); + add_instruction(proc, insn); + free_temp_pseudo(proc, size_expr, false); + return target; +} + static Pseudo *linearize_expression(Proc *proc, AstNode *expr) { Pseudo *result = NULL; @@ -1569,6 +1588,9 @@ static Pseudo *linearize_expression(Proc *proc, AstNode *expr) case EXPR_CONCAT: { result = linearize_concat_expression(proc, expr); } break; + case EXPR_BUILTIN: { + result = linearize_builtin_expression(proc, expr); + } break; default: handle_error(proc->linearizer->ast_container, "feature not yet implemented"); break; @@ -2293,6 +2315,39 @@ static void linearize_while_statment(Proc *proc, AstNode *node) proc->current_break_scope = previous_break_scope; } +static 
void linearize_embedded_C_decl(Proc *proc, AstNode *node) +{ + if (proc != proc->linearizer->main_proc) { + handle_error(proc->linearizer->ast_container, + "Embedded C declarations can only be present in the main chunk"); + } + add_C_declaration(proc->linearizer, node->embedded_C_stmt.C_src_snippet); +} + +static void linearize_embedded_C(Proc *proc, AstNode *node) +{ + if (node->embedded_C_stmt.is_decl) { + linearize_embedded_C_decl(proc, node); + return; + } + + Instruction *insn = allocate_instruction(proc, op_embed_C, node->line_number); + LuaSymbol *sym; + FOR_EACH_PTR(node->embedded_C_stmt.symbols, LuaSymbol, sym) + { + if (sym->symbol_type == SYM_LOCAL) { + Pseudo *pseudo = sym->variable.pseudo; + add_instruction_operand(proc, insn, pseudo); + } else { + handle_error(proc->linearizer->ast_container, + "Variables referenced by embed C instruction must be locals"); + } + } + END_FOR_EACH_PTR(sym) + add_instruction_target(proc, insn, allocate_constant_pseudo(proc, allocate_string_constant(proc, node->embedded_C_stmt.C_src_snippet))); + add_instruction(proc, insn); +} + static void linearize_function_statement(Proc *proc, AstNode *node) { /* function funcname funcbody */ @@ -2320,7 +2375,7 @@ static void linearize_function_statement(Proc *proc, AstNode *node) prev_node = this_node; prev_pseudo = next; } - END_FOR_EACH_PTR(node) + END_FOR_EACH_PTR(this_node) // FIXME maybe better to add the method name to the selector list above in the parser // then we could have just handled it above rather than as a special case if (node->function_stmt.method_name) { @@ -2394,6 +2449,10 @@ static void linearize_statement(Proc *proc, AstNode *node) linearize_for_num_statement(proc, node); break; } + case STMT_EMBEDDED_C: { + linearize_embedded_C(proc, node); + break; + } default: handle_error(proc->linearizer->ast_container, "unknown statement type"); break; @@ -2634,7 +2693,7 @@ static const char *op_codenames[] = { "PUTik", "PUTsk", "TPUT", "TPUTik", "TPUTsk", "IAPUT", 
"IAPUTiv", "FAPUT", "FAPUTfv", "CBR", "BR", "MOV", "MOVi", "MOVif", "MOVf", "MOVfi", "CALL", "GET", "GETik", "GETsk", "TGET", "TGETik", "TGETsk", "IAGET", "IAGETik", "FAGET", "FAGETik", - "STOREGLOBAL", "CLOSE", "CONCAT", "INIT"}; + "STOREGLOBAL", "CLOSE", "CONCAT", "INIT", "EMBED_C", "EMBED_C__NEW"}; static void output_pseudo_list(PseudoList *list, TextBuffer *mb) { @@ -2667,7 +2726,11 @@ static void output_instruction(Instruction *insn, TextBuffer *mb, const char *pr if (insn->operands) { output_pseudo_list(insn->operands, mb); } - if (insn->targets) { + if (insn->opcode == op_embed_C) { + // special handling as we don't want to output all the C code + raviX_buffer_add_string(mb, " { C code }"); + } + else if (insn->targets) { output_pseudo_list(insn->targets, mb); } raviX_buffer_add_string(mb, suffix); diff --git a/ravicomp/src/linearizer.h b/ravicomp/src/linearizer.h index 77ee86a3..4e03f171 100644 --- a/ravicomp/src/linearizer.h +++ b/ravicomp/src/linearizer.h @@ -45,6 +45,7 @@ typedef struct Graph Graph; DECLARE_PTR_LIST(InstructionList, Instruction); DECLARE_PTR_LIST(PseudoList, Pseudo); DECLARE_PTR_LIST(ProcList, Proc); +DECLARE_PTR_LIST(StringObjectList, StringObject); #define container_of(ptr, type, member) ((type *)((char *)(ptr)-offsetof(type, member))) @@ -143,7 +144,9 @@ enum opcode { op_storeglobal, op_close, op_concat, - op_init + op_init, + op_embed_C, + op_embed_C__new }; /* @@ -277,6 +280,7 @@ struct LinearizerState { ProcList *all_procs; /* All procs allocated by the linearizer */ Proc *current_proc; /* proc being compiled */ uint32_t proc_id; + TextBuffer C_declarations; /* List of top level C declarations to be added to generated code, build from C__decl statements */ }; // Get string name of an op code diff --git a/ravicomp/src/parser.c b/ravicomp/src/parser.c index eb27d3cc..0a3b8a2a 100644 --- a/ravicomp/src/parser.c +++ b/ravicomp/src/parser.c @@ -88,7 +88,7 @@ static AstNode *raviX_allocate_ast_node(ParserState *parser, enum AstNodeType 
ty static AstNode *allocate_expr_ast_node(ParserState *parser, enum AstNodeType type) { - assert(type >= EXPR_LITERAL && type <= EXPR_CONCAT); + assert(type >= EXPR_LITERAL && type <= EXPR_BUILTIN); AstNode *node = raviX_allocate_ast_node(parser, type); node->common_expr.truncate_results = 0; set_typecode(&node->common_expr.type, RAVI_TANY); @@ -865,6 +865,31 @@ static AstNode *parse_primary_expression(ParserState *parser) return primary_expr; } +static AstNode *parse_builtin_expression(ParserState *parser) +{ + LexerState *ls = parser->ls; + + AstNode *builtin_expr = allocate_expr_ast_node(parser, EXPR_BUILTIN); + builtin_expr->builtin_expr.type.type_code = RAVI_TUSERDATA; + builtin_expr->builtin_expr.type.type_name = NULL; + builtin_expr->builtin_expr.type_name = NULL; + builtin_expr->builtin_expr.size_expr = NULL; + + raviX_next(ls); + checknext(ls, '('); + check(ls, TOK_STRING); + builtin_expr->builtin_expr.type_name = ls->t.seminfo.ts; + raviX_next(ls); + checknext(ls, ','); + builtin_expr->builtin_expr.size_expr = parse_expression(parser); + if (builtin_expr->builtin_expr.size_expr == NULL) { + raviX_syntaxerror(ls, "Expected a size expression as second argument to C__new"); + } + checknext(ls, ')'); + + return builtin_expr; +} + /* variable or field access or function call */ static AstNode *parse_suffixed_expression(ParserState *parser) { @@ -872,6 +897,9 @@ static AstNode *parse_suffixed_expression(ParserState *parser) /* suffixedexp -> primaryexp { '.' 
NAME | '[' exp ']' | ':' NAME funcargs | funcargs } */ int line = ls->linenumber; + if (ls->t.token == TOK_C__new) { + return parse_builtin_expression(parser); + } AstNode *suffixed_expr = allocate_expr_ast_node(parser, EXPR_SUFFIXED); suffixed_expr->suffixed_expr.primary_expr = parse_primary_expression(parser); suffixed_expr->suffixed_expr.type = suffixed_expr->suffixed_expr.primary_expr->common_expr.type; @@ -1188,6 +1216,53 @@ static AstNode *parse_goto_statment(ParserState *parser) return goto_stmt; } +static AstNode *parse_embedded_C(ParserState *parser, bool is_decl) { + LexerState *ls = parser->ls; + /* stat -> C (NAME {',' NAME}) string */ + AstNode *node = raviX_allocate_ast_node(parser, STMT_EMBEDDED_C); + node->embedded_C_stmt.C_src_snippet = NULL; + node->embedded_C_stmt.symbols = NULL; + node->embedded_C_stmt.is_decl = is_decl; + raviX_next(ls); + if (!is_decl && testnext(ls, '(')) { + switch (ls->t.token) { + case ')': { + raviX_next(ls); + break; + } + case TOK_NAME: { + const StringObject *varname = ls->t.seminfo.ts; + bool is_local = 0; + LuaSymbol *symbol = search_for_variable(parser, varname, &is_local); + if (symbol && is_local) + raviX_add_symbol(parser->container, &node->embedded_C_stmt.symbols, symbol); + else { + raviX_syntaxerror(ls, "Argument must be local variable"); + } + raviX_next(ls); + while (testnext(ls, ',')) { + varname = check_name_and_next(ls); + symbol = search_for_variable(parser, varname, &is_local); + if (symbol && is_local) + raviX_add_symbol(parser->container, &node->embedded_C_stmt.symbols, symbol); + else { + raviX_syntaxerror(ls, "Argument must be local variable"); + } + } + checknext(ls, ')'); + break; + } + default: { + raviX_syntaxerror(ls, "Expected set of arguments"); + } + } + } + check(ls, TOK_STRING); + node->embedded_C_stmt.C_src_snippet = ls->t.seminfo.ts; + raviX_next(ls); + return node; +} + /* skip no-op statements */ static void skip_noop_statements(ParserState *parser) { @@ -1624,6 +1699,11 @@ static 
AstNode *parse_statement(ParserState *parser) stmt = parse_goto_statment(parser); break; } + case TOK_C__unsafe: + case TOK_C__decl: { + stmt = parse_embedded_C(parser, ls->t.token == TOK_C__decl); + break; + } default: { /* stat -> func | assignment */ stmt = parse_expression_statement(parser); break; diff --git a/ravicomp/src/parser.h b/ravicomp/src/parser.h index b6786509..2d40abfe 100644 --- a/ravicomp/src/parser.h +++ b/ravicomp/src/parser.h @@ -280,6 +280,13 @@ struct ForStatement { Scope *for_body; AstNodeList *for_statement_list; /* statements in this block */ }; +/* for embedded C */ +struct EmbeddedCStatement { + LuaSymbolList *symbols; + const StringObject *C_src_snippet; // C source snippet + bool is_decl; // true if the snippet is only supposed to be declarations +}; + /* To access the type field common to all expr objects */ /* all expr types must be compatible with base_expression */ @@ -367,6 +374,12 @@ struct FunctionCallExpression { AstNodeList *arg_list; /* Call arguments */ int num_results; /* How many results do we expect, -1 means all available results */ }; +struct BuiltinExpression { + /* Currently only for C__new but potentially could be other builtins */ + BASE_EXPRESSION_FIELDS; + const StringObject *type_name; /* Name of the C struct type */ + AstNode *size_expr; /* Number of elements of type - > 1 means array */ +}; #undef BASE_EXPRESSION_FIELDS /* ALL AST nodes start with following fields */ @@ -404,6 +417,7 @@ struct AstNode { IfStatement if_stmt; WhileOrRepeatStatement while_or_repeat_stmt; ForStatement for_stmt; + EmbeddedCStatement embedded_C_stmt; BaseExpression common_expr; LiteralExpression literal_expr; SymbolExpression symbol_expr; @@ -416,6 +430,7 @@ struct AstNode { SuffixedExpression suffixed_expr; FunctionCallExpression function_call_expr; StringConcatenationExpression string_concatenation_expr; + BuiltinExpression builtin_expr; }; }; #undef BASE_AST_FIELDS diff --git a/ravicomp/src/typechecker.c 
b/ravicomp/src/typechecker.c index e8a52827..eead006d 100644 --- a/ravicomp/src/typechecker.c +++ b/ravicomp/src/typechecker.c @@ -552,6 +552,9 @@ static void typecheck_ast_node(CompilerState *container, AstNode *function, AstN typecheck_for_num_statment(container, function, node); break; } + case STMT_EMBEDDED_C: { + break; + } case EXPR_SUFFIXED: { typecheck_suffixedexpr(container, function, node); break; @@ -607,6 +610,8 @@ static void typecheck_ast_node(CompilerState *container, AstNode *function, AstN infer_table_type(container, function, node); break; } + case EXPR_BUILTIN: + break; default: assert(0); } diff --git a/src/ravi_mirjit.c b/src/ravi_mirjit.c index 7c33ed54..5c2c0107 100644 --- a/src/ravi_mirjit.c +++ b/src/ravi_mirjit.c @@ -44,6 +44,8 @@ typedef struct LuaFunc { } LuaFunc; static LuaFunc Lua_functions[] = { + { "memset", memset }, + { "luaS_newudata", luaS_newudata }, { "luaF_close", luaF_close }, { "raviV_raise_error", raviV_raise_error }, { "raviV_raise_error_with_info", raviV_raise_error_with_info }, diff --git a/tests/comptests/inputs/54_embed_C.lua b/tests/comptests/inputs/54_embed_C.lua new file mode 100644 index 00000000..67e45b11 --- /dev/null +++ b/tests/comptests/inputs/54_embed_C.lua @@ -0,0 +1,12 @@ +-- Test simple data passing + +local i: integer = 42 +local j: integer + +C__unsafe(i,j) [[ + j = i; +]] + +assert(j == 42) + +print 'Ok' \ No newline at end of file diff --git a/tests/comptests/inputs/55_embed_C.lua b/tests/comptests/inputs/55_embed_C.lua new file mode 100644 index 00000000..da8259bc --- /dev/null +++ b/tests/comptests/inputs/55_embed_C.lua @@ -0,0 +1,22 @@ +-- Test simple data passing + +local i: integer = 42 +local j: integer + +C__decl [[ +typedef struct { + int a,b; +} TestStruct; +]] + +local u = C__new('TestStruct', 1) + +C__unsafe(i,j,u) [[ + TestStruct *s = (TestStruct*) u.ptr; + s->b = i; + j = s->b; +]] + +assert(j == 42) + +print 'Ok' \ No newline at end of file diff --git a/tests/comptests/test.lua 
b/tests/comptests/test.lua index 9b391d84..d5757cc2 100644 --- a/tests/comptests/test.lua +++ b/tests/comptests/test.lua @@ -53,9 +53,16 @@ dofile("inputs/50_bug.lua") dofile("inputs/51_concat.lua") dofile("inputs/52_local_s.lua") -local f = compiler.loadfile("inputs/53_ravi_tests.lua") -assert(f and type(f) == 'function') -f() +function runfile(file) + local f = compiler.loadfile(file) + assert(f and type(f) == 'function') + f() +end + +--runfile("inputs/53_ravi_tests.lua") +runfile("inputs/54_embed_C.lua") +runfile("inputs/55_embed_C.lua") + print 'SUCCESS'