9090 TF_IS_FLOAT = 1 << 2 ,
9191};
9292
93+ // Token tags - bitmask classification assigned once at tokenize time
94+ // Eliminates repeated string comparisons in the transpiler
95+ enum
96+ {
97+ TT_TYPE = 1 << 0 , // Type keyword (int, char, void, struct, etc.)
98+ TT_QUALIFIER = 1 << 1 , // Type qualifier (const, volatile, restrict, static, auto, register, _Atomic, _Alignas, __attribute__)
99+ TT_SUE = 1 << 2 , // struct/union/enum
100+ TT_SKIP_DECL = 1 << 3 , // Keywords that can't start a zero-init declaration
101+ TT_ATTR = 1 << 4 , // Attribute keyword (__attribute__, __attribute, __declspec)
102+ TT_ASSIGN = 1 << 5 , // Assignment-like punctuator: =, compound assignment (+=, <<=, ...), ++, --, and [
103+ TT_MEMBER = 1 << 6 , // Member access operator (. or ->)
104+ };
105+
93106struct Token
94107{
95108 char * loc ;
@@ -103,6 +116,7 @@ struct Token
103116 TokenKind kind ;
104117 uint16_t file_idx ;
105118 uint8_t flags ;
119+ uint8_t tag ; // TT_* bitmask - token classification
106120};
107121
108122// Token accessors
@@ -708,74 +722,114 @@ static inline bool equal(Token *tok, const char *op)
708722 return equiv && strlen (equiv ) == len && !memcmp (equiv , op , len );
709723}
710724
725+ // Internal marker bit for keyword map: values are (tag | KW_MARKER)
726+ // This distinguishes tag=0 keywords from "not found" (NULL)
727+ #define KW_MARKER 0x80
728+
711729static void init_keyword_map (void )
712730{
713- static char * kw [] = {
714- "return" ,
715- "if" ,
716- "else" ,
717- "for" ,
718- "while" ,
719- "do" ,
720- "switch" ,
721- "case" ,
722- "default" ,
723- "break" ,
724- "continue" ,
725- "goto" ,
726- "sizeof" ,
727- "alignof" ,
728- "struct" ,
729- "union" ,
730- "enum" ,
731- "typedef" ,
732- "static" ,
733- "extern" ,
734- "inline" ,
735- "const" ,
736- "volatile" ,
737- "restrict" ,
738- "_Atomic" ,
739- "_Noreturn" ,
740- "_Thread_local" ,
741- "void" ,
742- "char" ,
743- "short" ,
744- "int" ,
745- "long" ,
746- "float" ,
747- "double" ,
748- "signed" ,
749- "unsigned" ,
750- "_Bool" ,
751- "auto" ,
752- "register" ,
753- "_Alignas" ,
754- "_Static_assert" ,
755- "_Generic" ,
756- "typeof" ,
757- "__typeof__" ,
758- "asm" ,
759- "__asm__" ,
760- "__attribute__" ,
761- "__extension__" ,
762- "__builtin_va_list" ,
763- "__builtin_va_arg" ,
764- "__builtin_offsetof" ,
765- "__builtin_types_compatible_p" ,
731+ // Each entry: {keyword, TT_* tag bitmask}
732+ // Tags are assigned once here, then stored on tokens during convert_pp_tokens
733+ static struct
734+ {
735+ char * name ;
736+ uint8_t tag ;
737+ } kw [] = {
738+ // Control flow and operator keywords (skip-decl: can't start a zero-init declaration; _Static_assert carries no tag)
739+ {"return" , TT_SKIP_DECL },
740+ {"if" , TT_SKIP_DECL },
741+ {"else" , TT_SKIP_DECL },
742+ {"for" , TT_SKIP_DECL },
743+ {"while" , TT_SKIP_DECL },
744+ {"do" , TT_SKIP_DECL },
745+ {"switch" , TT_SKIP_DECL },
746+ {"case" , TT_SKIP_DECL },
747+ {"default" , TT_SKIP_DECL },
748+ {"break" , TT_SKIP_DECL },
749+ {"continue" , TT_SKIP_DECL },
750+ {"goto" , TT_SKIP_DECL },
751+ {"sizeof" , TT_SKIP_DECL },
752+ {"alignof" , TT_SKIP_DECL },
753+ {"_Alignof" , TT_SKIP_DECL },
754+ {"_Generic" , TT_SKIP_DECL },
755+ {"_Static_assert" , 0 },
756+ // struct/union/enum (also type keywords)
757+ {"struct" , TT_TYPE | TT_SUE },
758+ {"union" , TT_TYPE | TT_SUE },
759+ {"enum" , TT_TYPE | TT_SUE },
760+ // Storage class / function specifiers (typedef, static, extern skip decl; inline carries no tag)
761+ {"typedef" , TT_SKIP_DECL },
762+ {"static" , TT_QUALIFIER | TT_SKIP_DECL },
763+ {"extern" , TT_SKIP_DECL },
764+ {"inline" , 0 },
765+ // Type qualifiers
766+ {"const" , TT_QUALIFIER },
767+ {"volatile" , TT_QUALIFIER },
768+ {"restrict" , TT_QUALIFIER },
769+ {"_Atomic" , TT_QUALIFIER | TT_TYPE },
770+ {"_Noreturn" , 0 },
771+ {"_Thread_local" , 0 },
772+ // Type keywords
773+ {"void" , TT_TYPE },
774+ {"char" , TT_TYPE },
775+ {"short" , TT_TYPE },
776+ {"int" , TT_TYPE },
777+ {"long" , TT_TYPE },
778+ {"float" , TT_TYPE },
779+ {"double" , TT_TYPE },
780+ {"signed" , TT_TYPE },
781+ {"unsigned" , TT_TYPE },
782+ {"_Bool" , TT_TYPE },
783+ {"bool" , TT_TYPE },
784+ {"_Complex" , TT_TYPE },
785+ {"_Imaginary" , TT_TYPE },
786+ {"__int128" , TT_TYPE },
787+ {"__int128_t" , TT_TYPE },
788+ {"__uint128" , TT_TYPE },
789+ {"__uint128_t" , TT_TYPE },
790+ {"typeof_unqual" , TT_TYPE },
791+ {"auto" , TT_QUALIFIER },
792+ {"register" , TT_QUALIFIER },
793+ {"_Alignas" , TT_QUALIFIER },
794+ {"typeof" , TT_TYPE },
795+ {"__typeof__" , TT_TYPE },
796+ {"__typeof" , TT_TYPE },
797+ {"_BitInt" , TT_TYPE },
798+ // Asm (skip-decl: can't start a declaration)
799+ {"asm" , TT_SKIP_DECL },
800+ {"__asm__" , TT_SKIP_DECL },
801+ {"__asm" , TT_SKIP_DECL },
802+ // Attributes
803+ {"__attribute__" , TT_ATTR | TT_QUALIFIER },
804+ {"__attribute" , TT_ATTR | TT_QUALIFIER },
805+ {"__declspec" , TT_ATTR | TT_QUALIFIER },
806+ // Other builtins
807+ {"__extension__" , 0 },
808+ {"__builtin_va_list" , 0 },
809+ {"__builtin_va_arg" , 0 },
810+ {"__builtin_offsetof" , 0 },
811+ {"__builtin_types_compatible_p" , 0 },
766812 // Prism keywords
767- "defer" ,
768- "raw" ,
813+ { "defer" , 0 } ,
814+ { "raw" , 0 } ,
769815 };
770816 for (size_t i = 0 ; i < sizeof (kw ) / sizeof (* kw ); i ++ )
771- hashmap_put (& ctx -> keyword_map , kw [i ], strlen (kw [i ]), (void * )1 );
817+ hashmap_put (& ctx -> keyword_map , kw [i ].name , strlen (kw [i ].name ),
818+ (void * )(uintptr_t )(kw [i ].tag | KW_MARKER ));
772819}
773820
774821static bool is_keyword (Token * tok )
775822{
776823 return hashmap_get (& ctx -> keyword_map , tok -> loc , tok -> len ) != NULL ;
777824}
778825
826+ // Get the tag bits for a keyword (0 if not a keyword)
827+ static uint8_t keyword_tag (Token * tok )
828+ {
829+ void * val = hashmap_get (& ctx -> keyword_map , tok -> loc , tok -> len );
830+ return val ? (uint8_t )((uintptr_t )val & ~KW_MARKER ) : 0 ;
831+ }
832+
779833// Forward declaration for hex digit conversion
780834static int from_hex (char c );
781835
@@ -1427,9 +1481,46 @@ static void convert_pp_tokens(Token *tok)
14271481 for (Token * t = tok ; t && t -> kind != TK_EOF ; t = t -> next )
14281482 {
14291483 if (is_keyword (t ))
1484+ {
14301485 t -> kind = TK_KEYWORD ;
1486+ t -> tag = keyword_tag (t );
1487+ }
14311488 else if (t -> kind == TK_PP_NUM )
14321489 convert_pp_number (t );
1490+ else if (t -> kind == TK_PUNCT )
1491+ {
1492+ // Tag punctuators: assignment ops and member access
1493+ char c = t -> loc [0 ];
1494+ if (t -> len == 1 )
1495+ {
1496+ if (c == '=' || c == '[' )
1497+ t -> tag = TT_ASSIGN ;
1498+ else if (c == '.' )
1499+ t -> tag = TT_MEMBER ;
1500+ }
1501+ else if (t -> len == 2 )
1502+ {
1503+ char c2 = t -> loc [1 ];
1504+ if (c2 == '=' )
1505+ {
1506+ // +=, -=, *=, /=, %=, &=, |=, ^= are assignment; ==, !=, <=, >= are not
1507+ if (c != '!' && c != '<' && c != '>' && c != '=' )
1508+ t -> tag = TT_ASSIGN ;
1509+ }
1510+ else if (c == '+' && c2 == '+' )
1511+ t -> tag = TT_ASSIGN ;
1512+ else if (c == '-' && c2 == '-' )
1513+ t -> tag = TT_ASSIGN ;
1514+ else if (c == '-' && c2 == '>' )
1515+ t -> tag = TT_MEMBER ;
1516+ }
1517+ else if (t -> len == 3 && t -> loc [2 ] == '=' )
1518+ {
1519+ // <<= and >>=
1520+ if ((c == '<' || c == '>' ) && t -> loc [1 ] == c )
1521+ t -> tag = TT_ASSIGN ;
1522+ }
1523+ }
14331524 }
14341525}
14351526
@@ -1818,28 +1909,15 @@ Token *tokenize_file(char *path)
18181909 return tokenize (file );
18191910}
18201911
1821- // Reset state for reuse (keeps arena blocks for reuse)
1822- void tokenizer_reset (void )
1823- {
1824- arena_reset (& ctx -> main_arena );
1825- // Free file view cache first (before freeing files)
1826- free_file_view_cache ();
1827- for (int i = 0 ; i < ctx -> input_file_count ; i ++ )
1828- free_file (ctx -> input_files [i ]);
1829- free (ctx -> input_files );
1830- ctx -> input_files = NULL ;
1831- ctx -> input_file_count = 0 ;
1832- ctx -> input_file_capacity = 0 ;
1833- ctx -> current_file = NULL ;
1834- // Free interned filenames last (after all files are freed)
1835- free_filename_intern_map ();
1836- }
1837-
1838- // Full cleanup - frees all memory including arena blocks
1839- void tokenizer_cleanup (void )
1912+ // Teardown tokenizer state
1913+ // full=false: reset for reuse (keeps arena blocks allocated)
1914+ // full=true: free all memory including arena blocks and keyword map
1915+ void tokenizer_teardown (bool full )
18401916{
1841- arena_free (& ctx -> main_arena );
1842- // Free file view cache first (before freeing files)
1917+ if (full )
1918+ arena_free (& ctx -> main_arena );
1919+ else
1920+ arena_reset (& ctx -> main_arena );
18431921 free_file_view_cache ();
18441922 for (int i = 0 ; i < ctx -> input_file_count ; i ++ )
18451923 free_file (ctx -> input_files [i ]);
@@ -1848,7 +1926,7 @@ void tokenizer_cleanup(void)
18481926 ctx -> input_file_count = 0 ;
18491927 ctx -> input_file_capacity = 0 ;
18501928 ctx -> current_file = NULL ;
1851- // Free interned filenames last (after all files are freed)
18521929 free_filename_intern_map ();
1853- hashmap_clear (& ctx -> keyword_map );
1930+ if (full )
1931+ hashmap_clear (& ctx -> keyword_map );
18541932}
0 commit comments