Skip to content

Commit 23deede

Browse files
committed
algebraic token-spacing + switch-scope consolidation + dead typedef fallback removal
1 parent a1808d0 commit 23deede

File tree

2 files changed

+64
-172
lines changed

2 files changed

+64
-172
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Prism is a single-file transpiler that makes C safer without changing how you wr
1010
- **Proper transpiler** — tracks typedefs, respects scope, catches unsafe patterns
1111
- **Opt-out features** — disable parts of the transpiler, such as zero-init, with CLI flags
1212
- **Drop-in overlay** — use `CC=prism` in any build system; GCC-compatible flags pass through automatically
13-
- **Single Repo** — 6.4k lines, zero dependencies, easy to audit
13+
- **Single Repo** — 5.8k lines, zero dependencies, easy to audit
1414

1515
Prism is a proper transpiler, not a preprocessor macro.
1616
* **Track Types:** It parses `typedef`s to distinguish pointer declarations from multiplication (the "lexer hack"), ensuring correct zero-initialization.
@@ -208,7 +208,7 @@ Not:
208208
Prism uses a GCC-compatible interface — most flags pass through to the backend compiler.
209209

210210
```sh
211-
Prism v0.103.0 - Robust C transpiler
211+
Prism v0.104.0 - Robust C transpiler
212212

213213
Usage: prism [options] source.c... [-o output]
214214

prism.c

Lines changed: 62 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#define PRISM_VERSION "0.103.0"
1+
#define PRISM_VERSION "0.104.0"
22

33
#ifndef _GNU_SOURCE
44
#define _GNU_SOURCE
@@ -519,41 +519,22 @@ static void mark_switch_control_exit(void)
519519
}
520520
}
521521

522-
// Check if we're currently inside a switch scope
523-
static bool inside_switch_scope(void)
522+
// Find innermost switch scope index, or -1 if not in a switch
523+
static int find_switch_scope(void)
524524
{
525525
for (int d = ctx->defer_depth - 1; d >= 0; d--)
526-
{
527-
if (defer_stack[d].is_switch)
528-
return true;
529-
}
530-
return false;
526+
if (defer_stack[d].is_switch) return d;
527+
return -1;
531528
}
529+
#define inside_switch_scope() (find_switch_scope() >= 0)
532530

533-
// Clear defers at innermost switch scope when hitting case/default
534-
// This is necessary because the transpiler can't track which case was entered at runtime.
535-
// Note: This means defer in case with fallthrough will NOT preserve defers from previous cases.
536-
// For reliable defer behavior in switch, wrap each case body in braces.
537-
// Must clear defers at ALL scopes from current depth down to the switch scope,
538-
// because case labels can appear inside nested blocks (e.g., Duff's device pattern).
531+
// Clear defers from current depth down to the innermost switch scope.
532+
// Case labels can appear inside nested blocks (Duff's device), so we clear all levels.
539533
static void clear_switch_scope_defers(void)
540534
{
541-
// First find the switch scope to avoid clearing non-switch scopes
542-
// if case/default somehow appears outside a switch (malformed input)
543-
int switch_depth = -1;
544-
for (int d = ctx->defer_depth - 1; d >= 0; d--)
545-
{
546-
if (defer_stack[d].is_switch)
547-
{
548-
switch_depth = d;
549-
break;
550-
}
551-
}
552-
if (switch_depth < 0)
553-
return; // Not inside a switch — don't clear anything
554-
555-
// Clear all scopes from current depth down to and including the switch scope
556-
for (int d = ctx->defer_depth - 1; d >= switch_depth; d--)
535+
int sd = find_switch_scope();
536+
if (sd < 0) return;
537+
for (int d = ctx->defer_depth - 1; d >= sd; d--)
557538
{
558539
defer_stack[d].count = 0;
559540
defer_stack[d].had_control_exit = false;
@@ -563,82 +544,32 @@ static void clear_switch_scope_defers(void)
563544
// Check if a space is needed between two tokens
564545
static bool needs_space(Token *prev, Token *tok)
565546
{
566-
if (!prev)
567-
return false;
568-
if (tok_at_bol(tok))
569-
return false;
570-
if (tok_has_space(tok))
571-
return true;
572-
573-
// Identifier/keyword/number adjacency
547+
if (!prev || tok_at_bol(tok)) return false;
548+
if (tok_has_space(tok)) return true;
574549
if ((is_identifier_like(prev) || prev->kind == TK_NUM) &&
575550
(is_identifier_like(tok) || tok->kind == TK_NUM))
576551
return true;
577-
578-
if (prev->kind != TK_PUNCT || tok->kind != TK_PUNCT)
579-
return false;
580-
581-
// Lookup in pairs table
582-
char a = prev->loc[prev->len - 1], b = tok->loc[0];
583-
static const uint16_t pairs[] = {
584-
'+' + ('+' << 8),
585-
'-' + ('-' << 8),
586-
'<' + ('<' << 8),
587-
'>' + ('>'
588-
<< 8),
589-
'&' + ('&' << 8),
590-
'|' + ('|' << 8),
591-
'=' + ('=' << 8),
592-
'!' + ('=' << 8),
593-
'<' + ('=' << 8),
594-
'>' + ('=' << 8),
595-
'+' + ('=' << 8),
596-
'-' + ('=' << 8),
597-
'*' + ('=' << 8),
598-
'/' + ('=' << 8),
599-
'-' + ('>'
600-
<< 8),
601-
'#' + ('#' << 8),
602-
'/' + ('*' << 8),
603-
'*' + ('/' << 8),
604-
};
605-
uint16_t key = (uint8_t)a | ((uint8_t)b << 8);
606-
for (int i = 0; i < (int)(sizeof(pairs) / sizeof(*pairs)); i++)
607-
if (pairs[i] == key)
608-
return true;
609-
return false;
610-
}
611-
612-
// Check if 'tok' is inside a __attribute__((...)) or __declspec(...) context.
613-
// This prevents 'defer' from being recognized as a keyword when it appears
614-
// as a function name inside cleanup() or similar attributes.
615-
// Uses forward-looking heuristic: if we see unbalanced ')' before ';' or '{',
616-
// we're likely inside a parenthesized context (attribute argument list).
552+
if (prev->kind != TK_PUNCT || tok->kind != TK_PUNCT) return false;
553+
// Two adjacent punctuators that would merge into a different token
554+
uint8_t a = (uint8_t)prev->loc[prev->len - 1], b = (uint8_t)tok->loc[0];
555+
// Merging pairs: b is '=' (== != <= >= += -= *= /=), b repeats a (++ -- << >> && || ##), or the pair is -> /* */
556+
if (b == '=') return a == '=' || a == '!' || a == '<' || a == '>' ||
557+
a == '+' || a == '-' || a == '*' || a == '/';
558+
return (a == b && (a == '+' || a == '-' || a == '<' || a == '>' ||
559+
a == '&' || a == '|' || a == '#')) ||
560+
(a == '-' && b == '>') || (a == '/' && b == '*') || (a == '*' && b == '/');
561+
}
562+
563+
// Check if 'tok' is inside a parenthesized context (e.g., __attribute__((cleanup(defer)))).
564+
// Prevents 'defer' from being treated as a keyword when used as an identifier in attributes.
617565
static bool is_inside_attribute(Token *tok)
618566
{
619-
if (!last_emitted)
567+
if (!last_emitted || (!equal(last_emitted, "(") && !equal(last_emitted, ",")))
620568
return false;
621-
622-
// Quick check: defer in cleanup(defer) would follow '(' or ','
623-
if (!equal(last_emitted, "(") && !equal(last_emitted, ","))
624-
return false;
625-
626-
// Forward check: from tok, count parens until we hit ';' or EOF
627-
// If we see unbalanced ')' first, we're inside some paren context
628-
int paren_depth = 0;
629-
for (Token *t = tok; t && t->kind != TK_EOF; t = t->next)
630-
{
631-
if (equal(t, "("))
632-
paren_depth++;
633-
else if (equal(t, ")"))
634-
{
635-
if (--paren_depth < 0)
636-
return true; // Unmatched ')' - inside attribute parens
637-
}
638-
else if (equal(t, ";") || equal(t, "{"))
639-
break;
640-
}
641-
569+
int depth = 0;
570+
for (Token *t = tok; t && t->kind != TK_EOF && !equal(t, ";") && !equal(t, "{"); t = t->next)
571+
if (equal(t, "(")) depth++;
572+
else if (equal(t, ")") && --depth < 0) return true;
642573
return false;
643574
}
644575

@@ -652,54 +583,36 @@ static void emit_tok(Token *tok)
652583
return;
653584

654585
// Check if we need a #line directive BEFORE emitting the token
655-
bool need_line_directive = false;
586+
bool need_line = false;
656587
char *tok_fname = NULL;
657588
int line_no = tok_line_no(tok);
658589

659-
// Skip line directive handling for synthetic tokens (line_no == -1)
660590
if (ctx->emit_line_directives && f && line_no > 0)
661591
{
662592
tok_fname = f->display_name ? f->display_name : f->name;
663-
bool file_changed = (ctx->last_filename != tok_fname &&
664-
(!ctx->last_filename || !tok_fname || strcmp(ctx->last_filename, tok_fname) != 0));
665-
bool system_changed = (f->is_system != ctx->last_system_header);
666-
bool line_jumped = (line_no != ctx->last_line_no && line_no != ctx->last_line_no + 1);
667-
need_line_directive = file_changed || line_jumped || system_changed;
593+
need_line = (ctx->last_filename != tok_fname &&
594+
(!ctx->last_filename || !tok_fname || strcmp(ctx->last_filename, tok_fname) != 0)) ||
595+
(f->is_system != ctx->last_system_header) ||
596+
(line_no != ctx->last_line_no && line_no != ctx->last_line_no + 1);
668597
}
669598

670-
// Handle newlines and spacing
599+
// Spacing: BOL gets newline, otherwise check for #line or token-merge space
671600
if (tok_at_bol(tok))
672-
{
673601
out_char('\n');
674-
// Emit #line directive on new line if needed
675-
if (need_line_directive)
676-
{
677-
out_line(line_no, tok_fname ? tok_fname : "unknown");
678-
ctx->last_line_no = line_no;
679-
ctx->last_filename = tok_fname;
680-
ctx->last_system_header = f->is_system;
681-
}
682-
else if (ctx->emit_line_directives && f && line_no > 0 && line_no > ctx->last_line_no)
683-
{
684-
ctx->last_line_no = line_no;
685-
}
686-
}
687-
else
602+
else if (need_line)
603+
out_char('\n');
604+
else if (needs_space(last_emitted, tok))
605+
out_char(' ');
606+
607+
if (need_line)
688608
{
689-
// Not at beginning of line - emit #line before token if file/line changed significantly
690-
if (need_line_directive)
691-
{
692-
out_char('\n');
693-
out_line(line_no, tok_fname ? tok_fname : "unknown");
694-
ctx->last_line_no = line_no;
695-
ctx->last_filename = tok_fname;
696-
ctx->last_system_header = f->is_system;
697-
}
698-
else if (needs_space(last_emitted, tok))
699-
{
700-
out_char(' ');
701-
}
609+
out_line(line_no, tok_fname ? tok_fname : "unknown");
610+
ctx->last_line_no = line_no;
611+
ctx->last_filename = tok_fname;
612+
ctx->last_system_header = f->is_system;
702613
}
614+
else if (ctx->emit_line_directives && f && line_no > 0 && line_no > ctx->last_line_no)
615+
ctx->last_line_no = line_no;
703616

704617
// Handle preprocessor directives (e.g., #pragma) - emit verbatim
705618
if (tok->kind == TK_PREP_DIR)
@@ -1716,27 +1629,6 @@ static TypeSpecResult parse_type_specifier(Token *tok)
17161629
r.end = tok;
17171630
}
17181631

1719-
// Check for "typedef_name varname" pattern (no pointer)
1720-
if (!r.saw_type && tok->kind == TK_IDENT && is_typedef_like(tok))
1721-
{
1722-
Token *t = tok->next;
1723-
while (t && (t->tag & TT_QUALIFIER))
1724-
t = t->next;
1725-
if (t && t->kind == TK_IDENT && !equal(tok->next, "*"))
1726-
{
1727-
Token *after = t->next;
1728-
if (after && (equal(after, ";") || equal(after, "[") ||
1729-
equal(after, ",") || equal(after, "=")))
1730-
{
1731-
r.saw_type = true;
1732-
r.is_typedef = true;
1733-
if (is_vla_typedef(tok))
1734-
r.is_vla = true;
1735-
r.end = tok->next;
1736-
}
1737-
}
1738-
}
1739-
17401632
return r;
17411633
}
17421634

@@ -1953,17 +1845,6 @@ static bool is_raw_declaration_context(Token *after_raw)
19531845
}
19541846

19551847
// Emit tokens from start through semicolon
1956-
static Token *emit_to_semicolon(Token *start)
1957-
{
1958-
Token *end = start;
1959-
while (end && !equal(end, ";") && end->kind != TK_EOF)
1960-
end = end->next;
1961-
if (equal(end, ";"))
1962-
end = end->next;
1963-
emit_range(start, end);
1964-
return end;
1965-
}
1966-
19671848
// Handle 'raw' after storage class: "static raw int x;"
19681849
static Token *handle_storage_raw(Token *storage_tok)
19691850
{
@@ -1984,7 +1865,12 @@ static Token *handle_storage_raw(Token *storage_tok)
19841865
emit_tok(t);
19851866
t = t->next;
19861867
}
1987-
return emit_to_semicolon(p->next); // Skip 'raw', emit rest
1868+
// Skip 'raw', emit rest through semicolon
1869+
Token *t2 = p->next;
1870+
while (t2 && !equal(t2, ";") && t2->kind != TK_EOF) t2 = t2->next;
1871+
if (equal(t2, ";")) t2 = t2->next;
1872+
emit_range(p->next, t2);
1873+
return t2;
19881874
}
19891875

19901876
// Process all declarators in a declaration and emit with zero-init
@@ -2160,7 +2046,13 @@ static Token *try_zero_init_decl(Token *tok)
21602046
if (tok->tag & TT_SKIP_DECL)
21612047
{
21622048
if (is_raw)
2163-
return emit_to_semicolon(start);
2049+
{
2050+
Token *e = start;
2051+
while (e && !equal(e, ";") && e->kind != TK_EOF) e = e->next;
2052+
if (equal(e, ";")) e = e->next;
2053+
emit_range(start, e);
2054+
return e;
2055+
}
21642056
if (equal(tok, "static") || equal(tok, "extern") || equal(tok, "typedef"))
21652057
{
21662058
Token *result = handle_storage_raw(tok);

0 commit comments

Comments
 (0)