forked from llvm/llvm-project
/
Preprocessor.h
2704 lines (2254 loc) · 101 KB
/
Preprocessor.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Defines the clang::Preprocessor interface.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
#define LLVM_CLANG_LEX_PREPROCESSOR_H
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Token.h"
#include "clang/Lex/TokenLexer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Registry.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace llvm {
template<unsigned InternalLen> class SmallString;
} // namespace llvm
namespace clang {
class CodeCompletionHandler;
class CommentHandler;
class DirectoryEntry;
class EmptylineHandler;
class ExternalPreprocessorSource;
class FileEntry;
class FileManager;
class HeaderSearch;
class MacroArgs;
class PragmaHandler;
class PragmaNamespace;
class PreprocessingRecord;
class PreprocessorLexer;
class PreprocessorOptions;
class ScratchBuffer;
class TargetInfo;
namespace Builtin {
class Context;
}
/// Stores token information for comparing actual tokens with
/// predefined values. Only handles simple tokens and identifiers.
class TokenValue {
tok::TokenKind Kind;
IdentifierInfo *II;
public:
TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
assert(Kind != tok::identifier &&
"Identifiers should be created by TokenValue(IdentifierInfo *)");
assert(!tok::isLiteral(Kind) && "Literals are not supported.");
assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
}
TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
bool operator==(const Token &Tok) const {
return Tok.getKind() == Kind &&
(!II || II == Tok.getIdentifierInfo());
}
};
/// Context in which macro name is used.
enum MacroUse {
// other than #define or #undef
MU_Other = 0,
// macro name specified in #define
MU_Define = 1,
// macro name specified in #undef
MU_Undef = 2
};
/// Engages in a tight little dance with the lexer to efficiently
/// preprocess tokens.
///
/// Lexers know only about tokens within a single source file, and don't
/// know anything about preprocessor-level issues like the \#include stack,
/// token expansion, etc.
class Preprocessor {
friend class VAOptDefinitionContext;
friend class VariadicMacroScopeGuard;
llvm::unique_function<void(const clang::Token &)> OnToken;
std::shared_ptr<PreprocessorOptions> PPOpts;
DiagnosticsEngine *Diags;
LangOptions &LangOpts;
const TargetInfo *Target = nullptr;
const TargetInfo *AuxTarget = nullptr;
FileManager &FileMgr;
SourceManager &SourceMgr;
std::unique_ptr<ScratchBuffer> ScratchBuf;
HeaderSearch &HeaderInfo;
ModuleLoader &TheModuleLoader;
/// External source of macros.
ExternalPreprocessorSource *ExternalSource;
/// A BumpPtrAllocator object used to quickly allocate and release
/// objects internal to the Preprocessor.
llvm::BumpPtrAllocator BP;
/// Identifiers for builtin macros and other builtins.
IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__
IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
IdentifierInfo *Ident__COUNTER__; // __COUNTER__
IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
IdentifierInfo *Ident__identifier; // __identifier
IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__
IdentifierInfo *Ident__has_feature; // __has_feature
IdentifierInfo *Ident__has_extension; // __has_extension
IdentifierInfo *Ident__has_builtin; // __has_builtin
IdentifierInfo *Ident__has_attribute; // __has_attribute
IdentifierInfo *Ident__has_include; // __has_include
IdentifierInfo *Ident__has_include_next; // __has_include_next
IdentifierInfo *Ident__has_warning; // __has_warning
IdentifierInfo *Ident__is_identifier; // __is_identifier
IdentifierInfo *Ident__building_module; // __building_module
IdentifierInfo *Ident__MODULE__; // __MODULE__
IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute
IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
IdentifierInfo *Ident__is_target_arch; // __is_target_arch
IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor
IdentifierInfo *Ident__is_target_os; // __is_target_os
IdentifierInfo *Ident__is_target_environment; // __is_target_environment
IdentifierInfo *Ident__is_target_variant_os;
IdentifierInfo *Ident__is_target_variant_environment;
IdentifierInfo *Ident__FLT_EVAL_METHOD__; // __FLT_EVAL_METHOD
// Weak, only valid (and set) while InMacroArgs is true.
Token* ArgMacro;
SourceLocation DATELoc, TIMELoc;
// FEM_UnsetOnCommandLine means that an explicit evaluation method was
// not specified on the command line. The target is queried to set the
// default evaluation method.
LangOptions::FPEvalMethodKind CurrentFPEvalMethod =
LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
// Keeps the value of the last evaluation method before a
// `pragma float_control (precise,off) is applied.
LangOptions::FPEvalMethodKind LastFPEvalMethod =
LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
// The most recent pragma location where the floating point evaluation
// method was modified. This is used to determine whether the
// 'pragma clang fp eval_method' was used whithin the current scope.
SourceLocation LastFPEvalPragmaLocation;
LangOptions::FPEvalMethodKind TUFPEvalMethod =
LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
// Next __COUNTER__ value, starts at 0.
unsigned CounterValue = 0;
enum {
/// Maximum depth of \#includes.
MaxAllowedIncludeStackDepth = 200
};
// State that is set before the preprocessor begins.
bool KeepComments : 1;
bool KeepMacroComments : 1;
bool SuppressIncludeNotFoundError : 1;
// State that changes while the preprocessor runs:
bool InMacroArgs : 1; // True if parsing fn macro invocation args.
/// Whether the preprocessor owns the header search object.
bool OwnsHeaderSearch : 1;
/// True if macro expansion is disabled.
bool DisableMacroExpansion : 1;
/// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
/// when parsing preprocessor directives.
bool MacroExpansionInDirectivesOverride : 1;
class ResetMacroExpansionHelper;
/// Whether we have already loaded macros from the external source.
mutable bool ReadMacrosFromExternalSource : 1;
/// True if pragmas are enabled.
bool PragmasEnabled : 1;
/// True if the current build action is a preprocessing action.
bool PreprocessedOutput : 1;
/// True if we are currently preprocessing a #if or #elif directive
bool ParsingIfOrElifDirective;
/// True if we are pre-expanding macro arguments.
bool InMacroArgPreExpansion;
/// Mapping/lookup information for all identifiers in
/// the program, including program keywords.
mutable IdentifierTable Identifiers;
/// This table contains all the selectors in the program.
///
/// Unlike IdentifierTable above, this table *isn't* populated by the
/// preprocessor. It is declared/expanded here because its role/lifetime is
/// conceptually similar to the IdentifierTable. In addition, the current
/// control flow (in clang::ParseAST()), make it convenient to put here.
///
/// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
/// the lifetime of the preprocessor.
SelectorTable Selectors;
/// Information about builtins.
std::unique_ptr<Builtin::Context> BuiltinInfo;
/// Tracks all of the pragmas that the client registered
/// with this preprocessor.
std::unique_ptr<PragmaNamespace> PragmaHandlers;
/// Pragma handlers of the original source is stored here during the
/// parsing of a model file.
std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
/// Tracks all of the comment handlers that the client registered
/// with this preprocessor.
std::vector<CommentHandler *> CommentHandlers;
/// Empty line handler.
EmptylineHandler *Emptyline = nullptr;
/// True if we want to ignore EOF token and continue later on (thus
/// avoid tearing the Lexer and etc. down).
bool IncrementalProcessing = false;
public:
/// The kind of translation unit we are processing.
const TranslationUnitKind TUKind;
private:
/// The code-completion handler.
CodeCompletionHandler *CodeComplete = nullptr;
/// The file that we're performing code-completion for, if any.
const FileEntry *CodeCompletionFile = nullptr;
/// The offset in file for the code-completion point.
unsigned CodeCompletionOffset = 0;
/// The location for the code-completion point. This gets instantiated
/// when the CodeCompletionFile gets \#include'ed for preprocessing.
SourceLocation CodeCompletionLoc;
/// The start location for the file of the code-completion point.
///
/// This gets instantiated when the CodeCompletionFile gets \#include'ed
/// for preprocessing.
SourceLocation CodeCompletionFileLoc;
/// The source location of the \c import contextual keyword we just
/// lexed, if any.
SourceLocation ModuleImportLoc;
/// The module import path that we're currently processing.
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
/// Whether the last token we lexed was an '@'.
bool LastTokenWasAt = false;
/// A position within a C++20 import-seq.
class ImportSeq {
public:
enum State : int {
// Positive values represent a number of unclosed brackets.
AtTopLevel = 0,
AfterTopLevelTokenSeq = -1,
AfterExport = -2,
AfterImportSeq = -3,
};
ImportSeq(State S) : S(S) {}
/// Saw any kind of open bracket.
void handleOpenBracket() {
S = static_cast<State>(std::max<int>(S, 0) + 1);
}
/// Saw any kind of close bracket other than '}'.
void handleCloseBracket() {
S = static_cast<State>(std::max<int>(S, 1) - 1);
}
/// Saw a close brace.
void handleCloseBrace() {
handleCloseBracket();
if (S == AtTopLevel && !AfterHeaderName)
S = AfterTopLevelTokenSeq;
}
/// Saw a semicolon.
void handleSemi() {
if (atTopLevel()) {
S = AfterTopLevelTokenSeq;
AfterHeaderName = false;
}
}
/// Saw an 'export' identifier.
void handleExport() {
if (S == AfterTopLevelTokenSeq)
S = AfterExport;
else if (S <= 0)
S = AtTopLevel;
}
/// Saw an 'import' identifier.
void handleImport() {
if (S == AfterTopLevelTokenSeq || S == AfterExport)
S = AfterImportSeq;
else if (S <= 0)
S = AtTopLevel;
}
/// Saw a 'header-name' token; do not recognize any more 'import' tokens
/// until we reach a top-level semicolon.
void handleHeaderName() {
if (S == AfterImportSeq)
AfterHeaderName = true;
handleMisc();
}
/// Saw any other token.
void handleMisc() {
if (S <= 0)
S = AtTopLevel;
}
bool atTopLevel() { return S <= 0; }
bool afterImportSeq() { return S == AfterImportSeq; }
bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }
private:
State S;
/// Whether we're in the pp-import-suffix following the header-name in a
/// pp-import. If so, a close-brace is not sufficient to end the
/// top-level-token-seq of an import-seq.
bool AfterHeaderName = false;
};
/// Our current position within a C++20 import-seq.
ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
/// Track whether we are in a Global Module Fragment
class TrackGMF {
public:
enum GMFState : int {
GMFActive = 1,
MaybeGMF = 0,
BeforeGMFIntroducer = -1,
GMFAbsentOrEnded = -2,
};
TrackGMF(GMFState S) : S(S) {}
/// Saw a semicolon.
void handleSemi() {
// If it is immediately after the first instance of the module keyword,
// then that introduces the GMF.
if (S == MaybeGMF)
S = GMFActive;
}
/// Saw an 'export' identifier.
void handleExport() {
// The presence of an 'export' keyword always ends or excludes a GMF.
S = GMFAbsentOrEnded;
}
/// Saw an 'import' identifier.
void handleImport(bool AfterTopLevelTokenSeq) {
// If we see this before any 'module' kw, then we have no GMF.
if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
S = GMFAbsentOrEnded;
}
/// Saw a 'module' identifier.
void handleModule(bool AfterTopLevelTokenSeq) {
// This was the first module identifier and not preceded by any token
// that would exclude a GMF. It could begin a GMF, but only if directly
// followed by a semicolon.
if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
S = MaybeGMF;
else
S = GMFAbsentOrEnded;
}
/// Saw any other token.
void handleMisc() {
// We saw something other than ; after the 'module' kw, so not a GMF.
if (S == MaybeGMF)
S = GMFAbsentOrEnded;
}
bool inGMF() { return S == GMFActive; }
private:
/// Track the transitions into and out of a Global Module Fragment,
/// if one is present.
GMFState S;
};
TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
/// Whether the module import expects an identifier next. Otherwise,
/// it expects a '.' or ';'.
bool ModuleImportExpectsIdentifier = false;
/// The identifier and source location of the currently-active
/// \#pragma clang arc_cf_code_audited begin.
std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
/// The source location of the currently-active
/// \#pragma clang assume_nonnull begin.
SourceLocation PragmaAssumeNonNullLoc;
/// Set only for preambles which end with an active
/// \#pragma clang assume_nonnull begin.
///
/// When the preamble is loaded into the main file,
/// `PragmaAssumeNonNullLoc` will be set to this to
/// replay the unterminated assume_nonnull.
SourceLocation PreambleRecordedPragmaAssumeNonNullLoc;
/// True if we hit the code-completion point.
bool CodeCompletionReached = false;
/// The code completion token containing the information
/// on the stem that is to be code completed.
IdentifierInfo *CodeCompletionII = nullptr;
/// Range for the code completion token.
SourceRange CodeCompletionTokenRange;
/// The directory that the main file should be considered to occupy,
/// if it does not correspond to a real file (as happens when building a
/// module).
const DirectoryEntry *MainFileDir = nullptr;
/// The number of bytes that we will initially skip when entering the
/// main file, along with a flag that indicates whether skipping this number
/// of bytes will place the lexer at the start of a line.
///
/// This is used when loading a precompiled preamble.
std::pair<int, bool> SkipMainFilePreamble;
/// Whether we hit an error due to reaching max allowed include depth. Allows
/// to avoid hitting the same error over and over again.
bool HasReachedMaxIncludeDepth = false;
/// The number of currently-active calls to Lex.
///
/// Lex is reentrant, and asking for an (end-of-phase-4) token can often
/// require asking for multiple additional tokens. This counter makes it
/// possible for Lex to detect whether it's producing a token for the end
/// of phase 4 of translation or for some other situation.
unsigned LexLevel = 0;
/// The number of (LexLevel 0) preprocessor tokens.
unsigned TokenCount = 0;
/// Preprocess every token regardless of LexLevel.
bool PreprocessToken = false;
/// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
/// warning, or zero for unlimited.
unsigned MaxTokens = 0;
SourceLocation MaxTokensOverrideLoc;
public:
struct PreambleSkipInfo {
SourceLocation HashTokenLoc;
SourceLocation IfTokenLoc;
bool FoundNonSkipPortion;
bool FoundElse;
SourceLocation ElseLoc;
PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
bool FoundNonSkipPortion, bool FoundElse,
SourceLocation ElseLoc)
: HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
ElseLoc(ElseLoc) {}
};
using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
private:
friend class ASTReader;
friend class MacroArgs;
class PreambleConditionalStackStore {
enum State {
Off = 0,
Recording = 1,
Replaying = 2,
};
public:
PreambleConditionalStackStore() = default;
void startRecording() { ConditionalStackState = Recording; }
void startReplaying() { ConditionalStackState = Replaying; }
bool isRecording() const { return ConditionalStackState == Recording; }
bool isReplaying() const { return ConditionalStackState == Replaying; }
ArrayRef<PPConditionalInfo> getStack() const {
return ConditionalStack;
}
void doneReplaying() {
ConditionalStack.clear();
ConditionalStackState = Off;
}
void setStack(ArrayRef<PPConditionalInfo> s) {
if (!isRecording() && !isReplaying())
return;
ConditionalStack.clear();
ConditionalStack.append(s.begin(), s.end());
}
bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); }
void clearSkipInfo() { SkipInfo.reset(); }
llvm::Optional<PreambleSkipInfo> SkipInfo;
private:
SmallVector<PPConditionalInfo, 4> ConditionalStack;
State ConditionalStackState = Off;
} PreambleConditionalStack;
/// The current top of the stack that we're lexing from if
/// not expanding a macro and we are lexing directly from source code.
///
/// Only one of CurLexer, or CurTokenLexer will be non-null.
std::unique_ptr<Lexer> CurLexer;
/// The current top of the stack what we're lexing from
/// if not expanding a macro.
///
/// This is an alias for CurLexer.
PreprocessorLexer *CurPPLexer = nullptr;
/// Used to find the current FileEntry, if CurLexer is non-null
/// and if applicable.
///
/// This allows us to implement \#include_next and find directory-specific
/// properties.
ConstSearchDirIterator CurDirLookup = nullptr;
/// The current macro we are expanding, if we are expanding a macro.
///
/// One of CurLexer and CurTokenLexer must be null.
std::unique_ptr<TokenLexer> CurTokenLexer;
/// The kind of lexer we're currently working with.
enum CurLexerKind {
CLK_Lexer,
CLK_TokenLexer,
CLK_CachingLexer,
CLK_DependencyDirectivesLexer,
CLK_LexAfterModuleImport
} CurLexerKind = CLK_Lexer;
/// If the current lexer is for a submodule that is being built, this
/// is that submodule.
Module *CurLexerSubmodule = nullptr;
/// Keeps track of the stack of files currently
/// \#included, and macros currently being expanded from, not counting
/// CurLexer/CurTokenLexer.
struct IncludeStackInfo {
enum CurLexerKind CurLexerKind;
Module *TheSubmodule;
std::unique_ptr<Lexer> TheLexer;
PreprocessorLexer *ThePPLexer;
std::unique_ptr<TokenLexer> TheTokenLexer;
ConstSearchDirIterator TheDirLookup;
// The following constructors are completely useless copies of the default
// versions, only needed to pacify MSVC.
IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
std::unique_ptr<Lexer> &&TheLexer,
PreprocessorLexer *ThePPLexer,
std::unique_ptr<TokenLexer> &&TheTokenLexer,
ConstSearchDirIterator TheDirLookup)
: CurLexerKind(std::move(CurLexerKind)),
TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
ThePPLexer(std::move(ThePPLexer)),
TheTokenLexer(std::move(TheTokenLexer)),
TheDirLookup(std::move(TheDirLookup)) {}
};
std::vector<IncludeStackInfo> IncludeMacroStack;
/// Actions invoked when some preprocessor activity is
/// encountered (e.g. a file is \#included, etc).
std::unique_ptr<PPCallbacks> Callbacks;
struct MacroExpandsInfo {
Token Tok;
MacroDefinition MD;
SourceRange Range;
MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
: Tok(Tok), MD(MD), Range(Range) {}
};
SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
/// Information about a name that has been used to define a module macro.
struct ModuleMacroInfo {
/// The most recent macro directive for this identifier.
MacroDirective *MD;
/// The active module macros for this identifier.
llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
/// The generation number at which we last updated ActiveModuleMacros.
/// \see Preprocessor::VisibleModules.
unsigned ActiveModuleMacrosGeneration = 0;
/// Whether this macro name is ambiguous.
bool IsAmbiguous = false;
/// The module macros that are overridden by this macro.
llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
};
/// The state of a macro for an identifier.
class MacroState {
mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
const IdentifierInfo *II) const {
if (II->isOutOfDate())
PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
// FIXME: Find a spare bit on IdentifierInfo and store a
// HasModuleMacros flag.
if (!II->hasMacroDefinition() ||
(!PP.getLangOpts().Modules &&
!PP.getLangOpts().ModulesLocalVisibility) ||
!PP.CurSubmoduleState->VisibleModules.getGeneration())
return nullptr;
auto *Info = State.dyn_cast<ModuleMacroInfo*>();
if (!Info) {
Info = new (PP.getPreprocessorAllocator())
ModuleMacroInfo(State.get<MacroDirective *>());
State = Info;
}
if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
Info->ActiveModuleMacrosGeneration)
PP.updateModuleMacroInfo(II, *Info);
return Info;
}
public:
MacroState() : MacroState(nullptr) {}
MacroState(MacroDirective *MD) : State(MD) {}
MacroState(MacroState &&O) noexcept : State(O.State) {
O.State = (MacroDirective *)nullptr;
}
MacroState &operator=(MacroState &&O) noexcept {
auto S = O.State;
O.State = (MacroDirective *)nullptr;
State = S;
return *this;
}
~MacroState() {
if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
Info->~ModuleMacroInfo();
}
MacroDirective *getLatest() const {
if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
return Info->MD;
return State.get<MacroDirective*>();
}
void setLatest(MacroDirective *MD) {
if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
Info->MD = MD;
else
State = MD;
}
bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
auto *Info = getModuleInfo(PP, II);
return Info ? Info->IsAmbiguous : false;
}
ArrayRef<ModuleMacro *>
getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
if (auto *Info = getModuleInfo(PP, II))
return Info->ActiveModuleMacros;
return None;
}
MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
SourceManager &SourceMgr) const {
// FIXME: Incorporate module macros into the result of this.
if (auto *Latest = getLatest())
return Latest->findDirectiveAtLoc(Loc, SourceMgr);
return {};
}
void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
if (auto *Info = getModuleInfo(PP, II)) {
Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
Info->ActiveModuleMacros.begin(),
Info->ActiveModuleMacros.end());
Info->ActiveModuleMacros.clear();
Info->IsAmbiguous = false;
}
}
ArrayRef<ModuleMacro*> getOverriddenMacros() const {
if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
return Info->OverriddenMacros;
return None;
}
void setOverriddenMacros(Preprocessor &PP,
ArrayRef<ModuleMacro *> Overrides) {
auto *Info = State.dyn_cast<ModuleMacroInfo*>();
if (!Info) {
if (Overrides.empty())
return;
Info = new (PP.getPreprocessorAllocator())
ModuleMacroInfo(State.get<MacroDirective *>());
State = Info;
}
Info->OverriddenMacros.clear();
Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
Overrides.begin(), Overrides.end());
Info->ActiveModuleMacrosGeneration = 0;
}
};
/// For each IdentifierInfo that was associated with a macro, we
/// keep a mapping to the history of all macro definitions and #undefs in
/// the reverse order (the latest one is in the head of the list).
///
/// This mapping lives within the \p CurSubmoduleState.
using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
struct SubmoduleState;
/// Information about a submodule that we're currently building.
struct BuildingSubmoduleInfo {
/// The module that we are building.
Module *M;
/// The location at which the module was included.
SourceLocation ImportLoc;
/// Whether we entered this submodule via a pragma.
bool IsPragma;
/// The previous SubmoduleState.
SubmoduleState *OuterSubmoduleState;
/// The number of pending module macro names when we started building this.
unsigned OuterPendingModuleMacroNames;
BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
SubmoduleState *OuterSubmoduleState,
unsigned OuterPendingModuleMacroNames)
: M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
OuterSubmoduleState(OuterSubmoduleState),
OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
};
SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
/// Information about a submodule's preprocessor state.
struct SubmoduleState {
/// The macros for the submodule.
MacroMap Macros;
/// The set of modules that are visible within the submodule.
VisibleModuleSet VisibleModules;
// FIXME: CounterValue?
// FIXME: PragmaPushMacroInfo?
};
std::map<Module *, SubmoduleState> Submodules;
/// The preprocessor state for preprocessing outside of any submodule.
SubmoduleState NullSubmoduleState;
/// The current submodule state. Will be \p NullSubmoduleState if we're not
/// in a submodule.
SubmoduleState *CurSubmoduleState;
/// The files that have been included.
IncludedFilesSet IncludedFiles;
/// The set of known macros exported from modules.
llvm::FoldingSet<ModuleMacro> ModuleMacros;
/// The names of potential module macros that we've not yet processed.
llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;
/// The list of module macros, for each identifier, that are not overridden by
/// any other module macro.
llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
LeafModuleMacros;
/// Macros that we want to warn because they are not used at the end
/// of the translation unit.
///
/// We store just their SourceLocations instead of
/// something like MacroInfo*. The benefit of this is that when we are
/// deserializing from PCH, we don't need to deserialize identifier & macros
/// just so that we can report that they are unused, we just warn using
/// the SourceLocations of this set (that will be filled by the ASTReader).
using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
/// This is a pair of an optional message and source location used for pragmas
/// that annotate macros like pragma clang restrict_expansion and pragma clang
/// deprecated. This pair stores the optional message and the location of the
/// annotation pragma for use producing diagnostics and notes.
using MsgLocationPair = std::pair<std::string, SourceLocation>;
struct MacroAnnotationInfo {
SourceLocation Location;
std::string Message;
};
struct MacroAnnotations {
llvm::Optional<MacroAnnotationInfo> DeprecationInfo;
llvm::Optional<MacroAnnotationInfo> RestrictExpansionInfo;
llvm::Optional<SourceLocation> FinalAnnotationLoc;
static MacroAnnotations makeDeprecation(SourceLocation Loc,
std::string Msg) {
return MacroAnnotations{MacroAnnotationInfo{Loc, std::move(Msg)},
llvm::None, llvm::None};
}
static MacroAnnotations makeRestrictExpansion(SourceLocation Loc,
std::string Msg) {
return MacroAnnotations{
llvm::None, MacroAnnotationInfo{Loc, std::move(Msg)}, llvm::None};
}
static MacroAnnotations makeFinal(SourceLocation Loc) {
return MacroAnnotations{llvm::None, llvm::None, Loc};
}
};
/// Warning information for macro annotations.
llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;
/// A "freelist" of MacroArg objects that can be
/// reused for quick allocation.
MacroArgs *MacroArgCache = nullptr;
/// For each IdentifierInfo used in a \#pragma push_macro directive,
/// we keep a MacroInfo stack used to restore the previous macro value.
llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
PragmaPushMacroInfo;
// Various statistics we track for performance analysis.
unsigned NumDirectives = 0;
unsigned NumDefined = 0;
unsigned NumUndefined = 0;
unsigned NumPragma = 0;
unsigned NumIf = 0;
unsigned NumElse = 0;
unsigned NumEndif = 0;
unsigned NumEnteredSourceFiles = 0;
unsigned MaxIncludeStackDepth = 0;
unsigned NumMacroExpanded = 0;
unsigned NumFnMacroExpanded = 0;
unsigned NumBuiltinMacroExpanded = 0;
unsigned NumFastMacroExpanded = 0;
unsigned NumTokenPaste = 0;
unsigned NumFastTokenPaste = 0;
unsigned NumSkipped = 0;
/// The predefined macros that preprocessor should use from the
/// command line etc.
std::string Predefines;
/// The file ID for the preprocessor predefines.
FileID PredefinesFileID;
/// The file ID for the PCH through header.
FileID PCHThroughHeaderFileID;
/// Whether tokens are being skipped until a #pragma hdrstop is seen.
bool SkippingUntilPragmaHdrStop = false;
/// Whether tokens are being skipped until the through header is seen.
bool SkippingUntilPCHThroughHeader = false;
/// \{
/// Cache of macro expanders to reduce malloc traffic.
enum { TokenLexerCacheSize = 8 };
unsigned NumCachedTokenLexers;
std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
/// \}
/// Keeps macro expanded tokens for TokenLexers.
//
/// Works like a stack; a TokenLexer adds the macro expanded tokens that is
/// going to lex in the cache and when it finishes the tokens are removed
/// from the end of the cache.
SmallVector<Token, 16> MacroExpandedTokens;
std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
/// A record of the macro definitions and expansions that
/// occurred during preprocessing.
///
/// This is an optional side structure that can be enabled with
/// \c createPreprocessingRecord() prior to preprocessing.
PreprocessingRecord *Record = nullptr;
/// Cached tokens state.
using CachedTokensTy = SmallVector<Token, 1>;
/// Cached tokens are stored here when we do backtracking or
/// lookahead. They are "lexed" by the CachingLex() method.
CachedTokensTy CachedTokens;
/// The position of the cached token that CachingLex() should
/// "lex" next.
///
/// If it points beyond the CachedTokens vector, it means that a normal
/// Lex() should be invoked.
CachedTokensTy::size_type CachedLexPos = 0;