/
clang_parser.cpp
433 lines (391 loc) · 12.9 KB
/
clang_parser.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
#include <iostream>
#include <string.h>
#include "llvm/Config/llvm-config.h"
#include "ast.h"
#include "clang_parser.h"
#include "types.h"
#include "utils.h"
#include "headers.h"
namespace bpftrace {
/*
 * get_clang_string
 *
 * Copy the contents of a libclang CXString into a std::string and dispose
 * of the CXString. The CXString must not be used again by the caller.
 *
 * clang_getCString() can return a null pointer (translateMacro() in this
 * file guards against exactly that); constructing a std::string from a
 * null pointer is undefined behaviour, so a null spelling is mapped to an
 * empty string.
 */
static std::string get_clang_string(CXString string)
{
  std::string str;
  if (const char *cstr = clang_getCString(string))
    str = cstr;
  clang_disposeString(string);
  return str;
}
/*
 * remove_struct_union_prefix
 *
 * Strip a leading "struct " or "union " keyword (trailing space included)
 * from a type spelling, in place. At most one prefix is removed, and a
 * string that starts with neither is left untouched.
 */
static void remove_struct_union_prefix(std::string &str)
{
  static const char struct_prefix[] = "struct ";
  static const char union_prefix[] = "union ";
  const size_t struct_len = sizeof(struct_prefix) - 1;
  const size_t union_len = sizeof(union_prefix) - 1;

  if (str.compare(0, struct_len, struct_prefix) == 0)
  {
    str.erase(0, struct_len);
    return;
  }

  if (str.compare(0, union_len, union_prefix) == 0)
    str.erase(0, union_len);
}
/*
 * is_anonymous
 *
 * Determine whether the provided cursor points to an anonymous struct.
 *
 * This struct is anonymous:
 *   struct { int i; };
 * This is not, although it is marked as such in LLVM 8:
 *   struct { int i; } obj;
 * This is not, and does not actually declare an instance of a struct:
 *   struct X { int i; };
 *
 * The libclang API was changed in LLVM 8 and restored under a different
 * function in LLVM 9. For LLVM 8 there is no way to properly tell if
 * a record declaration is anonymous, so we do some hacks here.
 *
 * The implementation is selected at compile time from LLVM_VERSION_MAJOR:
 *   <= 7 : clang_Cursor_isAnonymous()
 *   >= 9 : clang_Cursor_isAnonymousRecordDecl()
 *   == 8 : clang_Cursor_isAnonymous() plus a scan of the parent's children
 *
 * LLVM version differences:
 *   https://reviews.llvm.org/D54996
 *   https://reviews.llvm.org/D61232
 */
static bool is_anonymous(CXCursor c)
{
#if LLVM_VERSION_MAJOR <= 7
return clang_Cursor_isAnonymous(c);
#elif LLVM_VERSION_MAJOR >= 9
return clang_Cursor_isAnonymousRecordDecl(c);
#else // LLVM 8
// A negative answer is taken at face value; only a positive answer needs
// the extra verification below.
if (!clang_Cursor_isAnonymous(c))
return false;
// In LLVM 8, some structs which the above function says are anonymous
// are actually not. We iterate through the siblings of our struct
// definition to see if there is a field giving it a name.
//
//   struct Parent                    struct Parent
//   {                                {
//     struct                           struct
//     {                                {
//       ...                              ...
//     } name;                          };
//     int sibling;                     int sibling;
//   };                               };
//
//   Children of parent:              Children of parent:
//     Struct: (cursor c)               Struct: (cursor c)
//     Field:  (Record)name             Field:  (int)sibling
//     Field:  (int)sibling
//
//   Record field found after         No record field found after
//   cursor - not anonymous           cursor - anonymous
auto parent = clang_getCursorSemanticParent(c);
// Without a parent there are no siblings to inspect; report "not anonymous".
if (clang_Cursor_isNull(parent))
return false;
// State shared with the visitor callback through the client_data pointer.
struct AnonFinderState
{
CXCursor struct_to_check; // the record whose anonymity is being decided
bool is_anon; // result; stays true unless a naming field is found
bool prev_was_definition; // true when the previously visited child was struct_to_check
} state;
state.struct_to_check = c;
state.is_anon = true;
state.prev_was_definition = false;
clang_visitChildren(
parent,
[](CXCursor c2, CXCursor, CXClientData client_data)
{
auto state = static_cast<struct AnonFinderState*>(client_data);
if (state->prev_was_definition)
{
// This is the next child after the definition of the struct we're
// interested in. If it is a field containing a record, we assume
// that it must be the field for our struct, so our struct is not
// anonymous.
state->prev_was_definition = false;
auto kind = clang_getCursorKind(c2);
auto type = clang_getCanonicalType(clang_getCursorType(c2));
if (kind == CXCursor_FieldDecl && type.kind == CXType_Record)
{
state->is_anon = false;
return CXChildVisit_Break;
}
}
// We've found the definition of the struct we're interested in.
// The cursors are matched by comparing their raw `data` members bytewise.
// NOTE(review): this assumes CXCursor::data is an array of three
// pointer-sized words - confirm against the libclang headers in use.
if (memcmp(c2.data, state->struct_to_check.data, 3*sizeof(uintptr_t)) == 0)
state->prev_was_definition = true;
return CXChildVisit_Continue;
},
&state);
return state.is_anon;
#endif
}
/*
 * get_named_parent
 *
 * Climb the chain of semantic parents starting from the cursor's immediate
 * parent and return the first one that is not anonymous according to
 * is_anonymous(). If the chain runs out first, the null cursor that ended
 * the walk is returned.
 */
static CXCursor get_named_parent(CXCursor c)
{
  CXCursor ancestor = clang_getCursorSemanticParent(c);
  for (;;)
  {
    // Ran off the top of the hierarchy
    if (clang_Cursor_isNull(ancestor))
      break;
    // Found a parent that carries a name
    if (!is_anonymous(ancestor))
      break;
    ancestor = clang_getCursorSemanticParent(ancestor);
  }
  return ancestor;
}
// NOTE(mmarchini): as suggested in http://clang-developers.42468.n3.nabble.com/Extracting-macro-information-using-libclang-the-C-Interface-to-Clang-td4042648.html#message4042666
/*
 * translateMacro
 *
 * Tokenize the source extent of a macro definition cursor and split it into
 * the macro's name (first token) and its value (all remaining non-comment
 * tokens concatenated without separators).
 *
 * name and value are only written when at least one token is produced:
 * the first token clears value and sets name.
 *
 * Returns true if value is non-empty on exit.
 */
static bool translateMacro(CXCursor cursor, std::string &name, std::string &value)
{
  CXToken* tokens = nullptr;
  unsigned numTokens = 0;
  CXTranslationUnit transUnit = clang_Cursor_getTranslationUnit(cursor);
  CXSourceRange srcRange = clang_getCursorExtent(cursor);
  clang_tokenize(transUnit, srcRange, &tokens, &numTokens);

  for (unsigned n = 0; n < numTokens; n++)
  {
    CXString tokenText = clang_getTokenSpelling(transUnit, tokens[n]);
    // May be null; never hand a null pointer to std::string
    const char *text = clang_getCString(tokenText);

    if (n == 0)
    {
      value.clear();
      name = text ? text : "";
    }
    else if (text && clang_getTokenKind(tokens[n]) != CXToken_Comment)
    {
      value += text;
    }

    // Each spelling is owned by us and must be released, otherwise one
    // string is leaked per token
    clang_disposeString(tokenText);
  }

  clang_disposeTokens(transUnit, tokens, numTokens);
  return value.length() != 0;
}
/*
 * get_sized_type
 *
 * Translate a libclang type into a SizedType:
 *   - unsigned integer kinds and bool -> Type::integer
 *   - signed integer kinds            -> Type::integer, flagged with `true`
 *   - records                         -> Type::cast carrying the type name
 *                                        with any "struct "/"union " prefix
 *                                        removed
 *   - pointers                        -> pointer-sized Type::cast (pointee is
 *                                        a record) or Type::integer (anything
 *                                        else), with is_pointer set and
 *                                        pointee_size filled in
 *   - char arrays                     -> Type::string
 *   - other one-dimensional arrays    -> Type::array describing the element
 *   - everything else, including
 *     multi-dimensional arrays        -> Type::none
 */
static SizedType get_sized_type(CXType clang_type)
{
  auto type_size = clang_Type_getSizeOf(clang_type);
  auto type_name = get_clang_string(clang_getTypeSpelling(clang_type));
  remove_struct_union_prefix(type_name);

  switch (clang_type.kind)
  {
    // Signed integer kinds
    case CXType_Char_S:
    case CXType_SChar:
    case CXType_Short:
    case CXType_Int:
    case CXType_Long:
    case CXType_LongLong:
      return SizedType(Type::integer, type_size, true);

    // Unsigned integer kinds
    case CXType_Bool:
    case CXType_Char_U:
    case CXType_UChar:
    case CXType_UShort:
    case CXType_UInt:
    case CXType_ULong:
    case CXType_ULongLong:
      return SizedType(Type::integer, type_size);

    case CXType_Record:
      return SizedType(Type::cast, type_size, type_name);

    case CXType_Pointer:
    {
      auto target = clang_getPointeeType(clang_type);
      SizedType result;
      if (target.kind != CXType_Record)
      {
        result = SizedType(Type::integer, sizeof(uintptr_t));
      }
      else
      {
        auto target_name = get_clang_string(clang_getTypeSpelling(target));
        remove_struct_union_prefix(target_name);
        result = SizedType(Type::cast, sizeof(uintptr_t), target_name);
      }
      result.is_pointer = true;
      result.pointee_size = clang_Type_getSizeOf(target);
      return result;
    }

    case CXType_ConstantArray:
    {
      auto element = clang_getArrayElementType(clang_type);
      auto num_elems = clang_getArraySize(clang_type);

      // Character arrays are treated as strings
      if (element.kind == CXType_Char_S || element.kind == CXType_Char_U)
        return SizedType(Type::string, num_elems);

      // Only support one-dimensional arrays for now
      if (element.kind == CXType_ConstantArray)
        return SizedType(Type::none, 0);

      // The array type is constructed with the element count; the element's
      // own size and type are recorded alongside it
      auto element_type = get_sized_type(element);
      auto array_type = SizedType(Type::array, num_elems);
      array_type.pointee_size = element_type.size;
      array_type.elem_type = element_type.type;
      return array_type;
    }

    default:
      return SizedType(Type::none, 0);
  }
}
/*
 * Create the libclang index that translation units will be parsed into.
 * Both arguments to clang_createIndex() are 1
 * (excludeDeclarationsFromPCH, displayDiagnostics).
 */
ClangParser::ClangParserHandler::ClangParserHandler()
{
  index = clang_createIndex(1, 1);
  // The destructor disposes translation_unit unconditionally, so give it a
  // well-defined value in case parse_translation_unit() is never called.
  translation_unit = nullptr;
}
/*
 * Release the libclang resources held by this handler: first the
 * translation unit, then the index it was parsed with.
 */
ClangParser::ClangParserHandler::~ClangParserHandler()
{
  clang_disposeTranslationUnit(translation_unit);

  clang_disposeIndex(index);
}
/*
 * Return the translation unit handle stored by parse_translation_unit().
 * The handler keeps ownership; callers must not dispose of it.
 */
CXTranslationUnit ClangParser::ClangParserHandler::get_translation_unit()
{
  return translation_unit;
}
/*
 * Parse a source file into this handler's translation unit.
 *
 * All arguments are forwarded unchanged to clang_parseTranslationUnit2(),
 * together with the handler's index; the resulting translation unit is
 * stored in the handler. Returns libclang's error code for the parse.
 */
CXErrorCode ClangParser::ClangParserHandler::parse_translation_unit(
    const char *source_filename,
    const char *const *command_line_args,
    int num_command_line_args,
    struct CXUnsavedFile *unsaved_files,
    unsigned num_unsaved_files,
    unsigned options)
{
  const CXErrorCode status = clang_parseTranslationUnit2(index,
                                                         source_filename,
                                                         command_line_args,
                                                         num_command_line_args,
                                                         unsaved_files,
                                                         num_unsaved_files,
                                                         options,
                                                         &translation_unit);
  return status;
}
/*
 * Return the cursor representing the handler's translation unit, i.e. the
 * starting point for walking its contents.
 */
CXCursor ClangParser::ClangParserHandler::get_translation_unit_cursor()
{
  CXCursor root = clang_getTranslationUnitCursor(translation_unit);
  return root;
}
/*
 * visit_children
 *
 * Recursively walk everything below `cursor` and record what is found in
 * `bpftrace`:
 *   - macro definitions with a non-empty value -> bpftrace.macros_
 *   - children of enum declarations            -> bpftrace.enums_
 *   - fields of structs and unions             -> bpftrace.structs_
 *     (offset, type and the size of the owning record)
 *
 * Fields inside anonymous records are attributed to the nearest named
 * parent (see get_named_parent).
 *
 * Returns true if the traversal ran to completion.
 */
bool ClangParser::visit_children(CXCursor &cursor, BPFtrace &bpftrace)
{
  int err = clang_visitChildren(
      cursor,
      [](CXCursor c, CXCursor parent, CXClientData client_data)
      {
        auto *bt = static_cast<BPFtrace*>(client_data);
        const auto kind = clang_getCursorKind(c);
        const auto parent_kind = clang_getCursorKind(parent);

        if (kind == CXCursor_MacroDefinition)
        {
          std::string macro_name;
          std::string macro_value;
          if (translateMacro(c, macro_name, macro_value))
          {
            auto &macros = bt->macros_;
            macros[macro_name] = macro_value;
          }
          return CXChildVisit_Recurse;
        }

        if (parent_kind == CXCursor_EnumDecl)
        {
          auto &enums = bt->enums_;
          enums[get_clang_string(clang_getCursorSpelling(c))] = clang_getEnumConstantDeclValue(c);
          return CXChildVisit_Recurse;
        }

        // Everything below only concerns members of structs and unions
        if (parent_kind != CXCursor_StructDecl &&
            parent_kind != CXCursor_UnionDecl)
          return CXChildVisit_Recurse;

        if (kind == CXCursor_FieldDecl)
        {
          auto &structs = bt->structs_;

          auto named_parent = get_named_parent(c);
          auto ptype = clang_getCanonicalType(clang_getCursorType(named_parent));
          auto ptypestr = get_clang_string(clang_getTypeSpelling(ptype));
          auto ptypesize = clang_Type_getSizeOf(ptype);

          auto ident = get_clang_string(clang_getCursorSpelling(c));
          // clang_Type_getOffsetOf reports the offset in bits; store bytes
          auto offset = clang_Type_getOffsetOf(ptype, ident.c_str()) / 8;
          auto type = clang_getCanonicalType(clang_getCursorType(c));

          // Fall back to the type spelling when the parent cursor itself has
          // no spelling
          auto struct_name = get_clang_string(clang_getCursorSpelling(named_parent));
          if (struct_name == "")
            struct_name = ptypestr;
          remove_struct_union_prefix(struct_name);

          auto &record = structs[struct_name];
          auto &field = record.fields[ident];
          field.offset = offset;
          field.type = get_sized_type(type);
          record.size = ptypesize;
        }

        return CXChildVisit_Recurse;
      },
      &bpftrace);

  // clang_visitChildren returns a non-zero value if the traversal
  // was terminated by the visitor returning CXChildVisit_Break.
  return err == 0;
}
/*
 * parse
 *
 * Parse the C definitions attached to `program` with libclang and record
 * the macros, enums and struct layouts found in them in `bpftrace`.
 *
 * The definitions are handed to clang as the in-memory file "definitions.h",
 * alongside in-memory copies of a handful of headers under
 * /bpftrace/include. `extra_flags` are appended to the built-in -isystem
 * arguments.
 *
 * Returns true when there is nothing to parse, or when parsing and the
 * subsequent AST walk succeed. Returns false if clang reports an error
 * code or any diagnostic of severity error or fatal.
 */
bool ClangParser::parse(ast::Program *program, BPFtrace &bpftrace, std::vector<std::string> extra_flags)
{
  auto input = program->c_definitions;
  if (input.size() == 0)
    return true; // We occasionally get crashes in libclang otherwise

  CXUnsavedFile unsaved_files[] =
  {
    {
      .Filename = "definitions.h",
      .Contents = input.c_str(),
      .Length = input.size(),
    },
    {
      .Filename = "/bpftrace/include/__stddef_max_align_t.h",
      .Contents = __stddef_max_align_t_h,
      .Length = __stddef_max_align_t_h_len,
    },
    {
      .Filename = "/bpftrace/include/float.h",
      .Contents = float_h,
      .Length = float_h_len,
    },
    {
      .Filename = "/bpftrace/include/limits.h",
      .Contents = limits_h,
      .Length = limits_h_len,
    },
    {
      .Filename = "/bpftrace/include/stdarg.h",
      .Contents = stdarg_h,
      .Length = stdarg_h_len,
    },
    {
      .Filename = "/bpftrace/include/stddef.h",
      .Contents = stddef_h,
      .Length = stddef_h_len,
    },
    {
      .Filename = "/bpftrace/include/stdint.h",
      .Contents = stdint_h,
      .Length = stdint_h_len,
    },
    {
      .Filename = "/bpftrace/include/" ASM_GOTO_WORKAROUND_H,
      .Contents = asm_goto_workaround_h,
      .Length = asm_goto_workaround_h_len,
    },
  };

  std::vector<const char *> args =
  {
    "-isystem", "/usr/local/include",
    "-isystem", "/bpftrace/include",
    "-isystem", "/usr/include",
  };
  // The pointers stay valid because extra_flags outlives the parse below
  for (auto &flag : extra_flags)
  {
    args.push_back(flag.c_str());
  }

  ClangParserHandler handler;
  CXErrorCode error = handler.parse_translation_unit(
      "definitions.h",
      args.data(), args.size(),
      unsaved_files, sizeof(unsaved_files)/sizeof(CXUnsavedFile),
      CXTranslationUnit_DetailedPreprocessingRecord);
  if (error)
  {
    if (bt_debug == DebugLevel::kFullDebug) {
      std::cerr << "Clang error while parsing C definitions: " << error << std::endl;
      std::cerr << "Input (" << input.size() << "): " << input << std::endl;
    }
    return false;
  }

  CXTranslationUnit tu = handler.get_translation_unit();
  const unsigned int num_diagnostics = clang_getNumDiagnostics(tu);
  for (unsigned int i = 0; i < num_diagnostics; i++) {
    CXDiagnostic diag = clang_getDiagnostic(tu, i);
    CXDiagnosticSeverity severity = clang_getDiagnosticSeverity(diag);
    // Diagnostics returned by clang_getDiagnostic() are owned by the caller;
    // release each one as soon as its severity has been read.
    clang_disposeDiagnostic(diag);
    if (severity == CXDiagnostic_Error || severity == CXDiagnostic_Fatal) {
      if (bt_debug >= DebugLevel::kDebug)
        std::cerr << "Input (" << input.size() << "): " << input << std::endl;
      return false;
    }
  }

  CXCursor cursor = handler.get_translation_unit_cursor();
  return visit_children(cursor, bpftrace);
}
} // namespace bpftrace