// program_graph.proto
syntax = "proto3";
package splitbrain;
// NodeDef represents a node within the program graph.
//
// This can be a file, BUILD target, or AST symbol. Note there is not enough
// information to split the program yet. Multi-file symbols are unsupported.
message NodeDef {
  // The categories of program entity a node can stand for.
  enum Kind {
    // Zero value; this is what an unset `kind` reads as in proto3.
    UNKNOWN = 0;
    // The node is a file.
    FILE = 1;
    // The node is a BUILD target.
    BUILD_TARGET = 2;
    // The node is an AST symbol (e.g. a function).
    SYMBOL = 3;
  }

  // A line range inside one file that a symbol is tied to.
  message FileRef {
    // Arbitrary filepath to the file.
    string file_path = 1;

    // First line of the symbol; this bound is inclusive.
    int32 line_bound_start = 2;

    // Line at which the symbol ends; this bound is exclusive.
    int32 line_bound_end = 3;
  }

  // Name of the node. Unique within the context of a single program.
  string name = 1;

  // What type of node this is, e.g. a file or an AST symbol such as a
  // function.
  Kind kind = 2;

  // Names of the symbols reached by this node's out-edges. A program only
  // counts as valid when all symbolic references are present in the graph.
  repeated string u_edge = 3;

  // How many lines were added for this symbol.
  int32 lines_added = 4;

  // How many lines were changed for this symbol.
  int32 lines_changed = 5;

  // How many lines were removed for this symbol.
  int32 lines_removed = 6;

  // Language this symbol is written in.
  //
  // Edge cases such as SWIG definitions or mixed C/C++ code are simply
  // ignored; the majority language is taken instead. The value can either be
  // populated by Kythe or extracted from the file extension.
  string language = 7;

  // File references relevant to this symbol. The intention is to allow a
  // whole diff to be generated after computation.
  repeated FileRef file_ref = 8;
}
// GraphDef represents a high-level program graph.
message GraphDef {
  // The symbols (graph nodes) that are present in the source graph.
  repeated NodeDef symbol = 1;
}
// Statistics stores the output for a single run of the SplitBrain algo.
//
// These are used to populate a dashboard for further analysis of performance,
// with the following requirements:
//
// - algorithm name
// - histogram of total code delta
// - histogram of total split count
// - slice by majority programming language
// - diff metrics (total delta, add, rm)
// - graph complexity metrics
message Statistics {
  // Field numbers 1 through 5 are reserved and cannot be assigned to fields
  // of this message (presumably retired earlier fields; confirm against the
  // file history).
  reserved 1, 2, 3, 4, 5;

  // The algorithm variants that exist in SplitBrain.
  enum Algorithm {
    // Zero value; this is what an unset `algorithm` reads as in proto3.
    UNKNOWN = 0;
    SPLITBRAIN_V2 = 1;
    CONTROL = 2;
    // Legacy backport of the original experiment. It runs over the Bazel
    // graph only, not Kythe.
    SPLITBRAIN_V1 = 3;
  }

  // Statistics describing one changelist. Used below both for the original
  // changelist and for every sub-changelist split off from it.
  message CL {
    // Average connectivity of the weighted nodes in the CL.
    float average_node_connectivity = 1;

    // Connectivity of the weighted edges in the CL.
    float edge_connectivity = 2;

    // How many nodes (symbols) this CL contains.
    int32 number_of_nodes = 3;

    // How many edges (calls, deps, etc.) this CL contains.
    int32 number_of_edges = 4;

    // How many lines were added in this CL.
    int32 lines_added = 5;

    // How many lines were changed in this CL.
    int32 lines_changed = 6;

    // How many lines were removed in this CL.
    int32 lines_removed = 7;

    // The *dominant* language of this CL.
    //
    // Computed by a utility class from symbol diff sizes and file
    // extensions.
    string language = 8;
  }

  // Arbitrary unique identifier for this CL.
  string cl_identifier = 6;

  // Which algorithm was used for inference.
  Algorithm algorithm = 7;

  // Statistics for the original changelist.
  CL original_changelist = 8;

  // Statistics for each individually split-off changelist.
  repeated CL split_changelist = 9;
}