Adding OpenMetrics report support

- added OpenMetrics reporter
- added unit tests for OpenMetrics reporter
- added `openmetrics` to usage help
- updated usage and added description of OpenMetrics to `README.md`
- extracted common language summary aggregation from `toCSVSummary` and `toJSON` into separate method `aggregateLanguageSummary`
- renamed `golang.org/x/text/language` import to avoid warnings about naming conflicts with local variables
boyter · Dec 13, 2021 · 4ef15e2 · 4ef15e2
1 parent 9f2bb6b
commit 4ef15e2
Show file tree

Hide file tree

Showing 4 changed files with 358 additions and 101 deletions.
diff --git a/README.md b/README.md
@@ -207,7 +207,7 @@ Flags:
       --debug                        enable debug output
       --exclude-dir strings          directories to exclude (default [.git,.hg,.svn])
       --file-gc-count int            number of files to parse before turning the GC on (default 10000)
-  -f, --format string                set output format [tabular, wide, json, csv, csv-stream, cloc-yaml, html, html-table, sql, sql-insert] (default "tabular")
+  -f, --format string                set output format [tabular, wide, json, csv, csv-stream, cloc-yaml, html, html-table, sql, sql-insert, openmetrics] (default "tabular")
       --format-multi string          have multiple format output overriding --format [e.g. tabular:stdout,csv:file.csv,json:file.json]
       --gen                          identify generated files
       --generated-markers strings    string markers in head of generated files (default [do not edit,<auto-generated />])
@@ -440,7 +440,7 @@ Note that in all cases if the remap rule does not apply normal #! rules will app
 
 By default `scc` will output to the console. However you can produce output in other formats if you require.
 
-The different options are `tabular, wide, json, csv, csv-stream, cloc-yaml, html, html-table, sql, sql-insert`. 
+The different options are `tabular, wide, json, csv, csv-stream, cloc-yaml, html, html-table, sql, sql-insert, openmetrics`. 
 
 Note that you can write `scc` output to disk using the `-o, --output` option. This allows you to specify a file to
 write your output to. For example `scc -f html -o output.html` will run `scc` against the current directory, and output
@@ -589,6 +589,58 @@ sqlite3 code.db 'select project,file,max(nCode) as nL from t
 See the cloc documentation for more examples.
 
 
+#### OpenMetrics
+
+[OpenMetrics](https://openmetrics.io/) is a metric reporting format specification extending the Prometheus exposition text format.
+
+The produced output is natively supported by [Prometheus](https://prometheus.io/) and [GitLab CI](https://docs.gitlab.com/ee/ci/metrics_reports.html).
+
+Note that the OpenMetrics output respects `--by-file`: without the flag it returns a per-language summary (the default), and with the flag it returns metrics for each individual file.
+
+The output includes a metadata header containing definitions of the returned metrics: 
+```text
+# TYPE scc_files count
+# HELP scc_files Number of sourcecode files.
+# TYPE scc_lines count
+# UNIT scc_lines lines
+# HELP scc_lines Number of lines.
+# TYPE scc_code count
+# UNIT scc_code lines
+# HELP scc_code Number of lines of actual code.
+# TYPE scc_comments count
+# HELP scc_comments Number of comments.
+# TYPE scc_blanks count
+# UNIT scc_blanks lines
+# HELP scc_blanks Number of blank lines.
+# TYPE scc_complexity count
+# UNIT scc_complexity lines
+# HELP scc_complexity Code complexity.
+# TYPE scc_bytes count
+# UNIT scc_bytes bytes
+# HELP scc_bytes Size in bytes.
+```
+
+The header is followed by the metric data in either language summary form:
+```text
+scc_files{language="Go"} 1
+scc_lines{language="Go"} 1000
+scc_code{language="Go"} 1000
+scc_comments{language="Go"} 1000
+scc_blanks{language="Go"} 1000
+scc_complexity{language="Go"} 1000
+scc_bytes{language="Go"} 1000
+```
+
+or, if `--by-file` is present, in per-file form:
+```text
+scc_lines{language="Go", file="./bbbb.go"} 1000
+scc_code{language="Go", file="./bbbb.go"} 1000
+scc_comments{language="Go", file="./bbbb.go"} 1000
+scc_blanks{language="Go", file="./bbbb.go"} 1000
+scc_complexity{language="Go", file="./bbbb.go"} 1000
+scc_bytes{language="Go", file="./bbbb.go"} 1000
+```
+
 ### Performance
 
 Generally `scc` will be the fastest code counter compared to any I am aware of and have compared against. The below comparisons are taken from the fastest alternative counters. See `Other similar projects` above to see all of the other code counters compared against. It is designed to scale to as many CPU cores as you can provide.

diff --git a/main.go b/main.go
@@ -87,7 +87,7 @@ func main() {
 		"format",
 		"f",
 		"tabular",
-		"set output format [tabular, wide, json, csv, csv-stream, cloc-yaml, html, html-table, sql, sql-insert]",
+		"set output format [tabular, wide, json, csv, csv-stream, cloc-yaml, html, html-table, sql, sql-insert, openmetrics]",
 	)
 	flags.StringSliceVarP(
 		&processor.AllowListExtensions,

diff --git a/processor/formatters.go b/processor/formatters.go
@@ -17,7 +17,7 @@ import (
 
 	"github.com/mattn/go-runewidth"
 
-	"golang.org/x/text/language"
+	glanguage "golang.org/x/text/language"
 	gmessage "golang.org/x/text/message"
 	"gopkg.in/yaml.v2"
 )
@@ -42,6 +42,29 @@ var tabularWideFormatBody = "%-33s %9d %9d %8d %9d %8d %10d %16.2f\n"
 var tabularWideFormatFile = "%s %9d %8d %9d %8d %10d %16.2f\n"
 var wideFormatFileTruncate = 42
 
+var openMetricsMetadata = `# TYPE scc_files count
+# HELP scc_files Number of sourcecode files.
+# TYPE scc_lines count
+# UNIT scc_lines lines
+# HELP scc_lines Number of lines.
+# TYPE scc_code count
+# UNIT scc_code lines
+# HELP scc_code Number of lines of actual code.
+# TYPE scc_comments count
+# HELP scc_comments Number of comments.
+# TYPE scc_blanks count
+# UNIT scc_blanks lines
+# HELP scc_blanks Number of blank lines.
+# TYPE scc_complexity count
+# UNIT scc_complexity lines
+# HELP scc_complexity Code complexity.
+# TYPE scc_bytes count
+# UNIT scc_bytes bytes
+# HELP scc_bytes Size in bytes.
+`
+var openMetricsSummaryRecordFormat = "scc_%s{language=\"%s\"} %d\n"
+var openMetricsFileRecordFormat = "scc_%s{language=\"%s\", file=\"%s\"} %d\n"
+
 func sortSummaryFiles(summary *LanguageSummary) {
 	switch {
 	case SortBy == "name" || SortBy == "names" || SortBy == "language" || SortBy == "languages":
@@ -200,54 +223,7 @@ func toClocYAML(input chan *FileJob) string {
 
 func toJSON(input chan *FileJob) string {
 	startTime := makeTimestampMilli()
-	languages := map[string]LanguageSummary{}
-
-	for res := range input {
-		_, ok := languages[res.Language]
-
-		if !ok {
-			files := []*FileJob{}
-			if Files {
-				files = append(files, res)
-			}
-
-			languages[res.Language] = LanguageSummary{
-				Name:       res.Language,
-				Lines:      res.Lines,
-				Code:       res.Code,
-				Comment:    res.Comment,
-				Blank:      res.Blank,
-				Complexity: res.Complexity,
-				Count:      1,
-				Files:      files,
-				Bytes:      res.Bytes,
-			}
-		} else {
-			tmp := languages[res.Language]
-			files := tmp.Files
-			if Files {
-				files = append(files, res)
-			}
-
-			languages[res.Language] = LanguageSummary{
-				Name:       res.Language,
-				Lines:      tmp.Lines + res.Lines,
-				Code:       tmp.Code + res.Code,
-				Comment:    tmp.Comment + res.Comment,
-				Blank:      tmp.Blank + res.Blank,
-				Complexity: tmp.Complexity + res.Complexity,
-				Count:      tmp.Count + 1,
-				Files:      files,
-				Bytes:      res.Bytes + tmp.Bytes,
-			}
-		}
-	}
-
-	language := []LanguageSummary{}
-	for _, summary := range languages {
-		language = append(language, summary)
-	}
-
+	language := aggregateLanguageSummary(input)
 	language = sortLanguageSummary(language)
 
 	jsonString, _ := json.Marshal(language)
@@ -268,53 +244,7 @@ func toCSV(input chan *FileJob) string {
 }
 
 func toCSVSummary(input chan *FileJob) string {
-	languages := map[string]LanguageSummary{}
-
-	for res := range input {
-		_, ok := languages[res.Language]
-
-		if !ok {
-			files := []*FileJob{}
-			if Files {
-				files = append(files, res)
-			}
-
-			languages[res.Language] = LanguageSummary{
-				Name:       res.Language,
-				Lines:      res.Lines,
-				Code:       res.Code,
-				Comment:    res.Comment,
-				Blank:      res.Blank,
-				Complexity: res.Complexity,
-				Count:      1,
-				Files:      files,
-				Bytes:      res.Bytes,
-			}
-		} else {
-			tmp := languages[res.Language]
-			files := tmp.Files
-			if Files {
-				files = append(files, res)
-			}
-
-			languages[res.Language] = LanguageSummary{
-				Name:       res.Language,
-				Lines:      tmp.Lines + res.Lines,
-				Code:       tmp.Code + res.Code,
-				Comment:    tmp.Comment + res.Comment,
-				Blank:      tmp.Blank + res.Blank,
-				Complexity: tmp.Complexity + res.Complexity,
-				Count:      tmp.Count + 1,
-				Files:      files,
-				Bytes:      res.Bytes + tmp.Bytes,
-			}
-		}
-	}
-
-	language := []LanguageSummary{}
-	for _, summary := range languages {
-		language = append(language, summary)
-	}
+	language := aggregateLanguageSummary(input)
 	language = sortLanguageSummary(language)
 
 	records := [][]string{{
@@ -380,6 +310,47 @@ func toCSVFiles(input chan *FileJob) string {
 	return b.String()
 }
 
+func toOpenMetrics(input chan *FileJob) string {
+	if Files {
+		return toOpenMetricsFiles(input)
+	}
+
+	return toOpenMetricsSummary(input)
+}
+
+func toOpenMetricsSummary(input chan *FileJob) string {
+	language := aggregateLanguageSummary(input)
+	language = sortLanguageSummary(language)
+
+	var sb strings.Builder
+	sb.WriteString(openMetricsMetadata)
+	for _, result := range language {
+		sb.WriteString(fmt.Sprintf(openMetricsSummaryRecordFormat, "files", result.Name, result.Count))
+		sb.WriteString(fmt.Sprintf(openMetricsSummaryRecordFormat, "lines", result.Name, result.Lines))
+		sb.WriteString(fmt.Sprintf(openMetricsSummaryRecordFormat, "code", result.Name, result.Code))
+		sb.WriteString(fmt.Sprintf(openMetricsSummaryRecordFormat, "comments", result.Name, result.Comment))
+		sb.WriteString(fmt.Sprintf(openMetricsSummaryRecordFormat, "blanks", result.Name, result.Blank))
+		sb.WriteString(fmt.Sprintf(openMetricsSummaryRecordFormat, "complexity", result.Name, result.Complexity))
+		sb.WriteString(fmt.Sprintf(openMetricsSummaryRecordFormat, "bytes", result.Name, result.Bytes))
+	}
+	return sb.String()
+}
+
+func toOpenMetricsFiles(input chan *FileJob) string {
+	var sb strings.Builder
+	sb.WriteString(openMetricsMetadata)
+	for file := range input {
+		var filename = strings.ReplaceAll(file.Location, "\\", "\\\\")
+		sb.WriteString(fmt.Sprintf(openMetricsFileRecordFormat, "lines", file.Language, filename, file.Lines))
+		sb.WriteString(fmt.Sprintf(openMetricsFileRecordFormat, "code", file.Language, filename, file.Code))
+		sb.WriteString(fmt.Sprintf(openMetricsFileRecordFormat, "comments", file.Language, filename, file.Comment))
+		sb.WriteString(fmt.Sprintf(openMetricsFileRecordFormat, "blanks", file.Language, filename, file.Blank))
+		sb.WriteString(fmt.Sprintf(openMetricsFileRecordFormat, "complexity", file.Language, filename, file.Complexity))
+		sb.WriteString(fmt.Sprintf(openMetricsFileRecordFormat, "bytes", file.Language, filename, file.Bytes))
+	}
+	return sb.String()
+}
+
 // For very large repositories CSV stream can be used which prints results out as they come in
 // with the express idea of lowering memory usage, see https://github.com/boyter/scc/issues/210 for
 // the background on why this might be needed
@@ -610,6 +581,8 @@ func fileSummarize(input chan *FileJob) string {
 		return toSql(input)
 	case strings.ToLower(Format) == "sql-insert":
 		return toSqlInsert(input)
+	case strings.ToLower(Format) == "openmetrics":
+		return toOpenMetrics(input)
 	}
 
 	return fileSummarizeShort(input)
@@ -665,6 +638,8 @@ func fileSummarizeMulti(input chan *FileJob) string {
 				val = toSql(i)
 			case "sql-insert":
 				val = toSqlInsert(i)
+			case "openmetrics":
+				val = toOpenMetrics(i)
 			}
 
 			if t[1] == "stdout" {
@@ -1000,7 +975,7 @@ func calculateCocomoSLOCCount(sumCode int64, str *strings.Builder) {
 	estimatedPeopleRequired := estimatedEffort / estimatedScheduleMonths
 	estimatedCost := EstimateCost(estimatedEffort, AverageWage, Overhead)
 
-	p := gmessage.NewPrinter(language.Make(os.Getenv("LANG")))
+	p := gmessage.NewPrinter(glanguage.Make(os.Getenv("LANG")))
 
 	str.WriteString(p.Sprintf("Total Physical Source Lines of Code (SLOC)                     = %d\n", sumCode))
 	str.WriteString(p.Sprintf("Development Effort Estimate, Person-Years (Person-Months)      = %.2f (%.2f)\n", estimatedEffort/12, estimatedEffort))
@@ -1018,7 +993,7 @@ func calculateCocomo(sumCode int64, str *strings.Builder) {
 	estimatedScheduleMonths := EstimateScheduleMonths(estimatedEffort)
 	estimatedPeopleRequired := estimatedEffort / estimatedScheduleMonths
 
-	p := gmessage.NewPrinter(language.Make(os.Getenv("LANG")))
+	p := gmessage.NewPrinter(glanguage.Make(os.Getenv("LANG")))
 
 	str.WriteString(p.Sprintf("Estimated Cost to Develop (%s) %s%d\n", CocomoProjectType, CurrencySymbol, int64(estimatedCost)))
 	str.WriteString(p.Sprintf("Estimated Schedule Effort (%s) %.2f months\n", CocomoProjectType, estimatedScheduleMonths))
@@ -1093,6 +1068,58 @@ func isLeapYear(year int) bool {
 	return leapFlag
 }
 
+func aggregateLanguageSummary(input chan *FileJob) []LanguageSummary {
+	languages := map[string]LanguageSummary{}
+
+	for res := range input {
+		_, ok := languages[res.Language]
+
+		if !ok {
+			var files []*FileJob
+			if Files {
+				files = append(files, res)
+			}
+
+			languages[res.Language] = LanguageSummary{
+				Name:       res.Language,
+				Lines:      res.Lines,
+				Code:       res.Code,
+				Comment:    res.Comment,
+				Blank:      res.Blank,
+				Complexity: res.Complexity,
+				Count:      1,
+				Files:      files,
+				Bytes:      res.Bytes,
+			}
+		} else {
+			tmp := languages[res.Language]
+			files := tmp.Files
+			if Files {
+				files = append(files, res)
+			}
+
+			languages[res.Language] = LanguageSummary{
+				Name:       res.Language,
+				Lines:      tmp.Lines + res.Lines,
+				Code:       tmp.Code + res.Code,
+				Comment:    tmp.Comment + res.Comment,
+				Blank:      tmp.Blank + res.Blank,
+				Complexity: tmp.Complexity + res.Complexity,
+				Count:      tmp.Count + 1,
+				Files:      files,
+				Bytes:      res.Bytes + tmp.Bytes,
+			}
+		}
+	}
+
+	var language []LanguageSummary
+	for _, summary := range languages {
+		language = append(language, summary)
+	}
+
+	return language
+}
+
 func sortLanguageSummary(language []LanguageSummary) []LanguageSummary {
 	// Cater for the common case of adding plural even for those options that don't make sense
 	// as its quite common for those who English is not a first language to make a simple mistake