Skip to content

Commit

Permalink
Merge 3182851 into ad1833c
Browse files Browse the repository at this point in the history
  • Loading branch information
boyter committed Jun 13, 2019
2 parents ad1833c + 3182851 commit de54e29
Show file tree
Hide file tree
Showing 8 changed files with 1,086 additions and 878 deletions.
1,706 changes: 857 additions & 849 deletions languages.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion processor/constants.go

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions processor/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,9 @@ func processLanguageFeature(name string, value Language) {
processMask |= multiLineCommentMask

for _, v := range value.Quotes {
stringMask |= v[0][0]
stringTrie.InsertClose(TString, []byte(v[0]), []byte(v[1]))
tokenTrie.InsertClose(TString, []byte(v[0]), []byte(v[1]))
stringMask |= v.Start[0]
stringTrie.InsertClose(TString, []byte(v.Start), []byte(v.End))
tokenTrie.InsertClose(TString, []byte(v.Start), []byte(v.End))
}
processMask |= stringMask

Expand All @@ -235,6 +235,7 @@ func processLanguageFeature(name string, value Language) {
StringCheckMask: stringMask,
ProcessMask: processMask,
Keywords: value.Keywords,
Quotes: value.Quotes,
}
LanguageFeaturesMutex.Unlock()
}
Expand Down
11 changes: 10 additions & 1 deletion processor/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,22 @@ const (
TComplexity
)

// Quote is a struct which holds rules and start/end values for string quotes.
// One Quote describes a single kind of string literal for a language: the text
// that opens it, the text that closes it, and flags requesting special handling.
type Quote struct {
// Start is the text that opens the string; it is registered together with End
// as a string token for the language.
Start string `json:"start"`
// End is the text that closes a string opened by Start.
End string `json:"end"`
// IgnoreEscape enables turning off the \ check for C# @"\" string examples,
// i.e. a backslash before the closing quote does not keep the string open.
// See https://github.com/boyter/scc/issues/71
IgnoreEscape bool `json:"ignoreEscape"`
// DocString enables the docstring check for Python where "If the triple quote
// string starts following a newline with only white-space characters in front
// and ends followed by only a newline or white-space characters it is a
// comment". See https://github.com/boyter/scc/issues/62
DocString bool `json:"docString"`
}

// Language is a struct which contains the values for each language stored in languages.json
type Language struct {
LineComment []string `json:"line_comment"`
ComplexityChecks []string `json:"complexitychecks"`
Extensions []string `json:"extensions"`
ExtensionFile bool `json:"extensionFile"`
MultiLine [][]string `json:"multi_line"`
Quotes [][]string `json:"quotes"`
Quotes []Quote `json:"quotes"`
NestedMultiLine bool `json:"nestedmultiline"`
Keywords []string `json:"keywords"`
}
Expand All @@ -39,6 +47,7 @@ type LanguageFeature struct {
StringCheckMask byte
ProcessMask byte
Keywords []string
Quotes []Quote
}

// FileJobCallback is an interface that FileJobs can implement to get a per line callback with the line type
Expand Down
91 changes: 69 additions & 22 deletions processor/workers.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,17 +108,21 @@ func resetState(currentState int64) int64 {
return currentState
}

func stringState(fileJob *FileJob, index int, endPoint int, stringTrie *Trie, endString []byte, currentState int64) (int, int64) {
func stringState(fileJob *FileJob, index int, endPoint int, stringTrie *Trie, endString []byte, currentState int64, ignoreEscape bool) (int, int64) {
// Its not possible to enter this state without checking at least 1 byte so it is safe to check -1 here
// without checking if it is out of bounds first
for i := index; i < endPoint; i++ {
index = i

// If we hit a newline, return because we want to count the stats but keep
// the current state so we end up back in this loop when the outer
// one calls again
if fileJob.Content[i] == '\n' {
return i, currentState
}

if fileJob.Content[i-1] != '\\' {
// If we are in a literal string we want to ignore the \ check OR we aren't checking for special ones
if ignoreEscape || fileJob.Content[i-1] != '\\' {
if ok, _, _ := stringTrie.Match(fileJob.Content[i:]); ok != 0 {
return i, SCode
}
Expand All @@ -137,18 +141,18 @@ func codeState(
endComments [][]byte,
langFeatures LanguageFeature,
digest *hash.Hash,
) (int, int64, []byte, [][]byte) {
) (int, int64, []byte, [][]byte, bool) {
for i := index; i < endPoint; i++ {
curByte := fileJob.Content[i]
index = i

if curByte == '\n' {
return i, currentState, endString, endComments
return i, currentState, endString, endComments, false
}

if isBinary(i, curByte) {
fileJob.Binary = true
return i, currentState, endString, endComments
return i, currentState, endString, endComments, false
}

if shouldProcess(curByte, langFeatures.ProcessMask) {
Expand All @@ -162,24 +166,30 @@ func codeState(

switch tokenType, offsetJump, endString := langFeatures.Tokens.Match(fileJob.Content[i:]); tokenType {
case TString:
// If we are in string state then check what sort of string so we know if docstring OR ignoreescape string
i, ignoreEscape := verifyIgnoreEscape(langFeatures, fileJob, index)

// It is safe to -1 here as to enter the code state we need to have
// transitioned from blank to here hence i should always be >= 1
// This check is to ensure we aren't in a character declaration
// TODO this should use language features
if fileJob.Content[i-1] != '\\' {
currentState = SString
}
return i, currentState, endString, endComments

return i, currentState, endString, endComments, ignoreEscape

case TSlcomment:
currentState = SCommentCode
return i, currentState, endString, endComments
return i, currentState, endString, endComments, false

case TMlcomment:
if langFeatures.Nested || len(endComments) == 0 {
endComments = append(endComments, endString)
currentState = SMulticommentCode
i += offsetJump - 1
return i, currentState, endString, endComments

return i, currentState, endString, endComments, false
}

case TComplexity:
Expand All @@ -190,7 +200,7 @@ func codeState(
}
}

return index, currentState, endString, endComments
return index, currentState, endString, endComments, false
}

func commentState(fileJob *FileJob, index int, endPoint int, currentState int64, endComments [][]byte, endString []byte, langFeatures LanguageFeature) (int, int64, []byte, [][]byte) {
Expand Down Expand Up @@ -227,6 +237,7 @@ func commentState(fileJob *FileJob, index int, endPoint int, currentState int64,
if ok, offsetJump, endString := langFeatures.MultiLineComments.Match(fileJob.Content[i:]); ok != 0 {
endComments = append(endComments, endString)
i += offsetJump - 1

return i, currentState, endString, endComments
}
}
Expand All @@ -243,23 +254,24 @@ func blankState(
endComments [][]byte,
endString []byte,
langFeatures LanguageFeature,
) (int, int64, []byte, [][]byte) {
) (int, int64, []byte, [][]byte, bool) {
switch tokenType, offsetJump, endString := langFeatures.Tokens.Match(fileJob.Content[index:]); tokenType {
case TMlcomment:
if langFeatures.Nested || len(endComments) == 0 {
endComments = append(endComments, endString)
currentState = SMulticomment
index += offsetJump - 1
return index, currentState, endString, endComments
return index, currentState, endString, endComments, false
}

case TSlcomment:
currentState = SComment
return index, currentState, endString, endComments
return index, currentState, endString, endComments, false

case TString:
index, ignoreEscape := verifyIgnoreEscape(langFeatures, fileJob, index)
currentState = SString
return index, currentState, endString, endComments
return index, currentState, endString, endComments, ignoreEscape

case TComplexity:
currentState = SCode
Expand All @@ -271,7 +283,35 @@ func blankState(
currentState = SCode
}

return index, currentState, endString, endComments
return index, currentState, endString, endComments, false
}

// verifyIgnoreEscape checks whether the string beginning at index opens with a
// quote flagged as IgnoreEscape or DocString in langFeatures.Quotes. Some
// languages such as C# have quoted strings like @"\" where no escape character
// is required, and this lets the caller cater for those cases.
//
// Every flagged quote is tested in order against fileJob.Content at the current
// index. On a match the index is moved past that quote's Start, so the opening
// token is not picked up again for cases like @", and the returned bool is set
// to true. Unflagged quotes are ignored.
//
// It returns the (possibly advanced) index and whether any flagged quote
// matched. A Start that would extend beyond the end of fileJob.Content is
// treated as a non-match; previously this read past the end of the content and
// panicked, for example when a file ended just after a single " while a """
// rule was configured.
func verifyIgnoreEscape(langFeatures LanguageFeature, fileJob *FileJob, index int) (int, bool) {
	ignoreEscape := false

	for i := range langFeatures.Quotes {
		// Take a pointer so each Quote is not copied on every iteration.
		quote := &langFeatures.Quotes[i]
		if !quote.DocString && !quote.IgnoreEscape {
			continue
		}

		// Guard the upper bound before slicing: the token match that brought us
		// here may have been for a shorter quote than this one.
		end := index + len(quote.Start)
		if end > len(fileJob.Content) || string(fileJob.Content[index:end]) != quote.Start {
			continue
		}

		// Matched, so jump ahead enough that we don't pick it up again.
		ignoreEscape = true
		index = end
	}

	return index, ignoreEscape
}

// CountStats will process the fileJob
Expand Down Expand Up @@ -316,6 +356,9 @@ func CountStats(fileJob *FileJob) {
endComments := [][]byte{}
endString := []byte{}

// TODO needs to be set via langFeatures.Quotes[0].IgnoreEscape for the matching feature
ignoreEscape := false

// For determining duplicates we need the below. The reason for creating
// the byte array here is to avoid GC pressure. MD5 is in the standard library
// and is fast enough to not warrant murmur3 hashing. No need to be
Expand All @@ -327,7 +370,6 @@ func CountStats(fileJob *FileJob) {
}

for index := checkBomSkip(fileJob); index < len(fileJob.Content); index++ {

// Based on our current state determine if the state should change by checking
// what the character is. The below is very CPU bound so need to be careful if
// changing anything in here and profile/measure afterwards!
Expand All @@ -336,7 +378,7 @@ func CountStats(fileJob *FileJob) {

switch currentState {
case SCode:
index, currentState, endString, endComments = codeState(
index, currentState, endString, endComments, ignoreEscape = codeState(
fileJob,
index,
endPoint,
Expand All @@ -347,7 +389,7 @@ func CountStats(fileJob *FileJob) {
&digest,
)
case SString:
index, currentState = stringState(fileJob, index, endPoint, langFeatures.Strings, endString, currentState)
index, currentState = stringState(fileJob, index, endPoint, langFeatures.Strings, endString, currentState, ignoreEscape)
case SMulticomment, SMulticommentCode:
index, currentState, endString, endComments = commentState(
fileJob,
Expand All @@ -361,7 +403,7 @@ func CountStats(fileJob *FileJob) {
case SBlank, SMulticommentBlank:
// From blank we can move into comment, move into a multiline comment
// or move into code but we can only do one.
index, currentState, endString, endComments = blankState(
index, currentState, endString, endComments, ignoreEscape = blankState(
fileJob,
index,
endPoint,
Expand All @@ -384,10 +426,6 @@ func CountStats(fileJob *FileJob) {
if fileJob.Content[index] == '\n' || index >= endPoint {
fileJob.Lines++

if Trace {
printTrace(fmt.Sprintf("%s line %d ended with state: %d", fileJob.Location, fileJob.Lines, currentState))
}

switch currentState {
case SCode, SString, SCommentCode, SMulticommentCode:
fileJob.Code++
Expand All @@ -397,6 +435,9 @@ func CountStats(fileJob *FileJob) {
return
}
}
if Trace {
printTrace(fmt.Sprintf("%s line %d ended with state: %d: counted as code", fileJob.Location, fileJob.Lines, currentState))
}
case SComment, SMulticomment, SMulticommentBlank:
fileJob.Comment++
currentState = resetState(currentState)
Expand All @@ -405,13 +446,19 @@ func CountStats(fileJob *FileJob) {
return
}
}
if Trace {
printTrace(fmt.Sprintf("%s line %d ended with state: %d: counted as comment", fileJob.Location, fileJob.Lines, currentState))
}
case SBlank:
fileJob.Blank++
if fileJob.Callback != nil {
if !fileJob.Callback.ProcessLine(fileJob, fileJob.Lines, LINE_BLANK) {
return
}
}
if Trace {
printTrace(fmt.Sprintf("%s line %d ended with state: %d: counted as blank", fileJob.Location, fileJob.Lines, currentState))
}
}
}
}
Expand Down
29 changes: 29 additions & 0 deletions processor/workers_regression_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,32 @@ namespace Baz
t.Errorf("Expected 1 lines got %d", fileJob.Blank)
}
}

// TestCountStatsPr76 is a regression test for Go raw strings that contain a
// lone backslash: the backtick-quoted `\` must close on the same line so the
// following line is still counted as a comment rather than as more string.
func TestCountStatsPr76(t *testing.T) {
	ProcessConstants()

	// Three lines: a package clause, a raw string holding a single backslash,
	// and a trailing line comment with no final newline.
	source := `package main
var MyString = ` + "`\\`" + `
// Comment`

	fileJob := FileJob{
		Language: "Go",
		Content:  []byte(source),
	}

	CountStats(&fileJob)

	if got := fileJob.Lines; got != 3 {
		t.Errorf("Expected 3 lines")
	}

	if got := fileJob.Code; got != 2 {
		t.Errorf("Expected 2 lines got %d", got)
	}

	if got := fileJob.Comment; got != 1 {
		t.Errorf("Expected 1 lines got %d", got)
	}

	if got := fileJob.Blank; got != 0 {
		t.Errorf("Expected 0 lines got %d", got)
	}
}
Loading

0 comments on commit de54e29

Please sign in to comment.