Skip to content

Commit

Permalink
feat(nirjas): add continuous single line as multiline & bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
hastagAB committed Jul 19, 2020
1 parent 3278b52 commit 00c947c
Show file tree
Hide file tree
Showing 22 changed files with 221 additions and 25 deletions.
25 changes: 23 additions & 2 deletions extractor/binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
'''

import re
from itertools import groupby
from operator import itemgetter


def readSingleLine(file, regex):
Expand All @@ -47,6 +49,25 @@ def readSingleLine(file, regex):

return content, total_lines, blank_lines, line_of_comments

def contSingleLines(data):
    '''
    Merge runs of consecutive single-line comments into combined comments.

    Each entry of ``data[0]`` is read as ``entry[0]`` (line number) and
    ``entry[1]`` (comment text). Two or more neighbouring entries whose line
    numbers increase by exactly one form a run. Every run is joined into one
    string (each piece prefixed with a single space) and its entries are
    removed from ``data[0]``.

    NOTE(review): run detection presumes ``data[0]`` is ordered by ascending
    line number -- confirm against the producer of ``data``.

    Args:
        data: indexable container whose first item is a mutable list of
            (line number, comment text) entries. That list is modified
            in place; the other items of ``data`` are not touched.

    Returns:
        A 4-tuple ``(data, startLine, endLine, output)``: the same ``data``
        object (merged entries removed from ``data[0]``), the first line
        number of every run, the last line number of every run, and the
        merged text of every run, all in matching order.
    '''
    comments = data[0]
    startLine, endLine, output = [], [], []
    merged = set()

    # Inside a run of consecutive line numbers, "position - line number" is
    # constant, so it works as the grouping key for groupby.
    numbered = ((pos, entry[0]) for pos, entry in enumerate(comments))
    for _, run in groupby(numbered, lambda pair: pair[0] - pair[1]):
        positions = [pos for pos, _ in run]
        if len(positions) < 2:
            # A lone comment stays in data[0] as an ordinary single-line one.
            continue

        startLine.append(comments[positions[0]][0])
        endLine.append(comments[positions[-1]][0])
        output.append(''.join(' ' + comments[pos][1] for pos in positions))
        merged.update(positions)

    if merged:
        # Drop merged entries in one pass after grouping is finished; the
        # list is never mutated while it is being iterated. Slice assignment
        # keeps the list object itself, so callers that hold a reference to
        # data[0] observe the removal.
        comments[:] = [
            entry for pos, entry in enumerate(comments) if pos not in merged
        ]

    return data, startLine, endLine, output

def readMultiLineSame(file, syntax: str):
lines, output, startLine, endLine = [], [], [], []
Expand All @@ -67,7 +88,7 @@ def readMultiLineSame(file, syntax: str):

if copy:
lines_of_comment += 1
content = content + line.replace('\n', '')
content = content + line.replace('\n', ' ')

output = [s.strip("'''") for s in output]

Expand All @@ -94,7 +115,7 @@ def readMultiLineDiff(file, startSyntax: str, endSyntax: str):
endLine.append(lineNumber)
if copy:
line_of_comments += 1
content = content + line.replace('\n','')
content = content + line.replace('\n',' ')
if not line.strip():
blank_lines += 1
line_of_comments += len(output)
Expand Down
12 changes: 11 additions & 1 deletion extractor/languages/c.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''

from binder import readSingleLine, readMultiLineSame, readMultiLineDiff, CommentSyntax
from extractor.binder import *

def cExtractor(file):
result = CommentSyntax()
result1 = result.doubleSlash(file)
result2 = result.slashStar(file)
result4 = contSingleLines(result1)
file = file.split("/")
output = {
"metadata": [{
Expand All @@ -37,12 +38,21 @@ def cExtractor(file):
"sloc": result1[1]-(result1[3]+result2[3]+result1[2])
}],
"single_line_comment": [],
"cont_single_line_comment": [],
"multi_line_comment": []
}

if result4:
result1 = result4[0]

if result1:
for i in result1[0]:
output['single_line_comment'].append({"line_number" :i[0],"comment": i[1]})

if result4:
for idx,i in enumerate(result4[1]):
output['cont_single_line_comment'].append({"start_line": result4[1][idx], "end_line": result4[2][idx], "comment": result4[3][idx]})

if result2:
for idx,i in enumerate(result2[0]):
output['multi_line_comment'].append({"start_line": result2[0][idx], "end_line": result2[1][idx], "comment": result2[2][idx]})
Expand Down
12 changes: 11 additions & 1 deletion extractor/languages/c_sharp.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''

from binder import readSingleLine, readMultiLineSame, readMultiLineDiff, CommentSyntax
from extractor.binder import *

def c_sharpExtractor(file):
result = CommentSyntax()
result1 = result.doubleSlash(file)
result2 = result.slashStar(file)
result4 = contSingleLines(result1)
file = file.split("/")
output = {
"metadata": [{
Expand All @@ -37,12 +38,21 @@ def c_sharpExtractor(file):
"sloc": result1[1]-(result1[3]+result2[3]+result1[2])
}],
"single_line_comment": [],
"cont_single_line_comment": [],
"multi_line_comment": []
}

if result4:
result1 = result4[0]

if result1:
for i in result1[0]:
output['single_line_comment'].append({"line_number" :i[0],"comment": i[1]})

if result4:
for idx,i in enumerate(result4[1]):
output['cont_single_line_comment'].append({"start_line": result4[1][idx], "end_line": result4[2][idx], "comment": result4[3][idx]})

if result2:
for idx,i in enumerate(result2[0]):
output['multi_line_comment'].append({"start_line": result2[0][idx], "end_line": result2[1][idx], "comment": result2[2][idx]})
Expand Down
12 changes: 11 additions & 1 deletion extractor/languages/cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''

from binder import readSingleLine, readMultiLineSame, readMultiLineDiff, CommentSyntax
from extractor.binder import *

def cppExtractor(file):
result = CommentSyntax()
result1 = result.doubleSlash(file)
result2 = result.slashStar(file)
result4 = contSingleLines(result1)
file = file.split("/")
output = {
"metadata": [{
Expand All @@ -37,12 +38,21 @@ def cppExtractor(file):
"sloc": result1[1]-(result1[3]+result2[3]+result1[2])
}],
"single_line_comment": [],
"cont_single_line_comment": [],
"multi_line_comment": []
}

if result4:
result1 = result4[0]

if result1:
for i in result1[0]:
output['single_line_comment'].append({"line_number" :i[0],"comment": i[1]})

if result4:
for idx,i in enumerate(result4[1]):
output['cont_single_line_comment'].append({"start_line": result4[1][idx], "end_line": result4[2][idx], "comment": result4[3][idx]})

if result2:
for idx,i in enumerate(result2[0]):
output['multi_line_comment'].append({"start_line": result2[0][idx], "end_line": result2[1][idx], "comment": result2[2][idx]})
Expand Down
2 changes: 1 addition & 1 deletion extractor/languages/css.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''

from binder import readSingleLine, readMultiLineSame, readMultiLineDiff, CommentSyntax
from extractor.binder import *

def cssExtractor(file):
output = CommentSyntax()
Expand Down
12 changes: 11 additions & 1 deletion extractor/languages/go.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''

from binder import readSingleLine, readMultiLineSame, readMultiLineDiff, CommentSyntax
from extractor.binder import *

def goExtractor(file):
result = CommentSyntax()
result1 = result.doubleSlash(file)
result2 = result.slashStar(file)
result4 = contSingleLines(result1)
file = file.split("/")
output = {
"metadata": [{
Expand All @@ -37,12 +38,21 @@ def goExtractor(file):
"sloc": result1[1]-(result1[3]+result2[3]+result1[2])
}],
"single_line_comment": [],
"cont_single_line_comment": [],
"multi_line_comment": []
}

if result4:
result1 = result4[0]

if result1:
for i in result1[0]:
output['single_line_comment'].append({"line_number" :i[0],"comment": i[1]})

if result4:
for idx,i in enumerate(result4[1]):
output['cont_single_line_comment'].append({"start_line": result4[1][idx], "end_line": result4[2][idx], "comment": result4[3][idx]})

if result2:
for idx,i in enumerate(result2[0]):
output['multi_line_comment'].append({"start_line": result2[0][idx], "end_line": result2[1][idx], "comment": result2[2][idx]})
Expand Down
13 changes: 12 additions & 1 deletion extractor/languages/haskell.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''

from binder import readSingleLine, readMultiLineSame, readMultiLineDiff, CommentSyntax
from extractor.binder import *


def haskellExtractor(file):
result = CommentSyntax()
result1 = result.doubleDash(file)
result2 = result.curlybracesDash(file)
result4 = contSingleLines(result1)
file = file.split("/")
output = {
"metadata": [{
Expand All @@ -37,12 +39,21 @@ def haskellExtractor(file):
"sloc": result1[1]-(result1[3]+result2[3]+result1[2])
}],
"single_line_comment": [],
"cont_single_line_comment": [],
"multi_line_comment": []
}

if result4:
result1 = result4[0]

if result1:
for i in result1[0]:
output['single_line_comment'].append({"line_number" :i[0],"comment": i[1]})

if result4:
for idx,i in enumerate(result4[1]):
output['cont_single_line_comment'].append({"start_line": result4[1][idx], "end_line": result4[2][idx], "comment": result4[3][idx]})

if result2:
for idx,i in enumerate(result2[0]):
output['multi_line_comment'].append({"start_line": result2[0][idx], "end_line": result2[1][idx], "comment": result2[2][idx]})
Expand Down
12 changes: 11 additions & 1 deletion extractor/languages/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''

from binder import readSingleLine, readMultiLineSame, readMultiLineDiff, CommentSyntax
from extractor.binder import *

def htmlExtractor(file):
result = CommentSyntax()
result1 = result.gtExclamationDash(file)
result2 = result.slashStar(file)
result4 = contSingleLines(result1)
file = file.split("/")
output = {
"metadata": [{
Expand All @@ -37,12 +38,21 @@ def htmlExtractor(file):
"sloc": result1[4]-(result1[3]+result2[3]+result1[5])
}],
"single_line_comment": [],
"cont_single_line_comment": [],
"multi_line_comment": []
}

if result4:
result1 = result4[0]

if result1:
for i in result1[0]:
output['single_line_comment'].append({"line_number" :i[0],"comment": i[1]})

if result4:
for idx,i in enumerate(result4[1]):
output['cont_single_line_comment'].append({"start_line": result4[1][idx], "end_line": result4[2][idx], "comment": result4[3][idx]})

if result2:
for idx,i in enumerate(result2[0]):
output['multi_line_comment'].append({"start_line": result2[0][idx], "end_line": result2[1][idx], "comment": result2[2][idx]})
Expand Down
12 changes: 11 additions & 1 deletion extractor/languages/java.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''

from binder import readSingleLine, readMultiLineSame, readMultiLineDiff, CommentSyntax
from extractor.binder import *

def javaExtractor(file):
result = CommentSyntax()
result1 = result.doubleSlash(file)
result2 = result.slashStar(file)
result4 = contSingleLines(result1)
file = file.split("/")
output = {
"metadata": [{
Expand All @@ -37,12 +38,21 @@ def javaExtractor(file):
"sloc": result1[1]-(result1[3]+result2[3]+result1[2])
}],
"single_line_comment": [],
"cont_single_line_comment": [],
"multi_line_comment": []
}

if result4:
result1 = result4[0]

if result1:
for i in result1[0]:
output['single_line_comment'].append({"line_number" :i[0],"comment": i[1]})

if result4:
for idx,i in enumerate(result4[1]):
output['cont_single_line_comment'].append({"start_line": result4[1][idx], "end_line": result4[2][idx], "comment": result4[3][idx]})

if result2:
for idx,i in enumerate(result2[0]):
output['multi_line_comment'].append({"start_line": result2[0][idx], "end_line": result2[1][idx], "comment": result2[2][idx]})
Expand Down
12 changes: 11 additions & 1 deletion extractor/languages/javascript.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''

from binder import readSingleLine, readMultiLineSame, readMultiLineDiff, CommentSyntax
from extractor.binder import *

def javascriptExtractor(file):
result = CommentSyntax()
result1 = result.doubleSlash(file)
result2 = result.slashStar(file)
result4 = contSingleLines(result1)
file = file.split("/")
output = {
"metadata": [{
Expand All @@ -37,12 +38,21 @@ def javascriptExtractor(file):
"sloc": result1[1]-(result1[3]+result2[3]+result1[2])
}],
"single_line_comment": [],
"cont_single_line_comment": [],
"multi_line_comment": []
}

if result4:
result1 = result4[0]

if result1:
for i in result1[0]:
output['single_line_comment'].append({"line_number" :i[0],"comment": i[1]})

if result4:
for idx,i in enumerate(result4[1]):
output['cont_single_line_comment'].append({"start_line": result4[1][idx], "end_line": result4[2][idx], "comment": result4[3][idx]})

if result2:
for idx,i in enumerate(result2[0]):
output['multi_line_comment'].append({"start_line": result2[0][idx], "end_line": result2[1][idx], "comment": result2[2][idx]})
Expand Down
12 changes: 11 additions & 1 deletion extractor/languages/kotlin.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''

from binder import readSingleLine, readMultiLineSame, readMultiLineDiff, CommentSyntax
from extractor.binder import *

def kotlinExtractor(file):
result = CommentSyntax()
result1 = result.doubleSlash(file)
result2 = result.slashStar(file)
result4 = contSingleLines(result1)
file = file.split("/")
output = {
"metadata": [{
Expand All @@ -37,12 +38,21 @@ def kotlinExtractor(file):
"sloc": result1[1]-(result1[3]+result2[3]+result1[2])
}],
"single_line_comment": [],
"cont_single_line_comment": [],
"multi_line_comment": []
}

if result4:
result1 = result4[0]

if result1:
for i in result1[0]:
output['single_line_comment'].append({"line_number" :i[0],"comment": i[1]})

if result4:
for idx,i in enumerate(result4[1]):
output['cont_single_line_comment'].append({"start_line": result4[1][idx], "end_line": result4[2][idx], "comment": result4[3][idx]})

if result2:
for idx,i in enumerate(result2[0]):
output['multi_line_comment'].append({"start_line": result2[0][idx], "end_line": result2[1][idx], "comment": result2[2][idx]})
Expand Down
Loading

0 comments on commit 00c947c

Please sign in to comment.