In [2]:
import re

In [3]:
class HiddenFileError(Exception):
    """Signals that a file was rejected because it is a hidden file.

    Attributes:
        fileName: the path that was passed to the constructor.
        message: human-readable explanation built from ``fileName``.
    """

    def __init__(self, fileName):
        self.fileName = fileName
        self.message = "'" + self.fileName + "' is a hidden file, pass."

    def __str__(self):
        # Without this, str(e) and tracebacks show only the bare file name
        # (the constructor argument) instead of the explanatory message.
        return self.message
        
class NoExtentionError(Exception):
    """Signals that a file was rejected because its name has no extension.

    Attributes:
        fileName: the path that was passed to the constructor.
        message: human-readable explanation built from ``fileName``.
    """

    def __init__(self, fileName):
        self.fileName = fileName
        self.message = "'" + self.fileName + "' file has no extention, invalid file type."

    def __str__(self):
        # Without this, str(e) and tracebacks show only the bare file name
        # (the constructor argument) instead of the explanatory message.
        return self.message

In [86]:
def getFileType(fileName):
    """Return the extension of ``fileName``: the text after its last dot.

    Only the final ``/``-separated path component is inspected when deciding
    whether the file is acceptable:

    * if that component starts with a dot, the ``HiddenFileError`` message is
      printed and ``None`` is returned;
    * if it contains no dot at all, the ``NoExtentionError`` message is
      printed and ``None`` is returned.
    """
    baseName = fileName.rsplit("/", 1)[-1]
    firstDot = baseName.find(".")

    if firstDot == 0:
        print(HiddenFileError(fileName).message)
        return None
    if firstDot == -1:
        print(NoExtentionError(fileName).message)
        return None

    return fileName.rsplit(".", 1)[-1]

In [5]:
# Extensions whose block comments and end-of-line comments use different delimiters.
BlockCommentDiffSingleCommentType = ["java", "cpp", "c"]
# Extensions that only have an end-of-line comment delimiter.
SingleCommentType = ["py"]
# Extensions that only have a begin/end delimited comment.
BlockCommentType = ["xml", "html"]


def getFileCommentsDesc(fileName):
    """Build the comment-statistics object matching the extension of ``fileName``.

    The extension is obtained from ``getFileType``.  Returns ``None`` when the
    extension is not listed in any of the three tables above (including the
    case where ``getFileType`` itself returned ``None``).
    """
    extension = getFileType(fileName)

    if extension in BlockCommentDiffSingleCommentType:
        return BlockCommentDiffSingleCommentFileCommentsDesc(fileName, "/*", "*/", "//")
    if extension in SingleCommentType:
        return SingleCommentFileCommentsDesc(fileName, "#")
    if extension in BlockCommentType:
        return BlockCommentFileCommentsDesc(fileName, "<!--", "-->")
    return None

In [6]:
class BlockComment:
    """Ordered collection of the raw lines that make up one block comment."""

    def __init__(self):
        self.lines = list()

    def addLine(self, line):
        """Append ``line`` to the end of this comment."""
        self.lines += [line]

    def getNumberOfLines(self):
        """Return how many lines have been added so far."""
        lineCount = len(self.lines)
        return lineCount

In [7]:
class FileCommentsDesc:
    """Base container for the comment statistics collected from one file.

    The counters and lists start empty; the code that parses the file is
    expected to fill them in.
    """

    def __init__(self, fileName):
        self.fileName = fileName
        # Line counters.
        self.numLines = 0
        self.numCommentsLines = 0
        # Collected comment material.
        self.singleLineComments = []
        self.blockComments = []
        self.TODOes = []

    def getNumberOfLines(self):
        """Total number of lines recorded for the file."""
        return self.numLines

    def setNumberOfLines(self, num):
        """Overwrite the total line count with ``num``."""
        self.numLines = num

    def getNumberOfCommentsLines(self):
        """Number of lines counted as comment lines of any kind."""
        return self.numCommentsLines

    def getNumberOfSingleLineCommentsLines(self):
        """Number of recorded single-line comments."""
        return len(self.singleLineComments)

    def getNumberOfBlockLineCommentsLines(self):
        """Comment lines that are not single-line comments (total minus single)."""
        totalCommentLines = self.getNumberOfCommentsLines()
        singleCommentLines = self.getNumberOfSingleLineCommentsLines()
        return totalCommentLines - singleCommentLines

    def getNumberOfBlockLineComments(self):
        """Number of recorded block comments."""
        return len(self.blockComments)

    def getNumberOfTODOes(self):
        """Number of recorded TODO entries."""
        return len(self.TODOes)

    def isDelimiterValidInLine(self, delimiter, line):
        """Return True if ``delimiter`` occurs in ``line`` outside double quotes.

        Every double-quoted span is deleted from ``line`` first, so a
        delimiter that only appears inside such a span does not count.
        """
        lineWithoutQuoted = re.sub(r'"([^"]*)"', "", line)
        return lineWithoutQuoted.find(delimiter) != -1
    
# Methods that each child class is expected to implement (not enforced here):
#     addValidTODOes()
#     checkTODOes()
#     readFileAndCloseFile()
    

In [79]:
class BlockCommentFileCommentsDesc(FileCommentsDesc):
    """Comment statistics for files whose only comment form is begin/end delimited.

    A comment that opens and closes on the same line is recorded as a
    single-line comment; one spanning several lines is recorded as a
    ``BlockComment``.  Nested block comments are not supported.

    The file is parsed immediately on construction.

    Args:
        fileName: path of the file to analyse.
        beginDelimiter: text that opens a comment (e.g. ``<!--``).
        endDelimiter: text that closes a comment (e.g. ``-->``).
    """

    def __init__(self, fileName, beginDelimiter, endDelimiter):
        super().__init__(fileName)

        self.beginDelimiter = beginDelimiter
        self.endDelimiter = endDelimiter
        self.readFileAndCloseFile()

    def readFileAndCloseFile(self):
        """Read the file line by line and fill in the counters and lists.

        Each line is classified with an ``inCommentRange`` code:
            0 => line is not comment
            1 => single line comment (opens and closes on this line)
            2 => first line of block comment
            3 => middle line of block comment
            4 => last line of block comment
        """
        tempBlockComment = BlockComment()
        blockCommentHasStarted = False

        # ``with`` releases the handle even if parsing raises part-way through.
        with open(self.fileName, "r") as f:
            for line in f:
                self.numLines += 1
                hasBegin = self.isDelimiterValidInLine(self.beginDelimiter, line)
                hasEnd = self.isDelimiterValidInLine(self.endDelimiter, line)
                inCommentRange = 0

                if blockCommentHasStarted and not hasEnd:
                    # Still inside an open block.  This must be exclusive with
                    # the branches below, otherwise a stray begin delimiter on
                    # this line would count the line a second time.
                    self.numCommentsLines += 1
                    inCommentRange = 3
                    tempBlockComment.addLine(line)
                elif hasBegin and hasEnd:
                    self.numCommentsLines += 1
                    inCommentRange = 1
                    self.singleLineComments.append(line)
                elif hasBegin:
                    self.numCommentsLines += 1
                    inCommentRange = 2
                    blockCommentHasStarted = True
                    tempBlockComment.addLine(line)
                elif hasEnd:
                    self.numCommentsLines += 1
                    inCommentRange = 4
                    blockCommentHasStarted = False
                    tempBlockComment.addLine(line)
                    self.blockComments.append(tempBlockComment)
                    tempBlockComment = BlockComment()

                self.checkTODOes(inCommentRange, line)

    def checkTODOes(self, inCommentRange, line):
        """Record every case-insensitive "todo" that lies inside the comment part of ``line``.

        Args:
            inCommentRange: classification code of ``line`` (see
                ``readFileAndCloseFile``); 0 means the line is ignored.
            line: the raw line text.

        For each accepted occurrence, the text from the "todo" to the end of
        the line is appended to ``self.TODOes``.
        """
        if inCommentRange == 0:
            return
        beginDelimiterIndex = line.find(self.beginDelimiter)
        endDelimiterIndex = line.find(self.endDelimiter)
        # finditer yields the real offset of every match, including one at
        # column 0, which a find() search starting at offset 1 would miss.
        for match in re.finditer("todo", line, re.IGNORECASE):
            todoIndex = match.start()
            if inCommentRange == 3:
                insideComment = True
            elif inCommentRange == 4:
                insideComment = todoIndex < endDelimiterIndex
            elif inCommentRange == 2:
                insideComment = todoIndex > beginDelimiterIndex
            elif inCommentRange == 1:
                insideComment = beginDelimiterIndex < todoIndex < endDelimiterIndex
            else:
                insideComment = False
            if insideComment:
                self.TODOes.append(line[todoIndex:])

In [96]:
bbf = getFileCommentsDesc("../data/agents.xml")

In [91]:
bbf.getNumberOfLines()

20

In [92]:
bbf.getNumberOfBlockLineComments()

1

In [93]:
bbf.getNumberOfBlockLineCommentsLines()

3

In [97]:
bbf.getNumberOfTODOes()

4

In [95]:
bbf.getNumberOfCommentsLines()

6

In [30]:
class BlockCommentSameSingleCommentFileCommentsDesc(FileCommentsDesc):
    """Comment statistics for files with a symmetric block delimiter plus a line delimiter.

    ``blockDelimiter`` both opens and closes a delimited block (e.g. ``'''``).
    ``singleAndBlockDelimiter`` introduces an end-of-line comment (e.g. ``#``);
    a run of consecutive lines that *start* with it is grouped into one
    ``BlockComment``.

    NOTE: a delimited block that opens and closes on the same line is treated
    as only opening a block.

    The file is parsed immediately on construction.
    """

    def __init__(self, fileName, blockDelimiter, singleAndBlockDelimiter):
        super().__init__(fileName)

        self.blockDelimiter = blockDelimiter
        self.singleAndBlockDelimiter = singleAndBlockDelimiter
        self.readFileAndCloseFile()

    def addValidTODOesInBlockComment(self, inCommentRange, line):
        """Record the "todo" occurrences of ``line`` that lie inside a delimited block.

        ``inCommentRange`` is 2 (first line: only text after the delimiter
        counts), 3 (middle line: everything counts) or 4 (last line: only text
        before the delimiter counts).  Other values record nothing.
        """
        delimiterIndex = line.find(self.blockDelimiter)
        # finditer yields the real offset of every match, including one at
        # column 0, which a find() search starting at offset 1 would miss.
        for match in re.finditer("todo", line, re.IGNORECASE):
            todoIndex = match.start()
            if inCommentRange == 3:
                self.TODOes.append(line[todoIndex:])
            elif inCommentRange == 4:
                if todoIndex < delimiterIndex:
                    self.TODOes.append(line[todoIndex:])
            elif inCommentRange == 2:
                if todoIndex > delimiterIndex:
                    self.TODOes.append(line[todoIndex:])

    def checkTODOesInBlockComment(self, inCommentRange, line):
        """Scan a delimited-block line for TODOs unless it is a non-comment line (code 0)."""
        if inCommentRange == 0:
            return
        # Single delegation: scanning here as well would record each TODO twice.
        self.addValidTODOesInBlockComment(inCommentRange, line)

    def addValidTODOesInSingleAndBlockComment(self, line):
        """Record every "todo" located after the first line-comment delimiter in ``line``."""
        delimiterIndex = line.find(self.singleAndBlockDelimiter)
        for match in re.finditer("todo", line, re.IGNORECASE):
            todoIndex = match.start()
            if todoIndex > delimiterIndex:
                self.TODOes.append(line[todoIndex:])

    def checkTODOesInSingleAndBlockComment(self, inCommentRange, line):
        """Scan a line-comment line for TODOs unless it is a non-comment line (code 0)."""
        if inCommentRange == 0:
            return
        self.addValidTODOesInSingleAndBlockComment(line)

    def readFileAndCloseFile(self):
        """Read the file line by line and fill in the counters and lists.

        Lines belonging to a delimited block are classified as
            2 => first line of block comment
            3 => middle line of block comment
            4 => last line of block comment
        All other lines are classified as
            0 => line is not comment
            1 => line is a comment
        """
        tempBlockComment = BlockComment()
        blockCommentHasStarted = False
        singleAndBlockCommentHasStarted = False

        # ``with`` releases the handle even if parsing raises part-way through.
        with open(self.fileName, "r") as f:
            for line in f:
                self.numLines += 1
                hasBlockDelimiter = self.isDelimiterValidInLine(self.blockDelimiter, line)

                if hasBlockDelimiter or blockCommentHasStarted:
                    if not blockCommentHasStarted:
                        # first line of block comment
                        # A pending run of line comments ends here; store it
                        # so its lines are not merged into the delimited block.
                        if tempBlockComment.getNumberOfLines() > 0:
                            self.blockComments.append(tempBlockComment)
                            tempBlockComment = BlockComment()
                        singleAndBlockCommentHasStarted = False
                        self.numCommentsLines += 1
                        inCommentRange = 2
                        blockCommentHasStarted = True
                        tempBlockComment.addLine(line)
                    elif hasBlockDelimiter:
                        # last line of block comment
                        self.numCommentsLines += 1
                        inCommentRange = 4
                        blockCommentHasStarted = False
                        tempBlockComment.addLine(line)
                        self.blockComments.append(tempBlockComment)
                        tempBlockComment = BlockComment()
                    else:
                        # middle line of block comment
                        self.numCommentsLines += 1
                        inCommentRange = 3
                        tempBlockComment.addLine(line)
                    self.checkTODOesInBlockComment(inCommentRange, line)
                else:
                    inCommentRange = 0
                    if singleAndBlockCommentHasStarted \
                            and self.isPotentialBlockCommentLine(self.singleAndBlockDelimiter, line):
                        self.numCommentsLines += 1
                        if tempBlockComment.getNumberOfLines() == 0:
                            # The previous line was filed as a single-line
                            # comment; it is really the first line of this run.
                            firstLine = self.singleLineComments.pop()
                            tempBlockComment.addLine(firstLine)
                        inCommentRange = 1
                        tempBlockComment.addLine(line)
                    else:
                        # Whatever this line is, it ends any pending run.
                        if tempBlockComment.getNumberOfLines() > 0:
                            self.blockComments.append(tempBlockComment)
                            tempBlockComment = BlockComment()
                        if self.isDelimiterValidInLine(self.singleAndBlockDelimiter, line):
                            singleAndBlockCommentHasStarted = self.isPotentialBlockCommentLine(
                                self.singleAndBlockDelimiter, line)
                            self.numCommentsLines += 1
                            inCommentRange = 1
                            self.singleLineComments.append(line)
                        else:
                            singleAndBlockCommentHasStarted = False

                    self.checkTODOesInSingleAndBlockComment(inCommentRange, line)

        # A run of line comments that reaches end-of-file has no following
        # line to trigger the flush above.
        if not blockCommentHasStarted and tempBlockComment.getNumberOfLines() > 0:
            self.blockComments.append(tempBlockComment)

    def isPotentialBlockCommentLine(self, delimiter, line):
        """Return True if ``line`` (ignoring leading whitespace) starts with ``delimiter``."""
        return super().isDelimiterValidInLine(delimiter, line) and line.lstrip().find(delimiter) == 0
    

In [31]:
bsf = BlockCommentSameSingleCommentFileCommentsDesc("../data/example.py", "'''", "#")

In [32]:
bsf.getNumberOfCommentsLines()
bsf.getNumberOfBlockLineComments()

2

In [35]:
bsf.getNumberOfTODOes()
bsf.getNumberOfSingleLineCommentsLines()

3

In [69]:
class BlockCommentDiffSingleCommentFileCommentsDesc(FileCommentsDesc):
    """Comment statistics for files with distinct block and end-of-line comment syntax.

    Block comments open with ``beginDelimiterBlock`` and close with
    ``endDelimiterBlock`` (e.g. ``/*`` and ``*/``); ``endOfLineDelimiter``
    introduces a single-line comment (e.g. ``//``).

    The file is parsed immediately on construction.
    """

    def __init__(self, fileName, beginDelimiterBlock, endDelimiterBlock, endOfLineDelimiter):
        super().__init__(fileName)

        self.beginDelimiterBlock = beginDelimiterBlock
        self.endDelimiterBlock = endDelimiterBlock
        self.endOfLineDelimiter = endOfLineDelimiter
        self.readFileAndCloseFile()

    def addValidTODOes(self, delimiter, line):
        """Record the "todo" occurrences of ``line`` that are valid relative to ``delimiter``.

        * ``delimiter == ""``: every occurrence is recorded.
        * ``delimiter == self.endDelimiterBlock``: only occurrences before it.
        * any other delimiter: only occurrences after it.
        """
        delimiterIndex = line.find(delimiter)
        # finditer yields the real offset of every match; repeatedly calling
        # find() from the previous hit returns that same hit again.
        for match in re.finditer("todo", line, re.IGNORECASE):
            todoIndex = match.start()
            if delimiter == "":
                self.TODOes.append(line[todoIndex:])
            elif delimiter == self.endDelimiterBlock:
                if todoIndex < delimiterIndex:
                    self.TODOes.append(line[todoIndex:])
            else:
                if todoIndex > delimiterIndex:
                    self.TODOes.append(line[todoIndex:])

    def checkTODOes(self, inCommentRange, line):
        """Record every case-insensitive "todo" that lies inside the comment part of ``line``.

        Args:
            inCommentRange: classification code of ``line`` (see
                ``readFileAndCloseFile``); 0 means the line is ignored.
            line: the raw line text.

        For each accepted occurrence, the text from the "todo" to the end of
        the line is appended to ``self.TODOes``.
        """
        if inCommentRange == 0:
            return
        # finditer yields the real offset of every match, including one at
        # column 0, which a find() search starting at offset 1 would miss.
        for match in re.finditer("todo", line, re.IGNORECASE):
            todoIndex = match.start()
            if inCommentRange == 3:
                self.TODOes.append(line[todoIndex:])
            elif inCommentRange == 4:
                delimiterIndex = line.find(self.endDelimiterBlock)
                if todoIndex < delimiterIndex:
                    self.TODOes.append(line[todoIndex:])
            elif inCommentRange == 2:
                delimiterIndex = line.find(self.beginDelimiterBlock)
                if todoIndex > delimiterIndex:
                    self.TODOes.append(line[todoIndex:])
            elif inCommentRange == 1:
                delimiterIndex = line.find(self.endOfLineDelimiter)
                if todoIndex > delimiterIndex:
                    self.TODOes.append(line[todoIndex:])
            elif inCommentRange == 5:
                beginDelimiterIndex = line.find(self.beginDelimiterBlock)
                endDelimiterIndex = line.find(self.endDelimiterBlock)
                if beginDelimiterIndex < todoIndex < endDelimiterIndex:
                    self.TODOes.append(line[todoIndex:])

    def readFileAndCloseFile(self):
        """Read the file line by line and fill in the counters and lists.

        Each line is classified with an ``inCommentRange`` code:
            0 => line is not comment
            1 => single line comment
            2 => first line of block comment
            3 => middle line of block comment
            4 => last line of block comment
            5 => block comment that opens and closes on this line
        """
        tempBlockComment = BlockComment()
        blockCommentHasStarted = False

        # ``with`` releases the handle even if parsing raises part-way through.
        with open(self.fileName, "r") as f:
            for line in f:
                self.numLines += 1
                hasEndBlock = self.isDelimiterValidInLine(self.endDelimiterBlock, line)
                inCommentRange = 0

                if blockCommentHasStarted and not hasEndBlock:
                    # Still inside an open block.  This must be exclusive with
                    # the branches below, otherwise an end-of-line or begin
                    # delimiter appearing in the block text would count the
                    # line twice and mis-file it.
                    self.numCommentsLines += 1
                    inCommentRange = 3
                    tempBlockComment.addLine(line)
                elif self.isDelimiterValidInLine(self.endOfLineDelimiter, line):
                    self.numCommentsLines += 1
                    inCommentRange = 1
                    self.singleLineComments.append(line)
                elif self.isDelimiterValidInLine(self.beginDelimiterBlock, line):
                    if hasEndBlock and not blockCommentHasStarted:
                        # block comment contained in one line
                        self.numCommentsLines += 1
                        inCommentRange = 5
                        tempBlockComment.addLine(line)
                        self.blockComments.append(tempBlockComment)
                        tempBlockComment = BlockComment()
                    else:
                        # first line of block comment
                        self.numCommentsLines += 1
                        inCommentRange = 2
                        blockCommentHasStarted = True
                        tempBlockComment.addLine(line)
                elif hasEndBlock:
                    self.numCommentsLines += 1
                    inCommentRange = 4
                    blockCommentHasStarted = False
                    tempBlockComment.addLine(line)
                    self.blockComments.append(tempBlockComment)
                    tempBlockComment = BlockComment()

                self.checkTODOes(inCommentRange, line)

In [77]:
bf = BlockCommentDiffSingleCommentFileCommentsDesc("../data/PlaneFuelTest.java", "/*", "*/", "//")

In [71]:
bf.getNumberOfLines()

49

In [72]:
bf.getNumberOfBlockLineComments()

3

In [73]:
bf.getNumberOfBlockLineCommentsLines()

9

In [74]:
bf.getNumberOfCommentsLines()

13

In [75]:
bf.getNumberOfSingleLineCommentsLines()

4

In [78]:
bf.getNumberOfTODOes()

6

In [98]:
class SingleCommentFileCommentsDesc(FileCommentsDesc):
    """Comment statistics for files that only have an end-of-line comment delimiter.

    A run of two or more consecutive lines that *start* with the delimiter
    (ignoring leading whitespace) is grouped into one ``BlockComment``; every
    other comment line is recorded as a single-line comment.

    The file is parsed immediately on construction.
    """

    def __init__(self, fileName, delimiter):
        super().__init__(fileName)

        self.delimiter = delimiter
        self.readFileAndCloseFile()

    def isPotentialBlockCommentLine(self, delimiter, line):
        """Return True if ``line`` (ignoring leading whitespace) starts with ``delimiter``."""
        return super().isDelimiterValidInLine(delimiter, line) and line.lstrip().find(delimiter) == 0

    def addValidTODOes(self, line):
        """Record every case-insensitive "todo" located after the first delimiter in ``line``.

        For each accepted occurrence, the text from the "todo" to the end of
        the line is appended to ``self.TODOes``.
        """
        delimiterIndex = line.find(self.delimiter)
        # finditer yields the real offset of every match; repeatedly calling
        # find() from the previous hit returns that same hit again, which
        # recorded the first TODO's text twice on lines with two TODOs.
        for match in re.finditer("todo", line, re.IGNORECASE):
            todoIndex = match.start()
            if todoIndex > delimiterIndex:
                self.TODOes.append(line[todoIndex:])

    def checkTODOes(self, inCommentRange, line):
        """Scan ``line`` for TODOs unless it is a non-comment line (code 0)."""
        if inCommentRange == 0:
            return
        self.addValidTODOes(line)

    def readFileAndCloseFile(self):
        """Read the file line by line and fill in the counters and lists.

        Each line is classified with an ``inCommentRange`` code:
            0 => line is not comment
            1 => line is a comment
        """
        tempBlockComment = BlockComment()
        blockCommentHasStarted = False

        # ``with`` releases the handle even if parsing raises part-way through.
        with open(self.fileName, "r") as f:
            for line in f:
                self.numLines += 1
                inCommentRange = 0
                if blockCommentHasStarted and self.isPotentialBlockCommentLine(self.delimiter, line):
                    self.numCommentsLines += 1
                    if tempBlockComment.getNumberOfLines() == 0:
                        # The previous line was filed as a single-line
                        # comment; it is really the first line of this run.
                        firstLine = self.singleLineComments.pop()
                        tempBlockComment.addLine(firstLine)
                    inCommentRange = 1
                    tempBlockComment.addLine(line)
                else:
                    # Whatever this line is, it ends any pending run.  Flushing
                    # here (not only on non-comment lines) keeps a run that is
                    # followed by a trailing-comment line from being merged
                    # with a later run.
                    if tempBlockComment.getNumberOfLines() > 0:
                        self.blockComments.append(tempBlockComment)
                        tempBlockComment = BlockComment()
                    if self.isDelimiterValidInLine(self.delimiter, line):
                        blockCommentHasStarted = self.isPotentialBlockCommentLine(self.delimiter, line)
                        self.numCommentsLines += 1
                        inCommentRange = 1
                        self.singleLineComments.append(line)
                    else:
                        blockCommentHasStarted = False

                self.checkTODOes(inCommentRange, line)

        # A run that reaches end-of-file has no following line to trigger the
        # flush above.
        if tempBlockComment.getNumberOfLines() > 0:
            self.blockComments.append(tempBlockComment)

In [100]:
sf = SingleCommentFileCommentsDesc("../data/example.py", "#")

In [101]:
sf.getNumberOfLines()

21

In [102]:
sf.getNumberOfBlockLineComments()

1

In [103]:
sf.getNumberOfBlockLineCommentsLines()

3

In [104]:
sf.getNumberOfCommentsLines()

6

In [105]:
sf.getNumberOfSingleLineCommentsLines()

3

In [106]:
sf.getNumberOfTODOes()

2