In [3]:
# Known limitation (a case the parser does not handle):
#   Code that follows a closing */ on the same line is ignored, so any keywords there are not counted

In [4]:
# Empty the current working directory before cloning (presumably so that a re-run starts clean
# and only the freshly cloned repositories are scanned later).
# WARNING: this deletes EVERYTHING in the current directory -- only run it in a disposable workspace.
! rm -rf *

# Fetch the two example Verilog repositories that the rest of the notebook analyses.
! git clone https://github.com/sudhamshu091/32-Verilog-Mini-Projects.git
! git clone https://github.com/snbk001/Verilog-Design-Examples.git

Cloning into '32-Verilog-Mini-Projects'...
remote: Enumerating objects: 1163, done.[K
remote: Counting objects: 100% (89/89), done.[K
remote: Compressing objects: 100% (61/61), done.[K
remote: Total 1163 (delta 28), reused 84 (delta 24), pack-reused 1074[K
Receiving objects: 100% (1163/1163), 12.87 MiB | 32.39 MiB/s, done.
Resolving deltas: 100% (498/498), done.
Cloning into 'Verilog-Design-Examples'...
remote: Enumerating objects: 371, done.[K
remote: Counting objects: 100% (229/229), done.[K
remote: Compressing objects: 100% (142/142), done.[K
remote: Total 371 (delta 85), reused 196 (delta 71), pack-reused 142[K
Receiving objects: 100% (371/371), 116.87 KiB | 2.92 MiB/s, done.
Resolving deltas: 100% (136/136), done.


In [5]:
import re       # for regular expressions
import sys      # for getting command line arguments (the file we are reading)

# Keywords ("parts") we want to search for; extend this list to track more of them
parts = ['module', 'input', 'output', 'reg', 'wire']
# One counter per keyword, each starting at zero
parts_dict = {part: 0 for part in parts}

def _strip_comments(raw_line, in_comment_block):
    """Remove comment text from one source line.

    Returns a ``(code, still_in_comment_block)`` tuple where ``code`` is the part of
    ``raw_line`` that is not inside a ``//`` or ``/* ... */`` comment.

    Known limitation (kept on purpose): anything that follows a closing ``*/`` on the
    same line is dropped together with the comment.
    """
    if in_comment_block:
        # The whole line belongs to the comment; the block ends if a */ appears anywhere in it.
        return '', '*/' not in raw_line

    block_at = raw_line.find('/*')
    line_at = raw_line.find('//')

    # A // that comes first hides everything after it, including any /*.
    if line_at != -1 and (block_at == -1 or line_at < block_at):
        return raw_line[:line_at], False
    if block_at == -1:
        return raw_line, False

    # Keep the code in front of the /*; search for the closing */ only after the opener
    # so that "/*/" is not mistaken for an already closed comment.
    still_open = raw_line.find('*/', block_at + 2) == -1
    return raw_line[:block_at], still_open


def _clean_port_list(fragments):
    """Turn captured declaration fragments into a flat list of non-empty port names."""
    joined = ','.join(fragments)            # a separator keeps neighbouring fragments from fusing
    return [name.strip() for name in re.split(r'[,;]', joined) if name.strip()]


def _print_ports(inputs, outputs, output_file):
    """Write the input and output port summaries of one module to ``output_file``."""
    for label, fragments in (('Inputs', inputs), ('Outputs', outputs)):
        names = _clean_port_list(fragments)
        print('    {} ({}):'.format(label, len(names)), file=output_file)
        for name in names:
            print('      ', name, sep='', file=output_file)


def parse_lines(parts_dict, lines, output_file):
    """Report every module found in ``lines`` together with its inputs and outputs.

    Three header layouts are recognised:
      1. ports declared inline on the header line:  ``module m(input a, output b);``
      2. port names in the header, directions declared in the body
      3. header with only an opening parenthesis, ports declared on the following lines

    Parameters:
        parts_dict:  accepted for interface compatibility; not used by this function.
        lines:       iterable of source lines (strings).
        output_file: file-like object that receives the report via ``print(..., file=...)``.

    Returns:
        None. The report is written to ``output_file``.
    """
    in_comment_block = False
    in_module = False
    looking_for_in_out = False              # True while directions are expected on later lines
    inputs, outputs = [], []                # raw declaration fragments of the current module

    for raw_line in lines:
        line, in_comment_block = _strip_comments(raw_line, in_comment_block)

        if re.search(r'\bmodule\b', line):
            # A new header while still inside a module means the previous one had no
            # 'endmodule': report it now and start with empty port lists.
            if in_module:
                _print_ports(inputs, outputs, output_file)
            in_module = True
            inputs, outputs = [], []

            module_name = re.search(r'\bmodule\s+(\w+)', line)
            if module_name:
                print("  Module:", module_name.group(1), file=output_file)

            # Format 1: both directions are declared inside the header parentheses.
            inline_ports = re.search(r'\(\s*input\b(.*)\boutput\b(.*)\)', line)
            if inline_ports:
                inputs = [inline_ports.group(1)]
                outputs = [inline_ports.group(2)]
                looking_for_in_out = False
            else:
                # Format 2 or 3: the directions follow on later lines.
                looking_for_in_out = True

        elif re.search(r'\bendmodule\b', line):
            looking_for_in_out = False
            in_module = False
            _print_ports(inputs, outputs, output_file)
            inputs, outputs = [], []

        if looking_for_in_out:
            # Only declarations that start the line count; \b keeps identifiers such as
            # 'input_ready' or 'outputs' from being mistaken for the keywords.
            input_decl = re.search(r'\A\s*input\b\s*(.*)', line)
            output_decl = re.search(r'\A\s*output\b\s*(.*)', line)
            if input_decl:
                inputs.append(input_decl.group(1))
            elif output_decl:
                outputs.append(output_decl.group(1))

    # End of input reached while still inside a module that had no 'endmodule'.
    if in_module:
        _print_ports(inputs, outputs, output_file)

In [6]:
import os

# Report file that receives the results for every scanned source file;
# mode 'w' creates output.txt in the current directory or truncates an existing one
output_file = open('output.txt', 'w')

def traverse_directory(directory, extension, output_file):
    """Recursively parse every file below ``directory`` whose name ends with ``extension``.

    For each matching file a ``File: <path>`` header is written to ``output_file``,
    followed by whatever ``parse_lines`` reports for it and a blank separator line.

    Parameters:
        directory:   path of the directory to scan.
        extension:   file-name suffix to select, e.g. ``".v"``.
        output_file: file-like object that receives the report.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the report reproducible.
    for filename in sorted(os.listdir(directory)):
        filepath = os.path.join(directory, filename)

        if os.path.isfile(filepath) and filename.endswith(extension):
            # 'with' guarantees the handle is closed even if reading fails;
            # errors='replace' keeps stray non-UTF-8 bytes from aborting the whole scan.
            with open(filepath, 'r', encoding='utf-8', errors='replace') as source:
                lines = source.readlines()
            print("File:", filepath, file=output_file)
            parse_lines(parts_dict, lines, output_file)
            print(file=output_file)    # blank line after the information for the current file
        elif os.path.isdir(filepath):
            traverse_directory(filepath, extension, output_file)

# Scan the current directory tree for .v files; close the report even if the scan
# fails part-way so that everything written so far is flushed to disk
try:
    traverse_directory(".", ".v", output_file)
finally:
    output_file.close()