In [1]:
# This script analyses the Pylint output for a batch of files.
# It generates an Excel file that summarises the frequency of each Pylint warning code for each student.
# Before running it, use a terminal to generate the Pylint warning messages for all students and save the output in a .txt file.
# The command for generating Pylint warning messages for a batch of files is:
# pylint <Path for Data Set>/*.py > <Path for Pylint Output>/pylintOutput.txt
# E.g.: pylint /Users/apple/Desktop/SummerResearch/Week1/Python-Q4quality\ 2/*.py > /Users/apple/Desktop/SummerResearch/Week1/pylintOutput.txt

In [2]:
# Open the saved Pylint report in append mode (nothing is read here);
# the handle is written to and closed by the next cell.
pylintOutput = open("/Users/apple/Desktop/SummerResearch/Week1/pylintOutput.txt", mode = "a")

In [3]:
# Data Preprocess Stage
# Append a row of asterisks to the end of the report. The grouping cell further down
# starts a new group whenever a line begins with "************", so this trailing
# marker is what causes the final module's messages to be stored.
# The handle opened in the previous cell is used as a context manager so that it is
# closed even if the write raises.
with pylintOutput:
    pylintOutput.write("************")

In [4]:
# Store each line of the Pylint outcome
# The report is read inside a `with` block so the file handle is always closed.
# `lines` holds only the real lines of the file, each keeping its trailing newline;
# no empty end-of-file string is appended.
with open("/Users/apple/Desktop/SummerResearch/Week1/pylintOutput.txt") as pylintOutput:
    lines = pylintOutput.readlines()
print("File Finished!")

File Finished!


In [5]:
# Split the report into one group of lines per student.
# A line whose first 12 characters are asterisks opens a new group; every other
# line belongs to the group that is currently open.
students = []
currentGroup = []
for text in lines:
    if text[0:12] == "************":
        # Close the group collected so far and start the next one with this header line
        students.append(currentGroup)
        currentGroup = [text]
    else:
        currentGroup.append(text)
# Discard the first stored group: it holds whatever preceded the first header line
students.pop(0)

[]

In [6]:
# Process each student
# single_outcome = ['Student_ID', 'error code 1', 'error code 2', ...]
# outcome = [single_outcome1, single_outcome2, ...]
# codeReference collects, for every code found, the message text from the code onwards.

codeReference = set()
keyedOutcome = []
for student in students:
    # Get the Student ID first.
    # 17 == len("Module question4-"): the slice starts right after that prefix,
    # so it assumes every header line names a module called "question4-<ID>".
    indexOfStudentID = student[0].find("Module") + 17
    single_outcome = [student[0][indexOfStudentID : ].strip('\n')]
    # Numeric part of the ID (the ID without its first character), used only for sorting
    sortKey = int(student[0][indexOfStudentID + 1 : ].strip('\n'))

    # Get the error code of this student
    for message in student[1 : ]:
        # Pick the warning code: one letter, four digits and a trailing colon, e.g. "C0103:"
        for token in message.split():
            # token[1 : 5] covers all four digits of the code
            if (len(token) == 6) and token[0].isalpha() and token[1 : 5].isdigit() and token[-1] == ':':
                single_outcome.append(token[0 : 5])
                codeReference.add(message[message.find(token[0 : 5]):].strip('\n'))
    keyedOutcome.append((sortKey, single_outcome))

# Sort elements based on the numeric part of the students' ID.
# The sort is stable, so students with equal keys keep their order in the report.
keyedOutcome.sort(key = lambda pair: pair[0])
outcome = [single_outcome for _, single_outcome in keyedOutcome]
outcome

[['s01', 'C0303', 'C0303', 'C0103', 'C0114', 'W0127', 'W0127'],
 ['s02', 'C0303', 'C0103', 'C0114'],
 ['s04', 'W0622', 'C0103', 'C0114', 'R1716'],
 ['s05', 'C0103', 'C0114'],
 ['s06', 'W0622', 'C0103', 'C0114'],
 ['s16', 'C0103', 'C0114'],
 ['s17', 'C0103', 'C0114', 'R1724'],
 ['s19', 'C0103', 'C0114'],
 ['s22', 'C0303', 'C0103', 'C0114', 'W1309', 'W1309', 'R1724'],
 ['s23', 'C0303', 'C0103', 'C0114'],
 ['s24', 'C0303', 'C0103', 'C0114', 'R1716'],
 ['s25', 'C0303', 'C0325', 'C0303', 'C0103', 'C0114', 'W0127'],
 ['s26', 'C0103', 'C0114', 'R1716'],
 ['s27', 'C0303', 'C0303', 'C0103', 'C0114'],
 ['s31', 'C0103', 'C0114'],
 ['s32', 'C0103', 'C0114'],
 ['s34', 'C0303', 'C0103', 'C0114', 'R1716'],
 ['s37', 'C0303', 'C0103', 'C0114'],
 ['s38', 'C0103', 'C0114', 'W1309', 'W1309'],
 ['s39', 'C0303', 'C0303', 'C0103', 'C0114'],
 ['s46', 'C0303', 'C0305', 'C0103', 'C0114'],
 ['s48', 'C0103', 'C0114'],
 ['s49', 'C0103', 'C0114', 'C0103'],
 ['s52', 'C0103', 'C0114', 'R1716'],
 ['s53', 'E0001'],
 ['

In [7]:
# Gather every distinct warning code and order the codes alphabetically.
# Each entry of outcome is ['Student_ID', 'error code 1', 'error code 2', ...],
# so the ID at index 0 is skipped.
allErrorCodeSet = {code for studentInfo in outcome for code in studentInfo[1 : ]}

allErrorCodes = sorted(allErrorCodeSet)
print(allErrorCodes)
print(len(allErrorCodes))

['C0103', 'C0114', 'C0301', 'C0303', 'C0305', 'C0325', 'E0001', 'E0602', 'R0801', 'R1716', 'R1724', 'W0127', 'W0311', 'W0622', 'W1309']
15


In [8]:
# Tally how often each warning code occurs for each student
# processedStudent = [Student_ID, {"c0001": 2, "w0001": 3, ...}]
# processedStudents = [processedStudent1, processedStudent2, ...]

processedStudents = []
for single_outcome in outcome:
    frequencyMap = {}
    # Index 0 is the student ID; everything after it is a warning code
    for code in single_outcome[1 : ]:
        frequencyMap[code] = frequencyMap.get(code, 0) + 1
    processedStudents.append([single_outcome[0], frequencyMap])
print(processedStudents)
print(len(processedStudents))

[['s01', {'C0303': 2, 'C0103': 1, 'C0114': 1, 'W0127': 2}], ['s02', {'C0303': 1, 'C0103': 1, 'C0114': 1}], ['s04', {'W0622': 1, 'C0103': 1, 'C0114': 1, 'R1716': 1}], ['s05', {'C0103': 1, 'C0114': 1}], ['s06', {'W0622': 1, 'C0103': 1, 'C0114': 1}], ['s16', {'C0103': 1, 'C0114': 1}], ['s17', {'C0103': 1, 'C0114': 1, 'R1724': 1}], ['s19', {'C0103': 1, 'C0114': 1}], ['s22', {'C0303': 1, 'C0103': 1, 'C0114': 1, 'W1309': 2, 'R1724': 1}], ['s23', {'C0303': 1, 'C0103': 1, 'C0114': 1}], ['s24', {'C0303': 1, 'C0103': 1, 'C0114': 1, 'R1716': 1}], ['s25', {'C0303': 2, 'C0325': 1, 'C0103': 1, 'C0114': 1, 'W0127': 1}], ['s26', {'C0103': 1, 'C0114': 1, 'R1716': 1}], ['s27', {'C0303': 2, 'C0103': 1, 'C0114': 1}], ['s31', {'C0103': 1, 'C0114': 1}], ['s32', {'C0103': 1, 'C0114': 1}], ['s34', {'C0303': 1, 'C0103': 1, 'C0114': 1, 'R1716': 1}], ['s37', {'C0303': 1, 'C0103': 1, 'C0114': 1}], ['s38', {'C0103': 1, 'C0114': 1, 'W1309': 2}], ['s39', {'C0303': 2, 'C0103': 1, 'C0114': 1}], ['s46', {'C0303': 1, 'C

In [9]:
# Fill in data to the spreadsheet

# import package
import xlsxwriter 
from openpyxl import Workbook
from openpyxl.styles import Alignment
from openpyxl.styles import PatternFill
from openpyxl.styles import Font

# Name of the single worksheet that holds the analysis
sName = "Analysis of Pylint Outcome"

# Start from a new workbook and remove the worksheet called "Sheet" from it
workbook = Workbook()
workbook.remove(workbook["Sheet"])

# Add the analysis worksheet; create_sheet returns the worksheet it created,
# which is the one every later cell writes to
sheet = workbook.create_sheet(sName)

In [10]:
# Header row: "Student ID" in A1, followed by one highlighted, bold column per warning code
idHeader = sheet["A1"]
idHeader.value = "Student ID"
idHeader.alignment = Alignment(horizontal = 'center')
sheet.column_dimensions['A'].width = 30

for index, code in enumerate(allErrorCodes):
    # Warning codes start in the second worksheet column
    cell = sheet.cell(row = 1, column = index + 2)
    # xl_col_to_name takes a zero-based column index, hence cell.column - 1
    sheet.column_dimensions[xlsxwriter.utility.xl_col_to_name(cell.column - 1)].width = 30
    cell.value = code
    cell.alignment = Alignment(horizontal = 'center')
    cell.fill = PatternFill(start_color = "00FFFF00", end_color = "00FFFF00", fill_type = "solid")
    cell.font = Font(bold = True)

In [11]:
# Fill in the information of each student

# Largest count of a single warning code for any one student; it scales the colour
# gradient below (for visualization). default = 0 keeps max() from raising
# ValueError when a student has an empty frequency map or there are no students.
maxError = max(
    (count for processedStudent in processedStudents for count in processedStudent[1].values()),
    default = 0,
)

for studentIndex, processedStudent in enumerate(processedStudents):
    # Student rows start at worksheet row 2, directly below the header row
    rowNumber = studentIndex + 2
    errorCodes = processedStudent[1]

    # First column: the student ID in bold
    idCell = sheet.cell(row = rowNumber, column = 1)
    idCell.value = processedStudent[0]
    idCell.font = Font(bold = True)
    idCell.alignment = Alignment(horizontal = 'center')

    # Remaining columns: one per warning code, in the same order as the header row
    for errorCodeIndex, errorCode in enumerate(allErrorCodes):
        cell = sheet.cell(row = rowNumber, column = errorCodeIndex + 2)
        if errorCode in errorCodes:
            cell.value = errorCodes[errorCode]
            # Add gradient colors for better visualization: the larger the count
            # relative to maxError, the lower the red and green components.
            # maxError is at least cell.value here, so the division is safe.
            ratio = cell.value / maxError
            hexColor = '%02x%02x%02x' % (int(255 - (ratio * 255)), int(255 - (ratio * 155)), 255)
            cell.fill = PatternFill(start_color = hexColor, end_color = hexColor, fill_type = "solid")
        # Cells for codes the student does not have stay empty but are still centred
        cell.alignment = Alignment(horizontal = 'center')

In [12]:
# Count warning message of each student

# The totals go in the column right after the last warning-code column
totalColumn = len(allErrorCodes) + 2

# Title of the column
titleCell = sheet.cell(row = 1, column = totalColumn)
titleCell.value = "Total Warning per Student"
titleCell.font = Font(bold = True)
titleCell.fill = PatternFill(start_color = "00FFFF00", end_color = "00FFFF00", fill_type = "solid")
# xl_col_to_name takes a zero-based column index
sheet.column_dimensions[xlsxwriter.utility.xl_col_to_name(totalColumn - 1)].width = 30

# Compute every total first so that all cells are shaded against the same overall
# maximum, not against a maximum that grows while the rows are being written.
totals = [sum(processedStudent[1].values()) for processedStudent in processedStudents]
maxError = max(totals, default = 0)

for studentIndex, total in enumerate(totals):
    # Student rows start at worksheet row 2
    cell = sheet.cell(row = studentIndex + 2, column = totalColumn)
    cell.value = total
    cell.alignment = Alignment(horizontal = 'center')
    # Add gradient colors for better visualization; a maximum of 0 means every
    # total is 0, so the ratio is taken as 0 to avoid dividing by zero
    ratio = (total / maxError) if maxError else 0
    hexColor = '%02x%02x%02x' % (255, int(255 - (ratio * 255)), int(255 - (ratio * 255)))
    cell.fill = PatternFill(start_color = hexColor, end_color = hexColor, fill_type = "solid")

In [13]:
# Total occurrences of each warning code over all students

# The summary row sits directly below the last student row
summaryRow = len(processedStudents) + 2

# Title of the row
titleCell = sheet.cell(row = summaryRow, column = 1)
titleCell.value = "Warning Message Occurrence"
titleCell.font = Font(bold = True)
titleCell.alignment = Alignment(horizontal = 'center')

# One total per warning code, in the same order as allErrorCodes
errorFreq = [
    sum(processedStudent[1].get(singleError, 0) for processedStudent in processedStudents)
    for singleError in allErrorCodes
]

maxError = max(errorFreq)
for errorIndex, frequency in enumerate(errorFreq):
    # Warning-code columns start at worksheet column 2
    cell = sheet.cell(row = summaryRow, column = errorIndex + 2)
    cell.value = frequency
    cell.alignment = Alignment(horizontal = 'center')
    # Gradient colour: green and blue components drop as the total approaches maxError
    share = cell.value / maxError
    rgb = '%02x%02x%02x' % (255, int(255 - (share * 255)), int(255 - (share * 255)))
    cell.fill = PatternFill(start_color = rgb, end_color = rgb, fill_type = "solid")

In [14]:
# Add reference to the meaning of each warning code
# simpleCodeReference = [[code, short description], ...] with one entry per code in allErrorCodes.
simpleCodeReference = []
allErrorCodesCpy = list(allErrorCodes)
# Iterate in sorted order so that, when several messages share a code, the one
# that supplies the description does not depend on set iteration order.
for ref in sorted(codeReference):
    code = ref[0:5]
    # Prefer the text inside the last pair of parentheses of the message
    start = ref.rfind('(') + 1
    end = ref.rfind(')')
    if start == 0 or end == -1:
        # No parentheses: fall back to the message text, which begins at index 7,
        # after the 5-character code, the colon and the following space
        description = ref[7:]
    else:
        description = ref[start : end]
    # Keep only the first description seen for each code
    if code in allErrorCodesCpy:
        simpleCodeReference.append([code, description])
        allErrorCodesCpy.remove(code)

# Sort error codes
simpleCodeReference.sort(key = lambda x:x[0])
# simpleCodeReference
completeCodeReference = list(codeReference)
completeCodeReference

["C0325: Unnecessary parens after 'not' keyword (superfluous-parens)",
 'W1309: Using an f-string that does not have any interpolated variables (f-string-without-interpolation)',
 'C0103: Module name "question4-s19" doesn\'t conform to snake_case naming style (invalid-name)',
 'C0103: Module name "question4-s127" doesn\'t conform to snake_case naming style (invalid-name)',
 'C0103: Module name "question4-s54" doesn\'t conform to snake_case naming style (invalid-name)',
 'C0103: Module name "question4-s118" doesn\'t conform to snake_case naming style (invalid-name)',
 'C0103: Module name "question4-s23" doesn\'t conform to snake_case naming style (invalid-name)',
 'C0103: Module name "question4-s123" doesn\'t conform to snake_case naming style (invalid-name)',
 "C0325: Unnecessary parens after 'elif' keyword (superfluous-parens)",
 'C0103: Module name "question4-s124" doesn\'t conform to snake_case naming style (invalid-name)',
 'C0114: Missing module docstring (missing-module-docstring

In [15]:
# Write the short description of every warning code underneath its column
# The reference row is the second row below the last student row
referenceRow = len(processedStudents) + 3

# Title of the row
titleCell = sheet.cell(row = referenceRow, column = 1)
titleCell.value = "Code Reference"
titleCell.font = Font(bold = True)
titleCell.alignment = Alignment(horizontal = 'center')

# One description per warning-code column; simpleCodeReference is read by position,
# so it is expected to hold its entries in the same order as the header columns
for refIndex in range(len(allErrorCodes)):
    cell = sheet.cell(row = referenceRow, column = refIndex + 2)
    cell.value = simpleCodeReference[refIndex][1]
    cell.alignment = Alignment(horizontal = 'center')

In [16]:
# Write the finished workbook to "Week1.xlsx" (a relative path, so it is resolved against the current working directory)
workbook.save("Week1.xlsx")