# Spreadsheet comparison and grading

This notebook holds exploratory code for comparing spreadsheet submissions against the key workbook and for grading them.

## Key preparation

In [3]:
from openpyxl import load_workbook

# Load the key workbook twice from the same file: once with data_only=True
# and once with data_only=False.
# The compared data will come from Sheet3, cell B6.
KEY_WORKBOOK_PATH = "../sample_excel_files/type_1_key.xlsx"

key_data = load_workbook(KEY_WORKBOOK_PATH, data_only=True)
key_formulas = load_workbook(KEY_WORKBOOK_PATH, data_only=False)

print(f"All sheetnames: {key_data.sheetnames}")

# Any sheet whose name does not contain "_CheckOrder" is treated as a sheet to
# grade. This assumes every such sheet has a matching "_CheckOrder" pair.
grading_sheetnames = [name for name in key_data.sheetnames if "_CheckOrder" not in name]
print(f"Grading sheetnames: {grading_sheetnames}")


All sheetnames: ['Sheet3', 'Sheet3_CheckOrder']
Grading sheetnames: ['Sheet3']


## Key formula and grade parsing

In [35]:
# Parsing methods


def get_grading_sheetnames(key_wb):
    """
    Lists the sheets of the passed key workbook that should be graded.

    A sheet is kept when its name does not contain "_CheckOrder". The order of
    key_wb.sheetnames is preserved.
    """
    order_marker = "_CheckOrder"
    graded = []
    for sheetname in key_wb.sheetnames:
        if order_marker in sheetname:
            # Order sheets only describe how to grade; they are not graded.
            continue
        graded.append(sheetname)
    return graded

def get_sheet_grading_sequence(key_wb):
    """
    Returns a dictionary with sheetnames as keys. Each key maps to an ordered list
    of (cell coordinate, grading criteria) tuples, both strings.

    For every sheet returned by get_grading_sheetnames, the paired
    "<sheetname>_CheckOrder" sheet is read: column B, from row 2 downwards, lists
    the coordinates of the cells to grade. The grading criteria of a listed cell
    is the text of that cell's comment in the graded sheet.

    Blank cells in the order column are skipped.

    Raises:
        ValueError: if a listed cell has no comment to read the criteria from.
    """
    grading_dict = {}

    for name in get_grading_sheetnames(key_wb):
        # Assumes every graded sheet has a "<name>_CheckOrder" pair.
        order_sheet = key_wb[name + "_CheckOrder"]
        key_sheet = key_wb[name]

        # Assumptions of the order sheet
        # 1. The scoring column is always on B. (min_col=2, max_col=2)
        # 2. The scoring column always has a header (min_row=2)
        # 3. The scoring column is always in order
        for column in order_sheet.iter_cols(min_col=2, max_col=2, min_row=2):
            # Only column B is requested, so this loop body runs for B only.
            sequence = []
            for cell in column:
                cell_coord = cell.value
                if cell_coord is None:
                    # Blank row in the order column (e.g. another column of the
                    # order sheet is longer than B): nothing to grade here.
                    continue

                comment = key_sheet[cell_coord].comment
                if comment is None:
                    raise ValueError(
                        f"Cell {cell_coord} of sheet '{name}' is listed in "
                        f"'{name}_CheckOrder' but has no comment holding its grading criteria."
                    )
                sequence.append((cell_coord, comment.text))

            grading_dict[name] = sequence

    return grading_dict


In [36]:
# Try the parsing methods on the key workbook.

key_grading_sequence = get_sheet_grading_sequence(key_data)

print("Key grading sequence for each sheet:")

for sheetname, grading_sequence in key_grading_sequence.items():
    print(f"{sheetname}:")
    print(grading_sequence)

Key grading sequence for each sheet:
Sheet3:
[('B6', 'Rubric:\n\t10P-C\n'), ('B7', 'Rubric:\n\t5P-C\n')]


## Rubric extraction

In [43]:
# Method definition
import re

# TODO: for clarity, perhaps it's better to return a class with properties instead of tuple in the future.
def parse_grading_rubric(rubric):
    """
    Returns a tuple of (score, type) extracted from the passed rubric String.

    The score is the text found between a tab character and the first following "P"
    (e.g. "10" or "2.5"); it is returned as a String, not converted to a number.
    The type is the single character right after "P-": "C" for Constant evaluation
    and "F" for Formula evaluation.
    Either element is None when its pattern is not found in the rubric.
    """
    def first_group(pattern):
        # Captured group of the first match of pattern in rubric, or None.
        found = re.search(pattern, rubric)
        if found is None:
            return None
        return found.group(1)

    return first_group('\t(.+?)P'), first_group('P-(.+?)')

def parse_grading_criteria(criteria):
    """
    Splits the passed criteria String into a tuple of (rubric, unit_tests).

    The rubric is whatever parse_grading_rubric returns for the second line of the
    criteria, i.e. a (score, type) tuple. It is None when the criteria has no
    second line or that line is empty.

    The unit_tests is the list of lines from the fourth line onward, as Strings
    that can be parsed for unit testing the submission cell. It is None when the
    criteria has fewer than four lines.
    """
    # Layout assumption: line 2 holds the grading rubric, and lines 4+ hold the
    # unit tests.
    lines = criteria.split("\n")
    line_count = len(lines)

    rubric_line = lines[1] if line_count > 1 else None
    unit_tests = lines[3:] if line_count > 3 else None

    if not rubric_line:
        return None, unit_tests
    return parse_grading_rubric(rubric_line), unit_tests
    

In [44]:
# Run the criteria parser on every cell of the key grading sequence.

for cell_criteria_pairs in key_grading_sequence.values():
    for cell_name, criteria in cell_criteria_pairs:
        parsed_rubric, unit_tests = parse_grading_criteria(criteria)
        print(f"Grading rubric for cell {cell_name}: {parsed_rubric}")
        print(f"Unit tests for cell {cell_name}: {unit_tests}")
        

Grading rubric for cell B6: ('10', 'C')
Unit tests for cell B6: None
Grading rubric for cell B7: ('5', 'C')
Unit tests for cell B7: None


## Constant value grading

The cells below will be used to explore how to grade a submission against constant rubrics.

In [56]:
# Method definition

# TODO: Use this method for the formula-based grading later.

def _parse_score(score_text):
    """
    Converts a rubric score such as "10" or "2.5" to an int, or to a float when
    it is not a whole number.
    """
    try:
        return int(score_text)
    except ValueError:
        return float(score_text)


def grade_submission(submission_filepath, key_data_wb, key_formula_wb):
    """
    Returns the grade for passed submission_filepath, based on the key workbook.

    Only cells whose rubric type is "C" (Constant evaluation) are graded: the cell's
    score is awarded when the submission cell value equals the key cell value.
    Cells with any other rubric type are skipped for now.

    Args:
        submission_filepath: path of the submission workbook to grade.
        key_data_wb: key workbook loaded with data_only=True.
        key_formula_wb: key workbook loaded with data_only=False. Currently unused;
            kept for the formula-based grading that is still to be written.

    Returns:
        The total score: an int, or a float if any awarded score is fractional.

    Raises:
        ValueError: if the rubric of a graded cell cannot be parsed, or an awarded
            score is missing or not a number.
    """
    submission_score = 0

    # Only the data view of the submission is compared for constant grading.
    sub_data_wb = load_workbook(submission_filepath, data_only=True)

    # Assumption: Both the submitted file and key have similar sheetnames and cell structures.
    grading_sheet_sequence = get_sheet_grading_sequence(key_data_wb)

    for sheetname, cell_sequence in grading_sheet_sequence.items():
        sub_data_sheet = sub_data_wb[sheetname]
        key_data_sheet = key_data_wb[sheetname]

        for cell_coord, criteria in cell_sequence:
            rubric, _ = parse_grading_criteria(criteria)
            if rubric is None:
                raise ValueError(
                    f"Cannot parse the grading rubric of cell {cell_coord} "
                    f"in sheet '{sheetname}': {criteria!r}"
                )
            score, grading_type = rubric

            if grading_type != "C":
                # Formula-based ("F") and unknown rubric types are not graded yet.
                continue

            if sub_data_sheet[cell_coord].value == key_data_sheet[cell_coord].value:
                if score is None:
                    raise ValueError(
                        f"No score found in the grading rubric of cell {cell_coord} "
                        f"in sheet '{sheetname}': {criteria!r}"
                    )
                submission_score += _parse_score(score)

    return submission_score

In [58]:
# Grade the key itself and every sample submission.

submission_filepaths = [
    "../sample_excel_files/type_1_key.xlsx",
    "../sample_excel_files/type_1_sub_right_different_formula_1.xlsx",
    "../sample_excel_files/type_1_sub_right_different_formula_2.xlsx",
    "../sample_excel_files/type_1_sub_right_exact.xlsx",
    "../sample_excel_files/type_1_sub_wrong_constant_value.xlsx",
    "../sample_excel_files/type_1_sub_wrong_different_formula_and_result.xlsx",
    "../sample_excel_files/type_1_sub_wrong_different_result.xlsx",
]

for submission_path in submission_filepaths:
    submission_grade = grade_submission(submission_path, key_data, key_formulas)
    print(f"Grade for {submission_path}:\n{submission_grade}\n")


Grade for ../sample_excel_files/type_1_key.xlsx:		15
Grade for ../sample_excel_files/type_1_sub_right_different_formula_1.xlsx:		15
Grade for ../sample_excel_files/type_1_sub_right_different_formula_2.xlsx:		15
Grade for ../sample_excel_files/type_1_sub_right_exact.xlsx:		15
Grade for ../sample_excel_files/type_1_sub_wrong_constant_value.xlsx:		15
Grade for ../sample_excel_files/type_1_sub_wrong_different_formula_and_result.xlsx:		0
Grade for ../sample_excel_files/type_1_sub_wrong_different_result.xlsx:		0
