1. Exam prep:
  - print exams with QR codes that identify each page of each exam.

2. Grading

  - scan exams
  - split pdfs into pages
  - add score table to each page - various templates for pages with different problems, to accommodate subproblems?
  - assemble exams for grading - give different options - by problem, by student? 
  - get scores from exams, save them into a csv/Excel file
  - re-assemble exams by student, flatten them so they are not editable, add score table to the title page
  - email exams to students
  
3. Issues
  - how to handle makeup exams etc? 
  - score tables: either radio buttons or check for double score entries
  - check for ungraded problems
  

with subprocess: seems a bit faster

In [1]:
import os
import subprocess
import io
from pyzbar.pyzbar import decode
import numpy as np
import pdf2image
import PyPDF2 as pdf
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1
import pandas as pd



def extract_pages(inputpdf, fpage, lpage):
    '''
    Copies a range of pages from a PyPDF2 PdfFileReader object into a new
    PdfFileWriter object.

    :inputpdf:
        A PyPDF2 PdfFileReader object.
    :fpage:
        1-based number of the first page to be extracted.
    :lpage:
        1-based number of the page at which extraction stops. This page is
        NOT copied: the extracted pages are fpage, ..., lpage - 1, so a single
        page n is obtained with extract_pages(inputpdf, n, n + 1).

    Returns:
        PyPDF2 PdfFileWriter object containing the extracted pages.
    '''
    writer = pdf.PdfFileWriter()
    # PyPDF2 page indices are 0-based, the arguments are 1-based
    page_index = fpage - 1
    stop_index = lpage - 1
    while page_index < stop_index:
        writer.addPage(inputpdf.getPage(page_index))
        page_index += 1
    return writer



def pdf2pages(fname, output_fname=None, output_directory=None):
    '''
    Splits a pdf file into files containing individual pages

    :fname:
        Name of the pdf file.
    :output_fname:
        If string, output files will be named output_fname_n.pdf where n is the
        zero-based index of the page (the first page gets n = 0).
        This argument can be also a function with signature f(fname, n) which
        returns a string. In such case the output files will be named by
        the return values of this function.
        Defaults to the base name of the processed file without its extension.
    :output_directory:
        Directory where output files will be saved. It is created if it does
        not exist. Defaults to the current working directory.

    Returns:
        The list of file names created (names only, without the directory).

    Note: Page splitting seems to interfere with checkboxes embedded in pages.
    After splitting they can't be read, but if checkboxes are reselected they
    work again. Splitting pages using pdftk does not create this problem:
    os.system('pdftk merged.pdf burst > test.txt')
    '''

    # if no output_directory is given, use the current directory
    if output_directory is None:
        output_directory = os.getcwd()
    # create the directory if it does not exist yet
    os.makedirs(output_directory, exist_ok=True)

    if output_fname is None:
        # strip the extension properly instead of assuming it has 4 characters
        output_fname = os.path.splitext(os.path.basename(fname))[0]

    if isinstance(output_fname, str):
        def label(n):
            return f"{output_fname}_{n}.pdf"
    else:
        def label(n):
            return output_fname(fname, n)

    outfiles = []
    # The source file stays open for the whole loop (the reader is used while
    # pages are extracted and written) and is closed afterwards.
    with open(fname, 'rb') as source_file:
        source = pdf.PdfFileReader(source_file)
        for n in range(source.numPages):
            # extract_pages excludes its last argument, so this is page n+1 only
            page = extract_pages(source, n + 1, n + 2)
            outfile_name = label(n)
            outfile_path = os.path.join(output_directory, outfile_name)
            with open(outfile_path, "wb") as f:
                page.write(f)
            outfiles.append(outfile_name)
    return outfiles
 
    
    
def merge_pdfs(files, output_fname="merged.pdf"):
    '''
    Merge pdf files into a single pdf file.

    :files:
        A list of pdf files. Pages are appended in the order of this list.
    :output_fname:
        File name of the merged pdf file.

    Returns: None

    Note: If a pdf file is split into pages using pdf2pages, and then some pages
    are merged, then checkboxes will be unreadable due to the issue with pdf2pages.
    However, it seems that reselecting a few of the checkboxes in the merged file
    makes all of them readable again.
    '''
    from contextlib import ExitStack

    output = pdf.PdfFileWriter()

    # All source files are kept open until the merged file has been written
    # (the writer may still need to read page data from them), then every
    # handle is closed, also when an error occurs part way through.
    with ExitStack() as stack:
        for f in files:
            f_pdf = pdf.PdfFileReader(stack.enter_context(open(f, 'rb')))
            for n in range(f_pdf.numPages):
                output.addPage(f_pdf.getPage(n))
        with open(output_fname, "wb") as outpdf:
            output.write(outpdf)

            


def qr_decode_pdf(fname, data_only=True, dpi=200):
    '''
    Reads data from QR codes embedded in a pdf file.

    :fname:
        The name of the pdf file.
    :data_only:
        If true, only the data read from QR codes (decoded as utf8 strings)
        is returned, otherwise the whole objects returned by pyzbar's decode
        are returned.
    :dpi:
        Resolution of the images produced from the pdf to read QR codes.

    Returns:
        A list indexed by pages of the pdf, where each list entry
        is a list of data read from QR codes on that page.
    '''
    pages_qr = []

    with open(fname, 'rb') as pdf_file:
        reader = pdf.PdfFileReader(pdf_file)
        for page_index in range(reader.numPages):
            # pdf2image.convert_from_bytes could rasterize the whole file at
            # once, but to save memory each page is written to an in-memory
            # one-page pdf and converted separately.
            single_page = pdf.PdfFileWriter()
            single_page.addPage(reader.getPage(page_index))
            buffer = io.BytesIO()
            single_page.write(buffer)
            page_bytes = buffer.getvalue()
            buffer.close()

            images = pdf2image.convert_from_bytes(page_bytes, dpi=dpi)
            decoded = decode(images[0])
            if not data_only:
                pages_qr.append(decoded)
                continue
            pages_qr.append([item.data.decode('utf8') for item in decoded])

    return pages_qr


def format_table(page, latex_template=None, maxpoints=10, name="XXXX", problem="0"):
    '''
    Formats a LaTeX template to add a score table to a given
    pdf page

    :page:
        Name of the pdf file to add score table to.
    :latex_template:
        Name of the LaTeX file with the formatting template. If None, the
        default template path hard-coded below is used.
    :maxpoints:
        The maximal number of points in the score table (up to 30 will look fine)
    :name:
        Name or id identifying the student.
    :problem:
        The number of the problem corresponding to the score table
        (a string or a number).

    Returns:
        A string with LaTeX code.

    The placeholders FILENAME, MAXPOINTS, SHIFT, PROBLEMNUM and NAME in the
    template are replaced by the corresponding values.
    '''
    import re

    default_template = "/Users/bb/Desktop/grading/grade_table_template.tex"

    if latex_template is None:
        latex_template = default_template

    # read the LaTeX template
    with open(latex_template, 'r') as f:
        latex = f.read()

    # values to insert into the template
    values = {
        "FILENAME": str(page),
        "MAXPOINTS": str(maxpoints),
        "SHIFT": str((maxpoints + 2)/2),  # for score table formatting
        "PROBLEMNUM": str(problem),
        "NAME": str(name),
    }

    # Substitute all placeholders in a single pass so that inserted values are
    # never re-scanned (e.g. a file name containing "NAME" must stay intact).
    # FILENAME is listed before NAME because it contains NAME as a substring.
    placeholder = re.compile("FILENAME|MAXPOINTS|SHIFT|PROBLEMNUM|NAME")
    return placeholder.sub(lambda m: values[m.group(0)], latex)


def format_qr(page, qr_string, label_string, latex_template=None):
    '''
    Formats a LaTeX template to add QR code to a given
    pdf page

    :page:
        Name of the pdf file to add the QR code to.
    :qr_string:
        String to be encoded in the QR code.
    :label_string:
        String (up to 3 lines) with text of the label to be placed next
        to the QR code. Newlines are converted to LaTeX line breaks.
    :latex_template:
        Name of the LaTeX file with the formatting template. If None, the
        default template path hard-coded below is used.

    Returns:
        A string with LaTeX code.

    The placeholders FILENAME, QR_STRING and QR_LABEL in the template are
    replaced by the corresponding values.
    '''
    import re

    default_template = "/Users/bb/Desktop/grading/qr_template.tex"

    if latex_template is None:
        latex_template = default_template

    # read the LaTeX template
    with open(latex_template, 'r') as f:
        latex = f.read()

    # values to insert into the template
    values = {
        "FILENAME": str(page),
        "QR_STRING": str(qr_string),
        # each newline of the label becomes a LaTeX line break (\\)
        "QR_LABEL": label_string.replace('\n', '\\\\'),
    }

    # Substitute all placeholders in a single pass so that inserted values are
    # never re-scanned (e.g. a QR string that happens to contain "QR_LABEL").
    placeholder = re.compile("FILENAME|QR_STRING|QR_LABEL")
    return placeholder.sub(lambda m: values[m.group(0)], latex)


def compile_latex(source, output_file=None, output_directory=None):
    '''
    Compiles a given string with LaTeX code into pdf and cleans up all
    auxiliary files created in the process. Requires pdflatex to work.

    :source:
        String with LaTeX code to be compiled.
    :output_file:
        Name of the pdf file to be produced (with or without the ".pdf"
        suffix). Required, even though it is declared with a None default.
    :output_directory:
        Name of the directory where the pdf file will be saved.
        If none given the current directory will be used.

    Returns:
        A tuple consisting of the pdflatex subprocess return code and
        its stdout stream

    Raises:
        ValueError if output_file is not given.
    '''

    # fail early with a clear message, before any file is written
    if output_file is None:
        raise ValueError("output_file must be given")

    if output_directory is None:
        output_directory = os.getcwd()

    # remove output file suffix if needed
    if output_file.endswith(".pdf"):
        output_file = output_file[:-4]

    tex_file_path = os.path.join(output_directory, output_file + ".tex")
    with open(tex_file_path, "w") as f:
        f.write(source)

    # compile LaTeX
    latex_command = ["pdflatex", "-shell-escape", "-output-directory", output_directory, output_file + ".tex"]
    completed = subprocess.run(latex_command, capture_output=True)

    # Clean up the auxiliary files created during LaTeX compilation.
    # Files are matched by the "<output_file>." prefix rather than by the text
    # before the first dot, so job names that contain dots are cleaned up too.
    prefix = output_file + "."
    aux_suffixes = ('.tex', '.aux', '.log', '.gz', '.out')
    for f in os.listdir(output_directory):
        if f.startswith(prefix) and f.endswith(aux_suffixes):
            os.remove(os.path.join(output_directory, f))

    return completed.returncode, completed.stdout



def read_scores(fname):
    '''
    Gets data from checkbox forms embedded in a pdf file.

    :fname:
        Name of the pdf file.

    Returns:
        A list of names of pdf checkboxes that are checked, i.e. of form
        fields whose value is "Yes". Fields without a name or without a value
        are skipped; a pdf with no form at all gives an empty list.

    Note: reading pdf form data with pdftk:
    os.system('pdftk source.pdf dump_data_fields_utf8 > output_file.txt')
    '''

    scores = []
    with open(fname, 'rb') as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser)

        acroform = doc.catalog.get('AcroForm')
        if acroform is None:
            # the document contains no form, so nothing can be checked
            return scores
        fields = resolve1(resolve1(acroform).get('Fields', []))

        for i in fields:
            field = resolve1(i)
            # a field with no name or no value cannot be a checked box
            if field.get('T') is None or field.get('V') is None:
                continue
            # str() of the name/value looks like b'name' or /'Yes';
            # the text between the quotes is the actual content
            name = str(field.get('T')).split("'")[1]
            value = str(field['V']).split("'")[1]
            if value == "Yes":
                scores.append(name)
    return scores




def student_scores(score_list, problem_labels=None):
    '''
    Takes a list of names of pdf checkboxes indicating student scores
    and returns a pandas data frame of student scores, with rows corresponding
    to students and columns corresponding to problems. Also checks if
    multiple scores were entered for the same student and problem.

    This assumes that checkbox names are of the form name.problem_label.score where:
    name = identifies the student
    problem_label = identifies the problem
    score = is the problem score for the student (an integer)

    :score_list:
         List of names of pdf checkboxes indicating student scores.
    :problem_labels:
         List (or other iterable, e.g. a numpy array) of numbers or names of
         all exam/assignment problems.
         If not given dataframe columns will be labeled by problem labels
         discovered in checkbox names.

    Returns:
        A tuple consisting of:
         - a pandas dataframe with scores; when several scores were entered
           for the same student and problem, the first one is kept
         - dictionary whose keys are names of students with multiple problem
           scores entered and whose values are lists of problems with multiple
           scores (each problem is listed once).
    '''

    # "is None" rather than "== None": the latter fails for array-like labels
    if problem_labels is None:
        problem_d = {}
    else:
        problem_d = dict.fromkeys(str(p) for p in problem_labels)

    score_d = {}
    multiple_scores = {}
    for record in score_list:
        name, problem, score = record.split('.')
        student = score_d.setdefault(name, problem_d.copy())
        if student.get(problem) is not None:
            # a score for this problem was already recorded: report it once
            flagged = multiple_scores.setdefault(name, [])
            if problem not in flagged:
                flagged.append(problem)
        else:
            student[problem] = int(score)
    scores_df = pd.DataFrame(score_d).T
    return scores_df, multiple_scores


def qr_exam(fname, output_fname, qr_string, label_string, output_directory=None, latex_template=None):
    '''
    Embed QR codes in exam pages

    :fname:
        Name of the pdf file with the exam.
    :output_fname:
        Name of the output pdf file.
    :qr_string:
        A function with one integer argument n. The string returned by this function
        will be encoded in the QR code on the page with zero-based index n
        (the first page gets n = 0).
    :label_string:
        A function with one integer argument n. The string returned by this function
        will be printed next to the QR code on the page with zero-based index n.
        The string should consist of at most 3 lines.
    :output_directory:
        Name of the directory where the output pdf file will be saved. If none given
        the current directory will be used.
    :latex_template:
        The template file used for placing QR codes on pages, if None the default
        template will be used.

    Returns:
        Name of the output file. If LaTeX compilation of a page fails, a
        message is printed and the stdout of pdflatex is returned instead.
    '''

    if latex_template is None:
        latex_template = "/Users/bb/Desktop/grading/qr_template.tex"

    # if no output_directory is given, use the current directory
    if output_directory is None:
        output_directory = os.getcwd()

    # Name temporary files after the base name of the exam file only, so that
    # an fname containing a directory part does not leak into their names.
    base = os.path.splitext(os.path.basename(fname))[0]

    # split exam into pages
    page_list = pdf2pages(fname, output_fname=f"qr_temp_{base}", output_directory=output_directory)
    page_paths = [os.path.join(output_directory, p) for p in page_list]
    qr_list = [os.path.join(output_directory, "tex_" + p) for p in page_list]

    try:
        for n, (p, page_path) in enumerate(zip(page_list, page_paths)):
            latex = format_qr(page_path, qr_string(n), label_string(n),
                              latex_template=latex_template)
            r = compile_latex(latex, output_file="tex_" + p[:-4], output_directory=output_directory)
            if r[0] != 0:
                print("Latex compilation failed.")
                return r[1]
        merge_pdfs(qr_list, output_fname=os.path.join(output_directory, output_fname))
    finally:
        # remove the temporary one-page files, also when compilation failed
        for path in page_paths + qr_list:
            if os.path.exists(path):
                os.remove(path)

    return output_fname


def qr_exams_from_list(fname, id_list, exam_num, course_id, course_sec=0, output_directory=None, latex_template=None):
    '''
    Produce exams with embedded QR codes for a given list of student ids.

    :fname:
        Name of the pdf file containing the exam.
    :id_list:
        List with ids of students.
    :exam_num:
        Number of exam or some short label identifying it (e.g. "FINAL").
    :course_id:
        ID of the course (e.g. "MTH 141").
    :course_sec:
        Course section (e.g. "Y").
    :output_directory:
        Name of the directory where exams will be created. If the directory does
        not exist it will be created. If the directory is not specified the current
        directory will be used.
    :latex_template:
        Template LaTeX file used to place the QR codes. If none given the default template
        will be used.

    Returns:
        A list with the values returned by qr_exam for each student; these
        are the file names of the produced pdf files, named <id>_<exam file name>.
    '''

    if output_directory is None:
        output_directory = os.getcwd()

    os.makedirs(output_directory, exist_ok=True)

    qr_str = f"{course_id}_{course_sec}_EX_{exam_num}"
    qr_lab = f"{course_id} SEC. {course_sec}\nEXAM {exam_num}"

    # use only the file name of the exam, so that a directory part of fname
    # does not end up inside the output file names
    exam_basename = os.path.basename(fname)

    qr_exam_list = []

    for name in id_list:
        # name is bound as a default argument so that each function keeps the
        # id of its own student
        def qr_string(n, name=name):
            return f"{name}_{qr_str}_{n}"

        def label_string(n, name=name):
            return f"{name}\n{qr_lab} P.{n}"

        ex = qr_exam(fname=fname,
                     output_fname=f"{name}_{exam_basename}",
                     qr_string=qr_string,
                     label_string=label_string,
                     output_directory=output_directory,
                     latex_template=latex_template
                     )

        qr_exam_list.append(ex)

    return qr_exam_list

In [35]:
def save_csv(df, fname):
    '''
    Saves a pandas data frame to a csv file.

    :df:
        The data frame to be saved.
    :fname:
        Name of the csv file.

    Returns: None

    The index of the data frame is written as a column with the header "Name".
    '''
    index_header = "Name"
    df.to_csv(path_or_buf=fname, index_label=index_header)

In [36]:
# Flatten the form with pdftk so that it is no longer editable: reads
# sample_t.pdf and writes the flattened copy to sample_st_flat.pdf.
# os.system returns the exit status of the command (0 in the output below).
os.system('pdftk sample_t.pdf output sample_st_flat.pdf flatten')

0

In [2]:
# Load the class roster; the output below shows the columns ID, Name and Email.
roster = pd.read_csv("roster.csv")
roster

Unnamed: 0,ID,Name,Email
0,50000000,"Aswani, Amy",amyaswan@buffalo.edu
1,50281090,"Engelhardt,Carolyn",cengelha@buffalo.edu
2,50291345,"Hozoji,Kosuke",kosukeho@buffalo.edu
3,50293175,"Liao,Chang Chih",cliao9@buffalo.edu
4,50290003,"Ma,Ning",nma22@buffalo.edu
5,50133998,"Wang,Daxun",daxunwan@buffalo.edu
6,50285537,"Winton,Daniel Marc",dmwinton@buffalo.edu
7,50291281,"Zhang,Baoming",baomingz@buffalo.edu
8,50285569,"Ziegler,Cameron",cz22@buffalo.edu


In [3]:
# Use the part of each email address before "@" as the student id.
ids = [x.split("@")[0] for x in roster["Email"]]
ids

['amyaswan',
 'cengelha',
 'kosukeho',
 'cliao9',
 'nma22',
 'daxunwan',
 'dmwinton',
 'baomingz',
 'cz22']

In [5]:
# Produce one QR-coded copy of exam3.pdf per student id.
# NOTE(review): course_id is "MTH528" while the output directory is "MTH428",
# and exam_num is 1 while the file is exam3.pdf - confirm these are intended.
qr_exams_from_list(id_list=ids, 
                   course_id = "MTH528", 
                   exam_num =1, 
                   fname = "exam3.pdf", 
                   output_directory="MTH428")

['amyaswan_exam3.pdf',
 'cengelha_exam3.pdf',
 'kosukeho_exam3.pdf',
 'cliao9_exam3.pdf',
 'nma22_exam3.pdf',
 'daxunwan_exam3.pdf',
 'dmwinton_exam3.pdf',
 'baomingz_exam3.pdf',
 'cz22_exam3.pdf']

# Varia

In [248]:
 
from reportlab.pdfgen import canvas
from reportlab.pdfbase import pdfform
from reportlab.lib.colors import magenta, pink, blue, green
 
def create_simple_radios():
    '''
    Creates the file simple_radios.pdf with a demo of pdf radio buttons:
    a heading, a text label and one radio group named 'score_table'
    consisting of ten buttons with values 'value0', ..., 'value9'
    placed one below another.

    Returns: None
    '''
    pdf_canvas = canvas.Canvas('simple_radios.pdf')

    # page heading
    pdf_canvas.setFont("Courier", 20)
    pdf_canvas.drawCentredString(300, 700, 'Radio demo')

    pdf_canvas.setFont("Courier", 14)
    acro_form = pdf_canvas.acroForm

    # text label next to the first button
    pdf_canvas.drawString(10, 650, 'Dog:')

    # settings shared by all buttons of the group
    button_options = dict(name='score_table', tooltip='Field radio1',
                          selected=False, x=110, buttonStyle='check',
                          borderStyle='solid', shape='square',
                          borderColor=magenta, fillColor=pink,
                          textColor=blue, forceBorder=False)

    # the buttons are spaced 30 units apart, going down the page
    for row in range(10):
        acro_form.radio(value=f'value{row}', y=645 - 30*row, **button_options)

    pdf_canvas.save()

In [252]:
# Generate simple_radios.pdf in the current directory.
create_simple_radios()

In [260]:
# Try read_scores on the radio demo. It returns [] (see output below):
# read_scores only reports fields whose value is "Yes", and the radio group
# inspected in the next cell has the value 'value0'.
read_scores('simple_radios.pdf')

[]

In [258]:
# Print the first form field of simple_radios.pdf to see how the radio
# group is stored in the pdf.
with open('simple_radios.pdf', 'rb') as fp:
    parser = PDFParser(fp)
    doc = PDFDocument(parser)
    field = resolve1(doc.catalog['AcroForm'])['Fields'][0]
    
    # the list entry may be an indirect reference; resolve it before printing
    field = resolve1(field)
    print(field)

{'V': /'value0', 'Kids': [<PDFObjRef:11>, <PDFObjRef:12>, <PDFObjRef:13>, <PDFObjRef:14>, <PDFObjRef:15>, <PDFObjRef:16>, <PDFObjRef:17>, <PDFObjRef:18>, <PDFObjRef:19>, <PDFObjRef:20>], 'T': b'score_table', 'FT': /'Btn'}


In [None]:
# Create the directory (and any missing parents) unless it already exists;
# exist_ok=True replaces the separate, race-prone existence check.
os.makedirs(directory, exist_ok=True)

In [62]:
import glob

In [65]:
# List the entries of the current working directory.
glob.glob("*")

['book.pdf',
 'test1.txt',
 'TEST',
 'roster.csv',
 'simple_radios.pdf',
 'qr_template2.tex',
 'gui.ipynb',
 'grade_table_template.tex',
 '309_test',
 'qr_template.tex',
 'many_qr.pdf',
 'exam3.pdf',
 'grade_table.ipynb',
 'sample_exam.pdf']

In [10]:
# Check that TEST (listed above) is a directory.
os.path.isdir("TEST")

True

In [11]:
# Look up the os.makedirs documentation (in particular the exist_ok option).
help(os.makedirs)

Help on function makedirs in module os:

makedirs(name, mode=511, exist_ok=False)
    makedirs(name [, mode=0o777][, exist_ok=False])
    
    Super-mkdir; create a leaf directory and all intermediate ones.  Works like
    mkdir, except that any intermediate path segment (not just the rightmost)
    will be created if it does not exist. If the target directory already
    exists, raise an OSError if exist_ok is False. Otherwise no exception is
    raised.  This is recursive.



In [14]:
# With exist_ok=True no error is raised although TEST already exists.
os.makedirs("TEST", exist_ok = True)