# Post Processing
1. Backup grading result.
2. Generate score report.
3. Create an individual scored PDF for each student.
4. Collect samples.

In [1]:
# Path of the exam PDF being processed. Its base name (without extension)
# selects the ../marking_form/<name> working folder used by the cells below.
pdf_file = "../data/TestScript.pdf"

In [2]:
import os

# Working folder for this exam: ../marking_form/<PDF name without extension>.
file_name = os.path.splitext(os.path.basename(pdf_file))[0]
base_path = "../marking_form/" + file_name

# Folders read by the cells below: page images, annotations, per-question marks.
# Note that the questions path has no trailing slash, unlike the others.
base_path_images = base_path + "/images/"
base_path_annotations = base_path + "/annotations/"
base_path_questions = base_path + "/questions"

# Output folders for annotated images, per-student PDFs and merged/sample files.
base_path_marked_images = base_path + "/marked/images/"
base_path_marked_pdfs = base_path + "/marked/pdf/"
base_path_marked_scripts = base_path + "/marked/scripts/"
for marked_dir in (
    base_path_marked_images,
    base_path_marked_pdfs,
    base_path_marked_scripts,
):
    os.makedirs(marked_dir, exist_ok=True)

## Backup grading result
Remove the version history before you back up.

In [3]:
import os
# Delete every file under the questions folder whose name starts with
# "control-" or "mark-". "mark.json" itself is kept: it does not start with "mark-".
version_prefixes = ("control-", "mark-")
for folder, _subfolders, names in os.walk(base_path_questions):
    for name in names:
        if name.startswith(version_prefixes):
            os.remove(os.path.join(folder, name))

Zip the website.

In [4]:
import shutil
# Zip the whole marking folder. make_archive appends ".zip" to base_name, so
# the archive is written next to the folder rather than inside it.
shutil.make_archive(base_name=base_path, format="zip", root_dir=base_path)

'/workspaces/AI-Handwrite-Grader/marking_form/TestScript.zip'

# Generate Score Report

Check the ID and NAME pages to verify the values before generating the mark sheet.

In [5]:
import os
import json
import pandas as pd
from termcolor import colored

# annotations.json is a dict; its number of entries is used below as the
# number of pages that make up one student's script.
with open(os.path.join(base_path_annotations, "annotations.json")) as annotationFile:
    numberOfPage = len(json.load(annotationFile))

# Map the "id" of every ID entry (a page number stored as a string) to the
# student ID; a non-empty manual override wins over the recognised mark.
with open(os.path.join(base_path_questions, "ID", "mark.json")) as idMarkFile:
    pageToStudentId = {
        entry["id"]: (
            entry["mark"] if entry["overridedMark"] == "" else entry["overridedMark"]
        )
        for entry in json.load(idMarkFile)
    }


def getStudentId(page):
    """Return the student ID of the script that contains ``page``.

    Walks backwards from ``page`` over at most ``numberOfPage`` page numbers
    and returns the ID stored in ``pageToStudentId`` for the first page number
    found there (keys are page numbers as strings). If none is found, prints a
    red warning and returns ``None``.
    """
    candidate = page
    while candidate > page - numberOfPage:
        key = str(candidate)
        if key in pageToStudentId:
            return pageToStudentId[key]
        candidate -= 1
    print(colored("{} is not in pageToStudentId!".format(page), "red"))
    return None


# Collect the marks of every question:
#   {question label -> {student ID -> mark}}
questionAndMarks = {}
for path, currentDirectory, files in os.walk(base_path_questions):
    if "mark.json" not in files:
        continue
    # The question label is the folder path relative to the questions root.
    question = path[len(base_path_questions) + 1 :]
    # A context manager closes the file even when the JSON cannot be parsed;
    # the previous open()/close() pair leaked the handle in that case.
    with open(os.path.join(path, "mark.json")) as f:
        data = json.load(f)
    marks = {}
    for i in data:
        # Each entry's "id" is a page number; resolve it to the owning student.
        studentId = getStudentId(int(i["id"]))
        # A non-empty manual override wins over the recognised mark.
        marks[studentId] = (
            i["overridedMark"] if i["overridedMark"] != "" else i["mark"]
        )
    questionAndMarks[question] = marks
# One row per student ID, one column per question label.
identityColumns = ["ID", "NAME", "CLASS"]
marksDf = pd.DataFrame(questionAndMarks)

# Identity columns first, then the question columns in sorted order.
questionColumns = [
    col for col in sorted(marksDf.columns) if col not in identityColumns
]
marksDf = marksDf[identityColumns + questionColumns]

# Total mark: every question column converted to numbers and summed per row.
marksDf["Marks"] = marksDf[questionColumns].apply(pd.to_numeric).sum(axis=1)
print(marksDf)

                  ID                  NAME  CLASS A1-5 A11-15 A6-10 B1b1 B1b2  \
240122568  240122568             HO Man Ki  S3:1B    0      4     2    1    2   
240076117  240076117        LOCK Chun Kwan  53:1C    6      2     2    0    0   
240193868  240193868             LI Bailin  53:1A    2      6     4    1    0   
240039736  240039736          WONG Yiu Tin  S3.1C    6      4     4    0    0   
240221558  240221558         HE Shing Yuen  S3:1B    6      6     4    1    2   
240236836  240236836         CHAN Chun Nok  53.1A    4      4     8    1    2   
240020889  240020889           MAK Wing Ho  S3.1C    6      4     2    1    0   
220495992  220495992         CHAN Hau Kwan  S3.3A    8      8     4    1    0   
220496374  220496374         LAW Hong Yung  S3.3A    4      2     2    0    0   
240132931  240132931           YU Tsun Hei  S3.1B    6      4     4    0    0   
240198876  240198876         CHAN Chi Ning  S3:1B   10      6     2    0    2   
240061478  240061478        

# Create Scored Scripts

Copy raw images to marked folder

In [6]:
import shutil
import os

# Start from a clean copy of the raw page images: remove any existing marked
# image folder, then copy the originals into it. Later cells draw onto these
# copies in place.
if os.path.exists(base_path_marked_images):
    shutil.rmtree(base_path_marked_images)
shutil.copytree(base_path_images, base_path_marked_images)

'../marking_form/TestScript/marked/images/'

In [7]:
import json
annotations_path = base_path_annotations + "annotations.json"
with open(annotations_path, "r") as f:
    annotations = json.load(f)

# Flatten {page -> [annotation, ...]} into a single list. Every annotation is
# tagged with its page number and its x/y keys are renamed to left/top.
annotations_list = []
for page, page_annotations in annotations.items():
    for annotation in page_annotations:
        annotation["page"] = int(page)
        annotation["left"] = annotation.pop("x")
        annotation["top"] = annotation.pop("y")
        annotations_list.append(annotation)

# Index the annotations by label; if a label occurs more than once the last
# occurrence is the one that is kept.
annotations_dict = {
    annotation["label"]: annotation for annotation in annotations_list
}
annotations_dict


{'NAME': {'width': 482,
  'height': 52,
  'label': 'NAME',
  'page': 0,
  'left': 340,
  'top': 184},
 'ID': {'width': 444,
  'height': 58,
  'label': 'ID',
  'page': 0,
  'left': 297,
  'top': 241},
 'CLASS': {'width': 200,
  'height': 57,
  'label': 'CLASS',
  'page': 0,
  'left': 832,
  'top': 243},
 'A1-5': {'width': 759,
  'height': 137,
  'label': 'A1-5',
  'page': 0,
  'left': 111,
  'top': 358},
 'A6-10': {'width': 760,
  'height': 153,
  'label': 'A6-10',
  'page': 0,
  'left': 810,
  'top': 348},
 'A11-15': {'width': 770,
  'height': 136,
  'label': 'A11-15',
  'page': 0,
  'left': 105,
  'top': 474},
 'B1b1': {'width': 752,
  'height': 142,
  'label': 'B1b1',
  'page': 0,
  'left': 99,
  'top': 584},
 'B1b2': {'width': 755,
  'height': 143,
  'label': 'B1b2',
  'page': 0,
  'left': 823,
  'top': 588},
 'B1b3': {'width': 758,
  'height': 149,
  'label': 'B1b3',
  'page': 0,
  'left': 99,
  'top': 689},
 'B1b4': {'width': 746,
  'height': 131,
  'label': 'B1b4',
  'page': 0,
 

In [8]:
# Map every student ID to the page number stored in the "id" field of its ID
# entry. A non-empty manual override wins over the recognised ID.
with open(os.path.join(base_path_questions, "ID", "mark.json")) as f:
    data = json.load(f)

studentIdToPage = {
    (row["mark"] if row["overridedMark"] == "" else row["overridedMark"]): int(row["id"])
    for row in data
}
studentIdToPage

{'240036734': 0,
 '240336478': 10,
 '240076117': 100,
 '240063724': 102,
 '240075815': 12,
 '240062764': 14,
 '240193868': 16,
 '240039736': 18,
 '240020889': 2,
 '240221558': 20,
 '240349462': 22,
 '240294599': 24,
 '240236836': 26,
 '240021143': 28,
 '220495992': 30,
 '240062611': 32,
 '220496374': 34,
 '240132931': 36,
 '240107184': 38,
 '240141706': 4,
 '240406461': 40,
 '240198876': 42,
 '240061478': 44,
 '240105523': 46,
 '240001758': 48,
 '240110164': 50,
 '240511445': 52,
 '240120237': 54,
 '240107400': 56,
 '240443954': 58,
 '240059575': 6,
 '240075839': 60,
 '240155170': 62,
 '240436810': 64,
 '240075981': 66,
 '240080364': 68,
 '240122568': 70,
 '240358077': 72,
 '240151195': 74,
 '240108261': 76,
 '240106163': 78,
 '240110674': 8,
 '240519025': 80,
 '240426110': 82,
 '240001814': 84,
 '240020785': 86,
 '240106022': 88,
 '240059563': 90,
 '240020908': 92,
 '240059587': 94,
 '240076271': 96,
 '240065951': 98}

In [9]:
import cv2
from IPython.display import display
from ipywidgets import IntProgress


# Convert marksDf to a list of per-student dicts (column name -> value).
marksDf_list = marksDf.to_dict(orient="records")

# Group the annotation labels by template page so that each page image is
# read and written once per student. Previously the same JPEG was re-read and
# re-encoded once per label, which is slow and loses a little quality on
# every save.
labels_by_page = {}
for label, box in annotations_dict.items():
    labels_by_page.setdefault(box["page"], []).append(label)

f = IntProgress(min=0, max=len(marksDf_list))  # instantiate the bar
display(f)  # display the bar

for student in marksDf_list:
    first_page = studentIdToPage[student["ID"]]
    for page_offset, labels in labels_by_page.items():
        # Annotation pages are relative to the first page of the script.
        page = first_page + page_offset
        image_path = base_path_marked_images + str(page) + ".jpg"
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError("Cannot read page image: " + image_path)

        for annotation in labels:
            value = student[annotation]
            if annotation == "ID":
                # Show the total next to the student ID.
                value = value + " Marks: " + str(student["Marks"])
            x = annotations_dict[annotation]["left"]
            y = annotations_dict[annotation]["top"]
            textSize = cv2.getTextSize(
                text=str(value),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=1,
                thickness=2,
            )
            # putText anchors at the text's bottom-left corner, so shift down
            # by the text height to start at the top of the annotation box.
            height = textSize[0][1]
            cv2.putText(
                img,
                str(value),
                (x, y + height),
                cv2.FONT_HERSHEY_SIMPLEX,
                1,
                (0, 0, 255),
                2,
            )

        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(image_path, img):
            raise OSError("Cannot write page image: " + image_path)
    f.value += 1

IntProgress(value=0, max=52)

In [10]:
from PIL import Image

# Build one PDF per student from that student's consecutive marked page images.
for student in marksDf_list:
    studentId = student["ID"]
    first_page = studentIdToPage[studentId]
    last_page = first_page + numberOfPage - 1
    print(studentId, first_page, last_page)
    pdf_path = base_path_marked_pdfs + studentId + ".pdf"

    # Image.open keeps the underlying file open; close every image once the
    # PDF is written so file handles do not pile up across students.
    images = []
    try:
        for i in range(first_page, last_page + 1):
            images.append(Image.open(base_path_marked_images + str(i) + ".jpg"))
        # The first image becomes page 1; the remaining ones are appended.
        images[0].save(pdf_path, save_all=True, append_images=images[1:])
    finally:
        for image in images:
            image.close()
        

240122568 70 71
240076117 100 101
240193868 16 17
240039736 18 19
240221558 20 21
240236836 26 27
240020889 2 3
220495992 30 31
220496374 34 35
240132931 36 37
240198876 42 43
240061478 44 45
240141706 4 5
240120237 54 55
240443954 58 59
240436810 64 65
240075981 66 67
240358077 72 73
240151195 74 75
240106163 78 79
240020785 86 87
240110674 8 9
240059563 90 91
240059587 94 95
240076271 96 97
240065951 98 99
240036734 0 1
240075815 12 13
240110164 50 51
240511445 52 53
240075839 60 61
240519025 80 81
240063724 102 103
240336478 10 11
240294599 24 25
240107184 38 39
240059575 6 7
240080364 68 69
240406461 40 41
240426110 82 83
240349462 22 23
240062611 32 33
240107400 56 57
240001814 84 85
240020908 92 93
240001758 48 49
240108261 76 77
240062764 14 15
240021143 28 29
240106022 88 89
240105523 46 47
240155170 62 63


# Generate Script Sample

Five sets of samples:
1. Combined scripts
2. 3 Good, 3 Average, and 3 Weak.
3. 5 Good, 5 Average, and 5 Weak.
4. 3 Good, 3 Average, and 3 Weak above the passing mark.
5. 5 Good, 5 Average, and 5 Weak above the passing mark. 

In [11]:
# Threshold for the "_only_pass" samples: totals are compared with
# `x > passingMark`, i.e. strictly greater than this value.
passingMark = 40

In [12]:
from PyPDF4 import PdfFileMerger

writer = PdfFileMerger(strict=True)

try:
    # Merge all PDFs under base_path_marked_pdfs into a single file.
    # os.walk yields entries in arbitrary order, so sort directories and file
    # names to make the page order of all.pdf reproducible.
    for path, currentDirectory, files in os.walk(base_path_marked_pdfs):
        currentDirectory.sort()
        for file in sorted(files):
            if file.endswith(".pdf"):
                pdf_path = os.path.join(path, file)
                writer.append(pdf_path)
    writer.write(base_path_marked_scripts + "all.pdf")
finally:
    # Release the input files held by the merger, even if merging failed.
    writer.close()

In [13]:
from PyPDF4 import PdfFileMerger, PdfFileReader

# Total marks ranked from highest to lowest; the index is carried over from marksDf.
ranked = marksDf.sort_values(by=["Marks"], ascending=False)
sampling = ranked["Marks"]

# Cover pages that separate the Good / Average / Weak groups in a sample file.
from_directory = os.path.join(os.getcwd(), "..", "templates", "pdf")
goodPage, averagePage, weakPage = (
    PdfFileReader(from_directory + "/" + title + ".pdf")
    for title in ("Good", "Average", "Weak")
)


def get_scripts_psf(df):
    """Return the marked-PDF path for every index label of ``df``.

    Each label is concatenated as-is between ``base_path_marked_pdfs`` and
    ".pdf", so the labels must be strings.
    """
    return [base_path_marked_pdfs + label + ".pdf" for label in df.index]


def take_sample(n, sampling, suffix=""):
    """Write a sample PDF with the top, middle and bottom ``n`` scripts.

    Parameters
    ----------
    n : int
        Requested number of scripts per group. When ``sampling`` has fewer
        than ``3 * n`` rows, ``n`` is reduced to ``len(sampling) // 3`` so the
        three groups cannot overlap.
    sampling : pandas.Series
        Marks ordered from best to worst. The index labels are passed to
        ``get_scripts_psf`` to locate each script's PDF.
    suffix : str
        Text inserted before ".pdf" in the output file name.

    The output is written to
    ``base_path_marked_scripts + "sampleOf<n><suffix>.pdf"`` (using the
    possibly reduced ``n``); each group is preceded by its cover page.
    """
    if len(sampling) < 3 * n:
        n = len(sampling) // 3
    good = sampling.head(n)
    weak = sampling.tail(n)
    # Centre a window of exactly n rows on the median. The previous
    # "median - n // 2 : median + n // 2" slice returned only n - 1 rows for
    # odd n (2 scripts for n=3, 4 for n=5). Even n is unaffected.
    start = len(sampling) // 2 - n // 2
    average = sampling.iloc[start : start + n]

    merger = PdfFileMerger()
    try:
        groups = ((goodPage, good), (averagePage, average), (weakPage, weak))
        for coverPage, group in groups:
            merger.append(coverPage)
            for pdf in get_scripts_psf(group):
                merger.append(PdfFileReader(pdf))
        fileName = base_path_marked_scripts + "sampleOf" + str(n) + suffix + ".pdf"
        # Close the output file deterministically; it used to be left open.
        with open(fileName, "wb") as output:
            merger.write(output)
        print("Output successfully written to" + fileName)
    finally:
        # Release the merger's inputs even when appending or writing fails.
        merger.close()


# Samples drawn from the whole class.
take_sample(3, sampling)
take_sample(5, sampling)

# Samples restricted to students above the passing mark.
# Series.where keeps every row and only replaces non-matching marks with NaN,
# so failing students stayed in the ranking and were picked as the weak group.
# Boolean indexing removes those rows.
sampling = sampling[sampling > passingMark]
take_sample(3, sampling, "_only_pass")
take_sample(5, sampling, "_only_pass")

Output successfully written to../marking_form/TestScript/marked/scripts/sampleOf3.pdf
Output successfully written to../marking_form/TestScript/marked/scripts/sampleOf5.pdf
Output successfully written to../marking_form/TestScript/marked/scripts/sampleOf3_only_pass.pdf
Output successfully written to../marking_form/TestScript/marked/scripts/sampleOf5_only_pass.pdf


In [14]:
# Detailed report: every column of marksDf, without the index.
detailsReportPath = base_path_marked_scripts + "details_score_report.xlsx"
marksDf.to_excel(detailsReportPath, index=False)

# Summary report: identity columns and the total mark only.
summaryColumns = ["ID", "NAME", "CLASS", "Marks"]
summaryReportPath = base_path_marked_scripts + "score_report.xlsx"
marksDf[summaryColumns].to_excel(summaryReportPath, index=False)

In [15]:
from IPython.display import FileLink 

# zip base_path_marked_scripts folder
script_zip = base_path_marked_scripts + "../scripts"
shutil.make_archive(script_zip, "zip", base_path_marked_scripts)
FileLink(script_zip + ".zip")