# Automatic Grader with Azure OpenAI ChatGPT
This notebook can grade students’ assignments automatically by downloading them from Moodle LMS. It will unzip the assignment file from Moodle and create a folder for each student. If a student submits a zip file, it will also unzip it in their folder. The folder should contain either some Docx files or one PDF file. For Docx files, the notebook will extract and merge all the texts into one answer. For PDF files, it will extract the text from every page and merge it into one answer.

The notebook will then use a marking scheme as prompts and let Azure OpenAI ChatGPT evaluate the answer according to the rules. It will also estimate the probability that the answer is copied from the internet or generated by AI.

The notebook will use Azure OpenAI text-embedding-ada-002 to get the embedding of the answer. It will then use K-means clustering to group the answers based on their embeddings and show the teachers the different types of answers. It will also perform PCA on the embeddings and plot the first three principal components in 3D. This will help the teachers see how similar or different the answers are.

### Install packages

In [4]:
%pip install -q pypandoc docx2txt PyPDF2 openpyxl python-dotenv openai num2words matplotlib plotly scipy scikit-learn pandas tiktoken ipywidgets seaborn ipympl PyQt6 pdfplumber
%load_ext dotenv
%dotenv

Note: you may need to restart the kernel to use updated packages.
The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv
cannot find .env file


You should consider upgrading via the 'c:\Users\callu\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


### Submit Files

In [None]:
import sys
import os
import shutil
from PyQt6.QtWidgets import QApplication, QWidget, QPushButton, QFileDialog, QLabel, QVBoxLayout

class FolderUploadApp(QWidget):
    """Small window that copies the PDFs of a user-selected folder into ./Files.

    The window shows a status label and a "Select Folder" button. Clicking
    the button opens a directory picker; every ``.pdf`` file found directly
    inside the chosen folder is copied (the originals are kept) into a
    ``Files`` folder under the current working directory. The application
    then quits after a short delay.
    """

    # Delay before quitting, so the final status message can actually be read.
    CLOSE_DELAY_MS = 2000

    def __init__(self):
        super().__init__()
        self.initUI()

    def initUI(self):
        """Build the window: title, geometry, status label and select button."""
        self.setWindowTitle("File Submission")
        self.setGeometry(100, 100, 400, 200)

        layout = QVBoxLayout()

        self.label = QLabel("Upload a folder containing PDFs", self)
        layout.addWidget(self.label)

        self.button = QPushButton("Select Folder", self)
        self.button.clicked.connect(self.upload_folder)
        layout.addWidget(self.button)

        self.setLayout(layout)

    def upload_folder(self):
        """Ask for a folder and copy its PDF files into the ``Files`` folder."""
        # Imported locally so this method does not rely on the cell's import line.
        from PyQt6.QtCore import QTimer

        # Open a folder selection dialog
        folder_path = QFileDialog.getExistingDirectory(self, "Select Folder")
        if not folder_path:
            # Nothing was selected; keep the window open.
            return

        # Destination is a "Files" folder under the current working directory.
        destination_folder = os.path.join(os.getcwd(), "Files")
        os.makedirs(destination_folder, exist_ok=True)  # Ensure the folder exists

        # Only regular files count: a directory named "*.pdf" cannot be copied
        # with shutil.copy.
        pdf_files = [
            f
            for f in os.listdir(folder_path)
            if f.lower().endswith(".pdf")
            and os.path.isfile(os.path.join(folder_path, f))
        ]

        if not pdf_files:
            self.label.setText("No PDF files found in the selected folder.")
            return

        for file_name in pdf_files:
            src_file = os.path.join(folder_path, file_name)
            dest_file = os.path.join(destination_folder, file_name)
            try:
                shutil.copy(src_file, dest_file)
            except shutil.SameFileError:
                # The selected folder is the destination itself; the file is
                # already where it needs to be.
                pass

        # Update UI
        self.label.setText(f"Uploaded {len(pdf_files)} PDFs to 'Files' folder.")
        print(f"Copied {len(pdf_files)} PDFs to: {destination_folder}")

        # Close the application after the delay instead of immediately, so the
        # status message above is visible before the window disappears.
        QTimer.singleShot(self.CLOSE_DELAY_MS, QApplication.instance().quit)

# Run the application
# Reuse an existing QApplication when there is one: Qt allows a single
# instance per process and this notebook launches the upload window from
# more than one cell.
app = QApplication.instance() or QApplication(sys.argv)
window = FolderUploadApp()
window.show()
# Block until the window quits. The exit code is kept in a variable rather
# than passed to sys.exit(), which would raise SystemExit inside the notebook.
exit_code = app.exec()


### Common Functions

In [5]:
def read_text_file(file_path):
    """Return the whole content of the text file at ``file_path``.

    The file is decoded as UTF-8; bytes that cannot be decoded are replaced
    instead of raising an error.
    """
    with open(file_path, mode="r", encoding="utf-8", errors="replace") as handle:
        content = handle.read()
    return content


In [2]:
# read text file and return the content
def read_text_file(path):
    """Return the content of the text file at ``path`` with newlines removed.

    The file is decoded as UTF-8 with undecodable bytes replaced, matching the
    other ``read_text_file`` definition in this notebook, instead of relying
    on the platform's default encoding.

    Note: every newline character is deleted, so the last word of a line is
    joined directly to the first word of the next line.
    """
    with open(path, 'r', encoding="utf-8", errors="replace") as file:
        data = file.read().replace('\n', '')
    return data

def write_text_to_file(path, content):
    """Write ``content`` to the file at ``path``, replacing any existing content.

    The text is encoded as UTF-8 so that what is written can be read back by
    ``read_text_file`` regardless of the platform's default encoding.
    """
    with open(path, 'w', encoding="utf-8") as file:
        file.write(content)

Extract all submissions to a tmp folder


In [None]:
# Import the zipfile module
from zipfile import ZipFile
# Open the downloaded submissions archive read-only and unpack every member
# into the tmp/ folder; the archive is closed when the block ends.
with ZipFile("data/submission.zip", mode="r") as zip_obj:
    zip_obj.extractall(path="tmp/")

In [None]:
# Import the os module
import os
import pandas as pd

# Folder that get_submissions_df scans for per-student sub-folders.
# NOTE(review): the submissions archive is extracted to "tmp/", not "Files/" —
# confirm which folder is meant to feed the rest of the notebook.
temp_path = "Files/"

def is_folder_contains_file(folder_path, extension):
    """Tell whether ``folder_path`` directly holds an entry ending in ``extension``.

    Only the immediate entries of the folder are inspected (no recursion) and
    the suffix comparison is case-sensitive.
    """
    return any(entry.endswith(extension) for entry in os.listdir(folder_path))
    
# Build one table row per student folder found under a path
def get_submissions_df(path):
    """Describe every student folder found directly under ``path``.

    Each sub-directory of ``path`` becomes one row with:
      * ``Student`` - the folder name up to its first underscore,
      * ``Path`` - ``path`` joined with the folder name,
      * ``ContainsDocxFile`` / ``ContainsPdfFile`` / ``ContainsZipFile`` -
        whether the folder directly contains an entry with that extension.

    Plain files under ``path`` are ignored. The returned DataFrame always has
    these five columns, even when no sub-directory exists, so code that
    indexes the result by column name does not fail on an empty result.
    """
    columns = [
        "Student",
        "Path",
        "ContainsDocxFile",
        "ContainsPdfFile",
        "ContainsZipFile",
    ]
    assignment_folders = []
    for name in os.listdir(path):
        full_path = os.path.join(path, name)
        if not os.path.isdir(full_path):
            continue
        assignment_folders.append({
            "Student": name.split("_")[0],
            "Path": full_path,
            "ContainsDocxFile": is_folder_contains_file(full_path, ".docx"),
            "ContainsPdfFile": is_folder_contains_file(full_path, ".pdf"),
            "ContainsZipFile": is_folder_contains_file(full_path, ".zip"),
        })
    # Passing the column list keeps the schema stable for an empty row list.
    return pd.DataFrame(assignment_folders, columns=columns)


# Build the submissions overview for the folder configured in temp_path.
df = get_submissions_df(temp_path)

# Show the table as the cell output.
df

### Ensure that all the files submitted are valid

In [None]:
def filter_df_by_not_contains_any_expected_files(df):
    """Select the rows whose folder has neither a Docx, a PDF nor a zip file."""
    no_docx = df["ContainsDocxFile"].eq(False)
    no_pdf = df["ContainsPdfFile"].eq(False)
    no_zip = df["ContainsZipFile"].eq(False)
    return df[no_docx & no_pdf & no_zip]
# Show the submissions that contain none of the expected file types.
filter_df_by_not_contains_any_expected_files(df)

Handle zip file.

In [None]:
import os
import shutil

def flatten(directory):
    """Move every file nested below ``directory`` up into ``directory`` itself.

    Sub-directories are visited bottom-up. Each file is moved to the top
    level; when a file of the same name already exists there, ``_1``, ``_2``,
    ... is inserted before the extension until the name is free. Every
    sub-directory is removed once its files have been moved out.

    Files that are already at the top level are left untouched. Previously
    the walk also processed ``directory`` itself, where source and target
    are the same path, so every top-level file was renamed to ``name_1.ext``.
    """
    directory = os.fspath(directory)
    for dirpath, _, filenames in os.walk(directory, topdown=False):
        if dirpath == directory:
            # Nothing to move or delete at the top level.
            continue

        for filename in filenames:
            source = os.path.join(dirpath, filename)
            target = os.path.join(directory, filename)

            # Find a free name: stem_1.ext, stem_2.ext, ...
            stem, extension = os.path.splitext(filename)
            i = 0
            while os.path.exists(target):
                i += 1
                target = os.path.join(directory, stem + "_" + str(i) + extension)

            shutil.move(source, target)

            print("Moved ", source, " to ", target)

        os.rmdir(dirpath)
        print("Deleted ", dirpath)

def get_first_file_path(path, ext):
    """Return the path of the first entry in ``path`` whose name ends with ``ext``.

    Entries are examined in the order ``os.listdir`` reports them. ``None`` is
    returned when no entry matches.
    """
    matching = (entry for entry in os.listdir(path) if entry.endswith(ext))
    first = next(matching, None)
    if first is None:
        return None
    return os.path.join(path, first)

def extract_zip_file_in_place(path):
    """Unpack the first ``.zip`` found in ``path`` into ``path`` and flatten it.

    The archive is located with ``get_first_file_path`` and its path is
    printed. All members are extracted into ``path``, then ``flatten`` is run
    on the folder.
    """
    import zipfile

    zip_path = get_first_file_path(path, ".zip")
    print(zip_path)
    archive = zipfile.ZipFile(zip_path, "r")
    # Using the archive as a context manager closes it after extraction.
    with archive:
        archive.extractall(path)
    flatten(path)


def filter_df_by_contains_zip_file(df):
    """Select the rows whose ``ContainsZipFile`` flag is true."""
    has_zip = df["ContainsZipFile"].eq(True)
    return df[has_zip]

# Folders whose submission includes a zip archive.
paths = filter_df_by_contains_zip_file(df)["Path"].values
# Unpack each archive inside its own student folder.
for path in paths:
    extract_zip_file_in_place(path)

In [None]:
# Scan the folders again now that the zip archives have been unpacked in place.
df = get_submissions_df(temp_path)
## keep only the rows that contain a Docx or a PDF file
def filter_df_by_contains_docx_or_pdf_file(df):
    """Select the rows flagged as containing a Docx file, a PDF file, or both."""
    has_docx = df["ContainsDocxFile"].eq(True)
    has_pdf = df["ContainsPdfFile"].eq(True)
    return df[has_docx | has_pdf]

# Show the submissions that contain at least one Docx or PDF file.
filter_df_by_contains_docx_or_pdf_file(df)

## Processing Docx files

In [None]:
def filter_df_by_contains_docx(df):
    """Select the rows whose ``ContainsDocxFile`` flag is true."""
    has_docx = df["ContainsDocxFile"].eq(True)
    return df[has_docx]
# Submissions that include Docx files, and the folder path of each of them.
words_df = filter_df_by_contains_docx(df)
paths = words_df["Path"].values

def get_all_docx_files(path):
    """Return the ``.docx`` files located directly inside ``path``, sorted by name.

    ``path`` is escaped before globbing so that folder names containing the
    glob metacharacters ``*``, ``?`` or ``[`` are matched literally. Sorting
    makes the order of the returned files reproducible.
    """
    import glob
    return sorted(glob.glob(glob.escape(path) + "/*.docx"))

import docx2txt
from functools import reduce

# One list of .docx paths per student folder, in the same order as ``paths``.
students_words_files = [get_all_docx_files(folder) for folder in paths]

# Merge each student's documents into one answer, separated by a blank line.
# The earlier reduce() call used "\n\n" as its start value, which only
# prefixed the text and joined consecutive documents with nothing in between.
file_contents = [
    "\n\n".join(docx2txt.process(f) for f in word_files)
    for word_files in students_words_files
]

# words_df is a filtered slice of df; copy it before adding columns so the
# assignment is made on an independent frame.
words_df = words_df.copy()
# An object Series keeps each student's list of paths in a single cell instead
# of letting pandas interpret the nested lists as a 2-D array.
words_df["Sources"] = pd.Series(students_words_files, index=words_df.index, dtype=object)
words_df["Answers"] = file_contents


In [None]:
def filter_df_by_contains_pdf(df):
    """Select the rows whose ``ContainsPdfFile`` flag is true."""
    has_pdf = df["ContainsPdfFile"].eq(True)
    return df[has_pdf]
# Submissions that include PDF files, and the folder path of each of them.
pdfs_df = filter_df_by_contains_pdf(df)
paths = pdfs_df["Path"].values

def get_add_pdf_files(path):
    """Return the ``.pdf`` files located directly inside ``path``, sorted by name.

    ``path`` is escaped before globbing so that folder names containing the
    glob metacharacters ``*``, ``?`` or ``[`` are matched literally. Sorting
    makes the order of the returned files reproducible.
    """
    import glob
    return sorted(glob.glob(glob.escape(path) + "/*.pdf"))

import PyPDF2
from functools import reduce

def convert_pdf_all_pages_to_txt(path):
    """Return the text of every page of the PDF at ``path``.

    Pages are read in order with PyPDF2 and each page's text is followed by a
    blank line. The file is opened in a ``with`` block so the handle is closed
    once extraction has finished; it used to be left open.
    """
    with open(path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # Extraction must happen while the file is still open.
        return "".join(page.extract_text() + "\n\n" for page in reader.pages)

# One list of .pdf paths per student folder, in the same order as ``paths``.
students_pdf_files = [get_add_pdf_files(folder) for folder in paths]

# Merge each student's PDFs into one answer, separated by a blank line.
# The earlier reduce() call used "\n\n" as its start value, which only
# prefixed the text instead of separating the documents.
file_contents = [
    "\n\n".join(convert_pdf_all_pages_to_txt(f) for f in pdf_files)
    for pdf_files in students_pdf_files
]

# pdfs_df is a filtered slice of df; copy it before adding columns so the
# assignment is made on an independent frame.
pdfs_df = pdfs_df.copy()
# An object Series keeps each student's list of paths in a single cell instead
# of letting pandas interpret the nested lists as a 2-D array.
pdfs_df["Sources"] = pd.Series(students_pdf_files, index=pdfs_df.index, dtype=object)
pdfs_df["Answers"] = file_contents
pdfs_df


In [11]:
# combine two dataframes into one and export to excel
# NOTE(review): a submission flagged as containing both Docx and PDF files is
# in words_df and in pdfs_df, so it appears twice here — confirm this is wanted.
df_answers = pd.concat([words_df, pdfs_df])
df_answers.to_excel("data/answers.xlsx", index=False)

In [3]:
import pdfplumber

def extract_text_pdfplumber(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``, concatenated.

    Page texts are joined in page order with nothing inserted between them.
    A page for which ``extract_text()`` yields ``None`` contributes an empty
    string instead of raising ``TypeError`` during concatenation.
    """
    with pdfplumber.open(pdf_path) as pdf:
        return "".join(page.extract_text() or "" for page in pdf.pages)

# Example usage
# Forward slashes replace the backslash-separated literal, whose "\C" is an
# invalid escape sequence; this form also works outside Windows.
student_answer_pdf = extract_text_pdfplumber("Files/Copy of FYP UP2061187.pdf")
#marking_scheme = extract_text_pdfplumber("Files/FYP UP2061187.pdf")

## Grading students’ responses using Azure OpenAI ChatGPT

In [7]:
import os
import json
import openai
from openai import AzureOpenAI

# Key for the Azure OpenAI resource, read from the environment.
api_key = os.getenv("AZURE_OPENAI_API_KEY")

# The marking scheme is the prompt template: the student's answer replaces
# the empty <ANSWER></ANSWER> placeholder inside it.
marking_scheme = read_text_file("marking_scheme.txt")
student_answer = student_answer_pdf
prompt = marking_scheme.replace("<ANSWER></ANSWER>", student_answer)

# azure_endpoint is the base URL of the resource only. The client builds the
# /openai/deployments/<model>/chat/completions path itself and sends
# api_version as the api-version query parameter, so neither belongs here.
client = AzureOpenAI(
    api_version="2023-07-01-preview",
    azure_endpoint="https://up206-m6upi167-swedencentral.cognitiveservices.azure.com/",
    api_key=api_key,
)


completion = client.chat.completions.create(
    model="gpt-35-turbo-16k",  # name of the Azure deployment to call
    messages=[
        # One dict per message. Putting both roles into a single dict repeats
        # the "role"/"content" keys, so only the last pair (the user message)
        # survived and the system message was silently dropped.
        {"role": "system", "content": "You are a teaching assistant."},
        {"role": "user", "content": prompt},
    ],
)
# Pull the text of the first choice out of the response.
completion_dict = completion.model_dump()
response_content = completion_dict["choices"][0]["message"]["content"]
print(response_content)




Dear [Student's Name],

Thank you for sharing your final-year project titled "Automated Assignment Scoring Via Azure OpenAI ChatGPT." I appreciate your effort and the knowledge you have demonstrated in your project. I have reviewed your project and provided feedback based on the criteria outlined in the marking rubric. Here is the breakdown of your scores:

1. Statement of project’s context, aims and objectives: 75
2. Critical review of relevant literature: 70
3. Methodological approach: 0
4. Specification and discussion of the requirements: 0
5. Analysis and discussion of the IT design: 50
6. Discussion of implementation: 40
7. Discussion of verification and validation: 60
8. Evaluation against requirements: 0
9. Evidence of project planning and management: 0
10. Attributes of the solution: 0
11. Summary, conclusions and recommendations: 50
12. Structure and presentation: 60
13. Overall understanding and reflection: 40

Overall Score: 40

Feedback:
1. Statement of project’s context, a

In [None]:
# Show the combined answers table as the cell output.
df_answers

In [None]:
# Parse the model's reply as JSON.
# NOTE(review): json.loads raises json.JSONDecodeError when the reply is not
# valid JSON (for example a free-text letter) — confirm the marking scheme
# instructs the model to answer in JSON.
data_dict = json.loads(response_content)

# Convert the dictionary to a DataFrame
df_marked = pd.DataFrame([data_dict])


# Display the DataFrame
df_marked

In [35]:
# Put the parsed marks next to the answers, column-wise.
# NOTE(review): axis=1 aligns rows by index label, and df_marked presumably has
# a single row labelled 0 — confirm df_answers' index lines up with it.
df_total = pd.concat([df_answers, df_marked], axis=1)

In [None]:
# Show the combined answers-and-marks table as the cell output.
df_total

Analyse Results: 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Sample dataframe (replace this with your actual dataframe)
# NOTE: assigning df_total below replaces the df_total built by the earlier
# pd.concat cell with this one-row sample.
data = {
    "Student": ["Callum Fry"],
    "marks": [75],
}
df_total = pd.DataFrame(data)

# Extract the required columns
students = df_total['Student']
marks = df_total['marks']

# Create a plot chart to analyze the results
# (a line chart with one circular marker per student)
plt.figure(figsize=(10, 6))
plt.plot(students, marks, marker='o', color='skyblue', linestyle='-', linewidth=2, markersize=8)

# Add labels and title
plt.xlabel("Students", fontsize=12)
plt.ylabel("Marks", fontsize=12)
plt.title("Student Marks Analysis", fontsize=14)
# Rotate the student names so that long names do not overlap.
plt.xticks(rotation=45, ha='right', fontsize=10)
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()

# Show the plot
plt.show()


In [None]:
import sys
import os
import shutil
from PyQt6.QtWidgets import QApplication, QWidget, QPushButton, QFileDialog, QLabel, QVBoxLayout

class FolderUploadApp(QWidget):
    """Small window that copies the PDFs of a user-selected folder into ./Files.

    The window shows a status label and a "Select Folder" button. Clicking
    the button opens a directory picker; every ``.pdf`` file found directly
    inside the chosen folder is copied (the originals are kept) into a
    ``Files`` folder under the current working directory. The application
    then quits after a short delay.
    """

    # Delay before quitting, so the final status message can actually be read.
    CLOSE_DELAY_MS = 2000

    def __init__(self):
        super().__init__()
        self.initUI()

    def initUI(self):
        """Build the window: title, geometry, status label and select button."""
        self.setWindowTitle("Dissertation Submission")
        self.setGeometry(100, 100, 400, 200)

        layout = QVBoxLayout()

        self.label = QLabel("Upload a folder containing PDFs", self)
        layout.addWidget(self.label)

        self.button = QPushButton("Select Folder", self)
        self.button.clicked.connect(self.upload_folder)
        layout.addWidget(self.button)

        self.setLayout(layout)

    def upload_folder(self):
        """Ask for a folder and copy its PDF files into the ``Files`` folder."""
        # Imported locally so this method does not rely on the cell's import line.
        from PyQt6.QtCore import QTimer

        # Open a folder selection dialog
        folder_path = QFileDialog.getExistingDirectory(self, "Select Folder")
        if not folder_path:
            # Nothing was selected; keep the window open.
            return

        # Destination is a "Files" folder under the current working directory.
        destination_folder = os.path.join(os.getcwd(), "Files")
        os.makedirs(destination_folder, exist_ok=True)  # Ensure the folder exists

        # Only regular files count: a directory named "*.pdf" cannot be copied
        # with shutil.copy.
        pdf_files = [
            f
            for f in os.listdir(folder_path)
            if f.lower().endswith(".pdf")
            and os.path.isfile(os.path.join(folder_path, f))
        ]

        if not pdf_files:
            self.label.setText("No PDF files found in the selected folder.")
            return

        for file_name in pdf_files:
            src_file = os.path.join(folder_path, file_name)
            dest_file = os.path.join(destination_folder, file_name)
            try:
                shutil.copy(src_file, dest_file)
            except shutil.SameFileError:
                # The selected folder is the destination itself; the file is
                # already where it needs to be.
                pass

        # Update UI
        self.label.setText(f"Uploaded {len(pdf_files)} PDFs to 'Files' folder.")
        print(f"Copied {len(pdf_files)} PDFs to: {destination_folder}")

        # Close the application after the delay instead of immediately, so the
        # status message above is visible before the window disappears.
        QTimer.singleShot(self.CLOSE_DELAY_MS, QApplication.instance().quit)

# Run the application
# Reuse an existing QApplication when there is one: Qt allows a single
# instance per process and this notebook launches the upload window from
# more than one cell.
app = QApplication.instance() or QApplication(sys.argv)
window = FolderUploadApp()
window.show()
# Block until the window quits. The exit code is kept in a variable rather
# than passed to sys.exit(), which would raise SystemExit inside the notebook.
exit_code = app.exec()
