In [22]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW, get_scheduler
import torch
from torch.nn.functional import softmax
import datasets
from datasets import load_dataset, Dataset
import random
from torch.utils.data import DataLoader, Subset
import torch.nn.functional as F
import matplotlib.pyplot as plt
from tqdm import tqdm
from torchmetrics import F1Score
import pandas as pd
import numpy as np

from comment import category as comment_categories
from comment import generate_comment

import math

pd.options.mode.copy_on_write = True

In [9]:
reverse_mapping_3 = {
    3.5: 0, 4.0: 0,
    4.5: 1, 5.0: 1,
    5.5: 2, 6.0: 2,
    6.5: 3, 7.0: 3,
    7.5: 4, 8.0: 4,
    8.5: 5, 9.0: 5
}

class_mapping = {
    0: "3.5 - 4.0",
    1: "4.5 - 5.0",
    2: "5.5 - 6.0",
    3: "6.5 - 7.0",
    4: "7.5 - 8.0",
    5: "8.5 - 9.0",
}

## Load Classifier Models

In [10]:
classifier_categories = ['task_achievement', 
              'grammatical',
              'coherence',
              # 'lexical'  # not available
              ]
models = []

for category in classifier_categories:
    model = AutoModelForSequenceClassification.from_pretrained("mrm8488/deberta-v3-ft-financial-news-sentiment-analysis",num_labels=6, ignore_mismatched_sizes=True)
    # Load the saved state_dict into the model
    with open(f'models/{category}.pt', "rb") as f:
        model.load_state_dict(torch.load(f))

    # Move model to the device (GPU if available)
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)
    models.append(model)

tokenizer = AutoTokenizer.from_pretrained("mrm8488/deberta-v3-ft-financial-news-sentiment-analysis")

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at mrm8488/deberta-v3-ft-financial-news-sentiment-analysis and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([6]) in the model instantiated
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([6, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  model.load_state_dict(torch.load(f))
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at mrm8488/deberta-v3-ft-financial-news-sentiment-analysis and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([6]) in the model instantiated
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint

In [11]:
prompt = """ Some people feel that with the rise of artificial intelligence, computers and robots will take over the roles of teachers. To what extent do you agree or disagree with this statement? 
Give reasons for your answer and include any relevant examples from your knowledge or experience. 
You should write at least 250 words. """

essay = """ With ever increasing technological advances, computers and robots are replacing human roles in different areas of society. This trend can also be seen in education, where interactive programs can enhance the educational experience for children and young adults. Whether, however, this revolution can also take over the role of the teacher completely is debatable, and I oppose this idea as it is unlikely to serve students well. 
The roles of computers and robots can be seen in many areas of the workplace. Classic examples are car factories, where a lot of the repetitive precision jobs done on assembly lines have been performed by robots for many years, and medicine, where diagnosis, and treatment, including operations, have also been assisted by computers for a long time. According to the media, it won't also be long until we have cars that are driven automatically. 
It has long been discussed whether robots and computers can do this in education. It is well known that the complexity of programs can now adapt to so many situations that something can already be set up that has the required knowledge of the teacher, along with the ability to predict and answer all questions that might be asked by students. In fact, due to the nature of computers, the knowledge levels can far exceed a teacher's and have more breadth, as a computer can have equal knowledge in all the subjects that are taught in school, as opposed to a single teacher's specialisation. It seems very likely, therefore, that computers and robots should be able to deliver the lessons that teachers can, including various ways of differentiating and presenting materials to suit varying abilities and ages of students. 
Where I am not convinced is in the pastoral role of teachers. Part of teaching is managing behaviour and showing empathy with students, so that they feel cared for and important. Even if a robot or computer can be programmed to imitate these actions, students will likely respond in a different way when they know an interaction is part of an algorithm rather than based on human emotion. 
Therefore, although I feel that computers should be able to perform a lot of the roles of teachers in the future, they should be used as educational tools to assist teachers and not to replace them. In this way, students would receive the benefits of both ways of instruction. 
"""
# comment = """Will the comment affect the overall score?"""

example_input1 = f"""Prompt: {prompt}
Essay: {essay}
"""

In [12]:
# pipeline 1: generate comment
comments = []
for each in comment_categories:
    # print(each)
    comment = generate_comment(prompt, essay, each)
    comments.append(comment)


In [13]:
for each in comments:
    print(each)

The essay effectively addresses the prompt by presenting a clear position against the complete takeover of teaching roles by computers and robots. It discusses relevant examples from both technology in the workplace and the specific context of education, illustrating the potential advantages of AI while emphasizing the irreplaceable human qualities of teachers. However, there could be more development of ideas; for instance, the paragraph on the pastoral role of teachers, while strong, could include specific examples to enhance its impact. Overall, the response demonstrates a good understanding of the task, with a clear stance and logical argumentation, but it could benefit from deeper exploration of certain points.
The essay effectively addresses the task by presenting a clear position against the notion that computers and robots can entirely replace teachers. The introduction sets the tone well, and the progression of ideas is logical. Each paragraph focuses on a central topic: the p

## Pass downstream to provide band score for each categories

In [None]:
classifier_output = []

for i, model in enumerate(models):
    classifier_input = f"Prompt: {prompt}\n\nEssay:{essay}\n\nComment: {comments[i]}"
    inputs = tokenizer(classifier_input, return_tensors="pt").to(device)
    outputs = model(**inputs)
    logits = outputs.logits
    predicted_class = logits.argmax(dim=1).item()
    classifier_output.append(predicted_class)

    print(f"{comment_categories[i]}: {class_mapping[predicted_class]}")

Task Achievement: 6.5 - 7.0
Coherence and Cohesion: 6.5 - 7.0
Lexical Resource: 5.5 - 6.0


## Average to get final band score

In [21]:
print(classifier_output)
avg = sum(classifier_output) / len(classifier_output)
print(avg)

final_band_score = math.ceil(avg)

print(f'final band score: {class_mapping[final_band_score]}')

[3, 3, 2]
2.6666666666666665
final band score: 6.5 - 7.0


## Finally, provide tips for improvement

In [None]:
# TODO: implement this

## GUI: Gradio

In [None]:
import gradio as gr
import time

def process_input(prompt, essay):
    """
    Processes the given prompt and essay, checking word counts and returning feedback.
    """
    comments = []
    for each in comment_categories:
        yield f"Generating comments for {each} ..."
        comment = generate_comment(prompt, essay, each)
        # comment = "debug"
        comments.append(comment)
        time.sleep(1)  # avoid OpenAI firewall

    yield "Generating comments complete ... Calculating suggested band score"
    
    classifier_output = []

    for i, model in enumerate(models):
        classifier_input = f"Prompt: {prompt}\n\nEssay:{essay}\n\nComment: {comments[i]}"
        inputs = tokenizer(classifier_input, return_tensors="pt").to(device)
        outputs = model(**inputs)
        logits = outputs.logits
        predicted_class = logits.argmax(dim=1).item()
        classifier_output.append(predicted_class)

    avg = sum(classifier_output) / len(classifier_output)
    final_band_score = math.ceil(avg)
    final_band_score = class_mapping[final_band_score]

    output = ""
    for i in range(len(classifier_categories)):
        output += (f"- {comment_categories[i]}:\n"
                   f"- {comments[i]}\n"
                   f"- Suggested Band Score ({comment_categories[i]}): {class_mapping[classifier_output[i]]}\n\n")
    
    output += f"Suggested overall Band Score: {final_band_score}\n\n"

    output += f"Tips for improvement: \nCurrently unavailable"  # TODO

    yield output

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("""
    ### Prompt and Essay Input
    Please provide a **prompt** (about 100 words) and an **essay** (250-300 words).
    """)

    with gr.Row():
        prompt_input = gr.Textbox(
            label="Prompt (about 100 words)",
            placeholder="Enter your prompt here (approximately 100 words)...",
            lines=7  # Approximate height for 100 words
        )
        essay_input = gr.Textbox(
            label="Essay (250-300 words)",
            placeholder="Enter your essay here (approximately 250-300 words)...",
            lines=15  # Approximate height for 300 words
        )

    submit_button = gr.Button("Submit")
    output = gr.Textbox(label="Output", lines=25, interactive=False)

    submit_button.click(process_input, inputs=[prompt_input, essay_input], outputs=output)

# Launch the Gradio app
demo.launch()
