In [1]:
# === 1. Imports ===
from datasets import load_dataset
import re
import numpy as np
import torch
import torch.nn as nn
from torch.nn.functional import mse_loss
from transformers import AutoTokenizer, AutoModelForMaskedLM, TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig, TaskType
from types import SimpleNamespace

# === 2. Load & preprocess dataset ===
# Load the IELTS Writing Task 2 evaluation dataset by its Hub identifier.
# The rows are expected to expose "prompt", "essay", "evaluation" and "band"
# (those are the fields read by preprocess below).
dataset = load_dataset("chillies/IELTS-writing-task-2-evaluation")

def extract_scores(evaluation_text):
    """Pull the four per-criterion band scores out of an evaluation write-up.

    Each criterion is searched for in either of two layouts: the criterion
    name followed (lazily, newlines included) by a bracketed number such as
    ``[6.5]``, or a ``Suggested Band Score (<criterion>): 6.5`` phrase.

    Args:
        evaluation_text: Free-text evaluation to search.

    Returns:
        A dict mapping ``task_score``, ``coherence_score``, ``lexical_score``
        and ``grammar_score`` to floats, or ``None`` as soon as any one of the
        four criteria cannot be found.
    """
    # (output field, pattern) pairs; group 1 is the bracketed layout,
    # group 2 the "Suggested Band Score" layout.
    field_patterns = (
        ("task_score", r"Task Achievement.*?\[(\d+\.?\d*)\]|Suggested Band Score \(Task Achievement\): (\d+\.?\d*)"),
        ("coherence_score", r"Coherence and Cohesion.*?\[(\d+\.?\d*)\]|Suggested Band Score \(Coherence and Cohesion\): (\d+\.?\d*)"),
        ("lexical_score", r"Lexical Resource.*?\[(\d+\.?\d*)\]|Suggested Band Score \(Lexical Resource\): (\d+\.?\d*)"),
        ("grammar_score", r"Grammatical Range and Accuracy.*?\[(\d+\.?\d*)\]|Suggested Band Score \(Grammatical Range and Accuracy\): (\d+\.?\d*)"),
    )

    found_scores = {}
    for field, regex in field_patterns:
        hit = re.search(regex, evaluation_text, flags=re.DOTALL)
        if hit is None:
            # One missing criterion invalidates the whole sample.
            return None
        bracketed, suggested = hit.groups()
        found_scores[field] = float(bracketed or suggested)
    return found_scores

def preprocess(example):
    """Flatten one raw dataset row into prompt, essay and five numeric scores.

    The per-criterion scores come from ``extract_scores(example["evaluation"])``.
    The overall band is parsed from ``example["band"]`` after stripping every
    character that is not a digit or a dot.

    Args:
        example: Mapping with at least ``prompt``, ``essay``, ``evaluation``
            and ``band`` entries.

    Returns:
        A dict with ``prompt``, ``essay``, ``task_score``, ``coherence_score``,
        ``lexical_score``, ``grammar_score`` and ``overall_score``. When the
        criterion scores or the overall band cannot be obtained, every value
        is ``None`` so the row can be filtered out afterwards.
    """
    scores = extract_scores(example["evaluation"])
    try:
        overall_band = float(re.sub(r"[^\d.]", "", example["band"]))
    except (KeyError, TypeError, ValueError):
        # Only "band is missing / not a string / not a number" counts as
        # unparseable. A bare except would also swallow KeyboardInterrupt and
        # SystemExit and silently mark the row as invalid.
        overall_band = None

    # Truthiness check kept on purpose: an empty score dict, a missing band and
    # a band that parses to 0 are all treated as "no usable label".
    if scores and overall_band:
        return {
            "prompt": example["prompt"],
            "essay": example["essay"],
            **scores,
            "overall_score": overall_band
        }

    return {
        "prompt": None,
        "essay": None,
        "task_score": None,
        "coherence_score": None,
        "lexical_score": None,
        "grammar_score": None,
        "overall_score": None
    }

# Add the parsed score columns to every row.
dataset = dataset.map(preprocess)
# preprocess marks unusable rows by setting "prompt" to None; drop those.
dataset = dataset.filter(lambda x: x["prompt"] is not None)
# Only the "train" split is kept from here on; it is re-split 90/10 with a
# fixed seed, producing the "train"/"test" splits used for fine-tuning.
# NOTE(review): any other split the Hub dataset may ship is discarded here — confirm that is intended.
dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)


  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [None]:
# === 3. Tokenization & normalization ===
# Tokenizer of the same checkpoint that is loaded as the backbone in section 4.
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")

def tokenize_function(example):
    """Tokenize one prompt/essay pair and attach its five normalised labels.

    The model input is the single string ``"Prompt: ...\\nEssay: ..."``,
    truncated and padded to 512 tokens by the module-level ``tokenizer``.

    Args:
        example: Row holding ``prompt``, ``essay`` and the five numeric score
            fields written by ``preprocess``.

    Returns:
        The tokenizer output with an extra ``labels`` entry: a list of five
        floats ordered task, coherence, lexical, grammar, overall.
    """
    text = f"Prompt: {example['prompt']}\nEssay: {example['essay']}"
    encoded = tokenizer(text, truncation=True, padding="max_length", max_length=512)

    # Label normalisation is (band - 4) / 5, so band 4 maps to 0.0 and band 9
    # maps to 1.0; bands outside 4..9 land outside [0, 1].
    score_fields = (
        "task_score",
        "coherence_score",
        "lexical_score",
        "grammar_score",
        "overall_score",
    )
    encoded["labels"] = [(example[field] - 4) / 5 for field in score_fields]
    return encoded

# Tokenize every row of both splits (one example per call, not batched).
tokenized_datasets = dataset.map(tokenize_function)
# Expose only the three model-facing columns, as torch tensors.
tokenized_datasets.set_format("torch", columns=["input_ids", "attention_mask", "labels"])




In [None]:
# === 4. Model + LoRA ===
# LoRA settings: rank-8 adapters (alpha 16, dropout 0.05) on the attention
# query/key/value projections; bias terms are not adapted.
# The inference cell rebuilds an identical config, so keep the two in sync.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["query_proj", "key_proj", "value_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.FEATURE_EXTRACTION,
)

# NOTE(review): this loads the masked-LM variant, but DebertaWithLoRA only uses
# `base_model.base_model` (the encoder), so the MLM head reported as "newly
# initialized" in the load warning appears to be unused — consider AutoModel.
base_model = AutoModelForMaskedLM.from_pretrained("microsoft/deberta-v3-base")

class DebertaWithLoRA(nn.Module):
    """LoRA-adapted encoder followed by a linear head that emits five scores."""

    def __init__(self, base_model, lora_config):
        """Wrap the encoder of ``base_model`` with LoRA and add the regression head.

        Args:
            base_model: Task model whose ``base_model`` attribute is the bare
                encoder and whose ``config.hidden_size`` sizes the head.
            lora_config: PEFT configuration passed to ``get_peft_model``.
        """
        super().__init__()
        encoder = base_model.base_model
        self.backbone = get_peft_model(encoder, lora_config)
        hidden_dim = base_model.config.hidden_size
        # Five outputs: task, coherence, lexical, grammar, overall.
        self.regressor = nn.Linear(hidden_dim, 5)

    def forward(self, input_ids, attention_mask, **kwargs):
        """Return the five raw regression outputs for each sequence.

        Any ``labels`` entry is discarded; remaining keyword arguments are
        forwarded to the backbone unchanged.
        """
        kwargs.pop("labels", None)
        encoded = self.backbone(
            input_ids=input_ids,
            attention_mask=attention_mask,
            **kwargs,
        )
        # The hidden state at position 0 serves as the pooled representation.
        first_token = encoded.last_hidden_state[:, 0]
        return self.regressor(first_token)

# Build the trainable model and place it on the GPU when one is available,
# otherwise on the CPU.
model = DebertaWithLoRA(base_model, lora_config)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def print_trainable_params(model):
    """Print how many of ``model``'s parameters require gradients.

    Args:
        model: Any ``nn.Module``; it must own at least one parameter because
            the percentage divides by the total count.
    """
    total = 0
    trainable = 0
    for param in model.parameters():
        size = param.numel()
        total += size
        if param.requires_grad:
            trainable += size
    print(f"🔧 Trainable params: {trainable:,} / {total:,} ({100 * trainable / total:.2f}%)")

# Sanity check: report the trainable share of the wrapped model's parameters.
print_trainable_params(model)


  return torch.load(checkpoint_file, map_location=map_location)
Some weights of DebertaV2ForMaskedLM were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


🔧 Trainable params: 446,213 / 184,277,765 (0.24%)


In [4]:
# === 5. Training setup ===
# Evaluate and checkpoint once per epoch, keep at most two checkpoints, and
# reload the checkpoint with the lowest "eval_RMSE" when training finishes.
# "eval_RMSE" must match the key returned by compute_metrics below.
# remove_unused_columns=False keeps the dataset columns as they are, since the
# custom model's forward signature is (input_ids, attention_mask, **kwargs).
# output_dir is also the directory the save cell writes the final weights to.
training_args = TrainingArguments(
    output_dir="lora_v2_5_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=5,
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    metric_for_best_model="eval_RMSE",
    greater_is_better=False,
    remove_unused_columns=False,
    fp16=True,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
    load_best_model_at_end=True,
)

def compute_metrics(eval_pred):
    """Compute a single RMSE over every prediction/label element.

    The error is measured on whatever scale the predictions and labels are
    supplied in; all five score columns are pooled into one number.

    Args:
        eval_pred: Pair ``(predictions, labels)`` of equally shaped arrays.

    Returns:
        ``{"eval_RMSE": rmse}``.
    """
    print("🧪 compute_metrics called")
    preds, targets = eval_pred
    squared_error = np.square(preds - targets)
    return {"eval_RMSE": np.sqrt(squared_error.mean())}

class RegressionTrainer(Trainer):
    """Trainer variant for a model that returns a raw ``(batch, 5)`` score tensor.

    Training uses a per-column weighted MSE; evaluation reports the plain MSE
    over all columns, so the two loss values are not on the same scale.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Weighted sum of per-column MSE losses.

        The four criterion columns weigh 0.2 each and the overall-band column
        weighs 0.4. ``labels`` is removed from ``inputs`` before the forward
        pass.
        """
        targets = inputs.pop("labels")
        preds = model(**inputs)

        # Column order: task, coherence, lexical, grammar, overall.
        column_weights = (0.2, 0.2, 0.2, 0.2, 0.4)
        terms = [
            weight * mse_loss(preds[:, col], targets[:, col])
            for col, weight in enumerate(column_weights)
        ]
        # Accumulate left to right so the summation order matches the explicit sum.
        loss = terms[0]
        for term in terms[1:]:
            loss = loss + term

        if not return_outputs:
            return loss
        return loss, SimpleNamespace(logits=preds)

    def prediction_step(self, model, inputs, prediction_loss_only=False, ignore_keys=None):
        """Run one gradient-free forward pass and return ``(loss, predictions, labels)``.

        ``loss`` is the unweighted MSE over all columns, or ``None`` when the
        batch carries no labels. ``prediction_loss_only`` and ``ignore_keys``
        are accepted but not used.
        """
        labelled = "labels" in inputs
        batch = self._prepare_inputs(inputs)
        targets = batch.pop("labels") if labelled else None
        with torch.no_grad():
            preds = model(**batch)
            loss = mse_loss(preds, targets) if labelled else None
        return (loss, preds, targets)

# Fine-tune on the 90% split and evaluate on the held-out 10% split created in
# section 2; compute_metrics supplies the "eval_RMSE" used for model selection.
trainer = RegressionTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Long-running cell: the captured log below reports about 2264 s for 5 epochs.
trainer.train()

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: st124689 (binit-ait) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


  0%|          | 0/4495 [00:00<?, ?it/s]

You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 0.3783, 'learning_rate': 0.00019964404894327032, 'epoch': 0.01}
{'loss': 0.2104, 'learning_rate': 0.0001991991101223582, 'epoch': 0.02}
{'loss': 0.1529, 'learning_rate': 0.00019875417130144608, 'epoch': 0.03}
{'loss': 0.105, 'learning_rate': 0.00019830923248053396, 'epoch': 0.04}
{'loss': 0.1367, 'learning_rate': 0.0001978642936596218, 'epoch': 0.06}
{'loss': 0.0975, 'learning_rate': 0.00019741935483870969, 'epoch': 0.07}
{'loss': 0.1639, 'learning_rate': 0.00019697441601779756, 'epoch': 0.08}
{'loss': 0.1207, 'learning_rate': 0.00019652947719688544, 'epoch': 0.09}
{'loss': 0.1085, 'learning_rate': 0.00019608453837597332, 'epoch': 0.1}
{'loss': 0.1446, 'learning_rate': 0.0001956395995550612, 'epoch': 0.11}
{'loss': 0.1137, 'learning_rate': 0.00019519466073414908, 'epoch': 0.12}
{'loss': 0.1311, 'learning_rate': 0.00019474972191323696, 'epoch': 0.13}
{'loss': 0.1196, 'learning_rate': 0.0001943047830923248, 'epoch': 0.14}
{'loss': 0.0686, 'learning_rate': 0.00019385984427141268,

  0%|          | 0/100 [00:00<?, ?it/s]

🧪 compute_metrics called
{'eval_RMSE': 0.26052042841911316, 'eval_loss': 0.06787089258432388, 'eval_runtime': 23.8942, 'eval_samples_per_second': 16.74, 'eval_steps_per_second': 4.185, 'epoch': 1.0}
{'loss': 0.0831, 'learning_rate': 0.00016004449388209123, 'epoch': 1.0}
{'loss': 0.0767, 'learning_rate': 0.0001595995550611791, 'epoch': 1.01}
{'loss': 0.0726, 'learning_rate': 0.000159154616240267, 'epoch': 1.02}
{'loss': 0.0689, 'learning_rate': 0.00015870967741935487, 'epoch': 1.03}
{'loss': 0.0563, 'learning_rate': 0.00015826473859844274, 'epoch': 1.05}
{'loss': 0.0968, 'learning_rate': 0.00015781979977753062, 'epoch': 1.06}
{'loss': 0.075, 'learning_rate': 0.00015737486095661847, 'epoch': 1.07}
{'loss': 0.0903, 'learning_rate': 0.00015692992213570635, 'epoch': 1.08}
{'loss': 0.0468, 'learning_rate': 0.00015648498331479423, 'epoch': 1.09}
{'loss': 0.0952, 'learning_rate': 0.0001560400444938821, 'epoch': 1.1}
{'loss': 0.0714, 'learning_rate': 0.00015559510567296998, 'epoch': 1.11}
{'los

  0%|          | 0/100 [00:00<?, ?it/s]

🧪 compute_metrics called
{'eval_RMSE': 0.2428446263074875, 'eval_loss': 0.058973513543605804, 'eval_runtime': 15.8831, 'eval_samples_per_second': 25.184, 'eval_steps_per_second': 6.296, 'epoch': 2.0}
{'loss': 0.0625, 'learning_rate': 0.00012, 'epoch': 2.0}
{'loss': 0.0368, 'learning_rate': 0.00011955506117908788, 'epoch': 2.01}
{'loss': 0.0626, 'learning_rate': 0.00011911012235817576, 'epoch': 2.02}
{'loss': 0.0479, 'learning_rate': 0.00011866518353726364, 'epoch': 2.04}
{'loss': 0.0756, 'learning_rate': 0.00011822024471635152, 'epoch': 2.05}
{'loss': 0.103, 'learning_rate': 0.0001177753058954394, 'epoch': 2.06}
{'loss': 0.0594, 'learning_rate': 0.00011733036707452724, 'epoch': 2.07}
{'loss': 0.0872, 'learning_rate': 0.00011688542825361512, 'epoch': 2.08}
{'loss': 0.0779, 'learning_rate': 0.000116440489432703, 'epoch': 2.09}
{'loss': 0.0534, 'learning_rate': 0.00011599555061179088, 'epoch': 2.1}
{'loss': 0.04, 'learning_rate': 0.00011555061179087876, 'epoch': 2.11}
{'loss': 0.0583, 'le

  0%|          | 0/100 [00:00<?, ?it/s]

🧪 compute_metrics called
{'eval_RMSE': 0.23546022176742554, 'eval_loss': 0.055441513657569885, 'eval_runtime': 15.8705, 'eval_samples_per_second': 25.204, 'eval_steps_per_second': 6.301, 'epoch': 3.0}
{'loss': 0.0462, 'learning_rate': 7.99555061179088e-05, 'epoch': 3.0}
{'loss': 0.0491, 'learning_rate': 7.951056729699667e-05, 'epoch': 3.01}
{'loss': 0.0541, 'learning_rate': 7.906562847608455e-05, 'epoch': 3.03}
{'loss': 0.055, 'learning_rate': 7.862068965517242e-05, 'epoch': 3.04}
{'loss': 0.0486, 'learning_rate': 7.81757508342603e-05, 'epoch': 3.05}
{'loss': 0.0636, 'learning_rate': 7.773081201334817e-05, 'epoch': 3.06}
{'loss': 0.0592, 'learning_rate': 7.728587319243604e-05, 'epoch': 3.07}
{'loss': 0.0585, 'learning_rate': 7.684093437152392e-05, 'epoch': 3.08}
{'loss': 0.0758, 'learning_rate': 7.63959955506118e-05, 'epoch': 3.09}
{'loss': 0.0541, 'learning_rate': 7.595105672969966e-05, 'epoch': 3.1}
{'loss': 0.072, 'learning_rate': 7.550611790878754e-05, 'epoch': 3.11}
{'loss': 0.036

  0%|          | 0/100 [00:00<?, ?it/s]

🧪 compute_metrics called
{'eval_RMSE': 0.23712413012981415, 'eval_loss': 0.05622785538434982, 'eval_runtime': 22.806, 'eval_samples_per_second': 17.539, 'eval_steps_per_second': 4.385, 'epoch': 4.0}
{'loss': 0.0524, 'learning_rate': 3.991101223581758e-05, 'epoch': 4.0}
{'loss': 0.0374, 'learning_rate': 3.946607341490545e-05, 'epoch': 4.02}
{'loss': 0.0442, 'learning_rate': 3.902113459399333e-05, 'epoch': 4.03}
{'loss': 0.0454, 'learning_rate': 3.85761957730812e-05, 'epoch': 4.04}
{'loss': 0.0932, 'learning_rate': 3.813125695216908e-05, 'epoch': 4.05}
{'loss': 0.0619, 'learning_rate': 3.768631813125695e-05, 'epoch': 4.06}
{'loss': 0.0506, 'learning_rate': 3.724137931034483e-05, 'epoch': 4.07}
{'loss': 0.0766, 'learning_rate': 3.67964404894327e-05, 'epoch': 4.08}
{'loss': 0.0333, 'learning_rate': 3.635150166852058e-05, 'epoch': 4.09}
{'loss': 0.0614, 'learning_rate': 3.590656284760846e-05, 'epoch': 4.1}
{'loss': 0.0339, 'learning_rate': 3.5461624026696336e-05, 'epoch': 4.12}
{'loss': 0.0

  0%|          | 0/100 [00:00<?, ?it/s]

🧪 compute_metrics called
{'eval_RMSE': 0.23862318694591522, 'eval_loss': 0.05694102868437767, 'eval_runtime': 15.5655, 'eval_samples_per_second': 25.698, 'eval_steps_per_second': 6.424, 'epoch': 5.0}
{'train_runtime': 2264.1889, 'train_samples_per_second': 7.934, 'train_steps_per_second': 1.985, 'train_loss': 0.06705738789711699, 'epoch': 5.0}


TrainOutput(global_step=4495, training_loss=0.06705738789711699, metrics={'train_runtime': 2264.1889, 'train_samples_per_second': 7.934, 'train_steps_per_second': 1.985, 'train_loss': 0.06705738789711699, 'epoch': 5.0})

In [11]:
# === 5.5 Save model ===
# Same directory as TrainingArguments.output_dir, so it already exists after
# training; torch.save below does not create it.
save_path = "lora_v2_5_model"

# Save the state dict of the PEFT-wrapped backbone.
# NOTE(review): state_dict() on the wrapped module presumably contains the
# frozen encoder weights as well as the LoRA matrices, so "lora_adapter.bin"
# is likely a full backbone checkpoint rather than adapter-only — confirm.
torch.save(model.backbone.state_dict(), f"{save_path}/lora_adapter.bin")

# Save regression layer weights
torch.save(model.regressor.state_dict(), f"{save_path}/regression_head.pt")

# Save tokenizer and base model config
# NOTE(review): the checkpoint written by save_pretrained here carries
# LoRA-wrapped parameter names (e.g. "...query_proj.base_layer.weight",
# "...lora_A.default.weight"); the load cell's warning shows AutoModel does not
# use them, so those projections are presumably only restored when
# lora_adapter.bin is loaded afterwards.
tokenizer.save_pretrained(save_path)
model.backbone.base_model.save_pretrained(save_path)

# Save regression config (manual): input/output widths of the linear head.
reg_config = {"hidden_size": model.regressor.in_features, "output_size": model.regressor.out_features}
torch.save(reg_config, f"{save_path}/regression_config.pt")

print("✅ Model saved to:", save_path)


✅ Model saved to: lora_v2_5_model


In [1]:
# === 6.0 Load model for inference ===
import torch
import os
from transformers import AutoTokenizer, AutoModel
from peft import get_peft_model, LoraConfig, TaskType
import torch.nn as nn

# Define model wrapper class again
class DebertaWithLoRA(nn.Module):
    """Inference-side twin of the training wrapper: LoRA encoder + 5-way linear head.

    Unlike the training cell, ``base_model`` is wrapped directly (it is loaded
    as a bare encoder here), not via its ``base_model`` attribute.
    """

    def __init__(self, base_model, lora_config):
        """Wrap ``base_model`` with LoRA and attach the regression head.

        Args:
            base_model: Bare encoder exposing ``config.hidden_size``.
            lora_config: PEFT configuration passed to ``get_peft_model``.
        """
        super().__init__()
        self.backbone = get_peft_model(base_model, lora_config)
        hidden_dim = base_model.config.hidden_size
        # Five outputs: task, coherence, lexical, grammar, overall.
        self.regressor = nn.Linear(hidden_dim, 5)

    def forward(self, input_ids, attention_mask, **kwargs):
        """Return the five raw regression outputs for each sequence.

        Any ``labels`` entry is discarded; remaining keyword arguments are
        forwarded to the backbone unchanged.
        """
        kwargs.pop("labels", None)
        encoded = self.backbone(
            input_ids=input_ids,
            attention_mask=attention_mask,
            **kwargs,
        )
        # The hidden state at position 0 serves as the pooled representation.
        first_token = encoded.last_hidden_state[:, 0]
        return self.regressor(first_token)

# Load components
load_path = "lora_v2_5_model"
tokenizer = AutoTokenizer.from_pretrained(load_path)
base_model = AutoModel.from_pretrained(load_path)

# LoRA config must match training
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["query_proj", "key_proj", "value_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.FEATURE_EXTRACTION,
)

# Build model again
model = DebertaWithLoRA(base_model, lora_config)

# Load weights.
# map_location="cpu" lets checkpoints saved from a GPU session be restored on a
# CPU-only machine; the model is moved to the chosen device afterwards.
backbone_state = torch.load(os.path.join(load_path, "lora_adapter.bin"), map_location="cpu")
# strict=False tolerates key differences, so surface them instead of silently
# continuing with partially restored weights.
load_report = model.backbone.load_state_dict(backbone_state, strict=False)
if load_report.missing_keys or load_report.unexpected_keys:
    print(
        "⚠️ Backbone checkpoint mismatch: "
        f"{len(load_report.missing_keys)} missing, "
        f"{len(load_report.unexpected_keys)} unexpected keys"
    )
# The regression head is loaded strictly: every key must match.
model.regressor.load_state_dict(
    torch.load(os.path.join(load_path, "regression_head.pt"), map_location="cpu")
)

# Move to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

print("✅ Model and tokenizer loaded for inference.")


  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at lora_v2_5_model were not used when initializing DebertaV2Model: ['encoder.layer.0.attention.self.key_proj.base_layer.bias', 'encoder.layer.0.attention.self.key_proj.base_layer.weight', 'encoder.layer.0.attention.self.key_proj.lora_A.default.weight', 'encoder.layer.0.attention.self.key_proj.lora_B.default.weight', 'encoder.layer.0.attention.self.query_proj.base_layer.bias', 'encoder.layer.0.attention.self.query_proj.base_layer.weight', 'encoder.layer.0.attention.self.query_proj.lora_A.default.weight', 'encoder.layer.0.attention.self.query_proj.lora_B.default.weight', 'encoder.layer.0.attention.self.value_proj.base_layer.bias', 'encoder.layer.0.attention.self.value_proj.base_layer.weight', 'encoder.layer.0.attention.self.value_proj.lora_A.default.weight', 'encoder.layer.0.attention.self.value_proj.lora_B.default.weight', 'encoder.layer.1.attention.self.key_proj.base_layer.bias', 'encoder.layer.1.att

✅ Model and tokenizer loaded for inference.


In [2]:
# === 6. Inference ===
def postprocess_scores(scores):
    """Convert normalised model outputs back to half-band scores.

    Undoes the ``(band - 4) / 5`` label normalisation, snaps to the nearest
    multiple of 0.5 (via ``torch.round``) and limits the result to 4.0..9.0.

    Args:
        scores: Tensor of normalised predictions.

    Returns:
        Tensor of the same shape holding band scores in steps of 0.5.
    """
    bands = 4 + 5 * scores
    half_steps = torch.round(2 * bands)
    return (half_steps / 2).clamp(min=4.0, max=9.0)

def predict_scores(prompt, essay, model, tokenizer):
    """Score a single essay and return the five bands as plain floats.

    The input text is built as ``"Prompt: ...\\nEssay: ..."``, the same layout
    used at training time, and tokenized to 512 tokens on the model's device.

    Args:
        prompt: Task question.
        essay: Essay text answering the prompt.
        model: Trained scoring model; switched to eval mode here.
        tokenizer: Tokenizer callable whose output supports ``.to(device)``.

    Returns:
        Dict with keys ``Task Achievement``, ``Coherence & Cohesion``,
        ``Lexical Resource``, ``Grammar`` and ``Overall Band``.
    """
    model.eval()
    target_device = next(model.parameters()).device
    text = f"Prompt: {prompt}\nEssay: {essay}"
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    encoded = encoded.to(target_device)

    with torch.no_grad():
        raw = model(**encoded).squeeze()
        bands = postprocess_scores(raw)

    # Output order matches the label order used for training.
    criteria = (
        "Task Achievement",
        "Coherence & Cohesion",
        "Lexical Resource",
        "Grammar",
        "Overall Band",
    )
    return {name: bands[position].item() for position, name in enumerate(criteria)}


In [3]:
# Task 2 question that the sample essay below is scored against.
prompt = "Interviews form the basic selecting criteria for most large companies. However, some people think that the interview is not a reliable method of choosing whom to employ and there are other better methods. To what extent do you agree or disagree?"

# essay = ("It is undeniable that most companies rely on interviews for hiring new employees, but some people believe that this dependence on interviews is wrong and that other alternatives should be preferred as they are better. I believe that interviews are a reliable method, and the other methods cannot be considered more effective.There are many reasons why interviews are suitable for hiring new employees. Firstly, through the interview, the recruiters can get an idea about the personality and social skills of the potential employees. In interviews, there is face-to-face interaction, and the candidates have to answer impromptu questions, from which personality traits can be judged. Also, by asking some case study-type questions, employees can judge traits like the ability to handle pressure, confidence and the ability to think outside the box.In addition, although there are many other methods for hiring, none of them could be considered superior to interviews. One the other common method is the written test, which is good to judge the theoretical knowledge of the person. However, this method cannot give a good idea about the personality of the candidate. By contrast, through interviews, employers can judge both the knowledge and temperament of the potential employee. Moreover, the written test is fallible to cheating as sometimes candidates can take outside help.Another selection process is a group discussion, which is good where a major job requirement is conversational and persuasion skills for instance sales jobs. They are really not suited for technical jobs because these are not customer oriented. On the other hand, interviews hold good for any type of job as the interviewer can frame questions to test the particular skills they require.To conclude, I would like to reiterate that the preference for interviews for hiring among employers is justified as interviews have advantages over other commonly adopted methods.")
# essay = ("It is irrefuitable that most companies rely on interviews for hiring new employees , but some people opine that alternatives methods should be preferred as they are best as compared to the dependence on interview. I totally disagree with this statement as interview is best way to know about individual's potential and personality skills ,however, other methods such as written exam are is lengthy procedure .To begin with, there are much reasons why interviews are the best way. firstly, with the ,interviews an employer can get an idea about the personality and social skills of the employee. Because it is face to face interaction in which candidate have to answer different questions, from which personality traits can be judged by employer. Moreover, potential skills are vital for any organisation so by asking some case study question , employees can judge the ability to handle pressure , confidence, ability to think in an adverse situation . For instance, in the private ,sector all MNC totally depend upon direct interviews through which the organiser get a chance to know the mental ability of a person.On the other hand, some people opine that written test is the way of judgement they consider theoretical knowledge is important than practical knowledge. however, this way can not give a best idea about candidate's skills. Moreover, the written test is fallible to cheating as sometimes candidates can take outside help. Such as, in public sector recruitment government conduct exam in which scams take place and an unable person got a selection . So how anyone can consider this way best for hiring an employee .To sum up, in my opinion, interviews are justified as it has advantages over the commonly adopted methods.")
# essay = ("Interviews are a common selection method for many companies, but they are not the only method. Other methods such as skills tests, reference checks, and work samples may provide a more accurate assessment of a candidate's abilities and fit for the job.While interviews can give an initial impression of a candidate, they may not provide a complete picture of their skills, personality, and work style. Interviews may also be subject to biases, such as unconscious biases or interviewer biases, which can affect the outcome of the selection process.Additionally, some candidates may perform well in an interview but not be a good fit for the job, while others may not perform as well in an interview but have the necessary skills and qualities for the job.Therefore, while interviews can be a useful selection method, they should not be relied on solely to make hiring decisions. Combining interviews with other selection methods may provide a more comprehensive assessment of candidates and increase the chances of making a successful hire.")
# essay = ("It is believed by some experts that the interview is the main factor by which an employee should get a chance to work in an organization. However, Some are against this perception and think that this is not the only way of selecting an individual, there can be other reliable methods by which the workforce can be chosen. I agree with the latter view to some extent.There are myriad reasons why an employer finds an interview to be the best way among all other alternatives. Firstly, It includes face-to-face interaction with the person which can benefit the employer in understanding the real potential of people. Secondly, It is helpful in accessing the traits related to their personality, especially, their communication skills which are necessary for an employee to work in any department. In addition to it, an interviewer can judge the mental ability of the humans by providing them with the tasks related to their capability.Apart from this, It is also believed by companies that it assists them in hiring the right person in comparison to other competitive exams. For instance, It might be possible that a person with a good academic result is not able to perform a particular task, therefore, resulting in lower productivity.Furthermore, It is not possible to ensure the correctness of the details mentioned in the curriculum vitae by the candidate in other selection forms.Although It has various advantages which overweigh the disadvantages yet I believe that there should be a mix of both terminologies in order to appoint a worker in a company. There are some jobs which did not require personal skills, such as these are not mandatory for the post of a computer operator. Only written and theoretical knowledge is enough to fill the vacancy for this post.To sum up, I would like to reiterate that if I consider it logically then I believe there are some jobs for which there is no need to conduct an interview. 
So, it is wise to say that firms should use a combination of these terms depending upon the requirement of a particular position when selecting an employee.")
# essay = ("Nowadays, employing citizens in a variety of companies by interviewing them is an impactful process. However, there are cultures that believe it doesn't reflect their true abilities. On the opposite, there are business owners who believe it is efficient to select people who stand out. In this essay, I will demonstrate why this method has a positive outcome in most cases for companies.Currently, people who think attending personal sessions in business areas has a negative outcome, are usually unemployed based on their experience. This is simply because they lack the skills and knowledge to stand out, thus It will be difficult to get hired. For instance, if a student had a long journey of only studying without having experience in the market, they won't find a job. This means although It is wrong to spread information about what human resources should do to hire. Ultimately, investing time to develop skills is a key factor to have a higher chance of occupation.However, most of our society believes it is an efficient method because it reflects behaviour and work ethic. Indeed, these factors are impactful in the work environment to avoid having lazy employees, thus others won't be productive as they were. For instance, if the employee does not invest enough effort in reaching the goals set for him, thus business owner will fire him in the long run. Without this ,we wouldn't know the truth of what is inside the workers. Clearly, using this is a must to have a better idea of how efficient he will be at work.In conclusion, I believe we should spread awareness to reduce the number of people who think it is not the right way to select employees. In addition, citizens should prepare themselves to go through this process rather than criticise it.")
essay = ("Many multinational and national companies select their staff after a lot of research. In that face to face interaction is the prime site for an employee hiring. Although, it is considered by few that interview is not an appropriate way of filling the vacancy and there are more reliable sources. However, I mostly disagree with the view. My point is justified further.If it was easy to crack an interview of reputed establishments, then everyone might had got a place in each corporation. Moreover, it is not a piece of cake for all, it needs courage, dedication along with proper knowledge because all true colours flow out in front of interviewers. One of the many criteria judged is, crammed theory. For example, when a person is given a complicated situation and asked how will they put their learning into an action. Such is the time when the stars start surrounding around the head, because all they remember is what the theory says, but do not know the real life application of that intelligence. Thus, the individual gets rejected as they do not need a book, but a human engine to run their machines. Furthermore, other criteria to qualify is, communication skills. For example, if a product is needed to be sold, then the buyers should be provided with as best offer as possible, just for company profit. Whatever, the business it is running, the best way to buy or sell products is negotiation with the way of talking and persuading them with some schemes. In addition, introsceptiner also notes the practical exposure and change of facial express. For example, expressions like lies, fear, worry, anxiety and the capability of a man.The capability of a person is also judged by his past interest and achievements until now.On the contrary, the era has changed and continuation of age old methods of employee acceptance needs a change. This can be bought by replacing the system with either questionnaire pamphlet or multiple short tests. 
For example, all the interviewing questions, exam can be taken or else multiple different field exam. By this the need for interview will also not arise and the manager can fully check the person, along with saving their time.To conclude, the new system can be introduced, but interviews cannot be replaced by those methods, as it is the most reliable source for knowing a person thoroughly.")

# Score the active (uncommented) essay with the model and tokenizer loaded in
# the "Load model for inference" cell.
print(predict_scores(prompt, essay, model, tokenizer))


{'Task Achievement': 6.5, 'Coherence & Cohesion': 6.0, 'Lexical Resource': 6.0, 'Grammar': 6.0, 'Overall Band': 6.5}


In [None]:
# Peek at the first three training rows.
# NOTE(review): relies on `dataset` from the preprocessing cell; the inference
# cells above restart their execution count at In [1], so on a fresh kernel
# this cell presumably needs section 2 to be run first — confirm.
for i in range(3):
    print(dataset["train"][i])


{'prompt': 'Some people believe that eventually all jobs will be done by artificially intelligent robots. \nWhat is your opinion?', 'essay': 'Some may argue that in the future all human workforces will be replaced by robots. While it is possible that most low skill jobs will be taken over by artificial intelligent computers, it is still difficult for some area of work to fully rely on them. This essay will discuss why in the end humans are still needed to do many other jobs.\r\n\r\nFirst, the majority of the non-complex occupation will be handle by robot in the future. It simply because, robot can be easily programmed to do a constant and simple job, moreover, companies will try their best to limit the salary expense by hiring more robots to replace the non-essential workforce. In Indonesia for example, they already started to create an automatic payment gate in every toll-roads. In addition, this automatic gate now being implemented and more common to be found as an exit gate in the p