In [None]:
# !pip install datasets
# !pip install transformers
# !pip install evaluate
# !pip install peft
# !pip install accelerate

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [18]:
import torch
import sklearn
from sklearn.model_selection import train_test_split
import datasets
from transformers import AutoTokenizer, GPT2Tokenizer,  GPT2ForSequenceClassification, Trainer, TrainingArguments
import random
import numpy as np

In [1]:
from datasets import load_dataset


In [4]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [2]:
# dataset = load_dataset('csv', data_files='drive/My Drive/ECE1786_data/clean_data_gpt2.csv')
dataset = load_dataset('csv', data_files='..\data\dataset\processed\clean_data_gpt2.csv')

In [3]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['prompt', 'essay', 'label'],
        num_rows: 9766
    })
})


In [None]:
from transformers import AutoTokenizer, DataCollatorWithPadding

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
collator = DataCollatorWithPadding(tokenizer=tokenizer)



def tokenize_function(examples):
    combined_text = examples["prompt"] + '\n' + examples["essay"]
    return tokenizer(combined_text, padding="max_length", truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=False)

In [50]:
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(5000))
small_eval_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(5000, 7000))
small_test_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(7000, 9766))


In [24]:
print(small_train_dataset)

Dataset({
    features: ['prompt', 'essay', 'label', 'input_ids', 'attention_mask'],
    num_rows: 5000
})


In [26]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("gpt2", num_labels=12)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [27]:
from transformers import TrainingArguments

training_args = TrainingArguments(output_dir="test_trainer")

In [28]:
import numpy as np
import evaluate

metric = evaluate.load("accuracy")

In [29]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [30]:
import accelerate
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [31]:
import torch.utils
from transformers import get_scheduler
from tqdm.auto import tqdm 

def run(config):
    bs = config["batch_size"]
    lr = config["lr"]
    epochs = config["epochs"]
    train_dataset = config["train_dataset"]
    val_dataset = config["val_dataset"]
    model = config["model"]
    collator = config["collator"]

    # postprocess dataset
    # note: both dataset are already tokenized
    train_dataset = train_dataset.remove_columns(["sentence", "idx"])
    train_dataset = train_dataset.rename_column("label", "labels")

    val_dataset = val_dataset.remove_columns(["sentence", "idx"])
    val_dataset = val_dataset.rename_column("label", "labels")

    train_dataset.set_format("torch")
    val_dataset.set_format("torch")

    # convert dataset into dataLoader
    train_dataloader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=bs, collate_fn=collator)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=bs, collate_fn=collator)

    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=config["weight_decay"])

    training_steps = epochs * len(train_dataloader)
    lr_scheduler = get_scheduler(name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=training_steps)

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    progress_bar = tqdm(range(training_steps))

    def get_accuracy(logits, label):
        predictions = torch.argmax(logits, dim=-1)
        correct = (predictions == label).sum().item()
        return correct / len(label)


    train_acc = []
    train_loss = []
    val_acc = []
    val_loss = []

    for epoch in range(epochs):
        print(f"=============== epoch number: {epoch + 1} ================")
        total_train_loss = 0.0
        total_val_loss = 0.0

        total_train_acc = 0.0
        total_val_acc = 0.0

        # Training portion
        model.train()
        for batch in train_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss

            total_train_acc += get_accuracy(outputs.logits, batch["labels"])
            total_train_loss += loss.item()

            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)
        
        print(f"training loss: {total_train_loss/len(train_dataloader)}")
        train_loss.append(total_train_loss/len(train_dataloader))

        print(f"training accuracy: {total_train_acc/len(train_dataloader)}")
        train_acc.append(total_train_acc/len(train_dataloader))

        # validation portion
        metric = evaluate.load("accuracy")
        model.eval()
        for batch in val_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            with torch.no_grad():
                outputs = model(**batch)

            logits = outputs.logits
            total_val_acc += get_accuracy(logits, batch["labels"])
            total_val_loss += outputs.loss.item()
            predictions = torch.argmax(logits, dim=-1)
            metric.add_batch(predictions=predictions, references=batch["labels"])

        metric.compute()

        print(f"validation loss: {total_val_loss/len(val_dataloader)}")
        val_loss.append(total_val_loss/len(val_dataloader))

        print(f"validation accuracy: {total_val_acc/len(val_dataloader)}")
        val_acc.append(total_val_acc/len(val_dataloader))  

    return train_acc, train_loss, val_acc, val_loss

In [32]:
import matplotlib.pyplot as plt

def display_graph(train_loss, val_loss, train_acc, val_acc):
    plt.figure(figsize=(8, 6))
    plt.plot(train_loss, label="Train Loss")
    plt.plot(val_loss, label="Validation Loss")   
    plt.title(f"Training and Validation Loss Curve")
    plt.xlabel('Epoch')
    plt.ylabel('Losses')
    plt.legend()
    plt.show()
    
    plt.figure(figsize=(8, 6))
    plt.plot(train_acc, label="Train Accuracy")
    plt.plot(val_acc, label="Validation Accuracy")
    plt.title(f"Training and Validation Accuracy Curve")
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()
    return None 

def find_avg_std(lst):
    lst = np.array(lst)
    avg = np.mean(lst)
    std = np.std(lst)
    return avg, std

In [33]:
torch.cuda.empty_cache()
model = GPT2ForSequenceClassification.from_pretrained("gpt2", num_labels=12)
model.config.pad_token_id = tokenizer.pad_token_id
model.resize_token_embeddings(len(tokenizer)) 

config = {}
config["batch_size"] = 8
config["lr"] = 1e-5
config["epochs"] = 20
config["weight_decay"] = 0.1
config["train_dataset"] = small_train_dataset

config["val_dataset"] = small_eval_dataset
config["model"] = model
config["collator"] = collator

# begin training
train_acc, train_loss, val_acc, val_loss = run(config)

train_acc_avg, train_acc_std = find_avg_std(train_acc)
train_loss_avg, train_loss_std = find_avg_std(train_loss)
val_acc_avg, val_acc_std = find_avg_std(val_acc)
val_loss_avg, val_loss_std = find_avg_std(val_loss)

print("\n ========== TRAINING/VALIDATION SUMMARY ============ ")
print(f"training loss mean {train_loss_avg} +/- {train_loss_std}")
print(f"training accuracy mean: {train_acc_avg} +/- {train_acc_std}")
print(f"validation loss mean: {val_loss_avg} +/- {val_loss_std}")
print(f"validation accuracy mean: {val_acc_avg} +/- {val_acc_std}")
print(f"final validation accucacy: {val_acc[-1]}")
print(f"final validation loss: {val_loss[-1]}")

display_graph(train_loss, val_loss, train_acc, val_acc)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ValueError: Column name ['sentence', 'idx'] not in the dataset. Current columns in the dataset: ['prompt', 'essay', 'label', 'input_ids', 'attention_mask']

In [56]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="test_trainer",
    eval_strategy="epoch",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=10)

In [57]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

In [58]:
trainer.train()


  0%|          | 0/50000 [00:00<?, ?it/s]

{'loss': 0.2812, 'grad_norm': 3.845147311665187e-09, 'learning_rate': 4.9500000000000004e-05, 'epoch': 0.1}
{'loss': 0.1697, 'grad_norm': 782.0405883789062, 'learning_rate': 4.9e-05, 'epoch': 0.2}
{'loss': 0.383, 'grad_norm': 0.003151032142341137, 'learning_rate': 4.85e-05, 'epoch': 0.3}
{'loss': 0.3283, 'grad_norm': 162.03866577148438, 'learning_rate': 4.8e-05, 'epoch': 0.4}
{'loss': 0.3821, 'grad_norm': 5.441792382043786e-06, 'learning_rate': 4.75e-05, 'epoch': 0.5}
{'loss': 0.5513, 'grad_norm': 4.02103205487947e-06, 'learning_rate': 4.7e-05, 'epoch': 0.6}
{'loss': 0.533, 'grad_norm': 1.7732545432025404e-09, 'learning_rate': 4.6500000000000005e-05, 'epoch': 0.7}
{'loss': 0.4861, 'grad_norm': 0.5461054444313049, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.8}
{'loss': 0.5458, 'grad_norm': 3.11140320263803e-05, 'learning_rate': 4.55e-05, 'epoch': 0.9}
{'loss': 0.617, 'grad_norm': 5.8695011517784224e-08, 'learning_rate': 4.5e-05, 'epoch': 1.0}


  0%|          | 0/2000 [00:00<?, ?it/s]

{'eval_loss': 12.594629287719727, 'eval_accuracy': 0.2095, 'eval_runtime': 64.1667, 'eval_samples_per_second': 31.169, 'eval_steps_per_second': 31.169, 'epoch': 1.0}
{'loss': 0.3866, 'grad_norm': 0.16529926657676697, 'learning_rate': 4.4500000000000004e-05, 'epoch': 1.1}
{'loss': 0.3071, 'grad_norm': 0.004531966056674719, 'learning_rate': 4.4000000000000006e-05, 'epoch': 1.2}
{'loss': 0.3366, 'grad_norm': 0.0011518449755385518, 'learning_rate': 4.35e-05, 'epoch': 1.3}
{'loss': 0.6005, 'grad_norm': 0.018270758911967278, 'learning_rate': 4.3e-05, 'epoch': 1.4}
{'loss': 0.5647, 'grad_norm': 4.205833829473704e-05, 'learning_rate': 4.25e-05, 'epoch': 1.5}
{'loss': 0.5211, 'grad_norm': 293.5579833984375, 'learning_rate': 4.2e-05, 'epoch': 1.6}
{'loss': 0.4601, 'grad_norm': 1.0273228326695971e-05, 'learning_rate': 4.15e-05, 'epoch': 1.7}
{'loss': 0.3867, 'grad_norm': 4.139243628742406e-06, 'learning_rate': 4.1e-05, 'epoch': 1.8}
{'loss': 0.3618, 'grad_norm': 4.3345690925455926e-10, 'learning_

  0%|          | 0/2000 [00:00<?, ?it/s]

{'eval_loss': 12.367122650146484, 'eval_accuracy': 0.2215, 'eval_runtime': 61.0953, 'eval_samples_per_second': 32.736, 'eval_steps_per_second': 32.736, 'epoch': 2.0}
{'loss': 0.2952, 'grad_norm': 2.3966403861486185e-10, 'learning_rate': 3.9500000000000005e-05, 'epoch': 2.1}
{'loss': 0.2746, 'grad_norm': 9.308111573180788e-12, 'learning_rate': 3.9000000000000006e-05, 'epoch': 2.2}
{'loss': 0.2265, 'grad_norm': 6.275900545915647e-07, 'learning_rate': 3.85e-05, 'epoch': 2.3}
{'loss': 0.2518, 'grad_norm': 2.079681053146487e-06, 'learning_rate': 3.8e-05, 'epoch': 2.4}
{'loss': 0.3378, 'grad_norm': 0.01506952103227377, 'learning_rate': 3.7500000000000003e-05, 'epoch': 2.5}
{'loss': 0.5446, 'grad_norm': 0.000356829259544611, 'learning_rate': 3.7e-05, 'epoch': 2.6}
{'loss': 0.4036, 'grad_norm': 1.4278353452682495, 'learning_rate': 3.65e-05, 'epoch': 2.7}
{'loss': 0.2912, 'grad_norm': 2.5169335504671153e-09, 'learning_rate': 3.6e-05, 'epoch': 2.8}
{'loss': 0.264, 'grad_norm': 2.049649783231189e

  0%|          | 0/2000 [00:00<?, ?it/s]

{'eval_loss': 12.959720611572266, 'eval_accuracy': 0.203, 'eval_runtime': 60.905, 'eval_samples_per_second': 32.838, 'eval_steps_per_second': 32.838, 'epoch': 3.0}
{'loss': 0.1554, 'grad_norm': 5.484984058057307e-07, 'learning_rate': 3.45e-05, 'epoch': 3.1}
{'loss': 0.183, 'grad_norm': 4.3949094106210396e-06, 'learning_rate': 3.4000000000000007e-05, 'epoch': 3.2}
{'loss': 0.1903, 'grad_norm': 494.7222900390625, 'learning_rate': 3.35e-05, 'epoch': 3.3}
{'loss': 0.2978, 'grad_norm': 1.2432559742592275e-05, 'learning_rate': 3.3e-05, 'epoch': 3.4}
{'loss': 0.1511, 'grad_norm': 1.3028131888859207e-06, 'learning_rate': 3.2500000000000004e-05, 'epoch': 3.5}
{'loss': 0.1648, 'grad_norm': 1.004780436630881e-08, 'learning_rate': 3.2000000000000005e-05, 'epoch': 3.6}
{'loss': 0.2037, 'grad_norm': 3.705028802869492e-06, 'learning_rate': 3.15e-05, 'epoch': 3.7}
{'loss': 0.1734, 'grad_norm': 0.45362958312034607, 'learning_rate': 3.1e-05, 'epoch': 3.8}
{'loss': 0.2628, 'grad_norm': 0.0017636583652347

  0%|          | 0/2000 [00:00<?, ?it/s]

{'eval_loss': 13.222750663757324, 'eval_accuracy': 0.218, 'eval_runtime': 62.7228, 'eval_samples_per_second': 31.886, 'eval_steps_per_second': 31.886, 'epoch': 4.0}
{'loss': 0.1257, 'grad_norm': 1.7784730285685946e-11, 'learning_rate': 2.95e-05, 'epoch': 4.1}
{'loss': 0.126, 'grad_norm': 0.10923507064580917, 'learning_rate': 2.9e-05, 'epoch': 4.2}
{'loss': 0.1042, 'grad_norm': 3.061736641996049e-09, 'learning_rate': 2.8499999999999998e-05, 'epoch': 4.3}
{'loss': 0.193, 'grad_norm': 1.5068312916866056e-11, 'learning_rate': 2.8000000000000003e-05, 'epoch': 4.4}
{'loss': 0.0669, 'grad_norm': 1.8254540240514316e-11, 'learning_rate': 2.7500000000000004e-05, 'epoch': 4.5}
{'loss': 0.126, 'grad_norm': 3.835392271867022e-05, 'learning_rate': 2.7000000000000002e-05, 'epoch': 4.6}
{'loss': 0.1722, 'grad_norm': 6.876189928561871e-08, 'learning_rate': 2.6500000000000004e-05, 'epoch': 4.7}
{'loss': 0.0768, 'grad_norm': 3.260879748268053e-05, 'learning_rate': 2.6000000000000002e-05, 'epoch': 4.8}
{'

  0%|          | 0/2000 [00:00<?, ?it/s]

{'eval_loss': 13.987447738647461, 'eval_accuracy': 0.215, 'eval_runtime': 65.6298, 'eval_samples_per_second': 30.474, 'eval_steps_per_second': 30.474, 'epoch': 5.0}
{'loss': 0.0731, 'grad_norm': 5.008688449859619, 'learning_rate': 2.45e-05, 'epoch': 5.1}
{'loss': 0.2496, 'grad_norm': 9.351071525998123e-07, 'learning_rate': 2.4e-05, 'epoch': 5.2}
{'loss': 0.0991, 'grad_norm': 1.1536369193265728e-08, 'learning_rate': 2.35e-05, 'epoch': 5.3}
{'loss': 0.0665, 'grad_norm': 8.327097691940288e-12, 'learning_rate': 2.3000000000000003e-05, 'epoch': 5.4}
{'loss': 0.14, 'grad_norm': 5.2568640967365354e-05, 'learning_rate': 2.25e-05, 'epoch': 5.5}
{'loss': 0.2235, 'grad_norm': 8.256272820972299e-08, 'learning_rate': 2.2000000000000003e-05, 'epoch': 5.6}
{'loss': 0.1305, 'grad_norm': 1.6443662403942128e-11, 'learning_rate': 2.15e-05, 'epoch': 5.7}
{'loss': 0.0948, 'grad_norm': 9.772696429877215e-09, 'learning_rate': 2.1e-05, 'epoch': 5.8}
{'loss': 0.1281, 'grad_norm': 2.75317824094401e-10, 'learnin

  0%|          | 0/2000 [00:00<?, ?it/s]

{'eval_loss': 14.242908477783203, 'eval_accuracy': 0.2345, 'eval_runtime': 62.6062, 'eval_samples_per_second': 31.946, 'eval_steps_per_second': 31.946, 'epoch': 6.0}
{'loss': 0.072, 'grad_norm': 1.728742669848682e-11, 'learning_rate': 1.9500000000000003e-05, 'epoch': 6.1}
{'loss': 0.0447, 'grad_norm': 4.337873349413712e-08, 'learning_rate': 1.9e-05, 'epoch': 6.2}
{'loss': 0.05, 'grad_norm': 1.5175830592184525e-09, 'learning_rate': 1.85e-05, 'epoch': 6.3}
{'loss': 0.0664, 'grad_norm': 5.845919234803709e-12, 'learning_rate': 1.8e-05, 'epoch': 6.4}
{'loss': 0.0665, 'grad_norm': 8.114767538480727e-12, 'learning_rate': 1.75e-05, 'epoch': 6.5}
{'loss': 0.023, 'grad_norm': 2.839945389965237e-09, 'learning_rate': 1.7000000000000003e-05, 'epoch': 6.6}
{'loss': 0.0101, 'grad_norm': 7.096487286695208e-11, 'learning_rate': 1.65e-05, 'epoch': 6.7}
{'loss': 0.0258, 'grad_norm': 1.8208722307733183e-09, 'learning_rate': 1.6000000000000003e-05, 'epoch': 6.8}
{'loss': 0.0859, 'grad_norm': 1.699858813708

  0%|          | 0/2000 [00:00<?, ?it/s]

{'eval_loss': 14.793861389160156, 'eval_accuracy': 0.2275, 'eval_runtime': 62.0285, 'eval_samples_per_second': 32.243, 'eval_steps_per_second': 32.243, 'epoch': 7.0}
{'loss': 0.0255, 'grad_norm': 4.2378985737334673e-11, 'learning_rate': 1.45e-05, 'epoch': 7.1}
{'loss': 0.0262, 'grad_norm': 1.0015089202397576e-08, 'learning_rate': 1.4000000000000001e-05, 'epoch': 7.2}
{'loss': 0.0236, 'grad_norm': 3.6287997318140697e-07, 'learning_rate': 1.3500000000000001e-05, 'epoch': 7.3}
{'loss': 0.0471, 'grad_norm': 9.078040363874607e-08, 'learning_rate': 1.3000000000000001e-05, 'epoch': 7.4}
{'loss': 0.0104, 'grad_norm': 6.874850078020245e-05, 'learning_rate': 1.25e-05, 'epoch': 7.5}
{'loss': 0.0094, 'grad_norm': 9.995345480115492e-13, 'learning_rate': 1.2e-05, 'epoch': 7.6}
{'loss': 0.0457, 'grad_norm': 8.720875399603756e-08, 'learning_rate': 1.1500000000000002e-05, 'epoch': 7.7}
{'loss': 0.0178, 'grad_norm': 4.5817203364606485e-09, 'learning_rate': 1.1000000000000001e-05, 'epoch': 7.8}
{'loss': 

  0%|          | 0/2000 [00:00<?, ?it/s]

{'eval_loss': 15.362398147583008, 'eval_accuracy': 0.2175, 'eval_runtime': 61.5219, 'eval_samples_per_second': 32.509, 'eval_steps_per_second': 32.509, 'epoch': 8.0}
{'loss': 0.0416, 'grad_norm': 1.0423387413610596e-12, 'learning_rate': 9.5e-06, 'epoch': 8.1}
{'loss': 0.0329, 'grad_norm': 3.9141906427175854e-07, 'learning_rate': 9e-06, 'epoch': 8.2}
{'loss': 0.0139, 'grad_norm': 1.9031163311922228e-09, 'learning_rate': 8.500000000000002e-06, 'epoch': 8.3}
{'loss': 0.0028, 'grad_norm': 1.852583864092594e-08, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.4}
{'loss': 0.0052, 'grad_norm': 1.6996849581119022e-06, 'learning_rate': 7.5e-06, 'epoch': 8.5}
{'loss': 0.0, 'grad_norm': 2.3955976091727393e-12, 'learning_rate': 7.000000000000001e-06, 'epoch': 8.6}
{'loss': 0.0283, 'grad_norm': 9.737476667007194e-13, 'learning_rate': 6.5000000000000004e-06, 'epoch': 8.7}
{'loss': 0.0103, 'grad_norm': 1.1287784040803217e-09, 'learning_rate': 6e-06, 'epoch': 8.8}
{'loss': 0.0577, 'grad_norm': 1.40

  0%|          | 0/2000 [00:00<?, ?it/s]

{'eval_loss': 15.765429496765137, 'eval_accuracy': 0.223, 'eval_runtime': 62.878, 'eval_samples_per_second': 31.808, 'eval_steps_per_second': 31.808, 'epoch': 9.0}
{'loss': 0.0001, 'grad_norm': 2.229499362904619e-10, 'learning_rate': 4.5e-06, 'epoch': 9.1}
{'loss': 0.007, 'grad_norm': 2.10346979656606e-06, 'learning_rate': 4.000000000000001e-06, 'epoch': 9.2}
{'loss': 0.0297, 'grad_norm': 3.2072393352233775e-11, 'learning_rate': 3.5000000000000004e-06, 'epoch': 9.3}
{'loss': 0.0074, 'grad_norm': 4.3553689300923626e-12, 'learning_rate': 3e-06, 'epoch': 9.4}
{'loss': 0.0013, 'grad_norm': 0.0009408674668520689, 'learning_rate': 2.5e-06, 'epoch': 9.5}
{'loss': 0.0038, 'grad_norm': 3.505306045425982e-10, 'learning_rate': 2.0000000000000003e-06, 'epoch': 9.6}
{'loss': 0.0008, 'grad_norm': 1.4736772754986305e-05, 'learning_rate': 1.5e-06, 'epoch': 9.7}
{'loss': 0.007, 'grad_norm': 9.466847927797062e-07, 'learning_rate': 1.0000000000000002e-06, 'epoch': 9.8}
{'loss': 0.0021, 'grad_norm': 3.326

  0%|          | 0/2000 [00:00<?, ?it/s]

{'eval_loss': 15.879420280456543, 'eval_accuracy': 0.2285, 'eval_runtime': 63.133, 'eval_samples_per_second': 31.679, 'eval_steps_per_second': 31.679, 'epoch': 10.0}
{'train_runtime': 7288.1038, 'train_samples_per_second': 6.86, 'train_steps_per_second': 6.86, 'train_loss': 0.17500640067743137, 'epoch': 10.0}


TrainOutput(global_step=50000, training_loss=0.17500640067743137, metrics={'train_runtime': 7288.1038, 'train_samples_per_second': 6.86, 'train_steps_per_second': 6.86, 'total_flos': 2.61320343552e+16, 'train_loss': 0.17500640067743137, 'epoch': 10.0})

In [62]:
model.save_pretrained("C:/Users/EricZ/Documents/GitHub/nlp-project/model/trained_20")
tokenizer.save_pretrained("C:/Users/EricZ/Documents/GitHub/nlp-project/model/trained_20")

('C:/Users/EricZ/Documents/GitHub/nlp-project/model/trained_20\\tokenizer_config.json',
 'C:/Users/EricZ/Documents/GitHub/nlp-project/model/trained_20\\special_tokens_map.json',
 'C:/Users/EricZ/Documents/GitHub/nlp-project/model/trained_20\\vocab.json',
 'C:/Users/EricZ/Documents/GitHub/nlp-project/model/trained_20\\merges.txt',
 'C:/Users/EricZ/Documents/GitHub/nlp-project/model/trained_20\\added_tokens.json',
 'C:/Users/EricZ/Documents/GitHub/nlp-project/model/trained_20\\tokenizer.json')

In [59]:
# Set up evaluation arguments
eval_args = TrainingArguments(
    per_device_eval_batch_size=1,
    output_dir="./results",
    logging_dir="./logs",
)

# Initialize Trainer with test dataset and custom compute metrics
trainer = Trainer(
    model=model,
    args=eval_args,
    eval_dataset=small_test_dataset,
    compute_metrics=compute_metrics
)

# Run evaluation
trainer.evaluate()


  0%|          | 0/2766 [00:00<?, ?it/s]

{'eval_loss': 16.065521240234375,
 'eval_model_preparation_time': 0.001,
 'eval_accuracy': 0.21366594360086769,
 'eval_runtime': 99.2678,
 'eval_samples_per_second': 27.864,
 'eval_steps_per_second': 27.864}

In [60]:
def compute_metrics_tol(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    # Calculate accuracy with a tolerance of ±1 around the actual label
    correct = (predictions == labels) | (predictions == labels + 1) | (predictions == labels - 1)
    accuracy = np.mean(correct)  # Average to get the accuracy

    return {"accuracy": accuracy}

In [61]:
# Set evaluation arguments
eval_args = TrainingArguments(
    per_device_eval_batch_size=1,
    output_dir="./results",
)

# Initialize Trainer with test dataset and compute_metrics function
trainer = Trainer(
    model=model,
    args=eval_args,
    eval_dataset=small_test_dataset,
    compute_metrics=compute_metrics_tol
)

# Run evaluation on the test set
results = trainer.evaluate()

# Output test accuracy
print("Test Set Accuracy:", results["eval_accuracy"])

  0%|          | 0/2766 [00:00<?, ?it/s]

Test Set Accuracy: 0.4689081706435286


In [54]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the fine-tuned model and tokenizer
save_directory = "C:/Users/EricZ/Documents/GitHub/nlp-project/model/trained_20"
model = AutoModelForSequenceClassification.from_pretrained(save_directory)
tokenizer = AutoTokenizer.from_pretrained(save_directory)

# Define a function to classify a single example
def classify_single_input(input_text):
    # Tokenize the input text
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=128)

    # Pass the tokenized input through the model
    with torch.no_grad():
        outputs = model(**inputs)

    # Get the predicted class (index of the highest logit)
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=-1).item()

    return predicted_class

# Example input for classification
input_text = "While many people go to university for academic study, more people should be encouraged to do vocational training because there is a lack of qualified workers such as electricians and plumbers. Do you agree or disagree?" + "\n" + "Nowadays, with the development of education, more and more students go to colleges for academic learning. However, some people consider that we should provide more vocational courses because a lack of practical workers such as electricians and plumbers. Personally, I think academic study is obviously important bacause it can improve the technology and the misunderstanding of job-not the lack of vocational training- is the main reason for a lack of labored workers.\
It is no doubt that the academic learning in university can induct the practical training and provoke the development of technology in different areas such as agricuture. If we deeply understand the theory of academic knowledge and reasonablely apply them in different fields, we could efficiently improve the technology and help human beings to increase their living standard. For instance, the development of drones turly improve the quantity of crops, which is benifit for famers. However, this technology need a lot of basic theories to invent the drones and teach worker to handle this machine. At this time, the academic study becomes much more important for practical workers.\
Some people claim that it is because a lack of vocational training, thus leading to a lack of qualified worker. However, in my opinion, I think there are mainly two reasons come from the misunderstanding of jod cause this bad situation. For one aspect, some people think that practical workers usually cannot earn much more money compared to empolyees working in office. Thus, they just give up this occupation for their career. For another aspect, some people do not respect those qualified worker, which make them feel their job are secondary. At last, there are no more people would like to choose this type of works.\
In conclusion, I think academic study is very much important for students to improve the technology, and the lack of practical workers is not because of less provided vocational training, but the misunderstanding of job between some people."
# Get the predicted class
predicted_class = classify_single_input(input_text)
print(f"Predicted Class: {predicted_class}")

Predicted Class: 0
