In [1]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Using device: cuda


In [2]:
import os
os.environ["WANDB_DISABLED"] = "true"
seed = 25
# random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [3]:
train_data = pd.read_csv('datasets/subtask_1/en/train.tsv',sep='\t')
train_data = train_data.reset_index(drop=True)
print(train_data.head())

      id                                               text      label
0  12322  you need to stop the engine and wait until it ...  generated
1   1682  The Commission shall publish the report; an in...  generated
2  22592  I have not been tweeting a lot lately, but I d...  generated
3  17390  I pass my exam and really thankgod for that bu...      human
4  30453  The template will have 3 parts: a mustache sha...      human


In [4]:
from sklearn.model_selection import train_test_split
train_data_texts = train_data['text'].to_list()
train_data_labels = train_data['label'].to_list()
train_texts, test_texts, train_labels, test_labels = train_test_split(train_data_texts, train_data_labels, test_size=0.1, random_state=25)
train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=0.1, random_state=25)
print('train data size: ', len(train_texts))
print('validation data size: ', len(val_texts))
print('test data size: ', len(test_texts))

train data size:  27414
validation data size:  3046
test data size:  3385


In [5]:
from transformers import AdamW, AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding
from transformers import TrainingArguments, Trainer
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForSequenceClassification.from_pretrained("gpt2").to(device)
model.config.pad_token_id = model.config.eos_token_id
print("Model Configurations")
print()
print(model.config)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model Configurations

GPT2Config {
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "pad_token_id": 50256,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transformers_version": "4.24.0",
  "use_cache": true,
  "vocab_size": 50257
}



In [6]:
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=256)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=256)
test_encodings = tokenizer(test_texts, truncation=True, padding=True, max_length=256)

class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self.labels[idx] == 'human':
            item['labels'] = torch.tensor(0)
        else:
            item['labels'] = torch.tensor(1)
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = CustomDataset(train_encodings, train_labels)
val_dataset = CustomDataset(val_encodings, val_labels)
test_dataset = CustomDataset(test_encodings, test_labels)

print("Sample Data Point")
print()
print(train_dataset[0])

Sample Data Point

{'input_ids': tensor([ 7085,  3871,  1998,   517,  2761,   351, 38628, 21545,   618,   484,
          423,   281,  5387,  3797, 43332,   287,  1295,    11,   290,   777,
         3797,   258,  1010,   389,  4615,  1141,   262,  4905,    13,   471,
         7640, 21545,   290,   753,   756, 18386,   460,   635,   307,  4073,
          416,   257, 26234, 15050,  5095,    13,   383, 25377,   326,  1630,
          262, 34918,   743,   423,   587,  9694,   287,   257,  1097,  5778,
          393,   584, 14649,    13,  3457,   756, 18386,  3221,  8833,   618,
          262, 25377,   284,   262, 34918,   651, 28472,  2434,   393, 10153,
          445,    13,  1318,   318,   645, 13996,   329,   428,  1917,    11,
          475, 17638,   460,  3360,   787,   262,  3074,  1365,    13,  3406,
         6253, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
        50256, 50256, 50256, 50

In [7]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='macro')
    acc = accuracy_score(labels, predictions)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }


In [8]:
training_args = TrainingArguments(
    output_dir='./models/english_gpt2_task1',       
    num_train_epochs=4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model='accuracy',
    evaluation_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=1,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)
trainer.train()

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
***** Running training *****
  Num examples = 27414
  Num Epochs = 4
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 96
  Gradient Accumulation steps = 1
  Total optimization steps = 1144
  Number of trainable parameters = 124441344


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.3279,0.69855,0.773802,0.767092,0.815015,0.77604
2,0.1931,0.29192,0.896257,0.895886,0.904119,0.897142
3,0.1134,0.316618,0.896257,0.895713,0.907266,0.897298
4,0.0531,0.343166,0.903152,0.902728,0.912663,0.904116


***** Running Evaluation *****
  Num examples = 3046
  Batch size = 96
Saving model checkpoint to ./models/english_gpt2_task1/checkpoint-286
Configuration saved in ./models/english_gpt2_task1/checkpoint-286/config.json
Model weights saved in ./models/english_gpt2_task1/checkpoint-286/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 3046
  Batch size = 96
Saving model checkpoint to ./models/english_gpt2_task1/checkpoint-572
Configuration saved in ./models/english_gpt2_task1/checkpoint-572/config.json
Model weights saved in ./models/english_gpt2_task1/checkpoint-572/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 3046
  Batch size = 96
Saving model checkpoint to ./models/english_gpt2_task1/checkpoint-858
Configuration saved in ./models/english_gpt2_task1/checkpoint-858/config.json
Model weights saved in ./models/english_gpt2_task1/checkpoint-858/pytorch_model.bin
Deleting older checkpoint [models/english_gpt2_task1/checkpoint-286] due to args.save_total

TrainOutput(global_step=1144, training_loss=0.368081795585739, metrics={'train_runtime': 879.1594, 'train_samples_per_second': 124.728, 'train_steps_per_second': 1.301, 'total_flos': 7666850855780352.0, 'train_loss': 0.368081795585739, 'epoch': 4.0})

In [9]:
trainer.evaluate(test_dataset)
predictions, labels, _ = trainer.predict(test_dataset)
predictions = np.argmax(predictions, axis=1)

from sklearn.metrics import classification_report
print(classification_report(labels, predictions))

***** Running Evaluation *****
  Num examples = 3385
  Batch size = 96


***** Running Prediction *****
  Num examples = 3385
  Batch size = 96


              precision    recall  f1-score   support

           0       0.97      0.81      0.89      1703
           1       0.84      0.98      0.90      1682

    accuracy                           0.89      3385
   macro avg       0.91      0.90      0.89      3385
weighted avg       0.91      0.89      0.89      3385



In [10]:
trainer.save_model('./models/english_gpt2_task1/trained_model')

Saving model checkpoint to ./models/english_gpt2_task1/trained_model
Configuration saved in ./models/english_gpt2_task1/trained_model/config.json
Model weights saved in ./models/english_gpt2_task1/trained_model/pytorch_model.bin
