# Fine-Tuning of OpenAI detector

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd drive/MyDrive/Colab Notebooks/ai_text_detection/cookbooks
# modify

/content/drive/MyDrive/Colab Notebooks/ai_text_detection/cookbooks


In [36]:
import json
import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
from sklearn.utils import resample
import torch.nn.functional as F
import copy
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [8]:
device = "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu"

### Load json data file and convert to dataframe

In [9]:
# human data

# loads a tofel dataset
with open('../dataset/human/tofel.json', "r") as f:
    h_tofel_dataset = json.load(f)

# loads an arxiv dataset
with open('../dataset/human/arxiv.json', "r") as f:
    h_arxiv_dataset = json.load(f)

# loads student essay
with open('../dataset/human/student_essay.json', "r") as f:
    h_essay_dataset = json.load(f)

# loads student computer essay
with open('../dataset/human/student_cs_essay.json', "r") as f:
    h_essay_cs_dataset = json.load(f)

In [10]:
# gpt data

# loads a tofel dataset
with open('../dataset/ai/gpt2medium_tofel.json', "r") as f:
    gpt_tofel_dataset = json.load(f)

# loads an arxiv dataset
with open('../dataset/ai/gpt2medium_arxiv.json', "r") as f:
    gpt_arxiv_dataset = json.load(f)

# loads student essay
with open('../dataset/ai/gpt2medium_essay.json', "r") as f:
    gpt_essay_dataset = json.load(f)

# loads student computer essay
with open('../dataset/ai/gpt2medium_essay_cs.json', "r") as f:
    gpt_essay_cs_dataset = json.load(f)

# loads a tofel dataset
with open('../dataset/ai/gpt35_tofel.json', "r") as f:
    gpt_35_tofel_dataset = json.load(f)

# loads student computer essay
with open('../dataset/ai/gpt35_essay_cs.json', "r") as f:
    gpt_35_essay_cs_dataset = json.load(f)

In [11]:
h_dataset = []
for i in [h_tofel_dataset, h_arxiv_dataset, h_essay_dataset, h_essay_cs_dataset]:
    h_dataset.extend(i)

len(h_dataset)

2478

In [12]:
gpt_dataset = []
for i in [gpt_tofel_dataset, gpt_arxiv_dataset, gpt_essay_dataset, gpt_essay_cs_dataset,
          gpt_35_tofel_dataset, gpt_35_essay_cs_dataset]:
    gpt_dataset.extend(i)

len(gpt_dataset)

2956

In [13]:
dct = {
    "text": [item['input'] for item in h_dataset] + [item['input'] for item in gpt_dataset],
    "label": [item['label'] for item in h_dataset] + [item['label'] for item in gpt_dataset],
}

df = pd.DataFrame(dct)

def label_to_numeric(value):
    if value == "human":
        return 1
    else:
        return 0

df['target'] = df['label'].apply(lambda x: label_to_numeric(x))

In [14]:
class_counts = df['target'].value_counts()
majority_class = class_counts.idxmax()
minority_class = class_counts.idxmin()

# Separate majority and minority classes
majority_df = df[df['target'] == majority_class]
minority_df = df[df['target'] == minority_class]

# Undersample majority class
undersampled_majority_df = resample(majority_df,
                                    replace=False,  # Sample without replacement
                                    n_samples=len(minority_df),  # Match minority class size
                                    random_state=42)  # For reproducibility

# Combine minority class with undersampled majority class
undersampled_df = pd.concat([undersampled_majority_df, minority_df])
undersampled_df = undersampled_df.sample(frac=1, random_state=42).reset_index(drop=True)
df = undersampled_df
df

Unnamed: 0,text,label,target
0,The spectral action on noncommutative torus is...,human,1
1,Computerized Ordering System Thesis\n\nIntrodu...,ai,0
2,We report observation of spin-orbit Berry's ph...,human,1
3,We report observation of spin-orbit Berry's ph...,ai,0
4,\n\nThe first thing you need to do is to get y...,ai,0
...,...,...,...
4951,The Lessons from Yusufiyah Essay\n\nThe Lesson...,human,1
4952,We present an algorithm that produces the clas...,ai,0
4953,In this paper we investigate the optimal contr...,human,1
4954,Correlation between Socioeconomic Status and H...,human,1


### Preparing the Dataset and Dataloader

In [15]:
class SentimentData(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = self.data.text
        self.target = self.data.target
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            pad_to_max_length=True,
            max_length=self.max_len,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'target': torch.tensor(self.target[index], dtype=torch.float)
        }

In [60]:
tokenizer = AutoTokenizer.from_pretrained("openai-community/roberta-large-openai-detector")
model = AutoModelForSequenceClassification.from_pretrained("openai-community/roberta-large-openai-detector")

Some weights of the model checkpoint at openai-community/roberta-large-openai-detector were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [42]:
# Defining some key variables that will be used later on in the training
MAX_LEN = 256
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4
EPOCHS = 1
LEARNING_RATE = 1e-05
# tokenizer = RobertaTokenizer.from_pretrained('roberta-base', truncation=True, do_lower_case=True)

In [43]:
train_size = 0.8
train_data=df.sample(frac=train_size, random_state=42)
test_data=df.drop(train_data.index).reset_index(drop=True)
train_data = train_data.reset_index(drop=True)

print("FULL Dataset: {}".format(df.shape))
print("TRAIN Dataset: {}".format(train_data.shape))
print("TEST Dataset: {}".format(test_data.shape))

training_set = SentimentData(train_data, tokenizer, MAX_LEN)
testing_set = SentimentData(test_data, tokenizer, MAX_LEN)

FULL Dataset: (4956, 3)
TRAIN Dataset: (3965, 3)
TEST Dataset: (991, 3)


In [44]:
train_params = {'batch_size': TRAIN_BATCH_SIZE, 'shuffle': True, 'num_workers': 0}
test_params = {'batch_size': VALID_BATCH_SIZE, 'shuffle': True, 'num_workers': 0}

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

### Fine Tuning the Model

In [45]:
# Creating the loss function and optimizer
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)

In [46]:
def calcuate_accuracy(preds, targets):
    n_correct = (preds==targets).sum().item()
    return n_correct

In [62]:
# Training Loop with Early Stopping:**
def train_model(epochs, file_name, model):
  best_loss = float('inf')
  best_model_weights = None
  patience = 10

  train_losses = []
  valid_losses = []

  model = model.to(device)
  for epoch in range(1, epochs+1):
    n_correct = 0
    tr_loss = 0
    nb_tr_steps = 0; nb_tr_examples = 0;

    ###################
    # train the model #
    ###################
    model.train()
    for i, data in enumerate(tqdm(training_loader)):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['target'].to(device, dtype = torch.long)

        outputs = model(ids, mask, token_type_ids)
        loss_ = loss_function(outputs.logits, targets)

        tr_loss += loss_.item()
        big_val, big_idx = torch.max(outputs.logits, dim=1)
        n_correct += calcuate_accuracy(big_idx, targets)

        nb_tr_steps += 1
        nb_tr_examples+=targets.size(0)

        if i%100==0:
            step_loss = tr_loss/nb_tr_steps
            step_acc = (n_correct*100)/nb_tr_examples
            print(f"Training Loss per 100 steps: {step_loss}")
            print(f"Training Accuracy per 100 steps: {step_acc}")
            print("=="*50)

        optimizer.zero_grad()
        loss_.backward()
        # # When using GPU
        optimizer.step()

    print(f'The Total Accuracy for Epoch {epoch}: {(n_correct*100)/nb_tr_examples}')
    train_epoch_loss = tr_loss/nb_tr_steps
    train_epoch_acc = (n_correct*100)/nb_tr_examples
    print(f"Training Loss Epoch: {train_epoch_loss}")
    print(f"Training Accuracy Epoch: {train_epoch_acc}")
    train_losses.append(train_epoch_loss)

    # ######################
    # # validate the model #
    # ######################
    # model.eval()  # Set model to evaluation mode
    # val_n_correct = 0; val_n_wrong = 0; total = 0;
    # val_loss = 0;
    # nb_val_steps = 0; nb_val_example = 0;
    # with torch.no_grad():  # Disable gradient calculation for validation
    #     for i, data in enumerate(tqdm(testing_loader)):
    #       ids = data['ids'].to(device, dtype = torch.long)
    #       mask = data['mask'].to(device, dtype = torch.long)
    #       token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
    #       targets = data['target'].to(device, dtype = torch.long)

    #       outputs = model(ids, mask, token_type_ids)
    #       loss_ = loss_function(outputs.logits, targets)

    #       val_loss += loss_.item()
    #       big_val, big_idx = torch.max(outputs.logits, dim=1)
    #       val_n_correct += calcuate_accuracy(big_idx, targets)

    #       nb_val_steps += 1
    #       nb_val_examples += targets.size(0)

    #       if i%100==0:
    #         step_loss = tr_loss/nb_tr_steps
    #         step_acc = (n_correct*100)/nb_tr_examples
    #         print(f"Validation Loss per 100 steps: {step_loss}")
    #         print(f"Validation Accuracy per 100 steps: {step_acc}")

    #       if val_loss < best_loss:
    #         best_loss = val_loss
    #         best_model_weights = copy.deepcopy(model.state_dict())
    #         patience = 10  # Reset patience counter
    #       else:
    #         patient -= 1
    #         if patient == 0:
    #           print("="*50)
    #           print("="*20, "early stop", "="*20)
    #           print("="*50)
    #           break

    # valid_epoch_loss = val_loss/nb_val_steps
    # valid_epoch_accu = (val_n_correct*100)/nb_val_example
    # print(f"Validation Loss Epoch: {valid_epoch_loss}")
    # print(f"Validation Accuracy Epoch: {valid_epoch_accu}")
    # valid_losses.append(valid_epoch_loss)


    path = '/content/drive/MyDrive/Colab Notebooks/ai_text_detection/models/'+file_name+'_epoch'+str(epoch)+'.pth'
    torch.save(model.state_dict(), path)

  return train_losses, valid_losses

In [63]:
train_losses, valid_losses = train_model(4, str('23May2024_2'), model)

  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.7031255960464478
Training Accuracy per 100 steps: 87.5


 20%|██        | 100/496 [01:50<07:19,  1.11s/it]

Training Loss per 100 steps: 0.5198084413267599
Training Accuracy per 100 steps: 83.91089108910892


 40%|████      | 200/496 [03:41<05:28,  1.11s/it]

Training Loss per 100 steps: 0.5553283661103515
Training Accuracy per 100 steps: 83.3955223880597


 60%|██████    | 300/496 [05:32<03:37,  1.11s/it]

Training Loss per 100 steps: 0.5559600055576658
Training Accuracy per 100 steps: 83.80398671096346


 81%|████████  | 400/496 [07:23<01:46,  1.10s/it]

Training Loss per 100 steps: 0.5571192507585627
Training Accuracy per 100 steps: 83.8216957605985


100%|██████████| 496/496 [09:09<00:00,  1.11s/it]


The Total Accuracy for Epoch 1: 83.5561160151324
Training Loss Epoch: 0.5589507854864707
Training Accuracy Epoch: 83.5561160151324


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss per 100 steps: 1.6776599884033203
Training Accuracy per 100 steps: 75.0


 20%|██        | 100/496 [01:50<07:23,  1.12s/it]

Training Loss per 100 steps: 0.5222560936668058
Training Accuracy per 100 steps: 84.77722772277228


 40%|████      | 200/496 [03:41<05:27,  1.11s/it]

Training Loss per 100 steps: 0.5769979786475314
Training Accuracy per 100 steps: 83.33333333333333


 60%|██████    | 300/496 [05:32<03:37,  1.11s/it]

Training Loss per 100 steps: 0.5503550191855435
Training Accuracy per 100 steps: 83.63787375415282


 81%|████████  | 400/496 [07:23<01:46,  1.11s/it]

Training Loss per 100 steps: 0.5658189091736223
Training Accuracy per 100 steps: 83.22942643391521


100%|██████████| 496/496 [09:10<00:00,  1.11s/it]


The Total Accuracy for Epoch 2: 83.95964691046659
Training Loss Epoch: 0.555270833052477
Training Accuracy Epoch: 83.95964691046659


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.12199705839157104
Training Accuracy per 100 steps: 100.0


 20%|██        | 100/496 [01:50<07:22,  1.12s/it]

Training Loss per 100 steps: 0.5885796867274117
Training Accuracy per 100 steps: 83.7871287128713


 40%|████      | 200/496 [03:41<05:29,  1.11s/it]

Training Loss per 100 steps: 0.5667090179463524
Training Accuracy per 100 steps: 84.32835820895522


 60%|██████    | 300/496 [05:33<03:38,  1.11s/it]

Training Loss per 100 steps: 0.5505517761461264
Training Accuracy per 100 steps: 84.59302325581395


 81%|████████  | 400/496 [07:24<01:46,  1.11s/it]

Training Loss per 100 steps: 0.5587301412527623
Training Accuracy per 100 steps: 84.03990024937656


100%|██████████| 496/496 [09:10<00:00,  1.11s/it]


The Total Accuracy for Epoch 3: 84.01008827238336
Training Loss Epoch: 0.5591309158311736
Training Accuracy Epoch: 84.01008827238336


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.45658066868782043
Training Accuracy per 100 steps: 87.5


 20%|██        | 100/496 [01:50<07:22,  1.12s/it]

Training Loss per 100 steps: 0.6353326649543377
Training Accuracy per 100 steps: 81.55940594059406


 40%|████      | 200/496 [03:42<05:30,  1.12s/it]

Training Loss per 100 steps: 0.5853137497266578
Training Accuracy per 100 steps: 82.96019900497512


 60%|██████    | 300/496 [05:33<03:36,  1.11s/it]

Training Loss per 100 steps: 0.5735118876016417
Training Accuracy per 100 steps: 83.34717607973423


 81%|████████  | 400/496 [07:24<01:46,  1.11s/it]

Training Loss per 100 steps: 0.5700660363905726
Training Accuracy per 100 steps: 83.16708229426433


100%|██████████| 496/496 [09:10<00:00,  1.11s/it]


The Total Accuracy for Epoch 4: 83.203026481715
Training Loss Epoch: 0.5654975044805812
Training Accuracy Epoch: 83.203026481715


In [64]:
train_losses, valid_losses

([0.5589507854864707,
  0.555270833052477,
  0.5591309158311736,
  0.5654975044805812],
 [])

In [34]:
# Defining the training function on the 80% of the dataset for tuning the distilbert model
def train(epoch, model):
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model = model.to(device)
    model.train()

    # for i, data in tqdm(enumerate(training_loader, 0), total=len(training_loader)):
    # for _, data in tqdm(enumerate(training_loader, 0)):
    for i, data in enumerate(tqdm(training_loader)):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['target'].to(device, dtype = torch.long)

        outputs = model(ids, mask, token_type_ids)
        loss = loss_function(outputs.logits, targets)

        tr_loss += loss.item()
        big_val, big_idx = torch.max(outputs.logits, dim=1)
        n_correct += calcuate_accuracy(big_idx, targets)

        nb_tr_steps += 1
        nb_tr_examples+=targets.size(0)

        if i%100==0:
            step_loss = tr_loss/nb_tr_steps
            step_acc = (n_correct*100)/nb_tr_examples
            print(f"Training Loss per 100 steps: {step_loss}")
            print(f"Training Accuracy per 100 steps: {step_acc}")
            print("=="*50)

        optimizer.zero_grad()
        loss.backward()
        # # When using GPU
        optimizer.step()

    print(f'The Total Accuracy for Epoch {epoch}: {(n_correct*100)/nb_tr_examples}')
    epoch_loss = tr_loss/nb_tr_steps
    epoch_acc = (n_correct*100)/nb_tr_examples
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_acc}")

    return epoch_loss, epoch_acc, step_loss, step_acc

In [35]:
EPOCHS = 10

train_result = []
for epoch in range(EPOCHS):
    train_result.append(train(epoch, model))

    # with gpt paraphrased data, epoch=10
    torch.save(model, '../models/23May2024_epoch10.pth')
    # loaded_model = torch.load('../models/fine_tune_epoch10.pth')



Training Loss per 100 steps: 0.29653799533843994
Training Accuracy per 100 steps: 87.5


 20%|██        | 100/496 [01:56<07:53,  1.19s/it]

Training Loss per 100 steps: 0.1919173747785205
Training Accuracy per 100 steps: 94.05940594059406


 40%|████      | 200/496 [03:55<05:53,  1.20s/it]

Training Loss per 100 steps: 0.14656814898448806
Training Accuracy per 100 steps: 94.96268656716418


 60%|██████    | 300/496 [05:55<03:55,  1.20s/it]

Training Loss per 100 steps: 0.13486164158803451
Training Accuracy per 100 steps: 95.34883720930233


 81%|████████  | 400/496 [07:55<01:55,  1.20s/it]

Training Loss per 100 steps: 0.118978393176005
Training Accuracy per 100 steps: 95.82294264339153


100%|██████████| 496/496 [09:50<00:00,  1.19s/it]


The Total Accuracy for Epoch 0: 96.14123581336696
Training Loss Epoch: 0.11149888655446345
Training Accuracy Epoch: 96.14123581336696


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.015021657571196556
Training Accuracy per 100 steps: 100.0


 20%|██        | 100/496 [01:59<07:53,  1.20s/it]

Training Loss per 100 steps: 0.02984088203480172
Training Accuracy per 100 steps: 99.38118811881188


 40%|████      | 200/496 [03:59<05:55,  1.20s/it]

Training Loss per 100 steps: 0.025363737390178547
Training Accuracy per 100 steps: 99.37810945273633


 60%|██████    | 300/496 [05:59<03:54,  1.20s/it]

Training Loss per 100 steps: 0.019061782061330437
Training Accuracy per 100 steps: 99.54318936877077


 81%|████████  | 400/496 [07:59<01:54,  1.20s/it]

Training Loss per 100 steps: 0.029492534709321522
Training Accuracy per 100 steps: 99.09600997506234


100%|██████████| 496/496 [09:53<00:00,  1.20s/it]


The Total Accuracy for Epoch 1: 98.99117276166457
Training Loss Epoch: 0.031799002159635666
Training Accuracy Epoch: 98.99117276166457


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.0004704270395450294
Training Accuracy per 100 steps: 100.0


 20%|██        | 100/496 [01:59<07:54,  1.20s/it]

Training Loss per 100 steps: 0.00332287800645843
Training Accuracy per 100 steps: 99.87623762376238


 40%|████      | 200/496 [03:59<05:54,  1.20s/it]

Training Loss per 100 steps: 0.009660644453993086
Training Accuracy per 100 steps: 99.6268656716418


 60%|██████    | 300/496 [05:59<03:54,  1.20s/it]

Training Loss per 100 steps: 0.01671150113390975
Training Accuracy per 100 steps: 99.29401993355482


 81%|████████  | 400/496 [07:59<01:55,  1.20s/it]

Training Loss per 100 steps: 0.01584218728031349
Training Accuracy per 100 steps: 99.34538653366583


100%|██████████| 496/496 [09:54<00:00,  1.20s/it]


The Total Accuracy for Epoch 2: 99.34426229508196
Training Loss Epoch: 0.019170242963839207
Training Accuracy Epoch: 99.34426229508196


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.001520379213616252
Training Accuracy per 100 steps: 100.0


 20%|██        | 100/496 [01:59<07:53,  1.20s/it]

Training Loss per 100 steps: 0.003298978157004913
Training Accuracy per 100 steps: 100.0


 40%|████      | 200/496 [03:59<05:55,  1.20s/it]

Training Loss per 100 steps: 0.006936699594848845
Training Accuracy per 100 steps: 99.93781094527363


 60%|██████    | 300/496 [05:59<03:55,  1.20s/it]

Training Loss per 100 steps: 0.011242661749490806
Training Accuracy per 100 steps: 99.66777408637874


 81%|████████  | 400/496 [07:59<01:54,  1.20s/it]

Training Loss per 100 steps: 0.015470851579694973
Training Accuracy per 100 steps: 99.59476309226933


100%|██████████| 496/496 [09:54<00:00,  1.20s/it]


The Total Accuracy for Epoch 3: 99.54602774274906
Training Loss Epoch: 0.01697757558348155
Training Accuracy Epoch: 99.54602774274906


  0%|          | 0/496 [00:00<?, ?it/s]

Training Loss per 100 steps: 0.0008747725514695048
Training Accuracy per 100 steps: 100.0


 20%|██        | 100/496 [01:59<07:53,  1.20s/it]

Training Loss per 100 steps: 0.0020462061337017832
Training Accuracy per 100 steps: 100.0


 40%|████      | 200/496 [03:59<05:54,  1.20s/it]

Training Loss per 100 steps: 0.0014261899499914178
Training Accuracy per 100 steps: 100.0


 47%|████▋     | 232/496 [04:39<05:17,  1.20s/it]


KeyboardInterrupt: 

In [38]:
torch.save(model, '../models/fine_tune_epoch4.pth')

In [None]:
# Load the best model weights
train_result.append(train(epoch, model))

model.load_state_dict(best_model_weights)

In [None]:
def valid(model, testing_loader):
    model = model.to(device)
    model.eval()
    n_correct = 0; n_wrong = 0; total = 0; tr_loss=0; nb_tr_steps=0; nb_tr_examples=0
    with torch.no_grad():
        # for _, data in tqdm(enumerate(testing_loader, 0)):
        for i, data in enumerate(iter(testing_loader)):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
            targets = data['target'].to(device, dtype = torch.long)

            outputs = model(ids, mask, token_type_ids)
            loss = loss_function(outputs.logits, targets)

            tr_loss += loss.item()
            big_val, big_idx = torch.max(outputs.logits, dim=1)
            n_correct += calcuate_accuracy(big_idx, targets)

            nb_tr_steps += 1
            nb_tr_examples+=targets.size(0)

            if _%5000==0:
                loss_step = tr_loss/nb_tr_steps
                accu_step = (n_correct*100)/nb_tr_examples
                print(f"Validation Loss per 100 steps: {loss_step}")
                print(f"Validation Accuracy per 100 steps: {accu_step}")
    epoch_loss = tr_loss/nb_tr_steps
    epoch_accu = (n_correct*100)/nb_tr_examples
    print(f"Validation Loss Epoch: {epoch_loss}")
    print(f"Validation Accuracy Epoch: {epoch_accu}")

    return epoch_accu

In [None]:
acc = valid(model, testing_loader)
print("Accuracy on test data = %0.2f%%" % acc)

KeyboardInterrupt: 

In [None]:
model = model.to(device)
model.eval()

for i, data in enumerate(iter(testing_loader)):
    ids = data['ids'].to(device, dtype = torch.long)
    mask = data['mask'].to(device, dtype = torch.long)
    token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
    targets = data['target']

    outputs = model(ids, mask, token_type_ids)
    logits = outputs.logits
    prob = F.softmax(logits, dim=-1)[:, :].detach().cpu().numpy()
    # 0: fake, 1: real
    for _, a in enumerate(prob):
        print(targets[_], a)
    break



tensor(0.) [9.9980313e-01 1.9692969e-04]
tensor(0.) [9.9980086e-01 1.9911495e-04]
tensor(0.) [9.998054e-01 1.946893e-04]
tensor(0.) [9.9980313e-01 1.9692969e-04]
tensor(1.) [0.00323358 0.99676645]
tensor(0.) [9.9979335e-01 2.0667377e-04]
tensor(1.) [2.0647602e-04 9.9979359e-01]
tensor(0.) [9.9980313e-01 1.9692969e-04]
tensor(1.) [1.847777e-04 9.998153e-01]
tensor(1.) [2.2683069e-04 9.9977320e-01]
tensor(1.) [2.1246231e-04 9.9978751e-01]
tensor(1.) [0.00831866 0.9916814 ]
tensor(1.) [2.248533e-04 9.997751e-01]
tensor(1.) [1.8204887e-04 9.9981803e-01]
tensor(0.) [9.9980408e-01 1.9599737e-04]
tensor(1.) [1.9227664e-04 9.9980778e-01]


In [None]:
# check the model

for item in h_tofel_dataset:
    test = item['input']
    encoded_inputs = tokenizer.encode_plus(
        test,   # Tokenize the sentence.
        None,   # Prepend the `[CLS]` token to the start.
        add_special_tokens=True,
        pad_to_max_length=True,
        max_length=512,
        return_token_type_ids=True,
        truncation=True,
        return_tensors="pt",
    )

    encoded_inputs = encoded_inputs.to(device)

    output = model(
        encoded_inputs.input_ids,
        encoded_inputs.attention_mask,
        encoded_inputs.token_type_ids,
    )

    logits = output.logits
    prob = F.softmax(logits, dim=-1)[:, :].detach().cpu().numpy().squeeze()
    print({"Fake": prob[0], "Real": prob[1]})



{'Fake': 0.0004347022, 'Real': 0.9995653}
{'Fake': 0.00055805565, 'Real': 0.9994419}
{'Fake': 0.00038633827, 'Real': 0.9996137}
{'Fake': 0.0010733912, 'Real': 0.9989266}
{'Fake': 0.00028868736, 'Real': 0.99971133}
{'Fake': 0.0004142776, 'Real': 0.9995857}
{'Fake': 0.00023252562, 'Real': 0.9997675}
{'Fake': 0.00033761762, 'Real': 0.9996624}
{'Fake': 0.000558327, 'Real': 0.9994417}
{'Fake': 0.0007697495, 'Real': 0.99923027}
{'Fake': 0.00022874387, 'Real': 0.9997713}
{'Fake': 0.0003081829, 'Real': 0.99969184}
{'Fake': 0.00021044456, 'Real': 0.99978954}
{'Fake': 0.000567336, 'Real': 0.9994326}
{'Fake': 0.0011065092, 'Real': 0.99889356}
{'Fake': 0.00071633206, 'Real': 0.99928373}
{'Fake': 0.0002635781, 'Real': 0.99973637}
{'Fake': 0.000228002, 'Real': 0.999772}
{'Fake': 0.001737065, 'Real': 0.99826294}
{'Fake': 0.00028268207, 'Real': 0.9997173}
{'Fake': 0.00028259447, 'Real': 0.9997174}
{'Fake': 0.0015291135, 'Real': 0.99847084}
{'Fake': 0.005053571, 'Real': 0.9949464}
{'Fake': 0.0002841300

### save the model

In [None]:
# output_model_file = 'pytorch_roberta_sentiment.bin'
# output_vocab_file = './'

# model_to_save = model
# torch.save(model_to_save, output_model_file)
# tokenizer.save_vocabulary(output_vocab_file)

# print('All files saved')
# print('This tutorial is completed')

In [None]:
torch.save(model, '../models/fine_tune_epoch1.pth')

# Load the saved model
loaded_model = torch.load('../models/fine_tune_epoch1.pth')