#LABELS
#0 = FALSE
#1 = TRUE

#The following code will read a csv that contains a list of wikipedia articles about Canadian cities.

In [None]:
pip install openai

In [None]:
from openai import OpenAI
import math

In [None]:
import csv
from google.colab import drive

drive.mount('/content/drive')
path = '/content/drive/MyDrive/ECE1786/'

In [None]:
# One entry per CSV row; rows with several columns are re-joined with ", ".
cities = []
with open(path+'cities.csv', newline='') as csvfile:
  cities.extend(', '.join(columns) for columns in csv.reader(csvfile, delimiter=','))

#print(cities)

#The following code will get summary data from a list of wikipedia articles. Then it will split them into sentences and label them with "1" for True and write it into a csv file.

In [None]:
import requests

def get_wikipedia_summary(article_title):
    """
    Fetches the summary of a Wikipedia article using the Wikipedia API.

    Parameters:
    article_title (str): The title of the Wikipedia article to fetch.

    Returns:
    str: The plain-text intro extract of the article, or the sentinel string
    "Article not found." when the response carries no extract.

    Raises:
    requests.HTTPError: If the API answers with an HTTP error status.
    requests.Timeout: If the API does not answer within the timeout.
    """
    URL = "https://en.wikipedia.org/w/api.php"

    PARAMS = {
        "action": "query",
        "format": "json",
        "titles": article_title,
        "prop": "extracts",
        "exintro": True,
        "explaintext": True,
    }

    # A timeout keeps one stalled request from hanging the whole scraping loop.
    response = requests.get(URL, params=PARAMS, timeout=30)
    # Fail loudly on HTTP errors instead of tripping over the body later.
    response.raise_for_status()
    data = response.json()

    # A response without "query"/"pages" is treated like a missing article
    # rather than raising KeyError / StopIteration.
    pages = data.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})
    return page.get("extract", "Article not found.")


In [None]:
list_of_summaries = []
article_not_found = []

# Strip double quotes from every title (in place), fetch its summary, and
# remember the titles for which the sentinel "Article not found." came back.
for position, raw_title in enumerate(cities):
  title = raw_title.replace("\"", "")
  cities[position] = title
  summary = get_wikipedia_summary(title)
  list_of_summaries.append(summary)
  if summary == "Article not found.":
    article_not_found.append(title)

for missing_title in article_not_found:
  print(missing_title)

In [None]:
for i in list_of_summaries:
  print(i)

In [None]:
import nltk
nltk.download('punkt')

In [None]:
from nltk.tokenize import sent_tokenize

# Split every summary into individual sentences.
all_sentences = []
for article in list_of_summaries:
  all_sentences.extend(sent_tokenize(article))

# Build a fresh list instead of calling remove() on the list being iterated:
# removing during iteration skips the element that follows each removal, so
# some short / ":"-terminated samples were never checked.
sentences = []
for sample in all_sentences:
  ### Arbitrarily remove samples that are shorter than 50 characters,
  ### and samples that end with :
  if len(sample) < 50 or sample[-1] == ":":
    print("removing: ", sample)
  else:
    sentences.append(sample)


In [None]:
labels = [1]*len(sentences)

def zipLabels(sentences, labels):
  """Pair each sentence with the label at the same position.

  Returns a list of (sentence, label) tuples; pairing stops at the end of
  the shorter input.
  """
  return [(sentence, label) for sentence, label in zip(sentences, labels)]

In [None]:
zipped_list = zipLabels(sentences, labels)

In [None]:
# Write the (sentence, label) pairs under a "sample,label" header row.
fields = ['sample', 'label']
# newline='' lets the csv module control line endings itself, as its
# documentation requires for files handed to csv.writer.
with open(path+'dataset.csv', 'w', newline='') as csvfile:
  csvwriter = csv.writer(csvfile, delimiter=',')
  csvwriter.writerow(fields)
  csvwriter.writerows(zipped_list)

#The following code takes half of the sentences and negates them using the GPT-4 chat API. The resulting sentences are used as samples and labeled "0" (False) in the csv file.

In [None]:
# Load dataset.csv as "<sentence>, <label>" strings.
dataset = []
with open(path+'dataset.csv', newline='') as csvfile:
  csvreader = csv.reader(csvfile, delimiter=',')
  # dataset.csv starts with a "sample,label" header row; skip it so the
  # header is not treated as a sentence (and is not sent off for negation).
  # NOTE(review): an already generated negdataset.csv presumably still
  # contains the negated header row - confirm before regenerating it.
  next(csvreader, None)
  for row in csvreader:
    dataset.append(', '.join(row))

import math
half = math.floor(len(dataset)/2)
negdataset = dataset[:half]  # ~50% of the dataset, to be negated
dataset = dataset[half:]

In [None]:
#Remove the label from the samples

In [None]:
def gptRequest(input):
  """Ask GPT-4 to reverse the meaning of one sentence.

  Parameters:
  input (str): The sentence to negate.

  Returns:
  str: The content of the first completion choice.

  Raises:
  ValueError: If no API key is set here or in the OPENAI_API_KEY variable.
  """
  import os

  api_key = ""
  # Fall back to the environment so the secret never has to be written into
  # the notebook; fail early with a clear message when neither is set.
  api_key = api_key or os.environ.get("OPENAI_API_KEY")
  if not api_key:
    raise ValueError("No OpenAI API key: set api_key or the OPENAI_API_KEY environment variable")

  prompt = "Reverse the meaning of the input sentence. For example, Input is \"Toronto is the most populous city in Canada and the capital city of the Canadian province of Ontario\" and the desired output would be: \"Toronto is the least populous city in Canada\". Try to be creative and change different part of the sentence to reach the goal."
  client = OpenAI(api_key=api_key)

  response = client.chat.completions.create(
    model="gpt-4",
    messages=[
      {"role": "system", "content": prompt},
      {"role": "user", "content": input},
    ]
  )
  return response.choices[0].message.content

In [None]:
'''negatedSentences = []
for sentence in negdataset:
  negatedSentences.append(gptRequest(sentence[:-3])+", "+str(0))''' #Loop to negate half of data, only performed once. Costs ~2.5$ per use

In [None]:
''' import pandas as pd
negdf = pd.DataFrame(negatedSentences)
negdf.to_csv(path+'negdataset.csv', header=False)''' # Ran once, to save first csv file

In [None]:
import pandas as pd

ndf = pd.read_csv(path+'negdataset.csv')

# Every entry of this column has the form "<sentence>, <label digit>":
# everything but the last three characters is the sentence, the last
# character is the label. Slicing the whole column at once replaces the
# row-by-row pd.concat (quadratic) and also works when the file has no rows.
raw_entries = ndf["Sample, no., 0"]
odf = pd.DataFrame({
  'sample': raw_entries.str[:-3],
  'label': raw_entries.str[-1].astype(int),
})


# Negated sentences first, then the second half of the original dataset.
completeDataset = pd.read_csv(path+'dataset.csv')
newDataset = pd.concat([odf, completeDataset[math.floor(len(completeDataset)/2):]], ignore_index=True)
newDataset.to_csv(path+'newDataset.csv', index=False)

#The following code reads newDataset.csv and groups its sentences into samples of 3. Each sample contains 2 truths and 1 lie; the label of a sample is the position (0-2) of the lie.

In [None]:
import random

# Load newDataset.csv as "<sentence>, <label>" strings.
newDataset = []
with open(path+'newDataset.csv', newline='') as csvfile:
  csvreader = csv.reader(csvfile, delimiter=',')
  # Skip the "sample,label" header: it does not end in "0", so it used to be
  # filed under the true sentences.
  next(csvreader, None)
  for row in csvreader:
    if row:  # blank lines come back as empty rows; ignore them
      newDataset.append(', '.join(row))

list_of_truths = []
list_of_lies = []

# The last character of every entry is its label: "0" marks a lie.
for entry in newDataset:
  if entry.endswith("0"):
    list_of_lies.append(entry)
  else:
    list_of_truths.append(entry)

In [None]:
def group(lies, truths):
  """Build len(lies) triples of two random truths followed by one random lie.

  Truths are drawn without replacement within a triple; the lie is drawn at
  random for every triple, so a lie may appear in several triples.
  """
  def draw_triple():
    truth_pair = random.sample(truths, 2)
    lie = random.sample(lies, 1)
    return truth_pair + lie

  return [draw_triple() for _ in range(len(lies))]


In [None]:
def label_samples(samples):
  """Merge each group of labelled sentences into one text plus a class label.

  Parameters:
  samples (list[list[str]]): Groups of "<sentence>, <label>" strings.

  Returns:
  tuple: (newSentences, labels). newSentences holds one string per group,
  the sentences with their trailing ", <label>" (3 characters) removed and
  concatenated in order. labels holds the position inside its group of every
  sentence whose last character is "0".

  The input lists are left untouched.
  """
  labels = []
  newSentences = []
  for sample in samples:
    stripped = []
    for i, sent in enumerate(sample):
      if sent.endswith("0"):
        labels.append(i)
      stripped.append(sent[:-3])
    # NOTE(review): sentences are joined without any separator, exactly as
    # before - confirm that this is intended for the model input.
    newSentences.append("".join(stripped))
  return newSentences, labels

In [None]:
tempSamples = group(list_of_lies, list_of_truths)

# Shuffle each triple in place so the lie is not always in the last position.
for triple in tempSamples:
  random.shuffle(triple)

In [None]:
newSamples, labels = label_samples(tempSamples)
print(labels)
output = zipLabels(newSamples, labels)

In [None]:
def writeToCSV(data):
  """Write rows of (sample, label) to groupDataset.csv under a header row.

  Parameters:
  data: Iterable of rows accepted by csv.writer.writerows.
  """
  fields = ['sample', 'label']
  # newline='' lets the csv module control line endings itself, as its
  # documentation requires for files handed to csv.writer.
  with open(path+'groupDataset.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=',')
    csvwriter.writerow(fields)
    csvwriter.writerows(data)

In [None]:
writeToCSV(output)

# Data Augmentation (create bigger trainset using GPT4 to rephrase some sentences)

In [None]:
def rephrase(input):
  """Ask GPT-4 to rephrase one sentence while keeping its meaning.

  Parameters:
  input (str): The sentence to rephrase.

  Returns:
  str: The content of the first completion choice.

  Raises:
  ValueError: If no API key is set here or in the OPENAI_API_KEY variable.
  """
  import os

  api_key = ""
  # Fall back to the environment so the secret never has to be written into
  # the notebook; fail early with a clear message when neither is set.
  api_key = api_key or os.environ.get("OPENAI_API_KEY")
  if not api_key:
    raise ValueError("No OpenAI API key: set api_key or the OPENAI_API_KEY environment variable")

  prompt = "Rephrase the input sentence but keep the meaning of the sentence. Try to be creative!"
  client = OpenAI(api_key=api_key)

  response = client.chat.completions.create(
    model="gpt-4",
    messages=[
      {"role": "system", "content": prompt},
      {"role": "user", "content": input},
    ]
  )
  return response.choices[0].message.content

In [None]:
'''portion = newDataset.sample(frac = 1)
portion = portion[:math.floor(len(newDataset)/2)]
portion = portion.reset_index(drop=True)

for i, sentence in enumerate(portion["sample"]):
  if i==0:
    rephrased = pd.DataFrame({'sample': [rephrase(sentence)] , 'label': [int(portion["label"][i])]})
  else:
    t = pd.DataFrame({'sample': [rephrase(sentence)] , 'label': [int(portion["label"][i])]})
    rephrased = pd.concat([rephrased, t], ignore_index=True)

rephrased.to_csv(path+'rephrased.csv', index=False)
rephrased''' # Used to create more data

In [None]:
'''# Append the new senteces to previous file
new = pd.concat([newDataset, rephrased], ignore_index=True)
new.to_csv(path+'originalPlusRephrased.csv', index=False)
new'''

# Converting originalPlusRephrased.csv into a 3 class dataset

In [None]:
import random

# Load originalPlusRephrased.csv as "<sentence>, <label>" strings.
oPR = [] # OriginalPlusRephrased
with open(path+'originalPlusRephrased.csv', newline='') as csvfile:
  csvreader = csv.reader(csvfile, delimiter=',')
  # Skip the "sample,label" header: it does not end in "0", so it used to be
  # filed under the true sentences.
  next(csvreader, None)
  for row in csvreader:
    if row:  # blank lines come back as empty rows; ignore them
      oPR.append(', '.join(row))

list_of_truths = []
list_of_lies = []

# The last character of every entry is its label: "0" marks a lie.
for entry in oPR:
  if entry.endswith("0"):
    list_of_lies.append(entry)
  else:
    list_of_truths.append(entry)

In [None]:
def group(lies, truths):
  """Build len(lies) triples of two random truths followed by one random lie.

  Truths are drawn without replacement within a triple; the lie is drawn at
  random for every triple, so a lie may appear in several triples.
  """
  def draw_triple():
    truth_pair = random.sample(truths, 2)
    lie = random.sample(lies, 1)
    return truth_pair + lie

  return [draw_triple() for _ in range(len(lies))]

In [None]:
def label_samples(samples):
  """Merge each group of labelled sentences into one text plus a class label.

  Parameters:
  samples (list[list[str]]): Groups of "<sentence>, <label>" strings.

  Returns:
  tuple: (newSentences, labels). newSentences holds one string per group,
  the sentences with their trailing ", <label>" (3 characters) removed and
  concatenated in order. labels holds the position inside its group of every
  sentence whose last character is "0".

  The input lists are left untouched.
  """
  labels = []
  newSentences = []
  for sample in samples:
    stripped = []
    for i, sent in enumerate(sample):
      if sent.endswith("0"):
        labels.append(i)
      stripped.append(sent[:-3])
    # NOTE(review): sentences are joined without any separator, exactly as
    # before - confirm that this is intended for the model input.
    newSentences.append("".join(stripped))
  return newSentences, labels

In [None]:
tempSamples = group(list_of_lies, list_of_truths)

# Shuffle each triple in place so the lie is not always in the last position.
for triple in tempSamples:
  random.shuffle(triple)

In [None]:
newSamples, labels = label_samples(tempSamples)
print(labels)
output = zipLabels(newSamples, labels)

In [None]:
def writeToCSV(data):
  """Write rows of (sample, label) to groupDatasetOPR.csv under a header row.

  Parameters:
  data: Iterable of rows accepted by csv.writer.writerows.
  """
  fields = ['sample', 'label']
  # newline='' lets the csv module control line endings itself, as its
  # documentation requires for files handed to csv.writer.
  with open(path+'groupDatasetOPR.csv', 'w', newline='') as csvfile: #groupDataset Original Plus Rephrased
    csvwriter = csv.writer(csvfile, delimiter=',')
    csvwriter.writerow(fields)
    csvwriter.writerows(data)

In [None]:
writeToCSV(output)

# Training

In [None]:
!pip install datasets
! pip install -U accelerate
! pip install -U transformers
!pip install evaluate

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments
from transformers import Trainer
import numpy as np
import evaluate
import matplotlib.pyplot as plt
from transformers import TrainerCallback, TrainerControl, TrainerState
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
# from sklearn.metrics import multiclass_log_loss

In [None]:
def compute_metrics(eval_pred):
    """Score a (logits, labels) pair with the module-level ``metric``."""
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)


class ComputeTrainMetricsCallback(TrainerCallback):
    """Record train/validation accuracy and cross-entropy loss after each epoch.

    The lists ``train_accuracy``, ``eval_accuracy``, ``train_loss`` and
    ``eval_loss`` each gain one entry per epoch.
    """

    def __init__(self, trainer=None):
        super().__init__()
        self.trainer = trainer
        self.train_accuracy = []
        self.eval_accuracy = []
        self.train_loss = []
        self.eval_loss = []

    def _accuracy_and_loss(self, dataset):
        """Run ``trainer.predict`` on *dataset* and return ``(accuracy, loss)``."""
        output = self.trainer.predict(dataset)
        labels = output.label_ids
        logits = np.asarray(output.predictions, dtype=np.float64)

        # log_loss needs class probabilities, not argmax class ids: softmax
        # the logits (subtracting the row maximum keeps np.exp from
        # overflowing).
        shifted = logits - logits.max(axis=1, keepdims=True)
        probabilities = np.exp(shifted)
        probabilities /= probabilities.sum(axis=1, keepdims=True)

        accuracy = accuracy_score(labels, np.argmax(logits, axis=1))
        # Name every class explicitly so a split that happens to contain a
        # single class is still scored against all logit columns.
        loss = log_loss(labels, probabilities, labels=np.arange(logits.shape[1]))
        return accuracy, loss

    def on_epoch_end(self, args, state: TrainerState, control: TrainerControl, **kwargs):
        if self.trainer is None:
            raise ValueError("Trainer not set for ComputeTrainMetricsCallback")

        train_accuracy, train_loss = self._accuracy_and_loss(self.trainer.train_dataset)
        eval_accuracy, eval_loss = self._accuracy_and_loss(self.trainer.eval_dataset)

        self.train_accuracy.append(train_accuracy)
        self.eval_accuracy.append(eval_accuracy)

        self.train_loss.append(train_loss)
        self.eval_loss.append(eval_loss)



def compute_metrics(eval_pred):
    """Score a (logits, labels) pair with the module-level ``metric``."""
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)

## Train on 2 class original dataset (original sentences + half of them negated)

In [None]:
# Load the sentence-level CSV and shuffle it before splitting.
ds = load_dataset('csv', data_files=path+'newDataset.csv').shuffle()

# Tokenizer and 2-label classifier; end-of-sequence doubles as padding token.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForSequenceClassification.from_pretrained("gpt2", num_labels=2)
model.config.pad_token_id = model.config.eos_token_id


def tokenize_function(examples):
    """Tokenize the "sample" column with max_length padding and truncation."""
    return tokenizer(examples["sample"], padding="max_length", truncation=True)


tokenized_datasets = ds.map(tokenize_function, batched=True)

# The first 70% of the shuffled rows train the model, the rest validate it.
total_rows = len((ds["train"]["sample"]))
split_at = math.floor(0.7*total_rows)
small_train_dataset = tokenized_datasets["train"].select(range(split_at))
small_eval_dataset = tokenized_datasets["train"].select(range(split_at, total_rows))

metric = evaluate.load("accuracy")
training_args = TrainingArguments(
    output_dir="/content/test_trainer",
    num_train_epochs=6,
    evaluation_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

# The callback needs the trainer itself to run predictions after each epoch.
train_metrics_callback = ComputeTrainMetricsCallback(trainer=trainer)
trainer.add_callback(train_metrics_callback)

trainer.train()

In [None]:
# Per-epoch accuracy recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_accuracy, 'bo-', label='Training accuracy')
plt.plot(epochs, history.eval_accuracy, 'ro-', label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy for 2-class original dataset')
plt.legend()

plt.show()

In [None]:
# Per-epoch loss recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_loss, 'bo-', label='Training loss')
plt.plot(epochs, history.eval_loss, 'ro-', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for 2-class original dataset')
plt.legend()

plt.show()

In [None]:
# trainer.predict(small_eval_dataset)
# print(small_eval_dataset["sample"][4])
# print(small_eval_dataset["label"][4])

# Train on new dataset (original + rephrased sentences)

In [None]:
# Load the original + rephrased sentences and shuffle them before splitting.
ds = load_dataset('csv', data_files=path+'originalPlusRephrased.csv').shuffle()

# Tokenizer and 2-label classifier; end-of-sequence doubles as padding token.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForSequenceClassification.from_pretrained("gpt2", num_labels=2)
model.config.pad_token_id = model.config.eos_token_id


def tokenize_function(examples):
    """Tokenize the "sample" column with max_length padding and truncation."""
    return tokenizer(examples["sample"], padding="max_length", truncation=True)


tokenized_datasets = ds.map(tokenize_function, batched=True)

# The first 70% of the shuffled rows train the model, the rest validate it.
total_rows = len((ds["train"]["sample"]))
split_at = math.floor(0.7*total_rows)
small_train_dataset = tokenized_datasets["train"].select(range(split_at))
small_eval_dataset = tokenized_datasets["train"].select(range(split_at, total_rows))

metric = evaluate.load("accuracy")

training_args = TrainingArguments(
    output_dir="/content/test_trainer",
    num_train_epochs=6,
    evaluation_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

# The callback needs the trainer itself to run predictions after each epoch.
train_metrics_callback = ComputeTrainMetricsCallback(trainer=trainer)
trainer.add_callback(train_metrics_callback)

trainer.train()

In [None]:
# Per-epoch accuracy recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_accuracy, 'bo-', label='Training accuracy')
plt.plot(epochs, history.eval_accuracy, 'ro-', label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy for 2-class augmented dataset')
plt.legend()

plt.show()

In [None]:
# Per-epoch loss recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_loss, 'bo-', label='Training loss')
plt.plot(epochs, history.eval_loss, 'ro-', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for 2-class augmented dataset')
plt.legend()

plt.show()

# Train for 3 class dataset

In [None]:
import torch

In [None]:
def compute_metrics(eval_pred):
    """Score a (logits, labels) pair with the module-level ``metric``."""
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)


class ComputeTrainMetricsCallback(TrainerCallback):
    """Record train/validation accuracy and cross-entropy loss after each epoch.

    The lists ``train_accuracy``, ``eval_accuracy``, ``train_loss`` and
    ``eval_loss`` each gain one entry per epoch.
    """

    def __init__(self, trainer=None):
        super().__init__()
        self.trainer = trainer
        self.train_accuracy = []
        self.eval_accuracy = []
        self.train_loss = []
        self.eval_loss = []

    def _accuracy_and_loss(self, dataset):
        """Run ``trainer.predict`` on *dataset* and return ``(accuracy, loss)``."""
        output = self.trainer.predict(dataset)
        labels = output.label_ids
        logits = np.asarray(output.predictions, dtype=np.float64)

        # log_loss needs class probabilities, not raw logits: softmax them
        # (subtracting the row maximum keeps np.exp from overflowing).
        shifted = logits - logits.max(axis=1, keepdims=True)
        probabilities = np.exp(shifted)
        probabilities /= probabilities.sum(axis=1, keepdims=True)

        accuracy = accuracy_score(labels, np.argmax(logits, axis=1))
        # ``labels=`` must list the classes (one per logit column), not
        # repeat the per-row label array.
        loss = log_loss(labels, probabilities, labels=np.arange(logits.shape[1]))
        return accuracy, loss

    def on_epoch_end(self, args, state: TrainerState, control: TrainerControl, **kwargs):
        if self.trainer is None:
            raise ValueError("Trainer not set for ComputeTrainMetricsCallback")

        train_accuracy, train_loss = self._accuracy_and_loss(self.trainer.train_dataset)
        eval_accuracy, eval_loss = self._accuracy_and_loss(self.trainer.eval_dataset)

        self.train_accuracy.append(train_accuracy)
        self.eval_accuracy.append(eval_accuracy)

        self.train_loss.append(train_loss)
        self.eval_loss.append(eval_loss)



def compute_metrics(eval_pred):
    """Score a (logits, labels) pair with the module-level ``metric``."""
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)

In [None]:
# Load the grouped (3-sentence) CSV and shuffle it before splitting.
ds = load_dataset('csv', data_files=path+'groupDataset.csv').shuffle()

# Tokenizer and 3-label classifier; end-of-sequence doubles as padding token.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForSequenceClassification.from_pretrained("gpt2", num_labels=3)
model.config.pad_token_id = model.config.eos_token_id


def tokenize_function(examples):
    """Tokenize the "sample" column with max_length padding and truncation."""
    return tokenizer(examples["sample"], padding="max_length", truncation=True)


tokenized_datasets = ds.map(tokenize_function, batched=True)

# The first 70% of the shuffled rows train the model, the rest validate it.
total_rows = len((ds["train"]["sample"]))
split_at = math.floor(0.7*total_rows)
small_train_dataset = tokenized_datasets["train"].select(range(split_at))
small_eval_dataset = tokenized_datasets["train"].select(range(split_at, total_rows))

metric = evaluate.load("accuracy")
training_args = TrainingArguments(
    output_dir="/content/test_trainer",
    num_train_epochs=6,
    evaluation_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

# The callback needs the trainer itself to run predictions after each epoch.
train_metrics_callback = ComputeTrainMetricsCallback(trainer=trainer)
trainer.add_callback(train_metrics_callback)

trainer.train()

In [None]:
# Per-epoch accuracy recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_accuracy, 'bo-', label='Training accuracy')
plt.plot(epochs, history.eval_accuracy, 'ro-', label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy for 3-class original dataset')
plt.legend()

plt.show()

In [None]:
# Per-epoch loss recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_loss, 'bo-', label='Training loss')
plt.plot(epochs, history.eval_loss, 'ro-', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for 3-class original dataset')
plt.legend()

plt.show()

# Train on new 3 class dataset (original + rephrased sentences)

In [None]:
# Load the grouped original + rephrased CSV and shuffle it before splitting.
ds = load_dataset('csv', data_files=path+'groupDatasetOPR.csv').shuffle()

# Tokenizer and 3-label classifier; end-of-sequence doubles as padding token.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForSequenceClassification.from_pretrained("gpt2", num_labels=3)
model.config.pad_token_id = model.config.eos_token_id


def tokenize_function(examples):
    """Tokenize the "sample" column with max_length padding and truncation."""
    return tokenizer(examples["sample"], padding="max_length", truncation=True)


tokenized_datasets = ds.map(tokenize_function, batched=True)

# The first 70% of the shuffled rows train the model, the rest validate it.
total_rows = len((ds["train"]["sample"]))
split_at = math.floor(0.7*total_rows)
small_train_dataset = tokenized_datasets["train"].select(range(split_at))
small_eval_dataset = tokenized_datasets["train"].select(range(split_at, total_rows))

metric = evaluate.load("accuracy")
training_args = TrainingArguments(
    output_dir="/content/test_trainer",
    num_train_epochs=6,
    evaluation_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

# The callback needs the trainer itself to run predictions after each epoch.
train_metrics_callback = ComputeTrainMetricsCallback(trainer=trainer)
trainer.add_callback(train_metrics_callback)

trainer.train()

In [None]:
# Per-epoch accuracy recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_accuracy, 'bo-', label='Training accuracy')
plt.plot(epochs, history.eval_accuracy, 'ro-', label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy for 3-class augmented dataset')
plt.legend()

plt.show()

In [None]:
# Per-epoch loss recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_loss, 'bo-', label='Training loss')
plt.plot(epochs, history.eval_loss, 'ro-', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for 3-class augmented dataset')
plt.legend()

plt.show()

## Train for 3 epochs

### Training

In [None]:
!pip install datasets
! pip install -U accelerate
! pip install -U transformers
!pip install evaluate

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments
from transformers import Trainer
import numpy as np
import evaluate
import matplotlib.pyplot as plt
from transformers import TrainerCallback, TrainerControl, TrainerState
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
# from sklearn.metrics import multiclass_log_loss

In [None]:
def compute_metrics(eval_pred):
    """Score a (logits, labels) pair with the module-level ``metric``."""
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)


class ComputeTrainMetricsCallback(TrainerCallback):
    """Record train/validation accuracy and cross-entropy loss after each epoch.

    The lists ``train_accuracy``, ``eval_accuracy``, ``train_loss`` and
    ``eval_loss`` each gain one entry per epoch.
    """

    def __init__(self, trainer=None):
        super().__init__()
        self.trainer = trainer
        self.train_accuracy = []
        self.eval_accuracy = []
        self.train_loss = []
        self.eval_loss = []

    def _accuracy_and_loss(self, dataset):
        """Run ``trainer.predict`` on *dataset* and return ``(accuracy, loss)``."""
        output = self.trainer.predict(dataset)
        labels = output.label_ids
        logits = np.asarray(output.predictions, dtype=np.float64)

        # log_loss needs class probabilities, not argmax class ids: softmax
        # the logits (subtracting the row maximum keeps np.exp from
        # overflowing).
        shifted = logits - logits.max(axis=1, keepdims=True)
        probabilities = np.exp(shifted)
        probabilities /= probabilities.sum(axis=1, keepdims=True)

        accuracy = accuracy_score(labels, np.argmax(logits, axis=1))
        # Name every class explicitly so a split that happens to contain a
        # single class is still scored against all logit columns.
        loss = log_loss(labels, probabilities, labels=np.arange(logits.shape[1]))
        return accuracy, loss

    def on_epoch_end(self, args, state: TrainerState, control: TrainerControl, **kwargs):
        if self.trainer is None:
            raise ValueError("Trainer not set for ComputeTrainMetricsCallback")

        train_accuracy, train_loss = self._accuracy_and_loss(self.trainer.train_dataset)
        eval_accuracy, eval_loss = self._accuracy_and_loss(self.trainer.eval_dataset)

        self.train_accuracy.append(train_accuracy)
        self.eval_accuracy.append(eval_accuracy)

        self.train_loss.append(train_loss)
        self.eval_loss.append(eval_loss)



def compute_metrics(eval_pred):
    """Score a (logits, labels) pair with the module-level ``metric``."""
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)

## Train on 2 class original dataset (original sentences + half of them negated)

In [None]:
# Load the sentence-level CSV and shuffle it before splitting.
ds = load_dataset('csv', data_files=path+'newDataset.csv').shuffle()

# Tokenizer and 2-label classifier; end-of-sequence doubles as padding token.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForSequenceClassification.from_pretrained("gpt2", num_labels=2)
model.config.pad_token_id = model.config.eos_token_id


def tokenize_function(examples):
    """Tokenize the "sample" column with max_length padding and truncation."""
    return tokenizer(examples["sample"], padding="max_length", truncation=True)


tokenized_datasets = ds.map(tokenize_function, batched=True)

# The first 70% of the shuffled rows train the model, the rest validate it.
total_rows = len((ds["train"]["sample"]))
split_at = math.floor(0.7*total_rows)
small_train_dataset = tokenized_datasets["train"].select(range(split_at))
small_eval_dataset = tokenized_datasets["train"].select(range(split_at, total_rows))

metric = evaluate.load("accuracy")
training_args = TrainingArguments(
    output_dir="/content/test_trainer",
    num_train_epochs=3,
    evaluation_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

# The callback needs the trainer itself to run predictions after each epoch.
train_metrics_callback = ComputeTrainMetricsCallback(trainer=trainer)
trainer.add_callback(train_metrics_callback)

trainer.train()

In [None]:
# Per-epoch accuracy recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_accuracy, 'bo-', label='Training accuracy')
plt.plot(epochs, history.eval_accuracy, 'ro-', label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy for 2-class original dataset')
plt.legend()

plt.show()

In [None]:
# Per-epoch loss recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_loss, 'bo-', label='Training loss')
plt.plot(epochs, history.eval_loss, 'ro-', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for 2-class original dataset')
plt.legend()

plt.show()

In [None]:
# trainer.predict(small_eval_dataset)
# print(small_eval_dataset["sample"][4])
# print(small_eval_dataset["label"][4])

# Train on new dataset (original + rephrased sentences)

In [None]:
# Load the original + rephrased sentences and shuffle them before splitting.
ds = load_dataset('csv', data_files=path+'originalPlusRephrased.csv').shuffle()

# Tokenizer and 2-label classifier; end-of-sequence doubles as padding token.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForSequenceClassification.from_pretrained("gpt2", num_labels=2)
model.config.pad_token_id = model.config.eos_token_id


def tokenize_function(examples):
    """Tokenize the "sample" column with max_length padding and truncation."""
    return tokenizer(examples["sample"], padding="max_length", truncation=True)


tokenized_datasets = ds.map(tokenize_function, batched=True)

# The first 70% of the shuffled rows train the model, the rest validate it.
total_rows = len((ds["train"]["sample"]))
split_at = math.floor(0.7*total_rows)
small_train_dataset = tokenized_datasets["train"].select(range(split_at))
small_eval_dataset = tokenized_datasets["train"].select(range(split_at, total_rows))

metric = evaluate.load("accuracy")

training_args = TrainingArguments(
    output_dir="/content/test_trainer",
    num_train_epochs=3,
    evaluation_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

# The callback needs the trainer itself to run predictions after each epoch.
train_metrics_callback = ComputeTrainMetricsCallback(trainer=trainer)
trainer.add_callback(train_metrics_callback)

trainer.train()

In [None]:
# Per-epoch accuracy recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_accuracy, 'bo-', label='Training accuracy')
plt.plot(epochs, history.eval_accuracy, 'ro-', label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy for 2-class augmented dataset')
plt.legend()

plt.show()

In [None]:
# Per-epoch loss recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_loss, 'bo-', label='Training loss')
plt.plot(epochs, history.eval_loss, 'ro-', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for 2-class augmented dataset')
plt.legend()

plt.show()

# Train for 3 class dataset

In [None]:
import torch

In [None]:
def compute_metrics(eval_pred):
    """Score a (logits, labels) pair with the module-level ``metric``."""
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)


class ComputeTrainMetricsCallback(TrainerCallback):
    """Record train/validation accuracy and cross-entropy loss after each epoch.

    The lists ``train_accuracy``, ``eval_accuracy``, ``train_loss`` and
    ``eval_loss`` each gain one entry per epoch.
    """

    def __init__(self, trainer=None):
        super().__init__()
        self.trainer = trainer
        self.train_accuracy = []
        self.eval_accuracy = []
        self.train_loss = []
        self.eval_loss = []

    def _accuracy_and_loss(self, dataset):
        """Run ``trainer.predict`` on *dataset* and return ``(accuracy, loss)``."""
        output = self.trainer.predict(dataset)
        labels = output.label_ids
        logits = np.asarray(output.predictions, dtype=np.float64)

        # log_loss needs class probabilities, not raw logits: softmax them
        # (subtracting the row maximum keeps np.exp from overflowing).
        shifted = logits - logits.max(axis=1, keepdims=True)
        probabilities = np.exp(shifted)
        probabilities /= probabilities.sum(axis=1, keepdims=True)

        accuracy = accuracy_score(labels, np.argmax(logits, axis=1))
        # ``labels=`` must list the classes (one per logit column), not
        # repeat the per-row label array.
        loss = log_loss(labels, probabilities, labels=np.arange(logits.shape[1]))
        return accuracy, loss

    def on_epoch_end(self, args, state: TrainerState, control: TrainerControl, **kwargs):
        if self.trainer is None:
            raise ValueError("Trainer not set for ComputeTrainMetricsCallback")

        train_accuracy, train_loss = self._accuracy_and_loss(self.trainer.train_dataset)
        eval_accuracy, eval_loss = self._accuracy_and_loss(self.trainer.eval_dataset)

        self.train_accuracy.append(train_accuracy)
        self.eval_accuracy.append(eval_accuracy)

        self.train_loss.append(train_loss)
        self.eval_loss.append(eval_loss)



def compute_metrics(eval_pred):
    """Score a (logits, labels) pair with the module-level ``metric``."""
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)

In [None]:
# Load the grouped (3-sentence) CSV and shuffle it before splitting.
ds = load_dataset('csv', data_files=path+'groupDataset.csv').shuffle()

# Tokenizer and 3-label classifier; end-of-sequence doubles as padding token.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForSequenceClassification.from_pretrained("gpt2", num_labels=3)
model.config.pad_token_id = model.config.eos_token_id


def tokenize_function(examples):
    """Tokenize the "sample" column with max_length padding and truncation."""
    return tokenizer(examples["sample"], padding="max_length", truncation=True)


tokenized_datasets = ds.map(tokenize_function, batched=True)

# The first 70% of the shuffled rows train the model, the rest validate it.
total_rows = len((ds["train"]["sample"]))
split_at = math.floor(0.7*total_rows)
small_train_dataset = tokenized_datasets["train"].select(range(split_at))
small_eval_dataset = tokenized_datasets["train"].select(range(split_at, total_rows))

metric = evaluate.load("accuracy")
training_args = TrainingArguments(
    output_dir="/content/test_trainer",
    num_train_epochs=3,
    evaluation_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

# The callback needs the trainer itself to run predictions after each epoch.
train_metrics_callback = ComputeTrainMetricsCallback(trainer=trainer)
trainer.add_callback(train_metrics_callback)

trainer.train()

In [None]:
# Per-epoch accuracy recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_accuracy, 'bo-', label='Training accuracy')
plt.plot(epochs, history.eval_accuracy, 'ro-', label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy for 3-class original dataset')
plt.legend()

plt.show()

In [None]:
# Per-epoch loss recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_loss, 'bo-', label='Training loss')
plt.plot(epochs, history.eval_loss, 'ro-', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for 3-class original dataset')
plt.legend()

plt.show()

# Train on new 3 class dataset (original + rephrased sentences)

In [None]:
# Load the grouped original + rephrased CSV and shuffle it before splitting.
ds = load_dataset('csv', data_files=path+'groupDatasetOPR.csv').shuffle()

# Tokenizer and 3-label classifier; end-of-sequence doubles as padding token.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForSequenceClassification.from_pretrained("gpt2", num_labels=3)
model.config.pad_token_id = model.config.eos_token_id


def tokenize_function(examples):
    """Tokenize the "sample" column with max_length padding and truncation."""
    return tokenizer(examples["sample"], padding="max_length", truncation=True)


tokenized_datasets = ds.map(tokenize_function, batched=True)

# The first 70% of the shuffled rows train the model, the rest validate it.
total_rows = len((ds["train"]["sample"]))
split_at = math.floor(0.7*total_rows)
small_train_dataset = tokenized_datasets["train"].select(range(split_at))
small_eval_dataset = tokenized_datasets["train"].select(range(split_at, total_rows))

metric = evaluate.load("accuracy")
training_args = TrainingArguments(
    output_dir="/content/test_trainer",
    num_train_epochs=3,
    evaluation_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

# The callback needs the trainer itself to run predictions after each epoch.
train_metrics_callback = ComputeTrainMetricsCallback(trainer=trainer)
trainer.add_callback(train_metrics_callback)

trainer.train()

In [None]:
# Per-epoch accuracy recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_accuracy, 'bo-', label='Training accuracy')
plt.plot(epochs, history.eval_accuracy, 'ro-', label='Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy for 3-class augmented dataset')
plt.legend()

plt.show()

In [None]:
# Per-epoch loss recorded by the callback (blue: train, red: validation).
history = train_metrics_callback
epochs = range(1, len(history.eval_accuracy) + 1)

plt.plot(epochs, history.train_loss, 'bo-', label='Training loss')
plt.plot(epochs, history.eval_loss, 'ro-', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss for 3-class augmented dataset')
plt.legend()

plt.show()