# GPT-2

In [None]:
import pandas as pd

# Read "train.csv" from the current working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer="train.csv")

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows; the remaining 70% becomes the training split.
# A fixed random_state keeps the partition reproducible across runs.
df_train, df_temp = train_test_split(df, test_size=0.3, random_state=42)

# Divide the held-out rows 2:1 into validation (20% of all rows) and
# test (10% of all rows).
df_val, df_test = train_test_split(df_temp, test_size=1/3, random_state=42)

# NOTE: `df` was read from "train.csv" earlier in this notebook, so this write
# replaces that file with the 70% split. Re-running the notebook from the top
# would then split the already-reduced file again -- keep a copy of the raw
# data elsewhere.
df_train.to_csv("train.csv", index=False)
# Persist the validation split too; it was previously computed and discarded.
df_val.to_csv("val.csv", index=False)
df_test.to_csv("test.csv", index=False)

In [None]:
# import torch
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification, TextDataset, Trainer, TrainingArguments

# NOTE(review): the CSV schema is not visible in this notebook. The column
# names below are assumptions -- TODO confirm against train.csv before running.
PREMISE_COLUMN = "premise"
HYPOTHESIS_COLUMN = "hypothesis"
LABEL_COLUMN = "label"  # assumed to hold integer class ids in [0, NUM_LABELS)
NUM_LABELS = 3  # 3 labels for NLI: contradiction, entailment, neutral
MAX_LENGTH = 128  # Tokens per example; longer inputs are truncated on the right


class NLIDataset:
    """Map-style dataset of tokenized premise/hypothesis pairs with labels.

    TextDataset (used previously) is a language-modelling helper: it tokenizes
    the raw file contents into fixed-size blocks and provides no ``labels``,
    so a sequence-classification model cannot compute a loss from it. This
    class instead yields one labelled example per CSV row.

    Args:
        csv_path: Path to a CSV file containing PREMISE_COLUMN,
            HYPOTHESIS_COLUMN and LABEL_COLUMN.
        tokenizer: Tokenizer with a pad token configured.
        max_length: Every example is padded/truncated to this many tokens.
    """

    def __init__(self, csv_path, tokenizer, max_length=MAX_LENGTH):
        # `pd` is the pandas module imported in the first cell of the notebook.
        frame = pd.read_csv(csv_path)
        premises = frame[PREMISE_COLUMN].fillna("").astype(str)
        hypotheses = frame[HYPOTHESIS_COLUMN].fillna("").astype(str)
        # GPT-2 has no separator token, so both sentences go into one prompt.
        texts = [
            f"Premise: {premise}\nHypothesis: {hypothesis}"
            for premise, hypothesis in zip(premises, hypotheses)
        ]
        # Pad everything to the same length so the default collator can stack
        # the examples into a batch without a tokenizer-aware collator.
        self.encodings = tokenizer(
            texts,
            truncation=True,
            padding="max_length",
            max_length=max_length,
        )
        self.labels = frame[LABEL_COLUMN].astype(int).tolist()

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        example = {name: values[index] for name, values in self.encodings.items()}
        example["labels"] = self.labels[index]
        return example


# Load pre-trained GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# GPT-2 ships without a pad token; reuse EOS so batches can be padded.
tokenizer.pad_token = tokenizer.eos_token
model = GPT2ForSequenceClassification.from_pretrained("gpt2", num_labels=NUM_LABELS)
# The classification head locates the last non-pad token of each sequence and
# refuses batch sizes > 1 unless the model config knows the pad token id.
model.config.pad_token_id = tokenizer.pad_token_id

# Load NLI dataset
train_dataset = NLIDataset(
    "train.csv",  # Replace with the path to your training dataset file
    tokenizer,
    max_length=MAX_LENGTH,  # Adjust the maximum length according to your dataset
)
# The held-out "test.csv" split written by the previous cell is used for evaluation.
val_dataset = NLIDataset(
    "test.csv",  # Replace with the path to your evaluation dataset file
    tokenizer,
    max_length=MAX_LENGTH,
)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./nli_results",
    overwrite_output_dir=True,
    num_train_epochs=3,  # Adjust the number of epochs as needed
    per_device_train_batch_size=8,  # Adjust batch size based on GPU memory
    per_device_eval_batch_size=8,
    warmup_steps=500,  # Adjust warmup steps
    weight_decay=0.01,  # Adjust weight decay
    logging_dir="./logs",
)

# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Fine-tune the model
trainer.train()
trainer.evaluate()