In [None]:
import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from sklearn.model_selection import train_test_split

In [None]:
# Use the GPU when torch can see one; otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CHECKPOINT = "microsoft/DialoGPT-medium"

# Right-side padding keeps the real tokens at the start of every sequence.
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT, padding_side="right")
if tokenizer.pad_token is None:
    # No dedicated pad token on this tokenizer: reuse end-of-sequence for padding.
    tokenizer.pad_token = tokenizer.eos_token

# Load the pretrained causal LM, register EOS as its pad id, then move it to DEVICE.
model = AutoModelForCausalLM.from_pretrained(
    CHECKPOINT,
    pad_token_id=tokenizer.eos_token_id,
)
model = model.to(DEVICE)

In [None]:
# Read the training table from a CSV file located relative to the working directory.
data = pd.read_csv("train_data.csv")
# Show the frame's dimensions as the cell output, as a quick check of the load.
data.shape

In [None]:
# Build one training string per row: the context followed by the response,
# joined by a single space, stored in a new "input" column.
# NOTE(review): DialoGPT's published usage separates dialogue turns with the
# EOS token — confirm a plain space is the intended separator here.
context_text = data["context"]
response_text = data["response"]
data["input"] = context_text + " " + response_text

In [None]:
# Wrap the pandas frame in a `datasets.Dataset` so it can be split and mapped below.
conv = Dataset.from_pandas(data)
# Display the dataset summary as the cell output.
conv

In [None]:
def encode(examples, max_length=128):
    """Tokenize a batch of ``input`` strings and attach causal-LM labels.

    Every sequence is truncated/padded to ``max_length`` tokens. Labels are a
    copy of ``input_ids`` with padded positions replaced by ``-100`` so that
    padding does not contribute to the language-modelling loss. Because this
    notebook pads with the EOS token, the first EOS that directly follows the
    real tokens is kept as a target, so the model is still trained to end its
    reply; only the remaining padding is masked.

    Args:
        examples: Batch mapping that holds a list of strings under ``'input'``
            (the shape ``Dataset.map(..., batched=True)`` passes in).
        max_length: Fixed sequence length to truncate/pad to. Defaults to 128.

    Returns:
        The tokenizer output (``input_ids``, ``attention_mask``, ...) with an
        added ``'labels'`` entry: one list of ints per example.
    """
    # Plain Python lists are enough here: the result is stored by Dataset.map,
    # so there is no need to materialise torch tensors per batch.
    encoded = tokenizer(
        examples['input'],
        truncation=True,
        padding='max_length',
        max_length=max_length,
    )
    eos_id = tokenizer.eos_token_id

    labels = []
    for ids, mask in zip(encoded['input_ids'], encoded['attention_mask']):
        # Keep real tokens as targets; mask everything the attention mask ignores.
        row = [tok if keep else -100 for tok, keep in zip(ids, mask)]
        # Index right after the last real token (0 when the row has none).
        stop_at = max((i for i, keep in enumerate(mask) if keep), default=-1) + 1
        # Preserve a single end-of-sequence target when the first pad is EOS.
        if 0 < stop_at < len(ids) and ids[stop_at] == eos_id:
            row[stop_at] = eos_id
        labels.append(row)

    encoded['labels'] = labels
    return encoded

In [None]:
# Hold out 20% of the conversations for evaluation. The seed is fixed so the
# same rows land in each split on every Restart & Run All.
trn_df = conv.train_test_split(test_size=0.2, seed=42)
# Tokenize both splits batch-wise with `encode`, which adds the model inputs
# and labels alongside the existing columns.
encoded_data = trn_df.map(encode, batched=True)

In [None]:
# Hyper-parameters for the fine-tuning run; checkpoints are written to "temp_dir".
training_args = TrainingArguments(
    output_dir="temp_dir",
    num_train_epochs=10,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    weight_decay=0.01,
    learning_rate=2e-5,
    logging_dir=None,
    # Mixed precision requires a CUDA device; enabling it unconditionally makes
    # this cell fail on the CPU fallback that the setup cell allows for.
    fp16=torch.cuda.is_available(),
    # Log training metrics every 40 optimisation steps.
    logging_strategy="steps",
    logging_steps=40,
)



In [None]:
# Display the device resolved by the training arguments, to confirm where
# training will run before starting it.
training_args.device

In [None]:
# Pick the two tokenized splits out of the encoded DatasetDict.
train_split = encoded_data["train"]
held_out_split = encoded_data["test"]

# Bundle model, hyper-parameters and data into a Trainer.
# NOTE(review): training_args sets no evaluation schedule in this notebook, so
# the held-out split is presumably only used if evaluation is triggered
# explicitly — confirm that is intended.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=held_out_split,
)

In [None]:
# Run fine-tuning with the model, arguments and datasets bound to `trainer`.
# The cell output is the value returned by `train()`.
trainer.train()

In [None]:
# Persist the fine-tuned model to the "rigby_v2" directory.
trainer.save_model("rigby_v2")
# The Trainer in this notebook was not given the tokenizer, so it is saved
# explicitly; this lets "rigby_v2" be loaded on its own for inference.
tokenizer.save_pretrained("rigby_v2")