In [1]:
import logging

import pandas as pd
from simpletransformers.t5 import T5Model, T5Args

from sklearn.model_selection import train_test_split

# Keep the `transformers` library quiet: only warnings and above get through.
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Everything else logs at INFO so progress messages show up in cell output.
logging.basicConfig(level=logging.INFO)

In [4]:
# Each row of the file is split on "." into three fields; the third field is
# named "empty" and is discarded right after loading.
raw_columns = ["input_text", "target_text", "empty"]
data_df = pd.read_csv(
    "./data/PassiveDataWidderTSV.tsv",
    sep=".",
    names=raw_columns,
).drop(columns=["empty"])

# Put the task prefix in the first column, ahead of the input/target text.
data_df.insert(loc=0, column="prefix", value="en_passive_to_active")

data_df.head()

Unnamed: 0,prefix,input_text,target_text
0,en_passive_to_active,the curious penguin was eaten by the curious c...,the curious chicken near the playground ...
1,en_passive_to_active,a big penguin was complimented by a cow in the...,a cow in the stadium complimented a bi...
2,en_passive_to_active,a goofy rabbit was stalked by a goofy fish at ...,a goofy fish at a barn stalked a goofy rabbit
3,en_passive_to_active,a big fish by a field was hated by the happy pig,the happy pig hated a big fish by a field
4,en_passive_to_active,the cow was hated by a teacher,a teacher hated the cow


In [6]:
# Hold out 5% of the sentence pairs for evaluation.
# The split is seeded so that re-running the notebook reproduces the same
# train/eval partition (and therefore comparable evaluation numbers).
SPLIT_SEED = 42
train_df, eval_df = train_test_split(
    data_df,
    test_size=0.05,
    random_state=SPLIT_SEED,
)
train_df.shape

(23750, 3)

In [7]:
# Display the (rows, columns) size of the held-out evaluation split.
eval_df.shape

(1250, 3)

In [None]:
# Configure the fine-tuning run for the "en_passive_to_active" task.
model_args = T5Args()

# Work around the PyTorch "Bad file descriptor" issue.
# See this for more info: https://github.com/ThilinaRajapakse/simpletransformers/issues/789
model_args.process_count = 1
model_args.use_multiprocessing = False
model_args.dataloader_num_workers = 1

# Seed the run so that repeated trainings are reproducible and comparable.
model_args.manual_seed = 42

# Basic model params
model_args.num_train_epochs = 200
model_args.no_save = False               # keep saving enabled
model_args.reprocess_input_data = True   # rebuild features instead of reusing the cache
model_args.overwrite_output_dir = True   # allow re-running into an existing output dir

# Evaluate during training.
# NOTE(review): the recorded output shows several "Generating outputs" passes
# over the 1250 evaluation rows within each epoch; on CPU this presumably
# dominates run time -- confirm whether the evaluation frequency is intended.
model_args.evaluate_generated_text = True
model_args.evaluate_during_training = True
model_args.evaluate_during_training_verbose = False

# Don't save models every checkpoint
model_args.save_eval_checkpoints = False
model_args.save_model_every_epoch = False

# Create the T5 model from the "t5-small" checkpoint; use_cuda=False keeps it on CPU.
model = T5Model("t5-small", args=model_args, use_cuda=False)

# Train the T5 model on the new task, evaluating against the held-out split.
model.train_model(train_df, eval_data=eval_df)

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=23750.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Saving features into cached file cache_dir/t5-small_cached_12823750





INFO:simpletransformers.t5.t5_model: Training started


HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=200.0), HTML(value='')))

HBox(children=(HTML(value='Running Epoch 0 of 200'), FloatProgress(value=0.0, max=2969.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1250.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Saving features into cached file cache_dir/t5-small_cached_1281250





HBox(children=(HTML(value='Generating outputs'), FloatProgress(value=0.0, max=157.0), HTML(value='')))




HBox(children=(HTML(value='Decoding outputs'), FloatProgress(value=0.0, max=1250.0), HTML(value='')))




INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1250.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Saving features into cached file cache_dir/t5-small_cached_1281250





HBox(children=(HTML(value='Generating outputs'), FloatProgress(value=0.0, max=157.0), HTML(value='')))




HBox(children=(HTML(value='Decoding outputs'), FloatProgress(value=0.0, max=1250.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 1 of 200'), FloatProgress(value=0.0, max=2969.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1250.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Saving features into cached file cache_dir/t5-small_cached_1281250





HBox(children=(HTML(value='Generating outputs'), FloatProgress(value=0.0, max=157.0), HTML(value='')))




HBox(children=(HTML(value='Decoding outputs'), FloatProgress(value=0.0, max=1250.0), HTML(value='')))




INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1250.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Saving features into cached file cache_dir/t5-small_cached_1281250





HBox(children=(HTML(value='Generating outputs'), FloatProgress(value=0.0, max=157.0), HTML(value='')))




HBox(children=(HTML(value='Decoding outputs'), FloatProgress(value=0.0, max=1250.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 2 of 200'), FloatProgress(value=0.0, max=2969.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1250.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Saving features into cached file cache_dir/t5-small_cached_1281250





HBox(children=(HTML(value='Generating outputs'), FloatProgress(value=0.0, max=157.0), HTML(value='')))




HBox(children=(HTML(value='Decoding outputs'), FloatProgress(value=0.0, max=1250.0), HTML(value='')))




INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1250.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Saving features into cached file cache_dir/t5-small_cached_1281250





HBox(children=(HTML(value='Generating outputs'), FloatProgress(value=0.0, max=157.0), HTML(value='')))




HBox(children=(HTML(value='Decoding outputs'), FloatProgress(value=0.0, max=1250.0), HTML(value='')))




INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1250.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Saving features into cached file cache_dir/t5-small_cached_1281250





HBox(children=(HTML(value='Generating outputs'), FloatProgress(value=0.0, max=157.0), HTML(value='')))




HBox(children=(HTML(value='Decoding outputs'), FloatProgress(value=0.0, max=1250.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 3 of 200'), FloatProgress(value=0.0, max=2969.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1250.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Saving features into cached file cache_dir/t5-small_cached_1281250





HBox(children=(HTML(value='Generating outputs'), FloatProgress(value=0.0, max=157.0), HTML(value='')))




HBox(children=(HTML(value='Decoding outputs'), FloatProgress(value=0.0, max=1250.0), HTML(value='')))




INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1250.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Saving features into cached file cache_dir/t5-small_cached_1281250





HBox(children=(HTML(value='Generating outputs'), FloatProgress(value=0.0, max=157.0), HTML(value='')))




HBox(children=(HTML(value='Decoding outputs'), FloatProgress(value=0.0, max=1250.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 4 of 200'), FloatProgress(value=0.0, max=2969.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Creating features from dataset file at cache_dir/


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1250.0), HTML(value='')))

INFO:simpletransformers.t5.t5_utils: Saving features into cached file cache_dir/t5-small_cached_1281250





HBox(children=(HTML(value='Generating outputs'), FloatProgress(value=0.0, max=157.0), HTML(value='')))




HBox(children=(HTML(value='Decoding outputs'), FloatProgress(value=0.0, max=1250.0), HTML(value='')))




In [2]:
# This cell needs `model` and `eval_df` from the cells above; run the training
# cell in the same kernel session first, otherwise `model` is undefined.

# Score the fine-tuned model on the held-out split.
results = model.eval_model(eval_df)

# Try the model on one hand-written passive sentence (prefix + ": " + text).
sample_inputs = ["en_passive_to_active: our turkey was eaten by the dog"]
sample_predictions = model.predict(sample_inputs)
print(sample_predictions)

NameError: name 'model' is not defined