In [1]:
import logging
import wandb
import torch.multiprocessing
torch.multiprocessing.set_sharing_strategy('file_system')
import numpy as np
import fastwer

import pandas as pd
from simpletransformers.seq2seq import (
    Seq2SeqModel,
    Seq2SeqArgs,
)


# Log this notebook at INFO, but cap the "transformers" logger at WARNING.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Seq2Seq configuration. The training/evaluation options are kept so this
# notebook shares one configuration object with the training run; the cells
# below only call `model.predict`.
model_args = Seq2SeqArgs()
model_args.num_train_epochs = 15
# model_args.no_save = True
model_args.evaluate_generated_text = True
model_args.evaluate_during_training = True
model_args.evaluate_during_training_verbose = False
model_args.tensorboard_dir = "runs"
model_args.max_length = 200
model_args.train_batch_size = 25
model_args.overwrite_output_dir = True
model_args.wandb_project = "cs224u"
model_args.eval_batch_size = 25

# Initialize model from the checkpoint stored under ./outputs/best_model.
# `use_cuda` follows the actual hardware instead of being hard-coded to True,
# so the notebook also loads on a machine without a GPU. `torch` is already
# bound by the `import torch.multiprocessing` statement at the top of the cell.
model = Seq2SeqModel(
    encoder_decoder_type="bart",
    encoder_decoder_name="./outputs/best_model",
    args=model_args,
    use_cuda=torch.cuda.is_available(),
)

# Alternative kept for reference: a BERT encoder/decoder pair instead of the
# BART checkpoint above.
# model = Seq2SeqModel(
#     encoder_type="bert",
#     encoder_name="bert-base-uncased",
#     decoder_name="bert-base-uncased",
#     args=model_args,
#     use_cuda=True,
# )


def count_matches(labels, preds):
    """Count the positions where a label equals its prediction.

    Args:
        labels: Iterable of reference values.
        preds: Iterable of predicted values, paired positionally with
            ``labels``.

    Returns:
        int: Number of pairs that compare equal with ``==``. Pairing uses
        ``zip``, so any surplus items in the longer iterable are ignored.
    """
    n_equal = 0
    for expected, predicted in zip(labels, preds):
        if expected == predicted:
            n_equal += 1
    return n_equal

def get_wer(labels, preds):
    """Average the per-sentence ``fastwer.score_sent`` value over all pairs.

    Args:
        labels: Iterable of reference sentences.
        preds: Iterable of predicted sentences, paired positionally with
            ``labels`` (``zip`` drops any surplus items of the longer one).

    Returns:
        The ``np.mean`` of the individual sentence scores.
    """
    sentence_scores = []
    for reference, hypothesis in zip(labels, preds):
        # fastwer takes the hypothesis first and the reference second.
        sentence_scores.append(fastwer.score_sent(hypothesis, reference))
    return np.mean(sentence_scores)

In [2]:
# Load the held-out test set from a local pickle.
# NOTE(review): unpickling can execute arbitrary code, so only load a
# "test.pkl" that comes from a trusted source.
test_df = pd.read_pickle("test.pkl")

In [3]:
# Quick sanity check: run the model on a handful of hand-written noisy inputs
# before running it on the full test set.
model.predict([
    "Hee walks dogks",
    "Hai my precous boi",
    "tteko",
    "e trade often coing sides with other traes",
    "he kepts extensive nodes on a cosing playurs",
])

HBox(children=(FloatProgress(value=0.0, description='Generating outputs', max=1.0, style=ProgressStyle(descrip…




['She walks dogs..... she walks dogs',
 "Hai my cousin, you.Hai's cousin.",
 'What do you think?...”',
 'They trade often co-operate with other tribes.',
 'He keeps extensive nodes on a competing platform..']

In [4]:
# Extract the model inputs as a plain Python list for model.predict, and
# display how many inputs there are.
testlist = test_df['input_text'].tolist()
len(testlist)

2000

In [5]:
# Generate a prediction for every input in the test set.
outlist = model.predict(testlist)

HBox(children=(FloatProgress(value=0.0, description='Generating outputs', max=80.0, style=ProgressStyle(descri…




In [6]:
# Preview only the first few predictions: echoing the whole list (one entry
# per test input) floods the notebook output and bloats the saved file.
outlist[:15]

['Hanno wrote of his expedition in Stel.',
 "Little Eilidh's death for many other families in this country is sad in many ways.",
 'You have the ability to fog on the song "The Her".',
 'His major priority is ungariocolomications, a central to communications and software development.',
 'The mansion also has a marriage office...',
 'Hannicopters can land and lunch on its deck.',
 'Thirteen of the series have been collected into train paper backs.',
 'Jane and Ros nearly fell overboard... and Jane and Ros.',
 "We'd like to seek clear weather...",
 'The stream near Ritchie is still called Ridewater River, near Wyndham, Vermont.',
 'It is located near the school on Church Lane.',
 "The show was eventually staged for four weeks at O'Valley House in Cambridge.",
 'Abraham Ham has also played for the Atlanta Falcons and Arizona Cardinals.',
 "The parts were running against each other, so they didn't have to go through rounds.",
 'The burden for this cost is transferred to the department of f

In [7]:
# Store the predictions next to their inputs.
# NOTE(review): this relies on model.predict returning exactly one output per
# input, in input order, so that outlist[i] belongs to row i — confirm.
test_df['pred_text'] = outlist

In [8]:
# Inspect inputs, reference targets and model predictions side by side.
test_df

Unnamed: 0,input_text,target_text,pred_text
0,hanno rot of his expedition in stel,Hannu wrote of his expedition in stone.,Hanno wrote of his expedition in Stel.
1,little enla difer for many other meimhberhoods...,Little India differs from many other neighbour...,Little Eilidh's death for many other families ...
2,u as us had the obbelity to fogt on the song t...,Users had the ability to vote on the songs the...,"You have the ability to fog on the song ""The H..."
3,his majors possor is ungariicolmunications a t...,"His major sponsor is Algario Communications, a...","His major priority is ungariocolomications, a ..."
4,the mansion also has a marriage office,The mansion also has a marriage office.,The mansion also has a marriage office...
...,...,...,...
1995,i wer return ears mashare bebot this mist him,After three years the shareholders dismissed him.,I'll return home to meet this guy..
1996,one of the man oite arept es an antanpent oftu...,One of the main positive effects is an enhance...,One of the main attractions is an antelope far...
1997,we ced stated that he himself with cattolic,Weikath stated that he himself is Catholic.,Weck stated that he himself with Christ the Ch...
1998,the judge groled a complaint was not wellid fo...,The judge ruled the complaint was not valid pe...,The judge ruled the complaint was not fit for ...


In [9]:
# Persist the frame, including the new pred_text column, to disk.
test_df.to_pickle("distilbart_output.pkl")