In [1]:
import logging
import fastwer
import numpy as np
import wandb
import torch.multiprocessing
from transformers import RobertaConfig, EncoderDecoderConfig
# Select the 'file_system' strategy for tensors shared between processes.
# NOTE(review): presumably chosen to avoid running out of file descriptors
# when multiprocessing is used later in the notebook — confirm.
torch.multiprocessing.set_sharing_strategy('file_system')

import pandas as pd
from simpletransformers.seq2seq import (
    Seq2SeqModel,
    Seq2SeqArgs,
)

import pandas as pd

def count_matches(labels, preds):
    """Count the positions at which a label equals its prediction.

    Pairs are formed with ``zip``, so the comparison stops at the end of
    the shorter of the two sequences.

    Args:
        labels: Iterable of reference values.
        preds: Iterable of predicted values, aligned with ``labels``.

    Returns:
        int: Number of pairs whose two members compare equal.
    """
    matches = 0
    for expected, predicted in zip(labels, preds):
        if expected == predicted:
            matches += 1
    return matches

def get_wer(labels, preds):
    """Average the per-sentence fastwer score over aligned label/prediction pairs.

    Each pair is scored with ``fastwer.score_sent(pred, label)``, i.e. the
    prediction is passed first and the label second. Pairs are formed with
    ``zip``, so scoring stops at the end of the shorter sequence.
    NOTE(review): presumably the score is a word error rate — confirm the
    unit and defaults against the fastwer documentation.

    Args:
        labels: Iterable of reference sentences.
        preds: Iterable of predicted sentences, aligned with ``labels``.

    Returns:
        The ``np.mean`` of the individual sentence scores.
    """
    sentence_scores = []
    for reference, hypothesis in zip(labels, preds):
        sentence_scores.append(fastwer.score_sent(hypothesis, reference))
    return np.mean(sentence_scores)


# Emit INFO-level records from the root logger, but only WARNING and above
# from the "transformers" logger.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Seq2Seq configuration passed to the model below.
# NOTE(review): several of these settings (epochs, evaluate_during_training,
# train_batch_size, wandb_project) look training-oriented, while the visible
# cells only call predict() — confirm which are still needed here.
model_args = Seq2SeqArgs()
model_args.num_train_epochs = 1
model_args.evaluate_generated_text = True
model_args.evaluate_during_training = True
model_args.evaluate_during_training_verbose = False
model_args.tensorboard_dir = "runs"
model_args.max_length = 50
model_args.train_batch_size = 20
model_args.overwrite_output_dir = True
model_args.wandb_project = "cs224u"
model_args.use_multiprocessed_decoding = True

# Load the saved encoder/decoder pair from ./outputs/best_model.
# use_cuda follows the actual hardware instead of being hard-coded to True,
# so the notebook also runs (on CPU) when no CUDA device is available.
model = Seq2SeqModel(
    encoder_type="bert",
    encoder_name="./outputs/best_model/encoder",
    decoder_name="./outputs/best_model/decoder",
    args=model_args,
    use_cuda=torch.cuda.is_available(),
)

In [2]:
# Load the test DataFrame from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load a
# test.pkl that comes from a trusted source.
test_df = pd.read_pickle("test.pkl")

In [3]:
# Smoke-test the loaded model on one hand-written, misspelled sentence.
model.predict(["Hee walks dogks"])

HBox(children=(FloatProgress(value=0.0, description='Generating outputs', max=1.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Decoding outputs', max=1.0, style=ProgressStyle(descripti…




['she walks dogs. she walks. steps steps steps. steps steps. steps steps. steps steps. steps steps steps. steps steps. steps steps. steps steps steps. steps steps.']

In [4]:
# Extract the 'input_text' column as a plain Python list; it is the input
# handed to model.predict() in the next cell.
testlist = test_df['input_text'].tolist()
# Show how many inputs there are.
len(testlist)

2000

In [5]:
# Run the model over every test input and keep the generated strings.
outlist = model.predict(testlist)

HBox(children=(FloatProgress(value=0.0, description='Generating outputs', max=250.0, style=ProgressStyle(descr…




HBox(children=(FloatProgress(value=0.0, description='Decoding outputs', max=2000.0, style=ProgressStyle(descri…




In [6]:
# Preview only the first few predictions; echoing the entire list floods
# the notebook output and bloats the saved file.
outlist[:5]

['van roke of his last voyage in australia. no more details. no no more. no no, nor nor of it. no no more more. no no, nor of it. no no, nor of',
 'little known for this action was many other ships in the mediterranean. little new adventures in many ways. little new new adventures in many ways in many ways in many ways in many ways in many ways. little new new things.',
 'you have the same idea to write on the song on the song. you had to prededededefined to the song song on the radio. you you had the song on the song. you you had the lyrics',
 'his major specialty is air warfare and space warfare, and space warfare development. his basic ground ground warfare doctrine. his basic basic air air warfare doctrine is air air warfare and airborne to airborne to air warfare. his basic doctrine is air',
 'the palace also has a marriage hall. the marriage hall. the marriage also has a marriage hall. the marriage marriage also also a marriage court court court. the marriage and a marriage court

In [7]:
# Store the predictions in the 'pred_text' column. This mutates test_df in
# place, so any earlier display of the frame no longer reflects its state.
test_df['pred_text'] = outlist

In [8]:
# Display the frame so input, target and predicted text can be compared.
test_df

Unnamed: 0,input_text,target_text,pred_text
0,hanno rot of his expedition in stel,Hannu wrote of his expedition in stone.,van roke of his last voyage in australia. no m...
1,little enla difer for many other meimhberhoods...,Little India differs from many other neighbour...,little known for this action was many other sh...
2,u as us had the obbelity to fogt on the song t...,Users had the ability to vote on the songs the...,you have the same idea to write on the song on...
3,his majors possor is ungariicolmunications a t...,"His major sponsor is Algario Communications, a...",his major specialty is air warfare and space w...
4,the mansion also has a marriage office,The mansion also has a marriage office.,the palace also has a marriage hall. the marri...
...,...,...,...
1995,i wer return ears mashare bebot this mist him,After three years the shareholders dismissed him.,"i'll return with my files, but this is about t..."
1996,one of the man oite arept es an antanpent oftu...,One of the main positive effects is an enhance...,one of the men were killed and several of the ...
1997,we ced stated that he himself with cattolic,Weikath stated that he himself is Catholic.,"he said that he himself had lied with "" catani..."
1998,the judge groled a complaint was not wellid fo...,The judge ruled the complaint was not valid pe...,the judge found the case was not sufficient fo...


In [9]:
# Persist the frame, including the 'pred_text' column, to bert.pkl.
test_df.to_pickle("bert.pkl")