In [1]:
import logging
import fastwer
import numpy as np
import wandb
import torch.multiprocessing
from transformers import EncoderDecoderConfig, BertConfig
# Select the 'file_system' strategy for sharing tensors between processes.
# NOTE(review): presumably chosen to avoid running out of file descriptors in
# worker processes — confirm this is still required.
torch.multiprocessing.set_sharing_strategy('file_system')

import pandas as pd
from aamod.seq2seq import (
    Seq2SeqModel,
    Seq2SeqArgs,
)
# from simpletransformersmod.seq2seq import (
#     Seq2SeqModel,
#     Seq2SeqArgs,
# )

# Logging setup: the "transformers" logger is capped at WARNING, while the
# root logger is configured at INFO for everything else.
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)
logging.basicConfig(level=logging.INFO)

# Start from the default Seq2SeqArgs and apply the overrides below as plain
# attribute assignments, in the order listed.
model_args = Seq2SeqArgs()
_arg_overrides = {
    "num_train_epochs": 1,
    # "no_save": True,
    "evaluate_generated_text": True,
    "evaluate_during_training": True,
    "evaluate_during_training_verbose": False,
    "tensorboard_dir": "runs",
    "max_length": 50,
    "train_batch_size": 25,
    "overwrite_output_dir": True,
    "wandb_project": "cs224u",
    "use_multiprocessed_decoding": True,
    "cache_dir": "./cache_dir/",
    "eval_batch_size": 25,
}
for _option, _value in _arg_overrides.items():
    setattr(model_args, _option, _value)

# Build a default BertConfig for each side. The decoder copy is flagged as a
# decoder with cross-attention enabled, then both are combined into a single
# EncoderDecoderConfig.
config_encoder = BertConfig()
config_decoder = BertConfig()
config_decoder.is_decoder = True
config_decoder.add_cross_attention = True
config = EncoderDecoderConfig.from_encoder_decoder_configs(config_encoder, config_decoder)
# config.use_return_dict = False
encoder_decoder_name = "characterbert"

# Load the encoder and decoder weights saved under ./outputs/best_model.
model = Seq2SeqModel(
    encoder_decoder_type="characterbert",
    encoder_name="./outputs/best_model/encoder",
    decoder_name="./outputs/best_model/decoder",
    args=model_args,
    config=config,
    # Request the GPU only when CUDA is actually available, rather than
    # hard-coding True, so the notebook is not tied to a GPU machine.
    use_cuda=torch.cuda.is_available(),
)
# model = Seq2SeqModel(encoder_decoder_type="bart", encoder_decoder_name="./outputs/best_model", args=model_args, use_cuda=True)

In [2]:
# Load the test DataFrame; later cells read its 'input_text' column.
# NOTE(review): unpickling can execute arbitrary code — only load "test.pkl"
# if it comes from a trusted source.
test_df = pd.read_pickle("test.pkl")

In [3]:
# Quick sanity check: run the model on a handful of hand-written noisy inputs
# and display the returned predictions.
_sample_inputs = [
    "Hee walks dogks",
    "Hai my precous boi",
    "tteko",
    "e trade often coing sides with other traes",
    "he kepts extensive nodes on a cosing playurs",
]
model.predict(_sample_inputs)

HBox(children=(FloatProgress(value=0.0, description='Generating outputs', max=1.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Decoding outputs', max=5.0, style=ProgressStyle(descripti…




['i walk home...',
 'my friend, my friend, likes me....................',
 'what do you want?',
 'trade trade often involved trade routes................................',
 'he kept his own books on his own. keep keep keep your own notes. on on your screen. play play play playlist. play play play playlist. play play play playlist. play play play play']

In [4]:
# Extract the model inputs from the test frame as a plain Python list, then
# show how many there are.
_input_column = test_df["input_text"]
testlist = _input_column.tolist()
len(testlist)

2000

In [5]:
# Generate predictions for every entry of testlist in a single predict call.
outlist = model.predict(testlist)

HBox(children=(FloatProgress(value=0.0, description='Generating outputs', max=80.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Decoding outputs', max=2000.0, style=ProgressStyle(descri…




In [6]:
# Preview only the first few predictions; echoing the whole list would put
# every generated string into the notebook output.
outlist[:10]

['all of this was a dream. in his travels.. in his travels... in his dream............ in',
 'many of these features have become common in many languages and many languages. in many languages in many languages. in many languages languages in many languages languages in many languages........',
 'you have the ability to do that, you should say it. s the song, you are on the song., e e e e e e e e e e e e e e e e e e e e e e e',
 'his major research is in the communication industry, communications, communications, and communications.. e. e. e. e e.. e e. e e e.. e e.. e. e. g. e e.',
 'the palace also has a marriage hall...............',
 "man can fly and fly over water on a blue glider. and it can it. it's a sail sail sail sail sail sail sail sail sail sail sail sail sail sail. it sails it on its sail sail sail sail",
 'many of the books have been collected in a collection. pieces of scrap scraps. s.',
 'and her sister and sister jumped off a bike. and and then nearly nearly falling. an

In [7]:
# Attach the predictions to the test frame as a 'pred_text' column.
# This mutates test_df in place.
test_df['pred_text'] = outlist

In [8]:
# Display the test frame so inputs, targets and predictions can be compared
# side by side.
test_df

Unnamed: 0,input_text,target_text,pred_text
0,hanno rot of his expedition in stel,Hannu wrote of his expedition in stone.,all of this was a dream. in his travels.. in h...
1,little enla difer for many other meimhberhoods...,Little India differs from many other neighbour...,many of these features have become common in m...
2,u as us had the obbelity to fogt on the song t...,Users had the ability to vote on the songs the...,"you have the ability to do that, you should sa..."
3,his majors possor is ungariicolmunications a t...,"His major sponsor is Algario Communications, a...",his major research is in the communication ind...
4,the mansion also has a marriage office,The mansion also has a marriage office.,the palace also has a marriage hall..............
...,...,...,...
1995,i wer return ears mashare bebot this mist him,After three years the shareholders dismissed him.,"i'll return to my office, but this is a strang..."
1996,one of the man oite arept es an antanpent oftu...,One of the main positive effects is an enhance...,one of the men were killed by an unknown force...
1997,we ced stated that he himself with cattolic,Weikath stated that he himself is Catholic.,he said that he was unaware of any racial disc...
1998,the judge groled a complaint was not wellid fo...,The judge ruled the complaint was not valid pe...,the court found the complaint was not undecida...


In [9]:
# Persist the test frame (with the 'pred_text' column added above) to disk.
test_df.to_pickle("charb.pkl")