In [1]:
import logging
import fastwer
import numpy as np
import wandb
import torch.multiprocessing
from transformers import RobertaConfig, EncoderDecoderConfig
torch.multiprocessing.set_sharing_strategy('file_system')

import pandas as pd
from simpletransformers.seq2seq import (
    Seq2SeqModel,
    Seq2SeqArgs,
)


# Root logging at INFO; the "transformers" logger is restricted to WARNING
# and above so its messages do not flood the cell output.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Every option for this run lives in one table and is applied to a fresh
# Seq2SeqArgs in the order listed.
# (`no_save = True` is deliberately left out, so checkpoints are written.)
_RUN_OPTIONS = {
    "num_train_epochs": 1,
    "evaluate_generated_text": True,
    "evaluate_during_training": True,
    "evaluate_during_training_verbose": False,
    "tensorboard_dir": "runs",
    "max_length": 50,
    "train_batch_size": 20,
    "overwrite_output_dir": True,
    "wandb_project": "cs224u",
    "use_multiprocessed_decoding": True,
}

model_args = Seq2SeqArgs()
for _option_name, _option_value in _RUN_OPTIONS.items():
    setattr(model_args, _option_name, _option_value)

# config_encoder = RobertaConfig()
# config_decoder = RobertaConfig(is_decoder=True, add_cross_attention=True)
# config_decoder = RobertaConfig()
# config_decoder.is_decoder = True
# config_decoder.add_cross_attention = True
# model_args = {} #{"use_multiprocessed_decoding": True}
# config = EncoderDecoderConfig.from_encoder_decoder_configs(config_encoder, config_decoder)
# encoder_decoder_name = "roberta"
# model = EncoderDecoderModel(config=config)
# Build the encoder-decoder model from a "roberta-base" encoder and a
# "bert-base-cased" decoder, configured by `model_args`.
model = Seq2SeqModel(
    encoder_type="roberta",
    encoder_name="roberta-base",
    decoder_name="bert-base-cased",
    args=model_args,
    # Request the GPU only when one is actually visible, so the notebook can
    # still build the model on a CPU-only machine. On a CUDA machine this is
    # the same as the previous hard-coded `True`.
    use_cuda=torch.cuda.is_available(),
)
# config=config,

# Initialize model
# model = Seq2SeqModel(
#     encoder_decoder_type="bart",
#     encoder_decoder_name="facebook/bart-large",
#     args=model_args,
#     use_cuda=True,
# )

# model = Seq2SeqModel(encoder_decoder_type="bart", encoder_decoder_name="./outputs/best_model", args=model_args, use_cuda=True,)

# model = Seq2SeqModel(
#     encoder_type="bert",
#     encoder_name="bert-base-uncased",
#     decoder_name="bert-base-uncased",
#     args=model_args,
#     use_cuda=True,
# )


def count_matches(labels, preds):
    """Count how many predictions are equal to their corresponding label.

    Labels and predictions are paired positionally with ``zip``, so the
    comparison stops at the end of the shorter sequence.

    Args:
        labels: Sequence of reference values.
        preds: Sequence of predicted values.

    Returns:
        The number of positions where ``label == pred`` (an ``int``).
    """
    n_equal = 0
    for expected, predicted in zip(labels, preds):
        if expected == predicted:
            n_equal += 1
    return n_equal

def get_wer(labels, preds):
    """Average the per-sentence ``fastwer.score_sent`` score over all pairs.

    Labels and predictions are paired positionally with ``zip``. For each
    pair the prediction is passed to ``fastwer.score_sent`` as the first
    argument and the label as the second.

    Args:
        labels: Sequence of reference sentences.
        preds: Sequence of predicted sentences.

    Returns:
        ``np.mean`` of the per-pair scores.
    """
    sentence_scores = []
    for reference, hypothesis in zip(labels, preds):
        sentence_scores.append(fastwer.score_sent(hypothesis, reference))
    return np.mean(sentence_scores)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertLMHeadModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertLMHeadModel from the checkpoint of a model trained on another task or with another a

In [2]:
import pandas as pd

# Load the pickled train/dev splits and drop rows with missing values from
# BOTH of them. Previously only the training split was cleaned, so a NaN in
# the dev split could still reach tokenisation or the string-based metrics
# during evaluation.
# NOTE(review): read_pickle executes arbitrary code embedded in the file;
# only load pickles produced by a trusted pipeline.
train_df = pd.read_pickle("train.pkl").dropna()
dev_df = pd.read_pickle("dev.pkl").dropna()

INFO:numexpr.utils:NumExpr defaulting to 4 threads.


In [3]:
# train_df = train_df.head(200)

In [4]:
# dev_df = dev_df.head(200)

In [None]:
# Start the Weights & Biases run that tracks this training session.
wandb.init(project='cs224u', entity='gbanerje')

# Record the learning rate the model is actually configured with. The old
# hard-coded 0.01 was only ever written to the W&B config and never handed
# to the model, so the dashboard showed a value that was not in use.
config = wandb.config
config.learning_rate = model_args.learning_rate

# Train with evaluation on the dev split; `matches` and `wer` are passed as
# extra metric callables. The W&B run is closed in `finally` so that it is
# not left open if training raises.
try:
    model.train_model(
        train_df,
        eval_data=dev_df,
        matches=count_matches,
        wer=get_wer,
        show_running_loss=True,
        args={'fp16': False},
    )
finally:
    # `wandb.finish()` replaces the deprecated `wandb.join()`.
    wandb.finish()

# Final evaluation on the dev split with the same metric callables that were
# used during training, so the numbers are comparable; display the result.
results = model.eval_model(dev_df, matches=count_matches, wer=get_wer)
results

[34m[1mwandb[0m: Currently logged in as: [33mgbanerje[0m (use `wandb login --relogin` to force relogin)


INFO:simpletransformers.seq2seq.seq2seq_utils: Creating features from dataset file at cache_dir/


HBox(children=(FloatProgress(value=0.0, max=16163.0), HTML(value='')))

INFO:simpletransformers.seq2seq.seq2seq_utils: Saving features into cached file cache_dir/roberta-base-bert-base-cased_cached_12816163





INFO:simpletransformers.seq2seq.seq2seq_model: Training started


HBox(children=(FloatProgress(value=0.0, description='Epoch', max=1.0, style=ProgressStyle(description_width='i…

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 1', max=809.0, style=ProgressStyle(des…

In [5]:
# Spot-check the model on a handful of hand-written noisy inputs.
noisy_samples = [
    "Hee walks dogks",
    "Hai my precous boi",
    "tteko",
    "e trade often coing sides with other traes",
    "he kepts extensive nodes on a cosing playurs",
]
model.predict(noisy_samples)

HBox(children=(FloatProgress(value=0.0, description='Generating outputs', max=1.0, style=ProgressStyle(descrip…








HBox(children=(FloatProgress(value=0.0, description='Decoding outputs', max=5.0, style=ProgressStyle(descripti…












['', '', '', '', '']

In [7]:
# Preview the first five rows of the dev split.
dev_df.head(n=5)

Unnamed: 0,input_text,target_text
0,the coma sat to te parnting afternoon and the ...,Takuma Sato's disappointing afternoon ended wi...
1,leat remark many has fhurtorexxpended to inclu...,Tony Roma's menu has further expanded to inclu...
2,there is nobody that's rich and stupid and not...,There's nobody that rich and stupid and narcis...
3,e trade often coing sides with other traes wit...,The trail often coincides with other trails wi...
4,he kepts extensive nodes on a cosing playurs i...,He kept extensive notes on opposing players an...


In [9]:
# Single-sentence prediction check.
model.predict(["Hee woks dogks"])

HBox(children=(FloatProgress(value=0.0, description='Generating outputs', max=1.0, style=ProgressStyle(descrip…




['Hee woks!']

In [6]:
# Show the repr of the current `model` object to confirm which instance is live.
model

<simpletransformers.seq2seq.seq2seq_model.Seq2SeqModel at 0x7f95b0f93d50>

In [1]:
import logging
import fastwer
import numpy as np
import wandb
import torch.multiprocessing
from transformers import RobertaConfig, EncoderDecoderConfig
torch.multiprocessing.set_sharing_strategy('file_system')

import pandas as pd
from simpletransformers.seq2seq import (
    Seq2SeqModel,
    Seq2SeqArgs,
)


# Root logging at INFO; the "transformers" logger is restricted to WARNING
# and above so its messages do not flood the cell output.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Every option for this session lives in one table and is applied to a fresh
# Seq2SeqArgs in the order listed.
# (`no_save = True` is deliberately left out, so checkpoints are written.)
_RUN_OPTIONS = {
    "num_train_epochs": 10,
    "evaluate_generated_text": True,
    "evaluate_during_training": True,
    "evaluate_during_training_verbose": False,
    "tensorboard_dir": "runs",
    "max_length": 50,
    "train_batch_size": 10,
    "overwrite_output_dir": True,
    "wandb_project": "cs224u",
    "use_multiprocessed_decoding": True,
}

model_args = Seq2SeqArgs()
for _option_name, _option_value in _RUN_OPTIONS.items():
    setattr(model_args, _option_name, _option_value)

# Reload the best checkpoint for inference.
#
# The encoder/decoder directories are loaded as saved, without the
# hand-built EncoderDecoderConfig that was passed here before. That config
# was assembled from default RobertaConfig() objects, so it carried none of
# the checkpoint's settings, and a RoBERTa decoder config does not describe
# the "bert-base-cased" decoder this notebook pairs with the RoBERTa encoder
# when it builds the model for training.
# NOTE(review): assumes ./outputs/best_model was written by that training
# run and that each sub-directory holds its own saved config — confirm.
model = Seq2SeqModel(
    encoder_type="roberta",
    encoder_name="./outputs/best_model/encoder",
    decoder_name="./outputs/best_model/decoder",
    args=model_args,
    # Request the GPU only when one is actually visible, so the checkpoint
    # can also be loaded on a CPU-only machine.
    use_cuda=torch.cuda.is_available(),
)

In [3]:
# Run the reloaded model on one noisy sentence.
model.predict(["the coma sat to te parnting afternoon and the"])

HBox(children=(FloatProgress(value=0.0, description='Generating outputs', max=1.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Decoding outputs', max=1.0, style=ProgressStyle(descripti…




['The sat to the the the the the the.']