In [1]:
import os
import torch
from einops import rearrange

In [2]:
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [4]:
%load_ext autoreload
%autoreload 2

# load data

In [5]:
from transformer_translation.dataset import ParallelLanguageDataset, load_pickle
from torch.utils.data import DataLoader

In [6]:
data_path = r"/home/alex/data/nlp/agmir/transf_processed_data"
#data_path = 'transformer_translation/data/processed'

In [7]:
splits = load_pickle('20200612_splits_transl.pkl')

### train set

In [8]:
num_tokens = 2000
max_seq_length = 96
dataset = ParallelLanguageDataset(
    os.path.join(data_path, 'fra/set.pkl')
    ,os.path.join(data_path, 'eng/set.pkl')
    ,num_tokens
    ,max_seq_length
    ,idxs=splits['train']
)
loader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=4, pin_memory=True)
print(len(dataset))

66


### val set

In [9]:
val_dataset = ParallelLanguageDataset(
    os.path.join(data_path, 'fra/set.pkl')
    ,os.path.join(data_path, 'eng/set.pkl')
    ,num_tokens
    ,max_seq_length
    ,idxs=splits['val']
)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=True, num_workers=4, pin_memory=True)
print(len(val_dataset))

8


### create vocab dicts 

In [10]:
fra_index2word = load_pickle(
    os.path.join(data_path, 'fra', 'voc.pkl'))
eng_index2word = load_pickle(
    os.path.join(data_path, 'eng', 'voc.pkl'))

In [11]:
from tsf_utils import print_nl_pred_vs_tgt

# train

In [12]:
from transformer_translation.model import LanguageTransformer

In [13]:
vocab_size = 10000 + 4#1952#
nhead = 8
d_model = 512# - (587 % nhead) + nhead
num_encoder_layers = 6
num_decoder_layers = 6
dim_feedforward = 2048
pos_dropout = 0.1
trans_dropout = 0.1

model = LanguageTransformer(
    vocab_size, d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward,
    max_seq_length, pos_dropout, trans_dropout
).to(device)

In [14]:
    from transformer_translation.Optim import ScheduledOptim
    import torch.nn as nn
    from torch.optim import Adam
    
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_normal_(p)

    n_warmup_steps = 4000
    optim = ScheduledOptim(
        Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        d_model, n_warmup_steps)

    criterion = nn.CrossEntropyLoss(ignore_index=0)

In [14]:
from tsf_infer_utils import prep_transf_inputs2, infer2, oos_infer_batched2
from torchtext.data.metrics import bleu_score
from tsf_utils import format_list_for_bleu, get_bleu_from_loader

In [134]:
    %%time
    print_every = 15
    num_epochs = 10
    early_stopping_flag = True

    lowest_val = 1e9
    train_losses, val_losses, train_bleu, val_bleu  = {}, [], [], []
    total_step = 0
    
    for epoch in range(num_epochs):
        
        model.train()
        total_loss = 0

        for step, (src, src_key_padding_mask, tgt, tgt_key_padding_mask) in enumerate(iter(loader)):
            total_step += 1

            src, src_key_padding_mask, tgt, tgt_key_padding_mask, memory_key_padding_mask, tgt_inp, tgt_out, tgt_mask = prep_transf_inputs2(
                src, src_key_padding_mask, tgt, tgt_key_padding_mask, device)
            
            optim.zero_grad()
            outputs = model(src, tgt_inp, src_key_padding_mask, tgt_key_padding_mask[:, :-1], memory_key_padding_mask, tgt_mask)
            loss = criterion(rearrange(outputs, 'b t v -> (b t) v'), rearrange(tgt_out, 'b o -> (b o)'))

            loss.backward()
            optim.step_and_update_lr()

            total_loss += loss.item()
            if step % print_every == print_every - 1:
                print(f'Epoch [{epoch + 1} / {num_epochs}] \t Step [{step + 1} / {len(loader)}] \t '
                      'Train Loss: {:.3f}'.format(total_loss / print_every))
                total_loss = 0
            
        if early_stopping_flag:
            model.eval()
            print(f'Epoch [{epoch + 1} / {num_epochs}]:')
            
            # train
            pred_list, tgt_list, loss_per_batch = infer2(model, loader)
            pred_list_bleu, tgt_list_bleu = format_list_for_bleu(pred_list, tgt_list)
            train_bleu.append(bleu_score(pred_list_bleu, tgt_list_bleu))
            print('{} BLEU in-sample: {:.2%}'.format(
                '\ttrain', train_bleu[-1]))
            
            # val IS
            pred_list, tgt_list, loss_per_batch = infer2(model, val_loader)
            val_losses.append(sum(loss_per_batch) / len(loss_per_batch))
            pred_list_bleu, tgt_list_bleu = format_list_for_bleu(pred_list, tgt_list)
            val_bleu.append(bleu_score(pred_list_bleu, tgt_list_bleu))
            print('{} BLEU in-sample: {:.2%}'.format(
                '\tval', val_bleu[-1]))
            
            # val OOS
            #pred_list, tgt_list, tag_list = oos_infer_batched(model, val_loader, max_seq_length)
            print('{} BLEU out-of-sample: {:.2%}'.format(
                '\tval',bleu_score(*format_list_for_bleu(pred_list, tgt_list))
            ))
            
        print('\n')

Epoch [1 / 10] 	 Step [15 / 66] 	 Train Loss: 0.487
Epoch [1 / 10] 	 Step [30 / 66] 	 Train Loss: 0.680
Epoch [1 / 10] 	 Step [45 / 66] 	 Train Loss: 0.678
Epoch [1 / 10] 	 Step [60 / 66] 	 Train Loss: 0.601
Epoch [1 / 10]:
	train BLEU in-sample: 58.46%
	val BLEU in-sample: 32.64%
	val BLEU out-of-sample: 32.64%


Epoch [2 / 10] 	 Step [15 / 66] 	 Train Loss: 0.498
Epoch [2 / 10] 	 Step [30 / 66] 	 Train Loss: 0.536
Epoch [2 / 10] 	 Step [45 / 66] 	 Train Loss: 0.538


KeyboardInterrupt: 

In [14]:
if False:
    from tsf_utils import save_model
    save_model(model, 'transf_transl')

In [15]:
%%capture
PATH = os.path.join(r'models','20200612_1249_model_transf_transl.pth')
model.load_state_dict(torch.load(PATH))
model.to(device)
model.eval()

In [139]:
get_tk_from_proba(outputs)

tensor([[   3,    4,   70,    5,    2,    5,    5],
        [ 130,   79,  575,    5,    2,    5,    5],
        [ 222,   79,  143,    5,    2,    5,    5],
        [   3,    4,   24,    5,    2,    5,    5],
        [   3,    4,   93,    5,    2,    5,    5],
        [ 130,   79,  215,    5,    2,    5,    5],
        [ 130,  125,  703,    5,    2,    5,    5],
        [   3,    4, 1752,    5,    2,    5,    5],
        [ 130,   79,  796,    5,    2,    5,    5],
        [  78,   79,  204,    5,    2,    5,    5],
        [  78,   79,  434,    5,    2,    5,    5],
        [   3,    4,   70,    5,    2,    5,    5],
        [ 222,   79,  204,    5,    2,    5,    5],
        [  15,   16,   82,    5,    2,    5,    5],
        [  78,   79,  195,    5,    2,    5,    5],
        [ 130,  125,  383,    5,    2,    5,    5],
        [ 130,  125,   65,    5,    2,    5,    5],
        [ 130,   79,  215,    5,    2,    5,    5],
        [   3,    4,  215,    5,    2,    5,    5],
        [   

In [135]:
%%time
from transformer_translation.dataset import IDX_EOS, IDX_SOS
pred_list, tgt_list, loss_per_batch = infer2(model, val_loader)
pred_list = [[IDX_SOS] + sent for sent in pred_list]

pred_list_bleu, tgt_list_bleu = format_list_for_bleu(pred_list, tgt_list)
val_bleu.append(bleu_score(pred_list_bleu, tgt_list_bleu))
print('{} BLEU in-sample: {:.2%}'.format(
    'val', val_bleu[-1]))

val BLEU in-sample: 39.36%


In [136]:
%%time
from transformer_translation.dataset import IDX_EOS, IDX_SOS
pred_list, tgt_list, loss_per_batch = infer2(model, loader)
pred_list = [[IDX_SOS] + sent for sent in pred_list]

pred_list_bleu, tgt_list_bleu = format_list_for_bleu(pred_list, tgt_list)
train_bleu.append(bleu_score(pred_list_bleu, tgt_list_bleu))
print('{} BLEU in-sample: {:.2%}'.format(
    'train', train_bleu[-1]))

train BLEU in-sample: 67.20%


In [137]:
print_nl_pred_vs_tgt(pred_list, tgt_list, eng_index2word)

TARGET:  SOS we re not sure yet . EOS
PREDICTION:  SOS we re not sure . . EOS


TARGET:  SOS he s cruel and heartless . EOS
PREDICTION:  SOS he s cruel and heartless . EOS


TARGET:  SOS i m not busy either . EOS
PREDICTION:  SOS i m not busy either . EOS


TARGET:  SOS i m not done yet . EOS
PREDICTION:  SOS i m not done yet . EOS


TARGET:  SOS they re about to leave . EOS
PREDICTION:  SOS they are about to leave . EOS


TARGET:  SOS i m not so convinced . EOS
PREDICTION:  SOS i m not so convinced . EOS


TARGET:  SOS you re such a jerk . EOS
PREDICTION:  SOS you re such a jerk . EOS


TARGET:  SOS he isn t at home . EOS
PREDICTION:  SOS he isn t at home . EOS


TARGET:  SOS i m not done yet . EOS
PREDICTION:  SOS i m not done yet . EOS


TARGET:  SOS you re not welcome here . EOS
PREDICTION:  SOS you are no welcome here . EOS


TARGET:  SOS he s an old timer . EOS
PREDICTION:  SOS she is a old old . EOS


TARGET:  SOS you re not welcome here . EOS
PREDICTION:  SOS you re not welcome

PREDICTION:  SOS i m better . EOS


TARGET:  SOS we re different . EOS
PREDICTION:  SOS we re different . EOS


TARGET:  SOS you re overworked . EOS
PREDICTION:  SOS you are overworked . EOS


TARGET:  SOS you re stuck . EOS
PREDICTION:  SOS you re stuck . EOS


TARGET:  SOS they re students . EOS
PREDICTION:  SOS they are students . EOS


TARGET:  SOS you re old . EOS
PREDICTION:  SOS you re old . EOS


TARGET:  SOS you re overworked . EOS
PREDICTION:  SOS you re overworked . EOS


TARGET:  SOS i m azerbaijani . EOS
PREDICTION:  SOS i m azerbaijani . EOS


TARGET:  SOS you are sharp . EOS
PREDICTION:  SOS you re clever . EOS


TARGET:  SOS we re powerful . EOS
PREDICTION:  SOS we re powerful . EOS


TARGET:  SOS you re rude . EOS
PREDICTION:  SOS you re rude . EOS


TARGET:  SOS you re lost . EOS
PREDICTION:  SOS you re lost . EOS


TARGET:  SOS we re resilient . EOS
PREDICTION:  SOS we re resilient . EOS


TARGET:  SOS you re punctual . EOS
PREDICTION:  SOS you re punctual . EOS


TA



TARGET:  SOS we re all going . EOS
PREDICTION:  SOS we re all going . EOS


TARGET:  SOS they are all alike . EOS
PREDICTION:  SOS they are all alike . EOS


TARGET:  SOS you re very astute . EOS
PREDICTION:  SOS you re very brave . EOS


TARGET:  SOS he s a liar . EOS
PREDICTION:  SOS he s a liar . EOS


TARGET:  SOS you re still young . EOS
PREDICTION:  SOS you re still young . EOS


TARGET:  SOS i m just curious . EOS
PREDICTION:  SOS i m just curious . EOS


TARGET:  SOS we re all scared . EOS
PREDICTION:  SOS we re all afraid . EOS


TARGET:  SOS i m quite serious . EOS
PREDICTION:  SOS i m very serious . EOS


TARGET:  SOS you re very tall . EOS
PREDICTION:  SOS you are very big . EOS


TARGET:  SOS you aren t looking . EOS
PREDICTION:  SOS you aren t looking . EOS


TARGET:  SOS i m very happy . EOS
PREDICTION:  SOS i m very happy . EOS


TARGET:  SOS you are very insensitive . EOS
PREDICTION:  SOS you are very very . EOS


TARGET:  SOS you re scaring me . EOS
PREDICTION:  SOS



TARGET:  SOS i m from tokyo . EOS
PREDICTION:  SOS i m from france . EOS


TARGET:  SOS he is very learned . EOS
PREDICTION:  SOS he is very very . EOS


TARGET:  SOS you re the oldest . EOS
PREDICTION:  SOS you re the oldest . EOS


TARGET:  SOS you re very curious . EOS
PREDICTION:  SOS you re very curious . EOS


TARGET:  SOS we re getting closer . EOS
PREDICTION:  SOS we re getting getting . EOS


TARGET:  SOS you re too loud . EOS
PREDICTION:  SOS you re too loud . EOS


TARGET:  SOS you re so pathetic . EOS
PREDICTION:  SOS you re so picky . EOS


TARGET:  SOS we re taking over . EOS
PREDICTION:  SOS we re out . . EOS


TARGET:  SOS you re very efficient . EOS
PREDICTION:  SOS you re very efficient . EOS


TARGET:  SOS you re very timid . EOS
PREDICTION:  SOS you re very timid . EOS


TARGET:  SOS you are the one . EOS
PREDICTION:  SOS you are the one . EOS


TARGET:  SOS you re very wise . EOS
PREDICTION:  SOS you re very wise . EOS


TARGET:  SOS i m studying french . EOS
PRE


TARGET:  SOS he is not likely to succeed . EOS
PREDICTION:  SOS he is more likely to be . EOS


TARGET:  SOS we re totally cool with that . EOS
PREDICTION:  SOS we re totally ignorant with this . EOS


TARGET:  SOS i am sure of his success . EOS
PREDICTION:  SOS i am sure his his success . EOS


TARGET:  SOS i am happy with my girlfriend . EOS
PREDICTION:  SOS i m glad with my girlfriend . EOS


TARGET:  SOS i m ready for a break . EOS
PREDICTION:  SOS i m ready for a job . EOS


TARGET:  SOS they re two very different things . EOS
PREDICTION:  SOS they are both very busy people . EOS


TARGET:  SOS i m surprised to see you . EOS
PREDICTION:  SOS i m surprised to save you . EOS


TARGET:  SOS he is an authority on china . EOS
PREDICTION:  SOS he is an authority on china . EOS


TARGET:  SOS she s a very wise mother . EOS
PREDICTION:  SOS she s a very beautiful than . EOS


TARGET:  SOS i m here to help you . EOS
PREDICTION:  SOS i m here to help you . EOS


TARGET:  SOS she s a very n

TARGET:  SOS he s not young anymore . EOS
PREDICTION:  SOS he s no young anymore . EOS


TARGET:  SOS i m smarter than you . EOS
PREDICTION:  SOS i m smarter than you . EOS


TARGET:  SOS i m not your son . EOS
PREDICTION:  SOS i m not your son . EOS


TARGET:  SOS we re all done here . EOS
PREDICTION:  SOS we re all done here . EOS


TARGET:  SOS they re thirty dollars each . EOS
PREDICTION:  SOS they are thirty dollars each . EOS


TARGET:  SOS you re a little late . EOS
PREDICTION:  SOS you re a bit late . EOS


TARGET:  SOS they re anxious for peace . EOS
PREDICTION:  SOS they are anxious for peace . EOS


TARGET:  SOS we re not your enemy . EOS
PREDICTION:  SOS we re not your enemy . EOS


TARGET:  SOS we re not dead yet . EOS
PREDICTION:  SOS we re not dead yet . EOS


TARGET:  SOS you are such a liar ! EOS
PREDICTION:  SOS you re such a filthy . EOS


TARGET:  SOS you re looking very well . EOS
PREDICTION:  SOS you re looking very well . EOS


TARGET:  SOS we are brother and sis

PREDICTION:  SOS i m not good at this . EOS


TARGET:  SOS she is quite a clever girl . EOS
PREDICTION:  SOS she s a as smart girl . EOS


TARGET:  SOS he is sometimes absent from school . EOS
PREDICTION:  SOS he is absent from from school . EOS


TARGET:  SOS i m proud of you guys . EOS
PREDICTION:  SOS i m proud of you guys . EOS


TARGET:  SOS he s very likely to come . EOS
PREDICTION:  SOS he is very likely to come . EOS


TARGET:  SOS you re german aren t you ? EOS
PREDICTION:  SOS you re german aren t you ? EOS


TARGET:  SOS i m happy you re here . EOS
PREDICTION:  SOS i m glad you re here . EOS


TARGET:  SOS i m not here for you . EOS
PREDICTION:  SOS i m not here to you . EOS


TARGET:  SOS we re doing all we can . EOS
PREDICTION:  SOS we re doing all all all . EOS


TARGET:  SOS they re lucky to be alive . EOS
PREDICTION:  SOS they re lucky to be be . EOS


TARGET:  SOS you re disappointed aren t you ? EOS
PREDICTION:  SOS you re disappointed aren t you ? EOS


TARGET:  SOS 

PREDICTION:  SOS he is always complaining . EOS . . . .


TARGET:  SOS we re not speaking . EOS PAD PAD PAD PAD
PREDICTION:  SOS we re not talking . EOS . . . .


TARGET:  SOS i m watching tv . EOS PAD PAD PAD PAD
PREDICTION:  SOS i m watching tv . EOS . . . .


TARGET:  SOS i am peeling apples . EOS PAD PAD PAD PAD
PREDICTION:  SOS i m hers apples . EOS . . . .


TARGET:  SOS we re halfway home . EOS PAD PAD PAD PAD
PREDICTION:  SOS we re on home . EOS . . . .


TARGET:  SOS i m learning basque . EOS PAD PAD PAD PAD
PREDICTION:  SOS i m learning french . EOS . . . .


TARGET:  SOS they re not following . EOS PAD PAD PAD PAD
PREDICTION:  SOS they re not talking . EOS . . . .


TARGET:  SOS i m studying french . EOS PAD PAD PAD PAD
PREDICTION:  SOS i am studying french . EOS now . . .


TARGET:  SOS i m pressing charges . EOS PAD PAD PAD PAD
PREDICTION:  SOS i m pressing charges . EOS . . . .


TARGET:  SOS you are imagining things . EOS PAD PAD PAD PAD
PREDICTION:  SOS you are imaginin

TARGET:  SOS you aren t looking . EOS
PREDICTION:  SOS you re t looking for EOS


TARGET:  SOS you re very understanding . EOS
PREDICTION:  SOS you re very understanding . EOS


TARGET:  SOS i m still sleepy . EOS
PREDICTION:  SOS i m still sleepy . EOS


TARGET:  SOS they re all nuts . EOS
PREDICTION:  SOS they re all nuts . EOS


TARGET:  SOS you re very rude . EOS
PREDICTION:  SOS you re very sophisticated . EOS


TARGET:  SOS you are so cute . EOS
PREDICTION:  SOS you re so late . EOS


TARGET:  SOS you re all mad . EOS
PREDICTION:  SOS you re all mad . EOS


TARGET:  SOS you re very brave . EOS
PREDICTION:  SOS you re very brave . EOS


TARGET:  SOS you re very clever . EOS
PREDICTION:  SOS you re very clever . EOS


TARGET:  SOS she is extremely attractive . EOS
PREDICTION:  SOS she is extremely attractive . EOS


TARGET:  SOS i m so sorry . EOS
PREDICTION:  SOS i m so sorry . EOS


TARGET:  SOS you re very talented . EOS
PREDICTION:  SOS you re very talented . EOS


TARGET:  SOS



TARGET:  SOS i m sorry i hurt you . EOS
PREDICTION:  SOS i m sorry i hurt you . EOS


TARGET:  SOS i am grateful for your help . EOS
PREDICTION:  SOS i m grateful for your help . EOS


TARGET:  SOS i m tired of watching tv . EOS
PREDICTION:  SOS i m tired of watching tv . EOS


TARGET:  SOS she s much heavier than him . EOS
PREDICTION:  SOS she s much younger than him . EOS


TARGET:  SOS i m taking care of my grandfather EOS
PREDICTION:  SOS i m taking my of my father father


TARGET:  SOS i m proud of you guys . EOS
PREDICTION:  SOS i m proud of you guys . EOS


TARGET:  SOS i m not so sure anymore . EOS
PREDICTION:  SOS i m not too sure anymore . EOS


TARGET:  SOS she is not afraid to die . EOS
PREDICTION:  SOS she isn not afraid to die . EOS


TARGET:  SOS i m sorry i hurt you . EOS
PREDICTION:  SOS i m sorry i hurt you . EOS


TARGET:  SOS he is almost six feet tall . EOS
PREDICTION:  SOS he is almost as as likely . EOS


TARGET:  SOS i m proud of you guys . EOS
PREDICTION:  SO

PREDICTION:  SOS you re off the hook . EOS


TARGET:  SOS you re blocking my view . EOS
PREDICTION:  SOS you re blocking my kids . EOS


TARGET:  SOS he s hungry for power . EOS
PREDICTION:  SOS he s hungry and power . EOS


TARGET:  SOS i m no longer tired . EOS
PREDICTION:  SOS i m no longer tired . EOS


TARGET:  SOS he is a great scientist . EOS
PREDICTION:  SOS he is a fast scientist . EOS


TARGET:  SOS i am a stranger here . EOS
PREDICTION:  SOS i m a stranger here . EOS


TARGET:  SOS he s afraid to dance . EOS
PREDICTION:  SOS he is afraid of die . EOS


TARGET:  SOS he s a computer nerd . EOS
PREDICTION:  SOS he s a bit nerd . EOS


TARGET:  SOS he is a born artist . EOS
PREDICTION:  SOS he is an artist artist . EOS


TARGET:  SOS you re a weird kid . EOS
PREDICTION:  SOS you re a weird weird . EOS


TARGET:  SOS i m a little busy . EOS
PREDICTION:  SOS i m a little busy too EOS


TARGET:  SOS he is a harsh critic . EOS
PREDICTION:  SOS he is a harsh critic . EOS


TARGET:  S

TARGET:  SOS they are the ones who want to go . EOS
PREDICTION:  SOS they are the ones who want to go . EOS


TARGET:  SOS i m going to new york next week . EOS
PREDICTION:  SOS i m going to work week week week . EOS


TARGET:  SOS i m really not supposed to do this . EOS
PREDICTION:  SOS i m not not really to be tom . EOS


TARGET:  SOS i m sure we can work this out . EOS
PREDICTION:  SOS i m sure we can work this out . EOS


TARGET:  SOS you aren t as short as i am . EOS
PREDICTION:  SOS you are t as as as i m . EOS


TARGET:  SOS she s six years older than i am . EOS
PREDICTION:  SOS she s six years younger than me am . EOS


TARGET:  SOS i m not taking no for an answer . EOS
PREDICTION:  SOS i m not taking for for him answer . EOS


TARGET:  SOS i am finnish but i speak also swedish . EOS
PREDICTION:  SOS i m finnish to i don also swedish . EOS


TARGET:  SOS i am free till o clock this evening . EOS
PREDICTION:  SOS i m free to clock clock this evening . EOS


TARGET:  SOS you re 


TARGET:  SOS i m not married yet . EOS
PREDICTION:  SOS i m not married . . EOS


TARGET:  SOS you are not a doctor . EOS
PREDICTION:  SOS you re not a doctor . EOS


TARGET:  SOS i am shorter than you . EOS
PREDICTION:  SOS i m shorter than you . EOS


TARGET:  SOS he s likely to come . EOS
PREDICTION:  SOS he s likely to come . EOS


TARGET:  SOS he is not altogether wrong . EOS
PREDICTION:  SOS he is not entirely entirely . EOS


TARGET:  SOS he s a talented writer . EOS
PREDICTION:  SOS he is a very writer . EOS


TARGET:  SOS i m not wearing socks . EOS
PREDICTION:  SOS i m not wearing socks . EOS


TARGET:  SOS we re all going home . EOS
PREDICTION:  SOS we re all going home . EOS


TARGET:  SOS he s addicted to heroin . EOS
PREDICTION:  SOS he s addicted to heroin . EOS


TARGET:  SOS you re smarter than me . EOS
PREDICTION:  SOS you are taller than me . EOS


TARGET:  SOS you re smarter than me . EOS
PREDICTION:  SOS you are taller than me . EOS


TARGET:  SOS she is just goin



TARGET:  SOS they re wrong . EOS
PREDICTION:  SOS they re nuts ! EOS


TARGET:  SOS you re productive . EOS
PREDICTION:  SOS you re productive . EOS


TARGET:  SOS they are exhausted . EOS
PREDICTION:  SOS they are exhausted . EOS


TARGET:  SOS you re incorrigible . EOS
PREDICTION:  SOS you re incorrigible . EOS


TARGET:  SOS i m vegetarian . EOS
PREDICTION:  SOS i m a . EOS


TARGET:  SOS you re famous . EOS
PREDICTION:  SOS you re famous . EOS


TARGET:  SOS he s swiss . EOS
PREDICTION:  SOS he s swiss . EOS


TARGET:  SOS he is old . EOS
PREDICTION:  SOS he s old . EOS


TARGET:  SOS you re winning . EOS
PREDICTION:  SOS you re winning . EOS


TARGET:  SOS they re boring . EOS
PREDICTION:  SOS they re boring . EOS


TARGET:  SOS you re free . EOS
PREDICTION:  SOS you re free . EOS


TARGET:  SOS you re amazing . EOS
PREDICTION:  SOS you re incredible . EOS


TARGET:  SOS i m powerful . EOS
PREDICTION:  SOS i m different . EOS


TARGET:  SOS we re freezing . EOS
PREDICTION:  SOS 



TARGET:  SOS he is rarely in a good mood . EOS
PREDICTION:  SOS he is well in a good mood . EOS


TARGET:  SOS you are in part responsible for it . EOS
PREDICTION:  SOS you are responsible responsible responsible for responsible . EOS


TARGET:  SOS i m as shocked as you are . EOS
PREDICTION:  SOS i m as smart as you are . EOS


TARGET:  SOS she is very much like her mother . EOS
PREDICTION:  SOS she s much much as her mother . EOS


TARGET:  SOS i m just trying to be friendly . EOS
PREDICTION:  SOS i m just trying to be to . EOS


TARGET:  SOS i m going to move next month . EOS
PREDICTION:  SOS i m going to figure next month . EOS


TARGET:  SOS you re not dressed . EOS
PREDICTION:  SOS you re not dressed . EOS


TARGET:  SOS i m not selfish . EOS
PREDICTION:  SOS i m not selfish . EOS


TARGET:  SOS i m not shy . EOS
PREDICTION:  SOS i m not shy . EOS


TARGET:  SOS you are not kind . EOS
PREDICTION:  SOS you re not kind . EOS


TARGET:  SOS i m not mean . EOS
PREDICTION:  SOS i m 

PREDICTION:  SOS he is old enough to know this . EOS


TARGET:  SOS you aren t a spy are you ? EOS
PREDICTION:  SOS you re t a only are you ? EOS


TARGET:  SOS i m starting to feel better already . EOS
PREDICTION:  SOS i m starting to feel better . . EOS


TARGET:  SOS he s a cheat and a liar . EOS
PREDICTION:  SOS he s a good liar a liar . EOS


TARGET:  SOS i m employed by a french lawyer . EOS
PREDICTION:  SOS i m a in a job . . EOS


TARGET:  SOS he is a friendly person . EOS
PREDICTION:  SOS she is a friendly person . EOS


TARGET:  SOS they re all on vacation . EOS
PREDICTION:  SOS they re all on vacation . EOS


TARGET:  SOS we re not friends anymore . EOS
PREDICTION:  SOS we re not friends . . EOS


TARGET:  SOS i m not a doctor . EOS
PREDICTION:  SOS i m not a doctor . EOS


TARGET:  SOS they re right of course . EOS
PREDICTION:  SOS they re right . course . EOS


TARGET:  SOS i m hungry and thirsty . EOS
PREDICTION:  SOS i m thirsty and thirsty . EOS


TARGET:  SOS i m in no


TARGET:  SOS she s just putting up a front . EOS PAD
PREDICTION:  SOS she s just a a a model . EOS .


TARGET:  SOS i m going to miss your cooking . EOS PAD
PREDICTION:  SOS i m getting to miss your work . EOS .


TARGET:  SOS we are going to have a baby . EOS PAD
PREDICTION:  SOS we re asking to have a baby . EOS .


TARGET:  SOS you aren t busy now are you ? EOS PAD
PREDICTION:  SOS you re t married now . you ? EOS .


TARGET:  SOS i m in way over my head . EOS PAD
PREDICTION:  SOS i m in my over my head . EOS .


TARGET:  SOS they re eating high on the hog . EOS PAD
PREDICTION:  SOS they re getting . . it hog . EOS .


TARGET:  SOS we re going to eat right now . EOS PAD
PREDICTION:  SOS we re going . eat now now . EOS .


TARGET:  SOS i am bored out of my mind . EOS PAD
PREDICTION:  SOS i m so . . it mind . EOS .


TARGET:  SOS i m going to the police station . EOS PAD
PREDICTION:  SOS i m going to the police station . EOS .


TARGET:  SOS she is as busy as a bee . EOS PAD
PREDICTI

In [19]:
%%time
pred_list, tgt_list, tag_list_lg = oos_infer_batched2(model, val_loader, max_seq_length)

RuntimeError: CUDA out of memory. Tried to allocate 504.00 MiB (GPU 0; 11.91 GiB total capacity; 9.37 GiB already allocated; 385.06 MiB free; 11.01 GiB reserved in total by PyTorch)

In [16]:
from tsf_infer_utils import to_list_npint64, gen_nopeek_mask, pop_padding_ts, get_tk_from_proba, IDX_SOS, IDX_EOS, get_EOS_indices

In [21]:
        pred_list_lg, tgt_list_lg, tag_list_lg = [], [], []

        (src, src_kety_padding_mask, tgt, tgt_key_padding_mask) = next(iter(val_loader))
        print(src.shape, tgt.shape)

torch.Size([1, 113, 7]) torch.Size([1, 113, 11])


In [17]:
        

        # format tags
        tag_list = [to_list_npint64(pop_padding_ts(src[:,[i],:]).flatten().tolist()) for i in range(src.shape[1])]

        src = src[0].to(device)

        pred = IDX_SOS * torch.ones((tgt.shape[1], 1), dtype=torch.long, device=device)
        pred_mask = gen_nopeek_mask(pred.shape[1]).to(device)

        while  not (pred == IDX_EOS).any(1).all() and pred.shape[1] < max_seq_length + 1:
            output = model(src, pred, src_key_padding_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, tgt_mask=pred_mask)#[[-1],:])
            pred = torch.cat([pred, get_tk_from_proba(output)[:,[-1]]], dim=1)
            pred_mask = gen_nopeek_mask(pred.shape[1]).to(device)

        # format pred sentence output
        idx_eos = get_EOS_indices(pred)
        pred_list = [to_list_npint64(pred[i,:idx_eos[i]].tolist()+[IDX_EOS]) for i in range(pred.shape[0])]

        # format tgt sentence output
        tgt_list = [to_list_npint64(pop_padding_ts(tgt[:,[i],:]).flatten().tolist()) for i in range(tgt.shape[1])]

        # aggregate results
        pred_list_lg += pred_list
        tgt_list_lg += tgt_list
        tag_list_lg += tag_list
        
        del pred, pred_mask, src, src_key_padding_mask, tgt, tgt_key_padding_mask, idx_eos

0 torch.Size([1, 188, 5]) torch.Size([1, 188, 9])
1 torch.Size([1, 118, 8]) torch.Size([1, 118, 11])
2 torch.Size([1, 68, 6]) torch.Size([1, 68, 7])
3 torch.Size([1, 127, 8]) torch.Size([1, 127, 10])
4 torch.Size([1, 113, 7]) torch.Size([1, 113, 11])
5 torch.Size([1, 145, 6]) torch.Size([1, 145, 9])


RuntimeError: CUDA out of memory. Tried to allocate 510.00 MiB (GPU 0; 11.91 GiB total capacity; 9.25 GiB already allocated; 413.06 MiB free; 10.99 GiB reserved in total by PyTorch)

False

In [25]:
from tsf_utils import get_tk_from_proba, format_list_for_bleu, bleu_score
pred_list_bleu, tgt_list_bleu = format_list_for_bleu(pred_list, tgt_list)
val_bleu = bleu_score(pred_list_bleu, tgt_list_bleu)
print('{} BLEU in-sample: {:.2%}'.format(
    'val', val_bleu))

val BLEU in-sample: 35.20%


In [26]:
print_nl_pred_vs_tgt(pred_list, tgt_list, eng_index2word)

TARGET:  SOS we are traveling on a tight budget . EOS
PREDICTION:  SOS we re totally ignorant . EOS


TARGET:  SOS we re all to blame for that . EOS
PREDICTION:  SOS we are all both on the same time . EOS


TARGET:  SOS i m at a friend s house . EOS
PREDICTION:  SOS i m a friend friend friend . EOS


TARGET:  SOS she s waiting for you at home . EOS
PREDICTION:  SOS she s waiting for you for at home . EOS


TARGET:  SOS she s waiting for you at home . EOS
PREDICTION:  SOS she s waiting for you for the help . EOS


TARGET:  SOS i m going to fix some dinner . EOS
PREDICTION:  SOS i m going to need some more dinner . EOS


TARGET:  SOS we re all to blame for that . EOS
PREDICTION:  SOS we are all both on the same time . EOS


TARGET:  SOS i m saving money for a car . EOS
PREDICTION:  SOS i am making a car car . EOS


TARGET:  SOS i m really disappointed in you tom . EOS
PREDICTION:  SOS i m really very disappointed as you . EOS


TARGET:  SOS i m going to change my shirt . EOS
PREDICTION: 

In [32]:
output

tensor([[[-4.4335, -4.2908, -0.6041,  ..., -4.7109, -4.4505, -4.3680],
         [-3.4102, -3.2483, -0.3126,  ..., -3.1505, -3.3007, -3.2806],
         [-4.0832, -4.1757, -2.3306,  ..., -3.9958, -3.9015, -4.2023],
         ...,
         [-5.3381, -5.2517, 12.2863,  ..., -5.3323, -4.7756, -5.2067],
         [-6.5541, -6.3681,  1.3144,  ..., -6.5573, -6.0976, -6.5049],
         [-5.3567, -5.3788,  3.3152,  ..., -5.4406, -5.3754, -5.5412]],

        [[-3.4995, -3.3090, -0.5436,  ..., -3.8077, -3.4574, -3.2997],
         [-3.1788, -2.9952,  2.0538,  ..., -3.1980, -3.1645, -3.2535],
         [-2.7192, -2.8293,  0.8992,  ..., -2.9711, -2.9444, -2.9860],
         ...,
         [-3.3632, -3.3528, 16.4275,  ..., -3.5590, -3.1003, -3.2996],
         [-3.5803, -3.5629, 15.0240,  ..., -3.7647, -3.3274, -3.5025],
         [-3.8001, -3.7839, 12.9715,  ..., -3.9726, -3.5613, -3.7066]],

        [[-7.9758, -8.1194,  0.3790,  ..., -7.9027, -7.8827, -7.6969],
         [-8.2481, -8.0431, -0.3178,  ..., -7

### OSS eval - one loader, batched

In [19]:
from transformer_translation.translate_sentence import gen_nopeek_mask
from transformer_translation.dataset import IDX_EOS, IDX_SOS, IDX_PAD
import numpy as np

In [20]:
from transformer_translation.translate_sentence import gen_nopeek_mask
from tsf_infer_utils import prep_transf_inputs, infer, oos_infer_batched, get_EOS_indices, IDX_SOS, IDX_EOS, to_list_npint64, pop_padding_ts
from tsf_utils import get_tk_from_proba

In [21]:
%%time
model.eval()
pred_list_lg, tgt_list_lg, tag_list_lg = oos_infer_batched(model, loader, max_seq_length)

CPU times: user 1h 37min, sys: 3min 40s, total: 1h 40min 40s
Wall time: 2min 18s


In [36]:
# print BLEU
print('{}BLEU: {:.2%}'.format(
    '',bleu_score(*format_list_for_bleu(pred_list_lg, tgt_list_lg))
))

# print in nat lg
print('\n')
print_nl_pred_vs_tgt(pred_list_lg, tgt_list_lg, reports_index2word, tag_list_lg, tags_index2word)

BLEU: 14.25%


TAGS:  rib_fracture rib pneumothorax
TARGET:  SOS  . moderate right sided pneumothorax measuring approximately . cm in the right apex . . minimally displaced right lateral th rib fracture probable nondisplaced right lateral th rib fracture . there is a moderate right sided pneumothorax measuring approximately . cm in the right apex . there is a minimally displaced right lateral th rib fracture and probable nondisplaced right lateral th rib fracture . cardiomediastinal silhouette is within normal limits . left lung is clear . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . . left basilar opacity . . . left basilar atelectasis and pleural thickening bilaterally . . heart size is normal . mediastinal contour is unremarkable . no pneumothorax . no focal consolidation . no displaced rib fractures . EOS


TAGS:  opacity
TARGET:  SOS  mm ring shaped opacity with central lucency seen with certainty on pa view only projects over right midlung . uncertain if this 

TAGS:  thoracic_vertebrae opacity pleural_diseases pleural_plaque scarring ribs tortuous_aorta
TARGET:  SOS xxxx scarring or pleural plaque in the left upper lobe with partial resection of the posterior fourth rib . no acute findings . heart size is normal . tortuous aorta . irregular . cm opacity in the left upper lung is identified both on pa and lateral views and xxxx represents chronic scarring . she has a partial resection of the posterior fourth rib . no pneumothorax . no pleural effusion . no focal infiltrate . anterior wedging of multiple vertebral bodies including t t t and t . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . . the lungs are clear bilaterally . specifically no evidence of focal consolidation pneumothorax or pleural effusion . . cardio mediastinal silhouette is unremarkable . visualized osseous structures of the thorax are without acute abnormality . EOS


TAGS:  opacity infection surgery sternotomy
TARGET:  SOS posterior airspace opacity consistent 

TAGS:  normal
TARGET:  SOS no acute cardiopulmonary abnormality . the lungs are clear and without focal air space opacity . the cardiomediastinal silhouette is normal in size and contour . there is no pneumothorax or large pleural effusion . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  pericardial_effusion granuloma nodule cardiomegaly hilar_lymphadenopathy
TARGET:  SOS marked increase in heart size . cardiomegaly . question pericardial effusion . stable . cm nodule left mid chest xxxx a granuloma . lungs are clear . no effusion . stable left hilar lymphadenopathy . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . . left hilar lymph xxxx . . . . left lower lobe calcified granuloma . heart size is normal . no pneumothorax pleural effusion or focal airspa

TAGS:  thoracic_vertebrae kyphosis deformity calcified_granuloma osteoporoses osteoporosis kyphoses
TARGET:  SOS no acute pulmonary disease . multiple thoracic xxxx deformities xxxx due to osteoporosis . lungs appear to be clear other than a calcified granuloma on left . heart is not enlarged . there are atherosclerotic changes of the aorta . there is increased kyphosis of the thoracic spine and there are multiple xxxx deformities . a stimulator is seen . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . heart size is normal . tortuous aorta . no pneumothorax pleural effusion or focal airspace consolidation . mild degenerative changes are present in the thoracic spine . EOS


TAGS:  cholecystectomies calcified_granuloma osteophytes
TARGET:  SOS no acute cardiopulmonary abnormality . there is a single calcified granuloma in the right lung base . the lungs are otherwise grossly clear bilaterally . there is no pneumothorax or pleural effusion . cardiac and mediastinal silhouette

TAGS:  right_upper_lobe_pneumonia opacity pneumonia
TARGET:  SOS right upper lobe pneumonia . consideration may be given for followup chest x xxxx following appropriate therapy . the patient is rotated to left . the cardiomediastinal silhouette is normal in size . xxxx lucency along the left ventricular xxxx xxxx related to interface between the heart and aerated lung . patchy right perihilar upper lobe opacities which abut the xxxx fissure on lateral projection . no pneumothorax or large pleural effusion . exaggerated thoracic kyphosis . no definite acute bone abnormality . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . . left basilar opacities . the heart size is normal . no pneumothorax . no pleural effusions . no focal air space opacities . EOS


TAGS:  copd hyperinflation fibroses interstitial_lung_disease pulmonary_disease__chronic_obstructive arthritic_changes pulmonary_fibrosis
TARGET:  SOS xxxx of copd and interstitial lung disease . no definite pneumonia . t

TAGS:  normal
TARGET:  SOS there is no evidence of acute cardiopulmonary disease . . the cardiac silhouette and mediastinum size are within normal limits . there is no pulmonary edema . there is no focal consolidation . there are no xxxx of a large pleural effusion . there is no evidence of pneumothorax . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  hiatal_hernia catheters
TARGET:  SOS no acute disease . the heart is top normal in size . the mediastinum is stable . there is a small retrocardiac density which may be secondary to small hiatal hernia . left ij catheter tip at cavoatrial junction . no pneumothorax is seen . the lungs are clear . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . . hiatal hernia . . . left basilar atelectasis . the lungs are c

TAGS:  normal
TARGET:  SOS no acute cardiopulmonary abnormality . normal heart size and mediastinal contours . no focal airspace consolidation . no pleural effusion or pneumothorax . visualized osseous structures are unremarkable . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  
TARGET:  SOS no acute cardiopulmonary abnormality . lungs are hyperexpanded bilaterally with no focal consolidation pleural effusion or pneumothoraces . cardiomediastinal silhouette is within normal limits . xxxx are unremarkable . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  nor

TAGS:  atelectases atelectasis
TARGET:  SOS low lung volume exam demonstrates small amount of right basilar atelectasis . there is no acute consolidation or pneumothorax . there are low lung volumes . the heart size and upper mediastinum have a normal appearance . there is no pulmonary vascular congestion . there is minimal right basilar atelectasis . there is no large effusion or pneumothorax . the osseous structures appear intact . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . . the lungs are clear bilaterally . specifically no evidence of focal consolidation pneumothorax or pleural effusion . cardio mediastinal silhouette is unremarkable . visualized osseous structures of the thorax are without acute abnormality . EOS


TAGS:  central_venous_catheters thoracolumbar_scoliosis central_venous_catheter catheter cardiomegaly catheterization__central_venous
TARGET:  SOS  . interval central catheter exchange . no acute cardiopulmonary abnormality . . stable cardiomegaly . . a

TAGS:  chronic_interstitial_lung_disease pulmonary_edema deformity clavicle lung_diseases__interstitial old_injury aortic_ectasia
TARGET:  SOS heart size within normal limits minimal aortic ectasia tortuosity . abnormal interstitial pattern nonspecific in appearance with xxxx differential diagnosis including chronic interstitial lung disease infectious inflammatory process atypical pulmonary edema . not highly characteristic appearance of contusion or aspiration . chronic appearing contour deformity of the distal right clavicle suggests old injury . no definite pleural effusion seen no pneumothorax . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . . no acute pulmonary abnormality . . . heart size and mediastinal contours appear within normal limits . pulmonary vascularity is within normal limits . no focal consolidation suspicious pulmonary opacity pneumothorax or definite pleural effusion . visualized osseous structures appear intact . EOS


TAGS:  normal
TARGET:  SOS

TAGS:  scarring
TARGET:  SOS no acute disease . the heart is normal in size . the mediastinum is unremarkable . there is xxxx biapical scarring . the lungs are otherwise clear . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bony abnormality . EOS


TAGS:  normal
TARGET:  SOS no acute cardiopulmonary abnormalities . cardiomediastinal silhouettes are within normal limits . lungs are clear without focal consolidation pneumothorax or pleural effusion . bony thorax is unremarkable . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  normal
TARGET:  SOS heart size upper limits of normal with clea

TAGS:  normal
TARGET:  SOS no acute disease . the heart is normal in size . the mediastinum is unremarkable . the lungs are clear . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  normal
TARGET:  SOS no acute disease . the heart is normal in size . the mediastinum is unremarkable . the lungs are clear . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  normal
TARGET:  SOS no acute disease . the heart is normal in size . the mediastinum is unremarkable . the lungs are clear . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silh

TAGS:  normal
TARGET:  SOS no acute abnormality . . heart size is normal . the lungs are clear . there are no focal air space consolidations . no pleural effusions or pneumothoraces . the hilar and mediastinal contours are normal . normal pulmonary vascularity . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  normal
TARGET:  SOS no acute cardiopulmonary abnormality . . the lungs are clear bilaterally . specifically no evidence of focal consolidation pneumothorax or pleural effusion . . cardio mediastinal silhouette is unremarkable . visualized osseous structures of the thorax are without acute abnormality . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . 

# WIP area

### investigate noTF eval on train set (IS data, OOS decoding)

In [22]:
# print BLEU
print('{}BLEU: {:.2%}'.format(
    '',bleu_score(*format_list_for_bleu(pred_list_lg, tgt_list_lg))
))

# print in nat lg
print('\n')
print_nl_pred_vs_tgt(pred_list_lg, tgt_list_lg, reports_index2word, tag_list_lg, tags_index2word)

BLEU: 14.25%


TAGS:  rib_fracture rib pneumothorax
TARGET:  SOS  . moderate right sided pneumothorax measuring approximately . cm in the right apex . . minimally displaced right lateral th rib fracture probable nondisplaced right lateral th rib fracture . there is a moderate right sided pneumothorax measuring approximately . cm in the right apex . there is a minimally displaced right lateral th rib fracture and probable nondisplaced right lateral th rib fracture . cardiomediastinal silhouette is within normal limits . left lung is clear . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . . left basilar opacity . . . left basilar atelectasis and pleural thickening bilaterally . . heart size is normal . mediastinal contour is unremarkable . no pneumothorax . no focal consolidation . no displaced rib fractures . EOS


TAGS:  opacity
TARGET:  SOS  mm ring shaped opacity with central lucency seen with certainty on pa view only projects over right midlung . uncertain if this 


TAGS:  spondylosis
TARGET:  SOS stable appearance of chest without active process evident and without evidence of progression of disease in patient with history of hodgkin s lymphoma . if one would like to discuss this case further please xxxx . xxxx at xxxx . thanks . stable appearance of chest with no findings of disease progression . heart and mediastinum stable configuration . stable elevation of left hemidiaphragm . lungs clear of consolidation . no pneumothorax or pleural effusion . bony thorax intact . minimal spondylosis of the lower thoracic spine . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . heart size is normal . no pneumothorax pleural effusion or focal airspace disease . bony structures appear intact . EOS


TAGS:  pleural_effusion opacity pneumonitis arthritic_changes bilateral_pleural_effusion congestion
TARGET:  SOS  . focal opacity in the right midlung zone worrisome for pneumonitis . . mild pulmonary vascular congestion . there is a focal area of opac

TAGS:  atelectases opacity atelectasis congestion infiltrates
TARGET:  SOS  . interval improvement in consolidative left base opacity . multifocal scattered bibasilar patchy and xxxx pulmonary opacities again noted most consistent with atelectasis infiltrate . . stable enlarged cardiomediastinal silhouette . stable pulmonary vascular congestion . . bilateral patchy pulmonary opacities noted . interval improvement in left base consolidative opacity . pulmonary vascular congestion again noted . stable enlarged cardiomediastinal silhouette . stable left xxxx . no evidence of pneumothorax . no large pleural effusions . EOS
PREDICTION:  SOS  . left basilar atelectasis versus infiltrate . . . cardiomegaly . . left basilar atelectasis . heart size is normal . no pneumothorax . no pleural effusions . EOS


TAGS:  
TARGET:  SOS  . vague density in right xxxx xxxx related to scapular tip and superimposed ribs . consider oblique images to exclude true nodule . . no acute cardiopulmonary abnormali

TAGS:  calcified_granuloma
TARGET:  SOS no acute cardiopulmonary findings . heart size within normal limits . no focal airspace disease . stable mm lateral left midlung calcified granuloma . no pneumothorax or pleural effusion . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  
TARGET:  SOS no acute cardiopulmonary findings heart size within normal limits . no focal alveolar consolidation no definite pleural effusion seen . no typical findings of pulmonary edema . no pneumothorax . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  normal
TARGET:  SOS the heart 

TAGS:  degenerative_change
TARGET:  SOS no acute cardiopulmonary finding . specifically there is no evidence of active tuberculosis infection . the heart and mediastinum are normal in size and contour . there is no focal airspace opacity pleural effusion or pneumothorax . there are degenerative changes in the thoracic spine . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . heart size and mediastinal contours appear within normal limits . pulmonary vascularity is within normal limits . no focal consolidation suspicious pulmonary opacity pneumothorax or definite pleural effusion . visualized osseous structures appear intact . EOS


TAGS:  degenerative_change
TARGET:  SOS  . no acute intrathoracic abnormality . the cardiomediastinal silhouette is within normal limits for appearance . no focal areas of pulmonary consolidation . no pneumothorax . no pleural effusion . mild degenerative changes of the thoracic spine . . no acute displaced rib fractures . EOS
PREDICTION:  SOS no a

TAGS:  normal
TARGET:  SOS normal chest heart size normal . lungs are clear . xxxx are normal . no pneumonia effusions edema pneumothorax adenopathy nodules or masses . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  atelectases granuloma calcified_granuloma atelectasis infiltrates
TARGET:  SOS heart size is normal . left midlung small calcified granulomas unchanged . persistent partial middle lobe atelectasis and infiltrate seen xxxx on the lateral EOS
PREDICTION:  SOS  . left basilar atelectasis or infiltrate . . . left basilar atelectasis . . left basilar atelectasis . no pneumothorax . heart size is normal . no pleural effusion . EOS


TAGS:  opacity edemas interstitial_pulmonary_edema pulmonary_edema
TARGET:  SOS interstitial pulmonary edema . cardiomegaly . intersti

TAGS:  normal
TARGET:  SOS no acute process . cardiac and mediastinal contours are within normal limits . the lungs are clear . bony structures are intact . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  normal
TARGET:  SOS no acute cardiopulmonary abnormality . mediastinal contours are normal . lungs are clear . there is no pneumothorax or large pleural effusion . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  normal
TARGET:  SOS comparison xxxx well expanded and clear lungs . mediastinal contour within normal limits . no acute cardiopulmonary abnormality

TAGS:  degenerative_change
TARGET:  SOS no evidence of active disease . the heart size and pulmonary vascularity appear within normal limits . the lungs are free of focal airspace disease . no pleural effusion or pneumothorax is seen . calcified lymph xxxx are present . degenerative changes are present in the spine . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . heart size and mediastinal contours appear within normal limits . pulmonary vascularity is within normal limits . no focal consolidation suspicious pulmonary opacity pneumothorax or definite pleural effusion . visualized osseous structures appear intact . EOS


TAGS:  atelectases scarring atelectasis pleural_thickening
TARGET:  SOS the heart size and cardiomediastinal silhouette are within normal limits . pulmonary vasculature appears normal . minimal blunting of the lateral sulci bilaterally xxxx reflects pleural thickening or scarring no dependent pleural fluid posteriorly . minimal right base subsegmental atele

TAGS:  opacity infection
TARGET:  SOS compared to xxxx there are xxxx extensive bilateral reticulonodular interstitial opacities concerning for atypical infection . result notification xxxx primordial . . normal heart size . diffuse bilateral reticulonodular interstitial opacities . there are no xxxx of a large pleural effusion . there is no evidence of pneumothorax . heart is not enlarged . xxxx are unremarkable . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . . stable cardiomegaly without pulmonary edema . the lungs and pleural spaces show no acute abnormality . heart size is normal . EOS


TAGS:  granulomatous_disease
TARGET:  SOS no acute cardiopulmonary process . the cardiomediastinal silhouette is within normal limits for size and contour . the lungs are normally inflated without evidence of focal airspace disease pleural effusion or pneumothorax . multiple small calcified bilateral hilar nodules xxxx sequela of prior granulomatous disease . radiopaque contrast 

TAGS:  normal
TARGET:  SOS no acute cardiopulmonary findings . heart size within normal limits . no focal airspace disease . no pneumothorax or effusions . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  
TARGET:  SOS no acute disease . the heart is normal in size . the mediastinum is unremarkable . the lungs are clear . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  normal
TARGET:  SOS no acute disease . the heart is normal in size . the mediastinum is stable . the lungs are clear . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomedi

TAGS:  deformity
TARGET:  SOS  . no acute cardiopulmonary process . . age indeterminant grade anterior wedge xxxx deformity of l . heart size and mediastinal contour are normal . pulmonary vascularity is normal . lungs are clear . no pleural effusions or pneumothoraces . there is a mild anterior wedge xxxx deformity of l age indeterminate . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . . the lungs are clear bilaterally . specifically no evidence of focal consolidation pneumothorax or pleural effusion . . cardio mediastinal silhouette is unremarkable . visualized osseous structures of the thorax are without acute abnormality . EOS


TAGS:  normal
TARGET:  SOS  . chest . no active disease . . lumbar spine negative . chest . both lungs are clear and expanded with no pleural air collections or parenchymal consolidations . heart and mediastinum remain normal . lumbosacral spine . xxxx disc spaces and alignment are normal . sacrum and sacroiliac joints are normal . EOS
PREDICTI

In [23]:
zero_bleu = []
bleus = []
for i, _ in enumerate(pred_list_lg):
    bleu = bleu_score(*format_list_for_bleu(
            [pred_list_lg[i]]
            ,[tgt_list_lg[i]]
        ))
    print(
        i, ':'
        ,len(tgt_list_lg[i])
        ,len(pred_list_lg[i])
        ,'{}BLEU: {:.2%}'.format('',bleu)
        ,[tags_index2word[j.item()] for j in tag_list_lg[i]]
    )
    if bleu == 0:
        zero_bleu.append(i)
    bleus.append(bleu)
        
pos_bleu = list(set(range(len(tgt_list_lg))) - set(zero_bleu))

0 : 80 48 BLEU: 0.00% ['rib_fracture', 'rib', 'pneumothorax']
1 : 80 51 BLEU: 5.18% ['opacity']
2 : 80 35 BLEU: 4.48% ['normal']
3 : 80 43 BLEU: 0.00% ['atelectases', 'opacity', 'atelectasis']
4 : 80 32 BLEU: 0.00% ['opacity', 'infection', 'pneumonia']
5 : 80 39 BLEU: 0.00% ['central_venous_catheters', 'opacity', 'degenerative_change', 'central_venous_catheter', 'pneumothorax', 'diaphragm']
6 : 80 34 BLEU: 0.00% ['atelectases', 'opacity', 'cardiac_monitor', 'scarring', 'atelectasis', 'pulmonary_fibroses', 'pulmonary_fibrosis']
7 : 80 43 BLEU: 5.69% ['scoliosis', 'deformity']
8 : 80 42 BLEU: 0.00% ['scarring', 'pleura']
9 : 80 48 BLEU: 5.51% ['pleural_effusion', 'atelectases', 'pleural_fluid', 'cabg', 'scarring', 'scar', 'sternotomy', 'atelectasis', 'cardiomegaly']
10 : 80 37 BLEU: 4.92% ['copd', 'aorta', 'degenerative_change', 'pulmonary_disease__chronic_obstructive', 'osteoporoses', 'osteoporosis', 'pacemaker__artificial', 'aortic_valve', 'thoracic_aorta']
11 : 81 52 BLEU: 0.00% ['ate

172 : 14 35 BLEU: 0.00% ['normal']
173 : 14 17 BLEU: 39.57% ['edemas', 'edema', 'cardiomegaly']
174 : 14 38 BLEU: 0.00% ['granuloma']
175 : 14 32 BLEU: 0.00% ['emphysemas', 'pulmonary_emphysema', 'emphysema']
176 : 15 50 BLEU: 0.00% ['chronic_interstitial_lung_disease', 'lung_diseases__interstitial']
177 : 15 42 BLEU: 0.00% ['solitary_pulmonary_nodule', 'nodule']
178 : 15 35 BLEU: 0.00% ['normal']
179 : 15 30 BLEU: 0.00% ['diaphragm']
180 : 15 35 BLEU: 0.00% ['normal']
181 : 15 35 BLEU: 0.00% []
182 : 15 26 BLEU: 0.00% ['aorta', 'infiltrates']
183 : 15 26 BLEU: 0.00% ['granuloma', 'apical_granuloma']
184 : 15 26 BLEU: 0.00% ['mastectomies', 'mastectomy', 'surgery']
185 : 15 38 BLEU: 0.00% ['granuloma']
186 : 15 43 BLEU: 0.00% ['picc', 'catheter', 'catheterization__central_venous']
187 : 15 30 BLEU: 13.46% ['diaphragm']
188 : 16 35 BLEU: 0.00% ['normal']
189 : 16 32 BLEU: 0.00% ['left_ventricle', 'aorta']
190 : 16 38 BLEU: 0.00% ['granuloma']
191 : 16 34 BLEU: 0.00% ['infiltrates', 'car

341 : 55 38 BLEU: 0.00% ['effusion', 'gases', 'pleural_diseases', 'air', 'pleural_thickening']
342 : 55 41 BLEU: 14.28% ['aorta', 'cabg', 'degenerative_change', 'coronary_artery_bypass', 'sternotomy']
343 : 55 42 BLEU: 0.00% ['copd', 'pulmonary_disease']
344 : 55 26 BLEU: 0.00% ['asbestosis', 'pleural_calcification', 'asbestos_exposure', 'cardiomegaly']
345 : 55 43 BLEU: 0.00% ['atelectases', 'opacity', 'atelectasis']
346 : 55 42 BLEU: 16.34% ['central_venous_catheters', 'central_venous_catheter', 'catheterization__central_venous']
347 : 55 42 BLEU: 0.00% ['focal_atelectasis']
348 : 55 33 BLEU: 8.45% ['pleural_effusion', 'atelectases', 'opacity', 'pericardial_effusion', 'atelectasis', 'cardiomegaly', 'pleural_effusions']
349 : 55 29 BLEU: 9.85% ['granulomatous_disease', 'aorta', 'tortuous_aorta']
350 : 55 35 BLEU: 0.00% ['normal']
351 : 55 60 BLEU: 13.56% ['hiatal_hernia']
352 : 55 42 BLEU: 18.88% ['central_venous_catheters', 'central_venous_catheter', 'catheterization__central_venous'

496 : 63 43 BLEU: 0.00% ['atelectases', 'opacity', 'atelectasis']
497 : 63 39 BLEU: 14.33% ['eventration', 'atelectases', 'opacity', 'atelectasis', 'infiltrates', 'diaphragm']
498 : 63 24 BLEU: 0.00% ['atelectases', 'opacity', 'atelectasis', 'cardiomegaly']
499 : 63 43 BLEU: 13.23% ['cardiomegaly']
500 : 63 33 BLEU: 11.41% ['degenerative_change', 'deformity', 'shoulder']
501 : 63 42 BLEU: 13.94% ['degenerative_change']
502 : 63 24 BLEU: 0.00% ['atelectases', 'opacity', 'atelectasis', 'cardiomegaly']
503 : 63 38 BLEU: 0.00% ['atelectases', 'opacity', 'atelectasis', 'infiltrates']
504 : 63 53 BLEU: 0.00% ['emphysemas', 'pulmonary_emphysema', 'spondylosis', 'pneumonia', 'emphysema']
505 : 63 60 BLEU: 11.91% ['hiatal_hernia']
506 : 63 26 BLEU: 4.85% ['opacity', 'degenerative_change']
507 : 79 29 BLEU: 0.00% ['deformity', 'catheters', 'rib', 'subclavian_vein', 'old_injury', 'catheterization__central_venous']
508 : 79 46 BLEU: 9.50% ['scleroses', 'atelectases', 'opacity', 'cholelithiasis', '

649 : 36 29 BLEU: 0.00% ['eventration', 'hyperinflation', 'diaphragm']
650 : 36 35 BLEU: 0.00% ['normal']
651 : 36 35 BLEU: 15.24% ['normal']
652 : 36 35 BLEU: 15.24% ['normal']
653 : 36 35 BLEU: 20.66% []
654 : 36 35 BLEU: 0.00% ['normal']
655 : 36 35 BLEU: 0.00% []
656 : 36 35 BLEU: 15.24% ['normal']
657 : 36 32 BLEU: 19.83% ['emphysemas', 'pulmonary_emphysema', 'emphysema']
658 : 36 17 BLEU: 0.00% ['opacity', 'edemas', 'edema', 'pulmonary_edema', 'cardiomegaly']
659 : 36 43 BLEU: 26.73% ['deformity']
660 : 36 29 BLEU: 0.00% ['ectasia', 'aortic_calcifications']
661 : 36 35 BLEU: 23.64% ['normal']
662 : 36 35 BLEU: 37.31% ['normal']
663 : 36 29 BLEU: 0.00% ['aorta']
664 : 20 35 BLEU: 0.00% ['spinal_osteophytosis']
665 : 20 32 BLEU: 0.00% ['atelectases', 'effusion', 'atelectasis', 'infiltrates', 'cardiomegaly']
666 : 20 35 BLEU: 0.00% ['normal']
667 : 20 41 BLEU: 0.00% ['gallbladder', 'clip']
668 : 20 35 BLEU: 0.00% ['spinal_osteophytosis']
669 : 20 35 BLEU: 9.67% []
670 : 20 60 BLEU: 

828 : 32 43 BLEU: 0.00% ['cardiomegaly']
829 : 32 35 BLEU: 40.95% ['normal']
830 : 32 35 BLEU: 30.89% ['normal']
831 : 32 35 BLEU: 28.25% ['normal']
832 : 32 35 BLEU: 11.73% ['normal']
833 : 32 26 BLEU: 0.00% ['thoracic_vertebrae', 'catheters']
834 : 32 42 BLEU: 0.00% ['degenerative_change']
835 : 32 26 BLEU: 16.13% ['calcified_granuloma', 'cardiomegaly']
836 : 32 35 BLEU: 35.84% ['normal']
837 : 32 35 BLEU: 11.73% []
838 : 32 35 BLEU: 0.00% ['calcinosis']
839 : 62 26 BLEU: 0.00% ['cardiac_monitor', 'pericardial_effusion', 'vascular_calcification', 'cardiomegaly']
840 : 62 42 BLEU: 0.00% ['spondylosis', 'cardiomegaly']
841 : 62 27 BLEU: 0.00% ['thoracic_vertebrae', 'spinal_fractures', 'lumbar_vertebrae', 'old_injury', 'fusion', 'fractures__bone', 'fracture']
842 : 62 36 BLEU: 0.00% ['atelectases', 'pulmonary_emphysema', 'atelectasis', 'hyperexpansion']
843 : 62 40 BLEU: 0.00% ['atelectases', 'opacity', 'scarring', 'calcified_granuloma', 'nodule', 'atelectasis']
844 : 62 34 BLEU: 0.00% 

936 : 95 39 BLEU: 4.79% ['degenerative_change', 'emphysemas', 'pulmonary_emphysema', 'pneumonia', 'emphysema']
937 : 95 42 BLEU: 0.00% ['opacity', 'fibroses', 'calcified_granuloma', 'nodule']
938 : 95 38 BLEU: 2.59% ['opacity', 'calcified_granuloma']
939 : 95 29 BLEU: 1.97% ['aorta__thoracic', 'rib_fractures', 'rib', 'aortic_ectasia', 'kyphoses', 'fracture']
940 : 96 36 BLEU: 0.00% ['emphysemas', 'pulmonary_emphysema', 'diaphragm', 'emphysema']
941 : 96 47 BLEU: 12.69% ['atelectases', 'opacity', 'surgical_resection', 'scolioses', 'scarring', 'atelectasis', 'cardiomegaly', 'diaphragm']
942 : 96 34 BLEU: 0.00% ['hilar_calcification', 'opacity', 'hyperinflation', 'cabg', 'demineralization', 'scarring', 'fracture']
943 : 96 58 BLEU: 0.00% ['atelectases', 'opacity', 'degenerative_change', 'scarring', 'nodule', 'atelectasis']
944 : 96 43 BLEU: 0.00% ['pleural_effusion', 'atelectases', 'atelectasis', 'picc', 'pleural_effusions']
945 : 96 47 BLEU: 3.70% ['pleural_diseases', 'ribs', 'pleural_th

1089 : 33 35 BLEU: 29.06% ['normal']
1090 : 33 35 BLEU: 20.97% ['normal']
1091 : 33 35 BLEU: 0.00% []
1092 : 33 35 BLEU: 0.00% ['normal']
1093 : 33 35 BLEU: 28.57% ['normal']
1094 : 33 35 BLEU: 0.00% ['normal']
1095 : 33 17 BLEU: 10.06% ['pleural_effusion', 'cardiomegaly', 'pleural_effusions']
1096 : 33 35 BLEU: 28.94% []
1097 : 33 35 BLEU: 27.04% ['normal']
1098 : 33 35 BLEU: 27.04% ['normal']
1099 : 33 28 BLEU: 0.00% ['copd', 'bulla']
1100 : 33 35 BLEU: 16.27% ['normal']
1101 : 33 35 BLEU: 0.00% ['normal']
1102 : 84 44 BLEU: 0.00% ['atelectases', 'opacity', 'degenerative_change', 'atelectasis', 'picc', 'diaphragm']
1103 : 84 40 BLEU: 0.00% ['pleural_effusion', 'degenerative_disc_diseases', 'rib_fracture', 'pleural_effusions']
1104 : 84 43 BLEU: 4.64% ['copd', 'atelectases', 'opacity', 'pulmonary_disease__chronic_obstructive', 'scarring', 'calcified_granuloma', 'atelectasis']
1105 : 84 38 BLEU: 0.00% ['ribs', 'lobectomy', 'shoulder', 'cardiomegaly', 'diaphragm']
1106 : 84 44 BLEU: 6.8

1241 : 45 42 BLEU: 0.00% ['nodule']
1242 : 45 38 BLEU: 0.00% ['atelectases', 'opacity', 'atelectasis', 'infiltrates']
1243 : 45 33 BLEU: 16.74% ['right_lower_lobe_pneumonia', 'pneumonia']
1244 : 45 35 BLEU: 0.00% ['normal']
1245 : 45 42 BLEU: 0.00% ['nodule', 'lung_cancer', 'diaphragm']
1246 : 45 51 BLEU: 11.27% ['opacity']
1247 : 45 36 BLEU: 0.00% ['pleural_effusion', 'atelectases', 'atelectasis', 'pneumonia', 'pleural_effusions']
1248 : 45 51 BLEU: 0.00% ['atelectases', 'opacity', 'scarring', 'atelectasis']
1249 : 45 26 BLEU: 0.00% ['pleural_fluid', 'infiltrates']
1250 : 45 42 BLEU: 21.32% ['degenerative_change', 'granulomatous_infection']
1251 : 45 35 BLEU: 17.20% ['normal']
1252 : 45 42 BLEU: 21.32% ['degenerative_change', 'granulomatous_infection']
1253 : 45 51 BLEU: 9.34% ['opacity']
1254 : 45 35 BLEU: 0.00% ['normal']
1255 : 45 41 BLEU: 9.40% ['opacity', 'pneumonia']
1256 : 45 35 BLEU: 0.00% []
1257 : 45 43 BLEU: 24.13% ['atelectases', 'opacity', 'atelectasis']
1258 : 45 42 BLEU

1396 : 76 49 BLEU: 0.00% ['left_lower_lobe_pneumonia', 'pleural_effusion', 'opacity', 'deformity', 'bilateral_pleural_effusion', 'pneumonia', 'fracture']
1397 : 76 42 BLEU: 6.94% ['aorta__thoracic', 'lymphadenopathy']
1398 : 76 39 BLEU: 0.00% ['atelectases', 'cervical_spine_fusion', 'atelectasis']
1399 : 41 35 BLEU: 26.34% ['normal']
1400 : 41 42 BLEU: 10.23% ['degenerative_change']
1401 : 41 35 BLEU: 26.34% ['normal']
1402 : 41 29 BLEU: 0.00% ['deformity', 'cardiomegaly', 'aortic_diseases', 'aortic_ectasia', 'fracture']
1403 : 41 28 BLEU: 0.00% ['copd', 'eventration', 'pulmonary_disease__chronic_obstructive', 'diaphragm']
1404 : 41 38 BLEU: 0.00% ['atelectases', 'pleural_disease', 'lung_volume_reduction', 'atelectasis']
1405 : 41 35 BLEU: 19.49% ['normal']
1406 : 41 35 BLEU: 34.87% ['normal']
1407 : 41 35 BLEU: 26.34% ['normal']
1408 : 41 35 BLEU: 19.49% ['normal']
1409 : 41 35 BLEU: 26.34% ['normal']
1410 : 41 26 BLEU: 0.00% ['central_venous_catheters', 'cardiomegaly']
1411 : 41 35 B

1571 : 88 39 BLEU: 3.85% ['thoracic_vertebrae', 'atelectases', 'vertebroplasty', 'hypertrophy', 'degeneration', 'atelectasis', 'adenopathy']
1572 : 88 32 BLEU: 0.00% ['opacity', 'infection', 'pneumonia']
1573 : 88 51 BLEU: 7.11% ['hiatal_hernia', 'degenerative_change', 'descending_aorta']
1574 : 88 42 BLEU: 0.00% ['atelectases', 'opacity', 'dish', 'degenerative_change', 'atelectasis', 'cardiomegaly']
1575 : 88 25 BLEU: 1.80% ['opacity', 'degenerative_change', 'hypertension__pulmonary', 'hypertension', 'cardiomegaly', 'pulmonary_artery_hypertension']
1576 : 88 50 BLEU: 0.00% ['opacity', 'hyperinflation', 'scarring']
1577 : 88 35 BLEU: 0.00% ['degenerative_joint_disease', 'osteophytes', 'osteoarthritis']
1578 : 88 43 BLEU: 4.15% ['pleural_effusion', 'right_sided_pleural_effusion', 'picc_line', 'pneumonia', 'acute_pneumonia']
1579 : 88 42 BLEU: 7.08% ['opacity', 'lymphadenopathy', 'granuloma', 'lymph_node', 'calcified_granuloma']
1580 : 88 35 BLEU: 0.00% ['rib_fractures', 'fracture']
1581

1705 : 75 34 BLEU: 0.00% ['atelectases', 'atelectasis', 'pacemaker__artificial']
1706 : 75 37 BLEU: 5.84% ['pleural_effusion', 'atelectases', 'effusion', 'atelectasis', 'pneumonia', 'pleural_effusions']
1707 : 75 35 BLEU: 6.08% ['calcified_granuloma']
1708 : 75 53 BLEU: 4.56% ['opacity', 'hyperexpansion']
1709 : 75 42 BLEU: 5.77% ['degenerative_change', 'granuloma', 'calcified_granuloma', 'nodule']
1710 : 75 35 BLEU: 6.31% ['calcified_granuloma']
1711 : 75 51 BLEU: 7.80% ['lung_diseases', 'chronic_edema', 'fibroses', 'pulmonary_edema', 'congestion', 'cardiomegaly']
1712 : 75 59 BLEU: 13.29% ['pleural_effusion', 'opacity', 'degenerative_change', 'bilateral_pleural_effusion', 'congestion', 'enlarged_heart']
1713 : 75 38 BLEU: 0.00% ['right_upper_lobe_pneumonia', 'opacity', 'degenerative_change', 'pneumonia', 'osteophytes']
1714 : 75 51 BLEU: 12.14% ['atelectases', 'opacity', 'scarring', 'atelectasis']
1715 : 75 33 BLEU: 7.29% ['degenerative_change', 'diaphragm']
1716 : 75 22 BLEU: 2.48% 

1878 : 49 35 BLEU: 21.38% ['normal']
1879 : 49 29 BLEU: 0.00% ['tortuous_aorta']
1880 : 49 35 BLEU: 13.25% ['normal']
1881 : 77 37 BLEU: 8.03% ['atelectases', 'atelectasis', 'infiltrates', 'diaphragm']
1882 : 77 34 BLEU: 2.99% ['atelectases', 'degenerative_change', 'scarring', 'atelectasis']
1883 : 77 43 BLEU: 0.00% ['cardiomegaly']
1884 : 77 35 BLEU: 7.22% ['normal']
1885 : 77 58 BLEU: 6.44% ['opacity', 'cystic_fibrosis']
1886 : 77 37 BLEU: 0.00% ['effusion', 'volume_overload', 'cardiomegaly', 'renal_dialysis']
1887 : 77 59 BLEU: 14.56% ['chronic_interstitial_lung_disease', 'atheroscleroses', 'opacity', 'chronic_lung_disease', 'lung_diseases__interstitial']
1888 : 77 26 BLEU: 2.40% ['chronic_disease', 'atheroscleroses', 'atelectases', 'opacity', 'degenerative_disc_diseases', 'chronic_lung_disease', 'atelectasis', 'thoracic_aorta', 'hyperexpansion']
1889 : 77 34 BLEU: 8.42% ['calcinosis', 'vascular_diseases', 'sternotomy', 'mitral_annular_calcification', 'cardiomegaly', 'thoracic_aorta

2041 : 50 43 BLEU: 9.71% ['cervical_vertebrae', 'spinal_fusion', 'scoliosis', 'cervical_fusion', 'emphysemas', 'pulmonary_emphysema', 'thoracic_spondylosis', 'emphysema']
2042 : 50 52 BLEU: 16.61% ['atelectases', 'scarring', 'calcified_granuloma', 'atelectasis']
2043 : 50 37 BLEU: 17.17% ['atelectases', 'atelectasis', 'infiltrates']
2044 : 50 44 BLEU: 19.04% ['atelectases', 'opacity', 'lymph_nodes', 'atelectasis']
2045 : 50 35 BLEU: 22.81% ['normal']
2046 : 50 24 BLEU: 0.00% ['atelectases', 'opacity', 'atelectasis', 'cardiomegaly']
2047 : 50 35 BLEU: 23.34% ['normal']
2048 : 50 43 BLEU: 18.62% ['atelectases', 'opacity', 'atelectasis']
2049 : 50 35 BLEU: 24.51% ['normal']
2050 : 50 36 BLEU: 9.01% ['pacemaker__artificial', 'cardiomegaly']
2051 : 50 30 BLEU: 9.71% ['diaphragm']
2052 : 50 32 BLEU: 23.30% ['emphysemas', 'pulmonary_emphysema', 'emphysema']
2053 : 50 32 BLEU: 0.00% ['bullae', 'bulla']
2054 : 50 29 BLEU: 0.00% ['kyphosis', 'cervical_spine_surgeries', 'deformity', 'kyphoses']
2

2208 : 40 35 BLEU: 10.59% ['normal']
2209 : 40 35 BLEU: 0.00% ['calcified_granuloma']
2210 : 40 42 BLEU: 0.00% ['metastatic_disease']
2211 : 40 35 BLEU: 26.82% []
2212 : 40 35 BLEU: 39.70% ['normal']
2213 : 40 49 BLEU: 11.72% ['air_trapping']
2214 : 40 35 BLEU: 26.82% ['normal']
2215 : 40 35 BLEU: 39.70% ['normal']
2216 : 40 35 BLEU: 0.00% ['normal']
2217 : 40 35 BLEU: 10.59% ['normal']
2218 : 40 35 BLEU: 26.82% ['normal']
2219 : 40 35 BLEU: 35.88% ['normal']
2220 : 40 28 BLEU: 27.94% ['calcinosis', 'calcifications_of_the_aorta']
2221 : 40 35 BLEU: 10.59% ['normal']
2222 : 40 34 BLEU: 33.38% ['granuloma']
2223 : 40 55 BLEU: 8.00% ['pulmonary_atelectasis', 'focal_atelectasis', 'thoracic_aorta']
2224 : 40 35 BLEU: 35.88% ['normal']
2225 : 40 35 BLEU: 10.59% ['normal']
2226 : 40 35 BLEU: 10.59% ['normal']
2227 : 40 35 BLEU: 0.00% ['normal']
2228 : 40 35 BLEU: 10.59% ['normal']
2229 : 40 42 BLEU: 17.92% ['degenerative_change']
2230 : 40 35 BLEU: 0.00% []
2231 : 40 41 BLEU: 0.00% ['in_dwell

2383 : 30 35 BLEU: 10.27% ['normal']
2384 : 30 35 BLEU: 14.83% ['normal']
2385 : 30 35 BLEU: 0.00% ['normal']
2386 : 30 35 BLEU: 11.29% ['normal']
2387 : 30 38 BLEU: 0.00% ['granuloma', 'calcified_granuloma']
2388 : 30 35 BLEU: 20.97% ['normal']
2389 : 30 35 BLEU: 14.83% ['normal']
2390 : 30 35 BLEU: 32.51% []
2391 : 30 36 BLEU: 13.40% ['osteophytes']
2392 : 30 35 BLEU: 32.51% ['normal']
2393 : 30 35 BLEU: 27.23% ['normal']
2394 : 30 35 BLEU: 0.00% []
2395 : 30 35 BLEU: 0.00% ['normal']
2396 : 30 50 BLEU: 0.00% ['hypoventilation']
2397 : 30 35 BLEU: 11.29% ['normal']
2398 : 30 35 BLEU: 0.00% ['normal']
2399 : 30 35 BLEU: 11.29% ['normal']
2400 : 30 35 BLEU: 22.38% ['normal']
2401 : 30 35 BLEU: 10.27% ['normal']
2402 : 30 35 BLEU: 0.00% ['normal']
2403 : 30 35 BLEU: 0.00% ['normal']
2404 : 30 35 BLEU: 11.61% ['normal']
2405 : 30 35 BLEU: 0.00% ['calcified_granuloma']
2406 : 30 33 BLEU: 14.61% ['degenerative_change', 'tortuous_aorta']
2407 : 30 35 BLEU: 0.00% ['scarring']
2408 : 30 35 BL

2581 : 23 35 BLEU: 0.00% ['normal']
2582 : 23 35 BLEU: 0.00% ['normal']
2583 : 23 35 BLEU: 0.00% ['normal']
2584 : 23 35 BLEU: 25.41% ['normal']
2585 : 23 35 BLEU: 18.40% ['normal']
2586 : 23 35 BLEU: 0.00% ['normal']
2587 : 23 42 BLEU: 0.00% ['atelectases', 'atelectasis']
2588 : 23 35 BLEU: 0.00% ['normal']
2589 : 23 35 BLEU: 0.00% ['normal']
2590 : 23 35 BLEU: 0.00% []
2591 : 23 35 BLEU: 0.00% ['normal']
2592 : 23 35 BLEU: 0.00% ['normal']
2593 : 23 35 BLEU: 0.00% ['normal']
2594 : 23 14 BLEU: 0.00% ['pleural_effusion', 'bilateral_pleural_effusion', 'congestion', 'cardiomegaly']
2595 : 23 35 BLEU: 0.00% ['normal']
2596 : 23 35 BLEU: 0.00% ['normal']
2597 : 23 35 BLEU: 0.00% ['normal']
2598 : 23 35 BLEU: 24.45% ['normal']
2599 : 23 35 BLEU: 0.00% []
2600 : 23 35 BLEU: 0.00% ['normal']
2601 : 23 35 BLEU: 0.00% ['normal']
2602 : 23 35 BLEU: 0.00% ['normal']
2603 : 23 35 BLEU: 0.00% ['normal']
2604 : 23 35 BLEU: 0.00% ['normal']
2605 : 23 35 BLEU: 0.00% ['normal']
2606 : 23 35 BLEU: 0.00

2730 : 70 34 BLEU: 0.00% ['eventration', 'atelectases', 'opacity', 'atelectasis', 'congestion', 'cardiomegaly', 'diaphragm']
2731 : 70 34 BLEU: 9.71% ['tipss', 'degenerative_change', 'aortic_calcifications']
2732 : 47 36 BLEU: 25.98% ['granulomatous_disease', 'degenerative_change', 'cardiomegaly']
2733 : 47 35 BLEU: 9.56% ['normal']
2734 : 47 47 BLEU: 10.66% ['opacity', 'pneumonitis']
2735 : 47 35 BLEU: 10.53% []
2736 : 47 49 BLEU: 0.00% ['pleural_effusion', 'atelectases', 'opacity', 'bilateral_pleural_effusion', 'atelectasis', 'pneumonia']
2737 : 47 41 BLEU: 13.48% ['pleural_effusion', 'left_sided_pleural_effusion', 'atelectases', 'atelectasis', 'thoracentesis']
2738 : 47 35 BLEU: 16.86% ['normal']
2739 : 47 35 BLEU: 0.00% ['normal']
2740 : 47 29 BLEU: 0.00% ['eventration', 'hyperinflation', 'aortic_ectasia', 'diaphragm']
2741 : 47 57 BLEU: 0.00% ['atelectases', 'scarring', 'atelectasis', 'aortic_calcifications']
2742 : 47 38 BLEU: 55.39% ['granulomatous_disease', 'lymph_nodes']
2743 

2913 : 39 33 BLEU: 0.00% ['thoracic_vertebrae', 'scoliosis', 'aorta', 'scolioses', 'lumbar_vertebrae']
2914 : 39 35 BLEU: 31.13% []
2915 : 39 35 BLEU: 0.00% []
2916 : 39 35 BLEU: 30.87% ['normal']
2917 : 39 35 BLEU: 18.96% ['rib_fracture', 'rib_fractures', 'arthroplasties']
2918 : 39 26 BLEU: 10.21% ['right_upper_lobe_pneumonia', 'pneumonia']
2919 : 39 26 BLEU: 0.00% ['granuloma', 'fracture']
2920 : 39 43 BLEU: 16.08% ['cardiomegaly']
2921 : 39 43 BLEU: 16.69% ['deformity']
2922 : 39 35 BLEU: 28.69% ['normal']
2923 : 39 35 BLEU: 28.69% ['normal']
2924 : 39 35 BLEU: 10.76% ['normal']
2925 : 39 36 BLEU: 36.05% ['cholecystectomies']
2926 : 39 38 BLEU: 0.00% ['cardiac_monitor']
2927 : 39 35 BLEU: 17.87% []
2928 : 39 37 BLEU: 12.34% ['opacity', 'edemas', 'edema', 'pulmonary_edema', 'congestion']
2929 : 39 34 BLEU: 29.41% ['aorta__thoracic', 'aorta', 'degenerative_change', 'tortuous_aorta']
2930 : 39 29 BLEU: 35.95% ['hyperinflation']
2931 : 39 42 BLEU: 16.35% ['degenerative_change']
2932 : 

In [32]:
len(zero_bleu), len(pos_bleu), '{:.2%}'.format(np.mean(bleus)), '{:.2%}'.format(np.mean([bleus[i] for i in pos_bleu]))

(1181, 1819, '11.12%', '18.34%')

In [34]:
a = [tag_list_lg[i].tolist() for i in zero_bleu]
a = [[str(i) for i in ll] for ll in a]
a = [' '.join(ll) for ll in a]
len(set(a)), len(zero_bleu)

(589, 1181)

In [25]:
print_nl_pred_vs_tgt(
    [pred_list_lg[i] for i in zero_bleu]
    ,[tgt_list_lg[i] for i in zero_bleu]
    ,reports_index2word
    ,[tag_list_lg[i] for i in zero_bleu]
    ,tags_index2word
)

TAGS:  rib_fracture rib pneumothorax
TARGET:  SOS  . moderate right sided pneumothorax measuring approximately . cm in the right apex . . minimally displaced right lateral th rib fracture probable nondisplaced right lateral th rib fracture . there is a moderate right sided pneumothorax measuring approximately . cm in the right apex . there is a minimally displaced right lateral th rib fracture and probable nondisplaced right lateral th rib fracture . cardiomediastinal silhouette is within normal limits . left lung is clear . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . . left basilar opacity . . . left basilar atelectasis and pleural thickening bilaterally . . heart size is normal . mediastinal contour is unremarkable . no pneumothorax . no focal consolidation . no displaced rib fractures . EOS


TAGS:  atelectases opacity atelectasis
TARGET:  SOS  . low lung volumes . . opacity in the lingula is favored to represent prominent pericardial fat but lingular atelectasi

TAGS:  normal
TARGET:  SOS no active disease . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  normal
TARGET:  SOS no active disease . . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  normal
TARGET:  SOS no evidence of acute cardiopulmonary disease EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  cardiomegaly
TARGET:  SOS slight cardiomegaly . 

TAGS:  atelectases atelectasis pneumothorax
TARGET:  SOS stable cm right apical pneumothorax . minimal atelectasis left base . heart size normal EOS
PREDICTION:  SOS  . left basilar atelectasis . . left basilar atelectasis . . left basilar atelectasis . . left basilar atelectasis . . left basilar atelectasis . no pneumothorax . heart size is normal . no pleural effusions . EOS


TAGS:  normal
TARGET:  SOS no pneumothorax or effusion . no pneumonia . heart size normal . lungs clear EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  normal
TARGET:  SOS no comparisons . the heart size is normal and the lungs are clear . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal li

TAGS:  atelectases opacity scarring scar atelectasis
TARGET:  SOS xxxx and vague opacity at the right lower lobe of uncertain significance . could represent subsegmental atelectasis or scarring . borderline heart size related to recent pregnancy . consider followup chest radiographs to show clearing . left lung clear . questioned some vague right lower lobe opacity on the frontal film . xxxx subsegmental atelectasis or scar seen on lateral view . cardiac silhouette borderline in size within lv contour . xxxx sulci . xxxx unremarkable . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . . left basilar opacities . the heart size and mediastinal silhouette are within normal limits for appearance . the pulmonary vascularity is within normal limits . no focal consolidation pneumothorax or pleural effusion . the visualized osseous structures appear intact . EOS


TAGS:  atelectases scarring emphysemas pulmonary_emphysema atelectasis emphysema
TARGET:  SOS chronic changes of emp

TAGS:  atelectases opacity scarring calcified_granuloma nodule atelectasis
TARGET:  SOS mild xxxx xxxx opacities with scarring no acute disease . the heart is normal in size . the mediastinum is unremarkable . there are xxxx opacities in both lung bases compatible with scarring or atelectasis . calcified granuloma in the left upper lung is noted as well as right retrocardiac calcified nodule . no significant pleural effusion is seen . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . stable left basilar opacities . the heart size is normal . the mediastinal silhouette is unremarkable . no focal consolidation pleural effusion or pneumothorax . no acute bony abnormality . EOS


TAGS:  thoracic_vertebrae kyphosis deformity calcified_granuloma osteoporoses osteoporosis kyphoses
TARGET:  SOS no acute pulmonary disease . multiple thoracic xxxx deformities xxxx due to osteoporosis . lungs appear to be clear other than a calcified granuloma on left . heart is not enlarged . ther

TAGS:  atelectases opacity atelectasis
TARGET:  SOS low lung volumes with bibasilar streaky opacities most xxxx representing subsegmental atelectasis . there are low lung volumes with bibasilar opacities xxxx representing subsegmental atelectasis . the cardio the cardiac silhouette is of the xxxx of normal . there is no pneumothorax or pleural effusion . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . . left basilar opacities . the heart size is normal . no pneumothorax . no large pleural effusion . the pulmonary vasculature is within normal limits . no focal air space opacity . EOS


TAGS:  normal
TARGET:  SOS  . chest . no active disease . . right foot . negative . two view chest . both lungs are clear and expanded . heart and mediastinum normal . right foot . hindfoot midfoot forefoot xxxx are intact with no fractures or bone destruction . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary

TAGS:  normal
TARGET:  SOS normal chest heart size normal . lungs are clear . xxxx are normal . no pneumonia effusions edema pneumothorax adenopathy nodules or masses . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  cardiomegaly
TARGET:  SOS cardiomegaly . no pulmonary edema . no parenchymal infiltrates . no xxxx of pleural effusions . no xxxx of acute or pulmonary disease . EOS
PREDICTION:  SOS cardiomegaly with no acute cardiopulmonary abnormality . the lungs are clear bilaterally . specifically no evidence of focal consolidation pneumothorax or pleural effusion . cardio mediastinal silhouette is unremarkable . visualized osseous structures of the thorax are without acute abnormality . EOS


TAGS:  granuloma calcified_granuloma
TARGET:  SOS no active disease . a calcif

TAGS:  cardiomegaly
TARGET:  SOS no evidence of active tuberculosis . mild cardiomegaly . lungs are clear . calcified hilar xxxx . no pleural effusion or pneumothorax . soft tissues and showed unremarkable . EOS
PREDICTION:  SOS cardiomegaly with no acute cardiopulmonary abnormality . the lungs are clear bilaterally . specifically no evidence of focal consolidation pneumothorax or pleural effusion . cardio mediastinal silhouette is unremarkable . visualized osseous structures of the thorax are without acute abnormality . EOS


TAGS:  
TARGET:  SOS clear lungs . lungs are clear . no pleural effusions or pneumothoraces . heart and mediastinum of normal size and contour . degenerative changes in the thoracic spine . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  aorta enlar

TAGS:  deformity sternotomy
TARGET:  SOS  . no definite evidence of metastatic disease . . age indeterminate xxxx deformity of t xxxx since study dated xxxx xxxx . . sternotomy xxxx mediastinal clips noted . heart size within normal limits . negative for focal pulmonary consolidation pleural effusion or pneumothorax . there is xxxx deformity of t xxxx since x xxxx thoracic spine xxxx xxxx . mild xxxx deformity of t stable . prior cholecystectomy . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . . . no focal consolidation pneumothorax or pleural effusion . heart size is normal . stable cardiomediastinal silhouette . xxxx intact . EOS


TAGS:  surgical_clip shoulder torso
TARGET:  SOS  . no active disease . . there are numerous small surgical clips seen overlying the upper thorax bilaterally and the lower cervical region of uncertain significance . the lungs are clear . there are multiple surgical xxxx seen near the apical regions and lower cervical region bilaterally . the h

TAGS:  osteophytes
TARGET:  SOS no radiographic evidence of tuberculosis or sarcoidosis . the heart size is normal . cardiomediastinal silhouette is normal in contour . the lungs are clear bilaterally . lateral views obscured by patient body habitus . there is no evidence of apical disease . xxxx are unchanged from previous exam and appear normal . thoracic spine shows osteophyte formations at several levels . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or large pleural effusion . no acute bone abnormality . EOS


TAGS:  lung_surgeries
TARGET:  SOS  . no acute cardiopulmonary disease . no suspicious pulmonary nodules or masses . no evidence of disease recurrence . the lungs appear clear . the heart and pulmonary xxxx are normal . the pleural spaces are clear . surgical clips and suture material are noted in the right hilar region

TAGS:  normal
TARGET:  SOS rib films . no fractures or dislocations . chest . heart size normal . lungs are clear . no effusion or pneumothorax . minimal degenerative disease thoracic spine EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  opacity
TARGET:  SOS no acute disease . the heart is normal in size . the mediastinum is unremarkable . xxxx xxxx opacity in left midlung . the lungs are clear . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . . no acute cardiopulmonary abnormality . . . stable left basilar opacities . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumothorax or pleural effusion . no acute bone abnormality . EOS


TAGS:  surgery granuloma
TARGET:  SOS no 

TAGS:  eventration renal_osteodystrophies cardiomegaly
TARGET:  SOS  . cardiomegaly and mild vascular prominence . . no evidence of acute disease . cardiomegaly is present . the upper lobe pulmonary vascularity appears mildly prominent consistent with pulmonary venous hypertension . the lungs are free of focal airspace disease . no pleural effusion or pneumothorax is seen . there is eventration of the right hemidiaphragm . bony changes of renal osteodystrophy are noted . EOS
PREDICTION:  SOS  . no acute cardiopulmonary abnormality . . mild cardiomegaly without pulmonary edema . the lungs and pleural spaces show no acute abnormality . heart size is normal . EOS


TAGS:  normal
TARGET:  SOS no acute disease . the heart is normal in size . the mediastinum is unremarkable . the lungs are clear . EOS
PREDICTION:  SOS no acute cardiopulmonary abnormality . the cardiomediastinal silhouette is within normal limits . pulmonary vasculature is within normal limits . no focal consolidation pneumot

### OSS eval - one loader, batched - WIP add loss

In [25]:
def padd_to_same_shape(output, tgt, device):
    if tgt.shape[2] > output.shape[1]:
        output_pad = torch.zeros(tgt.shape[1], tgt.shape[2] - output.shape[1] - 1,10004).to(device)
        # do not deal with tokens non-PAD after EOS as they should be PAD anyways
        output_pad[:,:,IDX_PAD] = 1
        output_padded = torch.cat(
            [output,
             output_pad
            ], dim=1).to(device)
        tgt_padded = tgt[0][:, 1:].to(device)
    elif tgt.shape[2] < output.shape[1]:
        tgt_pad = torch.zeros(tgt.shape[1], output.shape[1] - tgt.shape[2] + 1, dtype=torch.long)
        tgt_padded = torch.cat(
            [tgt[0][:, 1:]
             ,tgt_pad
            ], dim=1).to(device)
        output_padded = output
    else:
        output_padded = output
        tgt_padded = tgt[0][:, 1:].to(device)
    return output_padded, tgt_padded

In [43]:
%%time
import torch.nn as nn
model.eval()
pred_list_lg, tgt_list_lg, tag_list_lg, loss_per_batch = [], [], [], []
criterion = nn.CrossEntropyLoss(ignore_index=0)

for (src, src_key_padding_mask, tgt, tgt_key_padding_mask) in iter(val_loader):

        src = src.to(device)

        pred = IDX_SOS * torch.ones((tgt.shape[1], 1), dtype=torch.long, device=device)
        pred_mask = gen_nopeek_mask(pred.shape[1]).to(device)

        while  not (pred == IDX_EOS).any(1).all() and pred.shape[1] < max_seq_length + 1:
            #print('tgt: ', tgt.shape, ', pred: ', pred.shape, ', pred mask: ', pred_mask.shape)
            output = model(src, pred, src_key_padding_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None, tgt_mask=pred_mask)#[[-1],:])
            pred = torch.cat([pred, get_tk_from_proba(output)[:,[-1]]], dim=1)
            pred_mask = gen_nopeek_mask(pred.shape[1]).to(device)
            #print(not (pred == IDX_EOS).any(1).all(), pred.shape[1] < max_seq_length + 1, '\n')

        output_padded, tgt_padded = padd_to_same_shape(output, tgt, device)
        loss_per_batch.append(
            criterion(rearrange(output_padded, 'b t v -> (b t) v'), rearrange(tgt_padded, 'b o -> (b o)')).item())
            
        # format pred sentence output
        idx_eos = get_EOS_indices(pred)
        pred_list = [to_list_npint64(pred[i,:idx_eos[i]].tolist()+[IDX_EOS]) for i in range(pred.shape[0])]

        # format tgt sentence output
        tgt_list = [to_list_npint64(pop_padding_ts(tgt[:,[i],:]).flatten().tolist()) for i in range(tgt.shape[1])]

        # format tags
        tag_idx = src.nonzero()
        tag_list = [tag_idx[tag_idx[:,1] == i, 2] for i in range(src.shape[1])]

        # aggregate results
        pred_list_lg += pred_list
        tgt_list_lg += tgt_list
        tag_list_lg += tag_list

CPU times: user 7min 35s, sys: 19.5 s, total: 7min 55s
Wall time: 11.2 s


In [None]:
# print BLEU
print('{}BLEU: {:.2%}'.format(
    '',bleu_score(*format_list_for_bleu(pred_list_lg, tgt_list_lg))
))

# print BLEU
print('{}loss: {:.2}'.format(
    '',sum(loss_per_batch) / len(loss_per_batch))
)

# print in nat lg
print('\n')
print_nl_pred_vs_tgt(pred_list_lg, tgt_list_lg, reports_index2word, tag_list_lg, tags_index2word)

# end WIP area

### OOS eval - one sentence

In [1]:
from tsf_infer_utils import oos_infer_sent

In [33]:
(src, src_key_padding_mask, tgt, tgt_key_padding_mask) = next(iter(val_loader))

In [34]:
%%time
# select single sentence
i = 2
src_i = src[:,[i],:]
tgt_i = tgt[:,[i],:]

# run inference
pred_out, tgt_out, tag_out = oos_infer_sent(model, src_i, tgt_i, max_seq_length, device)

CPU times: user 1min 38s, sys: 4.26 s, total: 1min 42s
Wall time: 2.54 s


In [39]:
# print BLEU
print('{}BLEU: {:.2%}\n\n'.format(
    '',bleu_score(*format_list_for_bleu(pred_out, tgt_out))
))

# print sent lengths
print('target length: {} words, pred legnth: {} words\n'.format(len(tgt_out[0]), len(pred_out[0])))

# print in nat lg
print_nl_pred_vs_tgt(pred_out, tgt_out, reports_index2word, tag_out, tags_index2word)

BLEU: 0.00%


target length: 10 words, pred legnth: 97 words

TAGS:  normal
TARGET:  SOS heart size normal . lungs are clear . EOS
PREDICTION:  SOS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .




### OSS eval - one loader, sent-by-sent

In [41]:
from tsf_infer_utils import oos_infer

In [42]:
%%time
model.eval()
pred_list, tgt_list, tag_list = oos_infer(model, val_loader, max_seq_length)

CPU times: user 2h 3min 52s, sys: 3min 53s, total: 2h 7min 46s
Wall time: 2min 54s


In [43]:
# print BLEU
print('{}BLEU: {:.2%}\n\n'.format(
    '',bleu_score(*format_list_for_bleu(pred_list, tgt_list))
))

BLEU: 13.79%




### test the model with our previous way to eval

In [15]:
from tsf_utils import get_tk_from_proba, format_list_for_bleu

In [16]:
    %%time
    pred_list = []
    tgt_list = []
    device = model.embed_tgt.weight.device
    
    for (src, src_key_padding_mask, tgt, tgt_key_padding_mask) in iter(val_loader):
        
        # prepare inputs
        src, src_key_padding_mask, tgt, tgt_key_padding_mask, memory_key_padding_mask, tgt_inp, tgt_out, tgt_mask = prep_transf_inputs(
            src, src_key_padding_mask, tgt, tgt_key_padding_mask, device)

        # run inference
        outputs = model(src, tgt_inp, src_key_padding_mask, tgt_key_padding_mask[:, :-1], memory_key_padding_mask, tgt_mask)

        # get predictions from proba
        pred = get_tk_from_proba(outputs)
        
        # get pred and ground truth ready for metric eval
        pred_list += [list(pred[row, :].cpu().numpy()) for row in range(pred.shape[0])]
        tgt_list += [list(tgt[row, :].cpu().numpy()) for row in range(pred.shape[0])]

CPU times: user 14 s, sys: 764 ms, total: 14.8 s
Wall time: 549 ms


In [17]:
pred_list_bleu, tgt_list_bleu = format_list_for_bleu(pred_list, tgt_list)
print('{} BLEU: {:.2%}'.format(
                '\tval',bleu_score(pred_list_bleu, tgt_list_bleu)))

	val BLEU: 0.00%
