# ChestX-Ray data

In [1]:
import numpy as np
import data.preprocessing as pr

# Load the report and projection tables exposed by the preprocessing module.
reports = pr.reports
projections = pr.projections

# Draw a small, reproducible subset of study ids to keep iteration fast.
# A dedicated seeded generator makes the subset identical on every
# Restart & Run All; the sample size is clamped so that a dataset with fewer
# ids than requested does not raise inside `choice(..., replace=False)`.
# NOTE(review): any randomness inside `pr.create_dataloaders` (e.g. the
# train/val/test split) is not controlled by this generator -- confirm.
SEED = 42
N_SAMPLES = 100
rng = np.random.default_rng(SEED)
all_uids = np.unique(projections.index)
uids = rng.choice(all_uids, min(N_SAMPLES, len(all_uids)), replace=False)

# Build the datasets and their loaders for each split.
(
    train_data, train_loader,
    val_data, val_loader,
    test_data, test_loader,
) = pr.create_dataloaders(uids, projections, reports, pr.IMAGES_PATH)

# The model vocabulary covers every token seen in the train or validation split.
vocab_size = len(train_data.vocab.union(val_data.vocab))

# Get reports as text sequences (each split uses its own `</s>` / `<pad>` ids).
train_reports = pr.get_sequences(train_data, train_data.word2idx['</s>'], train_data.word2idx['<pad>'])
val_reports = pr.get_sequences(val_data, val_data.word2idx['</s>'], val_data.word2idx['<pad>'])

[nltk_data] Downloading package punkt to /home/mdelucasg/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Generating data: 34/60

In [None]:
# Group the token sequences of each split into batches of 32
# (`bachify` is the helper's actual name in the preprocessing module).
t_loader, v_loader = (
    pr.bachify(sequences, batch_size=32)
    for sequences in (train_reports, val_reports)
)

# Decode one sequence from the first batch of each split as a visual check:
# the first training sequence and the second validation sequence.
pr.print_sequence(t_loader[0][0], train_data.idx2word)
pr.print_sequence(v_loader[0][1], val_data.idx2word)

<s> Lungs are overall hyperexpanded with flattening of the diaphragms . Lungs are clear without focal airspace disease . No pleural effusions or pneumothoraces . Heart and mediastinum of normal size and contour . degenerative changes within the spine . There are expansile changes within the right clavicle which were seen on the previous XXXX/CT . Findings are consistent with changes of multiple myeloma . </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> </s>
<s> XXXX sternotomy XXXX and numerous mediastinal clips appear stable in position . There is aortic atherosclerotic calcification . The thoracic aorta is tortuous . Stable widening of the upper mediastinum . Stable cardiomegaly . Prominent mitral annular calcification demonstrated on the lateral view . No pneumothorax , pleural effusion or airspace consolidation . XXXX XXXX appear 

# Training

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import models.training as tr
import models.reportnet as M

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
# NOTE(review): `device` is not handed to the model or to `tr.fit` in this
# cell -- confirm it is consumed elsewhere, or move the model explicitly.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Model: transformer whose embedding width is tied to twice the vocabulary size.
# Requires `vocab_size` from the data-loading cell to exist in the kernel.
model = M.Transformer(
    num_tokens=vocab_size,
    dim_model=2 * vocab_size,
    num_heads=2,
    num_encoder_layers=3,
    num_decoder_layers=3,
    dropout_p=0.3,
)

# Training setup: padding positions are excluded from the loss via `ignore_index`.
criterion = nn.CrossEntropyLoss(ignore_index=train_data.word2idx['<pad>'])
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Last positional argument is presumably the number of epochs -- TODO confirm
# against `models.training.fit`.
trainer = tr.fit(model, optimizer, criterion, t_loader, v_loader, 2)

NameError: name 'vocab_size' is not defined

# Predict

In [None]:
# Inspect the first validation report: raw token ids first, then the decoded
# text. Direct indexing replaces the former enumerate-and-break loop, whose
# counter was never used.
example = val_reports[0]
print(example)

pr.print_sequence(example, val_data.idx2word)

[78, 132, 172, 174, 51, 168, 190, 17, 121, 163, 179, 184, 168, 25, 163, 44, 56, 168, 107, 163, 64, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 64]
<s> Cardiac and mediastinal contours are within normal limits . The lungs are clear . Bony structures are intact . </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pa

In [None]:
# Sanity-check generation on the first *training* report only: the loop
# stops after a single pass because of the trailing `break`.
for idx, example in enumerate(train_reports):
    # Wrapping the sequence in a list adds a leading dimension of size 1.
    source = torch.tensor([example])
    result = tr.predict(model, source)

    print(f"Example {idx}")
    decode = train_data.idx2word
    pr.print_sequence(example, decode)  # reference report
    pr.print_sequence(result, decode)   # model output
    break

Example 0
<s> Lungs are overall hyperexpanded with flattening of the diaphragms . Lungs are clear without focal airspace disease . No pleural effusions or pneumothoraces . Heart and mediastinum of normal size and contour . degenerative changes within the spine . There are expansile changes within the right clavicle which were seen on the previous XXXX/CT . Findings are consistent with changes of multiple myeloma . </s> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> <pad> </s>
recess </s> </s> </s> </s> </s> </s> </s> </s> </s> </s> </s> </s> </s> </s> </s>


In [None]:
# Scratch check of the tensor handed to `tr.predict`: wrapping `example` in a
# list adds a leading dimension of size 1.
# Depends on kernel state from earlier cells: `torch` must already be imported
# and `example` must have been set by a preceding loop.
torch.tensor([example])

NameError: name 'torch' is not defined