In [None]:
!pip install deeplake[enterprise]

In [None]:
# Mount Google Drive at /content/drive (Colab only); the checkpoint paths used
# later in this notebook live beneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import deeplake
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, models

In [None]:
# Load the NIH Chest X-ray train and test datasets from the Activeloop hub.
train_ds = deeplake.load('hub://activeloop/nih-chest-xray-train')
test_ds = deeplake.load('hub://activeloop/nih-chest-xray-test')

# Query the complete training set with per-finding sampling weights
# (Hernia 20, Pneumonia 8, Fibrosis 5, Edema 5, Emphysema 2, everything else 1).
# NOTE(review): balanced_view is not consumed by any cell visible in this notebook -
# confirm whether training was meant to draw from it.
balanced_view = train_ds.query("select * sample by max_weight(contains(findings, 'Hernia'): 20, contains(findings, 'Pneumonia'): 8, contains(findings, 'Fibrosis'): 5, contains(findings, 'Edema'): 5, contains(findings, 'Emphysema'): 2, True: 1)")

# Rebind train_ds to 80% of the loaded training data and keep the other 20% as val_ds.
# NOTE(review): no seed is set before the split, so split membership presumably
# differs between runs - confirm.
train_ds, val_ds = train_ds.random_split([0.8, 0.2])

\

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/nih-chest-xray-train



-

hub://activeloop/nih-chest-xray-train loaded successfully.



|

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/nih-chest-xray-test



/

hub://activeloop/nih-chest-xray-test loaded successfully.



  

In [None]:
# Report the number of samples in each split, in train / validation / test order.
for split_name, split_ds in (('train', train_ds), ('validation', val_ds), ('test', test_ds)):
    print(f'Size of {split_name} dataset: {len(split_ds)}')

Size of train dataset: 69220
Size of validation dataset: 17304
Size of test dataset: 25596


In [None]:
# Class names are read from the metadata attached to the 'findings' tensor.
classes_labels = train_ds.findings.info.class_names
num_classes_found = len(classes_labels)
print(f'Number of classes: {num_classes_found}')

# One "<index>. <name>" line per class, in label-index order.
for class_index, class_name in enumerate(classes_labels):
    print(f'{class_index}. {class_name}')

Number of classes: 15
0. No Finding
1. Hernia
2. Emphysema
3. Nodule
4. Pneumonia
5. Consolidation
6. Cardiomegaly
7. Effusion
8. Mass
9. Pleural_Thickening
10. Atelectasis
11. Pneumothorax
12. Fibrosis
13. Infiltration
14. Edema


In [None]:
# Per-image preprocessing applied by the data loaders defined later in this cell.
# The 224x224 target size matches the 64 * 28 * 28 flattened feature size that
# MultiLabelCNN hard-codes after its three 2x2 poolings.
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # fixed spatial size
    transforms.Grayscale(num_output_channels=1),  # keep a single channel
    transforms.ToTensor(),  # image -> tensor
    transforms.Normalize([0.5], [0.5]),  # (x - 0.5) / 0.5 on the single channel
])

def findings_transform(findings_list, num_classes=None):
    """Convert a sequence of class indices into a multi-hot float vector.

    Args:
        findings_list: iterable of integer class indices that are present in
            the sample (may be empty).
        num_classes: length of the returned vector. When omitted, the length
            of the notebook-level ``classes_labels`` list is used, which keeps
            the single-argument call made by the data loaders working.

    Returns:
        torch.Tensor: 1-D float32 tensor with 1.0 at every listed index and
        0.0 elsewhere.
    """
    if num_classes is None:
        # Backward-compatible default: size the vector from the class list
        # defined elsewhere in the notebook.
        num_classes = len(classes_labels)

    multi_hot_encoded = torch.zeros(num_classes, dtype=torch.float32)
    for index in findings_list:
        # int() accepts plain ints as well as NumPy integer scalars.
        multi_hot_encoded[int(index)] = 1.0

    return multi_hot_encoded

batch_size = 128
num_workers = 2


def _build_loader(dataset, shuffle=False):
    """Build the PyTorch data loader shared by the train, validation and test splits.

    Args:
        dataset: Deep Lake dataset or view to iterate over.
        shuffle: value forwarded to the loader's ``shuffle`` step.

    Returns:
        A loader whose batches expose transformed 'images' and 'findings'
        entries; images are decoded as PIL before ``image_transform`` runs.
    """
    return (
        dataset.dataloader()
        .transform({'images': image_transform, 'findings': findings_transform})
        .batch(batch_size)
        .shuffle(shuffle)
        .pytorch(num_workers=num_workers, decode_method={'images': 'pil'})
    )


# All three splits go through the same pipeline, so it is defined once above.
# NOTE(review): the training loader is not shuffled either (kept as in the
# original cell) - confirm that a fixed sample order per epoch is intended.
train_loader = _build_loader(train_ds)
val_loader = _build_loader(val_ds)
test_loader = _build_loader(test_ds)

In [None]:
def train_one_epoch(model, optimizer, data_loader, device, threshold, loss_fn=None):
  """Run one optimisation pass over ``data_loader`` and print progress.

  Args:
    model: network to train; its outputs are compared against ``threshold``
      and passed to the loss together with the labels.
    optimizer: optimizer that updates the model parameters.
    data_loader: iterable of batches, each a mapping with an 'images' and a
      'findings' tensor.
    device: device every batch is moved to before the forward pass.
    threshold: outputs strictly greater than this value count as a positive
      prediction for the running accuracy.
    loss_fn: callable ``loss_fn(outputs, labels)``. When omitted, the
      notebook-level ``criterion`` is used, as before.

  Returns:
    float: mean of the per-batch losses, or 0.0 when the loader yields no
    batches.
  """
  if loss_fn is None:
    # Backward-compatible default: the loss object defined at notebook level.
    loss_fn = criterion

  # Set the model to training mode.
  model.train()

  total_loss = 0.0
  num_batches = 0
  total = 0
  correct = 0
  start_time = time.time()

  for i, data in enumerate(data_loader):
    inputs = data['images'].to(device)
    labels = data['findings'].to(device)

    optimizer.zero_grad()

    outputs = model(inputs)
    loss = loss_fn(outputs, labels)
    loss.backward()
    optimizer.step()

    # Binary predictions; a sample only counts as correct when its entire
    # label vector matches.
    predicted = (outputs > threshold).float()
    correct += (predicted == labels).all(dim=1).float().sum().item()
    total += labels.size(0)

    total_loss += loss.item()
    num_batches += 1

    if i % 100 == 0 and i > 0:
      # Guard against a zero elapsed time on very coarse clocks.
      elapsed_time = max(time.time() - start_time, 1e-9)
      speed = total / elapsed_time
      accuracy = 100 * correct / total
      running_loss = total_loss / num_batches
      print(f'[{i}]: Average loss so far: {running_loss:.4f}, Speed: {speed:.2f} Samples/s, Average accuracy so far: {accuracy:.2f}%')

  if num_batches == 0:
    # Nothing was processed; avoid dividing by zero.
    print('Epoch completed. No batches were processed.')
    return 0.0

  # Average over the batches actually seen, so loaders without __len__ work too.
  average_loss = total_loss / num_batches
  print(f'Epoch completed. Average loss: {average_loss:.4f}')
  return average_loss

In [None]:
def test_model(model, data_loader, threshold):
  model.eval()

  total = 0
  correct = 0
  last_10_correct = 0
  last_10_total = 0

  # Disable gradient calculation for faster evaluation
  with torch.no_grad():
    for i, data in enumerate(data_loader):
      inputs = data['images']
      labels = data['findings']

      inputs = inputs.to(device)
      labels = labels.to(device)

      outputs = model(inputs)

      # Compute binary predictions by thresholding the output probabilities
      predicted = (outputs > threshold).float()
      # Calculate the number of correct predictions in the current batch
      batch_correct = (predicted == labels).all(dim=1).float().sum().item()
      # Calculate the total number of samples in the current batch
      batch_total = labels.size(0)

      correct += batch_correct
      total += batch_total

      last_10_correct += batch_correct
      last_10_total += batch_total

      if (i + 1) % 50 == 0 and i > 0:
        last_10_accuracy = 100 * last_10_correct / last_10_total
        average_accuracy = 100 * correct / total

        print(f'[{i + 1}]: Last 50 batches accuracy: {last_10_accuracy:.2f}%, Average accuracy so far: {average_accuracy:.2f}%')

        last_10_correct = 0
        last_10_total = 0

  accuracy = 100 * correct / total
  print('Finished Testing')
  print(f'Testing accuracy: {accuracy:.2f}%')

In [None]:
def save_model(model, optimizer, epoch, save_path, model_name):
  """Write a training checkpoint to ``<save_path>/<model_name>_epoch_<epoch>.pth``.

  Args:
    model: module whose ``state_dict`` is stored.
    optimizer: optimizer whose ``state_dict`` is stored.
    epoch: epoch number recorded in the checkpoint and in the file name.
    save_path: target directory; created (including parents) if missing.
    model_name: file name prefix.

  Returns:
    str: path of the checkpoint file that was written.
  """
  # exist_ok avoids the check-then-create race of a separate exists() test.
  if save_path:
    os.makedirs(save_path, exist_ok=True)

  # Create the full path for the saved model
  model_file = os.path.join(save_path, f"{model_name}_epoch_{epoch}.pth")

  # Save the epoch together with the model and optimizer state_dicts
  torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
  }, model_file)

  print(f"Model saved: {model_file}")
  return model_file

In [None]:
def load_model(model, optimizer, load_path, device):
  """Restore model and optimizer state from a checkpoint file.

  Args:
    model: module that receives the stored ``model_state_dict``.
    optimizer: optimizer that receives the stored ``optimizer_state_dict``.
    load_path: path of a checkpoint holding the keys 'epoch',
      'model_state_dict' and 'optimizer_state_dict'.
    device: device the checkpoint tensors are mapped to and the model is
      moved to.

  Returns:
    The epoch value stored in the checkpoint.
  """
  # map_location lets a checkpoint saved on a GPU be restored on a CPU-only
  # host (and vice versa).
  # NOTE: torch.load unpickles the file - only load checkpoints you trust.
  checkpoint = torch.load(load_path, map_location=device)

  model.load_state_dict(checkpoint['model_state_dict'])

  # Move the model before restoring the optimizer so the optimizer state is
  # placed on the same device as the parameters it belongs to.
  model.to(device)
  optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

  # Epoch recorded when the checkpoint was written
  start_epoch = checkpoint['epoch']

  print(f"Model loaded: {load_path}, starting from epoch {start_epoch}")
  return start_epoch

# Usage example:
#load_path = "/content/drive/MyDrive/SSN_Projekt/Saved_Models/MultiLabelCNN_epoch_1.pth"
#load_model(model, optimizer, load_path, device)

In [None]:
class MultiLabelCNN(nn.Module):
    """Small convolutional network producing one probability per label.

    Three 3x3 convolutions (1 -> 16 -> 32 -> 64 channels), each followed by
    ReLU and 2x2 max pooling, feed two fully connected layers. The output is
    passed through a sigmoid, so every value lies in [0, 1].

    Args:
        num_labels: number of output probabilities.
        input_size: height and width of the square, single-channel input
            images. The default of 224 reproduces the original
            64 * 28 * 28 flattened feature size.
    """

    def __init__(self, num_labels=15, input_size=224):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # Max pooling is applied 3 times, each halving (rounding down) the
        # spatial size, hence the division by 8.
        pooled_size = input_size // 8
        self.flat_features = 64 * pooled_size * pooled_size
        self.fc1 = nn.Linear(self.flat_features, 512)
        self.fc2 = nn.Linear(512, num_labels)
        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        """Return per-label probabilities for a batch of images.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not match the
                ``input_size`` the network was built for.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, N),
        # this never folds extra spatial elements into the batch dimension;
        # a wrongly sized input fails at fc1 instead of silently changing
        # the batch size.
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return torch.sigmoid(x)

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Running on {device}')

# Build the network and place its parameters on the selected device.
model = MultiLabelCNN()
model = model.to(device)

# Binary cross-entropy loss, optimised with Adam.
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

Running on cuda


In [None]:
num_epochs = 3
save_path = "/content/drive/MyDrive/SSN_Projekt/Saved_Models"
model_name = "CustomNetwork"

# Epochs are numbered from 1 in both the log banner and the checkpoint file name.
for epoch_number in range(1, num_epochs + 1):
    print(f'------------------ Training Epoch {epoch_number} ------------------')
    train_one_epoch(model, optimizer, train_loader, device, threshold=0.5)

    # Keep one checkpoint per epoch so each can be evaluated separately later.
    save_model(model, optimizer, epoch_number, save_path, model_name)

print('Finished Training')

# Score the final weights on the held-out test split.
print('------------------ Testing ------------------')
test_model(model, test_loader, threshold=0.5)

------------------ Training Epoch 1 ------------------
[100]: Average loss so far: 0.2076, Speed: 94.45 Samples/s, Average accuracy so far: 42.89%
[200]: Average loss so far: 0.1984, Speed: 96.15 Samples/s, Average accuracy so far: 46.01%
[300]: Average loss so far: 0.1967, Speed: 96.87 Samples/s, Average accuracy so far: 46.29%
[400]: Average loss so far: 0.1960, Speed: 97.27 Samples/s, Average accuracy so far: 45.86%
[500]: Average loss so far: 0.1949, Speed: 97.91 Samples/s, Average accuracy so far: 45.89%
Epoch completed. Average loss: 0.1948
Model saved: /content/drive/MyDrive/SSN_Projekt/Saved_Models/CustomNetwork_epoch_1.pth
------------------ Training Epoch 2 ------------------
[100]: Average loss so far: 0.1897, Speed: 98.00 Samples/s, Average accuracy so far: 48.33%
[200]: Average loss so far: 0.1869, Speed: 100.14 Samples/s, Average accuracy so far: 48.86%
[300]: Average loss so far: 0.1878, Speed: 100.73 Samples/s, Average accuracy so far: 48.30%
[400]: Average loss so far:

In [None]:
# Restore the checkpoint written after epoch 1 and score it on the test split.
load_path = "/content/drive/MyDrive/SSN_Projekt/Saved_Models/CustomNetwork_epoch_1.pth"
load_model(model, optimizer, load_path=load_path, device=device)

print('------------------ Testing ------------------')
test_model(model, data_loader=test_loader, threshold=0.5)

Model loaded: /content/drive/MyDrive/SSN_Projekt/Saved_Models/CustomNetwork_epoch_1.pth, starting from epoch 1
------------------ Testing ------------------
[50]: Last 50 batches accuracy: 19.56%, Average accuracy so far: 19.56%
[100]: Last 50 batches accuracy: 17.34%, Average accuracy so far: 18.45%
[150]: Last 50 batches accuracy: 15.52%, Average accuracy so far: 17.47%
[200]: Last 50 batches accuracy: 30.96%, Average accuracy so far: 20.84%
Finished Testing
Testing accuracy: 20.84%


In [None]:
# Restore the checkpoint written after epoch 2 and score it on the test split.
load_path = "/content/drive/MyDrive/SSN_Projekt/Saved_Models/CustomNetwork_epoch_2.pth"
load_model(model, optimizer, load_path=load_path, device=device)

print('------------------ Testing ------------------')
test_model(model, data_loader=test_loader, threshold=0.5)

Model loaded: /content/drive/MyDrive/SSN_Projekt/Saved_Models/CustomNetwork_epoch_2.pth, starting from epoch 2
------------------ Testing ------------------
[50]: Last 50 batches accuracy: 18.56%, Average accuracy so far: 18.56%
[100]: Last 50 batches accuracy: 16.86%, Average accuracy so far: 17.71%
[150]: Last 50 batches accuracy: 14.72%, Average accuracy so far: 16.71%
[200]: Last 50 batches accuracy: 32.08%, Average accuracy so far: 20.55%
Finished Testing
Testing accuracy: 20.55%
