In [1]:
import os
import torch
import matplotlib.pyplot as plt
import numpy as np
from torchvision import transforms
from torchsummary import summary

from oct_dataset import OCTDataset

In [2]:
# Pick the compute device once for the whole notebook. Fall back to the CPU
# when no CUDA device is visible so that `device` is always defined (the
# previous version raised NameError on CPU-only machines).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [13]:
%load_ext tensorboard
%tensorboard --logdir lightning_logs --port 6006

In [4]:
# Build the preprocessing pipeline from a private alias of the torchvision
# module. The public name `transforms` is still rebound to the pipeline because
# later cells pass it as `transform=transforms`, but this cell can now be
# re-run safely: it no longer needs `transforms` to still be the module.
import torchvision.transforms as _tv_transforms

transforms = _tv_transforms.Compose([_tv_transforms.ToTensor()])

In [5]:
# Run configuration shared by the cells below (data loaders, model, optimizer,
# training loop).
# NOTE(review): the recorded training run in this notebook hit a CUDA
# out-of-memory error on a 6 GiB GPU with these settings; consider a smaller
# batch size on similar hardware.
hparams = dict(
    batch_size=32,
    learning_rate=2e-3,
    input_size=1 * 1024 * 512,  # presumably channels * height * width of one input
    in_channels=1,
    out_channels=5,
    device=device,
    epochs=10,
    weight_decay=1e-5,
)

In [6]:
# One dataset per split; each sample goes through the shared `transforms`
# pipeline. Batches are unpacked as (images, masks) by the cells below.
train_dataset = OCTDataset(root_dir='data/train_data', transform=transforms)
test_dataset = OCTDataset(root_dir='data/test_data', transform=transforms)
val_dataset = OCTDataset(root_dir='data/val_data', transform=transforms)

# Only the training loader is shuffled. Evaluation loaders keep a fixed order
# so validation/test passes (and the per-step validation curve logged to
# TensorBoard) are reproducible from run to run.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=hparams['batch_size'], shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=hparams['batch_size'], shuffle=False)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=hparams['batch_size'], shuffle=False)

In [None]:
def visualize_data(dataloader):
    """Show the first sample of the first batch: image on the left, mask on the right.

    Args:
        dataloader: iterable yielding ``(images, masks)`` batches. The first
            element of each is permuted with ``(1, 2, 0)`` before plotting, so
            both are assumed to be channel-first 3-D tensors -- TODO confirm
            for the masks.

    Returns:
        None. Does nothing when the loader yields no batches.
    """
    # Only the first batch is needed; an empty loader is silently skipped.
    batch = next(iter(dataloader), None)
    if batch is None:
        return

    images, masks = batch
    image = images[0].permute(1, 2, 0).numpy()
    mask = masks[0].permute(1, 2, 0).numpy()

    # Two separate axes. Previously both panels were drawn into subplot
    # (1, 2, 1), so the mask painted over the image and replaced its title
    # while the second panel stayed unused.
    _fig, (image_ax, mask_ax) = plt.subplots(1, 2)

    image_ax.imshow(image, cmap='gray')
    image_ax.set_title('Image')
    image_ax.axis('off')

    mask_ax.imshow(mask, cmap='viridis', alpha=0.5)  # Adjust cmap based on your segmentation task
    mask_ax.set_title('Segmentation Mask')
    mask_ax.axis('off')

    plt.show()

# Preview one sample from each split, in train / test / val order.
for _loader in (train_dataloader, test_dataloader, val_dataloader):
    visualize_data(_loader)

In [7]:
from seg_model import UNet

# Build the U-Net with the channel counts from `hparams` and move it to the
# selected device.
model = UNet(hparams["in_channels"], hparams['out_channels'])
model.to(device)

# Print a per-layer summary for a 1024 x 512 input. The channel count and
# batch size come from `hparams` so the table stays in sync with the
# configuration instead of duplicating the numbers here.
summary(model, input_size=(hparams["in_channels"], 1024, 512), batch_size=hparams["batch_size"])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1        [32, 32, 1024, 512]             320
       BatchNorm2d-2        [32, 32, 1024, 512]              64
              ReLU-3        [32, 32, 1024, 512]               0
         MaxPool2d-4         [32, 32, 512, 256]               0
            Conv2d-5         [32, 32, 512, 256]           9,248
       BatchNorm2d-6         [32, 32, 512, 256]              64
              ReLU-7         [32, 32, 512, 256]               0
         MaxPool2d-8         [32, 32, 256, 128]               0
            Conv2d-9         [32, 64, 256, 128]          18,496
      BatchNorm2d-10         [32, 64, 256, 128]             128
             ReLU-11         [32, 64, 256, 128]               0
        MaxPool2d-12          [32, 64, 128, 64]               0
           Conv2d-13          [32, 64, 128, 64]          36,928
      BatchNorm2d-14          [32, 64, 

  total_output += np.prod(summary[layer]["output_shape"])


In [11]:
def train(model, train_dataloader, val_dataloader, criterion, optimizer, device, num_epochs):
    """Run a plain train/validate loop and print the mean losses after every epoch.

    Args:
        model: ``torch.nn.Module`` to optimise; it is moved to ``device`` first.
        train_dataloader: sized iterable of ``(images, masks)`` batches used
            for the gradient steps.
        val_dataloader: sized iterable of ``(images, masks)`` batches used for
            the gradient-free validation pass.
        criterion: callable ``criterion(outputs, masks)`` returning a scalar
            loss tensor.
        optimizer: optimizer already bound to ``model``'s parameters.
        device: device the model and every batch are moved to.
        num_epochs: number of passes over ``train_dataloader``.

    Returns:
        None. Progress is reported through ``print``.
    """
    model.to(device)

    for epoch in range(num_epochs):
        # Re-enter training mode at the start of every epoch. The validation
        # pass below switches the model to eval mode, so calling train() only
        # once before the loop left all epochs after the first running with
        # eval-mode layers.
        model.train()
        train_loss = 0.0

        # Training step
        for images, masks in train_dataloader:
            images = images.to(device)
            masks = masks.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, masks)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        train_loss /= max(len(train_dataloader), 1)

        # Validation step
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, masks in val_dataloader:
                images = images.to(device)
                masks = masks.to(device)

                outputs = model(images)
                loss = criterion(outputs, masks)

                val_loss += loss.item()

        val_loss /= max(len(val_dataloader), 1)

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    print("Training complete!")


In [12]:
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
 

def create_tqdm_bar(iterable, desc):
    """Wrap ``iterable`` in a 150-column tqdm bar that yields ``(index, item)`` pairs.

    ``iterable`` must support ``len()``, which is used as the bar's total.
    """
    indexed_items = enumerate(iterable)
    return tqdm(indexed_items, total=len(iterable), ncols=150, desc=desc)


def train_model(model, train_loader, val_loader, loss_func, tb_logger, optimizer, epochs=10, name="default"):
    """
    Train ``model`` for ``epochs`` epochs, validating after each one.

    Every training and validation batch loss is written to ``tb_logger`` under
    ``classifier_<name>/train_loss`` and ``classifier_<name>/val_loss``. The
    learning rate is decayed by a StepLR scheduler that is stepped once per
    training batch.

    Reads the notebook-level ``device`` (where batches are moved) and
    ``hparams`` (optional ``'gamma'`` decay factor, default 0.8).

    Args:
        model: ``torch.nn.Module`` to train; expected to already be on ``device``.
        train_loader: sized iterable of ``(images, labels)`` training batches.
        val_loader: sized iterable of ``(images, labels)`` validation batches.
        loss_func: callable ``loss_func(pred, labels)`` returning a scalar loss tensor.
        tb_logger: object exposing ``add_scalar(tag, value, step)``.
        optimizer: optimizer bound to ``model``'s parameters.
        epochs: number of passes over ``train_loader``.
        name: suffix used in the logged scalar tags.

    Returns:
        None.
    """
    # Number of most recent batch losses averaged for the progress bar
    # (about 10% of an epoch). Kept >= 1: with a cutoff of 0 the slice
    # `[-0:]` below would keep the whole history instead of a window.
    loss_cutoff = max(1, len(train_loader) // 10)

    # The scheduler is stepped after every batch, so `step_size` is counted in
    # batches and splits the run into roughly five decay steps. It is kept
    # >= 1 because StepLR divides by it and short runs would otherwise give 0.
    step_size = max(1, int(epochs * len(train_loader) / 5))
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=hparams.get('gamma', 0.8))

    for epoch in range(epochs):

        # Validation below switches to eval mode, so re-enable training mode
        # at the start of every epoch.
        model.train()

        training_loss = []
        validation_loss = []

        # Create a progress bar for the training loop.
        training_loop = create_tqdm_bar(train_loader, desc=f'Training Epoch [{epoch + 1}/{epochs}]')
        for train_iteration, batch in training_loop:
            optimizer.zero_grad()
            images, labels = batch
            images, labels = images.to(device), labels.to(device)

            pred = model(images)
            loss = loss_func(pred, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()

            # Keep only the trailing window of losses for the running mean.
            training_loss.append(loss.item())
            training_loss = training_loss[-loss_cutoff:]

            training_loop.set_postfix(curr_train_loss = "{:.8f}".format(np.mean(training_loss)),
                                      lr = "{:.8f}".format(optimizer.param_groups[0]['lr'])
            )

            # Global step = batches seen so far across all epochs.
            tb_logger.add_scalar(f'classifier_{name}/train_loss', loss.item(), epoch * len(train_loader) + train_iteration)

        model.eval()
        val_loop = create_tqdm_bar(val_loader, desc=f'Validation Epoch [{epoch + 1}/{epochs}]')

        with torch.no_grad():
            for val_iteration, batch in val_loop:
                images, labels = batch
                images, labels = images.to(device), labels.to(device)

                pred = model(images)
                loss = loss_func(pred, labels)
                validation_loss.append(loss.item())

                val_loop.set_postfix(val_loss = "{:.8f}".format(np.mean(validation_loss)))

                tb_logger.add_scalar(f'classifier_{name}/val_loss', loss.item(), epoch * len(val_loader) + val_iteration)
        

In [14]:
def test(model, dataloader):
    test_scores = []
    model.eval()
    model = model.to(device)
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)

        outputs = model.forward(inputs)
        _, preds = torch.max(outputs, 1)
        targets_mask = (targets >= 0).cpu()
        test_scores.append(np.mean((preds.cpu() == targets.cpu())[targets_mask].numpy()))

    return np.mean(test_scores)

In [15]:
# Loss and optimizer for the segmentation model, configured from `hparams`.
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=hparams['learning_rate'],
    weight_decay=hparams['weight_decay'],
)

# Each run logs to its own `logs/run_<k>` folder, where k is one more than
# the number of entries currently inside `logs` (0 when it does not exist yet).
path = 'logs'
num_of_runs = 0
if os.path.exists(path):
    num_of_runs = len(os.listdir(path))
path = os.path.join(path, f'run_{num_of_runs + 1}')
tb_logger = SummaryWriter(path)

In [16]:
# Launch training with the objects configured in the previous cells.
train_model(
    model=model,
    train_loader=train_dataloader,
    val_loader=val_dataloader,
    loss_func=loss,
    tb_logger=tb_logger,
    optimizer=optimizer,
    epochs=hparams['epochs'],
    name='oct seg model v1',
)

Training Epoch [1/10]:   0%|                                                                                                  | 0/116 [00:19<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 GiB. GPU 0 has a total capacty of 6.00 GiB of which 0 bytes is free. Of the allocated memory 10.28 GiB is allocated by PyTorch, and 84.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [10]:
# Toy check of how per-class scores turn into a label map: softmax over the
# class dimension (dim=1), then the index of the largest probability per pixel.
np.random.seed(45)
input_data = np.random.rand(1, 5, 2, 2)
print(input_data)
softmaxed = torch.nn.functional.softmax(torch.tensor(input_data), dim=1)
print('softmaxed:\n', softmaxed)
# Bound to `max_indices` rather than `max` so the builtin `max` stays usable
# for the rest of the kernel session.
_, max_indices = torch.max(softmaxed, dim=1)
print('get the max: \n', max_indices)
print(max_indices.shape)


[[[[0.98901151 0.54954473]
   [0.2814473  0.07728957]]

  [[0.4444695  0.47280797]
   [0.048522   0.16332445]]

  [[0.11595071 0.62739168]
   [0.85618205 0.65010242]]

  [[0.99072168 0.47035075]
   [0.61829448 0.28266721]]

  [[0.97600332 0.673068  ]
   [0.44053089 0.28968734]]]]
softmaxed:
 tensor([[[[0.2508, 0.1975],
          [0.1628, 0.1580]],

         [[0.1455, 0.1829],
          [0.1290, 0.1722]],

         [[0.1048, 0.2135],
          [0.2893, 0.2802]],

         [[0.2513, 0.1825],
          [0.2280, 0.1941]],

         [[0.2476, 0.2235],
          [0.1909, 0.1954]]]], dtype=torch.float64)
get the max: 
 tensor([[[3, 4],
         [2, 2]]])
torch.Size([1, 2, 2])
