In [1]:
# Reload edited project modules automatically before each cell executes,
# so changes to the local .py helpers are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
import os

# Switch between the local dataset copy (False) and a mounted Google Drive (True).
USE_DRIVE = False

if not USE_DRIVE:
    # Local layout: data/256x256/{train,masks}
    IMAGES_PATH = os.path.join('data', '256x256', 'train')
    MASKS_PATH = os.path.join('data', '256x256', 'masks')
else:
    # Colab only: the import is kept inside the branch so the notebook still
    # runs on machines where google.colab is not installed.
    from google.colab import drive

    drive.mount('/content/gdrive')
    ROOT_DRIVE = '/content/gdrive/MyDrive/'
    DRIVE_FOLDER_PATH = os.path.join(ROOT_DRIVE, 'HuBMAP')
    # Drive layout: <MyDrive>/HuBMAP/hubmap-256x256/{train,masks}
    IMAGES_PATH, MASKS_PATH = (
        os.path.join(DRIVE_FOLDER_PATH, 'hubmap-256x256', subfolder)
        for subfolder in ('train', 'masks')
    )

# Data analysis and exploration

## Images

In [3]:
from easyimages import EasyImageList

# Build an image collection from every file found in IMAGES_PATH.
image_list = EasyImageList.from_folder(IMAGES_PATH)
# Last expression of the cell, so its result is what the notebook displays.
# NOTE(review): presumably renders a sample of 51 images as an HTML grid at
# 256 px per thumbnail -- confirm against the easyimages documentation.
image_list.html(sample=51, size=256)

In [4]:
# TODO: Add considerations on the complete images

## Masks

In [5]:
import cv2
import numpy as np

# Statistics accumulated over every readable mask in MASKS_PATH.
num_glomeruli_per_image = []  # one entry per mask: number of foreground components
glomeruli_widths = []         # bounding-box width of each foreground component
glomeruli_heights = []        # bounding-box height of each foreground component
glomeruli_areas = []          # pixel count of each foreground component

# Sorted so the traversal order does not depend on the filesystem.
for filename in sorted(os.listdir(MASKS_PATH)):
    mask = cv2.imread(os.path.join(MASKS_PATH, filename), cv2.IMREAD_GRAYSCALE)
    if mask is None:
        # cv2.imread returns None for files it cannot decode (e.g. stray
        # non-image files in the folder); skip them instead of crashing below.
        continue
    # output is (num_labels, labelled image, stats, centroids)
    # stats is [[leftmost x coord, topmost y coord, width, height, area] for each label]
    # connectivity/ltype are passed by keyword: the Python binding lists optional
    # output arrays (labels, stats, centroids) before them positionally.
    output = cv2.connectedComponentsWithStats(mask, connectivity=8, ltype=cv2.CV_32S)
    num_labels, stats = output[0], output[2]
    # Label 0 is always the background, so it is excluded from the count...
    num_glomeruli_per_image.append(num_labels - 1)
    # ...and from the per-component statistics. Skipping row 0 (rather than
    # testing for a 256x256 bounding box) stays correct when the background box
    # is smaller than the tile, when a glomerulus spans the whole tile, and for
    # tiles of any size.
    for stat in stats[1:]:
        glomeruli_widths.append(stat[cv2.CC_STAT_WIDTH])
        glomeruli_heights.append(stat[cv2.CC_STAT_HEIGHT])
        glomeruli_areas.append(stat[cv2.CC_STAT_AREA])

print(f'Average number of glomeruli per image: {np.mean(num_glomeruli_per_image)}')
print(f'Average glomerulis\' width: {np.mean(glomeruli_widths)}')
print(f'Average glomerulis\' height: {np.mean(glomeruli_heights)}')
print(f'Average glomerulis\' area: {np.mean(glomeruli_areas)}')

Average number of glomeruli per image: 0.7290533188248096
Average glomerulis' width: 60.70186567164179
Average glomerulis' height: 60.588432835820896
Average glomerulis' area: 3122.658582089552


# Baseline and model overfitting

In [6]:
from utils import get_device_colab, set_deterministic_colab

# Outside Colab everything runs on the CPU; on Colab the project helper picks the device.
NOT_COLAB = True
DEVICE = 'cpu' if NOT_COLAB else get_device_colab()

# NOTE(review): presumably seeds the RNGs / enables deterministic behaviour so
# runs are reproducible -- confirm against utils.set_deterministic_colab.
SEED = 42
set_deterministic_colab(SEED)

In [7]:
import itertools

from preprocessing.dataset import get_training_validation_sets, denormalize_images, HuBMAPDataset
from preprocessing.augmentation import get_augmentations
from training.loop import Trainer
from training.loss_functions import BinaryDiceLoss
from visualization.visualize_data import visualize

import torch
from torchvision import transforms

# Per-channel normalization statistics, taken from
# https://pytorch.org/docs/stable/torchvision/models.html
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Build the training/validation datasets from the image and mask folders.
training_dataset, training_images, validation_dataset, validation_images = get_training_validation_sets(
    IMAGES_PATH,
    MASKS_PATH,
    0.3,  # NOTE(review): presumably the validation fraction -- confirm against the helper
    {'train': None, 'val': None},  # NOTE(review): presumably per-split augmentations (none here) -- confirm
    mean,
    std,
)

# Sanity-check visualization of the first training sample, currently disabled.
# The trailing plt.show() belongs to the loop body and is commented out with it:
# left active and indented it raised an IndentationError that prevented the whole
# cell from running. Re-enabling the loop also needs `plt` in scope
# (no matplotlib.pyplot import is visible in this part of the notebook).
# for image, mask in itertools.islice(training_dataset, 0, 1):
#     visualize(image=denormalize_images(image, mean, std), mask=mask)
#     plt.show()


# 