In [1]:
import os
import sys

import torch

# Make the project modules that live one directory up importable.
sys.path.append("..")

from pytorch_utils import *
from preprocess import *
from model_container import ModelContainer
from model_container import _pickle_path
# Imported after the star imports so that `summary` refers to torchsummary's.
from torchsummary import summary

In [2]:
# Build the ResNet-50 flavour of the instrument-segmentation network and wrap
# it in a ModelContainer.
# NOTE(review): returns_dict=True presumably tells the container that the
# network's forward pass yields a dict rather than a bare tensor — confirm
# against ModelContainer.
resnet50 = ModelContainer(
    nnet=get_instrument_segmentation_model_base(Models.RESNETV50),
    returns_dict=True,
)

# Bare expression: displays the wrapped network's layer structure as the
# cell output.
resnet50._network

DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Se

In [3]:
# Build the U-Net flavour of the instrument-segmentation network and wrap it
# in a ModelContainer (returns_dict is False for this model).
unet = ModelContainer(
    nnet=get_instrument_segmentation_model_base(Models.UNET),
    returns_dict=False,
)

# Print a per-layer summary (output shapes and parameter counts) for a batch
# of 64 three-channel 96x128 inputs.
summary(unet._network, input_size=(3, 96, 128), batch_size=64)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [64, 64, 48, 64]           9,408
       BatchNorm2d-2           [64, 64, 48, 64]             128
              ReLU-3           [64, 64, 48, 64]               0
         MaxPool2d-4           [64, 64, 24, 32]               0
            Conv2d-5           [64, 64, 24, 32]           4,096
       BatchNorm2d-6           [64, 64, 24, 32]             128
              ReLU-7           [64, 64, 24, 32]               0
            Conv2d-8           [64, 64, 24, 32]          36,864
       BatchNorm2d-9           [64, 64, 24, 32]             128
             ReLU-10           [64, 64, 24, 32]               0
           Conv2d-11          [64, 256, 24, 32]          16,384
      BatchNorm2d-12          [64, 256, 24, 32]             512
           Conv2d-13          [64, 256, 24, 32]          16,384
      BatchNorm2d-14          [64, 256,

In [4]:
# Spatial size handed to the custom model factory via `shape`.
# NOTE(review): presumably (height, width) — confirm against
# get_instrument_segmentation_model_base.
input_shape = (96, 128)

# Build the custom segmentation network for that input size and wrap it in a
# ModelContainer (returns_dict is False for this model).
custom = ModelContainer(
    get_instrument_segmentation_model_base(Models.CUSTOM, shape = input_shape),
    returns_dict = False
)

# Per-layer summary for a batch of 64 three-channel inputs. The spatial size
# is taken from input_shape instead of being typed out again, so the summary
# always describes the same input size the model was built for.
summary(custom._network, (3, *input_shape), 64)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [64, 32, 94, 126]             896
       BatchNorm2d-2          [64, 32, 94, 126]              64
              ReLU-3          [64, 32, 94, 126]               0
            Conv2d-4          [64, 64, 94, 126]          51,264
       BatchNorm2d-5          [64, 64, 94, 126]             128
         MaxPool2d-6           [64, 64, 47, 63]               0
              ReLU-7           [64, 64, 47, 63]               0
            Conv2d-8          [64, 128, 45, 61]         401,536
         MaxPool2d-9          [64, 128, 22, 30]               0
             ReLU-10          [64, 128, 22, 30]               0
           Conv2d-11          [64, 128, 20, 28]         147,584
      BatchNorm2d-12          [64, 128, 20, 28]             256
             ReLU-13          [64, 128, 20, 28]               0
  ConvTranspose2d-14          [64, 128,

  return self._call_impl(*args, **kwargs)


In [5]:
# Name of the cached loader and the pickle file it is looked up under.
dataloader_name = "training_full_96_128"
loader_path = os.path.join(_pickle_path, dataloader_name + ".pkl")

# Target size passed to both preprocessing helpers.
image_size = (96, 128)
# Third argument of set_dataset.
# NOTE(review): presumably the batch size — confirm against ModelContainer.
batch_size = 128

containers = (resnet50, unet, custom)

if not os.path.exists(loader_path):
    # No cached loader yet: load and preprocess the training videos.
    input_videos, output_videos = load_endovis_videos(DatasetType.TRAINING)

    # Duplicate the last entry of each list so the number of samples divides
    # evenly into batches. The loader inspected later in this notebook holds
    # 4480 samples (35 * 128).
    # NOTE(review): the previous comment said 4800 — confirm the intended size.
    input_videos.append(input_videos[-1])
    output_videos.append(output_videos[-1])

    input_preprocessed = preprocess_source_endovis_images(input_videos, image_size)
    output_preprocessed = preprocess_endovis_target_images(output_videos, image_size)

    # Every model is given the same preprocessed inputs and targets.
    for container in containers:
        container.set_dataset(input_preprocessed, output_preprocessed, batch_size)

    # Persist the loader once, under the same name the existence check above
    # looks for, so later runs take the cached branch.
    resnet50.save_loader(dataloader_name)
else:
    # Cached loader found: every model loads it from the same pickle file.
    for container in containers:
        container.load_loader(loader_path)

In [6]:
# Configure training for each model in turn: an Adam optimizer (lr = 0.01)
# over that model's own parameters, and a fresh BCEWithLogitsLoss instance
# as its cost function.
for container in (resnet50, unet, custom):
    container.set_optimizer(
        torch.optim.Adam(params=container._network.parameters(), lr=0.01)
    )
    container.set_cost(torch.nn.BCEWithLogitsLoss())

In [7]:
# Sanity check: show the shape of the first tensor stored in the ResNet-50
# container's dataset.
# NOTE(review): presumably the input images, laid out as
# (samples, channels, height, width) — confirm.
training_inputs = resnet50._loader.dataset.tensors[0]
training_inputs.shape

torch.Size([4480, 3, 96, 128])

In [8]:
# Number of epochs each model is trained for.
NUM_EPOCHS = 40

# Train the models sequentially: ResNet-50 first, then U-Net, then the custom
# network. The recorded output shows roughly 600 seconds per epoch, so this
# cell runs for a long time.
for container in (resnet50, unet, custom):
    container.train(NUM_EPOCHS)

Epoch: 1/40. Last loss: 0.08604232221841812. Duration: 612.36 sec
Epoch: 2/40. Last loss: 0.0487963892519474. Duration: 602.89 sec
Epoch: 3/40. Last loss: 0.03425125777721405. Duration: 605.95 sec
Epoch: 4/40. Last loss: 0.029312796890735626. Duration: 602.89 sec
Epoch: 5/40. Last loss: 0.02830924652516842. Duration: 605.74 sec
Epoch: 6/40. Last loss: 0.02677743323147297. Duration: 602.79 sec
Epoch: 7/40. Last loss: 0.026832932606339455. Duration: 605.50 sec
Epoch: 8/40. Last loss: 0.023519285023212433. Duration: 602.68 sec
Epoch: 9/40. Last loss: 0.02155611850321293. Duration: 605.52 sec
Epoch: 10/40. Last loss: 0.02344406768679619. Duration: 602.78 sec
Epoch: 11/40. Last loss: 0.02306094765663147. Duration: 605.52 sec
Epoch: 12/40. Last loss: 0.022571230307221413. Duration: 602.61 sec
Epoch: 13/40. Last loss: 0.02113419771194458. Duration: 605.93 sec
Epoch: 14/40. Last loss: 0.018908914178609848. Duration: 602.70 sec
Epoch: 15/40. Last loss: 0.02124330773949623. Duration: 605.39 sec


In [9]:
# For each model: (container, name to save the model under, name to save the
# loss history under).
artifacts = (
    (resnet50, "resnet50_model_full", "resnet50_losses_full"),
    (unet, "unet_model_full", "unet_losses_full"),
    (custom, "custom_model_full", "custom_losses_full"),
)

# Save each model followed by its losses, one container at a time.
for container, model_name, losses_name in artifacts:
    container.save_model(model_name)
    container.save_losses(losses_name)