<a href="https://colab.research.google.com/github/flosch9/deep_learning_home_exam/blob/main/Task_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load modules

In [2]:
pip install utils

Collecting utils
  Downloading utils-1.0.2.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: utils
  Building wheel for utils (setup.py) ... [?25l[?25hdone
  Created wheel for utils: filename=utils-1.0.2-py2.py3-none-any.whl size=13906 sha256=976e92dcc0aec7e191dfd177648dcecea7997a32a00983bdec6fb102c562b17c
  Stored in directory: /root/.cache/pip/wheels/b8/39/f5/9d0ca31dba85773ececf0a7f5469f18810e1c8a8ed9da28ca7
Successfully built utils
Installing collected packages: utils
Successfully installed utils-1.0.2


# Define and load model (model.py)

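The cell below defines `FCN32`, the segmentation network: a pretrained VGG16 feature extractor, a convolutional classifier head, and bilinear upsampling to the requested output size.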



In [3]:
import utils
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torchvision.models import vgg16, VGG16_Weights


class FCN32(nn.Module):
    """Fully convolutional segmentation network on top of a pretrained VGG16.

    The network consists of three stages:

    * ``features``   - the convolutional part of torchvision's ``vgg16``
      (the plain variant, not ``vgg16_bn``) with ImageNet weights,
    * ``classifier`` - a convolutional head that replaces VGG's fully
      connected classifier and produces one score map per class,
    * ``upsample``   - bilinear resizing of the score maps to ``output_size``
      followed by a softmax over the class channel.

    Args:
        output_size: ``(height, width)`` of the returned segmentation maps.
        num_classes: number of output channels (one per class).
    """

    def __init__(self, output_size = (128,128), num_classes = 2):
        super().__init__()

        # Pretrained feature extractor; its last layer outputs 512 channels.
        self.features = vgg16(weights=VGG16_Weights.IMAGENET1K_V1).features

        # Convolutional head used instead of a fully connected classifier.
        # NOTE(review): 4024 hidden channels looks like a typo for 4096; it is
        # kept so that previously saved checkpoints still load.
        self.classifier = nn.Sequential(
            nn.Conv2d(512, 4024, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            # A 1x1 convolution needs no padding. Padding it would add a ring
            # of bias-only pixels around the score map, which the upsampling
            # then stretches into the border of the final segmentation.
            nn.Conv2d(4024, num_classes, kernel_size=1, stride=1, padding=0),
        )

        # Non-trainable upsampling (plain resizing, no transposed convolution).
        # nn.Upsample(mode="bilinear", align_corners=True) is the documented
        # replacement for the deprecated nn.UpsamplingBilinear2d.
        # NOTE(review): the softmax makes the model return probabilities. If
        # training uses nn.CrossEntropyLoss (which expects raw scores), the
        # softmax is effectively applied twice - confirm this is intended.
        self.upsample = nn.Sequential(
            nn.Upsample(size=output_size, mode="bilinear", align_corners=True),
            nn.Softmax(dim=1),  # normalise explicitly over the class channel
        )

    def forward(self, x):
        """Return per-pixel class probabilities for a batch of images.

        Args:
            x: batched image tensor accepted by the VGG16 feature extractor
               (presumably ``(N, 3, H, W)`` - confirm against the data loader).

        Returns:
            Tensor of shape ``(N, num_classes, *output_size)`` whose values sum
            to one along the class dimension.
        """
        x = self.features(x)
        x = self.classifier(x)
        x = self.upsample(x)
        return x

#model = FCN32(output_size=(128,128))

# print outputs of a model
#print(vgg16_bn().features)
#print(vgg16_bn().classifier)

#print(vgg16_bn())
#model = FCN32()

# print(FCN32().features)
# set ceil_modes to true
#print(model.features[6].ceil_mode)



# Define test functions (test.py)

In [None]:
import csv
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import OxfordIIITPet
from torchvision.transforms import InterpolationMode, PILToTensor, Resize, ToPILImage, ToTensor

from model import FCN32


###############################################################################
# load model and parameters from training for testing
###############################################################################

# Base path (without suffix) of the file holding the trained model parameters.
# os.path.join keeps the value identical on Windows while producing a valid
# path on other platforms; the former literal also relied on invalid escape
# sequences such as "\d" and "\p".
name_model = os.path.join("home_exam", "data", "problem2", "model_data", "FCN32_version_01")

batch_size = 32
# Size the images and segmentation maps are resized to (e.g. 256 also works).
resize_x = 128
resize_y = 128



#model = FCN32()
#model.load_state_dict(torch.load(name_model + "_trained"))

###############################################################################
# load dataset for testing
###############################################################################

# Open the test split of the Oxford-IIIT Pet dataset (3669 samples).
# Input images and segmentation maps are resized to the same size. The
# segmentation maps store class ids, so they are resized with nearest-neighbour
# interpolation; the default bilinear mode would blend neighbouring ids into
# values that belong to a different class.
pets_test = OxfordIIITPet(
    root=os.path.join("home_exam", "data", "problem2"),
    split="test",
    transform=Resize((resize_x, resize_y)),
    target_transform=Resize((resize_x, resize_y), interpolation=InterpolationMode.NEAREST),
    target_types="segmentation",
    download=False,
)
#print(len(pets_test))

# ToTensor must not be used for the target image: it rescales the pixel values
# and thereby destroys the class ids.
def custom_collate(batch):
    """Collate (image, segmentation map) samples into two batched tensors.

    Args:
        batch: sequence of samples where ``sample[0]`` is the input image and
            ``sample[1]`` the segmentation map, both in a format accepted by
            ``ToTensor`` / ``PILToTensor``.

    Returns:
        Tuple ``(images, labels)``: ``images`` is the stack of image tensors,
        ``labels`` the stack of ``int64`` label maps without channel dimension
        and with class ids shifted from ``[1, 3]`` to ``[0, 3)``.
    """
    to_image_tensor = ToTensor()
    to_label_tensor = PILToTensor()  # keeps the raw integer class ids

    images = []
    labels = []
    for sample in batch:
        images.append(to_image_tensor(sample[0]))

        label = to_label_tensor(sample[1])
        # Drop the implicit single channel. squeeze(0) works for any map size,
        # so the function no longer depends on the global resize settings.
        label = label.squeeze(0)
        # The cross-entropy loss expects int64 targets; convert before
        # subtracting so the unsigned source values cannot wrap around.
        label = label.long() - 1
        labels.append(label)

    return torch.stack(images), torch.stack(labels)

# Batched, shuffled loader over the test split. The dataset does not yield
# tensors, so the custom collate function performs the conversion.
test_dataload = DataLoader(
    dataset=pets_test,
    batch_size=batch_size,
    collate_fn=custom_collate,
    shuffle=True,
)


# Helper that shows an input image next to its segmentation map.
def display_data(data_point):
    """Plot the first image and the first segmentation map of ``data_point``.

    Args:
        data_point: pair whose first entry holds images and whose second entry
            holds segmentation maps; element 0 of each is converted with
            ``ToPILImage`` and shown.
            NOTE(review): presumably a batch from the data loader - confirm
            that the label dtype is supported by ``ToPILImage``.

    Returns:
        An empty tuple.
    """
    to_pil = ToPILImage()
    figure, (image_axis, label_axis) = plt.subplots(nrows=1, ncols=2, figsize=(10, 6))

    image_axis.imshow(to_pil(data_point[0][0]))
    label_axis.imshow(to_pil(data_point[1][0]))

    figure.tight_layout()
    plt.show()
    return ()

###############################################################################
# testing model and displaying outputs
###############################################################################

def show_output(initial_image, model_output):
    """Plot an input image next to channel 1 of the model output.

    Args:
        initial_image: image tensor shown on the left
            (presumably ``(3, H, W)`` - confirm against caller).
        model_output: model output for that image; only channel 1 is shown,
            rendered as a grayscale map (presumably ``(2, H, W)``).

    Returns:
        An empty tuple.
    """
    # Keep only the score map of class index 1.
    class_one_map = model_output[1, :, :]

    to_pil = ToPILImage()
    figure, (image_axis, output_axis) = plt.subplots(nrows=1, ncols=2, figsize=(10, 6))
    image_axis.imshow(to_pil(initial_image))
    output_axis.imshow(to_pil(class_one_map), cmap="gray")
    figure.tight_layout()
    plt.show()

    return ()


def show_uncertainity_map():
    """Placeholder without implementation; returns an empty tuple."""
    nothing = ()
    return nothing

def test_model(model, test_images, test_labels_merched, original_labels):

    model.eval()

    output = model(test_images)

    show_output(test_images[0], output[0])

    return()



# Train model (train.py)

In [None]:
import utils
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torchvision.models import vgg16, VGG16_Weights


class FCN32(nn.Module):
    """Fully convolutional segmentation network on top of a pretrained VGG16.

    The network consists of three stages:

    * ``features``   - the convolutional part of torchvision's ``vgg16``
      (the plain variant, not ``vgg16_bn``) with ImageNet weights,
    * ``classifier`` - a convolutional head that replaces VGG's fully
      connected classifier and produces one score map per class,
    * ``upsample``   - bilinear resizing of the score maps to ``output_size``
      followed by a softmax over the class channel.

    Args:
        output_size: ``(height, width)`` of the returned segmentation maps.
        num_classes: number of output channels (one per class).
    """

    def __init__(self, output_size = (128,128), num_classes = 2):
        super().__init__()

        # Pretrained feature extractor; its last layer outputs 512 channels.
        self.features = vgg16(weights=VGG16_Weights.IMAGENET1K_V1).features

        # Convolutional head used instead of a fully connected classifier.
        # NOTE(review): 4024 hidden channels looks like a typo for 4096; it is
        # kept so that previously saved checkpoints still load.
        self.classifier = nn.Sequential(
            nn.Conv2d(512, 4024, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            # A 1x1 convolution needs no padding. Padding it would add a ring
            # of bias-only pixels around the score map, which the upsampling
            # then stretches into the border of the final segmentation.
            nn.Conv2d(4024, num_classes, kernel_size=1, stride=1, padding=0),
        )

        # Non-trainable upsampling (plain resizing, no transposed convolution).
        # nn.Upsample(mode="bilinear", align_corners=True) is the documented
        # replacement for the deprecated nn.UpsamplingBilinear2d.
        # NOTE(review): the softmax makes the model return probabilities. If
        # training uses nn.CrossEntropyLoss (which expects raw scores), the
        # softmax is effectively applied twice - confirm this is intended.
        self.upsample = nn.Sequential(
            nn.Upsample(size=output_size, mode="bilinear", align_corners=True),
            nn.Softmax(dim=1),  # normalise explicitly over the class channel
        )

    def forward(self, x):
        """Return per-pixel class probabilities for a batch of images.

        Args:
            x: batched image tensor accepted by the VGG16 feature extractor
               (presumably ``(N, 3, H, W)`` - confirm against the data loader).

        Returns:
            Tensor of shape ``(N, num_classes, *output_size)`` whose values sum
            to one along the class dimension.
        """
        x = self.features(x)
        x = self.classifier(x)
        x = self.upsample(x)
        return x

#model = FCN32(output_size=(128,128))

# print outputs of a model
#print(vgg16_bn().features)
#print(vgg16_bn().classifier)

#print(vgg16_bn())
#model = FCN32()

# print(FCN32().features)
# set ceil_modes to true
#print(model.features[6].ceil_mode)



