## Loss function ###

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CrossEntropyLoss2d(nn.Module):
    """Per-pixel cross-entropy loss for dense (segmentation) predictions.

    Applies ``log_softmax`` over the class dimension (dim 1) of the raw
    network scores and feeds the result to a negative log-likelihood loss.

    Args:
        weight: Optional 1-D tensor of per-class weights, passed straight
            through to ``nn.NLLLoss``.
        size_average: When true (the default) the loss uses the ``'mean'``
            reduction, otherwise ``'sum'``.
    """

    def __init__(self, weight=None,size_average=True):
        super().__init__()

        # nn.NLLLoss accepts (N, C, H, W) inputs directly; the deprecated
        # NLLLoss2d alias and the legacy size_average flag are replaced by
        # their documented equivalents.
        reduction = 'mean' if size_average else 'sum'
        self.loss = nn.NLLLoss(weight, reduction=reduction)

    def forward(self, outputs, targets):
        """Return the loss for raw scores ``outputs`` and class-index ``targets``."""
        # dim=1 is the class axis; leaving it implicit is deprecated.
        return self.loss(F.log_softmax(outputs, dim=1), targets)

## Image transformation ###

In [None]:
import numpy as np
import torch

from PIL import Image

def colormap(n):
    """Build an ``n`` x 3 ``uint8`` colour table.

    The bits of each index are dealt out in turn to the red, green and blue
    channels (bit 0 -> red, bit 1 -> green, bit 2 -> blue, bit 3 -> red, ...),
    filling each channel from its most significant bit downwards.
    """
    palette = np.zeros((n, 3), dtype=np.uint8)

    for index in range(n):
        channels = [0, 0, 0]

        for level in range(8):
            place_value = 1 << (7 - level)
            for channel in range(3):
                bit = (index >> (3 * level + channel)) & 1
                channels[channel] += place_value * bit

        palette[index] = channels

    return palette

class Relabel:
    """Callable that rewrites one label value to another, in place.

    Every element equal to ``olabel`` is overwritten with ``nlabel``; the
    same tensor object is returned.
    """

    def __init__(self, olabel, nlabel):
        self.olabel, self.nlabel = olabel, nlabel

    def __call__(self, tensor):
        assert isinstance(tensor, torch.LongTensor), 'tensor needs to be LongTensor'
        matches = tensor == self.olabel
        tensor[matches] = self.nlabel
        return tensor


class ToLabel:
    """Callable that turns an image into a long tensor with a leading size-1 axis."""

    def __call__(self, image):
        pixels = np.array(image)
        labels = torch.from_numpy(pixels).long()
        return labels.unsqueeze(0)


class Colorize:
    """Paint a single-channel label map as an RGB image using a fixed palette.

    Args:
        n: Number of palette entries to keep (labels ``0 .. n-1``), taken
            from the front of the 256-entry table built by ``colormap``.
    """

    def __init__(self, n=3):
        # NOTE: a former `self.cmap[n] = self.cmap[-1]` assignment was dropped.
        # The entry at index n is cut off by the [:n] slice, so it never
        # affected the palette, and it raised IndexError for n >= 256.
        self.cmap = torch.from_numpy(colormap(256)[:n])

    def __call__(self, gray_image):
        """Return a 3 x H x W ``uint8`` tensor for a label tensor whose first
        axis has the label plane at index 0.

        Label 0 and any label outside the palette stay black (all zeros).
        """
        size = gray_image.size()
        color_image = torch.zeros(3, size[1], size[2], dtype=torch.uint8)

        # Label 0 is skipped: the image is already zero-filled.
        for label in range(1, len(self.cmap)):
            mask = gray_image[0] == label
            colour = self.cmap[label]

            for channel in range(3):
                color_image[channel][mask] = colour[channel]

        return color_image

## Data loader ##

In [None]:
import os
import collections
import torch
import torchvision
import numpy as np
import scipy.misc as m
import matplotlib.pyplot as plt
import yaml
from PIL import Image
from torch.utils import data


class ImageLoader(data.Dataset):
    """Dataset of image / annotation PNG pairs listed in a YAML split file.

    Files read under ``root``:
        train_test_split.yaml            mapping of split name -> image numbers
        images/<num>_image.png           input image
        annotations/<num>_annotation.png label image for the same number

    Args:
        root: Dataset root folder.
        split: ``"train"`` or ``"test"``; selects the key read from the
            split file.
        img_size: Optional image size stored on ``self.img_size``; defaults
            to ``[1296, 966]``. It is not used by the loading code here.
        input_transform: Optional callable applied to the RGB PIL image.
        target_transform: Optional callable applied to the label PIL image.
    """

    def make_dataset(self,dir, set):
        """Fill ``self.files[set]`` with the image paths of the given split.

        The parameter names shadow builtins; they are kept as-is so existing
        keyword callers continue to work.

        Raises:
            ValueError: If ``set`` is neither ``'train'`` nor ``'test'``.
        """
        if set not in ('train', 'test'):
            raise ValueError(
                "unknown split {!r}; expected 'train' or 'test'".format(set))

        # Both splits are described by the same file.
        fname = os.path.join(dir, 'train_test_split.yaml')

        # safe_load: the file only needs plain data, and yaml.load() without
        # an explicit Loader is rejected by current PyYAML.
        with open(fname,'r') as f:
            doc = yaml.safe_load(f)

        image_dir = os.path.join(dir, 'images')
        file_list = []

        for x in doc[set]:
            item = os.path.join(image_dir, str(x) + '_image.png')

            # Open each image once so a missing or unreadable file fails
            # here rather than mid-training; the handle is closed right away.
            with Image.open(item):
                pass

            file_list.append(item)

        self.files[set]=file_list

        return

    def __init__(self, root, split="train", img_size=None,input_transform=None, target_transform=None):
        self.root = root
        self.split = split
        # Honour an explicit img_size instead of silently discarding it.
        self.img_size = [1296, 966] if img_size is None else img_size

        self.n_classes = 2
        self.files = collections.defaultdict(list)

        self.input_transform = input_transform
        self.target_transform = target_transform

        self.make_dataset(root,split)

    def __len__(self):
        """Number of images in the active split."""
        return len(self.files[self.split])

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair at ``index``, transforms applied."""
        img_path = self.files[self.split][index]

        # "<num>_image.png" -> "<num>", which also names the annotation file.
        img_num = os.path.basename(img_path).split("_")[0]
        lbl_path = os.path.join(
            self.root, 'annotations', img_num + '_annotation.png')

        # convert() forces the pixel data to be read before the file closes.
        with open(img_path, 'rb') as f:
            image = Image.open(f).convert('RGB')

        with open(lbl_path, 'rb') as f:
            label = Image.open(f).convert('P')

        if self.input_transform is not None:
            image = self.input_transform(image)

        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label


## Model (PSPNet) ###

In [None]:
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import numpy as np
# from torch.utils import model_zoo
from torchvision import models
from torch.autograd import Variable

class PSPModifierClass(nn.Module):
    """One pyramid-pooling branch.

    Average-pools the input by ``downsize``, projects it to ``out_features``
    channels with a bias-free 1x1 convolution, applies batch norm and ReLU,
    then bilinearly resizes the result to ``upsize``.
    """

    def __init__(self, in_features, out_features, downsize, upsize=18):
        super().__init__()

        pool = nn.AvgPool2d(downsize, stride=downsize)
        project = nn.Conv2d(in_features, out_features, 1, bias=False)
        normalise = nn.BatchNorm2d(out_features, momentum=.95)
        activate = nn.ReLU(inplace=True)
        resize = nn.UpsamplingBilinear2d(upsize)

        # Same layer order (and therefore the same state-dict keys).
        self.features = nn.Sequential(pool, project, normalise, activate, resize)

    def forward(self, x):
        return self.features(x)
    

    
    


In [None]:
class PSPNet(nn.Module):
    """PSPNet-style segmentation head on a torchvision ResNet-101 backbone.

    The backbone's ``conv1`` and four residual stages are reused with every
    convolution stride forced to 1. Their output is combined with four
    pyramid-pooling branches and classified per pixel; the result is resized
    to 256 x 256.

    The pooled backbone features (``AvgPool2d(14)``) must have the same
    spatial size as the branch outputs (18 x 18 by default) for the channel
    concatenation to work, which holds for a 256 x 256 feature map.

    Args:
        num_classes: Number of output channels of the final classifier.
    """

    def __init__(self, num_classes=2):
        super().__init__()

        resnet = models.resnet101(pretrained=True)

        # NOTE(review): resnet.bn1 / relu / maxpool are not reused, so
        # forward() goes straight from conv1 to layer1 - confirm intended.
        self.conv1 = resnet.conv1
        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        self.layer4 = resnet.layer4

        self.avgPool = nn.AvgPool2d(14, stride=14)

        # Only backbone modules are registered at this point, so this loop
        # leaves the pyramid branches and classifier below trainable.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                m.stride = 1
            if isinstance(m, (nn.Conv2d, nn.BatchNorm2d)):
                # requires_grad lives on parameters; setting it on the module
                # itself (as was done before) has no effect.
                for param in m.parameters():
                    param.requires_grad = False

        self.layer5a = PSPModifierClass(2048, 512, 60)
        self.layer5b = PSPModifierClass(2048, 512, 30)
        self.layer5c = PSPModifierClass(2048, 512, 20)
        self.layer5d = PSPModifierClass(2048, 512, 10)

        # 2048 pooled backbone channels + 4 branches x 512 channels = 4096.
        self.final = nn.Sequential(
            nn.Conv2d(4096, 512, 3, padding=1, bias=False),
            nn.BatchNorm2d(512, momentum=.95),
            nn.ReLU(inplace=True),
            nn.Dropout(.1),
            nn.Conv2d(512, num_classes, 1)
        )

    def forward(self, x):
        """Return per-class scores resized to 256 x 256 for a batch ``x``."""
        x = self.conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # Pooled backbone features first, then the four pyramid branches.
        pyramid = [self.avgPool(x)]
        for branch in (self.layer5a, self.layer5b, self.layer5c, self.layer5d):
            pyramid.append(branch(x))

        x = self.final(torch.cat(pyramid, 1))

        # Documented replacement for the deprecated F.upsample_bilinear.
        return F.interpolate(x, size=[256, 256], mode='bilinear',
                             align_corners=True)

## Visualization class ###

In [None]:
import numpy as np

from torch.autograd import Variable

from visdom import Visdom

class Dashboard:
    """Thin wrapper around a Visdom client for plotting losses and images."""

    def __init__(self, port):
        self.vis = Visdom(port=port)

    def loss(self, losses, title):
        """Plot ``losses`` against 1-based step numbers in the 'loss' env."""
        steps = np.arange(1, len(losses) + 1, 1)
        self.vis.line(losses, steps, env='loss', opts={'title': title})

    def image(self, image, title):
        """Send a tensor to the 'images' env as a numpy array.

        CUDA tensors are copied to the CPU and ``Variable`` wrappers are
        unwrapped to their underlying data before conversion.
        """
        tensor = image.cpu() if image.is_cuda else image
        if isinstance(tensor, Variable):
            tensor = tensor.data
        pixels = tensor.numpy()

        self.vis.image(pixels, env='images', opts={'title': title})

In [None]:
import numpy as np
import torch

from PIL import Image
from argparse import ArgumentParser

from torch.optim import SGD, Adam
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, CenterCrop, Normalize,Scale,Resize
from torchvision.transforms import ToTensor, ToPILImage

In [None]:
# Presumably the number of input image channels (images are loaded as RGB)
# - TODO confirm where this is consumed.
NUM_CHANNELS = 3
# Presumably the number of segmentation classes the network predicts
# - TODO confirm.
NUM_CLASSES = 3

In [None]:
# Visualisation helpers: label map -> palette image, tensor -> PIL image.
color_transform = Colorize()
image_transform = ToPILImage()

# Images: central 256-pixel crop, tensor conversion, per-channel normalisation.
input_transform = Compose([
    CenterCrop(256),
    ToTensor(),
    Normalize(mean=[.485, .456, .406], std=[.229, .224, .225]),
])

# Labels: the same crop, converted to a long tensor, with value 255 mapped to 2.
target_transform = Compose([
    CenterCrop(256),
    ToLabel(),
    Relabel(olabel=255, nlabel=2),
])


# input_transform = Compose([
#     #CenterCrop(512),
#     Resize(500),
#     ToTensor()
#     #Normalize([.485, .456, .406], [.229, .224, .225]),
# ])
# target_transform = Compose([
#     #CenterCrop(256),
#     Resize(500),
#     ToLabel()
#     #Relabel(255, 21),
# ])

## Initializing PSPNet ##

In [None]:

# Build the network with one output channel per class (NUM_CLASSES is 3).
model = PSPNet(num_classes=NUM_CLASSES)

In [None]:
# Put the network in training mode (affects its Dropout and BatchNorm layers).
model.train()

In [None]:
# Both splits read from the same root folder and share the same transforms.
dataset_folder = 'dataset-1.0/'
train_dataset = ImageLoader(
    root=dataset_folder,
    split="train",
    input_transform=input_transform,
    target_transform=target_transform,
)
test_dataset = ImageLoader(
    root=dataset_folder,
    split="test",
    input_transform=input_transform,
    target_transform=target_transform,
)

In [None]:
# Fetch the first training sample (image, label) for a quick sanity check.
x, y = train_dataset[0]

In [None]:
y

In [None]:
# One loss weight per class index (0, 1, 2), all starting at 1.
weight = torch.ones(3)

In [None]:
weight

In [None]:
# Zero the weight of class index 2 so pixels with that target add nothing
# to the weighted loss.
weight[2] = 0

In [None]:
weight

In [None]:
# DataLoader settings and the number of passes made by the training loop.
num_workers=1  # worker processes used by the DataLoader
batch_size=1   # samples per batch
num_epochs=1   # full passes over the training loader

In [None]:
# Shuffled batches over the training split.
trainLoader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)

In [None]:
# Per-pixel cross-entropy using the per-class weights defined above.
criterion = CrossEntropyLoss2d(weight=weight)

In [None]:
# SGD with momentum over every model parameter.
# Alternative that only passes trainable parameters:
#   SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
optimizer = SGD(model.parameters(), lr=1e-3, momentum=.9)


In [None]:
# Number of batches the loader yields per epoch.
print(len(trainLoader))

In [None]:
# Visdom dashboard client; 5000 is the port handed to Visdom(port=...).
board = Dashboard(5000)

In [None]:
# Training loop: one optimisation step per batch, printing the running mean
# loss of the current epoch after every step.
for epoch in range(1, num_epochs+1):
    epoch_loss = []
    running_total = 0
    for step, (images, labels) in enumerate(trainLoader):
        # The printed iteration number is 1-based; `step` is 0-based.
        print("Iter:"+str(step + 1))

        # Collapse raw annotation values to class ids: 15 -> 1, 40 -> 2 and
        # every other value -> 0. Done directly on the tensor, so no numpy
        # round-trip is needed.
        targets = torch.zeros_like(labels)
        targets[labels == 15] = 1
        targets[labels == 40] = 2

        inputs = images

        optimizer.zero_grad()
        outputs = model(inputs)

        # Labels carry a size-1 axis at dim 1 (added by ToLabel); index it
        # away so the loss receives one class id per pixel.
        loss = criterion(outputs,targets[:,0])

        loss.backward()
        optimizer.step()

        # loss is a 0-dim tensor: .item() is the supported way to read it
        # (indexing with [0] raises on current PyTorch).
        loss_value = loss.item()
        epoch_loss.append(loss_value)

        # Running sum avoids re-summing the whole list on every step.
        running_total += loss_value
        average = running_total / len(epoch_loss)

        print("loss: "+str(average)+" epoch: "+str(epoch)+", step: "+str(step))