In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from functools import partial
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

from torch.utils.data import Dataset,DataLoader
import matplotlib.pyplot as plt
import numpy as np

import os
import random
import shutil
import time

import cv2
import json

from data_tools import dataset_with_mask 
from unet import UNet
from custom_losses import IoULoss

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Restrict this process to the first physical GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0" 
# Make CUDA float tensors the default tensor type, so tensors (and model
# parameters) created later in the notebook are allocated on the GPU without
# an explicit .to(device)/.cuda() call.
# NOTE(review): torch.set_default_tensor_type is deprecated in recent PyTorch
# releases — confirm against the installed version before upgrading.
torch.set_default_tensor_type('torch.cuda.FloatTensor')
# Handle for the (only visible) GPU; in the visible cells it is referenced
# solely from commented-out code.
device = torch.device("cuda:0")

In [3]:
# Number of passes over the training set performed by the training loop.
EPOCH_NUM = 10
# Number of samples per batch for the train/val/test DataLoaders.
BATCH_SIZE = 8

In [4]:
def _load_annotations(path):
    """Parse one annotation JSON file and close the file handle afterwards."""
    with open(path) as fh:
        return json.load(fh)


# Per-split annotations. The dataset class below uses the keys as image file
# names and iterates each value as a list of 4-element boxes.
train_json = _load_annotations('mask_train.json')
val_json = _load_annotations('mask_val.json')
test_json = _load_annotations('mask_test.json')

# Directories holding the image files of each split.
train_folder = '/mnt/NVME4/vizilabda_videos/Mask_RCNN/train'
val_folder = '/mnt/NVME4/vizilabda_videos/Mask_RCNN/val'
test_folder = '/mnt/NVME4/vizilabda_videos/Mask_RCNN/test'

In [5]:
class dataset(Dataset):
    """Image files paired with binary masks rasterised from box annotations.

    Indexing yields ``(image, label, fname)``:

    * ``image`` - the frame read with OpenCV, rescaled by ``ratio`` and
      converted with ``to_tensor`` (channel order is whatever ``cv2.imread``
      returns; no colour conversion is done here).
    * ``label`` - a single-channel mask of size
      ``int(FRAME_HEIGHT * ratio) x int(FRAME_WIDTH * ratio)`` that is 1 inside
      every annotated box and 0 elsewhere. The underlying array is created
      with NumPy's default dtype (float64).
    * ``fname`` - the annotation key / image file name.
    """

    # Frame size the normalised box coordinates are scaled against.
    FRAME_HEIGHT = 1080
    FRAME_WIDTH = 1920

    def __init__(self,folder, json, ratio = 0.5, transforms=None):
        """Store the annotations and build a sorted list of file names.

        Args:
            folder: directory containing the image files.
            json: dict mapping an image file name to an iterable of
                ``(max_y, max_x, min_y, min_x)`` boxes. The coordinates are
                multiplied by the frame size, so they are presumably
                normalised to [0, 1] - verify against the annotation files.
                (Inside this method the name shadows the ``json`` module.)
            ratio: scale factor applied to both the image and the mask.
            transforms: optional callable applied to the image tensor only.
                (Inside this method the name shadows ``torchvision.transforms``.)
        """
        self.whole_dict = json
        self.ratio = ratio
        self.folder = folder
        self.transforms = transforms
        self.img_data = json
        # Sorted so that index -> file name is deterministic.
        self.fnames = sorted(json.keys())


    def __len__(self):
        """Number of annotated images."""
        return len(self.img_data)


    def __getitem__(self,idx):
        """Load image ``idx`` and rasterise its boxes into a mask.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be
                decoded by OpenCV.
        """
        fname = self.fnames[idx]
        png_path = os.path.join(self.folder,fname)

        png = cv2.imread(png_path)
        if png is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending path rather than inside cv2.resize.
            raise FileNotFoundError('Could not read image: ' + png_path)
        png = cv2.resize(png, (0,0), fx=self.ratio, fy=self.ratio)
        image = transforms.functional.to_tensor(png)

        if self.transforms:
            image = self.transforms(image)

        mask_h = int(self.FRAME_HEIGHT * self.ratio)
        mask_w = int(self.FRAME_WIDTH * self.ratio)
        label = np.zeros([mask_h, mask_w, 1])
        for max_y,max_x,min_y,min_x in self.img_data[fname]:
            try:
                bottom = int(max_y * self.FRAME_HEIGHT * self.ratio)
                right = int(max_x * self.FRAME_WIDTH * self.ratio)
                top = int(min_y * self.FRAME_HEIGHT * self.ratio)
                left = int(min_x * self.FRAME_WIDTH * self.ratio)
            except (TypeError, ValueError, OverflowError):
                # Malformed coordinate (None, non-numeric, NaN/inf): skip this
                # box but keep the sample usable.
                print('ERROR WITH LABELS')
                continue
            label[top:bottom,left:right,:] = 1
        label = transforms.functional.to_tensor(label)

        return image, label, fname

In [6]:
# One dataset per split, each down-scaled to a quarter of the source resolution.
train_ds, val_ds, test_ds = (
    dataset(split_folder, split_json, ratio=0.25)
    for split_folder, split_json in (
        (train_folder, train_json),
        (val_folder, val_json),
        (test_folder, test_json),
    )
)

In [7]:
# One loader per split, all sharing BATCH_SIZE; no shuffle argument is passed,
# so samples are served in dataset order.
train_iterator, val_iterator, test_iterator = (
    DataLoader(split_ds, batch_size=BATCH_SIZE)
    for split_ds in (train_ds, val_ds, test_ds)
)

In [8]:
# Earlier experiments, kept for reference only:
#   criteria   - nn.MSELoss(), nn.L1Loss(reduction='none')
#   optimisers - optim.Adam(lr=0.0005), optim.Adagrad(lr=0.001)
#   warm start - 'net_epoch1_0_loss=0.0258271936327219.pt'

# U-Net hyper-parameters: 3-channel image in, single-channel mask out.
unet_config = dict(
    in_channels=3,
    out_channels=1,
    n_blocks=2,
    start_filters=32,
    activation='relu',
    normalization='batch',
    conv_mode='same',
    dim=2,
)
model = UNet(**unet_config)

# Loss: the project's IoU-based criterion.
criterion = IoULoss()

# Optimiser: Adamax over every model parameter.
optimizer = optim.Adamax(model.parameters(), lr=0.002)

In [None]:
# Report the mean training loss once per this many batches.
LOG_EVERY = 10

# Lowest mean validation loss seen so far; a checkpoint is written whenever an
# epoch improves on it.
val_loss = float('inf')
for epoch in range(EPOCH_NUM):

    # ---------------- training ----------------
    # The validation pass below switches to eval mode, so re-enable training
    # behaviour (batch-norm statistics updates) at the start of every epoch.
    model.train()
    running_loss = 0.0
    # The loop variable is called `batch`, not `data`: the latter would rebind
    # the `torch.utils.data` module alias imported as `data` at the top.
    for i, batch in enumerate(train_iterator, 0):
        inputs, labels = batch[0].cuda(), batch[1].cuda()
        optimizer.zero_grad()
        outputs = model(inputs)

        # Drop the singleton channel axis of prediction and target before the
        # loss is evaluated.
        loss = criterion(torch.squeeze(outputs, dim=1), torch.squeeze(labels, dim=1))
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Log only after LOG_EVERY batches have been accumulated so that the
        # printed value really is a mean over LOG_EVERY batches.
        if (i + 1) % LOG_EVERY == 0:
            print('[%d, %5d] loss: %.7f' % (epoch + 1, i + 1, running_loss / LOG_EVERY))
            running_loss = 0.0

    # ---------------- validation ----------------
    # eval mode + no_grad: batch-norm uses its running statistics, nothing is
    # updated, and no autograd graph is built.
    model.eval()
    val_running_loss = 0.0  # separate accumulator: never mixed with training loss
    val_batches = 0
    with torch.no_grad():
        for batch in val_iterator:
            inputs, labels = batch[0].cuda(), batch[1].cuda()
            outputs = model(inputs)
            # Same tensor layout as in the training loss.
            loss = criterion(torch.squeeze(outputs, dim=1), torch.squeeze(labels, dim=1))
            val_running_loss += loss.item()
            val_batches += 1

    if val_batches == 0:
        # Nothing to average; avoid a division by zero and a bogus checkpoint.
        print('validation set is empty; skipping checkpoint')
        continue

    epoch_val_loss = val_running_loss / val_batches
    print('validation loss: %.7f' % epoch_val_loss)
    if epoch_val_loss < val_loss:
        print('SAVE')
        # File name format is relied upon by the cell that reloads a checkpoint.
        torch.save(model.state_dict(),"otf_model_net_epoch"+str(epoch)+'_loss='+str(float(epoch_val_loss))+'.pt')
        val_loss = epoch_val_loss

In [None]:
# Rebuild the datasets and serve them one sample per batch.
train_ds = dataset(train_folder,train_json,ratio = 0.25)
val_ds = dataset(val_folder,val_json,ratio = 0.25)
test_ds = dataset(test_folder,test_json,ratio = 0.25)

# Use the directly imported DataLoader class instead of going through the
# `data` module alias: any loop variable named `data` shadows that alias, and
# `data.DataLoader` then fails with an AttributeError.
train_iterator = DataLoader(train_ds, batch_size=1)
val_iterator = DataLoader(val_ds, batch_size=1)
test_iterator = DataLoader(test_ds, batch_size=1)

In [9]:
# Checkpoint to restore; the name follows the pattern used when the training
# loop saves a model.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
checkpoint_path = 'otf_model_net_epoch8_loss=0.6656731444160158.pt'
model.load_state_dict(torch.load(checkpoint_path))
# Switch to inference mode; as the last expression this also displays the model.
model.eval()

{'UNet': {'in_channels': 3, 'out_channels': 1, 'n_blocks': 2, 'start_filters': 32, 'activation': 'relu', 'normalization': 'batch', 'conv_mode': 'same', 'dim': 2, 'up_mode': 'transposed'}}

In [10]:
# Directory the colour-coded instance masks are written to by the inference loop.
mask_folder = '/mnt/NVME4/vizilabda_videos/Mask_RCNN/mask'

In [11]:
def assert_color(col_len,col_idx):
    """Clamp ``col_idx`` to the last valid index of a palette with ``col_len`` entries.

    Args:
        col_len: number of entries in the palette.
        col_idx: integer index that may lie past the end of the palette.

    Returns:
        ``col_idx`` unchanged when it is smaller than ``col_len``, otherwise
        ``col_len - 1``.
    """
    # Closed form of "decrement until the index fits"; unlike a recursive
    # countdown it cannot hit the interpreter's recursion limit for indices
    # far beyond the palette.
    return min(col_idx, col_len - 1)

In [13]:
from tqdm import tqdm
from scipy.ndimage import label

# Raw network outputs above this value are treated as foreground.
# NOTE(review): 80 is taken over unchanged from the original code - confirm it
# matches the output range of the trained model.
MASK_THRESHOLD = 80
# Enlargement applied to each predicted mask before it is written; presumably
# the inverse of the ratio=0.25 down-scaling used by the datasets.
UPSCALE = 4

# RGB palette used to tell connected components (instances) apart.
colors = [[255,255,255],[255,255,0],[255,0,255],[0,255,255],
          [0,0,255],[0,255,0],[255,0,0],[0,0,0],
          [125,125,125],[125,125,0],[125,0,125],[0,125,125],
          [0,0,125],[0,125,0],[125,0,0]]

# cv2.imwrite does not create missing directories.
os.makedirs(mask_folder, exist_ok=True)

# Pure inference: no autograd graph is needed.
with torch.no_grad():
    # Wrapping the loader itself (not enumerate) lets tqdm show the total.
    for batch in tqdm(train_iterator):
        inputs, fnames = batch[0].cuda(), batch[2]
        outputs = model(inputs).cpu().numpy()
        outputs = (outputs > MASK_THRESHOLD) * 1
        for sample_idx in range(outputs.shape[0]):
            fname = fnames[sample_idx]
            op = outputs[sample_idx,0,:,:].astype(np.uint8)
            op = cv2.resize(op, (0,0), fx=UPSCALE, fy=UPSCALE)
            # Connected-component labelling: 0 is background, 1..num are instances.
            instances,num = label(op)

            # Look-up table: row k is the BGR colour of instance id k, row 0
            # (background) stays black.
            lut = np.zeros((num + 1, 3), dtype=np.uint8)
            for idx in range(num):
                # Same palette selection as before: the first two passes over
                # the palette wrap around, anything beyond is clamped to the
                # last colour.
                col_idx = idx if idx < len(colors) else idx - len(colors)
                col_idx = assert_color(len(colors), col_idx)
                lut[idx + 1] = colors[col_idx][::-1]  # RGB -> BGR for OpenCV

            # Explicit uint8 image (the old `res == res.astype(np.uint8)` was a
            # discarded comparison, so a float64 array was handed to imwrite).
            res = lut[instances]
            if not cv2.imwrite(os.path.join(mask_folder,fname),res):
                # imwrite signals failure through its return value only.
                print('Could not write mask for', fname)

188it [17:09,  5.48s/it]
