In [None]:
# Mount Google Drive at /content/gdrive so the archives stored under
# MyDrive/Bee can be read by the cells below (Colab-only dependency).
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Extract the image archive from the mounted Drive.
# The earlier archive (ASAS_ACORES_2017.zip) has been superseded by images_manuel.zip.
# NOTE(review): presumably this unpacks to /content/images, the default data_path
# of preprocess() — confirm the archive layout.
!unzip /content/gdrive/MyDrive/Bee/images_manuel.zip

In [None]:
# Extract the mask archive from the mounted Drive.
# The earlier archive (masks.zip) has been superseded by masks_manuel.zip.
# NOTE(review): presumably this unpacks to /content/masks, the default label_path
# of preprocess() — confirm the archive layout.
!unzip /content/gdrive/MyDrive/Bee/masks_manuel.zip

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image
import numpy as np
import cv2

# Dataset Class


In [None]:
class WingDataset(Dataset):
    """Index-aligned pairs of input images and their targets.

    Args:
        X_train: indexable collection of input samples.
        Y_train: indexable collection of targets; ``Y_train[i]`` belongs to
            ``X_train[i]``.
        transforms: optional callable applied to every input sample.
        target_transform: optional callable applied to every target sample.

    The two transforms are independent: either one may be given without the
    other.
    """

    def __init__(self, X_train, Y_train, transforms=None, target_transform=None):
        self.targets = Y_train
        self.input = X_train
        self.transform = transforms
        self.target_transform = target_transform

    def __len__(self):
        """Number of samples, taken from the input collection."""
        return len(self.input)

    @staticmethod
    def _as_tensor(value):
        """Return ``value`` as a tensor that does not alias the stored sample."""
        if isinstance(value, torch.Tensor):
            # torch.tensor(tensor) emits a copy-construct warning; this is the
            # recommended equivalent.
            return value.clone().detach()
        return torch.tensor(value)

    def __getitem__(self, idx):
        """Return the ``(data, target)`` tensor pair stored at ``idx``."""
        data = self.input[idx]
        target = self.targets[idx]

        # Apply each transform only if it was supplied. Previously the target
        # transform was skipped when no input transform was set and was called
        # even when it was None.
        if self.transform is not None:
            data = self.transform(data)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return self._as_tensor(data), self._as_tensor(target)


# Preprocess the data


In [None]:
def reshape(img, desired_size=400):
    """Convert an image to grayscale and letterbox it onto a square canvas.

    The image is scaled so that its longer side equals ``desired_size`` (aspect
    ratio preserved) and then padded with black borders, split evenly between
    the two sides, up to ``desired_size x desired_size``.

    Args:
        img: array-like image. A 3-D input is treated as RGB(A) and converted
            with ``cv2.COLOR_RGB2GRAY``; a 2-D input is used as-is.
        desired_size: side length of the square output, in pixels.

    Returns:
        A 2-D array of shape ``(desired_size, desired_size)``.
    """
    img = np.array(img)

    # Only colour images need the conversion; cvtColor rejects 2-D input.
    if img.ndim == 3:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    old_h, old_w = img.shape[:2]
    ratio = float(desired_size) / max(old_h, old_w)

    # int() truncation could yield 0 for extreme aspect ratios, which
    # cv2.resize does not accept.
    new_h = max(1, int(old_h * ratio))
    new_w = max(1, int(old_w * ratio))

    # cv2.resize expects the target size as (width, height).
    img = cv2.resize(img, (new_w, new_h))

    delta_w = desired_size - new_w
    delta_h = desired_size - new_h
    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
    left, right = delta_w // 2, delta_w - (delta_w // 2)

    color = [0, 0, 0]
    return cv2.copyMakeBorder(img, top, bottom, left, right,
                              cv2.BORDER_CONSTANT, value=color)

In [None]:
def preprocess(data_path='/content/images', label_path="/content/masks"):
    """Load every image in ``data_path`` that has a same-named mask in ``label_path``.

    Each image is passed through ``reshape`` (grayscale, square letterbox) and
    then expanded back to three identical channels. Masks are read as
    single-channel grayscale and returned unchanged.

    Args:
        data_path: directory containing the input images.
        label_path: directory containing masks with the same file names.

    Returns:
        ``(dataset, targets)``: two index-aligned lists of numpy arrays.
        Images whose mask is missing, or files OpenCV cannot decode, are
        skipped with a message.
    """
    print("Preprocessing...")
    dataset = []
    targets = []

    cannotFind = 0
    # List the directory once; sorting makes the sample order reproducible.
    for file_name in sorted(os.listdir(data_path)):
        image_path = os.path.join(data_path, file_name)
        target_path = os.path.join(label_path, file_name)

        if not os.path.isfile(target_path):
            print("Could not find the image {}, current missing image count is {}.".format(target_path, cannotFind+1))
            cannotFind += 1
            continue

        img = cv2.imread(image_path)
        target = cv2.imread(target_path, cv2.IMREAD_GRAYSCALE)
        if img is None or target is None:
            # cv2.imread returns None instead of raising on undecodable files.
            print("Could not read the pair {} / {}, skipping.".format(image_path, target_path))
            continue

        # cv2.imread yields BGR, while reshape() converts with COLOR_RGB2GRAY.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = reshape(img)
        # reshape() returns a single-channel image; COLOR_BGR2RGB rejects that
        # input, so expand gray -> 3 channels explicitly.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

        # NOTE(review): masks are not letterboxed like the images; this assumes
        # they are already aligned with the 400x400 padded image — confirm.
        dataset.append(img)
        targets.append(target)

    return dataset, targets


# Create Dataloaders

In [None]:
images, targets = preprocess()

# Input pipeline: photometric augmentation only (colour jitter, optional blur,
# occasional equalisation / autocontrast), finishing with a float tensor.
image_pipeline = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(400),
    transforms.ColorJitter(brightness=.5, hue=.3),
    transforms.RandomApply(
        transforms=[transforms.GaussianBlur(kernel_size=(1, 3), sigma=(0.1, 2))],
        p=0.5,
    ),
    transforms.RandomEqualize(p=0.15),
    transforms.RandomAutocontrast(),
    transforms.ToTensor(),
])

# Mask pipeline: resize and tensor conversion only, no augmentation.
mask_pipeline = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(400),
    transforms.ToTensor(),
])

# Wrap the image/mask lists in the custom Dataset.
dataset = WingDataset(
    images,
    targets,
    transforms=image_pipeline,
    target_transform=mask_pipeline,
)



# collate_fn needs for batch
def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    A list of ``(data, target)`` pairs becomes ``((data, ...), (target, ...))``;
    stacking into tensors is left to the consumer.
    """
    per_field = zip(*batch)
    return tuple(per_field)


# Batch size
train_batch_size = 1

# 80 / 20 train / test split.
train_length = int(0.80 * len(dataset))
test_length = len(dataset) - train_length

# Seed the split so the same images land in the test subset on every run.
# A later cell evaluates a saved checkpoint on dataloader_test; with an
# unseeded split that "test" set would contain images the checkpoint was
# trained on.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, (train_length, test_length), generator=split_generator)

# NOTE(review): both subsets wrap the same `dataset`, so the test samples also
# go through its random augmentation pipeline.
dataloader_train = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_batch_size,
                                               shuffle=True,
                                               collate_fn=collate_fn)

# Evaluation does not benefit from shuffling; a fixed order keeps the
# per-batch plots comparable between epochs.
dataloader_test = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=train_batch_size,
                                              shuffle=False,
                                              collate_fn=collate_fn)



# Download the UNet

In [None]:
# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device is {}".format(device))

# Fetch the U-Net definition from torch.hub (randomly initialised: pretrained=False)
# with 3 input channels and a single output channel, then move it to `device`.
model = torch.hub.load(
    'mateuszbuda/brain-segmentation-pytorch',
    'unet',
    in_channels=3,
    out_channels=1,
    init_features=32,
    pretrained=False,
).to(device)

print(model)

device is cuda
UNet(
  (encoder1): Sequential(
    (enc1conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (enc1norm1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (enc1relu1): ReLU(inplace=True)
    (enc1conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (enc1norm2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (enc1relu2): ReLU(inplace=True)
  )
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (encoder2): Sequential(
    (enc2conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (enc2norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (enc2relu1): ReLU(inplace=True)
    (enc2conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (enc2norm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, a

Using cache found in /root/.cache/torch/hub/mateuszbuda_brain-segmentation-pytorch_master


# Accuracy Calculator

In [None]:
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize'] = [12, 8]
def accuracy_check(mask, prediction, print_every, verbose=False):
    """Score one predicted mask against its ground truth.

    Both inputs are binarised at 0.9. The score is the percentage of
    ground-truth positive ("white") pixels that were also predicted positive,
    minus a penalty of ``0.00002 * (false positives) ** 2``.

    Args:
        mask: ground-truth mask, torch tensor or numpy array.
        prediction: predicted mask with the same shape as ``mask``.
        print_every: when truthy, print the intermediate counts.
        verbose: when True, show mask and prediction side by side; the first
            index along axis 0 of each is displayed.

    Returns:
        The score described above. When the mask has no positive pixel, the
        recall term is taken as 100 (nothing could be missed) instead of
        dividing by zero.
    """
    # Thresholding happens first, so only inputs that support `>` with a float
    # (tensors / arrays) can get this far.
    binarised = []
    for item in (mask > 0.9, prediction > 0.90):
        if isinstance(item, torch.Tensor):
            item = item.detach().cpu().numpy()
        binarised.append(np.asarray(item))
    true_mask, pred_mask = binarised

    if verbose:
        fig = plt.figure()
        fig.add_subplot(1, 2, 1)
        plt.imshow(true_mask[0], interpolation='nearest')
        plt.title("correct landmarks")
        fig.add_subplot(1, 2, 2)
        plt.imshow(pred_mask[0], interpolation='nearest')
        plt.title("prediction")
        plt.show()

    white_points = np.sum(true_mask)

    w_b = np.sum(np.greater(true_mask, pred_mask))  # positive in mask, missed by prediction
    b_w = np.sum(np.less(true_mask, pred_mask))     # negative in mask, predicted positive
    eq = np.sum(np.equal(true_mask, pred_mask))

    if print_every:
        print("White point count: ", white_points, " White - Black: ", w_b,
             " Black - White: ", b_w , " Equal pixels : ", eq)

    if white_points == 0:
        # No positive pixel to find: avoid 0/0 (which produced NaN).
        white_accuracy_point = 100.0
    else:
        white_accuracy_point = ((white_points - w_b) / white_points) * 100
    if print_every:
        print("gained accuracy for correct white predictions: ",white_accuracy_point)

    wrong_deduction = (b_w**2)*0.00002 # point deduction from wrong predictions
    if print_every:
        print("deduction acc from wrong black predictions: ", wrong_deduction)

    return white_accuracy_point - wrong_deduction


def accuracy_check_for_batch(masks, predictions, batch_size, print_every, verbose=False):
    """Average ``accuracy_check`` over the samples of one batch.

    Args:
        masks: batch of ground-truth masks, indexable along the first axis.
        predictions: batch of predictions, aligned with ``masks``.
        batch_size: nominal batch size; at most this many samples are scored.
        print_every: forwarded to ``accuracy_check``.
        verbose: forwarded to ``accuracy_check``.

    Returns:
        Mean score over the samples actually present, or 0.0 when there are none.
    """
    # The last batch of an epoch can be shorter than the nominal batch size;
    # indexing by batch_size alone would raise IndexError.
    sample_count = min(batch_size, len(masks), len(predictions))
    if sample_count <= 0:
        return 0.0

    total_acc = 0
    for index in range(sample_count):
        total_acc += accuracy_check(masks[index], predictions[index], print_every, verbose)
    return total_acc / sample_count


# Trainer

In [None]:
import torch
from torch import Tensor


def dice_coeff(input: Tensor, target: Tensor, reduce_batch_first: bool = False, epsilon=1e-6):
    """Dice coefficient between ``input`` and ``target``.

    Args:
        input: predicted mask; must have the same size as ``target``.
        target: reference mask.
        reduce_batch_first: if True, treat the whole tensor as one set;
            otherwise compute the coefficient per element of the first axis
            and average.
        epsilon: smoothing term added to numerator and denominator.

    Returns:
        A scalar tensor holding the (averaged) Dice coefficient.

    Raises:
        ValueError: if ``reduce_batch_first`` is requested for a 2-D tensor.
    """
    assert input.size() == target.size()
    if input.dim() == 2 and reduce_batch_first:
        raise ValueError(f'Dice: asked to reduce batch but got tensor without batch dimension (shape {input.shape})')

    if input.dim() == 2 or reduce_batch_first:
        inter = torch.dot(input.reshape(-1), target.reshape(-1))
        sets_sum = torch.sum(input) + torch.sum(target)
        if sets_sum.item() == 0:
            # Both masks empty: make the ratio epsilon / epsilon == 1.
            sets_sum = 2 * inter

        return (2 * inter + epsilon) / (sets_sum + epsilon)

    # Compute the metric per element of the first axis and average. epsilon is
    # forwarded explicitly; it was previously dropped here, so a caller-supplied
    # value was silently replaced by the default.
    dice = 0
    for i in range(input.shape[0]):
        dice += dice_coeff(input[i, ...], target[i, ...], epsilon=epsilon)
    return dice / input.shape[0]


def multiclass_dice_coeff(input: Tensor, target: Tensor, reduce_batch_first: bool = False, epsilon=1e-6):
    """Mean Dice coefficient across the channels (axis 1) of ``input`` and ``target``."""
    assert input.size() == target.size()
    n_channels = input.shape[1]
    per_channel = (
        dice_coeff(input[:, c, ...], target[:, c, ...], reduce_batch_first, epsilon)
        for c in range(n_channels)
    )
    return sum(per_channel) / n_channels


def dice_loss(input: Tensor, target: Tensor, multiclass: bool = False):
    """Dice loss, ``1 - dice``, computed with the batch reduced first.

    ``multiclass`` selects the per-channel average instead of the plain coefficient.
    """
    assert input.size() == target.size()
    if multiclass:
        coefficient = multiclass_dice_coeff(input, target, reduce_batch_first=True)
    else:
        coefficient = dice_coeff(input, target, reduce_batch_first=True)
    return 1 - coefficient

In [None]:
import time 
import torch.nn.functional as F

def _plot_batch_accuracy(first_batch, accuracies, title='Training Accuracy over Batch'):
    """Scatter per-batch accuracies against their batch indices and show the figure."""
    plt.scatter(list(range(first_batch, first_batch + len(accuracies))), accuracies)
    plt.title(title)
    plt.xlabel('Batch')
    plt.ylabel('Accuracy')
    plt.show()


def train(model, epochs=20, verbose=True):
    """Train ``model`` on ``dataloader_train`` and validate on ``dataloader_test``.

    Relies on the notebook-level names ``dataloader_train``, ``dataloader_test``,
    ``device`` and ``accuracy_check_for_batch``.

    Args:
        model: network called as ``model(inputs)`` on a stacked batch.
        epochs: number of passes over the training loader.
        verbose: when True (the default, matching the previous behaviour), show
            the input image and the mask/prediction figure for every batch.

    Returns:
        The same ``model`` object. Whenever the mean validation accuracy
        improves, its ``state_dict`` is saved under ``/content``.
    """
    # A one-element pos_weight broadcasts against targets of any size and is
    # created on the same device as the batches, instead of a hard-coded
    # 400-wide CUDA tensor.
    pos_weight = torch.tensor([50.0], device=device)
    # NOTE(review): the torch.hub U-Net loaded in this notebook appears to apply
    # a sigmoid in its forward pass; if so, BCEWithLogitsLoss applies a second
    # one — confirm against the model source before changing the loss.
    loss_func = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    print("Training Starts...")

    best_acc = -100.0
    # Batch indices at which the accumulated accuracy plot is flushed mid-epoch.
    plot_flush_points = (500, 1200)

    for epoch in range(epochs):
        epoch_start = time.time()
        print("Epoch: {}/{}".format(epoch+1, epochs))

        # ---------------------------------------------------------- training
        model.train()

        train_loss = 0.0
        train_samples = 0
        epoch_accuracies = []
        plot_acc = []
        plot_starting_batch = 0

        for i, (inputs, labels) in enumerate(dataloader_train):
            # The collate function yields tuples of tensors; stack into batches.
            inputs = torch.stack(inputs)
            labels = torch.stack(labels)
            if verbose:
                plt.imshow(inputs[0].permute(1,2,0))
                plt.plot()
            inputs = inputs.to(device)
            labels = labels.to(device)
            batch_count = inputs.size(0)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_func(outputs.float(), labels.float())
            loss.backward()
            optimizer.step()

            # loss is a batch mean; weight it by the batch size so the epoch
            # average is per sample.
            train_loss += loss.item() * batch_count
            train_samples += batch_count

            print_every = (i % 100 == 0)
            # Use the actual batch size so a short final batch is handled.
            acc = accuracy_check_for_batch(labels, outputs, batch_count, print_every, verbose=verbose)
            epoch_accuracies.append(acc)

            if i in plot_flush_points:
                _plot_batch_accuracy(plot_starting_batch, plot_acc)
                plot_acc = []
                plot_starting_batch = i
            plot_acc.append(acc)

            if print_every:
                print("Epoch : {}, Batch number: {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}".format(epoch,i, loss.item(), acc))

        train_acc = float(np.mean(epoch_accuracies)) if epoch_accuracies else float('nan')
        _plot_batch_accuracy(plot_starting_batch, plot_acc)
        print("End of epoch: {}, Accuracy is {}.".format(epoch,train_acc))

        # -------------------------------------------------------- validation
        valid_loss = 0.0
        valid_samples = 0
        valid_accuracies = []

        model.eval()
        with torch.no_grad():
            for j, (inputs, labels) in enumerate(dataloader_test):
                inputs = torch.stack(inputs).to(device)
                labels = torch.stack(labels).to(device)
                batch_count = inputs.size(0)

                outputs = model(inputs)
                loss = loss_func(outputs.float(), labels.float())

                valid_loss += loss.item() * batch_count
                valid_samples += batch_count

                # Use the validation index j; the training index i is stale
                # here and made this flag constant for the whole loop.
                print_every = (j % 100 == 0)

                acc = accuracy_check_for_batch(labels, outputs, batch_count, print_every, verbose=verbose)
                valid_accuracies.append(acc)

                if print_every:
                    print("Epoch: {}, Validation Batch number: {:03d}, Validation: Loss: {:.4f}, Accuracy: {:.4f}".format(epoch,j, loss.item(), acc))

        valid_acc = float(np.mean(valid_accuracies)) if valid_accuracies else float('nan')
        _plot_batch_accuracy(
            0, valid_accuracies,
            title='Validation Accuracy over Batch, Accuracy: {:.4f}, Epoch: {}'.format(valid_acc,epoch))

        print("End of epoch: {}, Test Accuracy is {}.".format(epoch,valid_acc))
        if valid_acc > best_acc:
            print("Best validation accuracy improved from {} to {}, saving model...".format(best_acc, valid_acc))
            best_acc = valid_acc
            torch.save(model.state_dict(), "/content/model-acc{:.4f}-e{}.pt".format(valid_acc, epoch))

        # ----------------------------------------------------- epoch summary
        # Losses are averaged per sample. The accuracies are already means on a
        # percentage scale (accuracy_check multiplies by 100), so they are
        # neither divided by the dataset size nor multiplied by 100 again.
        avg_train_loss = train_loss / max(train_samples, 1)
        avg_valid_loss = valid_loss / max(valid_samples, 1)

        epoch_end = time.time()
        print("Epoch : {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation : Loss : {:.4f}, Accuracy: {:.4f}%, Time: {:.4f}s".format(epoch+1, avg_train_loss, train_acc, avg_valid_loss, valid_acc, epoch_end-epoch_start))

    return model


# Train

In [None]:
# Run training with the default epoch count. train() returns the model object
# it was given, so `UNet` and `model` refer to the same network afterwards.
UNet = train(model)

In [None]:
# Rebuild the architecture and load a saved checkpoint for evaluation.
model = torch.hub.load('mateuszbuda/brain-segmentation-pytorch', 'unet',
    in_channels=3, out_channels=1, init_features=32, pretrained=False)
model = model.to(device)
# map_location lets a checkpoint saved on GPU load on a CPU-only runtime too.
model.load_state_dict(torch.load("/content/gdrive/MyDrive/Bee/model-acc97.5291-e14.pt",
                                 map_location=device))

# Inference mode: freeze BatchNorm statistics and skip gradient tracking.
model.eval()
with torch.no_grad():
    for inputs, labels in dataloader_test:
        inputs = torch.stack(inputs).to(device)
        labels = torch.stack(labels).to(device)

        # Forward pass - compute outputs on input data using the model
        outputs = model(inputs)

        # Report and plot the accuracy for every batch, sized by the actual
        # batch so a short final batch is handled.
        print_every = True
        acc = accuracy_check_for_batch(labels, outputs, inputs.size(0), print_every, verbose=True)

