In [None]:
import os
import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
os.getcwd()  # result is discarded: it is not the last expression of the cell, so nothing is displayed
# NOTE(review): machine-specific absolute path; all relative '../input' paths used later resolve against it.
os.chdir(r'D:\PyTorch\Airbus_Image_Segmentation\airbus-ship-detection\src')

In [None]:
from math import nan
import os
import pandas as pd
import skimage
import numpy as np
class config_():
    """Notebook-wide settings plus the eagerly loaded file listings and mask table.

    Instantiating this class performs I/O: it lists the `train_v2` and
    `test_v2` folders and reads `train_ship_segmentations_v2.csv`, all located
    under `DATA_DIR`.
    """

    def __init__(self):
        self.DATA_DIR = r"../input"
        self.BATCH_SIZE = 16
        self.IMAGE_WIDTH = 320
        self.IMAGE_HEIGHT = 320
        self.NUM_WORKERS = 2
        self.EPOCHS = 1
        self.DEVICE = "cuda"
        self.CROP_SIZE = (self.IMAGE_WIDTH, self.IMAGE_HEIGHT)
        self.NUM_CLASSES = 1

        # Every input location is derived from DATA_DIR so the dataset root is
        # configured in exactly one place.
        # NOTE(review): os.listdir returns entries in arbitrary order, so index
        # based train/test slices of these lists are not guaranteed to be stable
        # across machines.
        self.TRAIN_PATH = os.listdir(os.path.join(self.DATA_DIR, 'train_v2'))
        self.TEST_PATH = os.listdir(os.path.join(self.DATA_DIR, 'test_v2'))
        self.MASKS = pd.read_csv(os.path.join(self.DATA_DIR, 'train_ship_segmentations_v2.csv'))

In [None]:
# Build the settings object; its constructor lists the image folders and reads the mask CSV.
config = config_()

In [None]:
from math import nan
import os
import torch
import torchvision
from d2l import torch as d2l

import cv2
import numpy as np 
import pandas as pd 
import skimage
import matplotlib.pyplot as plt

# Module-level alias of the mask table; read_img and read_img2 look it up as a global.
MASKS = config.MASKS

def rle_decode(mask_rle, shape=(768, 768)):
    '''
    Decode a run-length-encoded mask into a binary image.

    mask_rle: run-length as string formatted "start length start length ..."
              (starts are 1-based, runs follow column-major pixel order).
              A float (the NaN pandas yields for an empty cell) means
              "no mask" and produces an all-background image.
    shape: (height, width) of array to return
    Returns numpy uint8 array of exactly `shape`, 1 - mask, 0 - background
    '''
    if isinstance(mask_rle, float):
        # Honour the requested shape and use the same dtype as the decoded branch.
        return np.zeros(shape, dtype=np.uint8)

    tokens = mask_rle.split()
    starts = np.asarray(tokens[0::2], dtype=int) - 1  # convert to 0-based
    lengths = np.asarray(tokens[1::2], dtype=int)

    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, starts + lengths):
        img[lo:hi] = 1

    # Column-major reshape: identical to reshape(shape).T for square shapes,
    # and still returns (height, width) when height != width.
    return img.reshape(shape, order='F')

def read_img(train_files):
    """Load images and their merged ship masks into memory.

    train_files: iterable of image file names located in '../input/train_v2'.
    Returns (images, labels): two parallel lists with one entry per file.
        images[i] is the array returned by cv2.imread (channel order is left
        as loaded), resized to config.CROP_SIZE.
        labels[i] is the sum of all decoded masks of that image, resized to
        config.CROP_SIZE; files without any row in MASKS get an all-zero mask.
    Raises FileNotFoundError when an image cannot be read.
    """
    train_files = list(train_files)
    images = []
    labels = []
    path = '../input/train_v2'

    # Collect the RLE entries of all requested images in a single pass instead
    # of filtering the whole table once per image.
    wanted = MASKS.loc[MASKS['ImageId'].isin(train_files), ['ImageId', 'EncodedPixels']]
    rles_by_image = {}
    for image_id, rle in zip(wanted['ImageId'], wanted['EncodedPixels']):
        rles_by_image.setdefault(image_id, []).append(rle)

    for ImageId in train_files:
        img_file = os.path.join(path, ImageId)
        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'Could not read image: {img_file}')

        # Take the individual ship masks and create a single mask array for all ships
        all_masks = np.zeros((768, 768))
        for mask in rles_by_image.get(ImageId, []):
            all_masks += rle_decode(mask)

        img = cv2.resize(img, config.CROP_SIZE)
        # NOTE(review): the default interpolation of cv2.resize may yield
        # fractional values along mask edges - confirm this is intended.
        all_masks = cv2.resize(all_masks, config.CROP_SIZE)

        images.append(img)
        labels.append(all_masks)
    return images, labels

class AirbusDataset(torch.utils.data.Dataset):
    """In-memory dataset of (image, mask) pairs loaded up front by read_img."""

    def __init__(self, train_files, transforms):
        # `transforms` is a callable applied to image and mask alike, or None.
        self.transforms = transforms
        loaded = read_img(train_files=train_files)
        self.features, self.labels = loaded
        print('reading ' + str(len(self.features)) + ' examples')

    def __len__(self):
        """Number of loaded examples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the (feature, label) tuple at `idx`, transformed when a transform is set."""
        sample = (self.features[idx], self.labels[idx])
        if self.transforms is None:
            return sample
        # The same transform runs on the image first, then on its mask.
        return tuple(self.transforms(item) for item in sample)

In [None]:
# The evaluation slice must not overlap the training slice: [2000:2500] lies
# inside [:5000] and would leak training images into the evaluation set.
train_f = config.TRAIN_PATH[:5000]
test_f = config.TRAIN_PATH[5000:5500]
features, labels = read_img(train_f)

In [None]:
# Inspect shape and Python type of the first image and the first mask.
features[0].shape, labels[0].shape, type(features[0]), type(labels[0])

In [None]:
# # image2 = cv2.cvtColor(features[2], cv2.COLOR_BGR2RGB)
# image2 = cv2.resize(features[0], (128, 128))
# gtMask = cv2.resize(labels[0], (128, 128))
# plt.imshow(image2)
# # plt.imshow(gtMask)
# image2.shape, gtMask.shape

In [None]:
from torch.optim import Adam
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from torchvision import transforms

from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
import time
import os
import config
import torchvision
from d2l import torch as d2l
import torch.nn.functional as F
import dataset
import model
import engine



# NOTE(review): this rebinds the name `transforms` from the torchvision module to a
# Compose instance, so re-running the assignment without re-importing fails on
# `transforms.Compose`.
transforms = transforms.Compose([
                                 # transforms.ToPILImage(),
                                transforms.ToTensor()]
)

# NOTE(review): `import config` earlier in this cell rebinds `config` to a module, so
# TRAIN_PATH is read from that module here - confirm it exposes the attribute.
# First 2000 listed files for training, the following 500 for evaluation.
train_f = config.TRAIN_PATH[:2000]
test_f = config.TRAIN_PATH[2000:2500]

def loss_fn(inputs, targets):
    """Unreduced cross-entropy averaged over dimension 1 twice.

    Assumes `inputs` is (N, C, H, W) logits and `targets` is (N, H, W) class
    indices, which yields one loss value per sample - TODO confirm with caller.
    """
    per_element = F.cross_entropy(inputs, targets, reduction='none')
    averaged_once = per_element.mean(dim=1)
    return averaged_once.mean(dim=1)

def run_training():
    """Build datasets, loaders, model and optimizer, then hand off to engine.train_fn.

    Reads the module-level `train_f`, `test_f`, `transforms` and `config`.
    Returns None.
    """
    voc_train = dataset.AirbusDataset(train_files=train_f, transforms=transforms)
    voc_test = dataset.AirbusDataset(train_files=test_f, transforms=transforms)

    # Both loaders share everything except shuffling.
    shared_loader_args = dict(
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        drop_last=True,
    )
    train_loader = DataLoader(voc_train, shuffle=True, **shared_loader_args)
    test_loader = DataLoader(voc_test, shuffle=False, **shared_loader_args)

    # Build Model
    net = model.ImageSegmentation(num_classes=config.NUM_CLASSES)

    num_epochs = config.EPOCHS
    learning_rate = 0.001
    weight_decay = 1e-3
    devices = d2l.try_all_gpus()

    optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, weight_decay=weight_decay)
    engine.train_fn(net, train_loader, test_loader, loss_fn, optimizer, num_epochs, devices)


In [None]:
# Build a dataset from only the first 50 training files.
voc_train = dataset.AirbusDataset(train_files=train_f[:50], transforms=transforms)

In [None]:
# Shuffled batches of 8; an incomplete final batch is dropped (drop_last=True).
train_loader = DataLoader(voc_train, batch_size = 8, shuffle = True, drop_last=True)

In [None]:
# Pull a single batch from the loader and inspect the shapes of images and labels.
images_b, labels_b = next(iter(train_loader))
images_b.shape, labels_b.shape

In [None]:
# Print the shape of the first stored label only; the loop exits after one iteration.
for (x) in voc_train.labels:
    print(x.shape)
    break

In [None]:
# Shape of the first entry of the module-level `features` list.
features[0].shape

In [None]:
# Re-import the module because the name `transforms` is rebound to a Compose instance below.
from torchvision import transforms
# Pipeline applied to images and masks: ndarray -> PIL image -> resize to config.CROP_SIZE -> tensor.
transforms = transforms.Compose([transforms.ToPILImage(),
        transforms.Resize(config.CROP_SIZE),
        transforms.ToTensor()])

In [None]:
import numpy as np
from PIL import Image

def read_img2(train_files):
    """Return image paths and mask-file paths, creating missing mask files on disk.

    train_files: iterable of image file names located in '../input/train_v2'.
    Returns (images, labels): two parallel lists of path strings; labels[i] is
    the mask file written under `path2` with the same file name as the image.

    A mask file is only decoded and written when it does not exist yet. The
    merged mask is stretched to 0..255; a mask without any contrast (for
    example an image with no ships) is written as a constant image instead of
    going through a 0/0 normalisation.
    """
    images = []
    labels = []
    path = '../input/train_v2'
    path2 = r'D:\PyTorch\Airbus_Image_Segmentation\airbus-ship-detection\input\train_v2_masks'
    # Saving fails if the target folder is missing.
    os.makedirs(path2, exist_ok=True)

    for ImageId in train_files:
        img_path = os.path.join(path, ImageId)
        # NOTE(review): the mask reuses the image file name; if that name ends in
        # .jpg the mask is presumably stored with lossy compression - confirm.
        mask_path = os.path.join(path2, f'{ImageId}')

        if not os.path.exists(mask_path):
            img_masks = MASKS.loc[MASKS['ImageId'] == ImageId, 'EncodedPixels'].tolist()

            # Take the individual ship masks and create a single mask array for all ships
            all_masks = np.zeros((768, 768))
            for mask in img_masks:
                all_masks += rle_decode(mask)

            lowest, highest = all_masks.min(), all_masks.max()
            if highest > lowest:
                I8 = (((all_masks - lowest) / (highest - lowest)) * 255.9).astype(np.uint8)
            else:
                # No contrast: all background (0) or, degenerately, all foreground (255).
                I8 = np.full(all_masks.shape, 255 if highest > 0 else 0, dtype=np.uint8)
            Image.fromarray(I8).save(mask_path)

        images.append(img_path)
        labels.append(mask_path)
    return images, labels

In [None]:
class AirbusDataset(torch.utils.data.Dataset):
    """Dataset that reads each image and its mask file from disk on access.

    The path lists come from read_img2, which also creates missing mask files.
    """

    def __init__(self, train_files, transforms):
        # `transforms` is a callable applied to image and mask alike, or None.
        self.transforms = transforms
        self.train_files = train_files

        self.imagePaths, self.maskPaths = read_img2(train_files=self.train_files)
        print('reading ' + str(len(self.imagePaths)) + ' examples')

    def __len__(self):
        # return the number of total samples contained in the dataset
        return len(self.imagePaths)

    def __getitem__(self, idx):
        """Return the (image, mask) tuple for `idx`.

        Raises FileNotFoundError when the image or the mask cannot be read.
        """
        imagePath = self.imagePaths[idx]
        maskPath = self.maskPaths[idx]

        # cv2.imread signals failure by returning None rather than raising, so
        # check explicitly to get an error that names the offending file.
        image = cv2.imread(imagePath)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {imagePath}')
        # swap channels from BGR to RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # read the associated mask from disk in grayscale mode
        mask = cv2.imread(maskPath, 0)
        if mask is None:
            raise FileNotFoundError(f'Could not read mask: {maskPath}')

        # check to see if we are applying any transformations
        if self.transforms is not None:
            # apply the transformations to both image and its mask
            image = self.transforms(image)
            mask = self.transforms(mask)

        # return a tuple of the image and its mask
        return (image, mask)

In [None]:
# read_img2 returns file paths, so `features` and `labels` hold path strings after this cell.
train_f = config.TRAIN_PATH[:5000]
#test_f = config.TRAIN_PATH[2000:2500]
features, labels = read_img2(train_f[:5000])

In [None]:
# batch_size matches the number of requested files, so one batch covers the whole dataset.
# With drop_last=True a dataset of fewer than 5000 items would yield no batch at all.
voc_train = AirbusDataset(train_files=train_f[:5000], transforms=transforms)
train_loader = torch.utils.data.DataLoader(voc_train, batch_size = 5000, shuffle = True, drop_last=True)

In [None]:
# Same DataLoader construction as at the end of the previous cell.
train_loader = torch.utils.data.DataLoader(voc_train, batch_size = 5000, shuffle = True, drop_last=True)

In [None]:
# Show the dataset wrapped by the loader (the dangling attribute access was a syntax error).
train_loader.dataset

In [None]:
# Pull a single batch from the loader and inspect the shapes of images and labels.
images_b, labels_b = next(iter(train_loader))
images_b.shape, labels_b.shape

In [None]:
# Read the sixth mask file in grayscale mode (flag 0) and check its shape.
mask = cv2.imread(labels[5], 0)
mask.shape

In [None]:
# Shape of the first image tensor in the batch.
images_b[0].shape

In [None]:
# ToTensor yields channel-first (C, H, W) tensors while imshow expects (H, W, C).
# The order (1, 2, 0) moves the channels last without swapping height and width.
k = torch.permute(images_b[7], (1, 2, 0))

In [None]:
# Display the permuted image tensor.
plt.imshow(k)

In [None]:
def ImageSegmentation(num_classes):
    """Build a segmentation network on top of a pretrained ResNet-18.

    All children of the ResNet except the last two are kept, followed by a 1x1
    convolution from 512 to `num_classes` channels and a transposed convolution
    (kernel 64, stride 32, padding 16) whose weights are set from
    bilinear_kernel.

    Returns the assembled nn.Sequential.
    """
    backbone = torchvision.models.resnet18(pretrained=True)
    kept_layers = list(backbone.children())[:-2]
    model = nn.Sequential(*kept_layers)

    model.add_module('final_conv', nn.Conv2d(512, num_classes, kernel_size=1))

    upsample = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=64, padding=16, stride=32)
    model.add_module('transpose_conv', upsample)

    # Overwrite the random initialisation of the upsampling layer.
    upsample.weight.data.copy_(bilinear_kernel(num_classes, num_classes, 64))

    return model

def bilinear_kernel(in_channels, out_channels, kernel_size):
    """Create transposed-convolution weights holding a bilinear filter.

    Returns a tensor of shape (in_channels, out_channels, kernel_size,
    kernel_size) that is zero everywhere except at the paired channel indices
    (i, i), each of which holds the same 2-D filter.
    """
    factor = (kernel_size + 1) // 2
    center = factor - 1 if kernel_size % 2 == 1 else factor - 0.5

    # 1-D triangular profile; the 2-D filter is its outer product with itself.
    positions = torch.arange(kernel_size)
    profile = 1 - torch.abs(positions - center) / factor
    filt = profile.reshape(-1, 1) * profile.reshape(1, -1)

    weight = torch.zeros((in_channels, out_channels, kernel_size, kernel_size))
    weight[range(in_channels), range(out_channels), :, :] = filt
    return weight

In [None]:
import torch
import torchvision
import torch.nn as nn
path2 = r'D:\PyTorch\Airbus_Image_Segmentation\airbus-ship-detection\src\model_1.pt'
imagePath = r'D:\PyTorch\Airbus_Image_Segmentation\test_airbus\00e90efc3.jpg'
path = r"D:\PyTorch\Airbus_Image_Segmentation\airbus-ship-detection\models\model_1.pt"
# Use the GPU when present, otherwise fall back to the CPU. map_location lets a
# checkpoint that was saved from a GPU run load on a CPU-only machine.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
checkpoint = torch.load(path, map_location=device)
net = ImageSegmentation(1)
# The weights are stored under the 'model' key of the checkpoint.
net.load_state_dict(checkpoint['model'])
net = net.eval()
net = net.to(device)

In [None]:
from torchvision import transforms
# Inference preprocessing: ndarray -> PIL image -> resize to 768x768 -> tensor -> per-channel normalisation.
# NOTE(review): mean 0.5 / std 0.29 presumably mirror the training-time normalisation - confirm.
transforms_image = transforms.Compose([transforms.ToPILImage(),
                                 transforms.Resize((768, 768)),
                                 transforms.ToTensor(),
                                 torchvision.transforms.Normalize(
                                        mean=[0.5, 0.5, 0.5],
                                        std=[0.29, 0.29, 0.29], )
                                 ])

def make_predictions(model, imagePath):
    """Run `model` on a single image file and return its raw output.

    model: network to evaluate; it is switched to eval mode.
    imagePath: path of the image to load with cv2.
    Returns the output of `model` for a batch of one image, computed without
    gradient tracking. No activation or thresholding is applied here.
    Raises FileNotFoundError when the image cannot be read.
    """
    # set model to evaluation mode
    model.eval()

    # load the image from disk and swap its color channels to RGB
    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f'Could not read image: {imagePath}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # preprocess with the module-level pipeline and add the batch dimension
    image = transforms_image(image).unsqueeze(0)

    # run on whatever device the model lives on instead of assuming CUDA
    first_param = next(model.parameters(), None)
    if first_param is not None:
        image = image.to(first_param.device)
    print(image.shape)

    # turn off gradient tracking
    with torch.no_grad():
        return model(image)

In [None]:
import cv2
# `pred` is the raw model output returned by make_predictions.
pred = make_predictions(net, imagePath)
# Move the channel dimension last, then drop all singleton dimensions.
k = torch.permute(pred, (0, 2, 3, 1))
k = torch.squeeze(k)
# Copy to host memory so the tensor can be plotted.
k = k.cpu() 
#k = np.uint8(255) - k

In [None]:
import matplotlib.pyplot as plt
image = cv2.imread(imagePath )
# cv2 loads images as BGR while matplotlib interprets arrays as RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Show the prediction in grayscale.
plt.imshow(k, cmap='gray')

In [None]:
# Show the input image for comparison with the prediction.
plt.imshow(image)

In [None]:
# Compare BCELoss applied to sigmoid outputs with BCEWithLogitsLoss applied to the
# raw values, using the same random input and random 0/1 target.
# NOTE(review): `input` shadows the Python builtin of the same name for the rest of the session.
m = nn.Sigmoid()
loss = nn.BCELoss()
loss2 = nn.BCEWithLogitsLoss()
input = torch.randn(3, requires_grad=True)
target = torch.empty(3).random_(2)
output = loss(m(input), target)
output2 = loss2(input, target)
print(output, input, target , output2, m(input))

In [None]:
# BCEWithLogitsLoss with an explicit per-class pos_weight on constant inputs.
target = torch.ones([10, 64], dtype=torch.float32)  # 64 classes, batch size = 10
output = torch.full([10, 64], 1.5)  # A prediction (logit)
pos_weight = torch.ones([64])  # All weights are equal to 1
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)
print(criterion(output, target), target.shape, output.shape)  # -log(sigmoid(1.5))

In [None]:
# Example of target with class indices
# Rebinds `loss`, `input`, `target` and `output` from the earlier BCE cell.
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)
print(output, input.shape, target)

In [None]:
# Example of target with class probabilities
# Reuses the `loss` object bound in an earlier cell; the target rows are softmax outputs.
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5).softmax(dim=1)
output = loss(input, target)
print(output, input.shape, target)

In [None]:
# Shape of the label batch pulled from the loader.
labels_b.shape

In [None]:
# Drop every singleton dimension of the label batch.
labels_2 = torch.squeeze(labels_b)

In [None]:
# Sum of all label values in the batch.
torch.sum(labels_2)

In [None]:
# 5000 * 320 * 320 = 512000000, the total used as the pixel count in the pos_weight cell.
5000 * 320 * 320

In [None]:
# Ratio of negatives to positives, the usual form of a pos_weight for BCEWithLogitsLoss.
# NOTE(review): 537256.1250 is presumably the torch.sum(labels_2) output copied by hand,
# and 512000000 equals 5000 * 320 * 320 - confirm both before reusing.
num_positives = 537256.1250
num_negatives = 512000000 - num_positives
pos_weight  = num_negatives / num_positives
pos_weight

In [None]:
# NOTE(review): if `labels` was last assigned by the read_img2 cell it holds path strings,
# and np.sum on a string will presumably fail - confirm the intended execution order.
len(labels), np.sum(labels[0])

In [None]:
# One sum per entry of `labels`; rebinds `k`, which earlier cells used for image tensors.
k = [np.sum(x) for x in labels]

In [None]:
# Total of the per-label sums divided by 512000000 (= 5000 * 320 * 320).
sum(k) / 512000000

In [None]:
# Same product as computed in an earlier cell: 512000000.
320 * 320 * 5000