In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
import keras.backend as K
import tensorflow as tf
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

from tensorflow.keras.losses import binary_crossentropy
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Ask TensorFlow to allocate GPU memory on demand rather than all at once.
gpus = tf.config.experimental.list_physical_devices('GPU')

if not gpus:
    print("No GPUs found!")
else:
    try:
        # Turn on memory growth for each physical GPU that was found
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # The setting was rejected at runtime; report the reason and continue
        print(e)

In [None]:
# Root of the competition data plus its train / test image folders
input_path = '/kaggle/input/airbus-ship-detection'
train_dir_path, test_dir_path = (
    os.path.join(input_path, folder) for folder in ('train_v2/', 'test_v2/')
)

In [None]:
# Load the segmentation table shipped with the dataset and display it
train_masks = pd.read_csv(os.path.join(input_path,'train_ship_segmentations_v2.csv'))
train_masks

In [None]:
# (number of rows, number of columns) of the table
train_masks.shape

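The table has 231723 rows. Note that a row is a mask entry rather than an image: an image that contains several ships appears in several rows.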

In [None]:
# Count missing values per column
train_masks.isna().sum()

We can see that there are no NaN or empty values in ImageId, and that 150000 rows have an empty EncodedPixels value. This means that our data contains 150000 images with no ships in them, while the remaining 81723 rows each describe a ship mask.

In [None]:
# How many rows each ImageId has
train_masks['ImageId'].value_counts()

In [None]:
# Number of distinct ImageIds
train_masks['ImageId'].value_counts().shape[0]

We can see that some ImageIds are duplicated: the table contains 192556 unique images.

In [None]:
# Inspect all rows that belong to one ImageId
train_masks[train_masks['ImageId'] == 'e6fd0c12e.jpg']

In [None]:
def show_img(img_id):
    """Draw the training image whose file name is `img_id` with matplotlib."""
    image_path = os.path.join(train_dir_path, img_id)
    plt.imshow(Image.open(image_path))

In [None]:
# Display the image whose rows were inspected above
show_img('e6fd0c12e.jpg')

In [None]:
def rle_decode(mask_rle, input_shape=(768,768)):
    '''
    Decode a run-length encoded mask into a 2-D float32 array.

    mask_rle: run-length string formatted as "start length start length ...",
              where starts are 1-based positions in the flattened image.
              Any non-string value (NaN, None, a numpy float, ...) is treated
              as "no mask" and yields an all-zero array.
    input_shape: shape used to rebuild the image from the flat buffer. The
              buffer is reshaped to `input_shape` and then transposed, so the
              result has shape (input_shape[1], input_shape[0]); for square
              images this is simply `input_shape`.
    Returns numpy array, 1.0 - mask, 0.0 - background

    '''
    img = np.zeros(input_shape[0]*input_shape[1], dtype=np.float32)
    # Checking for `str` (instead of "is not a Python float") also covers None
    # and numpy float NaNs, which previously crashed on `.split()`.
    if isinstance(mask_rle, str):
        s = mask_rle.split()
        starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
        starts -= 1  # RLE starts are 1-based, numpy indices are 0-based
        ends = starts + lengths
        for lo, hi in zip(starts, ends):
            img[lo:hi] = 1.0
    # Transpose because the runs enumerate pixels column by column
    return img.reshape((input_shape[0],input_shape[1])).T

def show_decode(mask_rle, shape=(768, 768)):
    """Decode an RLE string ("start length ..." with 1-based starts) into a uint8 0/1 array.

    The flat buffer is reshaped to `shape` and transposed before it is returned.
    """
    tokens = mask_rle.split()
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1  # to 0-based
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    canvas = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, length in zip(run_starts, run_lengths):
        canvas[begin:begin + length] = 1
    # Transpose to align with the RLE direction
    return canvas.reshape(shape).T

def masks_as_image(in_mask_list):
    """Add up the decoded masks of every RLE string in `in_mask_list`.

    Entries that are not strings are skipped. Returns an int16 array of shape
    (768, 768, 1).
    """
    combined = np.zeros((768, 768), dtype = np.int16)
    for encoded in (entry for entry in in_mask_list if isinstance(entry, str)):
        combined += show_decode(encoded)
    return combined[..., np.newaxis]

In [None]:
# Merge all masks of the example image into one array and show it
mask_img = masks_as_image(train_masks.query('ImageId=="e6fd0c12e.jpg"')['EncodedPixels'])
plt.imshow(mask_img[:, :, 0])
plt.show()

In [None]:
# Same mask again, with the axes limited to a sub-region of the plot
mask_img = masks_as_image(train_masks.query('ImageId=="e6fd0c12e.jpg"')['EncodedPixels'])
plt.imshow(mask_img[:, :, 0])
plt.xlim((600, 800))  
plt.ylim((200, 500))  
plt.show()

We can now be sure that duplicated ImageIds mean there is more than one ship in the image, and that the number of ships in an image equals the number of rows with that ImageId.

So now we can create a `has_ship` feature, aggregate it with a sum to obtain the number of ships in each image, and then plot its distribution.

I will also examine the image file sizes and plot their distribution, to check whether all images have a normal size or whether some files are corrupted.

In [None]:
# Flag rows that carry a ship mask (1) versus rows with a missing mask (0).
# `notna()` replaces the identity test against `np.NaN`: identity only works
# while every missing value is the very same NaN object, and the `np.NaN`
# alias is not available in NumPy 2.x.
train_masks['has_ship'] = train_masks['EncodedPixels'].notna().astype(int)
train_masks

In [None]:
# One row per image: the sum of the per-row `has_ship` flags is the ship count
train_masks_aggregated = (
    train_masks
    .groupby('ImageId')
    .agg({'has_ship': 'sum'})
    .rename(columns={'has_ship': 'ship_amount'})
    .reset_index()
)
train_masks_aggregated

In [None]:
# Histogram of ships per image, including images with zero ships
plt.hist(train_masks_aggregated['ship_amount'], bins=10)  
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.title('Distribution of ' + 'ship_amount')
plt.show()

Now we can observe that most of our images don't contain any ships, so we have a class imbalance which I need to handle before the training process.

Now, let's drop zero values, to see distribution of ships amount on non-empty images

In [None]:
# Histogram of ships per image, restricted to images with at least one ship
plt.hist(train_masks_aggregated[train_masks_aggregated['ship_amount']>0]['ship_amount'], bins=15)  
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.title('Distribution of ship_amount without zeros')
plt.show()

We can observe that most non-empty images contain only one or two ships, and only a small number contain more than 5.

Now, let's create one more feature, "has_ship", and plot its distribution, so we can see our class imbalance more clearly.

In [None]:
# Per-image flag: 1 when the image has at least one ship, otherwise 0
train_masks_aggregated['has_ship'] = (train_masks_aggregated['ship_amount'] > 0).astype(int)


In [None]:
# Bar chart of images without / with ships, annotated with percentages.
ship_counts = train_masks_aggregated['has_ship'].value_counts()

total_count = ship_counts.sum()
class_labels = {0: 'No Ship', 1: 'Has Ship'}

plt.bar(ship_counts.index, ship_counts.values)
plt.xlabel('has_ship')
plt.ylabel('Frequency')
plt.title('Distribution of Ship Amounts')

# value_counts() orders by frequency, not by class value, so the text position
# and the tick labels are taken from the index rather than from the position.
for has_ship_value, count in ship_counts.items():
    plt.text(has_ship_value, count, f'{count / total_count * 100:.2f}%', ha='center', va='bottom')

plt.xticks(ship_counts.index, [class_labels[value] for value in ship_counts.index])
plt.show()

In [None]:
# Raw counts behind the bar chart
ship_counts

In [None]:
# Pie chart of the same class counts. Labels are looked up from the index of
# `ship_counts` so they stay correct whatever order value_counts() returns.
pie_labels = [{0: 'No Ship', 1: 'Has Ship'}[value] for value in ship_counts.index]

plt.figure(figsize=(6, 6))
plt.pie(ship_counts.values, labels=pie_labels, autopct='%1.2f%%', startangle=140)
plt.title('Distribution of Ship Amounts')
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle
plt.show()

Now we can see how imbalanced our classes are. I think the best strategy for this issue is to undersample the empty images down to the number of non-empty images, so that our model can distinguish well between images with and without ships.

Now, let's research our image sizes

In [None]:
# Size on disk of every training image, in kilobytes
train_masks_aggregated['file_size_kb'] = [
    os.path.getsize(os.path.join(train_dir_path, c_img_id)) / 1024
    for c_img_id in train_masks_aggregated['ImageId']
]
train_masks_aggregated

In [None]:
# Distribution of the image file sizes
train_masks_aggregated['file_size_kb'].hist()

In [None]:
# List the images whose file is smaller than 70 KB
train_masks_aggregated[train_masks_aggregated['file_size_kb']<70]

In [None]:
# Look at a few individual images by file name
show_img('0080bd6a5.jpg')

In [None]:
show_img('0005d01c8.jpg')

In [None]:
show_img('001bfb70a.jpg')

We can observe that images with a very small file size (below roughly 50 KB, the threshold applied in the next cell) are corrupted or don't contain useful information, so we need to drop them before the training stage.

In [None]:
# Keep only images whose file is larger than 50 KB.
# This overwrites the frame in place, so re-running the cell filters the already filtered frame.
train_masks_aggregated = train_masks_aggregated[train_masks_aggregated['file_size_kb']>50]

Now let's undersample our data

In [None]:
# Balance the classes: keep every image with ships and draw, without
# replacement, the same number of images without ships.
with_ships = train_masks_aggregated['has_ship'] == 1
without_ships = train_masks_aggregated['has_ship'] == 0
minority_class = train_masks_aggregated[with_ships]
majority_class = train_masks_aggregated[without_ships]

undersampled_majority_class = resample(
    majority_class,
    replace=False,
    n_samples=len(minority_class),
    random_state=42,
)

undersampled_train_masks = pd.concat([minority_class, undersampled_majority_class])
undersampled_train_masks

In [None]:
# Class counts after undersampling
undersampled_train_masks['has_ship'].value_counts()

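Now I will take a sample of 1000 rows without ships and 15000 rows with ship masks (see `EMPTY_AMOUNT` and `SHIPS_AMOUNT` below). Working on a sample makes it quick to compare the results of different models and saves time, so that only the best-performing model needs to be trained on the full data.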

In [None]:
# Number of rows without a mask / with a mask to draw for the working sample
EMPTY_AMOUNT, SHIPS_AMOUNT = 1000, 15000

In [None]:
# Bring the per-mask rows back for the images kept by the undersampling.
# The two frames share exactly the columns ImageId and has_ship, which are the
# keys pandas picks by default; they are spelled out here for readability.
input_undersampled_df = pd.merge(
    train_masks,
    undersampled_train_masks,
    how='inner',
    on=['ImageId', 'has_ship'],
)
input_undersampled_df

In [None]:
# Working subset: EMPTY_AMOUNT rows without a mask plus SHIPS_AMOUNT rows with one.
# A fixed random_state makes the subset identical on every run, so results of
# different models stay comparable.
has_no_mask = input_undersampled_df["EncodedPixels"].isna()
df = pd.concat([
    input_undersampled_df[has_no_mask].sample(EMPTY_AMOUNT, random_state=42),
    input_undersampled_df[~has_no_mask].sample(SHIPS_AMOUNT, random_state=42),
])
df

In [None]:
# df.drop(columns=['has_ship','ship_amount','file_size_kb']).to_csv('prep_data.csv')

In [None]:
# Side length (pixels) of the square crops used by the crop helpers, the generator and the model input
SIZE=256

In [None]:
def crop3x3(img, i):
    """Cut tile `i` out of a 3x3 grid of SIZE x SIZE tiles.

       img: np.ndarray - original image 768x768
       i: int 0-8 - tile index, numbered row by row: 0 1 2
                                                     3 4 5
                                                     6 7 8
       returns: image 256x256
    """
    grid_row = i // 3
    grid_col = i % 3
    return img[grid_row * SIZE: (grid_row + 1) * SIZE, grid_col * SIZE: (grid_col + 1) * SIZE]


def crop3x3_mask(img):
    """Pick the 3x3-grid tile of `img` with the largest sum.

    Returns a tuple (tile, tile index); ties resolve to whatever index
    `K.argmax` reports.
    """
    tile_sums = tuple(K.sum(crop3x3(img, tile)) for tile in range(9))
    i = K.argmax(tile_sums)
    return (crop3x3(img, i), i)

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that yields batches of SIZE x SIZE image crops and masks.

    Every row of `df_mask` supplies an `ImageId` and an `EncodedPixels` value.
    For each row the mask is decoded, the 3x3-grid tile with the largest mask
    area is selected, and the same tile is cut out of the image.

    NOTE(review): the target is decoded from a single row. If `df_mask` holds
    one row per ship, the mask contains only that row's ship, not every ship
    visible in the crop - confirm this is intended.
    """

    def __init__(self, datapath, batch_size, df_mask: pd.DataFrame, augmentation_dict=None):
        """
        datapath: directory that contains the image files.
        batch_size: number of samples per batch.
        df_mask: frame with `ImageId` and `EncodedPixels` columns.
        augmentation_dict: optional keyword arguments for Keras'
            ImageDataGenerator; None disables augmentation.
        """
        self.datapath = datapath
        self.batch_size = batch_size
        self.df = df_mask.sample(frac=1)  # initial shuffle
        self.l = len(self.df) // batch_size  # only full batches are served
        self.augmentation = None
        if augmentation_dict is not None:
            self.augmentation = tf.keras.preprocessing.image.ImageDataGenerator(
                **augmentation_dict
            )

    def __len__(self):
        """Number of full batches per epoch."""
        return self.l

    def on_epoch_end(self):
        """Reshuffle the rows after every epoch."""
        self.df = self.df.sample(frac=1)

    def _load_pair(self, image_id, encoded_pixels):
        """Load one image scaled to [0, 1] and return (image crop, mask crop, tile index)."""
        temp = tf.keras.preprocessing.image.load_img(os.path.join(self.datapath, image_id))
        temp = tf.keras.preprocessing.image.img_to_array(temp) / 255
        mask, tile = crop3x3_mask(rle_decode(encoded_pixels))
        return crop3x3(temp, tile), mask, tile

    def _augment_pair(self, image, mask):
        """Apply ONE random transform to both the image and its mask.

        `ImageDataGenerator.flow(x, y)` only transforms `x`, which left the
        mask untouched and therefore misaligned with the augmented image.
        """
        params = self.augmentation.get_random_transform(image.shape)
        image = self.augmentation.apply_transform(image, params)
        image = self.augmentation.standardize(image)
        # Colour changes must not touch the mask; keep geometric parameters only
        geometric = {key: value for key, value in params.items()
                     if key not in ('brightness', 'channel_shift_intensity')}
        # apply_transform expects a 3-D array, so add and drop a channel axis
        mask = self.augmentation.apply_transform(mask[..., np.newaxis], geometric)[..., 0]
        # Interpolation produces fractional values; make the mask binary again
        return image, (mask > 0.5).astype(np.float32)

    def __getitem__(self, index):
        """Return (images, masks) for batch `index`.

        images: float32 array (n, SIZE, SIZE, 3); masks: float32 array
        (n, SIZE, SIZE), where n is the number of rows in the batch.
        """
        batch_df = self.df[index * self.batch_size: (index + 1) * self.batch_size]
        # Size the buffers by the rows actually present so a short slice can
        # never return uninitialised np.empty memory
        n_rows = len(batch_df)
        mask = np.empty((n_rows, SIZE, SIZE), np.float32)
        image = np.empty((n_rows, SIZE, SIZE, 3), np.float32)

        for b, row in enumerate(batch_df.itertuples()):
            image[b], mask[b], _ = self._load_pair(row.ImageId, row.EncodedPixels)
            if self.augmentation is not None:
                image[b], mask[b] = self._augment_pair(image[b], mask[b])

        return image, mask

    def show_samples(self, num_samples=5):
        """Plot `num_samples` randomly chosen (image crop, mask crop) pairs side by side."""
        # squeeze=False keeps `axes` two-dimensional even when num_samples == 1
        fig, axes = plt.subplots(num_samples, 2, figsize=(10, num_samples * 5), squeeze=False)
        indices = np.random.randint(0, len(self.df), num_samples)

        for i, idx in enumerate(indices):
            row = self.df.iloc[idx]
            image, mask, _ = self._load_pair(row['ImageId'], row['EncodedPixels'])

            axes[i, 0].imshow(image)
            axes[i, 0].set_title(f"Image {i+1}")
            axes[i, 0].axis('off')

            axes[i, 1].imshow(mask, cmap='gray')
            axes[i, 1].set_title(f"Mask {i+1}")
            axes[i, 1].axis('off')

        plt.tight_layout()
        plt.show()

Splitting data into train/validation set and creating DataGenerators

In [None]:
# Keyword arguments for Keras' ImageDataGenerator (augmentation settings)
dg_args = {
    'featurewise_center': False,
    'samplewise_center': False,
    'rotation_range': 90,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.1,
    'zoom_range': [0.9, 1.25],
    'horizontal_flip': True,
    'vertical_flip': True,
    'fill_mode': 'reflect',
    'data_format': 'channels_last',
}

In [None]:
batch_size = 16
# Split on unique ImageIds with a fixed seed. `df` can hold several rows for
# one image (one per ship), so a row-level split would place the same picture
# in both sets and make the validation scores look better than they are.
train_ids, valid_ids = train_test_split(df['ImageId'].unique(), test_size=0.2, random_state=42)
train_df = df[df['ImageId'].isin(train_ids)]
valid_df = df[df['ImageId'].isin(valid_ids)]
train = DataGenerator(train_dir_path, batch_size, train_df)
valid = DataGenerator(train_dir_path, batch_size, valid_df)


In [None]:
# Visual check: five random image / mask crops from the training generator
train.show_samples(5)

In [None]:
# Number of batches per epoch in the train and validation generators
len(train), len(valid)

In [None]:
# Another five random samples (the indices are drawn anew on every call)
train.show_samples(5)

Losses

In [None]:
def IoU(y_true, y_pred, eps=1e-6):
    """Negative mean intersection-over-union, usable as a loss.

    Sums run over axes 1-3, so both tensors must be 4-D. When `y_true` holds no
    positive value the score is computed on the inverted tensors (with the
    default `eps`).
    """
    if K.max(y_true) == 0.0:
        # empty image: score the background instead
        return IoU(1-y_true, 1-y_pred)
    overlap = K.sum(y_true * y_pred, axis=[1,2,3])
    total = K.sum(y_true, axis=[1,2,3]) + K.sum(y_pred, axis=[1,2,3])
    return -K.mean((overlap + eps) / (total - overlap + eps), axis=0)

def dice_coef(y_true, y_pred, smooth=1):
    """Mean smoothed Dice coefficient; sums run over axes 1-3 of each sample."""
    overlap = K.sum(y_true * y_pred, axis=[1,2,3])
    total = K.sum(y_true, axis=[1,2,3]) + K.sum(y_pred, axis=[1,2,3])
    per_sample = (2. * overlap + smooth) / (total + smooth)
    return K.mean(per_sample, axis=0)

def combo_loss(y_true, y_pred):
    """Binary cross-entropy scaled by 1e-3, minus the Dice coefficient."""
    bce_term = 1e-3 * binary_crossentropy(y_true, y_pred)
    return bce_term - dice_coef(y_true, y_pred)

def jaccard_index(y_true, y_pred, smooth=1):
    """Negative mean smoothed Jaccard index; sums run over axes 1-3 of each sample."""
    overlap = K.sum(y_true * y_pred, axis=[1,2,3])
    total = K.sum(y_true, axis=[1,2,3]) + K.sum(y_pred, axis=[1,2,3])
    per_sample = (overlap + smooth) / (total - overlap + smooth)
    return -K.mean(per_sample, axis=0)

In [None]:
def BCE_dice(y_true, y_pred):
    """Element-wise binary cross-entropy plus (1 - Dice score).

    `dice_score` is looked up when the loss is called, so its cell has to be
    executed before training starts.
    """
    bce = K.binary_crossentropy(y_true, y_pred)
    dice_penalty = 1-dice_score(y_true, y_pred)
    return bce + dice_penalty

In [None]:
def dice_score(y_true, y_pred):
    """Dice score over all elements of the two tensors, smoothed with 0.0001."""
    numerator = 2.0*K.sum(y_pred * y_true)+0.0001
    denominator = K.sum(y_true)+ K.sum(y_pred)+0.0001
    return numerator / denominator

# Baseline model 


In [None]:
 
def encoder_block(inputs, num_filters):
    """Encoder stage: conv 3x3 + ELU, dropout 0.2, conv 3x3 + ELU, 2x2 max-pool.

    Returns the pooled tensor.
    """
    layers = tf.keras.layers

    # First 3x3 convolution followed by ELU activation
    x = layers.Conv2D(num_filters, 3, padding='same')(inputs)
    x = layers.Activation('elu')(x)

    x = layers.Dropout(0.2)(x)

    # Second 3x3 convolution followed by ELU activation
    x = layers.Conv2D(num_filters, 3, padding='same')(x)
    x = layers.Activation('elu')(x)

    # Max pooling with a 2x2 window and stride 2
    return layers.MaxPool2D(pool_size=(2, 2), strides=2)(x)

In [None]:
def decoder_block(inputs, skip_features, num_filters):
    """Decoder stage: 2x transposed-conv upsampling, concatenation with the
    resized skip features, dropout 0.2, then two conv 3x3 + ELU layers."""
    layers = tf.keras.layers

    # Upsample with a 2x2 transposed convolution, stride 2
    x = layers.Conv2DTranspose(num_filters, (2, 2), strides=2, padding='same')(inputs)

    # Resize the skip features to the spatial size of the upsampled tensor
    target_size = (x.shape[1], x.shape[2])
    skip_features = tf.image.resize(skip_features, size=target_size)
    x = layers.Concatenate()([x, skip_features])

    x = layers.Dropout(0.2)(x)

    # Two 3x3 convolutions, each followed by ELU activation
    x = layers.Conv2D(num_filters, 3, padding='same')(x)
    x = layers.Activation('elu')(x)
    x = layers.Conv2D(num_filters, 3, padding='same')(x)
    return layers.Activation('elu')(x)

In [None]:
def unet_model(input_shape = (SIZE, SIZE, 3), num_classes = 1):
    """Build the U-Net style model: four encoder stages (16-128 filters), a
    128-filter bottleneck, four decoder stages and a 1x1 sigmoid output."""
    layers = tf.keras.layers
    inputs = layers.Input(input_shape)

    # Contracting path: remember each stage's output for the skip connections
    skips = []
    x = inputs
    for filters in (16, 32, 64, 128):
        x = encoder_block(x, filters)
        skips.append(x)

    # Bottleneck
    x = layers.Conv2D(128, 3, padding='same')(x)
    x = layers.Activation('elu')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Conv2D(128, 3, padding='same')(x)
    x = layers.Activation('elu')(x)

    # Expansive path: the deepest skip is consumed first
    for filters, skip in zip((128, 64, 32, 16), reversed(skips)):
        x = decoder_block(x, skip, filters)

    # Output
    outputs = layers.Conv2D(num_classes, 1, padding='same', activation='sigmoid')(x)

    return tf.keras.models.Model(inputs=inputs, outputs=outputs, name='U-Net')

In [None]:
# Build the network with a single output channel and print its layer summary
model = unet_model(num_classes=1) 
model.summary()

In [None]:
# Train with the BCE + Dice loss and track the Dice score as a metric
model.compile(optimizer='adam', loss=BCE_dice, metrics=[dice_score])

In [None]:
# Output location for model files
model_dir = '/kaggle/working/models'

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau

# makedirs(exist_ok=True) also creates missing parent folders and is safe to re-run
os.makedirs(model_dir, exist_ok=True)

# Store the checkpoint INSIDE model_dir. The previous "{model_dir}_weights..."
# pattern wrote next to the freshly created directory and left it empty.
weight_path = os.path.join(model_dir, 'weights.best.hdf5')

# Keep only the weights of the epoch with the lowest validation loss
checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1, 
                             save_best_only=True, mode='min', save_weights_only = True)

# Multiply the learning rate by 0.33 as soon as val_loss stops improving for one epoch
reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.33, 
                                   patience=1, 
                                   verbose=1, mode='min', min_delta=0.0001, cooldown=0, min_lr=1e-8)
# NOTE(review): with patience=10 this can only trigger when training runs for more than 10 epochs
early = EarlyStopping(monitor="val_loss", 
                      mode="min",
                      verbose = 2,
                      patience=10) # probably needs to be more patient, but kaggle time is limited
callbacks_list = [checkpoint, early, reduceLROnPlat]

In [None]:
# `train` and `valid` are Sequence objects that already yield complete batches,
# so `batch_size` must not be passed to fit(); the batch size is the one given
# to the DataGenerator constructors.
loss_history = model.fit(train,
                         epochs=10,
                         validation_data=valid,
                         callbacks=callbacks_list)

In [None]:
# Load weights from a file in a separate input dataset; this replaces whatever
# weights the model holds after the fit() call above
model.load_weights('/kaggle/input/saved-model/models_weights.best (1).hdf5')

In [None]:
# Sanity check: shape and value range of the predictions on the validation generator
pred_y = model.predict(valid)
print(pred_y.shape, pred_y.min(), pred_y.max(), pred_y.mean())

In [None]:
# File names found in the test image folder
test_paths = os.listdir(test_dir_path)
print(len(test_paths), 'test images found')

In [None]:
# (row step, column step) used by predict() to sub-sample test images; None disables it
IMG_SCALING = (3,3)

In [None]:
def predict(img_name):
    """Run the model on one test image.

    The image is read from `test_dir_path`, scaled to [0, 1], given a batch
    axis and, when `IMG_SCALING` is set, sub-sampled by keeping every n-th row
    and column. Returns (input batch, model prediction).

    NOTE(review): training uses full-resolution crops while this feeds a
    sub-sampled full image - confirm the scale difference is intended.
    """
    raw = imread(os.path.join(test_dir_path, img_name))
    img = np.expand_dims(raw, 0)/255.0
    if IMG_SCALING is not None:
        row_step, col_step = IMG_SCALING[0], IMG_SCALING[1]
        img = img[:, ::row_step, ::col_step]
    prediction = model.predict(img, verbose=0)
    return img, prediction

In [None]:
from skimage.io import imread

In [None]:
# Show the first 20 test images next to their predicted masks and save the figure
fig, m_axs = plt.subplots(20, 2, figsize = (10, 40))
for c_ax in m_axs.flatten():
    c_ax.axis('off')
for axes_pair, c_img_name in zip(m_axs, test_paths):
    ax1, ax2 = axes_pair
    img, pred = predict(c_img_name)
    ax1.imshow(img[0])
    ax1.set_title('Image')
    ax2.imshow(pred[0, :, :, 0], vmin = 0, vmax = 1)
    ax2.set_title('Prediction')
fig.savefig('test_predictions.png')

In [None]:
from scipy import ndimage

def split_mask(mask, threshold=0.6, threshold_obj=8):
    """Split a probability mask into one binary mask per connected object.

    mask: array of prediction scores.
    threshold: scores strictly above this value count as foreground.
    threshold_obj: objects made of `threshold_obj` pixels or fewer are ignored.
    Returns a list of int arrays (1 - object, 0 - elsewhere), each shaped like
    `mask`, ordered by label number.
    """
    labeled, n_objs = ndimage.label(mask > threshold)
    # One pass over the label image gives every object's size, so full-size
    # masks are only built for the objects that are actually kept
    sizes = np.bincount(labeled.ravel(), minlength=n_objs + 1)
    return [
        (labeled == label_id).astype(int)
        for label_id in range(1, n_objs + 1)
        if sizes[label_id] > threshold_obj
    ]

In [None]:
def rle_encode(img):
    """Run-length encode a binary mask as a "start length start length ..." string.

    The mask is transposed before flattening and the starts are 1-based. An
    all-zero mask gives an empty string.
    """
    # Pad with zeros so runs touching either end still produce both edges
    padded = np.concatenate(([0], img.T.flatten(), [0]))
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Every second edge is a run end; turn it into the run length
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))


In [None]:
def get_run_length_encoded_predictions(y_pred, img_name):
    """Convert one predicted mask into submission rows.

    y_pred: prediction for a single image, as returned by the model.
    img_name: file name stored in the `ImageId` field.
    Returns a list of dicts with keys "ImageId" and "EncodedPixels": one dict
    per detected object, or a single dict with NaN when nothing was detected.
    """
    masks = split_mask(y_pred)
    if len(masks) == 0:
        return [{"ImageId": img_name, "EncodedPixels": np.nan}]

    list_dict = []
    for mask in masks:
        if IMG_SCALING is not None:
            # predict() keeps only every IMG_SCALING-th row/column, so the mask
            # lives on a coarser grid than the image file. Repeat each pixel to
            # return to the original grid before encoding; otherwise the run
            # positions would refer to the small grid.
            # (Exact when the image size is a multiple of the step, e.g. 768 / 3.)
            mask = mask.repeat(IMG_SCALING[0], axis=0).repeat(IMG_SCALING[1], axis=1)
        list_dict.append({"ImageId": img_name, "EncodedPixels": rle_encode(mask)})
    return list_dict

In [None]:
def predict_and_decode(test_img_names):
    """Predict every image in `test_img_names` and gather the encoded objects.

    Returns a DataFrame with the columns "ImageId" and "EncodedPixels".
    """
    rows = []
    for img_name in test_img_names:
        prediction = predict(img_name)[1]
        rows.extend(get_run_length_encoded_predictions(prediction[0], img_name))
    return pd.DataFrame(rows, columns=["ImageId", "EncodedPixels"])

In [None]:
# Predict all test images, write the result to submission.csv and display it
test_decoded_pred_df = predict_and_decode(test_paths)
test_decoded_pred_df.to_csv('submission.csv', index=False)
test_decoded_pred_df

In [None]:
import zipfile
from PIL import Image

In [None]:
import warnings

# Disable displaying warnings (applies to every cell executed after this one)
warnings.filterwarnings('ignore')

In [None]:
# Pack the images of the working subset into one archive.
with zipfile.ZipFile('prep_images.zip', 'w') as archive:
    # `df` can hold several rows per image, so iterate over the unique ids;
    # otherwise the same file is written to the archive more than once.
    for image_id in df['ImageId'].unique():
        img_path = os.path.join(train_dir_path, image_id)
        # The file is copied as-is; there is no need to open it as an image first
        archive.write(img_path, os.path.basename(img_path))