### Motivation
With the increase in ship traffic, ship detection from satellite images has become essential. The risks of ship accidents and illegal activities have further fueled the need to keep a closer watch on ships in the sea.

### Dataset Information
The train_ship_segmentations_v2.csv file provides the ground truth (in run-length encoding format) for the training images.

### Problem Statement
In this work I will use Mask R-CNN along with image processing techniques to build a model that takes satellite images as input and outputs a bounding box and a mask that segments each ship instance in the image.

# **1. Import Libraries**

In [1]:
import os
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from random import random
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import backend as K
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Conv2D, BatchNormalization, Dropout
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.applications.resnet50 import ResNet50
import tensorflow as tf
from tensorflow.python.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import backend as K
import warnings
!pip install -U segmentation-models
import segmentation_models as sm
from segmentation_models import Unet
from segmentation_models import get_preprocessing
from PIL import Image

# **2. Importing and preprocessing of Dataset**

In [2]:
# Load the ground-truth segmentation table and preview its first rows.
df = pd.read_csv('../input/airbus-ship-detection/train_ship_segmentations_v2.csv')
df.head()

In [3]:
# Count the JPEG images available in the training and test folders.
train_path = '../input/airbus-ship-detection/train_v2/'
test_path = '../input/airbus-ship-detection/test_v2/'

# glob gives no ordering guarantee, so sort each listing in place.
train_files = glob(train_path + "*.jpg")
train_files.sort()
test_files = glob(test_path + "*.jpg")
test_files.sort()

print("Number of images for training: {}".format(len(train_files)))
print("Number of images for testing: {}".format(len(test_files)))

In [4]:
def process_dataframe(meta_path):
    """
    Extract classification and segmentation labels from the metadata csv file.

    Input:
    - meta_path: path to a csv file with "ImageId" and "EncodedPixels" columns.
      A row whose EncodedPixels value is missing is treated as "no ship".

    Output:
    - dataframe of image file names and an integer label (1 if the image has
      >= 1 ship, 0 otherwise), one row per unique image
    - dataframe of the metadata rows filtered to those with a ship mask
      (multiple references to the same image if multiple ships), with the
      extra "label" column

    Images listed as corrupted are removed from both outputs.
    """
    # load
    df = pd.read_csv(meta_path)

    # binary label: 1 when the row carries a mask (ship present), else 0
    df["label"] = df["EncodedPixels"].notnull().astype(int)

    # remove corrupted images. Source: https://www.kaggle.com/iafoss/fine-tuning-resnet34-on-ship-detection
    excluded_images = [
        "6384c3e78.jpg",
        "13703f040.jpg",
        "14715c06d.jpg",
        "33e0ff2d5.jpg",
        "4d4e09f2a.jpg",
        "877691df8.jpg",
        "8b909bb20.jpg",
        "a8d99130e.jpg",
        "ad55c3143.jpg",
        "c8260c541.jpg",
        "d6c7f17c7.jpg",
        "dc3e7c901.jpg",
        "e44dffe88.jpg",
        "ef87bad36.jpg",
        "f083256d8.jpg",
    ]  # corrupted images

    # boolean row masks; kept explicitly boolean so that combining them with
    # `&` never depends on implicit int -> bool coercion
    is_uncorrupted = ~df["ImageId"].isin(excluded_images)
    has_ship = df["label"] == 1

    # one row per image: the same image appears once per mask in the raw csv
    df_classify = (
        df.loc[is_uncorrupted, ["ImageId", "label"]]
        .drop_duplicates()
        .reset_index(drop=True)
    )

    # only the rows that actually describe a ship mask
    df_segment = df.loc[is_uncorrupted & has_ship].reset_index(drop=True)

    return df_classify, df_segment

In [5]:
# Build the per-image classification table and the per-mask segmentation table.
df_classify, df_segment = process_dataframe('../input/airbus-ship-detection/train_ship_segmentations_v2.csv')

In [6]:
# Preview the classification table (ImageId plus 0/1 ship label).
df_classify

In [8]:
# Preview the segmentation table (only rows that carry a ship mask).
df_segment

In [8]:
# Number of images per class (0 = no ship, 1 = ship), ordered by label value.
df_classify.label.value_counts().sort_index(ascending = True)

In [9]:
# Bar chart of how many images fall into each class (0 = no ship, 1 = ship).
plt.figure(figsize=(8, 5))
sns.countplot(x='label', data=df_classify)
plt.title('Number of images per class')
plt.xlabel('Label')
plt.ylabel('Number of images')
plt.show()

# **3. Ship Detection**

### Preparing Tensorflow dataset

In [23]:
def image_batch_generators(
    train_df, dev_df, target_size=(256, 256), input_dir="../input/airbus-ship-detection",
    batch_size=128,
):
    """
    Build Keras image generators for the classification task.

    Input:
    - train_df: dataframe with "ImageId" and "label" columns used for training
    - dev_df: dataframe with the same columns used for validation
    - target_size: (height, width) every image is resized to
    - input_dir: dataset root; images are read from its "train_v2" sub-folder
    - batch_size: number of images per batch (default 128)

    Output:
    - train_gen: generator yielding augmented, rescaled training batches
    - val_gen: generator yielding rescaled (non-augmented) validation batches
    """
    # training images are rescaled to [0, 1] and randomly augmented
    train_datagen = ImageDataGenerator(
        rescale=1.0 / 255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
    )

    # validation images are only rescaled, never augmented
    test_datagen = ImageDataGenerator(rescale=1.0 / 255)

    # Both splits come from the same folder and share every flow setting;
    # the validation set is a hold-out of the training images.
    flow_kwargs = dict(
        directory=input_dir + "/train_v2/",
        x_col="ImageId",
        y_col="label",
        target_size=target_size,
        batch_size=batch_size,
        seed=42,
        class_mode="binary",
    )

    # the dataframes are cast to str before being handed to Keras
    # (presumably because class_mode="binary" expects string labels in
    # y_col - see the flow_from_dataframe documentation)
    train_gen = train_datagen.flow_from_dataframe(
        dataframe=train_df.astype(str), **flow_kwargs
    )
    val_gen = test_datagen.flow_from_dataframe(
        dataframe=dev_df.astype(str), **flow_kwargs
    )

    return train_gen, val_gen

In [24]:
def create_dataset_classification(
    target_size=(256, 256), input_dir="../input/airbus-ship-detection"):
    """
    Create the training and validation generators for ship/no-ship classification.

    Input:
    - target_size: (height, width) passed on to the image generators
    - input_dir: dataset root containing the metadata csv and the image folder

    Output:
    - train_gen, val_gen: generators built from a 90% / 10% split of the
      per-image classification table (fixed random_state for reproducibility)
    """
    # per-image labels; the segmentation table is not needed here
    csv_file = input_dir + "/train_ship_segmentations_v2.csv"
    labels_df, _ = process_dataframe(meta_path=csv_file)

    # hold out 10% of the images for validation
    train_part, holdout_part = train_test_split(
        labels_df, test_size=0.1, random_state=42
    )

    train_gen, val_gen = image_batch_generators(
        train_part,
        holdout_part,
        target_size=target_size,
        input_dir=input_dir,
    )
    return train_gen, val_gen

In [25]:
# Instantiate the training / validation generators at 256x256 resolution.
train_gen,val_gen = create_dataset_classification(
    target_size=(256, 256), input_dir="../input/airbus-ship-detection")

In [26]:
# Show the distinct class values present in the training generator.
set(train_gen.classes)


### Visualizing images from the dataset

In [27]:
# Draw one batch from the training generator and map its numeric labels
# back to human-readable class names.
x, y = next(train_gen)
class_dict = {'ship': 1, 'no-ship': 0}
class_dict_inv = {index: name for name, index in class_dict.items()}
y_names = [class_dict_inv[label] for label in y]

In [28]:
#Lets visualize the augmented images
plt.figure(figsize=(10, 10))
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(x[i])
    plt.title(y_names[i])
    plt.axis()

### Developing the model

In [29]:
def binary_classification_model(input_shape):
    """
    Build a ship / no-ship classifier on top of a ResNet50 backbone.

    Input:
    - input_shape: (height, width, channels) of the input images

    Output:
    - Keras Model mapping an image to a single sigmoid probability
    """
    # Reference kept from the original notebook:
    # https://towardsdatascience.com/a-simple-guide-to-the-versions-of-the-inception-network-7fc52b863202
    # Convolutional backbone without its classification top. It is not
    # frozen here, so its weights are trained together with the new head.
    backbone = ResNet50(input_shape=input_shape, include_top=False)

    # classification head, applied in order on the backbone output
    head_layers = (
        GlobalAveragePooling2D(),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid'),  # final layer binary classifier
    )

    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    return Model(inputs=backbone.input, outputs=tensor)

In [30]:
# Instantiate the classifier for 256x256 RGB inputs (matches the generators' target_size).
binary_model = binary_classification_model(input_shape=(256,256,3))

In [31]:
epochnum = 5

# compile the model: binary cross-entropy for the single sigmoid output;
# accuracy and mean squared error are tracked as metrics
binary_model.compile(optimizer='adam',
                     loss='binary_crossentropy',
                     metrics=['acc', 'mse'])

# model checkpoint: keep only the weights with the best validation accuracy
# ('val_acc' matches the 'acc' metric name used in compile above)
mc = ModelCheckpoint('ship_detection_classifier.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

# train the model; Model.fit accepts generators directly, so the
# deprecated fit_generator is not needed
r = binary_model.fit(train_gen,
                     validation_data=val_gen,
                     epochs=epochnum, verbose=1, callbacks=[mc])

# number of epochs actually run, taken from the recorded loss history
n_epochs = len(r.history['loss'])