## Loading Data and Importing of libraries

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

#import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
    #for filename in filenames:
        #print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### importing other necessary libraries

In [2]:
import gc # Garbage collector module for memory management
from matplotlib import pyplot # For data visualization
from matplotlib.image import imread
import matplotlib.pyplot as plt
import cv2 # OpenCV for image manipulation
from tensorflow import keras # We need keras library
from tqdm import tqdm # To read in images in batches and see progress
from sklearn.model_selection import train_test_split # For the creation of training and validation sets
# Define model related parameters
from keras import optimizers
from keras.models import Sequential , Model
from keras.layers import Input , Dense , Dropout , Flatten
from keras.layers import Conv2D,MaxPooling2D , BatchNormalization
from keras.callbacks import EarlyStopping,ModelCheckpoint 
from keras.preprocessing.image import ImageDataGenerator # Used for Data augmentation
from keras import backend as K # For specialized and optimized tensor manipulation


In [3]:
# Defining the fbeta metric
def fbeta(y_true, y_pred, threshold_shift=0):
    """F-beta score (beta = 2) over the whole batch, usable as a Keras metric.

    Predictions are clipped to [0, 1], shifted by ``threshold_shift`` and
    rounded to obtain binary decisions. True positives, false positives and
    false negatives are then summed over every element of the batch.

    Args:
        y_true: Tensor of ground-truth labels, expected to hold 0/1 values.
        y_pred: Tensor of predicted scores, same shape as ``y_true``.
        threshold_shift: Value added to the predictions before rounding; a
            positive shift lowers the effective decision threshold below 0.5.

    Returns:
        Scalar tensor holding the F2 score.
    """
    beta = 2

    # Guard against final-layer activations that can leave the [0, 1] range.
    y_pred = K.clip(y_pred, 0, 1)

    # Binarize once; every count below must be based on these same decisions.
    y_pred_bin = K.round(y_pred + threshold_shift)

    # epsilon keeps precision and recall defined when nothing is predicted positive.
    tp = K.sum(K.round(y_true * y_pred_bin)) + K.epsilon()
    fp = K.sum(K.round(K.clip(y_pred_bin - y_true, 0, 1)))
    # Misses are counted from the binarized predictions (not the raw scores) so
    # that threshold_shift influences recall exactly as it influences precision.
    fn = K.sum(K.round(K.clip(y_true - y_pred_bin, 0, 1)))

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    beta_squared = beta ** 2
    return (beta_squared + 1) * (precision * recall) / (beta_squared * precision + recall + K.epsilon())

## Exploratory data analysis

Let's now perform some EDA on the dataset which involves having a look at the images and reading in the csv files.

In [4]:
# Preview the first nine training images in a 3x3 grid
plt.figure(figsize=(20, 20))
# directory that holds the training JPEGs
folder = '../input/planets-dataset/planet/planet/train-jpg/'
for i in range(9):
    # subplot slots are 1-based: 3 rows, 3 columns, position i + 1
    pyplot.subplot(3, 3, i + 1)
    # files are named train_0.jpg, train_1.jpg, ...
    filename = f'{folder}train_{i}.jpg'
    image = imread(filename)
    # draw the raw pixel data
    pyplot.imshow(image)
# render the whole figure
pyplot.show()

In [5]:
# Load the training labels and the sample submission (its rows name the test images)
planet_dir = '../input/planets-dataset/planet/planet/'
df_train_data = pd.read_csv(planet_dir + 'train_classes.csv')
df_test_data = pd.read_csv(planet_dir + 'sample_submission.csv')
# Show the first five training rows
df_train_data.head()


In [6]:
# Collect the distinct tags that occur in the space-separated 'tags' column
def flatten(l):
    """Flatten one level of nesting: a list of lists becomes a single flat list."""
    return [item for sublist in l for item in sublist]


# The unique tags are sorted because iterating a bare set of strings can yield
# a different order in every kernel session, which would silently reshuffle the
# tag -> column mapping (and the prediction columns) built from this list.
labels = sorted(set(flatten([l.split(' ') for l in df_train_data['tags'].values])))

In [7]:
# Tag name -> column index, together with the reverse lookup
label_map = dict(zip(labels, range(len(labels))))
inv_label_map = {index: tag for tag, index in label_map.items()}
label_map

In [8]:
gc.collect() # Used frequently to avoid session crashing due to memory exhaustion

## Data Preprocessing

In this part, we preprocess the data so that it can be used to train the model. We resize and normalize the images, encode the labels as 17-dimensional binary vectors, and split the training data further into training and validation sets using train_test_split.

In [9]:
# Read every training image and build its binary label vector
x_train = []
y_train = []
train_dir = '../input/planets-dataset/planet/planet/train-jpg/'
for img, label in tqdm(df_train_data.values, miniters=1000):
    # One slot per known tag (17 for this dataset). An image can carry several
    # tags at once, so more than one slot may be set to 1.
    target = np.zeros(len(label_map))
    for tag in label.split(' '):
        target[label_map[tag]] = 1

    path = '{}{}.jpg'.format(train_dir, img)
    # cv2.imread signals a missing or unreadable file by returning None rather
    # than raising; fail here with the offending path instead of letting
    # cv2.resize raise an error that does not name the file.
    pixels = cv2.imread(path)
    if pixels is None:
        raise FileNotFoundError('Could not read image: {}'.format(path))

    # Downscale to the 64x64 input size used by the models
    x_train.append(cv2.resize(pixels, (64, 64)))
    y_train.append(target)

In [10]:
len (x_train) # Prints out 40,479


In [11]:
# Run a garbage-collection pass after loading the training images; the cell shows the count gc.collect() returns
gc.collect()

In [12]:
# Read the test images. The first 40669 rows of the submission table are stored
# in test-jpg and the remaining rows in test-jpg-additional (61191 images in total).
x_test = []

test_sources = [
    (df_test_data[0:40669], '../input/planets-dataset/planet/planet/test-jpg/'),
    (df_test_data[40669:], '../input/planets-dataset/test-jpg-additional/test-jpg-additional/'),
]
for rows, image_dir in test_sources:
    for img, label in tqdm(rows.values, miniters=1000):
        path = '{}{}.jpg'.format(image_dir, img)
        # cv2.imread returns None (no exception) for a missing or unreadable
        # file; stop with the offending path instead of a bare cv2.resize error.
        pixels = cv2.imread(path)
        if pixels is None:
            raise FileNotFoundError('Could not read image: {}'.format(path))
        fil = cv2.resize(pixels, (64, 64))
        x_test.append(fil)

In [13]:
len (x_test) # prints 61191

In [14]:
# Run a garbage-collection pass after loading the test images; the cell shows the count gc.collect() returns
gc.collect()

In [15]:
# Turn the image/label lists into arrays; dividing by 255 rescales pixel values.
# NOTE: x_train / y_train are overwritten here, so re-running this cell without
# re-running the loading cells rescales and re-splits already processed data.
x_train = np.array(x_train, dtype=np.float16) / 255.
y_train = np.array(y_train, dtype=np.uint8)
x_test = np.array(x_test, dtype=np.float16) / 255.

# Hold out 20% of the labelled images for validation (fixed seed for a repeatable split)
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    shuffle=True,
    random_state=1,
)

print(x_train.shape, y_train.shape, x_val.shape, y_val.shape)
# prints  (32383, 64, 64, 3) (32383, 17) (8096, 64, 64, 3) (8096, 17)

In [16]:
# Run a garbage-collection pass now that the Python lists have been replaced by arrays
gc.collect()

# Building Model Architecture

To tackle this multi-label problem, a combination of a custom deep CNN
architecture and the pre-trained VGG16 CNN architecture was implemented in Keras with the TensorFlow backend.

The custom deep CNN architecture includes a
sequence of Convolution-Convolution-Maxpooling (CCM) “super-layers,” each of which is
made up of two back-to-back convolutional layers followed by a maximum pooling layer. In
these super-layers, each convolutional layer has a ReLU activation function and a specific
number of 3 × 3 filters. The depth of each layer is the number of filters it applies,
which increases towards the output of the architecture.
The output of the maximum pooling layer, belonging to the last super-layer, is fed to a
classification block consisting of a fully connected (FC) layer and an output layer. All 512
neurons of the FC layer connect to each of the 17 neurons in the output layer. To bound
the neuron values of the FC layer, ReLU activation is applied. The output layer produces
prediction probabilities, corresponding to the 17 unique labels, using sigmoid activation.
To prevent overfitting, dropout regularization is applied to each CCM super-layer and
the FC layer. More specifically, the CCM super-layers have a dropout rate of 0.25, while
the fully connected layer has a dropout rate of 0.5.

Custom CNN Architecture

In [17]:
input_size = 64
input_channels = 3

model = Sequential()

# Input stage: batch-normalize the raw image; this layer also declares the input shape
model.add(BatchNormalization(input_shape=(input_size, input_size, input_channels)))

# Four CCM super-layers (conv -> conv -> max-pool -> dropout); only the filter
# count changes from one stage to the next: 32, 64, 128, 256.
for n_filters in (32, 64, 128, 256):
    model.add(Conv2D(n_filters, kernel_size=(3, 3), padding='same', activation='relu'))
    model.add(Conv2D(n_filters, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

# Flatten the output of the last CCM stage into a feature vector
model.add(Flatten())

# Fully connected block: 512 ReLU units, then batch normalization and dropout
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

# Output layer: one sigmoid unit per label
model.add(Dense(17, activation='sigmoid'))

Loading Pre-trained CNN Architectures

To supplement the custom CNN architecture, the pre-trained CNN architecture (VGG16) was implemented.

In [18]:
import tensorflow.keras as keras
# VGG16 architecture with downloadable pre-trained weights
from tensorflow.keras.applications.vgg16 import VGG16

# Convolutional base only (no classifier head), ImageNet weights, sized for our inputs
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(input_size, input_size, 3),
)
base_model.summary()

# New classification head: flatten -> 512-unit ReLU layer -> 17 sigmoid outputs
x = Flatten()(base_model.output)
x = Dense(512, activation='relu')(x)
predictions = Dense(17, activation='sigmoid')(x)

# NOTE: this rebinds `model`. The custom CNN assigned to `model` earlier is
# replaced, so the cells below compile and train this VGG16-based network.
model = Model(inputs=base_model.input, outputs=predictions)

In [19]:
# Run a garbage-collection pass after building the models; the cell shows the count gc.collect() returns
gc.collect()

## Model Training

In [20]:
# Data augmentation: random flips, zooms and rotations produce extra variants of
# each image on the fly, giving the model more examples and reducing overfitting.
augmentation = dict(
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.2,
    rotation_range=90,
    fill_mode='reflect',
)
datagen = ImageDataGenerator(**augmentation)

In [21]:
# Training hyper-parameters
epochs = 20  # one epoch is a complete pass through the training data
learning_rate = 1e-4

# Adam optimizer using the learning rate above
opt = keras.optimizers.Adam(learning_rate=learning_rate)

In [22]:

# Compile with a per-label binary cross-entropy loss and track the F2 metric
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=[fbeta])

# Stop once val_loss has not improved for 2 consecutive epochs. Without
# restore_best_weights the model would keep the weights of the last (worse)
# epoch; with it, an early stop rolls back to the best epoch seen.
callbacks = [EarlyStopping(monitor='val_loss',
                           patience=2,
                           restore_best_weights=True,
                           verbose=0)]
             


 

In [23]:
# Run a garbage-collection pass before training starts; the cell shows the count gc.collect() returns
gc.collect()

In [24]:
# Fit the model on augmented batches generated on the fly. Validation uses the
# untouched hold-out arrays: sending them through the random augmenter would
# make val_loss (which EarlyStopping monitors) change with every random draw.
# NOTE(review): fit_generator is deprecated in recent TensorFlow releases in
# favour of fit(); kept to match the Keras version this notebook targets - confirm.
batch_size = 32
model.fit_generator(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    # whole number of batches, rounded up so the final partial batch is included
    steps_per_epoch=(len(x_train) + batch_size - 1) // batch_size,
    validation_data=(x_val, y_val),
    epochs=epochs,
    callbacks=callbacks,
    verbose=1,
)

In [25]:
# Run a garbage-collection pass after training; the cell shows the count gc.collect() returns
gc.collect()

In [26]:
# Predict on the test images with the trained model; the list holds one prediction array per model
test_1 = [model.predict(x_test, batch_size=128, verbose=2)]

In [27]:
# Run a garbage-collection pass after prediction; the cell shows the count gc.collect() returns
gc.collect()

In [28]:
# Combine the stored prediction arrays into one pandas DataFrame with one column per tag.
# Start from a copy of the first array so the in-place updates never touch test_1.
result = np.array(test_1[0])
for i in range(1, len(test_1)):
    # add the i-th prediction array (adding the whole test_1 list here would be a shape error)
    result += np.array(test_1[i])
# Average rather than sum so the values stay comparable to a probability
# threshold; with a single prediction array this leaves the values unchanged.
result /= len(test_1)
result = pd.DataFrame(result, columns=labels)

In [29]:
# Display the prediction table (one column per tag)
result

In [30]:
# Convert every row of scores into a space-separated tag string: a tag is kept
# when its score is above the threshold, and tags appear in column order.
# The comparison runs once on the whole array instead of slicing the DataFrame row by row.
threshold = 0.2
tag_names = np.asarray(result.columns, dtype=object)
selected = result.values > threshold
preds = [' '.join(tag_names[row]) for row in selected]
    

In [31]:
# Store the predicted tag strings in the submission table and write it out without the index column
submission_path = 'final_final_submission.csv'
df_test_data['tags'] = preds
df_test_data.to_csv(submission_path, index=False)