# [Happywhale - Whale and Dolphin Identification](https://www.kaggle.com/c/happy-whale-and-dolphin)
> Identify whales and dolphins by unique characteristics

<img src="https://storage.googleapis.com/kaggle-competitions/kaggle/22962/logos/header.png?t=2021-03-17-22-44-09">

# 🛠 Dependencies

In [None]:
import os
import pandas as pd
import cv2
import matplotlib.pyplot as plt

# Meta Data

* train_images/ - a folder containing the training images

* train.csv - provides the species and the individual_id for each of the training images

* test_images/ - a folder containing the test images. For each image, your task is to predict the individual_id. No species information is given for the test data. Individuals in the test data that were not observed in the training data should be predicted as new_individual.

* sample_submission.csv - a sample submission file in the correct format

# Data 

In [None]:
# Root folder of the competition data. The trailing slash matters: other cells
# build file paths by plain string concatenation (e.g. path+'train.csv').
path = '/kaggle/input/happy-whale-and-dolphin/'
# Show which files and folders are available under the data root.
os.listdir(path)

In [None]:
# Training metadata; other cells read its 'image', 'species' and
# 'individual_id' columns.
train_data = pd.read_csv(path+'train.csv')
# Sample submission in the expected format.
samp_sub = pd.read_csv(path+'sample_submission.csv')

In [None]:
# Compare the number of metadata rows with the number of image files on disk.
print('Number train samples: ', len(train_data))
for split in ('train', 'test'):
    print('Number ' + split + ' images: ', len(os.listdir(path + split + '_images')))

In [None]:
# Preview the first rows of the training metadata.
train_data.head()

# EDA

In [None]:
# Display the training metadata frame.
train_data

In [None]:
# Headline counts: number of rows, distinct species and distinct individuals.
print("Length of train data", len(train_data))
for label, column in (("Number of Species: ", 'species'),
                      ("number of Individuals: ", 'individual_id')):
    print(label, len(train_data[column].unique()))

In [None]:
# List the distinct values of the species column.
train_data['species'].unique()

In [None]:
# Number of training rows per species.
train_data['species'].value_counts()

In [None]:
# Number of training rows per individual.
train_data['individual_id'].value_counts()

### Load Single Image

In [None]:
# Take file name, species and individual id from the row labelled 0.
first_row = train_data.loc[0]
file, species, individual_id = (
    first_row[column] for column in ('image', 'species', 'individual_id')
)
print(file)

In [None]:
# Read the selected training image from disk.
# NOTE(review): cv2.imread returns None instead of raising when the file
# cannot be read; img.shape below would then fail with AttributeError.
img = cv2.imread(path+'train_images/'+file)
print('Shape: ', img.shape)

In [None]:
# Show the loaded image without tick labels; the individual id is the axes
# title and the species is the figure title.
fig, ax = plt.subplots(figsize=(10, 10))
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # image is converted from BGR to RGB for display
ax.imshow(rgb)
for clear_labels in (ax.set_xticklabels, ax.set_yticklabels):
    clear_labels([])
ax.set_title(individual_id)
fig.suptitle(species)
plt.show()

## Individuals of different species

In [None]:
def plot_examples(species = 'bottlenose_dolphin'):
    """ Plot up to 5 images of a given species

    Filters the module-level ``train_data`` frame on its ``species`` column,
    reads the first (at most five) matching images from
    ``path + 'train_images/'`` and shows them side by side. Each panel is
    titled with the ``individual_id`` of the image.

    Parameters
    ----------
    species : str
        Value of the ``species`` column to select.
    """
    n_panels = 5
    fig, axs = plt.subplots(1, n_panels, figsize=(25, 20))
    fig.subplots_adjust(hspace = .1, wspace=.1)
    axs = axs.ravel()
    # head() yields fewer rows when the species has fewer than n_panels
    # images, so the loop below never indexes past the available data.
    subset = train_data[train_data['species']==species].head(n_panels)
    for ax, file, individual in zip(axs, subset['image'], subset['individual_id']):
        img = cv2.imread(path+'train_images/'+file)
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        # Title with the actual individual id rather than a fixed string.
        ax.set_title(individual)
        ax.set_xticklabels([])
        ax.set_yticklabels([])
    # Hide the panels for which no image was available.
    for ax in axs[len(subset):]:
        ax.axis('off')
    plt.show()

In [None]:
# Example images for the species 'bottlenose_dolphin'.
plot_examples('bottlenose_dolphin')

In [None]:
# Example images for the species 'beluga'.
plot_examples(species = 'beluga')

In [None]:
# Example images for the species 'humpback_whale'.
plot_examples(species = 'humpback_whale')

In [None]:
# Example images for the species 'melon_headed_whale'.
plot_examples(species = 'melon_headed_whale')

## Image Preprocessing

In [None]:
def image_preprocessing(image, image_size):
    """ Image Preprocessing

    Converts the image to grayscale, center-crops it to a square whose side
    equals the shorter image dimension, and resizes that square to
    ``image_size`` x ``image_size``.

    Parameters
    ----------
    image : array
        Colour image in BGR channel order (it is converted with
        ``cv2.COLOR_BGR2GRAY``).
    image_size : int
        Edge length of the square output, in pixels.

    Returns
    -------
    array
        Grayscale image of shape ``(image_size, image_size)``.
    """

    # Grayscale Image
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Crop Image
    # Derive the offsets from the full side length instead of from two
    # half-widths: with an odd shorter side, half-width arithmetic drops one
    # pixel on one axis only, and the non-square crop is then stretched.
    height, width = image_gray.shape[:2]
    side = min(height, width)
    top = (height - side) // 2
    left = (width - side) // 2
    image_cropped = image_gray[top:top+side, left:left+side]

    # Rescale Image
    image_rescale = cv2.resize(image_cropped,
                               dsize=(image_size, image_size),
                               interpolation=cv2.INTER_AREA)
    return image_rescale

def plot_befor_after(image, size=None):
    """ Compare original and prepared image

    Shows the original image next to the output of ``image_preprocessing``,
    each titled with its array shape.

    Parameters
    ----------
    image : array
        Colour image in BGR channel order (it is converted with
        ``cv2.COLOR_BGR2RGB`` for display).
    size : int, optional
        Edge length passed to ``image_preprocessing``. When omitted, the
        module-level ``image_size`` is used.
    """
    # Fall back to the notebook-level setting so calls without the size
    # argument keep working.
    if size is None:
        size = image_size

    fig, axs = plt.subplots(1, 2, figsize=(15, 10))
    fig.subplots_adjust(hspace = .1, wspace=.1)
    axs = axs.ravel()
    # Plot Original Image
    axs[0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    axs[0].set_title('original shape: '+str(image.shape))
    # Image Preprocessing
    image_rescale = image_preprocessing(image, size)
    # Plot Prepared Image
    axs[1].imshow(image_rescale, cmap='gray')
    axs[1].set_title('rescaled shape: '+str(image_rescale.shape))
    for ax in axs:
        ax.set_xticklabels([])
        ax.set_yticklabels([])
    plt.show()

In [None]:
# Edge length in pixels of the square preprocessed image; plot_befor_after
# reads this global and hands it to image_preprocessing.
image_size = 128

In [None]:
# Select one training row by label and load its image for the comparison.
row = 100
sample = train_data.loc[row]
file = sample['image']
species = sample['species']
image = cv2.imread(os.path.join(path, 'train_images', file))
print('Shape:', image.shape)

In [None]:
# Show the loaded image next to its preprocessed version.
plot_befor_after(image)

# Raw Submission

In [None]:
# Preview the first rows of the sample submission.
samp_sub.head()

In [None]:
# Write the sample submission as submission.csv; index=False leaves the
# DataFrame index out of the file.
samp_sub.to_csv('submission.csv', index = False)