# Import Libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import tensorflow as tf
import glob
from tqdm import tqdm
import random
import os
import tensorflow.keras.layers as L
import tensorflow.keras.applications.efficientnet as efn
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import image

# Data Directories

In [None]:
# Common prefix shared by every dataset path used in this notebook.
_competition_root = '/kaggle/input/happy-whale-and-dolphin'

# Image folders for the training and test splits.
train_img_dir = _competition_root + '/train_images'
test_img_dir = _competition_root + '/test_images'

# CSV files: the training table and the sample submission.
train_path = _competition_root + '/train.csv'
sub_path = _competition_root + '/sample_submission.csv'

# Label Inspection

In [None]:
# Misspelled or aliased species names mapped to the single spelling used for
# that species in the rest of the notebook.
species_fixes = {
    "bottlenose_dolpin": "bottlenose_dolphin",
    "kiler_whale": "killer_whale",
    "globis": "short_finned_pilot_whale",
    "pilot_whale": "short_finned_pilot_whale",
    "beluga": "beluga_whale",
}

train_df = pd.read_csv(train_path)
train_df["species"] = train_df["species"].replace(species_fixes)

# Keep only the first 7000 rows. `.copy()` turns the subset into an
# independent frame, so the column assignments made in later cells do not
# trigger pandas' SettingWithCopyWarning on a slice of the full table.
train_df = train_df.head(7000).copy()

# Number of distinct species left in the subset.
print(len(train_df['species'].unique()))

# Image Label Creation

In [None]:
# Coarse "whale" / "dolphin" label derived from the species string.
# `.eq(True)` turns any non-boolean result of `str.contains` (e.g. NaN for a
# missing species) into False so the masks are plain booleans.
is_whale = train_df["species"].str.contains("whale").eq(True)
is_dolphin = train_df["species"].str.contains("dolphin").eq(True)

# "dolphin" is written last, so it wins for a name containing both words.
train_df.loc[is_whale, "label"] = "whale"
train_df.loc[is_dolphin, "label"] = "dolphin"

# Image Path Creation

In [None]:
# Full path of each training image: "<train_img_dir>/<image file name>".
path_prefix = train_img_dir + '/'
train_df['image_path'] = path_prefix + train_df['image']
train_df['image_path']

In [None]:
# Distinct species names, in order of first appearance in train_df.
list_of_species = train_df['species'].unique()
print(len(list_of_species))

# species name -> integer class id (position in list_of_species)
species_to_neumeric = {name: class_id for class_id, name in enumerate(list_of_species)}

# integer class id -> species name (used to decode predictions)
inverse_species_to_neumeric = {class_id: name for name, class_id in species_to_neumeric.items()}


# Resizing Image

In [None]:
def resize_images(path, n_w, n_h):
    """Read a JPEG file and resize it to the requested dimensions.

    Args:
        path: Location of the JPEG file, passed to `tf.io.read_file`.
        n_w: New width in pixels.
        n_h: New height in pixels.

    Returns:
        The resized image tensor produced by `tf.image.resize`, decoded with
        three colour channels.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)  # can decode to another format
    # tf.image.resize expects `size` as [new_height, new_width]; passing
    # [n_w, n_h] would swap the two for any non-square target.
    return tf.image.resize(decoded, [n_h, n_w])


In [None]:
N = len(train_df)
image_size = 128

# Sanity check: resize the first training image and display it.
# Guarded so that an empty frame is simply skipped.
if N > 0:
    first_row = train_df.iloc[0]
    re_img = resize_images(first_row.image_path, image_size, image_size)
    print(re_img.shape)
    # Author's note: the resized tensor could not be plotted directly, so it
    # is cast to uint8 first. The result is named `preview_img` rather than
    # `image` so that the `image` module imported from
    # tensorflow.keras.preprocessing at the top of the notebook is not shadowed.
    preview_img = tf.cast(re_img, np.uint8)
    plt.imshow(preview_img)

# Generate Training Data

In [None]:
# Load every training image into one array X of shape
# (num_images, image_size, image_size, 3).
image_size = 128
train_image_list = []
for image_path in tqdm(train_df['image_path'], total=len(train_df)):
    # The network is built with input_shape=(128, 128, 3), so images must be
    # loaded as 3-channel RGB; a single-channel grayscale array would be
    # rejected by the model. `color_mode` replaces the deprecated `grayscale`
    # flag and `target_size` is the documented (height, width) pair.
    img = tf.keras.preprocessing.image.load_img(
        image_path,
        target_size=(image_size, image_size),
        color_mode='rgb',
    )
    img = tf.keras.preprocessing.image.img_to_array(img)
    # NOTE(review): tf.keras EfficientNet is documented to take pixels in
    # [0, 255] and rescale internally; the /255 scaling is kept here so that
    # training and test preprocessing stay identical -- confirm before changing.
    train_image_list.append(img / 255)
X = np.array(train_image_list)

# Pre-trained EfficientNet Model

In [None]:
IMAGE_SIZE = [128, 128]
# One softmax output per distinct species.
dense_layer_size = len(species_to_neumeric)

# ImageNet-pretrained EfficientNetB7 without its classification head.
# The `classes` argument is deliberately not passed: it only applies when
# include_top=True, and the value previously supplied (the dict's values view)
# was not a class count.
backbone = efn.EfficientNetB7(
    input_shape=(*IMAGE_SIZE, 3),
    weights='imagenet',
    include_top=False,
)

model = tf.keras.Sequential([
    backbone,
    L.GlobalAveragePooling2D(),
    L.Flatten(),
    L.Dense(512, activation='relu'),
    L.Dense(dense_layer_size, activation='softmax'),
])

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Translate each row's species name into its integer class id, then expand
# the ids into one-hot rows for categorical cross-entropy training.
species_as_label = train_df['species'].values
species_label_list = [species_to_neumeric[name] for name in species_as_label]

tr_sp_lables = np.array(species_label_list)
tr_sp_one_hot_label = to_categorical(tr_sp_lables)
tr_sp_one_hot_label

# Divide into Train and Validation Sets

In [None]:
# Hold out 20% of the samples for validation; the fixed random_state makes
# the split repeatable.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    tr_sp_one_hot_label,
    test_size=0.2,
    random_state=42,
)

# Train with Training and Validation Data

In [None]:
# Train for 10 epochs, evaluating on the validation split after each epoch.
# Author's note: batch_size may need to be adjusted to the amount of input
# data to avoid an out-of-memory (OOM) error.
model_h = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=10,
    batch_size=32,
)

# Plot the Training History

In [None]:
def _plot_history_metric(history, metric):
    """Plot one metric's per-epoch training and validation curves.

    Args:
        history: Dict of per-epoch values, as found in `model_h.history`;
            must contain the keys `metric` and `'val_' + metric`.
        metric: Name of the metric to plot, e.g. 'accuracy' or 'loss'.
    """
    plt.plot(history[metric])
    plt.plot(history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    # The second curve comes from the validation split passed to fit(), so it
    # is labelled 'validation' rather than 'test'.
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()


# One figure for accuracy, one for loss.
for metric in ('accuracy', 'loss'):
    _plot_history_metric(model_h.history, metric)

In [None]:
# Write the trained model to disk under a fixed .h5 file name.
model_filename = 'whale_detection_model.h5'
model.save(model_filename)

# Testing Images

In [None]:
# Sorted so that the subset picked below is the same on every run; glob alone
# returns paths in arbitrary order.
list_of_test_image_paths = sorted(glob.glob(test_img_dir + '/*'))
test_image_list = []
t_num = len(list_of_test_image_paths)  # 27956 images per the original author's note
f_t_num = t_num // 100  # only 1% of the test images, to keep this trial run small

# Iterate over list_of_test_image_paths directly to predict on every image.
for t_path in tqdm(list_of_test_image_paths[:f_t_num]):
    # Load as 3-channel RGB to match the model's (128, 128, 3) input; a
    # single-channel grayscale array would be rejected by model.predict.
    # `color_mode` replaces the deprecated `grayscale` flag and `target_size`
    # is the documented (height, width) pair.
    img = tf.keras.preprocessing.image.load_img(
        t_path,
        target_size=(128, 128),
        color_mode='rgb',
    )
    img = tf.keras.preprocessing.image.img_to_array(img)
    # Same /255 scaling as the training images.
    test_image_list.append(img / 255)
X_T = np.array(test_image_list)

In [None]:
# Class probabilities for each loaded test image.
probabilities = model.predict(X_T)
print(len(probabilities))

# Index of the most probable class per image, decoded back to a species name.
lsit = probabilities.argmax(axis=1)
for class_id in lsit:
    print(inverse_species_to_neumeric[class_id])