# Import Libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import tensorflow as tf
import glob
from tqdm import tqdm
import random
import os
import tensorflow.keras.layers as L
import tensorflow.keras.applications.efficientnet as efn
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import image

# Data Directories

In [None]:
# All competition inputs sit under one Kaggle dataset directory.
data_root = '/kaggle/input/happy-whale-and-dolphin'

# Image folders.
train_img_dir = f'{data_root}/train_images'
test_img_dir = f'{data_root}/test_images'

# CSV files: training labels and the submission template.
train_path = f'{data_root}/train.csv'
sub_path = f'{data_root}/sample_submission.csv'

# Label Inspection

In [None]:
# Load the training labels.
train_df = pd.read_csv(train_path)

# Variant spellings found in the species column, mapped to the name used from
# here on. Applied one after another, in this order.
species_fixes = {
    "bottlenose_dolpin": "bottlenose_dolphin",
    "kiler_whale": "killer_whale",
    "globis": "short_finned_pilot_whale",
    "pilot_whale": "short_finned_pilot_whale",
    "beluga": "beluga_whale",
}
for variant, canonical in species_fixes.items():
    train_df.loc[train_df.species == variant, "species"] = canonical

# Show how many distinct species remain after the clean-up, then list them.
species_names = train_df['species'].unique()
print(len(species_names))
print(species_names)

# Image Label Creation

In [None]:
# Derive a coarse two-class label from the species name.
# "whale" is applied first, so a name containing both words ends up "dolphin".
for keyword in ("whale", "dolphin"):
    # na=False treats a missing species as "no match", leaving its label unset.
    mentions_keyword = train_df.species.str.contains(keyword, na=False)
    train_df.loc[mentions_keyword, "label"] = keyword

# Image path creation

In [None]:
# Prefix every image file name with the training image directory.
image_prefix = train_img_dir + '/'
train_df['image_path'] = image_prefix + train_df['image']
train_df['image_path']

# Resizing Image

In [None]:
def resize_images(path, n_w, n_h):
    """Read a JPEG file from disk and resize it to ``n_w`` x ``n_h`` pixels.

    Args:
        path: Path of the JPEG file to load.
        n_w: New width in pixels.
        n_h: New height in pixels.

    Returns:
        The result of ``tf.image.resize`` applied to the image decoded with
        three colour channels.
    """
    raw_bytes = tf.io.read_file(path)
    img = tf.image.decode_jpeg(raw_bytes, channels=3)  # other decoders could be used here
    # tf.image.resize takes `size` as [new_height, new_width]: height goes first.
    return tf.image.resize(img, [n_h, n_w])


In [None]:
N = len(train_df)
image_size = 128
# Buffer shaped for the whole resized training set; this cell does not fill it.
image_train = np.empty((N, image_size, image_size, 3), dtype=np.uint8)

# Sanity check: read the first image listed in train_df from disk, resize it,
# and display it. head(1) limits the loop to a single row.
for index, row in train_df.head(1).iterrows():
    re_img = resize_images(row.image_path, image_size, image_size)
    # The resized tensor is cast to uint8 before plotting (the un-cast result
    # could not be plotted). It is deliberately NOT stored under the name
    # `image`: that name is the keras preprocessing module imported at the top
    # of the notebook, and rebinding it breaks later `image.load_img` calls.
    preview_img = tf.cast(re_img, np.uint8)
    print(re_img.shape)
    plt.imshow(preview_img)

# Pre-trained EfficientNet model

In [None]:
IMAGE_SIZE = [128, 128]

# EfficientNetB7 with ImageNet weights and without its classification top,
# taking 3-channel images of IMAGE_SIZE.
backbone = efn.EfficientNetB7(
    include_top=False,
    weights='imagenet',
    input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3),
)

# Backbone -> global average pooling -> one sigmoid unit (binary output).
model = tf.keras.Sequential()
model.add(backbone)
model.add(L.GlobalAveragePooling2D())
model.add(L.Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

# Generate training data

In [None]:
image_size = 128
image_paths = train_df['image_path'].tolist()

# The model cell builds EfficientNetB7 with a 3-channel input and one sigmoid
# output, so images are loaded as RGB and labels are encoded as a single 0/1
# value per sample (not one-hot).
#
# Pixels are stored as raw 0-255 uint8 in one preallocated array:
#   * the Keras EfficientNet models expect 0-255 inputs (rescaling is part of
#     the model), so the values must not be divided by 255 here;
#   * uint8 needs a quarter of the memory of float32, and preallocating avoids
#     holding a Python list and its array copy at the same time.
X = np.empty((len(image_paths), image_size, image_size, 3), dtype=np.uint8)
for i, img_path in enumerate(tqdm(image_paths)):
    # Fully qualified so this cell does not depend on the bare name `image`
    # still referring to the keras preprocessing module.
    img = tf.keras.preprocessing.image.load_img(
        img_path, target_size=(image_size, image_size)
    )
    X[i] = np.asarray(img, dtype=np.uint8)

# Binary target for the sigmoid head: whale -> 0, dolphin -> 1.
label_to_target = {'whale': 0, 'dolphin': 1}
y = train_df['label'].map(label_to_target)
if y.isna().any():
    # A row whose species matched neither keyword has no usable target.
    raise ValueError(
        f"{int(y.isna().sum())} rows have a label other than 'whale' or 'dolphin'"
    )
y = y.to_numpy(dtype=np.float32)

# Divide into training and validation sets

In [None]:
# Hold out 20% of the samples for validation; random_state fixes the shuffle.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train on the training set and validate on the validation set

In [None]:
# Validate on the hold-out split created by train_test_split. That cell names
# the arrays X_valid / y_valid; X_test / y_test are not defined there.
model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))