In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import tensorflow as tf
import glob
from tqdm import tqdm
import random
import os
import tensorflow.keras.layers as L
from tensorflow.keras.models import Sequential
import tensorflow.keras.applications.efficientnet as efn
from tensorflow.keras.applications.resnet50 import ResNet50
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import image
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn import svm
from random import sample

In [None]:
# Target height / width (in pixels) that images are resized to on load; the
# same values define the network's input_shape.
image_h = 224
image_w = 224
# NOTE(review): sample_size is not referenced by any of the visible cells —
# confirm whether it is still needed.
sample_size = 12000
# Kaggle input locations of the "happy-whale-and-dolphin" dataset.
train_img_dir = '/kaggle/input/happy-whale-and-dolphin/train_images'
test_img_dir = '/kaggle/input/happy-whale-and-dolphin/test_images'
sub_path = '/kaggle/input/happy-whale-and-dolphin/sample_submission.csv'
train_path = '/kaggle/input/happy-whale-and-dolphin/train.csv'

In [None]:
# Seed used for every random operation in this cell so that
# "Restart Kernel -> Run All" reproduces the same sample and split.
SEED = 42
# Number of images kept per class (whale / dolphin) for the balanced subset.
per_class = 3000

train_df = pd.read_csv(train_path)

# Normalise misspelled / aliased species names so that every species string
# contains either "whale" or "dolphin" where applicable.
species_fixes = {
    "bottlenose_dolpin": "bottlenose_dolphin",
    "kiler_whale": "killer_whale",
    "globis": "short_finned_pilot_whale",
    "pilot_whale": "short_finned_pilot_whale",
    "beluga": "beluga_whale",
}
train_df["species"] = train_df["species"].replace(species_fixes)

# Derive the binary target from the species name. Rows whose species contains
# neither word keep a missing label and are dropped by the filters below.
train_df.loc[train_df["species"].str.contains("whale", na=False), "label"] = "whale"
train_df.loc[train_df["species"].str.contains("dolphin", na=False), "label"] = "dolphin"

# Shuffle each class with pandas (the original called an un-imported
# `shuffle`, which raises NameError on a fresh kernel) and keep the first
# `per_class` rows of each.
train_df_whale = train_df[train_df["label"] == "whale"].sample(frac=1, random_state=SEED)
train_df_dolphin = train_df[train_df["label"] == "dolphin"].sample(frac=1, random_state=SEED)
whale_F = train_df_whale.head(per_class)
dolphin_F = train_df_dolphin.head(per_class)

# Merge the two classes and shuffle again so they are interleaved.
merged_train_f = pd.concat([whale_F, dolphin_F]).sample(frac=1, random_state=SEED)

# Hold out 15% for evaluation, keeping the whale/dolphin ratio in both parts.
train_frame, test_frame = train_test_split(
    merged_train_f,
    test_size=0.15,
    random_state=SEED,
    stratify=merged_train_f["label"],
)
# Explicit copies: columns are added to these frames in a later cell, and a
# copy guarantees that never touches (or warns about) `merged_train_f`.
train_frame = train_frame.copy()
test_frame = test_frame.copy()

print(len(train_df["species"].unique()))

In [None]:
# Both frames are slices of train.csv, so the hold-out ("test") frame also
# resolves its files inside the training image directory.
for split_frame in (train_frame, test_frame):
    split_frame['image_path'] = train_img_dir + '/' + split_frame['image']

In [None]:
# Distinct label names in order of first appearance in the training frame.
list_of_labels = train_frame['label'].unique()
print(len(list_of_labels))

# Map every label name to its position in `list_of_labels` (0, 1, ...).
labels_to_neumeric = {
    label_name: position for position, label_name in enumerate(list_of_labels)
}
print(labels_to_neumeric)

In [None]:
def _load_images_as_array(image_paths):
    """Load images from disk into one float32 array scaled to [0, 1].

    Each file is read as RGB, resized to (image_h, image_w) and divided by
    255. The output array is allocated once up front, so the images are not
    held twice in memory (once in a Python list, once in the stacked array).

    Args:
        image_paths: iterable of image file paths.

    Returns:
        np.ndarray of shape (len(image_paths), image_h, image_w, 3), float32.
    """
    image_paths = list(image_paths)
    batch = np.empty((len(image_paths), image_h, image_w, 3), dtype=np.float32)
    for position, path in enumerate(tqdm(image_paths)):
        # `target_size` is (height, width); `color_mode='rgb'` replaces the
        # deprecated `grayscale=False` flag.
        img = tf.keras.preprocessing.image.load_img(
            path, target_size=(image_h, image_w), color_mode='rgb'
        )
        batch[position] = tf.keras.preprocessing.image.img_to_array(img) / 255
    return batch


def train_data_genration(train_df):
    """Load every image referenced by a data frame.

    Args:
        train_df: DataFrame with an `image_path` column holding file paths.

    Returns:
        np.ndarray with one resized, [0, 1]-scaled RGB image per row, in row
        order.
    """
    return _load_images_as_array(train_df['image_path'])


def test_data_generation(list_of_test_image_paths, f_t_num):
    """Load the first `f_t_num` images from a sequence of paths.

    Args:
        list_of_test_image_paths: indexable sequence of image file paths.
        f_t_num: number of leading paths to load.

    Returns:
        np.ndarray with `f_t_num` resized, [0, 1]-scaled RGB images.

    Raises:
        IndexError: if `f_t_num` exceeds the number of available paths.
    """
    # Index explicitly (instead of slicing) so that asking for more images
    # than exist still fails loudly rather than being silently truncated.
    selected_paths = [list_of_test_image_paths[i] for i in range(f_t_num)]
    return _load_images_as_array(selected_paths)

def plot_hist(hist):
    """Plot training and validation accuracy per epoch.

    Args:
        hist: object exposing a `history` mapping with the keys "acc" and
            "val_acc" (e.g. the value returned by `model.fit`).
    """
    # Draw the two curves in the order expected by the legend below.
    for metric_key in ("acc", "val_acc"):
        plt.plot(hist.history[metric_key])
    plt.title("model accuracy")
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

In [None]:
# Decode the images of both splits with the same loader; the hold-out frame
# is evaluated only after training. Unpacking consumes the map in order, so
# the training images are loaded first.
X_train, X_test = map(train_data_genration, (train_frame, test_frame))

In [None]:
# Convolutional base: VGG19 with ImageNet weights and without its classifier.
# NOTE(review): the loader feeds images scaled by 1/255; VGG19's ImageNet
# weights are normally paired with `vgg19.preprocess_input` — confirm the
# scaling is intentional.
vgg19 = tf.keras.applications.vgg19
conv_model = vgg19.VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=(image_h, image_w, 3),
)

# Freeze the pretrained base so that only the new head is trained.
for frozen_layer in conv_model.layers:
    frozen_layer.trainable = False

# Classification head: flatten, three 100-unit ReLU layers, 2-way softmax.
x = L.Flatten()(conv_model.output)
for _ in range(3):
    x = L.Dense(100, activation='relu')(x)
predictions = L.Dense(2, activation='softmax')(x)

full_model = tf.keras.models.Model(inputs=conv_model.input, outputs=predictions)
full_model.summary()

In [None]:
# The head is a 2-way softmax trained on one-hot targets, so categorical
# cross-entropy is the matching loss (and makes the 'acc' metric resolve to
# categorical accuracy). `learning_rate` replaces the deprecated `lr` keyword,
# which newer Keras releases no longer accept.
full_model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    metrics=['acc'],
)

In [None]:
# Translate the label names of the training frame into their integer codes.
labels = train_frame['label'].values
neumeric_label_list = [labels_to_neumeric[label_name] for label_name in labels]
tr_lables = np.array(neumeric_label_list)
print(tr_lables)

# Class balance check: samples with code 0 versus all remaining samples.
count_0 = int(np.count_nonzero(tr_lables == 0))
count_1 = len(tr_lables) - count_0
print(count_0," ",count_1," ",len(tr_lables))

# One-hot targets matching the softmax output of the model.
tr_one_hot_label = to_categorical(tr_lables)
print(tr_one_hot_label)

In [None]:
# Carve a validation set out of the training images. A fixed seed keeps the
# split reproducible across kernel restarts, and stratifying on the integer
# labels keeps the class ratio the same in both parts.
X_train_wd, X_valid_wd, y_train_wd, y_valid_wd = train_test_split(
    X_train,
    tr_one_hot_label,
    test_size=0.2,
    random_state=42,
    stratify=tr_lables,
)

In [None]:
# Train the classification head; the returned History is plotted afterwards.
# TODO: the batch size should be changed.
model_history_l = full_model.fit(
    X_train_wd,
    y_train_wd,
    validation_data=(X_valid_wd, y_valid_wd),
    epochs=10,
    batch_size=32,
)


In [None]:
# Show the accuracy curves recorded during training.
plot_hist(model_history_l)

# Class probabilities for the hold-out images, one row per image.
probabilities = full_model.predict(X_test)
print(probabilities.shape[0])

# Index of the most probable class for every image.
lsit = probabilities.argmax(axis=1)

# Reverse lookup: integer code -> label name.
inverse_labels_to_neumeric = {
    code: label_name for label_name, code in labels_to_neumeric.items()
}

In [None]:
# Score the hold-out predictions: compare each row's true label with the
# label name decoded from the predicted class index at the same position.
true_pre = 0
false_pre = 0
for position, actual_label in enumerate(test_frame['label']):
    predicted_label = inverse_labels_to_neumeric[lsit[position]]
    if predicted_label == actual_label:
        true_pre += 1
    else:
        false_pre += 1

accuracy = true_pre / (true_pre + false_pre)
print(accuracy)