In [1]:
import os
import zipfile
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2 as cv
import numpy as np
import pandas as pd
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import img_to_array, load_img
from shutil import copy
import shutil
import random
from skimage import io

In [None]:
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

In [None]:
# Getting the files needed for training and testing 

# Base paths used throughout the notebook: datasets are read from ROOT_DIR,
# and the generated train/validation split is written under WORKING_DIR.
ROOT_DIR = '/kaggle/input/'
WORKING_DIR = '/kaggle/working/'

In [None]:
# shutil.rmtree(os.path.join(ROOT_DIR, 'training'))
# shutil.rmtree(os.path.join(ROOT_DIR, 'validation'))

In [None]:
# with zipfile.ZipFile(os.path.join(ROOT_DIR, 'train_zebras.zip'), 'r') as d_file:
#     d_file.extractall(ROOT_DIR)

# with zipfile.ZipFile(os.path.join(ROOT_DIR, 'train_elephants.zip'), 'r') as d_file:
#     d_file.extractall(ROOT_DIR)

# with zipfile.ZipFile(os.path.join(ROOT_DIR, 'test.zip'), 'r') as d_file:
#     d_file.extractall(ROOT_DIR)

In [None]:
# Show which entries are present under the input root.
os.listdir(ROOT_DIR)

In [None]:
# File names (not full paths) of every image in each class folder.
zebras = os.listdir(os.path.join(ROOT_DIR, 'zebras-train/train_zebras'))
elephants = os.listdir(os.path.join(ROOT_DIR, 'elephants-train/train_elephants'))

In [None]:
# Preview one zebra image. The file is looked up in the same dataset folder
# that `zebras` was listed from ('zebras-train/train_zebras'); the previous
# path ('train_zebras/...') pointed at a folder no other cell uses.
plt.imshow(load_img(os.path.join(ROOT_DIR, 'zebras-train/train_zebras', 'ASG001e0xq_0.jpeg')))

In [None]:
# os.listdir returns names in arbitrary order and random.shuffle is unseeded
# by default, so sort first and fix the seed: the train/validation split then
# comes out the same on every Restart & Run All.
zebras.sort()
elephants.sort()
random.seed(42)
random.shuffle(zebras)
random.shuffle(elephants)

In [None]:
# Split the images into a training set and a validation set.
# Layout: <WORKING_DIR>/{training,validation}/{zebras,elephants}

train_dir = os.path.join(WORKING_DIR, 'training')
valid_dir = os.path.join(WORKING_DIR, 'validation')

train_zeb_dir = os.path.join(train_dir, 'zebras')
train_elp_dir = os.path.join(train_dir, 'elephants')

valid_zeb_dir = os.path.join(valid_dir, 'zebras')
valid_elp_dir = os.path.join(valid_dir, 'elephants')

# Start from empty split folders: re-running this cell must neither crash on
# already-existing directories nor keep files from a previous (differently
# shuffled) split, which would leak images between training and validation.
for split_root in (train_dir, valid_dir):
    shutil.rmtree(split_root, ignore_errors=True)

for class_dir in (train_zeb_dir, train_elp_dir, valid_zeb_dir, valid_elp_dir):
    os.makedirs(class_dir, exist_ok=True)

In [None]:
# Number of zebra images available before splitting.
len(zebras)

In [None]:
# Number of elephant images available before splitting.
len(elephants)

In [None]:
# Copy the shuffled file lists into the split folders: the first
# N_TRAIN_PER_CLASS files of each class go to training, the rest to validation.
N_TRAIN_PER_CLASS = 5600


def _copy_files(file_names, src_dir, dst_dir):
    """Copy every file named in `file_names` from `src_dir` into `dst_dir`."""
    for img_file in file_names:
        copy(os.path.join(src_dir, img_file), os.path.join(dst_dir, img_file))


for file_names, src_subdir, train_dst, valid_dst in [
    (zebras, 'zebras-train/train_zebras', train_zeb_dir, valid_zeb_dir),
    (elephants, 'elephants-train/train_elephants', train_elp_dir, valid_elp_dir),
]:
    # A class with too few images would silently end up with an empty
    # validation folder; fail loudly instead.
    if len(file_names) <= N_TRAIN_PER_CLASS:
        raise ValueError(
            f"{src_subdir} has only {len(file_names)} images; "
            f"need more than {N_TRAIN_PER_CLASS} to leave a validation set"
        )
    src_dir = os.path.join(ROOT_DIR, src_subdir)
    _copy_files(file_names[:N_TRAIN_PER_CLASS], src_dir, train_dst)
    _copy_files(file_names[N_TRAIN_PER_CLASS:], src_dir, valid_dst)

In [None]:
# Report how many files ended up in each class/split folder.
for folder, animal, split in [
    (train_zeb_dir, 'zebras', 'training'),
    (train_elp_dir, 'elephants', 'training'),
    (valid_zeb_dir, 'zebras', 'validation'),
    (valid_elp_dir, 'elephants', 'validation'),
]:
    n_images = len(os.listdir(folder))
    print(f"There are {n_images} images of {animal} for {split}")

In [None]:
# Folder holding the unlabeled test images used for the final predictions.
test_dir = os.path.join(ROOT_DIR, 'el-zeb-test/test')

In [None]:
# Peek at one file name from the training folder and one from the test folder.
print(os.listdir(train_zeb_dir)[0])
print(os.listdir(test_dir)[0])

In [None]:
# Check whether a stray '.DS_Store' entry was copied in with the zebra images.
'.DS_Store' in os.listdir(train_zeb_dir)

In [None]:
# os.remove(os.path.join(train_zeb_dir, '.DS_Store'))

In [None]:
# '.DS_Store' in os.listdir(train_zeb_dir)

In [None]:
# Same check for the test folder.
'.DS_Store' in os.listdir(test_dir)

In [None]:
# Re-check the number of files per class and split (same report as the earlier
# count cell; useful after removing any stray non-image files).
print(f"There are {len(os.listdir(train_zeb_dir))} images of zebras for training")
print(f"There are {len(os.listdir(train_elp_dir))} images of elephants for training")
print(f"There are {len(os.listdir(valid_zeb_dir))} images of zebras for validation")
print(f"There are {len(os.listdir(valid_elp_dir))} images of elephants for validation")

In [None]:
# Total number of elephant file names in the source listing (train + validation).
len(elephants)

In [None]:
# Load one training zebra image (index 30 of the folder listing) for inspection.
sample_img = load_img(os.path.join(train_zeb_dir, os.listdir(train_zeb_dir)[30]))

In [None]:
# Convert the loaded image to a NumPy array.
sample_img_array = img_to_array(sample_img)

In [None]:
# conda install -c menpo opencv=2.4.11

In [None]:
# load_img yields RGB data and img_to_array yields floats in the 0..255 range.
# - No BGR->RGB conversion is needed (applying one would swap the red and blue
#   channels of an image that is already RGB).
# - plt.imshow expects float RGB data in [0, 1] and clips anything above, so
#   convert to uint8 (valid range 0..255) before thresholding and displaying.
img_ = sample_img_array.astype(np.uint8)

# THRESH_TOZERO keeps values above 50 and sets everything else to 0.
ret, thresh = cv.threshold(img_, 50, 255, cv.THRESH_TOZERO)

# Displaying the output image
plt.imshow(thresh)

In [None]:
# Shape of the thresholded image array.
thresh.shape

In [None]:
# Shape of the original sample image array, for comparison.
print(sample_img_array.shape)

In [None]:
### Creating an Image Generator

def train_val_generators(TRAINING_DIR, VALIDATION_DIR):
  """
    Builds the directory-backed generators for training and validation.

    Training images are rescaled to [0, 1] and randomly augmented
    (rotation, shifts, shear, zoom, horizontal flip); validation images
    are only rescaled.

    Args:
    TRAINING_DIR (string): directory path containing the training images
    VALIDATION_DIR (string): directory path containing the validation images

    Returns:
    train_generator, validation_generator: tuple containing the generators
  """
  # Settings shared by both directory iterators.
  flow_options = dict(batch_size=32,
                      class_mode='binary',
                      target_size=(150, 150))

  augmenting_datagen = ImageDataGenerator(
      rescale=1./255.,
      rotation_range=40,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True,
      fill_mode='nearest',
  )
  rescale_only_datagen = ImageDataGenerator(rescale=1./255.)

  train_generator = augmenting_datagen.flow_from_directory(
      directory=TRAINING_DIR, **flow_options)
  validation_generator = rescale_only_datagen.flow_from_directory(
      directory=VALIDATION_DIR, **flow_options)

  return train_generator, validation_generator

In [None]:
# Build the generators over the split folders created earlier.
train_generator, validation_generator = train_val_generators(train_dir, valid_dir)

In [None]:
def preprocess_image(img_path, target_size=(150, 150)):
  """
    Loads one image file and prepares it as a single-image batch for the model.

    Args:
      img_path (string): path of the image file to load
      target_size (tuple): (height, width) to resize to; defaults to the
        150x150 size the generators feed to the model

    Returns:
      array of shape (1, height, width, 3) with values scaled to [0, 1]
  """
  # Let load_img do the resizing (as the Keras directory generators do for the
  # training data) instead of a separate cv.resize, so test images go through
  # the same resize path as the images the model was trained on.
  img_arr = img_to_array(load_img(img_path, target_size=target_size))
  img_arr = np.expand_dims(img_arr, axis=0)
  return img_arr / 255.

In [None]:
### Transfer Learning

# # Download the inception v3 weights
# !wget --no-check-certificate \
#     https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \
#     -O /tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.efficientnet import EfficientNetB7


# local_weights_file = '/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'

In [None]:
def create_pretrained_model():
  """
    Builds a frozen InceptionV3 feature extractor.

    The network takes 150x150 RGB inputs, is created without its
    classification top, and is initialized with ImageNet weights.
    Every layer is marked non-trainable.

    Returns:
      pretrained_model: the initialized, frozen InceptionV3 model
  """
  base_model = InceptionV3(weights='imagenet',
                           include_top=False,
                           input_shape=(150, 150, 3))

  # Freeze the backbone so only layers added on top of it get trained.
  for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

  return base_model

In [None]:
# Instantiate the frozen backbone and print its layer-by-layer summary.
pretrained_model = create_pretrained_model()

pretrained_model.summary()

In [None]:
# Sanity check that the backbone is frozen: the trainable count should be 0
# because every layer was marked non-trainable when the model was created.
total_params = pretrained_model.count_params()
num_trainable_params = sum(w.shape.num_elements() for w in pretrained_model.trainable_weights)

# The first line reports ALL parameters (it was previously mislabeled as "trainable").
print(f"{total_params:,} total params")
print(f"{num_trainable_params:,} total trainable params")

In [None]:
## Feeding the last output of the pretrained model as input for my own

def output_(pretrained_model, layer_name='mixed9'):
  """
    Returns the output tensor of an intermediate layer of the pretrained model.

    Args:
      pretrained_model: model to take the layer from
      layer_name (string): name of the layer whose output is returned;
        defaults to 'mixed9'

    Returns:
      last_output: the symbolic output tensor of that layer
  """
  last_desired_layer = pretrained_model.get_layer(layer_name)
  last_output = last_desired_layer.output
  # Read the shape from the output tensor: `layer.output_shape` is not
  # available on newer Keras versions, while `output.shape` works everywhere.
  print('last desired layer output shape: ', last_output.shape)
  print('last layer output: ', last_output)
  return last_output

In [None]:
# Output tensor of the chosen backbone layer; the custom head is attached to it.
last_output = output_(pretrained_model)

In [None]:
def create_final_model(pretrained_model, last_output):
    """
        Appends a custom classification head to a pretrained model and
        compiles the result for binary classification.

        Args:
          pretrained_model: backbone whose input becomes the model input
          last_output: backbone tensor the head is attached to

        Returns:
          model: compiled Keras model (Adam, lr=1e-3, binary cross-entropy,
          accuracy metric) ending in a single sigmoid unit
    """
    x = last_output
    for n_filters in (16, 32, 64):
        # padding='same' on both layers: the backbone feature map is already
        # tiny (expected to be about 3x3 for a 150x150 input), so unpadded 3x3
        # convolutions followed by 2x2 pooling would shrink it to a
        # non-positive spatial size and the model could not be built.
        # Conv2D has no activation by default, so relu is requested explicitly.
        x = layers.Conv2D(n_filters, (3, 3), padding='same', activation='relu')(x)
        x = layers.MaxPooling2D(2, 2, padding='same')(x)

    x = layers.Flatten()(x)
    x = layers.Dense(units=1024, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    # Single sigmoid unit: probability of the class with index 1.
    x = layers.Dense(units=1, activation='sigmoid')(x)

    model = Model(inputs=pretrained_model.input, outputs=x)
    model.compile(optimizer=Adam(learning_rate=1e-3),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

In [None]:
# Assemble and compile the full model: frozen backbone plus custom head.
model = create_final_model(pretrained_model, last_output)

In [None]:
# defining model callback class

class Callback(tf.keras.callbacks.Callback):
  """Stops training once training and validation accuracy both exceed 99.99%."""

  def on_epoch_end(self, epoch, logs=None):
    # Avoid a shared mutable default argument.
    logs = logs or {}
    accuracy = logs.get('accuracy')
    val_accuracy = logs.get('val_accuracy')

    # A metric can be missing (e.g. training without validation data);
    # comparing None with a float would raise, so just keep training.
    if accuracy is None or val_accuracy is None:
      return

    if val_accuracy > 0.9999 and accuracy > 0.9999:
      print("Stopping training, desired accuracy has been met.......")
      self.model.stop_training = True


callback = Callback()

In [None]:
# Train for up to 50 epochs; the callback may stop earlier.
history = model.fit(
    train_generator,
    epochs=50,
    validation_data=validation_generator,
    callbacks=[callback],
    verbose=1,
)

In [None]:
# '.DS_Store' in os.listdir(test_dir)

In [None]:
# os.remove(os.path.join(test_dir, '.DS_Store'))

In [None]:
# Load and preprocess every test image into one batch array.
# The images come from `test_dir` (the folder the earlier cells inspect) and
# are stored in os.listdir(test_dir) order, so predictions follow that order.
test_files = os.listdir(test_dir)

# float32 is enough for [0, 1] pixel values and halves the memory of the
# default float64.
proc_imgs = np.zeros((len(test_files), 150, 150, 3), dtype=np.float32)

for idx, file in enumerate(test_files):
    # preprocess_image returns a batch of one image; store the image itself.
    proc_imgs[idx] = preprocess_image(os.path.join(test_dir, file))[0]
    # Report progress occasionally instead of one line per image.
    if (idx + 1) % 500 == 0 or idx + 1 == len(test_files):
        print(f"{idx+1}/{proc_imgs.shape[0]}")

In [None]:
# Run the model on the whole preprocessed test batch.
preds = model.predict(proc_imgs)

In [None]:
# Turn the sigmoid outputs into hard 0/1 labels and write the submission file.
preds = np.asarray(preds).ravel()
# NOTE(review): Keras assigns class indices alphabetically by folder name, so
# presumably 0 = elephants and 1 = zebras; confirm with train_generator.class_indices.
preds_ = [1 if p >= 0.5 else 0 for p in preds]

# List the same folder the test images are defined to live in (`test_dir`);
# ids only line up with predictions if this listing matches the load order.
files = os.listdir(test_dir)
sub_file = pd.DataFrame({'id': files, 'label': preds_})

# Write under WORKING_DIR like the other generated files; the input tree holds
# source data (and is typically read-only on Kaggle).
sub_file.to_csv(os.path.join(WORKING_DIR, 'sub.csv'), index=False)

In [None]:
# Number of test images predicted as label 1.
preds_.count(1)

In [None]:
# Number of test images predicted as label 0.
preds_.count(0)