# Importing Modules

In [None]:
# Standard imports
import os
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import trange
from colorama import Fore
from glob import glob
import json
from pprint import pprint
import time
import cv2
from enum import Enum
from IPython.display import display
import random
import inspect

# For Data preparation
from sklearn.preprocessing import *
from sklearn.model_selection import *
from sklearn.metrics import *

# Tensorflow modules
import tensorflow as tf
from tensorflow.keras import layers as tf_l
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint 
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.utils import plot_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model, save_model, Model
import tensorflow.keras.backend as K
from tensorflow.keras import Input


import warnings
# Silence Python-level warnings so the notebook output stays readable.
warnings.filterwarnings("ignore")

# To ignore tensorflow warnings
# NOTE(review): TensorFlow's native logger presumably reads this variable when the
# library is loaded; since `import tensorflow` runs above, this assignment may be
# too late to take effect — consider moving it before the import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# tf.test.is_gpu_available() is deprecated; listing the physical GPU devices is the
# supported way to ask the same question.
print(f"GPU is available : {bool(tf.config.list_physical_devices('GPU'))}")

# Configs and helper functions

In [None]:
class Config(Enum):
    """Notebook-wide settings; read one with ``Config.<NAME>.value``."""

    # Locations of the CSV tables loaded by the notebook.
    DATA_CSV = "../input/petfinder-cv-dataset/data.csv"
    TEST_CSV = "../input/petfinder-cv-dataset/test.csv"

    # Image side length (passed as img_dim / img_shape), batch size and
    # number of training epochs.
    IMG_SHAPE = 224
    BATCH_SIZE = 64
    EPOCHS = 10

    # Column names; presumably the metadata flags and the regression target
    # of the CSV files — verify against the data.
    META_FEATURES = [
        'Subject Focus',
        'Eyes',
        'Face',
        'Near',
        'Action',
        'Accessory',
        'Group',
        'Collage',
        'Human',
        'Occlusion',
        'Info',
        'Blur',
    ]
    LABEL = "Pawpularity"

In [None]:
def setSeed(seed):
    """
    Seed every random number generator used in this notebook to keep runs
    reproducible.

    Args:
        seed: integer handed to NumPy, Python's ``random`` module, the
            ``PYTHONHASHSEED`` environment variable and TensorFlow.
    """
    np.random.seed(seed)
    random.seed(seed)
    # NOTE(review): PYTHONHASHSEED is presumably only honoured by interpreters
    # started after this point, not by the one already running.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # TF_CUDNN_DETERMINISTIC is an on/off switch, not a seed: writing str(seed)
    # here turned determinism OFF whenever the seed was 0.
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    tf.random.set_seed(seed)
    print('SEEDITIZATION DONE !')

setSeed(0)

In [None]:
# Load both CSV tables from the locations stored in Config.
data_df = pd.read_csv(filepath_or_buffer=Config.DATA_CSV.value)
test_df = pd.read_csv(filepath_or_buffer=Config.TEST_CSV.value)

In [None]:
# Preview the first rows of data_df (rendered as the cell output).
data_df.head()

In [None]:
# Preview the first rows of test_df (rendered as the cell output).
test_df.head()

# Building a custom neural network

In [None]:
class CustomModel:
    """
    Small configurable CNN regressor built with the Keras functional API.

    The network is a stack of identical stages
    (Conv2D -> BatchNormalization -> MaxPooling2D [-> Dropout]), followed by
    global max pooling and three dense layers ending in a single output unit.
    Calling an instance returns the underlying ``tf.keras.Model``.
    """

    def __init__(self, filters : list, kernel_sizes : list, img_dim = 512, use_dropout = False):
        """
        Args:
            filters: number of convolution filters for each stage; one stage
                is created per entry, so it must not be empty.
            kernel_sizes: convolution kernel size for each stage; needs at
                least as many entries as ``filters`` (extra entries are ignored).
            img_dim: side length of the square, 3-channel input image.
            use_dropout: when True, a Dropout(0.2) layer closes every stage.

        Raises:
            ValueError: if ``filters`` is empty or ``kernel_sizes`` is shorter
                than ``filters``.
        """
        if len(filters) == 0:
            raise ValueError("filters must contain at least one entry")
        if len(kernel_sizes) < len(filters):
            raise ValueError(
                f"kernel_sizes has {len(kernel_sizes)} entries but filters has {len(filters)}"
            )

        INPUT = tf_l.Input(shape = [img_dim, img_dim, 3])

        # Every stage is identical, so the first one no longer needs to be
        # written out separately from the loop.
        x = INPUT
        for n_filters, kernel_size in zip(filters, kernel_sizes):
            x = tf_l.Conv2D(
                filters = n_filters,
                kernel_size = kernel_size,
                padding = 'same',
                activation = "relu",
            )(x)

            x = tf_l.BatchNormalization()(x)
            x = tf_l.MaxPooling2D(pool_size = (2, 2))(x)

            if use_dropout:
                x = tf_l.Dropout(rate = 0.2)(x)

        x = tf_l.GlobalMaxPooling2D()(x)

        x = tf_l.Dense(128, kernel_initializer = "he_uniform", activation = "relu")(x)
        x = tf_l.Dense(32, kernel_initializer = "he_uniform", activation = "relu")(x)
        # NOTE(review): a ReLU on the single regression output can get stuck at
        # zero if the pre-activation goes negative — consider a linear output.
        OUTPUT = tf_l.Dense(1, kernel_initializer = "he_uniform", activation = "relu")(x)

        self.model = tf.keras.Model(inputs = INPUT, outputs = OUTPUT)

    def __call__(self):
        """Return the Keras model built in ``__init__`` (the same object on every call)."""
        return self.model
    
    

In [None]:
# Build a three-stage CNN for Config.IMG_SHAPE-sized inputs and keep the Keras
# model that calling the CustomModel instance returns.
custom_model = CustomModel(
    use_dropout = True,
    img_dim = Config.IMG_SHAPE.value,
    kernel_sizes = [3, 3, 3],
    filters = [32, 64, 128],
)()

# plot_model(custom_model, show_shapes=True, show_layer_names=True)

In [None]:
# Print the layer-by-layer summary of the model built above.
custom_model.summary()

# Let's create a data loader class

In [None]:
class ImgDataLoader:
    """
    Gives img data in the form of batches.

    Wraps a dataframe of image paths (and optionally targets) and, when called,
    returns a batched and prefetched ``tf.data.Dataset`` that reads, decodes,
    resizes and optionally augments the images.
    """

    def __init__(self,
                 df: "pd.DataFrame",
                 path_col: str,
                 target_col: "str | None",
                 regression_type=True,
                 rescale=False,
                 batch_size=32,
                 img_shape=224,
                 resize_with_pad = False,
                 do_augment = False,
                 repeat = False,
                 shuffle = False
                 ):
        """
        Args:
            df: dataframe holding one row per image.
            path_col: name of the column containing the image file paths.
            target_col: name of the label column, or None to yield images only.
            regression_type: stored on the instance; not used by this class.
            rescale: divide pixel values by 255 when True.
            batch_size: number of samples per batch.
            img_shape: side length of the square output image.
            resize_with_pad: pad to keep the aspect ratio instead of stretching.
            do_augment: apply the random augmentations of ``doAugment``.
            repeat: repeat the dataset indefinitely.
            shuffle: shuffle the samples, reshuffling on every iteration.
        """
        self.df = df
        self.path_col = path_col
        self.target_col = target_col
        self.regression_type = regression_type
        self.rescale = rescale
        self.batch_size = batch_size
        self.img_shape = img_shape
        self.resize_with_pad = resize_with_pad
        self.do_augment = do_augment
        self.repeat = repeat
        self.shuffle = shuffle

    @tf.function
    def doAugment(self, img : "Tensor"):
        """
        Perform augmentation over the image tensor: random horizontal/vertical
        flips plus small random saturation, brightness, contrast and hue changes.
        """
        # NOTE(review): the colour ops presumably assume float images in [0, 1];
        # confirm the result is sensible when rescale=False.
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        img = tf.image.random_saturation(img, 0.95, 1.05)
        img = tf.image.random_brightness(img, 0.05)
        img = tf.image.random_contrast(img, 0.95, 1.05)
        img = tf.image.random_hue(img, 0.05)

        return img

    @tf.function
    def process_img(self, path : str, label = None):
        """
        Read a JPEG file, convert it to a float32 tensor, then rescale, resize
        and augment it according to the loader settings.

        Returns:
            ``(img, label)`` when a label is supplied, otherwise ``img`` alone.
        """
        img = tf.io.read_file(path)
        img = tf.image.decode_jpeg(img, channels = 3)
        img = tf.cast(img, dtype = tf.float32)

        if self.rescale:
            img = img/255.0

        if self.resize_with_pad:
            img = tf.image.resize_with_pad(img, self.img_shape, self.img_shape)
        else:
            img = tf.image.resize(img, (self.img_shape, self.img_shape))

        if self.do_augment:
            img = self.doAugment(img)

        if label is not None:
            return img, label

        return img

    def __call__(self):
        """Build and return the batched, prefetched ``tf.data.Dataset``."""
        paths = self.df[self.path_col].values
        if self.target_col is not None:
            data_gen = tf.data.Dataset.from_tensor_slices((paths, self.df[self.target_col].values))
        else:
            data_gen = tf.data.Dataset.from_tensor_slices(paths)

        AUTOTUNE = tf.data.experimental.AUTOTUNE

        if self.shuffle:
            # Shuffle the cheap (path, label) records before decoding: the buffer
            # then holds strings instead of decoded float images, so it can span
            # the whole table. Doing it before repeat() keeps epochs separate.
            data_gen = data_gen.shuffle(max(len(self.df), 1), reshuffle_each_iteration = True)

        data_gen = data_gen.map(self.process_img, num_parallel_calls = AUTOTUNE)

        if self.repeat:
            data_gen = data_gen.repeat()

        return data_gen.batch(self.batch_size).prefetch(AUTOTUNE)
 

In [None]:
# Just testing

data_gen = ImgDataLoader(
    data_df, 
    "path", 
    "Pawpularity", 
    rescale = True, 
    img_shape = Config.IMG_SHAPE.value,
    do_augment = True,
    repeat = False,
    shuffle = False
)()

# Show up to 16 images of the first batch in a 4x4 grid.
for x, y in data_gen.take(1):
    plt.figure(figsize=(12, 9))
    for k, (img, lbl) in enumerate(zip(x[:4*4], y[:4*4])):
        plt.subplot(4, 4, k+1)
        # The brightness/contrast augmentation can push pixels slightly outside
        # [0, 1], the range imshow expects for float images, so clip for display.
        plt.imshow(np.clip(img.numpy(), 0.0, 1.0))
        # .numpy() puts the plain label value in the title instead of the
        # "tf.Tensor(..., shape=(), dtype=...)" repr.
        plt.title(f"GT : {lbl.numpy()}, {img.shape}")
        plt.axis('off')
    

# Training Engine 🚂🌫

In [None]:
def trainEngine(tf_model : "tf compiled model", tf_model_name : "give a name to model", data_df : "cv dataframe"):
    """
    Take a model and perform the full k-fold training, plotting the loss
    curves of every fold.

    Args:
        tf_model: zero-argument callable returning the Keras model to train;
            can be a class with a ``__call__`` method or a function.
        tf_model_name: name of the directory (created under the working
            directory) that receives one ``<fold>.h5`` checkpoint per fold.
        data_df: dataframe with ``path``, ``Pawpularity`` and ``kfold`` columns;
            fold ids are taken to run from 0 to ``max(kfold)``.
    """

    def givePlotsInOne(training_summary : dict, useDark = False, title = "Plot"):
        """
        Helper function to plot the training result: one line per key of
        ``training_summary`` (except "epochs", which is the x axis).
        """
        fig = go.Figure()
        for k, values in training_summary.items():
            if k != "epochs":
                fig.add_trace(go.Scatter(x=training_summary["epochs"], y=values,
                            mode='lines+markers',
                            name=k))

        # The layout does not depend on the traces, so set it once.
        fig.update_layout(
            title_text = title,
            title_x = .5,
            xaxis_title = "Epochs",
            yaxis_title = "Values",
            template = "plotly_dark" if useDark else "ggplot2"
        )

        fig.show()

    def train_model(model : "TF model", fold : int, train_data_gen, val_data_gen):
        """Compile ``model`` and fit it on one fold; returns the Keras History."""
        LEARNING_RATE = 1e-2
        DECAY_STEPS = 100
        DECAY_RATE = 0.99

        lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=LEARNING_RATE,
            decay_steps=DECAY_STEPS, decay_rate=DECAY_RATE,
            staircase=True
        )

        # Creating Callbacks
        early_stop = EarlyStopping(
            monitor='val_loss', patience = 3, restore_best_weights=True
        )

        # ModelCheckpoint has no patience setting, so the stray `patient`
        # keyword of the earlier version is dropped.
        model_chkpt = ModelCheckpoint(
            monitor = 'val_loss',
            mode = 'min',
            save_best_only = True,
            filepath = f"./{tf_model_name}/{fold}.h5"
        )

        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError()]
        )

        # fit() replaces the deprecated fit_generator(); the worker and
        # multiprocessing options only concern generator inputs, not tf.data.
        return model.fit(
            train_data_gen,
            validation_data = val_data_gen,
            epochs = Config.EPOCHS.value,
            verbose = 1,
            callbacks = [early_stop, model_chkpt]
        )

    os.makedirs(f"./{tf_model_name}", exist_ok=True)

    # Weights of the untrained model, captured on the first fold and restored on
    # the later ones so no fold starts from weights fitted on another fold.
    initial_weights = None

    folds = int(max(data_df['kfold'])) + 1
    for fold in range(folds):
        print(Fore.BLUE)
        print("_ "*20, "\n")
        print(f"{' '*11}Current Fold : {fold + 1}")
        print("_ "*20, "\n")

        K.clear_session()

        train_data = data_df.loc[data_df.kfold != fold]
        val_data = data_df.loc[data_df.kfold == fold]

        train_data_gen = ImgDataLoader(
                                    train_data, 
                                    "path", 
                                    "Pawpularity", 
                                    rescale = True, 
                                    img_shape = Config.IMG_SHAPE.value,
                                    do_augment = True,
                                    repeat = False,
                                    shuffle = True,
                                    batch_size = 32,
                                )()
        val_data_gen = ImgDataLoader(
                                    val_data, 
                                    "path", 
                                    "Pawpularity", 
                                    rescale = True, 
                                    img_shape = Config.IMG_SHAPE.value,
                                    do_augment = False,
                                    repeat = False,
                                    shuffle = False,
                                    batch_size = 16
                                )()

        model = tf_model()
        if initial_weights is None:
            initial_weights = model.get_weights()
        else:
            model.set_weights(initial_weights)

        training_history = train_model(model, fold, train_data_gen, val_data_gen)

        losses = training_history.history['loss']
        summary = {
            # Early stopping may end a fold before Config.EPOCHS, so the x axis
            # follows the number of epochs actually recorded.
            "epochs" : list(range(1, len(losses) + 1)),
            "loss" : losses,
            "val_loss" : training_history.history['val_loss'],
        }

        givePlotsInOne(training_summary = summary, useDark = False, title = f"For Fold {fold + 1}")

# Let's train

In [None]:
# Run the k-fold training loop on data_df with the three-stage custom CNN;
# checkpoints are written under "./Custom_CNN".
trainEngine(
    data_df = data_df,
    tf_model_name = "Custom_CNN",
    tf_model = CustomModel(
        use_dropout = True,
        img_dim = Config.IMG_SHAPE.value,
        kernel_sizes = [3, 3, 3],
        filters = [32, 64, 128],
    ),
)

# Saving Model Weights


In [None]:
def saveModelsKaggle(dir_name: str, title: str, token_path="../input/kaggletoken/kaggle.json", owner="hotsonhonet"):
    """
     > Helper function to automate the process of saving models
        as kaggle datasets using kaggle API
     > dir_name should be compatible with hyperlink formats

    Args:
        dir_name: directory (relative to the working directory) to upload.
        title: title of the dataset; also used as the second part of its id.
        token_path: path to the Kaggle API token file (kaggle.json).
        owner: account name used as the first part of the dataset id.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # Function-scope imports keep this helper self-contained in the notebook.
    import shutil
    import subprocess
    import sys
    from pathlib import Path

    if not os.path.exists(token_path):
        print("Token doesn't exist")
        return

    if not os.path.exists(f"./{dir_name}"):
        print("Directory doesn't exist")
        return

    dataset_dir = Path(f"./{dir_name}")

    try:
        # Put the token in ~/.kaggle/kaggle.json with owner-only permissions.
        # Copying it straight there avoids leaving a copy of the secret in the
        # working directory, and mkdir(exist_ok=True) makes re-runs safe.
        kaggle_home = Path.home() / ".kaggle"
        kaggle_home.mkdir(parents=True, exist_ok=True)
        installed_token = kaggle_home / "kaggle.json"
        shutil.copyfile(token_path, installed_token)
        os.chmod(installed_token, 0o600)

        if shutil.which("kaggle") is None:
            subprocess.run([sys.executable, "-m", "pip", "install", "kaggle"], check=True)

        # Argument lists (no shell) keep spaces or shell metacharacters in
        # dir_name from being interpreted; check=True surfaces CLI failures.
        subprocess.run(["kaggle", "datasets", "init", "-p", str(dataset_dir)], check=True)

        # Upto this we will be having a meta data file in the form of a json
        metadata_path = dataset_dir / "dataset-metadata.json"
        meta_data = json.loads(metadata_path.read_text())
        meta_data['title'] = f'{title}'
        meta_data['id'] = f'{owner}/{title}'
        metadata_path.write_text(json.dumps(meta_data, indent=4))

        subprocess.run(
            ["kaggle", "datasets", "create", "-p", str(dataset_dir), "--dir-mode", "zip"],
            check=True,
        )
    except (subprocess.CalledProcessError, OSError, ValueError) as err:
        # Report the failure instead of claiming success.
        print(f"[ERROR] Dataset could not be saved: {err}")
        return

    print("[INFO] Dataset saved successfully")

In [None]:
# Upload the "Custom_CNN" checkpoint directory as a Kaggle dataset.
saveModelsKaggle(
    title = "petfinderCustomModel",
    dir_name = "Custom_CNN",
)