# **Loading imagenet weights for Densenet121**

In [None]:
!pip install /kaggle/input/keras-pretrained-imagenet-weights/image_classifiers-1.0.0-py3-none-any.whl

# **Library Imports**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

# ML tools 
import tensorflow as tf
from tensorflow import keras
from kaggle_datasets import KaggleDatasets
from keras.models import Sequential
from tensorflow.keras import layers
# from keras.optimizers import Adam
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
# import tensorflow.keras.applications.efficientnet as efn
from keras.applications import *
import os
from keras import optimizers
from sklearn.model_selection import train_test_split
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from classification_models.tfkeras import Classifiers
Classifiers.models_names()



# **Reading image paths and their actual labels (one-hot encoded)**

In [None]:
# Load the NIH table; `img_ind` holds the image file name and the remaining
# columns are used as labels further down.
df = pd.read_csv('/kaggle/input/nih-dataframe/NIH_Dataframe.csv')

# Keep only the part of each file name that precedes the first '.'.
df['img_ind'] = df['img_ind'].map(lambda file_name: file_name.split('.')[0])

display(df.head(4))
print(df.shape)

# **Setting the parameters for model**

In [None]:
# Label columns are every column except the image identifier.
target_cols = df.drop(['img_ind'], axis=1).columns.to_list()
n_classes = len(target_cols)
img_size = 600   # images are resized to img_size x img_size
n_epochs = 15    # for now
lr = 0.0001
val_split = 0.2  # fraction of samples held out for validation
seed = 33
batch_size = 64

# Apply the seed so NumPy and TensorFlow random ops (augmentation, shuffling,
# dropout, weight initialisation) start from a known state. This is
# best-effort: accelerator execution may still not be bit-for-bit repeatable.
np.random.seed(seed)
tf.random.set_seed(seed)

print("no of classes",n_classes)

# 1. **Data Augmentation**
# 2. **TPU Strategy**
# 3. **Image Decoder**

In [None]:
def auto_select_accelerator():
    """Return a ``tf.distribute`` strategy, preferring a TPU when one resolves.

    Tries to resolve, connect to and initialise a TPU cluster. If that raises
    ``ValueError``, the strategy returned by ``tf.distribute.get_strategy()``
    is used instead. The replica count is printed in both cases.

    Returns:
        The selected ``tf.distribute`` strategy object.
    """
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        # tf.distribute.experimental.TPUStrategy is deprecated; the stable
        # tf.distribute.TPUStrategy takes the same cluster resolver.
        strategy = tf.distribute.TPUStrategy(tpu)
        print("Running on TPU:", tpu.master())
    except ValueError:
        strategy = tf.distribute.get_strategy()
    print(f"Running on {strategy.num_replicas_in_sync} replicas")

    return strategy


def build_decoder(with_labels=True, target_size=(img_size, img_size), ext='jpg'):
    """Create a function that maps an image path to a float32 image tensor.

    The returned callable reads the file, decodes it to 3 channels (as PNG
    when ``ext == 'png'``, otherwise as JPEG), divides by 255 and resizes to
    ``target_size``.

    Args:
        with_labels: When true the callable returns ``(image, label)``;
            otherwise it returns only the image.
        target_size: ``(height, width)`` passed to ``tf.image.resize``.
        ext: File extension used to choose the decoder.

    Returns:
        A callable with signature ``(path, label=None)``.
    """
    def _load_image(path):
        raw = tf.io.read_file(path)
        if ext == 'png':
            pixels = tf.image.decode_png(raw, channels=3)
        else:
            pixels = tf.image.decode_jpeg(raw, channels=3)
        pixels = tf.cast(pixels, tf.float32) / 255.0
        return tf.image.resize(pixels, target_size)

    def _decode_labelled(path, label=None):
        return _load_image(path), label

    def _decode_unlabelled(path, label=None):
        return _load_image(path)

    return _decode_labelled if with_labels else _decode_unlabelled


def build_augmenter(with_labels=True):
    """Create a function that applies random image augmentations.

    The augmentations, applied in this order, are: horizontal flip, vertical
    flip, saturation in [0.8, 1.2], brightness delta up to 0.1 and contrast
    in [0.8, 1.2].

    Args:
        with_labels: When true the callable returns ``(image, label)`` with
            the label passed through untouched; otherwise only the image.

    Returns:
        A callable with signature ``(img, label=None)``.
    """
    def _randomize(image):
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        image = tf.image.random_saturation(image, 0.8, 1.2)
        image = tf.image.random_brightness(image, 0.1)
        return tf.image.random_contrast(image, 0.8, 1.2)

    def _augment_labelled(img, label=None):
        return _randomize(img), label

    def _augment_unlabelled(img, label=None):
        return _randomize(img)

    return _augment_labelled if with_labels else _augment_unlabelled


def build_dataset(paths, labels=None, bsize=32,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024):
    """Build a batched, prefetched ``tf.data.Dataset`` from image paths.

    Args:
        paths: Image paths to slice into dataset elements.
        labels: Optional labels aligned with ``paths``. When given, elements
            are ``(image, label)`` pairs; otherwise just images.
        bsize: Batch size.
        decode_fn: Callable mapping a path (and label) to a decoded image;
            defaults to ``build_decoder(labels is not None)``.
        augment_fn: Callable applying augmentation; defaults to
            ``build_augmenter(labels is not None)``.
        augment: Whether to apply ``augment_fn``.
        repeat: Whether to repeat the dataset indefinitely.
        shuffle: Shuffle buffer size; a falsy value disables shuffling.

    Returns:
        The assembled ``tf.data.Dataset``.
    """
    has_labels = labels is not None
    if decode_fn is None:
        decode_fn = build_decoder(has_labels)
    if augment_fn is None:
        augment_fn = build_augmenter(has_labels)

    AUTO = tf.data.experimental.AUTOTUNE
    slices = (paths, labels) if has_labels else paths

    dset = tf.data.Dataset.from_tensor_slices(slices)
    # Shuffle the (path, label) slices BEFORE decoding, so the shuffle buffer
    # holds path strings rather than decoded images, and before repeat(), so
    # each pass over the data is shuffled on its own.
    if shuffle:
        dset = dset.shuffle(shuffle)
    dset = dset.map(decode_fn, num_parallel_calls=AUTO)
    if augment:
        dset = dset.map(augment_fn, num_parallel_calls=AUTO)
    if repeat:
        dset = dset.repeat()
    return dset.batch(bsize).prefetch(AUTO)



In [None]:
DATASET_NAME = "nih-image-600x600-data"
strategy = auto_select_accelerator()

# Double the configured batch size (the replica count,
# strategy.num_replicas_in_sync, is not used for the scaling).
# NOTE: this updates batch_size in place, so re-running the cell doubles it again.
batch_size *= 2
print('batch size', batch_size)

# **Loading the NIH image dataset**

In [None]:
# Look up the GCS path of the attached Kaggle dataset.
GCS_DS_PATH = KaggleDatasets().get_gcs_path(DATASET_NAME)

# One full image path per row of df.
paths = GCS_DS_PATH + "/NIH_Images/" + df['img_ind'] + '.jpg'

# Get the multi-labels: every column except the image identifier. Selecting
# by name (not by position) keeps this correct wherever `img_ind` sits in
# the frame.
label_cols = df.columns.drop('img_ind')
labels = df[label_cols].values

# **Train/validation split**


In [None]:
# Hold out a `val_split` fraction of the samples for validation. The split
# uses a fixed random_state, so it is the same on every run.
train_paths, valid_paths, train_labels, valid_labels = train_test_split(
    paths,
    labels,
    test_size=val_split,
    random_state=11,
)

print(train_paths.shape, valid_paths.shape)
# Column-wise sums of the label matrices for each split.
(train_labels.sum(axis=0), valid_labels.sum(axis=0))

In [None]:
# One decoder is shared by both pipelines, so training and validation images
# are read and resized the same way.
decoder = build_decoder(with_labels=True, target_size=(img_size, img_size))

# Training pipeline: build_dataset's defaults for augment, repeat and shuffle apply.
dtrain = build_dataset(train_paths, train_labels,
                       bsize=batch_size, decode_fn=decoder)

# Validation pipeline: single pass, no shuffling, no augmentation.
dvalid = build_dataset(valid_paths, valid_labels,
                       bsize=batch_size, decode_fn=decoder,
                       augment=False, repeat=False, shuffle=False)

# **Model building**

In [None]:
def build_model():
    """Assemble a DenseNet121-based multi-label classifier.

    An ImageNet-initialised DenseNet121 without its top is followed by
    dropout (0.16), global average pooling, dropout (0.3) and a sigmoid
    dense layer with ``n_classes`` outputs.

    Returns:
        An uncompiled Keras ``Model`` taking ``(img_size, img_size, 3)`` inputs.
    """
    input_shape = (img_size, img_size, 3)

    backbone = tf.keras.applications.DenseNet121(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape,
    )

    image_in = layers.Input(shape=input_shape)
    features = backbone(image_in)
    features = layers.Dropout(0.16)(features)
    pooled = layers.GlobalAveragePooling2D()(features)
    pooled = layers.Dropout(0.3)(pooled)
    probabilities = layers.Dense(n_classes, activation='sigmoid')(pooled)
    return Model(inputs=image_in, outputs=probabilities)

# **Model compiling**

In [None]:
# Build and compile the model inside the distribution strategy's scope.
with strategy.scope():
    model = build_model()
    loss = tf.keras.losses.BinaryCrossentropy(label_smoothing=0.0)
    model.compile(
        # Take the optimizer from tf.keras, the same Keras implementation the
        # model itself is built with.
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss=loss,
        # Name the metric explicitly: the plotting cell reads the history
        # keys "auc" / "val_auc", and an auto-generated name gets a numeric
        # suffix when this cell is re-run in the same kernel.
        metrics=[tf.keras.metrics.AUC(multi_label=True, name='auc')],
    )


In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

## **Steps per epoch**

In [None]:
# Number of full batches in one pass over the training paths. fit() needs it
# because the training dataset is built with repeat enabled.
steps_per_epoch = len(train_paths) // batch_size
steps_per_epoch

# **Callback functions**

In [None]:
# File the best checkpoint is written to.
name = 'densenet121.keras'

# Multiply the learning rate by 0.1 when val_loss has not improved by at
# least 1e-4 for 2 epochs, never going below 1e-6.
rlr = ReduceLROnPlateau(
    monitor='val_loss', mode='min',
    factor=0.1, patience=2, cooldown=1,
    min_delta=1e-4, min_lr=1e-6,
    verbose=1,
)

# Save the model only when val_loss improves.
ckp = ModelCheckpoint(
    name,
    monitor='val_loss', mode='min',
    save_best_only=True,
    verbose=1,
)

# Stop after 5 epochs without a val_loss improvement of at least 1e-4 and
# restore the best weights.
es = EarlyStopping(
    monitor='val_loss', mode='min',
    min_delta=1e-4, patience=5,
    restore_best_weights=True,
    verbose=1,
)

# **Model training**

In [None]:
# Train on the repeating training dataset for a fixed number of steps per
# epoch, validating on the single-pass validation dataset after each epoch.
# NOTE(review): only the LR scheduler and the checkpoint callback are passed;
# the EarlyStopping callback `es` is defined but not used here. Confirm that
# this is intentional.
fit_callbacks = [rlr, ckp]
history = model.fit(
    dtrain,
    steps_per_epoch=steps_per_epoch,
    epochs=n_epochs,
    validation_data=dvalid,
    callbacks=fit_callbacks,
    verbose=1,
)

# **Training Loss vs Validation Loss (Graph)**

In [None]:
# Training vs validation loss per epoch, taken from the fit history.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(history.history["loss"], marker='o', label="Training Loss")
ax.plot(history.history["val_loss"], marker='+', label="Validation Loss")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.grid(True)
ax.legend()
plt.show()



# **Training AUC vs Validation AUC (Graph)**

In [None]:
# Training vs validation AUC per epoch, taken from the fit history.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(history.history["auc"], marker='o', label="Training AUC")
ax.plot(history.history["val_auc"], marker='+', label="Validation AUC")
ax.set_xlabel("Epochs")
ax.set_ylabel("AUC")
ax.grid(True)
ax.legend()
plt.show()

# **Model saving**

In [None]:
# Save the model as it stands at the end of training to the Kaggle working
# directory. This file is separate from the 'densenet121.keras' checkpoint
# written by the ModelCheckpoint callback.
model.save('/kaggle/working/final_densenet121_r.h5')