<a href="https://www.kaggle.com/code/maliabbasi/mobileappmodelv1?scriptVersionId=166175051" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly.express as px 
from warnings import filterwarnings as filt 
import cv2

# Suppress every warning so the notebook output stays readable
filt('ignore')
# Draw all matplotlib figures with the dark background style
plt.style.use('dark_background')


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Root of the Kaggle melanoma dataset; the 'train' and 'test' sub-folders hold the two splits
path = "/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset"
train_path, test_path = (os.path.join(path, split) for split in ('train', 'test'))

In [None]:
def get_dataset(path):
    """Collect image paths and labels from a class-per-folder directory.

    Every entry directly under ``path`` is treated as a class folder. Each
    entry inside a class folder becomes one sample: its path is stored
    relative to ``path`` (``<class>/<file>``) and its target is the folder name.

    Returns a dict with two parallel lists, ``'img_path'`` and ``'target'``,
    and prints the length of both as a quick sanity check.
    """
    img_paths, targets = [], []

    for class_name in os.listdir(path):
        class_dir = os.path.join(path, class_name)
        for file_name in os.listdir(class_dir):
            img_paths.append(os.path.join(class_name, file_name))
            targets.append(class_name)

    data = {'img_path': img_paths, 'target': targets}
    print(f'length of image path : {len(data["img_path"])}')
    print(f'length of target     : {len(data["target"])}')
    return data

In [None]:
# Index the training folder and wrap the result in a DataFrame (columns: img_path, target)
data = get_dataset(train_path)
df = pd.DataFrame(data)
df.head()

In [None]:
import albumentations as A
from keras.preprocessing.image import ImageDataGenerator
import tensorflow.keras.utils as utils 

In [None]:
def create_data_gen(df, base_path, x, y, data_gen_args = None, **kwargs):
    """Build a Keras image iterator that reads the files listed in a DataFrame.

    df            : DataFrame holding the image path column and the label column
    base_path     : directory the paths in ``df[x]`` are relative to
    x, y          : names of the path column and the label column
    data_gen_args : optional dict of keyword arguments for ``ImageDataGenerator``
    **kwargs      : forwarded unchanged to ``flow_from_dataframe``

    Returns the iterator produced by ``flow_from_dataframe``.
    """
    # None replaces the former ``{}`` default: a dict literal in the signature
    # is a single object shared by every call.
    datagen = ImageDataGenerator(**(data_gen_args or {}))
    return datagen.flow_from_dataframe(df, base_path, x_col = x, y_col = y, **kwargs)

In [None]:
def get_row(n_img, col):
    """Return how many rows a grid with ``col`` columns needs to hold ``n_img`` images."""
    full_rows, leftover = divmod(n_img, col)
    # A partially filled last row still occupies a whole row
    return full_rows + 1 if leftover else full_rows

def show_imgs(loader, n_img, col):
    """Plot up to ``n_img`` images from the first batch of ``loader`` in a grid.

    loader : indexable batch source; ``loader[0]`` must yield ``(images, labels)``
             with one-hot encoded labels
    n_img  : maximum number of images to show (capped at the batch size)
    col    : number of grid columns

    Each title shows the decoded class name and the image shape.
    """
    # Fetch the batch once. Each ``loader[0]`` access builds the batch again
    # (for a Keras data iterator that means re-reading and re-augmenting every
    # image), so the batch size is taken from the images already in hand.
    images, labels = loader[0]
    n_img = min(images.shape[0], n_img)
    row = get_row(n_img, col)
    ohe_rev = {
        0 : 'benign',
        1 : 'malignant'
    }

    plt.figure(figsize = (col * 4, row * 4))

    for i in range(n_img):
        img = images[i]
        lbl = labels[i]

        plt.subplot(row, col, i + 1)

        plt.imshow(img)
        plt.title(f'{ohe_rev[np.argmax(lbl)]} : {img.shape}')
        plt.axis(False)

    plt.show()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
def sample(x, y, frac = 0.2, return_val = True, random_state = None):
    """Split features and labels into stratified train / validation frames.

    x            : DataFrame of feature columns
    y            : Series of labels, also used for stratification
    frac         : fraction of rows assigned to the validation part
    return_val   : when False only the training frame is returned
    random_state : optional seed forwarded to ``train_test_split`` so the split
                   can be reproduced; None keeps the previous unseeded behaviour

    Returns ``(train, val)`` or just ``train``; each frame has the feature
    columns followed by the label column.
    """
    train_x, val_x, train_y, val_y = train_test_split(
        x, y, test_size = frac, stratify = y, random_state = random_state)
    train = pd.concat([train_x, train_y], axis = 1)
    if not return_val:
        return train
    # Only assemble the validation frame when the caller asked for it
    val = pd.concat([val_x, val_y], axis = 1)
    return train, val

In [None]:
# Scratch check: number of rows that 15% of the training frame corresponds to
df.shape[0] * 0.15

In [None]:
# Separate the feature column(s) from the label column
x = df.drop(['target'], axis = 1)
y = df.target

# Hold out 15% of the training data (stratified by label inside `sample`) for validation
train_df, val_df = sample(x, y, 0.15)
train_df.shape

In [None]:
def transforms(image, normalize = True):
    """Apply the random augmentation pipeline to one image.

    Pipeline order: horizontal flip, vertical flip, 250x250 centre crop
    (p = 0.4), Gaussian blur (p = 0.5), one of three rotation ranges
    (always applied), then a resize to 300x300.

    image     : image array accepted by albumentations
    normalize : when True the augmented image is divided by 255

    Returns the augmented (and optionally scaled) image.
    """
    # Exactly one of these rotations is picked on every call (p = 1);
    # the area uncovered by the rotation is filled with a constant border.
    random_rotation = A.OneOf([
        A.Rotate(limit = 70, border_mode = cv2.BORDER_CONSTANT),
        A.Rotate(limit = 50, border_mode = cv2.BORDER_CONSTANT),
        A.Rotate(limit = 120, border_mode = cv2.BORDER_CONSTANT),
    ], p = 1)

    pipeline = A.Compose([
        A.HorizontalFlip(),
        A.VerticalFlip(),
        A.CenterCrop(250 ,250, p = 0.4),
        A.GaussianBlur(p = 0.5),
        # A.ChannelShuffle() is intentionally left disabled
        random_rotation,
        A.Resize(300, 300)
    ])

    augmented = pipeline(image = image)['image']
    return augmented / 255 if normalize else augmented

In [None]:
# Training images go through the random augmentation pipeline defined in `transforms`
t_dargs = {
    'preprocessing_function' : transforms
}

# Validation (and later test) images are only divided by 255, with no augmentation
v_dargs = {
    'preprocessing_function' : lambda x : x / 255
}

# Both frames hold paths relative to train_path because the validation split was
# carved out of the training folder. Only the training loader is shuffled.
train_loader = create_data_gen(train_df, train_path, 'img_path', 'target', t_dargs, batch_size = 80, 
                               color_mode = 'rgb', class_mode = 'categorical', target_size = (300, 300), shuffle = True)

val_loader = create_data_gen(val_df, train_path, 'img_path', 'target', v_dargs, batch_size = 80, 
                               color_mode = 'rgb', class_mode = 'categorical', target_size = (300, 300), shuffle = False)

In [None]:
# Preview 15 augmented training images in a 5-column grid
show_imgs(train_loader, 15, 5)

In [None]:
# Scratch arithmetic; the result is not used by any other visible cell
256 * 2

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.python.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [None]:
from keras.models import Model
from keras.layers import Dense, MaxPool2D, Flatten

def ResNet50V2test(ip, op, activation = 'relu', output_activation = 'softmax'):
    """Build a classifier from an ImageNet-pretrained ResNet50V2 and a dense head.

    ip                : input shape handed to the backbone, e.g. (300, 300, 3)
    op                : number of output units; with ``op == 1`` the output
                        activation is forced to 'sigmoid'
    activation        : activation of the hidden dense layer
    output_activation : activation of the output layer when ``op > 1``

    Prints the chosen activations and the model summary, then returns the
    (uncompiled) model.
    """
    output_activation = 'sigmoid' if op == 1 else output_activation

    backbone = tf.keras.applications.ResNet50V2(weights = 'imagenet', include_top = False, input_shape = ip)
    # NOTE(review): this freezes every backbone layer from index 3 onward and
    # leaves the first three trainable; confirm that is intended, since the
    # usual fine-tuning setup freezes the early layers instead.
    for layer in backbone.layers[3:]:
        layer.trainable = False

    x = Flatten()(backbone.output)
    x = Dense(1024, activation = activation)(x)
    # The head size follows `op`. It was hard-coded to 2, which contradicted
    # the sigmoid branch above whenever op == 1.
    x = Dense(op, activation = output_activation)(x)

    model = Model(inputs = [backbone.input], outputs = [x])

    print(f'layer activation  :==> {activation}')
    print(f'output activation :==> {output_activation}')
    print()
    # summary() prints by itself and returns None, so it must not be wrapped in print()
    model.summary()
    return model

In [None]:
# Input size matches the loaders' target_size of (300, 300) with 3 colour channels
IMG_SHAPE = (300, 300, 3)
# Number of classes, passed to the builder as its `op` argument
classes = 2
model = ResNet50V2test(IMG_SHAPE, classes)

In [None]:
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam, SGD, Adagrad

# Categorical cross-entropy matches the one-hot labels the loaders emit (class_mode = 'categorical')
model.compile(optimizer = Adagrad(learning_rate = 0.01),
              loss = CategoricalCrossentropy(),
              metrics = ['accuracy'])

In [None]:
from  tensorflow import keras
# Render the layer graph to model.png (top-to-bottom layout, layer names only)
keras.utils.plot_model(
    model,
    to_file="model.png",
    show_shapes=False,
    show_dtype=False,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=96,
    layer_range=None,
)

In [None]:
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
# All three callbacks watch the validation loss.
# Lower the learning rate once val_loss has stopped improving for 2 epochs
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', patience = 2, mode = 'min', verbose = 1)
# Write model.h5 only when val_loss reaches a new minimum
checkpoint = ModelCheckpoint('model.h5', monitor = 'val_loss', verbose = 1, save_best_only = True, mode = 'min')
# Abort training after 4 epochs without a val_loss improvement
early_stop = EarlyStopping(monitor = 'val_loss', patience = 4, verbose = 1)

In [None]:
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam, SGD, Adagrad

# NOTE(review): this repeats the earlier compile call with identical settings;
# one of the two cells looks redundant.
model.compile(optimizer = Adagrad(learning_rate = 0.01),
              loss = CategoricalCrossentropy(),
              metrics = ['accuracy'])

In [None]:
epoch = 10
# `fit_generator` is deprecated in TF 2.x; `fit` accepts the loader directly.
# len(train_loader) is the integer number of batches per epoch, replacing the
# float that np.ceil produced.
model.fit(train_loader, steps_per_epoch = len(train_loader),
          epochs = epoch, callbacks = [reduce_lr, checkpoint, early_stop], validation_data = val_loader, shuffle = True)

In [None]:
# `evaluate_generator` is deprecated; `evaluate` takes the loader directly,
# exactly as the test-set evaluation further down already does.
model.evaluate(val_loader)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import metrics

def pred_loader(model, loader):
    """Print a classification report and plot the confusion matrix and ROC curve.

    model  : trained Keras model whose output has one column per class
    loader : data iterator exposing ``classes`` (integer ground-truth labels)

    Assumes a binary problem with class index 1 as the positive class, and that
    ``loader`` was created with ``shuffle = False`` so ``loader.classes`` lines
    up with the prediction order. TODO confirm for any new caller.
    """
    # `predict_generator` is deprecated; `predict` accepts the loader directly
    pred_ohe = model.predict(loader)
    pred = np.argmax(pred_ohe, axis = 1)
    ytrue = loader.classes
    print(classification_report(ytrue, pred))
    print()
    sns.heatmap(confusion_matrix(ytrue, pred), fmt = '.2f', annot = True )

    # ROC / AUC need continuous scores. Feeding the hard argmax labels collapses
    # the curve to a single operating point, so use the class-1 probability.
    scores = pred_ohe[:, 1]
    auc = metrics.roc_auc_score(ytrue, scores)
    false_positive_rate, true_positive_rate, _ = metrics.roc_curve(ytrue, scores)
    plt.figure(figsize=(10, 8), dpi=100)
    plt.axis('scaled')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.title("AUC & ROC Curve")
    plt.plot(false_positive_rate, true_positive_rate, 'g')
    plt.fill_between(false_positive_rate, true_positive_rate, facecolor='lightgreen', alpha=0.7)
    plt.text(0.95, 0.05, 'AUC = %0.4f' % auc, ha='right', fontsize=12, weight='bold', color='blue')
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.show()

In [None]:
# Classification report, confusion matrix and ROC curve on the validation split
pred_loader(model, val_loader)

In [None]:
import tensorflow as tf

class IntegratedGradient:
    """Integrated-gradients attribution for one image and one output unit.

    The image is interpolated from an all-zero baseline, the model's gradients
    are averaged along that path with the trapezoidal rule, and the average is
    scaled by ``image - baseline``.
    """

    def __init__(self, model, img, n_alpha, target_idx):
        """
        model      : Keras model with a single (batch, h, w, c) input
        img        : image holding exactly h * w * c values; see ``validate``
        n_alpha    : number of interpolation intervals (n_alpha + 1 points are used)
        target_idx : index of the output unit to explain; ignored when the
                     model has a single output unit
        """
        self.model = model
        self.target_idx = target_idx
        _, h, w, c = model.input.shape
        self.input_shape = (h, w, c)
        self.output_shape = model.output.shape[-1]

        self.img = self.validate(img)
        self.alphas = np.linspace(0.0, 1.0, n_alpha + 1)
        self.baseline = np.zeros(self.input_shape)

    def validate(self, img):
        """Reshape ``img`` to the model input shape and divide it by 255.

        NOTE(review): this reshapes rather than resizes, so the image must
        already contain h * w * c values. It also presumes 0-255 pixel values;
        confirm for inputs that are already scaled to [0, 1].
        """
        return img.reshape(self.input_shape) / 255

    def sensitivity(self, img, baseline):
        """Return the difference between the image and the baseline."""
        return img - baseline

    def interpolate_images(self):
        """Return the stack of images on the straight path baseline -> image.

        The result has one image per alpha and is float32 so it matches the
        dtype the model computes in.
        """
        alpha_x = tf.constant(self.alphas, dtype = tf.float32)[:, tf.newaxis, tf.newaxis, tf.newaxis]
        baseline_x = tf.expand_dims(tf.cast(self.baseline, tf.float32), axis = 0)
        image_x = tf.expand_dims(tf.cast(self.img, tf.float32), axis = 0)
        delta = self.sensitivity(image_x, baseline_x)
        # Linear interpolation is baseline + alpha * (image - baseline). The
        # previous form, delta * (alpha + baseline), is only equal to it when
        # the baseline is all zeros.
        return baseline_x + alpha_x * delta

    def compute_gradients(self, interpolated_images):
        """Return the path-averaged gradient of the target output w.r.t. the input."""
        with tf.GradientTape() as tape:
            tape.watch(interpolated_images)
            probs = self.model(interpolated_images)
            if self.output_shape > 1:
                probs = probs[:, self.target_idx]

        # Differentiate after leaving the context so the tape does not also
        # record the gradient computation itself.
        gradient = tape.gradient(probs, interpolated_images)
        return self.integrate_assumptions(gradient)

    def integrate_assumptions(self, gradient):
        """Approximate the path integral of the gradients with the trapezoidal rule."""
        grad = (gradient[:-1] + gradient[1:]) / 2
        return tf.reduce_mean(grad, axis = 0)

    def get_attributions(self, compute_summation = False):
        """Compute the integrated-gradients attribution map.

        compute_summation : when True, return the per-pixel sum of absolute
                            attributions over the channel axis (shape (h, w));
                            otherwise return the signed (h, w, c) attributions.
        """
        interpolated_images = self.interpolate_images()
        avg_gradients = self.compute_gradients(interpolated_images)

        # Integrated gradients scales the averaged gradient by
        # (image - baseline); the raw average alone is not an attribution.
        delta = self.sensitivity(tf.cast(self.img, avg_gradients.dtype),
                                 tf.cast(self.baseline, avg_gradients.dtype))
        assumptions = delta * avg_gradients
        if compute_summation:
            assumptions = tf.reduce_sum(tf.math.abs(assumptions), axis = -1)

        return assumptions

In [None]:
def read_image(df, cls, base_path):
    """Load one randomly chosen image and its label.

    df        : DataFrame with 'img_path' and 'target' columns
    cls       : class name to draw from; a falsy value draws from every row
    base_path : directory the 'img_path' entries are relative to

    Returns ``(image, label)`` where the image is read with ``plt.imread``.
    """
    candidates = df[df.target == cls] if cls else df
    picked = candidates.sample(1)

    rel_path = picked['img_path'].values[0]
    label = picked['target'].values[0]
    full_path = os.path.join(base_path, rel_path)

    return plt.imread(full_path), label


def show_attrib_imgs(attrib, img, lbl):
    """Show an attribution map (left, 'hot_r' colour map) next to its image (right).

    attrib : 2-D attribution map to display
    img    : the image the attribution was computed for
    lbl    : label text appended to both panel titles
    """
    plt.figure(figsize = (10, 5))

    # (picture, title, extra imshow arguments) for each panel, left to right
    panels = (
        (attrib, f'Attribution : {lbl}', {'cmap' : 'hot_r'}),
        (img, f'Original img : {lbl}', {}),
    )
    for position, (picture, title, imshow_kwargs) in enumerate(panels, start = 1):
        plt.subplot(1, 2, position)
        plt.imshow(picture, **imshow_kwargs)
        plt.title(title)
        plt.axis(False)

    plt.show()

In [None]:
# Draw one random benign and one random malignant image from the training split
bimg, blbl = read_image(train_df, 'benign', train_path)
mimg, mlbl = read_image(train_df, 'malignant', train_path)

In [None]:
# Explain one sample per class with 50 interpolation steps.
# Output index 0 is benign and 1 is malignant, matching the label maps used for plotting.
for sample_img, class_idx, class_name in ((bimg, 0, 'benign'), (mimg, 1, 'malignant')):
    ig = IntegratedGradient(model, sample_img, 50, class_idx)
    attrib = ig.get_attributions(compute_summation = True)
    show_attrib_imgs(attrib, sample_img, class_name)

In [None]:
# Draw one random image per class from the validation split (files live under train_path)
bimg, blbl = read_image(val_df, 'benign', train_path)
mimg, mlbl = read_image(val_df, 'malignant', train_path)

In [None]:
# Explain the two validation samples with 50 interpolation steps.
# Output index 0 is benign and 1 is malignant, matching the label maps used for plotting.
for sample_img, class_idx, class_name in ((bimg, 0, 'benign'), (mimg, 1, 'malignant')):
    ig = IntegratedGradient(model, sample_img, 50, class_idx)
    attrib = ig.get_attributions(compute_summation = True)
    show_attrib_imgs(attrib, sample_img, class_name)

In [None]:
# i = cv2.cvtColor(mimg, cv2.COLOR_RGB2GRAY)
# Blur the malignant sample with a 5x5 Gaussian kernel, then apply an inverted
# binary threshold at 125 (the grayscale conversion above is left disabled).
img = cv2.threshold(cv2.GaussianBlur(mimg, (5,5), 0), 125, 255, cv2.THRESH_BINARY_INV)[1]
plt.imshow(img);
# The title shows the shape of the thresholded image
plt.title(img.shape)

plt.axis(False)

In [None]:
# Index the held-out test folder and build an unshuffled, augmentation-free loader
# (same /255 scaling as the validation loader).
test_df = get_dataset(test_path)
test_df = pd.DataFrame(test_df)
test_loader = create_data_gen(test_df, test_path, 'img_path', 'target', v_dargs, batch_size = 64, 
                               color_mode = 'rgb', class_mode = 'categorical', target_size = (300, 300), shuffle = False)

In [None]:
# Loss and accuracy on the held-out test set
model.evaluate(test_loader)

In [None]:
# Classification report, confusion matrix and ROC curve on the test set
pred_loader(model, test_loader)

In [None]:
# Draw one random benign and one random malignant image from the test split
bimg, blbl = read_image(test_df, 'benign', test_path)
mimg, mlbl = read_image(test_df, 'malignant', test_path)

In [None]:
# Explain the two test samples with 50 interpolation steps.
# Output index 0 is benign and 1 is malignant, matching the label maps used for plotting.
for sample_img, class_idx, class_name in ((bimg, 0, 'benign'), (mimg, 1, 'malignant')):
    ig = IntegratedGradient(model, sample_img, 50, class_idx)
    attrib = ig.get_attributions(compute_summation = True)
    show_attrib_imgs(attrib, sample_img, class_name)

In [None]:
def show_predicted_true_lbls(loader, model, n_imgs, col = 5):
    """Plot images from one random batch, titled with true and predicted labels.

    loader : indexable batch source yielding ``(images, one_hot_labels)``
    model  : callable returning one score per class for a batch of images
    n_imgs : maximum number of images to show (capped at the batch size)
    col    : number of grid columns

    Titles are green when the prediction matches the true label, red otherwise.
    """
    label_rev = {0 : 'benign', 1 : 'malignant'}

    batch_idx = np.random.choice(len(loader))
    images, labels = loader[batch_idx]
    n_imgs = min(labels.shape[0], n_imgs)
    row = get_row(n_imgs, col)
    preds = model(images)

    plt.figure(figsize = (col * 4, row * 4))

    for pos in range(n_imgs):
        pred = label_rev[np.argmax(preds[pos])]
        true = label_rev[np.argmax(labels[pos])]
        title_color = 'green' if true == pred else 'red'

        plt.subplot(row, col, pos + 1)
        plt.imshow(images[pos])
        plt.title(f'{true} (t) | {pred} (p)', color = title_color)
        plt.axis(False)

    plt.show()

In [None]:
# Show up to 20 test images from a random batch with true vs. predicted labels
show_predicted_true_lbls(test_loader, model, 20)

import tensorflow as tf

# Load the trained model (presumably the best checkpoint written as 'model.h5'
# during training; confirm the working directory is /kaggle/working)
model = tf.keras.models.load_model('/kaggle/working/model.h5')

# Convert the model to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model next to the checkpoint. model.h5 is a file,
# not a directory, so the previous '.../model.h5/mobileapp.tflite' path could
# not be opened.
with open('/kaggle/working/mobileapp.tflite', 'wb') as f:
    f.write(tflite_model)

In [None]:

model = tf.keras.models.load_model('/kaggle/working/model.h5')

# Configure the converter before converting so the model is converted once;
# the earlier unoptimized conversion result was discarded immediately.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Optimize.DEFAULT replaces the deprecated OPTIMIZE_FOR_SIZE alias
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Save the optimized TensorFlow Lite model to a file
with open('/kaggle/working/model.tflite', 'wb') as f:
    f.write(tflite_model)
