In [2]:
!pip install tensorflow==2.9.1

Collecting tensorflow==2.9.1
  Downloading tensorflow-2.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting flatbuffers<2,>=1.12 (from tensorflow==2.9.1)
  Downloading flatbuffers-1.12-py2.py3-none-any.whl.metadata (872 bytes)
Collecting gast<=0.4.0,>=0.2.1 (from tensorflow==2.9.1)
  Downloading gast-0.4.0-py3-none-any.whl.metadata (1.1 kB)
Collecting keras<2.10.0,>=2.9.0rc0 (from tensorflow==2.9.1)
  Downloading keras-2.9.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting keras-preprocessing>=1.1.1 (from tensorflow==2.9.1)
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting protobuf<3.20,>=3.9.2 (from tensorflow==2.9.1)
  Downloading protobuf-3.19.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (787 bytes)
Collecting tensorboard<2.10,>=2.9 (from tensorflow==2.9.1)
  Downloading tensorboard-2.9.1-py3-none-any.whl.metadata (1.9 kB)
Collecting tensorflow-estimator<2.10.0,>=2.9.0rc0 (from 

In [3]:
# import system libs
import os
import time
import shutil
import pathlib
import itertools

# import data handling tools
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

# import Deep learning Libraries
from tensorflow.keras.preprocessing import image
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras import regularizers
from keras.models import load_model
# Ignore Warnings
import warnings
warnings.filterwarnings("ignore")

print ('modules loaded')

modules loaded


In [4]:
def define_paths(data_dir):
    """Collect image file paths and their class labels from a dataset folder.

    Every sub-directory of ``data_dir`` is treated as one class: its name
    becomes the label of each regular file located directly inside it.

    Args:
        data_dir: Path of the dataset root folder.

    Returns:
        A ``(filepaths, labels)`` tuple of two equal-length lists, where
        ``labels[i]`` is the name of the folder that contains ``filepaths[i]``.
        Entries are ordered by folder name, then by file name.
    """
    # Top-level entries that must never be treated as class folders.
    ignored = {'READ.ME', 'data'}

    filepaths = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order stable so a seeded split of the result is reproducible.
    for fold in sorted(os.listdir(data_dir)):
        foldpath = os.path.join(data_dir, fold)

        # Besides the known extras, skip any stray top-level file: listing it
        # as if it were a folder would raise NotADirectoryError.
        if fold in ignored or not os.path.isdir(foldpath):
            continue

        for file in sorted(os.listdir(foldpath)):
            fpath = os.path.join(foldpath, file)
            # Only regular files are samples; nested folders are not images.
            if os.path.isfile(fpath):
                filepaths.append(fpath)
                labels.append(fold)

    return filepaths, labels


def define_df(files, classes):
    """Pair file paths with their labels in a two-column dataframe.

    Args:
        files: Sequence of image file paths.
        classes: Sequence of class labels, aligned with ``files``.

    Returns:
        A dataframe with the columns ``filepaths`` and ``labels``.
    """
    named_columns = {'filepaths': files, 'labels': classes}
    # One named Series per column, joined side by side on the positional index.
    column_series = [pd.Series(values, name=col) for col, values in named_columns.items()]
    return pd.concat(column_series, axis=1)

def split_data(data_dir):
    """Build stratified train / validation / test dataframes for a dataset folder.

    The files found under ``data_dir`` are split 80% / 20% into a training set
    and a hold-out set; the hold-out set is then halved into validation and
    test sets. Both splits are shuffled, stratified on the ``labels`` column
    and seeded with ``random_state=123``.

    Args:
        data_dir: Path of the dataset root folder.

    Returns:
        A ``(train_df, valid_df, test_df)`` tuple of dataframes.
    """
    full_df = define_df(*define_paths(data_dir))

    # First cut: training portion versus everything held out.
    train_df, holdout_df = train_test_split(
        full_df,
        train_size=0.8,
        test_size=0.2,
        shuffle=True,
        random_state=123,
        stratify=full_df['labels'],
    )

    # Second cut: divide the hold-out portion equally into validation and test.
    valid_df, test_df = train_test_split(
        holdout_df,
        test_size=0.5,
        shuffle=True,
        random_state=123,
        stratify=holdout_df['labels'],
    )

    return train_df, valid_df, test_df

In [5]:
def create_gens(train_df, valid_df, test_df, batch_size):
    '''
    Wrap the train, validation and test dataframes in Keras image data generators.

    Each dataframe must provide a 'filepaths' column (image paths) and a
    'labels' column (class names). Images are loaded as RGB, resized to
    224x224 and labelled in 'categorical' class mode.

    Args:
        train_df: Training dataframe; its generator shuffles and applies random
            horizontal flips.
        valid_df: Validation dataframe; its generator shuffles, no augmentation.
        test_df: Test dataframe; its generator keeps the dataframe order.
        batch_size: Batch size of the training and validation generators.

    Returns:
        A (train_gen, valid_gen, test_gen) tuple of generators.
    '''
    img_size = (224, 224)
    color = 'rgb'

    # Test batches use the largest divisor of the test-set size that does not
    # exceed 80, so one pass over the generator covers every test image
    # exactly once with equally sized batches.
    ts_length = len(test_df)
    if ts_length > 0:
        test_batch_size = max(
            size for size in range(1, min(ts_length, 80) + 1) if ts_length % size == 0
        )
    else:
        # An empty test frame has no divisors; fall back to the regular batch
        # size instead of failing on max() of an empty sequence.
        test_batch_size = batch_size

    def scalar(img):
        '''Pass-through preprocessing hook: pixel values are left unchanged.'''
        # NOTE(review): presumably the model does its own input scaling -- confirm.
        return img

    tr_gen = ImageDataGenerator(preprocessing_function=scalar, horizontal_flip=True)
    ts_gen = ImageDataGenerator(preprocessing_function=scalar)

    # Arguments common to the three flow_from_dataframe calls.
    shared = dict(x_col='filepaths', y_col='labels', target_size=img_size,
                  class_mode='categorical', color_mode=color)

    train_gen = tr_gen.flow_from_dataframe(train_df, shuffle=True, batch_size=batch_size, **shared)
    valid_gen = ts_gen.flow_from_dataframe(valid_df, shuffle=True, batch_size=batch_size, **shared)

    # shuffle=False keeps the test batches aligned with the row order of test_df.
    test_gen = ts_gen.flow_from_dataframe(test_df, shuffle=False, batch_size=test_batch_size, **shared)

    return train_gen, valid_gen, test_gen

In [6]:
# Batch size handed to create_gens for the training and validation generators.
batch_size = 40

# Dataset root: every sub-folder is read as one class.
data_dir = '/kaggle/input/the-kvasir-dataset/kvasir-dataset-v2'

# Stratified train / validation / test dataframes.
train_df, valid_df, test_df = split_data(data_dir)

# Image generators built on top of the three dataframes.
train_gen, valid_gen, test_gen = create_gens(train_df, valid_df, test_df, batch_size)

Found 6400 validated image filenames belonging to 8 classes.
Found 800 validated image filenames belonging to 8 classes.
Found 800 validated image filenames belonging to 8 classes.


In [7]:
# Load the saved Keras model (.h5 file) that the LIME section explains.
model = load_model(
    filepath='/kaggle/input/endoscopy-94/tensorflow2/default/1/efficientnetb3-Endoscopic-94.13.h5',
)


# LIME

In [None]:
!pip install lime



In [12]:
import os
import numpy as np
import matplotlib.pyplot as plt
from lime import lime_image
from skimage.segmentation import mark_boundaries

# Seed used for LIME's perturbation sampling and for its segmentation step,
# so that rerunning the cell reproduces the same explanations.
LIME_SEED = 123


def predict_fn(images):
    """Run the loaded model on a batch of images; LIME calls this for its perturbed samples."""
    # verbose=0 keeps Keras progress output out of the cell while LIME predicts repeatedly.
    return model.predict(images, verbose=0)


def find_sample_for_class(generator, class_index):
    """Return the first image from ``generator`` whose one-hot label marks ``class_index``.

    At most ``len(generator)`` batches (one full pass) are inspected, so a
    class that is missing from the data raises instead of looping forever.

    Raises:
        ValueError: if no image of the class is found within one pass.
    """
    for _ in range(len(generator)):
        image_batch, label_batch = next(generator)
        matching_images = image_batch[label_batch[:, class_index] == 1]
        if len(matching_images) > 0:
            return matching_images[0]
    raise ValueError(f"No image with class index {class_index} found in the generator")


# Initialize the LIME image explainer
explainer = lime_image.LimeImageExplainer(random_state=LIME_SEED)

# Map class indices back to class names
g_dict = test_gen.class_indices
class_names = {v: k for k, v in g_dict.items()}

# Set the output directory for saving images
output_dir = "/kaggle/working/output"
os.makedirs(output_dir, exist_ok=True)

# Start from the first batch so that rerunning the cell picks the same samples.
test_gen.reset()

# Explain one sample image per class
for class_index, class_name in class_names.items():
    sample_image = find_sample_for_class(test_gen, class_index)

    # top_labels spans every class so that the explanation for class_index is
    # always available, whatever the model actually predicts for this image.
    # NOTE(review): sample_image holds floats in the 0-255 range; LIME's default
    # segmentation may assume 0-1 floats or uint8 input -- confirm the
    # superpixels look sensible.
    explanation = explainer.explain_instance(
        sample_image,                    # The image to explain
        predict_fn,                      # Prediction function
        top_labels=len(class_names),     # Explain every class
        hide_color=0,                    # Color for hidden segments
        num_samples=1000,                # Number of perturbations
        random_seed=LIME_SEED,           # Seed for the segmentation algorithm
    )

    # Get the image and mask for the specific class label
    temp, mask = explanation.get_image_and_mask(
        label=class_index, positive_only=True, hide_rest=False, num_features=5, min_weight=0.0
    )

    # File paths for saving the original and LIME images
    original_image_path = os.path.join(output_dir, f"{class_name}.jpg")
    lime_image_path = os.path.join(output_dir, f"{class_name}_lime.jpg")

    # Save the original image (pixel values are scaled from 0-255 to 0-1 for imsave)
    plt.imsave(original_image_path, sample_image / 255.0)
    print(f"Saved original image as {original_image_path}")

    # Save the LIME explanation image with the selected superpixels outlined
    plt.imsave(lime_image_path, mark_boundaries(temp / 255.0, mask))
    print(f"Saved LIME explanation as {lime_image_path}")


  0%|          | 0/1000 [00:00<?, ?it/s]

Saved original image as /kaggle/working/output/dyed-lifted-polyps.jpg
Saved LIME explanation as /kaggle/working/output/dyed-lifted-polyps_lime.jpg


  0%|          | 0/1000 [00:00<?, ?it/s]

Saved original image as /kaggle/working/output/dyed-resection-margins.jpg
Saved LIME explanation as /kaggle/working/output/dyed-resection-margins_lime.jpg


  0%|          | 0/1000 [00:00<?, ?it/s]

Saved original image as /kaggle/working/output/esophagitis.jpg
Saved LIME explanation as /kaggle/working/output/esophagitis_lime.jpg


  0%|          | 0/1000 [00:00<?, ?it/s]

Saved original image as /kaggle/working/output/normal-cecum.jpg
Saved LIME explanation as /kaggle/working/output/normal-cecum_lime.jpg


  0%|          | 0/1000 [00:00<?, ?it/s]

Saved original image as /kaggle/working/output/normal-pylorus.jpg
Saved LIME explanation as /kaggle/working/output/normal-pylorus_lime.jpg


  0%|          | 0/1000 [00:00<?, ?it/s]

Saved original image as /kaggle/working/output/normal-z-line.jpg
Saved LIME explanation as /kaggle/working/output/normal-z-line_lime.jpg


  0%|          | 0/1000 [00:00<?, ?it/s]

Saved original image as /kaggle/working/output/polyps.jpg
Saved LIME explanation as /kaggle/working/output/polyps_lime.jpg


  0%|          | 0/1000 [00:00<?, ?it/s]

Saved original image as /kaggle/working/output/ulcerative-colitis.jpg
Saved LIME explanation as /kaggle/working/output/ulcerative-colitis_lime.jpg
