# KVASIR

In [1]:
import tensorflow as tf 
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd

# --- Training hyper-parameters ---
BATCH_SIZE = 32   # samples per batch
EPOCH = 10        # number of training epochs

# --- tf.data pipeline settings ---
AT = tf.data.AUTOTUNE   # let tf.data choose the parallelism / prefetch depth
BUFFER = 1000           # buffer size

# --- Steps per pass, derived from hard-coded counts ---
# NOTE(review): 800 and 200 are presumably the train / validation sample
# counts -- confirm they still match the dataset actually used.
STEPS_PER_EPOCH = 800 // BATCH_SIZE
VALIDATION_STEPS = 200 // BATCH_SIZE

# Image shape; the first two entries are the size images are resized to.
INPUT_SIZE = (224, 224, 3)

In [2]:
# Load the KVASIR index table (columns shown below: filepath, label, label_name).
df = pd.read_hdf('kvasir_dataset.h5')
# Bare expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,filepath,label,label_name
0,kvasir-dataset/dyed-lifted-polyps/0053d7cd-549...,0,dyed-lifted-polyps
1,kvasir-dataset/dyed-lifted-polyps/007d5aa7-728...,0,dyed-lifted-polyps
2,kvasir-dataset/dyed-lifted-polyps/00cf9508-6ad...,0,dyed-lifted-polyps
3,kvasir-dataset/dyed-lifted-polyps/01507fbd-4d3...,0,dyed-lifted-polyps
4,kvasir-dataset/dyed-lifted-polyps/018ed8ed-e48...,0,dyed-lifted-polyps
...,...,...,...
3995,kvasir-dataset/ulcerative-colitis/fded0688-c4c...,7,ulcerative-colitis
3996,kvasir-dataset/ulcerative-colitis/fe7f77f0-6bd...,7,ulcerative-colitis
3997,kvasir-dataset/ulcerative-colitis/fe847a94-738...,7,ulcerative-colitis
3998,kvasir-dataset/ulcerative-colitis/ff493607-67f...,7,ulcerative-colitis


In [3]:
# KVASIR class names listed in label order; the position in this list is the
# integer label stored in the `label` column.
class_mapping = {
    class_name: class_id
    for class_id, class_name in enumerate([
        "dyed-lifted-polyps",
        "dyed-resection-margins",
        "esophagitis",
        "normal-cecum",
        "normal-pylorus",
        "normal-z-line",
        "polyps",
        "ulcerative-colitis",
    ])
}

In [4]:
from sklearn.model_selection import train_test_split

# Split each class on its own (90 % train / 10 % test, fixed seed) so every
# class contributes the same proportion of rows to both splits.
per_class_splits = [
    train_test_split(df[df['label'] == class_id], test_size=0.1, random_state=42)
    for class_id in class_mapping.values()
]

# Separate the (train, test) pairs into two parallel lists of per-class frames.
trains_df = [train_part for train_part, _ in per_class_splits]
tests_df = [test_part for _, test_part in per_class_splits]

In [5]:
# Stitch the per-class pieces back together into one frame per split.
train_df = pd.concat(trains_df)
test_df = pd.concat(tests_df)

# Persist both splits to HDF5 under the same key.
for split_frame, split_path in ((train_df, 'train_df.h5'), (test_df, 'test_df.h5')):
    split_frame.to_hdf(split_path, key='data')

# DermaMNIST

In [6]:
# Load the DermaMNIST index table (columns shown below: filepath, label, label_name).
# NOTE(review): this rebinds `df`, replacing the KVASIR frame loaded earlier.
df = pd.read_hdf('dermamnist.h5')
# Bare expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,filepath,label,label_name
0,dermamnist/actinic keratoses and intraepitheli...,0,actinic keratoses and intraepithelial carcinoma
1,dermamnist/actinic keratoses and intraepitheli...,0,actinic keratoses and intraepithelial carcinoma
2,dermamnist/actinic keratoses and intraepitheli...,0,actinic keratoses and intraepithelial carcinoma
3,dermamnist/actinic keratoses and intraepitheli...,0,actinic keratoses and intraepithelial carcinoma
4,dermamnist/actinic keratoses and intraepitheli...,0,actinic keratoses and intraepithelial carcinoma
...,...,...,...
7002,dermamnist/vascular lesions/f3e8f529-88ad-4061...,6,vascular lesions
7003,dermamnist/vascular lesions/f63f73bf-98c9-41b9...,6,vascular lesions
7004,dermamnist/vascular lesions/fb5f7ba4-68fc-4ba3...,6,vascular lesions
7005,dermamnist/vascular lesions/fe7dbb19-8164-4d70...,6,vascular lesions


In [7]:
# DermaMNIST class names listed in label order; the position in this list is
# the integer label stored in the `label` column.
class_mapping = {
    class_name: class_id
    for class_id, class_name in enumerate([
        'actinic keratoses and intraepithelial carcinoma',
        'basal cell carcinoma',
        'benign keratosis-like lesions',
        'dermatofibroma',
        'melanoma',
        'melanocytic nevi',
        'vascular lesions',
    ])
}

In [8]:
from sklearn.model_selection import train_test_split

# Split each class on its own (90 % train / 10 % test, fixed seed) so every
# class contributes the same proportion of rows to both splits.
per_class_splits = [
    train_test_split(df[df['label'] == class_id], test_size=0.1, random_state=42)
    for class_id in class_mapping.values()
]

# Separate the (train, test) pairs into two parallel lists of per-class frames.
trains_df = [train_part for train_part, _ in per_class_splits]
tests_df = [test_part for _, test_part in per_class_splits]

In [9]:
# Stitch the per-class pieces back together into one frame per split.
train_df = pd.concat(trains_df)
test_df = pd.concat(tests_df)

# Persist both splits to HDF5 under the same key.
for split_frame, split_path in (
    (train_df, 'dermamnist_train_df.h5'),
    (test_df, 'dermamnist_test_df.h5'),
):
    split_frame.to_hdf(split_path, key='data')

# Load images into tensors

In [7]:
def load_images_to_tensor(images, base_dir='../Dataset', target_size=None):
    """Read, decode, resize and normalise a collection of JPEG image files.

    Args:
        images: Iterable of file paths relative to ``base_dir``.
        base_dir: Directory the relative paths are resolved against. The
            default keeps the location this notebook has always read from.
        target_size: ``(height, width)`` handed to ``tf.image.resize``. When
            omitted, the first two entries of the module-level ``INPUT_SIZE``
            are used (looked up at call time).

    Returns:
        A list with one image tensor per input path, in input order, with
        three channels and values scaled to ``[-1, 1]`` via ``x / 127.5 - 1``.

    Raises:
        Whatever ``tf.debugging.check_numerics`` raises when a processed image
        contains NaN or Inf values, plus any TensorFlow I/O or decode error
        for unreadable files.
    """
    if target_size is None:
        target_size = INPUT_SIZE[:2]

    tensor_images = []
    for relative_path in images:
        # Read the raw file bytes
        raw_bytes = tf.io.read_file(f'{base_dir}/{relative_path}')
        # Decode the bytes into a 3-channel image tensor
        image = tf.image.decode_jpeg(raw_bytes, channels=3)
        # Resize the image to the desired size
        image = tf.image.resize(image, target_size)
        # Map pixel values from [0, 255] to [-1, 1]
        image = image / 127.5 - 1

        # Fail fast on corrupt data instead of storing NaN/Inf pixels
        tf.debugging.check_numerics(image, message="NaN found in image tensor")

        tensor_images.append(image)
    return tensor_images

In [8]:
def load_labels_to_tensor(labels):
    """Convert every label in ``labels`` to its own tensor.

    Args:
        labels: Iterable of label values accepted by ``tf.convert_to_tensor``.

    Returns:
        A list of tensors, one per input label, in input order.
    """
    return [tf.convert_to_tensor(raw_label) for raw_label in labels]

In [10]:
# Reload the KVASIR training split from disk instead of relying on the
# kernel-global `train_df`. The DermaMNIST section reuses that name, so after
# "Restart & Run All" it would hold DermaMNIST rows while the output folder
# used further down is named for KVASIR.
# To preprocess DermaMNIST instead, point this at 'dermamnist_train_df.h5'
# (and rename the output folder accordingly).
train_df = pd.read_hdf('train_df.h5', key='data')

train_image = load_images_to_tensor(train_df['filepath'].values)
train_labels = load_labels_to_tensor(train_df['label'].values)

In [None]:
# Sanity check: how many images were loaded and the shape of the first one.
print(len(train_image))
print(train_image[0].shape)

# Distinct label values, the index of each element into them, and their counts.
print(tf.unique_with_counts(train_labels))

In [12]:
import os

# Master switch: when True the training set is extended with augmented copies.
augmentation = True
# Enable the colour-based transforms inside random_augmentation.
color_augmentation = True
# Enable the geometric transforms inside random_augmentation.
geo_augmentation = True
# Output directory for the serialised dataset.
folder = 'kvasir_scaled_min1_to_1'

# Create the output directory if needed. `exist_ok=True` replaces the separate
# exists-check, so re-running the cell is safe and there is no window between
# checking and creating.
os.makedirs(folder, exist_ok=True)

In [13]:
import random

def random_augmentation(img, label):
    """Return a randomly augmented version of ``img`` with its label unchanged.

    Which transform families run is controlled by the module-level flags
    ``color_augmentation`` and ``geo_augmentation``, read at call time.

    Colour transforms (each applied with probability 0.5): brightness,
    contrast, saturation, hue. They operate on a [0, 1] copy of the image,
    which is mapped back to [-1, 1] afterwards.

    Geometric transforms: central crop + resize back to ``INPUT_SIZE[:2]``
    (probability 0.5), horizontal flip (probability 0.5), and a rotation by a
    random multiple of 90 degrees (always applied; may be 0).

    Args:
        img: Image tensor with values in [-1, 1].
            NOTE(review): assumed to be square H x W x 3 so that rot90 keeps
            the shape -- confirm against the loader.
        label: Passed through untouched.

    Returns:
        Tuple ``(augmented_img, label)``.
    """
    def maybe_apply(transform, image):
        # Flip a fair coin: apply `transform` on heads, pass through on tails.
        return tf.cond(
            tf.random.uniform([], 0, 1) < 0.5,
            lambda: transform(image),
            lambda: image
        )

    if color_augmentation:
        # Convert image from [-1, 1] to [0, 1]
        img = (img + 1.0) / 2.0

        # Randomly adjust brightness
        img = maybe_apply(lambda x: tf.image.random_brightness(x, max_delta=0.2), img)

        # Randomly adjust contrast
        img = maybe_apply(lambda x: tf.image.random_contrast(x, lower=0.5, upper=2.0), img)

        # Brightness/contrast can push values outside [0, 1]. Saturation and
        # hue work through an RGB->HSV conversion that is only well defined on
        # [0, 1] input, so clamp before handing the image to them.
        img = tf.clip_by_value(img, 0.0, 1.0)

        # Randomly adjust saturation
        img = maybe_apply(lambda x: tf.image.random_saturation(x, lower=0.75, upper=1.25), img)

        # Randomly adjust hue
        img = maybe_apply(lambda x: tf.image.random_hue(x, max_delta=0.1), img)

        # Clip image values to be in the range [0, 1]
        img = tf.clip_by_value(img, 0.0, 1.0)

        # Convert the image back to [-1, 1]
        img = (img * 2.0) - 1.0

    if geo_augmentation:
        # Randomly crop the central region (fraction drawn from [0.7, 0.9))
        # and resize back to the model input size
        random_crop_size = tf.random.uniform([], minval=0.7, maxval=0.9)
        img = maybe_apply(
            lambda x: tf.image.resize(tf.image.central_crop(x, random_crop_size), INPUT_SIZE[:2]),
            img
        )

        # Randomly flip horizontally
        img = maybe_apply(tf.image.flip_left_right, img)

        # Randomly rotate by 0, 90, 180, or 270 degrees
        num_rotations = tf.random.uniform([], minval=0, maxval=4, dtype=tf.int32)  # Randomly choose 0, 1, 2, or 3
        img = tf.image.rot90(img, k=num_rotations)

    return img, label

# Create Dataset

In [None]:
# Pair every image tensor with its label tensor.
train_dataset = list(zip(train_image, train_labels))
# Peek at the first pair: is its label equal to 0? Then show the dataset size.
print(train_dataset[0][1] == 0)
print(len(train_dataset))

In [None]:
import numpy as np 

# Take the first sample and produce two independent augmentations of it.
img, label = train_dataset[0]

aug_img, aug_label = random_augmentation(img, label)
aug2_img, aug2_label = random_augmentation(img, label)

# Convert each tensor to a numpy array and rescale from [-1, 1] to [0, 255].
preview_panels = [
    (tensor.numpy() * 127.5 + 127.5).astype(np.uint8)
    for tensor in (img, aug_img, aug2_img)
]

# Show original and both augmentations side by side, without axes.
for panel_position, panel_pixels in enumerate(preview_panels, start=1):
    plt.subplot(1, 3, panel_position)
    plt.imshow(panel_pixels)
    plt.axis('off')

plt.show()

In [17]:
import numpy as np

if augmentation:
    # Always start from the un-augmented pairs so this cell is idempotent.
    # Extending `train_dataset` in place meant a re-run augmented the already
    # augmented images and tripled the list again.
    original_pairs = list(zip(train_image, train_labels))

    # Two independent augmentation passes over the originals.
    aug1 = [random_augmentation(image, label) for image, label in original_pairs]
    aug2 = [random_augmentation(image, label) for image, label in original_pairs]

    # Final layout: originals, then first pass, then second pass.
    train_dataset = original_pairs + aug1 + aug2

In [None]:
# Number of (image, label) pairs currently in the training set.
len(train_dataset)

In [None]:
# Try filtering: keep only the (image, label) pairs whose label equals 1.
filtered = [(image, label) for image, label in train_dataset if label == 1]
# Display the first matching pair.
filtered[0]

In [19]:
import pickle
# Serialise the list of (image, label) pairs to `<folder>/training`.
with open(f'{folder}/training', 'wb') as training_file:
    pickle.dump(train_dataset, training_file)