In [None]:
import os
import shutil
import zipfile

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skimage.restoration import denoise_nl_means, estimate_sigma
from skimage.transform import resize
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications.mobilenet import MobileNet, preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, save_img

In [None]:
# Install and configure Kaggle
!pip install kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000

# Extract the dataset
# Unpack into the absolute /content/dataset directory: the metadata CSV and
# the two image folders are read from that location further down.
with zipfile.ZipFile('/content/skin-cancer-mnist-ham10000.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/dataset')

# Load metadata
df = pd.read_csv("/content/dataset/HAM10000_metadata.csv")
print(df)
print(df['dx'].value_counts())

# Drop the columns that are not used afterwards and expose the diagnosis
# column 'dx' under the name 'label'.
df = (
    df.drop(columns=['lesion_id', 'dx_type', 'sex', 'localization', 'age'])
      .rename(columns={'dx': 'label'})
)

# Encode each diagnosis as an integer class id, then store the ids as strings.
label_encoder = LabelEncoder()
df = df.assign(label=label_encoder.fit_transform(df['label']))
df = df.astype({'label': str})
print(df['label'].value_counts())

# Copy images to a single directory
source_folder1 = '/content/dataset/HAM10000_images_part_1'
source_folder2 = '/content/dataset/HAM10000_images_part_2'
destination_folder = '/content/images'
# dirs_exist_ok=True on every call: the second copy merges into the directory
# created by the first, and re-running the cell no longer raises
# FileExistsError because /content/images already exists.
for source_folder in (source_folder1, source_folder2):
    shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)

# Define image paths
# Make sure the output directory for the preprocessed files exists.
os.makedirs('/content/preprocessed_images', exist_ok=True)

# For every image id, record where the copied original lives and where its
# preprocessed version is written (same file name, different directory).
df['image_path'] = [f'/content/images/{x}.jpg' for x in df['image_id']]
df['preprocessed_path'] = [f'/content/preprocessed_images/{x}.jpg' for x in df['image_id']]

def preprocess_and_save_image(row):
    """Resize, denoise and save the image described by one metadata row.

    Args:
        row: Mapping-like record (here a DataFrame row) that provides
            'image_path' (file to read) and 'preprocessed_path' (file to
            write).

    Returns:
        None. The processed image is written to row['preprocessed_path'].
    """
    img = load_img(row['image_path'])
    img = img_to_array(img)
    # preserve_range=True keeps the original value range instead of letting
    # resize rescale the data.
    img = resize(img, (224, 224), preserve_range=True, anti_aliasing=True)

    # Apply Non-Local Means filter
    # channel_axis=-1 marks the last axis as the colour channels. It replaces
    # the multichannel=True flag, which scikit-image deprecated and newer
    # releases reject.
    sigma_est = np.mean(estimate_sigma(img, channel_axis=-1))
    patch_kw = dict(patch_size=5, patch_distance=6, channel_axis=-1)
    img = denoise_nl_means(img, h=1.15 * sigma_est, fast_mode=True, **patch_kw)

    # NOTE(review): preprocess_input runs before the image is written to disk.
    # If a later data generator applies preprocess_input again, the model
    # input is normalised twice -- confirm this is intended.
    img = preprocess_input(img)
    save_img(row['preprocessed_path'], img)


# Run the preprocessing once for every metadata row. The call has to sit at
# cell level: nested inside the function body it never executed, and it would
# have recursed over the whole frame had the function been called.
# The trailing semicolon hides the Series of None values that apply returns.
df.apply(preprocess_and_save_image, axis=1);

In [None]:
# Display images before and after preprocessing
# Read the same sample image (ISIC_0024306) from both directories:
# img1 comes from the copied originals in /content/images, img2 from the
# files written to /content/preprocessed_images.
img1 = mpimg.imread('/content/images/ISIC_0024306.jpg')
img2 = mpimg.imread('/content/preprocessed_images/ISIC_0024306.jpg')