In [1]:
import zipfile

# Absolute path of the PH2 image archive
zip_path = "/sfs/weka/scratch/axu5pa/DS_4002_Project_3/PH2 Dataset images.zip"

# Read the archive's member names (nothing is extracted), then print them
with zipfile.ZipFile(zip_path, mode='r') as zip_ref:
    file_list = zip_ref.namelist()
print("Files in zip:", file_list)

Files in zip: ['PH2 Dataset images/', '__MACOSX/._PH2 Dataset images', 'PH2 Dataset images/IMD168/', '__MACOSX/PH2 Dataset images/._IMD168', 'PH2 Dataset images/IMD157/', '__MACOSX/PH2 Dataset images/._IMD157', 'PH2 Dataset images/IMD365/', '__MACOSX/PH2 Dataset images/._IMD365', 'PH2 Dataset images/IMD396/', '__MACOSX/PH2 Dataset images/._IMD396', 'PH2 Dataset images/IMD150/', '__MACOSX/PH2 Dataset images/._IMD150', 'PH2 Dataset images/IMD159/', '__MACOSX/PH2 Dataset images/._IMD159', 'PH2 Dataset images/IMD166/', '__MACOSX/PH2 Dataset images/._IMD166', 'PH2 Dataset images/IMD398/', '__MACOSX/PH2 Dataset images/._IMD398', 'PH2 Dataset images/IMD161/', '__MACOSX/PH2 Dataset images/._IMD161', 'PH2 Dataset images/IMD135/', '__MACOSX/PH2 Dataset images/._IMD135', 'PH2 Dataset images/IMD132/', '__MACOSX/PH2 Dataset images/._IMD132', 'PH2 Dataset images/IMD331/', '__MACOSX/PH2 Dataset images/._IMD331', 'PH2 Dataset images/IMD103/', '__MACOSX/PH2 Dataset images/._IMD103', 'PH2 Dataset images

In [2]:
import os
# Show the kernel's working directory; the relative output folder used later
# ("./normalized_images") is resolved against it.
print(os.getcwd())


/sfs/weka/scratch/axu5pa/DS_4002_Project_3/PH2Dataset


In [3]:
pip install opencv-python

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [23]:
import os
import numpy as np
from PIL import Image
import cv2
import shutil

# Source directory that is scanned for the .bmp input images
images_path = "/sfs/weka/scratch/axu5pa/DS_4002_Project_3/PH2Dataset/Images"

# Destination directory for the processed images (relative to the current
# working directory); created up front, and an existing folder is reused
output_folder = "./normalized_images"
os.makedirs(name=output_folder, exist_ok=True)

# Scale pixel values by 1/255
def normalize_image(image):
    """Return `image` as a float32 NumPy array with every value divided by 255.0.

    Args:
        image: Anything `np.asarray` accepts (PIL image, ndarray, nested list).

    Returns:
        A float32 ndarray with the same shape as the input; 8-bit inputs
        (0-255) therefore map onto the range [0, 1].
    """
    pixels = np.asarray(image, dtype=np.float32)
    return pixels / 255.0

# Contrast enhancement: histogram-equalize the lightness channel in LAB space
def enhance_contrast(image):
    """Equalize the L channel of an RGB image and return the result as RGB.

    The input is converted to a NumPy array, moved from RGB to LAB, the L
    channel is passed through `cv2.equalizeHist` while the a and b channels
    are kept unchanged, and the merged result is converted back to RGB.

    Args:
        image: RGB image (PIL image or array).
            # assumes 8-bit, 3-channel data — TODO confirm against callers

    Returns:
        The RGB array produced by `cv2.cvtColor(..., cv2.COLOR_LAB2RGB)`.
    """
    rgb_pixels = np.asarray(image)

    # Work in LAB so that only lightness is altered
    lightness, chroma_a, chroma_b = cv2.split(cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2LAB))

    # Recombine the equalized lightness with the untouched colour channels
    equalized_lab = cv2.merge((cv2.equalizeHist(lightness), chroma_a, chroma_b))

    return cv2.cvtColor(equalized_lab, cv2.COLOR_LAB2RGB)

# Keep only the lesion region of the base image
def merge_images(base_image, lesion_image):
    """Black out every base-image pixel that lies outside the lesion image's non-zero area.

    Args:
        base_image: PIL image to be masked; it is resized (LANCZOS) to the
            lesion image's size before masking.
        lesion_image: PIL image used as the mask; it is converted to
            grayscale and every pixel with a value above 0 counts as lesion.

    Returns:
        A new PIL image in which pixels under the mask carry the (resized)
        base image's values and all other pixels are zero.
    """
    # Single-channel version of the lesion image drives the mask
    gray_lesion = lesion_image.convert("L")

    # Bring the base image to the mask's dimensions so the two arrays line up
    resized_base = base_image.resize(gray_lesion.size, Image.Resampling.LANCZOS)

    inside_lesion = np.array(gray_lesion) > 0

    # np.array() yields a fresh copy, so zeroing the background here does not
    # touch the caller's image
    masked_pixels = np.array(resized_base)
    masked_pixels[~inside_lesion] = 0

    return Image.fromarray(masked_pixels)

# Collect every entry name in the images folder
image_files = os.listdir(images_path)

# Report an empty folder; the loop below simply does nothing in that case
if len(image_files) == 0:
    print("No files found in the images folder.")

for image_file in image_files:
    # Only .bmp files are candidates
    if not image_file.endswith('.bmp'):
        continue

    try:
        # Names containing '_lesion' or '_Label' are not base images; skip them silently
        if '_lesion' in image_file or '_Label' in image_file:
            continue

        base_name = os.path.splitext(image_file)[0]
        base_image_path = os.path.join(images_path, image_file)
        lesion_image_path = os.path.join(images_path, f"{base_name}_lesion.bmp")

        # A base image without its "<name>_lesion.bmp" companion cannot be merged
        if not os.path.exists(lesion_image_path):
            print(f"No corresponding lesion file found for {base_name}")
            continue

        print(f"Processing: {base_name}")

        with Image.open(base_image_path) as base_img:
            with Image.open(lesion_image_path) as lesion_img:
                # Pipeline: mask to lesion -> equalize contrast -> scale by 1/255
                merged_image = merge_images(base_img, lesion_img)
                enhanced_image = enhance_contrast(merged_image)
                normalized_array = normalize_image(enhanced_image)

                # Back to 8-bit so PIL can build an image, then resize to 256x256
                eight_bit_array = (normalized_array * 255).astype(np.uint8)
                resized_image = Image.fromarray(eight_bit_array).resize((256, 256))

                # Write the result next to the other processed images
                output_path = os.path.join(output_folder, f"{base_name}_merged.bmp")
                resized_image.save(output_path)

                print(f"Processed and saved: {output_path}")
    except Exception as e:
        # Best-effort batch: report the failure and carry on with the next file
        print(f"Error processing file {image_file}: {e}")



Processing: IMD417
Processed and saved: ./normalized_images/IMD417_merged.bmp
Processing: IMD254
Processed and saved: ./normalized_images/IMD254_merged.bmp
Processing: IMD020
Processed and saved: ./normalized_images/IMD020_merged.bmp
Processing: IMD039
Processed and saved: ./normalized_images/IMD039_merged.bmp
Processing: IMD175
Processed and saved: ./normalized_images/IMD175_merged.bmp
Processing: IMD279
Processed and saved: ./normalized_images/IMD279_merged.bmp
Processing: IMD155
Processed and saved: ./normalized_images/IMD155_merged.bmp
Processing: IMD328
Processed and saved: ./normalized_images/IMD328_merged.bmp
Processing: IMD085
Processed and saved: ./normalized_images/IMD085_merged.bmp
Processing: IMD142
Processed and saved: ./normalized_images/IMD142_merged.bmp
Processing: IMD312
Processed and saved: ./normalized_images/IMD312_merged.bmp
Processing: IMD143
Processed and saved: ./normalized_images/IMD143_merged.bmp
Processing: IMD154
Processed and saved: ./normalized_images/IMD1

In [24]:
# Report what ended up in the output folder
output_files = os.listdir(output_folder)
if output_files:
    print(f"Files saved to the output folder: {output_files}")
else:
    print("No files saved to the output folder.")

# Bundle the contents of the output folder into normalized_merged_images.zip
shutil.make_archive(base_name="normalized_merged_images", format='zip', root_dir=output_folder)

# Tell the user the archive step has finished
print("Normalized and merged images are zipped and ready to download.")

Files saved to the output folder: ['IMD279_merged.bmp', 'IMD420_merged.bmp', 'IMD112_merged.bmp', 'IMD075_merged.bmp', 'IMD156_merged.bmp', 'IMD256_merged.bmp', 'IMD403_merged.bmp', 'IMD304_merged.bmp', 'IMD385_merged.bmp', 'IMD153_merged.bmp', 'IMD133_merged.bmp', 'IMD432_merged.bmp', 'IMD058_merged.bmp', 'IMD285_merged.bmp', 'IMD170_merged.bmp', 'IMD413_merged.bmp', 'IMD038_merged.bmp', 'IMD437_merged.bmp', 'IMD154_merged.bmp', 'IMD003_merged.bmp', 'IMD105_merged.bmp', 'IMD057_merged.bmp', 'IMD132_merged.bmp', 'IMD080_merged.bmp', 'IMD107_merged.bmp', 'IMD418_merged.bmp', 'IMD368_merged.bmp', 'IMD203_merged.bmp', 'IMD126_merged.bmp', 'IMD397_merged.bmp', 'IMD103_merged.bmp', 'IMD138_merged.bmp', 'IMD161_merged.bmp', 'IMD328_merged.bmp', 'IMD037_merged.bmp', 'IMD383_merged.bmp', 'IMD417_merged.bmp', 'IMD434_merged.bmp', 'IMD140_merged.bmp', 'IMD426_merged.bmp', 'IMD025_merged.bmp', 'IMD389_merged.bmp', 'IMD019_merged.bmp', 'IMD378_merged.bmp', 'IMD243_merged.bmp', 'IMD006_merged.bmp',

In [19]:
pip install torch torchvision torchaudio

Defaulting to user installation because normal site-packages is not writeable
Collecting torch
  Obtaining dependency information for torch from https://files.pythonhosted.org/packages/d1/35/e8b2daf02ce933e4518e6f5682c72fd0ed66c15910ea1fb4168f442b71c4/torch-2.5.1-cp311-cp311-manylinux1_x86_64.whl.metadata
  Downloading torch-2.5.1-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Obtaining dependency information for torchvision from https://files.pythonhosted.org/packages/de/e9/e190ecec448d5a2abad8348cf085fcb39962a491e3f40dcb023721e04feb/torchvision-0.20.1-cp311-cp311-manylinux1_x86_64.whl.metadata
  Downloading torchvision-0.20.1-cp311-cp311-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Obtaining dependency information for torchaudio from https://files.pythonhosted.org/packages/41/33/0f21b15f8e231bb55578f6b32e8c18675585b7bf97cb0aee96b1591e4193/torchaudio-2.5.1-cp311-cp311-manylinux1_x86_64.whl.metadata
  Downloading torchaudio-2.5.1-cp311-cp