# Resize and pad images

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from tqdm import tqdm
import os
from PIL import Image
import numpy as np

In [None]:
def read_16bit_to_8bit(image_path):
    """Load an image and linearly rescale its intensities to 8 bits.

    The smallest pixel value in the image is mapped to 0 and the largest to
    255 (min-max normalization over the whole pixel array), then the result
    is truncated to ``uint8``.

    Args:
        image_path: Path of the image file to open with PIL.

    Returns:
        A PIL image created from a ``uint8`` array with the same shape as the
        source pixel array. An image whose pixels all share one value has no
        contrast to stretch and is returned as all zeros.
    """
    # The context manager closes the file handle once the pixels are copied
    # out; Image.open on its own keeps the file open.
    with Image.open(image_path) as img_16bit:
        # float64 keeps the arithmetic below away from integer wrap-around
        # and from dtypes (e.g. bool) that do not support subtraction.
        img_array = np.asarray(img_16bit, dtype=np.float64)

    lowest = img_array.min()
    value_range = img_array.max() - lowest

    if value_range == 0:
        # Constant image: dividing by a zero range would produce NaNs whose
        # cast to uint8 is undefined, so emit a well-defined black image.
        img_8bit = np.zeros(img_array.shape, dtype=np.uint8)
    else:
        # Normalize to the range 0-255
        img_8bit = ((img_array - lowest) / value_range * 255.0).astype(np.uint8)

    # Convert back to a PIL image
    return Image.fromarray(img_8bit)

def preprocess_image(image_path, target_size=(224, 224)):
    """Pad an image to a square on a black background, then resize it.

    The image is read through ``read_16bit_to_8bit``, pasted onto a black RGB
    square whose side equals the longer image dimension, and resampled to
    ``target_size`` with the Lanczos filter.

    Args:
        image_path: Path of the image file to process.
        target_size: ``(width, height)`` of the returned image.

    Returns:
        The padded and resized RGB PIL image.
    """
    source = read_16bit_to_8bit(image_path)
    src_w, src_h = source.size

    # The square canvas is as large as the longer side of the image.
    side = max(src_w, src_h)

    # Only the shorter axis ends up with a non-zero offset (the longer axis
    # yields (0 + 1) // 2 == 0). When the missing amount is odd, the extra
    # pixel of padding goes before the image.
    offset = ((side - src_w + 1) // 2, (side - src_h + 1) // 2)

    canvas = Image.new('RGB', (side, side), (0, 0, 0))
    canvas.paste(source, offset)

    return canvas.resize(target_size, Image.LANCZOS)

def plot_images(image_paths):
    """Show each original image next to its saved preprocessed version.

    One figure row is drawn per path: the original on the left and, on the
    right, the file found under ``OUTPUT_FOLDER`` at the same last three path
    components (two directory levels plus the file name).

    Args:
        image_paths: Sequence of paths to the original images. The
            preprocessed counterparts must already exist on disk.

    Returns:
        None. The figure is displayed with ``plt.show()``; nothing is drawn
        when ``image_paths`` is empty.
    """
    num_images = len(image_paths)
    if num_images == 0:
        # plt.subplots cannot build a grid with zero rows.
        return

    # squeeze=False keeps `axs` two-dimensional even for a single row, so the
    # axs[i, col] indexing below works for any number of images.
    fig, axs = plt.subplots(
        num_images, 2, figsize=(10, 5*num_images), squeeze=False
    )

    for i, path in enumerate(image_paths):
        # Location of the preprocessed copy of this image
        path2 = OUTPUT_FOLDER + '/'.join(path.split('/')[-3:])

        # Close both files as soon as they have been handed to matplotlib
        with Image.open(path) as img, Image.open(path2) as img_preprocessed:
            # Display original image
            axs[i, 0].imshow(img, cmap='gray')
            axs[i, 0].set_title(f'Original {i+1}')
            axs[i, 0].axis('off')

            # Display preprocessed image
            axs[i, 1].imshow(img_preprocessed, cmap='gray')
            axs[i, 1].set_title(f'Preprocessed {i+1}')
            axs[i, 1].axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
# Root folder of the source images. The trailing slash is required because
# paths are built from this value by plain string concatenation.
IMAGE_PATH = '/home/fli40/Data/Datathon24_SummerSchool_CXR/Data1/'
# Root folder that receives the preprocessed images; the trailing slash is
# required for the same reason.
OUTPUT_FOLDER = '/home/fli40/Data/Datathon24_SummerSchool_CXR/Data1_Preprocessed/'

In [None]:
# Collect every PNG located two directory levels below IMAGE_PATH.
# glob returns matches in arbitrary order, so sort them to make the
# processing order and the imgs[0:20] preview sample reproducible.
imgs = sorted(glob(IMAGE_PATH + '*/*/*.png'))
len(imgs)

In [None]:
for src_path in tqdm(imgs):
    # Pad and resize first, so no output folder is created for an image
    # that fails to load.
    result = preprocess_image(src_path)

    # Mirror the two directory levels above the file under OUTPUT_FOLDER
    sub_dirs = src_path.split('/')[-3:-1]
    out_dir = OUTPUT_FOLDER + '/'.join(sub_dirs)
    os.makedirs(out_dir, exist_ok=True)

    # Save under the original file name (PNG, as selected by the glob)
    out_file = os.path.join(out_dir, os.path.basename(src_path))
    result.save(out_file)

### Visualize Results

In [None]:
# Preview the first 20 images side by side with their preprocessed versions
plot_images(imgs[:20])