# **IMAGE DATA - SIZE REDUCTION & STANDARDIZATION**

# 1. Installation & Packages Setup 
<div style="float:left; font-size:0.7em;"><a href="#returnToTop">Return to Top</a></div> <a id='setup'></a>

In [3]:
import pandas as pd
import numpy as np
import gzip
import os
import requests
from urllib.parse import urljoin
import subprocess

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns  # for nicer plots
sns.set(style="darkgrid")  # default style

from copy import deepcopy
import re
import pprint
pd.options.display.max_columns = 999

from collections import Counter

# Set pandas to display the entire content of each column
pd.set_option('display.max_colwidth', None)

import shutil
from IPython.display import display, HTML
from sklearn.ensemble import IsolationForest
import cv2

# 2. Setup Constants & Functions
<div style="float:left; font-size:0.7em;"><a href="#returnToTop">Return to Top</a></div> <a id='setup'></a>

In [4]:
# Directories and file paths
local_base_dir  = '/tf/mount/0 Berkeley/0 Spring 2024/w210 Capstone/Project/mimic_data/mimic_images'
processed_file  = '/Processed_Image_Data_Feb_28_2024.csv'
processed_file_path = os.path.join(local_base_dir, processed_file)

In [3]:
# Establish Source & Target paths
source_dirs = {
    'train': local_base_dir + '/train_images/',
    'validate': local_base_dir + '/validate_images/',
    'test': local_base_dir + '/test_images/',
    'sample_train': local_base_dir + '/sample_train_images/',
    'sample_validate': local_base_dir + '/sample_validate_images/',
    'sample_test': local_base_dir + '/sample_test_images/',
}
target_512_dirs = {
    'train': local_base_dir + '/train_images_512/',
    'validate': local_base_dir + '/validate_images_512/',
    'test': local_base_dir + '/test_images_512/',
    'sample_train': local_base_dir + '/sample_train_images_512/',
    'sample_validate': local_base_dir + '/sample_validate_images_512/',
    'sample_test': local_base_dir + '/sample_test_images_512/',
}
target_256_dirs = {
    'train': local_base_dir + '/train_images_256/',
    'validate': local_base_dir + '/validate_images_256/',
    'test': local_base_dir + '/test_images_256/',
    'sample_train': local_base_dir + '/sample_train_images_256/',
    'sample_validate': local_base_dir + '/sample_validate_images_256/',
    'sample_test': local_base_dir + '/sample_test_images_256/',
}
# Create target directories if they don't exist
for dir_path in target_512_dirs.values():
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
for dir_path in target_256_dirs.values():
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

# 3. Inspect Original Images Color Scheme
<div style="float:left; font-size:0.7em;"><a href="#returnToTop">Return to Top</a></div> <a id='setup'></a>



In [6]:
# Check the dimensionality of the downloaded images

# Load a sample image
img = cv2.imread(local_base_dir+'/size_original/sample_images/sample_train_images/00b654f0-892f76c1-f94ee4c4-26191a53-1f7a525c.jpg')
print(img.shape)

# Check if the image is grayscale
if len(img.shape) == 2:
    print("The image is genuine grayscale.")
elif len(img.shape) == 3 and img.shape[2] == 3:
    print("The image is in color (RGB).")

(3056, 2544, 3)
The image is in color (RGB).


# 4. Help Methods
<div style="float:left; font-size:0.7em;"><a href="#returnToTop">Return to Top</a></div> <a id='setup'></a>



In [6]:
# Padding & Resizing method
def pad_and_resize_image(image_path, sizes=[(512, 512), (256, 256)]):
    # Read the image
    img = cv2.imread(image_path)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Determine the longer side and calculate the difference with the shorter side
    longer_side = max(img.shape[:2])
    top_bottom_padding = (longer_side - img.shape[0]) // 2
    left_right_padding = (longer_side - img.shape[1]) // 2

    # Pad the shorter side with black pixels
    padded_img = cv2.copyMakeBorder(img_rgb, top_bottom_padding, top_bottom_padding,
                                    left_right_padding, left_right_padding,
                                    cv2.BORDER_CONSTANT, value=[0, 0, 0])

    # Resize the padded image to the new sizes
    resized_imgs = [cv2.resize(padded_img, size, interpolation=cv2.INTER_AREA) for size in sizes]

    return [img_rgb] + resized_imgs

In [7]:
# Process & Save method
def process_and_save_images(source_dir, target_512_dir, target_256_dir, sizes=[(512, 512), (256, 256)]):
    # List all image files in the source directory
    image_files = [f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f))]

    # Loop over each image file
    for image_file in image_files:
        # Full path to the source image
        image_path = os.path.join(source_dir, image_file)

        # Process the image to get the resized versions for specified sizes
        _, resized_512, resized_256 = pad_and_resize_image(image_path, sizes)

        # Define target paths for the processed images
        target_512_path = os.path.join(target_512_dir, image_file)
        target_256_path = os.path.join(target_256_dir, image_file)

        # Save the processed images
        cv2.imwrite(target_512_path, cv2.cvtColor(resized_512, cv2.COLOR_RGB2BGR))  # Convert back to BGR for saving
        cv2.imwrite(target_256_path, cv2.cvtColor(resized_256, cv2.COLOR_RGB2BGR))

# 5. Image Processing & Standardization
<div style="float:left; font-size:0.7em;"><a href="#returnToTop">Return to Top</a></div> <a id='setup'></a>

In [8]:
# Loop over every set of directories (train, validate, test, and samples)
for data_type in source_dirs.keys():
    print(f"Processing {data_type} images...")
    process_and_save_images(source_dirs[data_type], target_512_dirs[data_type], target_256_dirs[data_type])
    print(f"Completed processing {data_type} images.")

Processing train images...
Completed processing train images.
Processing validate images...
Completed processing validate images.
Processing test images...
Completed processing test images.
Processing sample_train images...
Completed processing sample_train images.
Processing sample_validate images...
Completed processing sample_validate images.
Processing sample_test images...
Completed processing sample_test images.
