1) Open Google Colab and mount Google Drive.

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used below live under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


2) Inspect the source dataset: the number of images in each class, the image sizes and channel modes found in each class, and the overall image count.

In [None]:
import os
from PIL import Image
from collections import defaultdict, Counter

# Root folder of the source samples: one sub-folder per class
src_dir = "/content/drive/MyDrive/Research Project 2025/Datasets/Samples/Source - MedMNIST Samples"

# Per-class tallies
class_counts = Counter()                   # class -> number of image files found
class_channels = defaultdict(Counter)      # class -> {PIL mode: count}
class_resolutions = defaultdict(Counter)   # class -> {(width, height): count}

# File extensions (compared in lower case) that are treated as images
image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

# Visit the class folders in alphabetical order
for class_name in sorted(os.listdir(src_dir)):
    class_path = os.path.join(src_dir, class_name)
    if not os.path.isdir(class_path):
        continue  # plain file sitting next to the class folders

    image_names = [f for f in os.listdir(class_path) if f.lower().endswith(image_exts)]
    for fname in image_names:
        # Counted before opening, so unreadable files still add to the class total
        class_counts[class_name] += 1

        img_path = os.path.join(class_path, fname)
        try:
            with Image.open(img_path) as img:
                # img.mode is e.g. 'RGB' or 'L' (grayscale); img.size is (width, height)
                class_channels[class_name][img.mode] += 1
                class_resolutions[class_name][img.size] += 1
        except Exception as e:
            print(f"Error opening {img_path}: {e}")

# Per-class report followed by the grand total
total_images = sum(class_counts.values())

print(f"\n{'Class':<15}  Images  Channels (mode:count)           Top Resolutions (res:count)")
print("-" * 80)
for cls, cnt in sorted(class_counts.items()):
    channel_parts = [f"{mode}:{n}" for mode, n in class_channels[cls].items()]
    channels_summary = ", ".join(channel_parts)
    # Only the three most frequent resolutions are listed
    res_parts = [f"{size[0]}x{size[1]}:{n}" for size, n in class_resolutions[cls].most_common(3)]
    res_summary = ", ".join(res_parts)
    print(f"{cls:<15}  {cnt:<6}  {channels_summary:<30}  {res_summary}")

print("\nTotal images across all classes:", total_images)


Class            Images  Channels (mode:count)           Top Resolutions (res:count)
--------------------------------------------------------------------------------
AbdomenCT        130     L:130                           64x64:130
BreastMRI        130     L:130                           64x64:130
CXR              130     L:130                           64x64:130
ChestCT          130     L:130                           64x64:130
HandXR           130     L:130                           64x64:130
HeadCT           130     L:130                           64x64:130

Total images across all classes: 780


"L" means single-channel grayscale.
VGG-19 does not expect 64x64 inputs; it expects 224x224 RGB images.

The images must therefore be converted from 64x64 grayscale to 224x224 RGB for VGG-19.

3) Convert the source data to 224x224 RGB (3 channels), and inspect it before and after the conversion.

In [None]:
import os
from PIL import Image
from collections import Counter, defaultdict

# ── Configuration ──────────────────────────────────────────────────────────
# Input tree (one sub-folder per class) and the output tree that mirrors it
src_dir = "/content/drive/MyDrive/Research Project 2025/Datasets/Samples/Source - MedMNIST Samples"
dest_dir = "/content/drive/MyDrive/Research Project 2025/Preprocessed Datasets/Samples/Source - MedMNIST Labelled"

# The CSV sits inside the source folder and is copied under the same name into the output folder
csv_src = f"{src_dir}/Source - MedMNIST Samples.csv"
csv_dst = f"{dest_dir}/Source - MedMNIST Samples.csv"

# Output image size as (width, height)
TARGET_SIZE = (224, 224)
# ── End configuration ──────────────────────────────────────────────────────


def inspect_directory(path):
    """
    Print a per-class summary of the image files stored under `path`.

    Every sub-folder of `path` is treated as one class. For each class that
    contains at least one image file (.png/.jpg/.jpeg/.bmp/.tiff, matched
    case-insensitively) the report lists the number of files, how many use
    each PIL mode, and the three most common (width, height) sizes. A grand
    total closes the report.

    The report is written to stdout; the function returns nothing.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    per_class_total = Counter()
    per_class_modes = defaultdict(Counter)
    per_class_sizes = defaultdict(Counter)

    # Alphabetical walk, so the report rows come out sorted by class name
    for cls in sorted(os.listdir(path)):
        cls_path = os.path.join(path, cls)
        if not os.path.isdir(cls_path):
            continue

        images = [f for f in os.listdir(cls_path) if f.lower().endswith(image_exts)]
        for fname in images:
            # Counted before opening: unreadable files still add to the total
            per_class_total[cls] += 1
            img_path = os.path.join(cls_path, fname)
            try:
                with Image.open(img_path) as img:
                    per_class_modes[cls][img.mode] += 1
                    per_class_sizes[cls][img.size] += 1
            except Exception as e:
                print(f"❌ Error opening {img_path}: {e}")

    print(f"\nInspection of `{path}`")
    print(f"{'Class':<12}  Images  Modes               Top Resolutions")
    print("-" * 60)
    for cls in per_class_total:
        cnt = per_class_total[cls]
        modes_s = ", ".join(f"{m}:{n}" for m, n in per_class_modes[cls].items())
        res_s = ", ".join(
            f"{size[0]}×{size[1]}:{n}" for size, n in per_class_sizes[cls].most_common(3)
        )
        print(f"{cls:<12}  {cnt:<6}  {modes_s:<18}  {res_s}")
    print(f"\n→ Total images: {sum(per_class_total.values())}\n")


def convert_and_save(src, dst, size):
    """
    Mirror the class folders of `src` under `dst` as RGB PNG images of `size`.

    The CSV named by the module-level `csv_src` is first copied to the
    module-level `csv_dst`, if it exists. Then every image file
    (.png/.jpg/.jpeg/.bmp/.tiff, matched case-insensitively) in each
    sub-folder of `src` is converted to RGB, resized with bilinear
    interpolation and saved as `<original stem>.png` in the sub-folder of the
    same name under `dst`.

    Args:
        src: Directory whose sub-folders each hold the images of one class.
        dst: Output directory; it and the class sub-folders are created if
            missing.
        size: Target (width, height) passed to `Image.resize`.

    Returns:
        None. Images that fail to convert are reported on stdout and skipped.
    """
    # Imported here so the function does not depend on a file-level import.
    import shutil

    os.makedirs(dst, exist_ok=True)

    # Copy the CSV with shutil rather than the IPython-only `!cp` shell escape:
    # this is plain Python, needs no shell quoting, and matches how the
    # unlabelled-target cell copies its CSV.
    if os.path.exists(csv_src):
        csv_parent = os.path.dirname(csv_dst)
        if csv_parent:  # os.makedirs("") raises, so skip a bare file name
            os.makedirs(csv_parent, exist_ok=True)
        shutil.copy2(csv_src, csv_dst)

    for cls in sorted(os.listdir(src)):
        src_cls = os.path.join(src, cls)
        dst_cls = os.path.join(dst, cls)
        if not os.path.isdir(src_cls):
            continue
        os.makedirs(dst_cls, exist_ok=True)

        for fname in os.listdir(src_cls):
            if not fname.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
                continue
            in_path = os.path.join(src_cls, fname)
            # NOTE: files that differ only in extension map to the same
            # output name, so the later one overwrites the earlier one.
            out_name = os.path.splitext(fname)[0] + ".png"
            out_path = os.path.join(dst_cls, out_name)
            try:
                with Image.open(in_path) as img:
                    rgb = img.convert("RGB")
                    resized = rgb.resize(size, Image.BILINEAR)
                    resized.save(out_path, format="PNG")
            except Exception as e:
                # Best effort: report the failure and carry on with the rest
                print(f"⚠️ Failed to convert {in_path}: {e}")


# ── Run everything ─────────────────────────────────────────────────────────
# Order matters: report on the raw source tree, convert it, then report on the
# converted tree so the two summaries can be compared.
print("🔍 Before conversion:")
inspect_directory(src_dir)

# Writes the RGB PNG copies (and the CSV, if present) under dest_dir
print("🔄 Converting to 224×224 RGB and saving into destination…")
convert_and_save(src_dir, dest_dir, TARGET_SIZE)

# Same report as above, now run on the output tree
print("🔍 After conversion:")
inspect_directory(dest_dir)

🔍 Before conversion:

Inspection of `/content/drive/MyDrive/Research Project 2025/Datasets/Samples/Source - MedMNIST Samples`
Class         Images  Modes               Top Resolutions
------------------------------------------------------------
AbdomenCT     130     L:130               64×64:130
BreastMRI     130     L:130               64×64:130
CXR           130     L:130               64×64:130
ChestCT       130     L:130               64×64:130
HandXR        130     L:130               64×64:130
HeadCT        130     L:130               64×64:130

→ Total images: 780

🔄 Converting to 224×224 RGB and saving into destination…
🔍 After conversion:

Inspection of `/content/drive/MyDrive/Research Project 2025/Preprocessed Datasets/Samples/Source - MedMNIST Labelled`
Class         Images  Modes               Top Resolutions
------------------------------------------------------------
AbdomenCT     130     RGB:130             224×224:130
BreastMRI     130     RGB:130             224×224:13

4) Inspect the labelled target directory before conversion. The labelled directory is inspected for study purposes only; the domain adaptation experiments that follow use only the unlabelled target dataset.

In [None]:
import os
from PIL import Image
from collections import Counter, defaultdict

# ── Configuration ──────────────────────────────────────────────────────────
# Labelled target samples; this cell only inspects the folder, it does not modify it.
target_dir = "/content/drive/MyDrive/Research Project 2025/Datasets/Samples/Target - VS Labelled"
# ── End configuration ──────────────────────────────────────────────────────

def inspect_directory(path):
    """
    Summarise the images found in each class sub-folder of `path` on stdout.

    For every sub-folder holding at least one image file the summary shows
    the file count, the count per PIL mode and the three most frequent
    (width, height) sizes; the overall image count is printed last.
    Files that cannot be opened are reported but still counted.
    Returns nothing.
    """
    def is_image(name):
        # Case-insensitive match on the supported image extensions
        return name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff'))

    counts = Counter()
    modes = defaultdict(Counter)
    sizes = defaultdict(Counter)

    # Pair each entry of `path` with its full path, in alphabetical order
    entries = ((name, os.path.join(path, name)) for name in sorted(os.listdir(path)))
    for cls, cls_path in entries:
        if not os.path.isdir(cls_path):
            continue
        for fname in filter(is_image, os.listdir(cls_path)):
            counts[cls] += 1
            img_path = os.path.join(cls_path, fname)
            try:
                with Image.open(img_path) as img:
                    modes[cls][img.mode] += 1
                    sizes[cls][img.size] += 1
            except Exception as e:
                print(f"❌ Error opening {img_path}: {e}")

    grand_total = sum(counts.values())
    print(f"\nInspection of `{path}`")
    print(f"{'Class':<12}  Images  Modes               Top Resolutions")
    print("-" * 60)
    for cls, cnt in counts.items():
        mode_parts = [f"{m}:{n}" for m, n in modes[cls].items()]
        size_parts = [f"{w}×{h}:{n}" for (w, h), n in sizes[cls].most_common(3)]
        modes_s = ", ".join(mode_parts)
        res_s = ", ".join(size_parts)
        print(f"{cls:<12}  {cnt:<6}  {modes_s:<18}  {res_s}")
    print(f"\n→ Total images: {grand_total}\n")

# Run the inspection: prints the per-class count, mode and resolution summary for target_dir
inspect_directory(target_dir)


Inspection of `/content/drive/MyDrive/Research Project 2025/Datasets/Samples/Target - VS Labelled`
Class         Images  Modes               Top Resolutions
------------------------------------------------------------
AbdomenCT     130     RGBA:130            512×512:130
BreastMRI     130     L:73, RGBA:57       640×542:67, 512×512:57, 640×603:6
CXR           130     L:130               1472×1171:2, 1634×1272:1, 1570×1156:1
ChestCT       130     L:130               512×512:126, 260×197:1, 311×224:1
HandXR        130     L:67, RGB:63        406×512:27, 407×512:9, 379×512:8
HeadCT        130     L:130               512×512:130

→ Total images: 780



5) Inspect the unlabelled target directory.

In [None]:
import os
from PIL import Image
from collections import Counter

# ── Configuration ──────────────────────────────────────────────────────────
# Flat folder of unlabelled target images (no class sub-folders)
unlabelled_dir = "/content/drive/MyDrive/Research Project 2025/Datasets/Samples/Target - VS Unlabelled/Images"
# ── End configuration ──────────────────────────────────────────────────────

# Pick out the image files first; every one of them counts towards the total,
# including any that later fail to open
image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
image_names = [name for name in os.listdir(unlabelled_dir) if name.lower().endswith(image_exts)]
total_images = len(image_names)

# Histograms of PIL mode and (width, height)
mode_counter = Counter()
res_counter  = Counter()

for fname in image_names:
    img_path = os.path.join(unlabelled_dir, fname)
    try:
        with Image.open(img_path) as img:
            mode_counter[img.mode] += 1
            res_counter[img.size] += 1
    except Exception as e:
        print(f"❌ Error opening {img_path}: {e}")

# Report
print(f"\nInspection of unlabelled target directory: `{unlabelled_dir}`")
print(f"→ Total images: {total_images}\n")

print("Modes:")
for mode in mode_counter:
    print(f"  {mode}: {mode_counter[mode]}")

print("\nTop 5 Resolutions:")
for size, cnt in res_counter.most_common(5):
    print(f"  {size[0]}×{size[1]}: {cnt}")


Inspection of unlabelled target directory: `/content/drive/MyDrive/Research Project 2025/Datasets/Samples/Target - VS Unlabelled/Images`
→ Total images: 780

Modes:
  RGBA: 187
  L: 530
  RGB: 63

Top 5 Resolutions:
  512×512: 444
  640×542: 67
  406×512: 27
  407×512: 9
  379×512: 8


6) Now let us convert the unlabelled target images to 224x224 RGB (3 channels).

In [None]:
import os
from PIL import Image
from collections import Counter

# ── Configuration ──────────────────────────────────────────────────────────
# Raw unlabelled target set: an Images folder plus its CSV
src_base_dir = "/content/drive/MyDrive/Research Project 2025/Datasets/Samples/Target - VS Unlabelled"
src_img_dir = f"{src_base_dir}/Images"
src_csv = f"{src_base_dir}/Target sample labels.csv"

# Preprocessed copy, laid out the same way as the raw set
dst_base_dir = "/content/drive/MyDrive/Research Project 2025/Preprocessed Datasets/Samples/Target - VS Unlabelled"
dst_img_dir, dst_csv = (
    os.path.join(dst_base_dir, leaf) for leaf in ("Images", "Target sample labels.csv")
)

# Output image size as (width, height)
TARGET_SIZE = (224, 224)
# ── End configuration ──────────────────────────────────────────────────────

# 1) Make sure the output image folder exists
os.makedirs(dst_img_dir, exist_ok=True)

# 2) Copy the CSV alongside the converted images, or warn when it is missing
if not os.path.exists(src_csv):
    print(f"⚠️ Source CSV not found at {src_csv}")
else:
    csv_parent = os.path.dirname(dst_csv)
    os.makedirs(csv_parent, exist_ok=True)

    import shutil
    shutil.copy2(src_csv, dst_csv)
    print(f"✅ Copied CSV to {dst_csv}")

# 3) Convert every image to RGB at TARGET_SIZE and store it as PNG
failed = []  # (file name, error text) for each image that could not be converted
convertible = [
    name for name in os.listdir(src_img_dir)
    if name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff'))
]
for fname in convertible:
    stem, _ext = os.path.splitext(fname)
    src_path = os.path.join(src_img_dir, fname)
    dst_path = os.path.join(dst_img_dir, stem + ".png")
    try:
        with Image.open(src_path) as img:
            # RGB conversion first, then bilinear resize, then write as PNG
            img.convert("RGB").resize(TARGET_SIZE, Image.BILINEAR).save(dst_path, format="PNG")
    except Exception as e:
        failed.append((fname, str(e)))

if not failed:
    print(f"✅ Converted all images to {TARGET_SIZE} RGB and saved to {dst_img_dir}")
else:
    print("⚠️ Failed conversions:", failed)

# 4) Re-inspect the converted directory (PNG files only)
png_names = [name for name in os.listdir(dst_img_dir) if name.lower().endswith('.png')]
total = len(png_names)  # includes files that fail to open below
mode_counter = Counter()
res_counter  = Counter()

for fname in png_names:
    path = os.path.join(dst_img_dir, fname)
    try:
        with Image.open(path) as img:
            mode_counter[img.mode] += 1
            res_counter[img.size] += 1
    except Exception as e:
        print(f"❌ Error opening {path}: {e}")

# 5) Display results
print("\nPost-conversion inspection:")
print(f"→ Total images: {total}")

print("\nModes:")
for mode in mode_counter:
    print(f"  {mode}: {mode_counter[mode]}")

print("\nTop 5 Resolutions:")
for size, cnt in res_counter.most_common(5):
    print(f"  {size[0]}×{size[1]}: {cnt}")


✅ Copied CSV to /content/drive/MyDrive/Research Project 2025/Preprocessed Datasets/Samples/Target - VS Unlabelled/Target sample labels.csv
✅ Converted all images to (224, 224) RGB and saved to /content/drive/MyDrive/Research Project 2025/Preprocessed Datasets/Samples/Target - VS Unlabelled/Images

Post-conversion inspection:
→ Total images: 780

Modes:
  RGB: 780

Top 5 Resolutions:
  224×224: 780
