In [4]:
import os
import warnings
from collections import Counter

import cv2

def get_image_sizes(base_dir):
    """Collect image dimensions and per-label image counts for one dataset split.

    The split is expected to be laid out as ``base_dir/<label>/images/<file>``.

    Args:
        base_dir: Path of the split directory whose sub-directories are labels.

    Returns:
        A tuple ``(sizes, counts)`` where ``sizes`` is a list of
        ``(width, height)`` tuples, one per image that could be read, and
        ``counts`` maps each label folder name to the number of images read
        from its ``images`` sub-directory.

    Notes:
        Entries of ``base_dir`` without an ``images`` sub-directory (stray
        files, ``.ipynb_checkpoints`` and the like) are skipped and do not
        appear in ``counts``. Files that ``cv2.imread`` cannot decode are
        skipped with a warning and are excluded from both ``sizes`` and
        ``counts``.
    """
    sizes = []
    counts = {}

    for label_folder in os.listdir(base_dir):
        images_path = os.path.join(base_dir, label_folder, 'images')

        # Ignore anything that is not a label folder holding an images/ directory.
        if not os.path.isdir(images_path):
            continue

        label_count = 0

        for image_name in os.listdir(images_path):
            image_path = os.path.join(images_path, image_name)
            img = cv2.imread(image_path)

            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                warnings.warn(f"Skipping unreadable image: {image_path}")
                continue

            # Array shape is (rows, cols[, channels]), i.e. height comes first.
            h, w = img.shape[:2]
            sizes.append((w, h))
            label_count += 1

        counts[label_folder] = label_count

    return sizes, counts

### Lung Segmentation Data

In [None]:
# Locations of the three splits of the lung segmentation dataset.
train_lung_dir = 'archive/Lung Segmentation Data/Lung Segmentation Data/Train'
val_lung_dir = 'archive/Lung Segmentation Data/Lung Segmentation Data/Val'
test_lung_dir = 'archive/Lung Segmentation Data/Lung Segmentation Data/Test'

# Scan each split once, in Train/Val/Test order; every call yields
# (list of image sizes, per-label image counts).
(train_sizes, train_counts), (val_sizes, val_counts), (test_sizes, test_counts) = [
    get_image_sizes(split_dir)
    for split_dir in (train_lung_dir, val_lung_dir, test_lung_dir)
]

# Report per-label counts and the total for every split.
print("=== Image Counts ===")
for split, counts in zip(
    ("Train", "Val", "Test"),
    (train_counts, val_counts, test_counts),
):
    print(f"\n{split}:")
    for label, count in counts.items():
        print(f"  {label}: {count}")
    print(f"  Total: {sum(counts.values())}")

=== Image Counts ===

Train:
  Non-COVID: 7208
  Normal: 6849
  COVID-19: 7658
  Total: 21715

Val:
  Non-COVID: 1802
  Normal: 1712
  COVID-19: 1903
  Total: 5417

Test:
  Non-COVID: 2253
  Normal: 2140
  COVID-19: 2395
  Total: 6788


In [None]:
print("=== Image Sizes ===")
# Pool the (width, height) tuples gathered from every split.
all_sizes = train_sizes + val_sizes + test_sizes

# Tally every distinct size in a single pass instead of rescanning the whole
# list with list.count() once per unique size.
size_counts = Counter(all_sizes)
unique_sizes = set(size_counts)
print(f"Unique sizes found: {len(unique_sizes)}")
for size in sorted(unique_sizes):
    count = size_counts[size]
    print(f"  {size[0]}x{size[1]}: {count} images")

=== Image Sizes ===
Unique sizes found: 1
  256x256: 33920 images


### Infection Segmentation Data

In [7]:
# Locations of the three splits of the infection segmentation dataset.
train_infect_dir = 'archive/Infection Segmentation Data/Infection Segmentation Data/Train'
val_infect_dir = 'archive/Infection Segmentation Data/Infection Segmentation Data/Val'
test_infect_dir = 'archive/Infection Segmentation Data/Infection Segmentation Data/Test'

# Scan each split once, in Train/Val/Test order; every call yields
# (list of image sizes, per-label image counts).
(train_sizes, train_counts), (val_sizes, val_counts), (test_sizes, test_counts) = [
    get_image_sizes(split_dir)
    for split_dir in (train_infect_dir, val_infect_dir, test_infect_dir)
]

# Report per-label counts and the total for every split.
print("=== Image Counts ===")
for split, counts in zip(
    ("Train", "Val", "Test"),
    (train_counts, val_counts, test_counts),
):
    print(f"\n{split}:")
    for label, count in counts.items():
        print(f"  {label}: {count}")
    print(f"  Total: {sum(counts.values())}")

=== Image Counts ===

Train:
  Non-COVID: 932
  Normal: 932
  COVID-19: 1864
  Total: 3728

Val:
  Non-COVID: 233
  Normal: 233
  COVID-19: 466
  Total: 932

Test:
  Non-COVID: 292
  Normal: 291
  COVID-19: 583
  Total: 1166


In [8]:
print("=== Image Sizes ===")
# Pool the (width, height) tuples gathered from every split.
all_sizes = train_sizes + val_sizes + test_sizes

# Tally every distinct size in a single pass instead of rescanning the whole
# list with list.count() once per unique size.
size_counts = Counter(all_sizes)
unique_sizes = set(size_counts)
print(f"Unique sizes found: {len(unique_sizes)}")
for size in sorted(unique_sizes):
    count = size_counts[size]
    print(f"  {size[0]}x{size[1]}: {count} images")

=== Image Sizes ===
Unique sizes found: 1
  256x256: 5826 images


### Dataset Comparison Summary

| Feature | Lung Segmentation | Infection Segmentation |
|---------|-------------------|------------------------|
| Dataset size (Train + Val + Test) | 33,920 images | 5,826 images |
| Lung masks | ✅ Yes | ✅ Yes |
| Infection masks | ❌ No | ✅ Yes |

### Phase Training

| Phase | Frozen | Trainable | Classification Input |
|---|---|---|---|
| Phase 1 | Infection decoder, Infection head | Encoder, PPM, Lung decoder, Lung head, Classification head | Lung mask only |
| Phase 2 | (none) | All | Lung + Infection masks |
