In [1]:
import importlib
from collections import Counter

import numpy as np
from torchgeo.datasets.utils import BoundingBox

from data.sampler import BalancedGridGeoSampler, BalancedRandomBatchGeoSampler
from train import initialize_dataset

NameError: name 'path' is not defined

In [None]:
# Load the configuration module
config_module = "configs.config"  # Replace with your actual config module path
config = importlib.import_module(config_module)

# Initialize the dataset
naip_dataset, kc_dataset = initialize_dataset(config)


def analyze_kc_class_distribution(kc_dataset):
    """Analyze the class distribution in the Kane County dataset."""
    class_counts = Counter()

    # Define bounding box grid based on dataset bounds and resolution
    minx, miny, maxx, maxy = kc_dataset.bounds[:4]
    grid_size = (maxx - minx) // 20

    for x in np.arange(minx, maxx, grid_size):
        for y in np.arange(miny, maxy, grid_size):
            bbox = BoundingBox(
                minx=x, miny=y, maxx=x + grid_size, maxy=y + grid_size, mint=0, maxt=1
            )
            try:
                sample = kc_dataset[bbox]
                masks = sample["mask"]
                flattened = masks.flatten().tolist()
                class_counts.update(flattened)
            except IndexError:
                continue

    return class_counts


kc_class_distribution = analyze_kc_class_distribution(kc_dataset)

print("Class Distribution in Kane County Dataset:")
for label, count in kc_class_distribution.items():
    print(f"Class {label}: {count}")

: 

TypeError: Can't instantiate abstract class KaneCounty with abstract method __getitem__

In [None]:
config_module = "configs.config"  # Replace with your actual config module path
config = importlib.import_module(config_module)

naip_dataset, kc_dataset = initialize_dataset(config)

sampler_config = {
    "dataset": kc_dataset,
    "size": (128, 128),  # Patch size in pixels or CRS units
    "batch_size": 16,  # Number of patches per batch
    "length": 1000,  # Total number of samples per epoch
    "roi": kc_dataset.bounds,  # Region of interest (entire dataset bounds)
}

random_sampler = BalancedRandomBatchGeoSampler(sampler_config)
grid_sampler = BalancedGridGeoSampler({**sampler_config, "stride": (64, 64)})


def analyze_sampler_class_distribution(sampler):
    """Analyze class distribution using patches sampled by the sampler."""
    class_counts = Counter()

    for batch in sampler:
        for bbox in batch:
            try:
                sample = kc_dataset[bbox]
                mask = sample["mask"].flatten().tolist()
                class_counts.update(mask)
            except IndexError:
                continue

    return class_counts


# Using Random Batch Sampler
random_class_distribution = analyze_sampler_class_distribution(random_sampler)
print("Class Distribution (Random Sampler):", dict(random_class_distribution))

# Using Grid Sampler
grid_class_distribution = analyze_sampler_class_distribution(grid_sampler)
print("Class Distribution (Grid Sampler):", dict(grid_class_distribution))

Class Distribution (Random Sampler): {0: 14735880, 2: 444671, 1: 882219, 3: 139446, 4: 50712}


TypeError: 'float' object is not iterable

In [None]:
def calculate_imbalance_ratio(class_distribution):
    """Calculate the imbalance ratio from the class distribution."""
    majority_class_count = max(class_distribution.values())
    minority_class_count = min(class_distribution.values())
    imbalance_ratio = majority_class_count / minority_class_count
    return majority_class_count, minority_class_count, imbalance_ratio


majority_count, minority_count, imbalance_ratio = calculate_imbalance_ratio(
    kc_class_distribution
)

print("\nImbalance Ratio in Kane County Dataset:")
print(f"Majority Class Count: {majority_count}")
print(f"Minority Class Count: {minority_count}")
print(f"Imbalance Ratio: {imbalance_ratio:.2f}")