In [22]:
import os
import json
import numpy as np
from PIL import Image


In [23]:
def train_validation_test_split(files_per_weather, train_percent=0.8, validation_percent=0.1, test_percent=0.1):
    """Splits the dataset into train, validation, and test sets for each weather condition."""
    assert train_percent + validation_percent + test_percent == 1.0, "Splits must sum to 1."

    train_files, validation_files, test_files = {}, {}, {}

    for weather, files in files_per_weather.items():
        num_files = len(files)
        shuffled_files = np.random.permutation(files)  # Random shuffle

        if weather == "_outRaw":
            train_files[weather] = shuffled_files[:1000]  # Limit to 1000
            validation_files[weather] = shuffled_files[1000:1125]  # 125 validation
            test_files[weather] = shuffled_files[1125:1250]  # 125 test
        else:
            train_files[weather] = shuffled_files[:100]  # 100 train
            validation_files[weather] = shuffled_files[100:112]  # 12 validation
            test_files[weather] = shuffled_files[112:124]  # 12 test

    return train_files, validation_files, test_files

In [24]:
def make_odgt(raw_folders, seg_folders, train_files, validate_files, test_files, output_dir):
    os.makedirs(output_dir, exist_ok=True)

    datasets = {
        'train': train_files,
        'validate': validate_files,
        'test': test_files
    }
    
    
    for key, file_dict in datasets.items():
        with open(os.path.join(output_dir, f'{key}.odgt'), 'w', encoding='utf-8') as odgt_file:
            for weather, files in file_dict.items():  # Iterate over each weather condition
                for raw in files:
                    raw_path = os.path.abspath(os.path.join(raw_folders[weather], raw))
                    ann_path = os.path.abspath(os.path.join(seg_folders[weather], raw))

                    if not os.path.exists(raw_path) or not os.path.exists(ann_path):
                        print(f"Skipping missing file: {raw}")
                        continue

                    raw_img = Image.open(raw_path)
                    ann_img = Image.open(ann_path)
                    assert raw_img.size == ann_img.size, f"Size mismatch for {raw} in {weather}"

                    odgt_line = json.dumps({
                        "fpath_img": raw_path,
                        "fpath_segm": ann_path,
                        "width": raw_img.width,
                        "height": raw_img.height,
                        "weather": weather  # Store weather condition
                    })

                    
                    odgt_file.write(odgt_line + '\n')

    for f in odgt_files.values():
        f.close()

# Define paths
data_root_dir = '/home/zhaob/Desktop/semantic-segmentation-pytorch/1_17_clear_day_mixed'
# weather_conditions = ["_outRaw", "_outRaw_foggy", "_outRaw_night"]
weather_conditions = ["_outRaw", "_outRaw_foggy"]
raw_folders = {w: os.path.join(data_root_dir, w) for w in weather_conditions}
seg_folders = {w: os.path.join(data_root_dir, w.replace("_outRaw", "_outSeg")) for w in weather_conditions}

# Collect only valid image files
files_per_weather = {
    w: sorted([f for f in os.listdir(raw_folders[w]) if f.endswith('.png')])
    for w in weather_conditions
}

# Perform dataset split
train, validate, test = train_validation_test_split(files_per_weather)

print(f"Train set sizes: { {w: len(train[w]) for w in train} }")
print(f"Validation set sizes: { {w: len(validate[w]) for w in validate} }")
print(f"Test set sizes: { {w: len(test[w]) for w in test} }")

# Create ODGT
make_odgt(raw_folders, seg_folders, train, validate, test, 'odgt')

print(f"Lines in train.odgt: {sum(1 for _ in open('odgt/train.odgt', 'r', encoding='utf-8'))}")
print(f"Lines in validate.odgt: {sum(1 for _ in open('odgt/validate.odgt', 'r', encoding='utf-8'))}")
print(f"Lines in test.odgt: {sum(1 for _ in open('odgt/test.odgt', 'r', encoding='utf-8'))}")

Train set sizes: {'_outRaw': 1000, '_outRaw_foggy': 100}
Validation set sizes: {'_outRaw': 125, '_outRaw_foggy': 12}
Test set sizes: {'_outRaw': 125, '_outRaw_foggy': 12}


NameError: name 'odgt_files' is not defined