In [1]:
import os
import json
import numpy as np
from PIL import Image


In [2]:
def train_validation_test_split(files_per_weather, train_percent=0.81, seed=None):
    """Split each weather condition's files into train, validation, and test sets.

    The files of every weather condition are shuffled independently. The first
    ``int(len(files) * train_percent)`` shuffled entries become the training
    set; the remainder is halved (floor division) to form the validation set,
    and everything left over (including the odd file, if any) goes to the test
    set.

    Args:
        files_per_weather: Mapping of weather condition name to a sequence of
            file names.
        train_percent: Fraction of each condition's files assigned to
            training. Must lie within ``[0, 1]``.
        seed: Optional seed for a reproducible shuffle. When ``None`` (the
            default) NumPy's global random state is used, so ``np.random.seed``
            still controls the result.

    Returns:
        A ``(train_files, validation_files, test_files)`` tuple of dicts, each
        mapping weather condition name to a NumPy array of file names.

    Raises:
        ValueError: If ``train_percent`` is outside ``[0, 1]``.
    """
    # Out-of-range fractions would otherwise yield negative sizes and
    # silently wrong slices instead of an error.
    if not 0.0 <= train_percent <= 1.0:
        raise ValueError(f"train_percent must be within [0, 1], got {train_percent}")

    # One generator is shared by all weather conditions so a given seed fixes
    # the whole split; without a seed the global RNG is used.
    if seed is None:
        permute = np.random.permutation
    else:
        permute = np.random.default_rng(seed).permutation

    train_files, validation_files, test_files = {}, {}, {}

    for weather, files in files_per_weather.items():
        shuffled_files = permute(files)  # Random shuffle (returns a new array)

        train_size = int(len(files) * train_percent)
        validation_size = (len(files) - train_size) // 2
        validation_end = train_size + validation_size

        train_files[weather] = shuffled_files[:train_size]
        validation_files[weather] = shuffled_files[train_size:validation_end]
        test_files[weather] = shuffled_files[validation_end:]

    return train_files, validation_files, test_files

In [3]:
def make_odgt(raw_folders, seg_folders, train_files, validate_files, test_files, output_dir):
    """Write ``train.odgt``, ``validate.odgt`` and ``test.odgt`` into ``output_dir``.

    Each output file holds one JSON object per line with the keys
    ``fpath_img``, ``fpath_segm``, ``width``, ``height`` and ``weather``.
    The annotation path is built from the segmentation folder of the same
    weather condition and the *same file name* as the raw image. Pairs with a
    missing raw or annotation file are reported on stdout and skipped.

    Args:
        raw_folders: Mapping of weather condition name to the raw image folder.
        seg_folders: Mapping of weather condition name to the annotation folder.
        train_files: Mapping of weather condition name to training file names.
        validate_files: Same mapping for the validation split.
        test_files: Same mapping for the test split.
        output_dir: Directory for the ``.odgt`` files; created if absent.

    Raises:
        KeyError: If a weather condition is absent from ``raw_folders`` or
            ``seg_folders``.
        AssertionError: If a raw image and its annotation differ in size.
    """
    os.makedirs(output_dir, exist_ok=True)

    datasets = {
        'train': train_files,
        'validate': validate_files,
        'test': test_files
    }

    for key, file_dict in datasets.items():
        with open(os.path.join(output_dir, f'{key}.odgt'), 'w', encoding='utf-8') as odgt_file:
            for weather, files in file_dict.items():  # Iterate over each weather condition
                for raw in files:
                    raw_path = os.path.abspath(os.path.join(raw_folders[weather], raw))
                    ann_path = os.path.abspath(os.path.join(seg_folders[weather], raw))

                    # Report the path(s) that are actually missing, not just the raw one.
                    missing = [path for path in (raw_path, ann_path) if not os.path.exists(path)]
                    if missing:
                        print(f"Skipping missing file: {', '.join(missing)}")
                        continue

                    # Context managers close both image files promptly, even
                    # when the size check below fails.
                    with Image.open(raw_path) as raw_img, Image.open(ann_path) as ann_img:
                        assert raw_img.size == ann_img.size, f"Size mismatch for {raw} in {weather}"
                        width, height = raw_img.size

                    odgt_line = json.dumps({
                        "fpath_img": raw_path,
                        "fpath_segm": ann_path,
                        "width": width,
                        "height": height,
                        "weather": weather  # Store weather condition
                    })

                    odgt_file.write(odgt_line + '\n')

    

# Define paths
# NOTE: machine-specific absolute path; adjust it when running elsewhere.
data_root_dir = '/home/zhaob/Desktop/semantic-segmentation-pytorch/new_data'
weather_conditions_raw = [f"rgb_foggy_day{i}" for i in range(1, 96)]
weather_conditions_seg = [f"rgb_seg_foggy_day_{i}" for i in range(1, 96)]
raw_folders = {w: os.path.join(data_root_dir, w) for w in weather_conditions_raw}
# Segmentation folders are keyed by the *raw* condition name so that both
# folder maps can be looked up with the same key.
seg_folders = {
    raw_name: os.path.join(data_root_dir, seg_name)
    for raw_name, seg_name in zip(weather_conditions_raw, weather_conditions_seg)
}

# Collect only valid image files
files_per_weather = {
    w: sorted([f for f in os.listdir(raw_folders[w]) if f.endswith('.png')])
    for w in weather_conditions_raw
}

# Seed NumPy's global RNG so that re-running this cell regenerates the same
# split instead of silently moving files between train/validation/test.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

# Perform dataset split
train, validate, test = train_validation_test_split(files_per_weather)

print(f"Train set sizes: { {w: len(train[w]) for w in train} }")
print(f"Validation set sizes: { {w: len(validate[w]) for w in validate} }")
print(f"Test set sizes: { {w: len(test[w]) for w in test} }")

# Create ODGT
odgt_dir = 'odgt'
make_odgt(raw_folders, seg_folders, train, validate, test, odgt_dir)

# Count the written lines; `with` closes each file instead of leaking the handle.
for split_name in ('train', 'validate', 'test'):
    with open(os.path.join(odgt_dir, f'{split_name}.odgt'), 'r', encoding='utf-8') as odgt_file:
        line_count = sum(1 for _ in odgt_file)
    print(f"Lines in {split_name}.odgt: {line_count}")

Train set sizes: {'rgb_foggy_day1': 17, 'rgb_foggy_day2': 17, 'rgb_foggy_day3': 17, 'rgb_foggy_day4': 17, 'rgb_foggy_day5': 17, 'rgb_foggy_day6': 17, 'rgb_foggy_day7': 17, 'rgb_foggy_day8': 17, 'rgb_foggy_day9': 17, 'rgb_foggy_day10': 17, 'rgb_foggy_day11': 17, 'rgb_foggy_day12': 17, 'rgb_foggy_day13': 17, 'rgb_foggy_day14': 17, 'rgb_foggy_day15': 17, 'rgb_foggy_day16': 17, 'rgb_foggy_day17': 17, 'rgb_foggy_day18': 17, 'rgb_foggy_day19': 17, 'rgb_foggy_day20': 17, 'rgb_foggy_day21': 17, 'rgb_foggy_day22': 17, 'rgb_foggy_day23': 17, 'rgb_foggy_day24': 17, 'rgb_foggy_day25': 17, 'rgb_foggy_day26': 17, 'rgb_foggy_day27': 17, 'rgb_foggy_day28': 17, 'rgb_foggy_day29': 17, 'rgb_foggy_day30': 17, 'rgb_foggy_day31': 17, 'rgb_foggy_day32': 17, 'rgb_foggy_day33': 17, 'rgb_foggy_day34': 17, 'rgb_foggy_day35': 17, 'rgb_foggy_day36': 17, 'rgb_foggy_day37': 17, 'rgb_foggy_day38': 17, 'rgb_foggy_day39': 17, 'rgb_foggy_day40': 17, 'rgb_foggy_day41': 17, 'rgb_foggy_day42': 17, 'rgb_foggy_day43': 17, 'r