In [1]:
### Load and index SID Sony RAW images

In [2]:
import rawpy
from matplotlib import pyplot as plt
from pathlib import Path
from collections import defaultdict
from dataclasses import asdict
import pandas as pd

In [3]:
import random

In [4]:
import os
from dataclasses import dataclass

@dataclass
class SIDFilenameInfo:
    """Fields decoded from one SID-style filename such as '10019_00_0.033s.RAF'.

    Attributes:
        split: Dataset split digit (0 = train, 1 = test, 2 = val).
        scene_id: Scene identifier as an integer, e.g. 19 for "0019".
        scene_id_str: Scene identifier as written, zero-padding kept ("0019").
        index: Index inside the burst, e.g. 0 for "00".
        index_str: Burst index as written, zero-padding kept ("00").
        exposure_seconds: Exposure time in seconds, e.g. 0.033 for "0.033s".
        exposure_str: Exposure exactly as written, e.g. "0.033s".
        extension: File extension without the dot, e.g. "RAF" or "ARW".
    """

    split: int
    scene_id: int
    scene_id_str: str
    index: int
    index_str: str
    exposure_seconds: float
    exposure_str: str
    extension: str

def parse_sid_filename(filename: str) -> SIDFilenameInfo:
    """
    Parse a SID-style filename like '10019_00_0.033s.RAF'.

    Pattern (from the SID README):
        [split][scene_id]_[index]_[exposure]s.[ext]
        - [split]: 1 digit (0=train, 1=test, 2=val)
        - [scene_id]: 4 digits (scene identifier)
        - [index]: 2 digits (burst index)
        - [exposure]: float in seconds, followed by 's'
    """
    # Strip directory and separate extension
    base = os.path.basename(filename)
    stem, ext = os.path.splitext(base)  # '10019_00_0.033s', '.RAF'
    ext = ext.lstrip(".")              # 'RAF'

    parts = stem.split("_")
    if len(parts) != 3:
        raise ValueError(f"Unexpected SID filename format: {filename}")

    head, index_str, exposure_str = parts  # '10019', '00', '0.033s'

    # First char of head is split, rest is scene id
    if len(head) < 5:
        raise ValueError(f"Head '{head}' too short for SID pattern in: {filename}")

    split = int(head[0])          # '1' -> 1
    scene_id_str = head[1:]       # '0019'
    scene_id = int(scene_id_str)  # 19

    # Index (burst index)
    index = int(index_str)        # '00' -> 0

    # Exposure: remove trailing 's' and parse as float
    if not exposure_str.endswith("s"):
        raise ValueError(f"Exposure part '{exposure_str}' missing 's' in: {filename}")
    exposure_val_str = exposure_str[:-1]   # '0.033'
    exposure_seconds = float(exposure_val_str)

    return SIDFilenameInfo(
        split=split,
        scene_id=scene_id,
        scene_id_str=scene_id_str,
        index=index,
        index_str=index_str,
        exposure_seconds=exposure_seconds,
        exposure_str=exposure_str,
        extension=ext,
    )

# Example usage: parse the sample filename from the docstring and print the result.
# Note: the recorded cell output shows this branch runs inside the notebook, so
# the guard only matters if this code is imported as a module.
if __name__ == "__main__":
    info = parse_sid_filename("10019_00_0.033s.RAF")
    print(info)
    # Expected output (wrapped here for readability):
    # SIDFilenameInfo(split=1, scene_id=19, scene_id_str='0019',
    #                 index=0, index_str='00',
    #                 exposure_seconds=0.033, exposure_str='0.033s',
    #                 extension='RAF')

SIDFilenameInfo(split=1, scene_id=19, scene_id_str='0019', index=0, index_str='00', exposure_seconds=0.033, exposure_str='0.033s', extension='RAF')


In [5]:
def sample_one_sony_photo(
    path="/home/david.weijiecai/computational_imaging/ExposureDiffusion/datasets/SID/Sony/short",
):
    """Pick one random ``*.ARW`` file from ``path`` and return it post-processed.

    The chosen file path is printed so the sample can be identified afterwards.

    Args:
        path: Directory to sample from (string or path-like). Defaults to the
            SID Sony ``short`` folder this notebook was written against, so
            existing zero-argument calls behave as before.

    Returns:
        Whatever ``raw.postprocess()`` returns with rawpy's default settings
        (per the rawpy documentation, an RGB image array).

    Raises:
        IndexError: If ``path`` contains no ``*.ARW`` files. This is the same
            exception type ``random.choice`` raised on the empty list before,
            but the message now names the directory.
    """
    # Glob order is not guaranteed, so sort the candidates: with a seeded
    # `random` the same file is then chosen regardless of filesystem order.
    candidates = sorted(Path(path).glob("*.ARW"))
    if not candidates:
        raise IndexError(f"No *.ARW files found in: {path}")

    image_path = random.choice(candidates)
    print(image_path)
    with rawpy.imread(str(image_path)) as raw:
        rgb = raw.postprocess()
    return rgb

In [6]:
def show_image(img, figsize=(30, 30), title=None):
    """Display ``img`` on its own matplotlib figure with the axes hidden.

    Args:
        img: Image data accepted by ``Axes.imshow``.
        figsize: ``(width, height)`` passed to ``plt.subplots``. Defaults to
            the previously hard-coded ``(30, 30)`` so existing calls are
            unchanged; pass something smaller for quick previews.
        title: Optional text set as the axes title. Nothing is set when None.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=figsize)  # new figure + one axes
    ax.imshow(img)
    if title is not None:
        ax.set_title(title)
    ax.axis("off")
    plt.show()

In [7]:
def read_raw_img(file_path):
    """Open a RAW file with rawpy and return its post-processed image.

    Args:
        file_path: Location of the RAW file; converted with ``str()`` before
            being handed to ``rawpy.imread``, so path objects are accepted.

    Returns:
        The result of ``postprocess()`` called with no arguments (per the
        rawpy documentation, an RGB image array).
    """
    source = str(file_path)
    # Returning from inside the `with` still runs the context manager's exit.
    with rawpy.imread(source) as handle:
        return handle.postprocess()

In [8]:
# Build a table with one row per Sony RAW file: the fields parsed from its
# filename plus the full path it was found at.

# NOTE(review): `mapping` is not used in the cells visible here; presumably it
# is filled further down. Verify before removing.
mapping = defaultdict(list)

root_path = Path("/home/david.weijiecai/computational_imaging/ExposureDiffusion/datasets/SID/Sony")

files = []
# Walk the short-exposure folder first, then the long-exposure one, so rows
# from `short` precede rows from `long` in the resulting frame.
for subdir in ("short", "long"):
    for raw in (root_path / subdir).glob("*.ARW"):
        info = parse_sid_filename(str(raw))
        row = asdict(info)
        row["file_path"] = str(raw)
        files.append(row)

df = pd.DataFrame(files)

In [9]:
# Paths of every indexed file whose parsed scene id is 37.
sample_photos = df.loc[df.scene_id == 37, "file_path"].tolist()

In [None]:
# Decode and display each selected file in turn. The path is printed first so
# every rendered figure can be matched to the file it came from.
for s in sample_photos:
    print(s)
    show_image(read_raw_img(str(s)))

In [10]:
# Display the file index: one row per *.ARW file found under short/ and long/.
df

Unnamed: 0,split,scene_id,scene_id_str,index,index_str,exposure_seconds,exposure_str,extension,file_path
0,0,49,0049,1,01,0.04,0.04s,ARW,/home/david.weijiecai/computational_imaging/Ex...
1,0,145,0145,3,03,0.10,0.1s,ARW,/home/david.weijiecai/computational_imaging/Ex...
2,0,37,0037,2,02,0.10,0.1s,ARW,/home/david.weijiecai/computational_imaging/Ex...
3,0,157,0157,4,04,0.10,0.1s,ARW,/home/david.weijiecai/computational_imaging/Ex...
4,0,67,0067,0,00,0.10,0.1s,ARW,/home/david.weijiecai/computational_imaging/Ex...
...,...,...,...,...,...,...,...,...,...
2923,0,70,0070,0,00,10.00,10s,ARW,/home/david.weijiecai/computational_imaging/Ex...
2924,0,36,0036,0,00,10.00,10s,ARW,/home/david.weijiecai/computational_imaging/Ex...
2925,0,164,0164,0,00,30.00,30s,ARW,/home/david.weijiecai/computational_imaging/Ex...
2926,0,47,0047,0,00,10.00,10s,ARW,/home/david.weijiecai/computational_imaging/Ex...


In [11]:
# Number of distinct scene ids present in the file index.
df.scene_id.nunique()

231