In [1]:
!ls /home/david.weijiecai/computational_imaging/Sony_test_list.txt

/home/david.weijiecai/computational_imaging/Sony_test_list.txt


In [2]:
!ls /home/david.weijiecai/computational_imaging/ExposureDiffusion/datasets/SID

Sony	      Sony_blur_v2	  Sony_train_list.txt
Sony_blur_v1  Sony_test_list.txt  Sony_val_list.txt


In [3]:
# !cat /home/david.weijiecai/computational_imaging/ExposureDiffusion/datasets/SID/Sony_test_list.txt | head -n 10

In [4]:
# !cat /home/david.weijiecai/computational_imaging/ExposureDiffusion/dataset/Sony_test.txt

In [5]:
import pandas as pd
import os
from pathlib import Path

In [6]:


def parse_image_pairs_file(path: str) -> pd.DataFrame:
    """
    Parse a text file containing pairs of image filenames into a DataFrame.

    Expected line format (whitespace-separated):
        dark_image bright_image

    Returns a DataFrame with two columns:
        - 'dark_image'
        - 'bright_image'
    """
    dark_images = []
    bright_images = []

    with open(path, "r") as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            # Skip empty lines
            if not line:
                continue

            parts = line.split()
            if len(parts) < 2:
                raise ValueError(
                    f"Line {line_no} in {path!r} doesn't have two filenames: {line!r}"
                )

            dark, bright = parts[0], parts[1]
            dark_images.append(dark)
            bright_images.append(bright)

    df = pd.DataFrame({
        "dark_image": dark_images,
        "bright_image": bright_images,
    })
    return df

In [7]:

def parse_image_meta_file(path: str) -> pd.DataFrame:
    """
    Parse a text file containing lines like:
        dark_path bright_path ISOxxx Fxx

    Example line:
        ./Sony/short/10003_05_0.1s.ARW ./Sony/long/10003_00_10s.ARW ISO200 F9

    Returns a DataFrame with columns:
        - dark_image       (str)
        - bright_image     (str)
        - iso              (int)
        - aperture         (float)
        - exposure_ratio   (float)  # bright_exposure / dark_exposure
    """

    def extract_exposure_seconds(image_path: str) -> float:
        """
        Extract exposure time in seconds from a filename like:
            .../10003_05_0.1s.ARW  -> 0.1
            .../10003_00_10s.ARW   -> 10.0
        """
        base = os.path.basename(image_path)        # e.g. '10003_05_0.1s.ARW'
        stem, _ = os.path.splitext(base)          # '10003_05_0.1s'
        exp_token = stem.split('_')[-1]           # '0.1s'
        if not exp_token.lower().endswith('s'):
            raise ValueError(f"Cannot parse exposure from {image_path!r}")
        exp_str = exp_token[:-1]                  # drop trailing 's'
        return float(exp_str)

    records = []

    with open(path, "r") as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue  # skip blank lines

            parts = line.split()
            if len(parts) < 4:
                raise ValueError(
                    f"Line {line_no} in {path!r} has fewer than 4 tokens: {line!r}"
                )

            dark_path, bright_path, iso_str, aperture_str = parts[:4]

            # ISO (e.g., 'ISO200' -> 200)
            if not iso_str.upper().startswith("ISO"):
                raise ValueError(
                    f"Line {line_no}: expected ISO token like 'ISO200', got {iso_str!r}"
                )
            iso_val = int(iso_str[3:])

            # Aperture (e.g., 'F9' -> 9.0)
            if not aperture_str.upper().startswith("F"):
                raise ValueError(
                    f"Line {line_no}: expected aperture token like 'F9', got {aperture_str!r}"
                )
            aperture_val = float(aperture_str[1:])

            # Exposure times from filenames
            dark_exp = extract_exposure_seconds(dark_path)
            bright_exp = extract_exposure_seconds(bright_path)
            exposure_ratio = bright_exp / dark_exp

            records.append(
                {
                    "dark_image": dark_path,
                    "bright_image": bright_path,
                    "iso": iso_val,
                    "aperture": aperture_val,
                    "exposure_ratio": exposure_ratio,
                }
            )

    return pd.DataFrame.from_records(records)

In [8]:
df_ed_test = parse_image_pairs_file("/home/david.weijiecai/computational_imaging/ExposureDiffusion/dataset/Sony_test.txt")
df_ed_test["scene_id"] = df_ed_test.dark_image.str.split("_").str[0]

In [9]:
df_ed_train = parse_image_pairs_file("/home/david.weijiecai/computational_imaging/ExposureDiffusion/dataset/Sony_train.txt")
df_ed_train["scene_id"] = df_ed_train.dark_image.str.split("_").str[0]

In [10]:
df2 = parse_image_meta_file("/home/david.weijiecai/computational_imaging/ExposureDiffusion/datasets/SID/Sony_test_list.txt")
df2["scene_id"] = df2.dark_image.str.split("_").str[0]

In [11]:
set(df_ed_train.scene_id).intersection(df2.scene_id)

set()

In [12]:
def filter_rows(suffixes, df):
    result = df.copy()
    for suffix in suffixes:
        result = result[~result["dark_image"].str.endswith(suffix, na=False)]
    return result.copy()

In [13]:
df2["dark_image_p"] = df2.dark_image.map(lambda x: Path(x).name)
df2["bright_image_p"] = df2.bright_image.map(lambda x: Path(x).name)
df2["scene_id"] = df2.dark_image.str.split("_").str[0]
# df2 = df2[~df2.scene_id.isin(["10034", "10045", "10172"])]
df2 = filter_rows(["10101_04_0.1s.ARW", "10105_06_0.1s.ARW"], df2)
# df2[["dark_image_p", "bright_image_p"]].to_csv(
#         "./dataset/Sony_test_new.txt",
#         sep=' ',
#         header=False,
#         index=False
#     )


In [18]:
df2_v2 = df2.copy()
df2_v2["test_v1"] = df2_v2.dark_image_p.str.replace("ARW", "tiff")
df2_v2 = df2_v2[~df2_v2.test_v1.str.endswith("10170_00_0.1s.tiff")]
df2_v2[["test_v1", "bright_image_p"]].to_csv(
        "./dataset/Sony_test_new_v2.txt",
        sep=' ',
        header=False,
        index=False
    )

In [29]:
df2["test_v1"]

0      10003_00_0.04s.tiff
1       10003_00_0.1s.tiff
2      10003_01_0.04s.tiff
3       10003_01_0.1s.tiff
4      10003_02_0.04s.tiff
              ...         
593    10228_05_0.04s.tiff
594    10228_06_0.04s.tiff
595    10228_07_0.04s.tiff
596    10228_08_0.04s.tiff
597    10228_09_0.04s.tiff
Name: test_v1, Length: 596, dtype: object

In [37]:
"10003_01_0.04s.ARW".split('_')[-1].split("s.")[0]

'0.04'

In [43]:
# df2.exposure_ratio.plot.hist()

In [None]:
"10101_04_0.1s.ARW"

In [53]:
df2

Unnamed: 0,dark_image,bright_image,iso,aperture,exposure_ratio,dark_image_p,bright_image_p,scene_id
0,./Sony/short/10003_00_0.04s.ARW,./Sony/long/10003_00_10s.ARW,200,9.0,250.0,10003_00_0.04s.ARW,10003_00_10s.ARW,./Sony/short/10003
1,./Sony/short/10003_00_0.1s.ARW,./Sony/long/10003_00_10s.ARW,200,9.0,100.0,10003_00_0.1s.ARW,10003_00_10s.ARW,./Sony/short/10003
2,./Sony/short/10003_01_0.04s.ARW,./Sony/long/10003_00_10s.ARW,200,9.0,250.0,10003_01_0.04s.ARW,10003_00_10s.ARW,./Sony/short/10003
3,./Sony/short/10003_01_0.1s.ARW,./Sony/long/10003_00_10s.ARW,200,9.0,100.0,10003_01_0.1s.ARW,10003_00_10s.ARW,./Sony/short/10003
4,./Sony/short/10003_02_0.04s.ARW,./Sony/long/10003_00_10s.ARW,200,9.0,250.0,10003_02_0.04s.ARW,10003_00_10s.ARW,./Sony/short/10003
...,...,...,...,...,...,...,...,...
593,./Sony/short/10228_05_0.04s.ARW,./Sony/long/10228_00_10s.ARW,12800,4.0,250.0,10228_05_0.04s.ARW,10228_00_10s.ARW,./Sony/short/10228
594,./Sony/short/10228_06_0.04s.ARW,./Sony/long/10228_00_10s.ARW,12800,4.0,250.0,10228_06_0.04s.ARW,10228_00_10s.ARW,./Sony/short/10228
595,./Sony/short/10228_07_0.04s.ARW,./Sony/long/10228_00_10s.ARW,12800,4.0,250.0,10228_07_0.04s.ARW,10228_00_10s.ARW,./Sony/short/10228
596,./Sony/short/10228_08_0.04s.ARW,./Sony/long/10228_00_10s.ARW,12800,4.0,250.0,10228_08_0.04s.ARW,10228_00_10s.ARW,./Sony/short/10228


In [38]:
df1

Unnamed: 0,dark_image,bright_image
0,10003_00_0.04s.ARW,10003_00_10s.ARW
1,10003_00_0.1s.ARW,10003_00_10s.ARW
2,10006_00_0.04s.ARW,10006_00_10s.ARW
3,10006_00_0.1s.ARW,10006_00_10s.ARW
4,10011_00_0.04s.ARW,10011_00_10s.ARW
...,...,...
88,10227_00_0.04s.ARW,10227_00_10s.ARW
89,10227_00_0.1s.ARW,10227_00_10s.ARW
90,10228_00_0.033s.ARW,10228_00_10s.ARW
91,10228_00_0.04s.ARW,10228_00_10s.ARW


In [41]:
df1["scene_id"] = df1.dark_image.str.split("_").str[0]

In [42]:
df1.scene_id.nunique()

50

In [46]:
df1.describe()

Unnamed: 0,dark_image,bright_image,scene_id
count,93,93,93
unique,93,50,50
top,10192_00_0.04s.ARW,10198_00_10s.ARW,10203
freq,1,3,3
