In [1]:
import os
import pandas as pd
from tqdm import tqdm

In [2]:
# Input locations for this notebook.
# CSV with the per-image metadata; it is indexed by its "filename" column further down.
metadata_path = "/net/scratch/jmoehring/metadata_manual_with_resolution.csv"
# Root of the directory tree that is walked for tile images.
tiles_dir = "/net/scratch/jmoehring/tiles/"

In [3]:
# Load the metadata table from disk.
metadata_df = pd.read_csv(metadata_path)
# Transform the frame into a dict of dicts with the filename as key:
# {filename: {column: value, ...}}.
# orient="index" builds every inner dict row by row, so each column keeps its
# own dtype. Transposing the frame first would coerce all columns to one common
# dtype (e.g. an integer column turns into floats next to a float column).
# orient="index" requires unique keys, so for a repeated filename the last row
# is kept explicitly - the same row the transposed conversion ended up with.
metadata_dict = (
    metadata_df
    .drop_duplicates(subset="filename", keep="last")
    .set_index("filename")
    .to_dict(orient="index")
)

In [4]:
def process_file(file, root, metadata=None):
    """Build the register row for a single tile image.

    The path is read as ``<...>/<base name>/<resolution>/<x>_<y>.tif``: the
    directory holding the file gives the resolution and that directory's
    parent gives the name of the base file.

    Args:
        file: Path of the candidate file.
        root: Directory that contains ``file``; a trailing slash is tolerated.
        metadata: Optional mapping ``"<base name>.tif" -> metadata row``, where
            each row provides ``"label_quality"``. Defaults to the
            notebook-level ``metadata_dict``.

    Returns:
        A dict with the keys ``base_file_name``, ``file_path``, ``resolution``,
        ``x``, ``y`` and ``label_quality``, or ``None`` when ``file`` does not
        end in ``.tif`` or has ``"mask"`` in its name. ``resolution``, ``x``
        and ``y`` are returned as the strings found in the path.

    Raises:
        KeyError: ``metadata`` has no entry for the tile's base file.
        ValueError: The file name does not have the form ``x_y.tif``.
    """
    if metadata is None:
        # Backward-compatible default: the lookup table built earlier in the notebook.
        metadata = metadata_dict

    file_name = os.path.basename(file)
    if not file_name.endswith(".tif") or "mask" in file_name:
        return None

    # Normalise first so that a trailing (or doubled) slash in `root` cannot
    # shift which path component is read as resolution / base folder.
    resolution_dir = os.path.normpath(root)
    resolution = os.path.basename(resolution_dir)
    base_file_folder = os.path.basename(os.path.dirname(resolution_dir))
    base_file_name = f"{base_file_folder}.tif"

    if base_file_name not in metadata:
        raise KeyError(f"no metadata entry for {base_file_name!r} (tile: {file!r})")
    base_file_meta = metadata[base_file_name]

    # Extract x and y from a file name with the format "x_y.tif".
    coordinates = file_name.split(".")[0].split("_")
    if len(coordinates) != 2:
        raise ValueError(
            f"expected a tile name of the form 'x_y.tif', got {file_name!r}"
        )
    x, y = coordinates

    return {
        "base_file_name": base_file_name,
        "file_path": file,
        "resolution": resolution,
        "x": x,
        "y": y,
        "label_quality": base_file_meta["label_quality"],
    }


In [5]:
# Walk the whole tile tree and keep one register row per file that
# process_file accepts; it returns None for everything else.
register_rows = []
for root, _dirs, files in os.walk(tiles_dir):
    candidate_rows = (
        process_file(os.path.join(root, name), root) for name in files
    )
    register_rows.extend(row for row in candidate_rows if row)


In [6]:
# Assemble the collected row dicts into one table (one row per entry of register_rows).
register_df = pd.DataFrame(data=register_rows)

In [10]:
# Add a boolean column "original" which is True for all tiles that have the
# lowest resolution value among the tiles of the same base_file_name.
# "resolution" is taken from a directory name and is therefore a string; a
# plain string minimum is lexicographic ("10" < "5") and could flag the wrong
# tiles, so compare numerically whenever every value parses as a number.
resolution_values = pd.to_numeric(register_df["resolution"], errors="coerce")
if resolution_values.isna().any():
    # Some resolution names are not numeric: keep the plain string comparison.
    resolution_values = register_df["resolution"]
lowest_per_base_file = resolution_values.groupby(
    register_df["base_file_name"]
).transform("min")
register_df["original"] = resolution_values.eq(lowest_per_base_file)

In [8]:
# Write the finished tile register to disk, without the positional index column.
register_csv_path = "/net/scratch/jmoehring/tiles_register.csv"
register_df.to_csv(register_csv_path, index=False)