In [3]:
import os

import pandas as pd
import rasterio
from tqdm import tqdm

In [4]:
# --- Paths used by this notebook ---------------------------------------
# Directory that is scanned for GeoTIFF (*.tif) files.
input_dir = "/net/data_ssd/tree_mortality_orthophotos/orthophotos/"

# Existing metadata CSV; read once and matched to the files by "filename".
input_meta_file = "/net/home/jmoehring/scratch/metadata_manual.database.v2.expanded.csv"

# Destination CSV for the combined per-file table.
output_file = "/net/home/jmoehring/scratch/metadata_manual_with_resolution.csv"

In [5]:
# Read the existing metadata CSV into a DataFrame.
# Later cells match rows on its "filename" column and copy the
# "has_labels" and "label_quality" values into the output table.
meta_df = pd.read_csv(input_meta_file)

In [6]:
# Schema of the output table: one row per GeoTIFF with its bounding box,
# pixel dimensions, CRS and the label fields taken from the metadata CSV.
update_columns = [
    "filename",
    "west",
    "east",
    "south",
    "north",
    "width",
    "height",
    "crs",
    "has_labels",
    "label_quality",
]

# Start from an empty frame that carries only the column layout.
update_df = pd.DataFrame(columns=update_columns)

In [7]:
# Build one record per GeoTIFF in `input_dir` and assemble the output table.
#
# Records are collected in a plain list and turned into a DataFrame once at
# the end: growing a DataFrame with pd.concat inside the loop copies the
# whole frame on every iteration, and re-running the cell would append
# duplicate rows to the previous result.
records = []
missing_meta = []  # .tif files that have no row in the metadata CSV

# Sorted so the row order of the output does not depend on the arbitrary
# ordering returned by os.listdir.
for filename in tqdm(sorted(os.listdir(input_dir))):
    # Skip anything that is not a GeoTIFF before doing any further work.
    if not filename.endswith(".tif"):
        continue

    filepath = os.path.join(input_dir, filename)

    # Read the bounding box and the raster metadata (width, height, CRS).
    with rasterio.open(filepath) as src:
        bounds = src.bounds
        file_meta = src.meta

    # Find the corresponding row in the metadata file. A file without a
    # metadata row gets empty label fields instead of raising IndexError.
    meta_row = meta_df[meta_df["filename"] == filename]
    if meta_row.empty:
        missing_meta.append(filename)
        has_labels = None
        label_quality = None
    else:
        # First match wins if the filename occurs more than once.
        has_labels = meta_row["has_labels"].values[0]
        label_quality = meta_row["label_quality"].values[0]

    records.append(
        {
            "filename": filename,
            "west": bounds.left,
            "east": bounds.right,
            "south": bounds.bottom,
            "north": bounds.top,
            "width": file_meta["width"],
            "height": file_meta["height"],
            "crs": file_meta["crs"],
            "has_labels": has_labels,
            "label_quality": label_quality,
        }
    )

# Reuse the column layout defined for `update_df` so the schema is kept
# even when no GeoTIFF was found.
update_df = pd.DataFrame(records, columns=list(update_df.columns))

if missing_meta:
    print(f"{len(missing_meta)} file(s) without a metadata row: {missing_meta}")

100%|██████████| 263/263 [00:01<00:00, 135.31it/s]


In [8]:
# Write the combined table to `output_file` as CSV, without the index column.
# NOTE(review): the output file name mentions "resolution", but no resolution
# column is written — only bounds, width and height. Confirm whether a
# resolution column is expected downstream.
update_df.to_csv(output_file, index=False)