In [1]:
import os

import pandas as pd
import rasterio
from tqdm import tqdm

In [2]:
# Paths used by the cells below.
# NOTE(review): these are absolute, machine-specific locations; adjust them
# before running the notebook in another environment.
# Directory that is scanned for GeoTIFF (.tif) files
input_dir = "/net/data_ssd/tree_mortality_orthophotos/orthophotos/"
# Existing metadata table (CSV) that is matched to the files by "filename"
input_meta_file = "/net/data_ssd/tree_mortality_orthophotos/metadata_manual.copy.csv"
# Destination CSV for the per-file table built below
output_file = "/net/home/jmoehring/scratch/meta/metadata_manual_with_resolution.csv"

In [3]:
# Read the existing metadata file into a DataFrame; later cells look up rows
# in it by the "filename" column.
meta_df = pd.read_csv(input_meta_file)

In [4]:
# Print a summary of the metadata table: columns, non-null counts and dtypes
meta_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1536 entries, 0 to 1535
Data columns (total 18 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   filename                   1536 non-null   object 
 1   project_id                 1536 non-null   object 
 2   authors_image              1536 non-null   object 
 3   acquisition_date_month     1446 non-null   float64
 4   acquisition_date_day       1423 non-null   float64
 5   acquisition_date_year      1536 non-null   float64
 6   acquisition_date_precise   839 non-null    float64
 7   email                      0 non-null      float64
 8   label_type                 1059 non-null   object 
 9   label_source               1059 non-null   object 
 10  image_platform             1536 non-null   object 
 11  image_spectral_properties  854 non-null    object 
 12  citation_doi               132 non-null    object 
 13  label_quality              1061 non-null   float

In [5]:
# Build one row per GeoTIFF in input_dir: the raster's bounds, pixel
# dimensions and CRS, plus the label columns taken from the metadata table.
#
# Rows are gathered in a plain list and converted to a DataFrame once at the
# end. Growing a DataFrame with pd.concat inside the loop copies every
# previously collected row on each iteration.
update_columns = [
    "filename",
    "west",
    "east",
    "south",
    "north",
    "width",
    "height",
    "crs",
    "has_labels",
    "label_quality",
]
records = []
missing_meta = []  # GeoTIFFs that have no matching row in meta_df

# Sort the listing so the row order of the result does not depend on the
# arbitrary order in which os.listdir returns directory entries.
for filename in tqdm(sorted(os.listdir(input_dir))):
    # Only GeoTIFF files are processed; skip everything else early
    if not filename.endswith(".tif"):
        continue

    # Find the corresponding row in the metadata file (first match is used)
    meta_row = meta_df[meta_df["filename"] == filename]
    if meta_row.empty:
        # No metadata for this file: keep the raster information and leave
        # the label columns empty instead of failing with an IndexError.
        missing_meta.append(filename)
        has_labels = None
        label_quality = None
    else:
        has_labels = meta_row["has_labels"].values[0]
        label_quality = meta_row["label_quality"].values[0]

    # Read the raster header only; no pixel data is loaded here
    with rasterio.open(os.path.join(input_dir, filename)) as src:
        # Extract the bounds and the dataset metadata (width, height, crs)
        bounds = src.bounds
        file_meta = src.meta

    records.append(
        {
            "filename": filename,
            "west": bounds.left,
            "east": bounds.right,
            "south": bounds.bottom,
            "north": bounds.top,
            "width": file_meta["width"],
            "height": file_meta["height"],
            "crs": file_meta["crs"],
            "has_labels": has_labels,
            "label_quality": label_quality,
        }
    )

# Passing `columns` keeps the expected schema even when no GeoTIFF was found
update_df = pd.DataFrame(records, columns=update_columns)

if missing_meta:
    print(f"{len(missing_meta)} GeoTIFF(s) without a metadata row: {missing_meta}")

100%|██████████| 379/379 [00:02<00:00, 128.47it/s]


In [6]:
# Save the DataFrame to a CSV file; index=False omits the row index so the
# file contains only the data columns.
update_df.to_csv(output_file, index=False)