- recursively gather all `.csv` files in your subdirectories
- concatenate them into one big dataframe

You can do this easily in Python with `pandas` and `glob`

In [2]:
import pandas as pd
import glob
import os

def collect_csvs(suffix="_defect_area.csv"):
    # Find all matching csv files
    csv_files = glob.glob(f"**/*{suffix}", recursive=True)

    # Exclude already combined files
    csv_files = [f for f in csv_files if not os.path.basename(f).startswith("combined")]
    
    dfs = []
    for file in csv_files:
        df = pd.read_csv(file)
        dfs.append(df)

    combined_df = pd.concat(dfs, ignore_index=True)

    # Show preview
    print(combined_df.head())
    print(combined_df.tail())
    
    # Output filename derived from suffix
    output_file = f"combined_{suffix}"
    combined_df.to_csv(output_file, index=False)

    print(f"Collected {len(csv_files)} files into {output_file}")

In [3]:
# Example usage:
suffix = "_defect_area.csv"

collect_csvs(suffix)

      sample  defect_area_mm2
0  Gomez_2p3        10.507689
1  Gomez_2p4         5.012462
2  Gomez_4p6         7.903992
3  Gomez_3p3         9.905482
4  Gomez_2p2        12.377603
       sample  defect_area_mm2
23  Gomez_3p6        11.463010
24  Gomez 5p5        12.406270
25  Gomez 5p2         7.171833
26  Gomez_1p3        11.815759
27  Gomez_4p3        10.306166
Collected 28 files into combined__defect_area.csv


# Merge the combined CSV files 

Reshape the data into wide format

In [4]:
import pandas as pd

# Load the CSV files
defect_area_df = pd.read_csv("combined__defect_area.csv")
original_defect_df = pd.read_csv("combined__original_defect.csv")
volume_ingrowth_df = pd.read_csv("combined__volume_ingrowth.csv")

# Pivot the volume ingrowth data to wide format
volume_wide_df = volume_ingrowth_df.pivot(
    index="sample", 
    columns="segmentName", 
    values="volume_mm3"
).reset_index()

# Merge all dataframes on 'sample'
merged_df = defect_area_df.merge(original_defect_df, on="sample", how="outer")
merged_df = merged_df.merge(volume_wide_df, on="sample", how="outer")

# Save the result to a new CSV file
merged_df.to_csv("combined__wide_format.csv", index=False)