In [5]:
import pandas as pd
import sys

def merge_csv_files(csv1_path: str, csv2_path: str, output_path: str = "merged.csv", 
                    standardise_cols: bool = False, deduplicate: bool = False) -> pd.DataFrame:
    """Stack the rows of two CSV files that share the same columns and save the result.

    Args:
        csv1_path: Path of the first CSV file; its column order is used for the output.
        csv2_path: Path of the second CSV file; its columns are reordered to match the first.
        output_path: Destination of the merged CSV. Missing parent directories are created.
        standardise_cols: If True, strip surrounding whitespace from the column names of
            both files and lowercase them before comparing.
        deduplicate: If True, drop rows that are exact duplicates after concatenation.

    Returns:
        The merged DataFrame (rows of the first file followed by rows of the second).

    Raises:
        SystemExit: With exit code 1 (after printing details to stderr) when the two
            files do not have the same set of column names, or when a file ends up
            with repeated column names after standardisation.
    """
    from pathlib import Path  # local import: only needed to prepare the output directory

    # Load the two CSV files
    csv1 = pd.read_csv(csv1_path)
    csv2 = pd.read_csv(csv2_path)

    # Optionally standardise column names: lowercase and strip whitespace
    if standardise_cols:
        csv1.columns = [str(col).strip().lower() for col in csv1.columns]
        csv2.columns = [str(col).strip().lower() for col in csv2.columns]

    # Standardising can collapse distinct headers (e.g. "A" and "a ") into one name.
    # The set comparison below cannot see that, and selecting csv2[csv1.columns]
    # would then silently duplicate a column, so stop here instead.
    has_repeated_names = False
    for path, frame in ((csv1_path, csv1), (csv2_path, csv2)):
        repeated = frame.columns[frame.columns.duplicated()].unique().tolist()
        if repeated:
            print(f"ERROR: Duplicate column names in {path}:", repeated, file=sys.stderr)
            has_repeated_names = True
    if has_repeated_names:
        sys.exit(1)

    # Compare column sets
    set1 = set(csv1.columns)
    set2 = set(csv2.columns)

    if set1 != set2:
        only_in_1 = set1 - set2
        only_in_2 = set2 - set1

        print("ERROR: Column names do not match between the two CSV files.\n", file=sys.stderr)
        if only_in_1:
            print(f"Columns only in {csv1_path}:", only_in_1, file=sys.stderr)
        if only_in_2:
            print(f"Columns only in {csv2_path}:", only_in_2, file=sys.stderr)
        sys.exit(1)

    # Reorder csv2 columns to match csv1
    csv2 = csv2[csv1.columns]

    # Merge
    merged = pd.concat([csv1, csv2], ignore_index=True)

    # Optional deduplication (assignment instead of inplace=True; same rows are kept)
    if deduplicate:
        before = len(merged)
        merged = merged.drop_duplicates()
        after = len(merged)
        print(f"Removed {before - after} duplicate rows.")

    # Save; create the destination directory first so a fresh checkout does not fail
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    merged.to_csv(output_path, index=False)
    print("\nMerge complete:")
    print(f"  → {csv1_path}: {len(csv1)} rows")
    print(f"  → {csv2_path}: {len(csv2)} rows")
    print(f"  = Total rows in merged file: {len(merged)}")
    print(f"Saved merged CSV to '{output_path}'")

    return merged

In [None]:
# Input result files to combine, and the location of the combined file.
csv1_path = "results_1B/scenarios_results.csv"
csv2_path = "results_3B/scenarios_results.csv"

output_path = "results/scenarios_results.csv"

if __name__ == "__main__":
    # Merge with header standardisation and duplicate-row removal switched on.
    merge_csv_files(
        csv1_path,
        csv2_path,
        output_path,
        standardise_cols=True,
        deduplicate=True,
    )
    

Removed 0 duplicate rows.

Merge complete:
  → results_1B/scenarios_results.csv: 113 rows
  → results_3B/scenarios_results.csv: 24 rows
  = Total rows in merged file: 137
Saved merged CSV to 'results/scenarios_results.csv'


In [7]:
# Read the first input CSV (path assigned in an earlier cell) into its own DataFrame.
csv_1 = pd.read_csv(csv1_path)