In [15]:
def compare_excel_files(file1_path, file2_path):
    """Compare two Excel files cell-by-cell and print a human-readable report.

    Both files are read with ``pd.read_excel``. Leading/trailing whitespace is
    stripped from string column headers, and only the columns present in both
    files are compared, row by row, in positional order.

    Args:
        file1_path: Path of the first Excel file.
        file2_path: Path of the second Excel file.

    Returns:
        None. The outcome is printed:
          * a warning listing columns that exist in only one of the files
            (these columns are not compared),
          * an error if the files share no columns at all,
          * an error if the shared-column frames differ in shape,
          * the differing rows, or a success message when none differ.

    Any exception raised while reading or comparing is caught and printed
    rather than propagated, so a failing comparison does not abort the caller.
    """
    import pandas as pd

    def _strip_label(label):
        # Only string headers can carry stray whitespace. Using the `.str`
        # accessor instead would turn numeric headers into NaN (or raise when
        # every header is numeric), so non-strings are passed through as-is.
        return label.strip() if isinstance(label, str) else label

    try:
        df1 = pd.read_excel(file1_path).rename(columns=_strip_label)
        df2 = pd.read_excel(file2_path).rename(columns=_strip_label)

        cols1 = set(df1.columns)
        cols2 = set(df2.columns)

        # key=str keeps the ordering well-defined when headers mix strings
        # and numbers; for all-string headers it is the plain sorted order.
        only_in_1 = sorted(cols1 - cols2, key=str)
        only_in_2 = sorted(cols2 - cols1, key=str)
        common_cols = sorted(cols1 & cols2, key=str)

        # Surface columns that are excluded from the comparison so that a
        # "no differences" verdict is not mistaken for full equality.
        if only_in_1:
            print(f"⚠️ Columns only in {file1_path} (not compared): {only_in_1}")
        if only_in_2:
            print(f"⚠️ Columns only in {file2_path} (not compared): {only_in_2}")

        if not common_cols:
            print("❌ Files have no columns in common; nothing to compare.")
            return

        # Align column order and use a clean positional row index.
        df1 = df1[common_cols].reset_index(drop=True)
        df2 = df2[common_cols].reset_index(drop=True)

        if df1.shape != df2.shape:
            print("❌ Files have different shapes:")
            print(f"- {file1_path}: {df1.shape}")
            print(f"- {file2_path}: {df2.shape}")
            return

        # keep_shape=True yields NaN wherever the cells match; dropping the
        # all-NaN rows leaves only the rows with at least one difference.
        differences = df1.compare(df2, keep_shape=True, keep_equal=False)
        differences = differences.dropna(how="all")

        if differences.empty:
            print("✅ No differences found. Files are identical in the shared columns.")
        else:
            print("⚠️ Differences found in the following rows:")
            print(differences)

    except Exception as e:
        print(f"❌ Error comparing files: {e}")


In [16]:
# Check the ML-generated panel list against the system export.
compare_excel_files(
    "../data/processed/panel-list-ml.xlsx",
    "../data/processed/panel-list-system.xlsx",
)

✅ No differences found. Files are identical in the shared columns.


In [19]:
# Check the ML-generated project-area list against the system export.
compare_excel_files(
    "../data/processed/project-area-list-ml.xlsx",
    "../data/processed/project-area-list-system.xlsx",
)

✅ No differences found. Files are identical in the shared columns.
