In [None]:
import pandas as pd

In [None]:
# Check Differences in Two Lists
def list_membership(list1, list2):
    """Report the items that appear in only one of two lists.

    Each one-sided difference is printed under its heading and both are
    returned. Duplicates are collapsed and items are kept in the order in
    which they first appear in their source list, so the output is
    deterministic.

    Args:
        list1: First iterable of hashable items.
        list2: Second iterable of hashable items.

    Returns:
        A two-element list ``[in_list1_only, in_list2_only]``.
    """
    members1 = set(list1)
    members2 = set(list2)

    # dict.fromkeys de-duplicates while preserving first-seen order, unlike
    # a plain set difference whose ordering is arbitrary.
    list1_notlist2 = [item for item in dict.fromkeys(list1) if item not in members2]
    list2_notlist1 = [item for item in dict.fromkeys(list2) if item not in members1]

    print("In List 1 but not List 2:")
    print(list1_notlist2)
    print("")
    print("In List 2 not in List 1")
    print(list2_notlist1)
    print("")

    return [list1_notlist2, list2_notlist1]

In [None]:
# Compare Two Datasets, Column by Column, for the Specified Columns
# Requires the ID columns and the columns being compared to have the same names in both datasets
def _coerce_column_types(df, str_cols, num_cols):
    """Return a copy of ``df`` with ``str_cols`` cast to str and ``num_cols`` to numeric."""
    # Work on a copy so the caller's DataFrame is never modified.
    coerced = df.copy()
    for col in str_cols:
        coerced[col] = coerced[col].astype(str)
    # Convert column by column: each column keeps its own dtype and an empty
    # list of numeric columns is simply a no-op.
    for col in num_cols:
        coerced[col] = pd.to_numeric(coerced[col], errors="coerce")
    return coerced


def _join_and_flag(left, right, list_id_cols, col_compare, left_suffix, right_suffix):
    """Left-join ``left`` to ``right`` on the ID columns and flag matching values.

    Returns a tuple ``(joined, mismatches)`` where ``joined`` carries a boolean
    ``match`` column and ``mismatches`` holds only the rows where it is False.
    """
    joined = left.merge(right, how="left", on=list_id_cols,
                        suffixes=(left_suffix, right_suffix))
    left_vals = joined[f"{col_compare}{left_suffix}"]
    right_vals = joined[f"{col_compare}{right_suffix}"]

    # Two values match when they are equal, or when both are missing.
    # Testing missingness explicitly (rather than filling with a sentinel
    # such as '-') keeps a genuine '-' value from matching a missing one.
    equal = left_vals.eq(right_vals).fillna(False).astype(bool)
    both_missing = left_vals.isna() & right_vals.isna()
    joined["match"] = equal | both_missing

    return joined, joined[~joined["match"]]


def compare_datasets(df1, df2, list_id_cols, list_cols_compare, 
                     df1_str_cols, df1_num_cols, df2_str_cols, df2_num_cols,
                     df1_suffix, df2_suffix, full_output, left_right_all):
    """Compare selected columns of two DataFrames row by row, joined on ID columns.

    For every column in ``list_cols_compare`` the two frames are reduced to
    the ID columns plus that column, left-joined in the requested
    direction(s), and given a boolean ``match`` column. A row matches when
    the two values are equal or both are missing. The input DataFrames are
    not modified; type coercion is applied to internal copies.

    Args:
        df1: First DataFrame.
        df2: Second DataFrame.
        list_id_cols: List of column names to join on (present in both frames).
        list_cols_compare: List of column names to compare (present in both
            frames, and not among the ID columns).
        df1_str_cols: Columns of ``df1`` to cast to ``str`` before joining.
        df1_num_cols: Columns of ``df1`` to convert with ``pd.to_numeric``
            (unparseable values become NaN).
        df2_str_cols: Columns of ``df2`` to cast to ``str`` before joining.
        df2_num_cols: Columns of ``df2`` to convert with ``pd.to_numeric``
            (unparseable values become NaN).
        df1_suffix: Suffix appended to the compared column coming from ``df1``.
        df2_suffix: Suffix appended to the compared column coming from ``df2``.
        full_output: If True, the complete joined frames are included in the
            output ahead of the mismatch-only frames.
        left_right_all: ``"left"`` (df1 joined to df2), ``"right"`` (df2
            joined to df1) or ``"all"`` (both directions).

    Returns:
        A flat list of DataFrames. For each compared column, in order: the
        full joined frame(s) if ``full_output`` is True (df1-to-df2 first),
        followed by the mismatch-only frame(s) (df1-to-df2 first).

    Raises:
        AssertionError: If ``full_output`` is not a bool or ``left_right_all``
            is not one of the accepted values.
    """
    assert isinstance(full_output, bool), "Full_Output must be a boolean (True or False)."
    assert left_right_all in {"left", "right", "all"}, "Invalid Left_Right_All value. Expected 'left', 'right', or 'all'."

    # First coerce types to reduce ineffective joins (on copies of the inputs).
    df1 = _coerce_column_types(df1, df1_str_cols, df1_num_cols)
    df2 = _coerce_column_types(df2, df2_str_cols, df2_num_cols)

    # Only compute the join directions that were actually requested.
    want_left = left_right_all in {"left", "all"}
    want_right = left_right_all in {"right", "all"}

    output_list_comparisons = []

    for col_compare in list_cols_compare:
        print(f"Comparing {col_compare} now.")
        keep_cols = [*list_id_cols, col_compare]
        df1_short = df1[keep_cols]
        df2_short = df2[keep_cols]

        full_frames = []
        mismatch_frames = []
        if want_left:
            joined, mismatches = _join_and_flag(
                df1_short, df2_short, list_id_cols, col_compare, df1_suffix, df2_suffix)
            full_frames.append(joined)
            mismatch_frames.append(mismatches)
        if want_right:
            joined, mismatches = _join_and_flag(
                df2_short, df1_short, list_id_cols, col_compare, df2_suffix, df1_suffix)
            full_frames.append(joined)
            mismatch_frames.append(mismatches)

        # Full joins (when requested) come before the mismatch-only frames.
        if full_output:
            output_list_comparisons.extend(full_frames)
        output_list_comparisons.extend(mismatch_frames)

        print("")

    print("Function Completed.")
    return output_list_comparisons

In [None]:
def save_xls(list_dfs, xls_path):
    """
    Write every DataFrame in ``list_dfs`` to one Excel workbook at ``xls_path``.

    Each DataFrame gets its own sheet, named ``sheet0``, ``sheet1``, ... in
    the order the DataFrames appear in the list.
    """
    with pd.ExcelWriter(xls_path) as workbook:
        for position, frame in enumerate(list_dfs):
            tab_name = "sheet{}".format(position)
            frame.to_excel(workbook, sheet_name=tab_name)