In [None]:
import numpy as np
import pandas as pd

In [None]:
# Check Differences in Two Lists
def list_membership(list1, list2):
    """Report which items appear in only one of two lists.

    The differences are printed under their headers and also returned.
    Items keep the order in which they first appear in their source list,
    and duplicates are reported once.

    Parameters
    ----------
    list1, list2 : iterable of hashable items
        The two collections to compare.

    Returns
    -------
    list
        ``[in_list1_not_list2, in_list2_not_list1]`` - two lists.
    """
    members1 = set(list1)
    members2 = set(list2)

    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # report is stable from run to run (a plain set difference is not).
    list1_notlist2 = list(dict.fromkeys(item for item in list1 if item not in members2))
    list2_notlist1 = list(dict.fromkeys(item for item in list2 if item not in members1))

    print("In List 1 but not List 2:")
    print(list1_notlist2)
    print("")
    print("In List 2 not in List 1")
    print(list2_notlist1)
    print("")

    output_list = [list1_notlist2, list2_notlist1]
    return output_list

In [None]:
# Compare Two Datasets for the Specified Columns
# Requires ID columns and columns to be compared to have the same names
def _coerce_column_types(df, str_cols, num_cols):
    """Return a copy of ``df`` with ``str_cols`` cast to str and ``num_cols`` to numeric."""
    coerced = df.copy()  # never touch the caller's frame
    for str_col in str_cols:
        coerced[str_col] = coerced[str_col].astype(str)
    # Column-wise conversion; values that cannot be parsed become NaN.
    for num_col in num_cols:
        coerced[num_col] = pd.to_numeric(coerced[num_col], errors='coerce')
    return coerced


def _join_and_flag(left, right, list_id_cols, col_compare, left_suffix, right_suffix):
    """Left-join ``left`` onto ``right`` by the ID columns and add a boolean ``match`` column."""
    joined = left.merge(right, how="left", on=list_id_cols, suffixes=(left_suffix, right_suffix))
    left_values = joined[col_compare + left_suffix]
    right_values = joined[col_compare + right_suffix]
    # Two missing values count as a match. Testing isna() directly avoids a
    # sentinel fill value that could collide with a real value in the data.
    both_missing = left_values.isna() & right_values.isna()
    joined["match"] = left_values.eq(right_values).fillna(False).astype(bool) | both_missing
    return joined


def compare_datasets(df1, df2, list_id_cols, list_cols_compare, 
                     df1_str_cols, df1_num_cols, df2_str_cols, df2_num_cols,
                     df1_suffix, df2_suffix, full_output, left_right_all):
    """Compare two DataFrames column by column after joining them on ID columns.

    For every column in ``list_cols_compare`` the ID columns plus that column
    are taken from each frame, the frames are left-joined on the ID columns,
    and a boolean ``match`` column records whether the two values agree
    (two missing values count as agreeing). The inputs are not modified;
    type coercion is applied to internal copies.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames to compare. Both must contain the ID columns and the compared
        columns under the same names.
    list_id_cols : list of str
        Join keys.
    list_cols_compare : list of str
        Columns to compare, one join per column.
    df1_str_cols, df2_str_cols : list of str
        Columns cast to ``str`` before joining.
    df1_num_cols, df2_num_cols : list of str
        Columns converted with ``pd.to_numeric(errors='coerce')``.
    df1_suffix, df2_suffix : str
        Suffixes appended to the compared column's name in the joined output.
    full_output : bool
        If True, the full joined frames are returned ahead of the mismatch
        frames for each column.
    left_right_all : {"left", "right", "all"}
        "left" joins df1 onto df2, "right" joins df2 onto df1, "all" does both.

    Returns
    -------
    list of pandas.DataFrame
        For each compared column, in order: the full join(s) when
        ``full_output`` is True, then the mismatch-only frame(s).

    Raises
    ------
    AssertionError
        If ``full_output`` is not a bool or ``left_right_all`` is not one of
        the accepted values.
    """
    assert isinstance(full_output, bool), "Full_Output must be a boolean (True or False)."
    assert left_right_all in {"left", "right", "all"}, "Invalid Left_Right_All value. Expected 'left', 'right', or 'all'."

    # First Coerce Types to Reduce Ineffective Joins (on copies of the inputs)
    df1 = _coerce_column_types(df1, df1_str_cols, df1_num_cols)
    df2 = _coerce_column_types(df2, df2_str_cols, df2_num_cols)

    want_left = left_right_all in ("left", "all")
    want_right = left_right_all in ("right", "all")

    output_list_comparisons = []

    for col_compare in list_cols_compare:
        print("Comparing " + col_compare + " now.")
        id_col_and_col_compare_list = list(list_id_cols) + [col_compare]

        df1_short = df1[id_col_and_col_compare_list]
        df2_short = df2[id_col_and_col_compare_list]

        # Only build the join direction(s) that were actually requested.
        joins = []
        if want_left:
            joins.append(_join_and_flag(df1_short, df2_short, list_id_cols,
                                        col_compare, df1_suffix, df2_suffix))
        if want_right:
            joins.append(_join_and_flag(df2_short, df1_short, list_id_cols,
                                        col_compare, df2_suffix, df1_suffix))

        # Output order per column: full joins first (if requested), then mismatches.
        if full_output:
            output_list_comparisons.extend(joins)
        output_list_comparisons.extend(joined[~joined["match"]] for joined in joins)

        print("")

    print("Function Completed.")
    return output_list_comparisons

In [None]:
def save_xls(list_dfs, xls_path):
    """
    Write every DataFrame in ``list_dfs`` to its own sheet of one Excel workbook.

    Sheets are named ``sheet0``, ``sheet1``, ... following the order of the
    list. The workbook is written to ``xls_path``.
    """
    with pd.ExcelWriter(xls_path) as workbook:
        for position, frame in enumerate(list_dfs):
            sheet_label = 'sheet' + str(position)
            frame.to_excel(workbook, sheet_name=sheet_label)

Cash Needs

In [None]:
cashneeds_dbd = pd.read_csv("db-dev/cashneeds.csv")
cashneeds_db1 = pd.read_csv("db1/cashneeds.csv")

In [None]:
cashneeds_dbd

In [None]:
cashneeds_dbd.columns

In [None]:
cashneeds_db1.columns

In [None]:
cashneeds_output = compare_datasets(cashneeds_dbd, cashneeds_db1, ["SalesforceId", "ClientId", "ToYear", "FromYear"], ['EffectiveFrom',
       'Amount', 'SortOrder', 'Type', 'Description', 'Inflate'], 
                     ["EffectiveFrom", "Type", "Description"], ["Amount", "Inflate", "SortOrder"], ["EffectiveFrom", "Type", "Description"], ["Amount", "Inflate", "SortOrder"],
                     "_dbd", "_db1", False, "left")

In [None]:
save_xls(cashneeds_output, xls_path = "cashneeds.xlsx")

ClientList

In [None]:
clientlist_dbd = pd.read_csv("db-dev/clientlist.csv")
clientlist_db1 = pd.read_csv("db1/clientlist.csv")

In [None]:
clientlist_dbd

In [None]:
clientlist_dbd.columns

In [None]:
clientlist_db1.columns

In [None]:
clientlist_output = compare_datasets(clientlist_dbd, clientlist_db1, ["SalesforceId"], ['AccountName', 'Client1_FirstName', 'Client1_LastName',
       'Client1_Age', 'Client2_FirstName', 'Client2_LastName', 'Client2_Age',
       'MailingName', 'ClientId', 'InvestmentStrategy',
       'MinimumCapitalPreservation', 'YearsToProtect', 'MinimumHighIncome',
       'YearsToProtectHighIncome', 'AllocateToPrivateCredit'], 
       ['AccountName', 'Client1_FirstName', 'Client1_LastName', 'Client2_FirstName', 'Client2_LastName', 'MailingName', 'ClientId', 'InvestmentStrategy'], 
       ['Client1_Age', 'Client2_Age', 'MinimumCapitalPreservation', 'YearsToProtect', 'MinimumHighIncome',
       'YearsToProtectHighIncome', 'AllocateToPrivateCredit'], 
       ['AccountName', 'Client1_FirstName', 'Client1_LastName', 'Client2_FirstName', 'Client2_LastName', 'MailingName', 'ClientId', 'InvestmentStrategy'], 
       ['Client1_Age', 'Client2_Age', 'MinimumCapitalPreservation', 'YearsToProtect', 'MinimumHighIncome',
       'YearsToProtectHighIncome', 'AllocateToPrivateCredit'], "_dbd", "_db1", False, "left")

In [None]:
save_xls(clientlist_output, xls_path = "clientlist.xlsx")

Clients

In [None]:
clients_dbd = pd.read_csv("db-dev/clients.csv")
clients_db1 = pd.read_csv("db1/clients.csv")

In [None]:
clients_dbd

In [None]:
clients_dbd.columns

In [None]:
clients_db1.columns

In [None]:
clients_output = compare_datasets(clients_dbd, clients_db1, ["SalesforceId"], ['ClientId', 'Name', 'Wealth_Client', 'Wealth_Segment',
       'Office_Location', 'Managing_Director', 'Lead_Advisor',
       'Associate_Advisor', 'Support_Analyst', 'Client_Service_Coordinator',
       'Portfolio_Manager', 'Pod'], 
                     ['ClientId', 'Name', 'Wealth_Client', 'Wealth_Segment',
       'Office_Location', 'Managing_Director', 'Lead_Advisor',
       'Associate_Advisor', 'Support_Analyst', 'Client_Service_Coordinator',
       'Portfolio_Manager', 'Pod'], [], ['ClientId', 'Name', 'Wealth_Client', 'Wealth_Segment',
       'Office_Location', 'Managing_Director', 'Lead_Advisor',
       'Associate_Advisor', 'Support_Analyst', 'Client_Service_Coordinator',
       'Portfolio_Manager', 'Pod'], [],
                     "_dbd", "_db1", False, "left")

In [None]:
save_xls(clients_output, xls_path = "clients.xlsx")

FundLineups

In [None]:
fundlineups_dbd = pd.read_csv("db-dev/fundlineups.csv")
fundlineups_db1 = pd.read_csv("db1/fundlineups.csv")

In [None]:
fundlineups_dbd

In [None]:
fundlineups_dbd.columns

In [None]:
fundlineups_db1.columns

In [None]:
fundlineups_output = compare_datasets(fundlineups_dbd, fundlineups_db1, ["SalesforceId"], ['AmountInvested', 'Ticker', 'LineUpDate', 'ClientId', 'Name'], 
                     ['Ticker', 'LineUpDate', 'ClientId', 'Name'], ['AmountInvested'], ['Ticker', 'LineUpDate', 'ClientId', 'Name'], ['AmountInvested'],
                     "_dbd", "_db1", False, "left")

In [None]:
save_xls(fundlineups_output, xls_path = "fundlineups.xlsx")

ManualAssets

In [None]:
manualassets_dbd = pd.read_csv("db-dev/manualassets.csv")
manualassets_db1 = pd.read_csv("db1/manualassets.csv")

In [None]:
manualassets_dbd

In [None]:
manualassets_dbd.columns

In [None]:
manualassets_db1.columns

In [None]:
manualassets_output = compare_datasets(manualassets_dbd, manualassets_db1, ["SalesforceId"], ['Client_Id', 'Legacy_Name', 'Account_Type',
       'Sort_Order', 'Account_Number', 'Description', 'Updated_Value_Date',
       'Total_Value'], 
       ['Client_Id', 'Legacy_Name', 'Account_Type','Account_Number', 'Description', 'Updated_Value_Date'], 
       ['Sort_Order', 'Total_Value'], ['Client_Id', 'Legacy_Name', 'Account_Type','Account_Number', 'Description', 'Updated_Value_Date'], ['Sort_Order', 'Total_Value'],
       "_dbd", "_db1", False, "left")

In [None]:
save_xls(manualassets_output, xls_path = "manualassets.xlsx")

MrSettings

In [None]:
mrsettings_dbd = pd.read_csv("db-dev/mrsettings.csv")
mrsettings_db1 = pd.read_csv("db1/mrsettings.csv")

In [None]:
mrsettings_dbd

In [None]:

mrsettings_dbd.columns

In [None]:
mrsettings_db1.columns

In [None]:
mrsettings_output = compare_datasets(mrsettings_dbd, mrsettings_db1, ["SalesforceId"], ['ClientId', 'TargetWeightToAlternatives',
       'AllocateToBlueprint', 'ApplyTowardsCumulativeCashNeeds',
       'ApplyTowardsSPGoals', 'InvestmentStrategy',
       'MinimumCapitalPreservation', 'MinimumHighIncome', 'MiscellaneousA',
       'MiscellaneousB', 'MiscellaneousC', 'TaxRateAdjustment', 'Ticker1Name',
       'Ticker1Weight', 'Ticker2Name', 'Ticker2Weight', 'Ticker3Name',
       'Ticker3Weight', 'YearsToProtectCPP', 'YearsToProtectHIP'], 
                     ['ClientId', 'TargetWeightToAlternatives',
       'AllocateToBlueprint', 'ApplyTowardsCumulativeCashNeeds',
       'ApplyTowardsSPGoals', 'InvestmentStrategy',
       'MinimumCapitalPreservation', 'MinimumHighIncome', 'MiscellaneousA',
       'MiscellaneousB', 'MiscellaneousC', 'TaxRateAdjustment', 'Ticker1Name',
       'Ticker1Weight', 'Ticker2Name', 'Ticker2Weight', 'Ticker3Name',
       'Ticker3Weight', 'YearsToProtectCPP', 'YearsToProtectHIP'], [], ['ClientId', 'TargetWeightToAlternatives',
       'AllocateToBlueprint', 'ApplyTowardsCumulativeCashNeeds',
       'ApplyTowardsSPGoals', 'InvestmentStrategy',
       'MinimumCapitalPreservation', 'MinimumHighIncome', 'MiscellaneousA',
       'MiscellaneousB', 'MiscellaneousC', 'TaxRateAdjustment', 'Ticker1Name',
       'Ticker1Weight', 'Ticker2Name', 'Ticker2Weight', 'Ticker3Name',
       'Ticker3Weight', 'YearsToProtectCPP', 'YearsToProtectHIP'], [],
                     "_dbd", "_db1", False,"left")

In [None]:
save_xls(mrsettings_output, xls_path = "mrsettings.xlsx")

SecurityInfo

In [None]:
securityinfo_dbd = pd.read_csv("db-dev/securityinfo.csv")
securityinfo_db1 = pd.read_csv("db1/securityinfo.csv")

In [None]:
securityinfo_dbd

In [None]:
securityinfo_dbd.columns

In [None]:
securityinfo_db1.columns

In [None]:
securityinfo_output = compare_datasets(securityinfo_dbd, securityinfo_db1, ['Symbol'], ['SecurityDesc', 'SecurityType', 'CurrentPrice', 'Portfolio',
       'Component', 'SubComponent', 'HoldingsCurrentValue'], 
                     ['SecurityDesc', 'SecurityType', 'Portfolio',
       'Component', 'SubComponent'], ['CurrentPrice', 'HoldingsCurrentValue'], ['SecurityDesc', 'SecurityType', 'Portfolio',
       'Component', 'SubComponent'], ['CurrentPrice', 'HoldingsCurrentValue'],
                     "_dbd", "_db1", False, "left")

In [None]:
save_xls(securityinfo_output, xls_path = "securityinfo.xlsx")

SimulationAssumptions

In [None]:
simuassump_dbd = pd.read_csv("db-dev/simulationsassumptions.csv")
simuassump_db1 = pd.read_csv("db1/simulationsassumptions.csv")

In [None]:
simuassump_dbd

In [None]:
simuassump_dbd.columns

In [None]:
simuassump_db1.columns

In [None]:
simuassump_output = compare_datasets(simuassump_dbd, simuassump_db1, ["SalesforceId"], 
       ['ClientId', 'Simulation_Scenario', 'Simulation_Type','FromYear', 'ToYear', 'Amount', 'CashFlow_Type', 'CashFlow_Description', 'SortOrder', 'Inflate', 'ToAgeVFAGradYearEFA'], 
       ['ClientId', 'Simulation_Scenario', 'Simulation_Type','CashFlow_Type', 'CashFlow_Description'], 
       ['FromYear', 'ToYear', 'Amount','SortOrder', 'Inflate', 'ToAgeVFAGradYearEFA'], 
       ['ClientId', 'Simulation_Scenario', 'Simulation_Type','CashFlow_Type', 'CashFlow_Description'], 
       ['FromYear', 'ToYear', 'Amount','SortOrder', 'Inflate', 'ToAgeVFAGradYearEFA'],
                     "_dbd", "_db1", False, "left")

In [None]:
save_xls(simuassump_output, xls_path = "simuassump.xlsx")

Users

In [None]:
users_dbd = pd.read_csv("db-dev/users.csv")
users_db1 = pd.read_csv("db1/users.csv")

In [None]:
users_dbd

In [None]:
users_dbd.columns

In [None]:
users_db1.columns

In [None]:
users_output = compare_datasets(users_dbd, users_db1, ["SalesforceId"], ['Name', 'Email'], 
                     ['Name', 'Email'], [], ['Name', 'Email'], [],
                     "_dbd", "_db1", False, "left")

In [None]:
save_xls(users_output, xls_path = "users.xlsx")