## Find Conflicting Values Across Datasets

**Description**: You have two datasets, `crm_customers.csv` and `erp_customers.csv`. Find the customers whose "email" values conflict between the two.

In [1]:
import pandas as pd

def find_conflicting_emails(crm_file, erp_file, customer_id_col="customer_id", email_col="email"):
    """
    Find customers whose email differs between the CRM and ERP datasets.

    The two files are joined on the customer ID with pandas' default (inner)
    merge, so only customers present in both files are compared. A customer
    is reported when the two emails differ, or when the email is missing in
    exactly one of the files. A customer whose email is missing in *both*
    files is not reported, because there is nothing to disagree about.

    Args:
        crm_file (str): Path to the CSV file containing CRM customer data.
        erp_file (str): Path to the CSV file containing ERP customer data.
        customer_id_col (str, optional): Name of the customer ID column. Defaults to "customer_id".
        email_col (str, optional): Name of the email column. Defaults to "email".

    Returns:
        pandas.DataFrame: The merged rows with conflicting emails (the email
                          columns are suffixed "_crm" and "_erp"), or an empty
                          DataFrame if no conflicts are found.
                          Returns None (after printing a message) if a file
                          cannot be read or parsed, or if a required column
                          is missing.
    """
    try:
        # Read the CSV files into Pandas DataFrames
        crm_df = pd.read_csv(crm_file)
        erp_df = pd.read_csv(erp_file)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as e:
        # OSError covers FileNotFoundError as well as permission problems;
        # the pandas errors cover empty and malformed CSV content.
        print(f"Error: {e}")
        return None

    # Check if the required columns exist in both DataFrames
    if customer_id_col not in crm_df.columns or email_col not in crm_df.columns:
        print(f"Error: Missing required columns in CRM file. Expected '{customer_id_col}' and '{email_col}'.")
        return None
    if customer_id_col not in erp_df.columns or email_col not in erp_df.columns:
        print(f"Error: Missing required columns in ERP file. Expected '{customer_id_col}' and '{email_col}'.")
        return None

    # Merge the DataFrames on the customer ID
    merged_df = pd.merge(crm_df, erp_df, on=customer_id_col, suffixes=('_crm', '_erp'))

    crm_emails = merged_df[f'{email_col}_crm']
    erp_emails = merged_df[f'{email_col}_erp']

    # A plain `!=` treats NaN as unequal to NaN, which would flag customers
    # whose email is missing in both files. Handle missing values explicitly.
    crm_missing = crm_emails.isna()
    erp_missing = erp_emails.isna()
    both_missing = crm_missing & erp_missing
    one_missing = crm_missing ^ erp_missing

    # Find rows where emails do not match
    is_conflict = (crm_emails.ne(erp_emails) | one_missing) & ~both_missing

    return merged_df[is_conflict]



def main():
    """
    Run the conflicting-email check on the two customer files and print the results.

    For demonstration, a small sample CSV is created at each path that does
    not exist yet. Files that already exist are left untouched, so pointing
    the paths at real data never overwrites it.
    """
    # Provide the paths to your CSV files
    crm_file = 'crm_customers.csv'  # Replace with your actual file path
    erp_file = 'erp_customers.csv'  # Replace with your actual file path

    # Dummy CSV contents for demonstration
    demo_files = [
        (crm_file, "customer_id,email,name\n1,alice@example.com,Alice\n2,bob@example.com,Bob\n3,charlie@example.com,Charlie\n4,david@example.com,David"),
        (erp_file, "customer_id,email,address\n1,alice@example.com,Address1\n2,robert@example.com,Address2\n3,charlie@example.com,Address3\n4,david@example.org,Address4"),
    ]
    for path, contents in demo_files:
        try:
            # Mode 'x' fails if the file exists, so real data is never
            # clobbered (mode 'w' would truncate it silently).
            with open(path, 'x') as f:
                f.write(contents)
        except FileExistsError:
            # Deliberate: keep the user's existing file and use it as-is.
            pass

    # Find conflicting emails
    conflicting_emails_df = find_conflicting_emails(crm_file, erp_file)

    # Print the results
    if conflicting_emails_df is None:
        print("Error occurred during the email conflict check. Please check the file paths and column names.")
    elif conflicting_emails_df.empty:
        print("No customers with conflicting email information found.")
    else:
        print("Customers with conflicting email information:")
        print(conflicting_emails_df.to_string(index=False))



# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


Customers with conflicting email information:
 customer_id         email_crm  name          email_erp  address
           2   bob@example.com   Bob robert@example.com Address2
           4 david@example.com David  david@example.org Address4
