## Find Conflicting Values Across Datasets

**Description**: You have two datasets: `crm_customers.csv` and `erp_customers.csv` . Find customers with conflicting "email" information.

In [1]:
# Write your code from here
import pandas as pd

# Sample data for crm_customers.csv
crm_data = {
    'customer_id': [12345, 67890, 11223],
    'name': ['John Doe', 'Jane Smith', 'Alice Brown'],
    'email': ['john.doe@example.com', 'jane.smith@example.com', 'alice.brown@example.com']
}

# Create DataFrame for crm_customers
crm_df = pd.DataFrame(crm_data)

# Write to crm_customers.csv
crm_df.to_csv('crm_customers.csv', index=False)

# Sample data for erp_customers.csv
erp_data = {
    'customer_id': [12345, 67890, 11223],
    'name': ['John Doe', 'Jane Smith', 'Alice Brown'],
    'email': ['johnny.doe@example.com', 'jsmith@example.com', 'alice.brown@example.com']
}

# Create DataFrame for erp_customers
erp_df = pd.DataFrame(erp_data)

# Write to erp_customers.csv
erp_df.to_csv('erp_customers.csv', index=False)

print("CSV files have been generated: crm_customers.csv, erp_customers.csv")

import pandas as pd
import os

# Function to check if file exists
def check_file_exists(file_path):
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")
    
# Function to load datasets with error handling
def load_data(file_path, columns=None):
    try:
        check_file_exists(file_path)
        df = pd.read_csv(file_path, usecols=columns) if columns else pd.read_csv(file_path)
        return df
    except Exception as e:
        print(f"Error loading file {file_path}: {e}")
        return None

# Function to find conflicting emails
def find_conflicting_emails(crm_file, erp_file):
    # Columns to check for
    columns = ['product_id', 'email']

    # Load CRM and ERP datasets
    crm_df = load_data(crm_file, columns)
    erp_df = load_data(erp_file, columns)

    if crm_df is None or erp_df is None:
        print("Error: One or more files could not be loaded.")
        return
    
    # Merge datasets on 'email' column to find conflicts
    merged = pd.merge(crm_df, erp_df, on='email', how='outer', indicator=True)

    # Filter for conflicting emails (rows that are only present in one of the datasets)
    conflicting_emails = merged[merged['_merge'] != 'both']
    
    if conflicting_emails.empty:
        print("No conflicting emails found.")
    else:
        print("Conflicting emails found:")
        print(conflicting_emails)

# Specify file paths
crm_file = 'crm_customers.csv'
erp_file = 'erp_customers.csv'

# Call the function to find conflicting emails
find_conflicting_emails(crm_file, erp_file)

CSV files have been generated: crm_customers.csv, erp_customers.csv
Error loading file crm_customers.csv: Usecols do not match columns, columns expected but not found: ['product_id']
Error loading file erp_customers.csv: Usecols do not match columns, columns expected but not found: ['product_id']
Error: One or more files could not be loaded.
