# Ensuring Consistency

**Activity Overview**: Ensure consistency by identifying and resolving conflicting values across datasets.

## Title: Customer Address Discrepancies

**Task**: Resolve customer address mismatches between the CRM and marketing databases.

**Steps**:
1. Compare customer addresses in the CRM with those in the marketing database.
2. Identify records with conflicting address information.
3. Propose a method to consolidate records with verified addresses.

In [1]:
# Write your code from here
import pandas as pd

# --- CRM sample records -------------------------------------------------
# Four customers whose addresses use abbreviated street suffixes.
crm_data = dict(
    customer_id=[1, 2, 3, 4],
    address=['123 Main St', '456 Oak St', '789 Pine St', '101 Tech Ave'],
)
crm_df = pd.DataFrame(data=crm_data)

# Persist the CRM records so the reconciliation step can read them from disk.
crm_df.to_csv('crm_customers.csv', index=False)
print("CRM customer data CSV created.")

# --- Marketing sample records -------------------------------------------
# Same customer ids; three of the four addresses spell the street suffix out,
# which is what produces the mismatches against the CRM data.
marketing_data = dict(
    customer_id=[1, 2, 3, 4],
    address=['123 Main Street', '456 Oak Street', '789 Pine St', '101 Tech Avenue'],
)
marketing_df = pd.DataFrame(data=marketing_data)

# Persist the marketing records alongside the CRM file.
marketing_df.to_csv('marketing_customers.csv', index=False)
print("Marketing customer data CSV created.")

CRM customer data CSV created.
Marketing customer data CSV created.


In [2]:
import pandas as pd

def load_data(file_path):
    """Load a CSV file into a DataFrame, reporting failures instead of raising.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The parsed DataFrame, or None when the file does not exist, is
        empty, or cannot be parsed. In each failure case a message is
        printed to stdout.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: The file {file_path} does not exist.")
    except pd.errors.EmptyDataError:
        # read_csv raises EmptyDataError (not ParserError) for a file with
        # no columns to parse, so it needs its own handler.
        print(f"Error: The file {file_path} is empty.")
    except pd.errors.ParserError:
        print("Error: The file could not be parsed.")
    return None

def identify_address_discrepancies(crm_df, marketing_df):
    """Return the customers whose CRM and marketing addresses differ.

    Both inputs must provide 'customer_id' and 'address' columns. They are
    outer-joined on 'customer_id', so a customer present in only one source
    is kept with a missing address on the other side and is therefore
    reported as a discrepancy too.

    Returns:
        The rows of the joined frame (columns 'customer_id', 'address_crm',
        'address_marketing') where the two addresses are not equal.
    """
    key_and_address = ['customer_id', 'address']
    crm_side = crm_df[key_and_address]
    marketing_side = marketing_df[key_and_address]

    # Outer join keeps ids that appear in only one of the two systems.
    combined = crm_side.merge(
        marketing_side,
        on='customer_id',
        how='outer',
        suffixes=('_crm', '_marketing'),
    )

    # Exact comparison; a missing value never equals anything, so it is flagged.
    is_mismatch = combined['address_crm'].ne(combined['address_marketing'])
    return combined[is_mismatch]

def resolve_address_discrepancies(merged_df):
    """Resolve address conflicts by preferring the CRM address.

    Rule applied: use 'address_crm' when it is present, otherwise fall back
    to 'address_marketing'. Rows where both are missing stay unresolved.

    Args:
        merged_df: DataFrame with 'address_crm' and 'address_marketing'
            columns. It is not modified.

    Returns:
        A tuple (resolved_df, unresolved_discrepancies): resolved_df is a
        copy of merged_df with an added 'resolved_address' column, and
        unresolved_discrepancies holds the rows of resolved_df whose
        'resolved_address' is still missing.
    """
    # Work on a copy: the argument is typically a filtered slice of another
    # frame, and assigning a column on it would mutate the caller's data and
    # trigger pandas' SettingWithCopyWarning.
    resolved_df = merged_df.copy()

    # Prefer the CRM address; fill gaps from the marketing address.
    resolved_df['resolved_address'] = resolved_df['address_crm'].fillna(
        resolved_df['address_marketing']
    )

    # Rows with no address in either source need manual review.
    unresolved_discrepancies = resolved_df[resolved_df['resolved_address'].isnull()]

    return resolved_df, unresolved_discrepancies

def main():
    """Run the CRM-versus-marketing address reconciliation end to end.

    Reads 'crm_customers.csv' and 'marketing_customers.csv', prints the
    customers whose addresses differ, applies the resolution rule, prints
    any rows still needing manual review, and writes the resolved rows to
    'resolved_customer_addresses.csv'. Does nothing further if either input
    fails to load (load_data returned None); writes no file when there are
    no discrepancies.
    """
    crm_data = load_data('crm_customers.csv')
    marketing_data = load_data('marketing_customers.csv')

    # Without both inputs there is nothing to compare.
    if crm_data is None or marketing_data is None:
        return

    discrepancies = identify_address_discrepancies(crm_data, marketing_data)
    if discrepancies.empty:
        print("No address discrepancies found.")
        return

    report_columns = ['customer_id', 'address_crm', 'address_marketing']
    print("Address discrepancies found:")
    print(discrepancies[report_columns])

    # Apply the resolution rule to the conflicting rows only.
    merged_data, unresolved = resolve_address_discrepancies(discrepancies)

    if unresolved.empty:
        print("All discrepancies have been resolved.")
    else:
        print("Unresolved address discrepancies that require manual review:")
        print(unresolved[report_columns])

    # Keep the resolved rows on disk for later analysis.
    merged_data.to_csv('resolved_customer_addresses.csv', index=False)

# Run the reconciliation only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Address discrepancies found:
   customer_id   address_crm address_marketing
0            1   123 Main St   123 Main Street
1            2    456 Oak St    456 Oak Street
3            4  101 Tech Ave   101 Tech Avenue
All discrepancies have been resolved.
