In [None]:
# CRM Data Audit - Python Script
import pandas as pd
# Read the two exports under audit: the legacy system dump and the new CRM dump.
# Both paths are relative to the notebook's working directory.
legacy_df = pd.read_csv(filepath_or_buffer='old_system_data.csv')
crm_df = pd.read_csv(filepath_or_buffer='new_crm_data.csv')

In [None]:
# Parse DateOfBirth into datetime64 in both frames so the columns share one dtype.
# errors='coerce' turns any value that cannot be parsed into NaT, which means
# such values show up as nulls in every later null-based check.
for _frame in (legacy_df, crm_df):
    _frame['DateOfBirth'] = pd.to_datetime(_frame['DateOfBirth'], errors='coerce')

In [None]:
# Completeness Check
# Count the null cells in every column of each dataset and print both tallies.
legacy_nulls = legacy_df.isna().sum()
crm_nulls = crm_df.isna().sum()
for _heading, _null_counts in (
    ('Missing fields in Legacy:', legacy_nulls),
    ('Missing fields in CRM:', crm_nulls),
):
    print(_heading, _null_counts)

In [None]:
# Accuracy Check
# Compare every shared field for the customers that exist in both systems.
# A bare `legacy_common != crm_common` raises ValueError unless both frames
# carry identical row and column labels in the same order, so the two sides
# are aligned explicitly before they are compared.
common_ids = set(legacy_df['CustomerID']) & set(crm_df['CustomerID'])

# Keep only the first record per CustomerID so each index is unique and can be
# realigned; repeated IDs are reported separately by the Duplicate Check cell.
legacy_common = (
    legacy_df[legacy_df['CustomerID'].isin(common_ids)]
    .drop_duplicates(subset='CustomerID')
    .set_index('CustomerID')
)
crm_common = (
    crm_df[crm_df['CustomerID'].isin(common_ids)]
    .drop_duplicates(subset='CustomerID')
    .set_index('CustomerID')
)

# Restrict the comparison to columns present in both exports, then put the
# CRM rows and columns in exactly the legacy frame's order.
shared_columns = legacy_common.columns.intersection(crm_common.columns)
legacy_common = legacy_common[shared_columns]
crm_common = crm_common.reindex(index=legacy_common.index, columns=shared_columns)

# A cell is a mismatch when the two values differ, except when both sides are
# missing (NaN never compares equal to NaN, so that case is masked out).
mismatches = legacy_common.ne(crm_common) & ~(legacy_common.isna() & crm_common.isna())
print('Field mismatches:', mismatches.sum())

In [None]:
# Integrity Check
# CustomerIDs that appear in the CRM export but not in the legacy export.
extra_crm_ids = set(crm_df['CustomerID']).difference(legacy_df['CustomerID'])
print('Extra CRM records:', extra_crm_ids)

In [None]:
# Duplicate Check
# Count rows whose CustomerID repeats an earlier row's CustomerID
# (the first occurrence of each ID is not counted).
print('Duplicates in Legacy:', legacy_df['CustomerID'].duplicated().sum())
print('Duplicates in CRM:', crm_df['CustomerID'].duplicated().sum())