# Measuring Completeness

**Activity Overview**: Evaluate data completeness by checking missing data rates and handling partially available records.

## Title: Customer Profiles

**Task**: Calculate the missing data rate for customer profiles.

**Steps**:
1. List all required fields for a complete customer profile (e.g., name, address, email, phone number).
2. Analyze the dataset to count how many profiles have missing fields.
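3. Calculate the missing data rate: the number of missing required-field values divided by the total number of required-field values (profiles × required fields), expressed as a percentage.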

In [2]:
import os
import pandas as pd
from pathlib import Path
from tkinter import filedialog
from tkinter import Tk
import numpy as np

def get_file_path(default="customer_profiles.csv"):
    """Return the path of the customer profiles file to analyse.

    If ``default`` already exists it is returned without prompting.
    Otherwise a Tk file-open dialog is shown, starting in the current
    working directory and offering CSV and Excel files.

    Args:
        default: Path returned as-is when it exists, and used as the
            fallback when no dialog can be shown or nothing is selected.

    Returns:
        The path chosen in the dialog, or ``default`` when the dialog is
        cancelled or Tk cannot be used. The returned path is not
        guaranteed to exist; callers must check.
    """
    if Path(default).exists():
        return default

    # Imported inside the function so that an unavailable tkinter takes
    # the same fallback path as an unavailable display.
    try:
        from tkinter import TclError, Tk, filedialog
    except ImportError:
        return default

    try:
        root = Tk()
    except TclError:
        # Raised when Tk cannot start, e.g. no display on a headless host.
        return default

    try:
        root.withdraw()  # hide the empty root window; show only the dialog
        file_path = filedialog.askopenfilename(
            initialdir=os.getcwd(),
            title="Select customer profiles file",
            filetypes=[("CSV", "*.csv"), ("Excel", "*.xls *.xlsx")]
        )
    except TclError:
        return default
    finally:
        # Always release the Tk interpreter and its hidden window.
        root.destroy()

    # A cancelled dialog yields an empty value; fall back to the default.
    return file_path if file_path else default

def create_sample_data(path):
    """Write a 100-row sample customer CSV to ``path`` and return ``path``.

    Every row has a ``customer_id``. The other columns are filled from
    the top and left empty (NaN) for the trailing rows: 5 missing first
    names, 10 last names, 20 emails, 25 phones and 30 addresses.
    """
    n_rows = 100

    def pad_with_nan(values):
        # Extend a partially filled column with NaN up to n_rows entries.
        return values + [np.nan] * (n_rows - len(values))

    emails = [f"user{i}@example.com" for i in range(1, 81)]
    phones = [f"555-{i:04d}" for i in range(1, 76)]
    addresses = [f"{i} Main St" for i in range(1, 71)]

    frame = pd.DataFrame({
        'customer_id': range(1, n_rows + 1),
        'first_name': pad_with_nan(['John'] * 95),
        'last_name': pad_with_nan(['Doe'] * 90),
        'email': pad_with_nan(emails),
        'phone': pad_with_nan(phones),
        'address': pad_with_nan(addresses),
    })
    frame.to_csv(path, index=False)
    return path

if __name__ == "__main__":
    # Settings for the completeness analysis: where the data lives and
    # which columns a complete customer profile must contain.
    CONFIG = {
        'data_path': get_file_path(),
        'required_fields': [
            'customer_id',
            'first_name',
            'last_name',
            'email',
            'phone',
            'address',
        ],
    }

    # get_file_path may return a path that does not exist (for example
    # when the dialog is cancelled); generate sample data in that case.
    chosen_path = CONFIG['data_path']
    if not (chosen_path and Path(chosen_path).exists()):
        print("No valid file selected. Creating sample data...")
        CONFIG['data_path'] = create_sample_data("customer_profiles.csv")
        sample_location = Path(CONFIG['data_path']).absolute()
        print(f"Sample data created at {sample_location}")

    # Rest of your analysis code...

No valid file selected. Creating sample data...
Sample data created at /workspaces/AI_DATA_ANALYSIS_/src/Module 7/Measuring Data Accuracy, Completeness & Consistency/customer_profiles.csv
