In [9]:
def process_data(data):
    """Summarise eligible users by age bracket.

    A record is counted only when it has type ``'user'``, a truthy
    ``'active'`` flag, an ``'age'`` of at least 18 and an ``'email'``
    containing ``'@'``. Counted records are bucketed as ``'young_adult'``
    (18 up to, but not including, 25), ``'adult'`` (25 up to, but not
    including, 65) or ``'senior'`` (65 and over).

    Args:
        data: Iterable of dict-like user records.

    Returns:
        dict: Maps every bracket that received at least one user to a dict
        with the keys ``'count'``, ``'emails'`` and ``'avg_age'``.
    """
    summary = {}
    for record in data:
        # Guard clauses: drop the record as soon as one check fails.
        if 'type' not in record or record['type'] != 'user':
            continue
        if 'active' not in record or not record['active']:
            continue
        if 'age' not in record:
            continue
        age = record['age']
        if not (age >= 18):
            continue
        if 'email' not in record or '@' not in record['email']:
            continue

        if age >= 65:
            bracket = 'senior'
        elif age >= 25:
            bracket = 'adult'
        else:
            bracket = 'young_adult'

        bucket = summary.setdefault(
            bracket, {'count': 0, 'emails': [], 'total_age': 0}
        )
        bucket['count'] += 1
        bucket['emails'].append(record['email'])
        bucket['total_age'] += age

    # Swap each running total for the mean once all records have been seen.
    for bucket in summary.values():
        bucket['avg_age'] = bucket.pop('total_age') / bucket['count']

    return summary

# Clean Refactored Code:
from typing import List, Dict, Any, Optional
from dataclasses import dataclass

@dataclass
class UserCategory:
    """Running statistics for the users that fall into one age category."""
    count: int = 0  # number of users added so far
    # None is only a sentinel; __post_init__ swaps it for a fresh list so
    # instances never share one list object.
    emails: Optional[List[str]] = None
    total_age: float = 0  # sum of the ages of all added users
    avg_age: float = 0.0  # refreshed by calculate_average_age()

    def __post_init__(self) -> None:
        """Give the instance its own empty email list when none was passed."""
        if self.emails is None:
            self.emails = []

    def add_user(self, email: str, age: float) -> None:
        """Add a user to this category.

        Args:
            email: Email address to record.
            age: Age added to the running total.
        """
        self.count += 1
        self.emails.append(email)
        self.total_age += age

    def calculate_average_age(self) -> float:
        """Recompute, store and return the average age.

        Returns:
            float: ``total_age / count``, or ``0.0`` when the category is
            empty, so the stored average never lags behind the totals.
        """
        self.avg_age = self.total_age / self.count if self.count > 0 else 0.0
        return self.avg_age

def is_valid_user(item: Dict[str, Any]) -> bool:
    """
    Check if item is a valid active user with required fields.

    A record qualifies when its ``type`` is ``'user'``, its ``active`` value
    is truthy, its ``age`` is a number of at least 18 and its ``email`` is a
    string containing ``'@'``.

    Args:
        item: Dictionary containing user data

    Returns:
        bool: True if valid user, False otherwise (always a real ``bool``,
        never a falsy field value passed through)
    """
    if item.get('type') != 'user' or not item.get('active', False):
        return False

    age = item.get('age')
    # Type-check before comparing so a missing or non-numeric age is rejected
    # instead of raising TypeError.
    if not isinstance(age, (int, float)) or not age >= 18:
        return False

    email = item.get('email')
    # Same guard for email: `'@' in None` would raise TypeError.
    return isinstance(email, str) and '@' in email

def get_user_category(age: int) -> str:
    """
    Map an age onto the name of its age bracket.

    Args:
        age: User's age

    Returns:
        str: ``'senior'`` for 65 and over, ``'adult'`` for 25 and over,
        otherwise ``'young_adult'``
    """
    # Thresholds are listed highest first so the first match wins.
    brackets = ((65, 'senior'), (25, 'adult'))
    for minimum_age, label in brackets:
        if age >= minimum_age:
            return label
    return 'young_adult'

def process_user_data_clean(data: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """
    Refactored version with clean code principles.
    Process user data and categorize by age groups.

    Args:
        data: List of user data dictionaries

    Returns:
        Dict mapping each category name that received at least one valid
        user to a plain dict of that category's statistics, with the keys
        ``count``, ``emails``, ``total_age`` and ``avg_age``.
    """
    categories: Dict[str, UserCategory] = {}

    for item in data:
        if not is_valid_user(item):
            continue

        category_name = get_user_category(item['age'])

        # Create the category lazily so empty categories never appear.
        if category_name not in categories:
            categories[category_name] = UserCategory()

        categories[category_name].add_user(item['email'], item['age'])

    # Averages are derived once, after every user has been counted.
    for category in categories.values():
        category.calculate_average_age()

    # Callers receive plain dicts (the instances' attribute dicts), not
    # UserCategory objects.
    return {name: vars(category) for name, category in categories.items()}

# Alternative functional approach
def process_user_data_functional(data: List[Dict[str, Any]]) -> Dict[str, Dict]:
    """
    Functional programming style approach.

    Filters the records down to valid users, groups them by age category
    and computes per-category statistics.

    Args:
        data: List of user data dictionaries

    Returns:
        Dict mapping each category name that received at least one valid
        user to a dict with the keys ``count``, ``emails``, ``total_age``
        and ``avg_age``.
    """
    from collections import defaultdict

    def valid_user_filter(item):
        # Type-check age and email before using them so malformed records
        # (e.g. email=None) are skipped instead of raising TypeError.
        age = item.get('age')
        email = item.get('email')
        return bool(
            item.get('type') == 'user'
            and item.get('active', False)
            and isinstance(age, (int, float))
            and age >= 18
            and isinstance(email, str)
            and '@' in email
        )

    def categorize_user(item):
        age = item['age']
        if age >= 65:
            return 'senior'
        if age >= 25:
            return 'adult'
        return 'young_adult'

    # A bucket is created on first access, i.e. only when a user is added.
    categories = defaultdict(lambda: {'count': 0, 'emails': [], 'total_age': 0})

    for user in filter(valid_user_filter, data):
        bucket = categories[categorize_user(user)]
        bucket['count'] += 1
        bucket['emails'].append(user['email'])
        bucket['total_age'] += user['age']

    # Every bucket holds at least one user, so count is never zero here.
    for bucket in categories.values():
        bucket['avg_age'] = bucket['total_age'] / bucket['count']

    return dict(categories)
# Demo: run all three implementations on the same sample data and print
# each result.
# Sample records: only user1 (25), user2 (70) and user4 (40) pass every check.
# user3 is inactive, the admin entry is not of type 'user', and the
# 20-year-old's email contains no '@'.
test_data = [
    {'type': 'user', 'active': True, 'age': 25, 'email': 'user1@test.com'},
    {'type': 'user', 'active': True, 'age': 70, 'email': 'user2@test.com'},
    {'type': 'user', 'active': False, 'age': 30, 'email': 'user3@test.com'},
    {'type': 'admin', 'active': True, 'age': 35, 'email': 'admin@test.com'},
    {'type': 'user', 'active': True, 'age': 20, 'email': 'invalid-email'},
    {'type': 'user', 'active': True, 'age': 40, 'email': 'user4@test.com'},
]

# Original nested implementation.
print("Original result:")
original_result = process_data(test_data)
print(original_result)

# Dataclass-based refactoring.
print("\nClean result:")
clean_result = process_user_data_clean(test_data)
print(clean_result)

# filter/defaultdict-based variant.
print("\nFunctional result:")
functional_result = process_user_data_functional(test_data)
print(functional_result)


Original result:
{'adult': {'count': 2, 'emails': ['user1@test.com', 'user4@test.com'], 'avg_age': 32.5}, 'senior': {'count': 1, 'emails': ['user2@test.com'], 'avg_age': 70.0}}

Clean result:
{'adult': {'count': 2, 'emails': ['user1@test.com', 'user4@test.com'], 'total_age': 65, 'avg_age': 32.5}, 'senior': {'count': 1, 'emails': ['user2@test.com'], 'total_age': 70, 'avg_age': 70.0}}

Functional result:
{'adult': {'count': 2, 'emails': ['user1@test.com', 'user4@test.com'], 'total_age': 65, 'avg_age': 32.5}, 'senior': {'count': 1, 'emails': ['user2@test.com'], 'total_age': 70, 'avg_age': 70.0}}
