# Functions and Lambda Functions

## Learning Objectives
By the end of this notebook, you will be able to:
- Create and use functions
- Understand function parameters, return values, and scope
- Write and apply lambda functions for data processing
- Pass functions to built-ins such as map(), filter(), and sorted()
- Apply functional programming concepts to data engineering tasks

## 1. Basic Function Definition and Usage

In [None]:
# Basic function
def calculate_total_sales(quantities, prices):
    # Sum quantity * price over positionally paired items.
    # zip() stops at the shorter input, so unmatched trailing entries are
    # ignored. The accumulator starts at 0.0, so empty input yields 0.0.
    running_total = 0.0
    for pair in zip(quantities, prices):
        units, unit_price = pair
        running_total = running_total + units * unit_price
    return running_total

# Test the function.
# The two lists are parallel: position i in each describes the same item.
item_quantities = [2, 1, 3, 5]
item_prices = [29.99, 199.99, 15.50, 8.75]

total_sales = calculate_total_sales(item_quantities, item_prices)
# Display the total with two decimal places
print(f"Total sales: ${total_sales:.2f}")

# Function with default parameters
def format_currency(amount, currency="USD", decimal_places=2):
    # Render amount in fixed-point notation with decimal_places digits after
    # the decimal point, followed by a space and the currency code.
    formatted = f"{amount:.{decimal_places}f} {currency}"
    return formatted

# Test with different parameters: all defaults, a custom currency code
# passed positionally, and zero decimal places
print(f"Default formatting: {format_currency(1234.567)}")
print(f"EUR formatting: {format_currency(1234.567, 'EUR')}")
print(f"No decimals: {format_currency(1234.567, 'USD', 0)}")

## 2. Functions with Different Parameter Types

In [None]:
# Function with optional parameters
def process_user_data(user_id, name, email=None, age=None):
    # Build a user record dict from the supplied fields.
    #
    # Parameters:
    #   user_id, name - stored in the record unchanged.
    #   email         - optional; None or an empty string is stored as
    #                   "Not provided".
    #   age           - optional; only None is stored as "Unknown".
    #
    # Returns a new dict with the keys user_id, name, email, age, created_at.
    user_record = {
        "user_id": user_id,
        "name": name,
        "email": email if email else "Not provided",
        # Compare against None explicitly: a plain truthiness test would
        # treat the valid age 0 as missing and report it as "Unknown".
        "age": age if age is not None else "Unknown",
        # Fixed example timestamp; it is not taken from the clock.
        "created_at": "2024-01-15T10:30:00Z"
    }
    return user_record

# Test with different parameter combinations: required fields only,
# with an email, and with both email and age
user1 = process_user_data("USR001", "Alice Johnson")
user2 = process_user_data("USR002", "Bob Smith", "bob@example.com")
user3 = process_user_data("USR003", "Charlie Brown", "charlie@example.com", 28)

# Each record prints as a plain dict
print(f"User 1: {user1}")
print(f"User 2: {user2}")
print(f"User 3: {user3}")

# Function with variable arguments (*args)
def calculate_average(*numbers):
    # Arithmetic mean of the positional arguments.
    # With no arguments there is nothing to divide by, so 0.0 is returned.
    count = len(numbers)
    if count == 0:
        return 0.0
    return sum(numbers) / count

# Test with different numbers of arguments (*args accepts any count)
avg1 = calculate_average(10.5, 20.3, 15.7)
avg2 = calculate_average(100, 200, 300, 400, 500)
avg3 = calculate_average(42.0)

# The leading "\n" prints a blank line before this group of results
print(f"\nAverage of 3 numbers: {avg1:.2f}")
print(f"Average of 5 numbers: {avg2:.2f}")
print(f"Average of 1 number: {avg3:.2f}")

# Function with keyword arguments (**kwargs)
def create_product_record(product_id, name, **additional_info):
    # Build a product dict: the two required fields come first, followed by
    # every extra keyword argument in the order it was passed.
    return {
        "product_id": product_id,
        "name": name,
        **additional_info,
    }

# Test with different keyword arguments: every keyword beyond product_id
# and name is collected by **additional_info and ends up as a key in the record
product1 = create_product_record("PRD001", "Laptop", price=999.99, category="Electronics")
product2 = create_product_record("PRD002", "Desk", price=299.99, category="Furniture", weight=25.5, color="Brown")

print(f"\nProduct 1: {product1}")
print(f"Product 2: {product2}")

## 3. Lambda Functions (Anonymous Functions)

In [None]:
# Basic lambda functions
# Lambda syntax: lambda arguments: expression

# Simple mathematical operations.
# Each lambda is bound to a name so it can be called like a regular function.
square = lambda x: x ** 2
add_ten = lambda x: x + 10
multiply = lambda x, y: x * y  # a lambda may take more than one argument

print(f"Square of 5: {square(5)}")
print(f"Add 10 to 15: {add_ten(15)}")
print(f"Multiply 4 and 7: {multiply(4, 7)}")

# Lambda functions for data processing.
# Each sale is a dict with the keys "product", "quantity", and "price".
sales_data = [
    {"product": "Laptop", "quantity": 2, "price": 999.99},
    {"product": "Mouse", "quantity": 5, "price": 29.99},
    {"product": "Keyboard", "quantity": 3, "price": 79.99},
    {"product": "Monitor", "quantity": 1, "price": 299.99}
]

# Lambda to calculate total value (quantity * price) for each item
calculate_total = lambda item: item["quantity"] * item["price"]

print("\nSales data with totals:")
for item in sales_data:
    total = calculate_total(item)
    print(f"{item['product']}: {item['quantity']} × ${item['price']:.2f} = ${total:.2f}")

# Lambda for data validation.
# is_high_value reuses calculate_total and is True when the total exceeds 200.
is_high_value = lambda item: calculate_total(item) > 200
# Case-insensitive membership check against a fixed list of product names
# (defined here but not called in this cell).
is_electronics = lambda product: product.lower() in ["laptop", "mouse", "keyboard", "monitor"]

print("\nHigh-value items:")
for item in sales_data:
    if is_high_value(item):
        print(f"{item['product']}: ${calculate_total(item):.2f}")

## 4. Using Lambda Functions with Built-in Functions

In [None]:
# Sample employee data: one dict per employee with the keys
# "name", "department", "salary", and "years_experience"
employees = [
    {"name": "Alice", "department": "Engineering", "salary": 75000, "years_experience": 5},
    {"name": "Bob", "department": "Marketing", "salary": 65000, "years_experience": 3},
    {"name": "Charlie", "department": "Engineering", "salary": 85000, "years_experience": 7},
    {"name": "Diana", "department": "Sales", "salary": 70000, "years_experience": 4},
    {"name": "Eve", "department": "Engineering", "salary": 90000, "years_experience": 8}
]

print(f"Original employees: {len(employees)} records")

# Using filter() with lambda.
# filter() returns a lazy iterator, so it is wrapped in list() to get a list
# that supports len() and can be iterated more than once.
# Filter engineers only
engineers = list(filter(lambda emp: emp["department"] == "Engineering", employees))
print(f"\nEngineers: {len(engineers)} found")
for eng in engineers:
    print(f"  {eng['name']}: ${eng['salary']:,}")

# Filter high earners (salary > 70000; a salary of exactly 70000 is excluded)
high_earners = list(filter(lambda emp: emp["salary"] > 70000, employees))
print(f"\nHigh earners (>$70k): {len(high_earners)} found")
for emp in high_earners:
    print(f"  {emp['name']}: ${emp['salary']:,}")

# Using map() with lambda.
# map() applies the function to every element and yields the results lazily;
# list() materializes them in the original order.
# Extract just the names
names = list(map(lambda emp: emp["name"], employees))
print(f"\nEmployee names: {names}")

# Calculate annual bonus (10% of salary); the results are floats
bonuses = list(map(lambda emp: emp["salary"] * 0.10, employees))
print(f"\nAnnual bonuses: {[f'${bonus:,.2f}' for bonus in bonuses]}")

# Create summary strings of the form "Name (Department): $salary"
summaries = list(map(
    lambda emp: f"{emp['name']} ({emp['department']}): ${emp['salary']:,}", 
    employees
))
print("\nEmployee summaries:")
for summary in summaries:
    print(f"  {summary}")

# Using sorted() with lambda.
# sorted() returns a new list and leaves employees unchanged; the key
# function picks the value each record is ordered by.
# Sort by salary (ascending)
by_salary = sorted(employees, key=lambda emp: emp["salary"])
print("\nSorted by salary (ascending):")
for emp in by_salary:
    print(f"  {emp['name']}: ${emp['salary']:,}")

# Sort by experience (descending)
by_experience = sorted(employees, key=lambda emp: emp["years_experience"], reverse=True)
print("\nSorted by experience (descending):")
for emp in by_experience:
    print(f"  {emp['name']}: {emp['years_experience']} years")

# Sort by multiple criteria (department, then salary).
# Tuple keys compare element by element, so salary only breaks ties
# between records in the same department.
by_dept_salary = sorted(employees, key=lambda emp: (emp["department"], emp["salary"]))
print("\nSorted by department, then salary:")
for emp in by_dept_salary:
    print(f"  {emp['department']}: {emp['name']} (${emp['salary']:,})")

## 5. Higher-Order Functions

In [None]:
# Functions that take other functions as parameters

def apply_transformation(data, transform_func):
    # Call transform_func on every element of data, in order, and collect
    # the results in a new list.
    results = []
    for value in data:
        results.append(transform_func(value))
    return results

# Sample temperature data in Celsius
celsius_temps = [0, 10, 20, 25, 30, 35]

# Define transformation functions (each maps one Celsius value to another unit)
celsius_to_fahrenheit = lambda c: (c * 9/5) + 32
celsius_to_kelvin = lambda c: c + 273.15

# Apply transformations: the same helper is reused with a different function
fahrenheit_temps = apply_transformation(celsius_temps, celsius_to_fahrenheit)
kelvin_temps = apply_transformation(celsius_temps, celsius_to_kelvin)

# The converted values are formatted to a fixed number of decimals for display
print(f"Celsius: {celsius_temps}")
print(f"Fahrenheit: {[f'{temp:.1f}' for temp in fahrenheit_temps]}")
print(f"Kelvin: {[f'{temp:.2f}' for temp in kelvin_temps]}")

# Function that returns a function (closure)
def create_multiplier(factor):
    # Return a one-argument function that multiplies its input by factor.
    # The returned function is a closure: it keeps access to factor after
    # create_multiplier has returned.
    return lambda value: value * factor

# Create specific multiplier functions; each call captures its own factor
double = create_multiplier(2)
triple = create_multiplier(3)
percent_to_decimal = create_multiplier(0.01)

# Test the created functions
test_values = [5, 10, 15, 20]

doubled = apply_transformation(test_values, double)
tripled = apply_transformation(test_values, triple)
percentages = [85, 92, 78, 96]  # Test scores as percentages
decimals = apply_transformation(percentages, percent_to_decimal)

# decimals holds floats because the factor 0.01 is a float
print(f"\nOriginal: {test_values}")
print(f"Doubled: {doubled}")
print(f"Tripled: {tripled}")
print(f"Percentages: {percentages}")
print(f"As decimals: {decimals}")

# Function composition
def compose_functions(func1, func2):
    # Build a function that feeds its argument through func1 first and then
    # passes that result to func2, i.e. x -> func2(func1(x)).
    def composed(x):
        intermediate = func1(x)
        return func2(intermediate)
    return composed

# Compose functions: convert Celsius to Fahrenheit, then round.
# The conversion runs first and its result is rounded to one decimal place.
celsius_to_fahrenheit_rounded = compose_functions(
    celsius_to_fahrenheit,
    lambda x: round(x, 1)
)

# The composed function plugs into apply_transformation like any other callable
rounded_fahrenheit = apply_transformation(celsius_temps, celsius_to_fahrenheit_rounded)
print(f"\nCelsius to Fahrenheit (rounded): {rounded_fahrenheit}")

## 6. Practical Data Engineering Examples

In [None]:
# Example 1: Data validation pipeline
def create_validator(min_value, max_value):
    # Return a predicate that reports whether a value lies within
    # [min_value, max_value]; both bounds are inclusive.
    def in_range(value):
        return min_value <= value <= max_value
    return in_range

# Create specific validators; each one remembers its own inclusive bounds
temperature_validator = create_validator(-50, 50)  # Celsius range
percentage_validator = create_validator(0, 100)
age_validator = create_validator(0, 120)

# Sample sensor data (includes readings outside the -50..50 range)
sensor_readings = [22.5, 105.0, -60.0, 25.3, 30.1, 200.0, 18.7]

# Validate temperature readings.
# The validator is passed to filter() directly as the predicate; the second
# call negates it to collect the rejected readings.
valid_temps = list(filter(temperature_validator, sensor_readings))
invalid_temps = list(filter(lambda x: not temperature_validator(x), sensor_readings))

print(f"All readings: {sensor_readings}")
print(f"Valid temperatures: {valid_temps}")
print(f"Invalid temperatures: {invalid_temps}")

# Example 2: Data transformation pipeline
def process_sales_record(record):
    # Return a copy of a sales record extended with the calculated fields
    # "subtotal", "discount_amount", and "total", each rounded to 2 decimals.
    # The input record itself is not modified.
    enriched = record.copy()

    # Price before any discount
    gross = record["quantity"] * record["unit_price"]

    # A missing "discount_percent" counts as no discount
    discount_rate = record.get("discount_percent", 0) / 100
    reduction = gross * discount_rate
    net = gross - reduction

    enriched.update(
        subtotal=round(gross, 2),
        discount_amount=round(reduction, 2),
        total=round(net, 2),
    )
    return enriched

# Sample sales data; discount_percent is a percentage (10 means 10%)
raw_sales = [
    {"product": "Laptop", "quantity": 2, "unit_price": 999.99, "discount_percent": 10},
    {"product": "Mouse", "quantity": 5, "unit_price": 29.99, "discount_percent": 0},
    {"product": "Keyboard", "quantity": 3, "unit_price": 79.99, "discount_percent": 5}
]

# Process all sales records; a named function is passed to map() just like a lambda
processed_sales = list(map(process_sales_record, raw_sales))

print("\nProcessed Sales Records:")
for sale in processed_sales:
    print(f"{sale['product']}: {sale['quantity']} × ${sale['unit_price']:.2f} = ${sale['total']:.2f} (after discount)")

# Calculate total revenue by summing the discounted total of every record
total_revenue = sum(map(lambda sale: sale["total"], processed_sales))
print(f"\nTotal Revenue: ${total_revenue:.2f}")

# Example 3: Data aggregation with functions
def group_by_key(data, key):
    # Bucket records by the value stored under key.
    # Records that lack the key are collected under "Unknown". Groups appear
    # in first-seen order and keep their records in input order.
    buckets = {}
    for row in data:
        buckets.setdefault(row.get(key, "Unknown"), []).append(row)
    return buckets

# Sample transaction data: one dict per purchase with the keys
# "customer_id", "amount", and "category"
transactions = [
    {"customer_id": "CUST001", "amount": 150.00, "category": "Electronics"},
    {"customer_id": "CUST002", "amount": 75.50, "category": "Books"},
    {"customer_id": "CUST001", "amount": 200.00, "category": "Clothing"},
    {"customer_id": "CUST003", "amount": 99.99, "category": "Electronics"},
    {"customer_id": "CUST002", "amount": 45.00, "category": "Books"}
]

# Group by customer: maps each customer_id to the list of its transactions
by_customer = group_by_key(transactions, "customer_id")

print("\nTransactions by Customer:")
for customer_id, customer_transactions in by_customer.items():
    # Sum the amounts within this customer's group
    total_spent = sum(map(lambda t: t["amount"], customer_transactions))
    print(f"{customer_id}: {len(customer_transactions)} transactions, ${total_spent:.2f} total")

# Group by category: same helper, different key
by_category = group_by_key(transactions, "category")

print("\nTransactions by Category:")
for category, category_transactions in by_category.items():
    # NOTE: this rebinds total_revenue, which earlier in this cell held the
    # sales total; after the loop it holds the last category's revenue.
    total_revenue = sum(map(lambda t: t["amount"], category_transactions))
    # Each group holds at least one record, so the division is safe
    avg_transaction = total_revenue / len(category_transactions)
    print(f"{category}: {len(category_transactions)} transactions, ${total_revenue:.2f} revenue, ${avg_transaction:.2f} avg")

## Practice Exercises

Complete the following exercises to reinforce your understanding:

In [None]:
# Exercise 1: Create a function that calculates statistics
def calculate_statistics(numbers):
    # Calculate basic statistics for a list of numbers.
    # Should return a dictionary with mean, median, min, max, and count.
    # TODO: Implement this function (exercise stub; it currently returns None).
    pass

# Test data for Exercise 1
test_numbers = [10.5, 20.3, 15.7, 25.1, 18.9, 22.4, 16.8]

# Expected output should include mean, median, min, max, count.
# Uncomment the two lines below once calculate_statistics is implemented.
# stats = calculate_statistics(test_numbers)
# print(f"Statistics: {stats}")

In [None]:
# Exercise 2: Create a data cleaning function using lambda
def clean_data_records(records, required_fields, validator_func):
    # Clean data records by filtering out invalid ones.
    # Intended steps:
    # 1. Keep only records that have all of the required fields
    # 2. Apply the validator function to the remaining records
    # 3. Return the cleaned records
    # TODO: Implement this function (exercise stub; it currently returns None).
    pass

# Test data with some invalid records (see the inline notes for what is wrong)
dirty_records = [
    {"name": "Alice", "age": 25, "salary": 50000},
    {"name": "Bob", "salary": 60000},  # Missing age
    {"name": "Charlie", "age": -5, "salary": 55000},  # Invalid age
    {"name": "Diana", "age": 30, "salary": 70000},
    {"age": 28, "salary": 45000},  # Missing name
]

# Create a validator that checks age > 0 and salary > 0.
# Uncomment it together with the test call below once clean_data_records is implemented.
# is_valid_record = lambda record: record.get("age", 0) > 0 and record.get("salary", 0) > 0

# Expected: Should return only Alice and Diana
# clean_records = clean_data_records(dirty_records, ["name", "age", "salary"], is_valid_record)
# print(f"Clean records: {clean_records}")