In [8]:
import pandas as pd
import numpy as np

def analyze_sales_data(df, group_by_column):
    """
    Aggregate sales and profit per group and derive a profit margin.

    Missing values in 'sales' and 'profit' are replaced by 0 before
    aggregating, so such rows still count towards the group means.

    Args:
        df: DataFrame holding numeric 'sales' and 'profit' columns plus the
            column named by ``group_by_column`` (e.g. 'product', 'category').
        group_by_column: Column name to group by.

    Returns:
        DataFrame indexed by the values of ``group_by_column`` with columns
        'sales_sum', 'sales_mean', 'profit_sum', 'profit_mean' and
        'profit_margin' (profit_sum / sales_sum; NaN when sales_sum is 0).
        If ``df`` is empty or ``group_by_column`` is not one of its columns,
        an empty float64 DataFrame with those same columns is returned.

    Raises:
        KeyError: If ``df`` is non-empty but lacks 'sales' or 'profit'.
    """
    result_columns = ['sales_sum', 'sales_mean', 'profit_sum', 'profit_mean', 'profit_margin']

    # Nothing to aggregate: hand back an empty frame with the result schema.
    # The explicit float dtype keeps the columns numeric (instead of object),
    # so the fallback can be concatenated with real results safely.
    if df.empty or group_by_column not in df.columns:
        return pd.DataFrame(columns=result_columns, dtype='float64')

    # Fail early with a descriptive message rather than a bare KeyError
    # raised from deep inside the fill/aggregation steps.
    missing = [name for name in ('sales', 'profit') if name not in df.columns]
    if missing:
        raise KeyError(f"analyze_sales_data requires column(s) {missing} in df")

    # assign() works on a copy, so the caller's DataFrame is left untouched.
    filled = df.assign(
        sales=df['sales'].fillna(0),
        profit=df['profit'].fillna(0),
    )

    # Named aggregation yields the flat output column names directly.
    grouped = filled.groupby(group_by_column).agg(
        sales_sum=('sales', 'sum'),
        sales_mean=('sales', 'mean'),
        profit_sum=('profit', 'sum'),
        profit_mean=('profit', 'mean'),
    )

    # Division by a zero sales total gives +/-inf (or NaN for 0/0); report
    # every such undefined margin uniformly as NaN.
    margin = grouped['profit_sum'] / grouped['sales_sum']
    grouped['profit_margin'] = margin.replace([np.inf, -np.inf], np.nan)

    return grouped

# Demo: run the analysis on a small hand-written data set.
# One 'sales' entry is deliberately NaN to exercise the missing-value path.
sample_data = pd.DataFrame(
    {
        'product': ['A', 'B', 'C', 'A', 'B', 'C', 'A'],
        'category': [
            'Electronics', 'Electronics', 'Clothing',
            'Electronics', 'Electronics', 'Clothing',
            'Electronics',
        ],
        'sales': [100, 200, 150, 120, np.nan, 180, 110],
        'profit': [20, 50, 30, 25, 40, 35, 22],
    }
)

# Show the raw input first.
print("Sample data:", sample_data, sep="\n")

# Per-product aggregation.
result = analyze_sales_data(sample_data, 'product')
print("\nAnalysis by product:", result, sep="\n")

# Per-category aggregation.
result_category = analyze_sales_data(sample_data, 'category')
print("\nAnalysis by category:", result_category, sep="\n")


Sample data:
  product     category  sales  profit
0       A  Electronics  100.0      20
1       B  Electronics  200.0      50
2       C     Clothing  150.0      30
3       A  Electronics  120.0      25
4       B  Electronics    NaN      40
5       C     Clothing  180.0      35
6       A  Electronics  110.0      22

Analysis by product:
         sales_sum  sales_mean  profit_sum  profit_mean  profit_margin
product                                                               
A            330.0       110.0          67    22.333333        0.20303
B            200.0       100.0          90    45.000000        0.45000
C            330.0       165.0          65    32.500000        0.19697

Analysis by category:
             sales_sum  sales_mean  profit_sum  profit_mean  profit_margin
category                                                                  
Clothing         330.0       165.0          65         32.5       0.196970
Electronics      530.0       106.0         157         31.