# E-Commerce Customer Behavior Analysis

Take-home exercise for Week 9

## Scenario
You work as a data analyst for an e-commerce platform. The marketing team wants to understand customer purchase behavior to improve retention and increase sales. They've provided you with customer transaction data spanning 6 months.

In [None]:
import csv

In [None]:
# Part 1: Data Loading
def load_transaction_data(file_path):
    """Load transaction data from CSV"""
    transactions = []
    
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            
            for row in reader:
                # Convert Purchase_Amount to float
                row['Purchase_Amount'] = float(row['Purchase_Amount'])
                transactions.append(row)
        
        return transactions
    except FileNotFoundError:
        print(f"Error: {file_path} not found")
        return []
    except Exception as e:
        print(f"Error: {e}")
        return []

In [None]:
def build_customer_profiles(transactions: list(str)):
    """
    Build comprehensive customer profiles from transactions
    Returns: Dictionary with customer_id as key and profile data as a dictionary of key/value pair
    """
    profiles = {}

    for transaction in transactions:
        customer_id = transaction.get("Customer_ID") # or using transaction["Customer_ID"]

        if customer_id not in profiles:
            profiles[customer_id] = {
                "customer_id": transaction["Customer_ID"],
                "name": transaction["Customer_Name"],
                "email": transaction["Email"],
                "signup_date": transaction["Signup_Date"],
                "transactions": [],
                "transactions_count": 0,
                "total_spent": 0,
                "product_categories": set(),
                "payment_methods": set(),
                "devices": set(),
            }

        # Update profile with transaction data
        profile = profiles[customer_id]
        profile["transactions"].append(transaction)
        profile["total_spent"] += transaction["Purchase_Amount"]
        profile["product_categories"].add(transaction["Product_Category"])
        profile["payment_methods"].add(transaction["Payment_Method"])
        profile["devices"].add(transaction["Device"])

    return profiles

In [100]:
def get_total_transactions(data):
    return len(data["transactions"])

In [None]:
# Create display_customer_summary(customer_profiles) that shows:

# Total unique customers
# Average transactions per customer
# Average spend per customer
# Customer with most transactions
# Customer with highest total spend
from pprint import pprint

def display_customer_summary(customer_profiles):
    """Display dataset summary"""

    # Total unique customers
    unique_customers_count = len(customer_profiles.keys())

    # Average transactions per customer
    total_transactions = sum([len(p["transactions"]) for p in customer_profiles.values()])
    avg_transactions = total_transactions / unique_customers_count

    # # Average spend per customer
    total_spent = sum([p["total_spent"] for p in customer_profiles.values()])
    avg_clv = total_spent / unique_customers_count

    # Customer with most transactions
    # most_active = max(customer_profiles.values(), key=lambda x: len(x["transactions"]))

    most_active = max(customer_profiles.values(), key=get_total_transactions)

    print("What is most active?", len(most_active["transactions"]))

    # Customer with highest total spend
    most_transactions = 0
    customer = None

    for profile in customer_profiles.values():
        tx_count = len(profile["transactions"])

        if tx_count > most_transactions:
            most_transactions = tx_count
            customer = profile["customer_id"]
    
    # The 5 lines of code below performs the same function as the code line above
    # purchases = []
    # for t in transactions:
    #     purchases.append(t["Purchase_Amount"])

    # total_revenue = sum(purchases)


def testing_func():
    """"""
    pass

In [103]:
from pprint import pprint

data = load_transaction_data("customer_transactions.csv")

profiles = build_customer_profiles(data)

display_customer_summary(profiles)

pprint(profiles)

What is most active? 15
{'CUST-0001': {'customer_id': 'CUST-0001',
               'devices': {'Desktop', 'Tablet', 'Mobile'},
               'email': 'sam.king258@email.com',
               'name': 'Sam King',
               'payment_methods': {'Apple Pay',
                                   'Bank Transfer',
                                   'Credit Card',
                                   'Debit Card',
                                   'Google Pay',
                                   'PayPal'},
               'product_categories': {'Beauty & Health',
                                      'Clothing',
                                      'Food & Grocery',
                                      'Jewelry',
                                      'Toys & Games'},
               'signup_date': '2024-01-09',
               'total_spent': 2638.64,
               'transactions': [{'Customer_ID': 'CUST-0001',
                                 'Customer_Name': 'Sam King',
                       