In [1]:
# --- Basic E-commerce Data Analysis Project ---
# This project demonstrates fundamental Python concepts (data types,
# control flow, functions, lists, dictionaries) by performing
# simple analysis on a simulated e-commerce dataset.

# --- 1. Data Setup ---
# Each table is a list of dictionaries: one dictionary per record, keyed by
# column name (much like a row in a database table). Rows are written as
# compact tuples and expanded into dictionaries, so the column names are
# spelled out only once per table.

# Customers: (customer_id, name, age, gender)
customers = [
    {"customer_id": cid, "name": name, "age": age, "gender": gender}
    for cid, name, age, gender in (
        (1, "Alice", 30, "Female"),
        (2, "Bob", 24, "Male"),
        (3, "Carol", 35, "Female"),
        (4, "David", 29, "Male"),
        (5, "Eve", 22, "Female"),
        (6, "Frank", 40, "Male"),
        (7, "Grace", 28, "Female"),
        (8, "Henry", 32, "Male"),
    )
]

# Products: (product_id, name, price)
products = [
    {"product_id": pid, "name": name, "price": price}
    for pid, name, price in (
        (101, "Laptop", 1200.00),
        (102, "Mouse", 25.50),
        (103, "Keyboard", 75.00),
        (104, "Monitor", 300.00),
        (105, "Webcam", 50.00),
        (106, "Headphones", 150.00),
    )
]

# Orders: (order_id, customer_id, product_id, quantity, date)
# Each order links one customer to one product; dates are ISO-formatted strings.
orders = [
    {"order_id": oid, "customer_id": cid, "product_id": pid, "quantity": qty, "date": day}
    for oid, cid, pid, qty, day in (
        (1001, 1, 101, 1, "2025-01-15"),
        (1002, 2, 102, 2, "2025-01-16"),
        (1003, 1, 103, 1, "2025-01-17"),
        (1004, 4, 101, 1, "2025-01-18"),
        (1005, 2, 105, 3, "2025-01-19"),
        (1006, 3, 104, 1, "2025-01-20"),
        (1007, 1, 102, 1, "2025-01-21"),
        (1008, 5, 106, 1, "2025-01-22"),
        (1009, 4, 103, 2, "2025-01-23"),
        (1010, 6, 101, 1, "2025-01-24"),
        (1011, 7, 105, 1, "2025-01-25"),
        (1012, 8, 104, 1, "2025-01-26"),
        (1013, 1, 106, 1, "2025-01-27"),
    )
]

print("Data setup complete!")
# To inspect a table, loop over it and print each record, for example:
# for customer in customers:
#     print(customer)

Data setup complete!


In [2]:
# Calculating the Total Revenue.

In [3]:
# --- 2. Analysis Task: Calculate Total Revenue ---
# Goal: Sum the revenue of every order, where
#     order revenue = quantity ordered * unit price of the product.

# Map product_id -> unit price once, so each order needs a single dictionary
# lookup instead of a scan of the 'products' list.
product_prices = {product['product_id']: product['price'] for product in products}

total_revenue = 0.0
unknown_product_ids = []  # product IDs seen in 'orders' that have no entry in 'products'

for order in orders:
    product_id = order['product_id']
    quantity = order['quantity']

    if product_id in product_prices:
        price = product_prices[product_id]
    else:
        # Best-effort: value the order at 0.0 so the analysis still completes,
        # but remember the ID so the gap is reported below instead of silently
        # shrinking the total.
        price = 0.0
        if product_id not in unknown_product_ids:
            unknown_product_ids.append(product_id)

    order_subtotal = quantity * price
    total_revenue += order_subtotal

print("\n--- Analysis Result: Total Revenue ---")
print(f"Total revenue generated from all orders: ${total_revenue:,.2f}")
if unknown_product_ids:
    # Only printed when the order data references products that are not in 'products'.
    print(f"Warning: no price found for product ID(s) {unknown_product_ids}; "
          f"their orders were counted as $0.00.")


--- Analysis Result: Total Revenue ---
Total revenue generated from all orders: $5,001.50


In [4]:
# --- 3. Analysis Task: Customer Spending Summary ---
# Goal: Total up what each customer has spent and report it by customer name.
# Spending is accumulated per customer ID first; names are attached only at
# print time.

customer_spending = {}  # customer_id -> running total of that customer's order values

for order in orders:
    customer_id = order['customer_id']

    # Unit price comes from the product_prices lookup built earlier;
    # a product ID missing from it is valued at 0.0.
    unit_price = product_prices.get(order['product_id'], 0.0)
    order_total = order['quantity'] * unit_price

    # First order seen for this customer: start their running total at 0.0.
    if customer_id not in customer_spending:
        customer_spending[customer_id] = 0.0
    customer_spending[customer_id] = customer_spending[customer_id] + order_total

# customer_id -> name, so the report can show names instead of bare IDs
customer_names = {record['customer_id']: record['name'] for record in customers}

print(f"\n--- Analysis Result: Customer Spending Summary ---")
# Customers are listed in the order their first order appears in 'orders'.
for customer_id in customer_spending:
    total_spent = customer_spending[customer_id]
    if customer_id in customer_names:
        customer_name = customer_names[customer_id]
    else:
        customer_name = f"Unknown Customer ({customer_id})"
    print(f"{customer_name} (ID: {customer_id}) spent: ${total_spent:,.2f}")


--- Analysis Result: Customer Spending Summary ---
Alice (ID: 1) spent: $1,450.50
Bob (ID: 2) spent: $201.00
David (ID: 4) spent: $1,350.00
Carol (ID: 3) spent: $300.00
Eve (ID: 5) spent: $150.00
Frank (ID: 6) spent: $1,200.00
Grace (ID: 7) spent: $50.00
Henry (ID: 8) spent: $300.00


In [5]:
# --- 4. Analysis Task: Most Popular Product (by quantity sold) ---
# Goal: Identify the product(s) with the largest total quantity across all orders.

product_quantities = {}  # product_id -> total units ordered across all orders

# Accumulate the ordered quantity per product
for order in orders:
    product_id = order['product_id']
    quantity = order['quantity']
    product_quantities[product_id] = product_quantities.get(product_id, 0) + quantity

print("\n--- Analysis Result: Most Popular Product (by Quantity) ---")
if product_quantities:  # max() needs at least one value
    # Take the maximum straight from the data. A running maximum seeded with 0
    # would report "0 units" and an empty product list if every total were negative.
    max_quantity = max(product_quantities.values())

    # Keep every product that reaches the maximum so ties are all reported.
    most_popular_product_ids = [
        pid for pid, total_qty in product_quantities.items() if total_qty == max_quantity
    ]

    # product_id -> name, so the report can show names instead of bare IDs
    product_names = {product['product_id']: product['name'] for product in products}

    print(f"Products with the highest quantity sold ({max_quantity} units):")
    for prod_id in most_popular_product_ids:
        product_name = product_names.get(prod_id, f"Unknown Product ({prod_id})")
        print(f"- {product_name} (ID: {prod_id})")
else:
    print("No orders found to determine the most popular product.")


--- Analysis Result: Most Popular Product (by Quantity) ---
Products with the highest quantity sold (4 units):
- Webcam (ID: 105)


In [6]:
# --- 5. Analysis Task: Filtered Customers (e.g., customers above a certain age who have placed orders) ---
# Goal: List the customers who are strictly older than a threshold age AND
# appear on at least one order.

min_age_filter = 25  # exclusive threshold: a customer must be older than this to qualify

# IDs of every customer that appears on an order; a set gives fast membership tests
customers_with_orders = set(order['customer_id'] for order in orders)

filtered_customers = []  # full customer records that pass both checks

for customer in customers:
    customer_id = customer['customer_id']
    customer_age = customer['age']

    # Skip anyone who is not older than the threshold ...
    if not customer_age > min_age_filter:
        continue
    # ... and anyone who has never placed an order.
    if customer_id not in customers_with_orders:
        continue

    filtered_customers.append(customer)

print(f"\n--- Analysis Result: Filtered Customers ---")
if not filtered_customers:
    print(f"No customers found older than {min_age_filter} who have placed orders.")
else:
    print(f"Customers older than {min_age_filter} who have placed orders:")
    for customer in filtered_customers:
        print(f"- Name: {customer['name']}, Age: {customer['age']}, ID: {customer['customer_id']}")


--- Analysis Result: Filtered Customers ---
Customers older than 25 who have placed orders:
- Name: Alice, Age: 30, ID: 1
- Name: Carol, Age: 35, ID: 3
- Name: David, Age: 29, ID: 4
- Name: Frank, Age: 40, ID: 6
- Name: Grace, Age: 28, ID: 7
- Name: Henry, Age: 32, ID: 8
